feat(health): SiteAuditBacklog metric (count + age + bytes) (#23 M6)

This commit is contained in:
Joseph Doherty
2026-05-20 19:02:01 -04:00
parent 75b060e0a8
commit e93f655ce4
11 changed files with 511 additions and 2 deletions

View File

@@ -218,6 +218,14 @@ public static class ServiceCollectionExtensions
ServiceDescriptor.Singleton<IAuditWriteFailureCounter, HealthMetricsAuditWriteFailureCounter>()); ServiceDescriptor.Singleton<IAuditWriteFailureCounter, HealthMetricsAuditWriteFailureCounter>());
services.Replace( services.Replace(
ServiceDescriptor.Singleton<IAuditRedactionFailureCounter, HealthMetricsAuditRedactionFailureCounter>()); ServiceDescriptor.Singleton<IAuditRedactionFailureCounter, HealthMetricsAuditRedactionFailureCounter>());
// M6 Bundle E (T6): the site-side backlog reporter polls the
// SqliteAuditWriter every 30 s and pushes the snapshot into the
// collector so the next SiteHealthReport carries a fresh
// SiteAuditBacklog field. Registered alongside the other site-only
// metric bridges so AddAuditLog (which runs on central too) stays
// free of hosted-service registrations that would resolve a missing
// ISiteHealthCollector on central.
services.AddHostedService<SiteAuditBacklogReporter>();
return services; return services;
} }

View File

@@ -0,0 +1,133 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.HealthMonitoring;
namespace ScadaLink.AuditLog.Site;
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — site-side hosted service that
/// periodically pulls a backlog snapshot from <see cref="ISiteAuditQueue"/>
/// and pushes it into <see cref="ISiteHealthCollector"/> so the next
/// <see cref="ISiteHealthCollector.CollectReport"/> emits a fresh
/// <c>SiteAuditBacklog</c> field on the site health report.
/// </summary>
/// <remarks>
/// <para>
/// <b>Why a hosted service, not the report sender.</b> Querying SQLite for the
/// backlog requires the queue's write lock; doing it inline in
/// <see cref="ISiteHealthCollector.CollectReport"/> would couple the collector
/// to <see cref="ISiteAuditQueue"/> and turn an in-memory snapshot read into
/// a synchronous I/O call on the report path. The hosted-service pattern keeps
/// the report path pure and the SQL probe off the report timing budget.
/// </para>
/// <para>
/// <b>Cadence.</b> <see cref="DefaultRefreshInterval"/> (30 s) unless a
/// different interval is passed to the constructor. Making it tunable via
/// <see cref="ScadaLink.AuditLog.Site.SqliteAuditWriterOptions"/> is a possible
/// follow-up; for M6 the value is hard-coded because the brief calls it out
/// explicitly.
/// </para>
/// <para>
/// <b>Failure containment.</b> Every probe failure is logged and swallowed so
/// the next tick retries. Mirrors
/// <see cref="ScadaLink.AuditLog.Central.AuditLogPartitionMaintenanceService"/>'s
/// "exception logged, not propagated" contract. Only a cancellation that was
/// actually requested through the service's own token ends the loop, and in
/// that case the loop task completes normally so <see cref="StopAsync"/> never
/// surfaces a cancellation exception to the host.
/// </para>
/// </remarks>
public sealed class SiteAuditBacklogReporter : IHostedService, IDisposable
{
    /// <summary>
    /// Default poll cadence. Half a typical 60 s health-report interval keeps
    /// the snapshot fresh without spinning the SQL probe more often than
    /// necessary.
    /// </summary>
    internal static readonly TimeSpan DefaultRefreshInterval = TimeSpan.FromSeconds(30);

    private readonly ISiteAuditQueue _queue;
    private readonly ISiteHealthCollector _collector;
    private readonly ILogger<SiteAuditBacklogReporter> _logger;
    private readonly TimeSpan _refreshInterval;
    private CancellationTokenSource? _cts;
    private Task? _loop;

    /// <summary>
    /// Creates the reporter.
    /// </summary>
    /// <param name="queue">Source of the backlog snapshot.</param>
    /// <param name="collector">Sink that carries the snapshot onto the next health report.</param>
    /// <param name="logger">Receives a warning for every failed probe.</param>
    /// <param name="refreshInterval">
    /// Delay between probes; <c>null</c> selects <see cref="DefaultRefreshInterval"/>.
    /// The value is handed to <see cref="Task.Delay(TimeSpan, CancellationToken)"/> unchanged.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="queue"/>, <paramref name="collector"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public SiteAuditBacklogReporter(
        ISiteAuditQueue queue,
        ISiteHealthCollector collector,
        ILogger<SiteAuditBacklogReporter> logger,
        TimeSpan? refreshInterval = null)
    {
        _queue = queue ?? throw new ArgumentNullException(nameof(queue));
        _collector = collector ?? throw new ArgumentNullException(nameof(collector));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _refreshInterval = refreshInterval ?? DefaultRefreshInterval;
    }

    /// <inheritdoc />
    public Task StartAsync(CancellationToken ct)
    {
        // Linked CTS: either StopAsync/Dispose cancelling it, or the token
        // handed to StartAsync firing, aborts the pending Task.Delay.
        _cts = CancellationTokenSource.CreateLinkedTokenSource(ct);

        // Capture the token now. Reading _cts.Token lazily inside the lambda
        // would race with Dispose() and could throw ObjectDisposedException
        // on the thread-pool thread.
        var loopToken = _cts.Token;
        _loop = Task.Run(() => RunLoopAsync(loopToken));
        return Task.CompletedTask;
    }

    /// <summary>
    /// Probe once immediately, then once per refresh interval until
    /// <paramref name="ct"/> is cancelled. A requested cancellation ends the
    /// task in the RanToCompletion state rather than Canceled.
    /// </summary>
    private async Task RunLoopAsync(CancellationToken ct)
    {
        try
        {
            // First tick runs immediately so the very first health report
            // after process start carries a real backlog snapshot instead of
            // null for a whole refresh interval.
            await SafeProbeAsync(ct).ConfigureAwait(false);

            while (!ct.IsCancellationRequested)
            {
                await Task.Delay(_refreshInterval, ct).ConfigureAwait(false);
                await SafeProbeAsync(ct).ConfigureAwait(false);
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Shutdown was requested (from the delay or from the probe).
            // Swallow it here so the loop task completes cleanly.
        }
    }

    /// <summary>
    /// Runs one probe and publishes the result. Only a cancellation requested
    /// through <paramref name="ct"/> escapes; every other exception is logged
    /// and suppressed.
    /// </summary>
    private async Task SafeProbeAsync(CancellationToken ct)
    {
        try
        {
            var snapshot = await _queue.GetBacklogStatsAsync(ct).ConfigureAwait(false);
            _collector.UpdateSiteAuditBacklog(snapshot);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Genuine shutdown — RunLoopAsync turns this into a clean exit.
            throw;
        }
        catch (Exception ex)
        {
            // Catch-all is deliberate: the hosted service must survive every
            // class of probe failure (transient SQLite lock contention, disk
            // I/O hiccup, a cancellation that did not come from our token, …)
            // so the next tick gets a chance.
            _logger.LogWarning(ex, "SiteAuditBacklogReporter probe failed; next tick will retry.");
        }
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken ct)
    {
        _cts?.Cancel();

        var loop = _loop;
        if (loop is null)
        {
            // StartAsync was never called; nothing to wait for.
            return;
        }

        // Wait for the loop to drain, but stop waiting once the host's
        // shutdown deadline (ct) fires so a wedged probe cannot block
        // process exit indefinitely.
        var deadline = Task.Delay(Timeout.Infinite, ct);
        var finished = await Task.WhenAny(loop, deadline).ConfigureAwait(false);
        if (ReferenceEquals(finished, loop))
        {
            // Observe the loop's outcome so an unexpected fault is not lost.
            await loop.ConfigureAwait(false);
        }
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Take ownership of the CTS exactly once so repeated Dispose calls
        // (and a StopAsync after Dispose) are harmless.
        var cts = _cts;
        _cts = null;
        if (cts is null)
        {
            return;
        }

        // Cancel before disposing: a reporter disposed without StopAsync
        // would otherwise leave its loop polling forever.
        cts.Cancel();
        cts.Dispose();
    }
}

View File

@@ -4,6 +4,7 @@ using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Services; using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums; using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.AuditLog.Site; namespace ScadaLink.AuditLog.Site;
@@ -484,6 +485,84 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
} }
} }
/// <summary>
/// M6 Bundle E (T6) health-metric surface. Produces a point-in-time
/// <see cref="SiteAuditBacklogSnapshot"/> holding the number of rows whose
/// <c>ForwardState</c> is <see cref="AuditForwardState.Pending"/>, the smallest
/// <see cref="AuditEvent.OccurredAtUtc"/> among those rows, and the size of the
/// database file on disk.
/// </summary>
/// <remarks>
/// The count/min query executes while holding <c>_writeLock</c>, so it cannot
/// interleave with other work that takes the same lock. The file-size lookup
/// is done after the lock is released because it only touches the file
/// system, not the connection. Empty, in-memory (<c>:memory:</c> /
/// <c>mode=memory</c>) and non-existent paths report 0 bytes, as does any
/// file-system error (logged at Debug). The work is synchronous; the result
/// is wrapped in a completed task.
/// </remarks>
/// <param name="ct">Accepted for interface compatibility; not observed by this implementation.</param>
/// <returns>A completed task carrying the snapshot.</returns>
/// <exception cref="ObjectDisposedException">The writer has been disposed.</exception>
public Task<SiteAuditBacklogSnapshot> GetBacklogStatsAsync(CancellationToken ct = default)
{
    int pendingRows;
    DateTime? oldestOccurredAt = null;

    lock (_writeLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        // One statement yields both aggregates. NOTE(review): the original
        // author expects IX_SiteAuditLog_ForwardState_Occurred to serve this
        // query — the index definition is not visible here; confirm.
        using var query = _connection.CreateCommand();
        query.CommandText = """
            SELECT COUNT(*), MIN(OccurredAtUtc)
            FROM AuditLog
            WHERE ForwardState = $pending;
            """;
        query.Parameters.AddWithValue("$pending", AuditForwardState.Pending.ToString());

        using var row = query.ExecuteReader();
        row.Read();
        pendingRows = row.GetInt32(0);

        // MIN(...) is NULL when no row matches; leave the timestamp null then.
        if (!row.IsDBNull(1))
        {
            oldestOccurredAt = DateTime.Parse(
                row.GetString(1),
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.RoundtripKind);
        }
    }

    // Best-effort stat() of the configured DatabasePath, outside the lock.
    long fileBytes = 0;
    try
    {
        var isFileBacked =
            !string.IsNullOrEmpty(_options.DatabasePath)
            && !_options.DatabasePath.StartsWith(":memory:", StringComparison.Ordinal)
            && !_options.DatabasePath.Contains("mode=memory", StringComparison.OrdinalIgnoreCase);

        if (isFileBacked && File.Exists(_options.DatabasePath))
        {
            var dbFile = new FileInfo(_options.DatabasePath);
            fileBytes = dbFile.Length;
        }
    }
    catch (Exception ex)
    {
        // A failed size probe must never abort the snapshot: log and keep 0.
        _logger.LogDebug(ex,
            "SqliteAuditWriter could not stat DB path {Path} for backlog snapshot.",
            _options.DatabasePath);
    }

    var snapshot = new SiteAuditBacklogSnapshot(
        PendingCount: pendingRows,
        OldestPendingUtc: oldestOccurredAt,
        OnDiskBytes: fileBytes);
    return Task.FromResult(snapshot);
}
private static DateTime EnsureUtc(DateTime value) => private static DateTime EnsureUtc(DateTime value) =>
value.Kind == DateTimeKind.Utc value.Kind == DateTimeKind.Utc
? value ? value

View File

@@ -1,4 +1,5 @@
using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Types;
namespace ScadaLink.Commons.Interfaces.Services; namespace ScadaLink.Commons.Interfaces.Services;
@@ -70,4 +71,17 @@ public interface ISiteAuditQueue
/// are left untouched (idempotent re-call). Non-existent ids are silent no-ops. /// are left untouched (idempotent re-call). Non-existent ids are silent no-ops.
/// </summary> /// </summary>
Task MarkReconciledAsync(IReadOnlyList<Guid> eventIds, CancellationToken ct = default); Task MarkReconciledAsync(IReadOnlyList<Guid> eventIds, CancellationToken ct = default);
/// <summary>
/// M6 Bundle E (T6) health-metric surface: returns a point-in-time snapshot
/// of the site queue's pending count + oldest pending timestamp + on-disk
/// SQLite file size. Surfaced on
/// <see cref="ScadaLink.Commons.Messages.Health.SiteHealthReport"/> as
/// <c>SiteAuditBacklog</c> by the periodic <c>SiteAuditBacklogReporter</c>
/// hosted service so a stuck site→central drain is visible on the central
/// health dashboard. Safe to call concurrently with hot-path writes —
/// implementations are expected to take the same connection lock used by
/// the hot-path INSERT batch and the drain queries.
/// </summary>
/// <param name="ct">
/// Cancellation token offered to the implementation. Whether and when it is
/// observed is implementation-defined.
/// </param>
/// <returns>
/// A task that completes with the current <see cref="SiteAuditBacklogSnapshot"/>.
/// </returns>
Task<SiteAuditBacklogSnapshot> GetBacklogStatsAsync(CancellationToken ct = default);
} }

View File

@@ -1,3 +1,4 @@
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums; using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.Commons.Messages.Health; namespace ScadaLink.Commons.Messages.Health;
@@ -32,7 +33,14 @@ public record SiteHealthReport(
// marker). Surfaces a misconfigured / catastrophic regex on // marker). Surfaces a misconfigured / catastrophic regex on
// /monitoring/health. Defaults to 0 for back-compat with existing // /monitoring/health. Defaults to 0 for back-compat with existing
// producers and tests that don't construct the field. // producers and tests that don't construct the field.
int AuditRedactionFailure = 0); int AuditRedactionFailure = 0,
// Audit Log (#23) M6 Bundle E (T6): point-in-time snapshot of the
// site-local SQLite audit-log queue (pending count, oldest pending row,
// on-disk bytes). Populated by the site-side SiteAuditBacklogReporter
// hosted service every 30 s. Defaults to null so existing producers /
// tests that don't refresh the snapshot stay valid; the central health
// surface treats null as "no data yet" rather than a zeroed queue.
SiteAuditBacklogSnapshot? SiteAuditBacklog = null);
/// <summary> /// <summary>
/// Broadcast wrapper used between central nodes to keep per-node /// Broadcast wrapper used between central nodes to keep per-node

View File

@@ -0,0 +1,32 @@
namespace ScadaLink.Commons.Types;
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — point-in-time snapshot of the site-local
/// SQLite audit-log queue health, surfaced on
/// <see cref="ScadaLink.Commons.Messages.Health.SiteHealthReport"/> as
/// <c>SiteAuditBacklog</c> and refreshed periodically by the
/// <c>SiteAuditBacklogReporter</c> hosted service.
/// </summary>
/// <param name="PendingCount">
/// Number of rows currently in
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Pending"/> — i.e.
/// not yet acknowledged by central via either the push-telemetry or
/// reconciliation-pull paths. A persistently non-zero value whose
/// <see cref="OldestPendingUtc"/> stops advancing (so the backlog age keeps
/// growing) indicates the site→central drain isn't keeping up.
/// </param>
/// <param name="OldestPendingUtc">
/// <see cref="ScadaLink.Commons.Entities.Audit.AuditEvent.OccurredAtUtc"/> of
/// the oldest Pending row, or <c>null</c> if there are no Pending rows. Used
/// by ops to compute backlog age without a separate query.
/// </param>
/// <param name="OnDiskBytes">
/// Size of the SQLite file on disk in bytes, or <c>0</c> when no size could be
/// read — the writer is running against an in-memory database, the file does
/// not exist, or the file-system probe failed. Mirrors the 7-day retention
/// invariant (alog.md §10) — a steady file-size growth past the retention
/// window points at a stuck purge or a stuck forwarder.
/// </param>
public sealed record SiteAuditBacklogSnapshot(
int PendingCount,
DateTime? OldestPendingUtc,
long OnDiskBytes);

View File

@@ -1,4 +1,5 @@
using ScadaLink.Commons.Messages.Health; using ScadaLink.Commons.Messages.Health;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums; using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.HealthMonitoring; namespace ScadaLink.HealthMonitoring;
@@ -28,6 +29,15 @@ public interface ISiteHealthCollector
/// <c>AddAuditLogHealthMetricsBridge()</c>. /// <c>AddAuditLogHealthMetricsBridge()</c>.
/// </summary> /// </summary>
void IncrementAuditRedactionFailure(); void IncrementAuditRedactionFailure();
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — replace the latest site-local
/// audit-queue backlog snapshot (pending count, oldest pending row,
/// on-disk file bytes) used by the next <see cref="CollectReport"/> call.
/// Refreshed periodically by the <c>SiteAuditBacklogReporter</c> hosted
/// service so each report carries a recent point-in-time view of the
/// site→central drain health.
/// </summary>
/// <param name="snapshot">
/// The snapshot that supersedes any previously stored one. The in-repo
/// <c>SiteHealthCollector</c> implementation rejects <c>null</c> with an
/// <see cref="ArgumentNullException"/>.
/// </param>
void UpdateSiteAuditBacklog(SiteAuditBacklogSnapshot snapshot);
void UpdateConnectionHealth(string connectionName, ConnectionHealth health); void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
void RemoveConnection(string connectionName); void RemoveConnection(string connectionName);
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved); void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);

View File

@@ -1,5 +1,6 @@
using System.Collections.Concurrent; using System.Collections.Concurrent;
using ScadaLink.Commons.Messages.Health; using ScadaLink.Commons.Messages.Health;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums; using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.HealthMonitoring; namespace ScadaLink.HealthMonitoring;
@@ -15,6 +16,7 @@ public class SiteHealthCollector : ISiteHealthCollector
private int _deadLetterCount; private int _deadLetterCount;
private int _siteAuditWriteFailures; private int _siteAuditWriteFailures;
private int _auditRedactionFailures; private int _auditRedactionFailures;
private volatile SiteAuditBacklogSnapshot? _siteAuditBacklog;
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new(); private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new(); private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
private readonly ConcurrentDictionary<string, string> _connectionEndpoints = new(); private readonly ConcurrentDictionary<string, string> _connectionEndpoints = new();
@@ -89,6 +91,18 @@ public class SiteHealthCollector : ISiteHealthCollector
Interlocked.Increment(ref _auditRedactionFailures); Interlocked.Increment(ref _auditRedactionFailures);
} }
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — stores <paramref name="snapshot"/> as
/// the current backlog reading from the site SQLite writer. The store is a
/// single write to a <c>volatile</c> reference field; nothing is accumulated
/// or reset here, the value simply stays in place until the next call
/// replaces it.
/// </summary>
/// <param name="snapshot">The new backlog reading; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is <c>null</c>.</exception>
public void UpdateSiteAuditBacklog(SiteAuditBacklogSnapshot snapshot)
{
    // Reject null up front so the stored field only ever moves from
    // "never set" (null) to a real snapshot.
    ArgumentNullException.ThrowIfNull(snapshot);

    _siteAuditBacklog = snapshot;
}
/// <summary> /// <summary>
/// Update the health status for a named data connection. /// Update the health status for a named data connection.
/// Called by DCL when connection state changes. /// Called by DCL when connection state changes.
@@ -207,6 +221,7 @@ public class SiteHealthCollector : ISiteHealthCollector
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0), ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
ClusterNodes: _clusterNodes?.ToList(), ClusterNodes: _clusterNodes?.ToList(),
SiteAuditWriteFailures: siteAuditWriteFailures, SiteAuditWriteFailures: siteAuditWriteFailures,
AuditRedactionFailure: auditRedactionFailures); AuditRedactionFailure: auditRedactionFailures,
SiteAuditBacklog: _siteAuditBacklog);
} }
} }

View File

@@ -0,0 +1,136 @@
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using ScadaLink.AuditLog.Site;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.AuditLog.Tests.Site;
/// <summary>
/// Bundle E (M6-T6) coverage for <see cref="SqliteAuditWriter.GetBacklogStatsAsync"/>,
/// the health-metric probe whose result <c>SiteAuditBacklogReporter</c> pushes
/// onto the site health report as <c>SiteAuditBacklog</c>.
/// </summary>
public class SqliteAuditWriterBacklogStatsTests : IDisposable
{
    private readonly string _dbPath;

    public SqliteAuditWriterBacklogStatsTests()
    {
        // The suite is deliberately file-backed: OnDiskBytes can only be
        // asserted against a real file. Each test instance gets its own
        // uniquely named database under the temp directory.
        var fileName = $"audit-backlog-stats-{Guid.NewGuid():N}.db";
        _dbPath = Path.Combine(Path.GetTempPath(), fileName);
    }

    public void Dispose()
    {
        if (!File.Exists(_dbPath))
        {
            return;
        }

        try
        {
            File.Delete(_dbPath);
        }
        catch
        {
            /* test cleanup best-effort */
        }
    }

    /// <summary>Builds a writer bound to this test's database file.</summary>
    private SqliteAuditWriter CreateWriter()
    {
        var writerOptions = Options.Create(
            new SqliteAuditWriterOptions { DatabasePath = _dbPath });

        return new SqliteAuditWriter(writerOptions, NullLogger<SqliteAuditWriter>.Instance);
    }

    /// <summary>
    /// Builds a fresh event; <paramref name="occurredAtUtc"/> falls back to the
    /// current UTC time when omitted.
    /// </summary>
    private static AuditEvent NewEvent(DateTime? occurredAtUtc = null)
    {
        return new AuditEvent
        {
            EventId = Guid.NewGuid(),
            OccurredAtUtc = occurredAtUtc ?? DateTime.UtcNow,
            Channel = AuditChannel.ApiOutbound,
            Kind = AuditKind.ApiCall,
            Status = AuditStatus.Delivered,
            PayloadTruncated = false,
        };
    }

    /// <summary>Writes <paramref name="count"/> default events one after another.</summary>
    private static async Task WriteEventsAsync(SqliteAuditWriter writer, int count)
    {
        var written = 0;
        while (written < count)
        {
            await writer.WriteAsync(NewEvent());
            written++;
        }
    }

    [Fact]
    public async Task EmptyDb_Returns_Zero_Null_AndZeroBytes()
    {
        // Nothing has been written, so the pending fields are asserted
        // strictly. The file size of a freshly created database is not pinned
        // down — only non-negativity is required.
        await using var writer = CreateWriter();

        var stats = await writer.GetBacklogStatsAsync();

        Assert.Equal(0, stats.PendingCount);
        Assert.Null(stats.OldestPendingUtc);
        Assert.True(stats.OnDiskBytes >= 0,
            $"OnDiskBytes must be non-negative, got {stats.OnDiskBytes}");
    }

    [Fact]
    public async Task Pending_5_Returns_5()
    {
        await using var writer = CreateWriter();
        await WriteEventsAsync(writer, 5);

        var stats = await writer.GetBacklogStatsAsync();

        Assert.Equal(5, stats.PendingCount);
    }

    [Fact]
    public async Task OldestPending_Is_Earliest_OccurredAtUtc()
    {
        await using var writer = CreateWriter();
        var earliest = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
        var middle = new DateTime(2026, 5, 20, 10, 1, 0, DateTimeKind.Utc);
        var latest = new DateTime(2026, 5, 20, 10, 2, 0, DateTimeKind.Utc);

        // Deliberately not in chronological order: the result has to come
        // from a minimum over the column, not from insertion order.
        foreach (var occurredAt in new[] { middle, earliest, latest })
        {
            await writer.WriteAsync(NewEvent(occurredAt));
        }

        var stats = await writer.GetBacklogStatsAsync();

        Assert.Equal(3, stats.PendingCount);
        Assert.NotNull(stats.OldestPendingUtc);
        // The value is expected to survive the storage round trip exactly,
        // so compare for full equality.
        Assert.Equal(earliest, stats.OldestPendingUtc!.Value);
    }

    [Fact]
    public async Task OnDiskBytes_ReturnsFileSize()
    {
        await using var writer = CreateWriter();
        // Enough rows that the file is certainly non-empty.
        await WriteEventsAsync(writer, 100);

        var stats = await writer.GetBacklogStatsAsync();

        // The exact size is up to SQLite's page allocation; what matters is
        // that the snapshot reports the same number FileInfo does and that
        // it is positive.
        Assert.True(File.Exists(_dbPath), $"DB file should exist at {_dbPath}");
        var sizeOnDisk = new FileInfo(_dbPath).Length;
        Assert.Equal(sizeOnDisk, stats.OnDiskBytes);
        Assert.True(stats.OnDiskBytes > 0,
            $"after 100 inserts OnDiskBytes must be > 0, got {stats.OnDiskBytes}");
    }
}

View File

@@ -0,0 +1,73 @@
using ScadaLink.Commons.Types;
namespace ScadaLink.HealthMonitoring.Tests;
/// <summary>
/// Bundle E (M6-T6) regression coverage for the backlog snapshot on
/// <see cref="SiteHealthCollector"/>. A snapshot handed to
/// <see cref="ISiteHealthCollector.UpdateSiteAuditBacklog"/> must appear as
/// <c>SiteAuditBacklog</c> on the report returned by
/// <see cref="ISiteHealthCollector.CollectReport"/>, and — unlike the
/// SiteAuditWriteFailures / AuditRedactionFailure interval counters — it must
/// not be cleared by collecting a report.
/// </summary>
public class SiteAuditBacklogMetricTests
{
    private const string SiteId = "site-1";

    private readonly SiteHealthCollector _sut = new();

    [Fact]
    public void Update_Then_CollectReport_IncludesBacklog()
    {
        var oldest = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
        var pushed = new SiteAuditBacklogSnapshot(
            PendingCount: 42,
            OldestPendingUtc: oldest,
            OnDiskBytes: 1234567);

        _sut.UpdateSiteAuditBacklog(pushed);
        var report = _sut.CollectReport(SiteId);

        Assert.Equal(pushed, report.SiteAuditBacklog);
    }

    [Fact]
    public void Report_Payload_Includes_SiteAuditBacklog_AsNullByDefault()
    {
        // With no snapshot ever pushed the field stays null, which lets a
        // consumer tell "no data yet" apart from "queue empty".
        var report = _sut.CollectReport(SiteId);

        Assert.Null(report.SiteAuditBacklog);
    }

    [Fact]
    public void CollectReport_DoesNotReset_SiteAuditBacklog()
    {
        // The snapshot is a standing reading rather than a per-interval
        // counter: collecting twice without a refresh in between has to
        // return the same value both times.
        var pushed = new SiteAuditBacklogSnapshot(
            PendingCount: 7,
            OldestPendingUtc: null,
            OnDiskBytes: 8192);
        _sut.UpdateSiteAuditBacklog(pushed);

        var firstReport = _sut.CollectReport(SiteId);
        var secondReport = _sut.CollectReport(SiteId);

        Assert.Equal(pushed, firstReport.SiteAuditBacklog);
        Assert.Equal(pushed, secondReport.SiteAuditBacklog);
    }

    [Fact]
    public void Update_With_Null_Throws_ArgumentNullException()
    {
        void PushNull() => _sut.UpdateSiteAuditBacklog(null!);

        Assert.Throws<ArgumentNullException>(PushNull);
    }
}

View File

@@ -71,6 +71,7 @@ public class DeploymentManagerRedeployTests : TestKit, IDisposable
public void IncrementDeadLetter() { } public void IncrementDeadLetter() { }
public void IncrementSiteAuditWriteFailures() { } public void IncrementSiteAuditWriteFailures() { }
public void IncrementAuditRedactionFailure() { } public void IncrementAuditRedactionFailure() { }
public void UpdateSiteAuditBacklog(ScadaLink.Commons.Types.SiteAuditBacklogSnapshot snapshot) { }
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { } public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
public void RemoveConnection(string connectionName) { } public void RemoveConnection(string connectionName) { }
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { } public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }