feat(health): SiteAuditBacklog metric (count + age + bytes) (#23 M6)
This commit is contained in:
@@ -218,6 +218,14 @@ public static class ServiceCollectionExtensions
|
||||
ServiceDescriptor.Singleton<IAuditWriteFailureCounter, HealthMetricsAuditWriteFailureCounter>());
|
||||
services.Replace(
|
||||
ServiceDescriptor.Singleton<IAuditRedactionFailureCounter, HealthMetricsAuditRedactionFailureCounter>());
|
||||
// M6 Bundle E (T6): the site-side backlog reporter polls the
|
||||
// SqliteAuditWriter every 30 s and pushes the snapshot into the
|
||||
// collector so the next SiteHealthReport carries a fresh
|
||||
// SiteAuditBacklog field. Registered alongside the other site-only
|
||||
// metric bridges so AddAuditLog (which runs on central too) stays
|
||||
// free of hosted-service registrations that would resolve a missing
|
||||
// ISiteHealthCollector on central.
|
||||
services.AddHostedService<SiteAuditBacklogReporter>();
|
||||
return services;
|
||||
}
|
||||
|
||||
|
||||
133
src/ScadaLink.AuditLog/Site/SiteAuditBacklogReporter.cs
Normal file
133
src/ScadaLink.AuditLog/Site/SiteAuditBacklogReporter.cs
Normal file
@@ -0,0 +1,133 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
|
||||
namespace ScadaLink.AuditLog.Site;
|
||||
|
||||
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — site-side hosted service that
/// periodically pulls a backlog snapshot from <see cref="ISiteAuditQueue"/>
/// and pushes it into <see cref="ISiteHealthCollector"/> so the next
/// <see cref="ISiteHealthCollector.CollectReport"/> emits a fresh
/// <c>SiteAuditBacklog</c> field on the site health report.
/// </summary>
/// <remarks>
/// <para>
/// <b>Why a hosted service, not the report sender.</b> Querying SQLite for the
/// backlog requires the queue's write lock; doing it inline in
/// <see cref="ISiteHealthCollector.CollectReport"/> would couple the collector
/// to <see cref="ISiteAuditQueue"/> and turn an in-memory snapshot read into
/// a synchronous I/O call on the report path. The hosted-service pattern keeps
/// the report path pure and the SQL probe off the report timing budget.
/// </para>
/// <para>
/// <b>Cadence.</b> 30 s by default (<see cref="DefaultRefreshInterval"/>); a
/// different value can be supplied through the constructor. The first probe
/// runs immediately on start, subsequent probes run one interval apart.
/// </para>
/// <para>
/// <b>Failure containment.</b> Every probe failure is logged and swallowed so
/// the next tick retries. Only cancellation of the service's own token ends
/// the loop, and it does so without faulting the loop task, so
/// <see cref="StopAsync"/> never surfaces a cancellation exception to the host.
/// Mirrors <see cref="ScadaLink.AuditLog.Central.AuditLogPartitionMaintenanceService"/>'s
/// "exception logged, not propagated" contract.
/// </para>
/// </remarks>
public sealed class SiteAuditBacklogReporter : IHostedService, IDisposable
{
    /// <summary>
    /// Default poll cadence. Half a typical 60 s health-report interval keeps
    /// the snapshot fresh without spinning the SQL probe more often than
    /// necessary.
    /// </summary>
    internal static readonly TimeSpan DefaultRefreshInterval = TimeSpan.FromSeconds(30);

    private readonly ISiteAuditQueue _queue;
    private readonly ISiteHealthCollector _collector;
    private readonly ILogger<SiteAuditBacklogReporter> _logger;
    private readonly TimeSpan _refreshInterval;
    private CancellationTokenSource? _cts;
    private Task? _loop;

    /// <summary>
    /// Creates the reporter.
    /// </summary>
    /// <param name="queue">Source of the backlog snapshot.</param>
    /// <param name="collector">Sink that receives each snapshot.</param>
    /// <param name="logger">Receives a warning for every failed probe.</param>
    /// <param name="refreshInterval">
    /// Delay between probes; <c>null</c> selects <see cref="DefaultRefreshInterval"/>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="queue"/>, <paramref name="collector"/> or
    /// <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="refreshInterval"/> is negative and not
    /// <see cref="Timeout.InfiniteTimeSpan"/>.
    /// </exception>
    public SiteAuditBacklogReporter(
        ISiteAuditQueue queue,
        ISiteHealthCollector collector,
        ILogger<SiteAuditBacklogReporter> logger,
        TimeSpan? refreshInterval = null)
    {
        _queue = queue ?? throw new ArgumentNullException(nameof(queue));
        _collector = collector ?? throw new ArgumentNullException(nameof(collector));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        var interval = refreshInterval ?? DefaultRefreshInterval;

        // Task.Delay rejects negative delays (other than the infinite sentinel)
        // with an ArgumentOutOfRangeException. Fail fast here instead of letting
        // that exception kill the background loop after the first probe.
        if (interval < TimeSpan.Zero && interval != Timeout.InfiniteTimeSpan)
        {
            throw new ArgumentOutOfRangeException(
                nameof(refreshInterval),
                interval,
                "Refresh interval must be non-negative or Timeout.InfiniteTimeSpan.");
        }

        _refreshInterval = interval;
    }

    /// <inheritdoc />
    public Task StartAsync(CancellationToken ct)
    {
        // Linked CTS: either StopAsync/Dispose cancelling our source or the
        // token handed to StartAsync firing terminates the loop and aborts
        // any pending Task.Delay.
        var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        var loopToken = cts.Token;

        _cts = cts;
        _loop = Task.Run(() => RunLoopAsync(loopToken));
        return Task.CompletedTask;
    }

    /// <summary>
    /// Probe-then-wait loop. Completes successfully when <paramref name="ct"/>
    /// is cancelled; never faults.
    /// </summary>
    private async Task RunLoopAsync(CancellationToken ct)
    {
        try
        {
            // First tick runs immediately so the very first health report after
            // process start carries a real backlog snapshot — without this the
            // dashboard would show null for the first interval after a deploy.
            await SafeProbeAsync(ct).ConfigureAwait(false);

            while (!ct.IsCancellationRequested)
            {
                await Task.Delay(_refreshInterval, ct).ConfigureAwait(false);
                await SafeProbeAsync(ct).ConfigureAwait(false);
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Shutdown requested while waiting or probing — exit quietly so
            // the loop task completes successfully and StopAsync does not throw.
        }
        catch (Exception ex)
        {
            // Not expected (probe failures are contained in SafeProbeAsync), but
            // never let the loop task fault unobserved.
            _logger.LogError(ex, "SiteAuditBacklogReporter loop terminated unexpectedly.");
        }
    }

    /// <summary>
    /// Runs one probe and publishes the result. Only a cancellation caused by
    /// <paramref name="ct"/> propagates; every other failure is logged.
    /// </summary>
    private async Task SafeProbeAsync(CancellationToken ct)
    {
        try
        {
            var snapshot = await _queue.GetBacklogStatsAsync(ct).ConfigureAwait(false);
            _collector.UpdateSiteAuditBacklog(snapshot);
        }
        catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
        {
            // Catch-all is deliberate: the hosted service must survive every
            // class of probe failure (transient SQLite lock contention, disk
            // I/O hiccup, a cancellation that did not originate from our token, …)
            // so the next tick gets a chance.
            _logger.LogWarning(ex, "SiteAuditBacklogReporter probe failed; next tick will retry.");
        }
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken ct)
    {
        _cts?.Cancel();

        var loop = _loop;
        if (loop is null)
        {
            return;
        }

        // Wait for the loop to drain, but give up when the host's graceful
        // shutdown token fires so a stuck probe cannot block shutdown forever.
        await Task.WhenAny(loop, Task.Delay(Timeout.Infinite, ct)).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Exchange makes Dispose idempotent and keeps a later StopAsync from
        // touching a disposed source.
        var cts = Interlocked.Exchange(ref _cts, null);
        if (cts is null)
        {
            return;
        }

        try
        {
            // Cancel first so a loop that was never stopped does not keep
            // running against a disposed source.
            cts.Cancel();
        }
        finally
        {
            cts.Dispose();
        }
    }
}
|
||||
@@ -4,6 +4,7 @@ using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.AuditLog.Site;
|
||||
@@ -484,6 +485,84 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// M6 Bundle E (T6) health-metric surface: returns a point-in-time snapshot
/// of the site queue's pending count, the oldest pending row's
/// <see cref="AuditEvent.OccurredAtUtc"/>, and the on-disk file size. Called
/// by the site-side <c>SiteAuditBacklogReporter</c> hosted service on its
/// 30 s tick to refresh the <c>SiteHealthReport.SiteAuditBacklog</c> field.
/// </summary>
/// <remarks>
/// The pending-count + oldest-row query runs under the writer's
/// <c>_writeLock</c> so the snapshot is consistent against the connection's
/// view. The on-disk size lookup happens OUTSIDE the lock — it only inspects
/// the file at <c>DatabasePath</c> and doesn't touch the connection.
/// In-memory and missing files report 0 bytes, as does any file-system
/// failure (the snapshot is for ops dashboards, not a correctness invariant).
/// </remarks>
/// <param name="ct">
/// Accepted to satisfy the interface; the probe is synchronous and this
/// implementation does not observe the token.
/// </param>
/// <returns>An already-completed task carrying the snapshot.</returns>
/// <exception cref="ObjectDisposedException">The writer has been disposed.</exception>
public Task<SiteAuditBacklogSnapshot> GetBacklogStatsAsync(CancellationToken ct = default)
{
    int pendingCount;
    DateTime? oldestPending;

    lock (_writeLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        // Single round-trip — COUNT(*) + MIN(OccurredAtUtc) over the same
        // Pending filter avoids a second query.
        using var cmd = _connection.CreateCommand();
        cmd.CommandText = """
            SELECT COUNT(*), MIN(OccurredAtUtc)
            FROM AuditLog
            WHERE ForwardState = $pending;
            """;
        cmd.Parameters.AddWithValue("$pending", AuditForwardState.Pending.ToString());

        using var reader = cmd.ExecuteReader();
        reader.Read();
        pendingCount = reader.GetInt32(0);

        // MIN() over zero rows yields NULL, which maps to "queue empty".
        // AssumeUniversal | AdjustToUniversal guarantees Kind=Utc whether or
        // not the stored text carries a "Z"/offset suffix, so the value always
        // honours the OldestPendingUtc contract.
        oldestPending = reader.IsDBNull(1)
            ? null
            : DateTime.Parse(
                reader.GetString(1),
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.AssumeUniversal |
                System.Globalization.DateTimeStyles.AdjustToUniversal);
    }

    // File-size lookup outside the lock — the DatabasePath option is the
    // canonical source. In-memory / mode=memory paths report 0 because
    // there is no file on disk.
    long onDiskBytes = 0;
    var dbPath = _options.DatabasePath;
    try
    {
        if (!string.IsNullOrEmpty(dbPath) &&
            !dbPath.StartsWith(":memory:", StringComparison.Ordinal) &&
            !dbPath.Contains("mode=memory", StringComparison.OrdinalIgnoreCase))
        {
            // One FileInfo answers both questions from the same file-system
            // probe, so the file cannot vanish between the existence check
            // and the length read.
            var dbFile = new FileInfo(dbPath);
            if (dbFile.Exists)
            {
                onDiskBytes = dbFile.Length;
            }
        }
    }
    catch (Exception ex)
    {
        // File system probe is a best-effort health-metric — never abort
        // a backlog snapshot because the size lookup failed. Log and report 0.
        _logger.LogDebug(ex,
            "SqliteAuditWriter could not stat DB path {Path} for backlog snapshot.",
            dbPath);
    }

    return Task.FromResult(new SiteAuditBacklogSnapshot(
        PendingCount: pendingCount,
        OldestPendingUtc: oldestPending,
        OnDiskBytes: onDiskBytes));
}
|
||||
|
||||
private static DateTime EnsureUtc(DateTime value) =>
|
||||
value.Kind == DateTimeKind.Utc
|
||||
? value
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Types;
|
||||
|
||||
namespace ScadaLink.Commons.Interfaces.Services;
|
||||
|
||||
@@ -70,4 +71,17 @@ public interface ISiteAuditQueue
|
||||
/// are left untouched (idempotent re-call). Non-existent ids are silent no-ops.
|
||||
/// </summary>
|
||||
Task MarkReconciledAsync(IReadOnlyList<Guid> eventIds, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
/// M6 Bundle E (T6) health-metric surface. Produces a point-in-time view of
/// the site queue: how many rows are pending, the timestamp of the oldest
/// pending row, and the on-disk size of the SQLite file. The periodic
/// <c>SiteAuditBacklogReporter</c> hosted service publishes the result on
/// <see cref="ScadaLink.Commons.Messages.Health.SiteHealthReport"/> as
/// <c>SiteAuditBacklog</c>, which makes a stuck site→central drain visible
/// on the central health dashboard.
/// </summary>
/// <remarks>
/// Safe to call concurrently with hot-path writes — implementations are
/// expected to take the same connection lock used by the hot-path INSERT
/// batch and the drain queries.
/// </remarks>
/// <param name="ct">Cancellation token for the probe.</param>
/// <returns>The backlog snapshot taken at the time of the call.</returns>
Task<SiteAuditBacklogSnapshot> GetBacklogStatsAsync(CancellationToken ct = default);
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.Commons.Messages.Health;
|
||||
@@ -32,7 +33,14 @@ public record SiteHealthReport(
|
||||
// marker). Surfaces a misconfigured / catastrophic regex on
|
||||
// /monitoring/health. Defaults to 0 for back-compat with existing
|
||||
// producers and tests that don't construct the field.
|
||||
int AuditRedactionFailure = 0);
|
||||
int AuditRedactionFailure = 0,
|
||||
// Audit Log (#23) M6 Bundle E (T6): point-in-time snapshot of the
|
||||
// site-local SQLite audit-log queue (pending count, oldest pending row,
|
||||
// on-disk bytes). Populated by the site-side SiteAuditBacklogReporter
|
||||
// hosted service every 30 s. Defaults to null so existing producers /
|
||||
// tests that don't refresh the snapshot stay valid; the central health
|
||||
// surface treats null as "no data yet" rather than a zeroed queue.
|
||||
SiteAuditBacklogSnapshot? SiteAuditBacklog = null);
|
||||
|
||||
/// <summary>
|
||||
/// Broadcast wrapper used between central nodes to keep per-node
|
||||
|
||||
32
src/ScadaLink.Commons/Types/SiteAuditBacklogSnapshot.cs
Normal file
32
src/ScadaLink.Commons/Types/SiteAuditBacklogSnapshot.cs
Normal file
@@ -0,0 +1,32 @@
|
||||
namespace ScadaLink.Commons.Types;
|
||||
|
||||
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — immutable, value-comparable reading of
/// the site-local SQLite audit-log queue at one moment in time. Carried on
/// <see cref="ScadaLink.Commons.Messages.Health.SiteHealthReport"/> as
/// <c>SiteAuditBacklog</c>; the <c>SiteAuditBacklogReporter</c> hosted service
/// refreshes it periodically.
/// </summary>
/// <param name="PendingCount">
/// How many rows are currently in
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Pending"/>, i.e.
/// have not yet been acknowledged by central through either the
/// push-telemetry or the reconciliation-pull path. A value that stays above
/// zero while the backlog age derived from <see cref="OldestPendingUtc"/>
/// keeps growing means the site→central drain isn't keeping up.
/// </param>
/// <param name="OldestPendingUtc">
/// The <see cref="ScadaLink.Commons.Entities.Audit.AuditEvent.OccurredAtUtc"/>
/// value of the oldest Pending row; <c>null</c> when nothing is pending. Lets
/// ops compute backlog age without issuing a separate query.
/// </param>
/// <param name="OnDiskBytes">
/// Byte size of the SQLite file on disk; <c>0</c> when the writer runs against
/// an in-memory database. Relates to the 7-day retention invariant
/// (alog.md §10): file size that keeps growing past the retention window
/// points at a stuck purge or a stuck forwarder.
/// </param>
public sealed record SiteAuditBacklogSnapshot(
    int PendingCount,
    DateTime? OldestPendingUtc,
    long OnDiskBytes);
|
||||
@@ -1,4 +1,5 @@
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.HealthMonitoring;
|
||||
@@ -28,6 +29,15 @@ public interface ISiteHealthCollector
|
||||
/// <c>AddAuditLogHealthMetricsBridge()</c>.
|
||||
/// </summary>
|
||||
void IncrementAuditRedactionFailure();
|
||||
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — stores <paramref name="snapshot"/> as
/// the current site-local audit-queue backlog reading (pending count, oldest
/// pending row, on-disk file bytes), replacing any earlier one. The next
/// <see cref="CollectReport"/> call reports this value. The
/// <c>SiteAuditBacklogReporter</c> hosted service calls this periodically so
/// each report carries a recent point-in-time view of the site→central
/// drain health.
/// </summary>
/// <param name="snapshot">The most recent backlog reading.</param>
void UpdateSiteAuditBacklog(SiteAuditBacklogSnapshot snapshot);
|
||||
void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
|
||||
void RemoveConnection(string connectionName);
|
||||
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.HealthMonitoring;
|
||||
@@ -15,6 +16,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
private int _deadLetterCount;
|
||||
private int _siteAuditWriteFailures;
|
||||
private int _auditRedactionFailures;
|
||||
private volatile SiteAuditBacklogSnapshot? _siteAuditBacklog;
|
||||
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
|
||||
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
||||
private readonly ConcurrentDictionary<string, string> _connectionEndpoints = new();
|
||||
@@ -89,6 +91,18 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
Interlocked.Increment(ref _auditRedactionFailures);
|
||||
}
|
||||
|
||||
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — swaps in the newest backlog snapshot
/// reported by the site SQLite writer. Publishing is one volatile reference
/// write, so the next <see cref="CollectReport"/> observes the latest value.
/// Nothing is reset on collect: the report keeps carrying whatever was
/// refreshed last.
/// </summary>
/// <param name="snapshot">The snapshot to publish; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is <c>null</c>.</exception>
public void UpdateSiteAuditBacklog(SiteAuditBacklogSnapshot snapshot)
{
    if (snapshot is null)
    {
        throw new ArgumentNullException(nameof(snapshot));
    }

    _siteAuditBacklog = snapshot;
}
|
||||
|
||||
/// <summary>
|
||||
/// Update the health status for a named data connection.
|
||||
/// Called by DCL when connection state changes.
|
||||
@@ -207,6 +221,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
|
||||
ClusterNodes: _clusterNodes?.ToList(),
|
||||
SiteAuditWriteFailures: siteAuditWriteFailures,
|
||||
AuditRedactionFailure: auditRedactionFailures);
|
||||
AuditRedactionFailure: auditRedactionFailures,
|
||||
SiteAuditBacklog: _siteAuditBacklog);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Site;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.AuditLog.Tests.Site;
|
||||
|
||||
/// <summary>
/// Bundle E (M6-T6) tests for <see cref="SqliteAuditWriter.GetBacklogStatsAsync"/>.
/// Exercises the health-metric surface that <c>SiteAuditBacklogReporter</c>
/// polls every 30 s and pushes onto the site health report as
/// <c>SiteAuditBacklog</c>.
/// </summary>
public class SqliteAuditWriterBacklogStatsTests : IDisposable
{
    // Suffixes of the files SQLite may leave next to the main database file
    // (main file itself, WAL, shared-memory index, rollback journal). Which of
    // them actually appear depends on the journal mode the writer configures.
    private static readonly string[] DbFileSuffixes = { "", "-wal", "-shm", "-journal" };

    private readonly string _dbPath;

    public SqliteAuditWriterBacklogStatsTests()
    {
        // OnDiskBytes assertions only make sense against a real file — the
        // shared-cache in-memory mode returns 0 for the file size, so this
        // suite is opinionated about file-backed storage. Tests in
        // SqliteAuditWriterWriteTests use in-memory for performance reasons.
        _dbPath = Path.Combine(Path.GetTempPath(),
            $"audit-backlog-stats-{Guid.NewGuid():N}.db");
    }

    /// <summary>
    /// Removes the per-test database file and any SQLite sidecar files so
    /// repeated runs don't accumulate files in the temp directory.
    /// </summary>
    public void Dispose()
    {
        foreach (var suffix in DbFileSuffixes)
        {
            // File.Delete is a no-op for a missing file; anything else
            // (e.g. the file is still locked) is ignored on purpose.
            try { File.Delete(_dbPath + suffix); } catch { /* test cleanup best-effort */ }
        }
    }

    private SqliteAuditWriter CreateWriter()
    {
        var options = new SqliteAuditWriterOptions { DatabasePath = _dbPath };
        return new SqliteAuditWriter(
            Options.Create(options),
            NullLogger<SqliteAuditWriter>.Instance);
    }

    private static AuditEvent NewEvent(DateTime? occurredAtUtc = null) => new()
    {
        EventId = Guid.NewGuid(),
        OccurredAtUtc = occurredAtUtc ?? DateTime.UtcNow,
        Channel = AuditChannel.ApiOutbound,
        Kind = AuditKind.ApiCall,
        Status = AuditStatus.Delivered,
        PayloadTruncated = false,
    };

    /// <summary>Writes <paramref name="count"/> freshly generated events, one at a time.</summary>
    private static async Task WriteEventsAsync(SqliteAuditWriter writer, int count)
    {
        for (var i = 0; i < count; i++)
        {
            await writer.WriteAsync(NewEvent());
        }
    }

    [Fact]
    public async Task EmptyDb_Returns_Zero_Null_AndZeroBytes()
    {
        // No file exists yet — the writer ctor creates one but no rows are
        // inserted; the snapshot should report a clean queue. OnDiskBytes is
        // allowed to be zero OR small (page header) — the contract only
        // requires non-negative; we assert >= 0 and exercise the pending
        // fields strictly.
        await using var writer = CreateWriter();

        var snapshot = await writer.GetBacklogStatsAsync();

        Assert.Equal(0, snapshot.PendingCount);
        Assert.Null(snapshot.OldestPendingUtc);
        Assert.True(snapshot.OnDiskBytes >= 0,
            $"OnDiskBytes must be non-negative, got {snapshot.OnDiskBytes}");
    }

    [Fact]
    public async Task Pending_5_Returns_5()
    {
        await using var writer = CreateWriter();
        await WriteEventsAsync(writer, 5);

        var snapshot = await writer.GetBacklogStatsAsync();

        Assert.Equal(5, snapshot.PendingCount);
    }

    [Fact]
    public async Task OldestPending_Is_Earliest_OccurredAtUtc()
    {
        await using var writer = CreateWriter();

        var t1 = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
        var t2 = new DateTime(2026, 5, 20, 10, 1, 0, DateTimeKind.Utc);
        var t3 = new DateTime(2026, 5, 20, 10, 2, 0, DateTimeKind.Utc);

        // Insert out of order so the snapshot is not "the last write" by
        // accident — the OldestPendingUtc must come from a column-min, not
        // an insertion-order proxy.
        await writer.WriteAsync(NewEvent(t2));
        await writer.WriteAsync(NewEvent(t1));
        await writer.WriteAsync(NewEvent(t3));

        var snapshot = await writer.GetBacklogStatsAsync();

        Assert.Equal(3, snapshot.PendingCount);
        Assert.NotNull(snapshot.OldestPendingUtc);
        // The stored timestamp must come back as the same instant —
        // assert tick-equality.
        Assert.Equal(t1, snapshot.OldestPendingUtc!.Value);
    }

    [Fact]
    public async Task OnDiskBytes_ReturnsFileSize()
    {
        await using var writer = CreateWriter();

        // Insert enough rows to grow the file past the empty schema baseline.
        await WriteEventsAsync(writer, 100);

        var snapshot = await writer.GetBacklogStatsAsync();

        // The exact size depends on SQLite page allocation, so compare against
        // what the file system reports: the implementation should return the
        // FileInfo.Length value verbatim, and it must be non-zero for a
        // file-backed db.
        Assert.True(File.Exists(_dbPath), $"DB file should exist at {_dbPath}");
        var expected = new FileInfo(_dbPath).Length;
        Assert.Equal(expected, snapshot.OnDiskBytes);
        Assert.True(snapshot.OnDiskBytes > 0,
            $"after 100 inserts OnDiskBytes must be > 0, got {snapshot.OnDiskBytes}");
    }
}
|
||||
@@ -0,0 +1,73 @@
|
||||
using ScadaLink.Commons.Types;
|
||||
|
||||
namespace ScadaLink.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
/// Bundle E (M6-T6) regression coverage for the backlog snapshot on the site
/// health report. The site-side audit-log SQLite writer exposes a
/// <c>SiteAuditBacklogSnapshot</c> through
/// <c>ISiteAuditQueue.GetBacklogStatsAsync</c>; the periodic
/// <c>SiteAuditBacklogReporter</c> hosted service hands it to the collector via
/// <see cref="ISiteHealthCollector.UpdateSiteAuditBacklog"/>, and the next
/// <see cref="ISiteHealthCollector.CollectReport"/> carries it in the payload
/// as <c>SiteAuditBacklog</c>. In contrast to the SiteAuditWriteFailures /
/// AuditRedactionFailure interval counters, collecting does not reset the
/// snapshot — the report keeps the most recently pushed value.
/// </summary>
public class SiteAuditBacklogMetricTests
{
    private const string SiteId = "site-1";

    private readonly SiteHealthCollector _collector = new();

    [Fact]
    public void Update_Then_CollectReport_IncludesBacklog()
    {
        var oldest = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
        var pushed = new SiteAuditBacklogSnapshot(
            PendingCount: 42,
            OldestPendingUtc: oldest,
            OnDiskBytes: 1234567);
        _collector.UpdateSiteAuditBacklog(pushed);

        var report = _collector.CollectReport(SiteId);

        Assert.Equal(pushed, report.SiteAuditBacklog);
    }

    [Fact]
    public void Report_Payload_Includes_SiteAuditBacklog_AsNullByDefault()
    {
        // Nothing has been pushed, so the report must carry null — that is how
        // the central UI tells "no data yet" apart from "queue empty".
        var report = _collector.CollectReport(SiteId);

        Assert.Null(report.SiteAuditBacklog);
    }

    [Fact]
    public void CollectReport_DoesNotReset_SiteAuditBacklog()
    {
        // The snapshot is a point-in-time reading rather than a per-interval
        // counter: back-to-back CollectReport calls ahead of the next
        // SiteAuditBacklogReporter tick MUST return the same snapshot, or a
        // slow refresh cadence would blank the central dashboard.
        var pushed = new SiteAuditBacklogSnapshot(
            PendingCount: 7,
            OldestPendingUtc: null,
            OnDiskBytes: 8192);
        _collector.UpdateSiteAuditBacklog(pushed);

        var first = _collector.CollectReport(SiteId);
        var second = _collector.CollectReport(SiteId);

        Assert.Equal(pushed, first.SiteAuditBacklog);
        Assert.Equal(pushed, second.SiteAuditBacklog);
    }

    [Fact]
    public void Update_With_Null_Throws_ArgumentNullException()
    {
        void PushNull() => _collector.UpdateSiteAuditBacklog(null!);

        Assert.Throws<ArgumentNullException>(PushNull);
    }
}
|
||||
@@ -71,6 +71,7 @@ public class DeploymentManagerRedeployTests : TestKit, IDisposable
|
||||
public void IncrementDeadLetter() { }
|
||||
public void IncrementSiteAuditWriteFailures() { }
|
||||
public void IncrementAuditRedactionFailure() { }
|
||||
public void UpdateSiteAuditBacklog(ScadaLink.Commons.Types.SiteAuditBacklogSnapshot snapshot) { }
|
||||
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
|
||||
public void RemoveConnection(string connectionName) { }
|
||||
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
|
||||
|
||||
Reference in New Issue
Block a user