feat(health): SiteAuditBacklog metric (count + age + bytes) (#23 M6)
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Types;
|
||||
|
||||
namespace ScadaLink.Commons.Interfaces.Services;
|
||||
|
||||
@@ -70,4 +71,17 @@ public interface ISiteAuditQueue
|
||||
/// are left untouched (idempotent re-call). Non-existent ids are silent no-ops.
|
||||
/// </summary>
|
||||
Task MarkReconciledAsync(IReadOnlyList<Guid> eventIds, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// M6 Bundle E (T6) health-metric surface: returns a point-in-time snapshot
|
||||
/// of the site queue's pending count, oldest pending timestamp, and on-disk
|
||||
/// SQLite file size. Surfaced on
|
||||
/// <see cref="ScadaLink.Commons.Messages.Health.SiteHealthReport"/> as
|
||||
/// <c>SiteAuditBacklog</c> by the periodic <c>SiteAuditBacklogReporter</c>
|
||||
/// hosted service so a stuck site→central drain is visible on the central
|
||||
/// health dashboard. Safe to call concurrently with hot-path writes —
|
||||
/// implementations are expected to take the same connection lock used by
|
||||
/// the hot-path INSERT batch and the drain queries.
|
||||
/// </summary>
/// <param name="ct">Cancellation token for the operation.</param>
/// <returns>
/// A <see cref="SiteAuditBacklogSnapshot"/> carrying the pending count, the
/// oldest pending timestamp, and the on-disk size in bytes.
/// </returns>
|
||||
Task<SiteAuditBacklogSnapshot> GetBacklogStatsAsync(CancellationToken ct = default);
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.Commons.Messages.Health;
|
||||
@@ -32,7 +33,14 @@ public record SiteHealthReport(
|
||||
// marker). Surfaces a misconfigured / catastrophic regex on
|
||||
// /monitoring/health. Defaults to 0 for back-compat with existing
|
||||
// producers and tests that don't construct the field.
|
||||
int AuditRedactionFailure = 0);
|
||||
int AuditRedactionFailure = 0,
|
||||
// Audit Log (#23) M6 Bundle E (T6): point-in-time snapshot of the
|
||||
// site-local SQLite audit-log queue (pending count, oldest pending row,
|
||||
// on-disk bytes). Populated by the site-side SiteAuditBacklogReporter
|
||||
// hosted service every 30 s. Defaults to null so existing producers /
|
||||
// tests that don't refresh the snapshot stay valid; the central health
|
||||
// surface treats null as "no data yet" rather than a zeroed queue.
|
||||
SiteAuditBacklogSnapshot? SiteAuditBacklog = null);
|
||||
|
||||
/// <summary>
|
||||
/// Broadcast wrapper used between central nodes to keep per-node
|
||||
|
||||
32
src/ScadaLink.Commons/Types/SiteAuditBacklogSnapshot.cs
Normal file
32
src/ScadaLink.Commons/Types/SiteAuditBacklogSnapshot.cs
Normal file
@@ -0,0 +1,32 @@
|
||||
namespace ScadaLink.Commons.Types;
|
||||
|
||||
/// <summary>
|
||||
/// Audit Log (#23) M6 Bundle E (T6) — point-in-time snapshot of the site-local
|
||||
/// SQLite audit-log queue health, surfaced on
|
||||
/// <see cref="ScadaLink.Commons.Messages.Health.SiteHealthReport"/> as
|
||||
/// <c>SiteAuditBacklog</c> and refreshed periodically by the
|
||||
/// <c>SiteAuditBacklogReporter</c> hosted service.
|
||||
/// </summary>
|
||||
/// <param name="PendingCount">
|
||||
/// Number of rows currently in
|
||||
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Pending"/> — i.e.
|
||||
/// not yet acknowledged by central via either the push-telemetry or
|
||||
/// reconciliation-pull paths. A persistently non-zero value combined with an
|
||||
/// <see cref="OldestPendingUtc"/> that stops advancing (i.e. a growing backlog age) indicates the site→central drain isn't
|
||||
/// keeping up.
|
||||
/// </param>
|
||||
/// <param name="OldestPendingUtc">
|
||||
/// <see cref="ScadaLink.Commons.Entities.Audit.AuditEvent.OccurredAtUtc"/> of
|
||||
/// the oldest Pending row, or <c>null</c> if the queue is empty. Used by ops
|
||||
/// to compute backlog age (current UTC time minus this value) without a separate query.
|
||||
/// </param>
|
||||
/// <param name="OnDiskBytes">
|
||||
/// Size of the SQLite file on disk in bytes, or <c>0</c> if the writer is
|
||||
/// running against an in-memory database. Read against the 7-day retention
|
||||
/// invariant (alog.md §10) — steady file-size growth past the retention
|
||||
/// window points at a stuck purge or a stuck forwarder.
|
||||
/// </param>
|
||||
public sealed record SiteAuditBacklogSnapshot(
|
||||
int PendingCount,
|
||||
DateTime? OldestPendingUtc,
|
||||
long OnDiskBytes);
|
||||
Reference in New Issue
Block a user