feat(health): SiteAuditWriteFailures counter + AuditLog bridge (#23)

Bundle G of Audit Log #23 M2. Bridges the FallbackAuditWriter primary-
failure counter into the Site Health Monitoring report payload so a
sustained audit-write outage surfaces on /monitoring/health instead of
disappearing into a NoOp sink.

- SiteHealthReport: add SiteAuditWriteFailures (defaulted, additive).
- ISiteHealthCollector + SiteHealthCollector: new
  IncrementSiteAuditWriteFailures() counter, per-interval reset
  semantics matching ScriptErrorCount / DeadLetterCount.
- HealthMetricsAuditWriteFailureCounter: adapter forwarding
  IAuditWriteFailureCounter.Increment() to the collector.
- AddAuditLogHealthMetricsBridge(): swaps the NoOp default
  registration for the real bridge; called from
  SiteServiceRegistration after AddSiteHealthMonitoring + AddAuditLog.
- Existing host-wiring test updated: site composition now resolves
  HealthMetricsAuditWriteFailureCounter (not NoOp).

Tests: HealthMonitoring 60 -> 63 (3 new), AuditLog 56 -> 59 (3 new),
full solution green.
This commit is contained in:
Joseph Doherty
2026-05-20 13:22:25 -04:00
parent 82a8bbf225
commit dd3351da93
11 changed files with 261 additions and 4 deletions

View File

@@ -7,6 +7,7 @@ using ScadaLink.AuditLog.Configuration;
using ScadaLink.AuditLog.Site;
using ScadaLink.AuditLog.Site.Telemetry;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.HealthMonitoring;
namespace ScadaLink.AuditLog.Tests;
@@ -187,4 +188,56 @@ public class AddAuditLogTests
Assert.Equal(3, opts.BusyIntervalSeconds);
Assert.Equal(60, opts.IdleIntervalSeconds);
}
// -- Bundle G (M2 Task G1) Site Health Monitoring bridge ----------------
/// <summary>
/// With AddAuditLog, AddHealthMonitoring and the bridge all registered,
/// <see cref="IAuditWriteFailureCounter"/> resolves to
/// <see cref="HealthMetricsAuditWriteFailureCounter"/>.
/// </summary>
[Fact]
public void AddAuditLogHealthMetricsBridge_Swaps_FailureCounter_To_HealthMetrics_Implementation()
{
    // Arrange: in-memory configuration carrying only the site writer's database path.
    var settings = new Dictionary<string, string?>
    {
        ["AuditLog:SiteWriter:DatabasePath"] = ":memory:",
    };
    var configuration = new ConfigurationBuilder()
        .AddInMemoryCollection(settings)
        .Build();

    var registrations = new ServiceCollection();
    registrations.AddSingleton<ILoggerFactory, NullLoggerFactory>();
    registrations.AddSingleton(typeof(ILogger<>), typeof(NullLogger<>));
    registrations.AddAuditLog(configuration);

    // The bridge depends on ISiteHealthCollector; AddHealthMonitoring is
    // what registers it on the site (and the central self-host).
    registrations.AddHealthMonitoring();
    registrations.AddAuditLogHealthMetricsBridge();

    // Act
    using var container = registrations.BuildServiceProvider();
    var resolved = container.GetRequiredService<IAuditWriteFailureCounter>();

    // Assert
    Assert.IsType<HealthMetricsAuditWriteFailureCounter>(resolved);
}
/// <summary>
/// Registering the bridge without AddHealthMonitoring makes resolving
/// <see cref="IAuditWriteFailureCounter"/> throw
/// <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public void AddAuditLogHealthMetricsBridge_Without_HealthMonitoring_Still_Resolves_But_Errors_On_Use()
{
    // Contract being documented: the bridge replaces the registration
    // unconditionally, so callers must register AddHealthMonitoring() before
    // the bridge. When ISiteHealthCollector is missing, the failure shows up
    // at GetRequiredService time.
    // Note: despite the "Still_Resolves" wording in the method name, the
    // failure asserted here happens when the counter is resolved.
    var settings = new Dictionary<string, string?>
    {
        ["AuditLog:SiteWriter:DatabasePath"] = ":memory:",
    };
    var configuration = new ConfigurationBuilder()
        .AddInMemoryCollection(settings)
        .Build();

    var registrations = new ServiceCollection();
    registrations.AddSingleton<ILoggerFactory, NullLoggerFactory>();
    registrations.AddSingleton(typeof(ILogger<>), typeof(NullLogger<>));
    registrations.AddAuditLog(configuration);

    // AddHealthMonitoring() is deliberately left out here.
    registrations.AddAuditLogHealthMetricsBridge();

    using var container = registrations.BuildServiceProvider();

    Assert.Throws<InvalidOperationException>(
        () => container.GetRequiredService<IAuditWriteFailureCounter>());
}
}

View File

@@ -0,0 +1,46 @@
using NSubstitute;
using ScadaLink.AuditLog.Site;
using ScadaLink.HealthMonitoring;
namespace ScadaLink.AuditLog.Tests.Site;
/// <summary>
/// Bundle G (M2-T11) — unit tests for <see cref="HealthMetricsAuditWriteFailureCounter"/>,
/// the production binding for <see cref="IAuditWriteFailureCounter"/> on site
/// nodes. The adapter forwards each FallbackAuditWriter primary failure to the
/// shared <see cref="ISiteHealthCollector"/>, so the site health report exposes
/// the failure count as <c>SiteAuditWriteFailures</c>.
/// </summary>
public class HealthMetricsAuditWriteFailureCounterTests
{
    /// <summary>One Increment() produces exactly one collector call.</summary>
    [Fact]
    public void Increment_Routes_To_Collector_IncrementSiteAuditWriteFailures()
    {
        var healthCollector = Substitute.For<ISiteHealthCollector>();
        var sut = new HealthMetricsAuditWriteFailureCounter(healthCollector);

        sut.Increment();

        healthCollector.Received(1).IncrementSiteAuditWriteFailures();
    }

    /// <summary>Each Increment() is forwarded individually to the collector.</summary>
    [Fact]
    public void Increment_Multiple_Calls_Route_To_Collector_Each_Time()
    {
        const int expectedCalls = 3;
        var healthCollector = Substitute.For<ISiteHealthCollector>();
        var sut = new HealthMetricsAuditWriteFailureCounter(healthCollector);

        for (var call = 0; call < expectedCalls; call++)
        {
            sut.Increment();
        }

        healthCollector.Received(expectedCalls).IncrementSiteAuditWriteFailures();
    }

    /// <summary>A null collector is rejected by the constructor.</summary>
    [Fact]
    public void Construction_With_Null_Collector_Throws_ArgumentNullException()
    {
        Assert.Throws<ArgumentNullException>(
            () => new HealthMetricsAuditWriteFailureCounter(null!));
    }
}

View File

@@ -0,0 +1,52 @@
namespace ScadaLink.HealthMonitoring.Tests;
/// <summary>
/// Bundle G (M2-T11) regression coverage. The site-side Audit Log writer chain
/// (FallbackAuditWriter) bumps <see cref="IAuditWriteFailureCounter"/> whenever
/// the primary SQLite writer throws. Bundle G carries that count into the Site
/// Health Monitoring report payload as <c>SiteAuditWriteFailures</c>, so a
/// sustained audit-write outage is visible on /monitoring/health instead of
/// vanishing into a NoOp sink.
/// </summary>
public class SiteAuditWriteFailuresMetricTests
{
    private const string SiteId = "site-1";

    private readonly SiteHealthCollector _sut = new();

    /// <summary>Three increments are reported as 3 in the next report.</summary>
    [Fact]
    public void Increment_Three_Times_Counter_Reports_3()
    {
        for (var i = 0; i < 3; i++)
        {
            _sut.IncrementSiteAuditWriteFailures();
        }

        var report = _sut.CollectReport(SiteId);

        Assert.Equal(3, report.SiteAuditWriteFailures);
    }

    /// <summary>With no increments, the report carries a zero count.</summary>
    [Fact]
    public void Report_Payload_Includes_SiteAuditWriteFailures_AsZeroByDefault()
    {
        var report = _sut.CollectReport(SiteId);

        Assert.Equal(0, report.SiteAuditWriteFailures);
    }

    /// <summary>
    /// SiteAuditWriteFailures is an interval count, not a running total: it
    /// follows the same per-interval reset semantics as ScriptErrorCount /
    /// AlarmEvaluationErrorCount / DeadLetterCount.
    /// </summary>
    [Fact]
    public void CollectReport_Resets_SiteAuditWriteFailures()
    {
        for (var i = 0; i < 2; i++)
        {
            _sut.IncrementSiteAuditWriteFailures();
        }

        var firstReport = _sut.CollectReport(SiteId);
        Assert.Equal(2, firstReport.SiteAuditWriteFailures);

        // Nothing was incremented since the first collection.
        var secondReport = _sut.CollectReport(SiteId);
        Assert.Equal(0, secondReport.SiteAuditWriteFailures);
    }
}

View File

@@ -274,11 +274,16 @@ public class SiteAuditWiringTests : IDisposable
}
[Fact]
public void Site_Resolves_IAuditWriteFailureCounter_AsNoOpDefault()
public void Site_Resolves_IAuditWriteFailureCounter_AsHealthMetricsBridge()
{
// Bundle G (M2-T11): site composition root calls
// AddAuditLogHealthMetricsBridge() after AddAuditLog + AddSiteHealthMonitoring,
// which swaps the NoOp default for the real health-metrics bridge so
// FallbackAuditWriter primary failures surface in the site health
// report payload as SiteAuditWriteFailures.
// GetService (rather than GetRequiredService) is used so that a missing
// registration is reported by the NotNull assertion, separately from a
// wrong implementation type reported by the IsType assertion.
var counter = _host.Services.GetService<IAuditWriteFailureCounter>();
Assert.NotNull(counter);
Assert.IsType<NoOpAuditWriteFailureCounter>(counter);
Assert.IsType<HealthMetricsAuditWriteFailureCounter>(counter);
}
[Fact]

View File

@@ -69,6 +69,7 @@ public class DeploymentManagerRedeployTests : TestKit, IDisposable
/// <summary>Empty implementation: the call is accepted and has no effect.</summary>
public void IncrementScriptError()
{
}
/// <summary>Empty implementation: the call is accepted and has no effect.</summary>
public void IncrementAlarmError()
{
}
/// <summary>Empty implementation: the call is accepted and has no effect.</summary>
public void IncrementDeadLetter()
{
}
/// <summary>Empty implementation: the call is accepted and has no effect.</summary>
public void IncrementSiteAuditWriteFailures()
{
}
/// <summary>Empty implementation: both arguments are ignored.</summary>
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health)
{
}
/// <summary>Empty implementation: the argument is ignored.</summary>
public void RemoveConnection(string connectionName)
{
}
/// <summary>Empty implementation: all arguments are ignored.</summary>
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved)
{
}