feat(health): SiteAuditWriteFailures counter + AuditLog bridge (#23)
Bundle G of Audit Log #23 M2. Bridges the FallbackAuditWriter primary-failure counter into the Site Health Monitoring report payload so a sustained audit-write outage surfaces on /monitoring/health instead of disappearing into a NoOp sink. - SiteHealthReport: add SiteAuditWriteFailures (defaulted, additive). - ISiteHealthCollector + SiteHealthCollector: new IncrementSiteAuditWriteFailures() counter, per-interval reset semantics matching ScriptErrorCount / DeadLetterCount. - HealthMetricsAuditWriteFailureCounter: adapter forwarding IAuditWriteFailureCounter.Increment() to the collector. - AddAuditLogHealthMetricsBridge(): swaps the NoOp default registration for the real bridge; called from SiteServiceRegistration after AddSiteHealthMonitoring + AddAuditLog. - Existing host-wiring test updated: site composition now resolves HealthMetricsAuditWriteFailureCounter (not NoOp). Tests: HealthMonitoring 60 -> 63 (3 new), AuditLog 56 -> 59 (3 new), full solution green.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Configuration;
|
||||
@@ -103,4 +104,36 @@ public static class ServiceCollectionExtensions
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
/// Audit Log (#23) M2 Bundle G — rebinds <see cref="IAuditWriteFailureCounter"/>
/// from the default <see cref="NoOpAuditWriteFailureCounter"/> to the
/// <see cref="HealthMetricsAuditWriteFailureCounter"/> bridge, so that the
/// FallbackAuditWriter primary-failure count is reported in the site health
/// payload as <c>SiteHealthReport.SiteAuditWriteFailures</c>.
/// </summary>
/// <remarks>
/// <para>
/// Call this AFTER <see cref="AddAuditLog"/>, which registers the NoOp
/// default that this method replaces, and after
/// <c>ScadaLink.HealthMonitoring.ServiceCollectionExtensions.AddHealthMonitoring</c>
/// or <c>AddSiteHealthMonitoring</c>, which register the
/// <see cref="ISiteHealthCollector"/> that the bridge takes as a constructor
/// dependency. If that collector is not registered, resolving
/// <see cref="IAuditWriteFailureCounter"/> throws
/// <see cref="InvalidOperationException"/> at <c>GetRequiredService</c> time.
/// This is deliberate: falling back to a silent NoOp would hide the
/// misconfiguration.
/// </para>
/// <para>
/// The registration is swapped rather than appended, so calling this method
/// more than once is idempotent and does not accumulate descriptors.
/// </para>
/// </remarks>
/// <param name="services">The service collection to modify.</param>
/// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
public static IServiceCollection AddAuditLogHealthMetricsBridge(this IServiceCollection services)
{
    if (services is null)
    {
        throw new ArgumentNullException(nameof(services));
    }

    var bridgeDescriptor =
        ServiceDescriptor.Singleton<IAuditWriteFailureCounter, HealthMetricsAuditWriteFailureCounter>();

    // Replace hands back the collection it was invoked on, so this returns `services`.
    return services.Replace(bridgeDescriptor);
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
using ScadaLink.HealthMonitoring;
|
||||
|
||||
namespace ScadaLink.AuditLog.Site;
|
||||
|
||||
/// <summary>
/// Audit Log (#23) M2 Bundle G — adapter that implements
/// <see cref="IAuditWriteFailureCounter"/> (incremented by
/// <see cref="FallbackAuditWriter"/> every time the primary SQLite writer
/// throws) by forwarding each increment to <see cref="ISiteHealthCollector"/>,
/// so the count appears in the site health report payload as
/// <c>SiteHealthReport.SiteAuditWriteFailures</c>.
/// </summary>
/// <remarks>
/// <para>
/// Registered by <see cref="ServiceCollectionExtensions.AddAuditLogHealthMetricsBridge"/>;
/// callers must register <c>AddHealthMonitoring()</c> first so that
/// <see cref="ISiteHealthCollector"/> resolves. The default <c>AddAuditLog</c>
/// registration keeps <see cref="NoOpAuditWriteFailureCounter"/> for nodes
/// where Site Health Monitoring is not wired (the silent-sink contract — audit
/// write failures must NEVER abort the user-facing action, alog.md §7).
/// </para>
/// </remarks>
public sealed class HealthMetricsAuditWriteFailureCounter : IAuditWriteFailureCounter
{
    private readonly ISiteHealthCollector _healthCollector;

    /// <summary>
    /// Creates the bridge over the collector that receives the forwarded increments.
    /// </summary>
    /// <param name="collector">Health collector to forward increments to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="collector"/> is <c>null</c>.</exception>
    public HealthMetricsAuditWriteFailureCounter(ISiteHealthCollector collector)
    {
        ArgumentNullException.ThrowIfNull(collector);
        _healthCollector = collector;
    }

    /// <inheritdoc/>
    public void Increment()
    {
        _healthCollector.IncrementSiteAuditWriteFailures();
    }
}
|
||||
@@ -20,7 +20,12 @@ public record SiteHealthReport(
|
||||
IReadOnlyDictionary<string, string>? DataConnectionEndpoints = null,
|
||||
IReadOnlyDictionary<string, TagQualityCounts>? DataConnectionTagQuality = null,
|
||||
int ParkedMessageCount = 0,
|
||||
IReadOnlyList<NodeStatus>? ClusterNodes = null);
|
||||
IReadOnlyList<NodeStatus>? ClusterNodes = null,
|
||||
// Audit Log (#23) M2 Bundle G: per-interval count of FallbackAuditWriter
|
||||
// primary failures (SQLite throws routed to the drop-oldest ring). Surfaces
|
||||
// a sustained audit-write outage on /monitoring/health. Defaults to 0 so
|
||||
// existing producers / tests that don't construct the field stay valid.
|
||||
int SiteAuditWriteFailures = 0);
|
||||
|
||||
/// <summary>
|
||||
/// Broadcast wrapper used between central nodes to keep per-node
|
||||
|
||||
@@ -12,6 +12,13 @@ public interface ISiteHealthCollector
|
||||
void IncrementScriptError();
|
||||
void IncrementAlarmError();
|
||||
void IncrementDeadLetter();
|
||||
/// <summary>
|
||||
/// Audit Log (#23) Bundle G — increment the per-interval count of
|
||||
/// <c>FallbackAuditWriter</c> primary failures. Bridged from the
|
||||
/// <c>IAuditWriteFailureCounter</c> binding registered via
|
||||
/// <c>AddAuditLogHealthMetricsBridge()</c>.
|
||||
/// </summary>
|
||||
void IncrementSiteAuditWriteFailures();
|
||||
void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
|
||||
void RemoveConnection(string connectionName);
|
||||
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
||||
|
||||
@@ -13,6 +13,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
private int _scriptErrorCount;
|
||||
private int _alarmErrorCount;
|
||||
private int _deadLetterCount;
|
||||
private int _siteAuditWriteFailures;
|
||||
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
|
||||
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
||||
private readonly ConcurrentDictionary<string, string> _connectionEndpoints = new();
|
||||
@@ -61,6 +62,18 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
Interlocked.Increment(ref _deadLetterCount);
|
||||
}
|
||||
|
||||
/// <summary>
/// Audit Log (#23) Bundle G — atomically adds one to the per-interval count of
/// <c>FallbackAuditWriter</c> primary failures. Invoked through the
/// <c>IAuditWriteFailureCounter</c> binding registered via
/// <c>AddAuditLogHealthMetricsBridge()</c>. The count is zeroed each interval
/// together with the other per-interval counters.
/// </summary>
public void IncrementSiteAuditWriteFailures() =>
    Interlocked.Increment(ref _siteAuditWriteFailures);
|
||||
|
||||
/// <summary>
|
||||
/// Update the health status for a named data connection.
|
||||
/// Called by DCL when connection state changes.
|
||||
@@ -144,6 +157,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
var scriptErrors = Interlocked.Exchange(ref _scriptErrorCount, 0);
|
||||
var alarmErrors = Interlocked.Exchange(ref _alarmErrorCount, 0);
|
||||
var deadLetters = Interlocked.Exchange(ref _deadLetterCount, 0);
|
||||
var siteAuditWriteFailures = Interlocked.Exchange(ref _siteAuditWriteFailures, 0);
|
||||
|
||||
// Snapshot current connection and tag resolution state
|
||||
var connectionStatuses = new Dictionary<string, ConnectionHealth>(_connectionStatuses);
|
||||
@@ -175,6 +189,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
DataConnectionEndpoints: connectionEndpoints,
|
||||
DataConnectionTagQuality: tagQuality,
|
||||
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
|
||||
ClusterNodes: _clusterNodes?.ToList());
|
||||
ClusterNodes: _clusterNodes?.ToList(),
|
||||
SiteAuditWriteFailures: siteAuditWriteFailures);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,6 +51,13 @@ public static class SiteServiceRegistration
|
||||
// ScriptRuntimeContext, when Bundle F lands) reaches for.
|
||||
services.AddAuditLog(config);
|
||||
|
||||
// Audit Log (#23) M2 Bundle G — bridge FallbackAuditWriter primary
|
||||
// failures into the site health report payload as
|
||||
// SiteAuditWriteFailures. Must come AFTER both AddSiteHealthMonitoring
|
||||
// (registers ISiteHealthCollector) and AddAuditLog (registers the
|
||||
// NoOp default this call replaces).
|
||||
services.AddAuditLogHealthMetricsBridge();
|
||||
|
||||
// WP-13: Akka.NET bootstrap via hosted service
|
||||
services.AddSingleton<AkkaHostedService>();
|
||||
services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());
|
||||
|
||||
@@ -7,6 +7,7 @@ using ScadaLink.AuditLog.Configuration;
|
||||
using ScadaLink.AuditLog.Site;
|
||||
using ScadaLink.AuditLog.Site.Telemetry;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
|
||||
namespace ScadaLink.AuditLog.Tests;
|
||||
|
||||
@@ -187,4 +188,56 @@ public class AddAuditLogTests
|
||||
Assert.Equal(3, opts.BusyIntervalSeconds);
|
||||
Assert.Equal(60, opts.IdleIntervalSeconds);
|
||||
}
|
||||
|
||||
// -- Bundle G (M2 Task G1) Site Health Monitoring bridge ----------------
|
||||
|
||||
[Fact]
public void AddAuditLogHealthMetricsBridge_Swaps_FailureCounter_To_HealthMetrics_Implementation()
{
    // Arrange: audit log + health monitoring + bridge, in that order.
    var settings = new Dictionary<string, string?>
    {
        ["AuditLog:SiteWriter:DatabasePath"] = ":memory:",
    };
    var config = new ConfigurationBuilder().AddInMemoryCollection(settings).Build();

    var services = new ServiceCollection();
    services.AddSingleton<ILoggerFactory, NullLoggerFactory>();
    services.AddSingleton(typeof(ILogger<>), typeof(NullLogger<>));
    services.AddAuditLog(config);
    // AddHealthMonitoring supplies the ISiteHealthCollector that the bridge
    // needs, both on the site and on the central self-host.
    services.AddHealthMonitoring();
    services.AddAuditLogHealthMetricsBridge();

    using var provider = services.BuildServiceProvider();

    // Act
    var resolved = provider.GetRequiredService<IAuditWriteFailureCounter>();

    // Assert: the NoOp default has been swapped for the health-metrics bridge.
    Assert.IsType<HealthMetricsAuditWriteFailureCounter>(resolved);
}
|
||||
|
||||
[Fact]
public void AddAuditLogHealthMetricsBridge_Without_HealthMonitoring_Still_Resolves_But_Errors_On_Use()
{
    // Contract under test: the bridge swaps the registration unconditionally,
    // so when ISiteHealthCollector has not been registered the failure shows
    // up when the counter is resolved (GetRequiredService throws), not when
    // Increment() is later called. Callers must therefore register
    // AddHealthMonitoring() before the bridge.
    var settings = new Dictionary<string, string?>
    {
        ["AuditLog:SiteWriter:DatabasePath"] = ":memory:",
    };
    var config = new ConfigurationBuilder().AddInMemoryCollection(settings).Build();

    var services = new ServiceCollection();
    services.AddSingleton<ILoggerFactory, NullLoggerFactory>();
    services.AddSingleton(typeof(ILogger<>), typeof(NullLogger<>));
    services.AddAuditLog(config);
    // Deliberately no AddHealthMonitoring() here.
    services.AddAuditLogHealthMetricsBridge();

    using var provider = services.BuildServiceProvider();

    Func<object> resolveCounter = () => provider.GetRequiredService<IAuditWriteFailureCounter>();

    Assert.Throws<InvalidOperationException>(resolveCounter);
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
using NSubstitute;
|
||||
using ScadaLink.AuditLog.Site;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
|
||||
namespace ScadaLink.AuditLog.Tests.Site;
|
||||
|
||||
/// <summary>
/// Bundle G (M2-T11) — unit tests for <see cref="HealthMetricsAuditWriteFailureCounter"/>,
/// the production <see cref="IAuditWriteFailureCounter"/> binding on site nodes.
/// The adapter must pass every FallbackAuditWriter primary failure on to the
/// shared <see cref="ISiteHealthCollector"/> so the site health report exposes
/// the count as <c>SiteAuditWriteFailures</c>.
/// </summary>
public class HealthMetricsAuditWriteFailureCounterTests
{
    /// <summary>
    /// Builds the adapter under test over a fresh NSubstitute collector and
    /// hands that substitute back for verification.
    /// </summary>
    private static HealthMetricsAuditWriteFailureCounter CreateCounter(out ISiteHealthCollector collector)
    {
        collector = Substitute.For<ISiteHealthCollector>();
        return new HealthMetricsAuditWriteFailureCounter(collector);
    }

    [Fact]
    public void Increment_Routes_To_Collector_IncrementSiteAuditWriteFailures()
    {
        var sut = CreateCounter(out var collector);

        sut.Increment();

        collector.Received(1).IncrementSiteAuditWriteFailures();
    }

    [Fact]
    public void Increment_Multiple_Calls_Route_To_Collector_Each_Time()
    {
        const int callCount = 3;
        var sut = CreateCounter(out var collector);

        for (var call = 0; call < callCount; call++)
        {
            sut.Increment();
        }

        collector.Received(callCount).IncrementSiteAuditWriteFailures();
    }

    [Fact]
    public void Construction_With_Null_Collector_Throws_ArgumentNullException()
    {
        Func<object> construct = () => new HealthMetricsAuditWriteFailureCounter(null!);

        Assert.Throws<ArgumentNullException>(construct);
    }
}
|
||||
@@ -0,0 +1,52 @@
|
||||
namespace ScadaLink.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
/// Bundle G (M2-T11) regression coverage. The site-side Audit Log writer chain
/// (FallbackAuditWriter) increments <c>IAuditWriteFailureCounter</c> every time
/// the primary SQLite writer throws. Bundle G bridges that counter into the
/// Site Health Monitoring report payload as <c>SiteAuditWriteFailures</c>, so a
/// sustained audit-write outage is visible on /monitoring/health instead of
/// vanishing into a NoOp sink.
/// </summary>
public class SiteAuditWriteFailuresMetricTests
{
    private readonly SiteHealthCollector _sut = new SiteHealthCollector();

    /// <summary>Records <paramref name="count"/> audit-write failures on the collector.</summary>
    private void RecordFailures(int count)
    {
        for (var i = 0; i < count; i++)
        {
            _sut.IncrementSiteAuditWriteFailures();
        }
    }

    [Fact]
    public void Increment_Three_Times_Counter_Reports_3()
    {
        RecordFailures(3);

        var report = _sut.CollectReport("site-1");

        Assert.Equal(3, report.SiteAuditWriteFailures);
    }

    [Fact]
    public void Report_Payload_Includes_SiteAuditWriteFailures_AsZeroByDefault()
    {
        var report = _sut.CollectReport("site-1");

        Assert.Equal(0, report.SiteAuditWriteFailures);
    }

    /// <summary>
    /// SiteAuditWriteFailures is an interval count rather than a running total:
    /// collecting a report resets it, mirroring the per-interval semantics of
    /// ScriptErrorCount / AlarmEvaluationErrorCount / DeadLetterCount.
    /// </summary>
    [Fact]
    public void CollectReport_Resets_SiteAuditWriteFailures()
    {
        RecordFailures(2);

        var firstReport = _sut.CollectReport("site-1");
        Assert.Equal(2, firstReport.SiteAuditWriteFailures);

        var secondReport = _sut.CollectReport("site-1");
        Assert.Equal(0, secondReport.SiteAuditWriteFailures);
    }
}
|
||||
@@ -274,11 +274,16 @@ public class SiteAuditWiringTests : IDisposable
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Site_Resolves_IAuditWriteFailureCounter_AsNoOpDefault()
|
||||
public void Site_Resolves_IAuditWriteFailureCounter_AsHealthMetricsBridge()
|
||||
{
|
||||
// Bundle G (M2-T11): site composition root calls
|
||||
// AddAuditLogHealthMetricsBridge() after AddAuditLog + AddSiteHealthMonitoring,
|
||||
// which swaps the NoOp default for the real health-metrics bridge so
|
||||
// FallbackAuditWriter primary failures surface in the site health
|
||||
// report payload as SiteAuditWriteFailures.
|
||||
var counter = _host.Services.GetService<IAuditWriteFailureCounter>();
|
||||
Assert.NotNull(counter);
|
||||
Assert.IsType<NoOpAuditWriteFailureCounter>(counter);
|
||||
Assert.IsType<HealthMetricsAuditWriteFailureCounter>(counter);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -69,6 +69,7 @@ public class DeploymentManagerRedeployTests : TestKit, IDisposable
|
||||
public void IncrementScriptError() { }
|
||||
public void IncrementAlarmError() { }
|
||||
public void IncrementDeadLetter() { }
|
||||
public void IncrementSiteAuditWriteFailures() { }
|
||||
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
|
||||
public void RemoveConnection(string connectionName) { }
|
||||
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
|
||||
|
||||
Reference in New Issue
Block a user