feat(health): CentralAuditWriteFailures + AuditCentralHealthSnapshot (#23 M6)

This commit is contained in:
Joseph Doherty
2026-05-20 19:11:52 -04:00
parent 42333a72ed
commit 70ed8d4557
8 changed files with 398 additions and 2 deletions

View File

@@ -155,6 +155,13 @@ public static class ServiceCollectionExtensions
services.AddSingleton<ICachedCallLifecycleObserver>(
sp => sp.GetRequiredService<CachedCallLifecycleBridge>());
// M6 Bundle E (T8): central audit-write failure counter — NoOp default
// for site/test composition roots that don't wire the central health
// snapshot. AddAuditLogCentralMaintenance below replaces this binding
// with the AuditCentralHealthSnapshot implementation so increments
// surface on the central dashboard.
services.TryAddSingleton<ICentralAuditWriteFailureCounter, NoOpCentralAuditWriteFailureCounter>();
// M4 Bundle B: central direct-write audit writer used by
// NotificationOutboxActor (Bundle B) and Inbound API (Bundle C/D) to
// emit AuditLog rows that originate ON central, not via site telemetry.
@@ -167,10 +174,13 @@ public static class ServiceCollectionExtensions
// Bundle C (M5-T6): wire the IAuditPayloadFilter into the factory so
// NotificationOutboxActor + Inbound API rows are truncated + redacted
// before they hit MS SQL.
// M6 Bundle E (T8): also wire the ICentralAuditWriteFailureCounter
// so swallowed repo throws bump the central health counter.
services.AddSingleton<ICentralAuditWriter>(sp => new CentralAuditWriter(
sp,
sp.GetRequiredService<ILogger<CentralAuditWriter>>(),
sp.GetRequiredService<IAuditPayloadFilter>()));
sp.GetRequiredService<IAuditPayloadFilter>(),
sp.GetRequiredService<ICentralAuditWriteFailureCounter>()));
return services;
}
@@ -270,6 +280,30 @@ public static class ServiceCollectionExtensions
new SiteAuditTelemetryStalledTracker(
sp.GetRequiredService<Akka.Actor.ActorSystem>()));
// M6 Bundle E (T8 + T9): central health snapshot — a single object
// that owns the CentralAuditWriteFailures + AuditRedactionFailure
// Interlocked counters AND surfaces them on
// IAuditCentralHealthSnapshot. The same instance is bound to BOTH
// writer-side interfaces (ICentralAuditWriteFailureCounter +
// IAuditRedactionFailureCounter) so every central-side increment
// routes into the shared counters. Site nodes keep their existing
// site-side bridges (registered by AddAuditLogHealthMetricsBridge), so
// the central snapshot type does not shadow the site-side metric.
services.AddSingleton<AuditCentralHealthSnapshot>();
services.AddSingleton<IAuditCentralHealthSnapshot>(
sp => sp.GetRequiredService<AuditCentralHealthSnapshot>());
services.Replace(ServiceDescriptor.Singleton<ICentralAuditWriteFailureCounter>(
sp => sp.GetRequiredService<AuditCentralHealthSnapshot>()));
// M6 Bundle E (T9): override the NoOp IAuditRedactionFailureCounter
// (registered by AddAuditLog) with the central snapshot binding so
// payload-filter throws on CentralAuditWriter / AuditLogIngestActor
// paths surface on the central dashboard. The site composition root
// overrides this binding AGAIN via AddAuditLogHealthMetricsBridge —
// central nodes do not call that bridge, so this is the final
// binding on a central host.
services.Replace(ServiceDescriptor.Singleton<IAuditRedactionFailureCounter>(
sp => sp.GetRequiredService<AuditCentralHealthSnapshot>()));
return services;
}
}