feat(health): SiteAuditWriteFailures counter + AuditLog bridge (#23)

Bundle G of Audit Log #23 M2. Bridges the FallbackAuditWriter primary-
failure counter into the Site Health Monitoring report payload so a
sustained audit-write outage surfaces on /monitoring/health instead of
disappearing into a NoOp sink.

- SiteHealthReport: add SiteAuditWriteFailures (defaulted, additive).
- ISiteHealthCollector + SiteHealthCollector: new
  IncrementSiteAuditWriteFailures() counter, per-interval reset
  semantics matching ScriptErrorCount / DeadLetterCount.
- HealthMetricsAuditWriteFailureCounter: adapter forwarding
  IAuditWriteFailureCounter.Increment() to the collector.
- AddAuditLogHealthMetricsBridge(): swaps the NoOp default
  registration for the real bridge; called from
  SiteServiceRegistration after AddSiteHealthMonitoring + AddAuditLog.
- Existing host-wiring test updated: site composition now resolves
  HealthMetricsAuditWriteFailureCounter (not NoOp).

Tests: HealthMonitoring 60 -> 63 (3 new), AuditLog 56 -> 59 (3 new),
full solution green.
This commit is contained in:
Joseph Doherty
2026-05-20 13:22:25 -04:00
parent 82a8bbf225
commit dd3351da93
11 changed files with 261 additions and 4 deletions

View File

@@ -12,6 +12,13 @@ public interface ISiteHealthCollector
void IncrementScriptError();
void IncrementAlarmError();
void IncrementDeadLetter();
/// <summary>
/// Audit Log (#23) Bundle G — records one <c>FallbackAuditWriter</c> primary
/// failure in the per-interval failure count. Bridged from the
/// <c>IAuditWriteFailureCounter</c> binding registered via
/// <c>AddAuditLogHealthMetricsBridge()</c>; the accumulated count is carried
/// in the health report as <c>SiteAuditWriteFailures</c>.
/// </summary>
void IncrementSiteAuditWriteFailures();
void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
void RemoveConnection(string connectionName);
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);

View File

@@ -13,6 +13,7 @@ public class SiteHealthCollector : ISiteHealthCollector
private int _scriptErrorCount;
private int _alarmErrorCount;
private int _deadLetterCount;
private int _siteAuditWriteFailures;
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
private readonly ConcurrentDictionary<string, string> _connectionEndpoints = new();
@@ -61,6 +62,18 @@ public class SiteHealthCollector : ISiteHealthCollector
Interlocked.Increment(ref _deadLetterCount);
}
/// <summary>
/// Audit Log (#23) Bundle G — adds one to the running tally of
/// <c>FallbackAuditWriter</c> primary failures for the current interval.
/// Reached through the <c>IAuditWriteFailureCounter</c> binding that
/// <c>AddAuditLogHealthMetricsBridge()</c> registers. The tally is a
/// per-interval counter and is reset alongside the other per-interval
/// counters.
/// </summary>
/// <remarks>
/// The update goes through <see cref="Interlocked.Increment(ref int)"/>, so
/// the increment itself is atomic; the returned value is not needed here.
/// </remarks>
public void IncrementSiteAuditWriteFailures() =>
    Interlocked.Increment(ref _siteAuditWriteFailures);
/// <summary>
/// Update the health status for a named data connection.
/// Called by DCL when connection state changes.
@@ -144,6 +157,7 @@ public class SiteHealthCollector : ISiteHealthCollector
var scriptErrors = Interlocked.Exchange(ref _scriptErrorCount, 0);
var alarmErrors = Interlocked.Exchange(ref _alarmErrorCount, 0);
var deadLetters = Interlocked.Exchange(ref _deadLetterCount, 0);
var siteAuditWriteFailures = Interlocked.Exchange(ref _siteAuditWriteFailures, 0);
// Snapshot current connection and tag resolution state
var connectionStatuses = new Dictionary<string, ConnectionHealth>(_connectionStatuses);
@@ -175,6 +189,7 @@ public class SiteHealthCollector : ISiteHealthCollector
DataConnectionEndpoints: connectionEndpoints,
DataConnectionTagQuality: tagQuality,
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
ClusterNodes: _clusterNodes?.ToList());
ClusterNodes: _clusterNodes?.ToList(),
SiteAuditWriteFailures: siteAuditWriteFailures);
}
}