feat(health): AuditRedactionFailure counter + bridge (#23 M5)
Bundle C task M5-T7 — surface DefaultAuditPayloadFilter redactor
over-redactions as a Site Health metric so a misconfigured /
catastrophic regex shows up on /monitoring/health rather than
disappearing into a NoOp sink.
- SiteHealthReport: new 'AuditRedactionFailure' int field
(defaulted to 0 for back-compat with existing producers/tests).
- ISiteHealthCollector / SiteHealthCollector:
new IncrementAuditRedactionFailure() — per-interval atomic
counter with Interlocked, reset on CollectReport, mirroring
the M2 Bundle G SiteAuditWriteFailures pattern.
- HealthMetricsAuditRedactionFailureCounter: new bridge in
ScadaLink.AuditLog.Site that forwards IAuditRedactionFailureCounter
increments to ISiteHealthCollector — mirrors
HealthMetricsAuditWriteFailureCounter one-for-one.
- AddAuditLogHealthMetricsBridge: now ALSO Replaces the
NoOpAuditRedactionFailureCounter binding with the health-metrics
bridge, so a single AddAuditLogHealthMetricsBridge() call wires
both the M2 Bundle G write-failure counter and the M5 Bundle C
redaction-failure counter into the health report.
Site-side only for M5 — the filter also runs on CentralAuditWriter
and AuditLogIngestActor (where the counter keeps its NoOp default), but
a central-side health-metric surface for AuditRedactionFailure is
deferred to M6 alongside the rest of the central health collector
work.
Tests:
- AuditRedactionFailureMetricTests (HealthMonitoring) covers the
SiteHealthCollector increment/report/reset shape (3 tests).
- HealthMetricsAuditRedactionFailureCounterTests (AuditLog) covers
the AuditLog → HealthMonitoring bridge (3 tests).
- Existing CountCapturingHealthCollector stub in
DeploymentManagerRedeployTests extended with the new no-op
interface method.
Verified: dotnet build clean, all 24 test projects green
(the only failure, on the first ScadaLink.SiteRuntime.Tests run, was the
known-flaky InstanceActorChildAttributeRaceTests; it passes on re-run both
in isolation and in the full suite, and is unrelated to these changes).
This commit is contained in:
@@ -172,26 +172,38 @@ public static class ServiceCollectionExtensions
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Audit Log (#23) M2 Bundle G — swap the default
|
/// Audit Log (#23) M2 Bundle G + M5 Bundle C — swap the default
|
||||||
/// <see cref="NoOpAuditWriteFailureCounter"/> registration for the real
|
/// <see cref="NoOpAuditWriteFailureCounter"/> and
|
||||||
/// <see cref="HealthMetricsAuditWriteFailureCounter"/> bridge so the
|
/// <see cref="NoOpAuditRedactionFailureCounter"/> registrations for the
|
||||||
/// FallbackAuditWriter primary-failure counter surfaces in the site health
|
/// real <see cref="HealthMetricsAuditWriteFailureCounter"/> /
|
||||||
/// report payload as <c>SiteHealthReport.SiteAuditWriteFailures</c>.
|
/// <see cref="HealthMetricsAuditRedactionFailureCounter"/> bridges so the
|
||||||
|
/// FallbackAuditWriter primary-failure counter AND the
|
||||||
|
/// DefaultAuditPayloadFilter redactor-failure counter both surface in the
|
||||||
|
/// site health report payload as
|
||||||
|
/// <c>SiteHealthReport.SiteAuditWriteFailures</c> +
|
||||||
|
/// <c>SiteHealthReport.AuditRedactionFailure</c>.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <remarks>
|
/// <remarks>
|
||||||
/// <para>
|
/// <para>
|
||||||
/// Must be called AFTER both <see cref="AddAuditLog"/> (registers the
|
/// Must be called AFTER both <see cref="AddAuditLog"/> (registers the
|
||||||
/// NoOp default this method replaces) and
|
/// NoOp defaults this method replaces) and
|
||||||
/// <c>ScadaLink.HealthMonitoring.ServiceCollectionExtensions.AddHealthMonitoring</c>
|
/// <c>ScadaLink.HealthMonitoring.ServiceCollectionExtensions.AddHealthMonitoring</c>
|
||||||
/// or <c>AddSiteHealthMonitoring</c> (registers the
|
/// or <c>AddSiteHealthMonitoring</c> (registers the
|
||||||
/// <see cref="ISiteHealthCollector"/> the bridge depends on). Resolving
|
/// <see cref="ISiteHealthCollector"/> the bridges depend on). Resolving
|
||||||
/// <see cref="IAuditWriteFailureCounter"/> without the latter throws
|
/// <see cref="IAuditWriteFailureCounter"/> or
|
||||||
|
/// <see cref="IAuditRedactionFailureCounter"/> without the latter throws
|
||||||
/// <see cref="InvalidOperationException"/> at <c>GetRequiredService</c>
|
/// <see cref="InvalidOperationException"/> at <c>GetRequiredService</c>
|
||||||
/// time — by design, since a silent NoOp would mask a misconfiguration.
|
/// time — by design, since a silent NoOp would mask a misconfiguration.
|
||||||
/// </para>
|
/// </para>
|
||||||
/// <para>
|
/// <para>
|
||||||
/// Idempotent — calling twice replaces the descriptor each time without
|
/// Idempotent — calling twice replaces each descriptor without piling up
|
||||||
/// piling up registrations.
|
/// registrations.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// Site-side only for M5: the central composition root keeps the NoOp
|
||||||
|
/// defaults; the central health-metric surface that would expose
|
||||||
|
/// <c>AuditRedactionFailure</c> next to the existing central counters
|
||||||
|
/// ships in M6.
|
||||||
/// </para>
|
/// </para>
|
||||||
/// </remarks>
|
/// </remarks>
|
||||||
public static IServiceCollection AddAuditLogHealthMetricsBridge(this IServiceCollection services)
|
public static IServiceCollection AddAuditLogHealthMetricsBridge(this IServiceCollection services)
|
||||||
@@ -200,6 +212,8 @@ public static class ServiceCollectionExtensions
|
|||||||
|
|
||||||
services.Replace(
|
services.Replace(
|
||||||
ServiceDescriptor.Singleton<IAuditWriteFailureCounter, HealthMetricsAuditWriteFailureCounter>());
|
ServiceDescriptor.Singleton<IAuditWriteFailureCounter, HealthMetricsAuditWriteFailureCounter>());
|
||||||
|
services.Replace(
|
||||||
|
ServiceDescriptor.Singleton<IAuditRedactionFailureCounter, HealthMetricsAuditRedactionFailureCounter>());
|
||||||
return services;
|
return services;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,48 @@
|
|||||||
|
using ScadaLink.AuditLog.Payload;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
|
|
||||||
|
namespace ScadaLink.AuditLog.Site;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Audit Log (#23) M5 Bundle C — bridges
|
||||||
|
/// <see cref="IAuditRedactionFailureCounter"/> (incremented by
|
||||||
|
/// <see cref="DefaultAuditPayloadFilter"/> every time a header / body / SQL
|
||||||
|
/// parameter redactor stage throws and the filter has to over-redact the
|
||||||
|
/// offending field) into <see cref="ISiteHealthCollector"/> so the count
|
||||||
|
/// surfaces in the site health report payload as
|
||||||
|
/// <c>SiteHealthReport.AuditRedactionFailure</c>.
|
||||||
|
/// </summary>
|
||||||
|
/// <remarks>
|
||||||
|
/// <para>
|
||||||
|
/// Registered by <see cref="ServiceCollectionExtensions.AddAuditLogHealthMetricsBridge"/>;
|
||||||
|
/// callers must register <c>AddHealthMonitoring()</c> (or <c>AddSiteHealthMonitoring()</c>) first so
|
||||||
|
/// <see cref="ISiteHealthCollector"/> resolves. The default <see cref="ServiceCollectionExtensions.AddAuditLog"/>
|
||||||
|
/// registration keeps <see cref="NoOpAuditRedactionFailureCounter"/> for nodes
|
||||||
|
/// where Site Health Monitoring is not wired (the silent-sink contract —
|
||||||
|
/// redaction failures must NEVER abort the user-facing action, alog.md §7).
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// Mirrors the M2 Bundle G <see cref="HealthMetricsAuditWriteFailureCounter"/>
|
||||||
|
/// shape one-for-one so the two health-metric bridges age together.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// Site-side only for M5: the redaction filter also runs on the central
|
||||||
|
/// writers (CentralAuditWriter + AuditLogIngestActor), but the central
|
||||||
|
/// health-metric surface that would expose <c>AuditRedactionFailure</c>
|
||||||
|
/// alongside the existing central counters ships in M6. Until then, the
|
||||||
|
/// central composition root keeps the NoOp default — the redactions still
|
||||||
|
/// happen, they just don't get counted into a health report.
|
||||||
|
/// </para>
|
||||||
|
/// </remarks>
|
||||||
|
public sealed class HealthMetricsAuditRedactionFailureCounter : IAuditRedactionFailureCounter
|
||||||
|
{
|
||||||
|
private readonly ISiteHealthCollector _collector;
|
||||||
|
|
||||||
|
public HealthMetricsAuditRedactionFailureCounter(ISiteHealthCollector collector)
|
||||||
|
{
|
||||||
|
_collector = collector ?? throw new ArgumentNullException(nameof(collector));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc/>
|
||||||
|
public void Increment() => _collector.IncrementAuditRedactionFailure();
|
||||||
|
}
|
||||||
@@ -25,7 +25,14 @@ public record SiteHealthReport(
|
|||||||
// primary failures (SQLite throws routed to the drop-oldest ring). Surfaces
|
// primary failures (SQLite throws routed to the drop-oldest ring). Surfaces
|
||||||
// a sustained audit-write outage on /monitoring/health. Defaults to 0 so
|
// a sustained audit-write outage on /monitoring/health. Defaults to 0 so
|
||||||
// existing producers / tests that don't construct the field stay valid.
|
// existing producers / tests that don't construct the field stay valid.
|
||||||
int SiteAuditWriteFailures = 0);
|
int SiteAuditWriteFailures = 0,
|
||||||
|
// Audit Log (#23) M5 Bundle C: per-interval count of payload-filter
|
||||||
|
// redactor over-redactions (any header / body / SQL parameter stage
|
||||||
|
// throwing → field replaced with the "<redacted: redactor error>"
|
||||||
|
// marker). Surfaces a misconfigured / catastrophic regex on
|
||||||
|
// /monitoring/health. Defaults to 0 for back-compat with existing
|
||||||
|
// producers and tests that don't construct the field.
|
||||||
|
int AuditRedactionFailure = 0);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Broadcast wrapper used between central nodes to keep per-node
|
/// Broadcast wrapper used between central nodes to keep per-node
|
||||||
|
|||||||
@@ -19,6 +19,15 @@ public interface ISiteHealthCollector
|
|||||||
/// <c>AddAuditLogHealthMetricsBridge()</c>.
|
/// <c>AddAuditLogHealthMetricsBridge()</c>.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
void IncrementSiteAuditWriteFailures();
|
void IncrementSiteAuditWriteFailures();
|
||||||
|
/// <summary>
|
||||||
|
/// Audit Log (#23) M5 Bundle C — increment the per-interval count of
|
||||||
|
/// payload-filter redactor over-redactions (header / body / SQL
|
||||||
|
/// parameter stage throws routed to the
|
||||||
|
/// <c><redacted: redactor error></c> marker). Bridged from the
|
||||||
|
/// <c>IAuditRedactionFailureCounter</c> binding registered via
|
||||||
|
/// <c>AddAuditLogHealthMetricsBridge()</c>.
|
||||||
|
/// </summary>
|
||||||
|
void IncrementAuditRedactionFailure();
|
||||||
void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
|
void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
|
||||||
void RemoveConnection(string connectionName);
|
void RemoveConnection(string connectionName);
|
||||||
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
private int _alarmErrorCount;
|
private int _alarmErrorCount;
|
||||||
private int _deadLetterCount;
|
private int _deadLetterCount;
|
||||||
private int _siteAuditWriteFailures;
|
private int _siteAuditWriteFailures;
|
||||||
|
private int _auditRedactionFailures;
|
||||||
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
|
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
|
||||||
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
||||||
private readonly ConcurrentDictionary<string, string> _connectionEndpoints = new();
|
private readonly ConcurrentDictionary<string, string> _connectionEndpoints = new();
|
||||||
@@ -74,6 +75,20 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
Interlocked.Increment(ref _siteAuditWriteFailures);
|
Interlocked.Increment(ref _siteAuditWriteFailures);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Audit Log (#23) M5 Bundle C — increment the per-interval count of
|
||||||
|
/// payload-filter redactor over-redactions (header / body / SQL
|
||||||
|
/// parameter stage throws routed to the
|
||||||
|
/// <c><redacted: redactor error></c> marker). Bridged from the
|
||||||
|
/// <c>IAuditRedactionFailureCounter</c> binding registered via
|
||||||
|
/// <c>AddAuditLogHealthMetricsBridge()</c>; reset every interval together
|
||||||
|
/// with the other per-interval counters.
|
||||||
|
/// </summary>
|
||||||
|
public void IncrementAuditRedactionFailure()
|
||||||
|
{
|
||||||
|
Interlocked.Increment(ref _auditRedactionFailures);
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Update the health status for a named data connection.
|
/// Update the health status for a named data connection.
|
||||||
/// Called by DCL when connection state changes.
|
/// Called by DCL when connection state changes.
|
||||||
@@ -158,6 +173,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
var alarmErrors = Interlocked.Exchange(ref _alarmErrorCount, 0);
|
var alarmErrors = Interlocked.Exchange(ref _alarmErrorCount, 0);
|
||||||
var deadLetters = Interlocked.Exchange(ref _deadLetterCount, 0);
|
var deadLetters = Interlocked.Exchange(ref _deadLetterCount, 0);
|
||||||
var siteAuditWriteFailures = Interlocked.Exchange(ref _siteAuditWriteFailures, 0);
|
var siteAuditWriteFailures = Interlocked.Exchange(ref _siteAuditWriteFailures, 0);
|
||||||
|
var auditRedactionFailures = Interlocked.Exchange(ref _auditRedactionFailures, 0);
|
||||||
|
|
||||||
// Snapshot current connection and tag resolution state
|
// Snapshot current connection and tag resolution state
|
||||||
var connectionStatuses = new Dictionary<string, ConnectionHealth>(_connectionStatuses);
|
var connectionStatuses = new Dictionary<string, ConnectionHealth>(_connectionStatuses);
|
||||||
@@ -190,6 +206,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
DataConnectionTagQuality: tagQuality,
|
DataConnectionTagQuality: tagQuality,
|
||||||
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
|
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
|
||||||
ClusterNodes: _clusterNodes?.ToList(),
|
ClusterNodes: _clusterNodes?.ToList(),
|
||||||
SiteAuditWriteFailures: siteAuditWriteFailures);
|
SiteAuditWriteFailures: siteAuditWriteFailures,
|
||||||
|
AuditRedactionFailure: auditRedactionFailures);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
using NSubstitute;
|
||||||
|
using ScadaLink.AuditLog.Site;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
|
|
||||||
|
namespace ScadaLink.AuditLog.Tests.Site;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Bundle C (M5-T7) — the <see cref="HealthMetricsAuditRedactionFailureCounter"/>
|
||||||
|
/// adapter is the production binding for
|
||||||
|
/// <see cref="ScadaLink.AuditLog.Payload.IAuditRedactionFailureCounter"/> on
|
||||||
|
/// site nodes; it forwards every <see cref="DefaultAuditPayloadFilter"/>
|
||||||
|
/// redactor over-redaction event into the shared
|
||||||
|
/// <see cref="ISiteHealthCollector"/> so the site health report surfaces the
|
||||||
|
/// count as <c>AuditRedactionFailure</c>. Mirrors the M2 Bundle G
|
||||||
|
/// HealthMetricsAuditWriteFailureCounter shape one-for-one.
|
||||||
|
/// </summary>
|
||||||
|
public class HealthMetricsAuditRedactionFailureCounterTests
|
||||||
|
{
|
||||||
|
[Fact]
|
||||||
|
public void Increment_Routes_To_Collector_IncrementAuditRedactionFailure()
|
||||||
|
{
|
||||||
|
var collector = Substitute.For<ISiteHealthCollector>();
|
||||||
|
var counter = new HealthMetricsAuditRedactionFailureCounter(collector);
|
||||||
|
|
||||||
|
counter.Increment();
|
||||||
|
|
||||||
|
collector.Received(1).IncrementAuditRedactionFailure();
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void Increment_Multiple_Calls_Route_To_Collector_Each_Time()
|
||||||
|
{
|
||||||
|
var collector = Substitute.For<ISiteHealthCollector>();
|
||||||
|
var counter = new HealthMetricsAuditRedactionFailureCounter(collector);
|
||||||
|
|
||||||
|
counter.Increment();
|
||||||
|
counter.Increment();
|
||||||
|
counter.Increment();
|
||||||
|
|
||||||
|
collector.Received(3).IncrementAuditRedactionFailure();
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void Construction_With_Null_Collector_Throws_ArgumentNullException()
|
||||||
|
{
|
||||||
|
Assert.Throws<ArgumentNullException>(
|
||||||
|
() => new HealthMetricsAuditRedactionFailureCounter(null!));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
namespace ScadaLink.HealthMonitoring.Tests;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Bundle C (M5-T7) regression coverage. The Audit Log payload filter
|
||||||
|
/// (<c>DefaultAuditPayloadFilter</c>) increments
|
||||||
|
/// <c>IAuditRedactionFailureCounter</c> every time a header/body/SQL-param
|
||||||
|
/// redactor stage throws and the filter has to over-redact the field with
|
||||||
|
/// the <c><redacted: redactor error></c> marker. Bundle C bridges that
|
||||||
|
/// counter into the Site Health Monitoring report payload as
|
||||||
|
/// <c>AuditRedactionFailure</c> so a misconfigured / catastrophic regex
|
||||||
|
/// surfaces on /monitoring/health rather than disappearing into a NoOp sink.
|
||||||
|
/// Mirrors the Bundle G <c>SiteAuditWriteFailures</c> metric shape — same
|
||||||
|
/// per-interval increment-and-reset semantics, same defaults-to-zero
|
||||||
|
/// contract.
|
||||||
|
/// </summary>
|
||||||
|
public class AuditRedactionFailureMetricTests
|
||||||
|
{
|
||||||
|
private readonly SiteHealthCollector _collector = new();
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void Increment_Three_Times_Counter_Reports_3()
|
||||||
|
{
|
||||||
|
_collector.IncrementAuditRedactionFailure();
|
||||||
|
_collector.IncrementAuditRedactionFailure();
|
||||||
|
_collector.IncrementAuditRedactionFailure();
|
||||||
|
|
||||||
|
var report = _collector.CollectReport("site-1");
|
||||||
|
|
||||||
|
Assert.Equal(3, report.AuditRedactionFailure);
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void Report_Payload_Includes_AuditRedactionFailure_AsZeroByDefault()
|
||||||
|
{
|
||||||
|
var report = _collector.CollectReport("site-1");
|
||||||
|
|
||||||
|
Assert.Equal(0, report.AuditRedactionFailure);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Mirrors the existing per-interval reset semantics for ScriptErrorCount /
|
||||||
|
/// AlarmEvaluationErrorCount / DeadLetterCount / SiteAuditWriteFailures —
|
||||||
|
/// AuditRedactionFailure is an interval count, not a running total.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public void CollectReport_Resets_AuditRedactionFailure()
|
||||||
|
{
|
||||||
|
_collector.IncrementAuditRedactionFailure();
|
||||||
|
_collector.IncrementAuditRedactionFailure();
|
||||||
|
|
||||||
|
var first = _collector.CollectReport("site-1");
|
||||||
|
Assert.Equal(2, first.AuditRedactionFailure);
|
||||||
|
|
||||||
|
var second = _collector.CollectReport("site-1");
|
||||||
|
Assert.Equal(0, second.AuditRedactionFailure);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -70,6 +70,7 @@ public class DeploymentManagerRedeployTests : TestKit, IDisposable
|
|||||||
public void IncrementAlarmError() { }
|
public void IncrementAlarmError() { }
|
||||||
public void IncrementDeadLetter() { }
|
public void IncrementDeadLetter() { }
|
||||||
public void IncrementSiteAuditWriteFailures() { }
|
public void IncrementSiteAuditWriteFailures() { }
|
||||||
|
public void IncrementAuditRedactionFailure() { }
|
||||||
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
|
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
|
||||||
public void RemoveConnection(string connectionName) { }
|
public void RemoveConnection(string connectionName) { }
|
||||||
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
|
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
|
||||||
|
|||||||
Reference in New Issue
Block a user