feat(audit): M5.3 response-capture increments — request headers, ceiling-hits counter, per-method body opt-out (T7)

1. Request headers in Extra JSON (AuditWriteMiddleware): adds a `requestHeaders`
   object to the existing Extra JSON alongside remoteIp/userAgent; headers whose
   names appear in AuditLogOptions.HeaderRedactList (Authorization, X-Api-Key,
   Cookie, Set-Cookie by default) are replaced with "<redacted>" using
   OrdinalIgnoreCase matching — same policy as ScadaBridgeAuditRedactor.

2. AuditInboundCeilingHits counter: new IAuditInboundCeilingHitsCounter interface
   + NoOpAuditInboundCeilingHitsCounter default; AuditCentralHealthSnapshot
   implements the interface (Interlocked field, thread-safe) and exposes
   AuditInboundCeilingHits on IAuditCentralHealthSnapshot; AddAuditLog registers
   the NoOp default, AddAuditLogCentralMaintenance forwards to the snapshot;
   AuditWriteMiddleware accepts the counter as an optional ctor arg and increments
   it once per request where either the request or response body hit the cap.

3. Per-method SkipBodyCapture opt-out: adds SkipBodyCapture bool to
   PerTargetRedactionOverride; AuditWriteMiddleware consults the per-target
   override map at the start of InvokeAsync (before EnableBuffering) and, when
   set, skips body read + capture entirely — the audit row still emits with
   headers/metadata but null RequestSummary/ResponseSummary; truncation flags
   are also cleared so the ceiling-hits counter is not bumped for opted-out methods.
This commit is contained in:
Joseph Doherty
2026-06-16 21:23:07 -04:00
parent 0569c5ff23
commit a07ff28f10
9 changed files with 643 additions and 8 deletions
@@ -39,10 +39,12 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
public sealed class AuditCentralHealthSnapshot
: IAuditCentralHealthSnapshot,
ICentralAuditWriteFailureCounter,
IAuditRedactionFailureCounter
IAuditRedactionFailureCounter,
IAuditInboundCeilingHitsCounter
{
private int _centralAuditWriteFailures;
private int _auditRedactionFailure;
private int _auditInboundCeilingHits;
private readonly ConcurrentDictionary<string, bool> _stalled = new();
/// <inheritdoc/>
@@ -53,6 +55,10 @@ public sealed class AuditCentralHealthSnapshot
public int AuditRedactionFailure
{
    // A compare-exchange whose comparand and replacement are both 0 never
    // changes the stored value; it is used here purely to read the counter
    // atomically.
    get { return Interlocked.CompareExchange(ref _auditRedactionFailure, 0, 0); }
}
/// <inheritdoc/>
public int AuditInboundCeilingHits
{
    // A compare-exchange whose comparand and replacement are both 0 never
    // changes the stored value; it is used here purely to read the counter
    // atomically.
    get { return Interlocked.CompareExchange(ref _auditInboundCeilingHits, 0, 0); }
}
/// <inheritdoc/>
public IReadOnlyDictionary<string, bool> SiteAuditTelemetryStalled
{
    get
    {
        // Every read builds a new dictionary from the current entries, so
        // the caller receives a copy rather than the backing collection.
        var copy = new Dictionary<string, bool>(_stalled);
        return copy;
    }
}
@@ -78,4 +84,8 @@ public sealed class AuditCentralHealthSnapshot
/// <inheritdoc/>
void IAuditRedactionFailureCounter.Increment()
{
    // Atomic add-one on the backing field; the returned new value is not needed.
    Interlocked.Increment(ref _auditRedactionFailure);
}
/// <inheritdoc/>
void IAuditInboundCeilingHitsCounter.Increment()
{
    // Atomic add-one on the backing field; the returned new value is not needed.
    Interlocked.Increment(ref _auditInboundCeilingHits);
}
}
@@ -50,6 +50,17 @@ public interface IAuditCentralHealthSnapshot
/// </summary>
int AuditRedactionFailure { get; }
/// <summary>
/// Number of inbound requests, since process start, for which the request
/// and/or response body exceeded the
/// <see cref="ZB.MOM.WW.ScadaBridge.AuditLog.Configuration.AuditLogOptions.InboundMaxBytes"/>
/// ceiling and was truncated in the audit copy. Incremented by
/// <see cref="ZB.MOM.WW.ScadaBridge.InboundAPI.Middleware.AuditWriteMiddleware"/>
/// at most once per request, even when both bodies are truncated. The value
/// is cumulative, so a steadily rising count can indicate
/// callers sending unexpectedly large bodies.
/// </summary>
int AuditInboundCeilingHits { get; }
/// <summary>
/// Per-site latched stalled state: <c>true</c> when the
/// <see cref="SiteAuditReconciliationActor"/> has observed two
@@ -0,0 +1,24 @@
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <summary>
/// Audit Log (#23) M5.3 (T7) counter sink incremented by
/// <see cref="ZB.MOM.WW.ScadaBridge.InboundAPI.Middleware.AuditWriteMiddleware"/>
/// once per inbound request whose request or response body is truncated at the
/// <see cref="ZB.MOM.WW.ScadaBridge.AuditLog.Configuration.AuditLogOptions.InboundMaxBytes"/>
/// ceiling. Mirrors the <see cref="ICentralAuditWriteFailureCounter"/> shape:
/// one-method, NoOp default, must-never-abort-the-user-facing-action invariant.
/// </summary>
/// <remarks>
/// A ceiling hit is a normal operational event (the caller sent a large
/// body) rather than a failure, but surfacing a cumulative count lets
/// operators detect over-size callers early. The
/// <see cref="AuditCentralHealthSnapshot"/> production implementation
/// accumulates the count via an <c>Interlocked</c> field alongside
/// <see cref="ICentralAuditWriteFailureCounter"/> and
/// <see cref="ZB.MOM.WW.ScadaBridge.AuditLog.Payload.IAuditRedactionFailureCounter"/>.
/// </remarks>
public interface IAuditInboundCeilingHitsCounter
{
/// <summary>
/// Increment the inbound body-ceiling hit counter by one. The middleware
/// calls this at most once per request, even when both the request and the
/// response body were truncated.
/// </summary>
void Increment();
}
@@ -0,0 +1,13 @@
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <summary>
/// Do-nothing <see cref="IAuditInboundCeilingHitsCounter"/> used as the
/// default binding wherever the central health snapshot is not wired
/// (e.g. site composition roots, or test harnesses without a health
/// dashboard). Every increment is discarded, which is the correct behaviour
/// for environments that have no audit KPI surface.
/// </summary>
public sealed class NoOpAuditInboundCeilingHitsCounter : IAuditInboundCeilingHitsCounter
{
    /// <inheritdoc/>
    public void Increment()
    {
        // Deliberately empty: there is nowhere to record the hit.
    }
}
@@ -25,4 +25,15 @@ public sealed class PerTargetRedactionOverride
/// rows.
/// </summary>
public string? RedactSqlParamsMatching { get; set; }
/// <summary>
/// When <c>true</c>, the inbound API audit row for this target records
/// request headers and metadata (status, duration, actor, etc.)
/// but the request and response body strings are omitted
/// (<c>RequestSummary</c> / <c>ResponseSummary</c> are left null). The
/// audit row itself is always emitted — only the body content is suppressed.
/// <c>false</c> (the default) means body capture
/// proceeds normally up to <see cref="AuditLogOptions.InboundMaxBytes"/>.
/// </summary>
public bool SkipBodyCapture { get; set; }
}
@@ -200,6 +200,13 @@ public static class ServiceCollectionExtensions
// surface on the central dashboard.
services.TryAddSingleton<ICentralAuditWriteFailureCounter, NoOpCentralAuditWriteFailureCounter>();
// M5.3 (T7): inbound body-ceiling hit counter — NoOp default for
// site/test roots. AddAuditLogCentralMaintenance replaces this binding
// with the AuditCentralHealthSnapshot implementation so ceiling-hit
// counts surface on the central dashboard alongside write-failure and
// redaction-failure counters.
services.TryAddSingleton<IAuditInboundCeilingHitsCounter, NoOpAuditInboundCeilingHitsCounter>();
// M4 Bundle B: central direct-write audit writer used by
// NotificationOutboxActor (Bundle B) and Inbound API (Bundle C/D) to
// emit AuditLog rows that originate ON central, not via site telemetry.
@@ -383,6 +390,12 @@ public static class ServiceCollectionExtensions
// HealthMetricsAuditRedactionFailureCounter shape one-for-one.
services.Replace(ServiceDescriptor.Singleton<IAuditRedactionFailureCounter,
CentralAuditRedactionFailureCounter>());
// M5.3 (T7): replace the NoOp IAuditInboundCeilingHitsCounter with the
// AuditCentralHealthSnapshot so ceiling-hit counts surface on the
// central dashboard. Same singleton-forward pattern as
// ICentralAuditWriteFailureCounter above.
services.Replace(ServiceDescriptor.Singleton<IAuditInboundCeilingHitsCounter>(
sp => sp.GetRequiredService<AuditCentralHealthSnapshot>()));
return services;
}
@@ -6,6 +6,7 @@ using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.Audit;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.AuditLog.Configuration;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
@@ -95,6 +96,7 @@ public sealed class AuditWriteMiddleware
private readonly ILogger<AuditWriteMiddleware> _logger;
private readonly IOptionsMonitor<AuditLogOptions> _options;
private readonly IAuditActorAccessor? _actorAccessor;
private readonly IAuditInboundCeilingHitsCounter _ceilingHitsCounter;
/// <summary>
/// Initializes the middleware with its required dependencies.
@@ -110,18 +112,26 @@ public sealed class AuditWriteMiddleware
/// construct the middleware; when absent, actor resolution falls back to the
/// stashed API-key name only.
/// </param>
/// <param name="ceilingHitsCounter">
/// M5.3 (T7, optional): incremented whenever an inbound request or response
/// body is truncated at <see cref="AuditLogOptions.InboundMaxBytes"/>. Optional
/// so existing tests and composition roots without the central health snapshot
/// wired still construct without the counter; a NoOp is used when absent.
/// </param>
public AuditWriteMiddleware(
RequestDelegate next,
ICentralAuditWriter auditWriter,
ILogger<AuditWriteMiddleware> logger,
IOptionsMonitor<AuditLogOptions> options,
IAuditActorAccessor? actorAccessor = null)
IAuditActorAccessor? actorAccessor = null,
IAuditInboundCeilingHitsCounter? ceilingHitsCounter = null)
{
_next = next ?? throw new ArgumentNullException(nameof(next));
_auditWriter = auditWriter ?? throw new ArgumentNullException(nameof(auditWriter));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_options = options ?? throw new ArgumentNullException(nameof(options));
_actorAccessor = actorAccessor;
_ceilingHitsCounter = ceilingHitsCounter ?? new NoOpAuditInboundCeilingHitsCounter();
}
/// <summary>
@@ -133,9 +143,11 @@ public sealed class AuditWriteMiddleware
{
var sw = Stopwatch.StartNew();
// Per-request hot read of the inbound cap so a live config change
// Per-request hot read of the options snapshot so a live config change
// picks up on the next request without re-resolving the singleton.
var cap = _options.CurrentValue.InboundMaxBytes;
// InboundMaxBytes is read once here and passed to the capture helpers.
var opts = _options.CurrentValue;
var cap = opts.InboundMaxBytes;
// Audit Log #23 (ParentExecutionId): mint the inbound request's per-request
// ExecutionId ONCE, here at the start of the request, and stash it on
@@ -163,9 +175,20 @@ public sealed class AuditWriteMiddleware
// ReadBufferedRequestBodyAsync's own ContentLength is 0 short-circuit
// returns (null, false) for the bodyless case anyway, so the audit row
// is unchanged.
//
// M5.3 (T7): check if the matched method/target has SkipBodyCapture set.
// The route value is resolved BEFORE the pipeline runs (route matching
// has already bound {methodName} at this point), so we can skip the
// EnableBuffering allocation and body read up front.
var methodNameForOverride = ctx.Request.RouteValues.TryGetValue("methodName", out var rv)
&& rv is string mn && !string.IsNullOrWhiteSpace(mn) ? mn : null;
var skipBody = methodNameForOverride != null
&& opts.PerTargetOverrides.TryGetValue(methodNameForOverride, out var perTarget)
&& perTarget.SkipBodyCapture;
var requestBody = (string?)null;
var requestTruncated = false;
if (RequestHasBody(ctx.Request))
if (!skipBody && RequestHasBody(ctx.Request))
{
ctx.Request.EnableBuffering();
(requestBody, requestTruncated) =
@@ -200,7 +223,14 @@ public sealed class AuditWriteMiddleware
// The forwarding wrapper has already written every byte to the
// original sink; this just pulls back the bounded UTF-8 string.
ctx.Response.Body = originalResponseBody;
var (responseBody, responseTruncated) = captureStream.GetCapturedBody();
var (capturedResponseBody, capturedResponseTruncated) = captureStream.GetCapturedBody();
// M5.3 (T7): if SkipBodyCapture is set, discard the captured response
// body (the request body was never captured above). The row + headers
// still emit with null RequestSummary / ResponseSummary.
// Truncation flags are also cleared so ceiling-hit counter is not
// bumped for methods that deliberately opt out of body capture.
var responseBody = skipBody ? null : capturedResponseBody;
var responseTruncated = skipBody ? false : capturedResponseTruncated;
EmitInboundAudit(
ctx,
@@ -208,7 +238,9 @@ public sealed class AuditWriteMiddleware
thrown,
requestBody,
responseBody,
requestTruncated || responseTruncated);
requestTruncated || responseTruncated,
requestTruncated,
responseTruncated);
}
}
@@ -223,7 +255,9 @@ public sealed class AuditWriteMiddleware
Exception? thrown,
string? requestBody,
string? responseBody,
bool payloadTruncated)
bool payloadTruncated,
bool requestTruncated = false,
bool responseTruncated = false)
{
try
{
@@ -243,10 +277,40 @@ public sealed class AuditWriteMiddleware
var actor = isAuthFailure ? null : ResolveActor(ctx);
var methodName = ResolveMethodName(ctx);
// M5.3 (T7): increment the ceiling-hits counter once per request
// that hit the cap on EITHER the request or response body.
if (requestTruncated || responseTruncated)
{
try { _ceilingHitsCounter.Increment(); } catch { /* swallow per §7 */ }
}
// M5.3 (T7): capture request headers into Extra JSON alongside the
// existing remoteIp / userAgent provenance fields. The header
// collection is run through the SAME header-redaction list
// (AuditLogOptions.HeaderRedactList) that the ScadaBridgeAuditRedactor
// applies to RequestSummary / ResponseSummary — auth/sensitive
// headers are redacted before they land in the row.
var currentOpts = _options.CurrentValue;
var redactSet = new HashSet<string>(
currentOpts.HeaderRedactList,
StringComparer.OrdinalIgnoreCase);
var headerDict = new Dictionary<string, string>(StringComparer.Ordinal);
foreach (var header in ctx.Request.Headers)
{
// Redact headers whose name appears in the HeaderRedactList —
// the same "<redacted>" marker used by ScadaBridgeAuditRedactor.
var value = redactSet.Contains(header.Key)
? "<redacted>"
: header.Value.ToString();
headerDict[header.Key] = value;
}
var extra = JsonSerializer.Serialize(new
{
remoteIp = ctx.Connection.RemoteIpAddress?.ToString(),
userAgent = ctx.Request.Headers.UserAgent.ToString(),
requestHeaders = headerDict,
});
var evt = ScadaBridgeAuditEventFactory.Create(