From 37f17dc4a8bea47a6df8f3cfc88b5f6bf3b5e9dd Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Wed, 20 May 2026 17:09:36 -0400 Subject: [PATCH] feat(auditlog): body regex redaction with over-redaction safety net (#23 M5) --- .../Payload/DefaultAuditPayloadFilter.cs | 179 ++++++++++++++- .../Payload/BodyRegexRedactionTests.cs | 207 ++++++++++++++++++ 2 files changed, 384 insertions(+), 2 deletions(-) create mode 100644 tests/ScadaLink.AuditLog.Tests/Payload/BodyRegexRedactionTests.cs diff --git a/src/ScadaLink.AuditLog/Payload/DefaultAuditPayloadFilter.cs b/src/ScadaLink.AuditLog/Payload/DefaultAuditPayloadFilter.cs index a191542..faa9742 100644 --- a/src/ScadaLink.AuditLog/Payload/DefaultAuditPayloadFilter.cs +++ b/src/ScadaLink.AuditLog/Payload/DefaultAuditPayloadFilter.cs @@ -1,7 +1,9 @@ +using System.Collections.Concurrent; using System.Text; using System.Text.Encodings.Web; using System.Text.Json; using System.Text.Json.Nodes; +using System.Text.RegularExpressions; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using ScadaLink.AuditLog.Configuration; @@ -39,8 +41,10 @@ namespace ScadaLink.AuditLog.Payload; /// /// /// Stage order (each runs on every applicable field): -/// header redaction → truncation. Bundle B will append body-regex and -/// SQL-parameter stages after header redaction and before truncation. +/// header redaction → body regex redaction → truncation. The SQL-parameter +/// stage piggybacks on the body-redactor path; both run BEFORE truncation so +/// the cap trims the redacted result, never bytes the redactor intended to +/// hide. /// /// public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter @@ -48,6 +52,16 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter private const string RedactedMarker = ""; private const string RedactorErrorMarker = ""; + /// + /// Per-match regex timeout. Catastrophic-backtracking patterns trip a + /// when a single match takes + /// longer than this; the offending field is then over-redacted with + /// and the failure counter is bumped. + /// 50 ms is generous for normal patterns yet short enough that the + /// audit hot-path isn't held up by a misconfigured regex. + /// + private static readonly TimeSpan RegexMatchTimeout = TimeSpan.FromMilliseconds(50); + /// /// JSON serializer options used to re-emit redacted summaries. The /// UnsafeRelaxedJsonEscaping encoder is required so the redaction marker @@ -67,6 +81,17 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter private readonly ILogger _logger; private readonly IAuditRedactionFailureCounter _failureCounter; + /// + /// Compiled-regex cache keyed by pattern string. Lazy population: each + /// pattern is compiled on first use and cached forever (the entry's + /// carries either the working + /// or a sentinel marking the pattern as invalid so we don't retry the + /// failing compile on every call). ConcurrentDictionary is the right + /// thread-safety primitive here because the filter is a DI singleton + /// shared across the audit hot-path. + /// + private readonly ConcurrentDictionary _regexCache = new(); + /// /// Primary constructor used by DI — pulls the optional redaction-failure /// counter from the container; a NoOp default is registered in @@ -95,6 +120,20 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter var errorDetail = rawEvent.ErrorDetail; var extra = rawEvent.Extra; + // --- Body-regex stage (also runs BEFORE truncation) ----------- + // Resolves the active regex set per event so per-target overrides + // bound to AuditEvent.Target are picked up; effectively a no-op + // when neither GlobalBodyRedactors nor the per-target additions + // are configured. + var bodyRegexes = ResolveBodyRegexes(opts, rawEvent.Target); + if (bodyRegexes.Count > 0) + { + request = RedactBody(request, bodyRegexes); + response = RedactBody(response, bodyRegexes); + errorDetail = RedactBody(errorDetail, bodyRegexes); + extra = RedactBody(extra, bodyRegexes); + } + // --- Truncation stage ----------------------------------------- var truncated = false; request = TruncateField(request, cap, ref truncated); @@ -207,6 +246,125 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter } } + /// + /// Combine the global and per-target body-redactor lists for a single + /// event, returning the compiled-regex set to apply. Patterns that failed + /// compilation are silently skipped — the compile-time failure was logged + /// once on first encounter; we never let one bad pattern starve the rest. + /// + private IReadOnlyList ResolveBodyRegexes(AuditLogOptions opts, string? target) + { + var hasGlobal = opts.GlobalBodyRedactors is { Count: > 0 }; + var perTargetAdditions = (target != null + && opts.PerTargetOverrides.TryGetValue(target, out var over) + && over.AdditionalBodyRedactors is { Count: > 0 }) + ? over.AdditionalBodyRedactors + : null; + + if (!hasGlobal && perTargetAdditions == null) + { + return Array.Empty(); + } + + var result = new List(); + if (hasGlobal) + { + foreach (var pattern in opts.GlobalBodyRedactors) + { + if (TryGetCompiledRegex(pattern, out var rx)) + { + result.Add(rx!); + } + } + } + if (perTargetAdditions != null) + { + foreach (var pattern in perTargetAdditions) + { + if (TryGetCompiledRegex(pattern, out var rx)) + { + result.Add(rx!); + } + } + } + return result; + } + + /// + /// Resolve a compiled regex from the cache, compiling it on first use. + /// Returns false for patterns that are invalid OR whose compile + /// took longer than 100 ms (the spec calls catastrophic-backtracking + /// guesses at compile time "invalid"); the failure is logged once and + /// the sentinel cache entry prevents repeat compile attempts. + /// + private bool TryGetCompiledRegex(string pattern, out Regex? regex) + { + var entry = _regexCache.GetOrAdd(pattern, CompileRegex); + regex = entry.Regex; + return entry.Regex != null; + } + + private CompiledRegex CompileRegex(string pattern) + { + try + { + var swStart = System.Diagnostics.Stopwatch.GetTimestamp(); + var rx = new Regex(pattern, RegexOptions.Compiled, RegexMatchTimeout); + var elapsedMs = (System.Diagnostics.Stopwatch.GetTimestamp() - swStart) + * 1000d / System.Diagnostics.Stopwatch.Frequency; + if (elapsedMs > 100) + { + _logger.LogWarning( + "Body redactor pattern compiled in {Elapsed}ms (> 100ms cap); rejecting '{Pattern}'", + elapsedMs, pattern); + return CompiledRegex.Invalid; + } + return new CompiledRegex(rx); + } + catch (Exception ex) + { + _logger.LogWarning( + ex, + "Body redactor pattern '{Pattern}' failed to compile; skipping", + pattern); + return CompiledRegex.Invalid; + } + } + + /// + /// Apply each compiled body-redactor regex to in + /// turn, replacing every match with . If any + /// single regex match throws (most commonly + /// ) the field is over-redacted + /// with and the failure counter is + /// incremented — the user-facing action is never aborted. + /// + private string? RedactBody(string? value, IReadOnlyList regexes) + { + if (value is null) + { + return null; + } + var current = value; + foreach (var rx in regexes) + { + try + { + current = rx.Replace(current, RedactedMarker); + } + catch (Exception ex) + { + _logger.LogWarning( + ex, + "Body redactor '{Pattern}' faulted; over-redacting field with '{Marker}'", + rx.ToString(), RedactorErrorMarker); + try { _failureCounter.Increment(); } catch { /* swallow per §7 */ } + return RedactorErrorMarker; + } + } + return current; + } + private static string? TruncateField(string? value, int cap, ref bool truncated) { if (value is null) @@ -251,4 +409,21 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter AuditStatus.Delivered or AuditStatus.Submitted or AuditStatus.Forwarded => false, _ => true, }; + + /// + /// Cache entry for a body-redactor pattern. Carries the working + /// on the success path, or the + /// sentinel for patterns that failed to compile + /// (or exceeded the 100 ms compile budget). The sentinel lets us skip + /// repeat compile attempts on every event without re-throwing on the + /// hot-path. + /// + private readonly struct CompiledRegex + { + public static readonly CompiledRegex Invalid = new(null); + + public Regex? Regex { get; } + + public CompiledRegex(Regex? regex) => Regex = regex; + } } diff --git a/tests/ScadaLink.AuditLog.Tests/Payload/BodyRegexRedactionTests.cs b/tests/ScadaLink.AuditLog.Tests/Payload/BodyRegexRedactionTests.cs new file mode 100644 index 0000000..85393d1 --- /dev/null +++ b/tests/ScadaLink.AuditLog.Tests/Payload/BodyRegexRedactionTests.cs @@ -0,0 +1,207 @@ +using System.Text; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using ScadaLink.AuditLog.Configuration; +using ScadaLink.AuditLog.Payload; +using ScadaLink.Commons.Entities.Audit; +using ScadaLink.Commons.Types.Enums; + +namespace ScadaLink.AuditLog.Tests.Payload; + +/// +/// Bundle B (M5-T4) tests for body regex redaction in +/// . The body-redactor stage runs +/// regex replace against RequestSummary / ResponseSummary / ErrorDetail / +/// Extra, replacing every match with <redacted>. Regexes come +/// from plus the per-target +/// . Each +/// regex is compiled with a 50 ms timeout so catastrophic-backtracking +/// patterns trip a ; +/// when that happens the offending field is over-redacted with +/// <redacted: redactor error> and the +/// is incremented. The stage runs +/// BEFORE truncation. +/// +public class BodyRegexRedactionTests +{ + private static IOptionsMonitor Monitor(AuditLogOptions? opts = null) => + new StaticMonitor(opts ?? new AuditLogOptions()); + + private static DefaultAuditPayloadFilter Filter( + AuditLogOptions? opts = null, + IAuditRedactionFailureCounter? counter = null) => + new(Monitor(opts), NullLogger.Instance, counter); + + private static AuditEvent NewEvent( + AuditStatus status = AuditStatus.Delivered, + string? request = null, + string? response = null, + string? errorDetail = null, + string? extra = null, + string? target = null) => new() + { + EventId = Guid.NewGuid(), + OccurredAtUtc = DateTime.UtcNow, + Channel = AuditChannel.ApiOutbound, + Kind = AuditKind.ApiCall, + Status = status, + Target = target, + RequestSummary = request, + ResponseSummary = response, + ErrorDetail = errorDetail, + Extra = extra, + }; + + [Fact] + public void GlobalRegex_HunterPassword_Redacted() + { + var opts = new AuditLogOptions + { + GlobalBodyRedactors = new List { "\"password\":\\s*\"[^\"]*\"" }, + }; + const string input = "{\"user\":\"alice\",\"password\":\"hunter2\"}"; + var evt = NewEvent(request: input); + + var result = Filter(opts).Apply(evt); + + Assert.NotNull(result.RequestSummary); + Assert.Contains("", result.RequestSummary); + Assert.DoesNotContain("hunter2", result.RequestSummary); + Assert.Contains("alice", result.RequestSummary); + } + + [Fact] + public void PerTargetRegex_OnlyAppliedToMatchingTarget() + { + var opts = new AuditLogOptions + { + PerTargetOverrides = new Dictionary + { + ["esg.A"] = new PerTargetRedactionOverride + { + AdditionalBodyRedactors = new List { "SECRET-[A-Z0-9]+" }, + }, + }, + }; + + const string input = "token=SECRET-XYZ123 normal-text"; + + var matchedEvt = NewEvent(request: input, target: "esg.A"); + var matchedResult = Filter(opts).Apply(matchedEvt); + Assert.Contains("", matchedResult.RequestSummary!); + Assert.DoesNotContain("SECRET-XYZ123", matchedResult.RequestSummary!); + + var unmatchedEvt = NewEvent(request: input, target: "esg.B"); + var unmatchedResult = Filter(opts).Apply(unmatchedEvt); + Assert.Equal(input, unmatchedResult.RequestSummary); + } + + [Fact] + public void RegexThrowsTimeout_FieldBecomesRedactedMarker_CounterIncrements() + { + // Catastrophic backtracking pattern: alternation with overlapping + // groups + non-matching suffix forces the engine into exponential + // work that blows past the 50 ms timeout. Append a non-'a' character + // so the suffix anchor fails and the engine has to exhaust every + // permutation. + var opts = new AuditLogOptions + { + GlobalBodyRedactors = new List { "^(a+)+$" }, + }; + // 30 'a's followed by '!' — small enough to keep the test fast, big + // enough to overflow the 50 ms regex timeout on every machine the CI + // grid runs on. + var input = new string('a', 30) + "!"; + var counter = new CountingRedactionFailureCounter(); + var evt = NewEvent(request: input); + + var result = Filter(opts, counter).Apply(evt); + + Assert.Equal("", result.RequestSummary); + Assert.True(counter.Count >= 1, $"expected counter >= 1, got {counter.Count}"); + } + + [Fact] + public void NoRegexConfigured_FieldUnchanged() + { + var opts = new AuditLogOptions(); // no GlobalBodyRedactors, no per-target + const string input = "{\"password\":\"hunter2\"}"; + var evt = NewEvent(request: input); + + var result = Filter(opts).Apply(evt); + + Assert.Equal(input, result.RequestSummary); + } + + [Fact] + public void RedactionAppliedBeforeTruncation() + { + // A pattern that matches a long secret in the body. The full input is + // > 8 KB so truncation must run. After redaction: + // * the marker survives the cap (redaction ran first), + // * the original secret bytes do NOT survive, + // * PayloadTruncated is set. + var opts = new AuditLogOptions + { + GlobalBodyRedactors = new List { "SECRET-[A-Z0-9]+" }, + }; + var secret = "SECRET-ABCDEF123"; + var padding = new string('x', 9 * 1024); + var input = secret + padding; + Assert.True(Encoding.UTF8.GetByteCount(input) > 8192); + + var evt = NewEvent(AuditStatus.Delivered, request: input); + + var result = Filter(opts).Apply(evt); + + Assert.NotNull(result.RequestSummary); + Assert.True(Encoding.UTF8.GetByteCount(result.RequestSummary!) <= 8192); + Assert.Contains("", result.RequestSummary); + Assert.DoesNotContain(secret, result.RequestSummary); + Assert.True(result.PayloadTruncated); + } + + [Fact] + public void CatastrophicBacktrackingRegex_AtCompileTime_RejectedAtStartup() + { + // .NET's regex engine has no compile-time detection for catastrophic + // backtracking (only structural validation), so the filter's + // protection is RUNTIME — the 50 ms per-match timeout. We assert the + // safety net behaviour: a known evil pattern compiles cleanly but + // matches time out at runtime, the field is over-redacted, and the + // failure counter is incremented. Future engines that DO support + // compile-time analysis can tighten this further; the contract here + // is that the user-facing action is never aborted. + var evilPattern = "^(a+)+$"; + var opts = new AuditLogOptions + { + GlobalBodyRedactors = new List { evilPattern }, + }; + var input = new string('a', 30) + "!"; + var counter = new CountingRedactionFailureCounter(); + var evt = NewEvent(request: input); + + var result = Filter(opts, counter).Apply(evt); + + Assert.Equal("", result.RequestSummary); + Assert.True(counter.Count >= 1); + } + + /// Test double that counts increments. + private sealed class CountingRedactionFailureCounter : IAuditRedactionFailureCounter + { + private int _count; + public int Count => _count; + public void Increment() => System.Threading.Interlocked.Increment(ref _count); + } + + /// IOptionsMonitor test double — returns the same snapshot on every read. + private sealed class StaticMonitor : IOptionsMonitor + { + private readonly AuditLogOptions _value; + public StaticMonitor(AuditLogOptions value) => _value = value; + public AuditLogOptions CurrentValue => _value; + public AuditLogOptions Get(string? name) => _value; + public IDisposable? OnChange(Action listener) => null; + } +}