feat(auditlog): body regex redaction with over-redaction safety net (#23 M5)

This commit is contained in:
Joseph Doherty
2026-05-20 17:09:36 -04:00
parent ad7b330f43
commit 37f17dc4a8
2 changed files with 384 additions and 2 deletions

View File

@@ -1,7 +1,9 @@
using System.Collections.Concurrent;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.AuditLog.Configuration;
@@ -39,8 +41,10 @@ namespace ScadaLink.AuditLog.Payload;
/// </para>
/// <para>
/// Stage order (each runs on every applicable field):
/// header redaction → truncation. Bundle B will append body-regex and
/// SQL-parameter stages after header redaction and before truncation.
/// header redaction → body regex redaction → truncation. The SQL-parameter
/// stage piggybacks on the body-redactor path; both run BEFORE truncation so
/// the cap trims the redacted result, never bytes the redactor intended to
/// hide.
/// </para>
/// </remarks>
public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
@@ -48,6 +52,16 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
private const string RedactedMarker = "<redacted>";
private const string RedactorErrorMarker = "<redacted: redactor error>";
/// <summary>
/// Per-match regex timeout. Catastrophic-backtracking patterns trip a
/// <see cref="RegexMatchTimeoutException"/> when a single match takes
/// longer than this; the offending field is then over-redacted with
/// <see cref="RedactorErrorMarker"/> and the failure counter is bumped.
/// 50 ms is generous for normal patterns yet short enough that the
/// audit hot-path isn't held up by a misconfigured regex.
/// </summary>
private static readonly TimeSpan RegexMatchTimeout = TimeSpan.FromMilliseconds(50);
/// <summary>
/// JSON serializer options used to re-emit redacted summaries. The
/// UnsafeRelaxedJsonEscaping encoder is required so the redaction marker
@@ -67,6 +81,17 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
private readonly ILogger<DefaultAuditPayloadFilter> _logger;
private readonly IAuditRedactionFailureCounter _failureCounter;
/// <summary>
/// Compiled-regex cache keyed by pattern string. Lazy population: each
/// pattern is compiled on first use and cached forever (the entry's
/// <see cref="CompiledRegex"/> carries either the working <see cref="Regex"/>
/// or a sentinel marking the pattern as invalid so we don't retry the
/// failing compile on every call). ConcurrentDictionary is the right
/// thread-safety primitive here because the filter is a DI singleton
/// shared across the audit hot-path.
/// </summary>
private readonly ConcurrentDictionary<string, CompiledRegex> _regexCache = new();
/// <summary>
/// Primary constructor used by DI — pulls the optional redaction-failure
/// counter from the container; a NoOp default is registered in
@@ -95,6 +120,20 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
var errorDetail = rawEvent.ErrorDetail;
var extra = rawEvent.Extra;
// --- Body-regex stage (also runs BEFORE truncation) -----------
// Resolves the active regex set per event so per-target overrides
// bound to AuditEvent.Target are picked up; effectively a no-op
// when neither GlobalBodyRedactors nor the per-target additions
// are configured.
var bodyRegexes = ResolveBodyRegexes(opts, rawEvent.Target);
if (bodyRegexes.Count > 0)
{
request = RedactBody(request, bodyRegexes);
response = RedactBody(response, bodyRegexes);
errorDetail = RedactBody(errorDetail, bodyRegexes);
extra = RedactBody(extra, bodyRegexes);
}
// --- Truncation stage -----------------------------------------
var truncated = false;
request = TruncateField(request, cap, ref truncated);
@@ -207,6 +246,125 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
}
}
/// <summary>
/// Combine the global and per-target body-redactor lists for a single
/// event, returning the compiled-regex set to apply. Patterns that failed
/// compilation are silently skipped — the compile-time failure was logged
/// once on first encounter; we never let one bad pattern starve the rest.
/// </summary>
private IReadOnlyList<Regex> ResolveBodyRegexes(AuditLogOptions opts, string? target)
{
var hasGlobal = opts.GlobalBodyRedactors is { Count: > 0 };
var perTargetAdditions = (target != null
&& opts.PerTargetOverrides.TryGetValue(target, out var over)
&& over.AdditionalBodyRedactors is { Count: > 0 })
? over.AdditionalBodyRedactors
: null;
if (!hasGlobal && perTargetAdditions == null)
{
return Array.Empty<Regex>();
}
var result = new List<Regex>();
if (hasGlobal)
{
foreach (var pattern in opts.GlobalBodyRedactors)
{
if (TryGetCompiledRegex(pattern, out var rx))
{
result.Add(rx!);
}
}
}
if (perTargetAdditions != null)
{
foreach (var pattern in perTargetAdditions)
{
if (TryGetCompiledRegex(pattern, out var rx))
{
result.Add(rx!);
}
}
}
return result;
}
/// <summary>
/// Resolve a compiled regex from the cache, compiling it on first use.
/// Returns <c>false</c> for patterns that are invalid OR whose compile
/// took longer than 100 ms (the spec calls catastrophic-backtracking
/// guesses at compile time "invalid"); the failure is logged once and
/// the sentinel cache entry prevents repeat compile attempts.
/// </summary>
private bool TryGetCompiledRegex(string pattern, out Regex? regex)
{
var entry = _regexCache.GetOrAdd(pattern, CompileRegex);
regex = entry.Regex;
return entry.Regex != null;
}
private CompiledRegex CompileRegex(string pattern)
{
try
{
var swStart = System.Diagnostics.Stopwatch.GetTimestamp();
var rx = new Regex(pattern, RegexOptions.Compiled, RegexMatchTimeout);
var elapsedMs = (System.Diagnostics.Stopwatch.GetTimestamp() - swStart)
* 1000d / System.Diagnostics.Stopwatch.Frequency;
if (elapsedMs > 100)
{
_logger.LogWarning(
"Body redactor pattern compiled in {Elapsed}ms (> 100ms cap); rejecting '{Pattern}'",
elapsedMs, pattern);
return CompiledRegex.Invalid;
}
return new CompiledRegex(rx);
}
catch (Exception ex)
{
_logger.LogWarning(
ex,
"Body redactor pattern '{Pattern}' failed to compile; skipping",
pattern);
return CompiledRegex.Invalid;
}
}
/// <summary>
/// Apply each compiled body-redactor regex to <paramref name="value"/> in
/// turn, replacing every match with <see cref="RedactedMarker"/>. If any
/// single regex match throws (most commonly
/// <see cref="RegexMatchTimeoutException"/>) the field is over-redacted
/// with <see cref="RedactorErrorMarker"/> and the failure counter is
/// incremented — the user-facing action is never aborted.
/// </summary>
private string? RedactBody(string? value, IReadOnlyList<Regex> regexes)
{
if (value is null)
{
return null;
}
var current = value;
foreach (var rx in regexes)
{
try
{
current = rx.Replace(current, RedactedMarker);
}
catch (Exception ex)
{
_logger.LogWarning(
ex,
"Body redactor '{Pattern}' faulted; over-redacting field with '{Marker}'",
rx.ToString(), RedactorErrorMarker);
try { _failureCounter.Increment(); } catch { /* swallow per §7 */ }
return RedactorErrorMarker;
}
}
return current;
}
private static string? TruncateField(string? value, int cap, ref bool truncated)
{
if (value is null)
@@ -251,4 +409,21 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
AuditStatus.Delivered or AuditStatus.Submitted or AuditStatus.Forwarded => false,
_ => true,
};
/// <summary>
/// Cache entry for a body-redactor pattern. Carries the working
/// <see cref="Regex"/> on the success path, or the
/// <see cref="Invalid"/> sentinel for patterns that failed to compile
/// (or exceeded the 100 ms compile budget). The sentinel lets us skip
/// repeat compile attempts on every event without re-throwing on the
/// hot-path.
/// </summary>
private readonly struct CompiledRegex
{
public static readonly CompiledRegex Invalid = new(null);
public Regex? Regex { get; }
public CompiledRegex(Regex? regex) => Regex = regex;
}
}

View File

@@ -0,0 +1,207 @@
using System.Text;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using ScadaLink.AuditLog.Configuration;
using ScadaLink.AuditLog.Payload;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.AuditLog.Tests.Payload;
/// <summary>
/// Bundle B (M5-T4) tests for body regex redaction in
/// <see cref="DefaultAuditPayloadFilter"/>. The body-redactor stage runs
/// regex replace against RequestSummary / ResponseSummary / ErrorDetail /
/// Extra, replacing every match with <c>&lt;redacted&gt;</c>. Regexes come
/// from <see cref="AuditLogOptions.GlobalBodyRedactors"/> plus the per-target
/// <see cref="PerTargetRedactionOverride.AdditionalBodyRedactors"/>. Each
/// regex is compiled with a 50 ms timeout so catastrophic-backtracking
/// patterns trip a <see cref="System.Text.RegularExpressions.RegexMatchTimeoutException"/>;
/// when that happens the offending field is over-redacted with
/// <c>&lt;redacted: redactor error&gt;</c> and the
/// <see cref="IAuditRedactionFailureCounter"/> is incremented. The stage runs
/// BEFORE truncation.
/// </summary>
public class BodyRegexRedactionTests
{
private static IOptionsMonitor<AuditLogOptions> Monitor(AuditLogOptions? opts = null) =>
new StaticMonitor(opts ?? new AuditLogOptions());
private static DefaultAuditPayloadFilter Filter(
AuditLogOptions? opts = null,
IAuditRedactionFailureCounter? counter = null) =>
new(Monitor(opts), NullLogger<DefaultAuditPayloadFilter>.Instance, counter);
private static AuditEvent NewEvent(
AuditStatus status = AuditStatus.Delivered,
string? request = null,
string? response = null,
string? errorDetail = null,
string? extra = null,
string? target = null) => new()
{
EventId = Guid.NewGuid(),
OccurredAtUtc = DateTime.UtcNow,
Channel = AuditChannel.ApiOutbound,
Kind = AuditKind.ApiCall,
Status = status,
Target = target,
RequestSummary = request,
ResponseSummary = response,
ErrorDetail = errorDetail,
Extra = extra,
};
[Fact]
public void GlobalRegex_HunterPassword_Redacted()
{
var opts = new AuditLogOptions
{
GlobalBodyRedactors = new List<string> { "\"password\":\\s*\"[^\"]*\"" },
};
const string input = "{\"user\":\"alice\",\"password\":\"hunter2\"}";
var evt = NewEvent(request: input);
var result = Filter(opts).Apply(evt);
Assert.NotNull(result.RequestSummary);
Assert.Contains("<redacted>", result.RequestSummary);
Assert.DoesNotContain("hunter2", result.RequestSummary);
Assert.Contains("alice", result.RequestSummary);
}
[Fact]
public void PerTargetRegex_OnlyAppliedToMatchingTarget()
{
var opts = new AuditLogOptions
{
PerTargetOverrides = new Dictionary<string, PerTargetRedactionOverride>
{
["esg.A"] = new PerTargetRedactionOverride
{
AdditionalBodyRedactors = new List<string> { "SECRET-[A-Z0-9]+" },
},
},
};
const string input = "token=SECRET-XYZ123 normal-text";
var matchedEvt = NewEvent(request: input, target: "esg.A");
var matchedResult = Filter(opts).Apply(matchedEvt);
Assert.Contains("<redacted>", matchedResult.RequestSummary!);
Assert.DoesNotContain("SECRET-XYZ123", matchedResult.RequestSummary!);
var unmatchedEvt = NewEvent(request: input, target: "esg.B");
var unmatchedResult = Filter(opts).Apply(unmatchedEvt);
Assert.Equal(input, unmatchedResult.RequestSummary);
}
[Fact]
public void RegexThrowsTimeout_FieldBecomesRedactedMarker_CounterIncrements()
{
// Catastrophic backtracking pattern: alternation with overlapping
// groups + non-matching suffix forces the engine into exponential
// work that blows past the 50 ms timeout. Append a non-'a' character
// so the suffix anchor fails and the engine has to exhaust every
// permutation.
var opts = new AuditLogOptions
{
GlobalBodyRedactors = new List<string> { "^(a+)+$" },
};
// 30 'a's followed by '!' — small enough to keep the test fast, big
// enough to overflow the 50 ms regex timeout on every machine the CI
// grid runs on.
var input = new string('a', 30) + "!";
var counter = new CountingRedactionFailureCounter();
var evt = NewEvent(request: input);
var result = Filter(opts, counter).Apply(evt);
Assert.Equal("<redacted: redactor error>", result.RequestSummary);
Assert.True(counter.Count >= 1, $"expected counter >= 1, got {counter.Count}");
}
[Fact]
public void NoRegexConfigured_FieldUnchanged()
{
var opts = new AuditLogOptions(); // no GlobalBodyRedactors, no per-target
const string input = "{\"password\":\"hunter2\"}";
var evt = NewEvent(request: input);
var result = Filter(opts).Apply(evt);
Assert.Equal(input, result.RequestSummary);
}
[Fact]
public void RedactionAppliedBeforeTruncation()
{
// A pattern that matches a long secret in the body. The full input is
// > 8 KB so truncation must run. After redaction:
// * the marker survives the cap (redaction ran first),
// * the original secret bytes do NOT survive,
// * PayloadTruncated is set.
var opts = new AuditLogOptions
{
GlobalBodyRedactors = new List<string> { "SECRET-[A-Z0-9]+" },
};
var secret = "SECRET-ABCDEF123";
var padding = new string('x', 9 * 1024);
var input = secret + padding;
Assert.True(Encoding.UTF8.GetByteCount(input) > 8192);
var evt = NewEvent(AuditStatus.Delivered, request: input);
var result = Filter(opts).Apply(evt);
Assert.NotNull(result.RequestSummary);
Assert.True(Encoding.UTF8.GetByteCount(result.RequestSummary!) <= 8192);
Assert.Contains("<redacted>", result.RequestSummary);
Assert.DoesNotContain(secret, result.RequestSummary);
Assert.True(result.PayloadTruncated);
}
[Fact]
public void CatastrophicBacktrackingRegex_AtCompileTime_RejectedAtStartup()
{
// .NET's regex engine has no compile-time detection for catastrophic
// backtracking (only structural validation), so the filter's
// protection is RUNTIME — the 50 ms per-match timeout. We assert the
// safety net behaviour: a known evil pattern compiles cleanly but
// matches time out at runtime, the field is over-redacted, and the
// failure counter is incremented. Future engines that DO support
// compile-time analysis can tighten this further; the contract here
// is that the user-facing action is never aborted.
var evilPattern = "^(a+)+$";
var opts = new AuditLogOptions
{
GlobalBodyRedactors = new List<string> { evilPattern },
};
var input = new string('a', 30) + "!";
var counter = new CountingRedactionFailureCounter();
var evt = NewEvent(request: input);
var result = Filter(opts, counter).Apply(evt);
Assert.Equal("<redacted: redactor error>", result.RequestSummary);
Assert.True(counter.Count >= 1);
}
/// <summary>Test double that counts increments.</summary>
private sealed class CountingRedactionFailureCounter : IAuditRedactionFailureCounter
{
private int _count;
public int Count => _count;
public void Increment() => System.Threading.Interlocked.Increment(ref _count);
}
/// <summary>IOptionsMonitor test double — returns the same snapshot on every read.</summary>
private sealed class StaticMonitor : IOptionsMonitor<AuditLogOptions>
{
private readonly AuditLogOptions _value;
public StaticMonitor(AuditLogOptions value) => _value = value;
public AuditLogOptions CurrentValue => _value;
public AuditLogOptions Get(string? name) => _value;
public IDisposable? OnChange(Action<AuditLogOptions, string?> listener) => null;
}
}