feat(auditlog): body regex redaction with over-redaction safety net (#23 M5)
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Text;
|
||||
using System.Text.Encodings.Web;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Nodes;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Configuration;
|
||||
@@ -39,8 +41,10 @@ namespace ScadaLink.AuditLog.Payload;
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Stage order (each runs on every applicable field):
|
||||
/// header redaction → truncation. Bundle B will append body-regex and
|
||||
/// SQL-parameter stages after header redaction and before truncation.
|
||||
/// header redaction → body regex redaction → truncation. The SQL-parameter
|
||||
/// stage piggybacks on the body-redactor path; both run BEFORE truncation so
|
||||
/// the cap trims the redacted result, never bytes the redactor intended to
|
||||
/// hide.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
|
||||
@@ -48,6 +52,16 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
|
||||
private const string RedactedMarker = "<redacted>";
|
||||
private const string RedactorErrorMarker = "<redacted: redactor error>";
|
||||
|
||||
/// <summary>
|
||||
/// Per-match regex timeout. Catastrophic-backtracking patterns trip a
|
||||
/// <see cref="RegexMatchTimeoutException"/> when a single match takes
|
||||
/// longer than this; the offending field is then over-redacted with
|
||||
/// <see cref="RedactorErrorMarker"/> and the failure counter is bumped.
|
||||
/// 50 ms is generous for normal patterns yet short enough that the
|
||||
/// audit hot-path isn't held up by a misconfigured regex.
|
||||
/// </summary>
|
||||
private static readonly TimeSpan RegexMatchTimeout = TimeSpan.FromMilliseconds(50);
|
||||
|
||||
/// <summary>
|
||||
/// JSON serializer options used to re-emit redacted summaries. The
|
||||
/// UnsafeRelaxedJsonEscaping encoder is required so the redaction marker
|
||||
@@ -67,6 +81,17 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
|
||||
private readonly ILogger<DefaultAuditPayloadFilter> _logger;
|
||||
private readonly IAuditRedactionFailureCounter _failureCounter;
|
||||
|
||||
/// <summary>
|
||||
/// Compiled-regex cache keyed by pattern string. Lazy population: each
|
||||
/// pattern is compiled on first use and cached forever (the entry's
|
||||
/// <see cref="CompiledRegex"/> carries either the working <see cref="Regex"/>
|
||||
/// or a sentinel marking the pattern as invalid so we don't retry the
|
||||
/// failing compile on every call). ConcurrentDictionary is the right
|
||||
/// thread-safety primitive here because the filter is a DI singleton
|
||||
/// shared across the audit hot-path.
|
||||
/// </summary>
|
||||
private readonly ConcurrentDictionary<string, CompiledRegex> _regexCache = new();
|
||||
|
||||
/// <summary>
|
||||
/// Primary constructor used by DI — pulls the optional redaction-failure
|
||||
/// counter from the container; a NoOp default is registered in
|
||||
@@ -95,6 +120,20 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
|
||||
var errorDetail = rawEvent.ErrorDetail;
|
||||
var extra = rawEvent.Extra;
|
||||
|
||||
// --- Body-regex stage (also runs BEFORE truncation) -----------
|
||||
// Resolves the active regex set per event so per-target overrides
|
||||
// bound to AuditEvent.Target are picked up; effectively a no-op
|
||||
// when neither GlobalBodyRedactors nor the per-target additions
|
||||
// are configured.
|
||||
var bodyRegexes = ResolveBodyRegexes(opts, rawEvent.Target);
|
||||
if (bodyRegexes.Count > 0)
|
||||
{
|
||||
request = RedactBody(request, bodyRegexes);
|
||||
response = RedactBody(response, bodyRegexes);
|
||||
errorDetail = RedactBody(errorDetail, bodyRegexes);
|
||||
extra = RedactBody(extra, bodyRegexes);
|
||||
}
|
||||
|
||||
// --- Truncation stage -----------------------------------------
|
||||
var truncated = false;
|
||||
request = TruncateField(request, cap, ref truncated);
|
||||
@@ -207,6 +246,125 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Combine the global and per-target body-redactor lists for a single
|
||||
/// event, returning the compiled-regex set to apply. Patterns that failed
|
||||
/// compilation are silently skipped — the compile-time failure was logged
|
||||
/// once on first encounter; we never let one bad pattern starve the rest.
|
||||
/// </summary>
|
||||
private IReadOnlyList<Regex> ResolveBodyRegexes(AuditLogOptions opts, string? target)
|
||||
{
|
||||
var hasGlobal = opts.GlobalBodyRedactors is { Count: > 0 };
|
||||
var perTargetAdditions = (target != null
|
||||
&& opts.PerTargetOverrides.TryGetValue(target, out var over)
|
||||
&& over.AdditionalBodyRedactors is { Count: > 0 })
|
||||
? over.AdditionalBodyRedactors
|
||||
: null;
|
||||
|
||||
if (!hasGlobal && perTargetAdditions == null)
|
||||
{
|
||||
return Array.Empty<Regex>();
|
||||
}
|
||||
|
||||
var result = new List<Regex>();
|
||||
if (hasGlobal)
|
||||
{
|
||||
foreach (var pattern in opts.GlobalBodyRedactors)
|
||||
{
|
||||
if (TryGetCompiledRegex(pattern, out var rx))
|
||||
{
|
||||
result.Add(rx!);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (perTargetAdditions != null)
|
||||
{
|
||||
foreach (var pattern in perTargetAdditions)
|
||||
{
|
||||
if (TryGetCompiledRegex(pattern, out var rx))
|
||||
{
|
||||
result.Add(rx!);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Resolve a compiled regex from the cache, compiling it on first use.
|
||||
/// Returns <c>false</c> for patterns that are invalid OR whose compile
|
||||
/// took longer than 100 ms (the spec calls catastrophic-backtracking
|
||||
/// guesses at compile time "invalid"); the failure is logged once and
|
||||
/// the sentinel cache entry prevents repeat compile attempts.
|
||||
/// </summary>
|
||||
private bool TryGetCompiledRegex(string pattern, out Regex? regex)
|
||||
{
|
||||
var entry = _regexCache.GetOrAdd(pattern, CompileRegex);
|
||||
regex = entry.Regex;
|
||||
return entry.Regex != null;
|
||||
}
|
||||
|
||||
private CompiledRegex CompileRegex(string pattern)
|
||||
{
|
||||
try
|
||||
{
|
||||
var swStart = System.Diagnostics.Stopwatch.GetTimestamp();
|
||||
var rx = new Regex(pattern, RegexOptions.Compiled, RegexMatchTimeout);
|
||||
var elapsedMs = (System.Diagnostics.Stopwatch.GetTimestamp() - swStart)
|
||||
* 1000d / System.Diagnostics.Stopwatch.Frequency;
|
||||
if (elapsedMs > 100)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"Body redactor pattern compiled in {Elapsed}ms (> 100ms cap); rejecting '{Pattern}'",
|
||||
elapsedMs, pattern);
|
||||
return CompiledRegex.Invalid;
|
||||
}
|
||||
return new CompiledRegex(rx);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
ex,
|
||||
"Body redactor pattern '{Pattern}' failed to compile; skipping",
|
||||
pattern);
|
||||
return CompiledRegex.Invalid;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Apply each compiled body-redactor regex to <paramref name="value"/> in
|
||||
/// turn, replacing every match with <see cref="RedactedMarker"/>. If any
|
||||
/// single regex match throws (most commonly
|
||||
/// <see cref="RegexMatchTimeoutException"/>) the field is over-redacted
|
||||
/// with <see cref="RedactorErrorMarker"/> and the failure counter is
|
||||
/// incremented — the user-facing action is never aborted.
|
||||
/// </summary>
|
||||
private string? RedactBody(string? value, IReadOnlyList<Regex> regexes)
|
||||
{
|
||||
if (value is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
var current = value;
|
||||
foreach (var rx in regexes)
|
||||
{
|
||||
try
|
||||
{
|
||||
current = rx.Replace(current, RedactedMarker);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
ex,
|
||||
"Body redactor '{Pattern}' faulted; over-redacting field with '{Marker}'",
|
||||
rx.ToString(), RedactorErrorMarker);
|
||||
try { _failureCounter.Increment(); } catch { /* swallow per §7 */ }
|
||||
return RedactorErrorMarker;
|
||||
}
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
private static string? TruncateField(string? value, int cap, ref bool truncated)
|
||||
{
|
||||
if (value is null)
|
||||
@@ -251,4 +409,21 @@ public sealed class DefaultAuditPayloadFilter : IAuditPayloadFilter
|
||||
AuditStatus.Delivered or AuditStatus.Submitted or AuditStatus.Forwarded => false,
|
||||
_ => true,
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Cache entry for a body-redactor pattern. Carries the working
|
||||
/// <see cref="Regex"/> on the success path, or the
|
||||
/// <see cref="Invalid"/> sentinel for patterns that failed to compile
|
||||
/// (or exceeded the 100 ms compile budget). The sentinel lets us skip
|
||||
/// repeat compile attempts on every event without re-throwing on the
|
||||
/// hot-path.
|
||||
/// </summary>
|
||||
private readonly struct CompiledRegex
|
||||
{
|
||||
public static readonly CompiledRegex Invalid = new(null);
|
||||
|
||||
public Regex? Regex { get; }
|
||||
|
||||
public CompiledRegex(Regex? regex) => Regex = regex;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,207 @@
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Configuration;
|
||||
using ScadaLink.AuditLog.Payload;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.AuditLog.Tests.Payload;
|
||||
|
||||
/// <summary>
|
||||
/// Bundle B (M5-T4) tests for body regex redaction in
|
||||
/// <see cref="DefaultAuditPayloadFilter"/>. The body-redactor stage runs
|
||||
/// regex replace against RequestSummary / ResponseSummary / ErrorDetail /
|
||||
/// Extra, replacing every match with <c><redacted></c>. Regexes come
|
||||
/// from <see cref="AuditLogOptions.GlobalBodyRedactors"/> plus the per-target
|
||||
/// <see cref="PerTargetRedactionOverride.AdditionalBodyRedactors"/>. Each
|
||||
/// regex is compiled with a 50 ms timeout so catastrophic-backtracking
|
||||
/// patterns trip a <see cref="System.Text.RegularExpressions.RegexMatchTimeoutException"/>;
|
||||
/// when that happens the offending field is over-redacted with
|
||||
/// <c><redacted: redactor error></c> and the
|
||||
/// <see cref="IAuditRedactionFailureCounter"/> is incremented. The stage runs
|
||||
/// BEFORE truncation.
|
||||
/// </summary>
|
||||
public class BodyRegexRedactionTests
|
||||
{
|
||||
private static IOptionsMonitor<AuditLogOptions> Monitor(AuditLogOptions? opts = null) =>
|
||||
new StaticMonitor(opts ?? new AuditLogOptions());
|
||||
|
||||
private static DefaultAuditPayloadFilter Filter(
|
||||
AuditLogOptions? opts = null,
|
||||
IAuditRedactionFailureCounter? counter = null) =>
|
||||
new(Monitor(opts), NullLogger<DefaultAuditPayloadFilter>.Instance, counter);
|
||||
|
||||
private static AuditEvent NewEvent(
|
||||
AuditStatus status = AuditStatus.Delivered,
|
||||
string? request = null,
|
||||
string? response = null,
|
||||
string? errorDetail = null,
|
||||
string? extra = null,
|
||||
string? target = null) => new()
|
||||
{
|
||||
EventId = Guid.NewGuid(),
|
||||
OccurredAtUtc = DateTime.UtcNow,
|
||||
Channel = AuditChannel.ApiOutbound,
|
||||
Kind = AuditKind.ApiCall,
|
||||
Status = status,
|
||||
Target = target,
|
||||
RequestSummary = request,
|
||||
ResponseSummary = response,
|
||||
ErrorDetail = errorDetail,
|
||||
Extra = extra,
|
||||
};
|
||||
|
||||
[Fact]
|
||||
public void GlobalRegex_HunterPassword_Redacted()
|
||||
{
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
GlobalBodyRedactors = new List<string> { "\"password\":\\s*\"[^\"]*\"" },
|
||||
};
|
||||
const string input = "{\"user\":\"alice\",\"password\":\"hunter2\"}";
|
||||
var evt = NewEvent(request: input);
|
||||
|
||||
var result = Filter(opts).Apply(evt);
|
||||
|
||||
Assert.NotNull(result.RequestSummary);
|
||||
Assert.Contains("<redacted>", result.RequestSummary);
|
||||
Assert.DoesNotContain("hunter2", result.RequestSummary);
|
||||
Assert.Contains("alice", result.RequestSummary);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void PerTargetRegex_OnlyAppliedToMatchingTarget()
|
||||
{
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
PerTargetOverrides = new Dictionary<string, PerTargetRedactionOverride>
|
||||
{
|
||||
["esg.A"] = new PerTargetRedactionOverride
|
||||
{
|
||||
AdditionalBodyRedactors = new List<string> { "SECRET-[A-Z0-9]+" },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const string input = "token=SECRET-XYZ123 normal-text";
|
||||
|
||||
var matchedEvt = NewEvent(request: input, target: "esg.A");
|
||||
var matchedResult = Filter(opts).Apply(matchedEvt);
|
||||
Assert.Contains("<redacted>", matchedResult.RequestSummary!);
|
||||
Assert.DoesNotContain("SECRET-XYZ123", matchedResult.RequestSummary!);
|
||||
|
||||
var unmatchedEvt = NewEvent(request: input, target: "esg.B");
|
||||
var unmatchedResult = Filter(opts).Apply(unmatchedEvt);
|
||||
Assert.Equal(input, unmatchedResult.RequestSummary);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RegexThrowsTimeout_FieldBecomesRedactedMarker_CounterIncrements()
|
||||
{
|
||||
// Catastrophic backtracking pattern: alternation with overlapping
|
||||
// groups + non-matching suffix forces the engine into exponential
|
||||
// work that blows past the 50 ms timeout. Append a non-'a' character
|
||||
// so the suffix anchor fails and the engine has to exhaust every
|
||||
// permutation.
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
GlobalBodyRedactors = new List<string> { "^(a+)+$" },
|
||||
};
|
||||
// 30 'a's followed by '!' — small enough to keep the test fast, big
|
||||
// enough to overflow the 50 ms regex timeout on every machine the CI
|
||||
// grid runs on.
|
||||
var input = new string('a', 30) + "!";
|
||||
var counter = new CountingRedactionFailureCounter();
|
||||
var evt = NewEvent(request: input);
|
||||
|
||||
var result = Filter(opts, counter).Apply(evt);
|
||||
|
||||
Assert.Equal("<redacted: redactor error>", result.RequestSummary);
|
||||
Assert.True(counter.Count >= 1, $"expected counter >= 1, got {counter.Count}");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void NoRegexConfigured_FieldUnchanged()
|
||||
{
|
||||
var opts = new AuditLogOptions(); // no GlobalBodyRedactors, no per-target
|
||||
const string input = "{\"password\":\"hunter2\"}";
|
||||
var evt = NewEvent(request: input);
|
||||
|
||||
var result = Filter(opts).Apply(evt);
|
||||
|
||||
Assert.Equal(input, result.RequestSummary);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RedactionAppliedBeforeTruncation()
|
||||
{
|
||||
// A pattern that matches a long secret in the body. The full input is
|
||||
// > 8 KB so truncation must run. After redaction:
|
||||
// * the marker survives the cap (redaction ran first),
|
||||
// * the original secret bytes do NOT survive,
|
||||
// * PayloadTruncated is set.
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
GlobalBodyRedactors = new List<string> { "SECRET-[A-Z0-9]+" },
|
||||
};
|
||||
var secret = "SECRET-ABCDEF123";
|
||||
var padding = new string('x', 9 * 1024);
|
||||
var input = secret + padding;
|
||||
Assert.True(Encoding.UTF8.GetByteCount(input) > 8192);
|
||||
|
||||
var evt = NewEvent(AuditStatus.Delivered, request: input);
|
||||
|
||||
var result = Filter(opts).Apply(evt);
|
||||
|
||||
Assert.NotNull(result.RequestSummary);
|
||||
Assert.True(Encoding.UTF8.GetByteCount(result.RequestSummary!) <= 8192);
|
||||
Assert.Contains("<redacted>", result.RequestSummary);
|
||||
Assert.DoesNotContain(secret, result.RequestSummary);
|
||||
Assert.True(result.PayloadTruncated);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CatastrophicBacktrackingRegex_AtCompileTime_RejectedAtStartup()
|
||||
{
|
||||
// .NET's regex engine has no compile-time detection for catastrophic
|
||||
// backtracking (only structural validation), so the filter's
|
||||
// protection is RUNTIME — the 50 ms per-match timeout. We assert the
|
||||
// safety net behaviour: a known evil pattern compiles cleanly but
|
||||
// matches time out at runtime, the field is over-redacted, and the
|
||||
// failure counter is incremented. Future engines that DO support
|
||||
// compile-time analysis can tighten this further; the contract here
|
||||
// is that the user-facing action is never aborted.
|
||||
var evilPattern = "^(a+)+$";
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
GlobalBodyRedactors = new List<string> { evilPattern },
|
||||
};
|
||||
var input = new string('a', 30) + "!";
|
||||
var counter = new CountingRedactionFailureCounter();
|
||||
var evt = NewEvent(request: input);
|
||||
|
||||
var result = Filter(opts, counter).Apply(evt);
|
||||
|
||||
Assert.Equal("<redacted: redactor error>", result.RequestSummary);
|
||||
Assert.True(counter.Count >= 1);
|
||||
}
|
||||
|
||||
/// <summary>Test double that counts increments.</summary>
|
||||
private sealed class CountingRedactionFailureCounter : IAuditRedactionFailureCounter
|
||||
{
|
||||
private int _count;
|
||||
public int Count => _count;
|
||||
public void Increment() => System.Threading.Interlocked.Increment(ref _count);
|
||||
}
|
||||
|
||||
/// <summary>IOptionsMonitor test double — returns the same snapshot on every read.</summary>
|
||||
private sealed class StaticMonitor : IOptionsMonitor<AuditLogOptions>
|
||||
{
|
||||
private readonly AuditLogOptions _value;
|
||||
public StaticMonitor(AuditLogOptions value) => _value = value;
|
||||
public AuditLogOptions CurrentValue => _value;
|
||||
public AuditLogOptions Get(string? name) => _value;
|
||||
public IDisposable? OnChange(Action<AuditLogOptions, string?> listener) => null;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user