// File: scadalink-design/src/ScadaLink.InboundAPI/Middleware/AuditWriteMiddleware.cs
// Repository viewer metadata: 593 lines, 25 KiB, C#

using System.Buffers;
using System.Diagnostics;
using System.Text;
using System.Text.Json;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.AuditLog.Configuration;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.InboundAPI.Middleware;
/// <summary>
/// Audit Log #23 (M4 Bundle D, T7) — emits one <see cref="AuditChannel.ApiInbound"/>
/// row per inbound API request via <see cref="ICentralAuditWriter"/> covering the
/// full set of response shapes:
///
/// <list type="bullet">
/// <item><description>2xx / non-error → <see cref="AuditKind.InboundRequest"/> with <see cref="AuditStatus.Delivered"/>.</description></item>
/// <item><description>401/403 → <see cref="AuditKind.InboundAuthFailure"/> with <see cref="AuditStatus.Failed"/>.</description></item>
/// <item><description>4xx (non-auth) / 5xx / thrown exception → <see cref="AuditKind.InboundRequest"/> with <see cref="AuditStatus.Failed"/>.</description></item>
/// </list>
///
/// <para>
/// <b>Best-effort contract (alog.md §13).</b> Audit emission NEVER alters the
/// user-facing HTTP response — a thrown writer or any other failure during
/// emission is caught, logged at warning, and dropped. A handler exception is
/// recorded on the audit row then re-thrown so the framework error path stays
/// authoritative.
/// </para>
///
/// <para>
/// <b>Actor resolution.</b> Inbound API auth runs inside the endpoint handler
/// (no <c>UseAuthentication</c>-backed scheme populates <see cref="HttpContext.User"/>
/// for X-API-Key callers), so the handler stashes the resolved API key name on
/// <see cref="HttpContext.Items"/> under <see cref="AuditActorItemKey"/> after
/// <c>ApiKeyValidator.ValidateAsync</c> succeeds. The middleware reads it in
/// its <c>finally</c> block — on auth failures the key remains absent and
/// <see cref="AuditEvent.Actor"/> stays null (we never echo back an
/// unauthenticated principal).
/// </para>
///
/// <para>
/// <b>Body capture.</b> The request body is buffered via
/// <see cref="HttpRequestRewindExtensions.EnableBuffering(HttpRequest)"/> then
/// rewound so the downstream endpoint handler still sees the full payload. The
/// response body is captured by wrapping <see cref="HttpResponse.Body"/> in a
/// forwarding stream that mirrors writes to the original sink (transparent to
/// the real client) while capturing a bounded copy for audit.
/// </para>
///
/// <para>
/// <b>Bounded capture at the source.</b> Both the request- and response-body
/// audit copies are bounded at <see cref="AuditLogOptions.InboundMaxBytes"/>
/// (default 1 MiB) AT THE CAPTURE SITE — we never buffer more than
/// <c>cap + 1</c> bytes per body even when the client streams hundreds of MiB.
/// The downstream handler and the real client still see every byte; only the
/// audit copy is bounded. The cap is also enforced again by
/// <see cref="ScadaLink.AuditLog.Payload.DefaultAuditPayloadFilter"/> (which OR's
/// in its own <see cref="AuditEvent.PayloadTruncated"/> determination), so a
/// row truncated here remains truncated even if the filter is bypassed.
/// </para>
/// </summary>
public sealed class AuditWriteMiddleware
{
/// <summary>
/// <see cref="HttpContext.Items"/> key used by the endpoint handler to publish
/// the resolved API key name once <c>ApiKeyValidator.ValidateAsync</c> has
/// succeeded. Exposed as a constant so the handler and middleware share a
/// single source of truth (no stringly-typed coupling).
/// </summary>
/// <remarks>
/// This middleware only READS the slot (see <c>ResolveActor</c>): a missing
/// entry, a non-string value, or a null/whitespace string are all treated as
/// "no actor published".
/// </remarks>
public const string AuditActorItemKey = "ScadaLink.InboundAPI.AuditActor";
/// <summary>
/// Audit Log #23 (ParentExecutionId): <see cref="HttpContext.Items"/> key under
/// which this middleware stashes the inbound request's per-request
/// <c>ExecutionId</c> (a <see cref="Guid"/>) at the very start of the request.
/// The id is minted ONCE and shared: the endpoint handler reads it to thread it
/// onto a routed <c>RouteToCallRequest.ParentExecutionId</c>, and the
/// middleware's own inbound audit row uses the same id for its
/// <see cref="AuditEvent.ExecutionId"/>. Exposed as a constant so the handler
/// and middleware share a single source of truth (no stringly-typed coupling).
/// </summary>
/// <remarks>
/// Written by <c>InvokeAsync</c> before the request body is read and before
/// the rest of the pipeline runs; read back by <c>ResolveInboundExecutionId</c>,
/// which throws when the slot is missing or does not hold a <see cref="Guid"/>.
/// </remarks>
public const string InboundExecutionIdItemKey = "ScadaLink.InboundAPI.InboundExecutionId";
// Remainder of the request pipeline; awaited exactly once per request in InvokeAsync.
private readonly RequestDelegate _next;
// Sink for the audit row; WriteAsync is called fire-and-forget from EmitInboundAudit.
private readonly ICentralAuditWriter _auditWriter;
// Receives the warning logged when building or writing the audit row fails.
private readonly ILogger<AuditWriteMiddleware> _logger;
// Options monitor; InboundMaxBytes is re-read through CurrentValue on every request.
private readonly IOptionsMonitor<AuditLogOptions> _options;
/// <summary>
/// Wires the middleware to the rest of the pipeline and to its collaborators.
/// </summary>
/// <param name="next">The remainder of the request pipeline.</param>
/// <param name="auditWriter">Writer that receives the inbound audit row.</param>
/// <param name="logger">Logger used when audit emission fails.</param>
/// <param name="options">Monitored audit options; the inbound byte cap is read from it per request.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public AuditWriteMiddleware(
    RequestDelegate next,
    ICentralAuditWriter auditWriter,
    ILogger<AuditWriteMiddleware> logger,
    IOptionsMonitor<AuditLogOptions> options)
{
    // Validate in declaration order so the first offending argument is the one reported.
    ArgumentNullException.ThrowIfNull(next);
    ArgumentNullException.ThrowIfNull(auditWriter);
    ArgumentNullException.ThrowIfNull(logger);
    ArgumentNullException.ThrowIfNull(options);

    _next = next;
    _auditWriter = auditWriter;
    _logger = logger;
    _options = options;
}
/// <summary>
/// Middleware entry point: runs the rest of the pipeline while taking bounded
/// audit copies of the request and response bodies, then emits one inbound
/// audit row — whether the pipeline completed or threw.
/// </summary>
/// <param name="ctx">The context of the request being processed.</param>
/// <returns>A task that completes when the downstream pipeline has finished.</returns>
public async Task InvokeAsync(HttpContext ctx)
{
    var stopwatch = Stopwatch.StartNew();

    // Read the cap through CurrentValue on every request so a configuration
    // change applies to the next request without re-resolving anything.
    var maxAuditBytes = _options.CurrentValue.InboundMaxBytes;

    // Mint the per-request ExecutionId exactly once and publish it on
    // HttpContext.Items before anything downstream runs. The endpoint handler
    // reads it to set a routed call's ParentExecutionId; the audit row emitted
    // below uses the very same value as its own ExecutionId.
    ctx.Items[InboundExecutionIdItemKey] = Guid.NewGuid();

    // Make the request body re-readable, take the bounded audit copy, and
    // leave the stream rewound (the helper resets Position to 0) so the
    // downstream handler parses the body from the start.
    ctx.Request.EnableBuffering();
    var requestCapture = await ReadBufferedRequestBodyAsync(ctx.Request, maxAuditBytes)
        .ConfigureAwait(false);

    // Swap Response.Body for a forwarding wrapper: every write still reaches
    // the real sink, while the wrapper keeps a bounded copy for the audit row.
    var clientSink = ctx.Response.Body;
    using var auditTap = new CapturedResponseStream(clientSink, maxAuditBytes);
    ctx.Response.Body = auditTap;

    Exception? handlerFailure = null;
    try
    {
        await _next(ctx).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // Remember the failure for the audit row, then let it continue up the
        // pipeline unchanged — auditing must not replace the real error path.
        handlerFailure = ex;
        throw;
    }
    finally
    {
        stopwatch.Stop();

        // Put the original stream back before reading the captured copy; the
        // wrapper has already forwarded every byte it was given.
        ctx.Response.Body = clientSink;
        var responseCapture = auditTap.GetCapturedBody();
        var anyTruncation = requestCapture.truncated || responseCapture.truncated;

        EmitInboundAudit(
            ctx,
            stopwatch.ElapsedMilliseconds,
            handlerFailure,
            requestCapture.body,
            responseCapture.body,
            anyTruncation);
    }
}
/// <summary>
/// Assembles the <see cref="AuditChannel.ApiInbound"/> row for the finished
/// request and hands it to the audit writer. Everything runs inside a
/// try/catch: any failure while building or dispatching the row is logged at
/// warning level and dropped, so it never reaches the caller of the API.
/// </summary>
/// <param name="ctx">Context of the request being audited.</param>
/// <param name="durationMs">Elapsed request time in milliseconds.</param>
/// <param name="thrown">Exception raised by the downstream pipeline, or <c>null</c>.</param>
/// <param name="requestBody">Bounded audit copy of the request body, or <c>null</c>.</param>
/// <param name="responseBody">Bounded audit copy of the response body, or <c>null</c>.</param>
/// <param name="payloadTruncated">Whether either audit copy hit the size cap.</param>
private void EmitInboundAudit(
    HttpContext ctx,
    long durationMs,
    Exception? thrown,
    string? requestBody,
    string? responseBody,
    bool payloadTruncated)
{
    try
    {
        // NOTE(review): the status is read as it stands right now. When the
        // handler threw, this is presumably the value set before any outer
        // exception handler rewrites the response — confirm that is the
        // HttpStatus the row is meant to carry.
        var httpStatus = ctx.Response.StatusCode;
        var authRejected = httpStatus == 401 || httpStatus == 403;

        // 401/403 get their own kind; every other outcome is a plain request row.
        AuditKind rowKind;
        if (authRejected)
        {
            rowKind = AuditKind.InboundAuthFailure;
        }
        else
        {
            rowKind = AuditKind.InboundRequest;
        }

        // Delivered only when nothing was thrown AND the status is below 400.
        var succeeded = thrown is null && httpStatus < 400;
        AuditStatus rowStatus;
        if (succeeded)
        {
            rowStatus = AuditStatus.Delivered;
        }
        else
        {
            rowStatus = AuditStatus.Failed;
        }

        // Auth failures never carry an actor, even if one was published.
        string? rowActor = null;
        if (!authRejected)
        {
            rowActor = ResolveActor(ctx);
        }

        var target = ResolveMethodName(ctx);

        var callerInfo = new
        {
            remoteIp = ctx.Connection.RemoteIpAddress?.ToString(),
            userAgent = ctx.Request.Headers.UserAgent.ToString(),
        };
        var extraJson = JsonSerializer.Serialize(callerInfo);

        // Saturate rather than wrap when the elapsed time exceeds int range.
        var clampedDuration = durationMs > int.MaxValue ? int.MaxValue : (int)durationMs;

        var auditRow = new AuditEvent
        {
            EventId = Guid.NewGuid(),
            OccurredAtUtc = DateTime.UtcNow,
            Channel = AuditChannel.ApiInbound,
            Kind = rowKind,
            // The id minted once in InvokeAsync and stashed on
            // HttpContext.Items; the endpoint handler threads the same value
            // onto a routed call's ParentExecutionId, giving both sides one
            // shared correlation point. No ParentExecutionId is set on this
            // row, so it stays top-level.
            ExecutionId = ResolveInboundExecutionId(ctx),
            // A single inbound request is one row — there is no multi-row
            // operation lifecycle to correlate.
            CorrelationId = null,
            Actor = rowActor,
            Target = target,
            Status = rowStatus,
            HttpStatus = httpStatus,
            DurationMs = clampedDuration,
            ErrorMessage = thrown?.Message,
            RequestSummary = requestBody,
            ResponseSummary = responseBody,
            PayloadTruncated = payloadTruncated,
            Extra = extraJson,
            // Written directly at central — no site-local forwarding state.
            ForwardState = null,
        };

        // Deliberately not awaited. The surrounding try/catch still covers a
        // WriteAsync that throws synchronously before it returns its task.
        _ = _auditWriter.WriteAsync(auditRow);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(
            ex,
            "AuditWriteMiddleware emission failed for {Method} {Path} (status {Status})",
            ctx.Request.Method, ctx.Request.Path, ctx.Response.StatusCode);
    }
}
/// <summary>
/// Reads the buffered request body up to <paramref name="capBytes"/> bytes
/// into a string for the audit copy and rewinds the stream so the
/// downstream handler sees the unconsumed payload. Returns
/// <c>(null, false)</c> for empty/missing bodies so the audit row's
/// <see cref="AuditEvent.RequestSummary"/> stays null rather than
/// containing an empty string.
/// </summary>
/// <param name="request">Request whose body stream is seekable (buffering already enabled by the caller).</param>
/// <param name="capBytes">
/// Maximum number of bytes kept for the audit copy. Negative values are
/// treated as 0, matching how the response capture clamps its cap; values
/// too large to allocate are clamped to the largest array size.
/// </param>
/// <returns>
/// The decoded audit copy (or <c>null</c> when empty or unreadable) and a
/// flag telling whether the body was longer than the cap.
/// </returns>
/// <remarks>
/// Reads AT MOST <c>cap + 1</c> bytes from the request stream into a
/// scratch buffer; if the extra byte arrives the body is over the cap and
/// the returned string is UTF-8 byte-safe truncated to at most
/// <c>cap</c> bytes with <c>truncated = true</c>. The cap applies only to
/// the audit copy — the request stream is always rewound to position 0
/// afterwards so the next reader sees the full body. This method never
/// throws for a bad cap or a failed read: every failure, including a
/// failed buffer rental, degrades to <c>(null, false)</c>.
/// </remarks>
private static async Task<(string? body, bool truncated)> ReadBufferedRequestBodyAsync(
    HttpRequest request,
    int capBytes)
{
    if (request.ContentLength is 0)
    {
        return (null, false);
    }

    // Sanitize the configured cap BEFORE doing arithmetic on it: a negative
    // value would make `cap + 1` an invalid rental size and int.MaxValue would
    // overflow it. Either used to throw out of this method and abort the
    // user's request, which auditing must never do.
    var effectiveCap = Math.Clamp(capBytes, 0, Array.MaxLength - 1);

    // One byte beyond the cap is enough to learn that the body is over the
    // cap without holding the whole payload.
    var limit = effectiveCap + 1;

    byte[]? buffer = null;
    try
    {
        // Rent inside the guarded region so a failed rental is handled like
        // any other capture failure. The pool may hand back a buffer LARGER
        // than `limit`, so `limit` (not buffer.Length) is the read ceiling.
        buffer = ArrayPool<byte>.Shared.Rent(limit);

        request.Body.Position = 0;
        var total = 0;
        while (total < limit)
        {
            var read = await request.Body
                .ReadAsync(buffer.AsMemory(total, limit - total))
                .ConfigureAwait(false);
            if (read == 0)
            {
                break;
            }
            total += read;
        }

        if (total == 0)
        {
            return (null, false);
        }

        var truncated = total > effectiveCap;
        var bytesForString = truncated ? effectiveCap : total;
        var content = DecodeUtf8Bounded(buffer, bytesForString, cutAtValidBytes: truncated);
        return (string.IsNullOrEmpty(content) ? null : content, truncated);
    }
    catch (Exception)
    {
        // Deliberate best-effort: a failed audit copy must not abort the
        // request. The row is still written, just without a RequestSummary.
        return (null, false);
    }
    finally
    {
        // Whatever happened above, the downstream handler must start reading
        // at position 0. A failing rewind is swallowed for the same
        // best-effort reason.
        try { request.Body.Position = 0; } catch { /* swallow */ }

        if (buffer is not null)
        {
            ArrayPool<byte>.Shared.Return(buffer);
        }
    }
}
/// <summary>
/// UTF-8 byte-safe decode of the first <paramref name="validBytes"/> bytes of
/// <paramref name="bytes"/>.
/// </summary>
/// <param name="bytes">Buffer holding the payload; may be longer than the valid region.</param>
/// <param name="validBytes">
/// Number of leading bytes to decode. Values at or below zero yield an empty
/// string; values beyond <c>bytes.Length</c> are clamped to the array length.
/// </param>
/// <param name="cutAtValidBytes">
/// <c>true</c> when <paramref name="validBytes"/> is a hard byte-count cut that
/// may fall inside a multi-byte codepoint: the boundary is then walked back
/// while the byte just past it is a continuation byte
/// (<c>byte &amp; 0xC0 == 0x80</c>), so a partial codepoint is dropped instead
/// of decoded. <c>false</c> when the region is the complete payload and the
/// boundary stands as given.
/// </param>
/// <returns>The decoded string; empty when there is nothing to decode.</returns>
/// <remarks>
/// Mirrors the algorithm in <c>DefaultAuditPayloadFilter.TruncateUtf8</c>;
/// kept local to avoid a backwards project reference from
/// ScadaLink.AuditLog into ScadaLink.InboundAPI.
/// </remarks>
private static string DecodeUtf8Bounded(byte[] bytes, int validBytes, bool cutAtValidBytes)
{
    if (validBytes <= 0)
    {
        return string.Empty;
    }

    // Never trust the caller's count beyond what the array actually holds.
    var boundary = Math.Min(validBytes, bytes.Length);

    if (cutAtValidBytes)
    {
        // The byte AT the boundary is the first one being dropped. If it is a
        // continuation byte the cut splits a codepoint, so step back until
        // the dropped region starts on a lead (or single) byte. When the cut
        // coincides with the end of the array there is no following byte to
        // inspect, and the boundary is kept as is.
        while (boundary > 0
            && boundary < bytes.Length
            && (bytes[boundary] & 0xC0) == 0x80)
        {
            boundary--;
        }
    }

    return Encoding.UTF8.GetString(bytes, 0, boundary);
}
/// <summary>
/// Audit Log #23 (ParentExecutionId): fetches the per-request
/// <c>ExecutionId</c> that <see cref="InvokeAsync"/> placed on
/// <see cref="HttpContext.Items"/> under <see cref="InboundExecutionIdItemKey"/>.
/// </summary>
/// <param name="ctx">Context of the request being audited.</param>
/// <returns>The stashed execution id.</returns>
/// <exception cref="InvalidOperationException">
/// The slot is missing or does not hold a <see cref="Guid"/>. Failing fast is
/// deliberate: for a correlation id, quietly minting a different value would
/// be worse than having no row. The caller, <see cref="EmitInboundAudit"/>,
/// catches the exception, so the outcome is a dropped audit row rather than a
/// failed request.
/// </exception>
private static Guid ResolveInboundExecutionId(HttpContext ctx)
{
    const string invariantMessage =
        "Inbound ExecutionId invariant violated: the inbound ExecutionId must be "
        + "stashed by AuditWriteMiddleware.InvokeAsync before the audit row is emitted.";

    var slotPresent = ctx.Items.TryGetValue(InboundExecutionIdItemKey, out var slot);
    if (!slotPresent || slot is not Guid executionId)
    {
        throw new InvalidOperationException(invariantMessage);
    }

    return executionId;
}
/// <summary>
/// Determines the actor to record on the audit row. The API key name stashed
/// on <see cref="HttpContext.Items"/> under <see cref="AuditActorItemKey"/>
/// wins when it is a non-blank string; otherwise the name of an authenticated
/// <see cref="HttpContext.User"/> identity is used when it is non-empty.
/// </summary>
/// <param name="ctx">Context of the request being audited.</param>
/// <returns>The actor name, or <c>null</c> when neither source provides one.</returns>
private static string? ResolveActor(HttpContext ctx)
{
    // Preferred source: the name published by the endpoint handler.
    var hasStash = ctx.Items.TryGetValue(AuditActorItemKey, out var stashedValue);
    if (hasStash
        && stashedValue is string keyName
        && !string.IsNullOrWhiteSpace(keyName))
    {
        return keyName;
    }

    // Fallback: an identity that reports itself as authenticated.
    var identity = ctx.User?.Identity;
    if (identity is null || !identity.IsAuthenticated)
    {
        return null;
    }

    var principalName = identity.Name;
    return string.IsNullOrEmpty(principalName) ? null : principalName;
}
/// <summary>
/// Determines the audit row's target: the <c>methodName</c> route value when
/// it is bound to a non-blank string, otherwise the text after the last
/// <c>/</c> of <see cref="HttpRequest.Path"/>.
/// </summary>
/// <param name="ctx">Context of the request being audited.</param>
/// <returns>
/// The method name, or <c>null</c> when no route value is bound and the path
/// is empty, contains no <c>/</c>, or ends with <c>/</c>.
/// </returns>
private static string? ResolveMethodName(HttpContext ctx)
{
    var routeValues = ctx.Request.RouteValues;
    if (routeValues.TryGetValue("methodName", out var bound)
        && bound is string routedName
        && !string.IsNullOrWhiteSpace(routedName))
    {
        return routedName;
    }

    var requestPath = ctx.Request.Path.Value;
    if (string.IsNullOrEmpty(requestPath))
    {
        return null;
    }

    // A usable trailing segment needs a slash that is not the final character.
    var slashAt = requestPath.LastIndexOf('/');
    var hasTrailingSegment = slashAt >= 0 && slashAt < requestPath.Length - 1;
    return hasTrailingSegment ? requestPath.Substring(slashAt + 1) : null;
}
/// <summary>
/// Write-only forwarding <see cref="Stream"/> wrapper that mirrors every
/// write to the inner ASP.NET <see cref="HttpResponse.Body"/> (so the real
/// client receives all bytes) while capturing AT MOST <c>cap + 1</c> bytes
/// into a private bounded <see cref="MemoryStream"/> for the audit copy.
/// </summary>
/// <remarks>
/// <para>
/// The inner sink is NOT disposed when this wrapper is disposed — the wrapper
/// only owns the capture <see cref="MemoryStream"/>.
/// </para>
/// <para>
/// All Write overloads forward to the inner stream FIRST, then capture the
/// remaining quota. If the inner sink throws, the exception propagates — the
/// real I/O is authoritative. Capture itself is strictly best-effort: a
/// failure while copying into the audit buffer is swallowed, stops further
/// capture, and is reported as truncation by <see cref="GetCapturedBody"/>,
/// so it can never surface through the caller's write.
/// </para>
/// </remarks>
private sealed class CapturedResponseStream : Stream
{
    private const string WriteOnlyMessage = "CapturedResponseStream is write-only.";

    private readonly Stream _inner;
    private readonly int _capBytes;
    private readonly MemoryStream _captured;
    private bool _captureFaulted;
    private bool _disposed;

    /// <summary>Creates a wrapper around <paramref name="inner"/>.</summary>
    /// <param name="inner">The real response sink; every write is forwarded to it.</param>
    /// <param name="capBytes">Audit-copy cap in bytes; negative values are treated as 0.</param>
    /// <exception cref="ArgumentNullException"><paramref name="inner"/> is <c>null</c>.</exception>
    public CapturedResponseStream(Stream inner, int capBytes)
    {
        _inner = inner ?? throw new ArgumentNullException(nameof(inner));
        _capBytes = Math.Max(0, capBytes);
        // Starts empty and grows on demand; CaptureBytes never lets it hold
        // more than cap + 1 bytes.
        _captured = new MemoryStream();
    }

    public override bool CanRead => false;

    public override bool CanSeek => false;

    public override bool CanWrite => true;

    public override long Length =>
        throw new NotSupportedException(WriteOnlyMessage);

    public override long Position
    {
        get => throw new NotSupportedException(WriteOnlyMessage);
        set => throw new NotSupportedException(WriteOnlyMessage);
    }

    public override void Flush() => _inner.Flush();

    public override Task FlushAsync(CancellationToken cancellationToken) =>
        _inner.FlushAsync(cancellationToken);

    public override int Read(byte[] buffer, int offset, int count) =>
        throw new NotSupportedException(WriteOnlyMessage);

    public override long Seek(long offset, SeekOrigin origin) =>
        throw new NotSupportedException(WriteOnlyMessage);

    public override void SetLength(long value) =>
        throw new NotSupportedException(WriteOnlyMessage);

    public override void Write(byte[] buffer, int offset, int count)
    {
        // Real sink first: the client must receive the bytes regardless of
        // what happens to the audit copy.
        _inner.Write(buffer, offset, count);
        CaptureBytes(buffer.AsSpan(offset, count));
    }

    public override void Write(ReadOnlySpan<byte> buffer)
    {
        _inner.Write(buffer);
        CaptureBytes(buffer);
    }

    public override async Task WriteAsync(
        byte[] buffer, int offset, int count, CancellationToken cancellationToken)
    {
        await _inner.WriteAsync(buffer.AsMemory(offset, count), cancellationToken)
            .ConfigureAwait(false);
        CaptureBytes(buffer.AsSpan(offset, count));
    }

    public override async ValueTask WriteAsync(
        ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default)
    {
        await _inner.WriteAsync(buffer, cancellationToken).ConfigureAwait(false);
        CaptureBytes(buffer.Span);
    }

    /// <summary>
    /// Copies up to <c>cap + 1</c> bytes in total into the private capture
    /// buffer. Bytes beyond that quota are dropped from the audit copy only —
    /// the real sink has already received them before this is called.
    /// Never throws.
    /// </summary>
    private void CaptureBytes(ReadOnlySpan<byte> span)
    {
        // Nothing to copy, or capture is no longer possible (the buffer was
        // disposed, or an earlier copy failed).
        if (span.Length == 0 || _disposed || _captureFaulted)
        {
            return;
        }

        try
        {
            // long arithmetic: `cap + 1` must not overflow when the cap is
            // int.MaxValue.
            var remaining = (long)_capBytes + 1 - _captured.Length;
            if (remaining <= 0)
            {
                return;
            }

            var take = (int)Math.Min(remaining, span.Length);
            _captured.Write(span[..take]);
        }
        catch (Exception)
        {
            // Deliberate best-effort: the caller's write already succeeded on
            // the real sink, so an audit-copy failure must not turn it into
            // an error. Latch the fault so the copy is reported as incomplete.
            _captureFaulted = true;
        }
    }

    /// <summary>
    /// Returns the captured response body as a UTF-8 string (byte-safe
    /// truncated to at most <c>cap</c> bytes) and a flag indicating whether
    /// the audit copy is incomplete — either because it hit the cap or because
    /// capturing failed part-way. Returns <c>(null, false)</c> when nothing
    /// was captured and nothing failed, and also once the wrapper has been
    /// disposed (the capture buffer is gone by then).
    /// </summary>
    public (string? body, bool truncated) GetCapturedBody()
    {
        if (_disposed)
        {
            return (null, false);
        }

        var length = (int)_captured.Length;
        if (length == 0)
        {
            return (null, _captureFaulted);
        }

        var overCap = length > _capBytes;
        var bytes = _captured.GetBuffer();
        var bytesForString = overCap ? _capBytes : length;
        var content = DecodeUtf8Bounded(bytes, bytesForString, cutAtValidBytes: overCap);
        return (string.IsNullOrEmpty(content) ? null : content, overCap || _captureFaulted);
    }

    protected override void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            if (disposing)
            {
                // Only the capture buffer is owned here; the inner sink is
                // left untouched.
                _captured.Dispose();
            }
            _disposed = true;
        }
        base.Dispose(disposing);
    }
}
}