Files
ScadaBridge/src/ScadaLink.InboundAPI/Middleware/AuditWriteMiddleware.cs
T

387 lines
16 KiB
C#

using System.Diagnostics;
using System.Text;
using System.Text.Json;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.InboundAPI.Middleware;
/// <summary>
/// Audit Log #23 (M4 Bundle D, T7) — emits one <see cref="AuditChannel.ApiInbound"/>
/// row per inbound API request via <see cref="ICentralAuditWriter"/> covering the
/// full set of response shapes:
///
/// <list type="bullet">
/// <item><description>2xx / non-error → <see cref="AuditKind.InboundRequest"/> with <see cref="AuditStatus.Delivered"/>.</description></item>
/// <item><description>401/403 → <see cref="AuditKind.InboundAuthFailure"/> with <see cref="AuditStatus.Failed"/>.</description></item>
/// <item><description>4xx (non-auth) / 5xx / thrown exception → <see cref="AuditKind.InboundRequest"/> with <see cref="AuditStatus.Failed"/>.</description></item>
/// </list>
///
/// <para>
/// <b>Best-effort contract (alog.md §13).</b> Audit emission NEVER alters the
/// user-facing HTTP response — a thrown writer or any other failure during
/// emission is caught, logged at warning, and dropped. A handler exception is
/// recorded on the audit row then re-thrown so the framework error path stays
/// authoritative.
/// </para>
///
/// <para>
/// <b>Actor resolution.</b> Inbound API auth runs inside the endpoint handler
/// (no <c>UseAuthentication</c>-backed scheme populates <see cref="HttpContext.User"/>
/// for X-API-Key callers), so the handler stashes the resolved API key name on
/// <see cref="HttpContext.Items"/> under <see cref="AuditActorItemKey"/> after
/// <c>ApiKeyValidator.ValidateAsync</c> succeeds. The middleware reads it in
/// its <c>finally</c> block — on auth failures the key remains absent and
/// <see cref="AuditEvent.Actor"/> stays null (we never echo back an
/// unauthenticated principal).
/// </para>
///
/// <para>
/// <b>Body capture.</b> The request body is buffered via
/// <see cref="HttpRequestRewindExtensions.EnableBuffering(HttpRequest)"/> then
/// rewound so the downstream endpoint handler still sees the full payload. The
/// response body is captured by swapping <see cref="HttpResponse.Body"/> for a
/// <see cref="MemoryStream"/> before the pipeline runs; after the pipeline
/// returns, the buffered bytes are copied to the original stream (transparent
/// to the real client) and read into <see cref="AuditEvent.ResponseSummary"/>.
/// Truncation to the configured inbound ceiling happens in
/// <see cref="ScadaLink.AuditLog.Payload.DefaultAuditPayloadFilter"/>; the
/// middleware itself stores the full buffered content.
/// </para>
/// </summary>
public sealed class AuditWriteMiddleware
{
/// <summary>
/// <see cref="HttpContext.Items"/> key used by the endpoint handler to publish
/// the resolved API key name once <c>ApiKeyValidator.ValidateAsync</c> has
/// succeeded. Exposed as a constant so the handler and middleware share a
/// single source of truth (no stringly-typed coupling).
/// </summary>
public const string AuditActorItemKey = "ScadaLink.InboundAPI.AuditActor";
/// <summary>
/// Audit Log #23 (ParentExecutionId): <see cref="HttpContext.Items"/> key under
/// which this middleware stashes the inbound request's per-request
/// <c>ExecutionId</c> (a <see cref="Guid"/>) at the very start of the request.
/// The id is minted ONCE and shared: the endpoint handler reads it to thread it
/// onto a routed <c>RouteToCallRequest.ParentExecutionId</c>, and the
/// middleware's own inbound audit row uses the same id for its
/// <see cref="AuditEvent.ExecutionId"/>. Exposed as a constant so the handler
/// and middleware share a single source of truth (no stringly-typed coupling).
/// </summary>
public const string InboundExecutionIdItemKey = "ScadaLink.InboundAPI.InboundExecutionId";
private readonly RequestDelegate _next;
private readonly ICentralAuditWriter _auditWriter;
private readonly ILogger<AuditWriteMiddleware> _logger;
public AuditWriteMiddleware(
RequestDelegate next,
ICentralAuditWriter auditWriter,
ILogger<AuditWriteMiddleware> logger)
{
_next = next ?? throw new ArgumentNullException(nameof(next));
_auditWriter = auditWriter ?? throw new ArgumentNullException(nameof(auditWriter));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task InvokeAsync(HttpContext ctx)
{
var sw = Stopwatch.StartNew();
// Audit Log #23 (ParentExecutionId): mint the inbound request's per-request
// ExecutionId ONCE, here at the start of the request, and stash it on
// HttpContext.Items. Two consumers share this single id:
// (a) the endpoint handler reads it to thread onto a routed
// RouteToCallRequest.ParentExecutionId, so a spawned site script
// execution points back at this inbound request;
// (b) the inbound audit row this middleware emits uses it as its own
// ExecutionId (the row stays top-level — its ParentExecutionId is
// never set).
ctx.Items[InboundExecutionIdItemKey] = Guid.NewGuid();
// Buffer the request body up front so we can both audit it and let the
// downstream handler still parse it. EnableBuffering swaps the request
// stream for a seekable wrapper that the framework rewinds at the end
// of the pipeline for us — but we also rewind to position 0 after our
// own read so the very next reader starts from the top.
ctx.Request.EnableBuffering();
var requestBody = await ReadBufferedRequestBodyAsync(ctx.Request).ConfigureAwait(false);
// Response body — swap in a MemoryStream so the pipeline writes are
// captured. The original Response.Body is restored in the finally block,
// and the captured bytes are copied back to it so the real client still
// receives every byte (transparent wrap). The captured string is then
// available for the audit row.
var originalResponseBody = ctx.Response.Body;
using var responseBuffer = new MemoryStream();
ctx.Response.Body = responseBuffer;
string? responseBody = null;
Exception? thrown = null;
try
{
await _next(ctx).ConfigureAwait(false);
}
catch (Exception ex)
{
thrown = ex;
// Re-throw — audit emission is BEST EFFORT, but the user-facing
// request's own error path must remain authoritative (alog.md §13).
throw;
}
finally
{
sw.Stop();
// Whatever the handler managed to write — full success, partial
// success before throwing, or nothing at all — copy back to the
// original stream and read for audit.
responseBody = await DrainResponseBufferAsync(responseBuffer, originalResponseBody)
.ConfigureAwait(false);
ctx.Response.Body = originalResponseBody;
EmitInboundAudit(ctx, sw.ElapsedMilliseconds, thrown, requestBody, responseBody);
}
}
/// <summary>
/// Builds and writes the <see cref="AuditChannel.ApiInbound"/> row for the
/// request. Wrapped in try/catch so a thrown writer or any other emission
/// failure stays out of the user-facing response (alog.md §13).
/// </summary>
private void EmitInboundAudit(
HttpContext ctx,
long durationMs,
Exception? thrown,
string? requestBody,
string? responseBody)
{
try
{
var statusCode = ctx.Response.StatusCode;
var isAuthFailure = statusCode is 401 or 403;
var kind = isAuthFailure
? AuditKind.InboundAuthFailure
: AuditKind.InboundRequest;
// A thrown handler exception is always Failed; otherwise any 4xx/5xx
// response signals failure. 2xx/3xx are Delivered.
var status = (thrown != null || statusCode >= 400)
? AuditStatus.Failed
: AuditStatus.Delivered;
var actor = isAuthFailure ? null : ResolveActor(ctx);
var methodName = ResolveMethodName(ctx);
var extra = JsonSerializer.Serialize(new
{
remoteIp = ctx.Connection.RemoteIpAddress?.ToString(),
userAgent = ctx.Request.Headers.UserAgent.ToString(),
});
var evt = new AuditEvent
{
EventId = Guid.NewGuid(),
OccurredAtUtc = DateTime.UtcNow,
Channel = AuditChannel.ApiInbound,
Kind = kind,
// Audit Log #23: the per-request execution id minted ONCE at the
// start of the request (InvokeAsync) and stashed on
// HttpContext.Items. The same id is threaded onto a routed
// RouteToCallRequest.ParentExecutionId by the endpoint handler,
// so an inbound request and the site script it routes to share
// one correlation point. This inbound row stays top-level — its
// own ParentExecutionId is never set (see below).
ExecutionId = ResolveInboundExecutionId(ctx),
// CorrelationId is purely the per-operation-lifecycle id; an
// inbound request is a one-shot from the audit row's
// perspective with no multi-row operation to correlate.
CorrelationId = null,
Actor = actor,
Target = methodName,
Status = status,
HttpStatus = statusCode,
DurationMs = (int)Math.Min(durationMs, int.MaxValue),
ErrorMessage = thrown?.Message,
RequestSummary = requestBody,
ResponseSummary = responseBody,
PayloadTruncated = false,
Extra = extra,
// Central direct-write — no site-local forwarding state.
ForwardState = null,
};
// Fire-and-forget — the writer itself swallows; the additional
// try/catch around the fire still protects us if WriteAsync throws
// synchronously before returning a task.
_ = _auditWriter.WriteAsync(evt);
}
catch (Exception ex)
{
_logger.LogWarning(
ex,
"AuditWriteMiddleware emission failed for {Method} {Path} (status {Status})",
ctx.Request.Method, ctx.Request.Path, ctx.Response.StatusCode);
}
}
/// <summary>
/// Reads the buffered request body fully into a string and rewinds the
/// stream so the downstream handler sees the unconsumed payload. Returns
/// null for empty/missing bodies so the audit row's
/// <see cref="AuditEvent.RequestSummary"/> stays null rather than
/// containing an empty string.
/// </summary>
private static async Task<string?> ReadBufferedRequestBodyAsync(HttpRequest request)
{
if (request.ContentLength is 0)
{
return null;
}
try
{
request.Body.Position = 0;
using var reader = new StreamReader(
request.Body,
Encoding.UTF8,
detectEncodingFromByteOrderMarks: false,
bufferSize: 1024,
leaveOpen: true);
var content = await reader.ReadToEndAsync().ConfigureAwait(false);
request.Body.Position = 0;
return string.IsNullOrEmpty(content) ? null : content;
}
catch
{
// A failed body read must not abort the request — fall through
// with a null RequestSummary; the audit row still records the
// outcome.
return null;
}
}
/// <summary>
/// Copies the bytes buffered in <paramref name="buffer"/> to
/// <paramref name="originalBody"/> (so the real client still receives them)
/// and returns a UTF-8 string copy for <see cref="AuditEvent.ResponseSummary"/>.
/// Returns null when no bytes were written, mirroring the
/// <see cref="ReadBufferedRequestBodyAsync"/> empty-body contract.
/// </summary>
private static async Task<string?> DrainResponseBufferAsync(
MemoryStream buffer,
Stream originalBody)
{
if (buffer.Length == 0)
{
return null;
}
buffer.Position = 0;
// Copy first so the client never misses bytes even if the read for audit
// throws somehow (defensive — MemoryStream.CopyToAsync to a sink shouldn't
// throw on its own, but the original body may).
try
{
await buffer.CopyToAsync(originalBody).ConfigureAwait(false);
}
catch
{
// Best-effort: a sink that refuses our copy is the sink's problem;
// the audit still records what the handler produced. Do NOT rethrow.
}
buffer.Position = 0;
using var reader = new StreamReader(
buffer,
Encoding.UTF8,
detectEncodingFromByteOrderMarks: false,
bufferSize: 1024,
leaveOpen: true);
var content = await reader.ReadToEndAsync().ConfigureAwait(false);
return string.IsNullOrEmpty(content) ? null : content;
}
/// <summary>
/// Audit Log #23 (ParentExecutionId): reads the inbound request's per-request
/// <c>ExecutionId</c> that <see cref="InvokeAsync"/> minted and stashed on
/// <see cref="HttpContext.Items"/> under <see cref="InboundExecutionIdItemKey"/>.
/// Throws <see cref="InvalidOperationException"/> if the slot is absent — for a
/// correlation feature a silently-divergent id is the worst failure mode, so we
/// fail fast rather than mint a fresh one. <see cref="EmitInboundAudit"/>'s
/// try/catch degrades the throw to a dropped best-effort audit row, never a
/// failed request.
/// </summary>
private static Guid ResolveInboundExecutionId(HttpContext ctx)
{
if (ctx.Items.TryGetValue(InboundExecutionIdItemKey, out var stashed)
&& stashed is Guid id)
{
return id;
}
throw new InvalidOperationException(
"Inbound ExecutionId invariant violated: the inbound ExecutionId must be "
+ "stashed by AuditWriteMiddleware.InvokeAsync before the audit row is emitted.");
}
/// <summary>
/// Reads the API key name the endpoint handler stashed on
/// <see cref="HttpContext.Items"/> after successful auth. Falls back to
/// the authenticated user name when an ASP.NET scheme has populated
/// <see cref="HttpContext.User"/> (defensive — currently unused for inbound
/// API but cheap and forward-compatible).
/// </summary>
private static string? ResolveActor(HttpContext ctx)
{
if (ctx.Items.TryGetValue(AuditActorItemKey, out var stashed)
&& stashed is string name
&& !string.IsNullOrWhiteSpace(name))
{
return name;
}
var user = ctx.User;
if (user?.Identity is { IsAuthenticated: true, Name: { Length: > 0 } userName })
{
return userName;
}
return null;
}
/// <summary>
/// Pulls the <c>{methodName}</c> route value off the request. Falls back to
/// the last segment of <see cref="HttpRequest.Path"/> when no route value
/// is bound (e.g. when the request never reached the matched endpoint).
/// </summary>
private static string? ResolveMethodName(HttpContext ctx)
{
if (ctx.Request.RouteValues.TryGetValue("methodName", out var raw)
&& raw is string method
&& !string.IsNullOrWhiteSpace(method))
{
return method;
}
var path = ctx.Request.Path.Value;
if (string.IsNullOrEmpty(path))
{
return null;
}
var lastSlash = path.LastIndexOf('/');
if (lastSlash < 0 || lastSlash == path.Length - 1)
{
return null;
}
return path[(lastSlash + 1)..];
}
}