Files
lmxopcua/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Audit/AuditWriterActor.cs
T
Joseph Doherty b7f5e887ee feat(audit): OtOpcUa ConfigAuditLog.Outcome column + migration + ClusterAudit visibility fix (Task 2.2)
Persist the canonical AuditOutcome and make structured audit rows visible.

- ConfigAuditLog gains a nullable Outcome column, stored as the AuditOutcome
  enum member name (nvarchar(16), mirroring how AdminRole is persisted). The
  AuditWriterActor flush now writes Outcome = evt.Outcome.ToString(). Nullable so
  legacy rows and the bespoke stored-procedure path (no derived outcome) write
  NULL.
- Migration 20260602135350_AddConfigAuditLogOutcome: additive nullable column,
  no backfill. Up adds the column, Down drops it. Chains after
  20260602112419_CanonicalizeAdminRoles; `dotnet ef migrations
  has-pending-model-changes` is clean.
- ClusterAudit visibility fix: the page filtered solely on ClusterId, but the
  structured AuditWriterActor path stamps NodeId (ClusterId null), so those rows
  were invisible. Extracted ClusterAuditQuery.ForClusterAsync (shared by the page
  and tests) which ORs in rows whose NodeId belongs to a node in the cluster —
  membership resolved from ClusterNode (NodeId -> ClusterId). SP-path
  ClusterId-stamped rows still match.

Tests: ControlPlane 45/45 (adds Outcome persistence + Denied-outcome asserts);
new Configuration ClusterAuditQueryTests 3/3 (both-paths visible, other-cluster
excluded, page-size cap); AdminUI 121/121. Configuration Unit suite is green on a
clean run (a pre-existing timing flake in ResilientConfigReaderTests, untouched
here, occasionally fails under parallel load and passes in isolation).
2026-06-02 09:59:22 -04:00

138 lines
5.4 KiB
C#

using Akka.Actor;
using Akka.Event;
using Microsoft.EntityFrameworkCore;
using ZB.MOM.WW.Audit;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Audit;
/// <summary>
/// Cluster-singleton actor that batches <see cref="AuditEvent"/> messages from the cluster
/// and bulk-inserts them into <c>ConfigAuditLog</c>. Flush triggers:
/// - Buffer reaches <see cref="FlushBatchSize"/> events.
/// - <see cref="FlushInterval"/> elapses with a non-empty buffer.
/// - <c>PreRestart</c> / <c>PostStop</c> (supervisor swap or coordinated shutdown).
///
/// Dedup is two-layer: in-buffer (the <see cref="Dictionary{TKey, TValue}"/> below collapses
/// duplicate EventIds before flush) and at the database via the filtered unique index
/// <c>UX_ConfigAuditLog_EventId</c> (cross-restart safety). When the store rejects the batch
/// insert with a <see cref="DbUpdateException"/>, the batch is retried one row at a time so
/// that only the rejected rows (e.g. an already-flushed EventId) are dropped and the rest of
/// the batch is still persisted.
///
/// Implements the shared <see cref="IAuditWriter"/> seam: <see cref="WriteAsync"/> is a
/// best-effort, never-throwing entry point that simply <c>Tell</c>s this actor and returns
/// a completed task, so non-Akka callers can emit canonical audit events through the same
/// batching/dedup pipeline as in-cluster <c>Tell</c> traffic.
///
/// Persistence is synchronous (<c>SaveChanges</c>) and runs inside the actor's message
/// handler, so the buffer is only ever touched from the actor itself.
/// </summary>
public sealed class AuditWriterActor : ReceiveActor, IWithTimers, IAuditWriter
{
    /// <summary>Number of buffered events that triggers an immediate flush.</summary>
    public const int FlushBatchSize = 500;

    /// <summary>Period of the timer that flushes a non-empty buffer.</summary>
    public static readonly TimeSpan FlushInterval = TimeSpan.FromSeconds(5);

    /// <summary>Key under which the periodic flush timer is registered.</summary>
    private const string FlushTimerKey = "flush";

    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
    private readonly ILoggingAdapter _log = Context.GetLogger();

    // Captured once while the actor is being constructed. ActorBase.Self resolves through the
    // ambient (per-thread) actor context, so reading it from WriteAsync on a foreign thread
    // would throw, and on another actor's thread it would resolve to that other actor.
    private readonly IActorRef _self = Context.Self;

    // Pending events keyed by EventId; the key provides the in-buffer dedup.
    private readonly Dictionary<Guid, AuditEvent> _buffer = new();

    /// <summary>Gets or sets the timer scheduler for the actor.</summary>
    public ITimerScheduler Timers { get; set; } = null!;

    /// <summary>Creates a Props factory for the AuditWriterActor.</summary>
    /// <param name="dbFactory">The database context factory for creating ConfigDb connections.</param>
    /// <returns>Props that construct an <see cref="AuditWriterActor"/> over <paramref name="dbFactory"/>.</returns>
    public static Props Props(IDbContextFactory<OtOpcUaConfigDbContext> dbFactory) =>
        Akka.Actor.Props.Create(() => new AuditWriterActor(dbFactory));

    /// <summary>Initializes a new instance of the AuditWriterActor class.</summary>
    /// <param name="dbFactory">The database context factory for creating ConfigDb connections.</param>
    public AuditWriterActor(IDbContextFactory<OtOpcUaConfigDbContext> dbFactory)
    {
        _dbFactory = dbFactory;
        Receive<AuditEvent>(HandleEvent);
        Receive<Flush>(_ => FlushBuffer());
    }

    /// <inheritdoc />
    protected override void PreStart()
    {
        Timers.StartPeriodicTimer(FlushTimerKey, Flush.Instance, FlushInterval);
    }

    /// <summary>
    /// <see cref="IAuditWriter"/> seam. Best-effort and never throws: routes the event onto this
    /// actor's mailbox via <c>Tell</c> on the self reference captured at construction (so it is
    /// usable from any caller thread) and returns immediately. The event then flows through the
    /// same batching + dedup pipeline as in-cluster traffic; the actual persistence happens
    /// asynchronously on the next flush, and a write failure there is logged and the affected
    /// events dropped (per the best-effort audit contract).
    /// </summary>
    /// <param name="evt">The canonical audit event to persist.</param>
    /// <param name="ct">Unused — enqueue is synchronous and non-blocking.</param>
    /// <returns>A completed task.</returns>
    public Task WriteAsync(AuditEvent evt, CancellationToken ct = default)
    {
        try
        {
            _self.Tell(evt);
        }
        catch (Exception ex)
        {
            // The seam must never surface a failure to the caller; record it and move on.
            _log.Warning(ex, "AuditWriter enqueue failed; event dropped");
        }

        return Task.CompletedTask;
    }

    /// <summary>Buffers one event and flushes as soon as the batch-size threshold is reached.</summary>
    /// <param name="evt">The audit event received from the mailbox.</param>
    private void HandleEvent(AuditEvent evt)
    {
        // In-buffer dedup. Last write wins on duplicate EventId within the batch — events
        // with the same EventId are by contract identical, so this is a no-op.
        _buffer[evt.EventId] = evt;

        if (_buffer.Count >= FlushBatchSize)
        {
            FlushBuffer();
        }
    }

    /// <summary>
    /// Writes every buffered event to <c>ConfigAuditLog</c> in a single <c>SaveChanges</c> and
    /// empties the buffer. The buffer is cleared before the write, so events of a failed flush
    /// are not re-queued. A <see cref="DbUpdateException"/> triggers a row-by-row retry; any
    /// other failure is logged and the whole batch is dropped.
    /// </summary>
    private void FlushBuffer()
    {
        if (_buffer.Count == 0)
        {
            return;
        }

        var snapshot = _buffer.Values.ToList();
        _buffer.Clear();

        try
        {
            using var db = _dbFactory.CreateDbContext();
            db.ConfigAuditLogs.AddRange(snapshot.Select(ToRow));
            db.SaveChanges();
            _log.Debug("AuditWriter flushed {Count} events", snapshot.Count);
        }
        catch (DbUpdateException ex)
        {
            // The store rejected the batch (e.g. one EventId already persisted by an earlier
            // flush). One bad row must not cost the whole batch, so retry rows individually.
            _log.Warning(ex, "AuditWriter batch insert rejected; retrying {Count} events individually", snapshot.Count);
            FlushIndividually(snapshot);
        }
        catch (Exception ex)
        {
            _log.Error(ex, "AuditWriter flush failed; {Count} events dropped", snapshot.Count);
        }
    }

    /// <summary>
    /// Fallback for a rejected batch: inserts each event with its own context and
    /// <c>SaveChanges</c>, dropping only the rows the store rejects. Stops at the first failure
    /// that is not a <see cref="DbUpdateException"/> and drops the remaining events, so a
    /// failure of that kind is not repeated for every remaining row.
    /// </summary>
    /// <param name="events">The events of the batch that failed to insert as a whole.</param>
    private void FlushIndividually(IReadOnlyList<AuditEvent> events)
    {
        var persisted = 0;
        var rejected = 0;

        foreach (var evt in events)
        {
            try
            {
                // Fresh context per row so a rejected entity is not still tracked (and re-sent)
                // when the next row is saved.
                using var db = _dbFactory.CreateDbContext();
                db.ConfigAuditLogs.Add(ToRow(evt));
                db.SaveChanges();
                persisted++;
            }
            catch (DbUpdateException ex)
            {
                rejected++;
                _log.Debug("AuditWriter dropped event {EventId}: {Reason}", evt.EventId, ex.GetBaseException().Message);
            }
            catch (Exception ex)
            {
                _log.Error(ex, "AuditWriter flush failed; {Count} events dropped", events.Count - persisted);
                return;
            }
        }

        if (rejected > 0)
        {
            _log.Warning("AuditWriter flushed {Count} events individually; {Rejected} rejected by the store", persisted, rejected);
        }
        else
        {
            _log.Debug("AuditWriter flushed {Count} events", persisted);
        }
    }

    /// <summary>Maps a canonical audit event onto a new <c>ConfigAuditLog</c> row.</summary>
    /// <param name="evt">The event to map.</param>
    /// <returns>An untracked entity ready to be added to the context.</returns>
    private static ConfigAuditLog ToRow(AuditEvent evt) => new()
    {
        Timestamp = evt.OccurredAtUtc.UtcDateTime,
        Principal = evt.Actor,
        EventType = $"{evt.Category}:{evt.Action}",
        NodeId = evt.SourceNode,
        DetailsJson = evt.DetailsJson,
        EventId = evt.EventId,
        CorrelationId = evt.CorrelationId,
        // Persisted as the enum member name.
        Outcome = evt.Outcome.ToString(),
    };

    /// <inheritdoc />
    protected override void PreRestart(Exception reason, object message)
    {
        // Persist whatever is buffered before the base restart handling runs.
        FlushBuffer();
        base.PreRestart(reason, message);
    }

    /// <inheritdoc />
    protected override void PostStop()
    {
        // Final flush so buffered events are written before the actor goes away.
        FlushBuffer();
        base.PostStop();
    }

    /// <summary>
    /// Message that asks the actor to flush its buffer; sent by the periodic timer started in
    /// <see cref="PreStart"/>. Singleton — use <see cref="Instance"/>.
    /// </summary>
    public sealed class Flush
    {
        /// <summary>The single shared instance of the flush message.</summary>
        public static readonly Flush Instance = new();

        private Flush() { }
    }
}