feat(auditlog): AuditLogIngestActor + gRPC handler (#23)
This commit is contained in:
95
src/ScadaLink.AuditLog/Central/AuditLogIngestActor.cs
Normal file
95
src/ScadaLink.AuditLog/Central/AuditLogIngestActor.cs
Normal file
@@ -0,0 +1,95 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Audit;
|
||||
|
||||
namespace ScadaLink.AuditLog.Central;
|
||||
|
||||
/// <summary>
|
||||
/// Central-side singleton (per Bundle E wiring) that ingests batches of
|
||||
/// <see cref="AuditEvent"/> rows pushed from sites via the
|
||||
/// <c>IngestAuditEvents</c> gRPC RPC. Each row is stamped with the central-side
|
||||
/// <see cref="AuditEvent.IngestedAtUtc"/> and inserted idempotently via
|
||||
/// <see cref="IAuditLogRepository.InsertIfNotExistsAsync"/> — duplicates are
|
||||
/// silently swallowed (first-write-wins per Bundle A's hardening).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Idempotency is the contract: a row that already exists at central counts
|
||||
/// as "accepted" for the purposes of the reply, because the storage state is
|
||||
/// consistent and the site is free to flip its local row to <c>Forwarded</c>.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Per Bundle D's brief, audit-write failures must NEVER abort the user-facing
|
||||
/// action. The actor wraps each repository call in its own try/catch so a
|
||||
/// single bad row cannot cause the rest of the batch to be lost; the actor's
|
||||
/// <see cref="SupervisorStrategy"/> uses <c>Resume</c> so a thrown exception
|
||||
/// inside <c>ReceiveAsync</c> does not restart the actor (which would also
|
||||
/// reset any in-flight state).
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public class AuditLogIngestActor : ReceiveActor
|
||||
{
|
||||
private readonly IAuditLogRepository _repository;
|
||||
private readonly ILogger<AuditLogIngestActor> _logger;
|
||||
|
||||
public AuditLogIngestActor(
|
||||
IAuditLogRepository repository,
|
||||
ILogger<AuditLogIngestActor> logger)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(repository);
|
||||
ArgumentNullException.ThrowIfNull(logger);
|
||||
|
||||
_repository = repository;
|
||||
_logger = logger;
|
||||
|
||||
ReceiveAsync<IngestAuditEventsCommand>(OnIngestAsync);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Audit-write failures are best-effort by design (see alog.md §13): a
|
||||
/// thrown exception in the ingest pipeline must not crash the actor.
|
||||
/// Resume keeps the actor's state intact so the next batch is processed
|
||||
/// against the same repository instance.
|
||||
/// </summary>
|
||||
protected override SupervisorStrategy SupervisorStrategy()
|
||||
{
|
||||
return new OneForOneStrategy(maxNrOfRetries: 0, withinTimeRange: TimeSpan.Zero, decider:
|
||||
Akka.Actor.SupervisorStrategy.DefaultDecider);
|
||||
}
|
||||
|
||||
private async Task OnIngestAsync(IngestAuditEventsCommand cmd)
|
||||
{
|
||||
// Sender is captured before the first await — Akka resets Sender
|
||||
// between message dispatches, so a post-await Tell would go to
|
||||
// DeadLetters.
|
||||
var replyTo = Sender;
|
||||
var nowUtc = DateTime.UtcNow;
|
||||
var accepted = new List<Guid>(cmd.Events.Count);
|
||||
|
||||
foreach (var evt in cmd.Events)
|
||||
{
|
||||
try
|
||||
{
|
||||
// Stamp IngestedAtUtc here, not at the site. Bundle A's
|
||||
// repository hardening already swallows duplicate-key races,
|
||||
// so the same id arriving twice (site retry, reconciliation)
|
||||
// is a silent no-op.
|
||||
var ingested = evt with { IngestedAtUtc = nowUtc };
|
||||
await _repository.InsertIfNotExistsAsync(ingested).ConfigureAwait(false);
|
||||
accepted.Add(evt.EventId);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Per-row catch — one bad row never sinks the whole batch.
|
||||
// The row stays Pending at the site; the next drain retries.
|
||||
_logger.LogError(ex,
|
||||
"Failed to persist audit event {EventId} during batch ingest; row will be retried by the site.",
|
||||
evt.EventId);
|
||||
}
|
||||
}
|
||||
|
||||
replyTo.Tell(new IngestAuditEventsReply(accepted));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
|
||||
namespace ScadaLink.Commons.Messages.Audit;
|
||||
|
||||
/// <summary>
|
||||
/// Akka message sent to the central <c>AuditLogIngestActor</c> (Audit Log #23,
|
||||
/// M2 site-sync pipeline) carrying a batch of <see cref="AuditEvent"/> rows
|
||||
/// decoded by the <c>SiteStreamGrpcServer</c> from a site's
|
||||
/// <c>IngestAuditEvents</c> gRPC RPC. The actor stamps
|
||||
/// <see cref="AuditEvent.IngestedAtUtc"/> and writes the rows idempotently to
|
||||
/// the central <c>AuditLog</c> table.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Lives in <c>ScadaLink.Commons</c> rather than <c>ScadaLink.AuditLog</c>
|
||||
/// because the gRPC server in <c>ScadaLink.Communication</c> needs to construct
|
||||
/// it, and <c>ScadaLink.AuditLog</c> already references
|
||||
/// <c>ScadaLink.Communication</c> (the proto DTOs live there). Putting the
|
||||
/// message in Commons avoids a project-reference cycle.
|
||||
/// </remarks>
|
||||
public sealed record IngestAuditEventsCommand(IReadOnlyList<AuditEvent> Events);
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace ScadaLink.Commons.Messages.Audit;
|
||||
|
||||
/// <summary>
|
||||
/// Reply from the central <c>AuditLogIngestActor</c> for an
|
||||
/// <see cref="IngestAuditEventsCommand"/>. <see cref="AcceptedEventIds"/> lists
|
||||
/// every row the actor considers durably persisted at central — including ids
|
||||
/// that were already present before the call (first-write-wins idempotency).
|
||||
/// The gRPC handler echoes these ids back over the wire as the <c>IngestAck</c>
|
||||
/// the site uses to flip rows to <c>Forwarded</c>.
|
||||
/// </summary>
|
||||
public sealed record IngestAuditEventsReply(IReadOnlyList<Guid> AcceptedEventIds);
|
||||
@@ -4,6 +4,9 @@ using Akka.Actor;
|
||||
using Grpc.Core;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Messages.Audit;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using GrpcStatus = Grpc.Core.Status;
|
||||
|
||||
namespace ScadaLink.Communication.Grpc;
|
||||
@@ -23,6 +26,15 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
|
||||
private readonly TimeSpan _maxStreamLifetime;
|
||||
private volatile bool _ready;
|
||||
private long _actorCounter;
|
||||
// Audit Log (#23 M2): central-side ingest actor proxy. Set by the host
|
||||
// after the cluster singleton starts (see Bundle E wiring). When null the
|
||||
// IngestAuditEvents RPC replies with an empty IngestAck so sites can
|
||||
// safely retry — wiring-incomplete is treated as transient, never fatal.
|
||||
private IActorRef? _auditIngestActor;
|
||||
// Per Bundle D's brief — Ask timeout is 30 s. The ingest actor's repo
|
||||
// calls are sub-100 ms in steady state; a generous timeout absorbs a slow
|
||||
// MSSQL connection without surfacing as a gRPC failure on a healthy site.
|
||||
private static readonly TimeSpan AuditIngestAskTimeout = TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <summary>
|
||||
/// Test-only constructor — kept <c>internal</c> so the DI container sees a
|
||||
@@ -76,6 +88,19 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
|
||||
_ready = true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Hands the central-side <c>AuditLogIngestActor</c> proxy to the gRPC
|
||||
/// server so the <see cref="IngestAuditEvents"/> RPC can route incoming
|
||||
/// site batches. Audit Log (#23) M2 wiring point — mirrors the way
|
||||
/// <c>CommunicationService.SetNotificationOutbox</c> takes the Notification
|
||||
/// Outbox singleton proxy. Bundle E supplies the actor after the cluster
|
||||
/// singleton starts.
|
||||
/// </summary>
|
||||
public void SetAuditIngestActor(IActorRef proxy)
|
||||
{
|
||||
_auditIngestActor = proxy;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Number of currently active streaming subscriptions. Exposed for diagnostics.
|
||||
/// </summary>
|
||||
@@ -168,6 +193,114 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Audit Log (#23) M2 site→central push RPC. Decodes a site batch into
|
||||
/// <see cref="AuditEvent"/> rows, Asks the central <c>AuditLogIngestActor</c>
|
||||
/// proxy to persist them, and echoes the accepted EventIds back so the site
|
||||
/// can flip its local rows to <c>Forwarded</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The DTO→entity conversion is inlined here (rather than calling the
|
||||
/// AuditLog mapper) to avoid a project-reference cycle:
|
||||
/// <c>ScadaLink.AuditLog</c> already references
|
||||
/// <c>ScadaLink.Communication</c>, so the gRPC server cannot reach back
|
||||
/// into AuditLog for its mapper. The shape mirrors
|
||||
/// <c>AuditEventMapper.FromDto</c> in <c>ScadaLink.AuditLog.Telemetry</c>;
|
||||
/// the two must evolve together.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// When <see cref="_auditIngestActor"/> is not yet wired (host startup
|
||||
/// race window), the RPC returns an empty <see cref="IngestAck"/> rather
|
||||
/// than failing — the site treats the missing ack as a transient outcome
|
||||
/// and retries on the next drain, which is the desired idempotent
|
||||
/// behaviour.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public override async Task<IngestAck> IngestAuditEvents(
|
||||
AuditEventBatch request,
|
||||
ServerCallContext context)
|
||||
{
|
||||
// Empty batch is a no-op; reply immediately so the client moves on.
|
||||
if (request.Events.Count == 0)
|
||||
{
|
||||
return new IngestAck();
|
||||
}
|
||||
|
||||
var actor = _auditIngestActor;
|
||||
if (actor is null)
|
||||
{
|
||||
// Wiring incomplete (host startup race). Sites treat an empty
|
||||
// ack as "nothing was acked, leave rows Pending, retry next
|
||||
// drain" — exactly the right behaviour during host bring-up.
|
||||
_logger.LogWarning(
|
||||
"IngestAuditEvents received {Count} events before SetAuditIngestActor was called; returning empty ack.",
|
||||
request.Events.Count);
|
||||
return new IngestAck();
|
||||
}
|
||||
|
||||
// Inlined FromDto. Keep in sync with AuditEventMapper.FromDto in
|
||||
// ScadaLink.AuditLog.Telemetry — there is no shared mapper because
|
||||
// doing so would create a project-reference cycle (AuditLog → Communication).
|
||||
var entities = new List<AuditEvent>(request.Events.Count);
|
||||
foreach (var dto in request.Events)
|
||||
{
|
||||
entities.Add(new AuditEvent
|
||||
{
|
||||
EventId = Guid.Parse(dto.EventId),
|
||||
OccurredAtUtc = DateTime.SpecifyKind(dto.OccurredAtUtc.ToDateTime(), DateTimeKind.Utc),
|
||||
IngestedAtUtc = null,
|
||||
Channel = Enum.Parse<AuditChannel>(dto.Channel),
|
||||
Kind = Enum.Parse<AuditKind>(dto.Kind),
|
||||
CorrelationId = string.IsNullOrEmpty(dto.CorrelationId) ? null : Guid.Parse(dto.CorrelationId),
|
||||
SourceSiteId = NullIfEmpty(dto.SourceSiteId),
|
||||
SourceInstanceId = NullIfEmpty(dto.SourceInstanceId),
|
||||
SourceScript = NullIfEmpty(dto.SourceScript),
|
||||
Actor = NullIfEmpty(dto.Actor),
|
||||
Target = NullIfEmpty(dto.Target),
|
||||
Status = Enum.Parse<AuditStatus>(dto.Status),
|
||||
HttpStatus = dto.HttpStatus,
|
||||
DurationMs = dto.DurationMs,
|
||||
ErrorMessage = NullIfEmpty(dto.ErrorMessage),
|
||||
ErrorDetail = NullIfEmpty(dto.ErrorDetail),
|
||||
RequestSummary = NullIfEmpty(dto.RequestSummary),
|
||||
ResponseSummary = NullIfEmpty(dto.ResponseSummary),
|
||||
PayloadTruncated = dto.PayloadTruncated,
|
||||
Extra = NullIfEmpty(dto.Extra),
|
||||
ForwardState = null,
|
||||
});
|
||||
}
|
||||
|
||||
var cmd = new IngestAuditEventsCommand(entities);
|
||||
IngestAuditEventsReply reply;
|
||||
try
|
||||
{
|
||||
reply = await actor.Ask<IngestAuditEventsReply>(
|
||||
cmd, AuditIngestAskTimeout, context.CancellationToken);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Audit ingest is best-effort; failing this RPC at the gRPC layer
|
||||
// would surface as a transport error and force the site to retry
|
||||
// (which it would do anyway). Logging + an empty ack keeps the
|
||||
// semantics consistent with the "wiring incomplete" path above.
|
||||
_logger.LogError(ex,
|
||||
"AuditLogIngestActor Ask failed for batch of {Count} events; returning empty ack.",
|
||||
request.Events.Count);
|
||||
return new IngestAck();
|
||||
}
|
||||
|
||||
var ack = new IngestAck();
|
||||
foreach (var id in reply.AcceptedEventIds)
|
||||
{
|
||||
ack.AcceptedEventIds.Add(id.ToString());
|
||||
}
|
||||
return ack;
|
||||
}
|
||||
|
||||
private static string? NullIfEmpty(string? value) =>
|
||||
string.IsNullOrEmpty(value) ? null : value;
|
||||
|
||||
/// <summary>
|
||||
/// Tracks a single active stream so cleanup only removes its own entry.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user