feat(auditlog): real ClusterClient-based site audit push client
This commit is contained in:
@@ -121,11 +121,14 @@ public static class ServiceCollectionExtensions
|
||||
logger: sp.GetRequiredService<ILogger<FallbackAuditWriter>>(),
|
||||
filter: sp.GetRequiredService<IAuditPayloadFilter>()));
|
||||
|
||||
// ISiteStreamAuditClient: NoOp default. M6's reconciliation work brings
|
||||
// the real gRPC-backed implementation (no site→central gRPC channel
|
||||
// exists today — sites talk to central via Akka ClusterClient only).
|
||||
// Bundle H's integration test substitutes a stub directly into the
|
||||
// SiteAuditTelemetryActor's Props.Create call.
|
||||
// ISiteStreamAuditClient: NoOp default. This binding remains correct for
|
||||
// central/test composition roots that have no SiteCommunicationActor.
|
||||
// The real implementation is ClusterClientSiteAuditClient, which pushes
|
||||
// audit telemetry to central over Akka ClusterClient via the site's
|
||||
// SiteCommunicationActor — the Host wires it directly into the
|
||||
// SiteAuditTelemetryActor's Props.Create call for site roles (it cannot
|
||||
// be a DI singleton because it needs the SiteCommunicationActor IActorRef,
|
||||
// created during Akka bootstrap, not at DI-composition time).
|
||||
services.AddSingleton<ISiteStreamAuditClient, NoOpSiteStreamAuditClient>();
|
||||
|
||||
// M3 Bundle F: site-side dual emitter for cached-call lifecycle
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
using Akka.Actor;
|
||||
using ScadaLink.AuditLog.Telemetry;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Messages.Audit;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Communication.Grpc;
|
||||
|
||||
namespace ScadaLink.AuditLog.Site.Telemetry;
|
||||
|
||||
/// <summary>
|
||||
/// Production <see cref="ISiteStreamAuditClient"/> binding for site composition
|
||||
/// roots: pushes audit telemetry to central over Akka <c>ClusterClient</c> via
|
||||
/// the site's <c>SiteCommunicationActor</c>. The actor forwards the command to
|
||||
/// <c>/user/central-communication</c> and the central
|
||||
/// <c>CentralCommunicationActor</c> Asks the <c>AuditLogIngestActor</c> proxy —
|
||||
/// the same command/control transport notifications already use. Wired by the
|
||||
/// Host for site roles; central and test composition roots keep the
|
||||
/// <see cref="NoOpSiteStreamAuditClient"/> DI default (they have no
|
||||
/// <c>SiteCommunicationActor</c>).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// <b>Throw-on-failure contract.</b> An Ask timeout or a faulted reply
|
||||
/// (<see cref="Status.Failure"/>) propagates as a thrown exception out of the
|
||||
/// <c>Ingest*Async</c> methods — it is NOT caught and turned into an empty ack.
|
||||
/// The <see cref="SiteAuditTelemetryActor"/> drain loop treats a thrown
|
||||
/// exception as transient and leaves the rows <c>Pending</c> for the next tick.
|
||||
/// Swallowing the fault into an empty ack would be indistinguishable from "zero
|
||||
/// rows accepted" and would silently lose the retry signal. Task 1 confirmed
|
||||
/// the central receiving end does not collapse an ingest fault into an empty
|
||||
/// ack either, so a site-side Ask through the whole path faults cleanly on a
|
||||
/// central-side timeout.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The batches arrive as proto DTOs (<see cref="AuditEventBatch"/> /
|
||||
/// <see cref="CachedTelemetryBatch"/>) because the
|
||||
/// <see cref="SiteAuditTelemetryActor"/> builds them with
|
||||
/// <see cref="AuditEventMapper.ToDto"/>. This client converts them back into
|
||||
/// the <see cref="AuditEvent"/> / <see cref="SiteCall"/> entities the Akka
|
||||
/// command messages carry — the same DTO→entity translation the
|
||||
/// <c>SiteStreamGrpcServer</c> performs for the gRPC reconciliation path.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class ClusterClientSiteAuditClient : ISiteStreamAuditClient
{
    private readonly IActorRef _siteCommunicationActor;
    private readonly TimeSpan _askTimeout;

    /// <summary>
    /// Creates a client that sends every ingest command to the supplied actor
    /// via Ask.
    /// </summary>
    /// <param name="siteCommunicationActor">
    /// Actor that receives the ingest commands (the site's
    /// <c>SiteCommunicationActor</c>). Must not be <c>null</c>.
    /// </param>
    /// <param name="askTimeout">
    /// Timeout handed to each Ask. It is stored as-is; no range validation is
    /// performed here.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="siteCommunicationActor"/> is <c>null</c>.
    /// </exception>
    public ClusterClientSiteAuditClient(IActorRef siteCommunicationActor, TimeSpan askTimeout)
    {
        _siteCommunicationActor = siteCommunicationActor
            ?? throw new ArgumentNullException(nameof(siteCommunicationActor));
        _askTimeout = askTimeout;
    }

    /// <inheritdoc/>
    public async Task<IngestAck> IngestAuditEventsAsync(AuditEventBatch batch, CancellationToken ct)
    {
        if (batch is null)
        {
            throw new ArgumentNullException(nameof(batch));
        }

        var command = new IngestAuditEventsCommand(ToAuditEvents(batch));

        // No try/catch on purpose: a timed-out or faulted Ask must surface as
        // an exception so the caller can tell it apart from "zero rows accepted".
        var reply = await AskCentral<IngestAuditEventsReply>(command, ct).ConfigureAwait(false);

        return ToAck(reply.AcceptedEventIds);
    }

    /// <inheritdoc/>
    public async Task<IngestAck> IngestCachedTelemetryAsync(CachedTelemetryBatch batch, CancellationToken ct)
    {
        if (batch is null)
        {
            throw new ArgumentNullException(nameof(batch));
        }

        var command = new IngestCachedTelemetryCommand(ToCachedEntries(batch));

        // Same throw-on-failure behaviour as IngestAuditEventsAsync; only the
        // command and reply message types differ.
        var reply = await AskCentral<IngestCachedTelemetryReply>(command, ct).ConfigureAwait(false);

        return ToAck(reply.AcceptedEventIds);
    }

    /// <summary>
    /// Issues the Ask against the site communication actor using the
    /// configured timeout and the caller's cancellation token.
    /// </summary>
    /// <typeparam name="TReply">Reply message type the Ask is typed to.</typeparam>
    /// <param name="command">Ingest command to send.</param>
    /// <param name="ct">Cancellation token passed through to the Ask.</param>
    /// <returns>The pending Ask; awaited (and left uncaught) by the callers.</returns>
    private Task<TReply> AskCentral<TReply>(object command, CancellationToken ct)
    {
        return _siteCommunicationActor.Ask<TReply>(command, _askTimeout, ct);
    }

    /// <summary>
    /// Converts every DTO in the batch to an <see cref="AuditEvent"/> via
    /// <see cref="AuditEventMapper.FromDto"/>, preserving order.
    /// </summary>
    private static List<AuditEvent> ToAuditEvents(AuditEventBatch batch)
    {
        var converted = new List<AuditEvent>(batch.Events.Count);

        foreach (var eventDto in batch.Events)
        {
            converted.Add(AuditEventMapper.FromDto(eventDto));
        }

        return converted;
    }

    /// <summary>
    /// Converts every packet in the batch to a
    /// <see cref="CachedTelemetryEntry"/> (audit event + site call),
    /// preserving order.
    /// </summary>
    private static List<CachedTelemetryEntry> ToCachedEntries(CachedTelemetryBatch batch)
    {
        var converted = new List<CachedTelemetryEntry>(batch.Packets.Count);

        foreach (var telemetryPacket in batch.Packets)
        {
            // Audit half first, then the operational half — same order as the
            // packet fields are consumed by the entry constructor.
            var auditEvent = AuditEventMapper.FromDto(telemetryPacket.AuditEvent);
            var operational = MapSiteCall(telemetryPacket.Operational);
            converted.Add(new CachedTelemetryEntry(auditEvent, operational));
        }

        return converted;
    }

    /// <summary>
    /// Builds an <see cref="IngestAck"/> whose accepted-id list holds the
    /// default string form of each accepted <see cref="Guid"/>, in order.
    /// </summary>
    private static IngestAck ToAck(IReadOnlyList<Guid> acceptedEventIds)
    {
        var result = new IngestAck();

        for (var index = 0; index < acceptedEventIds.Count; index++)
        {
            result.AcceptedEventIds.Add(acceptedEventIds[index].ToString());
        }

        return result;
    }

    /// <summary>
    /// Builds a <see cref="SiteCall"/> entity from a
    /// <see cref="SiteCallOperationalDto"/>. An empty or missing
    /// <c>LastError</c> becomes <c>null</c>; all timestamps are stamped as
    /// UTC; an absent <c>TerminalAtUtc</c> stays <c>null</c>.
    /// <see cref="SiteCall.IngestedAtUtc"/> is set to the current UTC time as
    /// a placeholder — per the original notes the central
    /// <c>AuditLogIngestActor</c> overwrites it during its dual-write.
    /// </summary>
    private static SiteCall MapSiteCall(SiteCallOperationalDto dto)
    {
        var operationId = TrackedOperationId.Parse(dto.TrackedOperationId);

        string? lastError = dto.LastError;
        if (string.IsNullOrEmpty(lastError))
        {
            lastError = null;
        }

        var createdAt = AsUtc(dto.CreatedAtUtc.ToDateTime());
        var updatedAt = AsUtc(dto.UpdatedAtUtc.ToDateTime());

        DateTime? terminalAt = null;
        if (dto.TerminalAtUtc is not null)
        {
            terminalAt = AsUtc(dto.TerminalAtUtc.ToDateTime());
        }

        return new SiteCall
        {
            TrackedOperationId = operationId,
            Channel = dto.Channel,
            Target = dto.Target,
            SourceSite = dto.SourceSite,
            Status = dto.Status,
            RetryCount = dto.RetryCount,
            LastError = lastError,
            HttpStatus = dto.HttpStatus,
            CreatedAtUtc = createdAt,
            UpdatedAtUtc = updatedAt,
            TerminalAtUtc = terminalAt,
            IngestedAtUtc = DateTime.UtcNow, // placeholder; see summary
        };
    }

    /// <summary>Returns the same instant with its kind set to UTC.</summary>
    private static DateTime AsUtc(DateTime value)
    {
        return DateTime.SpecifyKind(value, DateTimeKind.Utc);
    }
}
|
||||
@@ -2,6 +2,7 @@ using Akka.Actor;
|
||||
using Akka.Cluster.Tools.Client;
|
||||
using Akka.Event;
|
||||
using ScadaLink.Commons.Messages.Artifacts;
|
||||
using ScadaLink.Commons.Messages.Audit;
|
||||
using ScadaLink.Commons.Messages.DebugView;
|
||||
using ScadaLink.Commons.Messages.Deployment;
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
@@ -214,6 +215,54 @@ public class SiteCommunicationActor : ReceiveActor, IWithTimers
|
||||
new ClusterClient.Send("/user/central-communication", msg), Sender);
|
||||
});
|
||||
|
||||
// Audit Log (#23): forward a batch of site-local audit events to the
|
||||
// central cluster. The site SiteAuditTelemetryActor drains its SQLite
|
||||
// Pending queue through the ClusterClientSiteAuditClient, which Asks
|
||||
// this actor; the original Sender (that Ask) is passed as the
|
||||
// ClusterClient.Send sender so the IngestAuditEventsReply routes
|
||||
// straight back to the waiting Ask, not here. Mirrors NotificationSubmit.
|
||||
Receive<IngestAuditEventsCommand>(msg =>
{
    if (_centralClient is not null)
    {
        // Happy path: wrap the command in a ClusterClient.Send addressed to
        // the central communication actor. The current Sender is passed
        // along as the sender of the forwarded message, so the reply is
        // addressed to the original asker rather than to this actor.
        _log.Debug("Forwarding IngestAuditEventsCommand ({0} events) to central", msg.Events.Count);
        _centralClient.Tell(
            new ClusterClient.Send("/user/central-communication", msg),
            Sender);
    }
    else
    {
        // No central ClusterClient has been registered with this actor yet.
        // Reply with an explicit failure instead of dropping the message so
        // the asker sees a fault rather than waiting for its timeout.
        _log.Warning(
            "Cannot forward IngestAuditEventsCommand ({0} events) — no central ClusterClient registered",
            msg.Events.Count);
        Sender.Tell(new Status.Failure(
            new InvalidOperationException("Central ClusterClient not registered")));
    }
});
|
||||
|
||||
// Audit Log (#23) M3: forward a batch of combined cached-call telemetry
|
||||
// packets to the central cluster. Same forward + reply-routing pattern
|
||||
// as IngestAuditEventsCommand; central replies with an
|
||||
// IngestCachedTelemetryReply.
|
||||
Receive<IngestCachedTelemetryCommand>(msg =>
{
    if (_centralClient is not null)
    {
        // Forward to the central communication actor, keeping the current
        // Sender as the sender of the forwarded message so the reply is
        // addressed to the original asker rather than to this actor.
        _log.Debug("Forwarding IngestCachedTelemetryCommand ({0} entries) to central", msg.Entries.Count);
        _centralClient.Tell(
            new ClusterClient.Send("/user/central-communication", msg),
            Sender);
    }
    else
    {
        // No central ClusterClient registered: answer with a failure so the
        // asker faults immediately instead of waiting for its timeout.
        _log.Warning(
            "Cannot forward IngestCachedTelemetryCommand ({0} entries) — no central ClusterClient registered",
            msg.Entries.Count);
        Sender.Tell(new Status.Failure(
            new InvalidOperationException("Central ClusterClient not registered")));
    }
});
|
||||
|
||||
// Internal: send heartbeat tick
|
||||
Receive<SendHeartbeat>(_ => SendHeartbeatToCentral());
|
||||
|
||||
|
||||
@@ -668,8 +668,18 @@ akka {{
|
||||
.GetRequiredService<IOptions<ScadaLink.AuditLog.Site.Telemetry.SiteAuditTelemetryOptions>>();
|
||||
var siteAuditQueue = _serviceProvider
|
||||
.GetRequiredService<ScadaLink.Commons.Interfaces.Services.ISiteAuditQueue>();
|
||||
var siteAuditClient = _serviceProvider
|
||||
.GetRequiredService<ScadaLink.AuditLog.Site.Telemetry.ISiteStreamAuditClient>();
|
||||
// Audit Log (#23) Task 2 follow-up: the production site→central audit
|
||||
// push uses the ClusterClient transport via the SiteCommunicationActor,
|
||||
// not the DI-resolved NoOpSiteStreamAuditClient. The NoOp default stays
|
||||
// correct for central/test composition roots (no SiteCommunicationActor);
|
||||
// a site role wires the real ClusterClient-based client here so the
|
||||
// SQLite Pending backlog actually drains to central. The forward Ask
|
||||
// reuses NotificationForwardTimeout — the same site→central command
|
||||
// forward bound notifications already use over this transport.
|
||||
var siteAuditClient = (ScadaLink.AuditLog.Site.Telemetry.ISiteStreamAuditClient)
|
||||
new ScadaLink.AuditLog.Site.Telemetry.ClusterClientSiteAuditClient(
|
||||
siteCommActor,
|
||||
_communicationOptions.NotificationForwardTimeout);
|
||||
var siteAuditLogger = _serviceProvider.GetRequiredService<ILoggerFactory>()
|
||||
.CreateLogger<ScadaLink.AuditLog.Site.Telemetry.SiteAuditTelemetryActor>();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user