feat(auditlog): SiteAuditTelemetryActor + ISiteStreamAuditClient seam (#23)
This commit is contained in:
@@ -8,6 +8,9 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- Bundle D D1: SiteAuditTelemetryActor + (D2) AuditLogIngestActor live
|
||||
in this project, so Akka is an explicit dependency. -->
|
||||
<PackageReference Include="Akka" />
|
||||
<PackageReference Include="Microsoft.Data.Sqlite" />
|
||||
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
|
||||
@@ -2,6 +2,7 @@ using System.Threading.Channels;
|
||||
using Microsoft.Data.Sqlite;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Site.Telemetry;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
@@ -28,7 +29,7 @@ namespace ScadaLink.AuditLog.Site;
|
||||
/// the site SQLite schema — central stamps it on ingest.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public class SqliteAuditWriter : IAuditWriter, IAsyncDisposable, IDisposable
|
||||
public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable, IDisposable
|
||||
{
|
||||
// Microsoft.Data.Sqlite reports a generic SQLITE_CONSTRAINT (error code 19)
|
||||
// on a PRIMARY KEY violation; the extended subcode 1555 (SQLITE_CONSTRAINT_PRIMARYKEY)
|
||||
|
||||
34
src/ScadaLink.AuditLog/Site/Telemetry/ISiteAuditQueue.cs
Normal file
34
src/ScadaLink.AuditLog/Site/Telemetry/ISiteAuditQueue.cs
Normal file
@@ -0,0 +1,34 @@
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
|
||||
namespace ScadaLink.AuditLog.Site.Telemetry;
|
||||
|
||||
/// <summary>
|
||||
/// Site-local audit-log queue surface consumed by <see cref="SiteAuditTelemetryActor"/>.
|
||||
/// Extracted from <see cref="SqliteAuditWriter"/> so the telemetry actor can be
|
||||
/// unit-tested against a stub without touching SQLite. <see cref="SqliteAuditWriter"/>
|
||||
/// implements this interface; production wiring injects the same instance.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Only the two methods the drain loop needs are exposed — the hot-path
|
||||
/// <c>WriteAsync</c> stays on <see cref="Commons.Interfaces.Services.IAuditWriter"/>
|
||||
/// (script-thread surface), separated by concern from the
|
||||
/// telemetry-actor surface so each side can be mocked independently.
|
||||
/// </remarks>
|
||||
public interface ISiteAuditQueue
|
||||
{
|
||||
/// <summary>
|
||||
/// Returns up to <paramref name="limit"/> rows currently in
|
||||
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Pending"/>,
|
||||
/// oldest first. Idempotent — repeated calls before
|
||||
/// <see cref="MarkForwardedAsync"/> will yield the same rows again.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<AuditEvent>> ReadPendingAsync(int limit, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Flips the supplied EventIds from
|
||||
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Pending"/> to
|
||||
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Forwarded"/>.
|
||||
/// Non-existent or already-forwarded ids are silent no-ops.
|
||||
/// </summary>
|
||||
Task MarkForwardedAsync(IReadOnlyList<Guid> eventIds, CancellationToken ct = default);
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
using ScadaLink.Communication.Grpc;
|
||||
|
||||
namespace ScadaLink.AuditLog.Site.Telemetry;
|
||||
|
||||
/// <summary>
|
||||
/// Mockable abstraction over the central site-stream gRPC client surface that
|
||||
/// <see cref="SiteAuditTelemetryActor"/> uses to push <see cref="AuditEventBatch"/>
|
||||
/// payloads. The production implementation (added in Bundle E host wiring)
|
||||
/// wraps the auto-generated <c>SiteStreamService.SiteStreamServiceClient</c>;
|
||||
/// unit tests substitute via NSubstitute against this interface so the actor
|
||||
/// never needs a live gRPC channel.
|
||||
/// </summary>
|
||||
public interface ISiteStreamAuditClient
|
||||
{
|
||||
/// <summary>
|
||||
/// Pushes <paramref name="batch"/> to the central <c>IngestAuditEvents</c>
|
||||
/// RPC. The returned <see cref="IngestAck"/> carries the
|
||||
/// <c>accepted_event_ids</c> the actor will flip to
|
||||
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Forwarded"/>
|
||||
/// in the site SQLite queue.
|
||||
/// </summary>
|
||||
Task<IngestAck> IngestAuditEventsAsync(AuditEventBatch batch, CancellationToken ct);
|
||||
}
|
||||
179
src/ScadaLink.AuditLog/Site/Telemetry/SiteAuditTelemetryActor.cs
Normal file
179
src/ScadaLink.AuditLog/Site/Telemetry/SiteAuditTelemetryActor.cs
Normal file
@@ -0,0 +1,179 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Telemetry;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Communication.Grpc;
|
||||
|
||||
namespace ScadaLink.AuditLog.Site.Telemetry;
|
||||
|
||||
/// <summary>
|
||||
/// Site-side actor that drains the local SQLite audit queue and pushes Pending
|
||||
/// rows to central via the <c>IngestAuditEvents</c> gRPC RPC. On a successful
|
||||
/// ack the matching EventIds flip to
|
||||
/// <see cref="ScadaLink.Commons.Types.Enums.AuditForwardState.Forwarded"/>; on
|
||||
/// a gRPC failure the rows stay Pending and the next drain retries.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The drain self-tick is a private <c>Drain</c> message scheduled via the
|
||||
/// actor system scheduler. The cadence is options-driven: <c>BusyIntervalSeconds</c>
|
||||
/// when the previous drain found rows (or faulted — we want quick recovery),
|
||||
/// <c>IdleIntervalSeconds</c> when the queue was empty.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Both collaborators are injected as interfaces (<see cref="ISiteAuditQueue"/>
|
||||
/// and <see cref="ISiteStreamAuditClient"/>) so unit tests substitute with
|
||||
/// NSubstitute and never touch real SQLite or gRPC.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Per Bundle D's brief, audit-write paths must be fail-safe — a thrown
|
||||
/// exception inside the actor MUST NOT crash it. The Drain handler wraps the
|
||||
/// pipeline in a top-level try/catch that logs and re-schedules, and the
|
||||
/// actor's <see cref="SupervisorStrategy"/> defaults to
|
||||
/// <see cref="Akka.Actor.SupervisorStrategy.DefaultStrategy"/>'s Restart for
|
||||
/// child actors — but this actor has no children, so the catch is what matters.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public class SiteAuditTelemetryActor : ReceiveActor
|
||||
{
|
||||
private readonly ISiteAuditQueue _queue;
|
||||
private readonly ISiteStreamAuditClient _client;
|
||||
private readonly SiteAuditTelemetryOptions _options;
|
||||
private readonly ILogger<SiteAuditTelemetryActor> _logger;
|
||||
private ICancelable? _pendingTick;
|
||||
|
||||
public SiteAuditTelemetryActor(
|
||||
ISiteAuditQueue queue,
|
||||
ISiteStreamAuditClient client,
|
||||
IOptions<SiteAuditTelemetryOptions> options,
|
||||
ILogger<SiteAuditTelemetryActor> logger)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(queue);
|
||||
ArgumentNullException.ThrowIfNull(client);
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
ArgumentNullException.ThrowIfNull(logger);
|
||||
|
||||
_queue = queue;
|
||||
_client = client;
|
||||
_options = options.Value;
|
||||
_logger = logger;
|
||||
|
||||
ReceiveAsync<Drain>(_ => OnDrainAsync());
|
||||
}
|
||||
|
||||
protected override void PreStart()
|
||||
{
|
||||
base.PreStart();
|
||||
// Initial tick fires on the busy interval so the actor starts polling
|
||||
// soon after host startup. A subsequent empty drain will move to the
|
||||
// idle interval naturally.
|
||||
ScheduleNext(TimeSpan.FromSeconds(_options.BusyIntervalSeconds));
|
||||
}
|
||||
|
||||
protected override void PostStop()
|
||||
{
|
||||
_pendingTick?.Cancel();
|
||||
base.PostStop();
|
||||
}
|
||||
|
||||
private async Task OnDrainAsync()
|
||||
{
|
||||
var nextDelay = TimeSpan.FromSeconds(_options.BusyIntervalSeconds);
|
||||
try
|
||||
{
|
||||
var pending = await _queue.ReadPendingAsync(_options.BatchSize, CancellationToken.None)
|
||||
.ConfigureAwait(false);
|
||||
if (pending.Count == 0)
|
||||
{
|
||||
// No rows — settle into the idle cadence until the next write
|
||||
// bumps us back into the busy cadence.
|
||||
nextDelay = TimeSpan.FromSeconds(_options.IdleIntervalSeconds);
|
||||
return;
|
||||
}
|
||||
|
||||
var batch = BuildBatch(pending);
|
||||
|
||||
IngestAck ack;
|
||||
try
|
||||
{
|
||||
ack = await _client.IngestAuditEventsAsync(batch, CancellationToken.None)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// gRPC fault — leave the rows in Pending so the next drain
|
||||
// retries. Bundle D's brief: "On gRPC exception (any), log
|
||||
// Warning, schedule next Drain in BusyIntervalSeconds."
|
||||
_logger.LogWarning(ex,
|
||||
"IngestAuditEvents push failed for {Count} pending events; will retry next drain.",
|
||||
pending.Count);
|
||||
return;
|
||||
}
|
||||
|
||||
var acceptedIds = ParseAcceptedIds(ack);
|
||||
if (acceptedIds.Count > 0)
|
||||
{
|
||||
await _queue.MarkForwardedAsync(acceptedIds, CancellationToken.None)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Catch-all so a SQLite hiccup or mapper bug never crashes the
|
||||
// actor. The next tick is still scheduled in the finally block.
|
||||
_logger.LogError(ex, "Unexpected error during audit-log telemetry drain.");
|
||||
}
|
||||
finally
|
||||
{
|
||||
ScheduleNext(nextDelay);
|
||||
}
|
||||
}
|
||||
|
||||
private static AuditEventBatch BuildBatch(IReadOnlyList<AuditEvent> events)
|
||||
{
|
||||
var batch = new AuditEventBatch();
|
||||
foreach (var e in events)
|
||||
{
|
||||
batch.Events.Add(AuditEventMapper.ToDto(e));
|
||||
}
|
||||
return batch;
|
||||
}
|
||||
|
||||
private static IReadOnlyList<Guid> ParseAcceptedIds(IngestAck ack)
|
||||
{
|
||||
if (ack.AcceptedEventIds.Count == 0)
|
||||
{
|
||||
return Array.Empty<Guid>();
|
||||
}
|
||||
|
||||
var list = new List<Guid>(ack.AcceptedEventIds.Count);
|
||||
foreach (var raw in ack.AcceptedEventIds)
|
||||
{
|
||||
if (Guid.TryParse(raw, out var id))
|
||||
{
|
||||
list.Add(id);
|
||||
}
|
||||
// Malformed ids are ignored — central should never emit them, but
|
||||
// we refuse to crash the actor over a bad string.
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
private void ScheduleNext(TimeSpan delay)
|
||||
{
|
||||
_pendingTick?.Cancel();
|
||||
_pendingTick = Context.System.Scheduler.ScheduleTellOnceCancelable(
|
||||
delay,
|
||||
Self,
|
||||
Drain.Instance,
|
||||
Self);
|
||||
}
|
||||
|
||||
/// <summary>Self-tick message that triggers a drain cycle.</summary>
|
||||
private sealed class Drain
|
||||
{
|
||||
public static readonly Drain Instance = new();
|
||||
private Drain() { }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
namespace ScadaLink.AuditLog.Site.Telemetry;
|
||||
|
||||
/// <summary>
|
||||
/// Tuning knobs for the site-side <see cref="SiteAuditTelemetryActor"/> drain
|
||||
/// loop. Defaults mirror Bundle D's plan: drain every 5 s while rows are
|
||||
/// flowing (busy), every 30 s when the queue is empty (idle).
|
||||
/// </summary>
|
||||
public sealed class SiteAuditTelemetryOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// Maximum number of <see cref="ScadaLink.Commons.Entities.Audit.AuditEvent"/>
|
||||
/// rows read from the site SQLite queue and pushed in a single gRPC batch.
|
||||
/// </summary>
|
||||
public int BatchSize { get; set; } = 256;
|
||||
|
||||
/// <summary>
|
||||
/// Delay between drains when the previous drain found at least one Pending
|
||||
/// row OR the previous push faulted. Re-drain quickly to keep telemetry
|
||||
/// flowing and to retry transient gRPC errors.
|
||||
/// </summary>
|
||||
public int BusyIntervalSeconds { get; set; } = 5;
|
||||
|
||||
/// <summary>
|
||||
/// Delay between drains when the previous drain found no Pending rows.
|
||||
/// Longer interval avoids hammering an idle SQLite + gRPC channel.
|
||||
/// </summary>
|
||||
public int IdleIntervalSeconds { get; set; } = 30;
|
||||
}
|
||||
Reference in New Issue
Block a user