feat(audit): close AuditLog-001 — wire combined-telemetry dual-write transport
Closes the last open code-review finding. The unreachable
IngestCachedTelemetryAsync path now carries production cached-call
lifecycle traffic, delivering the design's "AuditLog + SiteCalls in one
MS SQL transaction" guarantee. Before this commit, the SiteCalls
operational half had NO production transport at all — central's
SiteCallAuditActor.OnUpsertAsync had zero producers, so cached-call
operational state never reached the central mirror.
Site-side partition (so neither path double-emits):
- ISiteAuditQueue.ReadPendingCachedTelemetryAsync — new method returning
rows where Kind ∈ {CachedSubmit, ApiCallCached, DbWriteCached,
CachedResolve} AND ForwardState = Pending.
- ISiteAuditQueue.ReadPendingAsync — XML doc updated, SQLite impl now
filters Kind NOT IN the cached set so cached rows no longer ride the
audit-only drain.
New cached-drain in SiteAuditTelemetryActor:
- Optional IOperationTrackingStore? ctor param (null on central
composition roots — the cached scheduler is never armed there).
- Independent CachedDrain message + scheduler tick parallel to the
existing Drain — a stall on one path can't block the other; shared
lifecycle CTS gates both.
- OnCachedDrainAsync: reads cached audit rows, joins each with its
matching SiteCallOperational snapshot via CorrelationId →
TrackedOperationId from the tracking store, builds CachedTelemetryBatch,
pushes via IngestCachedTelemetryAsync, marks ack'd rows Forwarded.
- Orphan rows (no tracking snapshot, thrown tracking-store call,
missing CorrelationId) logged at Warning + skipped — they stay
Pending so reconciliation/retry picks them up later. Best-effort
contract preserved.
Central side: AuditLogIngestActor.OnCachedTelemetryAsync was already
implemented (M3 Bundle G dead code today, alive after this commit) —
performs InsertIfNotExists for AuditLog + UpsertAsync for SiteCalls
inside a BeginTransactionAsync. The handler is idempotent on EventId,
so any duplicate arrivals from concurrent push + reconciliation are
silent no-ops.
Composition root: AkkaHostedService now resolves IOperationTrackingStore
via GetService<>() (site-only) and threads it through the actor's
Props.Create.
Tests added (+3 in SiteAuditTelemetryActorTests):
- Cached rows route through the new transport, not the audit-only drain.
- Orphan cached row (no tracking match) is logged + skipped, drain
doesn't crash.
- Ordinary audit rows still flow through the audit-only drain unchanged.
- ParentExecutionIdCorrelationTests now unions both queues to assert
all expected Kinds remain covered after the partition.
Build clean; AuditLog.Tests 250/251 (the 1 fail is the pre-existing
date-sensitive PartitionPurgeTests integration flake explicitly accepted
across the session); SiteRuntime.Tests 302/302.
README regenerated: 0 pending of 481 total.
Session-final totals: 136 of 136 originally-open Theme findings closed
across 11 commits (10 themed batches + this architectural close).
This commit is contained in:
@@ -495,14 +495,20 @@ public class ParentExecutionIdCorrelationTests : TestKit, IClassFixture<MsSqlMig
|
||||
async () =>
|
||||
{
|
||||
var pending = await sqliteWriter.ReadPendingAsync(256);
|
||||
// AuditLog-001: ReadPendingAsync now excludes the cached-lifecycle
|
||||
// kinds (they ride the combined-telemetry drain), so we union
|
||||
// them in via the dedicated read surface to keep the durability
|
||||
// assertion covering EVERY expected Kind.
|
||||
var pendingCached = await sqliteWriter.ReadPendingCachedTelemetryAsync(256);
|
||||
var forwarded = await sqliteWriter.ReadForwardedAsync(256);
|
||||
var kinds = pending.Concat(forwarded).Select(r => r.Kind).ToHashSet();
|
||||
var kinds = pending.Concat(pendingCached).Concat(forwarded)
|
||||
.Select(r => r.Kind).ToHashSet();
|
||||
var missing = expectedKinds.Where(k => !kinds.Contains(k)).ToList();
|
||||
Assert.True(
|
||||
missing.Count == 0,
|
||||
"Expected every routed-run audit Kind durably in SQLite; missing: "
|
||||
+ string.Join(", ", missing)
|
||||
+ $" (saw {pending.Count} Pending + {forwarded.Count} Forwarded).");
|
||||
+ $" (saw {pending.Count} Pending + {pendingCached.Count} PendingCached + {forwarded.Count} Forwarded).");
|
||||
},
|
||||
TimeSpan.FromSeconds(30),
|
||||
TimeSpan.FromMilliseconds(50));
|
||||
|
||||
@@ -7,7 +7,9 @@ using NSubstitute;
|
||||
using NSubstitute.ExceptionExtensions;
|
||||
using ScadaLink.AuditLog.Site.Telemetry;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.Communication.Grpc;
|
||||
|
||||
@@ -24,6 +26,7 @@ public class SiteAuditTelemetryActorTests : TestKit
|
||||
{
|
||||
private readonly ISiteAuditQueue _queue = Substitute.For<ISiteAuditQueue>();
|
||||
private readonly ISiteStreamAuditClient _client = Substitute.For<ISiteStreamAuditClient>();
|
||||
private readonly IOperationTrackingStore _trackingStore = Substitute.For<IOperationTrackingStore>();
|
||||
|
||||
/// <summary>
|
||||
/// Fast options so tests don't stall waiting for the scheduler. 1s busy /
|
||||
@@ -46,7 +49,22 @@ public class SiteAuditTelemetryActorTests : TestKit
|
||||
_queue,
|
||||
_client,
|
||||
options ?? Opts(),
|
||||
NullLogger<SiteAuditTelemetryActor>.Instance)));
|
||||
NullLogger<SiteAuditTelemetryActor>.Instance,
|
||||
(IOperationTrackingStore?)null)));
|
||||
|
||||
/// <summary>
|
||||
/// AuditLog-001: builds an actor with the optional
|
||||
/// <see cref="IOperationTrackingStore"/> wired in so the cached-drain
|
||||
/// scheduler is armed alongside the audit-only drain. Used by the new
|
||||
/// cached-drain regression tests below.
|
||||
/// </summary>
|
||||
private IActorRef CreateActorWithCachedDrain(IOptions<SiteAuditTelemetryOptions>? options = null) =>
|
||||
Sys.ActorOf(Props.Create(() => new SiteAuditTelemetryActor(
|
||||
_queue,
|
||||
_client,
|
||||
options ?? Opts(),
|
||||
NullLogger<SiteAuditTelemetryActor>.Instance,
|
||||
(IOperationTrackingStore?)_trackingStore)));
|
||||
|
||||
private static AuditEvent NewEvent(Guid? id = null) => new()
|
||||
{
|
||||
@@ -233,4 +251,206 @@ public class SiteAuditTelemetryActorTests : TestKit
|
||||
Arg.Is<IReadOnlyList<Guid>>(g => g.Count == 3 && g.ToHashSet().SetEquals(ackedSet)),
|
||||
Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────
|
||||
// AuditLog-001: combined-telemetry cached-drain regression tests. Verify
|
||||
// that the production wiring of the previously-unreachable cached transport
|
||||
// routes cached rows through ReadPendingCachedTelemetryAsync +
|
||||
// IngestCachedTelemetryAsync (and NOT IngestAuditEventsAsync), and that
|
||||
// orphaned audit rows (no tracking snapshot) are logged + skipped rather
|
||||
// than crashing the drain.
|
||||
// ────────────────────────────────────────────────────────────────────────
|
||||
|
||||
private static AuditEvent NewCachedEvent(
|
||||
AuditKind kind = AuditKind.CachedSubmit,
|
||||
Guid? eventId = null,
|
||||
Guid? correlationId = null,
|
||||
string sourceSiteId = "site-1") => new()
|
||||
{
|
||||
EventId = eventId ?? Guid.NewGuid(),
|
||||
OccurredAtUtc = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc),
|
||||
Channel = AuditChannel.ApiOutbound,
|
||||
Kind = kind,
|
||||
Status = AuditStatus.Submitted,
|
||||
SourceSiteId = sourceSiteId,
|
||||
Target = "ERP.GetOrder",
|
||||
CorrelationId = correlationId ?? Guid.NewGuid(),
|
||||
ForwardState = AuditForwardState.Pending,
|
||||
};
|
||||
|
||||
private static TrackingStatusSnapshot NewSnapshot(
|
||||
TrackedOperationId id,
|
||||
string status = "Submitted",
|
||||
int retryCount = 0) => new(
|
||||
Id: id,
|
||||
Kind: nameof(AuditKind.ApiCallCached),
|
||||
TargetSummary: "ERP.GetOrder",
|
||||
Status: status,
|
||||
RetryCount: retryCount,
|
||||
LastError: null,
|
||||
HttpStatus: null,
|
||||
CreatedAtUtc: new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc),
|
||||
UpdatedAtUtc: new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc),
|
||||
TerminalAtUtc: null,
|
||||
SourceInstanceId: "instance-1",
|
||||
SourceScript: "script-1",
|
||||
SourceNode: "node-a");
|
||||
|
||||
[Fact]
|
||||
public async Task CachedDrain_CachedRows_RouteThrough_IngestCachedTelemetry_NotIngestAuditEvents()
|
||||
{
|
||||
// Arrange — three cached audit rows on the cached queue, each with a
|
||||
// matching tracking snapshot. The audit-only queue is empty (those
|
||||
// rows are excluded by ReadPendingAsync after AuditLog-001).
|
||||
var cachedRows = new[]
|
||||
{
|
||||
NewCachedEvent(AuditKind.CachedSubmit),
|
||||
NewCachedEvent(AuditKind.ApiCallCached),
|
||||
NewCachedEvent(AuditKind.CachedResolve),
|
||||
};
|
||||
|
||||
_queue.ReadPendingAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>()));
|
||||
_queue.ReadPendingCachedTelemetryAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
|
||||
.Returns(
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(cachedRows),
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>()));
|
||||
|
||||
foreach (var row in cachedRows)
|
||||
{
|
||||
var tid = new TrackedOperationId(row.CorrelationId!.Value);
|
||||
_trackingStore.GetStatusAsync(tid, Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<TrackingStatusSnapshot?>(NewSnapshot(tid)));
|
||||
}
|
||||
|
||||
CachedTelemetryBatch? capturedBatch = null;
|
||||
_client.IngestCachedTelemetryAsync(Arg.Any<CachedTelemetryBatch>(), Arg.Any<CancellationToken>())
|
||||
.Returns(call =>
|
||||
{
|
||||
capturedBatch = call.Arg<CachedTelemetryBatch>();
|
||||
var ack = new IngestAck();
|
||||
foreach (var packet in capturedBatch.Packets)
|
||||
{
|
||||
ack.AcceptedEventIds.Add(packet.AuditEvent.EventId);
|
||||
}
|
||||
return Task.FromResult(ack);
|
||||
});
|
||||
|
||||
// Act
|
||||
CreateActorWithCachedDrain();
|
||||
|
||||
// Assert — exactly one IngestCachedTelemetryAsync push containing all
|
||||
// three packets, and zero IngestAuditEventsAsync pushes (the audit-only
|
||||
// drain saw an empty queue).
|
||||
await AwaitAssertAsync(async () =>
|
||||
{
|
||||
await _client.Received(1).IngestCachedTelemetryAsync(
|
||||
Arg.Any<CachedTelemetryBatch>(), Arg.Any<CancellationToken>());
|
||||
await _queue.Received(1).MarkForwardedAsync(
|
||||
Arg.Is<IReadOnlyList<Guid>>(g => g.Count == 3), Arg.Any<CancellationToken>());
|
||||
}, TimeSpan.FromSeconds(5));
|
||||
|
||||
Assert.NotNull(capturedBatch);
|
||||
Assert.Equal(3, capturedBatch!.Packets.Count);
|
||||
|
||||
await _client.DidNotReceiveWithAnyArgs().IngestAuditEventsAsync(default!, default);
|
||||
|
||||
var emittedEventIds = capturedBatch.Packets
|
||||
.Select(p => Guid.Parse(p.AuditEvent.EventId))
|
||||
.ToHashSet();
|
||||
var expectedIds = cachedRows.Select(r => r.EventId).ToHashSet();
|
||||
Assert.Equal(expectedIds, emittedEventIds);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task CachedDrain_OrphanRow_NoTrackingSnapshot_IsSkipped_DoesNotCrash()
|
||||
{
|
||||
// Arrange — two cached audit rows: one with a tracking snapshot, one
|
||||
// orphaned (the tracking store returns null). The orphaned row must be
|
||||
// skipped without aborting the batch — the valid row still flows.
|
||||
var orphan = NewCachedEvent(AuditKind.CachedSubmit);
|
||||
var valid = NewCachedEvent(AuditKind.CachedResolve);
|
||||
|
||||
_queue.ReadPendingAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>()));
|
||||
_queue.ReadPendingCachedTelemetryAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
|
||||
.Returns(
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(new[] { orphan, valid }),
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>()));
|
||||
|
||||
// orphan: tracking returns null
|
||||
_trackingStore.GetStatusAsync(
|
||||
new TrackedOperationId(orphan.CorrelationId!.Value),
|
||||
Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<TrackingStatusSnapshot?>(null));
|
||||
// valid: tracking returns a snapshot
|
||||
var validTid = new TrackedOperationId(valid.CorrelationId!.Value);
|
||||
_trackingStore.GetStatusAsync(validTid, Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<TrackingStatusSnapshot?>(NewSnapshot(validTid, "Delivered")));
|
||||
|
||||
CachedTelemetryBatch? capturedBatch = null;
|
||||
_client.IngestCachedTelemetryAsync(Arg.Any<CachedTelemetryBatch>(), Arg.Any<CancellationToken>())
|
||||
.Returns(call =>
|
||||
{
|
||||
capturedBatch = call.Arg<CachedTelemetryBatch>();
|
||||
var ack = new IngestAck();
|
||||
foreach (var packet in capturedBatch.Packets)
|
||||
{
|
||||
ack.AcceptedEventIds.Add(packet.AuditEvent.EventId);
|
||||
}
|
||||
return Task.FromResult(ack);
|
||||
});
|
||||
|
||||
// Act
|
||||
CreateActorWithCachedDrain();
|
||||
|
||||
// Assert — exactly one push containing ONLY the valid row; the orphan
|
||||
// is skipped and stays Pending (not in MarkForwardedAsync's id list).
|
||||
await AwaitAssertAsync(async () =>
|
||||
{
|
||||
await _client.Received(1).IngestCachedTelemetryAsync(
|
||||
Arg.Any<CachedTelemetryBatch>(), Arg.Any<CancellationToken>());
|
||||
}, TimeSpan.FromSeconds(5));
|
||||
|
||||
Assert.NotNull(capturedBatch);
|
||||
Assert.Single(capturedBatch!.Packets);
|
||||
Assert.Equal(valid.EventId.ToString(), capturedBatch.Packets[0].AuditEvent.EventId);
|
||||
|
||||
await _queue.Received(1).MarkForwardedAsync(
|
||||
Arg.Is<IReadOnlyList<Guid>>(g => g.Count == 1 && g[0] == valid.EventId),
|
||||
Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AuditOnlyDrain_StillFlows_When_CachedDrain_IsDisabled()
|
||||
{
|
||||
// Arrange — ordinary (non-cached) audit rows on the audit-only queue;
|
||||
// the actor is constructed WITHOUT a tracking store so the cached
|
||||
// scheduler is never armed. Regression guard against the audit-only
|
||||
// drain regressing during the AuditLog-001 refactor.
|
||||
var rows = Enumerable.Range(0, 3).Select(_ => NewEvent()).ToList();
|
||||
_queue.ReadPendingAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
|
||||
.Returns(
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(rows),
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>()));
|
||||
|
||||
_client.IngestAuditEventsAsync(Arg.Any<AuditEventBatch>(), Arg.Any<CancellationToken>())
|
||||
.Returns(_ => Task.FromResult(AckAll(rows)));
|
||||
|
||||
// Act — note: CreateActor (no tracking store), not CreateActorWithCachedDrain.
|
||||
CreateActor();
|
||||
|
||||
// Assert — audit-only drain flows normally; the cached client is
|
||||
// never called and ReadPendingCachedTelemetryAsync is never queried.
|
||||
await AwaitAssertAsync(async () =>
|
||||
{
|
||||
await _client.Received(1).IngestAuditEventsAsync(
|
||||
Arg.Any<AuditEventBatch>(), Arg.Any<CancellationToken>());
|
||||
await _queue.Received(1).MarkForwardedAsync(
|
||||
Arg.Is<IReadOnlyList<Guid>>(g => g.Count == 3), Arg.Any<CancellationToken>());
|
||||
}, TimeSpan.FromSeconds(5));
|
||||
|
||||
await _client.DidNotReceiveWithAnyArgs().IngestCachedTelemetryAsync(default!, default);
|
||||
await _queue.DidNotReceiveWithAnyArgs().ReadPendingCachedTelemetryAsync(default, default);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user