feat(auditlog): combined telemetry dual-write transaction (#23 M3)
This commit is contained in:
@@ -4,6 +4,7 @@ using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Audit;
|
||||
using ScadaLink.ConfigurationDatabase;
|
||||
|
||||
namespace ScadaLink.AuditLog.Central;
|
||||
|
||||
@@ -61,6 +62,11 @@ public class AuditLogIngestActor : ReceiveActor
|
||||
_logger = logger;
|
||||
|
||||
ReceiveAsync<IngestAuditEventsCommand>(OnIngestAsync);
|
||||
// The single-repository test ctor cannot service the M3 dual-write —
|
||||
// it has no SiteCalls repo and no DbContext. The handler still
|
||||
// registers (so callers don't dead-letter) but replies empty so the
|
||||
// test surface stays explicit about what this ctor supports.
|
||||
ReceiveAsync<IngestCachedTelemetryCommand>(OnCachedTelemetryWithoutDualWriteAsync);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -81,6 +87,7 @@ public class AuditLogIngestActor : ReceiveActor
|
||||
_logger = logger;
|
||||
|
||||
ReceiveAsync<IngestAuditEventsCommand>(OnIngestAsync);
|
||||
ReceiveAsync<IngestCachedTelemetryCommand>(OnCachedTelemetryAsync);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -150,4 +157,98 @@ public class AuditLogIngestActor : ReceiveActor
|
||||
|
||||
replyTo.Tell(new IngestAuditEventsReply(accepted));
|
||||
}
|
||||
|
||||
/// <summary>
/// M3 dual-write handler. For every <see cref="CachedTelemetryEntry"/> the
/// actor opens a fresh MS SQL transaction, inserts the AuditLog row
/// idempotently AND upserts the SiteCalls row monotonically. Both succeed
/// or both roll back, so the audit and operational mirrors never drift
/// mid-row. The IngestedAtUtc stamp is unified between the two rows so a
/// downstream join lines up cleanly.
/// </summary>
/// <remarks>
/// Per-entry isolation — one entry's failed transaction does NOT abort
/// other entries in the batch (each gets its own
/// <see cref="Microsoft.EntityFrameworkCore.Infrastructure.DatabaseFacade.BeginTransactionAsync"/>
/// scope and a try/catch around it). Because all entries share one scoped
/// DbContext, the change tracker is cleared after a failed entry so nothing
/// it left pending can leak into the next entry's transaction. Audit-write
/// failure NEVER aborts the user-facing action — the site keeps the row
/// Pending and retries on the next drain.
/// </remarks>
/// <param name="cmd">
/// The batch to ingest; each entry carries the Audit row and the SiteCall
/// row that must be written together.
/// </param>
/// <returns>
/// A task that completes once an <see cref="IngestCachedTelemetryReply"/>
/// listing the EventIds of the committed entries has been sent to the
/// original sender.
/// </returns>
private async Task OnCachedTelemetryAsync(IngestCachedTelemetryCommand cmd)
{
    // Capture the actor context up front. Every await below uses
    // ConfigureAwait(false), so continuations may resume outside the
    // actor's context where Sender/Self are no longer safe to read and the
    // implicit-sender Tell overload would fall back to "no sender".
    var replyTo = Sender;
    var self = Self;
    var accepted = new List<Guid>(cmd.Entries.Count);

    try
    {
        await using var scope = _serviceProvider!.CreateAsyncScope();
        var auditRepo = scope.ServiceProvider.GetRequiredService<IAuditLogRepository>();
        var siteCallRepo = scope.ServiceProvider.GetRequiredService<ISiteCallAuditRepository>();
        var dbContext = scope.ServiceProvider.GetRequiredService<ScadaLinkDbContext>();

        foreach (var entry in cmd.Entries)
        {
            try
            {
                await using var tx = await dbContext.Database
                    .BeginTransactionAsync()
                    .ConfigureAwait(false);

                // Stamp IngestedAtUtc on both rows from a single
                // central-side instant so a join on the two tables sees
                // matching timestamps (debugging convenience, not a
                // correctness invariant).
                var ingestedAt = DateTime.UtcNow;
                var auditStamped = entry.Audit with { IngestedAtUtc = ingestedAt };
                var siteCallStamped = entry.SiteCall with { IngestedAtUtc = ingestedAt };

                await auditRepo.InsertIfNotExistsAsync(auditStamped)
                    .ConfigureAwait(false);
                await siteCallRepo.UpsertAsync(siteCallStamped)
                    .ConfigureAwait(false);

                await tx.CommitAsync().ConfigureAwait(false);

                // Only acknowledged after the commit succeeded.
                accepted.Add(entry.Audit.EventId);
            }
            catch (Exception ex)
            {
                // Both rows rolled back via the disposing transaction. The
                // EventId is NOT added to `accepted` so the site keeps its
                // row Pending and retries on the next drain. Other entries
                // in the batch continue with their own transactions.
                _logger.LogError(
                    ex,
                    "Combined telemetry dual-write failed for AuditEvent {EventId} / TrackedOperationId {TrackedOpId}; rolled back.",
                    entry.Audit.EventId,
                    entry.SiteCall.TrackedOperationId);

                // The DbContext is shared by every entry in this batch. If a
                // repository tracked entities on it before failing, those
                // pending changes would be replayed by the next entry's
                // save and fail it too. Dropping all tracked state keeps the
                // per-entry isolation promise; it is a no-op when nothing
                // is tracked.
                dbContext.ChangeTracker.Clear();
            }
        }
    }
    catch (Exception ex)
    {
        // Something outside the per-entry guard threw — e.g. resolving the
        // scope or its services (DI mis-wiring), or disposing the scope.
        // Log and reply with whatever we managed to accept (possibly
        // empty) — the central singleton MUST stay alive.
        _logger.LogError(
            ex,
            "Combined telemetry batch ingest failed before per-entry processing.");
    }

    // Explicit sender: the ambient actor context may be gone by now (see the
    // capture note at the top of the method).
    replyTo.Tell(new IngestCachedTelemetryReply(accepted), self);
}
|
||||
|
||||
/// <summary>
/// Handler for <see cref="IngestCachedTelemetryCommand"/> used by the
/// single-repository test ctor. That ctor has neither a DbContext nor an
/// <see cref="ISiteCallAuditRepository"/>, so the dual-write cannot run
/// here: the handler logs a warning and answers with an empty ack, which
/// leaves callers on their retry path.
/// </summary>
/// <param name="cmd">
/// The telemetry batch; only its entry count is read, for the warning.
/// </param>
/// <returns>An already-completed task — no asynchronous work is done.</returns>
private Task OnCachedTelemetryWithoutDualWriteAsync(IngestCachedTelemetryCommand cmd)
{
    var ignoredEntryCount = cmd.Entries.Count;

    _logger.LogWarning(
        "AuditLogIngestActor received IngestCachedTelemetryCommand on the single-repository ctor; dual-write requires the IServiceProvider ctor. Replying with empty ack ({Count} entries).",
        ignoredEntryCount);

    // Nothing was persisted, so no EventId is acknowledged.
    var emptyAck = new IngestCachedTelemetryReply(Array.Empty<Guid>());
    Sender.Tell(emptyAck);

    return Task.CompletedTask;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user