Files
ScadaBridge/src/ScadaLink.AuditLog/Site/Telemetry/CachedCallLifecycleBridge.cs
T
Joseph Doherty c00603e2a4 feat(auditlog): thread ParentExecutionId through S&F for retry-loop cached rows
The store-and-forward retry loop emits the per-attempt and terminal cached
audit rows (ApiCallCached/DbWriteCached Attempted, CachedResolve) via
CachedCallLifecycleBridge from a CachedCallAttemptContext, not from the
script context. The ExecutionId rollout (Task 4) already threaded ExecutionId
and SourceScript through this path; ParentExecutionId — the spawning
inbound-API request's ExecutionId — was not, so those retry-loop rows had
ParentExecutionId = null even for an inbound-API-routed run.

Thread it additively as a sibling at every carry point ExecutionId passes
through:

- StoreAndForwardMessage gains ParentExecutionId (Guid?).
- StoreAndForwardStorage adds a nullable parent_execution_id column via the
  same idempotent PRAGMA-probed ALTER TABLE migration; rows persisted by an
  older build read back null (back-compat). The defensive Guid.TryParse read
  helper (ParseExecutionId) is renamed ParseGuidColumn and reused for both
  columns so a corrupt value cannot abort the retry sweep.
- StoreAndForwardService.EnqueueAsync gains an optional parentExecutionId
  param, stamped onto the buffered message and surfaced on the
  CachedCallAttemptContext built in the retry loop.
- CachedCallAttemptContext gains ParentExecutionId.
- CachedCallLifecycleBridge.BuildPacket sets AuditEvent.ParentExecutionId
  from the context, beside the existing ExecutionId.
- IExternalSystemClient.CachedCallAsync / IDatabaseGateway.CachedWriteAsync
  gain an optional parentExecutionId param; ScriptRuntimeContext's CachedCall
  / CachedWrite helpers pass _parentExecutionId.

All threading is additive — ParentExecutionId is Guid? everywhere, null for
non-routed runs, and old buffered S&F rows still deserialize with the new
field null.
2026-05-21 17:58:11 -04:00

217 lines
9.4 KiB
C#

using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Messages.Integration;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.AuditLog.Site.Telemetry;
/// <summary>
/// Audit Log #23 (M3 Bundle E — Tasks E4/E5): translates per-attempt
/// notifications from the store-and-forward retry loop into one (or two)
/// <see cref="CachedCallTelemetry"/> packets and pushes them through
/// <see cref="ICachedCallTelemetryForwarder"/>.
/// </summary>
/// <remarks>
/// <para>
/// The S&amp;F loop's <see cref="ICachedCallLifecycleObserver"/> reports a
/// single coarse outcome per attempt; the audit pipeline however models the
/// lifecycle as TWO rows on terminal outcomes — an <c>Attempted</c>
/// (<see cref="AuditKind.ApiCallCached"/> / <see cref="AuditKind.DbWriteCached"/>)
/// row capturing the per-attempt mechanics, plus a <see cref="AuditKind.CachedResolve"/>
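/// row marking the terminal state for downstream consumers. The bridge fans
/// out per outcome as listed below. The parenthesised value is the attempt's
/// result: on an <c>Attempted</c> row it is conveyed by <c>HttpStatus</c> /
/// <c>ErrorMessage</c> (that row's <c>Status</c> is always <c>Attempted</c>);
/// on a <c>CachedResolve</c> row it is the row's <c>Status</c>: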
/// </para>
/// <list type="bullet">
/// <item><description><c>TransientFailure</c> -> one Attempted(Failed) row.</description></item>
/// <item><description><c>Delivered</c> -> Attempted(Delivered) + CachedResolve(Delivered).</description></item>
/// <item><description><c>PermanentFailure</c> -> Attempted(Failed) + CachedResolve(Parked).</description></item>
/// <item><description><c>ParkedMaxRetries</c> -> Attempted(Failed) + CachedResolve(Parked).</description></item>
/// </list>
/// <para>
/// <b>Best-effort emission (alog.md §7):</b> the bridge itself never throws;
/// the underlying forwarder swallows + logs its own failures.
/// </para>
/// </remarks>
public sealed class CachedCallLifecycleBridge : ICachedCallLifecycleObserver
{
private readonly ICachedCallTelemetryForwarder _forwarder;
private readonly ILogger<CachedCallLifecycleBridge> _logger;
/// <summary>
/// Creates the bridge over the forwarder that receives every packet built
/// here and the logger used when an emission fails unexpectedly.
/// </summary>
/// <param name="forwarder">Sink for the generated <see cref="CachedCallTelemetry"/> packets.</param>
/// <param name="logger">Logger for unexpected emission failures.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public CachedCallLifecycleBridge(
    ICachedCallTelemetryForwarder forwarder,
    ILogger<CachedCallLifecycleBridge> logger)
{
    // Validate in declaration order so the reported parameter name matches
    // the first missing dependency.
    ArgumentNullException.ThrowIfNull(forwarder);
    ArgumentNullException.ThrowIfNull(logger);

    _forwarder = forwarder;
    _logger = logger;
}
/// <inheritdoc/>
public async Task OnAttemptCompletedAsync(
    CachedCallAttemptContext context, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(context);

    try
    {
        // Every attempt produces its per-attempt row first.
        await EmitAttemptedAsync(context, ct).ConfigureAwait(false);

        if (!IsTerminal(context.Outcome))
        {
            // Non-terminal outcome: the per-attempt row is all there is.
            return;
        }

        // Terminal outcome: follow up with the CachedResolve row.
        await EmitResolveAsync(context, ct).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // Both emit paths go through the forwarder, which is itself
        // best-effort, so landing here is unexpected. The alog.md §7
        // contract still requires that nothing propagates to the caller,
        // so log and carry on.
        _logger.LogWarning(
            ex,
            "CachedCallLifecycleBridge: unexpected throw for {TrackedOperationId} (Outcome {Outcome})",
            context.TrackedOperationId,
            context.Outcome);
    }
}
/// <summary>
/// Builds and forwards the per-attempt packet for <paramref name="context"/>.
/// </summary>
/// <param name="context">The attempt being reported.</param>
/// <param name="ct">Cancellation token passed to the forwarder.</param>
private async Task EmitAttemptedAsync(CachedCallAttemptContext context, CancellationToken ct)
{
    // The audit kind tells the channels apart, while the audit status is
    // Attempted no matter how the attempt went: success vs. failure lives in
    // the companion HttpStatus / ErrorMessage fields, and the terminal Status
    // belongs to the CachedResolve row (M3 brief, alog.md §4).
    //
    // The operational mirror is likewise fixed at "Attempted" for this row so
    // reconciliation can't roll back.
    var attemptPacket = BuildPacket(
        context,
        kind: ChannelToAttemptKind(context.Channel),
        status: AuditStatus.Attempted,
        operationalStatus: "Attempted",
        terminalAtUtc: null,
        lastError: context.LastError,
        httpStatus: context.HttpStatus);

    await _forwarder.ForwardAsync(attemptPacket, ct).ConfigureAwait(false);
}
/// <summary>
/// Builds and forwards the <see cref="AuditKind.CachedResolve"/> packet for
/// <paramref name="context"/>. The audit and operational statuses are derived
/// from the outcome, and the terminal timestamp is the attempt's
/// <c>OccurredAtUtc</c>.
/// </summary>
/// <param name="context">The attempt being reported.</param>
/// <param name="ct">Cancellation token passed to the forwarder.</param>
private async Task EmitResolveAsync(CachedCallAttemptContext context, CancellationToken ct)
{
    var terminal = TerminalOutcomeToStatuses(context.Outcome);

    var resolvePacket = BuildPacket(
        context,
        kind: AuditKind.CachedResolve,
        status: terminal.auditStatus,
        operationalStatus: terminal.operationalStatus,
        terminalAtUtc: context.OccurredAtUtc,
        lastError: context.LastError,
        httpStatus: context.HttpStatus);

    await _forwarder.ForwardAsync(resolvePacket, ct).ConfigureAwait(false);
}
/// <summary>
/// Assembles one <see cref="CachedCallTelemetry"/> packet — the audit event
/// plus its operational mirror — from the attempt context and the
/// row-specific values chosen by the caller.
/// </summary>
/// <param name="context">Attempt context the shared fields are copied from.</param>
/// <param name="kind">Audit kind stamped on the event.</param>
/// <param name="status">Audit status stamped on the event.</param>
/// <param name="operationalStatus">Status string written to the operational record.</param>
/// <param name="terminalAtUtc">Terminal timestamp for the operational record, or <c>null</c>.</param>
/// <param name="lastError">Error text copied to both halves of the packet.</param>
/// <param name="httpStatus">HTTP status copied to both halves of the packet.</param>
/// <returns>The packet, carrying a freshly generated <c>EventId</c>.</returns>
private static CachedCallTelemetry BuildPacket(
    CachedCallAttemptContext context,
    AuditKind kind,
    AuditStatus status,
    string operationalStatus,
    DateTime? terminalAtUtc,
    string? lastError,
    int? httpStatus)
{
    // All timestamps are tagged as UTC. SpecifyKind only relabels the value;
    // it does not convert it.
    var occurredAtUtc = DateTime.SpecifyKind(context.OccurredAtUtc, DateTimeKind.Utc);
    var createdAtUtc = DateTime.SpecifyKind(context.CreatedAtUtc, DateTimeKind.Utc);

    DateTime? terminalStampUtc = null;
    if (terminalAtUtc.HasValue)
    {
        terminalStampUtc = DateTime.SpecifyKind(terminalAtUtc.Value, DateTimeKind.Utc);
    }

    // An empty source site is recorded as "no site" on the audit event.
    string? sourceSiteId = string.IsNullOrEmpty(context.SourceSite) ? null : context.SourceSite;

    var auditEvent = new AuditEvent
    {
        EventId = Guid.NewGuid(),
        OccurredAtUtc = occurredAtUtc,
        Channel = ChannelStringToEnum(context.Channel),
        Kind = kind,
        CorrelationId = context.TrackedOperationId.Value,

        // Audit Log #23 (ExecutionId Task 4): per-run correlation id of the
        // originating script execution, carried through the S&F buffer.
        // Null for rows buffered before Task 4 (back-compat).
        ExecutionId = context.ExecutionId,

        // Audit Log #23 (ParentExecutionId Task 6): ExecutionId of the
        // spawning inbound-API request, carried through the S&F buffer next
        // to ExecutionId so retry-loop cached rows link back to the
        // cross-execution chain. Null for a non-routed run and for rows
        // buffered before Task 6.
        ParentExecutionId = context.ParentExecutionId,

        SourceSiteId = sourceSiteId,
        SourceInstanceId = context.SourceInstanceId,

        // Audit Log #23 (ExecutionId Task 4): SourceScript travels through
        // the S&F buffer with ExecutionId, giving retry-loop cached rows the
        // same provenance as script-side cached rows. Null on pre-Task-4
        // buffered rows.
        SourceScript = context.SourceScript,

        Target = context.Target,
        Status = status,
        HttpStatus = httpStatus,
        DurationMs = context.DurationMs,
        ErrorMessage = lastError,
        ForwardState = AuditForwardState.Pending,
    };

    var operational = new SiteCallOperational(
        TrackedOperationId: context.TrackedOperationId,
        Channel: context.Channel,
        Target: context.Target,
        SourceSite: context.SourceSite,
        Status: operationalStatus,
        RetryCount: context.RetryCount,
        LastError: lastError,
        HttpStatus: httpStatus,
        CreatedAtUtc: createdAtUtc,
        UpdatedAtUtc: occurredAtUtc,
        TerminalAtUtc: terminalStampUtc);

    return new CachedCallTelemetry(Audit: auditEvent, Operational: operational);
}
/// <summary>
/// Picks the per-attempt audit kind for a channel name:
/// <c>"DbOutbound"</c> yields <see cref="AuditKind.DbWriteCached"/>; every
/// other value yields <see cref="AuditKind.ApiCallCached"/>.
/// </summary>
/// <param name="channel">Channel name taken from the attempt context.</param>
private static AuditKind ChannelToAttemptKind(string channel)
{
    switch (channel)
    {
        case "DbOutbound":
            return AuditKind.DbWriteCached;

        case "ApiOutbound":
        default:
            // The default is defensive only — the S&F observer is filtered
            // to cached-call categories, so an unknown channel shouldn't
            // show up in practice.
            return AuditKind.ApiCallCached;
    }
}
/// <summary>
/// Converts a channel name to its <see cref="AuditChannel"/> value:
/// <c>"DbOutbound"</c> yields <see cref="AuditChannel.DbOutbound"/>; every
/// other value falls back to <see cref="AuditChannel.ApiOutbound"/>.
/// </summary>
/// <param name="channel">Channel name taken from the attempt context.</param>
private static AuditChannel ChannelStringToEnum(string channel)
{
    switch (channel)
    {
        case "DbOutbound":
            return AuditChannel.DbOutbound;

        case "ApiOutbound":
        default:
            return AuditChannel.ApiOutbound;
    }
}
/// <summary>
/// Maps an attempt outcome to the audit status and operational status string
/// used on the <see cref="AuditKind.CachedResolve"/> row.
/// </summary>
/// <param name="outcome">Outcome reported for the attempt.</param>
/// <returns>
/// <c>Delivered</c> for a delivered call, <c>Parked</c> for a permanent
/// failure or exhausted retries, and <c>Failed</c> for anything else.
/// </returns>
private static (AuditStatus auditStatus, string operationalStatus) TerminalOutcomeToStatuses(
    CachedCallAttemptOutcome outcome)
{
    switch (outcome)
    {
        case CachedCallAttemptOutcome.Delivered:
            return (AuditStatus.Delivered, "Delivered");

        case CachedCallAttemptOutcome.PermanentFailure:
        case CachedCallAttemptOutcome.ParkedMaxRetries:
            return (AuditStatus.Parked, "Parked");

        case CachedCallAttemptOutcome.TransientFailure:
        default:
            // TransientFailure is not terminal (IsTerminal returns false for
            // it), but it and any unrecognised value still get a safe
            // Failed mapping rather than an exception.
            return (AuditStatus.Failed, "Failed");
    }
}
/// <summary>
/// Reports whether <paramref name="outcome"/> is terminal: true for
/// <c>Delivered</c>, <c>PermanentFailure</c> and <c>ParkedMaxRetries</c>;
/// false for <c>TransientFailure</c> and any other value.
/// </summary>
/// <param name="outcome">Outcome reported for the attempt.</param>
private static bool IsTerminal(CachedCallAttemptOutcome outcome) =>
    outcome is CachedCallAttemptOutcome.Delivered
        or CachedCallAttemptOutcome.PermanentFailure
        or CachedCallAttemptOutcome.ParkedMaxRetries;
}