feat(auditlog): thread ParentExecutionId through S&F for retry-loop cached rows

The store-and-forward retry loop emits the per-attempt and terminal cached
audit rows (ApiCallCached/DbWriteCached Attempted, CachedResolve) via
CachedCallLifecycleBridge from a CachedCallAttemptContext, not from the
script context. The ExecutionId rollout (Task 4) already threaded ExecutionId
and SourceScript through this path; ParentExecutionId — the spawning
inbound-API request's ExecutionId — was not, so those retry-loop rows had
ParentExecutionId = null even for an inbound-API-routed run.

Thread it additively as a sibling at every carry point ExecutionId passes
through:

- StoreAndForwardMessage gains ParentExecutionId (Guid?).
- StoreAndForwardStorage adds a nullable parent_execution_id column via the
  same idempotent PRAGMA-probed ALTER TABLE migration; rows persisted by an
  older build read back null (back-compat). The defensive Guid.TryParse read
  helper (ParseExecutionId) is renamed ParseGuidColumn and reused for both
  columns so a corrupt value cannot abort the retry sweep.
- StoreAndForwardService.EnqueueAsync gains an optional parentExecutionId
  param, stamped onto the buffered message and surfaced on the
  CachedCallAttemptContext built in the retry loop.
- CachedCallAttemptContext gains ParentExecutionId.
- CachedCallLifecycleBridge.BuildPacket sets AuditEvent.ParentExecutionId
  from the context, beside the existing ExecutionId.
- IExternalSystemClient.CachedCallAsync / IDatabaseGateway.CachedWriteAsync
  gain an optional parentExecutionId param; ScriptRuntimeContext's CachedCall
  / CachedWrite helpers pass _parentExecutionId.

All threading is additive — ParentExecutionId is Guid? everywhere, null for
non-routed runs, and old buffered S&F rows still deserialize with the new
field null.
This commit is contained in:
Joseph Doherty
2026-05-21 17:58:11 -04:00
parent 150ba5e63f
commit c00603e2a4
15 changed files with 581 additions and 51 deletions

View File

@@ -33,7 +33,8 @@ public class CachedCallLifecycleBridgeTests
string? lastError = null,
int? httpStatus = null,
Guid? executionId = null,
string? sourceScript = null) =>
string? sourceScript = null,
Guid? parentExecutionId = null) =>
new(
TrackedOperationId: _id,
Channel: channel,
@@ -48,7 +49,8 @@ public class CachedCallLifecycleBridgeTests
DurationMs: 42,
SourceInstanceId: "Plant.Pump42",
ExecutionId: executionId,
SourceScript: sourceScript);
SourceScript: sourceScript,
ParentExecutionId: parentExecutionId);
[Fact]
public async Task TransientFailure_EmitsOneAttemptedRow_NoResolve()
@@ -259,4 +261,70 @@ public class CachedCallLifecycleBridgeTests
Assert.Null(captured!.Audit.ExecutionId);
Assert.Null(captured.Audit.SourceScript);
}
// ── Audit Log #23 (ParentExecutionId Task 6): ParentExecutionId ──
[Fact]
public async Task RetryLoopAttemptedRow_CarriesParentExecutionId_FromContext()
{
    // Task 6: a ParentExecutionId supplied on the CachedCallAttemptContext
    // must surface on the per-attempt audit row that the bridge forwards.

    // Arrange: record every telemetry packet handed to the forwarder.
    var expectedParentId = Guid.NewGuid();
    var forwarded = new List<CachedCallTelemetry>();
    _forwarder
        .ForwardAsync(
            Arg.Do<CachedCallTelemetry>(telemetry => forwarded.Add(telemetry)),
            Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);

    var bridge = CreateSut();
    var attemptContext = Ctx(
        CachedCallAttemptOutcome.TransientFailure,
        parentExecutionId: expectedParentId);

    // Act
    await bridge.OnAttemptCompletedAsync(attemptContext);

    // Assert: exactly one packet is forwarded for a transient failure; it is
    // the ApiCallCached row and it carries the parent id from the context.
    var attemptedRow = Assert.Single(forwarded);
    Assert.Equal(AuditKind.ApiCallCached, attemptedRow.Audit.Kind);
    Assert.Equal(expectedParentId, attemptedRow.Audit.ParentExecutionId);
}
[Fact]
public async Task RetryLoopCachedResolveRow_CarriesParentExecutionId_FromContext()
{
    // A delivered attempt on the DbOutbound channel is expected to forward
    // two packets: the DbWriteCached attempt row and the terminal
    // CachedResolve row. Both must carry the ParentExecutionId from the
    // context so the whole lifecycle correlates to the same parent.

    // Arrange: record every telemetry packet handed to the forwarder.
    var expectedParentId = Guid.NewGuid();
    var forwarded = new List<CachedCallTelemetry>();
    _forwarder
        .ForwardAsync(
            Arg.Do<CachedCallTelemetry>(telemetry => forwarded.Add(telemetry)),
            Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);

    var bridge = CreateSut();
    var deliveredContext = Ctx(
        CachedCallAttemptOutcome.Delivered,
        channel: "DbOutbound",
        parentExecutionId: expectedParentId);

    // Act
    await bridge.OnAttemptCompletedAsync(deliveredContext);

    // Assert: two packets in total, one of each kind, each stamped with the parent id.
    Assert.Equal(2, forwarded.Count);

    var resolveRow = Assert.Single(
        forwarded,
        packet => packet.Audit.Kind == AuditKind.CachedResolve);
    Assert.Equal(expectedParentId, resolveRow.Audit.ParentExecutionId);

    var attemptedRow = Assert.Single(
        forwarded,
        packet => packet.Audit.Kind == AuditKind.DbWriteCached);
    Assert.Equal(expectedParentId, attemptedRow.Audit.ParentExecutionId);
}
[Fact]
public async Task RetryLoopRow_NullParentExecutionId_RemainsNull()
{
    // Back-compat / non-routed run: no parentExecutionId is supplied to the
    // context, so the forwarded audit row must keep ParentExecutionId null
    // and the bridge must not throw.

    // Arrange: remember the most recent packet handed to the forwarder.
    CachedCallTelemetry? lastForwarded = null;
    _forwarder
        .ForwardAsync(
            Arg.Do<CachedCallTelemetry>(telemetry => lastForwarded = telemetry),
            Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);

    var bridge = CreateSut();
    var contextWithoutParent = Ctx(CachedCallAttemptOutcome.TransientFailure);

    // Act
    await bridge.OnAttemptCompletedAsync(contextWithoutParent);

    // Assert: a packet was forwarded and its parent id stayed null.
    Assert.NotNull(lastForwarded);
    Assert.Null(lastForwarded!.Audit.ParentExecutionId);
}
}