feat(auditlog): thread ParentExecutionId through S&F for retry-loop cached rows

The store-and-forward retry loop emits the per-attempt and terminal cached
audit rows (ApiCallCached/DbWriteCached Attempted, CachedResolve) via
CachedCallLifecycleBridge from a CachedCallAttemptContext, not from the
script context. The ExecutionId rollout (Task 4) already threaded ExecutionId
and SourceScript through this path; ParentExecutionId — the spawning
inbound-API request's ExecutionId — was not, so those retry-loop rows had
ParentExecutionId = null even for an inbound-API-routed run.

Thread it additively as a sibling at every carry point ExecutionId passes
through:

- StoreAndForwardMessage gains ParentExecutionId (Guid?).
- StoreAndForwardStorage adds a nullable parent_execution_id column via the
  same idempotent PRAGMA-probed ALTER TABLE migration; rows persisted by an
  older build read back null (back-compat). The defensive Guid.TryParse read
  helper (ParseExecutionId) is renamed ParseGuidColumn and reused for both
  columns so a corrupt value cannot abort the retry sweep.
- StoreAndForwardService.EnqueueAsync gains an optional parentExecutionId
  param, stamped onto the buffered message and surfaced on the
  CachedCallAttemptContext built in the retry loop.
- CachedCallAttemptContext gains ParentExecutionId.
- CachedCallLifecycleBridge.BuildPacket sets AuditEvent.ParentExecutionId
  from the context, beside the existing ExecutionId.
- IExternalSystemClient.CachedCallAsync / IDatabaseGateway.CachedWriteAsync
  gain an optional parentExecutionId param; ScriptRuntimeContext's CachedCall
  / CachedWrite helpers pass _parentExecutionId.

All threading is additive — ParentExecutionId is Guid? everywhere, null for
non-routed runs, and old buffered S&F rows still deserialize with the new
field null.
This commit is contained in:
Joseph Doherty
2026-05-21 17:58:11 -04:00
parent 150ba5e63f
commit c00603e2a4
15 changed files with 581 additions and 51 deletions

View File

@@ -357,6 +357,83 @@ public class CachedCallAttemptEmissionTests : IAsyncLifetime, IDisposable
Assert.Equal("Plant.Tank/OnAlarm", notification.SourceScript);
}
// ── Audit Log #23 (ParentExecutionId Task 6): ParentExecutionId ──
[Fact]
public async Task Attempt_CarriesParentExecutionId_FromBufferedMessage()
{
// A cached call enqueued from an inbound-API-routed script run carries
// the spawning execution's ParentExecutionId. The retry sweep must
// surface it on the CachedCallAttemptContext beside ExecutionId so the
// audit bridge can stamp it on the retry-loop cached rows.
var parentExecutionId = Guid.NewGuid();
_service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
_ => throw new HttpRequestException("HTTP 503"));
var trackedId = TrackedOperationId.New();
await _service.EnqueueAsync(
StoreAndForwardCategory.ExternalSystem,
"ERP",
"""{"payload":"x"}""",
originInstanceName: "Plant.Pump42",
maxRetries: 5,
retryInterval: TimeSpan.Zero,
attemptImmediateDelivery: false,
messageId: trackedId.ToString(),
parentExecutionId: parentExecutionId);
await _service.RetryPendingMessagesAsync();
var notification = Assert.Single(_observer.Notifications);
Assert.Equal(parentExecutionId, notification.ParentExecutionId);
}
[Fact]
public async Task Attempt_NullParentExecutionId_SurfacesAsNull()
{
// Non-routed run: the originating script was not spawned by an
// inbound-API request, so no ParentExecutionId is threaded. It must
// surface as null on the context, not throw.
_service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
_ => Task.FromResult(true));
var trackedId = await EnqueueBufferedAsync(
StoreAndForwardCategory.ExternalSystem, "ERP");
await _service.RetryPendingMessagesAsync();
var notification = Assert.Single(_observer.Notifications);
Assert.Null(notification.ParentExecutionId);
}
[Fact]
public async Task TerminalResolve_CarriesParentExecutionId()
{
// The terminal Delivered notification must also carry the threaded
// ParentExecutionId so the CachedResolve audit row correlates back to
// the spawning inbound-API execution.
var parentExecutionId = Guid.NewGuid();
_service.RegisterDeliveryHandler(StoreAndForwardCategory.CachedDbWrite,
_ => Task.FromResult(true));
var trackedId = TrackedOperationId.New();
await _service.EnqueueAsync(
StoreAndForwardCategory.CachedDbWrite,
"myDb",
"""{"payload":"x"}""",
originInstanceName: "Plant.Tank",
maxRetries: 3,
retryInterval: TimeSpan.Zero,
attemptImmediateDelivery: false,
messageId: trackedId.ToString(),
parentExecutionId: parentExecutionId);
await _service.RetryPendingMessagesAsync();
var notification = Assert.Single(_observer.Notifications);
Assert.Equal(CachedCallAttemptOutcome.Delivered, notification.Outcome);
Assert.Equal(parentExecutionId, notification.ParentExecutionId);
}
// ── Best-effort contract: observer throws must NOT corrupt retry bookkeeping ──
[Fact]

View File

@@ -452,6 +452,141 @@ public class StoreAndForwardStorageTests : IAsyncLifetime, IDisposable
Assert.Equal(message.ExecutionId, retrieved!.ExecutionId);
}
// ── Audit Log #23 (ParentExecutionId Task 6): parent_execution_id ──
[Fact]
public async Task EnqueueAsync_RoundTripsParentExecutionId()
{
// A cached call buffered from an inbound-API-routed script run carries
// the spawning execution's ParentExecutionId; it must survive a persist
// + read-back so the retry loop can stamp it on audit rows.
var parentExecutionId = Guid.NewGuid();
var message = CreateMessage("parent1", StoreAndForwardCategory.ExternalSystem);
message.ParentExecutionId = parentExecutionId;
await _storage.EnqueueAsync(message);
var retrieved = await _storage.GetMessageByIdAsync("parent1");
Assert.NotNull(retrieved);
Assert.Equal(parentExecutionId, retrieved!.ParentExecutionId);
}
[Fact]
public async Task EnqueueAsync_NullParentExecutionId_RoundTripsAsNull()
{
// A non-routed run supplies no ParentExecutionId — it must round-trip
// as null rather than throwing or coercing.
var message = CreateMessage("noparent1", StoreAndForwardCategory.ExternalSystem);
Assert.Null(message.ParentExecutionId);
await _storage.EnqueueAsync(message);
var retrieved = await _storage.GetMessageByIdAsync("noparent1");
Assert.NotNull(retrieved);
Assert.Null(retrieved!.ParentExecutionId);
}
[Fact]
public async Task ParentExecutionId_SurvivesRetrySweepRead()
{
// The retry sweep reads due rows via GetMessagesForRetryAsync; the new
// parent_execution_id field must be present on that read path too — it
// is the path that feeds the CachedCallAttemptContext.
var parentExecutionId = Guid.NewGuid();
var message = CreateMessage("psweep1", StoreAndForwardCategory.CachedDbWrite);
message.ParentExecutionId = parentExecutionId;
message.LastAttemptAt = null; // due immediately
await _storage.EnqueueAsync(message);
var due = await _storage.GetMessagesForRetryAsync();
var row = Assert.Single(due, m => m.Id == "psweep1");
Assert.Equal(parentExecutionId, row.ParentExecutionId);
}
[Fact]
public async Task LegacyRowWithoutParentExecutionIdColumn_ReadsBackAsNull()
{
// Back-compat: a row persisted by a build that pre-dates the
// parent_execution_id column must still deserialize, with
// ParentExecutionId reading back as null. Simulate the pre-Task-6
// schema (which already has execution_id / source_script from the
// ExecutionId rollout) by recreating the table without
// parent_execution_id, inserting directly, then running InitializeAsync
// which ALTER-adds the column.
await using (var setup = new SqliteConnection($"Data Source={_dbName};Mode=Memory;Cache=Shared"))
{
await setup.OpenAsync();
await using var drop = setup.CreateCommand();
drop.CommandText = @"
DROP TABLE IF EXISTS sf_messages;
CREATE TABLE sf_messages (
id TEXT PRIMARY KEY,
category INTEGER NOT NULL,
target TEXT NOT NULL,
payload_json TEXT NOT NULL,
retry_count INTEGER NOT NULL DEFAULT 0,
max_retries INTEGER NOT NULL DEFAULT 50,
retry_interval_ms INTEGER NOT NULL DEFAULT 30000,
created_at TEXT NOT NULL,
last_attempt_at TEXT,
status INTEGER NOT NULL DEFAULT 0,
last_error TEXT,
origin_instance TEXT,
execution_id TEXT,
source_script TEXT
);
INSERT INTO sf_messages (id, category, target, payload_json, created_at, status)
VALUES ('plegacy1', 0, 'ERP', '{}', '2026-01-01T00:00:00.0000000+00:00', 0);";
await drop.ExecuteNonQueryAsync();
}
// InitializeAsync must additively ALTER-in parent_execution_id without
// disturbing the pre-existing legacy row.
await _storage.InitializeAsync();
var retrieved = await _storage.GetMessageByIdAsync("plegacy1");
Assert.NotNull(retrieved);
Assert.Equal("plegacy1", retrieved!.Id);
Assert.Null(retrieved.ParentExecutionId);
}
[Fact]
public async Task MalformedParentExecutionId_ReadsBackAsNull_DoesNotAbortRetrySweep()
{
// Defensive read path: a corrupt (non-null, non-GUID) parent_execution_id
// must be treated as "no parent execution id" rather than throwing
// FormatException — a single bad row must not abort the whole
// GetMessagesForRetryAsync sweep.
var goodParent = Guid.NewGuid();
var good = CreateMessage("pgood1", StoreAndForwardCategory.ExternalSystem);
good.ParentExecutionId = goodParent;
good.LastAttemptAt = null; // due immediately
await _storage.EnqueueAsync(good);
var bad = CreateMessage("pbad1", StoreAndForwardCategory.ExternalSystem);
bad.ParentExecutionId = Guid.NewGuid();
bad.LastAttemptAt = null; // due immediately
await _storage.EnqueueAsync(bad);
await using (var conn = new SqliteConnection($"Data Source={_dbName};Mode=Memory;Cache=Shared"))
{
await conn.OpenAsync();
await using var corrupt = conn.CreateCommand();
corrupt.CommandText =
"UPDATE sf_messages SET parent_execution_id = 'not-a-guid' WHERE id = 'pbad1';";
await corrupt.ExecuteNonQueryAsync();
}
var due = await _storage.GetMessagesForRetryAsync();
Assert.Null(Assert.Single(due, m => m.Id == "pbad1").ParentExecutionId);
Assert.Equal(goodParent, Assert.Single(due, m => m.Id == "pgood1").ParentExecutionId);
var retrieved = await _storage.GetMessageByIdAsync("pbad1");
Assert.NotNull(retrieved);
Assert.Null(retrieved!.ParentExecutionId);
}
private static StoreAndForwardMessage CreateMessage(string id, StoreAndForwardCategory category)
{
return new StoreAndForwardMessage