feat(notif): emit NotifyDeliver(Attempted) per dispatcher attempt (#23 M4)

M4 Bundle B (B2) — NotificationOutboxActor's dispatcher loop emits a single
AuditChannel.Notification / AuditKind.NotifyDeliver row with AuditStatus.Attempted
for every delivery attempt (success, transient failure, permanent failure,
and the missing-adapter park).

- BuildNotifyDeliverEvent helper populates correlation id (parsed from the
  string NotificationId — sites generate Guid.NewGuid().ToString("N"),
  non-Guid ids fall through as null), list-name target, source site/instance/script
  provenance, and Actor=null (central dispatch has no authenticated end-user).
- Attempt duration is measured around the adapter call and recorded as
  DurationMs so KPIs can compute per-attempt latency.
- Emission is fire-and-forget (the writer swallows internally) and wrapped
  in try/catch — audit failure NEVER aborts the user-facing dispatch.

Terminal-state emission lands separately in B3.
This commit is contained in:
Joseph Doherty
2026-05-20 16:08:06 -04:00
parent b31747a632
commit 1dfd67a90d
2 changed files with 365 additions and 1 deletions

View File

@@ -270,6 +270,24 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
/// status transition. A missing adapter parks the notification; otherwise the
/// <see cref="DeliveryOutcome"/> drives the transition. The updated row is always persisted.
/// </summary>
/// <remarks>
/// <para>
/// M4 Bundle B2: a single
/// <see cref="AuditChannel.Notification"/>/<see cref="AuditKind.NotifyDeliver"/>
/// row is emitted with <see cref="AuditStatus.Attempted"/> per attempt
/// (success, transient, permanent). The emission is wrapped in a
/// try/catch so a throwing audit writer NEVER aborts the user-facing
/// dispatch — the <see cref="CentralAuditWriter"/> itself swallows
/// internal failures, but the dispatcher wraps defensively per
/// alog.md §13. The missing-adapter park path also emits an Attempted
/// row because it IS an attempt from the dispatcher's point of view.
/// </para>
/// <para>
/// Attempt duration is measured around the adapter call and recorded on
/// the Attempted row so downstream KPIs can compute per-attempt latency
/// without joining to the row update timestamps.
/// </para>
/// </remarks>
private async Task DeliverOneAsync(
Notification notification,
DateTimeOffset now,
@@ -280,14 +298,28 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
{
if (!adapters.TryGetValue(notification.Type, out var adapter))
{
// Missing-adapter park: from the dispatcher's perspective this is an
// attempt that resolved to a park, so we emit the Attempted row
// alongside the row update.
var missingAdapterError = $"no delivery adapter for type {notification.Type}";
notification.Status = NotificationStatus.Parked;
notification.LastError = $"no delivery adapter for type {notification.Type}";
notification.LastError = missingAdapterError;
notification.LastAttemptAt = now;
await outboxRepository.UpdateAsync(notification);
EmitAttemptAudit(
notification,
now,
durationMs: 0,
errorMessage: missingAdapterError);
return;
}
// Measure the attempt duration around the adapter call so the
// Attempted row carries it for KPI use.
var attemptStart = DateTimeOffset.UtcNow;
var outcome = await adapter.DeliverAsync(notification);
var durationMs = (int)Math.Min(
int.MaxValue, Math.Max(0, (DateTimeOffset.UtcNow - attemptStart).TotalMilliseconds));
switch (outcome.Result)
{
@@ -322,6 +354,86 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
}
await outboxRepository.UpdateAsync(notification);
// Emit the per-attempt Attempted row exactly once regardless of the
// outcome (B2). The error message comes from the outcome, not from
// notification.LastError, so a success row has a null error message and
// a transient row carries the SMTP failure reason verbatim.
EmitAttemptAudit(
notification,
now,
durationMs: durationMs,
errorMessage: outcome.Result == DeliveryResult.Success ? null : outcome.Error);
}
/// <summary>
/// Emits a single
/// <see cref="AuditChannel.Notification"/>/<see cref="AuditKind.NotifyDeliver"/>
/// audit row with <see cref="AuditStatus.Attempted"/> without blocking the
/// dispatcher loop. Both failure modes of the write are contained here and
/// logged as warnings: a synchronous throw while building the event or
/// starting the write, and an asynchronous fault of the un-awaited write
/// task. An audit failure therefore never propagates back into the
/// dispatcher and never goes unobserved. The
/// <see cref="CentralAuditWriter"/> is expected to swallow its own
/// failures already; this is defensive (alog.md §13).
/// </summary>
/// <param name="notification">Notification whose delivery attempt is being audited.</param>
/// <param name="now">Timestamp recorded as the attempt's occurrence time.</param>
/// <param name="durationMs">Measured attempt duration in milliseconds, stored on the row as <c>DurationMs</c>.</param>
/// <param name="errorMessage">Failure reason for the attempt, or <c>null</c> when there is none.</param>
private void EmitAttemptAudit(
    Notification notification,
    DateTimeOffset now,
    int durationMs,
    string? errorMessage)
{
    // Capture the id up front so the log message is stable even when the
    // asynchronous observer below runs after the caller has moved on.
    var notificationId = notification.NotificationId;

    try
    {
        var evt = BuildNotifyDeliverEvent(notification, now, AuditStatus.Attempted, errorMessage)
            with { DurationMs = durationMs };

        // Fire-and-forget — we do NOT await: the dispatcher loop must not
        // be blocked by audit IO. The write is routed through an observer
        // so that a faulted write task is logged instead of being dropped
        // as an unobserved task exception.
        _ = ObserveWriteAsync(evt);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(
            ex,
            "Failed to emit Attempted audit row for notification {NotificationId}.",
            notificationId);
    }

    // Awaits the audit write off the caller's scheduler and logs any fault.
    // Never throws: every exception is handled inside.
    async Task ObserveWriteAsync(AuditEvent auditEvent)
    {
        try
        {
            // ConfigureAwait(false): the continuation only logs, so it does
            // not need to resume on the caller's context/scheduler.
            await _auditWriter.WriteAsync(auditEvent).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "Failed to emit Attempted audit row for notification {NotificationId}.",
                notificationId);
        }
    }
}
/// <summary>
/// Creates the <see cref="AuditChannel.Notification"/>/<see cref="AuditKind.NotifyDeliver"/>
/// audit event for <paramref name="notification"/>, copying its provenance
/// (source site, instance and script) and using its list name as the target.
/// <see cref="AuditEvent.CorrelationId"/> is the notification id parsed as a
/// Guid. Sites are expected to generate ids via
/// <c>Guid.NewGuid().ToString("N")</c>, so the parse normally succeeds; an id
/// that is not a Guid yields a null correlation id instead of an exception.
/// </summary>
/// <param name="notification">Notification the audit row describes.</param>
/// <param name="now">Occurrence time; stored as its UTC <see cref="DateTime"/>.</param>
/// <param name="status">Audit status to stamp on the row.</param>
/// <param name="errorMessage">Error text for the row, or <c>null</c>.</param>
/// <returns>A new event with a freshly generated <c>EventId</c>.</returns>
private static AuditEvent BuildNotifyDeliverEvent(
    Notification notification,
    DateTimeOffset now,
    AuditStatus status,
    string? errorMessage)
{
    // Default to "no correlation" and only fill it in when the id is a Guid.
    Guid? correlationId = null;
    if (Guid.TryParse(notification.NotificationId, out var notificationGuid))
    {
        correlationId = notificationGuid;
    }

    var auditEvent = new AuditEvent
    {
        EventId = Guid.NewGuid(),
        OccurredAtUtc = now.UtcDateTime,
        Channel = AuditChannel.Notification,
        Kind = AuditKind.NotifyDeliver,
        CorrelationId = correlationId,
        // Dispatch happens centrally with no authenticated end-user; the
        // originating script's identity lives on the upstream NotifySend row.
        Actor = null,
        SourceSiteId = notification.SourceSiteId,
        SourceInstanceId = notification.SourceInstanceId,
        SourceScript = notification.SourceScript,
        Target = notification.ListName,
        Status = status,
        ErrorMessage = errorMessage,
    };

    return auditEvent;
}
/// <summary>