feat(snf): per-attempt and terminal cached-call lifecycle observer (#23 M3)
Hook the store-and-forward retry loop so the audit pipeline can emit
per-attempt + terminal telemetry under the original TrackedOperationId
(Bundle E Tasks E4 + E5).
New seam:
* ICachedCallLifecycleObserver + CachedCallAttemptContext in
Commons.Interfaces.Services. Outcome enum
(Delivered / TransientFailure / PermanentFailure / ParkedMaxRetries)
is S&F-vocabulary; the bridge living in ScadaLink.AuditLog (Bundle F)
will map it to the AuditKind/AuditStatus pair when building the
CachedCallTelemetry packet.
* StoreAndForwardService gains an optional cachedCallObserver
constructor parameter + siteId. RetryMessageAsync fires the observer
exactly once per attempt with the appropriate outcome:
- handler returns true -> Delivered
- handler returns false -> PermanentFailure (and parks)
- handler throws + retries remaining -> TransientFailure
- handler throws + max retries hit -> ParkedMaxRetries (and parks)
Hook is best-effort: a thrown observer is logged + swallowed so a
failing audit pipeline can never be misclassified as a transient
delivery failure or corrupt the retry-count bookkeeping (alog.md §7).
Only cached-call categories (ExternalSystem, CachedDbWrite) trigger
observer callbacks — the Notification category has its own central-side
audit pipeline (Notification Outbox / #21).
Messages from pre-M3 callers that didn't thread a TrackedOperationId into
the S&F message id are silently skipped — the observer requires a parseable
id by contract. New S&F callers stamp the id as messageId (Bundle E3).
Bundle E tasks E4 + E5.
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.StoreAndForward;
|
||||
@@ -33,6 +35,19 @@ public class StoreAndForwardService
|
||||
private readonly StoreAndForwardOptions _options;
|
||||
private readonly ReplicationService? _replication;
|
||||
private readonly ILogger<StoreAndForwardService> _logger;
|
||||
/// <summary>
|
||||
/// Audit Log #23 (M3 Bundle E — Task E4): site-side observer notified
|
||||
/// after every cached-call delivery attempt. Optional — when null no
|
||||
/// telemetry is emitted; the legacy pre-M3 retry loop behaviour is
|
||||
/// preserved exactly.
|
||||
/// </summary>
|
||||
private readonly ICachedCallLifecycleObserver? _cachedCallObserver;
|
||||
/// <summary>
|
||||
/// Audit Log #23 (M3 Bundle E — Task E4): site id stamped onto the
|
||||
/// cached-call attempt context so the audit bridge can build the
|
||||
/// <see cref="SiteCallOperational"/> half of the telemetry packet.
|
||||
/// </summary>
|
||||
private readonly string _siteId;
|
||||
private Timer? _retryTimer;
|
||||
private int _retryInProgress;
|
||||
|
||||
@@ -63,12 +78,16 @@ public class StoreAndForwardService
|
||||
StoreAndForwardStorage storage,
|
||||
StoreAndForwardOptions options,
|
||||
ILogger<StoreAndForwardService> logger,
|
||||
ReplicationService? replication = null)
|
||||
ReplicationService? replication = null,
|
||||
ICachedCallLifecycleObserver? cachedCallObserver = null,
|
||||
string siteId = "")
|
||||
{
|
||||
_storage = storage;
|
||||
_options = options;
|
||||
_logger = logger;
|
||||
_replication = replication;
|
||||
_cachedCallObserver = cachedCallObserver;
|
||||
_siteId = siteId;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -280,15 +299,33 @@ public class StoreAndForwardService
|
||||
return;
|
||||
}
|
||||
|
||||
// Audit Log #23 (M3 Bundle E — Tasks E4/E5): measure per-attempt
|
||||
// duration so the audit row carries a meaningful DurationMs. Captured
|
||||
// around the handler invocation only — storage / replication overhead
|
||||
// is excluded.
|
||||
var attemptStartUtc = DateTime.UtcNow;
|
||||
var attemptStopwatch = System.Diagnostics.Stopwatch.StartNew();
|
||||
|
||||
try
|
||||
{
|
||||
var success = await handler(message);
|
||||
attemptStopwatch.Stop();
|
||||
if (success)
|
||||
{
|
||||
await _storage.RemoveMessageAsync(message.Id);
|
||||
_replication?.ReplicateRemove(message.Id);
|
||||
RaiseActivity("Delivered", message.Category,
|
||||
$"Delivered to {message.Target} after {message.RetryCount} retries");
|
||||
|
||||
// M3: terminal Delivered observer notification — the audit
|
||||
// bridge maps this to Attempted + CachedResolve(Delivered).
|
||||
await NotifyCachedCallObserverAsync(
|
||||
message,
|
||||
CachedCallAttemptOutcome.Delivered,
|
||||
lastError: null,
|
||||
httpStatus: null,
|
||||
occurredAtUtc: attemptStartUtc,
|
||||
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -311,9 +348,20 @@ public class StoreAndForwardService
|
||||
_replication?.ReplicatePark(message);
|
||||
RaiseActivity("Parked", message.Category,
|
||||
$"Permanent failure for {message.Target}: handler returned false");
|
||||
|
||||
// M3: terminal PermanentFailure observer notification — the
|
||||
// audit bridge maps this to Attempted(Failed) + CachedResolve(Parked).
|
||||
await NotifyCachedCallObserverAsync(
|
||||
message,
|
||||
CachedCallAttemptOutcome.PermanentFailure,
|
||||
lastError: message.LastError,
|
||||
httpStatus: null,
|
||||
occurredAtUtc: attemptStartUtc,
|
||||
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
attemptStopwatch.Stop();
|
||||
// Transient failure — increment retry, check max
|
||||
message.RetryCount++;
|
||||
message.LastAttemptAt = DateTimeOffset.UtcNow;
|
||||
@@ -339,6 +387,16 @@ public class StoreAndForwardService
|
||||
_logger.LogWarning(
|
||||
"Message {MessageId} parked after {MaxRetries} retries to {Target}",
|
||||
message.Id, message.MaxRetries, message.Target);
|
||||
|
||||
// M3: terminal ParkedMaxRetries observer notification — the
|
||||
// audit bridge maps this to Attempted(Failed) + CachedResolve(Parked).
|
||||
await NotifyCachedCallObserverAsync(
|
||||
message,
|
||||
CachedCallAttemptOutcome.ParkedMaxRetries,
|
||||
lastError: ex.Message,
|
||||
httpStatus: null,
|
||||
occurredAtUtc: attemptStartUtc,
|
||||
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -355,10 +413,113 @@ public class StoreAndForwardService
|
||||
}
|
||||
RaiseActivity("Retried", message.Category,
|
||||
$"Retry {message.RetryCount}/{message.MaxRetries} for {message.Target}: {ex.Message}");
|
||||
|
||||
// M3: per-attempt TransientFailure observer notification —
|
||||
// the audit bridge maps this to Attempted(Failed).
|
||||
await NotifyCachedCallObserverAsync(
|
||||
message,
|
||||
CachedCallAttemptOutcome.TransientFailure,
|
||||
lastError: ex.Message,
|
||||
httpStatus: null,
|
||||
occurredAtUtc: attemptStartUtc,
|
||||
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Audit Log #23 (M3 Bundle E — Tasks E4/E5): reports one completed delivery
/// attempt to the optional <see cref="ICachedCallLifecycleObserver"/>.
/// </summary>
/// <remarks>
/// <para>
/// Nothing is reported when no observer is registered, when the message
/// category is neither <see cref="StoreAndForwardCategory.ExternalSystem"/>
/// nor <see cref="StoreAndForwardCategory.CachedDbWrite"/>, or when
/// <c>TrackedOperationId.TryParse</c> rejects the message id. The
/// <see cref="StoreAndForwardCategory.Notification"/> category is audited by
/// its own central-side pipeline (Notification Outbox / #21) and must not
/// surface on this hook.
/// </para>
/// <para>
/// Best-effort (alog.md §7 contract): a failure while building the context or
/// inside the observer is logged as a warning and swallowed, so a failing
/// audit pipeline cannot disturb store-and-forward retry bookkeeping.
/// </para>
/// </remarks>
/// <param name="message">The message whose delivery attempt just finished.</param>
/// <param name="outcome">Classification of the attempt.</param>
/// <param name="lastError">Error text forwarded to the observer, if any.</param>
/// <param name="httpStatus">HTTP status forwarded to the observer, if any.</param>
/// <param name="occurredAtUtc">
/// Attempt timestamp; re-stamped with <see cref="DateTimeKind.Utc"/> before
/// being handed to the observer.
/// </param>
/// <param name="durationMs">Attempt duration in milliseconds, if measured.</param>
private async Task NotifyCachedCallObserverAsync(
    StoreAndForwardMessage message,
    CachedCallAttemptOutcome outcome,
    string? lastError,
    int? httpStatus,
    DateTime occurredAtUtc,
    int? durationMs)
{
    // No observer registered: telemetry is disabled, nothing to do.
    if (_cachedCallObserver is not { } observer)
    {
        return;
    }

    // Map the category onto the audit channel name. Any category other than
    // the two cached-call ones is ignored by this hook.
    string auditChannel;
    switch (message.Category)
    {
        case StoreAndForwardCategory.ExternalSystem:
            auditChannel = "ApiOutbound";
            break;

        case StoreAndForwardCategory.CachedDbWrite:
            auditChannel = "DbOutbound";
            break;

        default:
            return;
    }

    // An id that does not parse as a TrackedOperationId gives the audit side
    // nothing to bind to (e.g. a message enqueued before M3 threaded the id
    // through), so the attempt is skipped silently.
    if (!TrackedOperationId.TryParse(message.Id, out var operationId))
    {
        return;
    }

    CachedCallAttemptContext attemptContext;
    try
    {
        attemptContext = new CachedCallAttemptContext(
            TrackedOperationId: operationId,
            Channel: auditChannel,
            Target: message.Target,
            SourceSite: _siteId,
            Outcome: outcome,
            RetryCount: message.RetryCount,
            LastError: lastError,
            HttpStatus: httpStatus,
            CreatedAtUtc: message.CreatedAt.UtcDateTime,
            OccurredAtUtc: DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc),
            DurationMs: durationMs,
            SourceInstanceId: message.OriginInstanceName);
    }
    catch (Exception constructionError)
    {
        // Construction is not expected to fail; guarded anyway because the
        // alog.md §7 contract requires this path to be exception-safe.
        _logger.LogWarning(constructionError,
            "Failed to build cached-call attempt context for {MessageId}; observer skipped",
            message.Id);
        return;
    }

    try
    {
        await observer
            .OnAttemptCompletedAsync(attemptContext, CancellationToken.None)
            .ConfigureAwait(false);
    }
    catch (Exception observerError)
    {
        // alog.md §7 best-effort: an observer outage must never be mistaken
        // for a transient delivery failure by the calling retry loop.
        _logger.LogWarning(observerError,
            "ICachedCallLifecycleObserver threw for {MessageId} (Outcome {Outcome}); ignored",
            message.Id, outcome);
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// WP-12: Gets parked messages for central query (Pattern 8).
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user