feat(snf): per-attempt and terminal cached-call lifecycle observer (#23 M3)
Hook the store-and-forward retry loop so the audit pipeline can emit
per-attempt + terminal telemetry under the original TrackedOperationId
(Bundle E Tasks E4 + E5).
New seam:
* ICachedCallLifecycleObserver + CachedCallAttemptContext in
Commons.Interfaces.Services. Outcome enum
(Delivered / TransientFailure / PermanentFailure / ParkedMaxRetries)
is S&F-vocabulary; the bridge living in ScadaLink.AuditLog (Bundle F)
will map it to the AuditKind/AuditStatus pair when building the
CachedCallTelemetry packet.
* StoreAndForwardService gains an optional cachedCallObserver
constructor parameter + siteId. RetryMessageAsync fires the observer
exactly once per attempt with the appropriate outcome:
- handler returns true -> Delivered
- handler returns false -> PermanentFailure (and parks)
- handler throws + retries remaining -> TransientFailure
- handler throws + max retries hit -> ParkedMaxRetries (and parks)
Hook is best-effort: an exception thrown by the observer is logged and
swallowed, so a failing audit pipeline can never be misclassified as a
transient delivery failure or corrupt the retry-count bookkeeping (alog.md §7).
Only cached-call categories (ExternalSystem, CachedDbWrite) trigger the
observer — the Notification category has its own central-side
audit pipeline (Notification Outbox / #21).
Messages from pre-M3 callers that didn't thread a TrackedOperationId into
the S&F message id are silently skipped — the observer requires a parseable
id by contract. New S&F callers stamp the id as messageId (Bundle E3).
Bundle E tasks E4 + E5.
This commit is contained in:
@@ -0,0 +1,93 @@
|
|||||||
|
using ScadaLink.Commons.Types;
|
||||||
|
|
||||||
|
namespace ScadaLink.Commons.Interfaces.Services;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Audit Log #23 (M3 Bundle E — Tasks E4/E5): site-side hook the
|
||||||
|
/// store-and-forward retry loop invokes once after every cached-call attempt
|
||||||
|
/// (terminal-state transitions included), so the audit pipeline can emit
|
||||||
|
/// <c>ApiCallCached</c>/<c>DbWriteCached</c> per-attempt rows and the
|
||||||
|
/// <c>CachedResolve</c> terminal row under the original
|
||||||
|
/// <see cref="TrackedOperationId"/>.
|
||||||
|
/// </summary>
|
||||||
|
/// <remarks>
|
||||||
|
/// <para>
|
||||||
|
/// The interface deliberately uses <see cref="CachedCallAttemptOutcome"/>
|
||||||
|
/// rather than <see cref="ScadaLink.Commons.Types.Enums.AuditStatus"/> so the
|
||||||
|
/// store-and-forward project does not need to depend on the audit vocabulary — the
|
||||||
|
/// bridge living in <c>ScadaLink.AuditLog</c> maps the outcome to the right
|
||||||
|
/// audit kind + status when materialising the <c>CachedCallTelemetry</c>
|
||||||
|
/// packet.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// <b>Best-effort contract (alog.md §7):</b> implementations MUST swallow
|
||||||
|
/// internal failures rather than propagating to the store-and-forward service — an
|
||||||
|
/// exception escaping the observer must not be misclassified as a transient delivery
|
||||||
|
/// failure and must not corrupt the retry-count bookkeeping.
|
||||||
|
/// </para>
|
||||||
|
/// </remarks>
|
||||||
|
public interface ICachedCallLifecycleObserver
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Called by the store-and-forward retry loop after every cached-call
|
||||||
|
/// delivery attempt. Receives the tracking id parsed from the message id,
|
||||||
|
/// the per-category channel discriminator, retry-count + last-error
|
||||||
|
/// context, and the attempt outcome (which also conveys whether a terminal state was reached).
|
||||||
|
/// </summary>
/// <param name="context">Immutable description of the attempt that just finished.</param>
/// <param name="ct">
/// Cancellation token for the notification. NOTE(review): the store-and-forward
/// caller shown alongside this interface passes <see cref="CancellationToken.None"/>.
/// </param>
/// <returns>A task that completes once the observer has handled the notification.</returns>
|
||||||
|
Task OnAttemptCompletedAsync(CachedCallAttemptContext context, CancellationToken ct = default);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Per-attempt context handed to <see cref="ICachedCallLifecycleObserver"/>.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="TrackedOperationId">
|
||||||
|
/// Tracking id parsed from the underlying <c>StoreAndForwardMessage.Id</c>.
|
||||||
|
/// </param>
|
||||||
|
/// <param name="Channel">
|
||||||
|
/// Trust-boundary channel string — <c>"ApiOutbound"</c> for ExternalSystem
|
||||||
|
/// cached calls, <c>"DbOutbound"</c> for cached DB writes.
|
||||||
|
/// </param>
|
||||||
|
/// <param name="Target">Human-readable target (system name / DB connection).</param>
|
||||||
|
/// <param name="SourceSite">Site id that submitted the cached call.</param>
|
||||||
|
/// <param name="Outcome">Per-attempt outcome.</param>
|
||||||
|
/// <param name="RetryCount">
/// Number of retries performed so far (store-and-forward bookkeeping). The retry loop
/// increments the counter before notifying on a thrown failure, so the first failed
/// attempt is reported with a value of 1.
/// </param>
|
||||||
|
/// <param name="LastError">Most recent error message (null on success).</param>
|
||||||
|
/// <param name="HttpStatus">
/// Most recent HTTP status (null when not applicable). NOTE(review): every call site
/// visible in the store-and-forward retry loop currently passes null.
/// </param>
|
||||||
|
/// <param name="CreatedAtUtc">When the underlying store-and-forward message was first enqueued.</param>
|
||||||
|
/// <param name="OccurredAtUtc">
/// UTC timestamp associated with this attempt. NOTE(review): the store-and-forward
/// retry loop passes the instant captured immediately before the delivery handler is
/// invoked (attempt start), not the completion instant — confirm which one the audit
/// rows are meant to carry.
/// </param>
|
||||||
|
/// <param name="DurationMs">
/// Duration of the attempt in milliseconds (null when not measured). The retry loop
/// measures the delivery-handler invocation only; storage and replication work is excluded.
/// </param>
|
||||||
|
/// <param name="SourceInstanceId">Originating instance, when known.</param>
|
||||||
|
public sealed record CachedCallAttemptContext(
|
||||||
|
TrackedOperationId TrackedOperationId,
|
||||||
|
string Channel,
|
||||||
|
string Target,
|
||||||
|
string SourceSite,
|
||||||
|
CachedCallAttemptOutcome Outcome,
|
||||||
|
int RetryCount,
|
||||||
|
string? LastError,
|
||||||
|
int? HttpStatus,
|
||||||
|
DateTime CreatedAtUtc,
|
||||||
|
DateTime OccurredAtUtc,
|
||||||
|
int? DurationMs,
|
||||||
|
string? SourceInstanceId);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Coarse outcome of one cached-call delivery attempt, observed from inside
|
||||||
|
/// the store-and-forward retry loop. The audit bridge maps this to the
|
||||||
|
/// <c>ApiCallCached</c>/<c>DbWriteCached</c> Attempted row and, when terminal,
|
||||||
|
/// the corresponding <c>CachedResolve</c> row.
|
||||||
|
/// </summary>
|
||||||
|
public enum CachedCallAttemptOutcome
|
||||||
|
{
|
||||||
|
/// <summary>
/// Attempt delivered successfully — terminal Delivered state.
/// Reported when the delivery handler returns <c>true</c>.
/// </summary>
|
||||||
|
Delivered,
|
||||||
|
|
||||||
|
|
/// <summary>
/// Attempt failed transiently; another retry will follow.
/// Reported when the delivery handler throws and the retry budget is not yet exhausted.
/// </summary>
|
||||||
|
TransientFailure,
|
||||||
|
|
||||||
|
|
/// <summary>
/// Attempt returned permanent failure — terminal Parked state (store-and-forward semantics).
/// Reported when the delivery handler returns <c>false</c>.
/// </summary>
|
||||||
|
PermanentFailure,
|
||||||
|
|
||||||
|
|
/// <summary>
/// Retry budget exhausted — terminal Parked state.
/// Reported when the delivery handler throws and the maximum retry count has been reached.
/// </summary>
|
||||||
|
ParkedMaxRetries,
|
||||||
|
}
|
||||||
@@ -1,4 +1,6 @@
|
|||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
|
using ScadaLink.Commons.Interfaces.Services;
|
||||||
|
using ScadaLink.Commons.Types;
|
||||||
using ScadaLink.Commons.Types.Enums;
|
using ScadaLink.Commons.Types.Enums;
|
||||||
|
|
||||||
namespace ScadaLink.StoreAndForward;
|
namespace ScadaLink.StoreAndForward;
|
||||||
@@ -33,6 +35,19 @@ public class StoreAndForwardService
|
|||||||
private readonly StoreAndForwardOptions _options;
|
private readonly StoreAndForwardOptions _options;
|
||||||
private readonly ReplicationService? _replication;
|
private readonly ReplicationService? _replication;
|
||||||
private readonly ILogger<StoreAndForwardService> _logger;
|
private readonly ILogger<StoreAndForwardService> _logger;
|
||||||
|
/// <summary>
|
||||||
|
/// Audit Log #23 (M3 Bundle E — Task E4): site-side observer notified
|
||||||
|
/// after every cached-call delivery attempt. Optional — when null no
|
||||||
|
/// telemetry is emitted; the legacy pre-M3 retry loop behaviour is
|
||||||
|
/// preserved exactly.
|
||||||
|
/// </summary>
|
||||||
|
private readonly ICachedCallLifecycleObserver? _cachedCallObserver;
|
||||||
|
/// <summary>
|
||||||
|
/// Audit Log #23 (M3 Bundle E — Task E4): site id stamped onto the
|
||||||
|
/// cached-call attempt context so the audit bridge can build the
|
||||||
|
/// <see cref="SiteCallOperational"/> half of the telemetry packet.
|
||||||
|
/// </summary>
|
||||||
|
private readonly string _siteId;
|
||||||
private Timer? _retryTimer;
|
private Timer? _retryTimer;
|
||||||
private int _retryInProgress;
|
private int _retryInProgress;
|
||||||
|
|
||||||
@@ -63,12 +78,16 @@ public class StoreAndForwardService
|
|||||||
StoreAndForwardStorage storage,
|
StoreAndForwardStorage storage,
|
||||||
StoreAndForwardOptions options,
|
StoreAndForwardOptions options,
|
||||||
ILogger<StoreAndForwardService> logger,
|
ILogger<StoreAndForwardService> logger,
|
||||||
ReplicationService? replication = null)
|
ReplicationService? replication = null,
|
||||||
|
ICachedCallLifecycleObserver? cachedCallObserver = null,
|
||||||
|
string siteId = "")
|
||||||
{
|
{
|
||||||
_storage = storage;
|
_storage = storage;
|
||||||
_options = options;
|
_options = options;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
_replication = replication;
|
_replication = replication;
|
||||||
|
_cachedCallObserver = cachedCallObserver;
|
||||||
|
_siteId = siteId;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -280,15 +299,33 @@ public class StoreAndForwardService
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Audit Log #23 (M3 Bundle E — Tasks E4/E5): measure per-attempt
|
||||||
|
// duration so the audit row carries a meaningful DurationMs. Captured
|
||||||
|
// around the handler invocation only — storage / replication overhead
|
||||||
|
// is excluded.
|
||||||
|
var attemptStartUtc = DateTime.UtcNow;
|
||||||
|
var attemptStopwatch = System.Diagnostics.Stopwatch.StartNew();
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var success = await handler(message);
|
var success = await handler(message);
|
||||||
|
attemptStopwatch.Stop();
|
||||||
if (success)
|
if (success)
|
||||||
{
|
{
|
||||||
await _storage.RemoveMessageAsync(message.Id);
|
await _storage.RemoveMessageAsync(message.Id);
|
||||||
_replication?.ReplicateRemove(message.Id);
|
_replication?.ReplicateRemove(message.Id);
|
||||||
RaiseActivity("Delivered", message.Category,
|
RaiseActivity("Delivered", message.Category,
|
||||||
$"Delivered to {message.Target} after {message.RetryCount} retries");
|
$"Delivered to {message.Target} after {message.RetryCount} retries");
|
||||||
|
|
||||||
|
// M3: terminal Delivered observer notification — the audit
|
||||||
|
// bridge maps this to Attempted + CachedResolve(Delivered).
|
||||||
|
await NotifyCachedCallObserverAsync(
|
||||||
|
message,
|
||||||
|
CachedCallAttemptOutcome.Delivered,
|
||||||
|
lastError: null,
|
||||||
|
httpStatus: null,
|
||||||
|
occurredAtUtc: attemptStartUtc,
|
||||||
|
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -311,9 +348,20 @@ public class StoreAndForwardService
|
|||||||
_replication?.ReplicatePark(message);
|
_replication?.ReplicatePark(message);
|
||||||
RaiseActivity("Parked", message.Category,
|
RaiseActivity("Parked", message.Category,
|
||||||
$"Permanent failure for {message.Target}: handler returned false");
|
$"Permanent failure for {message.Target}: handler returned false");
|
||||||
|
|
||||||
|
// M3: terminal PermanentFailure observer notification — the
|
||||||
|
// audit bridge maps this to Attempted(Failed) + CachedResolve(Parked).
|
||||||
|
await NotifyCachedCallObserverAsync(
|
||||||
|
message,
|
||||||
|
CachedCallAttemptOutcome.PermanentFailure,
|
||||||
|
lastError: message.LastError,
|
||||||
|
httpStatus: null,
|
||||||
|
occurredAtUtc: attemptStartUtc,
|
||||||
|
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
attemptStopwatch.Stop();
|
||||||
// Transient failure — increment retry, check max
|
// Transient failure — increment retry, check max
|
||||||
message.RetryCount++;
|
message.RetryCount++;
|
||||||
message.LastAttemptAt = DateTimeOffset.UtcNow;
|
message.LastAttemptAt = DateTimeOffset.UtcNow;
|
||||||
@@ -339,6 +387,16 @@ public class StoreAndForwardService
|
|||||||
_logger.LogWarning(
|
_logger.LogWarning(
|
||||||
"Message {MessageId} parked after {MaxRetries} retries to {Target}",
|
"Message {MessageId} parked after {MaxRetries} retries to {Target}",
|
||||||
message.Id, message.MaxRetries, message.Target);
|
message.Id, message.MaxRetries, message.Target);
|
||||||
|
|
||||||
|
// M3: terminal ParkedMaxRetries observer notification — the
|
||||||
|
// audit bridge maps this to Attempted(Failed) + CachedResolve(Parked).
|
||||||
|
await NotifyCachedCallObserverAsync(
|
||||||
|
message,
|
||||||
|
CachedCallAttemptOutcome.ParkedMaxRetries,
|
||||||
|
lastError: ex.Message,
|
||||||
|
httpStatus: null,
|
||||||
|
occurredAtUtc: attemptStartUtc,
|
||||||
|
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -355,10 +413,113 @@ public class StoreAndForwardService
|
|||||||
}
|
}
|
||||||
RaiseActivity("Retried", message.Category,
|
RaiseActivity("Retried", message.Category,
|
||||||
$"Retry {message.RetryCount}/{message.MaxRetries} for {message.Target}: {ex.Message}");
|
$"Retry {message.RetryCount}/{message.MaxRetries} for {message.Target}: {ex.Message}");
|
||||||
|
|
||||||
|
// M3: per-attempt TransientFailure observer notification —
|
||||||
|
// the audit bridge maps this to Attempted(Failed).
|
||||||
|
await NotifyCachedCallObserverAsync(
|
||||||
|
message,
|
||||||
|
CachedCallAttemptOutcome.TransientFailure,
|
||||||
|
lastError: ex.Message,
|
||||||
|
httpStatus: null,
|
||||||
|
occurredAtUtc: attemptStartUtc,
|
||||||
|
durationMs: (int)attemptStopwatch.ElapsedMilliseconds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Audit Log #23 (M3 Bundle E — Tasks E4/E5): reports a just-finished delivery
/// attempt to the optional <see cref="ICachedCallLifecycleObserver"/>.
/// </summary>
/// <remarks>
/// <para>
/// Nothing is reported when no observer is registered, when the message category is
/// anything other than <see cref="StoreAndForwardCategory.ExternalSystem"/> or
/// <see cref="StoreAndForwardCategory.CachedDbWrite"/> (for example
/// <see cref="StoreAndForwardCategory.Notification"/>, which is audited through the
/// Notification Outbox / #21 pipeline instead), or when the message id cannot be parsed
/// into a <see cref="TrackedOperationId"/>.
/// </para>
/// <para>
/// Best-effort (alog.md §7): any exception raised while building the context or while
/// running the observer is logged as a warning and swallowed, so an audit outage can
/// never surface as a delivery failure or disturb the retry bookkeeping.
/// </para>
/// </remarks>
/// <param name="message">The store-and-forward message whose attempt just finished.</param>
/// <param name="outcome">Outcome classification of the attempt.</param>
/// <param name="lastError">Error text for the attempt, or null when there is none.</param>
/// <param name="httpStatus">HTTP status for the attempt, or null when not applicable.</param>
/// <param name="occurredAtUtc">Attempt timestamp; it is stamped as UTC without conversion.</param>
/// <param name="durationMs">Measured attempt duration in milliseconds, or null.</param>
private async Task NotifyCachedCallObserverAsync(
    StoreAndForwardMessage message,
    CachedCallAttemptOutcome outcome,
    string? lastError,
    int? httpStatus,
    DateTime occurredAtUtc,
    int? durationMs)
{
    var observer = _cachedCallObserver;
    if (observer is null)
    {
        return;
    }

    // Map the category onto its audit channel; categories without a channel
    // (notifications have their own outbox-side audit pipeline) are not reported.
    string channelName;
    switch (message.Category)
    {
        case StoreAndForwardCategory.ExternalSystem:
            channelName = "ApiOutbound";
            break;

        case StoreAndForwardCategory.CachedDbWrite:
            channelName = "DbOutbound";
            break;

        default:
            return;
    }

    // An id that does not parse has no tracked operation to bind an audit row to
    // (messages enqueued without a TrackedOperationId threaded in) — skip quietly.
    if (!TrackedOperationId.TryParse(message.Id, out var operationId))
    {
        return;
    }

    CachedCallAttemptContext attempt;
    try
    {
        var enqueuedAtUtc = message.CreatedAt.UtcDateTime;
        var attemptInstantUtc = DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc);

        attempt = new CachedCallAttemptContext(
            TrackedOperationId: operationId,
            Channel: channelName,
            Target: message.Target,
            SourceSite: _siteId,
            Outcome: outcome,
            RetryCount: message.RetryCount,
            LastError: lastError,
            HttpStatus: httpStatus,
            CreatedAtUtc: enqueuedAtUtc,
            OccurredAtUtc: attemptInstantUtc,
            DurationMs: durationMs,
            SourceInstanceId: message.OriginInstanceName);
    }
    catch (Exception constructionError)
    {
        // Purely defensive: building the record is not expected to fail, but the
        // alog.md §7 contract demands that this path never throws.
        _logger.LogWarning(constructionError,
            "Failed to build cached-call attempt context for {MessageId}; observer skipped",
            message.Id);
        return;
    }

    try
    {
        await observer.OnAttemptCompletedAsync(attempt, CancellationToken.None)
            .ConfigureAwait(false);
    }
    catch (Exception observerError)
    {
        // alog.md §7 best-effort: a failing audit observer is logged and ignored so it
        // is never mistaken for a transient delivery failure.
        _logger.LogWarning(observerError,
            "ICachedCallLifecycleObserver threw for {MessageId} (Outcome {Outcome}); ignored",
            message.Id, outcome);
    }
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// WP-12: Gets parked messages for central query (Pattern 8).
|
/// WP-12: Gets parked messages for central query (Pattern 8).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|||||||
@@ -0,0 +1,298 @@
|
|||||||
|
using Microsoft.Data.Sqlite;
|
||||||
|
using Microsoft.Extensions.Logging.Abstractions;
|
||||||
|
using ScadaLink.Commons.Interfaces.Services;
|
||||||
|
using ScadaLink.Commons.Types;
|
||||||
|
using ScadaLink.Commons.Types.Enums;
|
||||||
|
|
||||||
|
namespace ScadaLink.StoreAndForward.Tests;
|
||||||
|
|
||||||
|
/// <summary>
/// Audit Log #23 — M3 Bundle E Tasks E4 + E5: verifies that the store-and-forward
/// retry loop reports every cached-call attempt to the registered
/// <see cref="ICachedCallLifecycleObserver"/> through a
/// <see cref="CachedCallAttemptContext"/> built from the underlying
/// <see cref="StoreAndForwardMessage"/>. The audit bridge (not under test here) turns
/// those notifications into <c>CachedCallTelemetry</c> packets — Attempted on every
/// retry, CachedResolve on terminal transitions. The tests configure a zero
/// <c>DefaultRetryInterval</c> and drive each sweep explicitly by calling
/// <see cref="StoreAndForwardService.RetryPendingMessagesAsync"/> rather than waiting
/// for the retry timer.
/// </summary>
public class CachedCallAttemptEmissionTests : IAsyncLifetime, IDisposable
{
    private readonly SqliteConnection _keepAlive;
    private readonly StoreAndForwardStorage _storage;
    private readonly StoreAndForwardService _service;
    private readonly StoreAndForwardOptions _options;
    private readonly CapturingObserver _observer;

    /// <summary>
    /// Builds a fresh storage, observer and service for each test over a uniquely
    /// named shared in-memory SQLite database.
    /// </summary>
    public CachedCallAttemptEmissionTests()
    {
        var connectionString = $"Data Source=E4Tests_{Guid.NewGuid():N};Mode=Memory;Cache=Shared";

        // Opened here and held until Dispose().
        _keepAlive = new SqliteConnection(connectionString);
        _keepAlive.Open();

        _storage = new StoreAndForwardStorage(
            connectionString,
            NullLogger<StoreAndForwardStorage>.Instance);

        _options = new StoreAndForwardOptions
        {
            DefaultMaxRetries = 3,
            DefaultRetryInterval = TimeSpan.Zero,
            RetryTimerInterval = TimeSpan.FromMinutes(10),
        };

        _observer = new CapturingObserver();

        _service = new StoreAndForwardService(
            _storage,
            _options,
            NullLogger<StoreAndForwardService>.Instance,
            replication: null,
            cachedCallObserver: _observer,
            siteId: "site-77");
    }

    /// <summary>Initialises the storage before each test.</summary>
    public async Task InitializeAsync()
    {
        await _storage.InitializeAsync();
    }

    /// <summary>No asynchronous teardown is required.</summary>
    public Task DisposeAsync()
    {
        return Task.CompletedTask;
    }

    /// <summary>Disposes the connection opened by the constructor.</summary>
    public void Dispose()
    {
        _keepAlive.Dispose();
    }

    /// <summary>
    /// Test double that records each notification in arrival order, or — when
    /// <see cref="ThrowOnNotify"/> is set — returns a faulted task instead.
    /// </summary>
    private sealed class CapturingObserver : ICachedCallLifecycleObserver
    {
        public List<CachedCallAttemptContext> Notifications { get; } = new();

        public Exception? ThrowOnNotify { get; set; }

        public Task OnAttemptCompletedAsync(CachedCallAttemptContext context, CancellationToken ct = default)
        {
            if (ThrowOnNotify is { } simulatedFailure)
            {
                return Task.FromException(simulatedFailure);
            }

            lock (Notifications)
            {
                Notifications.Add(context);
            }

            return Task.CompletedTask;
        }
    }

    /// <summary>
    /// Buffers one message (no immediate delivery attempt) whose message id is a
    /// freshly generated <see cref="TrackedOperationId"/>, per the Bundle E3 contract.
    /// </summary>
    /// <returns>The tracking id that was used as the message id.</returns>
    private async Task<TrackedOperationId> EnqueueBufferedAsync(
        StoreAndForwardCategory category, string target, int maxRetries = 3)
    {
        var operationId = TrackedOperationId.New();

        await _service.EnqueueAsync(
            category,
            target,
            """{"payload":"x"}""",
            originInstanceName: "Plant.Pump42",
            maxRetries: maxRetries,
            retryInterval: TimeSpan.Zero,
            attemptImmediateDelivery: false,
            messageId: operationId.ToString());

        return operationId;
    }

    // ── Task E4: per-attempt observer notifications ──

    [Fact]
    public async Task Attempt_FailWithHttp500_EmitsAttemptedTelemetry()
    {
        // A buffered ExternalSystem call whose first retry attempt throws.
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
            _ => throw new HttpRequestException("HTTP 500 from ERP"));
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.ExternalSystem, "ERP", maxRetries: 5);

        await _service.RetryPendingMessagesAsync();

        var captured = Assert.Single(_observer.Notifications);
        Assert.Equal(expectedId, captured.TrackedOperationId);
        Assert.Equal("ApiOutbound", captured.Channel);
        Assert.Equal("ERP", captured.Target);
        Assert.Equal("site-77", captured.SourceSite);
        Assert.Equal(CachedCallAttemptOutcome.TransientFailure, captured.Outcome);
        Assert.Equal(1, captured.RetryCount);
        Assert.Contains("HTTP 500", captured.LastError);
        Assert.Equal("Plant.Pump42", captured.SourceInstanceId);
    }

    [Fact]
    public async Task Attempt_Success_EmitsDeliveredOutcome()
    {
        // A buffered ExternalSystem call that is delivered on its first retry attempt.
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
            _ => Task.FromResult(true));
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.ExternalSystem, "ERP");

        await _service.RetryPendingMessagesAsync();

        var captured = Assert.Single(_observer.Notifications);
        Assert.Equal(expectedId, captured.TrackedOperationId);
        Assert.Equal(CachedCallAttemptOutcome.Delivered, captured.Outcome);
        Assert.Null(captured.LastError);
    }

    [Fact]
    public async Task Attempt_PermanentFailure_EmitsPermanentFailureOutcome()
    {
        // A handler returning false signals a permanent failure.
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
            _ => Task.FromResult(false));
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.ExternalSystem, "ERP");

        await _service.RetryPendingMessagesAsync();

        var captured = Assert.Single(_observer.Notifications);
        Assert.Equal(expectedId, captured.TrackedOperationId);
        Assert.Equal(CachedCallAttemptOutcome.PermanentFailure, captured.Outcome);
    }

    [Fact]
    public async Task Attempt_CachedDbWrite_EmitsDbOutboundChannel()
    {
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.CachedDbWrite,
            _ => Task.FromResult(true));
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.CachedDbWrite, "myDb");

        await _service.RetryPendingMessagesAsync();

        var captured = Assert.Single(_observer.Notifications);
        Assert.Equal(expectedId, captured.TrackedOperationId);
        Assert.Equal("DbOutbound", captured.Channel);
        Assert.Equal("myDb", captured.Target);
    }

    [Fact]
    public async Task Attempt_NotificationCategory_NoObserverNotification()
    {
        // Notification messages are not cached calls — they reach central through a
        // separate forwarder — so the observer must stay silent for them.
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.Notification,
            _ => Task.FromResult(true));
        await _service.EnqueueAsync(
            StoreAndForwardCategory.Notification,
            "alerts",
            """{"subject":"x"}""",
            originInstanceName: "Plant.Pump42",
            attemptImmediateDelivery: false);

        await _service.RetryPendingMessagesAsync();

        Assert.Empty(_observer.Notifications);
    }

    [Fact]
    public async Task Attempt_MessageIdNotAGuid_NoObserverNotification()
    {
        // Messages enqueued before the rollout may carry an id that cannot be parsed
        // into a TrackedOperationId. For those the observer is skipped silently so the
        // S&F bookkeeping is left untouched.
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
            _ => Task.FromResult(true));
        await _service.EnqueueAsync(
            StoreAndForwardCategory.ExternalSystem,
            "ERP",
            """{}""",
            originInstanceName: "Plant.Pump42",
            attemptImmediateDelivery: false,
            messageId: "not-a-valid-guid-id");

        await _service.RetryPendingMessagesAsync();

        Assert.Empty(_observer.Notifications);
    }

    // ── Task E5: terminal-state observer notifications ──

    [Fact]
    public async Task Terminal_Delivered_EmitsResolveWithDeliveredStatus()
    {
        // The S&F layer emits exactly ONE notification per attempt; a successful retry
        // therefore yields a single Delivered notification, which the audit bridge
        // fans out into the Attempted row and the terminal CachedResolve(Delivered) row.
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
            _ => Task.FromResult(true));
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.ExternalSystem, "ERP");

        await _service.RetryPendingMessagesAsync();

        var captured = Assert.Single(_observer.Notifications);
        Assert.Equal(expectedId, captured.TrackedOperationId);
        Assert.Equal(CachedCallAttemptOutcome.Delivered, captured.Outcome);
    }

    [Fact]
    public async Task Terminal_Parked_OnMaxRetries_EmitsParkedMaxRetries()
    {
        // The handler fails transiently on every attempt.
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
            _ => throw new HttpRequestException("Connection refused"));
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.ExternalSystem, "ERP", maxRetries: 2);

        // With maxRetries = 2 the second sweep brings RetryCount to 2 and parks the message.
        await _service.RetryPendingMessagesAsync();
        await _service.RetryPendingMessagesAsync();

        var captured = _observer.Notifications;
        Assert.Equal(2, captured.Count);

        var firstAttempt = captured[0];
        var finalAttempt = captured[1];
        Assert.Equal(CachedCallAttemptOutcome.TransientFailure, firstAttempt.Outcome);
        Assert.Equal(CachedCallAttemptOutcome.ParkedMaxRetries, finalAttempt.Outcome);
        Assert.Equal(expectedId, finalAttempt.TrackedOperationId);
    }

    [Fact]
    public async Task Lifecycle_RetryFail_RetrySucceed_EmitsExpectedSequence()
    {
        // First invocation throws, every later invocation succeeds.
        var invocations = 0;
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem, _ =>
        {
            if (++invocations > 1)
            {
                return Task.FromResult(true);
            }

            throw new HttpRequestException("transient");
        });
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.ExternalSystem, "ERP", maxRetries: 5);

        await _service.RetryPendingMessagesAsync();
        await _service.RetryPendingMessagesAsync();

        var captured = _observer.Notifications;
        Assert.Equal(2, captured.Count);

        var failedAttempt = captured[0];
        var deliveredAttempt = captured[1];
        Assert.Equal(CachedCallAttemptOutcome.TransientFailure, failedAttempt.Outcome);
        Assert.Equal(1, failedAttempt.RetryCount);
        Assert.Equal(CachedCallAttemptOutcome.Delivered, deliveredAttempt.Outcome);
        Assert.Equal(expectedId, deliveredAttempt.TrackedOperationId);
    }

    // ── Best-effort contract: observer throws must NOT corrupt retry bookkeeping ──

    [Fact]
    public async Task Observer_Throws_DoesNotCorruptRetryCount()
    {
        _observer.ThrowOnNotify = new InvalidOperationException("simulated audit failure");
        _service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
            _ => Task.FromResult(true));
        var expectedId = await EnqueueBufferedAsync(
            StoreAndForwardCategory.ExternalSystem, "ERP");

        // The sweep itself must complete without throwing — the observer is best-effort.
        await _service.RetryPendingMessagesAsync();

        // The handler reported success, so the message must have been removed from storage.
        var remaining = await _storage.GetMessageByIdAsync(expectedId.ToString());
        Assert.Null(remaining);
    }
}
|
||||||
Reference in New Issue
Block a user