fix(store-and-forward): resolve StoreAndForward-015..017 — document maxRetries=0 contract, replicate operator retry/discard, real category in activity log

This commit is contained in:
Joseph Doherty
2026-05-17 03:18:41 -04:00
parent be274212f0
commit 0135a6b2a6
6 changed files with 283 additions and 12 deletions

View File

@@ -73,6 +73,22 @@ public class ReplicationService
message));
}
/// <summary>
/// WP-11 / StoreAndForward-016: Forwards an operator-initiated requeue (a parked
/// message returned to the pending queue) to the standby node without waiting
/// for the outcome (fire-and-forget). The supplied message is sent as-is, so it
/// should already carry the active node's post-requeue state (Pending,
/// retry_count = 0) for the standby's copy to be brought into sync.
/// </summary>
/// <param name="message">The requeued message; its Id identifies the operation.</param>
public void ReplicateRequeue(StoreAndForwardMessage message)
{
    // Nothing to do unless replication is switched on and a handler is registered.
    var canReplicate = _options.ReplicationEnabled && _replicationHandler != null;
    if (!canReplicate)
    {
        return;
    }

    var requeueOperation = new ReplicationOperation(
        ReplicationOperationType.Requeue,
        message.Id,
        message);

    FireAndForget(requeueOperation);
}
/// <summary>
/// WP-11: Applies a replicated operation received from the active node.
/// Used by the standby node to keep its SQLite in sync.
@@ -95,6 +111,15 @@ public class ReplicationService
operation.Message.Status = StoreAndForwardMessageStatus.Parked;
await storage.UpdateMessageAsync(operation.Message);
break;
case ReplicationOperationType.Requeue when operation.Message != null:
// StoreAndForward-016: an operator retried a parked message on the
// active node; mirror that on the standby by moving its row back to
// Pending with retry_count = 0 so a failover preserves the retry.
operation.Message.Status = StoreAndForwardMessageStatus.Pending;
operation.Message.RetryCount = 0;
await storage.UpdateMessageAsync(operation.Message);
break;
}
}
@@ -132,5 +157,10 @@ public enum ReplicationOperationType
{
Add,
Remove,
Park
Park,
/// <summary>
/// StoreAndForward-016: an operator moved a parked message back to the pending
/// queue. The standby resets its matching row to Pending with retry_count = 0.
/// </summary>
Requeue
}

View File

@@ -27,7 +27,12 @@ public class StoreAndForwardMessage
/// </summary>
public int RetryCount { get; set; }
/// <summary>Maximum retry-sweep attempts before parking (0 = no limit).</summary>
/// <summary>
/// Maximum retry-sweep attempts before the message is parked.
/// <c>0</c> = no limit — the message is retried on every sweep until delivered
/// and is never parked for exhausting retries. This is <b>not</b> a "never retry"
/// value; a positive value is required to bound delivery attempts.
/// </summary>
public int MaxRetries { get; set; }
/// <summary>Retry interval in milliseconds.</summary>

View File

@@ -10,7 +10,8 @@ namespace ScadaLink.StoreAndForward;
/// 1. Caller attempts immediate delivery via IDeliveryHandler
/// 2. On transient failure → buffer in SQLite → retry loop
/// 3. On success → remove from buffer
/// 4. On max retries → park
/// 4. On reaching MaxRetries → park (a MaxRetries of 0 means "no limit" — the
/// message is retried until delivered and is never parked for retry exhaustion)
/// 5. Permanent failures are returned to caller immediately (never buffered)
///
/// WP-10: Fixed retry interval (not exponential). Per-source-entity retry settings.
@@ -116,10 +117,38 @@ public class StoreAndForwardService
/// Attempts immediate delivery first. On transient failure, buffers for retry.
/// On permanent failure (handler returns false), returns false immediately.
///
/// WP-10: Retry-count lifecycle — the immediate (or caller-made) delivery attempt
/// is attempt 0 and is not counted; the background retry sweep increments
/// <see cref="StoreAndForwardMessage.RetryCount"/> on each retry. A buffered
/// message is parked once <c>RetryCount</c> reaches <paramref name="maxRetries"/>
/// — <b>but only when <paramref name="maxRetries"/> is greater than 0</b>. A
/// <paramref name="maxRetries"/> of <c>0</c> means <b>no limit</b>: the message is
/// retried on every sweep until it is delivered and is <b>never parked</b> on a
/// retry-count basis. It is therefore <i>not</i> a "do not retry" value — callers
/// that want delivery abandoned after a bounded number of attempts must pass a
/// positive <paramref name="maxRetries"/>.
///
/// WP-15: CachedCall idempotency note — this method does not deduplicate.
/// The caller (e.g., ExternalSystem.CachedCall()) is responsible for ensuring
/// that the remote system can handle duplicate deliveries safely.
/// </summary>
/// <param name="category">Message category (selects the delivery handler).</param>
/// <param name="target">Target system name (external system / notification list / DB connection).</param>
/// <param name="payloadJson">JSON-serialized call payload, treated opaquely.</param>
/// <param name="originInstanceName">Instance that originated the message (WP-13: survives instance deletion).</param>
/// <param name="maxRetries">
/// Maximum background retry-sweep attempts before the message is parked.
/// <b><c>0</c> = no limit</b> — the message is retried on every sweep until
/// delivered and is never parked for exhausting retries; it is <b>not</b> a
/// "never retry" value. <c>null</c> uses <see cref="StoreAndForwardOptions.DefaultMaxRetries"/>.
/// Must be positive to bound delivery attempts. Mirrors the
/// <see cref="StoreAndForwardMessage.MaxRetries"/> contract.
/// </param>
/// <param name="retryInterval">Fixed interval between retry sweeps for this message; <c>null</c> uses the configured default.</param>
/// <param name="attemptImmediateDelivery">
/// When <c>false</c>, the caller has already made its own delivery attempt and the
/// message is buffered directly for the retry sweep (the handler is not invoked here).
/// </param>
public async Task<StoreAndForwardResult> EnqueueAsync(
StoreAndForwardCategory category,
string target,
@@ -335,13 +364,27 @@ public class StoreAndForwardService
/// <summary>
/// WP-12: Retries a parked message (moves back to pending queue).
///
/// StoreAndForward-016: an operator requeue is a buffer state change and is
/// replicated to the standby (as a <see cref="ReplicationOperationType.Requeue"/>)
/// so a failover preserves the operator's retry intent.
/// StoreAndForward-017: the activity-log entry carries the message's true
/// category rather than a hard-coded one.
/// </summary>
public async Task<bool> RetryParkedMessageAsync(string messageId)
{
var success = await _storage.RetryParkedMessageAsync(messageId);
if (success)
{
RaiseActivity("Retry", StoreAndForwardCategory.ExternalSystem,
// Re-load the requeued row so the activity log gets the real category
// and the standby gets the post-requeue state (Pending, retry_count = 0).
var message = await _storage.GetMessageByIdAsync(messageId);
var category = message?.Category ?? StoreAndForwardCategory.ExternalSystem;
if (message != null)
{
_replication?.ReplicateRequeue(message);
}
RaiseActivity("Retry", category,
$"Parked message {messageId} moved back to queue");
}
return success;
@@ -349,13 +392,23 @@ public class StoreAndForwardService
/// <summary>
/// WP-12: Permanently discards a parked message.
///
/// StoreAndForward-016: an operator discard is a buffer removal and is replicated
/// to the standby (as a <see cref="ReplicationOperationType.Remove"/>) so the
/// discarded message does not reappear after a failover.
/// StoreAndForward-017: the activity-log entry carries the message's true
/// category rather than a hard-coded one.
/// </summary>
public async Task<bool> DiscardParkedMessageAsync(string messageId)
{
// Capture the category before the row is deleted so the activity log is
// labelled correctly.
var message = await _storage.GetMessageByIdAsync(messageId);
var success = await _storage.DiscardParkedMessageAsync(messageId);
if (success)
{
RaiseActivity("Discard", StoreAndForwardCategory.ExternalSystem,
_replication?.ReplicateRemove(messageId);
RaiseActivity("Discard", message?.Category ?? StoreAndForwardCategory.ExternalSystem,
$"Parked message {messageId} discarded");
}
return success;