fix(store-and-forward): resolve S&F delivery + replication wiring (3 Critical findings)
Resolves StoreAndForward-001, ExternalSystemGateway-001, NotificationService-001 — one systemic gap where buffered messages were persisted but never delivered, and the active node never replicated its buffer to the standby.

Delivery handlers (ExternalSystemGateway-001 / NotificationService-001):
- AkkaHostedService registers delivery handlers for the ExternalSystem, CachedDbWrite and Notification categories after StoreAndForwardService starts; each resolves its scoped consumer in a fresh DI scope.
- ExternalSystemClient, DatabaseGateway and NotificationDeliveryService each gain a DeliverBufferedAsync method: re-resolve the target and re-attempt delivery, returning true/false/throwing per the transient-vs-permanent contract.
- EnqueueAsync gains an attemptImmediateDelivery flag; CachedCallAsync and NotificationDeliveryService.SendAsync pass false (they already attempted delivery themselves) so registering a handler does not dispatch twice.

Replication (StoreAndForward-001):
- ReplicationService is injected into StoreAndForwardService; a new BufferAsync helper replicates every enqueue, and successful-retry removes and parks are replicated too. Fire-and-forget, no-op when replication is disabled.

Tests: StoreAndForwardReplicationTests (Add/Remove/Park observed), attemptImmediateDelivery behaviour, and DeliverBufferedAsync paths for each consumer.

Full solution builds; StoreAndForward/ExternalSystemGateway/NotificationService suites green.
This commit is contained in:
@@ -22,7 +22,8 @@ public static class ServiceCollectionExtensions
|
||||
var storage = sp.GetRequiredService<StoreAndForwardStorage>();
|
||||
var options = sp.GetRequiredService<IOptions<StoreAndForwardOptions>>().Value;
|
||||
var logger = sp.GetRequiredService<ILogger<StoreAndForwardService>>();
|
||||
return new StoreAndForwardService(storage, options, logger);
|
||||
var replication = sp.GetRequiredService<ReplicationService>();
|
||||
return new StoreAndForwardService(storage, options, logger, replication);
|
||||
});
|
||||
|
||||
services.AddSingleton<ReplicationService>(sp =>
|
||||
|
||||
@@ -30,6 +30,7 @@ public class StoreAndForwardService
|
||||
{
|
||||
private readonly StoreAndForwardStorage _storage;
|
||||
private readonly StoreAndForwardOptions _options;
|
||||
private readonly ReplicationService? _replication;
|
||||
private readonly ILogger<StoreAndForwardService> _logger;
|
||||
private Timer? _retryTimer;
|
||||
private int _retryInProgress;
|
||||
@@ -48,11 +49,13 @@ public class StoreAndForwardService
|
||||
public StoreAndForwardService(
|
||||
StoreAndForwardStorage storage,
|
||||
StoreAndForwardOptions options,
|
||||
ILogger<StoreAndForwardService> logger)
|
||||
ILogger<StoreAndForwardService> logger,
|
||||
ReplicationService? replication = null)
|
||||
{
|
||||
_storage = storage;
|
||||
_options = options;
|
||||
_logger = logger;
|
||||
_replication = replication;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -109,7 +112,8 @@ public class StoreAndForwardService
|
||||
string payloadJson,
|
||||
string? originInstanceName = null,
|
||||
int? maxRetries = null,
|
||||
TimeSpan? retryInterval = null)
|
||||
TimeSpan? retryInterval = null,
|
||||
bool attemptImmediateDelivery = true)
|
||||
{
|
||||
var message = new StoreAndForwardMessage
|
||||
{
|
||||
@@ -125,8 +129,10 @@ public class StoreAndForwardService
|
||||
OriginInstanceName = originInstanceName
|
||||
};
|
||||
|
||||
// Attempt immediate delivery
|
||||
if (_deliveryHandlers.TryGetValue(category, out var handler))
|
||||
// Attempt immediate delivery — unless the caller has already made a
|
||||
// delivery attempt of its own (attemptImmediateDelivery: false). In that
|
||||
// case re-invoking the handler here would dispatch the request twice.
|
||||
if (attemptImmediateDelivery && _deliveryHandlers.TryGetValue(category, out var handler))
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -136,11 +142,9 @@ public class StoreAndForwardService
|
||||
RaiseActivity("Delivered", category, $"Immediate delivery to {target}");
|
||||
return new StoreAndForwardResult(true, message.Id, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Permanent failure — do not buffer
|
||||
return new StoreAndForwardResult(false, message.Id, false);
|
||||
}
|
||||
|
||||
// Permanent failure — do not buffer
|
||||
return new StoreAndForwardResult(false, message.Id, false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
@@ -152,19 +156,39 @@ public class StoreAndForwardService
|
||||
message.LastAttemptAt = DateTimeOffset.UtcNow;
|
||||
message.RetryCount = 1;
|
||||
message.LastError = ex.Message;
|
||||
await _storage.EnqueueAsync(message);
|
||||
await BufferAsync(message);
|
||||
|
||||
RaiseActivity("Queued", category, $"Buffered for retry: {target} ({ex.Message})");
|
||||
return new StoreAndForwardResult(true, message.Id, true);
|
||||
}
|
||||
}
|
||||
|
||||
// No handler registered — buffer for later
|
||||
await _storage.EnqueueAsync(message);
|
||||
RaiseActivity("Queued", category, $"No handler registered, buffered: {target}");
|
||||
// Either no handler is registered yet, or the caller already attempted
|
||||
// delivery itself — buffer for the background retry sweep to deliver.
|
||||
if (!attemptImmediateDelivery)
|
||||
{
|
||||
// The caller made (and failed) one attempt before handing the
|
||||
// message over, so it counts as the first retry.
|
||||
message.RetryCount = 1;
|
||||
message.LastAttemptAt = DateTimeOffset.UtcNow;
|
||||
}
|
||||
await BufferAsync(message);
|
||||
RaiseActivity("Queued", category, attemptImmediateDelivery
|
||||
? $"No handler registered, buffered: {target}"
|
||||
: $"Buffered for retry: {target}");
|
||||
return new StoreAndForwardResult(true, message.Id, true);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Persists a message to the local SQLite buffer and (WP-11) replicates the
|
||||
/// add to the standby node so a failover does not lose the buffered message.
|
||||
/// </summary>
|
||||
/// <param name="message">The message to store and then hand to replication.</param>
/// <returns>A task that completes once the storage enqueue has completed.</returns>
/// <remarks>
/// The storage write is awaited first; the replication call is made only after
/// it returns, and its result is not awaited here. When no replication service
/// was supplied to the constructor the replication step is skipped.
/// NOTE(review): an exception thrown by the storage enqueue propagates to the
/// caller and the replication call is not reached — confirm this is intended.
/// </remarks>
private async Task BufferAsync(StoreAndForwardMessage message)
|
||||
{
|
||||
// Local persistence comes first so the message is stored before it is replicated.
await _storage.EnqueueAsync(message);
|
||||
// Null-conditional call: nothing happens when _replication is null.
_replication?.ReplicateEnqueue(message);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// WP-10: Background retry sweep. Processes all pending messages that are due for retry.
|
||||
/// </summary>
|
||||
@@ -210,6 +234,7 @@ public class StoreAndForwardService
|
||||
if (success)
|
||||
{
|
||||
await _storage.RemoveMessageAsync(message.Id);
|
||||
_replication?.ReplicateRemove(message.Id);
|
||||
RaiseActivity("Delivered", message.Category,
|
||||
$"Delivered to {message.Target} after {message.RetryCount} retries");
|
||||
return;
|
||||
@@ -220,6 +245,7 @@ public class StoreAndForwardService
|
||||
message.LastAttemptAt = DateTimeOffset.UtcNow;
|
||||
message.LastError = "Permanent failure (handler returned false)";
|
||||
await _storage.UpdateMessageAsync(message);
|
||||
_replication?.ReplicatePark(message);
|
||||
RaiseActivity("Parked", message.Category,
|
||||
$"Permanent failure for {message.Target}: handler returned false");
|
||||
}
|
||||
@@ -234,6 +260,7 @@ public class StoreAndForwardService
|
||||
{
|
||||
message.Status = StoreAndForwardMessageStatus.Parked;
|
||||
await _storage.UpdateMessageAsync(message);
|
||||
_replication?.ReplicatePark(message);
|
||||
RaiseActivity("Parked", message.Category,
|
||||
$"Max retries ({message.MaxRetries}) reached for {message.Target}");
|
||||
_logger.LogWarning(
|
||||
|
||||
Reference in New Issue
Block a user