fix(store-and-forward): resolve S&F delivery + replication wiring (3 Critical findings)

Resolves StoreAndForward-001, ExternalSystemGateway-001, NotificationService-001
— one systemic gap where buffered messages were persisted but never delivered,
and the active node never replicated its buffer to the standby.

Delivery handlers (ExternalSystemGateway-001 / NotificationService-001):
- AkkaHostedService registers delivery handlers for the ExternalSystem,
  CachedDbWrite and Notification categories after StoreAndForwardService starts;
  each resolves its scoped consumer in a fresh DI scope.
- ExternalSystemClient, DatabaseGateway and NotificationDeliveryService each
  gain a DeliverBufferedAsync method: re-resolve the target and re-attempt
  delivery, returning true/false/throwing per the transient-vs-permanent contract.
- EnqueueAsync gains an attemptImmediateDelivery flag; CachedCallAsync and
  NotificationDeliveryService.SendAsync pass false (they already attempted
  delivery themselves) so registering a handler does not dispatch twice.

Replication (StoreAndForward-001):
- ReplicationService is injected into StoreAndForwardService; a new BufferAsync
  helper replicates every enqueue, and successful-retry removes and parks are
  replicated too. Fire-and-forget, no-op when replication is disabled.

Tests: StoreAndForwardReplicationTests (Add/Remove/Park observed),
attemptImmediateDelivery behaviour, and DeliverBufferedAsync paths for each
consumer. Full solution builds; StoreAndForward/ExternalSystemGateway/
NotificationService suites green.
This commit is contained in:
Joseph Doherty
2026-05-16 18:58:11 -04:00
parent a9bd7ee37c
commit 61253e3269
15 changed files with 538 additions and 37 deletions

View File

@@ -87,6 +87,64 @@ public class DatabaseGateway : IDatabaseGateway
definition.RetryDelay > TimeSpan.Zero ? definition.RetryDelay : null);
}
/// <summary>
/// WP-9/10: Delivers a buffered CachedDbWrite during a store-and-forward retry
/// sweep — executes the SQL against the named connection. Returns true on
/// success, false if the connection no longer exists (the message is parked);
/// throws on any execution error so the engine retries.
/// </summary>
public async Task<bool> DeliverBufferedAsync(
StoreAndForwardMessage message, CancellationToken cancellationToken = default)
{
var payload = JsonSerializer.Deserialize<CachedWritePayload>(message.PayloadJson);
if (payload == null || string.IsNullOrEmpty(payload.ConnectionName) || string.IsNullOrEmpty(payload.Sql))
{
_logger.LogError("Buffered CachedDbWrite message {Id} has an unreadable payload; parking.", message.Id);
return false;
}
var definition = await ResolveConnectionAsync(payload.ConnectionName, cancellationToken);
if (definition == null)
{
_logger.LogError(
"Buffered DB write to '{Connection}' cannot be delivered — the connection no longer exists; parking.",
payload.ConnectionName);
return false;
}
await using var connection = new SqlConnection(definition.ConnectionString);
await connection.OpenAsync(cancellationToken);
using var command = connection.CreateCommand();
command.CommandText = payload.Sql;
if (payload.Parameters != null)
{
foreach (var (key, value) in payload.Parameters)
{
var parameter = command.CreateParameter();
parameter.ParameterName = key.StartsWith('@') ? key : "@" + key;
parameter.Value = JsonElementToParameterValue(value);
command.Parameters.Add(parameter);
}
}
await command.ExecuteNonQueryAsync(cancellationToken);
return true;
}
private static object JsonElementToParameterValue(JsonElement element) => element.ValueKind switch
{
JsonValueKind.String => (object?)element.GetString() ?? DBNull.Value,
JsonValueKind.Number => element.TryGetInt64(out var l) ? l : element.GetDouble(),
JsonValueKind.True => true,
JsonValueKind.False => false,
JsonValueKind.Null or JsonValueKind.Undefined => DBNull.Value,
_ => element.GetRawText()
};
private sealed record CachedWritePayload(
string ConnectionName,
string Sql,
Dictionary<string, JsonElement>? Parameters);
private async Task<DatabaseConnectionDefinition?> ResolveConnectionAsync(
string connectionName,
CancellationToken cancellationToken)

View File

@@ -106,18 +106,67 @@ public class ExternalSystemClient : IExternalSystemClient
Parameters = parameters
});
var sfResult = await _storeAndForward.EnqueueAsync(
// attemptImmediateDelivery: false — this method already made the HTTP
// attempt above; letting EnqueueAsync re-invoke the handler would
// dispatch the same request a second time.
await _storeAndForward.EnqueueAsync(
StoreAndForwardCategory.ExternalSystem,
systemName,
payload,
originInstanceName,
system.MaxRetries > 0 ? system.MaxRetries : null,
system.RetryDelay > TimeSpan.Zero ? system.RetryDelay : null);
system.RetryDelay > TimeSpan.Zero ? system.RetryDelay : null,
attemptImmediateDelivery: false);
return new ExternalCallResult(true, null, null, WasBuffered: true);
}
}
/// <summary>
/// WP-7/10: Delivers a buffered ExternalSystem call during a store-and-forward
/// retry sweep. Returns true on success, false on permanent failure (the message
/// is parked); throws <see cref="TransientExternalSystemException"/> on a
/// transient failure so the engine retries.
/// </summary>
public async Task<bool> DeliverBufferedAsync(
StoreAndForwardMessage message, CancellationToken cancellationToken = default)
{
var payload = JsonSerializer.Deserialize<CachedCallPayload>(message.PayloadJson);
if (payload == null || string.IsNullOrEmpty(payload.SystemName) || string.IsNullOrEmpty(payload.MethodName))
{
_logger.LogError("Buffered ExternalSystem message {Id} has an unreadable payload; parking.", message.Id);
return false;
}
var (system, method) = await ResolveSystemAndMethodAsync(
payload.SystemName, payload.MethodName, cancellationToken);
if (system == null || method == null)
{
_logger.LogError(
"Buffered call to '{System}'/'{Method}' cannot be delivered — the system or method no longer exists; parking.",
payload.SystemName, payload.MethodName);
return false;
}
var parameters = payload.Parameters?.ToDictionary(kv => kv.Key, kv => (object?)kv.Value);
try
{
await InvokeHttpAsync(system, method, parameters, cancellationToken);
return true;
}
catch (PermanentExternalSystemException ex)
{
_logger.LogError(ex, "Buffered call to '{System}' failed permanently; parking.", payload.SystemName);
return false;
}
// TransientExternalSystemException propagates — the S&F engine retries.
}
private sealed record CachedCallPayload(
string SystemName,
string MethodName,
Dictionary<string, JsonElement>? Parameters);
/// <summary>
/// WP-6: Executes the HTTP request against the external system.
/// </summary>