Phase 3C: Deployment pipeline & Store-and-Forward engine
Deployment Manager (WP-1–8, WP-16):
- DeploymentService: full pipeline (flatten→validate→send→track→audit)
- OperationLockManager: per-instance concurrency control
- StateTransitionValidator: Enabled/Disabled/NotDeployed transition matrix
- ArtifactDeploymentService: broadcast to all sites with per-site results
- Deployment identity (GUID + revision hash), idempotency, staleness detection
- Instance lifecycle commands (disable/enable/delete) with deduplication

Store-and-Forward (WP-9–15):
- StoreAndForwardStorage: SQLite persistence, 3 categories, no max buffer
- StoreAndForwardService: fixed-interval retry, transient-only buffering, parking
- ReplicationService: async best-effort to standby (fire-and-forget)
- Parked message management (query/retry/discard from central)
- Messages survive instance deletion, S&F drains on disable

620 tests pass (+79 new), zero warnings.
This commit is contained in:
322
src/ScadaLink.StoreAndForward/StoreAndForwardService.cs
Normal file
322
src/ScadaLink.StoreAndForward/StoreAndForwardService.cs
Normal file
@@ -0,0 +1,322 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.StoreAndForward;
|
||||
|
||||
/// <summary>
/// WP-9/10: Core store-and-forward service.
///
/// Lifecycle:
/// 1. Caller attempts immediate delivery via the handler registered for the message category
/// 2. On transient failure (handler throws) → buffer in SQLite → retry loop
/// 3. On success → remove from buffer
/// 4. On max retries → park
/// 5. Permanent failures (handler returns false) are returned to caller immediately (never buffered)
///
/// WP-10: Fixed retry interval (not exponential). Per-source-entity retry settings.
/// Background timer-based retry sweep.
///
/// WP-12: Parked messages queryable, retryable, and discardable.
///
/// WP-14: Buffer depth reported as health metric. Activity logged to site event log.
///
/// WP-15: CachedCall idempotency is the caller's responsibility.
/// This service does not deduplicate — if the same message is enqueued twice,
/// it will be delivered twice. Callers using ExternalSystem.CachedCall() must
/// design their payloads to be idempotent (e.g., include unique request IDs
/// and handle duplicate detection on the remote end).
/// </summary>
public class StoreAndForwardService
{
    private readonly StoreAndForwardStorage _storage;
    private readonly StoreAndForwardOptions _options;
    private readonly ILogger<StoreAndForwardService> _logger;

    // Periodic sweep timer; null while the service is not started.
    private Timer? _retryTimer;

    // 0 = idle, 1 = a retry sweep is running. Only touched through Interlocked.
    private int _retryInProgress;

    /// <summary>
    /// WP-10: Delivery handlers keyed by category. A handler returns true on success and
    /// throws on transient failure. Permanent failures should return false
    /// (the message will NOT be buffered).
    /// </summary>
    private readonly Dictionary<StoreAndForwardCategory, Func<StoreAndForwardMessage, Task<bool>>> _deliveryHandlers = new();

    /// <summary>
    /// WP-14: Event callback for logging store-and-forward activity to the site event log.
    /// Arguments are (action, category, detail). Exceptions thrown by subscribers are
    /// logged and swallowed so they cannot influence delivery bookkeeping.
    /// </summary>
    public event Action<string, StoreAndForwardCategory, string>? OnActivity;

    /// <summary>
    /// Creates the service over the given storage, options and logger.
    /// </summary>
    /// <param name="storage">Persistence used for buffered and parked messages.</param>
    /// <param name="options">Default retry settings and the sweep timer interval.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public StoreAndForwardService(
        StoreAndForwardStorage storage,
        StoreAndForwardOptions options,
        ILogger<StoreAndForwardService> logger)
    {
        _storage = storage;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Registers a delivery handler for a given message category.
    /// A later registration for the same category replaces the earlier one.
    /// </summary>
    /// <param name="category">Category the handler delivers.</param>
    /// <param name="handler">Delegate that performs the delivery.</param>
    /// <exception cref="ArgumentNullException"><paramref name="handler"/> is null.</exception>
    public void RegisterDeliveryHandler(
        StoreAndForwardCategory category,
        Func<StoreAndForwardMessage, Task<bool>> handler)
    {
        // A null handler would otherwise surface later as a NullReferenceException that the
        // delivery paths classify as a transient failure, retrying the message until parked.
        ArgumentNullException.ThrowIfNull(handler);

        _deliveryHandlers[category] = handler;
    }

    /// <summary>
    /// Initializes storage and starts the background retry timer.
    /// If the service was already started, the previous timer is disposed so it is not leaked.
    /// </summary>
    public async Task StartAsync()
    {
        await _storage.InitializeAsync();

        var previousTimer = _retryTimer;

        // The sweep task is intentionally discarded: RetryPendingMessagesAsync handles
        // its own exceptions and guards against overlapping runs.
        _retryTimer = new Timer(
            _ => _ = RetryPendingMessagesAsync(),
            null,
            _options.RetryTimerInterval,
            _options.RetryTimerInterval);

        if (previousTimer is not null)
        {
            await previousTimer.DisposeAsync();
        }

        // NOTE(review): this logs the default per-message retry interval, while the timer
        // above is driven by RetryTimerInterval — confirm that is the intended value.
        _logger.LogInformation(
            "Store-and-forward service started. Retry interval: {Interval}s",
            _options.DefaultRetryInterval.TotalSeconds);
    }

    /// <summary>
    /// Stops the background retry timer. Does nothing if the service is not started.
    /// The timer callback discards the sweep task, so a sweep that is already in flight
    /// is not awaited here.
    /// </summary>
    public async Task StopAsync()
    {
        var timer = _retryTimer;
        if (timer is null)
        {
            return;
        }

        // Clear the field first so a re-entrant call cannot dispose the same timer twice.
        _retryTimer = null;
        await timer.DisposeAsync();
    }

    /// <summary>
    /// WP-10: Enqueues a message for store-and-forward delivery.
    /// Attempts immediate delivery first. On transient failure, buffers for retry.
    /// On permanent failure (handler returns false), returns a non-accepted result immediately.
    /// If no handler is registered for the category, the message is buffered.
    ///
    /// WP-15: CachedCall idempotency note — this method does not deduplicate.
    /// The caller (e.g., ExternalSystem.CachedCall()) is responsible for ensuring
    /// that the remote system can handle duplicate deliveries safely.
    /// </summary>
    /// <param name="category">Message category; selects the delivery handler.</param>
    /// <param name="target">Delivery target, stored on the message and used in log/activity text.</param>
    /// <param name="payloadJson">Payload stored on the message.</param>
    /// <param name="originInstanceName">Optional name of the originating instance.</param>
    /// <param name="maxRetries">Per-message retry limit; defaults to <c>DefaultMaxRetries</c> from options.</param>
    /// <param name="retryInterval">Per-message retry interval; defaults to <c>DefaultRetryInterval</c> from options.</param>
    /// <returns>
    /// A result carrying the generated message ID, whether the message was accepted,
    /// and whether it was buffered rather than delivered immediately.
    /// </returns>
    public async Task<StoreAndForwardResult> EnqueueAsync(
        StoreAndForwardCategory category,
        string target,
        string payloadJson,
        string? originInstanceName = null,
        int? maxRetries = null,
        TimeSpan? retryInterval = null)
    {
        var message = new StoreAndForwardMessage
        {
            Id = Guid.NewGuid().ToString("N"),
            Category = category,
            Target = target,
            PayloadJson = payloadJson,
            RetryCount = 0,
            MaxRetries = maxRetries ?? _options.DefaultMaxRetries,
            RetryIntervalMs = (long)(retryInterval ?? _options.DefaultRetryInterval).TotalMilliseconds,
            CreatedAt = DateTimeOffset.UtcNow,
            Status = StoreAndForwardMessageStatus.Pending,
            OriginInstanceName = originInstanceName
        };

        // Attempt immediate delivery
        if (_deliveryHandlers.TryGetValue(category, out var handler))
        {
            try
            {
                var success = await handler(message);
                if (!success)
                {
                    // Permanent failure — do not buffer
                    return new StoreAndForwardResult(false, message.Id, false);
                }

                RaiseActivity("Delivered", category, $"Immediate delivery to {target}");
                return new StoreAndForwardResult(true, message.Id, false);
            }
            catch (Exception ex)
            {
                // Transient failure — buffer for retry
                _logger.LogWarning(ex,
                    "Immediate delivery to {Target} failed (transient), buffering for retry",
                    target);

                // The failed immediate attempt is counted as the first attempt.
                message.LastAttemptAt = DateTimeOffset.UtcNow;
                message.RetryCount = 1;
                message.LastError = ex.Message;
                await _storage.EnqueueAsync(message);

                RaiseActivity("Queued", category, $"Buffered for retry: {target} ({ex.Message})");
                return new StoreAndForwardResult(true, message.Id, true);
            }
        }

        // No handler registered — buffer for later
        await _storage.EnqueueAsync(message);
        RaiseActivity("Queued", category, $"No handler registered, buffered: {target}");
        return new StoreAndForwardResult(true, message.Id, true);
    }

    /// <summary>
    /// WP-10: Background retry sweep. Processes all pending messages that are due for retry.
    /// Returns immediately if another sweep is already running. Exceptions are logged,
    /// never thrown, because the timer callback does not observe the returned task.
    /// </summary>
    internal async Task RetryPendingMessagesAsync()
    {
        // Prevent overlapping retry sweeps
        if (Interlocked.CompareExchange(ref _retryInProgress, 1, 0) != 0)
        {
            return;
        }

        try
        {
            var messages = await _storage.GetMessagesForRetryAsync();
            if (messages.Count == 0)
            {
                return;
            }

            _logger.LogDebug("Retry sweep: {Count} messages due for retry", messages.Count);

            foreach (var message in messages)
            {
                await RetryMessageAsync(message);
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error during retry sweep");
        }
        finally
        {
            Interlocked.Exchange(ref _retryInProgress, 0);
        }
    }

    /// <summary>
    /// Attempts one redelivery of a buffered message and records the outcome:
    /// success removes it, a false result parks it as a permanent failure, and an
    /// exception increments the retry count and parks it once the limit is reached.
    /// </summary>
    /// <param name="message">Buffered message to redeliver; mutated with the attempt outcome.</param>
    private async Task RetryMessageAsync(StoreAndForwardMessage message)
    {
        if (!_deliveryHandlers.TryGetValue(message.Category, out var handler))
        {
            _logger.LogWarning("No delivery handler for category {Category}", message.Category);
            return;
        }

        try
        {
            var success = await handler(message);
            if (success)
            {
                await _storage.RemoveMessageAsync(message.Id);
                RaiseActivity("Delivered", message.Category,
                    $"Delivered to {message.Target} after {message.RetryCount} retries");
                return;
            }

            // Permanent failure on retry — park immediately
            message.Status = StoreAndForwardMessageStatus.Parked;
            message.LastAttemptAt = DateTimeOffset.UtcNow;
            message.LastError = "Permanent failure (handler returned false)";
            await _storage.UpdateMessageAsync(message);
            RaiseActivity("Parked", message.Category,
                $"Permanent failure for {message.Target}: handler returned false");
        }
        catch (Exception ex)
        {
            // Transient failure — increment retry, check max
            message.RetryCount++;
            message.LastAttemptAt = DateTimeOffset.UtcNow;
            message.LastError = ex.Message;

            // A MaxRetries of zero or less disables the limit: the message is never parked here.
            if (message.MaxRetries > 0 && message.RetryCount >= message.MaxRetries)
            {
                message.Status = StoreAndForwardMessageStatus.Parked;
                await _storage.UpdateMessageAsync(message);
                RaiseActivity("Parked", message.Category,
                    $"Max retries ({message.MaxRetries}) reached for {message.Target}");
                _logger.LogWarning(
                    "Message {MessageId} parked after {MaxRetries} retries to {Target}",
                    message.Id, message.MaxRetries, message.Target);
            }
            else
            {
                await _storage.UpdateMessageAsync(message);
                RaiseActivity("Retried", message.Category,
                    $"Retry {message.RetryCount}/{message.MaxRetries} for {message.Target}: {ex.Message}");
            }
        }
    }

    /// <summary>
    /// WP-12: Gets parked messages for central query (Pattern 8).
    /// </summary>
    /// <param name="category">Optional category filter, passed through to storage.</param>
    /// <param name="pageNumber">Page number, passed through to storage.</param>
    /// <param name="pageSize">Page size, passed through to storage.</param>
    /// <returns>The requested page of parked messages and the total count reported by storage.</returns>
    public async Task<(List<StoreAndForwardMessage> Messages, int TotalCount)> GetParkedMessagesAsync(
        StoreAndForwardCategory? category = null,
        int pageNumber = 1,
        int pageSize = 50)
    {
        return await _storage.GetParkedMessagesAsync(category, pageNumber, pageSize);
    }

    /// <summary>
    /// WP-12: Retries a parked message (moves back to pending queue).
    /// </summary>
    /// <param name="messageId">ID of the parked message.</param>
    /// <returns>The success flag reported by storage.</returns>
    public async Task<bool> RetryParkedMessageAsync(string messageId)
    {
        var success = await _storage.RetryParkedMessageAsync(messageId);
        if (success)
        {
            // NOTE(review): the activity is always reported as ExternalSystem because the
            // message's real category is not available here — confirm this is acceptable.
            RaiseActivity("Retry", StoreAndForwardCategory.ExternalSystem,
                $"Parked message {messageId} moved back to queue");
        }

        return success;
    }

    /// <summary>
    /// WP-12: Permanently discards a parked message.
    /// </summary>
    /// <param name="messageId">ID of the parked message.</param>
    /// <returns>The success flag reported by storage.</returns>
    public async Task<bool> DiscardParkedMessageAsync(string messageId)
    {
        var success = await _storage.DiscardParkedMessageAsync(messageId);
        if (success)
        {
            // NOTE(review): the activity is always reported as ExternalSystem because the
            // message's real category is not available here — confirm this is acceptable.
            RaiseActivity("Discard", StoreAndForwardCategory.ExternalSystem,
                $"Parked message {messageId} discarded");
        }

        return success;
    }

    /// <summary>
    /// WP-14: Gets buffer depth by category for health reporting.
    /// </summary>
    public async Task<Dictionary<StoreAndForwardCategory, int>> GetBufferDepthAsync()
    {
        return await _storage.GetBufferDepthByCategoryAsync();
    }

    /// <summary>
    /// WP-13: Gets count of store-and-forward messages for a given instance
    /// (for verifying survival on deletion).
    /// </summary>
    /// <param name="instanceName">Origin instance name to count messages for.</param>
    public async Task<int> GetMessageCountForInstanceAsync(string instanceName)
    {
        return await _storage.GetMessageCountByOriginInstanceAsync(instanceName);
    }

    /// <summary>
    /// Notifies <see cref="OnActivity"/> subscribers. Subscriber exceptions are logged and
    /// swallowed: several call sites sit inside the try blocks that classify handler
    /// exceptions as transient delivery failures, so a throwing subscriber would otherwise
    /// cause an already-delivered message to be buffered or retried.
    /// </summary>
    private void RaiseActivity(string action, StoreAndForwardCategory category, string detail)
    {
        try
        {
            OnActivity?.Invoke(action, category, detail);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex,
                "Store-and-forward activity subscriber failed for action {Action} ({Category})",
                action, category);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Result of an enqueue operation.
/// </summary>
/// <param name="Accepted">True if the message was accepted (either delivered immediately or buffered).</param>
/// <param name="MessageId">Unique message ID for tracking.</param>
/// <param name="WasBuffered">True if the message was buffered (not delivered immediately).</param>
public record StoreAndForwardResult(
    bool Accepted,
    string MessageId,
    bool WasBuffered);
|
||||
Reference in New Issue
Block a user