Phase 3C: Deployment pipeline & Store-and-Forward engine

Deployment Manager (WP-1–8, WP-16):
- DeploymentService: full pipeline (flatten→validate→send→track→audit)
- OperationLockManager: per-instance concurrency control
- StateTransitionValidator: Enabled/Disabled/NotDeployed transition matrix
- ArtifactDeploymentService: broadcast to all sites with per-site results
- Deployment identity (GUID + revision hash), idempotency, staleness detection
- Instance lifecycle commands (disable/enable/delete) with deduplication

Store-and-Forward (WP-9–15):
- StoreAndForwardStorage: SQLite persistence, 3 categories, no max buffer
- StoreAndForwardService: fixed-interval retry, transient-only buffering, parking
- ReplicationService: async best-effort to standby (fire-and-forget)
- Parked message management (query/retry/discard from central)
- Messages survive instance deletion, S&F drains on disable

620 tests pass (+79 new), zero warnings.
This commit is contained in:
Joseph Doherty
2026-03-16 21:27:18 -04:00
parent b75bf52fb4
commit 6ea38faa6f
40 changed files with 3289 additions and 29 deletions

View File

@@ -0,0 +1,178 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Entities.Deployment;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Messages.Artifacts;
using ScadaLink.Commons.Types;
using ScadaLink.Communication;
namespace ScadaLink.DeploymentManager;
/// <summary>
/// WP-7: System-wide artifact deployment.
/// Broadcasts artifacts (shared scripts, external systems, notification lists, DB connections)
/// to all sites with per-site tracking.
///
/// - Successful sites are NOT rolled back on other failures.
/// - Failed sites are retryable individually.
/// - Every site call is bounded by the configured per-site timeout
///   (<c>ArtifactDeploymentTimeoutPerSite</c>; originally documented as 120s).
/// - Cross-site version skew is supported.
/// </summary>
public class ArtifactDeploymentService
{
    private readonly ISiteRepository _siteRepo;
    private readonly IDeploymentManagerRepository _deploymentRepo;
    private readonly CommunicationService _communicationService;
    private readonly IAuditService _auditService;
    private readonly DeploymentManagerOptions _options;
    private readonly ILogger<ArtifactDeploymentService> _logger;

    /// <summary>
    /// Creates the service from its collaborators; the options wrapper is unwrapped once here.
    /// </summary>
    public ArtifactDeploymentService(
        ISiteRepository siteRepo,
        IDeploymentManagerRepository deploymentRepo,
        CommunicationService communicationService,
        IAuditService auditService,
        IOptions<DeploymentManagerOptions> options,
        ILogger<ArtifactDeploymentService> logger)
    {
        _siteRepo = siteRepo;
        _deploymentRepo = deploymentRepo;
        _communicationService = communicationService;
        _auditService = auditService;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Deploys artifacts to all sites. Returns per-site result matrix.
    /// </summary>
    /// <param name="command">Artifact payload sent unchanged to every site.</param>
    /// <param name="user">User recorded on the deployment record and the audit entry.</param>
    /// <param name="cancellationToken">Caller cancellation; linked into every per-site timeout.</param>
    /// <returns>
    /// A failure result when no sites are configured; otherwise a success result carrying the
    /// per-site outcomes, even when some or all sites failed.
    /// </returns>
    /// <remarks>
    /// Per-site communication errors are captured in the result matrix. Exceptions thrown by the
    /// repositories or the audit service are not caught here and propagate to the caller.
    /// </remarks>
    public async Task<Result<ArtifactDeploymentSummary>> DeployToAllSitesAsync(
        DeployArtifactsCommand command,
        string user,
        CancellationToken cancellationToken = default)
    {
        var sites = await _siteRepo.GetAllSitesAsync(cancellationToken);
        if (sites.Count == 0)
        {
            return Result<ArtifactDeploymentSummary>.Failure("No sites configured.");
        }

        // All sites are contacted concurrently; every task converts its own failure into a
        // SiteArtifactResult so one bad site cannot fault the whole batch.
        var tasks = sites.Select(async site =>
        {
            try
            {
                _logger.LogInformation(
                    "Deploying artifacts to site {SiteId} ({SiteName}), deploymentId={DeploymentId}",
                    site.SiteIdentifier, site.Name, command.DeploymentId);

                return await SendToSiteAsync(site.SiteIdentifier, site.Name, command, cancellationToken);
            }
            catch (OperationCanceledException ex) when (cancellationToken.IsCancellationRequested)
            {
                // The caller cancelled; reporting this as a timeout would be misleading.
                _logger.LogWarning(
                    "Artifact deployment to site {SiteId} was cancelled: {Error}",
                    site.SiteIdentifier, ex.Message);

                return new SiteArtifactResult(
                    site.SiteIdentifier, site.Name, false, $"Cancelled: {ex.Message}");
            }
            catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
            {
                // TaskCanceledException derives from OperationCanceledException, so it is covered.
                _logger.LogWarning(
                    "Artifact deployment to site {SiteId} timed out: {Error}",
                    site.SiteIdentifier, ex.Message);

                return new SiteArtifactResult(
                    site.SiteIdentifier, site.Name, false, $"Timeout: {ex.Message}");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex,
                    "Artifact deployment to site {SiteId} failed",
                    site.SiteIdentifier);

                return new SiteArtifactResult(
                    site.SiteIdentifier, site.Name, false, ex.Message);
            }
        }).ToList();

        var results = await Task.WhenAll(tasks);

        // Keyed by site id; the indexer keeps the last entry should two sites share an id.
        var perSiteResults = new Dictionary<string, SiteArtifactResult>();
        foreach (var result in results)
        {
            perSiteResults[result.SiteId] = result;
        }

        // Persist the system artifact deployment record
        var record = new SystemArtifactDeploymentRecord("Artifacts", user)
        {
            DeployedAt = DateTimeOffset.UtcNow,
            PerSiteStatus = JsonSerializer.Serialize(perSiteResults)
        };
        await _deploymentRepo.AddSystemArtifactDeploymentAsync(record, cancellationToken);
        await _deploymentRepo.SaveChangesAsync(cancellationToken);

        var summary = new ArtifactDeploymentSummary(
            command.DeploymentId,
            results.ToList(),
            results.Count(r => r.Success),
            results.Count(r => !r.Success));

        await _auditService.LogAsync(user, "DeployArtifacts", "SystemArtifact",
            command.DeploymentId, "Artifacts",
            new { summary.SuccessCount, summary.FailureCount },
            cancellationToken);

        return Result<ArtifactDeploymentSummary>.Success(summary);
    }

    /// <summary>
    /// WP-7: Retry artifact deployment to a specific site that previously failed.
    /// </summary>
    /// <param name="siteId">Identifier of the site to retry; also used as the result's site name.</param>
    /// <param name="command">Artifact payload to resend.</param>
    /// <param name="user">User recorded on the audit entry.</param>
    /// <param name="cancellationToken">Caller cancellation; linked into the per-site timeout.</param>
    /// <returns>
    /// Success with the site result when the site reports success; otherwise a failure result.
    /// Any exception, including timeout and cancellation, is logged and converted to a failure result.
    /// </returns>
    public async Task<Result<SiteArtifactResult>> RetryForSiteAsync(
        string siteId,
        DeployArtifactsCommand command,
        string user,
        CancellationToken cancellationToken = default)
    {
        try
        {
            // No site lookup is performed on retry, so the id doubles as the display name.
            var result = await SendToSiteAsync(siteId, siteId, command, cancellationToken);

            await _auditService.LogAsync(user, "RetryArtifactDeployment", "SystemArtifact",
                command.DeploymentId, siteId, new { result.Success }, cancellationToken);

            return result.Success
                ? Result<SiteArtifactResult>.Success(result)
                : Result<SiteArtifactResult>.Failure(result.ErrorMessage ?? "Retry failed.");
        }
        catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
        {
            _logger.LogWarning(
                "Artifact deployment retry to site {SiteId} timed out or was cancelled: {Error}",
                siteId, ex.Message);

            return Result<SiteArtifactResult>.Failure($"Retry failed for site {siteId}: {ex.Message}");
        }
        catch (Exception ex)
        {
            // Previously swallowed silently; log it so failed retries can be diagnosed.
            _logger.LogError(ex,
                "Artifact deployment retry to site {SiteId} failed",
                siteId);

            return Result<SiteArtifactResult>.Failure($"Retry failed for site {siteId}: {ex.Message}");
        }
    }

    /// <summary>
    /// Sends the command to one site under the configured per-site timeout and wraps the
    /// site's response. Exceptions are left to the caller to classify.
    /// </summary>
    private async Task<SiteArtifactResult> SendToSiteAsync(
        string siteId,
        string siteName,
        DeployArtifactsCommand command,
        CancellationToken cancellationToken)
    {
        // Linked source: fires on caller cancellation or when the per-site timeout elapses.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        cts.CancelAfter(_options.ArtifactDeploymentTimeoutPerSite);

        var response = await _communicationService.DeployArtifactsAsync(siteId, command, cts.Token);

        return new SiteArtifactResult(siteId, siteName, response.Success, response.ErrorMessage);
    }
}
/// <summary>
/// WP-7: Outcome of an artifact deployment attempt against a single site.
/// </summary>
/// <param name="SiteId">Identifier of the site the attempt targeted.</param>
/// <param name="SiteName">Display name of the site.</param>
/// <param name="Success">Whether the attempt succeeded.</param>
/// <param name="ErrorMessage">Optional failure detail; may be <c>null</c>.</param>
public record SiteArtifactResult(string SiteId, string SiteName, bool Success, string? ErrorMessage);
/// <summary>
/// WP-7: Aggregate outcome of a system-wide artifact deployment.
/// </summary>
/// <param name="DeploymentId">Identifier of the deployment the summary describes.</param>
/// <param name="SiteResults">One result entry per site that was attempted.</param>
/// <param name="SuccessCount">Number of site results reporting success.</param>
/// <param name="FailureCount">Number of site results reporting failure.</param>
public record ArtifactDeploymentSummary(string DeploymentId, IReadOnlyList<SiteArtifactResult> SiteResults, int SuccessCount, int FailureCount);