Phase 3C: Deployment pipeline & Store-and-Forward engine

Deployment Manager (WP-1–8, WP-16):
- DeploymentService: full pipeline (flatten→validate→send→track→audit)
- OperationLockManager: per-instance concurrency control
- StateTransitionValidator: Enabled/Disabled/NotDeployed transition matrix
- ArtifactDeploymentService: broadcast to all sites with per-site results
- Deployment identity (GUID + revision hash), idempotency, staleness detection
- Instance lifecycle commands (disable/enable/delete) with deduplication

Store-and-Forward (WP-9–15):
- StoreAndForwardStorage: SQLite persistence, 3 categories, no max buffer
- StoreAndForwardService: fixed-interval retry, transient-only buffering, parking
- ReplicationService: async best-effort to standby (fire-and-forget)
- Parked message management (query/retry/discard from central)
- Messages survive instance deletion, S&F drains on disable

620 tests pass (+79 new), zero warnings.
This commit is contained in:
Joseph Doherty
2026-03-16 21:27:18 -04:00
parent b75bf52fb4
commit 6ea38faa6f
40 changed files with 3289 additions and 29 deletions

View File

@@ -0,0 +1,27 @@
namespace ScadaLink.Commons.Entities.Deployment;
/// <summary>
/// WP-8: Stores the deployed configuration snapshot for an instance.
/// Captured at deploy time; compared against template-derived (live flattened) config for staleness detection.
/// </summary>
public class DeployedConfigSnapshot
{
public int Id { get; set; }
public int InstanceId { get; set; }
public string DeploymentId { get; set; }
public string RevisionHash { get; set; }
/// <summary>
/// JSON-serialized FlattenedConfiguration captured at deploy time.
/// </summary>
public string ConfigurationJson { get; set; }
public DateTimeOffset DeployedAt { get; set; }
public DeployedConfigSnapshot(string deploymentId, string revisionHash, string configurationJson)
{
DeploymentId = deploymentId ?? throw new ArgumentNullException(nameof(deploymentId));
RevisionHash = revisionHash ?? throw new ArgumentNullException(nameof(revisionHash));
ConfigurationJson = configurationJson ?? throw new ArgumentNullException(nameof(configurationJson));
}
}

View File

@@ -12,6 +12,12 @@ public class DeploymentRecord
public string DeployedBy { get; set; }
public DateTimeOffset DeployedAt { get; set; }
public DateTimeOffset? CompletedAt { get; set; }
public string? ErrorMessage { get; set; }
/// <summary>
/// WP-4: Optimistic concurrency token for deployment status updates.
/// </summary>
public byte[] RowVersion { get; set; } = [];
public DeploymentRecord(string deploymentId, string deployedBy)
{

View File

@@ -1,4 +1,5 @@
using ScadaLink.Commons.Entities.Deployment;
using ScadaLink.Commons.Entities.Instances;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.Commons.Interfaces.Repositories;
@@ -22,5 +23,16 @@ public interface IDeploymentManagerRepository
Task UpdateSystemArtifactDeploymentAsync(SystemArtifactDeploymentRecord record, CancellationToken cancellationToken = default);
Task DeleteSystemArtifactDeploymentAsync(int id, CancellationToken cancellationToken = default);
// WP-8: DeployedConfigSnapshot
Task<DeployedConfigSnapshot?> GetDeployedSnapshotByInstanceIdAsync(int instanceId, CancellationToken cancellationToken = default);
Task AddDeployedSnapshotAsync(DeployedConfigSnapshot snapshot, CancellationToken cancellationToken = default);
Task UpdateDeployedSnapshotAsync(DeployedConfigSnapshot snapshot, CancellationToken cancellationToken = default);
Task DeleteDeployedSnapshotAsync(int instanceId, CancellationToken cancellationToken = default);
// Instance lookups for deployment pipeline
Task<Instance?> GetInstanceByIdAsync(int instanceId, CancellationToken cancellationToken = default);
Task<Instance?> GetInstanceByUniqueNameAsync(string uniqueName, CancellationToken cancellationToken = default);
Task UpdateInstanceAsync(Instance instance, CancellationToken cancellationToken = default);
Task<int> SaveChangesAsync(CancellationToken cancellationToken = default);
}

View File

@@ -2,6 +2,7 @@ namespace ScadaLink.Commons.Types.Enums;
public enum InstanceState
{
NotDeployed,
Enabled,
Disabled
}

View File

@@ -0,0 +1,11 @@
namespace ScadaLink.Commons.Types.Enums;
/// <summary>
/// WP-9: Categories for store-and-forward messages.
/// </summary>
public enum StoreAndForwardCategory
{
ExternalSystem,
Notification,
CachedDbWrite
}

View File

@@ -0,0 +1,12 @@
namespace ScadaLink.Commons.Types.Enums;
/// <summary>
/// WP-9: Status of a store-and-forward message.
/// </summary>
public enum StoreAndForwardMessageStatus
{
Pending,
InFlight,
Parked,
Delivered
}

View File

@@ -41,6 +41,33 @@ public class DeploymentRecordConfiguration : IEntityTypeConfiguration<Deployment
}
}
public class DeployedConfigSnapshotConfiguration : IEntityTypeConfiguration<DeployedConfigSnapshot>
{
public void Configure(EntityTypeBuilder<DeployedConfigSnapshot> builder)
{
builder.HasKey(s => s.Id);
builder.Property(s => s.DeploymentId)
.IsRequired()
.HasMaxLength(100);
builder.Property(s => s.RevisionHash)
.IsRequired()
.HasMaxLength(100);
builder.Property(s => s.ConfigurationJson)
.IsRequired();
builder.HasOne<Instance>()
.WithMany()
.HasForeignKey(s => s.InstanceId)
.OnDelete(DeleteBehavior.Cascade);
builder.HasIndex(s => s.InstanceId).IsUnique();
builder.HasIndex(s => s.DeploymentId);
}
}
public class SystemArtifactDeploymentRecordConfiguration : IEntityTypeConfiguration<SystemArtifactDeploymentRecord>
{
public void Configure(EntityTypeBuilder<SystemArtifactDeploymentRecord> builder)

View File

@@ -1,5 +1,6 @@
using Microsoft.EntityFrameworkCore;
using ScadaLink.Commons.Entities.Deployment;
using ScadaLink.Commons.Entities.Instances;
using ScadaLink.Commons.Interfaces.Repositories;
namespace ScadaLink.ConfigurationDatabase.Repositories;
@@ -133,6 +134,59 @@ public class DeploymentManagerRepository : IDeploymentManagerRepository
return Task.CompletedTask;
}
// --- WP-8: DeployedConfigSnapshot ---
public async Task<DeployedConfigSnapshot?> GetDeployedSnapshotByInstanceIdAsync(int instanceId, CancellationToken cancellationToken = default)
{
return await _dbContext.Set<DeployedConfigSnapshot>()
.FirstOrDefaultAsync(s => s.InstanceId == instanceId, cancellationToken);
}
public async Task AddDeployedSnapshotAsync(DeployedConfigSnapshot snapshot, CancellationToken cancellationToken = default)
{
await _dbContext.Set<DeployedConfigSnapshot>().AddAsync(snapshot, cancellationToken);
}
public Task UpdateDeployedSnapshotAsync(DeployedConfigSnapshot snapshot, CancellationToken cancellationToken = default)
{
_dbContext.Set<DeployedConfigSnapshot>().Update(snapshot);
return Task.CompletedTask;
}
public async Task DeleteDeployedSnapshotAsync(int instanceId, CancellationToken cancellationToken = default)
{
var snapshot = await _dbContext.Set<DeployedConfigSnapshot>()
.FirstOrDefaultAsync(s => s.InstanceId == instanceId, cancellationToken);
if (snapshot != null)
{
_dbContext.Set<DeployedConfigSnapshot>().Remove(snapshot);
}
}
// --- Instance lookups for deployment pipeline ---
public async Task<Instance?> GetInstanceByIdAsync(int instanceId, CancellationToken cancellationToken = default)
{
return await _dbContext.Set<Instance>()
.Include(i => i.AttributeOverrides)
.Include(i => i.ConnectionBindings)
.FirstOrDefaultAsync(i => i.Id == instanceId, cancellationToken);
}
public async Task<Instance?> GetInstanceByUniqueNameAsync(string uniqueName, CancellationToken cancellationToken = default)
{
return await _dbContext.Set<Instance>()
.Include(i => i.AttributeOverrides)
.Include(i => i.ConnectionBindings)
.FirstOrDefaultAsync(i => i.UniqueName == uniqueName, cancellationToken);
}
public Task UpdateInstanceAsync(Instance instance, CancellationToken cancellationToken = default)
{
_dbContext.Set<Instance>().Update(instance);
return Task.CompletedTask;
}
public async Task<int> SaveChangesAsync(CancellationToken cancellationToken = default)
{
return await _dbContext.SaveChangesAsync(cancellationToken);

View File

@@ -40,6 +40,7 @@ public class ScadaLinkDbContext : DbContext, IDataProtectionKeyContext
// Deployment
public DbSet<DeploymentRecord> DeploymentRecords => Set<DeploymentRecord>();
public DbSet<SystemArtifactDeploymentRecord> SystemArtifactDeploymentRecords => Set<SystemArtifactDeploymentRecord>();
public DbSet<DeployedConfigSnapshot> DeployedConfigSnapshots => Set<DeployedConfigSnapshot>();
// External Systems
public DbSet<ExternalSystemDefinition> ExternalSystemDefinitions => Set<ExternalSystemDefinition>();

View File

@@ -0,0 +1,178 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Entities.Deployment;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Messages.Artifacts;
using ScadaLink.Commons.Types;
using ScadaLink.Communication;
namespace ScadaLink.DeploymentManager;
/// <summary>
/// WP-7: System-wide artifact deployment.
/// Broadcasts artifacts (shared scripts, external systems, notification lists, DB connections)
/// to all sites with per-site tracking.
///
/// - Successful sites are NOT rolled back on other failures.
/// - Failed sites are retryable individually.
/// - 120s timeout per site.
/// - Cross-site version skew is supported.
/// </summary>
public class ArtifactDeploymentService
{
private readonly ISiteRepository _siteRepo;
private readonly IDeploymentManagerRepository _deploymentRepo;
private readonly CommunicationService _communicationService;
private readonly IAuditService _auditService;
private readonly DeploymentManagerOptions _options;
private readonly ILogger<ArtifactDeploymentService> _logger;
public ArtifactDeploymentService(
ISiteRepository siteRepo,
IDeploymentManagerRepository deploymentRepo,
CommunicationService communicationService,
IAuditService auditService,
IOptions<DeploymentManagerOptions> options,
ILogger<ArtifactDeploymentService> logger)
{
_siteRepo = siteRepo;
_deploymentRepo = deploymentRepo;
_communicationService = communicationService;
_auditService = auditService;
_options = options.Value;
_logger = logger;
}
/// <summary>
/// Deploys artifacts to all sites. Returns per-site result matrix.
/// </summary>
public async Task<Result<ArtifactDeploymentSummary>> DeployToAllSitesAsync(
DeployArtifactsCommand command,
string user,
CancellationToken cancellationToken = default)
{
var sites = await _siteRepo.GetAllSitesAsync(cancellationToken);
if (sites.Count == 0)
return Result<ArtifactDeploymentSummary>.Failure("No sites configured.");
var perSiteResults = new Dictionary<string, SiteArtifactResult>();
// Deploy to each site with per-site timeout
var tasks = sites.Select(async site =>
{
try
{
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
cts.CancelAfter(_options.ArtifactDeploymentTimeoutPerSite);
_logger.LogInformation(
"Deploying artifacts to site {SiteId} ({SiteName}), deploymentId={DeploymentId}",
site.SiteIdentifier, site.Name, command.DeploymentId);
var response = await _communicationService.DeployArtifactsAsync(
site.SiteIdentifier, command, cts.Token);
return new SiteArtifactResult(
site.SiteIdentifier, site.Name, response.Success, response.ErrorMessage);
}
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException or TaskCanceledException)
{
_logger.LogWarning(
"Artifact deployment to site {SiteId} timed out: {Error}",
site.SiteIdentifier, ex.Message);
return new SiteArtifactResult(
site.SiteIdentifier, site.Name, false, $"Timeout: {ex.Message}");
}
catch (Exception ex)
{
_logger.LogError(ex,
"Artifact deployment to site {SiteId} failed",
site.SiteIdentifier);
return new SiteArtifactResult(
site.SiteIdentifier, site.Name, false, ex.Message);
}
}).ToList();
var results = await Task.WhenAll(tasks);
foreach (var result in results)
{
perSiteResults[result.SiteId] = result;
}
// Persist the system artifact deployment record
var record = new SystemArtifactDeploymentRecord("Artifacts", user)
{
DeployedAt = DateTimeOffset.UtcNow,
PerSiteStatus = JsonSerializer.Serialize(perSiteResults)
};
await _deploymentRepo.AddSystemArtifactDeploymentAsync(record, cancellationToken);
await _deploymentRepo.SaveChangesAsync(cancellationToken);
var summary = new ArtifactDeploymentSummary(
command.DeploymentId,
results.ToList(),
results.Count(r => r.Success),
results.Count(r => !r.Success));
await _auditService.LogAsync(user, "DeployArtifacts", "SystemArtifact",
command.DeploymentId, "Artifacts",
new { summary.SuccessCount, summary.FailureCount },
cancellationToken);
return Result<ArtifactDeploymentSummary>.Success(summary);
}
/// <summary>
/// WP-7: Retry artifact deployment to a specific site that previously failed.
/// </summary>
public async Task<Result<SiteArtifactResult>> RetryForSiteAsync(
string siteId,
DeployArtifactsCommand command,
string user,
CancellationToken cancellationToken = default)
{
try
{
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
cts.CancelAfter(_options.ArtifactDeploymentTimeoutPerSite);
var response = await _communicationService.DeployArtifactsAsync(siteId, command, cts.Token);
var result = new SiteArtifactResult(siteId, siteId, response.Success, response.ErrorMessage);
await _auditService.LogAsync(user, "RetryArtifactDeployment", "SystemArtifact",
command.DeploymentId, siteId, new { response.Success }, cancellationToken);
return response.Success
? Result<SiteArtifactResult>.Success(result)
: Result<SiteArtifactResult>.Failure(response.ErrorMessage ?? "Retry failed.");
}
catch (Exception ex)
{
return Result<SiteArtifactResult>.Failure($"Retry failed for site {siteId}: {ex.Message}");
}
}
}
/// <summary>
/// WP-7: Per-site result for artifact deployment.
/// </summary>
public record SiteArtifactResult(
string SiteId,
string SiteName,
bool Success,
string? ErrorMessage);
/// <summary>
/// WP-7: Summary of system-wide artifact deployment with per-site results.
/// </summary>
public record ArtifactDeploymentSummary(
string DeploymentId,
IReadOnlyList<SiteArtifactResult> SiteResults,
int SuccessCount,
int FailureCount);

View File

@@ -0,0 +1,16 @@
namespace ScadaLink.DeploymentManager;
/// <summary>
/// Configuration options for the central-side Deployment Manager.
/// </summary>
public class DeploymentManagerOptions
{
/// <summary>Timeout for lifecycle commands sent to sites (disable, enable, delete).</summary>
public TimeSpan LifecycleCommandTimeout { get; set; } = TimeSpan.FromSeconds(30);
/// <summary>WP-7: Timeout per site for system-wide artifact deployment.</summary>
public TimeSpan ArtifactDeploymentTimeoutPerSite { get; set; } = TimeSpan.FromSeconds(120);
/// <summary>WP-3: Timeout for acquiring an operation lock on an instance.</summary>
public TimeSpan OperationLockTimeout { get; set; } = TimeSpan.FromSeconds(5);
}

View File

@@ -0,0 +1,393 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Entities.Deployment;
using ScadaLink.Commons.Entities.Instances;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Messages.Deployment;
using ScadaLink.Commons.Messages.Lifecycle;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.Commons.Types.Flattening;
using ScadaLink.Communication;
using ScadaLink.TemplateEngine.Flattening;
using ScadaLink.TemplateEngine.Validation;
namespace ScadaLink.DeploymentManager;
/// <summary>
/// WP-1: Central-side deployment orchestration service.
/// Coordinates the full deployment pipeline:
/// 1. Validate instance state transition (WP-4)
/// 2. Acquire per-instance operation lock (WP-3)
/// 3. Flatten configuration via TemplateEngine (captures template state at time of flatten -- WP-16)
/// 4. Validate flattened configuration
/// 5. Compute revision hash and diff
/// 6. Send DeployInstanceCommand to site via CommunicationService
/// 7. Track deployment status with optimistic concurrency (WP-4)
/// 8. Store deployed config snapshot (WP-8)
/// 9. Audit log all actions
///
/// WP-2: Each deployment has a unique deployment ID (GUID) + revision hash.
/// WP-16: Template state captured at flatten time -- last-write-wins on templates is safe.
/// </summary>
public class DeploymentService
{
private readonly IDeploymentManagerRepository _repository;
private readonly IFlatteningPipeline _flatteningPipeline;
private readonly CommunicationService _communicationService;
private readonly OperationLockManager _lockManager;
private readonly IAuditService _auditService;
private readonly DeploymentManagerOptions _options;
private readonly ILogger<DeploymentService> _logger;
public DeploymentService(
IDeploymentManagerRepository repository,
IFlatteningPipeline flatteningPipeline,
CommunicationService communicationService,
OperationLockManager lockManager,
IAuditService auditService,
IOptions<DeploymentManagerOptions> options,
ILogger<DeploymentService> logger)
{
_repository = repository;
_flatteningPipeline = flatteningPipeline;
_communicationService = communicationService;
_lockManager = lockManager;
_auditService = auditService;
_options = options.Value;
_logger = logger;
}
/// <summary>
/// WP-1: Deploy an instance to its site.
/// WP-2: Generates unique deployment ID, computes revision hash.
/// WP-4: Validates state transitions, uses optimistic concurrency.
/// WP-5: Site-side apply is all-or-nothing (handled by DeploymentManagerActor).
/// WP-8: Stores deployed config snapshot on success.
/// WP-16: Captures template state at time of flatten.
/// </summary>
public async Task<Result<DeploymentRecord>> DeployInstanceAsync(
int instanceId,
string user,
CancellationToken cancellationToken = default)
{
// Load instance
var instance = await _repository.GetInstanceByIdAsync(instanceId, cancellationToken);
if (instance == null)
return Result<DeploymentRecord>.Failure($"Instance with ID {instanceId} not found.");
// WP-4: Validate state transition
var transitionError = StateTransitionValidator.ValidateTransition(instance.State, "deploy");
if (transitionError != null)
return Result<DeploymentRecord>.Failure(transitionError);
// WP-3: Acquire per-instance operation lock
using var lockHandle = await _lockManager.AcquireAsync(
instance.UniqueName, _options.OperationLockTimeout, cancellationToken);
// WP-2: Generate unique deployment ID
var deploymentId = Guid.NewGuid().ToString("N");
// WP-1/16: Flatten configuration (captures template state at this point in time)
var flattenResult = await _flatteningPipeline.FlattenAndValidateAsync(instanceId, cancellationToken);
if (flattenResult.IsFailure)
return Result<DeploymentRecord>.Failure($"Validation failed: {flattenResult.Error}");
var flattenedConfig = flattenResult.Value.Configuration;
var revisionHash = flattenResult.Value.RevisionHash;
var validationResult = flattenResult.Value.Validation;
if (!validationResult.IsValid)
{
var errors = string.Join("; ", validationResult.Errors.Select(e => e.Message));
return Result<DeploymentRecord>.Failure($"Pre-deployment validation failed: {errors}");
}
// Serialize for transmission
var configJson = JsonSerializer.Serialize(flattenedConfig);
// WP-4: Create deployment record with Pending status
var record = new DeploymentRecord(deploymentId, user)
{
InstanceId = instanceId,
Status = DeploymentStatus.Pending,
RevisionHash = revisionHash,
DeployedAt = DateTimeOffset.UtcNow
};
await _repository.AddDeploymentRecordAsync(record, cancellationToken);
await _repository.SaveChangesAsync(cancellationToken);
// Update status to InProgress
record.Status = DeploymentStatus.InProgress;
await _repository.UpdateDeploymentRecordAsync(record, cancellationToken);
await _repository.SaveChangesAsync(cancellationToken);
try
{
// WP-1: Send to site via CommunicationService
var siteId = instance.SiteId.ToString();
var command = new DeployInstanceCommand(
deploymentId, instance.UniqueName, revisionHash, configJson, user, DateTimeOffset.UtcNow);
_logger.LogInformation(
"Sending deployment {DeploymentId} for instance {Instance} to site {SiteId}",
deploymentId, instance.UniqueName, siteId);
var response = await _communicationService.DeployInstanceAsync(siteId, command, cancellationToken);
// WP-1: Update status based on site response
record.Status = response.Status;
record.ErrorMessage = response.ErrorMessage;
record.CompletedAt = DateTimeOffset.UtcNow;
await _repository.UpdateDeploymentRecordAsync(record, cancellationToken);
if (response.Status == DeploymentStatus.Success)
{
// WP-4: Update instance state to Enabled on successful deployment
instance.State = InstanceState.Enabled;
await _repository.UpdateInstanceAsync(instance, cancellationToken);
// WP-8: Store deployed config snapshot
await StoreDeployedSnapshotAsync(instanceId, deploymentId, revisionHash, configJson, cancellationToken);
}
await _repository.SaveChangesAsync(cancellationToken);
// Audit log
await _auditService.LogAsync(user, "Deploy", "Instance", instanceId.ToString(),
instance.UniqueName, new { DeploymentId = deploymentId, Status = record.Status.ToString() },
cancellationToken);
_logger.LogInformation(
"Deployment {DeploymentId} for instance {Instance}: {Status}",
deploymentId, instance.UniqueName, record.Status);
return record.Status == DeploymentStatus.Success
? Result<DeploymentRecord>.Success(record)
: Result<DeploymentRecord>.Failure(
$"Deployment failed: {response.ErrorMessage ?? "Unknown error"}");
}
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
{
record.Status = DeploymentStatus.Failed;
record.ErrorMessage = $"Communication failure: {ex.Message}";
record.CompletedAt = DateTimeOffset.UtcNow;
await _repository.UpdateDeploymentRecordAsync(record, cancellationToken);
await _repository.SaveChangesAsync(cancellationToken);
await _auditService.LogAsync(user, "DeployFailed", "Instance", instanceId.ToString(),
instance.UniqueName, new { DeploymentId = deploymentId, Error = ex.Message },
cancellationToken);
return Result<DeploymentRecord>.Failure($"Deployment timed out: {ex.Message}");
}
}
/// <summary>
/// WP-6: Disable an instance. Stops Instance Actor, retains config, S&amp;F drains.
/// </summary>
public async Task<Result<InstanceLifecycleResponse>> DisableInstanceAsync(
int instanceId,
string user,
CancellationToken cancellationToken = default)
{
var instance = await _repository.GetInstanceByIdAsync(instanceId, cancellationToken);
if (instance == null)
return Result<InstanceLifecycleResponse>.Failure($"Instance with ID {instanceId} not found.");
var transitionError = StateTransitionValidator.ValidateTransition(instance.State, "disable");
if (transitionError != null)
return Result<InstanceLifecycleResponse>.Failure(transitionError);
using var lockHandle = await _lockManager.AcquireAsync(
instance.UniqueName, _options.OperationLockTimeout, cancellationToken);
var commandId = Guid.NewGuid().ToString("N");
var siteId = instance.SiteId.ToString();
var command = new DisableInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
var response = await _communicationService.DisableInstanceAsync(siteId, command, cancellationToken);
if (response.Success)
{
instance.State = InstanceState.Disabled;
await _repository.UpdateInstanceAsync(instance, cancellationToken);
await _repository.SaveChangesAsync(cancellationToken);
}
await _auditService.LogAsync(user, "Disable", "Instance", instanceId.ToString(),
instance.UniqueName, new { CommandId = commandId, response.Success },
cancellationToken);
return response.Success
? Result<InstanceLifecycleResponse>.Success(response)
: Result<InstanceLifecycleResponse>.Failure(response.ErrorMessage ?? "Disable failed.");
}
/// <summary>
/// WP-6: Enable an instance. Re-creates Instance Actor from stored config.
/// </summary>
public async Task<Result<InstanceLifecycleResponse>> EnableInstanceAsync(
int instanceId,
string user,
CancellationToken cancellationToken = default)
{
var instance = await _repository.GetInstanceByIdAsync(instanceId, cancellationToken);
if (instance == null)
return Result<InstanceLifecycleResponse>.Failure($"Instance with ID {instanceId} not found.");
var transitionError = StateTransitionValidator.ValidateTransition(instance.State, "enable");
if (transitionError != null)
return Result<InstanceLifecycleResponse>.Failure(transitionError);
using var lockHandle = await _lockManager.AcquireAsync(
instance.UniqueName, _options.OperationLockTimeout, cancellationToken);
var commandId = Guid.NewGuid().ToString("N");
var siteId = instance.SiteId.ToString();
var command = new EnableInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
var response = await _communicationService.EnableInstanceAsync(siteId, command, cancellationToken);
if (response.Success)
{
instance.State = InstanceState.Enabled;
await _repository.UpdateInstanceAsync(instance, cancellationToken);
await _repository.SaveChangesAsync(cancellationToken);
}
await _auditService.LogAsync(user, "Enable", "Instance", instanceId.ToString(),
instance.UniqueName, new { CommandId = commandId, response.Success },
cancellationToken);
return response.Success
? Result<InstanceLifecycleResponse>.Success(response)
: Result<InstanceLifecycleResponse>.Failure(response.ErrorMessage ?? "Enable failed.");
}
/// <summary>
/// WP-6: Delete an instance. Stops actor, removes config. S&amp;F NOT cleared.
/// Delete fails if site unreachable (30s timeout via CommunicationOptions).
/// </summary>
public async Task<Result<InstanceLifecycleResponse>> DeleteInstanceAsync(
int instanceId,
string user,
CancellationToken cancellationToken = default)
{
var instance = await _repository.GetInstanceByIdAsync(instanceId, cancellationToken);
if (instance == null)
return Result<InstanceLifecycleResponse>.Failure($"Instance with ID {instanceId} not found.");
var transitionError = StateTransitionValidator.ValidateTransition(instance.State, "delete");
if (transitionError != null)
return Result<InstanceLifecycleResponse>.Failure(transitionError);
using var lockHandle = await _lockManager.AcquireAsync(
instance.UniqueName, _options.OperationLockTimeout, cancellationToken);
var commandId = Guid.NewGuid().ToString("N");
var siteId = instance.SiteId.ToString();
var command = new DeleteInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
var response = await _communicationService.DeleteInstanceAsync(siteId, command, cancellationToken);
if (response.Success)
{
// Remove deployed snapshot
await _repository.DeleteDeployedSnapshotAsync(instanceId, cancellationToken);
// Set state to NotDeployed (or the instance record could be deleted entirely by higher layers)
instance.State = InstanceState.NotDeployed;
await _repository.UpdateInstanceAsync(instance, cancellationToken);
await _repository.SaveChangesAsync(cancellationToken);
}
await _auditService.LogAsync(user, "Delete", "Instance", instanceId.ToString(),
instance.UniqueName, new { CommandId = commandId, response.Success },
cancellationToken);
return response.Success
? Result<InstanceLifecycleResponse>.Success(response)
: Result<InstanceLifecycleResponse>.Failure(
response.ErrorMessage ?? "Delete failed. Site may be unreachable.");
}
/// <summary>
/// WP-8: Get the deployed config snapshot and compare with current template-derived state.
/// </summary>
public async Task<Result<DeploymentComparisonResult>> GetDeploymentComparisonAsync(
int instanceId,
CancellationToken cancellationToken = default)
{
var snapshot = await _repository.GetDeployedSnapshotByInstanceIdAsync(instanceId, cancellationToken);
if (snapshot == null)
return Result<DeploymentComparisonResult>.Failure("No deployed snapshot found for this instance.");
// Compute current template-derived config
var currentResult = await _flatteningPipeline.FlattenAndValidateAsync(instanceId, cancellationToken);
if (currentResult.IsFailure)
return Result<DeploymentComparisonResult>.Failure($"Cannot compute current config: {currentResult.Error}");
var currentHash = currentResult.Value.RevisionHash;
var isStale = snapshot.RevisionHash != currentHash;
var result = new DeploymentComparisonResult(
instanceId,
snapshot.RevisionHash,
currentHash,
isStale,
snapshot.DeployedAt);
return Result<DeploymentComparisonResult>.Success(result);
}
/// <summary>
/// WP-2: After failover/timeout, query site for current deployment state before re-deploying.
/// </summary>
public async Task<DeploymentRecord?> GetDeploymentStatusAsync(
string deploymentId,
CancellationToken cancellationToken = default)
{
return await _repository.GetDeploymentByDeploymentIdAsync(deploymentId, cancellationToken);
}
private async Task StoreDeployedSnapshotAsync(
int instanceId,
string deploymentId,
string revisionHash,
string configJson,
CancellationToken cancellationToken)
{
var existing = await _repository.GetDeployedSnapshotByInstanceIdAsync(instanceId, cancellationToken);
if (existing != null)
{
existing.DeploymentId = deploymentId;
existing.RevisionHash = revisionHash;
existing.ConfigurationJson = configJson;
existing.DeployedAt = DateTimeOffset.UtcNow;
await _repository.UpdateDeployedSnapshotAsync(existing, cancellationToken);
}
else
{
var snapshot = new DeployedConfigSnapshot(deploymentId, revisionHash, configJson)
{
InstanceId = instanceId,
DeployedAt = DateTimeOffset.UtcNow
};
await _repository.AddDeployedSnapshotAsync(snapshot, cancellationToken);
}
}
}
/// <summary>
/// WP-8: Result of comparing deployed vs template-derived configuration.
/// </summary>
public record DeploymentComparisonResult(
int InstanceId,
string DeployedRevisionHash,
string CurrentRevisionHash,
bool IsStale,
DateTimeOffset DeployedAt);

View File

@@ -0,0 +1,121 @@
using ScadaLink.Commons.Entities.Sites;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Flattening;
using ScadaLink.TemplateEngine.Flattening;
using ScadaLink.TemplateEngine.Validation;
namespace ScadaLink.DeploymentManager;
/// <summary>
/// Orchestrates the TemplateEngine services (FlatteningService, ValidationService, RevisionHashService)
/// into a single pipeline for deployment use.
///
/// WP-16: This captures template state at the time of flatten, ensuring that concurrent template edits
/// (last-write-wins) do not conflict with in-progress deployments.
/// </summary>
public class FlatteningPipeline : IFlatteningPipeline
{
private readonly ITemplateEngineRepository _templateRepo;
private readonly ISiteRepository _siteRepo;
private readonly FlatteningService _flatteningService;
private readonly ValidationService _validationService;
private readonly RevisionHashService _revisionHashService;
public FlatteningPipeline(
ITemplateEngineRepository templateRepo,
ISiteRepository siteRepo,
FlatteningService flatteningService,
ValidationService validationService,
RevisionHashService revisionHashService)
{
_templateRepo = templateRepo;
_siteRepo = siteRepo;
_flatteningService = flatteningService;
_validationService = validationService;
_revisionHashService = revisionHashService;
}
public async Task<Result<FlatteningPipelineResult>> FlattenAndValidateAsync(
int instanceId,
CancellationToken cancellationToken = default)
{
// Load instance with full graph
var instance = await _templateRepo.GetInstanceByIdAsync(instanceId, cancellationToken);
if (instance == null)
return Result<FlatteningPipelineResult>.Failure($"Instance with ID {instanceId} not found.");
// Build template chain
var templateChain = await BuildTemplateChainAsync(instance.TemplateId, cancellationToken);
if (templateChain.Count == 0)
return Result<FlatteningPipelineResult>.Failure("Template chain is empty.");
// Build composition maps
var compositionMap = new Dictionary<int, IReadOnlyList<Commons.Entities.Templates.TemplateComposition>>();
var composedChains = new Dictionary<int, IReadOnlyList<Commons.Entities.Templates.Template>>();
foreach (var template in templateChain)
{
var compositions = await _templateRepo.GetCompositionsByTemplateIdAsync(template.Id, cancellationToken);
if (compositions.Count > 0)
{
compositionMap[template.Id] = compositions;
foreach (var comp in compositions)
{
if (!composedChains.ContainsKey(comp.ComposedTemplateId))
{
composedChains[comp.ComposedTemplateId] =
await BuildTemplateChainAsync(comp.ComposedTemplateId, cancellationToken);
}
}
}
}
// Load data connections for the site
var dataConnections = await LoadDataConnectionsAsync(instance.SiteId, cancellationToken);
// Flatten
var flattenResult = _flatteningService.Flatten(
instance, templateChain, compositionMap, composedChains, dataConnections);
if (flattenResult.IsFailure)
return Result<FlatteningPipelineResult>.Failure(flattenResult.Error);
var config = flattenResult.Value;
// Validate
var validation = _validationService.Validate(config);
// Compute revision hash
var hash = _revisionHashService.ComputeHash(config);
return Result<FlatteningPipelineResult>.Success(
new FlatteningPipelineResult(config, hash, validation));
}
private async Task<IReadOnlyList<Commons.Entities.Templates.Template>> BuildTemplateChainAsync(
int templateId,
CancellationToken cancellationToken)
{
var chain = new List<Commons.Entities.Templates.Template>();
var currentId = (int?)templateId;
while (currentId.HasValue)
{
var template = await _templateRepo.GetTemplateWithChildrenAsync(currentId.Value, cancellationToken);
if (template == null) break;
chain.Add(template);
currentId = template.ParentTemplateId;
}
return chain;
}
private async Task<IReadOnlyDictionary<int, DataConnection>> LoadDataConnectionsAsync(
int siteId,
CancellationToken cancellationToken)
{
var connections = await _siteRepo.GetDataConnectionsBySiteIdAsync(siteId, cancellationToken);
return connections.ToDictionary(c => c.Id);
}
}

View File

@@ -0,0 +1,27 @@
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Flattening;
namespace ScadaLink.DeploymentManager;
/// <summary>
/// Abstraction over the TemplateEngine flattening + validation + hashing pipeline.
/// Used by DeploymentService to obtain a validated, hashed FlattenedConfiguration.
/// </summary>
public interface IFlatteningPipeline
{
/// <summary>
/// Flattens and validates an instance configuration, returning the configuration,
/// revision hash, and validation result.
/// </summary>
Task<Result<FlatteningPipelineResult>> FlattenAndValidateAsync(
int instanceId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of the flattening pipeline: configuration, hash, and validation.
/// </summary>
public record FlatteningPipelineResult(
FlattenedConfiguration Configuration,
string RevisionHash,
ValidationResult Validation);

View File

@@ -0,0 +1,58 @@
using System.Collections.Concurrent;
namespace ScadaLink.DeploymentManager;
/// <summary>
/// WP-3: Per-instance operation lock. Only one mutating operation (deploy, disable, enable, delete)
/// may be in progress per instance at a time. Different instances can proceed in parallel.
///
/// Implementation: ConcurrentDictionary of SemaphoreSlim(1,1) keyed by instance unique name.
/// Lock released on completion, timeout, or failure.
/// Lost on central failover (acceptable per design -- in-progress treated as failed).
/// </summary>
public class OperationLockManager
{
private readonly ConcurrentDictionary<string, SemaphoreSlim> _locks = new(StringComparer.Ordinal);
/// <summary>
/// Acquires the operation lock for the given instance. Returns a disposable that releases the lock.
/// Throws TimeoutException if the lock cannot be acquired within the timeout.
/// </summary>
public async Task<IDisposable> AcquireAsync(string instanceUniqueName, TimeSpan timeout, CancellationToken cancellationToken = default)
{
var semaphore = _locks.GetOrAdd(instanceUniqueName, _ => new SemaphoreSlim(1, 1));
if (!await semaphore.WaitAsync(timeout, cancellationToken))
{
throw new TimeoutException(
$"Could not acquire operation lock for instance '{instanceUniqueName}' within {timeout.TotalSeconds}s. " +
"Another mutating operation is in progress.");
}
return new LockRelease(semaphore);
}
/// <summary>
/// Checks whether a lock is currently held for the given instance (for diagnostics).
/// </summary>
public bool IsLocked(string instanceUniqueName)
{
return _locks.TryGetValue(instanceUniqueName, out var semaphore) && semaphore.CurrentCount == 0;
}
private sealed class LockRelease : IDisposable
{
private readonly SemaphoreSlim _semaphore;
private int _disposed;
public LockRelease(SemaphoreSlim semaphore) => _semaphore = semaphore;
public void Dispose()
{
if (Interlocked.CompareExchange(ref _disposed, 1, 0) == 0)
{
_semaphore.Release();
}
}
}
}

View File

@@ -9,11 +9,14 @@
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.5" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
<ProjectReference Include="../ScadaLink.Communication/ScadaLink.Communication.csproj" />
<ProjectReference Include="../ScadaLink.TemplateEngine/ScadaLink.TemplateEngine.csproj" />
</ItemGroup>
</Project>

View File

@@ -6,13 +6,16 @@ public static class ServiceCollectionExtensions
{
public static IServiceCollection AddDeploymentManager(this IServiceCollection services)
{
// Phase 0: skeleton only
services.AddSingleton<OperationLockManager>();
services.AddScoped<IFlatteningPipeline, FlatteningPipeline>();
services.AddScoped<DeploymentService>();
services.AddScoped<ArtifactDeploymentService>();
return services;
}
public static IServiceCollection AddDeploymentManagerActors(this IServiceCollection services)
{
// Phase 0: placeholder for Akka actor registration
// Akka actor registration is handled by Host component during actor system startup
return services;
}
}

View File

@@ -0,0 +1,48 @@
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.DeploymentManager;
/// <summary>
/// WP-4: State transition matrix for instance lifecycle.
///
/// State | Deploy | Disable | Enable | Delete
/// ----------|--------|---------|--------|-------
/// NotDeploy | OK | NO | NO | NO
/// Enabled | OK | OK | NO | OK
/// Disabled | OK* | NO | OK | OK
///
/// * Deploy on a Disabled instance also enables it.
/// </summary>
public static class StateTransitionValidator
{
public static bool CanDeploy(InstanceState currentState) =>
currentState is InstanceState.NotDeployed or InstanceState.Enabled or InstanceState.Disabled;
public static bool CanDisable(InstanceState currentState) =>
currentState == InstanceState.Enabled;
public static bool CanEnable(InstanceState currentState) =>
currentState == InstanceState.Disabled;
public static bool CanDelete(InstanceState currentState) =>
currentState is InstanceState.Enabled or InstanceState.Disabled;
/// <summary>
/// Returns a human-readable error message if the transition is invalid, or null if valid.
/// </summary>
public static string? ValidateTransition(InstanceState currentState, string operation)
{
var allowed = operation.ToLowerInvariant() switch
{
"deploy" => CanDeploy(currentState),
"disable" => CanDisable(currentState),
"enable" => CanEnable(currentState),
"delete" => CanDelete(currentState),
_ => false
};
if (allowed) return null;
return $"Operation '{operation}' is not allowed when instance is in state '{currentState}'.";
}
}

View File

@@ -0,0 +1,136 @@
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.StoreAndForward;
/// <summary>
/// WP-11: Async replication of buffer operations to standby node.
///
/// - Forwards add/remove/park operations to standby via a replication handler.
/// - No ack wait (fire-and-forget per design).
/// - Standby applies operations to its own SQLite.
/// - On failover, standby resumes delivery from its replicated state.
/// </summary>
public class ReplicationService
{
private readonly StoreAndForwardOptions _options;
private readonly ILogger<ReplicationService> _logger;
private Func<ReplicationOperation, Task>? _replicationHandler;
public ReplicationService(
StoreAndForwardOptions options,
ILogger<ReplicationService> logger)
{
_options = options;
_logger = logger;
}
/// <summary>
/// Sets the handler for forwarding replication operations to the standby node.
/// Typically wraps Akka Tell to the standby's replication actor.
/// </summary>
public void SetReplicationHandler(Func<ReplicationOperation, Task> handler)
{
_replicationHandler = handler;
}
/// <summary>
/// WP-11: Replicates an enqueue operation to standby (fire-and-forget).
/// </summary>
public void ReplicateEnqueue(StoreAndForwardMessage message)
{
if (!_options.ReplicationEnabled || _replicationHandler == null) return;
FireAndForget(new ReplicationOperation(
ReplicationOperationType.Add,
message.Id,
message));
}
/// <summary>
/// WP-11: Replicates a remove operation to standby (fire-and-forget).
/// </summary>
public void ReplicateRemove(string messageId)
{
if (!_options.ReplicationEnabled || _replicationHandler == null) return;
FireAndForget(new ReplicationOperation(
ReplicationOperationType.Remove,
messageId,
null));
}
/// <summary>
/// WP-11: Replicates a park operation to standby (fire-and-forget).
/// </summary>
public void ReplicatePark(StoreAndForwardMessage message)
{
if (!_options.ReplicationEnabled || _replicationHandler == null) return;
FireAndForget(new ReplicationOperation(
ReplicationOperationType.Park,
message.Id,
message));
}
/// <summary>
/// WP-11: Applies a replicated operation received from the active node.
/// Used by the standby node to keep its SQLite in sync.
/// </summary>
public async Task ApplyReplicatedOperationAsync(
ReplicationOperation operation,
StoreAndForwardStorage storage)
{
switch (operation.OperationType)
{
case ReplicationOperationType.Add when operation.Message != null:
await storage.EnqueueAsync(operation.Message);
break;
case ReplicationOperationType.Remove:
await storage.RemoveMessageAsync(operation.MessageId);
break;
case ReplicationOperationType.Park when operation.Message != null:
operation.Message.Status = StoreAndForwardMessageStatus.Parked;
await storage.UpdateMessageAsync(operation.Message);
break;
}
}
private void FireAndForget(ReplicationOperation operation)
{
Task.Run(async () =>
{
try
{
await _replicationHandler!.Invoke(operation);
}
catch (Exception ex)
{
// WP-11: No ack wait — log and move on
_logger.LogDebug(ex,
"Replication of {OpType} for message {MessageId} failed (best-effort)",
operation.OperationType, operation.MessageId);
}
});
}
}
/// <summary>
/// WP-11: Represents a buffer operation to be replicated to standby.
/// </summary>
public record ReplicationOperation(
ReplicationOperationType OperationType,
string MessageId,
StoreAndForwardMessage? Message);
/// <summary>
/// WP-11: Types of buffer operations that are replicated.
/// </summary>
public enum ReplicationOperationType
{
Add,
Remove,
Park
}

View File

@@ -8,7 +8,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Data.Sqlite" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.5" />
</ItemGroup>
@@ -16,4 +18,8 @@
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
</ItemGroup>
<ItemGroup>
<InternalsVisibleTo Include="ScadaLink.StoreAndForward.Tests" />
</ItemGroup>
</Project>

View File

@@ -1,4 +1,6 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ScadaLink.StoreAndForward;
@@ -6,13 +8,36 @@ public static class ServiceCollectionExtensions
{
public static IServiceCollection AddStoreAndForward(this IServiceCollection services)
{
// Phase 0: skeleton only
services.AddSingleton<StoreAndForwardStorage>(sp =>
{
var options = sp.GetRequiredService<IOptions<StoreAndForwardOptions>>().Value;
var logger = sp.GetRequiredService<ILogger<StoreAndForwardStorage>>();
return new StoreAndForwardStorage(
$"Data Source={options.SqliteDbPath}",
logger);
});
services.AddSingleton<StoreAndForwardService>(sp =>
{
var storage = sp.GetRequiredService<StoreAndForwardStorage>();
var options = sp.GetRequiredService<IOptions<StoreAndForwardOptions>>().Value;
var logger = sp.GetRequiredService<ILogger<StoreAndForwardService>>();
return new StoreAndForwardService(storage, options, logger);
});
services.AddSingleton<ReplicationService>(sp =>
{
var options = sp.GetRequiredService<IOptions<StoreAndForwardOptions>>().Value;
var logger = sp.GetRequiredService<ILogger<ReplicationService>>();
return new ReplicationService(options, logger);
});
return services;
}
public static IServiceCollection AddStoreAndForwardActors(this IServiceCollection services)
{
// Phase 0: placeholder for Akka actor registration
// Akka actor registration handled by Host component during actor system startup
return services;
}
}

View File

@@ -0,0 +1,49 @@
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.StoreAndForward;
/// <summary>
/// WP-9: Represents a single store-and-forward message as stored in SQLite.
/// Maps to the sf_messages table.
/// </summary>
public class StoreAndForwardMessage
{
/// <summary>Unique message ID (GUID).</summary>
public string Id { get; set; } = string.Empty;
/// <summary>WP-9: Category: ExternalSystem, Notification, or CachedDbWrite.</summary>
public StoreAndForwardCategory Category { get; set; }
/// <summary>Target system name (external system, notification list, or DB connection).</summary>
public string Target { get; set; } = string.Empty;
/// <summary>JSON-serialized payload containing the call details.</summary>
public string PayloadJson { get; set; } = string.Empty;
/// <summary>Number of delivery attempts so far.</summary>
public int RetryCount { get; set; }
/// <summary>Maximum retry attempts before parking (0 = no limit).</summary>
public int MaxRetries { get; set; }
/// <summary>Retry interval in milliseconds.</summary>
public long RetryIntervalMs { get; set; }
/// <summary>When this message was first enqueued.</summary>
public DateTimeOffset CreatedAt { get; set; }
/// <summary>When delivery was last attempted (null if never attempted).</summary>
public DateTimeOffset? LastAttemptAt { get; set; }
/// <summary>Current status of the message.</summary>
public StoreAndForwardMessageStatus Status { get; set; }
/// <summary>Last error message from a failed delivery attempt.</summary>
public string? LastError { get; set; }
/// <summary>
/// Instance that originated this message (for S&amp;F-survives-delete behavior).
/// WP-13: Messages are NOT cleared when instance is deleted.
/// </summary>
public string? OriginInstanceName { get; set; }
}

View File

@@ -1,7 +1,22 @@
namespace ScadaLink.StoreAndForward;
/// <summary>
/// WP-9/10: Configuration options for the Store-and-Forward Engine.
/// </summary>
public class StoreAndForwardOptions
{
/// <summary>Path to the SQLite database for S&amp;F message persistence.</summary>
public string SqliteDbPath { get; set; } = "./data/store-and-forward.db";
/// <summary>WP-11: Whether to replicate buffer operations to standby node.</summary>
public bool ReplicationEnabled { get; set; } = true;
/// <summary>WP-10: Default retry interval for messages without per-source settings.</summary>
public TimeSpan DefaultRetryInterval { get; set; } = TimeSpan.FromSeconds(30);
/// <summary>WP-10: Default maximum retry count before parking.</summary>
public int DefaultMaxRetries { get; set; } = 50;
/// <summary>WP-10: Interval for the background retry timer sweep.</summary>
public TimeSpan RetryTimerInterval { get; set; } = TimeSpan.FromSeconds(10);
}

View File

@@ -0,0 +1,322 @@
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.StoreAndForward;
/// <summary>
/// WP-9/10: Core store-and-forward service.
///
/// Lifecycle:
/// 1. Caller attempts immediate delivery via IDeliveryHandler
/// 2. On transient failure → buffer in SQLite → retry loop
/// 3. On success → remove from buffer
/// 4. On max retries → park
/// 5. Permanent failures are returned to caller immediately (never buffered)
///
/// WP-10: Fixed retry interval (not exponential). Per-source-entity retry settings.
/// Background timer-based retry sweep.
///
/// WP-12: Parked messages queryable, retryable, and discardable.
///
/// WP-14: Buffer depth reported as health metric. Activity logged to site event log.
///
/// WP-15: CachedCall idempotency is the caller's responsibility.
/// This service does not deduplicate — if the same message is enqueued twice,
/// it will be delivered twice. Callers using ExternalSystem.CachedCall() must
/// design their payloads to be idempotent (e.g., include unique request IDs
/// and handle duplicate detection on the remote end).
/// </summary>
public class StoreAndForwardService
{
private readonly StoreAndForwardStorage _storage;
private readonly StoreAndForwardOptions _options;
private readonly ILogger<StoreAndForwardService> _logger;
private Timer? _retryTimer;
private int _retryInProgress;
/// <summary>
/// WP-10: Delivery handler delegate. Returns true on success, throws on transient failure.
/// Permanent failures should return false (message will NOT be buffered).
/// </summary>
private readonly Dictionary<StoreAndForwardCategory, Func<StoreAndForwardMessage, Task<bool>>> _deliveryHandlers = new();
/// <summary>
/// WP-14: Event callback for logging S&amp;F activity to site event log.
/// </summary>
public event Action<string, StoreAndForwardCategory, string>? OnActivity;
public StoreAndForwardService(
StoreAndForwardStorage storage,
StoreAndForwardOptions options,
ILogger<StoreAndForwardService> logger)
{
_storage = storage;
_options = options;
_logger = logger;
}
/// <summary>
/// Registers a delivery handler for a given message category.
/// </summary>
public void RegisterDeliveryHandler(
StoreAndForwardCategory category,
Func<StoreAndForwardMessage, Task<bool>> handler)
{
_deliveryHandlers[category] = handler;
}
/// <summary>
/// Initializes storage and starts the background retry timer.
/// </summary>
public async Task StartAsync()
{
await _storage.InitializeAsync();
_retryTimer = new Timer(
_ => _ = RetryPendingMessagesAsync(),
null,
_options.RetryTimerInterval,
_options.RetryTimerInterval);
_logger.LogInformation(
"Store-and-forward service started. Retry interval: {Interval}s",
_options.DefaultRetryInterval.TotalSeconds);
}
/// <summary>
/// Stops the background retry timer.
/// </summary>
public async Task StopAsync()
{
if (_retryTimer != null)
{
await _retryTimer.DisposeAsync();
_retryTimer = null;
}
}
/// <summary>
/// WP-10: Enqueues a message for store-and-forward delivery.
/// Attempts immediate delivery first. On transient failure, buffers for retry.
/// On permanent failure (handler returns false), returns false immediately.
///
/// WP-15: CachedCall idempotency note — this method does not deduplicate.
/// The caller (e.g., ExternalSystem.CachedCall()) is responsible for ensuring
/// that the remote system can handle duplicate deliveries safely.
/// </summary>
public async Task<StoreAndForwardResult> EnqueueAsync(
StoreAndForwardCategory category,
string target,
string payloadJson,
string? originInstanceName = null,
int? maxRetries = null,
TimeSpan? retryInterval = null)
{
var message = new StoreAndForwardMessage
{
Id = Guid.NewGuid().ToString("N"),
Category = category,
Target = target,
PayloadJson = payloadJson,
RetryCount = 0,
MaxRetries = maxRetries ?? _options.DefaultMaxRetries,
RetryIntervalMs = (long)(retryInterval ?? _options.DefaultRetryInterval).TotalMilliseconds,
CreatedAt = DateTimeOffset.UtcNow,
Status = StoreAndForwardMessageStatus.Pending,
OriginInstanceName = originInstanceName
};
// Attempt immediate delivery
if (_deliveryHandlers.TryGetValue(category, out var handler))
{
try
{
var success = await handler(message);
if (success)
{
RaiseActivity("Delivered", category, $"Immediate delivery to {target}");
return new StoreAndForwardResult(true, message.Id, false);
}
else
{
// Permanent failure — do not buffer
return new StoreAndForwardResult(false, message.Id, false);
}
}
catch (Exception ex)
{
// Transient failure — buffer for retry
_logger.LogWarning(ex,
"Immediate delivery to {Target} failed (transient), buffering for retry",
target);
message.LastAttemptAt = DateTimeOffset.UtcNow;
message.RetryCount = 1;
message.LastError = ex.Message;
await _storage.EnqueueAsync(message);
RaiseActivity("Queued", category, $"Buffered for retry: {target} ({ex.Message})");
return new StoreAndForwardResult(true, message.Id, true);
}
}
// No handler registered — buffer for later
await _storage.EnqueueAsync(message);
RaiseActivity("Queued", category, $"No handler registered, buffered: {target}");
return new StoreAndForwardResult(true, message.Id, true);
}
/// <summary>
/// WP-10: Background retry sweep. Processes all pending messages that are due for retry.
/// </summary>
internal async Task RetryPendingMessagesAsync()
{
// Prevent overlapping retry sweeps
if (Interlocked.CompareExchange(ref _retryInProgress, 1, 0) != 0)
return;
try
{
var messages = await _storage.GetMessagesForRetryAsync();
if (messages.Count == 0) return;
_logger.LogDebug("Retry sweep: {Count} messages due for retry", messages.Count);
foreach (var message in messages)
{
await RetryMessageAsync(message);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Error during retry sweep");
}
finally
{
Interlocked.Exchange(ref _retryInProgress, 0);
}
}
private async Task RetryMessageAsync(StoreAndForwardMessage message)
{
if (!_deliveryHandlers.TryGetValue(message.Category, out var handler))
{
_logger.LogWarning("No delivery handler for category {Category}", message.Category);
return;
}
try
{
var success = await handler(message);
if (success)
{
await _storage.RemoveMessageAsync(message.Id);
RaiseActivity("Delivered", message.Category,
$"Delivered to {message.Target} after {message.RetryCount} retries");
return;
}
// Permanent failure on retry — park immediately
message.Status = StoreAndForwardMessageStatus.Parked;
message.LastAttemptAt = DateTimeOffset.UtcNow;
message.LastError = "Permanent failure (handler returned false)";
await _storage.UpdateMessageAsync(message);
RaiseActivity("Parked", message.Category,
$"Permanent failure for {message.Target}: handler returned false");
}
catch (Exception ex)
{
// Transient failure — increment retry, check max
message.RetryCount++;
message.LastAttemptAt = DateTimeOffset.UtcNow;
message.LastError = ex.Message;
if (message.MaxRetries > 0 && message.RetryCount >= message.MaxRetries)
{
message.Status = StoreAndForwardMessageStatus.Parked;
await _storage.UpdateMessageAsync(message);
RaiseActivity("Parked", message.Category,
$"Max retries ({message.MaxRetries}) reached for {message.Target}");
_logger.LogWarning(
"Message {MessageId} parked after {MaxRetries} retries to {Target}",
message.Id, message.MaxRetries, message.Target);
}
else
{
await _storage.UpdateMessageAsync(message);
RaiseActivity("Retried", message.Category,
$"Retry {message.RetryCount}/{message.MaxRetries} for {message.Target}: {ex.Message}");
}
}
}
/// <summary>
/// WP-12: Gets parked messages for central query (Pattern 8).
/// </summary>
public async Task<(List<StoreAndForwardMessage> Messages, int TotalCount)> GetParkedMessagesAsync(
StoreAndForwardCategory? category = null,
int pageNumber = 1,
int pageSize = 50)
{
return await _storage.GetParkedMessagesAsync(category, pageNumber, pageSize);
}
/// <summary>
/// WP-12: Retries a parked message (moves back to pending queue).
/// </summary>
public async Task<bool> RetryParkedMessageAsync(string messageId)
{
var success = await _storage.RetryParkedMessageAsync(messageId);
if (success)
{
RaiseActivity("Retry", StoreAndForwardCategory.ExternalSystem,
$"Parked message {messageId} moved back to queue");
}
return success;
}
/// <summary>
/// WP-12: Permanently discards a parked message.
/// </summary>
public async Task<bool> DiscardParkedMessageAsync(string messageId)
{
var success = await _storage.DiscardParkedMessageAsync(messageId);
if (success)
{
RaiseActivity("Discard", StoreAndForwardCategory.ExternalSystem,
$"Parked message {messageId} discarded");
}
return success;
}
/// <summary>
/// WP-14: Gets buffer depth by category for health reporting.
/// </summary>
public async Task<Dictionary<StoreAndForwardCategory, int>> GetBufferDepthAsync()
{
return await _storage.GetBufferDepthByCategoryAsync();
}
/// <summary>
/// WP-13: Gets count of S&amp;F messages for a given instance (for verifying survival on deletion).
/// </summary>
public async Task<int> GetMessageCountForInstanceAsync(string instanceName)
{
return await _storage.GetMessageCountByOriginInstanceAsync(instanceName);
}
private void RaiseActivity(string action, StoreAndForwardCategory category, string detail)
{
OnActivity?.Invoke(action, category, detail);
}
}
/// <summary>
/// Result of an enqueue operation.
/// </summary>
public record StoreAndForwardResult(
/// <summary>True if the message was accepted (either delivered immediately or buffered).</summary>
bool Accepted,
/// <summary>Unique message ID for tracking.</summary>
string MessageId,
/// <summary>True if the message was buffered (not delivered immediately).</summary>
bool WasBuffered);

View File

@@ -0,0 +1,339 @@
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.StoreAndForward;
/// <summary>
/// WP-9: SQLite persistence layer for store-and-forward messages.
/// Uses direct Microsoft.Data.Sqlite (not EF Core) for lightweight site-side storage.
/// No max buffer size per design decision.
/// </summary>
public class StoreAndForwardStorage
{
private readonly string _connectionString;
private readonly ILogger<StoreAndForwardStorage> _logger;
public StoreAndForwardStorage(string connectionString, ILogger<StoreAndForwardStorage> logger)
{
_connectionString = connectionString;
_logger = logger;
}
/// <summary>
/// Creates the sf_messages table if it does not exist.
/// </summary>
public async Task InitializeAsync()
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var command = connection.CreateCommand();
command.CommandText = @"
CREATE TABLE IF NOT EXISTS sf_messages (
id TEXT PRIMARY KEY,
category INTEGER NOT NULL,
target TEXT NOT NULL,
payload_json TEXT NOT NULL,
retry_count INTEGER NOT NULL DEFAULT 0,
max_retries INTEGER NOT NULL DEFAULT 50,
retry_interval_ms INTEGER NOT NULL DEFAULT 30000,
created_at TEXT NOT NULL,
last_attempt_at TEXT,
status INTEGER NOT NULL DEFAULT 0,
last_error TEXT,
origin_instance TEXT
);
CREATE INDEX IF NOT EXISTS idx_sf_messages_status ON sf_messages(status);
CREATE INDEX IF NOT EXISTS idx_sf_messages_category ON sf_messages(category);
";
await command.ExecuteNonQueryAsync();
_logger.LogInformation("Store-and-forward SQLite storage initialized");
}
/// <summary>
/// WP-9: Enqueues a new message with Pending status.
/// </summary>
public async Task EnqueueAsync(StoreAndForwardMessage message)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = @"
INSERT INTO sf_messages (id, category, target, payload_json, retry_count, max_retries,
retry_interval_ms, created_at, last_attempt_at, status, last_error, origin_instance)
VALUES (@id, @category, @target, @payload, @retryCount, @maxRetries,
@retryIntervalMs, @createdAt, @lastAttempt, @status, @lastError, @origin)";
cmd.Parameters.AddWithValue("@id", message.Id);
cmd.Parameters.AddWithValue("@category", (int)message.Category);
cmd.Parameters.AddWithValue("@target", message.Target);
cmd.Parameters.AddWithValue("@payload", message.PayloadJson);
cmd.Parameters.AddWithValue("@retryCount", message.RetryCount);
cmd.Parameters.AddWithValue("@maxRetries", message.MaxRetries);
cmd.Parameters.AddWithValue("@retryIntervalMs", message.RetryIntervalMs);
cmd.Parameters.AddWithValue("@createdAt", message.CreatedAt.ToString("O"));
cmd.Parameters.AddWithValue("@lastAttempt", message.LastAttemptAt.HasValue
? message.LastAttemptAt.Value.ToString("O") : DBNull.Value);
cmd.Parameters.AddWithValue("@status", (int)message.Status);
cmd.Parameters.AddWithValue("@lastError", (object?)message.LastError ?? DBNull.Value);
cmd.Parameters.AddWithValue("@origin", (object?)message.OriginInstanceName ?? DBNull.Value);
await cmd.ExecuteNonQueryAsync();
}
/// <summary>
/// WP-10: Gets all messages that are due for retry (Pending status, last attempt older than retry interval).
/// </summary>
public async Task<List<StoreAndForwardMessage>> GetMessagesForRetryAsync()
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = @"
SELECT id, category, target, payload_json, retry_count, max_retries,
retry_interval_ms, created_at, last_attempt_at, status, last_error, origin_instance
FROM sf_messages
WHERE status = @pending
AND (last_attempt_at IS NULL
OR retry_interval_ms = 0
OR (julianday('now') - julianday(last_attempt_at)) * 86400000 >= retry_interval_ms)
ORDER BY created_at ASC";
cmd.Parameters.AddWithValue("@pending", (int)StoreAndForwardMessageStatus.Pending);
return await ReadMessagesAsync(cmd);
}
/// <summary>
/// WP-10: Updates a message after a delivery attempt.
/// </summary>
public async Task UpdateMessageAsync(StoreAndForwardMessage message)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = @"
UPDATE sf_messages
SET retry_count = @retryCount,
last_attempt_at = @lastAttempt,
status = @status,
last_error = @lastError
WHERE id = @id";
cmd.Parameters.AddWithValue("@id", message.Id);
cmd.Parameters.AddWithValue("@retryCount", message.RetryCount);
cmd.Parameters.AddWithValue("@lastAttempt", message.LastAttemptAt.HasValue
? message.LastAttemptAt.Value.ToString("O") : DBNull.Value);
cmd.Parameters.AddWithValue("@status", (int)message.Status);
cmd.Parameters.AddWithValue("@lastError", (object?)message.LastError ?? DBNull.Value);
await cmd.ExecuteNonQueryAsync();
}
/// <summary>
/// WP-10: Removes a successfully delivered message.
/// </summary>
public async Task RemoveMessageAsync(string messageId)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = "DELETE FROM sf_messages WHERE id = @id";
cmd.Parameters.AddWithValue("@id", messageId);
await cmd.ExecuteNonQueryAsync();
}
/// <summary>
/// WP-12: Gets all parked messages, optionally filtered by category, with pagination.
/// </summary>
public async Task<(List<StoreAndForwardMessage> Messages, int TotalCount)> GetParkedMessagesAsync(
StoreAndForwardCategory? category = null,
int pageNumber = 1,
int pageSize = 50)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
// Count
await using var countCmd = connection.CreateCommand();
countCmd.CommandText = category.HasValue
? "SELECT COUNT(*) FROM sf_messages WHERE status = @parked AND category = @category"
: "SELECT COUNT(*) FROM sf_messages WHERE status = @parked";
countCmd.Parameters.AddWithValue("@parked", (int)StoreAndForwardMessageStatus.Parked);
if (category.HasValue) countCmd.Parameters.AddWithValue("@category", (int)category.Value);
var totalCount = Convert.ToInt32(await countCmd.ExecuteScalarAsync());
// Page
await using var pageCmd = connection.CreateCommand();
var categoryFilter = category.HasValue ? " AND category = @category" : "";
pageCmd.CommandText = $@"
SELECT id, category, target, payload_json, retry_count, max_retries,
retry_interval_ms, created_at, last_attempt_at, status, last_error, origin_instance
FROM sf_messages
WHERE status = @parked{categoryFilter}
ORDER BY created_at ASC
LIMIT @limit OFFSET @offset";
pageCmd.Parameters.AddWithValue("@parked", (int)StoreAndForwardMessageStatus.Parked);
if (category.HasValue) pageCmd.Parameters.AddWithValue("@category", (int)category.Value);
pageCmd.Parameters.AddWithValue("@limit", pageSize);
pageCmd.Parameters.AddWithValue("@offset", (pageNumber - 1) * pageSize);
var messages = await ReadMessagesAsync(pageCmd);
return (messages, totalCount);
}
/// <summary>
/// WP-12: Moves a parked message back to pending for retry.
/// </summary>
public async Task<bool> RetryParkedMessageAsync(string messageId)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = @"
UPDATE sf_messages
SET status = @pending, retry_count = 0, last_error = NULL
WHERE id = @id AND status = @parked";
cmd.Parameters.AddWithValue("@id", messageId);
cmd.Parameters.AddWithValue("@pending", (int)StoreAndForwardMessageStatus.Pending);
cmd.Parameters.AddWithValue("@parked", (int)StoreAndForwardMessageStatus.Parked);
var rows = await cmd.ExecuteNonQueryAsync();
return rows > 0;
}
/// <summary>
/// WP-12: Permanently discards a parked message.
/// </summary>
public async Task<bool> DiscardParkedMessageAsync(string messageId)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = "DELETE FROM sf_messages WHERE id = @id AND status = @parked";
cmd.Parameters.AddWithValue("@id", messageId);
cmd.Parameters.AddWithValue("@parked", (int)StoreAndForwardMessageStatus.Parked);
var rows = await cmd.ExecuteNonQueryAsync();
return rows > 0;
}
/// <summary>
/// WP-14: Gets buffer depth by category (count of pending messages per category).
/// </summary>
public async Task<Dictionary<StoreAndForwardCategory, int>> GetBufferDepthByCategoryAsync()
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = @"
SELECT category, COUNT(*) as cnt
FROM sf_messages
WHERE status = @pending
GROUP BY category";
cmd.Parameters.AddWithValue("@pending", (int)StoreAndForwardMessageStatus.Pending);
var result = new Dictionary<StoreAndForwardCategory, int>();
await using var reader = await cmd.ExecuteReaderAsync();
while (await reader.ReadAsync())
{
var category = (StoreAndForwardCategory)reader.GetInt32(0);
var count = reader.GetInt32(1);
result[category] = count;
}
return result;
}
/// <summary>
/// WP-13: Verifies messages are NOT deleted when an instance is deleted.
/// Returns the count of messages for a given origin instance.
/// </summary>
public async Task<int> GetMessageCountByOriginInstanceAsync(string instanceName)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = @"
SELECT COUNT(*)
FROM sf_messages
WHERE origin_instance = @origin";
cmd.Parameters.AddWithValue("@origin", instanceName);
return Convert.ToInt32(await cmd.ExecuteScalarAsync());
}
/// <summary>
/// Gets a message by ID.
/// </summary>
public async Task<StoreAndForwardMessage?> GetMessageByIdAsync(string messageId)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = @"
SELECT id, category, target, payload_json, retry_count, max_retries,
retry_interval_ms, created_at, last_attempt_at, status, last_error, origin_instance
FROM sf_messages
WHERE id = @id";
cmd.Parameters.AddWithValue("@id", messageId);
var messages = await ReadMessagesAsync(cmd);
return messages.FirstOrDefault();
}
/// <summary>
/// Gets total message count by status.
/// </summary>
public async Task<int> GetMessageCountByStatusAsync(StoreAndForwardMessageStatus status)
{
await using var connection = new SqliteConnection(_connectionString);
await connection.OpenAsync();
await using var cmd = connection.CreateCommand();
cmd.CommandText = "SELECT COUNT(*) FROM sf_messages WHERE status = @status";
cmd.Parameters.AddWithValue("@status", (int)status);
return Convert.ToInt32(await cmd.ExecuteScalarAsync());
}
private static async Task<List<StoreAndForwardMessage>> ReadMessagesAsync(SqliteCommand cmd)
{
var results = new List<StoreAndForwardMessage>();
await using var reader = await cmd.ExecuteReaderAsync();
while (await reader.ReadAsync())
{
results.Add(new StoreAndForwardMessage
{
Id = reader.GetString(0),
Category = (StoreAndForwardCategory)reader.GetInt32(1),
Target = reader.GetString(2),
PayloadJson = reader.GetString(3),
RetryCount = reader.GetInt32(4),
MaxRetries = reader.GetInt32(5),
RetryIntervalMs = reader.GetInt64(6),
CreatedAt = DateTimeOffset.Parse(reader.GetString(7)),
LastAttemptAt = reader.IsDBNull(8) ? null : DateTimeOffset.Parse(reader.GetString(8)),
Status = (StoreAndForwardMessageStatus)reader.GetInt32(9),
LastError = reader.IsDBNull(10) ? null : reader.GetString(10),
OriginInstanceName = reader.IsDBNull(11) ? null : reader.GetString(11)
});
}
return results;
}
}