Phase 3C: Deployment pipeline & Store-and-Forward engine
Deployment Manager (WP-1–8, WP-16):
- DeploymentService: full pipeline (flatten→validate→send→track→audit)
- OperationLockManager: per-instance concurrency control
- StateTransitionValidator: Enabled/Disabled/NotDeployed transition matrix
- ArtifactDeploymentService: broadcast to all sites with per-site results
- Deployment identity (GUID + revision hash), idempotency, staleness detection
- Instance lifecycle commands (disable/enable/delete) with deduplication

Store-and-Forward (WP-9–15):
- StoreAndForwardStorage: SQLite persistence, 3 categories, no max buffer
- StoreAndForwardService: fixed-interval retry, transient-only buffering, parking
- ReplicationService: async best-effort to standby (fire-and-forget)
- Parked message management (query/retry/discard from central)
- Messages survive instance deletion, S&F drains on disable

620 tests pass (+79 new), zero warnings.
This commit is contained in:
@@ -0,0 +1,393 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Entities.Deployment;
|
||||
using ScadaLink.Commons.Entities.Instances;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Messages.Deployment;
|
||||
using ScadaLink.Commons.Messages.Lifecycle;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.Commons.Types.Flattening;
|
||||
using ScadaLink.Communication;
|
||||
using ScadaLink.TemplateEngine.Flattening;
|
||||
using ScadaLink.TemplateEngine.Validation;
|
||||
|
||||
namespace ScadaLink.DeploymentManager;
|
||||
|
||||
/// <summary>
/// WP-1: Central-side deployment orchestration service.
/// Coordinates the deployment pipeline for a single instance:
/// 1. Validate the instance state transition (WP-4)
/// 2. Acquire the per-instance operation lock (WP-3)
/// 3. Flatten and validate the configuration via the flattening pipeline
///    (captures template state at time of flatten -- WP-16)
/// 4. Take the revision hash produced by the flattening pipeline (WP-2)
/// 5. Persist a <see cref="DeploymentRecord"/> (Pending, then InProgress)
/// 6. Send a <see cref="DeployInstanceCommand"/> to the site via <see cref="CommunicationService"/>
/// 7. Record the site-reported status and, on success, the deployed config snapshot (WP-8)
/// 8. Audit log the outcome
///
/// Also exposes the instance lifecycle commands (disable / enable / delete -- WP-6)
/// and the deployed-vs-current staleness comparison (WP-8).
/// </summary>
public class DeploymentService
{
    private readonly IDeploymentManagerRepository _repository;
    private readonly IFlatteningPipeline _flatteningPipeline;
    private readonly CommunicationService _communicationService;
    private readonly OperationLockManager _lockManager;
    private readonly IAuditService _auditService;
    private readonly DeploymentManagerOptions _options;
    private readonly ILogger<DeploymentService> _logger;

    /// <summary>
    /// Creates the service from its collaborators. The options wrapper is unwrapped once
    /// here so the rest of the class reads plain <see cref="DeploymentManagerOptions"/>.
    /// </summary>
    public DeploymentService(
        IDeploymentManagerRepository repository,
        IFlatteningPipeline flatteningPipeline,
        CommunicationService communicationService,
        OperationLockManager lockManager,
        IAuditService auditService,
        IOptions<DeploymentManagerOptions> options,
        ILogger<DeploymentService> logger)
    {
        _repository = repository;
        _flatteningPipeline = flatteningPipeline;
        _communicationService = communicationService;
        _lockManager = lockManager;
        _auditService = auditService;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// WP-1: Deploy an instance to its site.
    /// WP-2: Generates a unique deployment ID and uses the revision hash from the flatten result.
    /// WP-4: Validates the state transition before doing any work.
    /// WP-8: Stores the deployed config snapshot when the site reports success.
    /// WP-16: Template state is captured at the time of flatten.
    /// </summary>
    /// <param name="instanceId">Identifier of the instance to deploy.</param>
    /// <param name="user">User recorded on the deployment record, the command and the audit entries.</param>
    /// <param name="cancellationToken">Token flowed to repository, pipeline and communication calls.</param>
    /// <returns>
    /// Success carrying the deployment record when the site reports
    /// <see cref="DeploymentStatus.Success"/>; otherwise a failure describing why the
    /// deployment was rejected, failed validation, failed at the site, or timed out.
    /// </returns>
    public async Task<Result<DeploymentRecord>> DeployInstanceAsync(
        int instanceId,
        string user,
        CancellationToken cancellationToken = default)
    {
        var instance = await _repository.GetInstanceByIdAsync(instanceId, cancellationToken);
        if (instance is null)
        {
            return Result<DeploymentRecord>.Failure($"Instance with ID {instanceId} not found.");
        }

        // WP-4: Validate state transition.
        // NOTE(review): this check runs before the lock is taken, on the state loaded above.
        // Confirm a concurrent operation cannot change the state between check and lock.
        var transitionError = StateTransitionValidator.ValidateTransition(instance.State, "deploy");
        if (transitionError is not null)
        {
            return Result<DeploymentRecord>.Failure(transitionError);
        }

        // WP-3: Per-instance operation lock, held until this method returns.
        using var lockHandle = await _lockManager.AcquireAsync(
            instance.UniqueName, _options.OperationLockTimeout, cancellationToken);

        // WP-2: Unique deployment ID (GUID without dashes).
        var deploymentId = Guid.NewGuid().ToString("N");

        // WP-1/16: Flatten configuration (captures template state at this point in time).
        var flattenResult = await _flatteningPipeline.FlattenAndValidateAsync(instanceId, cancellationToken);
        if (flattenResult.IsFailure)
        {
            return Result<DeploymentRecord>.Failure($"Validation failed: {flattenResult.Error}");
        }

        var flattened = flattenResult.Value;
        var revisionHash = flattened.RevisionHash;

        if (!flattened.Validation.IsValid)
        {
            var errors = string.Join("; ", flattened.Validation.Errors.Select(e => e.Message));
            return Result<DeploymentRecord>.Failure($"Pre-deployment validation failed: {errors}");
        }

        // Serialized once; the same JSON is sent to the site and stored in the snapshot.
        var configJson = JsonSerializer.Serialize(flattened.Configuration);

        // WP-4: Persist the record as Pending first, then move it to InProgress.
        var record = new DeploymentRecord(deploymentId, user)
        {
            InstanceId = instanceId,
            Status = DeploymentStatus.Pending,
            RevisionHash = revisionHash,
            DeployedAt = DateTimeOffset.UtcNow
        };

        await _repository.AddDeploymentRecordAsync(record, cancellationToken);
        await _repository.SaveChangesAsync(cancellationToken);

        record.Status = DeploymentStatus.InProgress;
        await _repository.UpdateDeploymentRecordAsync(record, cancellationToken);
        await _repository.SaveChangesAsync(cancellationToken);

        try
        {
            // WP-1: Send to site via CommunicationService.
            var siteId = instance.SiteId.ToString();
            var command = new DeployInstanceCommand(
                deploymentId, instance.UniqueName, revisionHash, configJson, user, DateTimeOffset.UtcNow);

            _logger.LogInformation(
                "Sending deployment {DeploymentId} for instance {Instance} to site {SiteId}",
                deploymentId, instance.UniqueName, siteId);

            var response = await _communicationService.DeployInstanceAsync(siteId, command, cancellationToken);

            // WP-1: Mirror the site's verdict onto the record.
            record.Status = response.Status;
            record.ErrorMessage = response.ErrorMessage;
            record.CompletedAt = DateTimeOffset.UtcNow;
            await _repository.UpdateDeploymentRecordAsync(record, cancellationToken);

            if (response.Status == DeploymentStatus.Success)
            {
                // WP-4: A successful deployment leaves the instance Enabled.
                instance.State = InstanceState.Enabled;
                await _repository.UpdateInstanceAsync(instance, cancellationToken);

                // WP-8: Store deployed config snapshot (persisted by the SaveChanges below).
                await StoreDeployedSnapshotAsync(instanceId, deploymentId, revisionHash, configJson, cancellationToken);
            }

            await _repository.SaveChangesAsync(cancellationToken);

            await _auditService.LogAsync(user, "Deploy", "Instance", instanceId.ToString(),
                instance.UniqueName, new { DeploymentId = deploymentId, Status = record.Status.ToString() },
                cancellationToken);

            _logger.LogInformation(
                "Deployment {DeploymentId} for instance {Instance}: {Status}",
                deploymentId, instance.UniqueName, record.Status);

            return record.Status == DeploymentStatus.Success
                ? Result<DeploymentRecord>.Success(record)
                : Result<DeploymentRecord>.Failure(
                    $"Deployment failed: {response.ErrorMessage ?? "Unknown error"}");
        }
        catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
        {
            _logger.LogWarning(ex,
                "Deployment {DeploymentId} for instance {Instance} aborted by timeout or cancellation",
                deploymentId, instance.UniqueName);

            // The caller's token may be the very reason we are here. Reusing it would make
            // the bookkeeping below throw again and leave the record stuck in InProgress
            // with no audit entry, so the failure is persisted with CancellationToken.None.
            record.Status = DeploymentStatus.Failed;
            record.ErrorMessage = $"Communication failure: {ex.Message}";
            record.CompletedAt = DateTimeOffset.UtcNow;
            await _repository.UpdateDeploymentRecordAsync(record, CancellationToken.None);
            await _repository.SaveChangesAsync(CancellationToken.None);

            await _auditService.LogAsync(user, "DeployFailed", "Instance", instanceId.ToString(),
                instance.UniqueName, new { DeploymentId = deploymentId, Error = ex.Message },
                CancellationToken.None);

            return Result<DeploymentRecord>.Failure($"Deployment timed out: {ex.Message}");
        }
    }

    /// <summary>
    /// WP-6: Disable an instance. Sends a <see cref="DisableInstanceCommand"/> to the site and,
    /// if the site reports success, marks the instance <see cref="InstanceState.Disabled"/>.
    /// </summary>
    /// <param name="instanceId">Identifier of the instance to disable.</param>
    /// <param name="user">User recorded on the audit entry.</param>
    /// <param name="cancellationToken">Token flowed to repository and communication calls.</param>
    /// <returns>The site's response on success; otherwise a failure with the reason.</returns>
    public async Task<Result<InstanceLifecycleResponse>> DisableInstanceAsync(
        int instanceId,
        string user,
        CancellationToken cancellationToken = default)
    {
        return await RunLifecycleCommandAsync(
            instanceId,
            user,
            operation: "disable",
            auditAction: "Disable",
            stateOnSuccess: InstanceState.Disabled,
            fallbackError: "Disable failed.",
            sendAsync: async (siteId, commandId, uniqueName) =>
                await _communicationService.DisableInstanceAsync(
                    siteId,
                    new DisableInstanceCommand(commandId, uniqueName, DateTimeOffset.UtcNow),
                    cancellationToken),
            beforeStateChangeAsync: null,
            cancellationToken);
    }

    /// <summary>
    /// WP-6: Enable an instance. Sends an <see cref="EnableInstanceCommand"/> to the site and,
    /// if the site reports success, marks the instance <see cref="InstanceState.Enabled"/>.
    /// </summary>
    /// <param name="instanceId">Identifier of the instance to enable.</param>
    /// <param name="user">User recorded on the audit entry.</param>
    /// <param name="cancellationToken">Token flowed to repository and communication calls.</param>
    /// <returns>The site's response on success; otherwise a failure with the reason.</returns>
    public async Task<Result<InstanceLifecycleResponse>> EnableInstanceAsync(
        int instanceId,
        string user,
        CancellationToken cancellationToken = default)
    {
        return await RunLifecycleCommandAsync(
            instanceId,
            user,
            operation: "enable",
            auditAction: "Enable",
            stateOnSuccess: InstanceState.Enabled,
            fallbackError: "Enable failed.",
            sendAsync: async (siteId, commandId, uniqueName) =>
                await _communicationService.EnableInstanceAsync(
                    siteId,
                    new EnableInstanceCommand(commandId, uniqueName, DateTimeOffset.UtcNow),
                    cancellationToken),
            beforeStateChangeAsync: null,
            cancellationToken);
    }

    /// <summary>
    /// WP-6: Delete an instance. Sends a <see cref="DeleteInstanceCommand"/> to the site and,
    /// if the site reports success, removes the deployed snapshot and marks the instance
    /// <see cref="InstanceState.NotDeployed"/> (the instance row itself is kept; higher
    /// layers may delete it). Store-and-forward data is not touched here.
    /// </summary>
    /// <param name="instanceId">Identifier of the instance to delete.</param>
    /// <param name="user">User recorded on the audit entry.</param>
    /// <param name="cancellationToken">Token flowed to repository and communication calls.</param>
    /// <returns>The site's response on success; otherwise a failure with the reason.</returns>
    public async Task<Result<InstanceLifecycleResponse>> DeleteInstanceAsync(
        int instanceId,
        string user,
        CancellationToken cancellationToken = default)
    {
        return await RunLifecycleCommandAsync(
            instanceId,
            user,
            operation: "delete",
            auditAction: "Delete",
            stateOnSuccess: InstanceState.NotDeployed,
            fallbackError: "Delete failed. Site may be unreachable.",
            sendAsync: async (siteId, commandId, uniqueName) =>
                await _communicationService.DeleteInstanceAsync(
                    siteId,
                    new DeleteInstanceCommand(commandId, uniqueName, DateTimeOffset.UtcNow),
                    cancellationToken),
            // The snapshot is removed before the state change, in the same SaveChanges.
            beforeStateChangeAsync: () => _repository.DeleteDeployedSnapshotAsync(instanceId, cancellationToken),
            cancellationToken);
    }

    /// <summary>
    /// WP-8: Compare the deployed config snapshot with the configuration currently derived
    /// from templates. The instance is stale when the two revision hashes differ.
    /// </summary>
    /// <param name="instanceId">Identifier of the instance to compare.</param>
    /// <param name="cancellationToken">Token flowed to repository and pipeline calls.</param>
    /// <returns>
    /// The comparison result, or a failure when no snapshot exists or the current
    /// configuration cannot be flattened.
    /// </returns>
    public async Task<Result<DeploymentComparisonResult>> GetDeploymentComparisonAsync(
        int instanceId,
        CancellationToken cancellationToken = default)
    {
        var snapshot = await _repository.GetDeployedSnapshotByInstanceIdAsync(instanceId, cancellationToken);
        if (snapshot is null)
        {
            return Result<DeploymentComparisonResult>.Failure("No deployed snapshot found for this instance.");
        }

        // Re-flatten to obtain the hash of what a deployment would send right now.
        var currentResult = await _flatteningPipeline.FlattenAndValidateAsync(instanceId, cancellationToken);
        if (currentResult.IsFailure)
        {
            return Result<DeploymentComparisonResult>.Failure($"Cannot compute current config: {currentResult.Error}");
        }

        var currentHash = currentResult.Value.RevisionHash;
        var comparison = new DeploymentComparisonResult(
            instanceId,
            snapshot.RevisionHash,
            currentHash,
            snapshot.RevisionHash != currentHash,
            snapshot.DeployedAt);

        return Result<DeploymentComparisonResult>.Success(comparison);
    }

    /// <summary>
    /// WP-2: Look up a deployment record by its deployment ID in the central repository,
    /// e.g. to inspect the last known status after a failover or timeout before re-deploying.
    /// This method does not contact the site.
    /// </summary>
    /// <param name="deploymentId">Deployment ID assigned by <see cref="DeployInstanceAsync"/>.</param>
    /// <param name="cancellationToken">Token flowed to the repository call.</param>
    /// <returns>Whatever the repository returns for that ID (nullable).</returns>
    public async Task<DeploymentRecord?> GetDeploymentStatusAsync(
        string deploymentId,
        CancellationToken cancellationToken = default)
    {
        return await _repository.GetDeploymentByDeploymentIdAsync(deploymentId, cancellationToken);
    }

    /// <summary>
    /// Shared implementation of the disable / enable / delete lifecycle commands:
    /// load the instance, validate the transition, take the per-instance lock, send the
    /// command, apply the new state when the site reports success, then audit the outcome.
    /// </summary>
    /// <param name="instanceId">Identifier of the target instance.</param>
    /// <param name="user">User recorded on the audit entry.</param>
    /// <param name="operation">Operation name passed to <see cref="StateTransitionValidator"/>.</param>
    /// <param name="auditAction">Action name written to the audit log.</param>
    /// <param name="stateOnSuccess">State assigned to the instance when the site reports success.</param>
    /// <param name="fallbackError">Failure message used when the site gives no error message.</param>
    /// <param name="sendAsync">Sends the command; receives site ID, command ID and instance unique name.</param>
    /// <param name="beforeStateChangeAsync">Optional extra repository work run on success before the state update.</param>
    /// <param name="cancellationToken">Token flowed to repository, lock and audit calls.</param>
    private async Task<Result<InstanceLifecycleResponse>> RunLifecycleCommandAsync(
        int instanceId,
        string user,
        string operation,
        string auditAction,
        InstanceState stateOnSuccess,
        string fallbackError,
        Func<string, string, string, Task<InstanceLifecycleResponse>> sendAsync,
        Func<Task>? beforeStateChangeAsync,
        CancellationToken cancellationToken)
    {
        var instance = await _repository.GetInstanceByIdAsync(instanceId, cancellationToken);
        if (instance is null)
        {
            return Result<InstanceLifecycleResponse>.Failure($"Instance with ID {instanceId} not found.");
        }

        var transitionError = StateTransitionValidator.ValidateTransition(instance.State, operation);
        if (transitionError is not null)
        {
            return Result<InstanceLifecycleResponse>.Failure(transitionError);
        }

        using var lockHandle = await _lockManager.AcquireAsync(
            instance.UniqueName, _options.OperationLockTimeout, cancellationToken);

        var commandId = Guid.NewGuid().ToString("N");
        var siteId = instance.SiteId.ToString();

        var response = await sendAsync(siteId, commandId, instance.UniqueName);

        if (response.Success)
        {
            if (beforeStateChangeAsync is not null)
            {
                await beforeStateChangeAsync();
            }

            instance.State = stateOnSuccess;
            await _repository.UpdateInstanceAsync(instance, cancellationToken);
            await _repository.SaveChangesAsync(cancellationToken);
        }

        // Audited whether or not the site accepted the command.
        await _auditService.LogAsync(user, auditAction, "Instance", instanceId.ToString(),
            instance.UniqueName, new { CommandId = commandId, response.Success },
            cancellationToken);

        return response.Success
            ? Result<InstanceLifecycleResponse>.Success(response)
            : Result<InstanceLifecycleResponse>.Failure(response.ErrorMessage ?? fallbackError);
    }

    /// <summary>
    /// WP-8: Upsert the deployed config snapshot for an instance. Updates the existing
    /// snapshot in place when one exists, otherwise adds a new one. Does not call
    /// SaveChangesAsync itself; the caller does.
    /// </summary>
    private async Task StoreDeployedSnapshotAsync(
        int instanceId,
        string deploymentId,
        string revisionHash,
        string configJson,
        CancellationToken cancellationToken)
    {
        var existing = await _repository.GetDeployedSnapshotByInstanceIdAsync(instanceId, cancellationToken);
        if (existing is null)
        {
            var snapshot = new DeployedConfigSnapshot(deploymentId, revisionHash, configJson)
            {
                InstanceId = instanceId,
                DeployedAt = DateTimeOffset.UtcNow
            };
            await _repository.AddDeployedSnapshotAsync(snapshot, cancellationToken);
            return;
        }

        existing.DeploymentId = deploymentId;
        existing.RevisionHash = revisionHash;
        existing.ConfigurationJson = configJson;
        existing.DeployedAt = DateTimeOffset.UtcNow;
        await _repository.UpdateDeployedSnapshotAsync(existing, cancellationToken);
    }
}
|
||||
|
||||
/// <summary>
/// WP-8: Outcome of comparing an instance's deployed configuration snapshot with the
/// configuration currently derived from its templates.
/// </summary>
/// <param name="InstanceId">Identifier of the instance that was compared.</param>
/// <param name="DeployedRevisionHash">Revision hash stored on the deployed snapshot.</param>
/// <param name="CurrentRevisionHash">Revision hash of the freshly flattened configuration.</param>
/// <param name="IsStale">True when the deployed and current revision hashes differ.</param>
/// <param name="DeployedAt">Timestamp stored on the deployed snapshot.</param>
public record DeploymentComparisonResult(
    int InstanceId, string DeployedRevisionHash, string CurrentRevisionHash,
    bool IsStale, DateTimeOffset DeployedAt);
|
||||
Reference in New Issue
Block a user