fix(deployment-manager): resolve DeploymentManager-009,010,012,014 — shared deployment ID, lifecycle-timeout enforcement, doc/test cleanup; DeploymentManager-013 flagged
This commit is contained in:
@@ -58,9 +58,17 @@ public class ArtifactDeploymentService
|
||||
/// Collects all artifact types from repositories and builds a <see cref="DeployArtifactsCommand"/>
|
||||
/// scoped to a specific site's data connections.
|
||||
/// </summary>
|
||||
/// <param name="siteId">The DB id of the site whose data connections are collected.</param>
|
||||
/// <param name="deploymentId">
|
||||
/// DeploymentManager-010: the logical deployment id for this artifact deployment. All per-site
|
||||
/// commands of one <see cref="DeployToAllSitesAsync"/> call share this id so the audit log,
|
||||
/// UI summary, and persisted record correlate. When <c>null</c>, a fresh id is minted (used by
|
||||
/// single-site retries).
|
||||
/// </param>
|
||||
public async Task<DeployArtifactsCommand> BuildDeployArtifactsCommandAsync(
|
||||
int siteId,
|
||||
CancellationToken cancellationToken = default)
|
||||
CancellationToken cancellationToken = default,
|
||||
string? deploymentId = null)
|
||||
{
|
||||
var sharedScripts = await _templateRepo.GetAllSharedScriptsAsync(cancellationToken);
|
||||
var externalSystems = await _externalSystemRepo.GetAllExternalSystemsAsync(cancellationToken);
|
||||
@@ -111,7 +119,7 @@ public class ArtifactDeploymentService
|
||||
smtp.Credentials, null, smtp.TlsMode)).ToList();
|
||||
|
||||
return new DeployArtifactsCommand(
|
||||
Guid.NewGuid().ToString("N"),
|
||||
deploymentId ?? Guid.NewGuid().ToString("N"),
|
||||
scriptArtifacts,
|
||||
externalSystemArtifacts,
|
||||
dbConnectionArtifacts,
|
||||
@@ -136,11 +144,15 @@ public class ArtifactDeploymentService
|
||||
var deploymentId = Guid.NewGuid().ToString("N");
|
||||
var perSiteResults = new Dictionary<string, SiteArtifactResult>();
|
||||
|
||||
// Build per-site commands sequentially (DbContext is not thread-safe)
|
||||
// Build per-site commands sequentially (DbContext is not thread-safe).
|
||||
// DeploymentManager-010: every per-site command carries the SAME logical
|
||||
// deploymentId, so the per-site commands, audit log, persisted record,
|
||||
// and UI summary all reference one id instead of N+1 unrelated GUIDs.
|
||||
var siteCommands = new Dictionary<int, DeployArtifactsCommand>();
|
||||
foreach (var site in sites)
|
||||
{
|
||||
siteCommands[site.Id] = await BuildDeployArtifactsCommandAsync(site.Id, cancellationToken);
|
||||
siteCommands[site.Id] = await BuildDeployArtifactsCommandAsync(
|
||||
site.Id, cancellationToken, deploymentId);
|
||||
}
|
||||
|
||||
// Deploy to each site in parallel with per-site timeout
|
||||
@@ -190,11 +202,20 @@ public class ArtifactDeploymentService
|
||||
perSiteResults[result.SiteId] = result;
|
||||
}
|
||||
|
||||
// Persist the system artifact deployment record
|
||||
// Persist the system artifact deployment record.
|
||||
// DeploymentManager-010: SystemArtifactDeploymentRecord has no dedicated
|
||||
// DeploymentId column (adding one is a Commons/ConfigurationDatabase
|
||||
// schema change outside this module). The logical deploymentId is
|
||||
// embedded in the PerSiteStatus payload so the persisted record can be
|
||||
// correlated with the audit log and UI summary that report the same id.
|
||||
var record = new SystemArtifactDeploymentRecord("Artifacts", user)
|
||||
{
|
||||
DeployedAt = DateTimeOffset.UtcNow,
|
||||
PerSiteStatus = JsonSerializer.Serialize(perSiteResults)
|
||||
PerSiteStatus = JsonSerializer.Serialize(new
|
||||
{
|
||||
DeploymentId = deploymentId,
|
||||
Sites = perSiteResults
|
||||
})
|
||||
};
|
||||
await _deploymentRepo.AddSystemArtifactDeploymentAsync(record, cancellationToken);
|
||||
await _deploymentRepo.SaveChangesAsync(cancellationToken);
|
||||
|
||||
@@ -5,7 +5,11 @@ namespace ScadaLink.DeploymentManager;
|
||||
/// </summary>
|
||||
public class DeploymentManagerOptions
|
||||
{
|
||||
/// <summary>Timeout for lifecycle commands sent to sites (disable, enable, delete).</summary>
|
||||
/// <summary>
|
||||
/// WP-6: Timeout for a lifecycle command round-trip (disable, enable, delete).
|
||||
/// Applied as a linked-CTS deadline in <c>DeploymentService</c> so a hung or
|
||||
/// unreachable site does not hold the per-instance operation lock indefinitely.
|
||||
/// </summary>
|
||||
public TimeSpan LifecycleCommandTimeout { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <summary>WP-7: Timeout per site for system-wide artifact deployment.</summary>
|
||||
|
||||
@@ -302,7 +302,21 @@ public class DeploymentService
|
||||
var siteId = await ResolveSiteIdentifierAsync(instance.SiteId, cancellationToken);
|
||||
var command = new DisableInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
|
||||
|
||||
var response = await _communicationService.DisableInstanceAsync(siteId, command, cancellationToken);
|
||||
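// WP-6: bound the round-trip with the configured lifecycle timeout so a
|
||||
// hung/unreachable site does not block the operation lock indefinitely.
|
||||
// NOTE(review): the catch filter below also matches an OperationCanceledException
|
||||
// raised by caller-initiated cancellation (the linked token fires in both cases),
|
||||
// so a caller cancel is logged and returned as a site timeout. Consider checking
|
||||
// cancellationToken.IsCancellationRequested before treating it as a timeout.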
|
||||
InstanceLifecycleResponse response;
|
||||
try
|
||||
{
|
||||
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
cts.CancelAfter(_options.LifecycleCommandTimeout);
|
||||
response = await _communicationService.DisableInstanceAsync(siteId, command, cts.Token);
|
||||
}
|
||||
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Disable of instance {Instance} timed out", instance.UniqueName);
|
||||
return Result<InstanceLifecycleResponse>.Failure(
|
||||
$"Disable failed: the site did not respond within {_options.LifecycleCommandTimeout}.");
|
||||
}
|
||||
|
||||
if (response.Success)
|
||||
{
|
||||
@@ -343,7 +357,20 @@ public class DeploymentService
|
||||
var siteId = await ResolveSiteIdentifierAsync(instance.SiteId, cancellationToken);
|
||||
var command = new EnableInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
|
||||
|
||||
var response = await _communicationService.EnableInstanceAsync(siteId, command, cancellationToken);
|
||||
// WP-6: bound the round-trip with the configured lifecycle timeout.
|
||||
InstanceLifecycleResponse response;
|
||||
try
|
||||
{
|
||||
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
cts.CancelAfter(_options.LifecycleCommandTimeout);
|
||||
response = await _communicationService.EnableInstanceAsync(siteId, command, cts.Token);
|
||||
}
|
||||
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Enable of instance {Instance} timed out", instance.UniqueName);
|
||||
return Result<InstanceLifecycleResponse>.Failure(
|
||||
$"Enable failed: the site did not respond within {_options.LifecycleCommandTimeout}.");
|
||||
}
|
||||
|
||||
if (response.Success)
|
||||
{
|
||||
@@ -365,7 +392,9 @@ public class DeploymentService
|
||||
/// WP-6: Delete an instance. Stops the site actor, removes site config, and
|
||||
/// removes the central instance record (deployment history, snapshot,
|
||||
/// overrides, and connection bindings go with it). S&F NOT cleared.
|
||||
/// Delete fails if site unreachable (30s timeout via CommunicationOptions).
|
||||
/// Delete fails if the site does not respond within <c>LifecycleCommandTimeout</c>
|
||||
/// (linked-CTS deadline applied in this method) or within
|
||||
/// <c>CommunicationOptions.LifecycleTimeout</c> (applied inside
|
||||
/// <see cref="CommunicationService.DeleteInstanceAsync"/>).
|
||||
/// </summary>
|
||||
public async Task<Result<InstanceLifecycleResponse>> DeleteInstanceAsync(
|
||||
int instanceId,
|
||||
@@ -387,7 +416,20 @@ public class DeploymentService
|
||||
var siteId = await ResolveSiteIdentifierAsync(instance.SiteId, cancellationToken);
|
||||
var command = new DeleteInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
|
||||
|
||||
var response = await _communicationService.DeleteInstanceAsync(siteId, command, cancellationToken);
|
||||
// WP-6: bound the round-trip with the configured lifecycle timeout.
|
||||
InstanceLifecycleResponse response;
|
||||
try
|
||||
{
|
||||
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
cts.CancelAfter(_options.LifecycleCommandTimeout);
|
||||
response = await _communicationService.DeleteInstanceAsync(siteId, command, cts.Token);
|
||||
}
|
||||
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Delete of instance {Instance} timed out", instance.UniqueName);
|
||||
return Result<InstanceLifecycleResponse>.Failure(
|
||||
$"Delete failed: the site did not respond within {_options.LifecycleCommandTimeout}.");
|
||||
}
|
||||
|
||||
if (response.Success)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user