fix(deployment-manager): resolve DeploymentManager-015..017 — reconciliation applies post-success side effects, updates RevisionHash, and corrects XML doc
This commit is contained in:
@@ -142,6 +142,10 @@ public class DeploymentService
|
||||
return Result<DeploymentRecord>.Failure($"Pre-deployment validation failed: {errors}");
|
||||
}
|
||||
|
||||
// Serialize for transmission (also the payload stored in the deployed
|
||||
// snapshot on success / reconciliation).
|
||||
var configJson = JsonSerializer.Serialize(flattenedConfig);
|
||||
|
||||
// DeploymentManager-006: query-the-site-before-redeploy idempotency.
|
||||
// If a prior deployment for this instance is stuck InProgress or Failed
|
||||
// due to a timeout, the site may have actually applied the config. Query
|
||||
@@ -150,13 +154,10 @@ public class DeploymentService
|
||||
// Idempotency"). A clean prior Success or a fresh first-time deploy
|
||||
// skips this extra round-trip.
|
||||
var reconciled = await TryReconcileWithSiteAsync(
|
||||
instance, revisionHash, cancellationToken);
|
||||
instance, revisionHash, configJson, cancellationToken);
|
||||
if (reconciled != null)
|
||||
return Result<DeploymentRecord>.Success(reconciled);
|
||||
|
||||
// Serialize for transmission
|
||||
var configJson = JsonSerializer.Serialize(flattenedConfig);
|
||||
|
||||
// WP-4: Create deployment record with Pending status
|
||||
var record = new DeploymentRecord(deploymentId, user)
|
||||
{
|
||||
@@ -210,25 +211,8 @@ public class DeploymentService
|
||||
// persistence below is best-effort: a failure here must be
|
||||
// logged loudly for operator reconciliation but must not flip
|
||||
// the already-committed Success record back to Failed.
|
||||
try
|
||||
{
|
||||
// WP-4: Update instance state to Enabled on successful deployment
|
||||
instance.State = InstanceState.Enabled;
|
||||
await _repository.UpdateInstanceAsync(instance, cancellationToken);
|
||||
|
||||
// WP-8: Store deployed config snapshot
|
||||
await StoreDeployedSnapshotAsync(instanceId, deploymentId, revisionHash, configJson, cancellationToken);
|
||||
|
||||
await _repository.SaveChangesAsync(cancellationToken);
|
||||
}
|
||||
catch (Exception postEx)
|
||||
{
|
||||
_logger.LogError(postEx,
|
||||
"Deployment {DeploymentId} for instance {Instance} was applied by the site and " +
|
||||
"recorded Success, but post-success persistence (instance state / config snapshot) " +
|
||||
"failed -- central and site state may diverge until reconciled",
|
||||
deploymentId, instance.UniqueName);
|
||||
}
|
||||
await ApplyPostSuccessSideEffectsAsync(
|
||||
instance, deploymentId, revisionHash, configJson, cancellationToken);
|
||||
}
|
||||
|
||||
// Audit log
|
||||
@@ -560,7 +544,12 @@ public class DeploymentService
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// WP-2: After failover/timeout, query site for current deployment state before re-deploying.
|
||||
/// WP-2: Returns the current persisted <see cref="DeploymentRecord"/> for
|
||||
/// the given deployment ID from the configuration database. This is a pure
|
||||
/// local DB read — it does not contact the site. The query-the-site-before-
|
||||
/// redeploy reconciliation (design: "Deployment Identity & Idempotency")
|
||||
/// lives in <see cref="TryReconcileWithSiteAsync"/>, which
|
||||
/// <see cref="DeployInstanceAsync"/> invokes on the deploy path.
|
||||
/// </summary>
|
||||
public async Task<DeploymentRecord?> GetDeploymentStatusAsync(
|
||||
string deploymentId,
|
||||
@@ -580,9 +569,14 @@ public class DeploymentService
|
||||
/// prior <see cref="DeploymentStatus.Success"/> skip the extra round-trip.
|
||||
///
|
||||
/// Reconciliation: if the site already has the TARGET revision hash, the
|
||||
/// prior record is marked <see cref="DeploymentStatus.Success"/> and
|
||||
/// returned (the caller must NOT re-send the deploy). Otherwise <c>null</c>
|
||||
/// is returned and the normal deploy proceeds.
|
||||
/// prior record is marked <see cref="DeploymentStatus.Success"/> (with its
|
||||
/// <see cref="DeploymentRecord.RevisionHash"/> corrected to the target —
|
||||
/// DeploymentManager-016) and returned (the caller must NOT re-send the
|
||||
/// deploy). The same post-success side effects as the normal deploy path
|
||||
/// are applied — instance <see cref="InstanceState.Enabled"/> and a stored
|
||||
/// <see cref="DeployedConfigSnapshot"/> (DeploymentManager-015) — so central
|
||||
/// and site state do not diverge. Otherwise <c>null</c> is returned and the
|
||||
/// normal deploy proceeds.
|
||||
///
|
||||
/// Query failure: if the site is unreachable or the query times out, this
|
||||
/// returns <c>null</c> (fall through to a normal deploy) — site-side
|
||||
@@ -592,6 +586,7 @@ public class DeploymentService
|
||||
private async Task<DeploymentRecord?> TryReconcileWithSiteAsync(
|
||||
Instance instance,
|
||||
string targetRevisionHash,
|
||||
string configJson,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var prior = await _repository.GetCurrentDeploymentStatusAsync(instance.Id, cancellationToken);
|
||||
@@ -639,10 +634,23 @@ public class DeploymentService
|
||||
prior.Status = DeploymentStatus.Success;
|
||||
prior.ErrorMessage = null;
|
||||
prior.CompletedAt = DateTimeOffset.UtcNow;
|
||||
// DeploymentManager-016: the prior record can legitimately carry a
|
||||
// different (stale) revision hash than the current target. The site
|
||||
// confirmed it is running the target revision, so the persisted
|
||||
// record, the audit entry below, and the site must all agree.
|
||||
prior.RevisionHash = targetRevisionHash;
|
||||
await _repository.UpdateDeploymentRecordAsync(prior, cancellationToken);
|
||||
await _repository.SaveChangesAsync(cancellationToken);
|
||||
NotifyStatusChange(prior);
|
||||
|
||||
// DeploymentManager-015: a reconciled deployment must perform the
|
||||
// SAME post-success side effects as the normal deploy path — set
|
||||
// the instance State to Enabled and store/refresh the deployed
|
||||
// config snapshot — otherwise the central state machine and the
|
||||
// deployed-snapshot invariant diverge from what the site is running.
|
||||
await ApplyPostSuccessSideEffectsAsync(
|
||||
instance, prior.DeploymentId, targetRevisionHash, configJson, cancellationToken);
|
||||
|
||||
await _auditService.LogAsync(prior.DeployedBy, "DeployReconciled", "Instance",
|
||||
instance.Id.ToString(), instance.UniqueName,
|
||||
new { DeploymentId = prior.DeploymentId, RevisionHash = targetRevisionHash },
|
||||
@@ -669,6 +677,47 @@ public class DeploymentService
|
||||
&& prior.ErrorMessage != null
|
||||
&& prior.ErrorMessage.StartsWith(TimeoutFailurePrefix, StringComparison.Ordinal));
|
||||
|
||||
/// <summary>
/// Runs the bookkeeping that follows a deployment already recorded as
/// <see cref="DeploymentStatus.Success"/>: marks the instance
/// <see cref="InstanceState.Enabled"/> (WP-4), stores the deployed config
/// snapshot (WP-8), and saves the changes. Both the normal deploy path and
/// the DeploymentManager-006 reconciliation path call this single helper so
/// their post-success behaviour stays identical (DeploymentManager-015).
///
/// Best-effort by design: the caller has committed the Success record before
/// invoking this method, so any exception raised while persisting is logged
/// at error level for operator follow-up and then swallowed. It is never
/// rethrown, and the deployment record is not touched here.
/// </summary>
/// <param name="instance">Instance whose state is set to Enabled and persisted.</param>
/// <param name="deploymentId">Deployment the snapshot is stored for; also written to the failure log.</param>
/// <param name="revisionHash">Revision hash forwarded to the snapshot store.</param>
/// <param name="configJson">Serialized configuration forwarded to the snapshot store.</param>
/// <param name="cancellationToken">Token passed to every repository call made here.</param>
private async Task ApplyPostSuccessSideEffectsAsync(
    Instance instance,
    string deploymentId,
    string revisionHash,
    string configJson,
    CancellationToken cancellationToken)
{
    const string persistenceFailureTemplate =
        "Deployment {DeploymentId} for instance {Instance} was applied by the site and " +
        "recorded Success, but post-success persistence (instance state / config snapshot) " +
        "failed -- central and site state may diverge until reconciled";

    try
    {
        await PersistSideEffectsAsync();
    }
    catch (Exception persistenceError)
    {
        // Deliberately not rethrown: the Success record is already committed.
        _logger.LogError(
            persistenceError,
            persistenceFailureTemplate,
            deploymentId,
            instance.UniqueName);
    }

    // The three writes share one SaveChanges so they are flushed together.
    async Task PersistSideEffectsAsync()
    {
        // WP-4: a successfully deployed instance becomes Enabled.
        instance.State = InstanceState.Enabled;
        await _repository.UpdateInstanceAsync(instance, cancellationToken);

        // WP-8: record what was deployed.
        await StoreDeployedSnapshotAsync(
            instance.Id, deploymentId, revisionHash, configJson, cancellationToken);

        await _repository.SaveChangesAsync(cancellationToken);
    }
}
|
||||
|
||||
private async Task StoreDeployedSnapshotAsync(
|
||||
int instanceId,
|
||||
string deploymentId,
|
||||
|
||||
Reference in New Issue
Block a user