fix(deployment-manager): resolve DeploymentManager-015..017 — reconciliation applies post-success side effects, updates RevisionHash, corrected XML doc
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-17 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `39d737e` |
|
||||
| Open findings | 3 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -740,7 +740,7 @@ covers DeploymentManager-010), `DeployToAllSitesAsync_PartialFailure_ReportsPerS
|
||||
|--|--|
|
||||
| Severity | High |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:631-655` |
|
||||
|
||||
**Description**
|
||||
@@ -785,7 +785,15 @@ reconciled deployment leaves the instance `Enabled` and a snapshot stored.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-17 (commit pending): extracted the shared post-success side
|
||||
effects into `ApplyPostSuccessSideEffectsAsync` (sets instance `State =
|
||||
Enabled` + `UpdateInstanceAsync`, stores/refreshes the `DeployedConfigSnapshot`)
|
||||
and invoked it from both the normal deploy success path and the
|
||||
`TryReconcileWithSiteAsync` reconciled-success branch, so a reconciled
|
||||
deployment now performs the same instance-state and snapshot updates as a
|
||||
normal one (`configJson` is now computed before the reconciliation call and
|
||||
threaded into `TryReconcileWithSiteAsync`). Regression test:
|
||||
`DeployInstanceAsync_Reconciled_SetsInstanceEnabledAndStoresSnapshot`.
|
||||
|
||||
### DeploymentManager-016 — Reconciled prior record keeps its stale `RevisionHash`
|
||||
|
||||
@@ -793,7 +801,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Medium |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:639-651` |
|
||||
|
||||
**Description**
|
||||
@@ -824,7 +832,11 @@ normal deploy.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-17 (commit pending): the reconciled-success branch of
|
||||
`TryReconcileWithSiteAsync` now also sets `prior.RevisionHash =
|
||||
targetRevisionHash`, so the persisted record, the `DeployReconciled` audit
|
||||
entry, and the site's actually-applied revision all agree. Regression test:
|
||||
`DeployInstanceAsync_Reconciled_PriorRecordRevisionHashUpdatedToTarget`.
|
||||
|
||||
### DeploymentManager-017 — `GetDeploymentStatusAsync` XML doc describes behaviour it does not implement
|
||||
|
||||
@@ -832,7 +844,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:562-570` |
|
||||
|
||||
**Description**
|
||||
@@ -855,4 +867,9 @@ configuration database" — and, if useful, cross-reference
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-17 (commit pending): the `GetDeploymentStatusAsync` XML doc
|
||||
now states it returns the persisted `DeploymentRecord` from the configuration
|
||||
database as a pure local read, and cross-references `TryReconcileWithSiteAsync`
|
||||
as where the query-the-site-before-redeploy reconciliation actually lives.
|
||||
Documentation-only change; no regression test (a test asserting comment text
|
||||
would be meaningless).
|
||||
|
||||
@@ -142,6 +142,10 @@ public class DeploymentService
|
||||
return Result<DeploymentRecord>.Failure($"Pre-deployment validation failed: {errors}");
|
||||
}
|
||||
|
||||
// Serialize for transmission (also the payload stored in the deployed
|
||||
// snapshot on success / reconciliation).
|
||||
var configJson = JsonSerializer.Serialize(flattenedConfig);
|
||||
|
||||
// DeploymentManager-006: query-the-site-before-redeploy idempotency.
|
||||
// If a prior deployment for this instance is stuck InProgress or Failed
|
||||
// due to a timeout, the site may have actually applied the config. Query
|
||||
@@ -150,13 +154,10 @@ public class DeploymentService
|
||||
// Idempotency"). A clean prior Success or a fresh first-time deploy
|
||||
// skips this extra round-trip.
|
||||
var reconciled = await TryReconcileWithSiteAsync(
|
||||
instance, revisionHash, cancellationToken);
|
||||
instance, revisionHash, configJson, cancellationToken);
|
||||
if (reconciled != null)
|
||||
return Result<DeploymentRecord>.Success(reconciled);
|
||||
|
||||
// Serialize for transmission
|
||||
var configJson = JsonSerializer.Serialize(flattenedConfig);
|
||||
|
||||
// WP-4: Create deployment record with Pending status
|
||||
var record = new DeploymentRecord(deploymentId, user)
|
||||
{
|
||||
@@ -210,25 +211,8 @@ public class DeploymentService
|
||||
// persistence below is best-effort: a failure here must be
|
||||
// logged loudly for operator reconciliation but must not flip
|
||||
// the already-committed Success record back to Failed.
|
||||
try
|
||||
{
|
||||
// WP-4: Update instance state to Enabled on successful deployment
|
||||
instance.State = InstanceState.Enabled;
|
||||
await _repository.UpdateInstanceAsync(instance, cancellationToken);
|
||||
|
||||
// WP-8: Store deployed config snapshot
|
||||
await StoreDeployedSnapshotAsync(instanceId, deploymentId, revisionHash, configJson, cancellationToken);
|
||||
|
||||
await _repository.SaveChangesAsync(cancellationToken);
|
||||
}
|
||||
catch (Exception postEx)
|
||||
{
|
||||
_logger.LogError(postEx,
|
||||
"Deployment {DeploymentId} for instance {Instance} was applied by the site and " +
|
||||
"recorded Success, but post-success persistence (instance state / config snapshot) " +
|
||||
"failed -- central and site state may diverge until reconciled",
|
||||
deploymentId, instance.UniqueName);
|
||||
}
|
||||
await ApplyPostSuccessSideEffectsAsync(
|
||||
instance, deploymentId, revisionHash, configJson, cancellationToken);
|
||||
}
|
||||
|
||||
// Audit log
|
||||
@@ -560,7 +544,12 @@ public class DeploymentService
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// WP-2: After failover/timeout, query site for current deployment state before re-deploying.
|
||||
/// WP-2: Returns the current persisted <see cref="DeploymentRecord"/> for
|
||||
/// the given deployment ID from the configuration database. This is a pure
|
||||
/// local DB read — it does not contact the site. The query-the-site-before-
|
||||
/// redeploy reconciliation (design: "Deployment Identity & Idempotency")
|
||||
/// lives in <see cref="TryReconcileWithSiteAsync"/>, which
|
||||
/// <see cref="DeployInstanceAsync"/> invokes on the deploy path.
|
||||
/// </summary>
|
||||
public async Task<DeploymentRecord?> GetDeploymentStatusAsync(
|
||||
string deploymentId,
|
||||
@@ -580,9 +569,14 @@ public class DeploymentService
|
||||
/// prior <see cref="DeploymentStatus.Success"/> skip the extra round-trip.
|
||||
///
|
||||
/// Reconciliation: if the site already has the TARGET revision hash, the
|
||||
/// prior record is marked <see cref="DeploymentStatus.Success"/> and
|
||||
/// returned (the caller must NOT re-send the deploy). Otherwise <c>null</c>
|
||||
/// is returned and the normal deploy proceeds.
|
||||
/// prior record is marked <see cref="DeploymentStatus.Success"/> (with its
|
||||
/// <see cref="DeploymentRecord.RevisionHash"/> corrected to the target —
|
||||
/// DeploymentManager-016) and returned (the caller must NOT re-send the
|
||||
/// deploy). The same post-success side effects as the normal deploy path
|
||||
/// are applied — instance <see cref="InstanceState.Enabled"/> and a stored
|
||||
/// <see cref="DeployedConfigSnapshot"/> (DeploymentManager-015) — so central
|
||||
/// and site state do not diverge. Otherwise <c>null</c> is returned and the
|
||||
/// normal deploy proceeds.
|
||||
///
|
||||
/// Query failure: if the site is unreachable or the query times out, this
|
||||
/// returns <c>null</c> (fall through to a normal deploy) — site-side
|
||||
@@ -592,6 +586,7 @@ public class DeploymentService
|
||||
private async Task<DeploymentRecord?> TryReconcileWithSiteAsync(
|
||||
Instance instance,
|
||||
string targetRevisionHash,
|
||||
string configJson,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var prior = await _repository.GetCurrentDeploymentStatusAsync(instance.Id, cancellationToken);
|
||||
@@ -639,10 +634,23 @@ public class DeploymentService
|
||||
prior.Status = DeploymentStatus.Success;
|
||||
prior.ErrorMessage = null;
|
||||
prior.CompletedAt = DateTimeOffset.UtcNow;
|
||||
// DeploymentManager-016: the prior record can legitimately carry a
|
||||
// different (stale) revision hash than the current target. The site
|
||||
// confirmed it is running the target revision, so the persisted
|
||||
// record, the audit entry below, and the site must all agree.
|
||||
prior.RevisionHash = targetRevisionHash;
|
||||
await _repository.UpdateDeploymentRecordAsync(prior, cancellationToken);
|
||||
await _repository.SaveChangesAsync(cancellationToken);
|
||||
NotifyStatusChange(prior);
|
||||
|
||||
// DeploymentManager-015: a reconciled deployment must perform the
|
||||
// SAME post-success side effects as the normal deploy path — set
|
||||
// the instance State to Enabled and store/refresh the deployed
|
||||
// config snapshot — otherwise the central state machine and the
|
||||
// deployed-snapshot invariant diverge from what the site is running.
|
||||
await ApplyPostSuccessSideEffectsAsync(
|
||||
instance, prior.DeploymentId, targetRevisionHash, configJson, cancellationToken);
|
||||
|
||||
await _auditService.LogAsync(prior.DeployedBy, "DeployReconciled", "Instance",
|
||||
instance.Id.ToString(), instance.UniqueName,
|
||||
new { DeploymentId = prior.DeploymentId, RevisionHash = targetRevisionHash },
|
||||
@@ -669,6 +677,47 @@ public class DeploymentService
|
||||
&& prior.ErrorMessage != null
|
||||
&& prior.ErrorMessage.StartsWith(TimeoutFailurePrefix, StringComparison.Ordinal));
|
||||
|
||||
/// <summary>
|
||||
/// Post-success side effects shared by the normal deploy path and the
|
||||
/// DeploymentManager-006 reconciliation path: set the instance
|
||||
/// <see cref="InstanceState.Enabled"/> (WP-4) and store/refresh the
|
||||
/// deployed config snapshot (WP-8). Factored into one helper so the two
|
||||
/// paths cannot drift (DeploymentManager-015).
|
||||
///
|
||||
/// Best-effort: the deployment record's terminal <see cref="DeploymentStatus.Success"/>
|
||||
/// status is already committed by the caller before this runs. A failure
|
||||
/// here is logged loudly for operator reconciliation but is NOT propagated —
|
||||
/// it must not flip the already-committed Success record back to Failed.
|
||||
/// </summary>
|
||||
private async Task ApplyPostSuccessSideEffectsAsync(
|
||||
Instance instance,
|
||||
string deploymentId,
|
||||
string revisionHash,
|
||||
string configJson,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
// WP-4: Update instance state to Enabled on successful deployment
|
||||
instance.State = InstanceState.Enabled;
|
||||
await _repository.UpdateInstanceAsync(instance, cancellationToken);
|
||||
|
||||
// WP-8: Store deployed config snapshot
|
||||
await StoreDeployedSnapshotAsync(
|
||||
instance.Id, deploymentId, revisionHash, configJson, cancellationToken);
|
||||
|
||||
await _repository.SaveChangesAsync(cancellationToken);
|
||||
}
|
||||
catch (Exception postEx)
|
||||
{
|
||||
_logger.LogError(postEx,
|
||||
"Deployment {DeploymentId} for instance {Instance} was applied by the site and " +
|
||||
"recorded Success, but post-success persistence (instance state / config snapshot) " +
|
||||
"failed -- central and site state may diverge until reconciled",
|
||||
deploymentId, instance.UniqueName);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task StoreDeployedSnapshotAsync(
|
||||
int instanceId,
|
||||
string deploymentId,
|
||||
|
||||
@@ -766,6 +766,99 @@ public class DeploymentServiceTests : TestKit
|
||||
Assert.Equal(1, ReconcileProbeActor.DeployCount);
|
||||
}
|
||||
|
||||
// ── DeploymentManager-015: reconciliation must perform the normal success side effects ──
|
||||
|
||||
[Fact]
|
||||
public async Task DeployInstanceAsync_Reconciled_SetsInstanceEnabledAndStoresSnapshot()
|
||||
{
|
||||
// A prior deploy timed out; the site actually applied the target
|
||||
// revision. Reconciliation marks the prior record Success WITHOUT
|
||||
// re-sending -- but it must still perform the same side effects as the
|
||||
// normal success path: set the instance State to Enabled and store the
|
||||
// deployed-config snapshot. Otherwise central and site diverge.
|
||||
var instance = new Instance("ReconcileSideEffects")
|
||||
{
|
||||
Id = 70, SiteId = 1, State = InstanceState.NotDeployed
|
||||
};
|
||||
_repo.GetInstanceByIdAsync(70, Arg.Any<CancellationToken>()).Returns(instance);
|
||||
SetupValidPipeline(70, "ReconcileSideEffects", "sha256:target");
|
||||
|
||||
var prior = new DeploymentRecord("dep-prior-70", "admin")
|
||||
{
|
||||
InstanceId = 70,
|
||||
Status = DeploymentStatus.InProgress,
|
||||
RevisionHash = "sha256:target"
|
||||
};
|
||||
_repo.GetCurrentDeploymentStatusAsync(70, Arg.Any<CancellationToken>()).Returns(prior);
|
||||
_repo.GetDeployedSnapshotByInstanceIdAsync(70, Arg.Any<CancellationToken>())
|
||||
.Returns((DeployedConfigSnapshot?)null);
|
||||
|
||||
DeployedConfigSnapshot? storedSnapshot = null;
|
||||
await _repo.AddDeployedSnapshotAsync(
|
||||
Arg.Do<DeployedConfigSnapshot>(s => storedSnapshot = s), Arg.Any<CancellationToken>());
|
||||
|
||||
var commActor = Sys.ActorOf(Props.Create(() =>
|
||||
new ReconcileProbeActor(siteHash: "sha256:target", failQuery: false)));
|
||||
var service = CreateServiceWithCommActor(commActor);
|
||||
|
||||
var result = await service.DeployInstanceAsync(70, "admin");
|
||||
|
||||
Assert.True(result.IsSuccess);
|
||||
// No re-deploy was sent -- this was reconciled.
|
||||
Assert.Equal(1, ReconcileProbeActor.QueryCount);
|
||||
Assert.Equal(0, ReconcileProbeActor.DeployCount);
|
||||
|
||||
// DeploymentManager-015: the instance State must reflect the deployed
|
||||
// config the site is actually running.
|
||||
Assert.Equal(InstanceState.Enabled, instance.State);
|
||||
await _repo.Received().UpdateInstanceAsync(instance, Arg.Any<CancellationToken>());
|
||||
|
||||
// DeploymentManager-015: a deployed-config snapshot must be stored so
|
||||
// GetDeploymentComparisonAsync has something to compare against.
|
||||
Assert.NotNull(storedSnapshot);
|
||||
Assert.Equal(70, storedSnapshot!.InstanceId);
|
||||
Assert.Equal("sha256:target", storedSnapshot.RevisionHash);
|
||||
}
|
||||
|
||||
// ── DeploymentManager-016: reconciled record must carry the target revision hash ──
|
||||
|
||||
[Fact]
|
||||
public async Task DeployInstanceAsync_Reconciled_PriorRecordRevisionHashUpdatedToTarget()
|
||||
{
|
||||
// The prior record carries a stale revision hash (R1), but the site
|
||||
// reports it has the freshly-flattened target revision (R2). After
|
||||
// reconciliation the prior record's RevisionHash must agree with the
|
||||
// site's applied revision -- not keep the stale R1.
|
||||
var instance = new Instance("ReconcileStaleHash")
|
||||
{
|
||||
Id = 71, SiteId = 1, State = InstanceState.Enabled
|
||||
};
|
||||
_repo.GetInstanceByIdAsync(71, Arg.Any<CancellationToken>()).Returns(instance);
|
||||
SetupValidPipeline(71, "ReconcileStaleHash", "sha256:target");
|
||||
|
||||
var prior = new DeploymentRecord("dep-prior-71", "admin")
|
||||
{
|
||||
InstanceId = 71,
|
||||
Status = DeploymentStatus.InProgress,
|
||||
RevisionHash = "sha256:stale-R1"
|
||||
};
|
||||
_repo.GetCurrentDeploymentStatusAsync(71, Arg.Any<CancellationToken>()).Returns(prior);
|
||||
_repo.GetDeployedSnapshotByInstanceIdAsync(71, Arg.Any<CancellationToken>())
|
||||
.Returns((DeployedConfigSnapshot?)null);
|
||||
|
||||
var commActor = Sys.ActorOf(Props.Create(() =>
|
||||
new ReconcileProbeActor(siteHash: "sha256:target", failQuery: false)));
|
||||
var service = CreateServiceWithCommActor(commActor);
|
||||
|
||||
var result = await service.DeployInstanceAsync(71, "admin");
|
||||
|
||||
Assert.True(result.IsSuccess);
|
||||
Assert.Equal(DeploymentStatus.Success, prior.Status);
|
||||
// DeploymentManager-016: the persisted record's RevisionHash must agree
|
||||
// with the site's applied revision, not keep the stale R1.
|
||||
Assert.Equal("sha256:target", prior.RevisionHash);
|
||||
}
|
||||
|
||||
// ── DeploymentManager-012: LifecycleCommandTimeout must actually bound lifecycle commands ──
|
||||
|
||||
[Fact]
|
||||
|
||||
Reference in New Issue
Block a user