diff --git a/code-reviews/DeploymentManager/findings.md b/code-reviews/DeploymentManager/findings.md index 8b2c350..7c19ca1 100644 --- a/code-reviews/DeploymentManager/findings.md +++ b/code-reviews/DeploymentManager/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-17 | | Reviewer | claude-agent | | Commit reviewed | `39d737e` | -| Open findings | 3 | +| Open findings | 0 | ## Summary @@ -740,7 +740,7 @@ covers DeploymentManager-010), `DeployToAllSitesAsync_PartialFailure_ReportsPerS |--|--| | Severity | High | | Category | Correctness & logic bugs | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:631-655` | **Description** @@ -785,7 +785,15 @@ reconciled deployment leaves the instance `Enabled` and a snapshot stored. **Resolution** -_Unresolved._ +Resolved 2026-05-17 (commit pending): extracted the shared post-success side +effects into `ApplyPostSuccessSideEffectsAsync` (sets instance `State = +Enabled` + `UpdateInstanceAsync`, stores/refreshes the `DeployedConfigSnapshot`) +and invoked it from both the normal deploy success path and the +`TryReconcileWithSiteAsync` reconciled-success branch, so a reconciled +deployment now performs the same instance-state and snapshot updates as a +normal one (`configJson` is now computed before the reconciliation call and +threaded into `TryReconcileWithSiteAsync`). Regression test: +`DeployInstanceAsync_Reconciled_SetsInstanceEnabledAndStoresSnapshot`. ### DeploymentManager-016 — Reconciled prior record keeps its stale `RevisionHash` @@ -793,7 +801,7 @@ _Unresolved._ |--|--| | Severity | Medium | | Category | Correctness & logic bugs | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:639-651` | **Description** @@ -824,7 +832,11 @@ normal deploy. **Resolution** -_Unresolved._ +Resolved 2026-05-17 (commit pending): the reconciled-success branch of +`TryReconcileWithSiteAsync` now also sets `prior.RevisionHash = +targetRevisionHash`, so the persisted record, the `DeployReconciled` audit +entry, and the site's actually-applied revision all agree. Regression test: +`DeployInstanceAsync_Reconciled_PriorRecordRevisionHashUpdatedToTarget`. ### DeploymentManager-017 — `GetDeploymentStatusAsync` XML doc describes behaviour it does not implement @@ -832,7 +844,7 @@ _Unresolved._ |--|--| | Severity | Low | | Category | Documentation & comments | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:562-570` | **Description** @@ -855,4 +867,9 @@ configuration database" — and, if useful, cross-reference **Resolution** -_Unresolved._ +Resolved 2026-05-17 (commit pending): the `GetDeploymentStatusAsync` XML doc +now states it returns the persisted `DeploymentRecord` from the configuration +database as a pure local read, and cross-references `TryReconcileWithSiteAsync` +as where the query-the-site-before-redeploy reconciliation actually lives. +Documentation-only change; no regression test (a test asserting comment text +would be meaningless). diff --git a/src/ScadaLink.DeploymentManager/DeploymentService.cs b/src/ScadaLink.DeploymentManager/DeploymentService.cs index 039b7c4..10513e8 100644 --- a/src/ScadaLink.DeploymentManager/DeploymentService.cs +++ b/src/ScadaLink.DeploymentManager/DeploymentService.cs @@ -142,6 +142,10 @@ public class DeploymentService return Result.Failure($"Pre-deployment validation failed: {errors}"); } + // Serialize for transmission (also the payload stored in the deployed + // snapshot on success / reconciliation). + var configJson = JsonSerializer.Serialize(flattenedConfig); + // DeploymentManager-006: query-the-site-before-redeploy idempotency. // If a prior deployment for this instance is stuck InProgress or Failed // due to a timeout, the site may have actually applied the config. Query @@ -150,13 +154,10 @@ public class DeploymentService // Idempotency"). A clean prior Success or a fresh first-time deploy // skips this extra round-trip. var reconciled = await TryReconcileWithSiteAsync( - instance, revisionHash, cancellationToken); + instance, revisionHash, configJson, cancellationToken); if (reconciled != null) return Result.Success(reconciled); - // Serialize for transmission - var configJson = JsonSerializer.Serialize(flattenedConfig); - // WP-4: Create deployment record with Pending status var record = new DeploymentRecord(deploymentId, user) { @@ -210,25 +211,8 @@ public class DeploymentService // persistence below is best-effort: a failure here must be // logged loudly for operator reconciliation but must not flip // the already-committed Success record back to Failed. - try - { - // WP-4: Update instance state to Enabled on successful deployment - instance.State = InstanceState.Enabled; - await _repository.UpdateInstanceAsync(instance, cancellationToken); - - // WP-8: Store deployed config snapshot - await StoreDeployedSnapshotAsync(instanceId, deploymentId, revisionHash, configJson, cancellationToken); - - await _repository.SaveChangesAsync(cancellationToken); - } - catch (Exception postEx) - { - _logger.LogError(postEx, - "Deployment {DeploymentId} for instance {Instance} was applied by the site and " + - "recorded Success, but post-success persistence (instance state / config snapshot) " + - "failed -- central and site state may diverge until reconciled", - deploymentId, instance.UniqueName); - } + await ApplyPostSuccessSideEffectsAsync( + instance, deploymentId, revisionHash, configJson, cancellationToken); } // Audit log @@ -560,7 +544,12 @@ public class DeploymentService } /// - /// WP-2: After failover/timeout, query site for current deployment state before re-deploying. + /// WP-2: Returns the current persisted for + /// the given deployment ID from the configuration database. This is a pure + /// local DB read — it does not contact the site. The query-the-site-before- + /// redeploy reconciliation (design: "Deployment Identity & Idempotency") + /// lives in , which + /// invokes on the deploy path. /// public async Task GetDeploymentStatusAsync( string deploymentId, @@ -580,9 +569,14 @@ public class DeploymentService /// prior skip the extra round-trip. /// /// Reconciliation: if the site already has the TARGET revision hash, the - /// prior record is marked and - /// returned (the caller must NOT re-send the deploy). Otherwise null - /// is returned and the normal deploy proceeds. + /// prior record is marked (with its + /// corrected to the target — + /// DeploymentManager-016) and returned (the caller must NOT re-send the + /// deploy). The same post-success side effects as the normal deploy path + /// are applied — instance and a stored + /// (DeploymentManager-015) — so central + /// and site state do not diverge. Otherwise null is returned and the + /// normal deploy proceeds. /// /// Query failure: if the site is unreachable or the query times out, this /// returns null (fall through to a normal deploy) — site-side @@ -592,6 +586,7 @@ public class DeploymentService private async Task TryReconcileWithSiteAsync( Instance instance, string targetRevisionHash, + string configJson, CancellationToken cancellationToken) { var prior = await _repository.GetCurrentDeploymentStatusAsync(instance.Id, cancellationToken); @@ -639,10 +634,23 @@ public class DeploymentService prior.Status = DeploymentStatus.Success; prior.ErrorMessage = null; prior.CompletedAt = DateTimeOffset.UtcNow; + // DeploymentManager-016: the prior record can legitimately carry a + // different (stale) revision hash than the current target. The site + // confirmed it is running the target revision, so the persisted + // record, the audit entry below, and the site must all agree. + prior.RevisionHash = targetRevisionHash; await _repository.UpdateDeploymentRecordAsync(prior, cancellationToken); await _repository.SaveChangesAsync(cancellationToken); NotifyStatusChange(prior); + // DeploymentManager-015: a reconciled deployment must perform the + // SAME post-success side effects as the normal deploy path — set + // the instance State to Enabled and store/refresh the deployed + // config snapshot — otherwise the central state machine and the + // deployed-snapshot invariant diverge from what the site is running. + await ApplyPostSuccessSideEffectsAsync( + instance, prior.DeploymentId, targetRevisionHash, configJson, cancellationToken); + await _auditService.LogAsync(prior.DeployedBy, "DeployReconciled", "Instance", instance.Id.ToString(), instance.UniqueName, new { DeploymentId = prior.DeploymentId, RevisionHash = targetRevisionHash }, @@ -669,6 +677,47 @@ public class DeploymentService && prior.ErrorMessage != null && prior.ErrorMessage.StartsWith(TimeoutFailurePrefix, StringComparison.Ordinal)); + /// + /// Post-success side effects shared by the normal deploy path and the + /// DeploymentManager-006 reconciliation path: set the instance + /// (WP-4) and store/refresh the + /// deployed config snapshot (WP-8). Factored into one helper so the two + /// paths cannot drift (DeploymentManager-015). + /// + /// Best-effort: the deployment record's terminal + /// status is already committed by the caller before this runs. A failure + /// here is logged loudly for operator reconciliation but is NOT propagated — + /// it must not flip the already-committed Success record back to Failed. + /// + private async Task ApplyPostSuccessSideEffectsAsync( + Instance instance, + string deploymentId, + string revisionHash, + string configJson, + CancellationToken cancellationToken) + { + try + { + // WP-4: Update instance state to Enabled on successful deployment + instance.State = InstanceState.Enabled; + await _repository.UpdateInstanceAsync(instance, cancellationToken); + + // WP-8: Store deployed config snapshot + await StoreDeployedSnapshotAsync( + instance.Id, deploymentId, revisionHash, configJson, cancellationToken); + + await _repository.SaveChangesAsync(cancellationToken); + } + catch (Exception postEx) + { + _logger.LogError(postEx, + "Deployment {DeploymentId} for instance {Instance} was applied by the site and " + + "recorded Success, but post-success persistence (instance state / config snapshot) " + + "failed -- central and site state may diverge until reconciled", + deploymentId, instance.UniqueName); + } + } + private async Task StoreDeployedSnapshotAsync( int instanceId, string deploymentId, diff --git a/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs b/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs index 2931bec..8b06a32 100644 --- a/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs +++ b/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs @@ -766,6 +766,99 @@ public class DeploymentServiceTests : TestKit Assert.Equal(1, ReconcileProbeActor.DeployCount); } + // ── DeploymentManager-015: reconciliation must perform the normal success side effects ── + + [Fact] + public async Task DeployInstanceAsync_Reconciled_SetsInstanceEnabledAndStoresSnapshot() + { + // A prior deploy timed out; the site actually applied the target + // revision. Reconciliation marks the prior record Success WITHOUT + // re-sending -- but it must still perform the same side effects as the + // normal success path: set the instance State to Enabled and store the + // deployed-config snapshot. Otherwise central and site diverge. + var instance = new Instance("ReconcileSideEffects") + { + Id = 70, SiteId = 1, State = InstanceState.NotDeployed + }; + _repo.GetInstanceByIdAsync(70, Arg.Any()).Returns(instance); + SetupValidPipeline(70, "ReconcileSideEffects", "sha256:target"); + + var prior = new DeploymentRecord("dep-prior-70", "admin") + { + InstanceId = 70, + Status = DeploymentStatus.InProgress, + RevisionHash = "sha256:target" + }; + _repo.GetCurrentDeploymentStatusAsync(70, Arg.Any()).Returns(prior); + _repo.GetDeployedSnapshotByInstanceIdAsync(70, Arg.Any()) + .Returns((DeployedConfigSnapshot?)null); + + DeployedConfigSnapshot? storedSnapshot = null; + await _repo.AddDeployedSnapshotAsync( + Arg.Do(s => storedSnapshot = s), Arg.Any()); + + var commActor = Sys.ActorOf(Props.Create(() => + new ReconcileProbeActor(siteHash: "sha256:target", failQuery: false))); + var service = CreateServiceWithCommActor(commActor); + + var result = await service.DeployInstanceAsync(70, "admin"); + + Assert.True(result.IsSuccess); + // No re-deploy was sent -- this was reconciled. + Assert.Equal(1, ReconcileProbeActor.QueryCount); + Assert.Equal(0, ReconcileProbeActor.DeployCount); + + // DeploymentManager-015: the instance State must reflect the deployed + // config the site is actually running. + Assert.Equal(InstanceState.Enabled, instance.State); + await _repo.Received().UpdateInstanceAsync(instance, Arg.Any()); + + // DeploymentManager-015: a deployed-config snapshot must be stored so + // GetDeploymentComparisonAsync has something to compare against. + Assert.NotNull(storedSnapshot); + Assert.Equal(70, storedSnapshot!.InstanceId); + Assert.Equal("sha256:target", storedSnapshot.RevisionHash); + } + + // ── DeploymentManager-016: reconciled record must carry the target revision hash ── + + [Fact] + public async Task DeployInstanceAsync_Reconciled_PriorRecordRevisionHashUpdatedToTarget() + { + // The prior record carries a stale revision hash (R1), but the site + // reports it has the freshly-flattened target revision (R2). After + // reconciliation the prior record's RevisionHash must agree with the + // site's applied revision -- not keep the stale R1. + var instance = new Instance("ReconcileStaleHash") + { + Id = 71, SiteId = 1, State = InstanceState.Enabled + }; + _repo.GetInstanceByIdAsync(71, Arg.Any()).Returns(instance); + SetupValidPipeline(71, "ReconcileStaleHash", "sha256:target"); + + var prior = new DeploymentRecord("dep-prior-71", "admin") + { + InstanceId = 71, + Status = DeploymentStatus.InProgress, + RevisionHash = "sha256:stale-R1" + }; + _repo.GetCurrentDeploymentStatusAsync(71, Arg.Any()).Returns(prior); + _repo.GetDeployedSnapshotByInstanceIdAsync(71, Arg.Any()) + .Returns((DeployedConfigSnapshot?)null); + + var commActor = Sys.ActorOf(Props.Create(() => + new ReconcileProbeActor(siteHash: "sha256:target", failQuery: false))); + var service = CreateServiceWithCommActor(commActor); + + var result = await service.DeployInstanceAsync(71, "admin"); + + Assert.True(result.IsSuccess); + Assert.Equal(DeploymentStatus.Success, prior.Status); + // DeploymentManager-016: the persisted record's RevisionHash must agree + // with the site's applied revision, not keep the stale R1. + Assert.Equal("sha256:target", prior.RevisionHash); + } + // ── DeploymentManager-012: LifecycleCommandTimeout must actually bound lifecycle commands ── [Fact]