fix(deployment-manager): resolve DeploymentManager-001/002 — broaden failure catch, persist failure status with non-cancellable token

This commit is contained in:
Joseph Doherty
2026-05-16 19:40:40 -04:00
parent fccd3274d3
commit ab098bf6c8
3 changed files with 149 additions and 14 deletions

View File

@@ -150,6 +150,82 @@ public class DeploymentServiceTests
await _repo.Received().AddDeploymentRecordAsync(Arg.Any<DeploymentRecord>(), Arg.Any<CancellationToken>());
}
// ── DeploymentManager-001: unexpected exception must not leave record InProgress ──
/// <summary>
/// Regression test for DeploymentManager-001: when the communication step throws an
/// exception that is neither a timeout nor a cancellation, the deployment must be
/// reported as a failure and the deployment record must end up Failed, with an error
/// message and a completion timestamp, instead of remaining InProgress.
/// </summary>
[Fact]
public async Task DeployInstanceAsync_CommunicationThrowsUnexpectedException_RecordMarkedFailed()
{
    // Arrange
    var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };

    // Match any token (as the DeploymentManager-002 test does) so this stub keeps
    // resolving regardless of which token the service forwards to the repository.
    _repo.GetInstanceByIdAsync(1, Arg.Any<CancellationToken>()).Returns(instance);

    var config = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
    _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
        .Returns(Result<FlatteningPipelineResult>.Success(
            new FlatteningPipelineResult(config, "sha256:abc", ValidationResult.Success())));

    // Capture the deployment record so we can inspect its final state.
    DeploymentRecord? captured = null;
    await _repo.AddDeploymentRecordAsync(
        Arg.Do<DeploymentRecord>(r => captured = r), Arg.Any<CancellationToken>());

    // Act
    // _comms has no actor set, so DeployInstanceAsync throws
    // InvalidOperationException -- a non-timeout, non-cancellation exception.
    var result = await _service.DeployInstanceAsync(1, "admin");

    // Assert
    // The exception must be handled, not escape.
    Assert.True(result.IsFailure);
    Assert.Contains("Deployment failed", result.Error);

    // The record must not be left stuck in InProgress.
    Assert.NotNull(captured);
    Assert.Equal(DeploymentStatus.Failed, captured!.Status);
    Assert.NotNull(captured.ErrorMessage);
    Assert.NotNull(captured.CompletedAt);

    // Mutating the in-memory record is not enough: the Failed status must also be
    // handed to the repository, otherwise the stored record would stay InProgress.
    await _repo.Received().UpdateDeploymentRecordAsync(
        Arg.Is<DeploymentRecord>(r => r.Status == DeploymentStatus.Failed),
        Arg.Any<CancellationToken>());
}
// ── DeploymentManager-002: failure write must not use a cancelled token ──
/// <summary>
/// Checks that, after the communication step fails, the Failed-status update is sent
/// to the repository with a token that has not been cancelled.
/// </summary>
/// <remarks>
/// NOTE(review): the act step passes no token to DeployInstanceAsync, so presumably
/// the operation token is never cancelled in this test and the throwing stubs below
/// are not exercised -- confirm against the service, and consider cancelling the
/// token during the operation so the original bug would actually be reproduced.
/// </remarks>
[Fact]
public async Task DeployInstanceAsync_FailureWrite_UsesNonCancellableToken()
{
    // Arrange: the pipeline succeeds for any instance id and any token.
    var flattened = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
    var pipelineOutcome = Result<FlatteningPipelineResult>.Success(
        new FlatteningPipelineResult(flattened, "sha256:abc", ValidationResult.Success()));
    _pipeline.FlattenAndValidateAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
        .Returns(pipelineOutcome);

    // Arrange: the repository resolves a not-yet-deployed instance for any id/token.
    var target = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };
    _repo.GetInstanceByIdAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
        .Returns(target);

    // Keep a reference to the record the service adds so its final state can be checked.
    DeploymentRecord? recorded = null;
    await _repo.AddDeploymentRecordAsync(
        Arg.Do<DeploymentRecord>(r => recorded = r),
        Arg.Any<CancellationToken>());

    // Model a repository that refuses already-cancelled tokens (described by the
    // original author as the real EF Core behaviour). A Failed-status write or a save
    // made with a cancelled token throws, which would leave the record InProgress.
    _repo.UpdateDeploymentRecordAsync(
            Arg.Is<DeploymentRecord>(r => r.Status == DeploymentStatus.Failed),
            Arg.Is<CancellationToken>(ct => ct.IsCancellationRequested))
        .Returns<Task>(_ => throw new OperationCanceledException());
    _repo.SaveChangesAsync(
            Arg.Is<CancellationToken>(ct => ct.IsCancellationRequested))
        .Returns<Task<int>>(_ => throw new OperationCanceledException());

    // Act: the communication call fails because no actor is set on _comms.
    var outcome = await _service.DeployInstanceAsync(1, "admin");

    // Assert: the failure is reported and the captured record ended up Failed.
    Assert.True(outcome.IsFailure);
    Assert.NotNull(recorded);
    Assert.Equal(DeploymentStatus.Failed, recorded!.Status);

    // Assert: the Failed-status write was made with a token that is not cancelled.
    await _repo.Received().UpdateDeploymentRecordAsync(
        Arg.Is<DeploymentRecord>(r => r.Status == DeploymentStatus.Failed),
        Arg.Is<CancellationToken>(ct => !ct.IsCancellationRequested));
}
// ── WP-6: Lifecycle commands ──
[Fact]