fix(deploy): classify AskTimeoutException as a deploy timeout

Akka.Actor.AskTimeoutException does not derive from System.TimeoutException,
so the isTimeout check in DeployInstanceAsync's catch block missed it and
routed it to the generic "Deployment error:" branch. This broke the
DeploymentManager-006 reconciliation query (query-before-redeploy), which
keys off the "Communication failure:" prefix to detect a prior timeout-induced
failure. Add AskTimeoutException to the pattern; add a covering regression test.
This commit is contained in:
Joseph Doherty
2026-06-26 13:15:19 -04:00
parent 10f752df02
commit 298a9af59e
2 changed files with 67 additions and 1 deletions
@@ -349,6 +349,63 @@ public class DeploymentServiceTests : TestKit
Arg.Is<CancellationToken>(ct => !ct.IsCancellationRequested));
}
// ── AskTimeoutException: Akka's AskTimeoutException must be classified as a deploy timeout ──
[Fact]
public async Task DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout()
{
    // Regression scenario: a first-time deploy is sent to an actor that never
    // answers, so the Ask issued during the deploy ends in Akka's
    // AskTimeoutException. That type is not a System.TimeoutException, and
    // before the fix it was reported through the generic "Deployment error:"
    // path instead of the timeout "Communication failure:" path that the
    // DeploymentManager-006 reconciliation looks for.
    const int instanceId = 99;
    const string instanceName = "AskTimeoutInst";

    // Arrange: repository and pipeline stubs for an instance with no prior deployment.
    var notDeployed = new Instance(instanceName)
    {
        Id = instanceId,
        SiteId = 1,
        State = InstanceState.NotDeployed
    };
    _repo.GetInstanceByIdAsync(instanceId, Arg.Any<CancellationToken>()).Returns(notDeployed);
    SetupValidPipeline(instanceId, instanceName, "sha256:target");
    _repo.GetCurrentDeploymentStatusAsync(instanceId, Arg.Any<CancellationToken>())
        .Returns((DeploymentRecord?)null);

    // Remember the deployment record handed to the repository so its final
    // status and error message can be inspected after the call.
    DeploymentRecord? persisted = null;
    await _repo.AddDeploymentRecordAsync(
        Arg.Do<DeploymentRecord>(record => { persisted = record; }),
        Arg.Any<CancellationToken>());

    // Arrange: communication layer pointed at an actor that never replies,
    // with a 50 ms deployment timeout so the Ask gives up almost at once.
    var silentActor = Sys.ActorOf(Props.Create<DroppingActor>());
    var shortTimeout = new CommunicationOptions
    {
        DeploymentTimeout = TimeSpan.FromMilliseconds(50)
    };
    var communication = new CommunicationService(
        Options.Create(shortTimeout),
        NullLogger<CommunicationService>.Instance);
    communication.SetCommunicationActor(silentActor);

    // Arrange: the service under test, wired with real helpers and the stubs above.
    var siteRepository = CreateSiteRepoStub();
    var diffService = new DiffService();
    var hashService = new RevisionHashService();
    var statusNotifier = new DeploymentStatusNotifier(NullLogger<DeploymentStatusNotifier>.Instance);
    var managerOptions = Options.Create(
        new DeploymentManagerOptions { OperationLockTimeout = TimeSpan.FromSeconds(5) });
    var sut = new DeploymentService(
        _repo,
        siteRepository,
        _pipeline,
        communication,
        _lockManager,
        _audit,
        diffService,
        hashService,
        statusNotifier,
        managerOptions,
        DeployCommOptions(),
        NullLogger<DeploymentService>.Instance);

    // Act
    var outcome = await sut.DeployInstanceAsync(instanceId, "admin");

    // Assert: the call fails, and the error reads as a timeout rather than
    // a generic deployment failure.
    Assert.True(outcome.IsFailure);
    Assert.StartsWith("Deployment timed out:", outcome.Error);
    Assert.DoesNotContain("Deployment failed:", outcome.Error);

    // Assert: the stored record is Failed and carries the timeout prefix
    // that the DeploymentManager-006 reconciliation check keys off.
    Assert.NotNull(persisted);
    Assert.Equal(DeploymentStatus.Failed, persisted!.Status);
    Assert.NotNull(persisted.ErrorMessage);
    Assert.StartsWith("Communication failure:", persisted.ErrorMessage);
    Assert.DoesNotContain("Deployment error:", persisted.ErrorMessage);
}
// ── WP-6: Lifecycle commands ──
[Fact]
@@ -1900,4 +1957,13 @@ public class DeploymentServiceTests : TestKit
});
}
}
/// <summary>
/// Test-only actor that registers no message handlers and therefore never
/// sends a reply. An Ask aimed at it cannot complete and ends in
/// <see cref="Akka.Actor.AskTimeoutException"/> once its timeout elapses.
/// <c>DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout</c> uses it
/// to check that the deploy catch block handles
/// <see cref="Akka.Actor.AskTimeoutException"/> as a timeout.
/// </summary>
private class DroppingActor : ReceiveActor
{
}
}