fix(deploy): classify AskTimeoutException as a deploy timeout
Akka.Actor.AskTimeoutException does not derive from System.TimeoutException, so the isTimeout check in DeployInstanceAsync's catch block missed it and routed it to the generic "Deployment error:" branch. This broke the DeploymentManager-006 reconciliation query (query-before-redeploy), which keys off the "Communication failure:" prefix to detect a prior timeout-induced failure. Add AskTimeoutException to the pattern; add a covering regression test.
This commit is contained in:
@@ -353,7 +353,7 @@ public class DeploymentService
|
||||
// timed out, that token is already cancelled and the cleanup writes
|
||||
// would themselves throw before the Failed status is persisted.
|
||||
// Use CancellationToken.None so the failure is durably recorded.
|
||||
var isTimeout = ex is TimeoutException or OperationCanceledException;
|
||||
var isTimeout = ex is TimeoutException or OperationCanceledException or Akka.Actor.AskTimeoutException;
|
||||
|
||||
record.Status = DeploymentStatus.Failed;
|
||||
record.ErrorMessage = isTimeout
|
||||
|
||||
@@ -349,6 +349,63 @@ public class DeploymentServiceTests : TestKit
|
||||
Arg.Is<CancellationToken>(ct => !ct.IsCancellationRequested));
|
||||
}
|
||||
|
||||
// ── AskTimeoutException: Akka's AskTimeoutException must be classified as a deploy timeout ──
|
||||
|
||||
[Fact]
public async Task DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout()
{
    // Regression guard for the timeout classification in DeployInstanceAsync:
    // Akka's AskTimeoutException is not a System.TimeoutException, so it used
    // to be reported with the generic "Deployment error:" prefix rather than
    // the "Communication failure:" prefix that the DeploymentManager-006
    // reconciliation (query-before-redeploy) looks for.
    const int instanceId = 99;
    const string instanceName = "AskTimeoutInst";

    // Arrange: an instance that has never been deployed and has no prior
    // deployment record, backed by a valid pipeline.
    var neverDeployed = new Instance(instanceName)
    {
        Id = instanceId,
        SiteId = 1,
        State = InstanceState.NotDeployed,
    };
    _repo.GetInstanceByIdAsync(instanceId, Arg.Any<CancellationToken>()).Returns(neverDeployed);
    SetupValidPipeline(instanceId, instanceName, "sha256:target");
    _repo.GetCurrentDeploymentStatusAsync(instanceId, Arg.Any<CancellationToken>())
        .Returns((DeploymentRecord?)null);

    // Keep a reference to the record handed to the repository so its final
    // Status / ErrorMessage can be inspected after the deploy attempt.
    DeploymentRecord? savedRecord = null;
    await _repo.AddDeploymentRecordAsync(
        Arg.Do<DeploymentRecord>(record => savedRecord = record),
        Arg.Any<CancellationToken>());

    // The communication actor never answers, and the deployment timeout is
    // only 50 ms, so the Ask issued during the deploy gives up almost at once
    // with AskTimeoutException.
    var askTimeout = TimeSpan.FromMilliseconds(50);
    var silentActor = Sys.ActorOf(Props.Create<DroppingActor>());
    var communication = new CommunicationService(
        Options.Create(new CommunicationOptions { DeploymentTimeout = askTimeout }),
        NullLogger<CommunicationService>.Instance);
    communication.SetCommunicationActor(silentActor);

    var siteRepository = CreateSiteRepoStub();
    var managerOptions = Options.Create(
        new DeploymentManagerOptions { OperationLockTimeout = TimeSpan.FromSeconds(5) });
    var statusNotifier = new DeploymentStatusNotifier(NullLogger<DeploymentStatusNotifier>.Instance);
    var sut = new DeploymentService(
        _repo,
        siteRepository,
        _pipeline,
        communication,
        _lockManager,
        _audit,
        new DiffService(),
        new RevisionHashService(),
        statusNotifier,
        managerOptions,
        DeployCommOptions(),
        NullLogger<DeploymentService>.Instance);

    // Act
    var outcome = await sut.DeployInstanceAsync(instanceId, "admin");

    // Assert: the caller sees a timeout-flavoured failure, not a generic one.
    Assert.True(outcome.IsFailure);
    Assert.StartsWith("Deployment timed out:", outcome.Error);
    Assert.DoesNotContain("Deployment failed:", outcome.Error);

    // Assert: the persisted record is Failed and carries the prefix that the
    // DeploymentManager-006 reconciliation check keys off.
    Assert.NotNull(savedRecord);
    Assert.Equal(DeploymentStatus.Failed, savedRecord!.Status);
    Assert.NotNull(savedRecord.ErrorMessage);
    Assert.StartsWith("Communication failure:", savedRecord.ErrorMessage);
    Assert.DoesNotContain("Deployment error:", savedRecord.ErrorMessage);
}
|
||||
|
||||
// ── WP-6: Lifecycle commands ──
|
||||
|
||||
[Fact]
|
||||
@@ -1900,4 +1957,13 @@ public class DeploymentServiceTests : TestKit
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Test-only actor that registers no message handlers and therefore never
/// replies to anything it is sent. An Ask aimed at it can only end by timing
/// out with <see cref="Akka.Actor.AskTimeoutException"/>.
/// <c>DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout</c> relies on
/// this to check that the deploy catch block classifies
/// <see cref="Akka.Actor.AskTimeoutException"/> as a timeout.
/// </summary>
private class DroppingActor : ReceiveActor
{
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user