fix(deploy): classify AskTimeoutException as a deploy timeout
Akka.Actor.AskTimeoutException does not derive from System.TimeoutException, so the isTimeout check in DeployInstanceAsync's catch block missed it and routed it to the generic "Deployment error:" branch. This broke the DeploymentManager-006 reconciliation query (query-before-redeploy), which keys off the "Communication failure:" prefix to detect a prior timeout-induced failure. Add AskTimeoutException to the pattern; add a covering regression test.
This commit is contained in:
@@ -353,7 +353,7 @@ public class DeploymentService
|
|||||||
// timed out, that token is already cancelled and the cleanup writes
|
// timed out, that token is already cancelled and the cleanup writes
|
||||||
// would themselves throw before the Failed status is persisted.
|
// would themselves throw before the Failed status is persisted.
|
||||||
// Use CancellationToken.None so the failure is durably recorded.
|
// Use CancellationToken.None so the failure is durably recorded.
|
||||||
var isTimeout = ex is TimeoutException or OperationCanceledException;
|
var isTimeout = ex is TimeoutException or OperationCanceledException or Akka.Actor.AskTimeoutException;
|
||||||
|
|
||||||
record.Status = DeploymentStatus.Failed;
|
record.Status = DeploymentStatus.Failed;
|
||||||
record.ErrorMessage = isTimeout
|
record.ErrorMessage = isTimeout
|
||||||
|
|||||||
@@ -349,6 +349,63 @@ public class DeploymentServiceTests : TestKit
|
|||||||
Arg.Is<CancellationToken>(ct => !ct.IsCancellationRequested));
|
Arg.Is<CancellationToken>(ct => !ct.IsCancellationRequested));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── AskTimeoutException: Akka's AskTimeoutException must be classified as a deploy timeout ──
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout()
|
||||||
|
{
|
||||||
|
// Arrange: first-time deploy with an actor that drops every message, so
|
||||||
|
// the RefreshDeploymentAsync Ask never receives a reply and throws
|
||||||
|
// AskTimeoutException (which does NOT derive from System.TimeoutException).
|
||||||
|
// Prior to the fix this fell through to the generic "Deployment error:"
|
||||||
|
// branch instead of the timeout "Communication failure:" branch, which
|
||||||
|
// broke the DeploymentManager-006 reconciliation that keys off that prefix.
|
||||||
|
var instance = new Instance("AskTimeoutInst") { Id = 99, SiteId = 1, State = InstanceState.NotDeployed };
|
||||||
|
_repo.GetInstanceByIdAsync(99, Arg.Any<CancellationToken>()).Returns(instance);
|
||||||
|
SetupValidPipeline(99, "AskTimeoutInst", "sha256:target");
|
||||||
|
_repo.GetCurrentDeploymentStatusAsync(99, Arg.Any<CancellationToken>())
|
||||||
|
.Returns((DeploymentRecord?)null);
|
||||||
|
|
||||||
|
DeploymentRecord? captured = null;
|
||||||
|
await _repo.AddDeploymentRecordAsync(
|
||||||
|
Arg.Do<DeploymentRecord>(r => captured = r), Arg.Any<CancellationToken>());
|
||||||
|
|
||||||
|
// A 50 ms DeploymentTimeout forces AskTimeoutException immediately
|
||||||
|
// when the actor silently drops every message (never replies).
|
||||||
|
var droppingActor = Sys.ActorOf(Props.Create<DroppingActor>());
|
||||||
|
var comms = new CommunicationService(
|
||||||
|
Options.Create(new CommunicationOptions { DeploymentTimeout = TimeSpan.FromMilliseconds(50) }),
|
||||||
|
NullLogger<CommunicationService>.Instance);
|
||||||
|
comms.SetCommunicationActor(droppingActor);
|
||||||
|
|
||||||
|
var siteRepo = CreateSiteRepoStub();
|
||||||
|
var service = new DeploymentService(
|
||||||
|
_repo, siteRepo, _pipeline, comms, _lockManager, _audit,
|
||||||
|
new DiffService(),
|
||||||
|
new RevisionHashService(),
|
||||||
|
new DeploymentStatusNotifier(NullLogger<DeploymentStatusNotifier>.Instance),
|
||||||
|
Options.Create(new DeploymentManagerOptions { OperationLockTimeout = TimeSpan.FromSeconds(5) }),
|
||||||
|
DeployCommOptions(),
|
||||||
|
NullLogger<DeploymentService>.Instance);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
var result = await service.DeployInstanceAsync(99, "admin");
|
||||||
|
|
||||||
|
// Assert: AskTimeoutException must route through the isTimeout branch.
|
||||||
|
Assert.True(result.IsFailure);
|
||||||
|
// Result error must reflect a timeout, not a generic deploy failure.
|
||||||
|
Assert.StartsWith("Deployment timed out:", result.Error);
|
||||||
|
Assert.DoesNotContain("Deployment failed:", result.Error);
|
||||||
|
|
||||||
|
// The deployment record must be Failed with the timeout-failure prefix
|
||||||
|
// (which the DeploymentManager-006 reconciliation check keys off).
|
||||||
|
Assert.NotNull(captured);
|
||||||
|
Assert.Equal(DeploymentStatus.Failed, captured!.Status);
|
||||||
|
Assert.NotNull(captured.ErrorMessage);
|
||||||
|
Assert.StartsWith("Communication failure:", captured.ErrorMessage);
|
||||||
|
Assert.DoesNotContain("Deployment error:", captured.ErrorMessage);
|
||||||
|
}
|
||||||
|
|
||||||
// ── WP-6: Lifecycle commands ──
|
// ── WP-6: Lifecycle commands ──
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
@@ -1900,4 +1957,13 @@ public class DeploymentServiceTests : TestKit
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Akka actor that silently discards every message, causing any Ask call
|
||||||
|
/// directed at it to time out with <see cref="Akka.Actor.AskTimeoutException"/>.
|
||||||
|
/// Used by <c>DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout</c>
|
||||||
|
/// to verify that the deploy catch block treats
|
||||||
|
/// <see cref="Akka.Actor.AskTimeoutException"/> as a timeout.
|
||||||
|
/// </summary>
|
||||||
|
private class DroppingActor : ReceiveActor { }
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user