From 298a9af59e664dc10409b7465e791d8bcb919f12 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Fri, 26 Jun 2026 13:15:19 -0400 Subject: [PATCH] fix(deploy): classify AskTimeoutException as a deploy timeout Akka.Actor.AskTimeoutException does not derive from System.TimeoutException, so the isTimeout check in DeployInstanceAsync's catch block missed it and routed it to the generic "Deployment error:" branch. This broke the DeploymentManager-006 reconciliation query (query-before-redeploy), which keys off the "Communication failure:" prefix to detect a prior timeout-induced failure. Add AskTimeoutException to the pattern; add a covering regression test. --- .../DeploymentService.cs | 2 +- .../DeploymentServiceTests.cs | 66 +++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/src/ZB.MOM.WW.ScadaBridge.DeploymentManager/DeploymentService.cs b/src/ZB.MOM.WW.ScadaBridge.DeploymentManager/DeploymentService.cs index 585f42e0..4f410ea9 100644 --- a/src/ZB.MOM.WW.ScadaBridge.DeploymentManager/DeploymentService.cs +++ b/src/ZB.MOM.WW.ScadaBridge.DeploymentManager/DeploymentService.cs @@ -353,7 +353,7 @@ public class DeploymentService // timed out, that token is already cancelled and the cleanup writes // would themselves throw before the Failed status is persisted. // Use CancellationToken.None so the failure is durably recorded. - var isTimeout = ex is TimeoutException or OperationCanceledException; + var isTimeout = ex is TimeoutException or OperationCanceledException or Akka.Actor.AskTimeoutException; record.Status = DeploymentStatus.Failed; record.ErrorMessage = isTimeout diff --git a/tests/ZB.MOM.WW.ScadaBridge.DeploymentManager.Tests/DeploymentServiceTests.cs b/tests/ZB.MOM.WW.ScadaBridge.DeploymentManager.Tests/DeploymentServiceTests.cs index 00a3e22e..b79cc6c9 100644 --- a/tests/ZB.MOM.WW.ScadaBridge.DeploymentManager.Tests/DeploymentServiceTests.cs +++ b/tests/ZB.MOM.WW.ScadaBridge.DeploymentManager.Tests/DeploymentServiceTests.cs @@ -349,6 +349,63 @@ public class DeploymentServiceTests : TestKit Arg.Is(ct => !ct.IsCancellationRequested)); } + // ── AskTimeoutException: Akka's AskTimeoutException must be classified as a deploy timeout ── + + [Fact] + public async Task DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout() + { + // Arrange: first-time deploy with an actor that drops every message, so + // the RefreshDeploymentAsync Ask never receives a reply and throws + // AskTimeoutException (which does NOT derive from System.TimeoutException). + // Prior to the fix this fell through to the generic "Deployment error:" + // branch instead of the timeout "Communication failure:" branch, which + // broke the DeploymentManager-006 reconciliation that keys off that prefix. + var instance = new Instance("AskTimeoutInst") { Id = 99, SiteId = 1, State = InstanceState.NotDeployed }; + _repo.GetInstanceByIdAsync(99, Arg.Any()).Returns(instance); + SetupValidPipeline(99, "AskTimeoutInst", "sha256:target"); + _repo.GetCurrentDeploymentStatusAsync(99, Arg.Any()) + .Returns((DeploymentRecord?)null); + + DeploymentRecord? captured = null; + await _repo.AddDeploymentRecordAsync( + Arg.Do(r => captured = r), Arg.Any()); + + // A 50 ms DeploymentTimeout forces AskTimeoutException immediately + // when the actor silently drops every message (never replies). + var droppingActor = Sys.ActorOf(Props.Create()); + var comms = new CommunicationService( + Options.Create(new CommunicationOptions { DeploymentTimeout = TimeSpan.FromMilliseconds(50) }), + NullLogger.Instance); + comms.SetCommunicationActor(droppingActor); + + var siteRepo = CreateSiteRepoStub(); + var service = new DeploymentService( + _repo, siteRepo, _pipeline, comms, _lockManager, _audit, + new DiffService(), + new RevisionHashService(), + new DeploymentStatusNotifier(NullLogger.Instance), + Options.Create(new DeploymentManagerOptions { OperationLockTimeout = TimeSpan.FromSeconds(5) }), + DeployCommOptions(), + NullLogger.Instance); + + // Act + var result = await service.DeployInstanceAsync(99, "admin"); + + // Assert: AskTimeoutException must route through the isTimeout branch. + Assert.True(result.IsFailure); + // Result error must reflect a timeout, not a generic deploy failure. + Assert.StartsWith("Deployment timed out:", result.Error); + Assert.DoesNotContain("Deployment failed:", result.Error); + + // The deployment record must be Failed with the timeout-failure prefix + // (which the DeploymentManager-006 reconciliation check keys off). + Assert.NotNull(captured); + Assert.Equal(DeploymentStatus.Failed, captured!.Status); + Assert.NotNull(captured.ErrorMessage); + Assert.StartsWith("Communication failure:", captured.ErrorMessage); + Assert.DoesNotContain("Deployment error:", captured.ErrorMessage); + } + // ── WP-6: Lifecycle commands ── [Fact] @@ -1900,4 +1957,13 @@ public class DeploymentServiceTests : TestKit }); } } + + /// + /// Akka actor that silently discards every message, causing any Ask call + /// directed at it to time out with . + /// Used by DeployInstanceAsync_AskTimeoutException_ClassifiedAsTimeout + /// to verify that the deploy catch block treats + /// as a timeout. + /// + private class DroppingActor : ReceiveActor { } }