using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Deployment;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Health;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Lifecycle;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using System.Text.Json;

namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;

// NOTE(review): several expressions below (NullLogger.Instance, ExpectMsg(...),
// the IReadOnlyDictionary / IReadOnlyList parameters) appear without generic type
// arguments in this view of the file — confirm against the checked-in source
// before building.

/// <summary>
/// Regression tests for SiteRuntime-003: redeployment of an existing instance must
/// wait for the terminating Instance Actor before recreating the child, instead of
/// relying on a fixed 500 ms reschedule that can collide on the child actor name.
/// </summary>
public class DeploymentManagerRedeployTests : TestKit, IDisposable
{
    // Collaborators handed to every DeploymentManagerActor created by these tests.
    private readonly SiteStorageService _storage;
    private readonly ScriptCompilationService _compilationService;
    private readonly SharedScriptLibrary _sharedScriptLibrary;

    // Path of the per-test-class database file; deleted again in Dispose.
    private readonly string _dbFile;

    /// <summary>
    /// Builds the fixture: a uniquely named database file under the temp directory,
    /// a storage service pointed at it, and the script services the actor needs.
    /// </summary>
    public DeploymentManagerRedeployTests()
    {
        // A fresh GUID in the file name keeps each test class instance on its own database file.
        _dbFile = Path.Combine(Path.GetTempPath(), $"dm-redeploy-test-{Guid.NewGuid():N}.db");
        _storage = new SiteStorageService(
            $"Data Source={_dbFile}",
            NullLogger.Instance);

        // Constructors cannot await, so initialization is completed by blocking here.
        _storage.InitializeAsync().GetAwaiter().GetResult();

        _compilationService = new ScriptCompilationService(
            NullLogger.Instance);
        _sharedScriptLibrary = new SharedScriptLibrary(
            _compilationService,
            NullLogger.Instance);
    }

    /// <summary>
    /// Test teardown: shuts the TestKit down, then tries to remove the database file.
    /// </summary>
    void IDisposable.Dispose()
    {
        Shutdown();

        // Best-effort delete: any exception from File.Delete is deliberately ignored.
        try { File.Delete(_dbFile); } catch { /* cleanup */ }
    }

    /// <summary>
    /// Creates a <c>DeploymentManagerActor</c> wired to this fixture's storage and
    /// script services with default <c>SiteRuntimeOptions</c>.
    /// </summary>
    /// <param name="healthCollector">
    /// Optional health collector forwarded to the actor; <c>null</c> when omitted.
    /// </param>
    /// <returns>The reference of the newly created actor.</returns>
    private IActorRef CreateDeploymentManager(ISiteHealthCollector? healthCollector = null)
    {
        // All constructor arguments not supplied by the fixture are passed as null.
        return ActorOf(Props.Create(() => new DeploymentManagerActor(
            _storage,
            _compilationService,
            _sharedScriptLibrary,
            null,
            new SiteRuntimeOptions(),
            NullLogger.Instance,
            null,
            null,
            healthCollector,
            null)));
    }

    /// <summary>
    /// Minimal fake that records the most recent deployed-instance count.
    /// Every other member is a no-op, <c>IsActiveNode</c> is always <c>true</c>, and
    /// <c>CollectReport</c> throws <see cref="NotSupportedException"/>.
    /// </summary>
    private sealed class CountCapturingHealthCollector : ISiteHealthCollector
    {
        // Value of the "deployed" argument from the latest SetInstanceCounts call.
        public int LastDeployedCount { get; private set; }

        public void IncrementScriptError() { }
        public void IncrementAlarmError() { }
        public void IncrementDeadLetter() { }
        public void IncrementSiteAuditWriteFailures() { }
        public void IncrementAuditRedactionFailure() { }
        public void UpdateSiteAuditBacklog(ZB.MOM.WW.ScadaBridge.Commons.Types.SiteAuditBacklogSnapshot snapshot) { }
        public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
        public void RemoveConnection(string connectionName) { }
        public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
        public void UpdateConnectionEndpoint(string connectionName, string endpoint) { }
        public void UpdateTagQuality(string connectionName, int good, int bad, int uncertain) { }
        public void SetStoreAndForwardDepths(IReadOnlyDictionary depths) { }

        // Only the deployed count is captured; the enabled/disabled counts are discarded.
        public void SetInstanceCounts(int deployed, int enabled, int disabled) => LastDeployedCount = deployed;

        public void SetParkedMessageCount(int count) { }
        public void SetNodeHostname(string hostname) { }
        public void SetClusterNodes(IReadOnlyList nodes) { }
        public void SetActiveNode(bool isActive) { }
        public bool IsActiveNode => true;

        // Report collection is not implemented by this fake.
        public SiteHealthReport CollectReport(string siteId) => throw new NotSupportedException();
    }

    /// <summary>
    /// Serializes a <c>FlattenedConfiguration</c> for <paramref name="instanceName"/>
    /// containing a single attribute ("TestAttr" = "1", data type "Int32").
    /// </summary>
    /// <param name="instanceName">Value assigned to <c>InstanceUniqueName</c>.</param>
    /// <returns>The configuration as a JSON string.</returns>
    private static string MakeConfigJson(string instanceName)
    {
        var config = new FlattenedConfiguration
        {
            InstanceUniqueName = instanceName,
            Attributes =
            [
                new ResolvedAttribute { CanonicalName = "TestAttr", Value = "1", DataType = "Int32" }
            ]
        };
        return JsonSerializer.Serialize(config);
    }

    /// <summary>
    /// Deploys "RedeployPump", redeploys it under the same name, and asserts that both
    /// deployments report success and that a follow-up disable command succeeds.
    /// </summary>
    [Fact]
    public async Task Redeploy_ExistingInstance_SucceedsWithoutNameCollision()
    {
        var actor = CreateDeploymentManager();
        await Task.Delay(500); // empty startup

        // Initial deploy.
        actor.Tell(new DeployInstanceCommand(
            "dep-1", "RedeployPump", "h1", MakeConfigJson("RedeployPump"), "admin", DateTimeOffset.UtcNow));
        var first = ExpectMsg(TimeSpan.FromSeconds(5));
        Assert.Equal(DeploymentStatus.Success, first.Status);
        await Task.Delay(500);

        // Redeploy the same instance — must replace the existing actor cleanly.
        // The reply is given a longer timeout (10 s) than the initial deploy (5 s).
        actor.Tell(new DeployInstanceCommand(
            "dep-2", "RedeployPump", "h2", MakeConfigJson("RedeployPump"), "admin", DateTimeOffset.UtcNow));
        var second = ExpectMsg(TimeSpan.FromSeconds(10));
        Assert.Equal(DeploymentStatus.Success, second.Status);

        // The redeployed instance must still be operable (no orphaned/broken actor).
        actor.Tell(new DisableInstanceCommand("cmd-1", "RedeployPump", DateTimeOffset.UtcNow));
        var disable = ExpectMsg(TimeSpan.FromSeconds(5));
        Assert.True(disable.Success);
    }

    /// <summary>
    /// After an initial deploy of "RapidPump", sends two further deploys back-to-back
    /// from separate test probes and asserts that the earlier one is reported as
    /// failed with a "superseded" error, the later one succeeds, and the instance can
    /// still be disabled afterwards.
    /// </summary>
    [Fact]
    public async Task SR020_ThreeRapidDeploys_DoNotThrowInvalidActorNameException_LatestWins()
    {
        // Regression test for SiteRuntime-020. The previous implementation tracked
        // pending redeploys by IActorRef (_pendingRedeploys) but had no
        // name-keyed shadow, so a third DeployInstanceCommand arriving WHILE the
        // first redeploy's predecessor was still terminating saw
        // _instanceActors.TryGetValue==false and fell through to
        // ApplyDeployment → CreateInstanceActor → Context.ActorOf, which threw
        // InvalidActorNameException because the child name was still registered
        // until Terminated fires. The supervisor's Stop directive then silently
        // dropped the deploy, leaving the deployer waiting forever and the
        // persistence Task.Run dangling. After the fix, _terminatingActorsByName
        // tracks the in-flight terminator by name; the third deploy overwrites
        // the buffered pending command (last-write-wins) and tells the displaced
        // sender it was superseded.
        var actor = CreateDeploymentManager();
        await Task.Delay(500);

        // Initial deploy — establishes the running instance.
        actor.Tell(new DeployInstanceCommand(
            "dep-1", "RapidPump", "h1", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow));
        var first = ExpectMsg(TimeSpan.FromSeconds(5));
        Assert.Equal(DeploymentStatus.Success, first.Status);
        await Task.Delay(200);

        // Two rapid redeploys before the predecessor has time to fully terminate.
        // The second deploy stops the actor (watching it) and buffers itself.
        // The third deploy arrives almost immediately and must NOT crash — it
        // overwrites the buffered pending command and tells dep-2 it was superseded.
        // Each deploy is sent from its own probe so the two replies can be told apart.
        var probe2 = CreateTestProbe();
        var probe3 = CreateTestProbe();
        actor.Tell(new DeployInstanceCommand(
            "dep-2", "RapidPump", "h2", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow), probe2.Ref);
        actor.Tell(new DeployInstanceCommand(
            "dep-3", "RapidPump", "h3", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow), probe3.Ref);

        // dep-2 must be told it was superseded; dep-3 must succeed once the
        // predecessor finishes terminating.
        var superseded = probe2.ExpectMsg(TimeSpan.FromSeconds(10));
        Assert.Equal("dep-2", superseded.DeploymentId);
        Assert.Equal(DeploymentStatus.Failed, superseded.Status);
        Assert.NotNull(superseded.ErrorMessage);
        Assert.Contains("superseded", superseded.ErrorMessage!, StringComparison.OrdinalIgnoreCase);

        var winner = probe3.ExpectMsg(TimeSpan.FromSeconds(10));
        Assert.Equal("dep-3", winner.DeploymentId);
        Assert.Equal(DeploymentStatus.Success, winner.Status);

        // The instance must still be operable — proves no orphaned actor / no
        // half-created child holding the name.
        actor.Tell(new DisableInstanceCommand("cmd-1", "RapidPump", DateTimeOffset.UtcNow));
        var disable = ExpectMsg(TimeSpan.FromSeconds(5));
        Assert.True(disable.Success);
    }

    /// <summary>
    /// Deploys "CountPump" once, redeploys it three more times (dep-2 through dep-4),
    /// and asserts that storage holds a single matching config and that the last
    /// deployed count reported to the health collector is 1.
    /// </summary>
    [Fact]
    public async Task Redeploy_ExistingInstance_DoesNotOverCountDeployedInstances()
    {
        var health = new CountCapturingHealthCollector();
        var actor = CreateDeploymentManager(health);
        await Task.Delay(500);

        // Deploy once.
        // NOTE(review): the replies in this test are awaited but their status is not
        // asserted — consider whether a failed deploy should fail the test here.
        actor.Tell(new DeployInstanceCommand(
            "dep-1", "CountPump", "h1", MakeConfigJson("CountPump"), "admin", DateTimeOffset.UtcNow));
        ExpectMsg(TimeSpan.FromSeconds(5));
        await Task.Delay(500);

        // Redeploy several times.
        for (var i = 2; i <= 4; i++)
        {
            actor.Tell(new DeployInstanceCommand(
                $"dep-{i}", "CountPump", $"h{i}", MakeConfigJson("CountPump"), "admin", DateTimeOffset.UtcNow));
            ExpectMsg(TimeSpan.FromSeconds(10));
            await Task.Delay(500);
        }

        // Storage uses UPSERT — exactly one deployed config row should exist.
        var configs = await _storage.GetAllDeployedConfigsAsync();
        Assert.Single(configs, c => c.InstanceUniqueName == "CountPump");

        // The reported deployed count must be exactly 1 — a redeploy is an update,
        // not a new instance, so the in-memory counter must not drift upward.
        Assert.Equal(1, health.LastDeployedCount);
    }
}