using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Deployment;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Health;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Lifecycle;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using System.Text.Json;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;
/// <summary>
/// Regression tests for SiteRuntime-003: redeployment of an existing instance must
/// wait for the terminating Instance Actor before recreating the child, instead of
/// relying on a fixed 500 ms reschedule that can collide on the child actor name.
/// </summary>
public class DeploymentManagerRedeployTests : TestKit, IDisposable
{
// Storage service opened on the temporary database file (see the constructor).
private readonly SiteStorageService _storage;
// Compilation service passed to both the shared script library and the deployment manager.
private readonly ScriptCompilationService _compilationService;
// Shared script library constructed on top of _compilationService.
private readonly SharedScriptLibrary _sharedScriptLibrary;
// Full path of the per-fixture temporary database file; deleted again in Dispose.
private readonly string _dbFile;
/// <summary>
/// Builds the fixture: a uniquely named temporary database file, an initialized storage
/// service on top of it, and the script services handed to the deployment manager.
/// </summary>
public DeploymentManagerRedeployTests()
{
    // The GUID suffix gives every fixture instance its own database file.
    var fileName = $"dm-redeploy-test-{Guid.NewGuid():N}.db";
    _dbFile = Path.Combine(Path.GetTempPath(), fileName);

    var connectionString = $"Data Source={_dbFile}";
    _storage = new SiteStorageService(connectionString, NullLogger.Instance);

    // Constructors cannot await, so initialization is completed synchronously.
    _storage.InitializeAsync().GetAwaiter().GetResult();

    _compilationService = new ScriptCompilationService(NullLogger.Instance);
    _sharedScriptLibrary = new SharedScriptLibrary(_compilationService, NullLogger.Instance);
}
/// <summary>
/// Test teardown: calls the TestKit <c>Shutdown</c>, then removes the temporary
/// database file created by the constructor.
/// </summary>
/// <remarks>
/// The delete runs in a <c>finally</c> block so that an exception from <c>Shutdown</c>
/// cannot leave the file behind. Deleting stays best-effort: a file that is still
/// locked or protected is left in the temp directory rather than failing the test.
/// </remarks>
void IDisposable.Dispose()
{
    try
    {
        Shutdown();
    }
    finally
    {
        try
        {
            File.Delete(_dbFile);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best-effort cleanup: the file may still be in use or not deletable.
            // Leaving a stray temp file is preferable to throwing from Dispose.
        }
    }
}
/// <summary>
/// Spawns a <c>DeploymentManagerActor</c> in the test actor system, wired to this
/// fixture's storage and script services.
/// </summary>
/// <param name="healthCollector">
/// Optional health collector forwarded to the actor; <c>null</c> when omitted.
/// </param>
/// <returns>Reference to the newly created actor.</returns>
private IActorRef CreateDeploymentManager(ISiteHealthCollector? healthCollector = null)
{
    // The remaining positional arguments are passed as null; what each one stands for is
    // defined by the DeploymentManagerActor constructor, which is not visible here.
    // The options object is created inside the expression, i.e. when the actor is built.
    var managerProps = Props.Create(() => new DeploymentManagerActor(
        _storage, _compilationService, _sharedScriptLibrary,
        null,
        new SiteRuntimeOptions(),
        NullLogger.Instance,
        null, null,
        healthCollector,
        null));

    return ActorOf(managerProps);
}
/// <summary>
/// Minimal fake that records the most recent deployed-instance count.
/// </summary>
/// <remarks>
/// Only <c>SetInstanceCounts</c> stores anything. Every other member is an empty stub,
/// except <c>IsActiveNode</c> (always <c>true</c>) and <c>CollectReport</c> (always throws).
/// </remarks>
private sealed class CountCapturingHealthCollector : ISiteHealthCollector
{
// The "deployed" argument of the latest SetInstanceCounts call; 0 until the first call.
public int LastDeployedCount { get; private set; }
// Counter and update callbacks below are deliberately empty: the tests do not observe them.
public void IncrementScriptError() { }
public void IncrementAlarmError() { }
public void IncrementDeadLetter() { }
public void IncrementSiteAuditWriteFailures() { }
public void IncrementAuditRedactionFailure() { }
public void UpdateSiteAuditBacklog(ZB.MOM.WW.ScadaBridge.Commons.Types.SiteAuditBacklogSnapshot snapshot) { }
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
public void RemoveConnection(string connectionName) { }
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
public void UpdateConnectionEndpoint(string connectionName, string endpoint) { }
public void UpdateTagQuality(string connectionName, int good, int bad, int uncertain) { }
// NOTE(review): IReadOnlyDictionary appears without type arguments here — confirm
// the parameter type against ISiteHealthCollector.
public void SetStoreAndForwardDepths(IReadOnlyDictionary depths) { }
// Captures only the deployed count; the enabled and disabled values are ignored.
public void SetInstanceCounts(int deployed, int enabled, int disabled) => LastDeployedCount = deployed;
public void SetParkedMessageCount(int count) { }
public void SetNodeHostname(string hostname) { }
// NOTE(review): IReadOnlyList appears without a type argument here — confirm
// the parameter type against ISiteHealthCollector.
public void SetClusterNodes(IReadOnlyList nodes) { }
public void SetActiveNode(bool isActive) { }
// Always reports the node as active, regardless of SetActiveNode calls.
public bool IsActiveNode => true;
// Report collection is not supported by this fake; calling it throws.
public SiteHealthReport CollectReport(string siteId) => throw new NotSupportedException();
}
/// <summary>
/// Produces the JSON payload used in deploy commands: a <c>FlattenedConfiguration</c>
/// for <paramref name="instanceName"/> carrying a single <c>TestAttr</c> attribute.
/// </summary>
/// <param name="instanceName">Value assigned to <c>InstanceUniqueName</c>.</param>
/// <returns>The configuration serialized with <c>JsonSerializer.Serialize</c>.</returns>
private static string MakeConfigJson(string instanceName)
{
    var testAttribute = new ResolvedAttribute
    {
        CanonicalName = "TestAttr",
        Value = "1",
        DataType = "Int32",
    };

    return JsonSerializer.Serialize(new FlattenedConfiguration
    {
        InstanceUniqueName = instanceName,
        Attributes = [testAttribute],
    });
}
/// <summary>
/// Deploys an instance, redeploys it under the same name, and checks that both deploys
/// report success and that the instance still answers a disable command afterwards.
/// </summary>
/// <remarks>
/// NOTE(review): the <c>ExpectMsg</c> calls carry no explicit type argument, yet their
/// results are read as responses (<c>Status</c>, <c>Success</c>) — confirm the intended
/// message types.
/// </remarks>
[Fact]
public async Task Redeploy_ExistingInstance_SucceedsWithoutNameCollision()
{
    const string instanceName = "RedeployPump";

    var deploymentManager = CreateDeploymentManager();
    await Task.Delay(500); // empty startup

    // First deployment creates the instance.
    var initialDeploy = new DeployInstanceCommand(
        "dep-1", instanceName, "h1", MakeConfigJson(instanceName), "admin", DateTimeOffset.UtcNow);
    deploymentManager.Tell(initialDeploy);
    var initialResult = ExpectMsg(TimeSpan.FromSeconds(5));
    Assert.Equal(DeploymentStatus.Success, initialResult.Status);

    await Task.Delay(500);

    // Second deployment targets the same name and must replace the existing actor cleanly.
    var redeploy = new DeployInstanceCommand(
        "dep-2", instanceName, "h2", MakeConfigJson(instanceName), "admin", DateTimeOffset.UtcNow);
    deploymentManager.Tell(redeploy);
    var redeployResult = ExpectMsg(TimeSpan.FromSeconds(10));
    Assert.Equal(DeploymentStatus.Success, redeployResult.Status);

    // A successful disable shows the redeployed instance is reachable,
    // i.e. no orphaned or broken actor was left behind.
    var disableCommand = new DisableInstanceCommand("cmd-1", instanceName, DateTimeOffset.UtcNow);
    deploymentManager.Tell(disableCommand);
    var disableResult = ExpectMsg(TimeSpan.FromSeconds(5));
    Assert.True(disableResult.Success);
}
/// <summary>
/// SiteRuntime-020 regression: two redeploys sent back-to-back for a running instance
/// must resolve as "latest wins" — the earlier one is reported as superseded, the later
/// one succeeds, and the instance remains operable.
/// </summary>
/// <remarks>
/// NOTE(review): the <c>ExpectMsg</c> calls carry no explicit type argument, yet their
/// results are read as responses (<c>Status</c>, <c>DeploymentId</c>, <c>ErrorMessage</c>,
/// <c>Success</c>) — confirm the intended message types.
/// </remarks>
[Fact]
public async Task SR020_ThreeRapidDeploys_DoNotThrowInvalidActorNameException_LatestWins()
{
// Regression test for SiteRuntime-020. The previous implementation tracked
// pending redeploys by IActorRef (_pendingRedeploys) but had no
// name-keyed shadow, so a third DeployInstanceCommand arriving WHILE the
// first redeploy's predecessor was still terminating saw
// _instanceActors.TryGetValue==false and fell through to
// ApplyDeployment → CreateInstanceActor → Context.ActorOf, which threw
// InvalidActorNameException because the child name was still registered
// until Terminated fires. The supervisor's Stop directive then silently
// dropped the deploy, leaving the deployer waiting forever and the
// persistence Task.Run dangling. After the fix, _terminatingActorsByName
// tracks the in-flight terminator by name; the third deploy overwrites
// the buffered pending command (last-write-wins) and tells the displaced
// sender it was superseded.
var actor = CreateDeploymentManager();
// Fixed pause after creating the manager — presumably lets its startup finish.
await Task.Delay(500);
// Initial deploy — establishes the running instance.
actor.Tell(new DeployInstanceCommand(
"dep-1", "RapidPump", "h1", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow));
var first = ExpectMsg(TimeSpan.FromSeconds(5));
Assert.Equal(DeploymentStatus.Success, first.Status);
// Short pause before the rapid redeploys (shorter than the 500 ms used elsewhere).
await Task.Delay(200);
// Two rapid redeploys before the predecessor has time to fully terminate.
// The second deploy stops the actor (watching it) and buffers itself.
// The third deploy arrives almost immediately and must NOT crash — it
// overwrites the buffered pending command and tells dep-2 it was superseded.
// Each redeploy is sent with its own probe as the sender so the two replies
// can be told apart and asserted independently.
var probe2 = CreateTestProbe();
var probe3 = CreateTestProbe();
actor.Tell(new DeployInstanceCommand(
"dep-2", "RapidPump", "h2", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow),
probe2.Ref);
actor.Tell(new DeployInstanceCommand(
"dep-3", "RapidPump", "h3", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow),
probe3.Ref);
// dep-2 must be told it was superseded; dep-3 must succeed once the
// predecessor finishes terminating.
var superseded = probe2.ExpectMsg(TimeSpan.FromSeconds(10));
Assert.Equal("dep-2", superseded.DeploymentId);
Assert.Equal(DeploymentStatus.Failed, superseded.Status);
Assert.NotNull(superseded.ErrorMessage);
// The failure reason must mention "superseded" (case-insensitive match).
Assert.Contains("superseded", superseded.ErrorMessage!, StringComparison.OrdinalIgnoreCase);
var winner = probe3.ExpectMsg(TimeSpan.FromSeconds(10));
Assert.Equal("dep-3", winner.DeploymentId);
Assert.Equal(DeploymentStatus.Success, winner.Status);
// The instance must still be operable — proves no orphaned actor / no
// half-created child holding the name.
actor.Tell(new DisableInstanceCommand("cmd-1", "RapidPump", DateTimeOffset.UtcNow));
var disable = ExpectMsg(TimeSpan.FromSeconds(5));
Assert.True(disable.Success);
}
/// <summary>
/// Verifies that redeploying an already-deployed instance counts as an update: storage
/// keeps exactly one config row for the instance and the health collector's most recent
/// deployed count is 1.
/// </summary>
/// <remarks>
/// Every deploy response is asserted to be successful. Without that check a failing
/// redeploy would leave a single row and a count of 1, and the test would pass without
/// ever exercising the redeploy path.
/// NOTE(review): the <c>ExpectMsg</c> calls carry no explicit type argument, yet the
/// results are read as deployment responses (<c>Status</c>), as in the sibling tests —
/// confirm the intended message type.
/// </remarks>
[Fact]
public async Task Redeploy_ExistingInstance_DoesNotOverCountDeployedInstances()
{
    var health = new CountCapturingHealthCollector();
    var actor = CreateDeploymentManager(health);
    await Task.Delay(500); // fixed pause after creating the manager — presumably lets startup finish

    // Deploy once and make sure the instance really exists before redeploying.
    actor.Tell(new DeployInstanceCommand(
        "dep-1", "CountPump", "h1", MakeConfigJson("CountPump"), "admin", DateTimeOffset.UtcNow));
    var initial = ExpectMsg(TimeSpan.FromSeconds(5));
    Assert.Equal(DeploymentStatus.Success, initial.Status);
    await Task.Delay(500);

    // Redeploy several times (dep-2 .. dep-4). Each one must succeed, otherwise the
    // count assertions below would not be testing a redeploy at all.
    for (var i = 2; i <= 4; i++)
    {
        actor.Tell(new DeployInstanceCommand(
            $"dep-{i}", "CountPump", $"h{i}", MakeConfigJson("CountPump"), "admin", DateTimeOffset.UtcNow));
        var redeploy = ExpectMsg(TimeSpan.FromSeconds(10));
        Assert.Equal(DeploymentStatus.Success, redeploy.Status);
        await Task.Delay(500);
    }

    // Storage uses UPSERT — exactly one deployed config row should exist.
    var configs = await _storage.GetAllDeployedConfigsAsync();
    Assert.Single(configs, c => c.InstanceUniqueName == "CountPump");

    // The reported deployed count must be exactly 1 — a redeploy is an update,
    // not a new instance, so the in-memory counter must not drift upward.
    Assert.Equal(1, health.LastDeployedCount);
}
}