fd618cf1dc
Remediation from the full per-module code review at 4307c381 (findings recorded
separately in code-reviews/).
Highs fixed:
- DeploymentManager-025/SiteRuntime-031: stop broadcasting notification lists + SMTP
configs (incl. credentials) to sites; site purges already-persisted rows on apply
(enforces the central-only delivery design; clears plaintext SMTP creds at rest).
- DataConnectionLayer-023: guard the native-alarm subscribe path against the
mid-flight-unsubscribe adapter-feed leak (mirrors the DCL-021 tag-path fix).
- SiteEventLogging-024: normalize From/To query bounds to UTC (the -016 fix the
audit trail claimed but never committed).
- KpiHistory-001: add an in-flight guard to the recorder sample tick.
- ScriptAnalysis-001: harden the trust analyzer's TPA-absent fallback (resolve
forbidden anchors in the minimal reference set; warn on degraded mode) — anchors
added to validation references only, never the compile gate.
(InboundAPI-026 left to the feat/ipsen-movein effort per owner decision.)
Medium/Low: DM-026 deterministic deploy-status tiebreaker; SR-027/028/029/030
native-alarm leak/phantom-active/delete-during-redeploy fixes; AL-013/014/016;
TE-024 (folder-mutation audit rows now persisted)/025; SF-025 gauge-provider
clear-on-stop; ESG-025/026; SEC-023/024/025; SCA-007/008/009; plus doc/test
accuracy COM-023/024, HOST-025/026, HM-024/025, NS-027/028.
Full-solution build 0 warnings; ~3560 tests across 18 touched suites green.
274 lines
13 KiB
C#
274 lines
13 KiB
C#
using Akka.Actor;
|
|
using Akka.TestKit.Xunit2;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Deployment;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Health;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Lifecycle;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
|
|
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
|
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
|
|
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
|
|
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
|
|
using System.Text.Json;
|
|
|
|
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;
|
|
|
|
/// <summary>
/// Regression tests for redeployment handling in <see cref="DeploymentManagerActor"/>.
/// Originally written for SiteRuntime-003: redeployment of an existing instance must
/// wait for the terminating Instance Actor before recreating the child, instead of
/// relying on a fixed 500 ms reschedule that can collide on the child actor name.
/// Later cases cover SiteRuntime-020 (rapid successive redeploys) and SiteRuntime-029
/// (delete arriving while a redeploy is pending).
/// </summary>
public class DeploymentManagerRedeployTests : TestKit, IDisposable
{
    /// <summary>Reply timeout used for the first deploy and for lifecycle commands.</summary>
    private static readonly TimeSpan ShortTimeout = TimeSpan.FromSeconds(5);

    /// <summary>Reply timeout used where the reply has to wait for a predecessor actor to terminate.</summary>
    private static readonly TimeSpan LongTimeout = TimeSpan.FromSeconds(10);

    private readonly SiteStorageService _storage;
    private readonly ScriptCompilationService _compilationService;
    private readonly SharedScriptLibrary _sharedScriptLibrary;
    private readonly string _dbFile;

    /// <summary>
    /// Creates a uniquely named database file in the temp directory, initialises the
    /// storage service against it, and builds the script services the actor requires.
    /// </summary>
    public DeploymentManagerRedeployTests()
    {
        _dbFile = Path.Combine(Path.GetTempPath(), $"dm-redeploy-test-{Guid.NewGuid():N}.db");
        _storage = new SiteStorageService(
            $"Data Source={_dbFile}",
            NullLogger<SiteStorageService>.Instance);

        // xUnit constructors cannot be awaited, so initialisation is completed synchronously.
        _storage.InitializeAsync().GetAwaiter().GetResult();

        _compilationService = new ScriptCompilationService(
            NullLogger<ScriptCompilationService>.Instance);
        _sharedScriptLibrary = new SharedScriptLibrary(
            _compilationService, NullLogger<SharedScriptLibrary>.Instance);
    }

    /// <summary>
    /// Shuts the test actor system down and then removes the temporary database file.
    /// </summary>
    void IDisposable.Dispose()
    {
        Shutdown();

        try
        {
            File.Delete(_dbFile);
        }
        catch
        {
            // Best-effort cleanup: a leftover temp file must never fail a test.
        }
    }

    /// <summary>
    /// Spawns a <see cref="DeploymentManagerActor"/> wired to this fixture's storage and
    /// script services.
    /// </summary>
    /// <param name="healthCollector">
    /// Optional collector handed to the actor; tests that assert on the deployed-instance
    /// count pass a <see cref="CountCapturingHealthCollector"/>.
    /// </param>
    /// <returns>A reference to the newly created actor.</returns>
    private IActorRef CreateDeploymentManager(ISiteHealthCollector? healthCollector = null)
    {
        // The arguments passed as null are collaborators these tests leave unset.
        return ActorOf(Props.Create(() => new DeploymentManagerActor(
            _storage,
            _compilationService,
            _sharedScriptLibrary,
            null,
            new SiteRuntimeOptions(),
            NullLogger<DeploymentManagerActor>.Instance,
            null,
            null,
            healthCollector,
            null)));
    }

    /// <summary>
    /// Minimal fake that records the most recent deployed-instance count.
    /// Every other member is a no-op, except <see cref="CollectReport"/>, which throws.
    /// </summary>
    private sealed class CountCapturingHealthCollector : ISiteHealthCollector
    {
        /// <summary>
        /// The <c>deployed</c> value from the latest <see cref="SetInstanceCounts"/> call;
        /// 0 until the first call.
        /// </summary>
        public int LastDeployedCount { get; private set; }

        public void IncrementScriptError() { }
        public void IncrementAlarmError() { }
        public void IncrementDeadLetter() { }
        public void IncrementSiteAuditWriteFailures() { }
        public void IncrementAuditRedactionFailure() { }
        public void UpdateSiteAuditBacklog(ZB.MOM.WW.ScadaBridge.Commons.Types.SiteAuditBacklogSnapshot snapshot) { }
        public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
        public void RemoveConnection(string connectionName) { }
        public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
        public void UpdateConnectionEndpoint(string connectionName, string endpoint) { }
        public void UpdateTagQuality(string connectionName, int good, int bad, int uncertain) { }
        public void SetStoreAndForwardDepths(IReadOnlyDictionary<string, int> depths) { }

        /// <summary>Captures <paramref name="deployed"/>; the other two counts are ignored.</summary>
        public void SetInstanceCounts(int deployed, int enabled, int disabled) => LastDeployedCount = deployed;

        public void SetParkedMessageCount(int count) { }
        public void SetNodeHostname(string hostname) { }
        public void SetClusterNodes(IReadOnlyList<NodeStatus> nodes) { }
        public void SetActiveNode(bool isActive) { }
        public bool IsActiveNode => true;

        /// <summary>Not needed by these tests.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public SiteHealthReport CollectReport(string siteId) => throw new NotSupportedException();
    }

    /// <summary>
    /// Serialises a minimal <see cref="FlattenedConfiguration"/> for the named instance,
    /// carrying a single Int32 attribute called <c>TestAttr</c>.
    /// </summary>
    /// <param name="instanceName">Value stored in <c>InstanceUniqueName</c>.</param>
    /// <returns>The configuration as a JSON string.</returns>
    private static string MakeConfigJson(string instanceName)
    {
        var config = new FlattenedConfiguration
        {
            InstanceUniqueName = instanceName,
            Attributes =
            [
                new ResolvedAttribute { CanonicalName = "TestAttr", Value = "1", DataType = "Int32" }
            ]
        };

        return JsonSerializer.Serialize(config);
    }

    /// <summary>
    /// Builds a deploy command whose configuration payload is generated for the same
    /// instance name, so the two values cannot drift apart within a test.
    /// </summary>
    /// <param name="deploymentId">Deployment identifier echoed back in the status response.</param>
    /// <param name="instanceName">Unique name of the instance being deployed.</param>
    /// <param name="revisionHash">
    /// Third positional argument of the command ("h1", "h2", ...); presumably a revision
    /// hash — confirm against <see cref="DeployInstanceCommand"/>.
    /// </param>
    /// <returns>A command issued by "admin" and stamped with the current UTC time.</returns>
    private static DeployInstanceCommand NewDeploy(string deploymentId, string instanceName, string revisionHash)
    {
        return new DeployInstanceCommand(
            deploymentId, instanceName, revisionHash, MakeConfigJson(instanceName), "admin", DateTimeOffset.UtcNow);
    }

    /// <summary>
    /// A second deploy of an already-running instance must report success, and the
    /// replacement must still respond to lifecycle commands.
    /// </summary>
    [Fact]
    public async Task Redeploy_ExistingInstance_SucceedsWithoutNameCollision()
    {
        const string instance = "RedeployPump";
        var manager = CreateDeploymentManager();
        await Task.Delay(500); // empty startup

        // Initial deploy.
        manager.Tell(NewDeploy("dep-1", instance, "h1"));
        var initial = ExpectMsg<DeploymentStatusResponse>(ShortTimeout);
        Assert.Equal(DeploymentStatus.Success, initial.Status);
        await Task.Delay(500);

        // Redeploy the same instance — must replace the existing actor cleanly.
        manager.Tell(NewDeploy("dep-2", instance, "h2"));
        var redeployed = ExpectMsg<DeploymentStatusResponse>(LongTimeout);
        Assert.Equal(DeploymentStatus.Success, redeployed.Status);

        // The redeployed instance must still be operable (no orphaned/broken actor).
        manager.Tell(new DisableInstanceCommand("cmd-1", instance, DateTimeOffset.UtcNow));
        var disabled = ExpectMsg<InstanceLifecycleResponse>(ShortTimeout);
        Assert.True(disabled.Success);
    }

    /// <summary>
    /// Regression test for SiteRuntime-020: a third deploy arriving while the first
    /// redeploy's predecessor is still terminating must supersede the buffered deploy
    /// rather than crash on a duplicate child actor name.
    /// </summary>
    [Fact]
    public async Task SR020_ThreeRapidDeploys_DoNotThrowInvalidActorNameException_LatestWins()
    {
        // Background. The previous implementation tracked pending redeploys by
        // IActorRef (_pendingRedeploys) but had no name-keyed shadow, so a third
        // DeployInstanceCommand arriving WHILE the first redeploy's predecessor was
        // still terminating saw _instanceActors.TryGetValue==false and fell through to
        // ApplyDeployment → CreateInstanceActor → Context.ActorOf, which threw
        // InvalidActorNameException because the child name was still registered until
        // Terminated fires. The supervisor's Stop directive then silently dropped the
        // deploy, leaving the deployer waiting forever and the persistence Task.Run
        // dangling. After the fix, _terminatingActorsByName tracks the in-flight
        // terminator by name; the third deploy overwrites the buffered pending command
        // (last-write-wins) and tells the displaced sender it was superseded.
        const string instance = "RapidPump";
        var manager = CreateDeploymentManager();
        await Task.Delay(500);

        // Initial deploy — establishes the running instance.
        manager.Tell(NewDeploy("dep-1", instance, "h1"));
        var initial = ExpectMsg<DeploymentStatusResponse>(ShortTimeout);
        Assert.Equal(DeploymentStatus.Success, initial.Status);
        await Task.Delay(200);

        // Two rapid redeploys before the predecessor has time to fully terminate.
        // The second deploy stops the actor (watching it) and buffers itself.
        // The third deploy arrives almost immediately and must NOT crash — it
        // overwrites the buffered pending command and tells dep-2 it was superseded.
        var probe2 = CreateTestProbe();
        var probe3 = CreateTestProbe();

        manager.Tell(NewDeploy("dep-2", instance, "h2"), probe2.Ref);
        manager.Tell(NewDeploy("dep-3", instance, "h3"), probe3.Ref);

        // dep-2 must be told it was superseded...
        var superseded = probe2.ExpectMsg<DeploymentStatusResponse>(LongTimeout);
        Assert.Equal("dep-2", superseded.DeploymentId);
        Assert.Equal(DeploymentStatus.Failed, superseded.Status);
        Assert.NotNull(superseded.ErrorMessage);
        Assert.Contains("superseded", superseded.ErrorMessage!, StringComparison.OrdinalIgnoreCase);

        // ...and dep-3 must succeed once the predecessor finishes terminating.
        var winner = probe3.ExpectMsg<DeploymentStatusResponse>(LongTimeout);
        Assert.Equal("dep-3", winner.DeploymentId);
        Assert.Equal(DeploymentStatus.Success, winner.Status);

        // The instance must still be operable — proves no orphaned actor / no
        // half-created child holding the name.
        manager.Tell(new DisableInstanceCommand("cmd-1", instance, DateTimeOffset.UtcNow));
        var disabled = ExpectMsg<InstanceLifecycleResponse>(ShortTimeout);
        Assert.True(disabled.Success);
    }

    /// <summary>
    /// Regression test for SiteRuntime-029: a delete that arrives while a redeploy is
    /// still waiting on the predecessor's termination must win — the instance stays
    /// deleted and the deployed count returns to zero.
    /// </summary>
    [Fact]
    public async Task SR029_DeleteDuringPendingRedeploy_InstanceStaysDeleted_AndCounterIsCorrect()
    {
        // Background. A delete arriving WHILE a redeploy is still terminating used to:
        // (1) over-decrement _totalDeployedCount, and (2) leave the buffered
        // _pendingRedeploys entry intact — so when Terminated fired, HandleTerminated
        // called ApplyDeployment(isRedeploy: true) and RESURRECTED the just-deleted
        // instance (re-creating the actor and re-writing the deployed-config SQLite
        // row). After the fix, HandleDelete is authoritative over the mid-redeploy
        // bookkeeping: it cancels the pending redeploy (telling the displaced deployer
        // it was superseded), clears the terminating shadow, and decrements the counter
        // exactly once.
        const string instance = "RaceTarget";
        var health = new CountCapturingHealthCollector();
        var manager = CreateDeploymentManager(health);
        await Task.Delay(500);

        // Establish the running instance.
        manager.Tell(NewDeploy("dep-1", instance, "h1"));
        var initial = ExpectMsg<DeploymentStatusResponse>(ShortTimeout);
        Assert.Equal(DeploymentStatus.Success, initial.Status);
        await Task.Delay(300);

        // Fire a redeploy immediately followed by a delete. Both queue on the
        // singleton mailbox: HandleDeploy runs first (removes from _instanceActors,
        // watches + stops the predecessor, buffers the redeploy, sets the terminating
        // shadow), then HandleDelete runs while the predecessor is still terminating
        // (Terminated has not fired) — exactly the SiteRuntime-029 window.
        var redeployProbe = CreateTestProbe();
        manager.Tell(NewDeploy("dep-2", instance, "h2"), redeployProbe.Ref);
        manager.Tell(new DeleteInstanceCommand("del-1", instance, DateTimeOffset.UtcNow));

        // The delete succeeds...
        var deleted = ExpectMsg<InstanceLifecycleResponse>(LongTimeout);
        Assert.True(deleted.Success);

        // ...and the displaced redeploy is told it was superseded (not silently lost).
        var superseded = redeployProbe.ExpectMsg<DeploymentStatusResponse>(LongTimeout);
        Assert.Equal("dep-2", superseded.DeploymentId);
        Assert.Equal(DeploymentStatus.Failed, superseded.Status);
        // Assert non-null first so a missing message fails with a clear reason,
        // matching the SR020 test.
        Assert.NotNull(superseded.ErrorMessage);
        Assert.Contains("superseded", superseded.ErrorMessage!, StringComparison.OrdinalIgnoreCase);

        // Give the predecessor's Terminated signal time to fire — it must NOT
        // resurrect the deleted instance.
        await Task.Delay(1000);

        // The instance stays deleted: no deployed-config row remains.
        var configs = await _storage.GetAllDeployedConfigsAsync();
        Assert.DoesNotContain(configs, c => c.InstanceUniqueName == instance);

        // The deployed count is back to 0 — neither over-decremented nor resurrected.
        Assert.Equal(0, health.LastDeployedCount);
    }

    /// <summary>
    /// Repeated redeploys of one instance must leave exactly one stored configuration
    /// row and a reported deployed count of exactly one.
    /// </summary>
    [Fact]
    public async Task Redeploy_ExistingInstance_DoesNotOverCountDeployedInstances()
    {
        const string instance = "CountPump";
        var health = new CountCapturingHealthCollector();
        var manager = CreateDeploymentManager(health);
        await Task.Delay(500);

        // Deploy once. The status is asserted so that a failed deploy cannot let the
        // count assertions below pass vacuously.
        manager.Tell(NewDeploy("dep-1", instance, "h1"));
        var initial = ExpectMsg<DeploymentStatusResponse>(ShortTimeout);
        Assert.Equal(DeploymentStatus.Success, initial.Status);
        await Task.Delay(500);

        // Redeploy several times; each one must actually succeed for the counter
        // check to be meaningful.
        for (var i = 2; i <= 4; i++)
        {
            manager.Tell(NewDeploy($"dep-{i}", instance, $"h{i}"));
            var redeployed = ExpectMsg<DeploymentStatusResponse>(LongTimeout);
            Assert.Equal(DeploymentStatus.Success, redeployed.Status);
            await Task.Delay(500);
        }

        // Storage uses UPSERT — exactly one deployed config row should exist.
        var configs = await _storage.GetAllDeployedConfigsAsync();
        Assert.Single(configs, c => c.InstanceUniqueName == instance);

        // The reported deployed count must be exactly 1 — a redeploy is an update,
        // not a new instance, so the in-memory counter must not drift upward.
        Assert.Equal(1, health.LastDeployedCount);
    }
}
|