Files
ScadaBridge/tests/ScadaLink.SiteRuntime.Tests/Actors/DeploymentManagerRedeployTests.cs
T
Joseph Doherty f936f55f51 fix(concurrency): close 8 race / thread-safety findings across CD, DCL, SR
CD-015: rewrite NotificationOutboxRepository.InsertIfNotExistsAsync as raw-SQL
IF NOT EXISTS … INSERT with SqlException 2601/2627 catch, ending the
at-least-once livelock on the site→central notification handoff.

DCL-018/019/020/021/022: add _subscribesInFlight guard so concurrent
same-tag subscribes don't orphan an adapter handle; delete the latent
dead _subscriptionHandles dictionary; stop double-counting
_totalSubscribed when an unresolved tag is promoted via another instance;
release adapter handles on mid-flight unsubscribe; gate the
tag-resolution retry timer with IsTimerActive so subscribe bursts don't
reset it into starvation.

SR-020: add _terminatingActorsByName shadow so a third deploy arriving
during a pending redeploy doesn't crash on InvalidActorNameException —
displaced senders get a Failed/superseded response and the latest
command wins on Terminated.

SR-024: split OperationTrackingStore reads from writes (fresh
SqliteConnection per GetStatusAsync) so long writes don't block status
queries; rewrite Dispose to drop the sync-over-async bridge that could
deadlock on a non-reentrant SyncContext; Interlocked.Exchange makes the
dispose-once flag race-safe across both paths.
2026-05-28 05:20:13 -04:00

218 lines
10 KiB
C#

using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
using ScadaLink.Commons.Messages.Deployment;
using ScadaLink.Commons.Messages.Health;
using ScadaLink.Commons.Messages.Lifecycle;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.Commons.Types.Flattening;
using ScadaLink.HealthMonitoring;
using ScadaLink.SiteRuntime.Actors;
using ScadaLink.SiteRuntime.Persistence;
using ScadaLink.SiteRuntime.Scripts;
using System.Text.Json;
namespace ScadaLink.SiteRuntime.Tests.Actors;
/// <summary>
/// Regression tests for SiteRuntime-003: redeployment of an existing instance must
/// wait for the terminating Instance Actor before recreating the child, instead of
/// relying on a fixed 500 ms reschedule that can collide on the child actor name.
/// </summary>
public class DeploymentManagerRedeployTests : TestKit, IDisposable
{
private readonly SiteStorageService _storage;
private readonly ScriptCompilationService _compilationService;
private readonly SharedScriptLibrary _sharedScriptLibrary;
private readonly string _dbFile;
public DeploymentManagerRedeployTests()
{
_dbFile = Path.Combine(Path.GetTempPath(), $"dm-redeploy-test-{Guid.NewGuid():N}.db");
_storage = new SiteStorageService(
$"Data Source={_dbFile}",
NullLogger<SiteStorageService>.Instance);
_storage.InitializeAsync().GetAwaiter().GetResult();
_compilationService = new ScriptCompilationService(
NullLogger<ScriptCompilationService>.Instance);
_sharedScriptLibrary = new SharedScriptLibrary(
_compilationService, NullLogger<SharedScriptLibrary>.Instance);
}
void IDisposable.Dispose()
{
Shutdown();
try { File.Delete(_dbFile); } catch { /* cleanup */ }
}
private IActorRef CreateDeploymentManager(ISiteHealthCollector? healthCollector = null)
{
return ActorOf(Props.Create(() => new DeploymentManagerActor(
_storage,
_compilationService,
_sharedScriptLibrary,
null,
new SiteRuntimeOptions(),
NullLogger<DeploymentManagerActor>.Instance,
null,
null,
healthCollector,
null)));
}
/// <summary>
/// Minimal fake that records the most recent deployed-instance count.
/// </summary>
private sealed class CountCapturingHealthCollector : ISiteHealthCollector
{
public int LastDeployedCount { get; private set; }
public void IncrementScriptError() { }
public void IncrementAlarmError() { }
public void IncrementDeadLetter() { }
public void IncrementSiteAuditWriteFailures() { }
public void IncrementAuditRedactionFailure() { }
public void UpdateSiteAuditBacklog(ScadaLink.Commons.Types.SiteAuditBacklogSnapshot snapshot) { }
public void UpdateConnectionHealth(string connectionName, ConnectionHealth health) { }
public void RemoveConnection(string connectionName) { }
public void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved) { }
public void UpdateConnectionEndpoint(string connectionName, string endpoint) { }
public void UpdateTagQuality(string connectionName, int good, int bad, int uncertain) { }
public void SetStoreAndForwardDepths(IReadOnlyDictionary<string, int> depths) { }
public void SetInstanceCounts(int deployed, int enabled, int disabled) => LastDeployedCount = deployed;
public void SetParkedMessageCount(int count) { }
public void SetNodeHostname(string hostname) { }
public void SetClusterNodes(IReadOnlyList<NodeStatus> nodes) { }
public void SetActiveNode(bool isActive) { }
public bool IsActiveNode => true;
public SiteHealthReport CollectReport(string siteId) => throw new NotSupportedException();
}
private static string MakeConfigJson(string instanceName)
{
var config = new FlattenedConfiguration
{
InstanceUniqueName = instanceName,
Attributes =
[
new ResolvedAttribute { CanonicalName = "TestAttr", Value = "1", DataType = "Int32" }
]
};
return JsonSerializer.Serialize(config);
}
[Fact]
public async Task Redeploy_ExistingInstance_SucceedsWithoutNameCollision()
{
var actor = CreateDeploymentManager();
await Task.Delay(500); // empty startup
// Initial deploy.
actor.Tell(new DeployInstanceCommand(
"dep-1", "RedeployPump", "h1", MakeConfigJson("RedeployPump"), "admin", DateTimeOffset.UtcNow));
var first = ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(5));
Assert.Equal(DeploymentStatus.Success, first.Status);
await Task.Delay(500);
// Redeploy the same instance — must replace the existing actor cleanly.
actor.Tell(new DeployInstanceCommand(
"dep-2", "RedeployPump", "h2", MakeConfigJson("RedeployPump"), "admin", DateTimeOffset.UtcNow));
var second = ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(10));
Assert.Equal(DeploymentStatus.Success, second.Status);
// The redeployed instance must still be operable (no orphaned/broken actor).
actor.Tell(new DisableInstanceCommand("cmd-1", "RedeployPump", DateTimeOffset.UtcNow));
var disable = ExpectMsg<InstanceLifecycleResponse>(TimeSpan.FromSeconds(5));
Assert.True(disable.Success);
}
[Fact]
public async Task SR020_ThreeRapidDeploys_DoNotThrowInvalidActorNameException_LatestWins()
{
// Regression test for SiteRuntime-020. The previous implementation tracked
// pending redeploys by IActorRef (_pendingRedeploys) but had no
// name-keyed shadow, so a third DeployInstanceCommand arriving WHILE the
// first redeploy's predecessor was still terminating saw
// _instanceActors.TryGetValue==false and fell through to
// ApplyDeployment → CreateInstanceActor → Context.ActorOf, which threw
// InvalidActorNameException because the child name was still registered
// until Terminated fires. The supervisor's Stop directive then silently
// dropped the deploy, leaving the deployer waiting forever and the
// persistence Task.Run dangling. After the fix, _terminatingActorsByName
// tracks the in-flight terminator by name; the third deploy overwrites
// the buffered pending command (last-write-wins) and tells the displaced
// sender it was superseded.
var actor = CreateDeploymentManager();
await Task.Delay(500);
// Initial deploy — establishes the running instance.
actor.Tell(new DeployInstanceCommand(
"dep-1", "RapidPump", "h1", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow));
var first = ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(5));
Assert.Equal(DeploymentStatus.Success, first.Status);
await Task.Delay(200);
// Two rapid redeploys before the predecessor has time to fully terminate.
// The second deploy stops the actor (watching it) and buffers itself.
// The third deploy arrives almost immediately and must NOT crash — it
// overwrites the buffered pending command and tells dep-2 it was superseded.
var probe2 = CreateTestProbe();
var probe3 = CreateTestProbe();
actor.Tell(new DeployInstanceCommand(
"dep-2", "RapidPump", "h2", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow),
probe2.Ref);
actor.Tell(new DeployInstanceCommand(
"dep-3", "RapidPump", "h3", MakeConfigJson("RapidPump"), "admin", DateTimeOffset.UtcNow),
probe3.Ref);
// dep-2 must be told it was superseded; dep-3 must succeed once the
// predecessor finishes terminating.
var superseded = probe2.ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(10));
Assert.Equal("dep-2", superseded.DeploymentId);
Assert.Equal(DeploymentStatus.Failed, superseded.Status);
Assert.NotNull(superseded.ErrorMessage);
Assert.Contains("superseded", superseded.ErrorMessage!, StringComparison.OrdinalIgnoreCase);
var winner = probe3.ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(10));
Assert.Equal("dep-3", winner.DeploymentId);
Assert.Equal(DeploymentStatus.Success, winner.Status);
// The instance must still be operable — proves no orphaned actor / no
// half-created child holding the name.
actor.Tell(new DisableInstanceCommand("cmd-1", "RapidPump", DateTimeOffset.UtcNow));
var disable = ExpectMsg<InstanceLifecycleResponse>(TimeSpan.FromSeconds(5));
Assert.True(disable.Success);
}
[Fact]
public async Task Redeploy_ExistingInstance_DoesNotOverCountDeployedInstances()
{
var health = new CountCapturingHealthCollector();
var actor = CreateDeploymentManager(health);
await Task.Delay(500);
// Deploy once.
actor.Tell(new DeployInstanceCommand(
"dep-1", "CountPump", "h1", MakeConfigJson("CountPump"), "admin", DateTimeOffset.UtcNow));
ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(5));
await Task.Delay(500);
// Redeploy several times.
for (var i = 2; i <= 4; i++)
{
actor.Tell(new DeployInstanceCommand(
$"dep-{i}", "CountPump", $"h{i}", MakeConfigJson("CountPump"), "admin", DateTimeOffset.UtcNow));
ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(10));
await Task.Delay(500);
}
// Storage uses UPSERT — exactly one deployed config row should exist.
var configs = await _storage.GetAllDeployedConfigsAsync();
Assert.Single(configs, c => c.InstanceUniqueName == "CountPump");
// The reported deployed count must be exactly 1 — a redeploy is an update,
// not a new instance, so the in-memory counter must not drift upward.
Assert.Equal(1, health.LastDeployedCount);
}
}