namespace ScadaLink.SiteRuntime.Tests.Integration;

/// <summary>
/// Integration tests for multi-node failover scenarios.
/// These require two Akka.NET cluster nodes running simultaneously,
/// which is complex for unit tests. Marked with Category=Integration
/// for separate test run configuration.
///
/// Every test here is a documented verification point rather than an
/// executable check, so each one is marked as skipped (with its reason)
/// instead of asserting a constant <c>true</c>. A skipped test shows up as
/// "not run" in the report; an always-true assertion would show up as a
/// pass and suggest the scenario had actually been exercised.
///
/// WP-7: Dual-Node Recovery verification points:
/// - Both nodes are seed nodes (config-verified)
/// - min-nr-of-members=1 allows single-node cluster formation
/// - First node forms cluster, singleton starts, rebuilds from SQLite
/// - Second node joins as standby
/// - On primary graceful shutdown, singleton hands over to standby
/// - On primary crash, SBR detects failure and new singleton starts on standby
/// </summary>
public class FailoverIntegrationTests
{
    /// <summary>Skip reason for scenarios validated by another test class.</summary>
    private const string CoveredElsewhere = "Covered by DeploymentManagerActorTests";

    /// <summary>Skip reason for scenarios that need two real host processes.</summary>
    private const string RequiresMultiProcess = "Requires multi-process test infrastructure";

    /// <summary>
    /// Verification point: a single node forms the cluster and the singleton
    /// rebuilds its state from SQLite.
    /// </summary>
    [Fact(Skip = CoveredElsewhere)]
    [Trait("Category", "Integration")]
    public void SingleNode_FormsSingletonCluster_RebuildFromSQLite()
    {
        // This is validated by the DeploymentManagerActorTests.
        // A single-node cluster with min-nr-of-members=1 forms immediately.
        // The DeploymentManager singleton starts and loads from SQLite.
        // See: DeploymentManager_CreatesInstanceActors_FromStoredConfigs
    }

    /// <summary>
    /// Verification point: a graceful shutdown of the primary hands the
    /// singleton over to the remaining node.
    /// </summary>
    [Fact(Skip = RequiresMultiProcess)]
    [Trait("Category", "Integration")]
    public void GracefulShutdown_SingletonHandover()
    {
        // WP-6: CoordinatedShutdown triggers graceful cluster leave.
        // The AkkaHostedService.StopAsync runs CoordinatedShutdown which:
        //   1. Leaves the cluster gracefully
        //   2. Singleton manager detects leave and starts handover
        //   3. New singleton instance starts on the remaining node
        //
        // Actual multi-process test would require starting two Host processes.
        // This is documented as a manual verification point.
    }

    /// <summary>
    /// Verification point: after an ungraceful crash the split brain resolver
    /// downs the failed node and the singleton restarts on the standby.
    /// </summary>
    [Fact(Skip = RequiresMultiProcess)]
    [Trait("Category", "Integration")]
    public void CrashRecovery_SBRDownsNode_SingletonRestartsOnStandby()
    {
        // When a node crashes (ungraceful):
        //   1. Failure detector detects missing heartbeats (10s threshold)
        //   2. SBR keep-oldest with down-if-alone=on resolves split brain
        //   3. Crashed node is downed after stable-after period (15s)
        //   4. ClusterSingletonManager starts new singleton on surviving node
        //   5. New singleton loads all configs from SQLite and creates Instance Actors
        //
        // Total failover time: ~25s (10s detection + 15s stable-after)
    }

    /// <summary>
    /// Verification point: both nodes restart (full site power cycle) and the
    /// site recovers from the persisted SQLite configuration.
    /// </summary>
    [Fact(Skip = RequiresMultiProcess)]
    [Trait("Category", "Integration")]
    public void DualNodeRecovery_BothNodesRestart_FromSQLite()
    {
        // WP-7: When both nodes restart (full site power cycle):
        //   1. First node starts, forms cluster (min-nr-of-members=1)
        //   2. Singleton starts on first node
        //   3. DeploymentManager reads all configs from persistent SQLite
        //   4. Instance Actors are recreated in staggered batches
        //   5. Second node starts, joins existing cluster
        //   6. Second node becomes standby for singleton
    }
}