using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;

namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.Runtime;

/// <summary>
/// Tests for <see cref="ReconnectSupervisor"/>'s state machine + backoff. Each
/// scenario drives the supervisor with controllable reopen/replay callbacks and
/// observes the resulting state transitions.
/// </summary>
public sealed class ReconnectSupervisorTests
{
    private const int WaitMs = 2_000;

    private static ReconnectOptions FastBackoff() => new(
        InitialBackoffOverride: TimeSpan.FromMilliseconds(5),
        MaxBackoffOverride: TimeSpan.FromMilliseconds(20));
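    // The assertions below are written against the following assumed surface of
    // ReconnectSupervisor — a sketch reconstructed from usage in this file, not
    // the real declarations. In particular, the StateTransition name and its
    // Previous property are inferred; only Next is actually asserted on.
    //
    //   public sealed class ReconnectSupervisor : IDisposable
    //   {
    //       public enum State { Healthy, TransportLost, Reopening, Replaying }
    //       public readonly record struct StateTransition(State Previous, State Next);
    //
    //       public ReconnectSupervisor(
    //           Func<CancellationToken, Task> reopen,
    //           Func<CancellationToken, Task> replay,
    //           ReconnectOptions options);
    //
    //       public event EventHandler<StateTransition>? StateChanged;
    //       public State CurrentState { get; }
    //       public bool IsDegraded { get; }
    //       public string? LastError { get; }
    //
    //       public void ReportTransportFailure(Exception cause);
    //       public Task WaitForHealthyAsync(CancellationToken ct);
    //   }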
    [Fact]
    public void InitialState_IsHealthy()
    {
        using var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());

        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.IsDegraded.ShouldBeFalse();
        sup.LastError.ShouldBeNull();
    }

    [Fact]
    public async Task ReportTransportFailure_DrivesThroughReopenReplay_BackToHealthy()
    {
        var transitions = new List<ReconnectSupervisor.StateTransition>();
        var lockObj = new object();
        using var sup = new ReconnectSupervisor(
            reopen: _ => Task.CompletedTask,
            replay: _ => Task.CompletedTask,
            options: FastBackoff());
        sup.StateChanged += (_, t) => { lock (lockObj) transitions.Add(t); };

        sup.ReportTransportFailure(new IOException("transport drop"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // Expected sequence: Healthy → TransportLost → Reopening → Replaying → Healthy.
        IReadOnlyList<ReconnectSupervisor.StateTransition> snapshot;
        lock (lockObj) snapshot = [.. transitions];
        var states = snapshot.Select(t => t.Next).ToArray();
        states.ShouldContain(ReconnectSupervisor.State.TransportLost);
        states.ShouldContain(ReconnectSupervisor.State.Reopening);
        states.ShouldContain(ReconnectSupervisor.State.Replaying);
        states[^1].ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.IsDegraded.ShouldBeFalse();
    }

    [Fact]
    public async Task ReopenFailure_RetriesUntilSuccess_StaysInReopeningBetweenAttempts()
    {
        var attempts = 0;
        using var sup = new ReconnectSupervisor(
            reopen: _ =>
            {
                attempts++;
                return attempts < 3 ? Task.FromException(new IOException("nope")) : Task.CompletedTask;
            },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        attempts.ShouldBe(3);
        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.LastError.ShouldBeNull(); // cleared on Healthy transition
    }

    [Fact]
    public async Task ReplayFailure_RetriesEntireCycle()
    {
        var reopens = 0;
        var replays = 0;
        using var sup = new ReconnectSupervisor(
            reopen: _ => { reopens++; return Task.CompletedTask; },
            replay: _ =>
            {
                replays++;
                return replays < 2 ? Task.FromException(new IOException("replay nope")) : Task.CompletedTask;
            },
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // First cycle: reopen succeeds, replay fails. Second cycle: both succeed.
        reopens.ShouldBe(2);
        replays.ShouldBe(2);
        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
    }

    [Fact]
    public async Task RepeatedFailureReports_DuringRecovery_DoNotSpawnParallelLoops()
    {
        var attempts = 0;
        using var sup = new ReconnectSupervisor(
            reopen: async ct =>
            {
                attempts++;
                await Task.Delay(50, ct).ConfigureAwait(false);
            },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("first"));

        // Fire several more reports while reopen is in flight.
        for (var i = 0; i < 5; i++)
        {
            sup.ReportTransportFailure(new IOException($"rapid-{i}"));
        }

        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // One Reopen call regardless of how many failures arrived during recovery.
        attempts.ShouldBe(1);
    }

    [Fact]
    public async Task LastError_ReflectsMostRecentFailureCause()
    {
        using var sup = new ReconnectSupervisor(
            reopen: _ => Task.FromException(new IOException("reopen broke")),
            replay: _ => Task.CompletedTask,
            options: new ReconnectOptions(
                InitialBackoffOverride: TimeSpan.FromMilliseconds(5),
                MaxBackoffOverride: TimeSpan.FromMilliseconds(10)));

        sup.ReportTransportFailure(new IOException("initial"));

        // Allow the loop to attempt at least twice.
        await Task.Delay(100);

        sup.LastError.ShouldNotBeNull();
        sup.LastError.ShouldContain("reopen broke"); // updates from the loop's failed reopen attempts
    }

    [Fact]
    public async Task Dispose_CancelsRunningRecoveryLoop_Cleanly()
    {
        var cancelled = false;
        var sup = new ReconnectSupervisor(
            reopen: async ct =>
            {
                try
                {
                    await Task.Delay(10_000, ct).ConfigureAwait(false);
                }
                catch (OperationCanceledException)
                {
                    cancelled = true;
                    throw;
                }
            },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await Task.Delay(50); // let the loop start the long reopen

        Should.NotThrow(() => sup.Dispose());
        cancelled.ShouldBeTrue();
    }

    [Fact]
    public void ReportTransportFailure_AfterDispose_Throws()
    {
        var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());
        sup.Dispose();

        // Assumes a disposed supervisor rejects new reports with ObjectDisposedException.
        Should.Throw<ObjectDisposedException>(() => sup.ReportTransportFailure(new IOException("x")));
    }

    [Fact]
    public async Task WaitForHealthy_ReturnsImmediately_WhenAlreadyHealthy()
    {
        using var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());

        // No failure reported — should be Healthy from the start.
        var deadline = DateTime.UtcNow.AddMilliseconds(50);
        await sup.WaitForHealthyAsync(new CancellationTokenSource(50).Token);
        DateTime.UtcNow.ShouldBeLessThan(deadline.AddMilliseconds(100)); // returned promptly
    }
}
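
// Example wiring for the supervisor under test — a sketch; `session`,
// `transport`, and their members are hypothetical stand-ins for the owning
// connection, shown only to illustrate how the reopen/replay callbacks and
// failure reports are intended to be supplied in production code:
//
//   var supervisor = new ReconnectSupervisor(
//       reopen: ct => session.ReopenAsync(ct),               // re-establish the transport
//       replay: ct => session.ReplaySubscriptionsAsync(ct),  // restore server-side state
//       options: new ReconnectOptions(
//           InitialBackoffOverride: TimeSpan.FromSeconds(1),
//           MaxBackoffOverride: TimeSpan.FromSeconds(30)));
//   transport.Faulted += (_, ex) => supervisor.ReportTransportFailure(ex);
//   await supervisor.WaitForHealthyAsync(cancellationToken);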