using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.Runtime;
/// <summary>
/// Tests for <see cref="ReconnectSupervisor"/>'s state machine + backoff. Each
/// scenario drives the supervisor with controllable reopen/replay callbacks and
/// observes the resulting state transitions.
/// </summary>
public sealed class ReconnectSupervisorTests
{
    /// <summary>Upper bound (ms) to wait for a recovery cycle to reach Healthy.</summary>
    private const int WaitMs = 2_000;

    /// <summary>
    /// Options with millisecond-scale backoff so retry loops complete quickly in tests.
    /// </summary>
    private static ReconnectOptions FastBackoff() =>
        new(InitialBackoffOverride: TimeSpan.FromMilliseconds(5),
            MaxBackoffOverride: TimeSpan.FromMilliseconds(20));

    [Fact]
    public void InitialState_IsHealthy()
    {
        using var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());

        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.IsDegraded.ShouldBeFalse();
        sup.LastError.ShouldBeNull();
    }

    [Fact]
    public async Task ReportTransportFailure_DrivesThroughReopenReplay_BackToHealthy()
    {
        // Record only the destination state of each transition. The handler may run
        // on the supervisor's recovery loop, so guard the list with a lock.
        var observed = new List<ReconnectSupervisor.State>();
        var gate = new object();
        using var sup = new ReconnectSupervisor(
            reopen: _ => Task.CompletedTask,
            replay: _ => Task.CompletedTask,
            options: FastBackoff());
        sup.StateChanged += (_, t) => { lock (gate) observed.Add(t.Next); };

        sup.ReportTransportFailure(new IOException("transport drop"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // Expected sequence: Healthy → TransportLost → Reopening → Replaying → Healthy.
        ReconnectSupervisor.State[] states;
        lock (gate) states = [.. observed];
        states.ShouldContain(ReconnectSupervisor.State.TransportLost);
        states.ShouldContain(ReconnectSupervisor.State.Reopening);
        states.ShouldContain(ReconnectSupervisor.State.Replaying);
        states[^1].ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.IsDegraded.ShouldBeFalse();
    }

    [Fact]
    public async Task ReopenFailure_RetriesUntilSuccess_StaysInReopeningBetweenAttempts()
    {
        // Reopen fails twice, then succeeds on the third attempt.
        var attempts = 0;
        using var sup = new ReconnectSupervisor(
            reopen: _ => { attempts++; return attempts < 3 ? Task.FromException(new IOException("nope")) : Task.CompletedTask; },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        attempts.ShouldBe(3);
        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.LastError.ShouldBeNull(); // cleared on Healthy transition
    }

    [Fact]
    public async Task ReplayFailure_RetriesEntireCycle()
    {
        // A replay failure restarts the whole reopen+replay cycle, not just replay.
        var reopens = 0;
        var replays = 0;
        using var sup = new ReconnectSupervisor(
            reopen: _ => { reopens++; return Task.CompletedTask; },
            replay: _ => { replays++; return replays < 2 ? Task.FromException(new IOException("replay nope")) : Task.CompletedTask; },
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // First cycle: reopen succeeds, replay fails. Second cycle: both succeed.
        reopens.ShouldBe(2);
        replays.ShouldBe(2);
        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
    }

    [Fact]
    public async Task RepeatedFailureReports_DuringRecovery_DoNotSpawnParallelLoops()
    {
        var attempts = 0;
        using var sup = new ReconnectSupervisor(
            reopen: async ct =>
            {
                attempts++;
                await Task.Delay(50, ct).ConfigureAwait(false);
            },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("first"));
        // Fire several more reports while reopen is in flight.
        for (var i = 0; i < 5; i++)
        {
            sup.ReportTransportFailure(new IOException($"rapid-{i}"));
        }
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // One Reopen call regardless of how many failures arrived during recovery.
        attempts.ShouldBe(1);
    }

    [Fact]
    public async Task LastError_ReflectsMostRecentFailureCause()
    {
        // Reopen never succeeds, so LastError should keep tracking the loop's failures.
        using var sup = new ReconnectSupervisor(
            reopen: _ => Task.FromException(new IOException("reopen broke")),
            replay: _ => Task.CompletedTask,
            options: new ReconnectOptions(
                InitialBackoffOverride: TimeSpan.FromMilliseconds(5),
                MaxBackoffOverride: TimeSpan.FromMilliseconds(10)));

        sup.ReportTransportFailure(new IOException("initial"));
        // Allow the loop to attempt at least twice.
        await Task.Delay(100);

        sup.LastError.ShouldNotBeNull();
        sup.LastError.ShouldContain("reopen broke"); // updates from the loop's failed reopen attempts
    }

    [Fact]
    public async Task Dispose_CancelsRunningRecoveryLoop_Cleanly()
    {
        // Not using `using` here: Dispose is the operation under test.
        var cancelled = false;
        var sup = new ReconnectSupervisor(
            reopen: async ct =>
            {
                try { await Task.Delay(10_000, ct).ConfigureAwait(false); }
                catch (OperationCanceledException) { cancelled = true; throw; }
            },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await Task.Delay(50); // let the loop start the long reopen

        Should.NotThrow(() => sup.Dispose());
        cancelled.ShouldBeTrue();
    }

    [Fact]
    public void ReportTransportFailure_AfterDispose_Throws()
    {
        var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());
        sup.Dispose();

        // NOTE(review): assumes the supervisor follows the standard disposed-object
        // contract and throws ObjectDisposedException — confirm against the implementation.
        Should.Throw<ObjectDisposedException>(() => sup.ReportTransportFailure(new IOException("x")));
    }

    [Fact]
    public async Task WaitForHealthy_ReturnsImmediately_WhenAlreadyHealthy()
    {
        using var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());

        // No failure reported — should be Healthy from the start.
        var deadline = DateTime.UtcNow.AddMilliseconds(50);
        await sup.WaitForHealthyAsync(new CancellationTokenSource(50).Token);

        DateTime.UtcNow.ShouldBeLessThan(deadline.AddMilliseconds(100)); // returned promptly
    }
}