State machine that drives GalaxyDriver's recovery from gw transport failure: Healthy → TransportLost → Reopening → Replaying → Healthy. Drivers report failure signals; the supervisor runs reopen + replay with capped exponential backoff (default 500ms → 30s) until both succeed.

Files:
- Runtime/ReconnectSupervisor.cs — state machine with snapshot, change event, last-error tracking, and a one-attempt-at-a-time recovery loop. ReportTransportFailure is idempotent: repeated failure reports during an in-flight recovery do not spawn parallel loops. Reopen + replay are caller-supplied callbacks (the driver injects them in the wire-up PR): reopen re-Registers the gw session; replay re-establishes every active subscription via gw's ReplaySubscriptionsCommand (mxaccessgw issue gw-3) or the SubscribeBulk fallback. Dispose cancels the loop cleanly.
- Public StateTransition record + IsDegraded predicate, which the driver maps to DriverState.Degraded for health snapshots.

Wiring (GalaxyDriver subscribes the supervisor to its EventPump's transport-failure signal, exposes IsDegraded through GetHealth(), and routes the reopen/replay callbacks through GalaxyMxSession + SubscriptionRegistry) lands in PR 4.W, both to avoid conflicts with the parallel host-probe track (PR 4.7) and to align the wire-up with the rest of Phase 4's plumbing.

9 supervisor tests: full state-machine traversal, retry-until-success on both reopen and replay failures, idempotent failure reports, last-error propagation, Dispose mid-recovery, post-dispose throws, and the fast-path Healthy WaitForHealthy.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
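For reference alongside the tests below, here is a minimal sketch of the capped exponential backoff described above, assuming a plain doubling policy from the initial delay up to the cap. The helper name and the exact curve (doubling factor, any jitter) are illustrative assumptions, not the actual code in Runtime/ReconnectSupervisor.cs.

// Illustrative sketch only: one way to compute the capped exponential backoff
// (default 500ms initial, 30s cap) between recovery attempts. The real policy
// lives in Runtime/ReconnectSupervisor.cs and may differ (e.g. add jitter).
static TimeSpan BackoffForAttempt(int attempt, TimeSpan initial, TimeSpan max)
{
    // attempt 0 → initial, attempt 1 → 2× initial, ..., clamped at max.
    var scaledMs = initial.TotalMilliseconds * Math.Pow(2, attempt);
    return TimeSpan.FromMilliseconds(Math.Min(scaledMs, max.TotalMilliseconds));
}

// With the defaults: 0.5s, 1s, 2s, 4s, 8s, 16s, then 30s for every later attempt.
// The tests below shrink these to 5ms/20ms via FastBackoff() so retry loops
// finish well inside the 2-second WaitMs budget.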
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;

namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.Runtime;

/// <summary>
/// Tests for <see cref="ReconnectSupervisor"/>'s state machine + backoff. Each
/// scenario drives the supervisor with controllable reopen/replay callbacks and
/// observes the resulting state transitions.
/// </summary>
public sealed class ReconnectSupervisorTests
{
    private const int WaitMs = 2_000;

    private static ReconnectOptions FastBackoff() =>
        new(InitialBackoffOverride: TimeSpan.FromMilliseconds(5),
            MaxBackoffOverride: TimeSpan.FromMilliseconds(20));

    [Fact]
    public void InitialState_IsHealthy()
    {
        using var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());
        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.IsDegraded.ShouldBeFalse();
        sup.LastError.ShouldBeNull();
    }

    [Fact]
    public async Task ReportTransportFailure_DrivesThroughReopenReplay_BackToHealthy()
    {
        var transitions = new List<StateTransition>();
        var lockObj = new object();

        using var sup = new ReconnectSupervisor(
            reopen: _ => Task.CompletedTask,
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.StateChanged += (_, t) => { lock (lockObj) transitions.Add(t); };

        sup.ReportTransportFailure(new IOException("transport drop"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // Expected sequence: Healthy → TransportLost → Reopening → Replaying → Healthy.
        IReadOnlyList<StateTransition> snapshot;
        lock (lockObj) snapshot = [.. transitions];
        var states = snapshot.Select(t => t.Next).ToArray();

        states.ShouldContain(ReconnectSupervisor.State.TransportLost);
        states.ShouldContain(ReconnectSupervisor.State.Reopening);
        states.ShouldContain(ReconnectSupervisor.State.Replaying);
        states[^1].ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.IsDegraded.ShouldBeFalse();
    }

    [Fact]
    public async Task ReopenFailure_RetriesUntilSuccess_StaysInReopeningBetweenAttempts()
    {
        var attempts = 0;
        using var sup = new ReconnectSupervisor(
            reopen: _ => { attempts++; return attempts < 3 ? Task.FromException(new IOException("nope")) : Task.CompletedTask; },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        attempts.ShouldBe(3);
        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
        sup.LastError.ShouldBeNull(); // cleared on Healthy transition
    }

    [Fact]
    public async Task ReplayFailure_RetriesEntireCycle()
    {
        var reopens = 0;
        var replays = 0;
        using var sup = new ReconnectSupervisor(
            reopen: _ => { reopens++; return Task.CompletedTask; },
            replay: _ => { replays++; return replays < 2 ? Task.FromException(new IOException("replay nope")) : Task.CompletedTask; },
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // First cycle: reopen succeeds, replay fails. Second cycle: both succeed.
        reopens.ShouldBe(2);
        replays.ShouldBe(2);
        sup.CurrentState.ShouldBe(ReconnectSupervisor.State.Healthy);
    }

    [Fact]
    public async Task RepeatedFailureReports_DuringRecovery_DoNotSpawnParallelLoops()
    {
        var attempts = 0;
        using var sup = new ReconnectSupervisor(
            reopen: async ct =>
            {
                attempts++;
                await Task.Delay(50, ct).ConfigureAwait(false);
            },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("first"));
        // Fire several more reports while reopen is in flight.
        for (var i = 0; i < 5; i++)
        {
            sup.ReportTransportFailure(new IOException($"rapid-{i}"));
        }

        await sup.WaitForHealthyAsync(new CancellationTokenSource(WaitMs).Token);

        // One Reopen call regardless of how many failures arrived during recovery.
        attempts.ShouldBe(1);
    }

    [Fact]
    public async Task LastError_ReflectsMostRecentFailureCause()
    {
        using var sup = new ReconnectSupervisor(
            reopen: _ => Task.FromException(new IOException("reopen broke")),
            replay: _ => Task.CompletedTask,
            options: new ReconnectOptions(
                InitialBackoffOverride: TimeSpan.FromMilliseconds(5),
                MaxBackoffOverride: TimeSpan.FromMilliseconds(10)));

        sup.ReportTransportFailure(new IOException("initial"));

        // Allow the loop to attempt at least twice.
        await Task.Delay(100);
        sup.LastError.ShouldNotBeNull();
        sup.LastError.ShouldContain("reopen broke"); // updates from the loop's failed reopen attempts
    }

    [Fact]
    public async Task Dispose_CancelsRunningRecoveryLoop_Cleanly()
    {
        var cancelled = false;
        var sup = new ReconnectSupervisor(
            reopen: async ct =>
            {
                try { await Task.Delay(10_000, ct).ConfigureAwait(false); }
                catch (OperationCanceledException) { cancelled = true; throw; }
            },
            replay: _ => Task.CompletedTask,
            options: FastBackoff());

        sup.ReportTransportFailure(new IOException("kick off"));
        await Task.Delay(50); // let the loop start the long reopen
        Should.NotThrow(() => sup.Dispose());
        cancelled.ShouldBeTrue();
    }

    [Fact]
    public void ReportTransportFailure_AfterDispose_Throws()
    {
        var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());
        sup.Dispose();
        Should.Throw<ObjectDisposedException>(() => sup.ReportTransportFailure(new IOException("x")));
    }

    [Fact]
    public async Task WaitForHealthy_ReturnsImmediately_WhenAlreadyHealthy()
    {
        using var sup = new ReconnectSupervisor(_ => Task.CompletedTask, _ => Task.CompletedTask, FastBackoff());
        // No failure reported — should be Healthy from the start.
        var deadline = DateTime.UtcNow.AddMilliseconds(50);
        await sup.WaitForHealthyAsync(new CancellationTokenSource(50).Token);
        DateTime.UtcNow.ShouldBeLessThan(deadline.AddMilliseconds(100)); // returned promptly
    }
}