PR 5.7 — Reconnect / disruption parity scenarios

- Reinitialize_returns_both_backends_to_Healthy — drives
  ReinitializeAsync on each backend, asserts DriverState.Healthy
  afterwards, then re-reads a 3-tag sample to confirm the runtime
  surface is back. Recovery latency isn't pinned tightly: the legacy
  proxy reconnects the named pipe plus Galaxy.Host's MxAccess COM
  client, while mxgw re-Registers the gateway session, so different
  cadences are expected.
- Health_state_diverges_only_when_one_backend_is_in_recovery — a soft
  pin that both backends sit in Healthy or Degraded after init; they
  aren't required to be identical yet because the supervisor's sampling
  cadence differs between backends.

A tighter fault-injection scenario (toxiproxy-style) is the 5.7
follow-up; it lands once the parity rig grows that capability. A
hypothetical sketch of that follow-up is included after the diff below.
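
For orientation: both tests below go through the shared ParityHarness.
A minimal sketch of the surface they assume is included here; the
member names are the ones the tests call, everything else (bodies,
skip-vs-throw behavior) is illustrative only and the real harness in
the parity rig differs.

using ZB.MOM.WW.OtOpcUa.Core.Abstractions;

namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.ParityTests;

// Illustrative sketch only, not the real harness. Shows just the members the
// reconnect tests call: the Backend enum, the two driver handles, RequireBoth,
// and RunOnAvailableAsync fanning one operation across both backends.
public sealed class ParityHarnessSketch
{
    public enum Backend { LegacyHost, MxGateway }

    public IDriver? LegacyDriver { get; init; }
    public IDriver? MxGatewayDriver { get; init; }

    // Guards tests that need both backends up (the real harness may skip instead).
    public void RequireBoth()
    {
        if (LegacyDriver is null || MxGatewayDriver is null)
            throw new InvalidOperationException("both backends must be initialized");
    }

    // Runs the same operation on every available backend and returns the
    // per-backend results so a test can assert parity between them.
    public async Task<IReadOnlyDictionary<Backend, T>> RunOnAvailableAsync<T>(
        Func<IDriver, CancellationToken, Task<T>> op, CancellationToken ct)
    {
        var results = new Dictionary<Backend, T>();
        if (LegacyDriver is not null) results[Backend.LegacyHost] = await op(LegacyDriver, ct);
        if (MxGatewayDriver is not null) results[Backend.MxGateway] = await op(MxGatewayDriver, ct);
        return results;
    }
}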

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author: Joseph Doherty
Date:   2026-04-29 16:29:44 -04:00
Parent: 8d042c631b
Commit: 80a0ca2651


@@ -0,0 +1,69 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;

namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.ParityTests;

/// <summary>
/// PR 5.7 — Reconnect / disruption parity. After <see cref="IDriver.ReinitializeAsync"/>
/// both backends must return to <see cref="DriverState.Healthy"/> and continue serving
/// reads against the same Galaxy. Recovery time isn't pinned tightly because the
/// legacy proxy reconnects the named pipe + Galaxy.Host's MxAccess client while the
/// mxgw driver re-Registers the gateway session — different latencies are expected,
/// but both must converge.
/// </summary>
[Trait("Category", "ParityE2E")]
[Collection(nameof(ParityCollection))]
public sealed class ReconnectParityTests
{
    private readonly ParityHarness _h;

    public ReconnectParityTests(ParityHarness h) => _h = h;

    [Fact]
    public async Task Reinitialize_returns_both_backends_to_Healthy()
    {
        _h.RequireBoth();

        // Capture an initial read off both backends so we have a comparison baseline.
        var b = new RecordingAddressSpaceBuilder();
        await ((ITagDiscovery)_h.LegacyDriver!).DiscoverAsync(b, CancellationToken.None);
        var sample = b.Variables.Take(3).Select(v => v.AttributeInfo.FullName).ToArray();
        if (sample.Length == 0) Assert.Skip("dev Galaxy has no discoverable variables");

        await _h.RunOnAvailableAsync(async (driver, ct) =>
        {
            await driver.ReinitializeAsync(driverConfigJson: "{}", ct);
            var health = driver.GetHealth();
            health.State.ShouldBe(DriverState.Healthy,
                $"{driver.DriverType} must return to Healthy after Reinitialize");
            return health.State;
        }, CancellationToken.None);

        // Reads must continue to succeed after reinit on both sides.
        var reads = await _h.RunOnAvailableAsync(
            (driver, ct) => ((IReadable)driver).ReadAsync(sample, ct),
            CancellationToken.None);
        reads[ParityHarness.Backend.LegacyHost].Count.ShouldBe(sample.Length);
        reads[ParityHarness.Backend.MxGateway].Count.ShouldBe(sample.Length);
    }

    [Fact]
    public async Task Health_state_diverges_only_when_one_backend_is_in_recovery()
    {
        _h.RequireBoth();

        var legacyHealth = _h.LegacyDriver!.GetHealth().State;
        var mxgwHealth = _h.MxGatewayDriver!.GetHealth().State;

        // Both backends were Healthy at end of InitializeAsync. If either has gone
        // Degraded, that's a real issue — surface it directly.
        legacyHealth.ShouldBeOneOf(DriverState.Healthy, DriverState.Degraded);
        mxgwHealth.ShouldBeOneOf(DriverState.Healthy, DriverState.Degraded);

        // For now we don't pin them to be identical because the supervisor's
        // sampling cadence differs between backends. The 5.7 follow-up scenario
        // (when we introduce a toxiproxy-style fault injection) tightens this.
        await Task.CompletedTask;
    }
}
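
The 5.7 follow-up referenced above will need to pin convergence more
tightly, which implies polling GetHealth until a backend reaches the
expected state after a forced disruption. A hypothetical helper along
these lines could back that scenario; the name, placement, and timeout
values are assumptions, not part of this commit, and only the IDriver
members already used in the tests (GetHealth().State, DriverType) are
relied on.

// Hypothetical convergence helper for the tightened 5.7 scenario: poll the
// driver's health until it reaches the expected state or the deadline passes.
internal static class DriverStateWait
{
    public static async Task WaitForStateAsync(
        IDriver driver, DriverState expected, TimeSpan timeout, CancellationToken ct)
    {
        var deadline = DateTime.UtcNow + timeout;
        while (driver.GetHealth().State != expected)
        {
            if (DateTime.UtcNow >= deadline)
                throw new TimeoutException(
                    $"{driver.DriverType} did not reach {expected} within {timeout}");
            await Task.Delay(TimeSpan.FromMilliseconds(250), ct);
        }
    }
}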