Files
lmxopcua/tests/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.ParityTests/ParityHarness.cs
Joseph Doherty 5e890ec9d6 parity: triage 3 false-positives from first-rig run (2026-04-30)
After running the matrix end-to-end against the live rig for the
first time, three of the nine failures were false positives — bugs in
the harness and test invariants, not real backend deltas:

1. ParityHarness configured the legacy backend with
   OTOPCUA_GALAXY_BACKEND=db, which is Discover-only. Reads, writes,
   and reinits all returned "MXAccess code lift pending — DB-backed
   backend covers Discover only". Switched to mxaccess backend; the
   ZB connection string still drives the discovery path.

2. HistoryReadParityTests asserted "neither backend implements
   IHistoryProvider" — but the legacy GalaxyProxyDriver still does
   (it's an accepted back-compat delta retired in PR 7.2). The
   architectural pin we *want* is "the new path doesn't regress to
   per-driver history", so the test now asserts only the mxgw side.

3. AlarmTransitionParityTests strict-pinned the five sub-attribute
   refs (InAlarmRef, etc.) on the legacy condition. PR 2.1 added
   those refs specifically so the new mxgw driver could populate them
   via AlarmRefBuilder; legacy pre-dates PR 2.1 and leaves them null
   — that's correct, not a regression. Test now asserts a one-way
   invariant: when legacy populated a ref, mxgw must match. When
   legacy is null, mxgw is free to populate (the mxgw → server-side
   AlarmConditionService direction).

The six remaining failures are real:

- 2 from the gw-side `[]` array suffix (filed in
  mxaccessgw/requirements-array-suffix-fix.md)
- 2 write-StatusCode mapping deltas (0x80050000 vs 0x80020000) —
  Bad-status both ways but mapped to different OPC UA codes
- 1 event-rate ratio of 5x (mxgw dispatches 5x legacy in the same
  3s window)
- (Plus the 2 ScanState scenarios that skip cleanly — single-platform
  rig as documented)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 03:00:44 -04:00

291 lines
12 KiB
C#

using System.Diagnostics;
using System.Net.Sockets;
using System.Reflection;
using System.Security.Principal;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.ParityTests;
/// <summary>
/// Side-by-side fixture that drives both the legacy <see cref="GalaxyProxyDriver"/>
/// (talking to an out-of-process <c>OtOpcUa.Driver.Galaxy.Host.exe</c>) and the new
/// in-process <see cref="GalaxyDriver"/> (talking to a running <c>mxaccessgw</c>
/// gateway) against the same dev Galaxy. Phase 5 scenario tests use this harness
/// to capture comparable snapshots from each backend.
/// </summary>
/// <remarks>
/// Per-backend availability is independent — a developer running just the legacy
/// Galaxy.Host EXE without an mxaccessgw process up will see the legacy driver
/// resolve and the mxgw driver mark itself unavailable. Each test decides how to
/// handle partial availability:
/// <list type="bullet">
/// <item>Strict-parity tests call <see cref="RequireBoth"/> to skip when either side
/// is missing.</item>
/// <item>Single-backend smoke tests call <see cref="GetDriver"/> for the backend they
/// care about and skip with the recorded <c>SkipReason</c>.</item>
/// </list>
/// Endpoint overrides come from environment variables so dev VMs and the central
/// parity host can target the same suite without touching the test source:
/// <list type="bullet">
/// <item><c>OTOPCUA_PARITY_GW_ENDPOINT</c> — defaults to <c>http://localhost:5120</c>
/// (mxaccessgw <c>launchSettings.json</c> http profile).</item>
/// <item><c>OTOPCUA_PARITY_GW_API_KEY</c> — defaults to <c>parity-suite-key</c>.</item>
/// <item><c>OTOPCUA_PARITY_CLIENT_NAME</c> — defaults to <c>OtOpcUa-Parity</c>.</item>
/// </list>
/// </remarks>
public sealed class ParityHarness : IAsyncLifetime
{
/// <summary>Identifies which of the two drivers under comparison a call targets.</summary>
public enum Backend { LegacyHost, MxGateway }
// Shared secret passed both to the spawned Galaxy.Host EXE (via environment) and to
// the proxy driver's options, so the two ends of the pipe agree.
private const string LegacySecret = "parity-suite-secret";
// Fallbacks used when the matching OTOPCUA_PARITY_* environment variable is not set.
private const string DefaultGwEndpoint = "http://localhost:5120";
private const string DefaultGwApiKey = "parity-suite-key";
private const string DefaultClientName = "OtOpcUa-Parity";
/// <summary>Legacy proxy driver; <c>null</c> unless it initialized successfully, and reset to <c>null</c> on disposal.</summary>
public IDriver? LegacyDriver { get; private set; }
/// <summary>Human-readable reason the legacy backend was not brought up; <c>null</c> when no reason was recorded.</summary>
public string? LegacySkipReason { get; private set; }
/// <summary>mxaccessgw-backed driver; <c>null</c> unless it initialized successfully, and reset to <c>null</c> on disposal.</summary>
public IDriver? MxGatewayDriver { get; private set; }
/// <summary>Human-readable reason the mxgateway backend was not brought up; <c>null</c> when no reason was recorded.</summary>
public string? MxGatewaySkipReason { get; private set; }
// Handle to the Galaxy.Host EXE subprocess spawned for the legacy backend; killed and
// disposed during teardown.
private Process? _legacyHost;
/// <summary>
/// Fixture start-up. On Windows, brings up the legacy backend and then the mxgateway
/// backend; each records its own skip reason on failure. On any other OS both
/// backends are marked unavailable and nothing is started.
/// </summary>
public async ValueTask InitializeAsync()
{
    if (OperatingSystem.IsWindows())
    {
        // Sequential on purpose: legacy first, gateway second.
        await InitializeLegacyAsync();
        await InitializeMxGatewayAsync();
        return;
    }

    const string reason = "Windows-only";
    LegacySkipReason = reason;
    MxGatewaySkipReason = reason;
}
/// <summary>
/// Fixture teardown: shuts down the legacy driver, terminates the spawned Galaxy.Host
/// process, then shuts down the mxgateway driver.
/// </summary>
/// <remarks>
/// Every stage is best-effort and isolated from the others. A failure while shutting
/// down or disposing one driver must not prevent the Host EXE subprocess from being
/// killed or the other driver from releasing its resources, so both the
/// <c>ShutdownAsync</c> call and the <c>Dispose</c> call are guarded.
/// </remarks>
public async ValueTask DisposeAsync()
{
    await ReleaseDriverAsync(LegacyDriver);
    LegacyDriver = null;

    var host = _legacyHost;
    _legacyHost = null;
    if (host is not null)
    {
        try
        {
            if (!host.HasExited)
            {
                // Kill and wait are guarded separately: Kill can throw when the process
                // exits between the HasExited check and the call, and we still want the
                // bounded wait in that case.
                try { host.Kill(entireProcessTree: true); } catch { /* already gone */ }
                try { host.WaitForExit(5_000); } catch { /* ignore */ }
            }
        }
        catch
        {
            // HasExited itself failed — nothing more we can do for this process.
        }
        finally
        {
            host.Dispose();
        }
    }

    await ReleaseDriverAsync(MxGatewayDriver);
    MxGatewayDriver = null;

    // Shuts a driver down and disposes it, swallowing failures from either step so the
    // caller can always continue with the next teardown stage.
    static async Task ReleaseDriverAsync(IDriver? driver)
    {
        if (driver is null)
        {
            return;
        }

        try
        {
            await driver.ShutdownAsync(CancellationToken.None);
        }
        catch
        {
            // best-effort shutdown
        }

        try
        {
            (driver as IDisposable)?.Dispose();
        }
        catch
        {
            // best-effort dispose
        }
    }
}
/// <summary>Skip the test if either backend isn't available — strict-parity scenarios.</summary>
/// <remarks>
/// A backend counts as unavailable when a skip reason was recorded for it or when its
/// driver is <c>null</c>. The driver check mirrors <see cref="GetDriver"/>, so a test
/// that passes this gate can safely dereference both drivers.
/// </remarks>
public void RequireBoth()
{
    const string noReason = "driver not initialized";

    if (LegacyDriver is null || LegacySkipReason is not null)
    {
        var reason = LegacySkipReason ?? noReason;
        Assert.Skip($"legacy backend unavailable: {reason}");
    }

    if (MxGatewayDriver is null || MxGatewaySkipReason is not null)
    {
        var reason = MxGatewaySkipReason ?? noReason;
        Assert.Skip($"mxgateway backend unavailable: {reason}");
    }
}
/// <summary>
/// Returns the driver for <paramref name="backend"/>, or skips the calling test with
/// that backend's recorded skip reason when the driver is not available.
/// </summary>
/// <param name="backend">Which backend's driver to return.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="backend"/> is not a defined <see cref="Backend"/> value.
/// </exception>
public IDriver GetDriver(Backend backend)
{
    switch (backend)
    {
        case Backend.LegacyHost:
            return LegacyDriver
                ?? SkipAndThrow($"legacy backend unavailable: {LegacySkipReason}");

        case Backend.MxGateway:
            return MxGatewayDriver
                ?? SkipAndThrow($"mxgateway backend unavailable: {MxGatewaySkipReason}");

        default:
            throw new ArgumentOutOfRangeException(nameof(backend), backend, null);
    }
}
/// <summary>
/// Runs <paramref name="scenario"/> once per backend whose driver is currently
/// available — legacy first, then mxgateway — and collects the results keyed by
/// backend so the test can diff them. Backends that did not resolve during
/// <see cref="InitializeAsync"/> are simply absent from the result; if neither
/// resolved the dictionary is empty and the test should skip.
/// </summary>
/// <param name="scenario">Closure to execute against each available driver.</param>
/// <param name="cancellationToken">Token forwarded to every scenario invocation.</param>
/// <returns>One entry per backend the scenario ran against.</returns>
public async Task<IReadOnlyDictionary<Backend, T>> RunOnAvailableAsync<T>(
    Func<IDriver, CancellationToken, Task<T>> scenario, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(scenario);

    var snapshots = new Dictionary<Backend, T>();

    if (LegacyDriver is { } legacy)
    {
        var legacyResult = await scenario(legacy, cancellationToken).ConfigureAwait(false);
        snapshots[Backend.LegacyHost] = legacyResult;
    }

    if (MxGatewayDriver is { } gateway)
    {
        var gatewayResult = await scenario(gateway, cancellationToken).ConfigureAwait(false);
        snapshots[Backend.MxGateway] = gatewayResult;
    }

    return snapshots;
}
/// <summary>
/// Brings up the legacy backend: probes the Galaxy ZB SQL port, locates and spawns the
/// Galaxy.Host EXE on a unique named pipe, then connects a
/// <see cref="GalaxyProxyDriver"/> to it. On any failure
/// <see cref="LegacySkipReason"/> is set and <see cref="LegacyDriver"/> stays
/// <c>null</c>; this method does not propagate boot failures to the caller.
/// </summary>
[System.Runtime.Versioning.SupportedOSPlatform("windows")]
private async Task InitializeLegacyAsync()
{
    if (!await ZbReachableAsync())
    {
        LegacySkipReason = "Galaxy ZB SQL not reachable on localhost:1433";
        return;
    }

    var hostExe = FindLegacyHostExe();
    if (hostExe is null)
    {
        LegacySkipReason = "Galaxy.Host EXE not built — run `dotnet build src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host`";
        return;
    }

    // The host only accepts pipe connections from the SID we hand it, so we need the
    // current user's SID. WindowsIdentity.User can be null (e.g. anonymous identity).
    using var identity = WindowsIdentity.GetCurrent();
    var sid = identity.User?.Value;
    if (sid is null)
    {
        LegacySkipReason = "legacy backend boot failed: current Windows identity has no user SID";
        return;
    }

    // Unique pipe name per fixture instance so parallel runs cannot collide.
    var pipe = $"OtOpcUaGalaxyParity-{Guid.NewGuid():N}";
    var psi = new ProcessStartInfo(hostExe)
    {
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        EnvironmentVariables =
        {
            ["OTOPCUA_GALAXY_PIPE"] = pipe,
            ["OTOPCUA_ALLOWED_SID"] = sid,
            ["OTOPCUA_GALAXY_SECRET"] = LegacySecret,
            // PR 5.W triage 2026-04-30: db-backend is Discover-only. The parity
            // matrix needs Read / Write / Subscribe over a real MxAccess session,
            // so use the mxaccess backend. ZB conn string is still consulted for
            // the discovery path (the mxaccess backend layers MxAccess on top of
            // the same DB).
            ["OTOPCUA_GALAXY_BACKEND"] = "mxaccess",
            ["OTOPCUA_GALAXY_ZB_CONN"] = "Server=localhost;Database=ZB;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;",
        },
    };

    // Tracked separately from LegacyDriver so a driver whose InitializeAsync throws can
    // still be disposed in the catch block.
    IDriver? pending = null;
    try
    {
        _legacyHost = Process.Start(psi)
            ?? throw new InvalidOperationException("Failed to spawn Galaxy.Host EXE");

        // stdout/stderr are redirected, so they MUST be drained: otherwise the child
        // blocks on write once the pipe buffer fills. No handlers are attached — the
        // output is read and discarded.
        _legacyHost.BeginOutputReadLine();
        _legacyHost.BeginErrorReadLine();

        await Task.Delay(2_000); // PipeServer warm-up — ParityFixture's settled value

        if (_legacyHost.HasExited)
        {
            // Fail fast with a precise reason instead of waiting out the connect timeout.
            throw new InvalidOperationException(
                $"Galaxy.Host EXE exited during warm-up (exit code {_legacyHost.ExitCode})");
        }

        var driver = new GalaxyProxyDriver(new GalaxyProxyOptions
        {
            DriverInstanceId = "parity-legacy",
            PipeName = pipe,
            SharedSecret = LegacySecret,
            ConnectTimeout = TimeSpan.FromSeconds(5),
        });
        pending = driver;
        await driver.InitializeAsync(driverConfigJson: "{}", CancellationToken.None);
        LegacyDriver = driver;
    }
    catch (Exception ex)
    {
        LegacySkipReason = $"legacy backend boot failed: {ex.Message}";

        // Release the half-initialized driver (if any); best-effort.
        try { (pending as IDisposable)?.Dispose(); } catch { /* ignore */ }

        // Don't leave an orphaned host running; DisposeAsync disposes the handle later.
        try
        {
            if (_legacyHost is not null && !_legacyHost.HasExited)
            {
                _legacyHost.Kill(entireProcessTree: true);
            }
        }
        catch
        {
            // ignore — process already gone or not killable
        }
    }
}
/// <summary>
/// Brings up the mxgateway backend: resolves endpoint / API key / client name from the
/// <c>OTOPCUA_PARITY_*</c> environment variables (falling back to the built-in
/// defaults), probes the endpoint over TCP, then creates and initializes the driver.
/// On any failure <see cref="MxGatewaySkipReason"/> is set and
/// <see cref="MxGatewayDriver"/> stays <c>null</c>.
/// </summary>
private async Task InitializeMxGatewayAsync()
{
    var endpoint = Environment.GetEnvironmentVariable("OTOPCUA_PARITY_GW_ENDPOINT") ?? DefaultGwEndpoint;
    var apiKey = Environment.GetEnvironmentVariable("OTOPCUA_PARITY_GW_API_KEY") ?? DefaultGwApiKey;
    var clientName = Environment.GetEnvironmentVariable("OTOPCUA_PARITY_CLIENT_NAME") ?? DefaultClientName;

    if (!await GwReachableAsync(endpoint))
    {
        MxGatewaySkipReason = $"mxaccessgw not reachable at {endpoint}";
        return;
    }

    // Build the config with a real JSON writer: the values come from the environment,
    // and a quote or backslash in any of them would corrupt a hand-interpolated document.
    // Scheme check is ordinal (not culture-sensitive) and case-insensitive, as URI
    // schemes are.
    var configJson = System.Text.Json.JsonSerializer.Serialize(new
    {
        Gateway = new
        {
            Endpoint = endpoint,
            ApiKeySecretRef = apiKey,
            UseTls = endpoint.StartsWith("https", StringComparison.OrdinalIgnoreCase),
        },
        MxAccess = new { ClientName = clientName },
    });

    // Tracked separately from MxGatewayDriver so a driver whose InitializeAsync throws
    // can still be disposed in the catch block.
    IDriver? pending = null;
    try
    {
        var driver = GalaxyDriverFactoryExtensions.CreateInstance("parity-mxgw", configJson);
        pending = driver;
        await driver.InitializeAsync(configJson, CancellationToken.None);
        MxGatewayDriver = driver;
    }
    catch (Exception ex)
    {
        MxGatewaySkipReason = $"mxgateway backend boot failed: {ex.GetType().Name}: {ex.Message}";

        // Release the half-initialized driver (if any); best-effort.
        try { (pending as IDisposable)?.Dispose(); } catch { /* ignore */ }
    }
}
/// <summary>
/// Skips the current test with <paramref name="reason"/>. Declared as returning
/// <see cref="IDriver"/> only so it can be used in expression position; it never
/// actually returns a value.
/// </summary>
/// <param name="reason">Skip message reported for the test.</param>
private static IDriver SkipAndThrow(string reason)
{
    Assert.Skip(reason);

    // Assert.Skip throws SkipException, so control never gets here; the throw exists
    // purely to satisfy the compiler's definite-return analysis.
    throw new UnreachableException();
}
/// <summary>
/// Probes <c>localhost:1433</c> with a plain TCP connect, bounded to 1.5 seconds.
/// </summary>
/// <returns>
/// <c>true</c> when the connection was established within the time limit;
/// <c>false</c> on timeout, refusal, or any other failure.
/// </returns>
private static async Task<bool> ZbReachableAsync()
{
    try
    {
        // The timeout cancels the connect attempt itself, so no connect task is left
        // running against a disposed client to fault unobserved later.
        using var timeout = new CancellationTokenSource(TimeSpan.FromMilliseconds(1_500));
        using var client = new TcpClient();
        await client.ConnectAsync("localhost", 1433, timeout.Token);
        return client.Connected;
    }
    catch
    {
        // Deliberately broad: this is an availability probe, and every failure mode
        // (timeout, refused, DNS, ...) means "not reachable".
        return false;
    }
}
/// <summary>
/// Probes the gateway endpoint with a plain TCP connect, bounded to 1.5 seconds.
/// </summary>
/// <param name="endpoint">Absolute gateway URI, e.g. <c>http://localhost:5120</c>.</param>
/// <returns>
/// <c>true</c> when a TCP connection to the endpoint's host and port was established
/// within the time limit; <c>false</c> on timeout, refusal, an unparsable
/// <paramref name="endpoint"/>, or any other failure.
/// </returns>
private static async Task<bool> GwReachableAsync(string endpoint)
{
    // Lightweight TCP probe — avoids spending the full gRPC connect timeout when the
    // gateway just isn't running. We can't validate the API-key handshake here without
    // doing a real RPC, so a successful TCP connect is the "available" signal and any
    // auth/protocol failure surfaces when the driver is initialized.
    try
    {
        var uri = new Uri(endpoint, UriKind.Absolute);
        var port = uri.Port > 0 ? uri.Port : (uri.Scheme == Uri.UriSchemeHttps ? 443 : 80);

        // The timeout cancels the connect attempt itself, so no connect task is left
        // running against a disposed client to fault unobserved later.
        using var timeout = new CancellationTokenSource(TimeSpan.FromMilliseconds(1_500));
        using var client = new TcpClient();
        await client.ConnectAsync(uri.Host, port, timeout.Token);
        return client.Connected;
    }
    catch
    {
        // Deliberately broad: this is an availability probe, and every failure mode
        // (bad URI, timeout, refused, DNS, ...) means "not reachable".
        return false;
    }
}
/// <summary>
/// Locates the legacy Galaxy.Host EXE by walking up from the test assembly's directory
/// (at most 8 directories, starting with the assembly's own) until the solution file is
/// found, then probing the host project's Debug/net48 output folder beneath it.
/// </summary>
/// <returns>
/// Full path to the EXE, or <c>null</c> when the solution root could not be found
/// within the search depth or the EXE has not been built.
/// </returns>
private static string? FindLegacyHostExe()
{
    const string solutionMarker = "ZB.MOM.WW.OtOpcUa.slnx";
    const int maxLevels = 8;

    var candidate = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
    for (var level = 0; level < maxLevels && !string.IsNullOrEmpty(candidate); level++)
    {
        if (File.Exists(Path.Combine(candidate, solutionMarker)))
        {
            var hostExe = Path.Combine(candidate,
                "src", "ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host", "bin", "Debug", "net48",
                "OtOpcUa.Driver.Galaxy.Host.exe");
            return File.Exists(hostExe) ? hostExe : null;
        }

        candidate = Path.GetDirectoryName(candidate);
    }

    // Marker never found: do not probe relative to whatever ancestor the walk stopped
    // at — that directory is not the solution root.
    return null;
}
}
/// <summary>
/// xUnit collection definition that attaches <see cref="ParityHarness"/> as a collection
/// fixture to the collection named <c>ParityCollection</c>. The class has no members;
/// it exists only to carry the attribute and the fixture interface.
/// </summary>
[CollectionDefinition(nameof(ParityCollection))]
public sealed class ParityCollection : ICollectionFixture<ParityHarness>
{
}