fix(probe): Galaxy gRPC ping — drop invalid Retry, treat MxGatewayAuth exceptions as reachable (live /run)
v2-ci / build (push) Failing after 44s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests) (push) Has been skipped
v2-ci / build (push) Failing after 44s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests) (push) Has been skipped
Two bugs caught by live verification against the mxaccessgw at 10.100.0.48:5120: - MaxAttempts=1 produced an invalid Polly RetryStrategyOptions -> the probe failed on every real gateway. Removed the Retry override (matches GalaxyDriver); fail-fast is already guaranteed by the TCP preflight + the per-call deadline. - A rejected key surfaces as a typed MxGatewayAuthenticationException, not a raw RpcException, so 'auth-rejection = reachable' was bypassed. Catch the typed authentication/authorization exceptions -> Ok=true. Adds DriverProbeHandshakeE2eTests: direct-probe, skip-gated cross-protocol green/red discrimination (Modbus, OpcUaClient, Galaxy + a local real OPC UA server).
This commit is contained in:
@@ -107,10 +107,20 @@ key string in the transient config (possibly empty or unresolved) is used as-is.
|
||||
|
||||
- `Unavailable` / transport failure → `Ok=false` (gateway is down or unreachable).
|
||||
- `Unauthenticated` / `PermissionDenied` → **`Ok=true`**,
|
||||
`"gateway reachable & speaking gRPC; auth not checked"` — an auth rejection
|
||||
`"gateway reachable & speaking gRPC (auth not checked)"` — an auth rejection
|
||||
proves a live mxaccessgw gRPC server. This is the correct result: the driver's
|
||||
own session-layer will handle auth; the probe is testing reachability only.
|
||||
|
||||
The mxaccessgw client surfaces a rejected key as a typed
|
||||
`MxGatewayAuthenticationException` / `MxGatewayAuthorizationException`, **not** a
|
||||
raw `RpcException` — the probe catches both and maps them to the reachable result
|
||||
above. (Live verification on `10.100.0.48:5120` with no key returns
|
||||
`MxGatewayAuthenticationException("Missing or invalid API key.")` → `Ok=true`.)
|
||||
|
||||
> **Config note:** `UseTls` must match the endpoint scheme — `UseTls:false` for an
|
||||
> `http://` (h2c) gateway, `UseTls:true` for `https://`. A mismatch fails the
|
||||
> client's own validation (the same constraint the Galaxy driver enforces).
|
||||
|
||||
---
|
||||
|
||||
## Live-verify scope
|
||||
|
||||
@@ -101,6 +101,15 @@ public sealed class GalaxyDriverProbe : IDriverProbe
|
||||
var (ok, message) = ClassifyRpc(ex.StatusCode, host, port);
|
||||
return new(ok, message, ok ? sw.Elapsed : null);
|
||||
}
|
||||
catch (Exception ex) when (ex is MxGatewayAuthenticationException or MxGatewayAuthorizationException)
|
||||
{
|
||||
// The gateway ran its auth check on our call and rejected the (unresolved /
|
||||
// placeholder) key — the mxaccessgw client surfaces this as a typed exception, NOT a
|
||||
// raw RpcException. It still PROVES a live gateway gRPC server answered, so auth
|
||||
// rejection counts as reachable (the probe never resolves the real secret).
|
||||
sw.Stop();
|
||||
return new(true, "gateway reachable & speaking gRPC (auth not checked)", sw.Elapsed);
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested)
|
||||
{
|
||||
// The caller cancelled (their own timeout / shutdown) — surface a timeout message.
|
||||
@@ -169,9 +178,11 @@ public sealed class GalaxyDriverProbe : IDriverProbe
|
||||
CaCertificatePath = gw.CaCertificatePath,
|
||||
ConnectTimeout = budget,
|
||||
DefaultCallTimeout = budget,
|
||||
// One shot — the probe must not spin on transient (Unavailable/DeadlineExceeded)
|
||||
// retries; the linked deadline above bounds the whole call regardless.
|
||||
Retry = new MxGatewayClientRetryOptions { MaxAttempts = 1 },
|
||||
// Leave Retry at the client default (as GalaxyDriver does) — an explicit
|
||||
// MaxAttempts=1 maps to 0 Polly retries, which Polly rejects as an invalid
|
||||
// RetryStrategyOptions. Fast-fail is already guaranteed: the TCP preflight rejects
|
||||
// unreachable hosts before the gRPC call, and the linked deadline caps the call to
|
||||
// the probe budget regardless of retries.
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,180 @@
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.AbCip;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Modbus;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.S7;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
|
||||
|
||||
/// <summary>
/// Phase 5 live checks proving that the protocol-handshake Test-Connect probes tell a device
/// that actually speaks the protocol apart from one that merely accepts a TCP connection.
/// Every probe is invoked DIRECTLY (no cluster harness, no SQL) against the shared docker-host
/// simulators, and each test is skip-gated on reachability so <c>dotnet test</c> stays clean
/// on a machine without fixture access.
///
/// <para>The assertions that matter most are the cross-protocol RED cases: a probe aimed at
/// ANOTHER protocol's open port (TCP is accepted, but the probe's protocol is not spoken) must
/// now report <c>Ok = false</c>. That is the false-green bug Phase 5 fixes; before Phase 5
/// each of these cases reported a false-healthy green.</para>
///
/// <para>The S7 (<c>:1102</c>) and AbCip (<c>:44818</c>) happy-path checks skip unless their
/// fixtures are running (<c>lmxopcua-fix up s7 s7_1500</c> / <c>up abcip controllogix</c>);
/// those probes are unit-proven + code-reviewed. AbLegacy / TwinCAT / FOCAS have no rig target
/// and are unit-proven + degrade-guarded only (see <c>docs/drivers/TestConnectProbes.md</c>).</para>
/// </summary>
[Trait("Category", "Integration")]
[Trait("Phase", "5-probes")]
public sealed class DriverProbeHandshakeE2eTests
{
    // Shared docker-host simulators.
    private const string DockerHost = "10.100.0.35";
    private const int ModbusPort = 5020;  // pymodbus sim — speaks Modbus
    private const int OpcUaPort = 50000;  // opc-plc — speaks OPC UA
    private const int S7Port = 1102;
    private const int AbCipPort = 44818;

    // Galaxy gateway.
    private const string GalaxyHost = "10.100.0.48";
    private const int GalaxyPort = 5120;  // mxaccessgw — speaks gRPC

    // Local docker-dev rig (on the dev host): a REAL OPC UA server + a real non-OPC-UA server.
    private const string LocalHost = "127.0.0.1";
    private const int LocalOpcUaPort = 4840; // central-1 OtOpcUa OPC UA server — speaks OPC UA
    private const int LocalSqlPort = 14330;  // SQL Server — accepts TCP, speaks neither OPC UA nor gRPC

    // Reachability pre-check budget. Deliberately generous: the first connect from a cold test
    // process (JIT + DNS warmup) can exceed the 500 ms default, and targets may be a VPN hop away.
    private const int ReachabilityTimeoutMs = 3000;

    /// <summary>Budget handed to every probe call.</summary>
    private static readonly TimeSpan Timeout = TimeSpan.FromSeconds(10);

    /// <summary>Cancellation token of the currently running xUnit test.</summary>
    private static CancellationToken Ct => TestContext.Current.CancellationToken;

    /// <summary>
    /// Skips the current test when <paramref name="host"/>:<paramref name="port"/> cannot be
    /// reached, so a missing fixture reads as "skipped" rather than "failed".
    /// </summary>
    private static void SkipUnless(string host, int port)
    {
        if (DockerFixtureAvailability.IsReachable(host, port, ReachabilityTimeoutMs))
        {
            return;
        }

        Assert.Skip($"Fixture {host}:{port} unreachable — skipping live handshake check.");
    }

    /// <summary>Builds the transient Modbus probe config JSON for the given target.</summary>
    private static string ModbusConfig(string host, int port) =>
        $"{{\"Host\":\"{host}\",\"Port\":{port}}}";

    /// <summary>Builds the transient OPC UA client probe config JSON for the given target.</summary>
    private static string OpcUaConfig(string host, int port) =>
        $"{{\"EndpointUrl\":\"opc.tcp://{host}:{port}\"}}";

    /// <summary>
    /// Builds the transient Galaxy probe config JSON for the given target. No API key is set,
    /// and <c>UseTls:false</c> goes with the <c>http://</c> endpoint scheme.
    /// </summary>
    private static string GalaxyConfig(string host, int port) =>
        $"{{\"Gateway\":{{\"Endpoint\":\"http://{host}:{port}\",\"UseTls\":false}}}}";

    // ---- Modbus : FC03 handshake ----

    [Fact]
    public async Task Modbus_Green_AgainstModbusSim()
    {
        SkipUnless(DockerHost, ModbusPort);

        var probe = new ModbusDriverProbe();
        var outcome = await probe.ProbeAsync(ModbusConfig(DockerHost, ModbusPort), Timeout, Ct);

        outcome.Ok.ShouldBeTrue($"Probe message: {outcome.Message}");
        outcome.Message!.ShouldContain("Modbus FC03");
        outcome.Latency.ShouldNotBeNull();
    }

    [Fact]
    public async Task Modbus_Red_AgainstNonModbusPort()
    {
        // The OPC UA port accepts TCP but does not speak Modbus — must NOT read green.
        SkipUnless(DockerHost, OpcUaPort);

        var probe = new ModbusDriverProbe();
        var outcome = await probe.ProbeAsync(ModbusConfig(DockerHost, OpcUaPort), Timeout, Ct);

        outcome.Ok.ShouldBeFalse("A non-Modbus TCP server must not pass the FC03 handshake.");
    }

    // ---- OpcUaClient : GetEndpoints handshake ----

    [Fact]
    public async Task OpcUaClient_Green_AgainstOpcPlc()
    {
        SkipUnless(DockerHost, OpcUaPort);

        var probe = new OpcUaClientDriverProbe();
        var outcome = await probe.ProbeAsync(OpcUaConfig(DockerHost, OpcUaPort), Timeout, Ct);

        outcome.Ok.ShouldBeTrue($"Probe message: {outcome.Message}");
        outcome.Message!.ShouldContain("OPC UA");
        outcome.Latency.ShouldNotBeNull();
    }

    [Fact]
    public async Task OpcUaClient_Red_AgainstNonOpcUaPort()
    {
        // The Modbus port accepts TCP but does not speak OPC UA — must NOT read green.
        SkipUnless(DockerHost, ModbusPort);

        var probe = new OpcUaClientDriverProbe();
        var outcome = await probe.ProbeAsync(OpcUaConfig(DockerHost, ModbusPort), Timeout, Ct);

        outcome.Ok.ShouldBeFalse("A non-OPC-UA TCP server must not pass the GetEndpoints handshake.");
    }

    // ---- Galaxy : gRPC ping (auth-rejection = reachable) ----

    [Fact]
    public async Task Galaxy_Green_AgainstGateway()
    {
        SkipUnless(GalaxyHost, GalaxyPort);

        // No API key is supplied — an Unauthenticated reply still proves a live mxaccessgw gRPC
        // server. UseTls:false matches the dev gateway's http2-cleartext endpoint (mirrors the
        // dev config).
        var probe = new GalaxyDriverProbe();
        var outcome = await probe.ProbeAsync(GalaxyConfig(GalaxyHost, GalaxyPort), Timeout, Ct);

        outcome.Ok.ShouldBeTrue($"Probe message: {outcome.Message}");
        outcome.Latency.ShouldNotBeNull();
    }

    [Fact]
    public async Task Galaxy_Red_AgainstNonGrpcPort()
    {
        // The Modbus port accepts TCP but does not speak gRPC — must NOT read green.
        SkipUnless(DockerHost, ModbusPort);

        var probe = new GalaxyDriverProbe();
        var outcome = await probe.ProbeAsync(GalaxyConfig(DockerHost, ModbusPort), Timeout, Ct);

        outcome.Ok.ShouldBeFalse("A non-gRPC TCP server must not pass the gateway gRPC handshake.");
    }

    // ---- Local docker-dev rig: real OPC UA server (central-1) vs a real non-OPC-UA server ----

    [Fact]
    public async Task OpcUaClient_Green_AgainstLocalOtOpcUaServer()
    {
        SkipUnless(LocalHost, LocalOpcUaPort);

        var probe = new OpcUaClientDriverProbe();
        var outcome = await probe.ProbeAsync(OpcUaConfig(LocalHost, LocalOpcUaPort), Timeout, Ct);

        outcome.Ok.ShouldBeTrue($"Probe message: {outcome.Message}");
        outcome.Message!.ShouldContain("OPC UA");
        outcome.Latency.ShouldNotBeNull();
    }

    [Fact]
    public async Task OpcUaClient_Red_AgainstLocalNonOpcUaServer()
    {
        // SQL Server accepts TCP but does not speak OPC UA — the false-green bug Phase 5 fixes.
        SkipUnless(LocalHost, LocalSqlPort);

        var probe = new OpcUaClientDriverProbe();
        var outcome = await probe.ProbeAsync(OpcUaConfig(LocalHost, LocalSqlPort), Timeout, Ct);

        outcome.Ok.ShouldBeFalse("A SQL Server (non-OPC-UA) must not pass the GetEndpoints handshake.");
    }

    [Fact]
    public async Task Modbus_Red_AgainstLocalNonModbusServer()
    {
        // SQL Server accepts TCP but does not speak Modbus.
        SkipUnless(LocalHost, LocalSqlPort);

        var probe = new ModbusDriverProbe();
        var outcome = await probe.ProbeAsync(ModbusConfig(LocalHost, LocalSqlPort), Timeout, Ct);

        outcome.Ok.ShouldBeFalse("A SQL Server (non-Modbus) must not pass the FC03 handshake.");
    }

    // ---- S7 : Plc.OpenAsync handshake (skips unless the sim fixture is up) ----

    [Fact]
    public async Task S7_Green_AgainstSim()
    {
        SkipUnless(DockerHost, S7Port);

        var config =
            $"{{\"Host\":\"{DockerHost}\",\"Port\":{S7Port},\"CpuType\":\"S71500\",\"Rack\":0,\"Slot\":1}}";
        var outcome = await new S7DriverProbe().ProbeAsync(config, Timeout, Ct);

        outcome.Ok.ShouldBeTrue($"Probe message: {outcome.Message}");
        outcome.Message!.ShouldContain("S7 connected");
    }

    // ---- AbCip : libplctag CIP session handshake (skips unless the sim fixture is up) ----

    [Fact]
    public async Task AbCip_Green_AgainstSim()
    {
        SkipUnless(DockerHost, AbCipPort);

        var config =
            $"{{\"Devices\":[{{\"HostAddress\":\"ab://{DockerHost}:{AbCipPort}/1,0\"}}]}}";
        var outcome = await new AbCipDriverProbe().ProbeAsync(config, Timeout, Ct);

        outcome.Ok.ShouldBeTrue($"Probe message: {outcome.Message}");
        outcome.Message!.ShouldContain("CIP session OK");
    }
}
|
||||
Reference in New Issue
Block a user