diff --git a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriverProbe.cs b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriverProbe.cs
index e77d4dc2..fedf29cf 100644
--- a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriverProbe.cs
+++ b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriverProbe.cs
@@ -2,22 +2,65 @@ using System.Diagnostics;
using System.Net.Sockets;
using System.Text.Json;
using System.Text.Json.Serialization;
+using TwinCAT.Ads;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Driver.TwinCAT;
///
-/// Cheap TCP-connect probe for the -shaped driver config.
-/// Opens a socket to the first device's AMS router host (first four octets of the AMS Net ID)
-/// on the AMS port from the address and closes immediately. Surfaces a green tick + latency
-/// on success; red chip + SocketError on failure; "timed out" on the caller's cancellation.
-/// Does NOT exchange any ADS bytes — a richer ADS-state probe is a documented follow-up.
+/// Two-phase, degrade-guarded Test-Connect probe for the -shaped
+/// driver config. Phase 1: a bare TCP connect to the first device's AMS router host (first four
+/// octets of the AMS Net ID) on the AMS port — fast rejection of unreachable targets. Phase 2:
+/// a real ADS handshake — AdsClient.Connect(netId, port) + ReadStateAsync — to
+/// confirm the endpoint speaks ADS and report the controller's run-state, not just that a TCP
+/// socket opened.
+///
+/// Outcome classification — three cases:
+///
+/// -
+/// ADS connected + ReadState OK → Ok=true, message "ADS state: {AdsState}"
+/// (e.g. "Run" / "Config" / "Stop"), with latency.
+///
+/// -
+/// Route/auth rejection from a reachable router — an
+/// (or a non-success ReadStateAsync result) whose means the
+/// router answered but won't let us in (e.g. ,
+/// , ,
+/// ) → Ok=false, message
+/// "Reachable at {host}:{port} but ADS handshake failed: {code} — check the target's ADS
+/// route table authorizes this host". This is a TRUE red: the driver itself also needs
+/// the route, so a green tick here would be a false positive.
+///
+/// -
+/// Handshake could not be ATTEMPTED on this host — the managed AMS router cannot run
+/// headless (Beckhoff's AdsClient.Connect throws a server exception
+/// "Check for a running TwinCAT router instance!"), or a
+/// / / /
+/// surfaces, or ReadStateAsync reports a client-side
+/// port-not-open status → DEGRADE: Ok=true, message
+/// "Reachable at {host}:{port} (ADS handshake unavailable on this host — TCP reachability
+/// only)", with latency. The probe NEVER produces a result worse than the old TCP-only
+/// probe.
+///
+///
+///
///
///
-/// AMS Net ID format is six dot-separated octets (e.g. 192.168.1.10.1.1); the first
-/// four are typically the host IPv4 address by Beckhoff convention, but the AMS router
-/// resolves the real IP route server-side. The probe uses the first-four-octet heuristic
+/// The line between case 2 (device-rejected → RED) and case 3 (can't-attempt → DEGRADE) is the
+/// crux. Classification rests on the exception's identity: only an
+/// (or a result ) carrying a route/target-port code is a RED — that is the
+/// ADS router answering and refusing the route. Beckhoff's TwinCAT.Ads.Server.AdsServerException
+/// ("running TwinCAT router instance!") derives from plain , NOT from
+/// , so it is correctly classified as "can't attempt → DEGRADE".
+/// When genuinely ambiguous, the probe DEGRADES (Ok=true, TCP-only note) rather than emit a false RED.
+///
+/// AMS Net ID format is six dot-separated octets (e.g. 192.168.1.10.1.1); the first four
+/// are typically the host IPv4 address by Beckhoff convention. The AMS router resolves the real IP
+/// route server-side; the probe uses the first-four-octet heuristic for the TCP preflight target,
/// which is reliable for the overwhelming majority of production deployments.
+///
+/// The probe is read-only — ReadStateAsync never mutates PLC state — and always disposes the
+/// .
///
public sealed class TwinCATDriverProbe : IDriverProbe
{
@@ -27,6 +70,22 @@ public sealed class TwinCATDriverProbe : IDriverProbe
UnmappedMemberHandling = JsonUnmappedMemberHandling.Skip,
};
+ ///
+ /// AMS error codes that mean "the router answered but refused the route / target port" — a
+ /// genuine RED. The driver itself would also be denied, so a green tick would be a false
+ /// positive. Everything NOT in this set (client-side port/connection errors, sync timeouts,
+ /// router-not-initialised, etc.) is treated as "couldn't attempt the handshake" → DEGRADE.
+ ///
+ private static readonly HashSet _routeRejectCodes =
+ [
+ AdsErrorCode.TargetPortNotFound,
+ AdsErrorCode.TargetMachineNotFound,
+ AdsErrorCode.PortNotConnected,
+ AdsErrorCode.PortDisabled,
+ AdsErrorCode.AccessDenied,
+ AdsErrorCode.DeviceAccessDenied,
+ ];
+
///
public string DriverType => "TwinCAT";
@@ -38,23 +97,23 @@ public sealed class TwinCATDriverProbe : IDriverProbe
catch (Exception ex) { return new(false, $"Config JSON is invalid: {ex.Message}", null); }
if (opts is null) return new(false, "Config JSON deserialized to null.", null);
- var (host, port) = ExtractTarget(opts);
- if (string.IsNullOrWhiteSpace(host) || port <= 0)
+ var (host, port, parsed) = ExtractTarget(opts);
+ if (parsed is null || string.IsNullOrWhiteSpace(host) || port <= 0)
return new(false, "Config has no host/port to probe.", null);
+ // Phase 1: bare TCP preflight — fast rejection for unreachable hosts. Messages here are
+ // UNCHANGED from the original TCP-only probe.
var sw = Stopwatch.StartNew();
try
{
using var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
await socket.ConnectAsync(host, port, ct);
- sw.Stop();
- return new(true, null, sw.Elapsed);
}
catch (SocketException ex)
{
return new(false, $"Connect failed: {ex.SocketErrorCode}", null);
}
- catch (OperationCanceledException)
+ catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
return new(false, $"Probe timed out after {timeout.TotalSeconds:F0}s.", null);
}
@@ -62,23 +121,86 @@ public sealed class TwinCATDriverProbe : IDriverProbe
{
return new(false, ex.Message, null);
}
+
+ // Phase 2: real ADS handshake. Connect + ReadStateAsync. The crux is the three-way
+ // classification of how this can fail — see the class-doc and ClassifyHandshakeFailure.
+ var degradeNote = $"Reachable at {host}:{port} (ADS handshake unavailable on this host — TCP reachability only)";
+ try
+ {
+ using var client = new AdsClient();
+ // Bound the ADS round-trip by the caller's timeout (clamped >=1s, mirrors AdsTwinCATClient).
+ client.Timeout = (int)Math.Max(1_000, timeout.TotalMilliseconds);
+
+ // Connect can throw a server exception ("running TwinCAT router instance!") on a headless
+ // host with no AMS router — that is a can't-attempt DEGRADE, classified below.
+ client.Connect(AmsNetId.Parse(parsed.NetId), parsed.Port);
+
+ var state = await client.ReadStateAsync(ct).ConfigureAwait(false);
+ sw.Stop();
+
+ if (state.Succeeded)
+ return new(true, $"ADS state: {state.State.AdsState}", sw.Elapsed);
+
+ // Non-throwing failure carried in the result's error code.
+ return state.ErrorCode == AdsErrorCode.ClientPortNotOpen
+ ? new(true, degradeNote, sw.Elapsed) // client never opened — DEGRADE
+ : ClassifyHandshakeFailure(state.ErrorCode, host, port, sw, degradeNote);
+ }
+ catch (OperationCanceledException) when (ct.IsCancellationRequested)
+ {
+ // Caller timeout — keep the original timed-out message.
+ return new(false, $"Probe timed out after {timeout.TotalSeconds:F0}s.", null);
+ }
+ catch (AdsErrorException ex)
+ {
+ // The router answered with an ADS-level error. Route/auth rejection → RED; anything
+ // else (sync timeout, client port issues, …) → DEGRADE.
+ return ClassifyHandshakeFailure(ex.ErrorCode, host, port, sw, degradeNote);
+ }
+ catch (Exception)
+ {
+ // Everything else — TwinCAT.Ads.Server.AdsServerException ("running TwinCAT router
+ // instance!"), PlatformNotSupportedException, TypeInitializationException,
+ // DllNotFoundException, NotSupportedException, etc. — means the handshake could not be
+ // ATTEMPTED on this host. DEGRADE: never worse than the TCP-only probe.
+ sw.Stop();
+ return new(true, degradeNote, sw.Elapsed);
+ }
}
- private static (string host, int port) ExtractTarget(TwinCATDriverOptions opts)
+ ///
+ /// Classifies an ADS-level failure (from an or a non-success
+ /// ReadStateAsync result). A route/target-port/access code means the router answered
+ /// but refused the route → RED. Any other code is treated as "couldn't attempt" → DEGRADE,
+ /// so the probe never under-reports a host with no usable ADS runtime.
+ ///
+ private static DriverProbeResult ClassifyHandshakeFailure(
+ AdsErrorCode code, string host, int port, Stopwatch sw, string degradeNote)
+ {
+ if (_routeRejectCodes.Contains(code))
+ return new(false,
+ $"Reachable at {host}:{port} but ADS handshake failed: {code} — check the target's ADS route table authorizes this host",
+ null);
+
+ sw.Stop();
+ return new(true, degradeNote, sw.Elapsed);
+ }
+
+ private static (string host, int port, TwinCATAmsAddress? parsed) ExtractTarget(TwinCATDriverOptions opts)
{
// Parse the first device's ads:// address. AMS Net ID is six-octet; by Beckhoff
// convention the first four octets are the host IPv4. Extract those as the TCP target.
var firstDevice = opts.Devices.FirstOrDefault();
- if (firstDevice is null) return (string.Empty, 0);
+ if (firstDevice is null) return (string.Empty, 0, null);
var parsed = TwinCATAmsAddress.TryParse(firstDevice.HostAddress);
- if (parsed is null) return (string.Empty, 0);
+ if (parsed is null) return (string.Empty, 0, null);
// NetId = "a.b.c.d.e.f" — take the first 4 octets as the host IP.
var parts = parsed.NetId.Split('.');
- if (parts.Length < 4) return (string.Empty, 0);
+ if (parts.Length < 4) return (string.Empty, 0, null);
var hostIp = string.Join('.', parts[0], parts[1], parts[2], parts[3]);
- return (hostIp, parsed.Port);
+ return (hostIp, parsed.Port, parsed);
}
}
diff --git a/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Tests/TwinCATDriverProbeTests.cs b/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Tests/TwinCATDriverProbeTests.cs
new file mode 100644
index 00000000..8d9a8c23
--- /dev/null
+++ b/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Tests/TwinCATDriverProbeTests.cs
@@ -0,0 +1,222 @@
+// Unit tests for TwinCATDriverProbe — the degrade-guarded ADS ReadState Test-Connect probe.
+//
+// Coverage here is the set of outcomes that are *deterministic on a CI/dev box with no TwinCAT
+// runtime*: invalid JSON, missing host/port, an unreachable TCP target, and — the crux — the
+// DEGRADE path. On this box there is no AMS router, so once the TCP preflight succeeds against a
+// reachable listener the managed ADS client cannot initialise (it throws a "Check for a running
+// TwinCAT router instance!" server exception, NOT an ADS device rejection). The probe MUST treat
+// that as "TCP reachability only" (Ok=true) so it never regresses below the old TCP-only probe.
+//
+// The two paths that REQUIRE a live ADS target are LIVE-VERIFY DEFERRED (no TwinCAT runtime on the
+// rig): (a) the happy "ADS state: Run/Config/Stop" path (Ok=true with the AdsState name), and
+// (b) the route/auth-reject RED path (a reachable router that refuses the route, surfaced as an
+// AdsErrorException carrying a TargetPortNotFound / route AdsErrorCode → Ok=false with the
+// "check the target's ADS route table" message). Those are exercised against a real TwinCAT XAR /
+// remote runtime, out of scope for this unit suite.
+
+using System.Net;
+using System.Net.Sockets;
+using Shouldly;
+using Xunit;
+
+namespace ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Tests;
+
+[Trait("Category", "Unit")]
+public sealed class TwinCATDriverProbeTests
+{
+ private static readonly TwinCATDriverProbe Probe = new();
+
+ // -------------------------------------------------------------------------
+ // 1. Invalid JSON
+ // -------------------------------------------------------------------------
+
+ /// Invalid JSON returns Ok=false with a message containing "invalid".
+ [Fact]
+ public async Task InvalidJson_Returns_OkFalse_WithInvalidMessage()
+ {
+ var result = await Probe.ProbeAsync(
+ "not-valid-json{{{",
+ TimeSpan.FromSeconds(3),
+ TestContext.Current.CancellationToken);
+
+ result.Ok.ShouldBeFalse();
+ result.Message.ShouldNotBeNull();
+ result.Message!.ToLowerInvariant().ShouldContain("invalid");
+ result.Latency.ShouldBeNull();
+ }
+
+ // -------------------------------------------------------------------------
+ // 2. Config with no device / no host
+ // -------------------------------------------------------------------------
+
+ /// An empty Devices list returns Ok=false with a "no host/port" message.
+ [Fact]
+ public async Task NoDevices_Returns_OkFalse_WithNoHostPortMessage()
+ {
+ var result = await Probe.ProbeAsync(
+ "{\"devices\":[]}",
+ TimeSpan.FromSeconds(3),
+ TestContext.Current.CancellationToken);
+
+ result.Ok.ShouldBeFalse();
+ result.Message.ShouldNotBeNull();
+ result.Message!.ShouldContain("no host/port");
+ result.Latency.ShouldBeNull();
+ }
+
+ ///
+ /// A first device with a malformed (non-ads://) host address returns Ok=false with a
+ /// "no host/port" message because returns null.
+ ///
+ [Fact]
+ public async Task MalformedHostAddress_Returns_OkFalse_WithNoHostPortMessage()
+ {
+ var result = await Probe.ProbeAsync(
+ "{\"devices\":[{\"hostAddress\":\"not-an-ads-url\"}]}",
+ TimeSpan.FromSeconds(3),
+ TestContext.Current.CancellationToken);
+
+ result.Ok.ShouldBeFalse();
+ result.Message.ShouldNotBeNull();
+ result.Message!.ShouldContain("no host/port");
+ result.Latency.ShouldBeNull();
+ }
+
+ // -------------------------------------------------------------------------
+ // 3. Unreachable target — TCP preflight fails first
+ // -------------------------------------------------------------------------
+
+ ///
+ /// A closed port causes the TCP preflight ConnectAsync to throw
+ /// ; the probe returns Ok=false with "Connect failed".
+ /// The port is reserved then released so the OS refuses the connection immediately.
+ ///
+ [Fact]
+ public async Task ClosedPort_Returns_OkFalse_WithConnectFailedMessage()
+ {
+ // Reserve an ephemeral port then release it so the port is definitely closed.
+ var reserved = new TcpListener(IPAddress.Loopback, 0);
+ reserved.Start();
+ var port = ((IPEndPoint)reserved.LocalEndpoint).Port;
+ reserved.Stop();
+
+ // 127.0.0.1 as the first four AMS-Net-ID octets → the probe's TCP target is 127.0.0.1.
+ var configJson = $"{{\"devices\":[{{\"hostAddress\":\"ads://127.0.0.1.1.1:{port}\"}}]}}";
+
+ using var cts = CancellationTokenSource.CreateLinkedTokenSource(
+ TestContext.Current.CancellationToken);
+ cts.CancelAfter(TimeSpan.FromSeconds(5));
+
+ var result = await Probe.ProbeAsync(
+ configJson,
+ TimeSpan.FromSeconds(3),
+ cts.Token);
+
+ result.Ok.ShouldBeFalse();
+ result.Message.ShouldNotBeNull();
+ // Either a fast SocketException ("Connect failed") or, if the OS stalls, a timeout.
+ var lower = result.Message!.ToLowerInvariant();
+ (lower.Contains("connect failed") || lower.Contains("timed out")).ShouldBeTrue(
+ $"Expected 'Connect failed' or 'timed out' but got: {result.Message}");
+ result.Latency.ShouldBeNull();
+ }
+
+ ///
+ /// A target on a non-routable IP (192.0.2.1 — TEST-NET-1, RFC 5737) with a short
+ /// cancellation window times out (or fails fast on EHOSTUNREACH) at the TCP preflight.
+ ///
+ [Fact]
+ public async Task BlackHoleTarget_Returns_OkFalse_WithTimedOutOrConnectFailed()
+ {
+ using var cts = CancellationTokenSource.CreateLinkedTokenSource(
+ TestContext.Current.CancellationToken);
+ cts.CancelAfter(TimeSpan.FromSeconds(1));
+
+ var configJson = "{\"devices\":[{\"hostAddress\":\"ads://192.0.2.1.1.1:851\"}]}";
+ var result = await Probe.ProbeAsync(
+ configJson,
+ TimeSpan.FromSeconds(1),
+ cts.Token);
+
+ result.Ok.ShouldBeFalse();
+ result.Message.ShouldNotBeNull();
+ var lower = result.Message!.ToLowerInvariant();
+ (lower.Contains("timed out") || lower.Contains("connect failed")).ShouldBeTrue(
+ $"Expected 'timed out' or 'Connect failed' but got: {result.Message}");
+ result.Latency.ShouldBeNull();
+ }
+
+ // -------------------------------------------------------------------------
+ // 4. DEGRADE path — TCP reachable, but the ADS handshake can't be attempted here
+ // -------------------------------------------------------------------------
+
+ ///
+ /// The crux. A reachable TCP listener satisfies the preflight, but on this box (no TwinCAT
+ /// AMS router) the managed ADS client cannot establish a session — AdsClient.Connect
+ /// throws an environment/router exception (NOT an ADS device rejection). The probe MUST
+ /// DEGRADE: Ok=true with the "TCP reachability only" note, never a false RED. This
+ /// guarantees the new probe never reports worse than the old TCP-only probe on a host with
+ /// no ADS runtime.
+ ///
+ [Fact]
+ public async Task ReachableTcp_NoAdsRuntime_Degrades_To_OkTrue_WithTcpOnlyNote()
+ {
+ // Stand up a real TCP listener and accept (then drop) connections so the preflight
+ // ConnectAsync succeeds. We never speak ADS — the ADS handshake is what must degrade.
+ var listener = new TcpListener(IPAddress.Loopback, 0);
+ listener.Start();
+ var port = ((IPEndPoint)listener.LocalEndpoint).Port;
+
+ using var acceptCts = CancellationTokenSource.CreateLinkedTokenSource(
+ TestContext.Current.CancellationToken);
+ var acceptLoop = Task.Run(async () =>
+ {
+ try
+ {
+ while (true)
+ {
+ var client = await listener.AcceptTcpClientAsync(acceptCts.Token);
+ // Hold the socket briefly so the preflight sees a clean accept, then close.
+ _ = Task.Run(async () =>
+ {
+ await Task.Delay(200);
+ client.Dispose();
+ });
+ }
+ }
+ catch (OperationCanceledException) { /* accept cancelled — expected on teardown */ }
+ catch (ObjectDisposedException) { /* listener stopped — expected on teardown */ }
+ catch (SocketException) { /* listener stopped — expected on teardown */ }
+ });
+
+ try
+ {
+ var configJson = $"{{\"devices\":[{{\"hostAddress\":\"ads://127.0.0.1.1.1:{port}\"}}]}}";
+
+ using var cts = CancellationTokenSource.CreateLinkedTokenSource(
+ TestContext.Current.CancellationToken);
+ cts.CancelAfter(TimeSpan.FromSeconds(10));
+
+ var result = await Probe.ProbeAsync(
+ configJson,
+ TimeSpan.FromSeconds(3),
+ cts.Token);
+
+ // DEGRADE guard: must be Ok=true (never worse than TCP-only), with the explicit note
+ // and a latency. If on some box AdsClient instead threw synchronously at construction,
+ // the probe still degrades the same way — this assertion holds either way.
+ result.Ok.ShouldBeTrue(
+ $"Degrade guard violated — probe returned a worse-than-TCP result: {result.Message}");
+ result.Message.ShouldNotBeNull();
+ result.Message!.ShouldContain("ADS handshake unavailable on this host");
+ result.Message!.ShouldContain("TCP reachability only");
+ result.Latency.ShouldNotBeNull();
+ }
+ finally
+ {
+ await acceptCts.CancelAsync();
+ listener.Stop();
+ try { await acceptLoop; } catch { /* ignore teardown races */ }
+ }
+ }
+}