feat(probe): TwinCAT Test-Connect does an ADS ReadState (degrade-guarded)

This commit is contained in:
Joseph Doherty
2026-06-16 06:48:22 -04:00
parent 5ed0276ffb
commit b663ae6eff
2 changed files with 362 additions and 18 deletions
@@ -2,22 +2,65 @@ using System.Diagnostics;
using System.Net.Sockets;
using System.Text.Json;
using System.Text.Json.Serialization;
using TwinCAT.Ads;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Driver.TwinCAT;
/// <summary>
/// Originally a cheap TCP-connect probe for the <see cref="TwinCATDriverOptions"/>-shaped driver
/// config: it opened a socket to the first device's AMS router host (first four octets of the
/// AMS Net ID) on the AMS port from the address, closed it immediately, and exchanged no ADS
/// bytes. That TCP step is retained as the preflight described below, with its failure messages
/// unchanged (SocketError on a failed connect; "timed out" on the caller's cancellation).
/// Two-phase, degrade-guarded Test-Connect probe for the <see cref="TwinCATDriverOptions"/>-shaped
/// driver config. Phase 1: a bare TCP connect to the first device's AMS router host (first four
/// octets of the AMS Net ID) on the AMS port — fast rejection of unreachable targets. Phase 2:
/// a real ADS handshake — <c>AdsClient.Connect(netId, port)</c> + <c>ReadStateAsync</c> — to
/// confirm the endpoint speaks ADS and report the controller's run-state, not just that a TCP
/// socket opened.
/// <para>
/// <b>Outcome classification</b> — three cases:
/// <list type="number">
/// <item>
/// <b>ADS connected + ReadState OK</b> → <c>Ok=true</c>, message <c>"ADS state: {AdsState}"</c>
/// (e.g. "Run" / "Config" / "Stop"), with latency.
/// </item>
/// <item>
/// <b>Route/auth rejection from a reachable router</b> — an <see cref="AdsErrorException"/>
/// (or a non-success <c>ReadStateAsync</c> result) whose <see cref="AdsErrorCode"/> means the
/// router answered but won't let us in (e.g. <see cref="AdsErrorCode.TargetPortNotFound"/>,
/// <see cref="AdsErrorCode.TargetMachineNotFound"/>, <see cref="AdsErrorCode.PortNotConnected"/>,
/// <see cref="AdsErrorCode.PortDisabled"/>) → <c>Ok=false</c>, message
/// <c>"Reachable at {host}:{port} but ADS handshake failed: {code} — check the target's ADS
/// route table authorizes this host"</c>. This is a TRUE red: the driver itself also needs
/// the route, so a green tick here would be a false positive.
/// </item>
/// <item>
/// <b>Handshake could not be ATTEMPTED on this host</b> — the managed AMS router cannot run
/// headless (Beckhoff's <c>AdsClient.Connect</c> throws a server exception
/// "Check for a running TwinCAT router instance!"), or a <see cref="PlatformNotSupportedException"/>
/// / <see cref="TypeInitializationException"/> / <see cref="DllNotFoundException"/> /
/// <see cref="NotSupportedException"/> surfaces, or <c>ReadStateAsync</c> reports a client-side
/// port-not-open status → <b>DEGRADE</b>: <c>Ok=true</c>, message
/// <c>"Reachable at {host}:{port} (ADS handshake unavailable on this host — TCP reachability
/// only)"</c>, with latency. The probe NEVER produces a result worse than the old TCP-only
/// probe.
/// </item>
/// </list>
/// </para>
/// </summary>
/// <remarks>
/// AMS Net ID format is six dot-separated octets (e.g. <c>192.168.1.10.1.1</c>); the first
/// four are typically the host IPv4 address by Beckhoff convention, but the AMS router
/// resolves the real IP route server-side. The probe uses the first-four-octet heuristic
/// for the TCP preflight target.
/// The line between case 2 (device-rejected → RED) and case 3 (can't-attempt → DEGRADE) is the
/// crux. Classification rests on the exception's identity: only an <see cref="AdsErrorException"/>
/// (or a result <see cref="AdsErrorCode"/>) carrying a route/target-port code is a RED — that is the
/// ADS router answering and refusing the route. Beckhoff's <c>TwinCAT.Ads.Server.AdsServerException</c>
/// ("running TwinCAT router instance!") derives from plain <see cref="Exception"/>, NOT from
/// <see cref="AdsErrorException"/>, so it is correctly classified as "can't attempt → DEGRADE".
/// When genuinely ambiguous, the probe DEGRADES (Ok=true, TCP-only note) rather than emit a false RED.
/// <para>
/// AMS Net ID format is six dot-separated octets (e.g. <c>192.168.1.10.1.1</c>); the first four
/// are typically the host IPv4 address by Beckhoff convention. The AMS router resolves the real IP
/// route server-side; the probe uses the first-four-octet heuristic for the TCP preflight target,
/// which is reliable for the overwhelming majority of production deployments.
/// </para>
/// The probe is read-only — <c>ReadStateAsync</c> never mutates PLC state — and always disposes the
/// <see cref="AdsClient"/>.
/// </remarks>
public sealed class TwinCATDriverProbe : IDriverProbe
{
@@ -27,6 +70,22 @@ public sealed class TwinCATDriverProbe : IDriverProbe
UnmappedMemberHandling = JsonUnmappedMemberHandling.Skip,
};
/// <summary>
/// ADS error codes that are classified as "the router answered but refused the route, target
/// port or access". A code in this set yields a failed (RED) probe result, because the driver
/// would be refused in the same way and a green tick would be a false positive. Any code that
/// is NOT in this set (client-side port/connection errors, sync timeouts,
/// router-not-initialised, etc.) is treated as "the handshake could not be attempted" and
/// degrades to the TCP-reachability-only result.
/// </summary>
private static readonly HashSet<AdsErrorCode> _routeRejectCodes = new()
{
    AdsErrorCode.TargetPortNotFound,
    AdsErrorCode.TargetMachineNotFound,
    AdsErrorCode.PortNotConnected,
    AdsErrorCode.PortDisabled,
    AdsErrorCode.AccessDenied,
    AdsErrorCode.DeviceAccessDenied,
};
/// <inheritdoc />
public string DriverType => "TwinCAT";
@@ -38,23 +97,23 @@ public sealed class TwinCATDriverProbe : IDriverProbe
catch (Exception ex) { return new(false, $"Config JSON is invalid: {ex.Message}", null); }
if (opts is null) return new(false, "Config JSON deserialized to null.", null);
var (host, port) = ExtractTarget(opts);
if (string.IsNullOrWhiteSpace(host) || port <= 0)
var (host, port, parsed) = ExtractTarget(opts);
if (parsed is null || string.IsNullOrWhiteSpace(host) || port <= 0)
return new(false, "Config has no host/port to probe.", null);
// Phase 1: bare TCP preflight — fast rejection for unreachable hosts. Messages here are
// UNCHANGED from the original TCP-only probe.
var sw = Stopwatch.StartNew();
try
{
using var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
await socket.ConnectAsync(host, port, ct);
sw.Stop();
return new(true, null, sw.Elapsed);
}
catch (SocketException ex)
{
return new(false, $"Connect failed: {ex.SocketErrorCode}", null);
}
catch (OperationCanceledException)
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
return new(false, $"Probe timed out after {timeout.TotalSeconds:F0}s.", null);
}
@@ -62,23 +121,86 @@ public sealed class TwinCATDriverProbe : IDriverProbe
{
return new(false, ex.Message, null);
}
// Phase 2: real ADS handshake. Connect + ReadStateAsync. The crux is the three-way
// classification of how this can fail — see the class-doc and ClassifyHandshakeFailure.
var degradeNote = $"Reachable at {host}:{port} (ADS handshake unavailable on this host — TCP reachability only)";
try
{
using var client = new AdsClient();
// Bound the ADS round-trip by the caller's timeout (clamped >=1s, mirrors AdsTwinCATClient).
client.Timeout = (int)Math.Max(1_000, timeout.TotalMilliseconds);
// Connect can throw a server exception ("running TwinCAT router instance!") on a headless
// host with no AMS router — that is a can't-attempt DEGRADE, classified below.
client.Connect(AmsNetId.Parse(parsed.NetId), parsed.Port);
var state = await client.ReadStateAsync(ct).ConfigureAwait(false);
sw.Stop();
if (state.Succeeded)
return new(true, $"ADS state: {state.State.AdsState}", sw.Elapsed);
// Non-throwing failure carried in the result's error code.
return state.ErrorCode == AdsErrorCode.ClientPortNotOpen
? new(true, degradeNote, sw.Elapsed) // client never opened — DEGRADE
: ClassifyHandshakeFailure(state.ErrorCode, host, port, sw, degradeNote);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
// Caller timeout — keep the original timed-out message.
return new(false, $"Probe timed out after {timeout.TotalSeconds:F0}s.", null);
}
catch (AdsErrorException ex)
{
// The router answered with an ADS-level error. Route/auth rejection → RED; anything
// else (sync timeout, client port issues, …) → DEGRADE.
return ClassifyHandshakeFailure(ex.ErrorCode, host, port, sw, degradeNote);
}
catch (Exception)
{
// Everything else — TwinCAT.Ads.Server.AdsServerException ("running TwinCAT router
// instance!"), PlatformNotSupportedException, TypeInitializationException,
// DllNotFoundException, NotSupportedException, etc. — means the handshake could not be
// ATTEMPTED on this host. DEGRADE: never worse than the TCP-only probe.
sw.Stop();
return new(true, degradeNote, sw.Elapsed);
}
}
private static (string host, int port) ExtractTarget(TwinCATDriverOptions opts)
/// <summary>
/// Turns an ADS error code (taken from an <see cref="AdsErrorException"/> or from a
/// non-success <c>ReadStateAsync</c> result) into a probe result.
/// </summary>
/// <param name="code">The ADS error code to classify.</param>
/// <param name="host">TCP preflight host, used only in the failure message.</param>
/// <param name="port">TCP preflight port, used only in the failure message.</param>
/// <param name="sw">Probe stopwatch; stopped here on the degrade path so its elapsed time can be reported.</param>
/// <param name="degradeNote">Message returned when the code is not a route rejection.</param>
/// <returns>
/// A failed result with no latency when <paramref name="code"/> is one of the route-reject
/// codes; otherwise a successful result carrying <paramref name="degradeNote"/> and the
/// elapsed time.
/// </returns>
private static DriverProbeResult ClassifyHandshakeFailure(
    AdsErrorCode code, string host, int port, Stopwatch sw, string degradeNote)
{
    var routerRefused = _routeRejectCodes.Contains(code);

    if (!routerRefused)
    {
        // Not a route/target-port/access refusal: treat as "couldn't attempt" and fall back
        // to the TCP-reachability-only outcome, with latency.
        sw.Stop();
        return new DriverProbeResult(true, degradeNote, sw.Elapsed);
    }

    // The router answered and refused us: genuine failure, reported without latency.
    var rejection =
        $"Reachable at {host}:{port} but ADS handshake failed: {code} — check the target's ADS route table authorizes this host";
    return new DriverProbeResult(false, rejection, null);
}
/// <summary>
/// Derives the TCP preflight target from the first configured device and returns it together
/// with the parsed AMS address, so the caller can reuse the Net ID and port for the ADS
/// handshake.
/// </summary>
/// <param name="opts">Deserialized driver options; only the first entry of <c>Devices</c> is inspected.</param>
/// <returns>
/// <c>(host, port, parsed)</c>, where <c>host</c> is the first four dot-separated octets of the
/// AMS Net ID, <c>port</c> is the parsed AMS port and <c>parsed</c> is the full parsed address.
/// Returns <c>(string.Empty, 0, null)</c> when there is no device, when the device address does
/// not parse, or when the Net ID has fewer than four dot-separated parts.
/// </returns>
private static (string host, int port, TwinCATAmsAddress? parsed) ExtractTarget(TwinCATDriverOptions opts)
{
    // Every failure path returns the same three-element "nothing to probe" tuple; the caller
    // checks for a null parsed address / blank host / non-positive port.
    var firstDevice = opts.Devices.FirstOrDefault();
    if (firstDevice is null) return (string.Empty, 0, null);

    var parsed = TwinCATAmsAddress.TryParse(firstDevice.HostAddress);
    if (parsed is null) return (string.Empty, 0, null);

    // NetId = "a.b.c.d.e.f" — by Beckhoff convention the first four octets are the host IPv4,
    // so those become the TCP target.
    var parts = parsed.NetId.Split('.');
    if (parts.Length < 4) return (string.Empty, 0, null);

    var hostIp = string.Join('.', parts, 0, 4);
    return (hostIp, parsed.Port, parsed);
}
}