Auto: twincat-3.2 — cycle-time / jitter / PLC-state diagnostics

Closes #314
This commit is contained in:
Joseph Doherty
2026-04-26 01:59:56 -04:00
parent 30e39a752a
commit 24a3cda56a
8 changed files with 642 additions and 1 deletions

View File

@@ -0,0 +1,51 @@
namespace ZB.MOM.WW.OtOpcUa.Driver.TwinCAT;
/// <summary>
/// Per-device runtime diagnostics surfaced by the TwinCAT probe loop (PR 3.2 / issue #314).
/// Built by reading four well-known system symbols once per probe interval and stashed on
/// <see cref="TwinCATDriver.DeviceState.LastDiagnostics"/> alongside the existing reachability
/// state. The driver also folds the latest snapshot into
/// <see cref="Core.Abstractions.DriverHealth.Diagnostics"/> so the cross-driver
/// <c>driver-diagnostics</c> RPC (introduced for Modbus task #154) can render TwinCAT
/// cycle-time / jitter / online-change counters next to its peers without a per-driver
/// special-case.
/// </summary>
/// <param name="AppName">
/// Value of <c>TwinCAT_SystemInfoVarList._AppInfo.AppName</c>. Identifies the running PLC
/// application (e.g. <c>"Plc1"</c>) so an operator can tell which project is loaded against
/// the AMS target.
/// </param>
/// <param name="OnlineChangeCnt">
/// Value of <c>TwinCAT_SystemInfoVarList._AppInfo.OnlineChangeCnt</c>. Increments on every
/// accepted online-change. Diffing the value across probe ticks lets operators (and the
/// driver itself) react to runtime symbol-table churn — see the
/// <c>TwinCAT.OnlineChangeCntDelta</c> diagnostic key.
/// </param>
/// <param name="CycleTimeMs">
/// Value of <c>TwinCAT_SystemInfoVarList._TaskInfo[1].CycleTime</c> after the
/// 100 ns → ms conversion. Configured task period; constant under normal operation.
/// </param>
/// <param name="LastExecTimeMs">
/// Value of <c>TwinCAT_SystemInfoVarList._TaskInfo[1].LastExecTime</c> after the
/// 100 ns → ms conversion. Wall-clock time the last task tick spent running. Compared to
/// <see cref="CycleTimeMs"/> to surface jitter / overrun.
/// </param>
/// <param name="JitterMs">
/// <c>LastExecTimeMs - CycleTimeMs</c>. Positive values indicate the task overran its
/// configured cycle (potential cycle exceedance); negative / zero values are healthy.
/// </param>
/// <param name="UpdatedAt">UTC instant the snapshot was captured.</param>
/// <remarks>
/// Each field is best-effort — the probe wraps individual symbol reads in try/catch so a
/// runtime that doesn't expose <c>_TaskInfo[1]</c> (older TwinCAT 2 builds) still produces a
/// partial snapshot with a non-null <see cref="AppName"/> + <see cref="OnlineChangeCnt"/>.
/// Missing reads fall back to the previous snapshot's value (or the type default for the
/// first probe tick).
/// </remarks>
public sealed record TwinCATDeviceDiagnostics(
string? AppName,
uint OnlineChangeCnt,
double CycleTimeMs,
double LastExecTimeMs,
double JitterMs,
DateTimeOffset UpdatedAt);

View File

@@ -106,6 +106,15 @@ public sealed class TwinCATDriver : IDriver, IReadable, IWritable, ITagDiscovery
internal DeviceState? GetDeviceState(string hostAddress) =>
_devices.TryGetValue(hostAddress, out var s) ? s : null;
/// <summary>
/// PR 3.2 / #314 — most recent <see cref="TwinCATDeviceDiagnostics"/> snapshot for a
/// given AMS target, or <c>null</c> if the device hasn't completed a probe tick yet
/// (or the host address isn't configured on this driver). The aggregate view across
/// all devices is folded into <see cref="DriverHealth.Diagnostics"/> on every probe tick.
/// </summary>
public TwinCATDeviceDiagnostics? GetDeviceDiagnostics(string hostAddress) =>
_devices.TryGetValue(hostAddress, out var s) ? s.LastDiagnostics : null;
// ---- IReadable ----
/// <summary>
@@ -522,14 +531,31 @@ public sealed class TwinCATDriver : IDriver, IReadable, IWritable, ITagDiscovery
public IReadOnlyList<HostConnectivityStatus> GetHostStatuses() =>
[.. _devices.Values.Select(s => new HostConnectivityStatus(s.Options.HostAddress, s.HostState, s.HostStateChangedUtc))];
// PR 3.2 / #314 — well-known TwinCAT system-symbol names. All four live under
// TwinCAT_SystemInfoVarList, exported by every TC2/TC3 PLC runtime alongside user
// symbols. Names are stable across runtime versions; the underlying type widths
// are also stable (UDINT for the time + count fields, STRING(80) for AppName).
internal const string SymOnlineChangeCnt = "TwinCAT_SystemInfoVarList._AppInfo.OnlineChangeCnt";
internal const string SymAppName = "TwinCAT_SystemInfoVarList._AppInfo.AppName";
internal const string SymCycleTime = "TwinCAT_SystemInfoVarList._TaskInfo[1].CycleTime";
internal const string SymLastExecTime = "TwinCAT_SystemInfoVarList._TaskInfo[1].LastExecTime";
/// <summary>
/// Convert a TwinCAT 100 ns tick count to milliseconds. <c>_TaskInfo[1].CycleTime</c>
/// and <c>LastExecTime</c> both use the <c>UDINT</c>-sized 100 ns tick unit (1 tick =
/// 0.0001 ms), so the divisor is 10000.
/// </summary>
internal static double TicksToMilliseconds(uint ticks100ns) => ticks100ns / 10_000.0;
private async Task ProbeLoopAsync(DeviceState state, CancellationToken ct)
{
while (!ct.IsCancellationRequested)
{
var success = false;
ITwinCATClient? client = null;
try
{
var client = await EnsureConnectedAsync(state, ct).ConfigureAwait(false);
client = await EnsureConnectedAsync(state, ct).ConfigureAwait(false);
success = await client.ProbeAsync(ct).ConfigureAwait(false);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { break; }
@@ -539,6 +565,24 @@ public sealed class TwinCATDriver : IDriver, IReadable, IWritable, ITagDiscovery
// + cleared the client, so next tick will reconnect.
}
// PR 3.2 / #314 — only sample the four system symbols when the basic probe
// succeeded. Reading them on a stopped runtime would just generate noise +
// would race with the reconnect path.
if (success && client is not null)
{
try
{
await SampleDeviceDiagnosticsAsync(state, client, ct).ConfigureAwait(false);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { break; }
catch
{
// Diagnostics sampling is best-effort. A wholesale failure (target dropped
// mid-probe, transient AMS error) leaves the previous snapshot in place;
// the next tick retries.
}
}
TransitionDeviceState(state, success ? HostState.Running : HostState.Stopped);
try { await Task.Delay(_options.Probe.Interval, ct).ConfigureAwait(false); }
@@ -546,6 +590,149 @@ public sealed class TwinCATDriver : IDriver, IReadable, IWritable, ITagDiscovery
}
}
/// <summary>
/// Read the four well-known TwinCAT system symbols (<see cref="SymAppName"/>,
/// <see cref="SymOnlineChangeCnt"/>, <see cref="SymCycleTime"/>,
/// <see cref="SymLastExecTime"/>) and stash the resulting
/// <see cref="TwinCATDeviceDiagnostics"/> snapshot on
/// <see cref="DeviceState.LastDiagnostics"/>. Each read is wrapped individually so a
/// runtime that doesn't expose <c>_TaskInfo[1]</c> (older TwinCAT 2 builds) still
/// produces a partial snapshot rather than failing the whole probe tick.
/// </summary>
internal async Task SampleDeviceDiagnosticsAsync(
DeviceState state, ITwinCATClient client, CancellationToken ct)
{
var prev = state.LastDiagnostics;
var appName = prev?.AppName;
var onlineChangeCnt = prev?.OnlineChangeCnt ?? 0u;
var cycleTimeMs = prev?.CycleTimeMs ?? 0.0;
var lastExecMs = prev?.LastExecTimeMs ?? 0.0;
// AppName — STRING(80) on the wire. Fixed-size; AdsClient marshals as System.String.
try
{
var (rawAppName, statusName) = await client.ReadValueAsync(
SymAppName, TwinCATDataType.String, bitIndex: null, arrayDimensions: null, ct)
.ConfigureAwait(false);
if (statusName == TwinCATStatusMapper.Good && rawAppName is string s)
appName = s;
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
catch
{
// Older TC2 / soft-PLC builds may not surface AppName via the system var list —
// leave the previous snapshot's value in place.
}
// OnlineChangeCnt — UDINT.
try
{
var (rawCnt, statusCnt) = await client.ReadValueAsync(
SymOnlineChangeCnt, TwinCATDataType.UDInt, bitIndex: null, arrayDimensions: null, ct)
.ConfigureAwait(false);
if (statusCnt == TwinCATStatusMapper.Good && rawCnt is not null)
onlineChangeCnt = Convert.ToUInt32(rawCnt);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
catch { /* skip without failing the probe */ }
// CycleTime — UDINT, 100 ns ticks.
try
{
var (rawCycle, statusCycle) = await client.ReadValueAsync(
SymCycleTime, TwinCATDataType.UDInt, bitIndex: null, arrayDimensions: null, ct)
.ConfigureAwait(false);
if (statusCycle == TwinCATStatusMapper.Good && rawCycle is not null)
cycleTimeMs = TicksToMilliseconds(Convert.ToUInt32(rawCycle));
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
catch { /* skip without failing the probe */ }
// LastExecTime — UDINT, 100 ns ticks.
try
{
var (rawExec, statusExec) = await client.ReadValueAsync(
SymLastExecTime, TwinCATDataType.UDInt, bitIndex: null, arrayDimensions: null, ct)
.ConfigureAwait(false);
if (statusExec == TwinCATStatusMapper.Good && rawExec is not null)
lastExecMs = TicksToMilliseconds(Convert.ToUInt32(rawExec));
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
catch { /* skip without failing the probe */ }
var jitterMs = lastExecMs - cycleTimeMs;
var snapshot = new TwinCATDeviceDiagnostics(
appName, onlineChangeCnt, cycleTimeMs, lastExecMs, jitterMs, DateTimeOffset.UtcNow);
// OnlineChangeCnt increment — surface the delta on _health.Diagnostics so the
// cross-driver RPC + the Admin UI can highlight runtime symbol-table churn.
// The actual cache invalidation is handled separately by the AdsSymbolVersionChanged
// listener (PR 2.3); this is purely informational.
if (prev is not null && snapshot.OnlineChangeCnt > prev.OnlineChangeCnt)
Interlocked.Add(ref _onlineChangeIncrementsObserved,
(long)(snapshot.OnlineChangeCnt - prev.OnlineChangeCnt));
state.LastDiagnostics = snapshot;
RebuildHealthDiagnostics();
}
// Counter for OnlineChangeCnt increments observed across the lifetime of the driver
// instance, summed across every device. Surfaced as TwinCAT.OnlineChangeIncrements
// in DriverHealth.Diagnostics so operators can see "how many online changes have we
// observed since process start" without having to diff successive _TaskInfo snapshots.
private long _onlineChangeIncrementsObserved;
/// <summary>Test-only — count of OnlineChangeCnt increments folded into the diagnostic dictionary.</summary>
internal long OnlineChangeIncrementsObserved => Interlocked.Read(ref _onlineChangeIncrementsObserved);
/// <summary>
/// Refresh <see cref="_health"/> with a freshly-built diagnostics dictionary that
/// aggregates the <see cref="DeviceState.LastDiagnostics"/> snapshots across every
/// device. Keys follow the cross-driver
/// <c>"&lt;DriverType&gt;.&lt;Counter&gt;"</c> convention so the driver-diagnostics
/// RPC can render them alongside Modbus / S7 / OPC UA Client metrics. Single-device
/// deployments produce flat keys (<c>TwinCAT.CycleTimeMs</c>); multi-device deployments
/// prefix the host address (<c>TwinCAT.&lt;hostAddress&gt;.CycleTimeMs</c>) so the
/// readout is unambiguous when multiple AMS targets share one driver instance.
/// </summary>
internal void RebuildHealthDiagnostics()
{
var diags = BuildAggregateDiagnostics();
var current = _health;
_health = new DriverHealth(current.State, current.LastSuccessfulRead, current.LastError, diags);
}
/// <summary>
/// Snapshot the per-device <see cref="TwinCATDeviceDiagnostics"/> into the cross-driver
/// dictionary shape <see cref="DriverHealth.Diagnostics"/> exposes. Empty when no probe
/// has completed yet — the caller (Admin UI / driver-diagnostics RPC) is expected to
/// handle the empty case the same way it does for any other driver pre-connect.
/// </summary>
internal IReadOnlyDictionary<string, double> BuildAggregateDiagnostics()
{
var dict = new Dictionary<string, double>(StringComparer.Ordinal);
var multiDevice = _devices.Count > 1;
foreach (var (hostAddress, state) in _devices)
{
if (state.LastDiagnostics is not { } snap) continue;
var prefix = multiDevice
? $"TwinCAT.{hostAddress}."
: "TwinCAT.";
dict[prefix + "OnlineChangeCnt"] = snap.OnlineChangeCnt;
dict[prefix + "CycleTimeMs"] = snap.CycleTimeMs;
dict[prefix + "LastExecTimeMs"] = snap.LastExecTimeMs;
dict[prefix + "JitterMs"] = snap.JitterMs;
dict[prefix + "DiagnosticsUpdatedAtTicks"] = snap.UpdatedAt.UtcTicks;
}
var increments = Interlocked.Read(ref _onlineChangeIncrementsObserved);
if (increments > 0)
dict["TwinCAT.OnlineChangeIncrements"] = increments;
return dict;
}
private void TransitionDeviceState(DeviceState state, HostState newState)
{
HostState old;
@@ -620,6 +807,15 @@ public sealed class TwinCATDriver : IDriver, IReadable, IWritable, ITagDiscovery
public DateTime HostStateChangedUtc { get; set; } = DateTime.UtcNow;
public CancellationTokenSource? ProbeCts { get; set; }
/// <summary>
/// PR 3.2 / #314 — most recent <see cref="TwinCATDeviceDiagnostics"/> sample taken
/// by the probe loop, or <c>null</c> before the first successful probe tick. Single
/// writer (the per-device probe task) + many readers (
/// <see cref="TwinCATDriver.GetDeviceDiagnostics"/>, the diagnostic-dictionary
/// rebuild). The record is immutable so a torn-read on assignment is impossible.
/// </summary>
public TwinCATDeviceDiagnostics? LastDiagnostics { get; set; }
public void DisposeClient()
{
Client?.Dispose();