Auto: ablegacy-10 — diagnostic counters as tags

Closes #253
This commit is contained in:
Joseph Doherty
2026-04-26 03:50:47 -04:00
parent 14876ea210
commit 42472b5549
10 changed files with 1000 additions and 5 deletions

View File

@@ -0,0 +1,261 @@
namespace ZB.MOM.WW.OtOpcUa.Driver.AbLegacy;
/// <summary>
/// PR ablegacy-10 / #253 — diagnostic-counter tag source. Holds per-device live
/// counters (request / response / error / retry / last-error / comm-failures) that
/// the driver surfaces under each device's synthetic <c>_Diagnostics</c> folder. The
/// read path short-circuits before the libplctag dispatch when the incoming
/// reference targets a <c>_Diagnostics/&lt;host&gt;/&lt;name&gt;</c> address — the
/// values come straight from the driver-local counters.
/// </summary>
/// <remarks>
/// <para>Mirrors AbCip's <c>AbCipSystemTagSource</c> pattern (the abcip-4.3 PR that
/// just merged) — same per-device folder, same read-only semantics, but the seven
/// names + their counter shape match the AB-Legacy plan: numerical counters that
/// HMIs can bind directly without a separate diagnostics RPC. Counters are
/// <c>long</c> (Int64) so a long-running deployment can't roll an
/// <c>RequestCount</c> over inside a maintenance window.</para>
/// <list type="bullet">
/// <item><c>RequestCount</c> — total <see cref="AbLegacyDriver.ReadAsync"/>
/// requests issued against this device (each non-diagnostic reference counts
/// once per call, success or fail).</item>
/// <item><c>ResponseCount</c> — successful read responses.</item>
/// <item><c>ErrorCount</c> — failed read responses (any non-Good status).</item>
/// <item><c>RetryCount</c> — retry attempts beyond the first per the PR 9 retry
/// loop. Incremented once per extra attempt, not per successful retry.</item>
/// <item><c>LastErrorCode</c> — most recent libplctag status code on a failed
/// read (0 when no error has been seen since reset).</item>
/// <item><c>LastErrorMessage</c> — most recent libplctag error message on a
/// failed read (empty when no error has been seen).</item>
/// <item><c>CommFailures</c> — count of read failures mapped to
/// <see cref="AbLegacyStatusMapper.BadCommunicationError"/>. Spans transient
/// exceptions + retried-out chains so operators see a single "wire fell off"
/// counter without having to sum across error-code subtotals.</item>
/// </list>
/// <para>Concurrency: the host-to-counters map is guarded by a private lock; the
/// numeric counters are updated with <see cref="Interlocked"/>. The last-error code
/// and message are kept together in one immutable object that is replaced with a
/// single reference write, so a reader never observes the code of one failure paired
/// with the message of another.</para>
/// </remarks>
public sealed class AbLegacyDiagnosticTags
{
    /// <summary>Address-space prefix the driver stamps on every diagnostic variable's
    /// <see cref="ZB.MOM.WW.OtOpcUa.Core.Abstractions.DriverAttributeInfo.FullName"/>.</summary>
    public const string DiagnosticsFolderPrefix = "_Diagnostics/";

    /// <summary>Canonical names the diagnostics folder exposes. Keep in lockstep with discovery.</summary>
    public static readonly IReadOnlyList<string> DiagnosticTagNames =
    [
        "RequestCount",
        "ResponseCount",
        "ErrorCount",
        "RetryCount",
        "LastErrorCode",
        "LastErrorMessage",
        "CommFailures",
    ];

    // Case-sensitive membership set backing IsReservedName.
    private static readonly HashSet<string> DiagnosticTagNameSet =
        new(DiagnosticTagNames, StringComparer.Ordinal);

    // Host address -> counters. Host keys compare case-insensitively.
    private readonly Dictionary<string, DiagnosticsCounters> _counters =
        new(StringComparer.OrdinalIgnoreCase);

    // Guards _counters only; the counter fields themselves are updated lock-free.
    private readonly object _lock = new();

    /// <summary>
    /// Make sure a slot exists for <paramref name="deviceHostAddress"/>. Called from
    /// <see cref="AbLegacyDriver.InitializeAsync"/> so the counters are zero-initialised
    /// by the time the first read or probe iteration fires. An existing slot is left
    /// untouched.
    /// </summary>
    /// <param name="deviceHostAddress">Host address that keys the device's counters.</param>
    /// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is null.</exception>
    public void EnsureDevice(string deviceHostAddress)
    {
        ArgumentNullException.ThrowIfNull(deviceHostAddress);
        // Same "add if missing" logic as the record path — share it rather than duplicate it.
        _ = GetOrCreate(deviceHostAddress);
    }

    /// <summary>Return the counters for a host, adding a zeroed slot if none exists yet.</summary>
    private DiagnosticsCounters GetOrCreate(string deviceHostAddress)
    {
        // Lazy add covers callers that hit a host which was never passed to EnsureDevice.
        lock (_lock)
        {
            if (!_counters.TryGetValue(deviceHostAddress, out var counters))
            {
                counters = new DiagnosticsCounters();
                _counters[deviceHostAddress] = counters;
            }
            return counters;
        }
    }

    /// <summary>Increment <c>RequestCount</c> for <paramref name="deviceHostAddress"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is null.</exception>
    public void RecordRequest(string deviceHostAddress)
    {
        ArgumentNullException.ThrowIfNull(deviceHostAddress);
        Interlocked.Increment(ref GetOrCreate(deviceHostAddress).Request);
    }

    /// <summary>Increment <c>ResponseCount</c> for a successful read.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is null.</exception>
    public void RecordResponse(string deviceHostAddress)
    {
        ArgumentNullException.ThrowIfNull(deviceHostAddress);
        Interlocked.Increment(ref GetOrCreate(deviceHostAddress).Response);
    }

    /// <summary>
    /// Increment <c>ErrorCount</c> and record the latest libplctag status code +
    /// message for a failed read.
    /// </summary>
    /// <param name="deviceHostAddress">Host address that keys the device's counters.</param>
    /// <param name="libplctagStatus">Status code to expose as <c>LastErrorCode</c>.</param>
    /// <param name="errorMessage">Text to expose as <c>LastErrorMessage</c>; null is stored as empty.</param>
    /// <param name="commFailure">When true, <c>CommFailures</c> is incremented as well.</param>
    /// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is null.</exception>
    public void RecordError(
        string deviceHostAddress, int libplctagStatus, string? errorMessage, bool commFailure)
    {
        ArgumentNullException.ThrowIfNull(deviceHostAddress);
        var counters = GetOrCreate(deviceHostAddress);
        Interlocked.Increment(ref counters.Error);
        if (commFailure)
        {
            Interlocked.Increment(ref counters.CommFailures);
        }

        // Publish code + message as ONE reference swap. Writing them as two separate
        // fields lets concurrent failures interleave and leave the code of one error
        // paired with the message of another. Last write still wins.
        Volatile.Write(
            ref counters.LastError,
            new LastErrorInfo(libplctagStatus, errorMessage ?? string.Empty));
    }

    /// <summary>Increment <c>RetryCount</c> per retry attempt beyond the first.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is null.</exception>
    public void RecordRetry(string deviceHostAddress)
    {
        ArgumentNullException.ThrowIfNull(deviceHostAddress);
        Interlocked.Increment(ref GetOrCreate(deviceHostAddress).Retry);
    }

    /// <summary>
    /// Snapshot the current counters for a device. Returns zeros (and an empty message)
    /// for unknown hosts; an unknown host is not registered by this call.
    /// </summary>
    /// <remarks>Each numeric counter is read on its own, so the counters are not
    /// captured at a single instant; the last-error code and message always belong to
    /// the same recorded failure.</remarks>
    /// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is null.</exception>
    public DiagnosticsSnapshot Snapshot(string deviceHostAddress)
    {
        ArgumentNullException.ThrowIfNull(deviceHostAddress);
        DiagnosticsCounters? counters;
        lock (_lock)
        {
            _counters.TryGetValue(deviceHostAddress, out counters);
        }
        if (counters is null)
        {
            return new DiagnosticsSnapshot(0, 0, 0, 0, 0, string.Empty, 0);
        }

        // Single read of the pair keeps code and message consistent with each other.
        var lastError = Volatile.Read(ref counters.LastError);
        return new DiagnosticsSnapshot(
            Request: Interlocked.Read(ref counters.Request),
            Response: Interlocked.Read(ref counters.Response),
            Error: Interlocked.Read(ref counters.Error),
            Retry: Interlocked.Read(ref counters.Retry),
            LastErrorCode: lastError.Code,
            LastErrorMessage: lastError.Message,
            CommFailures: Interlocked.Read(ref counters.CommFailures));
    }

    /// <summary>
    /// Reset every counter for <paramref name="deviceHostAddress"/> back to zero. Called
    /// from <see cref="AbLegacyDriver.ReinitializeAsync"/> so a config redeploy starts
    /// with a clean diagnostic surface. A host without a slot gets a zeroed one.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is null.</exception>
    public void Reset(string deviceHostAddress)
    {
        ArgumentNullException.ThrowIfNull(deviceHostAddress);
        var counters = GetOrCreate(deviceHostAddress);
        Interlocked.Exchange(ref counters.Request, 0);
        Interlocked.Exchange(ref counters.Response, 0);
        Interlocked.Exchange(ref counters.Error, 0);
        Interlocked.Exchange(ref counters.Retry, 0);
        Volatile.Write(ref counters.LastError, LastErrorInfo.None);
        Interlocked.Exchange(ref counters.CommFailures, 0);
    }

    /// <summary>Drop every tracked device and its counters. Called on full <c>ShutdownAsync</c>.</summary>
    public void ResetAll()
    {
        lock (_lock)
        {
            _counters.Clear();
        }
    }

    /// <summary>
    /// Resolve a <c>_Diagnostics/&lt;host&gt;/&lt;name&gt;</c> reference into a counter
    /// value.
    /// </summary>
    /// <param name="fullReference">Reference to resolve.</param>
    /// <param name="value">On success, the boxed counter (<c>long</c>), the last error
    /// code (<c>int</c>) or the last error message (<c>string</c>); otherwise null.</param>
    /// <returns><c>true</c> when the reference has the diagnostics prefix, a non-empty
    /// host, and one of the seven reserved names. A well-formed reference to a host
    /// with no counters still succeeds and yields zero / empty.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fullReference"/> is null.</exception>
    public bool TryRead(string fullReference, out object? value)
    {
        ArgumentNullException.ThrowIfNull(fullReference);
        value = null;
        if (!IsDiagnosticAddress(fullReference))
        {
            return false;
        }

        var withoutPrefix = fullReference[DiagnosticsFolderPrefix.Length..];
        // Split on the LAST slash: the host part may itself contain '/' characters,
        // so only the final segment is the counter name.
        var slashIdx = withoutPrefix.LastIndexOf('/');
        if (slashIdx <= 0 || slashIdx >= withoutPrefix.Length - 1)
        {
            return false; // empty host or empty name
        }

        var host = withoutPrefix[..slashIdx];
        var name = withoutPrefix[(slashIdx + 1)..];
        if (!IsReservedName(name))
        {
            return false;
        }

        var snapshot = Snapshot(host);
        value = name switch
        {
            "RequestCount" => snapshot.Request,
            "ResponseCount" => snapshot.Response,
            "ErrorCount" => snapshot.Error,
            "RetryCount" => snapshot.Retry,
            "LastErrorCode" => snapshot.LastErrorCode,
            "LastErrorMessage" => snapshot.LastErrorMessage,
            "CommFailures" => snapshot.CommFailures,
            _ => null, // not reachable while the switch matches DiagnosticTagNames
        };
        return true;
    }

    /// <summary>
    /// <c>true</c> when <paramref name="reference"/> targets a node under the synthetic
    /// <c>_Diagnostics/</c> folder. The driver's read path uses this to bypass the
    /// libplctag runtime and dispatch to <see cref="TryRead"/> directly. The prefix
    /// match is case-sensitive; null or empty input yields <c>false</c>.
    /// </summary>
    public static bool IsDiagnosticAddress(string? reference) =>
        !string.IsNullOrEmpty(reference)
        && reference.StartsWith(DiagnosticsFolderPrefix, StringComparison.Ordinal);

    /// <summary>
    /// <c>true</c> when <paramref name="name"/> matches one of the seven reserved
    /// diagnostic names (case-sensitive). Used by
    /// <see cref="AbLegacyDriver.InitializeAsync"/> to reject user-config tags that
    /// would shadow the driver-emitted counters.
    /// </summary>
    public static bool IsReservedName(string? name) =>
        !string.IsNullOrEmpty(name) && DiagnosticTagNameSet.Contains(name);

    /// <summary>Mutable per-device counter storage; fields are public so they can be
    /// passed by <c>ref</c> to <see cref="Interlocked"/> / <see cref="Volatile"/>.</summary>
    private sealed class DiagnosticsCounters
    {
        public long Request;
        public long Response;
        public long Error;
        public long Retry;
        public long CommFailures;

        // Never null; replaced wholesale on every recorded error and on reset.
        public LastErrorInfo LastError = LastErrorInfo.None;
    }

    /// <summary>Immutable (code, message) pair for the most recent failed read.</summary>
    private sealed record LastErrorInfo(int Code, string Message)
    {
        /// <summary>State before any error has been recorded: code 0, empty message.</summary>
        public static readonly LastErrorInfo None = new(0, string.Empty);
    }
}
/// <summary>
/// PR ablegacy-10 / #253 — immutable snapshot of one device's diagnostic counters.
/// Built by <see cref="AbLegacyDiagnosticTags.Snapshot"/> and consumed by
/// <see cref="AbLegacyDiagnosticTags.TryRead"/>, which picks out the single member
/// that backs the requested <c>_Diagnostics/&lt;host&gt;/&lt;name&gt;</c> variable.
/// </summary>
/// <remarks>
/// A host with no registered counters is represented by all-zero numeric members and
/// an empty <paramref name="LastErrorMessage"/>. The numeric counters are each read
/// separately when the snapshot is taken, so under concurrent updates they need not
/// all correspond to the same instant.
/// </remarks>
/// <param name="Request">Total <c>ReadAsync</c> requests issued against this device.</param>
/// <param name="Response">Successful read responses.</param>
/// <param name="Error">Failed read responses (any non-Good status).</param>
/// <param name="Retry">Retry attempts beyond the first per the PR 9 retry loop.</param>
/// <param name="LastErrorCode">Most recent libplctag status code on a failed read; 0 when none has been recorded.</param>
/// <param name="LastErrorMessage">Most recent libplctag error message on a failed read; empty when none has been recorded.</param>
/// <param name="CommFailures">Count of read failures mapped to <c>BadCommunicationError</c>.</param>
public sealed record DiagnosticsSnapshot(
long Request,
long Response,
long Error,
long Retry,
int LastErrorCode,
string LastErrorMessage,
long CommFailures);

View File

@@ -29,6 +29,18 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
private readonly Dictionary<string, (object? Value, uint StatusCode)> _lastPublished =
new(StringComparer.OrdinalIgnoreCase);
private readonly object _lastPublishedLock = new();
/// <summary>
/// PR ablegacy-10 / #253 — per-device diagnostic counters surfaced as
/// <c>_Diagnostics/&lt;host&gt;/&lt;name&gt;</c> read-only variables. Updated on
/// every <see cref="ReadAsync"/> call (success, failure, retry) so HMIs can bind
/// directly without a separate diagnostics RPC.
/// </summary>
private readonly AbLegacyDiagnosticTags _diagnosticTags = new();
/// <summary>Test seam — exposes the live diagnostic-tag source so unit tests can poke counters.</summary>
internal AbLegacyDiagnosticTags DiagnosticTags => _diagnosticTags;
private DriverHealth _health = new(DriverState.Unknown, null, null);
public event EventHandler<DataChangeEventArgs>? OnDataChange;
@@ -153,8 +165,35 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
$"AbLegacy device has invalid HostAddress '{device.HostAddress}' — expected 'ab://gateway[:port]/cip-path'.");
var profile = AbLegacyPlcFamilyProfile.ForFamily(device.PlcFamily);
_devices[device.HostAddress] = new DeviceState(addr, device, profile);
// PR ablegacy-10 / #253 — pre-allocate the diagnostic-counter slot so the
// first read against this device sees zero-initialised counters instead of
// having to lazy-add on the request path.
_diagnosticTags.EnsureDevice(device.HostAddress);
}
foreach (var tag in _options.Tags)
{
    // PR ablegacy-10 / #253 — collision rejection. User-config tags must not live
    // under the synthetic _Diagnostics/ namespace and must not reuse one of the
    // seven driver-emitted diagnostic names. Reject up front with a clear error
    // rather than letting the operator wonder why their tag never returns PLC data.
    if (AbLegacyDiagnosticTags.IsDiagnosticAddress(tag.Address))
    {
        throw new InvalidOperationException(
            $"AbLegacy tag '{tag.Name}' has Address '{tag.Address}' under the reserved " +
            $"'_Diagnostics/' namespace; that prefix is owned by the auto-emitted " +
            $"diagnostic counters. Choose a different address.");
    }

    // The read path tests the incoming reference for the _Diagnostics/ prefix BEFORE
    // it looks the reference up in _tagsByName, and _tagsByName is keyed by tag.Name.
    // A tag whose NAME carries the prefix would therefore be registered here but
    // shadowed by the diagnostics short-circuit on every read, so the name has to be
    // checked as well as the address.
    if (AbLegacyDiagnosticTags.IsDiagnosticAddress(tag.Name))
    {
        throw new InvalidOperationException(
            $"AbLegacy tag name '{tag.Name}' is under the reserved '_Diagnostics/' " +
            $"namespace; that prefix is owned by the auto-emitted diagnostic counters. " +
            $"Rename the tag.");
    }

    if (AbLegacyDiagnosticTags.IsReservedName(tag.Name))
    {
        throw new InvalidOperationException(
            $"AbLegacy tag name '{tag.Name}' collides with a reserved diagnostic " +
            $"counter ({string.Join(", ", AbLegacyDiagnosticTags.DiagnosticTagNames)}). " +
            $"Rename the tag.");
    }

    _tagsByName[tag.Name] = tag;
}
foreach (var tag in _options.Tags) _tagsByName[tag.Name] = tag;
// Probe loops — one per device when enabled + probe address configured.
if (_options.Probe.Enabled && !string.IsNullOrWhiteSpace(_options.Probe.ProbeAddress))
@@ -179,6 +218,11 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
public async Task ReinitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
await ShutdownAsync(cancellationToken).ConfigureAwait(false);
// PR ablegacy-10 / #253 — clear every per-device diagnostic counter before the
// driver is initialised again, so the redeployed configuration starts from zero.
// ResetAll only empties the counter map, so repeating it is harmless.
// NOTE(review): the shutdown path is expected to have called ResetAll already,
// which would make this call redundant. The earlier rationale cited a "downstream
// override", but this class is declared sealed — confirm whether the extra clear
// is still wanted. Hosts are presumably re-registered through EnsureDevice during
// the InitializeAsync call below; verify against that method.
_diagnosticTags.ResetAll();
await InitializeAsync(driverConfigJson, cancellationToken).ConfigureAwait(false);
}
@@ -198,6 +242,10 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
// reconnect-driven shutdown) doesn't suppress the very first post-reconnect sample
// by comparing it against pre-disconnect state.
lock (_lastPublishedLock) { _lastPublished.Clear(); }
// PR ablegacy-10 / #253 — drop every per-device counter so a reinit / redeploy
// starts with a clean diagnostic surface. Reset (per-host) is also exposed so a
// future "clear counters" admin RPC can reach in without a full shutdown.
_diagnosticTags.ResetAll();
_health = new DriverHealth(DriverState.Unknown, _health.LastSuccessfulRead, null);
}
@@ -239,6 +287,25 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
for (var i = 0; i < fullReferences.Count; i++)
{
var reference = fullReferences[i];
// PR ablegacy-10 / #253 — synthetic _Diagnostics/<host>/<name> reference;
// serve from the in-process counter store and skip the libplctag dispatch
// entirely. Diagnostic reads do NOT bump RequestCount — they're driver-local
// observability, not field traffic, and counting them would make the
// counter chase its own tail when a subscription polls at 1 Hz.
if (AbLegacyDiagnosticTags.IsDiagnosticAddress(reference))
{
if (_diagnosticTags.TryRead(reference, out var diagValue))
{
results[i] = new DataValueSnapshot(diagValue, AbLegacyStatusMapper.Good, now, now);
}
else
{
results[i] = new DataValueSnapshot(null, AbLegacyStatusMapper.BadNodeIdUnknown, null, now);
}
continue;
}
if (!_tagsByName.TryGetValue(reference, out var def))
{
results[i] = new DataValueSnapshot(null, AbLegacyStatusMapper.BadNodeIdUnknown, null, now);
@@ -250,6 +317,12 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
continue;
}
// PR ablegacy-10 / #253 — bump RequestCount once per non-diagnostic reference,
// success or fail. The retry loop below counts retries through RecordRetry so
// operators can spot a flapping link via the RetryCount counter without us
// double-counting the original attempt as a retry.
_diagnosticTags.RecordRequest(def.DeviceHostAddress);
// PR 9 — per-device retry loop: on transient BadCommunicationError (libplctag throw
// OR a non-zero status that maps to BadCommunicationError) retry up to N times. A
// terminal mapped status (e.g. BadNodeIdUnknown for a missing PLC tag, BadTypeMismatch
@@ -259,6 +332,11 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
DataValueSnapshot? snapshot = null;
for (var attempt = 0; attempt <= retries; attempt++)
{
// PR ablegacy-10 / #253 — second + later attempts count as retries for the
// diagnostic counter. Increment BEFORE the work so a thrown exception still
// shows up in the retry tally.
if (attempt > 0) _diagnosticTags.RecordRetry(def.DeviceHostAddress);
try
{
var runtime = await EnsureTagRuntimeAsync(device, def, cancellationToken).ConfigureAwait(false);
@@ -273,6 +351,15 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
{
continue;
}
// PR ablegacy-10 / #253 — terminal failure: bump the error counter
// + record the libplctag status. CommFailure tally rolls only when
// the mapped status is BadCommunicationError so operators see a
// single "wire fell off" counter independent of other error codes.
_diagnosticTags.RecordError(
def.DeviceHostAddress,
status,
$"libplctag status {status} reading {reference}",
commFailure: mappedStatus == AbLegacyStatusMapper.BadCommunicationError);
snapshot = new DataValueSnapshot(null, mappedStatus, null, now);
_health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead,
$"libplctag status {status} reading {reference}");
@@ -296,6 +383,8 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
var arr = DecodeArrayAs(runtime, def.DataType, arrayCount);
snapshot = new DataValueSnapshot(arr, AbLegacyStatusMapper.Good, now, now);
_health = new DriverHealth(DriverState.Healthy, now, null);
// PR ablegacy-10 / #253 — successful array read.
_diagnosticTags.RecordResponse(def.DeviceHostAddress);
break;
}
@@ -307,6 +396,8 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
var value = runtime.DecodeValue(def.DataType, decodeBit);
snapshot = new DataValueSnapshot(value, AbLegacyStatusMapper.Good, now, now);
_health = new DriverHealth(DriverState.Healthy, now, null);
// PR ablegacy-10 / #253 — successful scalar / sub-element / bit read.
_diagnosticTags.RecordResponse(def.DeviceHostAddress);
break;
}
catch (OperationCanceledException) { throw; }
@@ -314,6 +405,15 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
{
// Transient — exhaust retries before reporting BadCommunicationError.
if (attempt < retries) continue;
// PR ablegacy-10 / #253 — exhausted retries surface as a comm
// failure. Pass libplctag status 0 because the throw means we never
// got a status code back, but record the exception message so the
// LastErrorMessage diagnostic still has actionable text.
_diagnosticTags.RecordError(
def.DeviceHostAddress,
libplctagStatus: 0,
errorMessage: ex.Message,
commFailure: true);
snapshot = new DataValueSnapshot(null,
AbLegacyStatusMapper.BadCommunicationError, null, now);
_health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, ex.Message);
@@ -456,10 +556,61 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
IsAlarm: false,
WriteIdempotent: tag.WriteIdempotent));
}
// PR ablegacy-10 / #253 — auto-emit the per-device _Diagnostics folder + its
// seven read-only counter variables. FullName carries the synthetic
// _Diagnostics/<host>/<name> reference so ReadAsync can short-circuit before
// EnsureTagRuntimeAsync. Mirrors AbCip's _System/ pattern from abcip-4.3.
EmitDiagnosticsFolder(deviceFolder, device.HostAddress);
}
return Task.CompletedTask;
}
/// <summary>
/// PR ablegacy-10 / #253 — adds a <c>_Diagnostics</c> sub-folder beneath
/// <paramref name="deviceFolder"/> and populates it with the seven read-only
/// diagnostic-counter variables, in a fixed order. Each variable's <c>FullName</c>
/// embeds the owning device's host address
/// (<c>_Diagnostics/&lt;host&gt;/&lt;name&gt;</c>), which is what lets the read path
/// hand the reference to <see cref="AbLegacyDiagnosticTags.TryRead"/> without any
/// extra registry. The names listed here must stay in step with
/// <see cref="AbLegacyDiagnosticTags.DiagnosticTagNames"/>.
/// </summary>
/// <param name="deviceFolder">Builder for the device folder that receives the sub-folder.</param>
/// <param name="deviceHostAddress">Host address embedded in every variable's <c>FullName</c>.</param>
private static void EmitDiagnosticsFolder(IAddressSpaceBuilder deviceFolder, string deviceHostAddress)
{
    var diagnosticsFolder = deviceFolder.Folder("_Diagnostics", "_Diagnostics");

    // One row per variable: browse name, data type, operator-facing description.
    var variables = new (string Name, DriverDataType Type, string Description)[]
    {
        ("RequestCount", DriverDataType.Int64,
            "Total ReadAsync requests issued against this device (one per non-diagnostic reference per call, success or fail)."),
        ("ResponseCount", DriverDataType.Int64,
            "Successful read responses for this device."),
        ("ErrorCount", DriverDataType.Int64,
            "Failed read responses for this device (any non-Good status)."),
        ("RetryCount", DriverDataType.Int64,
            "Retry attempts beyond the first per the AbLegacy retry loop. Bumps once per extra attempt — a single read with two retries adds two."),
        ("LastErrorCode", DriverDataType.Int32,
            "Most recent libplctag status code on a failed read; 0 when no error has been seen since the last reset."),
        ("LastErrorMessage", DriverDataType.String,
            "Most recent libplctag error message on a failed read; empty when no error has been seen since the last reset."),
        ("CommFailures", DriverDataType.Int64,
            "Count of read failures mapped to BadCommunicationError. Spans transient libplctag throws + retried-out chains so operators see a single 'wire fell off' counter."),
    };

    foreach (var (variableName, dataType, text) in variables)
    {
        EmitDiagnosticVariable(diagnosticsFolder, deviceHostAddress, variableName, dataType, text);
    }
}
/// <summary>
/// Registers one diagnostic variable in <paramref name="folder"/>. The same
/// <paramref name="name"/> is passed for both name arguments of the builder call,
/// and the variable's <c>FullName</c> is the diagnostics prefix followed by
/// <paramref name="deviceHostAddress"/>, a slash, and <paramref name="name"/>.
/// </summary>
/// <param name="folder">Builder for the <c>_Diagnostics</c> folder.</param>
/// <param name="deviceHostAddress">Host address of the owning device.</param>
/// <param name="name">Variable name; also the last segment of the <c>FullName</c>.</param>
/// <param name="type">Data type advertised for the variable.</param>
/// <param name="description">Description attached to the variable.</param>
private static void EmitDiagnosticVariable(
    IAddressSpaceBuilder folder, string deviceHostAddress, string name,
    DriverDataType type, string description)
{
    var reference = string.Concat(
        AbLegacyDiagnosticTags.DiagnosticsFolderPrefix, deviceHostAddress, "/", name);

    // ViewOnly marks the variable as read-only: a scalar, neither historized nor an alarm.
    var attributes = new DriverAttributeInfo(
        FullName: reference,
        DriverDataType: type,
        IsArray: false,
        ArrayDim: null,
        SecurityClass: SecurityClassification.ViewOnly,
        IsHistorized: false,
        IsAlarm: false,
        WriteIdempotent: false,
        Description: description);

    folder.Variable(name, name, attributes);
}
// ---- ISubscribable (polling overlay via shared engine) ----
public Task<ISubscriptionHandle> SubscribeAsync(