@@ -38,4 +38,16 @@ public sealed record HostStatusChangedEventArgs(
|
||||
HostState NewState);
|
||||
|
||||
/// <summary>Host lifecycle state. Generalization of Galaxy's Platform/Engine ScanState.</summary>
|
||||
public enum HostState { Unknown, Running, Stopped, Faulted }
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// <see cref="Demoted"/> (PR ablegacy-12 / #255) is a soft-stopped state used by drivers
|
||||
/// that auto-throttle a host after N consecutive comm failures. Reads are short-circuited
|
||||
/// with <c>BadCommunicationError</c> for a configurable cool-down window so one slow PLC
|
||||
/// doesn't starve faster peers sharing the same driver. Demoted is *not* the same as
|
||||
/// <see cref="Stopped"/> (which means "probe says it's down") nor <see cref="Faulted"/>
|
||||
/// (which means "the driver itself is broken"); it's a deliberate driver-side back-off.
|
||||
/// Consumers that don't recognize <c>Demoted</c> can safely treat it as <c>Stopped</c>
|
||||
/// (see <c>HostStatusPublisher.MapState</c>).
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public enum HostState { Unknown, Running, Stopped, Faulted, Demoted }
|
||||
|
||||
@@ -25,6 +25,34 @@ public abstract class AbLegacyCommandBase : DriverCommandBase
|
||||
[CommandOption("timeout-ms", Description = "Per-operation timeout in ms (default 5000).")]
|
||||
public int TimeoutMs { get; init; } = 5000;
|
||||
|
||||
/// <summary>
|
||||
/// PR ablegacy-12 / #255 — consecutive comm failures before this device is
|
||||
/// auto-demoted. Reads against a demoted device short-circuit with
|
||||
/// <c>BadCommunicationError</c> for <see cref="DemoteForMs"/> ms so one
|
||||
/// unreachable PLC can't starve faster peers sharing the driver thread.
|
||||
/// </summary>
|
||||
[CommandOption("demote-failure-threshold", Description =
|
||||
"Consecutive comm failures before the device is auto-demoted (PR ablegacy-12). Default 3.")]
|
||||
public int DemoteFailureThreshold { get; init; } = 3;
|
||||
|
||||
/// <summary>
|
||||
/// PR ablegacy-12 / #255 — auto-demote cool-down window in ms. Reads while
|
||||
/// this window is active short-circuit with <c>BadCommunicationError</c>;
|
||||
/// a successful probe clears it early.
|
||||
/// </summary>
|
||||
[CommandOption("demote-for-ms", Description =
|
||||
"Auto-demote cool-down window in ms (PR ablegacy-12). Default 30000 (30s).")]
|
||||
public int DemoteForMs { get; init; } = 30_000;
|
||||
|
||||
/// <summary>
|
||||
/// PR ablegacy-12 / #255 — opt out of the auto-demote behaviour. The
|
||||
/// consecutive-failure tally still ticks, but the demote window is never armed:
|
||||
/// DemoteCount/LastDemotedUtc stay zero and reads never short-circuit.
|
||||
/// </summary>
|
||||
[CommandOption("no-demote", Description =
|
||||
"Disable auto-demote on consecutive comm failures (PR ablegacy-12). Default off (auto-demote enabled).")]
|
||||
public bool NoDemote { get; init; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public override TimeSpan Timeout
|
||||
{
|
||||
@@ -41,7 +69,11 @@ public abstract class AbLegacyCommandBase : DriverCommandBase
|
||||
Devices = [new AbLegacyDeviceOptions(
|
||||
HostAddress: Gateway,
|
||||
PlcFamily: PlcType,
|
||||
DeviceName: $"cli-{PlcType}")],
|
||||
DeviceName: $"cli-{PlcType}",
|
||||
Demote: new AbLegacyDemoteOptions(
|
||||
FailureThreshold: DemoteFailureThreshold,
|
||||
DemoteFor: TimeSpan.FromMilliseconds(DemoteForMs),
|
||||
Enabled: !NoDemote))],
|
||||
Tags = tags,
|
||||
Timeout = Timeout,
|
||||
Probe = new AbLegacyProbeOptions { Enabled = false },
|
||||
|
||||
@@ -40,10 +40,19 @@ public sealed class ProbeCommand : AbLegacyCommandBase
|
||||
await driver.InitializeAsync("{}", ct);
|
||||
var snapshot = await driver.ReadAsync(["__probe"], ct);
|
||||
var health = driver.GetHealth();
|
||||
// PR ablegacy-12 / #255 — surface Demoted alongside the probe-driven
|
||||
// HostState. After a one-shot probe the host hasn't been observed
|
||||
// (no probe loop runs in CLI mode), so HostState is typically Unknown
|
||||
// unless the read above tripped the demote threshold.
|
||||
var hostStatus = driver.GetHostStatuses().FirstOrDefault();
|
||||
|
||||
await console.Output.WriteLineAsync($"Gateway: {Gateway}");
|
||||
await console.Output.WriteLineAsync($"PLC type: {PlcType}");
|
||||
await console.Output.WriteLineAsync($"Health: {health.State}");
|
||||
if (hostStatus is not null)
|
||||
{
|
||||
await console.Output.WriteLineAsync($"Host state: {hostStatus.State}");
|
||||
}
|
||||
if (health.LastError is { } err)
|
||||
await console.Output.WriteLineAsync($"Last error: {err}");
|
||||
await console.Output.WriteLineAsync();
|
||||
|
||||
@@ -40,6 +40,11 @@ public sealed class AbLegacyDiagnosticTags
|
||||
public const string DiagnosticsFolderPrefix = "_Diagnostics/";
|
||||
|
||||
/// <summary>Canonical names the diagnostics folder exposes. Keep in lockstep with discovery.</summary>
|
||||
/// <remarks>
|
||||
/// PR ablegacy-12 / #255 — <c>DemoteCount</c> + <c>LastDemotedUtc</c> ride
|
||||
/// alongside the original seven so HMIs can spot a flapping device by
|
||||
/// watching <c>DemoteCount</c> climb without scraping logs.
|
||||
/// </remarks>
|
||||
public static readonly IReadOnlyList<string> DiagnosticTagNames =
|
||||
[
|
||||
"RequestCount",
|
||||
@@ -49,6 +54,9 @@ public sealed class AbLegacyDiagnosticTags
|
||||
"LastErrorCode",
|
||||
"LastErrorMessage",
|
||||
"CommFailures",
|
||||
// PR ablegacy-12 / #255 — auto-demote on comm failure surface.
|
||||
"DemoteCount",
|
||||
"LastDemotedUtc",
|
||||
];
|
||||
|
||||
private static readonly HashSet<string> DiagnosticTagNameSet =
|
||||
@@ -130,6 +138,39 @@ public sealed class AbLegacyDiagnosticTags
|
||||
Interlocked.Increment(ref c.Retry);
|
||||
}
|
||||
|
||||
/// <summary>
/// PR ablegacy-12 / #255 — registers one auto-demotion for a device. The cumulative
/// <c>DemoteCount</c> goes up by one and <c>LastDemotedUtc</c> is overwritten with
/// <paramref name="nowUtc"/>. Intended to be called each time the driver arms a new
/// cool-down window, so a link that keeps flapping shows up as a counter that keeps
/// climbing.
/// </summary>
/// <param name="deviceHostAddress">Host address key of the device; must not be <c>null</c>.</param>
/// <param name="nowUtc">Timestamp to record as the most recent demotion.</param>
/// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is <c>null</c>.</exception>
public void RecordDemote(string deviceHostAddress, DateTime nowUtc)
{
    ArgumentNullException.ThrowIfNull(deviceHostAddress);

    var counters = GetOrCreate(deviceHostAddress);
    var stampTicks = nowUtc.Ticks;

    Interlocked.Increment(ref counters.DemoteCount);

    // The timestamp is stored as raw ticks in a long so it can be published with
    // Interlocked.Exchange: the 64-bit store is atomic on every platform, so a
    // concurrent reader never observes a half-written value.
    Interlocked.Exchange(ref counters.LastDemotedUtcTicks, stampTicks);
}
|
||||
|
||||
/// <summary>
/// PR ablegacy-12 / #255 — writes the cumulative demote bookkeeping back for a device,
/// e.g. after an <see cref="AbLegacyDriver.ReinitializeAsync"/> cycle, so a config
/// redeploy in the middle of an incident keeps the flapping-link history. Both values
/// are assigned as absolutes; nothing is incremented.
/// </summary>
/// <param name="deviceHostAddress">Host address key of the device; must not be <c>null</c>.</param>
/// <param name="demoteCount">Value to store as the cumulative demote count.</param>
/// <param name="lastDemotedUtc">Timestamp of the last demotion; <c>null</c> is stored as
/// zero ticks, which the snapshot reports as "never demoted".</param>
/// <exception cref="ArgumentNullException"><paramref name="deviceHostAddress"/> is <c>null</c>.</exception>
public void RestoreDemote(string deviceHostAddress, long demoteCount, DateTime? lastDemotedUtc)
{
    ArgumentNullException.ThrowIfNull(deviceHostAddress);

    // Zero ticks is the "no demotion recorded" sentinel used by the counters.
    long restoredTicks = lastDemotedUtc.HasValue ? lastDemotedUtc.Value.Ticks : 0L;

    var counters = GetOrCreate(deviceHostAddress);
    Interlocked.Exchange(ref counters.DemoteCount, demoteCount);
    Interlocked.Exchange(ref counters.LastDemotedUtcTicks, restoredTicks);
}
|
||||
|
||||
/// <summary>Snapshot the current counters for a device. Returns zeros for unknown hosts.</summary>
|
||||
public DiagnosticsSnapshot Snapshot(string deviceHostAddress)
|
||||
{
|
||||
@@ -139,7 +180,8 @@ public sealed class AbLegacyDiagnosticTags
|
||||
{
|
||||
_counters.TryGetValue(deviceHostAddress, out c);
|
||||
}
|
||||
if (c is null) return new DiagnosticsSnapshot(0, 0, 0, 0, 0, string.Empty, 0);
|
||||
if (c is null) return new DiagnosticsSnapshot(0, 0, 0, 0, 0, string.Empty, 0, 0, null);
|
||||
var ticks = Interlocked.Read(ref c.LastDemotedUtcTicks);
|
||||
return new DiagnosticsSnapshot(
|
||||
Request: Interlocked.Read(ref c.Request),
|
||||
Response: Interlocked.Read(ref c.Response),
|
||||
@@ -147,7 +189,10 @@ public sealed class AbLegacyDiagnosticTags
|
||||
Retry: Interlocked.Read(ref c.Retry),
|
||||
LastErrorCode: Volatile.Read(ref c.LastErrorCode),
|
||||
LastErrorMessage: c.LastErrorMessage ?? string.Empty,
|
||||
CommFailures: Interlocked.Read(ref c.CommFailures));
|
||||
CommFailures: Interlocked.Read(ref c.CommFailures),
|
||||
// PR ablegacy-12 / #255 — auto-demote surface.
|
||||
DemoteCount: Interlocked.Read(ref c.DemoteCount),
|
||||
LastDemotedUtc: ticks == 0 ? null : new DateTime(ticks, DateTimeKind.Utc));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -155,7 +200,14 @@ public sealed class AbLegacyDiagnosticTags
|
||||
/// from <see cref="AbLegacyDriver.ReinitializeAsync"/> so a config redeploy starts
|
||||
/// with a clean diagnostic surface.
|
||||
/// </summary>
|
||||
public void Reset(string deviceHostAddress)
|
||||
/// <remarks>
|
||||
/// PR ablegacy-12 / #255 — when <paramref name="preserveDemote"/> is <c>true</c> the
|
||||
/// cumulative <c>DemoteCount</c> + <c>LastDemotedUtc</c> survive the reset.
|
||||
/// <see cref="AbLegacyDriver.ReinitializeAsync"/> uses that mode so an operator
|
||||
/// redeploying a config doesn't lose their flapping-link history; a fresh process
|
||||
/// start clears them naturally because the dictionary is rebuilt from scratch.
|
||||
/// </remarks>
|
||||
public void Reset(string deviceHostAddress, bool preserveDemote = false)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(deviceHostAddress);
|
||||
var c = GetOrCreate(deviceHostAddress);
|
||||
@@ -166,14 +218,40 @@ public sealed class AbLegacyDiagnosticTags
|
||||
Interlocked.Exchange(ref c.LastErrorCode, 0);
|
||||
c.LastErrorMessage = string.Empty;
|
||||
Interlocked.Exchange(ref c.CommFailures, 0);
|
||||
if (!preserveDemote)
|
||||
{
|
||||
Interlocked.Exchange(ref c.DemoteCount, 0);
|
||||
Interlocked.Exchange(ref c.LastDemotedUtcTicks, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Reset every tracked device. Called on full <c>ShutdownAsync</c>.</summary>
|
||||
public void ResetAll()
|
||||
/// <remarks>
|
||||
/// PR ablegacy-12 / #255 — when <paramref name="preserveDemote"/> is <c>true</c> the
|
||||
/// cumulative demote counters survive a per-device reset of every other field.
|
||||
/// The default (<c>false</c>) clears the dictionary outright, which is what
|
||||
/// <see cref="AbLegacyDriver.ShutdownAsync"/> wants.
|
||||
/// </remarks>
|
||||
public void ResetAll(bool preserveDemote = false)
{
    if (!preserveDemote)
    {
        // Default mode: forget every tracked device, demote history included.
        lock (_lock)
        {
            _counters.Clear();
        }

        return;
    }

    // Preserve mode: keep the dictionary keys + cumulative demote fields, but
    // zero everything else. The dictionary must NOT be cleared here — doing so
    // would leave nothing to iterate and would silently discard the very
    // DemoteCount / LastDemotedUtc history this mode exists to keep.
    lock (_lock)
    {
        // Iterate over a copy of the keys so the per-device Reset call never
        // runs against a live enumerator of the dictionary.
        foreach (var key in _counters.Keys.ToList())
        {
            Reset(key, preserveDemote: true);
        }
    }
}
|
||||
|
||||
@@ -205,6 +283,11 @@ public sealed class AbLegacyDiagnosticTags
|
||||
"LastErrorCode" => snapshot.LastErrorCode,
|
||||
"LastErrorMessage" => snapshot.LastErrorMessage,
|
||||
"CommFailures" => snapshot.CommFailures,
|
||||
// PR ablegacy-12 / #255 — auto-demote surface. LastDemotedUtc returns
|
||||
// the empty string when no demotion has happened yet, mirroring the
|
||||
// LastErrorMessage convention so HMIs can bind directly to a string.
|
||||
"DemoteCount" => snapshot.DemoteCount,
|
||||
"LastDemotedUtc" => snapshot.LastDemotedUtc?.ToString("o") ?? string.Empty,
|
||||
_ => null,
|
||||
};
|
||||
return true;
|
||||
@@ -236,6 +319,11 @@ public sealed class AbLegacyDiagnosticTags
|
||||
public int LastErrorCode;
|
||||
public string? LastErrorMessage = string.Empty;
|
||||
public long CommFailures;
|
||||
// PR ablegacy-12 / #255 — cumulative across config redeploys. Cleared only
|
||||
// on full driver process restart (the dictionary is rebuilt from scratch);
|
||||
// ReinitializeAsync uses preserveDemote: true.
|
||||
public long DemoteCount;
|
||||
public long LastDemotedUtcTicks;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -251,6 +339,9 @@ public sealed class AbLegacyDiagnosticTags
|
||||
/// <param name="LastErrorCode">Most recent libplctag status code on a failed read.</param>
|
||||
/// <param name="LastErrorMessage">Most recent libplctag error message on a failed read.</param>
|
||||
/// <param name="CommFailures">Count of read failures mapped to <c>BadCommunicationError</c>.</param>
|
||||
/// <param name="DemoteCount">PR ablegacy-12 / #255 — cumulative auto-demote events.</param>
|
||||
/// <param name="LastDemotedUtc">PR ablegacy-12 / #255 — UTC timestamp of the most
|
||||
/// recent demotion, or <c>null</c> if the device has never been demoted.</param>
|
||||
public sealed record DiagnosticsSnapshot(
|
||||
long Request,
|
||||
long Response,
|
||||
@@ -258,4 +349,6 @@ public sealed record DiagnosticsSnapshot(
|
||||
long Retry,
|
||||
int LastErrorCode,
|
||||
string LastErrorMessage,
|
||||
long CommFailures);
|
||||
long CommFailures,
|
||||
long DemoteCount,
|
||||
DateTime? LastDemotedUtc);
|
||||
|
||||
@@ -217,6 +217,20 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
|
||||
public async Task ReinitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
|
||||
{
|
||||
// PR ablegacy-12 / #255 — capture the cumulative DemoteCount + LastDemotedUtc
|
||||
// for every currently-tracked device before we tear down. The Shutdown below
|
||||
// calls ResetAll() which clears the dictionary; the per-host InitializeAsync
|
||||
// below re-EnsureDevice's the slots; we restore the cumulative demote
|
||||
// history so an operator who redeploys mid-incident doesn't lose the trail
|
||||
// of how often this device was flapping.
|
||||
var preservedDemote = new Dictionary<string, (long DemoteCount, DateTime? LastDemotedUtc)>(
|
||||
StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var (host, _) in _devices)
|
||||
{
|
||||
var snap = _diagnosticTags.Snapshot(host);
|
||||
preservedDemote[host] = (snap.DemoteCount, snap.LastDemotedUtc);
|
||||
}
|
||||
|
||||
await ShutdownAsync(cancellationToken).ConfigureAwait(false);
|
||||
// PR ablegacy-10 / #253 — counters were dropped along with the device map when
|
||||
// ShutdownAsync called ResetAll; the InitializeAsync below re-EnsureDevice's each
|
||||
@@ -224,6 +238,16 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
// here in case a downstream override of either method skips the cycle.
|
||||
_diagnosticTags.ResetAll();
|
||||
await InitializeAsync(driverConfigJson, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
// PR ablegacy-12 / #255 — restore the cumulative demote history. Only hosts
|
||||
// that survive the redeploy get their counters back; a device removed from
|
||||
// config legitimately drops its history (it isn't being tracked any more).
|
||||
foreach (var (host, (count, lastUtc)) in preservedDemote)
|
||||
{
|
||||
if (count == 0 && lastUtc is null) continue;
|
||||
if (!_devices.ContainsKey(host)) continue;
|
||||
_diagnosticTags.RestoreDemote(host, count, lastUtc);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task ShutdownAsync(CancellationToken cancellationToken)
|
||||
@@ -275,6 +299,38 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
internal int ResolveRetries(DeviceState device) =>
|
||||
device.Options.Retries ?? _options.Retries ?? 0;
|
||||
|
||||
/// <summary>
/// PR ablegacy-12 / #255 — picks the <see cref="AbLegacyDemoteOptions"/> that apply to
/// a device. Options configured on the device are returned as-is; when the device has
/// none, a default-constructed instance is returned (3 failures / 30 s / enabled).
/// The result is never <c>null</c>.
/// </summary>
/// <param name="device">Device whose per-device options are consulted.</param>
/// <returns>The per-device options, or a fresh default instance.</returns>
internal AbLegacyDemoteOptions ResolveDemote(DeviceState device)
{
    var perDevice = device.Options.Demote;
    return perDevice is not null ? perDevice : new AbLegacyDemoteOptions();
}
|
||||
|
||||
/// <summary>
/// PR ablegacy-12 / #255 — shared bookkeeping for a single comm failure. The
/// consecutive-failure counter is always incremented; the demote window is armed only
/// when demotion is enabled, the counter has reached the threshold, and no window is
/// currently active.
/// </summary>
/// <param name="state">Device that just failed.</param>
/// <param name="nowUtc">Current time; used both to test the active window and to compute
/// the new window's expiry.</param>
/// <returns>
/// <c>true</c> when this call tipped the device into Demoted (so the caller can fire
/// <see cref="OnHostStatusChanged"/>); <c>false</c> when demotion is disabled, the
/// threshold has not been reached, or a demote window is still running.
/// </returns>
private bool RecordFailureAndMaybeDemote(DeviceState state, DateTime nowUtc)
{
    var options = ResolveDemote(state);

    // The tally ticks on every failure, even when demotion is switched off.
    var failureRun = Interlocked.Increment(ref state.ConsecutiveFailures);

    if (!options.Enabled)
    {
        return false;
    }

    if (failureRun < options.FailureThreshold)
    {
        return false;
    }

    // A window that is still running is left untouched. Re-arming it on every
    // further failure would keep pushing the expiry out, so reads against a
    // fully-down device would stay suppressed forever; recovery comes from the
    // probe loop or from the original window running out.
    var windowStillActive = state.DemotedUntilUtc is { } activeUntil && activeUntil > nowUtc;
    if (windowStillActive)
    {
        return false;
    }

    state.DemotedUntilUtc = nowUtc + options.EffectiveDemoteFor;
    _diagnosticTags.RecordDemote(state.Options.HostAddress, nowUtc);
    return true;
}
|
||||
|
||||
// ---- IReadable ----
|
||||
|
||||
public async Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(
|
||||
@@ -323,6 +379,45 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
// double-counting the original attempt as a retry.
|
||||
_diagnosticTags.RecordRequest(def.DeviceHostAddress);
|
||||
|
||||
// PR ablegacy-12 / #255 — auto-demote short-circuit. When the device's demote
|
||||
// window is still active we return BadCommunicationError immediately, without
|
||||
// touching libplctag or its retry loop. That's the whole point of the feature:
|
||||
// one slow PLC sharing the driver thread can't drag down healthy peers. We
|
||||
// don't bump ErrorCount/CommFailures here because this isn't a fresh field
|
||||
// failure — it's the cool-down on a previously-counted one.
|
||||
if (device.DemotedUntilUtc is { } demotedUntil)
|
||||
{
|
||||
if (demotedUntil > now)
|
||||
{
|
||||
results[i] = new DataValueSnapshot(null,
|
||||
AbLegacyStatusMapper.BadCommunicationError, null, now);
|
||||
continue;
|
||||
}
|
||||
// Window expired without an early-clear from a probe success — drop the
|
||||
// marker but don't reset ConsecutiveFailures yet. If this read also
|
||||
// fails the failure tally keeps counting from where it left off, so a
|
||||
// permanently-down device re-arms the window after one more
|
||||
// consecutive failure (vs. having to repeat the full threshold).
|
||||
lock (device.ProbeLock)
|
||||
{
|
||||
if (device.DemotedUntilUtc is { } stillUntil && stillUntil <= now)
|
||||
{
|
||||
device.DemotedUntilUtc = null;
|
||||
// Mirror Stopped→Running on a probe-driven recovery: leave the
|
||||
// HostState transition to the probe loop (or the upcoming success
|
||||
// below); we just clear the cool-down marker so the next read
|
||||
// dispatches normally.
|
||||
if (device.HostState == HostState.Demoted)
|
||||
{
|
||||
// Surface a transition out of Demoted. The probe loop will
|
||||
// bring it Running once a probe succeeds; until then leave
|
||||
// it in Stopped to reflect "we don't actually know it's up".
|
||||
TransitionDeviceState(device, HostState.Stopped);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PR 9 — per-device retry loop: on transient BadCommunicationError (libplctag throw
|
||||
// OR a non-zero status that maps to BadCommunicationError) retry up to N times. A
|
||||
// terminal mapped status (e.g. BadNodeIdUnknown for a missing PLC tag, BadTypeMismatch
|
||||
@@ -360,6 +455,15 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
status,
|
||||
$"libplctag status {status} reading {reference}",
|
||||
commFailure: mappedStatus == AbLegacyStatusMapper.BadCommunicationError);
|
||||
// PR ablegacy-12 / #255 — only comm failures count toward the
|
||||
// demote tally. A BadNodeIdUnknown / BadTypeMismatch is a config
|
||||
// / decoder mismatch, not a sign the host is unreachable, so
|
||||
// demoting on it would punish the operator for a typo.
|
||||
if (mappedStatus == AbLegacyStatusMapper.BadCommunicationError
|
||||
&& RecordFailureAndMaybeDemote(device, now))
|
||||
{
|
||||
TransitionDeviceState(device, HostState.Demoted);
|
||||
}
|
||||
snapshot = new DataValueSnapshot(null, mappedStatus, null, now);
|
||||
_health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead,
|
||||
$"libplctag status {status} reading {reference}");
|
||||
@@ -385,6 +489,13 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
_health = new DriverHealth(DriverState.Healthy, now, null);
|
||||
// PR ablegacy-10 / #253 — successful array read.
|
||||
_diagnosticTags.RecordResponse(def.DeviceHostAddress);
|
||||
// PR ablegacy-12 / #255 — successful read clears the
|
||||
// consecutive-failure tally. We do NOT auto-clear DemotedUntilUtc
|
||||
// here — the demote window is honoured to its full duration so an
|
||||
// intermittent link that just happened to answer once doesn't
|
||||
// immediately re-flood the channel. Probe success is the early
|
||||
// recovery path.
|
||||
Interlocked.Exchange(ref device.ConsecutiveFailures, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -398,6 +509,10 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
_health = new DriverHealth(DriverState.Healthy, now, null);
|
||||
// PR ablegacy-10 / #253 — successful scalar / sub-element / bit read.
|
||||
_diagnosticTags.RecordResponse(def.DeviceHostAddress);
|
||||
// PR ablegacy-12 / #255 — successful read clears the
|
||||
// consecutive-failure tally; demote window keeps running
|
||||
// until a probe success or natural expiry.
|
||||
Interlocked.Exchange(ref device.ConsecutiveFailures, 0);
|
||||
break;
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
@@ -414,6 +529,12 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
libplctagStatus: 0,
|
||||
errorMessage: ex.Message,
|
||||
commFailure: true);
|
||||
// PR ablegacy-12 / #255 — exception-driven comm failure counts
|
||||
// toward the demote tally just like a status-mapped one.
|
||||
if (RecordFailureAndMaybeDemote(device, now))
|
||||
{
|
||||
TransitionDeviceState(device, HostState.Demoted);
|
||||
}
|
||||
snapshot = new DataValueSnapshot(null,
|
||||
AbLegacyStatusMapper.BadCommunicationError, null, now);
|
||||
_health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, ex.Message);
|
||||
@@ -591,6 +712,14 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
"Most recent libplctag error message on a failed read; empty when no error has been seen since the last reset.");
|
||||
EmitDiagnosticVariable(diag, deviceHostAddress, "CommFailures", DriverDataType.Int64,
|
||||
"Count of read failures mapped to BadCommunicationError. Spans transient libplctag throws + retried-out chains so operators see a single 'wire fell off' counter.");
|
||||
// PR ablegacy-12 / #255 — auto-demote surface. DemoteCount is cumulative
|
||||
// across reinit (preserved in ReinitializeAsync); LastDemotedUtc is a
|
||||
// string (ISO-8601 UTC) so HMIs can bind directly without a separate
|
||||
// DateTime decoder. Empty string means "never demoted".
|
||||
EmitDiagnosticVariable(diag, deviceHostAddress, "DemoteCount", DriverDataType.Int64,
|
||||
"Cumulative auto-demote events for this device — bumps every time the driver crosses the consecutive-failure threshold and arms a fresh cool-down window. Survives ReinitializeAsync.");
|
||||
EmitDiagnosticVariable(diag, deviceHostAddress, "LastDemotedUtc", DriverDataType.String,
|
||||
"ISO-8601 UTC timestamp of the most recent auto-demotion; empty when this device has never been demoted.");
|
||||
}
|
||||
|
||||
private static void EmitDiagnosticVariable(
|
||||
@@ -665,7 +794,39 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
state.ProbeInitialized = false;
|
||||
}
|
||||
|
||||
TransitionDeviceState(state, success ? HostState.Running : HostState.Stopped);
|
||||
// PR ablegacy-12 / #255 — probe success is the early-recovery path: clear
|
||||
// any active demote window + reset the failure tally so the next read
|
||||
// dispatches normally. Probe failure participates in the same shared
|
||||
// failure-tally as ReadAsync so a device with no live read traffic still
|
||||
// demotes on a sustained outage.
|
||||
if (success)
|
||||
{
|
||||
bool wasDemoted;
|
||||
lock (state.ProbeLock)
|
||||
{
|
||||
wasDemoted = state.DemotedUntilUtc is not null;
|
||||
state.DemotedUntilUtc = null;
|
||||
}
|
||||
Interlocked.Exchange(ref state.ConsecutiveFailures, 0);
|
||||
TransitionDeviceState(state, HostState.Running);
|
||||
_ = wasDemoted; // intentionally observed for future telemetry hooks
|
||||
}
|
||||
else
|
||||
{
|
||||
if (RecordFailureAndMaybeDemote(state, DateTime.UtcNow))
|
||||
{
|
||||
TransitionDeviceState(state, HostState.Demoted);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Mid-tally probe failure: surface as Stopped if not already
|
||||
// Demoted. This preserves pre-PR-12 behaviour for the common
|
||||
// case (FailureThreshold=3 + a single hiccup ends up Stopped,
|
||||
// not Demoted).
|
||||
if (state.HostState != HostState.Demoted)
|
||||
TransitionDeviceState(state, HostState.Stopped);
|
||||
}
|
||||
}
|
||||
|
||||
try { await Task.Delay(_options.Probe.Interval, ct).ConfigureAwait(false); }
|
||||
catch (OperationCanceledException) { break; }
|
||||
@@ -890,6 +1051,25 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
public CancellationTokenSource? ProbeCts { get; set; }
|
||||
public bool ProbeInitialized { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// PR ablegacy-12 / #255 — running tally of consecutive read / probe failures.
|
||||
/// Reset on every successful read or probe; tripping
|
||||
/// <see cref="AbLegacyDemoteOptions.FailureThreshold"/> arms the demote window.
|
||||
/// Read + written via <see cref="Interlocked"/> because read + probe loops can
|
||||
/// touch it concurrently.
|
||||
/// </summary>
|
||||
public int ConsecutiveFailures;
|
||||
|
||||
/// <summary>
/// PR ablegacy-12 / #255 — UTC expiry of the active demote window, or <c>null</c> when
/// no window is armed. While the timestamp lies in the future, reads against this
/// device short-circuit with <c>BadCommunicationError</c>. The value is set by
/// <c>RecordFailureAndMaybeDemote</c> and cleared either by a successful probe or by
/// the read path once the window has expired; both clearing paths run under
/// <see cref="ProbeLock"/>. The read path's initial check takes no lock — a stale
/// value there costs at most one extra dispatched (or one extra suppressed) read.
/// </summary>
/// <remarks>
/// Typed as <see cref="DateTime"/> (not <c>DateTimeOffset</c>) to match the
/// <c>DateTime</c> timestamps every reader and writer of this property works with.
/// NOTE(review): confirm no caller outside this file depends on the former
/// <c>DateTimeOffset?</c> declaration.
/// </remarks>
public DateTime? DemotedUntilUtc { get; set; }
|
||||
|
||||
public void DisposeRuntimes()
|
||||
{
|
||||
foreach (var r in Runtimes.Values) r.Dispose();
|
||||
|
||||
@@ -45,7 +45,14 @@ public static class AbLegacyDriverFactoryExtensions
|
||||
DeviceName: d.DeviceName,
|
||||
// PR 9 — per-device timeout / retry overrides. Device-level wins over driver-wide.
|
||||
Timeout: d.TimeoutMs is int devMs ? TimeSpan.FromMilliseconds(devMs) : null,
|
||||
Retries: d.Retries))]
|
||||
Retries: d.Retries,
|
||||
// PR ablegacy-12 / #255 — auto-demote knobs.
|
||||
Demote: d.Demote is null ? null : new AbLegacyDemoteOptions(
|
||||
FailureThreshold: d.Demote.FailureThreshold ?? 3,
|
||||
DemoteFor: d.Demote.DemoteForMs is int demMs
|
||||
? TimeSpan.FromMilliseconds(demMs)
|
||||
: null,
|
||||
Enabled: d.Demote.Enabled ?? true)))]
|
||||
: [],
|
||||
Tags = dto.Tags is { Count: > 0 }
|
||||
? [.. dto.Tags.Select(t => new AbLegacyTagDefinition(
|
||||
@@ -209,6 +216,26 @@ public static class AbLegacyDriverFactoryExtensions
|
||||
/// <c>null</c> at both levels = single attempt.
|
||||
/// </summary>
|
||||
public int? Retries { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// PR ablegacy-12 / #255 — optional per-device auto-demote knobs. <c>null</c>
|
||||
/// means "use the documented defaults" (<c>FailureThreshold=3</c>,
|
||||
/// <c>DemoteFor=30s</c>, <c>Enabled=true</c>) — the driver still demotes by
|
||||
/// default. Set <c>Enabled=false</c> in the JSON to opt out entirely.
|
||||
/// </summary>
|
||||
public AbLegacyDemoteDto? Demote { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
/// PR ablegacy-12 / #255 — JSON shape of the per-device auto-demote settings. Every
/// member is optional; durations carry an <c>Ms</c> suffix like the other driver config
/// values (TimeoutMs, IntervalMs).
/// </summary>
internal sealed class AbLegacyDemoteDto
{
    /// <summary>Consecutive comm failures before the device is demoted; the factory substitutes <c>3</c> when omitted.</summary>
    public int? FailureThreshold { get; init; }

    /// <summary>Cool-down window in milliseconds; when omitted the factory passes <c>null</c> and the options fall back to 30 s.</summary>
    public int? DemoteForMs { get; init; }

    /// <summary>Whether auto-demote is active; the factory substitutes <c>true</c> when omitted.</summary>
    public bool? Enabled { get; init; }
}
|
||||
|
||||
internal sealed class AbLegacyTagDto
|
||||
|
||||
@@ -41,7 +41,39 @@ public sealed record AbLegacyDeviceOptions(
|
||||
AbLegacyPlcFamily PlcFamily = AbLegacyPlcFamily.Slc500,
|
||||
string? DeviceName = null,
|
||||
TimeSpan? Timeout = null,
|
||||
int? Retries = null);
|
||||
int? Retries = null,
|
||||
AbLegacyDemoteOptions? Demote = null);
|
||||
|
||||
/// <summary>
/// PR ablegacy-12 / #255 — settings for the auto-demote feature. Once a device has
/// failed <see cref="FailureThreshold"/> reads or probes in a row, the driver flags it
/// <c>Demoted</c> for <see cref="DemoteFor"/>. During that window reads are answered
/// with <c>BadCommunicationError</c> straight away rather than going through
/// libplctag, which keeps one slow PLC from starving faster peers on the same driver.
/// A successful probe lifts the demotion early; a successful read only resets the
/// consecutive-failure counter and leaves the window running.
/// </summary>
/// <param name="FailureThreshold">Number of consecutive read or probe failures that
/// triggers the demotion. Default <c>3</c>.</param>
/// <param name="DemoteFor">Length of the cool-down window after which reads are
/// dispatched again even without a successful probe. <c>null</c> selects the
/// 30-second default (see <see cref="EffectiveDemoteFor"/>).</param>
/// <param name="Enabled">When <c>false</c> the failure tally keeps counting but the
/// demoted window is never set — for operators who want the diagnostic counters
/// without the throttling.</param>
public sealed record AbLegacyDemoteOptions(
    int FailureThreshold = 3,
    TimeSpan? DemoteFor = null,
    bool Enabled = true)
{
    /// <summary>
    /// The demote window actually applied. A <c>TimeSpan</c> cannot be used as a
    /// positional-parameter default because it is not a compile-time constant, so the
    /// documented 30-second default is supplied here whenever <see cref="DemoteFor"/>
    /// is <c>null</c>.
    /// </summary>
    public TimeSpan EffectiveDemoteFor
    {
        get
        {
            if (DemoteFor is { } configuredWindow)
            {
                return configuredWindow;
            }

            return TimeSpan.FromSeconds(30);
        }
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// One PCCC-backed OPC UA variable. <c>Address</c> is the canonical PCCC file-address
|
||||
|
||||
@@ -138,6 +138,11 @@ public sealed class HostStatusPublisher(
|
||||
HostState.Running => DriverHostState.Running,
|
||||
HostState.Stopped => DriverHostState.Stopped,
|
||||
HostState.Faulted => DriverHostState.Faulted,
|
||||
// PR ablegacy-12 / #255 — Demoted is a driver-side back-off (skipped reads while
|
||||
// we wait for a flaky host to recover). The Configuration enum doesn't have a
|
||||
// dedicated value; surface it as Stopped so the Admin UI lights it up red-ish
|
||||
// without the publisher needing a schema migration to differentiate.
|
||||
HostState.Demoted => DriverHostState.Stopped,
|
||||
_ => DriverHostState.Unknown,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user