Auto: abcip-5.2 — HSBY failover routing in ResolveHost

Closes #243
This commit is contained in:
Joseph Doherty
2026-04-26 08:13:41 -04:00
parent 258ce8e937
commit 9e157fc8a4
5 changed files with 1031 additions and 42 deletions

View File

@@ -44,6 +44,24 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
private IAddressSpaceBuilder? _cachedBuilder;
private DriverHealth _health = new(DriverState.Unknown, null, null);
// PR abcip-5.2 — failover bookkeeping. Counter is surfaced through driver-diagnostics
// as AbCip.HsbyFailoverCount; the event lets internal subscribers react to an
// ActiveAddress flip without HsbyProbeLoopAsync calling deep into the runtime cache
// directly. The driver subscribes itself in the constructor so cache invalidation +
// write-coalescer reset run inline with the address-change observation.
private long _hsbyFailoverCount;
/// <summary>
/// PR abcip-5.2 — raised by <see cref="HsbyProbeLoopAsync"/> whenever a device's
/// <see cref="DeviceState.ActiveAddress"/> transitions to a value different from
/// the one observed on the previous tick. Old and new values are compared with
/// <see cref="StringComparison.OrdinalIgnoreCase"/>, so a case-only difference is
/// not a transition, while a change to or from <c>null</c> (no chassis Active) is.
/// Args carry the device + the (oldAddress, newAddress) pair so subscribers can
/// decide whether the change matters for them. Internal seam — the driver wires
/// its own runtime-cache / write-coalescer invalidation through this event so the
/// bookkeeping runs in one place + tests can assert via the public diagnostics
/// counter.
/// <para>
/// The probe loop assigns the new active address before raising the event, and
/// catches any exception a handler throws, reporting it through the options'
/// <c>OnWarning</c> callback so the loop keeps ticking.
/// </para>
/// </summary>
internal event EventHandler<HsbyActiveAddressChangedEventArgs>? OnActiveAddressChanged;
public event EventHandler<DataChangeEventArgs>? OnDataChange;
public event EventHandler<HostStatusChangedEventArgs>? OnHostStatusChanged;
public event EventHandler<AlarmEventArgs>? OnAlarmEvent;
@@ -67,6 +85,12 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
onChange: (handle, tagRef, snapshot) =>
OnDataChange?.Invoke(this, new DataChangeEventArgs(handle, tagRef, snapshot)));
_alarmProjection = new AbCipAlarmProjection(this, _options.AlarmPollInterval);
// PR abcip-5.2 — wire the failover-handling subscriber. Drops every cached per-tag
// / parent-DINT runtime against the now-standby gateway, resets the write-coalescer
// (the prior known-written values were against the standby chassis), clears the
// logical-walk state so the @tags walk reruns against the new active gateway, and
// bumps the diagnostics counter that BuildDiagnostics surfaces.
OnActiveAddressChanged += HandleActiveAddressChanged;
}
/// <summary>
@@ -258,6 +282,13 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
&& !string.IsNullOrWhiteSpace(state.Options.PartnerHostAddress))
{
state.PartnerAddress = state.Options.PartnerHostAddress;
// PR abcip-5.2 — pre-parse the partner address once so the runtime hot
// path can swap (Gateway, Port, CipPath) without re-parsing on every
// ResolveHost / EnsureTagRuntimeAsync call. A bad partner address is a
// hard config error already flagged by HsbyProbeLoopAsync's TryParse +
// OnWarning path, so a TryParse miss here is non-fatal — the runtime
// never resolves to it because PartnerParsedAddress stays null.
state.PartnerParsedAddress = AbCipHostAddress.TryParse(state.Options.PartnerHostAddress!);
state.HsbyCts = new CancellationTokenSource();
var ct = state.HsbyCts.Token;
_ = Task.Run(() => HsbyProbeLoopAsync(state, hsby, ct), ct);
@@ -784,7 +815,28 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
// No chassis Active — clear so PR abcip-5.2's ResolveHost can fault writes.
newActive = null;
}
// PR abcip-5.2 — fire OnActiveAddressChanged on every transition so the
// runtime-cache invalidation handler runs exactly once per flip. We compare
// before assigning so a steady-state tick (Active didn't change) is a no-op.
var prevActive = state.ActiveAddress;
state.ActiveAddress = newActive;
if (!string.Equals(prevActive, newActive, StringComparison.OrdinalIgnoreCase))
{
try
{
OnActiveAddressChanged?.Invoke(this,
new HsbyActiveAddressChangedEventArgs(state, prevActive, newActive));
}
catch (Exception ex)
{
// A handler that throws must never tear the probe loop down. Surface
// the failure through the warning sink + keep ticking; the next flip
// gets another shot at invalidation.
_options.OnWarning?.Invoke(
$"AbCip HSBY active-address-changed handler threw on " +
$"primary='{state.Options.HostAddress}' partner='{partnerAddress}': {ex.Message}");
}
}
try { await Task.Delay(hsby.ProbeInterval, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { break; }
@@ -836,6 +888,46 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
}
}
/// <summary>
/// PR abcip-5.2 — invalidation hook for an HSBY failover. Disposes every cached
/// per-tag / parent-DINT runtime on the device so the next read / write re-creates
/// against the new Active gateway, resets the write-coalescer's per-device cache
/// (the prior known-written values were against the now-standby chassis), wipes
/// the Logical-mode @tags walk so the new chassis gets a fresh symbol-table
/// resolution, and bumps the AbCip.HsbyFailoverCount diagnostic.
/// <para>
/// Every invocation performs the full invalidation and increments the counter;
/// this handler does not de-duplicate notifications, so the raiser is responsible
/// for only firing on a real address transition.
/// </para>
/// <para>
/// Disposal is best-effort: a runtime whose <c>Dispose</c> throws is reported
/// through the options' <c>OnWarning</c> callback and skipped, so the remaining
/// handles are still released and both caches are still cleared.
/// </para>
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Carries the device whose active address changed.</param>
private void HandleActiveAddressChanged(object? sender, HsbyActiveAddressChangedEventArgs e)
{
    var state = e.Device;
    var primaryHost = state.Options.HostAddress;

    // Surface a failed Dispose instead of swallowing it silently: a handle that
    // could not be released is worth a warning, but must not stop the invalidation.
    void ReportDisposeFailure(string runtimeKind, Exception ex) =>
        _options.OnWarning?.Invoke(
            $"AbCip HSBY failover invalidation could not dispose a {runtimeKind} runtime " +
            $"on primary='{primaryHost}': {ex.Message}");

    // Drop the runtime cache. The runtime creators repopulate against the new active
    // gateway on next read/write; disposing the handles releases their native
    // resources rather than leaving them to leak.
    foreach (var rt in state.Runtimes.Values)
    {
        try { rt.Dispose(); }
        catch (Exception ex) { ReportDisposeFailure("tag", ex); }
    }
    state.Runtimes.Clear();

    foreach (var rt in state.ParentRuntimes.Values)
    {
        try { rt.Dispose(); }
        catch (Exception ex) { ReportDisposeFailure("parent-DINT", ex); }
    }
    state.ParentRuntimes.Clear();

    // Reset the @tags symbol-table walk so the new chassis re-fires it on next read;
    // the standby chassis's instance IDs don't transfer to the now-Active partner.
    state.LogicalInstanceMap.Clear();
    state.LogicalWalkComplete = false;

    // Reset the write-coalescer so the first post-flip write of any value pays the
    // full round-trip and the cache rebuilds from the new baseline. The coalescer is
    // reset under the configured primary host address.
    _writeCoalescer.Reset(primaryHost);

    // Clear the per-device runtimes-address marker so the next runtime creator stamps
    // it with whatever address is active at that point.
    state.RuntimesAddress = null;

    Interlocked.Increment(ref _hsbyFailoverCount);
}
private void TransitionDeviceState(DeviceState state, HostState newState)
{
HostState old;
@@ -911,11 +1003,34 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
if (AbCipSystemTagSource.IsSystemReference(fullReference))
{
var host = ExtractSystemDeviceHost(fullReference);
if (host is not null) return host;
if (host is not null) return ResolveActiveHostFor(host);
}
if (_tagsByName.TryGetValue(fullReference, out var def))
return def.DeviceHostAddress;
return _options.Devices.FirstOrDefault()?.HostAddress ?? DriverInstanceId;
return ResolveActiveHostFor(def.DeviceHostAddress);
return ResolveActiveHostFor(_options.Devices.FirstOrDefault()?.HostAddress ?? DriverInstanceId);
}
/// <summary>
/// PR abcip-5.2 — failover-aware bulkhead-key resolver. Device state is always
/// looked up by the configured primary <c>HostAddress</c>, which never changes for
/// a given device; the value returned here is what the resilience pipeline (Polly
/// bulkhead + breaker per plan decision #144) keys on. With HSBY enabled and a
/// non-empty <see cref="DeviceState.ActiveAddress"/>, that active address is
/// returned, so a partner that has become Active gets its own fresh breaker state
/// instead of inheriting the now-standby's tripped breaker.
/// <para>
/// In every other case — unknown device, HSBY not enabled, or no chassis
/// currently Active — the input address is returned unchanged, which is the
/// legacy pre-5.2 behaviour and keeps the bulkhead state stable for the dial
/// flow's BadCommunicationError surface.
/// </para>
/// </summary>
/// <param name="deviceHostAddress">Configured primary host address of the device.</param>
/// <returns>The active address when one is known, otherwise <paramref name="deviceHostAddress"/>.</returns>
internal string ResolveActiveHostFor(string deviceHostAddress)
{
    // All three preconditions must hold to re-route; any miss falls through to the
    // caller-supplied key. ActiveAddress is read once and bound to a local.
    if (_devices.TryGetValue(deviceHostAddress, out var device)
        && device.Options.Hsby is { Enabled: true }
        && device.ActiveAddress is { Length: > 0 } liveAddress)
    {
        return liveAddress;
    }

    return deviceHostAddress;
}
/// <summary>
@@ -1367,10 +1482,12 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
{
sliceLogicalId = sliceId;
}
// PR abcip-5.2 — slice handles also follow the active address.
var sliceActive = device.ActiveParsedAddress;
var baseParams = new AbCipTagCreateParams(
Gateway: device.ParsedAddress.Gateway,
Port: device.ParsedAddress.Port,
CipPath: device.ParsedAddress.CipPath,
Gateway: sliceActive.Gateway,
Port: sliceActive.Port,
CipPath: sliceActive.CipPath,
LibplctagPlcAttribute: device.Profile.LibplctagPlcAttribute,
TagName: parsedPath.ToLibplctagName(),
Timeout: _options.Timeout,
@@ -1439,6 +1556,13 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
throw;
}
device.Runtimes[tagName] = runtime;
// PR abcip-5.2 — keep the slice path's runtime cache lifecycle in lockstep with
// the per-tag handles. The failover handler clears Runtimes wholesale, so the
// address stamp here matches whatever ActiveAddress resolved to when the slice
// params were built (the caller passed createParams pre-resolved).
device.RuntimesAddress = device.Options.Hsby is { Enabled: true }
? device.ActiveAddress ?? device.Options.HostAddress
: device.Options.HostAddress;
return runtime;
}
@@ -1859,10 +1983,13 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
{
parentLogicalId = pid;
}
// PR abcip-5.2 — same active-address routing as EnsureTagRuntimeAsync so
// BOOL-in-DINT RMW handles follow the failover.
var active = device.ActiveParsedAddress;
var runtime = _tagFactory.Create(new AbCipTagCreateParams(
Gateway: device.ParsedAddress.Gateway,
Port: device.ParsedAddress.Port,
CipPath: device.ParsedAddress.CipPath,
Gateway: active.Gateway,
Port: active.Port,
CipPath: active.CipPath,
LibplctagPlcAttribute: device.Profile.LibplctagPlcAttribute,
TagName: parentTagName,
Timeout: _options.Timeout,
@@ -1879,6 +2006,9 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
throw;
}
device.ParentRuntimes[parentTagName] = runtime;
device.RuntimesAddress = device.Options.Hsby is { Enabled: true }
? device.ActiveAddress ?? device.Options.HostAddress
: device.Options.HostAddress;
return runtime;
}
@@ -1906,10 +2036,15 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
logicalId = resolvedId;
}
// PR abcip-5.2 — route through the resolved active address so an HSBY pair that
// failed-over to the partner targets the partner's gateway / port / cip-path.
// When HSBY is off or no chassis is Active the getter returns ParsedAddress and
// behaviour is identical to pre-5.2 builds.
var active = device.ActiveParsedAddress;
var runtime = _tagFactory.Create(new AbCipTagCreateParams(
Gateway: device.ParsedAddress.Gateway,
Port: device.ParsedAddress.Port,
CipPath: device.ParsedAddress.CipPath,
Gateway: active.Gateway,
Port: active.Port,
CipPath: active.CipPath,
LibplctagPlcAttribute: device.Profile.LibplctagPlcAttribute,
TagName: parsed.ToLibplctagName(),
Timeout: _options.Timeout,
@@ -1927,6 +2062,12 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
throw;
}
device.Runtimes[def.Name] = runtime;
// Stamp the per-device runtimes-address marker so the failover handler can detect
// a stale cache. Compared in DEBUG builds + diagnostics; production code routes
// invalidation through OnActiveAddressChanged.
device.RuntimesAddress = device.Options.Hsby is { Enabled: true }
? device.ActiveAddress ?? device.Options.HostAddress
: device.Options.HostAddress;
return runtime;
}
@@ -1951,6 +2092,11 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
["AbCip.WritesPassedThrough"] = _writeCoalescer.TotalWritesPassedThrough,
// PR abcip-4.4 — total _RefreshTagDb truthy writes that dispatched to RebrowseAsync.
["AbCip.RefreshTriggers"] = _systemTagSource.TotalRefreshTriggers,
// PR abcip-5.2 — count of HSBY active-address transitions the probe loop has
// observed. Aggregated across every HSBY-enabled device on this driver
// instance; the per-device breakdown is observable via the per-pair role
// counters below.
["AbCip.HsbyFailoverCount"] = Interlocked.Read(ref _hsbyFailoverCount),
};
// PR abcip-5.1 — HSBY role surface. One <Counter> per HSBY-enabled device:
// AbCip.HsbyActive — 1 if ActiveAddress == primary, 2 if == partner, 0 otherwise.
@@ -2368,6 +2514,49 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
/// </summary>
public string? PartnerAddress { get; set; }
/// <summary>
/// PR abcip-5.2 — parsed form of <see cref="PartnerAddress"/>, populated at init
/// from <c>AbCipHostAddress.TryParse</c> when HSBY is configured with a partner
/// host address. Stays <c>null</c> when HSBY is not configured or the partner
/// address does not parse. The **runtime hot path** consults
/// <see cref="ActiveParsedAddress"/>, which only hands this value out when it is
/// non-null and the active address matches <see cref="PartnerAddress"/>, so
/// libplctag handles target the currently Active gateway / port / cip-path.
/// </summary>
public AbCipHostAddress? PartnerParsedAddress { get; set; }
/// <summary>
/// PR abcip-5.2 — parsed wire address that per-tag / per-slice / parent-DINT
/// runtimes should be created against right now. Yields
/// <see cref="PartnerParsedAddress"/> only when all of the following hold: HSBY is
/// enabled, <see cref="ActiveAddress"/> is non-null, a parsed partner address is
/// available, and the active address equals <see cref="PartnerAddress"/>
/// (case-insensitive ordinal comparison). In every other situation — HSBY off, no
/// chassis Active, or the primary is the Active one — it yields
/// <see cref="ParsedAddress"/>, the configured primary. Cheap getter — every
/// tag-runtime creation calls it.
/// </summary>
public AbCipHostAddress ActiveParsedAddress
{
    get
    {
        var hsbyEnabled = Options.Hsby is { Enabled: true };

        // The partner is the target only when it is demonstrably the live chassis.
        if (hsbyEnabled
            && ActiveAddress is not null
            && PartnerParsedAddress is { } partner
            && string.Equals(ActiveAddress, PartnerAddress, StringComparison.OrdinalIgnoreCase))
        {
            return partner;
        }

        return ParsedAddress;
    }
}
/// <summary>
/// PR abcip-5.2 — address stamp for the entries in <see cref="Runtimes"/> +
/// <see cref="ParentRuntimes"/>. Written each time a runtime is created: with HSBY
/// enabled it is the current <see cref="ActiveAddress"/>, falling back to the
/// configured primary host address when no chassis is Active; with HSBY off it is
/// the configured primary host address. The driver's active-address-changed
/// handler drops every cached handle unconditionally and then resets this marker
/// to <c>null</c>, so <c>null</c> means no runtime has been created since
/// construction or since the last failover invalidation.
/// </summary>
public string? RuntimesAddress { get; set; }
/// <summary>PR abcip-5.1 — most-recent role observed on the primary chassis.</summary>
public HsbyRole PrimaryRole { get; set; } = HsbyRole.Unknown;
@@ -2420,3 +2609,26 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
}
}
}
/// <summary>
/// PR abcip-5.2 — payload for the event <see cref="AbCipDriver"/> raises when the
/// HSBY probe loop sees <see cref="AbCipDriver.DeviceState.ActiveAddress"/> change.
/// Subscribers inspect <see cref="OldAddress"/> / <see cref="NewAddress"/> to decide
/// whether cached state needs invalidating. <see cref="OldAddress"/> is <c>null</c>
/// on the first transition (driver freshly initialised); <see cref="NewAddress"/>
/// is <c>null</c> when neither chassis is Active (both Standby / Disqualified /
/// Unknown).
/// </summary>
internal sealed class HsbyActiveAddressChangedEventArgs : EventArgs
{
    /// <summary>Captures the affected device and the address pair of the transition.</summary>
    /// <param name="device">Device whose active address changed.</param>
    /// <param name="oldAddress">Active address before the change, or <c>null</c>.</param>
    /// <param name="newAddress">Active address after the change, or <c>null</c>.</param>
    public HsbyActiveAddressChangedEventArgs(
        AbCipDriver.DeviceState device, string? oldAddress, string? newAddress)
        => (Device, OldAddress, NewAddress) = (device, oldAddress, newAddress);

    /// <summary>Device whose active address changed.</summary>
    public AbCipDriver.DeviceState Device { get; }

    /// <summary>Active address observed before the transition; may be <c>null</c>.</summary>
    public string? OldAddress { get; }

    /// <summary>Active address observed after the transition; may be <c>null</c>.</summary>
    public string? NewAddress { get; }
}