diff --git a/docs/Driver.AbCip.Cli.md b/docs/Driver.AbCip.Cli.md index 184a2f4..ca0ec57 100644 --- a/docs/Driver.AbCip.Cli.md +++ b/docs/Driver.AbCip.Cli.md @@ -21,6 +21,7 @@ dotnet run --project src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli -- --help | `-f` / `--family` | `ControlLogix` | ControlLogix / CompactLogix / Micro800 / GuardLogix | | `--timeout-ms` | `5000` | Per-operation timeout | | `--addressing-mode` | `Auto` | `Auto` / `Symbolic` / `Logical` — see [AbCip-Performance §Addressing mode](drivers/AbCip-Performance.md#addressing-mode). `Logical` against Micro800 silently falls back to Symbolic with a warning. | +| `--partner` | _(unset)_ | PR abcip-5.1 — partner gateway URI for a ControlLogix HSBY pair (e.g. `ab://10.0.0.6/1,0`). When set, the driver runs a second role-probe loop against the partner and the [`hsby-status`](#hsby-status--which-chassis-is-active-now) command can surface which chassis is currently Active. See [AbCip-HSBY.md](drivers/AbCip-HSBY.md) for the full guide. | | `--verbose` | off | Serilog debug output | Family ↔ CIP-path cheat sheet: @@ -89,6 +90,33 @@ otopcua-abcip-cli write -g ab://10.0.0.5/1,0 -t StartCommand --type Bool -v true otopcua-abcip-cli subscribe -g ab://10.0.0.5/1,0 -t Motor01_Speed --type Real -i 500 ``` +### `hsby-status` — which chassis is Active now? + +PR abcip-5.1 — read the role tag (`WallClockTime.SyncStatus` by default, +`S:34` for legacy SLC500 / PLC-5 fronts) on a ControlLogix HSBY pair and +print which chassis is currently Active. Requires `--partner`. + +```powershell +otopcua-abcip-cli hsby-status -g ab://10.0.0.5/1,0 --partner ab://10.0.0.6/1,0 + +# Custom role tag (legacy fronts) and more samples +otopcua-abcip-cli hsby-status -g ab://10.0.0.5/1,0 --partner ab://10.0.0.6/1,0 \ + --role-tag S:34 --samples 5 +``` + +| Flag | Default | Purpose | +|---|---|---| +| `--role-tag` | `WallClockTime.SyncStatus` | Address of the role tag. Use `S:34` for SLC500 / PLC-5. 
| +| `--samples` | `3` | Number of role-probe ticks to wait for before printing. | + +The output prints the resolved roles + the address of whichever chassis the +driver currently considers Active. PR abcip-5.1 only **reports** the role — +PR abcip-5.2 will land the routing change so reads / writes flow to the +Active chassis automatically. + +See [AbCip-HSBY.md](drivers/AbCip-HSBY.md) for the role-tag detection matrix ++ active-resolution rules + the feature-flag gate. + ### `rebrowse` — force a controller-side `@tags` re-walk PR abcip-2.5 (issue #233) added `RebrowseAsync` to drop the cached UDT diff --git a/docs/drivers/AbCip-HSBY.md b/docs/drivers/AbCip-HSBY.md new file mode 100644 index 0000000..436cdfb --- /dev/null +++ b/docs/drivers/AbCip-HSBY.md @@ -0,0 +1,185 @@ +# AbCip — ControlLogix HSBY paired-IP support + +PR abcip-5.1 adds **non-transparent** HSBY (Hot-Standby) awareness to the AB +CIP driver. Each device may declare a partner gateway; when both gateways are +up the driver concurrently probes a role tag on each chassis and reports +which one is currently Active. + +PR abcip-5.1 only **gathers + reports** the role. PR abcip-5.2 is the +follow-up that wires the resolved active address into +`AbCipDriver.ResolveHost` so reads and writes route to whichever chassis is +Active without operator intervention. + +## When to use HSBY paired IPs + +You have a redundant **ControlLogix** chassis pair (1756-RM redundancy +module, two CPUs, one acting + one standby) and the SCADA / OPC UA layer +needs to keep talking to *whichever chassis is currently Active* without an +operator manually re-pointing the connection. + +Pre-5.1 the driver only knew about a single `HostAddress`. After a +hot-standby switch-over, the standby (now Active) carried a **different IP** +and the driver kept probing the dead-but-was-Active address until someone +edited the config. + +PR abcip-5.1 closes the visibility half of that gap by reading the role tag +on both chassis. 
PR abcip-5.2 closes the routing half by re-pointing +`ResolveHost` at the Active address each tick. + +## Configuration + +```jsonc +{ + "Devices": [ + { + "HostAddress": "ab://10.0.0.5/1,0", + "PartnerHostAddress": "ab://10.0.0.6/1,0", + "Hsby": { + "Enabled": true, + "RoleTagAddress": "WallClockTime.SyncStatus", + "ProbeIntervalMs": 2000 + } + } + ] +} +``` + +| Field | Default | Notes | +|---|---|---| +| `PartnerHostAddress` | `null` | Canonical `ab://gateway[:port]/cip-path` of the partner chassis. `null` = no HSBY pair; the driver behaves exactly like every pre-5.1 build. | +| `Hsby.Enabled` | `false` | Master switch. When `false` (or `Hsby` omitted) no role probing happens, even if `PartnerHostAddress` is set. | +| `Hsby.RoleTagAddress` | `WallClockTime.SyncStatus` | Address of the role tag on each chassis. See [role-tag detection matrix](#role-tag-detection-matrix). | +| `Hsby.ProbeIntervalMs` | `2000` | How often each chassis is sampled. 2 s is a good default — tight enough to detect a switch-over within one Admin-UI refresh, loose enough to leave headroom for the regular probe loop. | + +## Feature-flag gate (`Redundancy.Hsby.Enabled`) + +`Hsby.Enabled = false` (the default) is the off-switch for the entire +feature. The role-probe loop never starts, the diagnostics keys are not +emitted, and the driver behaves identically to a pre-5.1 build. This is the +gate to flip when an operator wants to roll the feature out cautiously +across a fleet — set `Hsby.Enabled = true` per-device in driver config (no +build flag, no env var). + +When the gate is on but the partner gateway is unreachable, the role-probe +loop reports `HsbyRole.Unknown` for the partner each tick. The primary's +role still drives the active-chassis resolution; the operator sees the +partner's role as Unknown in the Admin UI / driver diagnostics, which is the +correct surface for "we can't reach the standby chassis right now." 
+ +## Role-tag detection matrix + +| Firmware / fronts | Address | Decode | +|---|---|---| +| **v20 / v24 / v32+ ControlLogix HSBY** | `WallClockTime.SyncStatus` (DINT) | `0` = Standby, `1` = Synchronized / Active, `2` = Disqualified, anything else = Unknown | +| **PLC-5 / SLC500 status-byte fallback** | `S:34` Module Status word | bit 0 = "this chassis is Active". Bit set → `Active`; clear → `Standby` | +| **Custom user role tag** | any DINT-typed CIP path | Same matrix as `WallClockTime.SyncStatus` (0 / 1 / 2). Out-of-range values → Unknown. | + +`AbCipHsbyRoleProber.MapValueToRole` is the value-to-role mapper; unit tests +in `tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Tests/AbCipHsbyTests.cs` pin every +row of the matrix. + +## What gets reported + +The driver surfaces three diagnostics counters per HSBY-enabled device +(visible via `driver-diagnostics` RPC + the Admin UI): + +| Counter | Value | +|---|---| +| `AbCip.HsbyActive` | `1` if primary is Active, `2` if partner is Active, `0` if neither (or HSBY off) | +| `AbCip.HsbyPrimaryRole` | `(int)HsbyRole` — `0` = Unknown, `1` = Active, `2` = Standby, `3` = Disqualified | +| `AbCip.HsbyPartnerRole` | Same encoding as `HsbyPrimaryRole`, observed on the partner chassis | + +When more than one HSBY pair is configured on the same driver instance the +flat keys are scoped per primary host: `AbCip.HsbyActive[ab://10.0.0.5/1,0]`, +etc. + +The `DeviceState.ActiveAddress` field (internal; surfaced via +`HsbyActive` diagnostics) is the address PR 5.2 will route through +`ResolveHost`. 
+ +### Active-resolution rules + +| Primary role | Partner role | `ActiveAddress` resolution | +|---|---|---| +| Active | Standby / Disqualified / Unknown | primary | +| Standby / Disqualified / Unknown | Active | partner | +| Active | Active (split-brain) | **primary wins**, warning logged | +| Standby | Standby | `null` (PR 5.2 will surface as `BadCommunicationError`) | +| Unknown | Unknown | `null` | + +Split-brain (both chassis claim Active simultaneously) is a real +production failure mode — typically a redundancy-module misconfiguration or +a partial network split. The driver picks primary deterministically + emits +a warning through `AbCipDriverOptions.OnWarning` so operators see it in the +log. + +## CLI flags + +The `otopcua-abcip-cli` tool exposes the HSBY plumbing through two surfaces +(see [Driver.AbCip.Cli.md](../Driver.AbCip.Cli.md) for the full CLI guide): + +- `--partner ` — global flag on every command. Sets + `PartnerHostAddress` + auto-enables `Hsby.Enabled = true` so the role + probe runs alongside any read / write / subscribe. +- `hsby-status` — dedicated command that prints which chassis is + currently Active. Reads the role tag on both gateways for a few ticks + + prints the `(primary, partner, active)` tuple. + +```powershell +# Print which chassis is Active right now +otopcua-abcip-cli hsby-status -g ab://10.0.0.5/1,0 --partner ab://10.0.0.6/1,0 + +# Subscribe through the active chassis (PR 5.2 follow-up — today the +# subscribe stays pointed at the primary; the role probe runs alongside). +otopcua-abcip-cli subscribe -g ab://10.0.0.5/1,0 --partner ab://10.0.0.6/1,0 \ + -t Motor01_Speed --type Real -i 500 +``` + +## Test coverage + +- **Unit** (`tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Tests/AbCipHsbyTests.cs`): + - Pure `MapValueToRole` matrix (WallClockTime.SyncStatus + S:34 bit + mask + Unknown values). 
+ - End-to-end driver loop: primary Active / partner Standby resolves to + primary; both Active resolves to primary with a warning; both + Standby clears `ActiveAddress`; primary read failure routes to + partner. + - Diagnostics surface (`AbCip.HsbyActive` / `HsbyPrimaryRole` / + `HsbyPartnerRole`). + - DTO JSON round-trip (`PartnerHostAddress` + `Hsby.{Enabled, + RoleTagAddress, ProbeIntervalMs}` survive deserialise → driver → + `DeviceState`). + - `Hsby.Enabled = false` → no role probing. +- **Integration** (`tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/AbCipHsbyRoleProberTests.cs`): + - **Skipped by default** (`Assert.Skip`) — `ab_server` cannot emulate + a ControlLogix HSBY pair (no `WallClockTime.SyncStatus`, no second + chassis concept). The Docker `paired` profile (PR 5.1) brings up two + `ab_server` instances + a stub `hsby-mux` sidecar so the topology is + documented, but PR 5.2 follow-up needs a patched `ab_server` image + that actually serves the role tag before the integration test can + assert anything against the wire. + - Trait `Category=Hsby` so `dotnet test --filter Category=Hsby` finds + this test once it's promoted. + +## Follow-ups (PR 5.2 + beyond) + +- **PR 5.2** — wire `ActiveAddress` into `ResolveHost` so reads/writes + route to the live chassis automatically. Today's PR only **gathers** the + role. +- **Patched `ab_server` image** — add a writable `WallClockTime.SyncStatus` + tag (or a separate Python shim) so the Docker `paired` profile can + exercise the wire-level role probe. +- **`hsby-mux` REST endpoint** — `POST /flip {"active": "primary"}` writes + `1` to the chosen chassis + `0` to the other so integration tests can + drive switch-overs deterministically. +- **GuardLogix HSBY** — same role-tag plumbing applies; verify against a + real 1756-L8xS pair when one is on-site. 
+ +## See also + +- [`docs/Driver.AbCip.Cli.md`](../Driver.AbCip.Cli.md) — `--partner` flag + + `hsby-status` command reference +- [`docs/drivers/AbServer-Test-Fixture.md`](AbServer-Test-Fixture.md) §"What + it does NOT cover" — HSBY entry +- [`docs/Redundancy.md`](../Redundancy.md) — server-level (OPC UA-stack) + redundancy; HSBY is the **driver-level** companion diff --git a/docs/drivers/AbServer-Test-Fixture.md b/docs/drivers/AbServer-Test-Fixture.md index 9f7d6ad..2c55a65 100644 --- a/docs/drivers/AbServer-Test-Fixture.md +++ b/docs/drivers/AbServer-Test-Fixture.md @@ -160,6 +160,28 @@ The driver implements all of these + they have unit coverage, but the only end-to-end paths `ab_server` validates today are atomic `ReadAsync` and write-deadband / write-on-change suppression. +### 8. ControlLogix HSBY paired-IP role probing (PR abcip-5.1) + +`ab_server` has no second-chassis concept and no `WallClockTime.SyncStatus` +tag. The HSBY paired-IP role-prober (PR abcip-5.1) is unit-tested only — +`AbCipHsbyTests` drives two fake runtimes (primary + partner), pins each +chassis's role-tag value, and asserts the active-resolution rules + DTO +round-trip + diagnostics surface. + +The `paired` Docker compose profile spins up two `ab_server` instances + +a stub `hsby-mux` sidecar so the topology is documented, but PR 5.2 follow- +up needs a patched `ab_server` image (or a Python shim) that actually +serves the role tag before the integration test +(`AbCipHsbyRoleProberTests`) can flip its `Assert.Skip` into a real wire +assertion. Until then the test is gated on `Category=Hsby` + skipped by +default. + +Lab-rig coverage is the authoritative path — a real 1756-RM redundant +chassis pair is the only place the live `WallClockTime.SyncStatus` matrix ++ split-brain handling can be exercised end-to-end. See +[`AbCip-HSBY.md`](AbCip-HSBY.md) for the full configuration + role-tag +detection matrix. 
+ ## Logix Emulate golden-box tier Rockwell Studio 5000 Logix Emulate sits **above** ab_server in fidelity + diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli/AbCipCommandBase.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli/AbCipCommandBase.cs index 27f7d89..8e78af0 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli/AbCipCommandBase.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli/AbCipCommandBase.cs @@ -40,6 +40,18 @@ public abstract class AbCipCommandBase : DriverCommandBase "walk; unsupported on Micro800 (silent fallback to Symbolic with warning).")] public AddressingMode AddressingMode { get; init; } = AddressingMode.Auto; + /// + /// PR abcip-5.1 — partner gateway URI for HSBY (Hot-Standby) paired chassis. When + /// supplied, every CLI command auto-enables HSBY role probing on the device options + /// so subcommands like hsby-status + diagnostics surface the active chassis + /// without extra flags. Unset for non-redundant deployments. + /// + [CommandOption("partner", Description = + "Partner gateway URI for ControlLogix HSBY pair (e.g. ab://10.0.0.6/1,0). When " + + "set, the driver runs a second role-probe loop and the hsby-status command can " + + "surface which chassis is currently Active. Optional.")] + public string? Partner { get; init; } + /// public override TimeSpan Timeout { @@ -58,7 +70,17 @@ public abstract class AbCipCommandBase : DriverCommandBase HostAddress: Gateway, PlcFamily: Family, DeviceName: $"cli-{Family}", - AddressingMode: AddressingMode)], + AddressingMode: AddressingMode, + // PR abcip-5.1 — surface --partner through the device options so commands that + // use BuildOptions can take advantage of HSBY role probing without subclassing. + // Hsby auto-enables only when a partner was actually supplied; pre-5.1 invocations + // (no --partner) see exactly the legacy options shape. + PartnerHostAddress: Partner, + Hsby: string.IsNullOrWhiteSpace(Partner) ? 
null : new AbCipHsbyOptions + { + Enabled = true, + ProbeInterval = TimeSpan.FromSeconds(2), + })], Tags = tags, Timeout = Timeout, Probe = new AbCipProbeOptions { Enabled = false }, diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli/Commands/HsbyStatusCommand.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli/Commands/HsbyStatusCommand.cs new file mode 100644 index 0000000..369ee1b --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli/Commands/HsbyStatusCommand.cs @@ -0,0 +1,103 @@ +using CliFx.Attributes; +using CliFx.Infrastructure; + +namespace ZB.MOM.WW.OtOpcUa.Driver.AbCip.Cli.Commands; + +/// +/// PR abcip-5.1 — print the current HSBY role on each chassis of a paired ControlLogix +/// Hot-Standby setup. Requires --partner on the base command + +/// samples the configured role tag on both gateways for a few probe ticks before printing. +/// +[Command("hsby-status", Description = + "Read the WallClockTime.SyncStatus role tag on a ControlLogix HSBY pair and print " + + "which chassis is currently Active. Requires --partner.")] +public sealed class HsbyStatusCommand : AbCipCommandBase +{ + [CommandOption("role-tag", Description = + "Role-tag address. Default WallClockTime.SyncStatus matches v20+ ControlLogix HSBY; " + + "use S:34 for legacy SLC500 / PLC-5 status-byte fronts.")] + public string RoleTagAddress { get; init; } = "WallClockTime.SyncStatus"; + + [CommandOption("samples", Description = + "Number of role-probe ticks to wait for before printing (default 3). Larger values " + + "give the role-prober loop more chances to sample both chassis through transient " + + "transport hiccups.")] + public int Samples { get; init; } = 3; + + public override async ValueTask ExecuteAsync(IConsole console) + { + ConfigureLogging(); + var ct = console.RegisterCancellationHandler(); + + if (string.IsNullOrWhiteSpace(Partner)) + { + await console.Error.WriteLineAsync( + "hsby-status requires --partner . 
Without a partner the " + + "command has no second chassis to compare roles against."); + return; + } + + // Override the base BuildOptions so we can pin the role-tag address + a tight probe + // interval — the default 2 s would mean Samples * 2 s before the print fires, too slow + // for an interactive CLI. Tag list stays empty; only the role probe runs. + var options = new AbCipDriverOptions + { + Devices = [new AbCipDeviceOptions( + HostAddress: Gateway, + PlcFamily: Family, + DeviceName: $"cli-{Family}", + AddressingMode: AddressingMode, + PartnerHostAddress: Partner, + Hsby: new AbCipHsbyOptions + { + Enabled = true, + RoleTagAddress = RoleTagAddress, + ProbeInterval = TimeSpan.FromMilliseconds(500), + })], + Tags = [], + Timeout = Timeout, + Probe = new AbCipProbeOptions { Enabled = false }, + EnableControllerBrowse = false, + EnableAlarmProjection = false, + }; + + await using var driver = new AbCipDriver(options, DriverInstanceId); + try + { + await driver.InitializeAsync("{}", ct); + + // Wait Samples * ProbeInterval so the role probe has had time to sample each + // chassis at least times. The role probe loop spins inside the driver; + // we just sleep + read GetDeviceState's ActiveAddress. + await Task.Delay(TimeSpan.FromMilliseconds(500 * Math.Max(1, Samples)), ct); + + // Pull HSBY state out via DriverHealth.Diagnostics. Single-pair config emits + // the flat AbCip.HsbyActive / AbCip.HsbyPrimaryRole / AbCip.HsbyPartnerRole keys. + var diag = driver.GetHealth().Diagnostics + ?? new Dictionary(); + var primaryRole = diag.TryGetValue("AbCip.HsbyPrimaryRole", out var pr) + ? (HsbyRole)(int)pr : HsbyRole.Unknown; + var partnerRole = diag.TryGetValue("AbCip.HsbyPartnerRole", out var qr) + ? (HsbyRole)(int)qr : HsbyRole.Unknown; + var activeCode = diag.TryGetValue("AbCip.HsbyActive", out var ac) ? 
(int)ac : 0; + var activeAddress = activeCode switch + { + 1 => Gateway, + 2 => Partner, + _ => null, + }; + + await console.Output.WriteLineAsync($"Primary: {Gateway}"); + await console.Output.WriteLineAsync($"Partner: {Partner}"); + await console.Output.WriteLineAsync($"Role tag: {RoleTagAddress}"); + await console.Output.WriteLineAsync(); + await console.Output.WriteLineAsync($"Primary role: {primaryRole}"); + await console.Output.WriteLineAsync($"Partner role: {partnerRole}"); + await console.Output.WriteLineAsync($"Active chassis: {activeAddress ?? ""}"); + } + finally + { + await driver.ShutdownAsync(CancellationToken.None); + } + } +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriver.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriver.cs index 16ea093..9de6e76 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriver.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriver.cs @@ -247,6 +247,22 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery, _ = Task.Run(() => ProbeLoopAsync(state, ct), ct); } } + // PR abcip-5.1 — HSBY role-probe loops. Independent of the connectivity-probe loop + // above; one role-prober task per (primary, partner) pair. Disabled by default; an + // operator opts in by setting Hsby.Enabled = true + PartnerHostAddress on the + // device options. The probe reads WallClockTime.SyncStatus (or S:34) on each + // chassis + updates DeviceState.PrimaryRole / PartnerRole / ActiveAddress. 
+ foreach (var state in _devices.Values) + { + if (state.Options.Hsby is { Enabled: true } hsby + && !string.IsNullOrWhiteSpace(state.Options.PartnerHostAddress)) + { + state.PartnerAddress = state.Options.PartnerHostAddress; + state.HsbyCts = new CancellationTokenSource(); + var ct = state.HsbyCts.Token; + _ = Task.Run(() => HsbyProbeLoopAsync(state, hsby, ct), ct); + } + } _health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null); } catch (Exception ex) @@ -424,6 +440,10 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery, try { state.ProbeCts?.Cancel(); } catch { } state.ProbeCts?.Dispose(); state.ProbeCts = null; + // PR abcip-5.1 — also tear down the HSBY role-probe loop if one is running. + try { state.HsbyCts?.Cancel(); } catch { } + state.HsbyCts?.Dispose(); + state.HsbyCts = null; state.DisposeHandles(); } _devices.Clear(); @@ -644,6 +664,178 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery, try { probeRuntime?.Dispose(); } catch { } } + /// + /// PR abcip-5.1 — HSBY role-probe loop. Concurrently reads the configured role tag + /// (default WallClockTime.SyncStatus) on the primary chassis (the device's own + /// ) and on the partner address (parsed from + /// ), maps each via + /// , and updates the device's + /// / / + /// . + /// + /// Active-resolution rules: + /// + /// Primary , partner not Active → ActiveAddress = primary. + /// Partner Active, primary not Active → ActiveAddress = partner. + /// Both Active → primary wins (warns via sink). + /// Neither Active (Standby / Disqualified / Unknown) → ActiveAddress = null + /// so PR abcip-5.2's ResolveHost can surface BadCommunicationError. + /// + /// PR abcip-5.1 only **gathers** the role + reports it through driver diagnostics. + /// PR abcip-5.2 will plumb the resolved active address back into + /// for live read/write routing. 
+ /// + /// + private async Task HsbyProbeLoopAsync(DeviceState state, AbCipHsbyOptions hsby, CancellationToken ct) + { + var partnerAddress = state.Options.PartnerHostAddress; + if (string.IsNullOrWhiteSpace(partnerAddress)) return; + + var partnerParsed = AbCipHostAddress.TryParse(partnerAddress); + if (partnerParsed is null) + { + _options.OnWarning?.Invoke( + $"AbCip device '{state.Options.HostAddress}' has invalid PartnerHostAddress " + + $"'{partnerAddress}' — expected 'ab://gateway[:port]/cip-path'. HSBY role probing disabled."); + return; + } + + // Per-chassis runtime params. Both chassis share the device's family / ConnectionSize + // / addressing-mode resolution so the role-tag read uses the same wire conventions as + // a regular tag read on either side. + var primaryParams = new AbCipTagCreateParams( + Gateway: state.ParsedAddress.Gateway, + Port: state.ParsedAddress.Port, + CipPath: state.ParsedAddress.CipPath, + LibplctagPlcAttribute: state.Profile.LibplctagPlcAttribute, + TagName: hsby.RoleTagAddress, + Timeout: _options.Probe.Timeout, + ConnectionSize: state.ConnectionSize, + AddressingMode: AddressingMode.Symbolic); + var partnerParams = primaryParams with + { + Gateway = partnerParsed.Gateway, + Port = partnerParsed.Port, + CipPath = partnerParsed.CipPath, + }; + + IAbCipTagRuntime? primaryRuntime = null; + IAbCipTagRuntime? 
partnerRuntime = null; + var primaryInitialized = false; + var partnerInitialized = false; + + try + { + while (!ct.IsCancellationRequested) + { + var primaryRoleTask = ProbeOneAsync( + primaryParams, + () => primaryRuntime, + rt => primaryRuntime = rt, + () => primaryInitialized, + v => primaryInitialized = v, + hsby.RoleTagAddress, + ct); + var partnerRoleTask = ProbeOneAsync( + partnerParams, + () => partnerRuntime, + rt => partnerRuntime = rt, + () => partnerInitialized, + v => partnerInitialized = v, + hsby.RoleTagAddress, + ct); + + HsbyRole primaryRole, partnerRole; + try + { + var roles = await Task.WhenAll(primaryRoleTask, partnerRoleTask).ConfigureAwait(false); + primaryRole = roles[0]; + partnerRole = roles[1]; + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + break; + } + + state.PrimaryRole = primaryRole; + state.PartnerRole = partnerRole; + + string? newActive; + if (primaryRole == HsbyRole.Active && partnerRole == HsbyRole.Active) + { + // Split-brain — both chassis claim Active. Primary wins (deterministic + // tie-break) + we shout via the warning sink so operators see it. + _options.OnWarning?.Invoke( + $"AbCip HSBY split-brain detected on pair " + + $"primary='{state.Options.HostAddress}' partner='{partnerAddress}' — both " + + $"chassis report Active; routing to primary."); + newActive = state.Options.HostAddress; + } + else if (primaryRole == HsbyRole.Active) + { + newActive = state.Options.HostAddress; + } + else if (partnerRole == HsbyRole.Active) + { + newActive = partnerAddress; + } + else + { + // No chassis Active — clear so PR abcip-5.2's ResolveHost can fault writes. 
+ newActive = null; + } + state.ActiveAddress = newActive; + + try { await Task.Delay(hsby.ProbeInterval, ct).ConfigureAwait(false); } + catch (OperationCanceledException) { break; } + } + } + finally + { + try { primaryRuntime?.Dispose(); } catch { } + try { partnerRuntime?.Dispose(); } catch { } + } + + async Task ProbeOneAsync( + AbCipTagCreateParams createParams, + Func get, + Action set, + Func getInit, + Action setInit, + string roleTagAddress, + CancellationToken token) + { + try + { + var rt = get(); + if (rt is null) + { + rt = _tagFactory.Create(createParams); + set(rt); + } + if (!getInit()) + { + await rt.InitializeAsync(token).ConfigureAwait(false); + setInit(true); + } + return await AbCipHsbyRoleProber.ProbeAsync(rt, roleTagAddress, token).ConfigureAwait(false); + } + catch (OperationCanceledException) when (token.IsCancellationRequested) + { + throw; + } + catch + { + // Tear down so the next tick re-creates the runtime; this matches the regular + // probe loop's recovery pattern. + try { get()?.Dispose(); } catch { } + set(null); + setInit(false); + return HsbyRole.Unknown; + } + } + } + private void TransitionDeviceState(DeviceState state, HostState newState) { HostState old; @@ -1751,13 +1943,53 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery, /// counters (Forward Open count, multi-service-packet ratio, etc.) by extending this /// dictionary. /// - private IReadOnlyDictionary BuildDiagnostics() => new Dictionary + private IReadOnlyDictionary BuildDiagnostics() { - ["AbCip.WritesSuppressed"] = _writeCoalescer.TotalWritesSuppressed, - ["AbCip.WritesPassedThrough"] = _writeCoalescer.TotalWritesPassedThrough, - // PR abcip-4.4 — total _RefreshTagDb truthy writes that dispatched to RebrowseAsync. 
- ["AbCip.RefreshTriggers"] = _systemTagSource.TotalRefreshTriggers, - }; + var dict = new Dictionary + { + ["AbCip.WritesSuppressed"] = _writeCoalescer.TotalWritesSuppressed, + ["AbCip.WritesPassedThrough"] = _writeCoalescer.TotalWritesPassedThrough, + // PR abcip-4.4 — total _RefreshTagDb truthy writes that dispatched to RebrowseAsync. + ["AbCip.RefreshTriggers"] = _systemTagSource.TotalRefreshTriggers, + }; + // PR abcip-5.1 — HSBY role surface. One per HSBY-enabled device: + // AbCip.HsbyActive — 1 if ActiveAddress == primary, 2 if == partner, 0 otherwise. + // AbCip.HsbyPrimaryRole — most-recent (HsbyRole)int observed on the primary. + // AbCip.HsbyPartnerRole — most-recent (HsbyRole)int observed on the partner. + // The single-driver case (one HSBY pair) collapses these to flat keys; multi-pair + // configurations get scoped keys per host so the Admin UI can render each pair. + var hsbyDevices = _devices.Values + .Where(d => d.Options.Hsby is { Enabled: true } && !string.IsNullOrWhiteSpace(d.Options.PartnerHostAddress)) + .ToList(); + if (hsbyDevices.Count == 1) + { + var d = hsbyDevices[0]; + dict["AbCip.HsbyActive"] = HsbyActiveCode(d); + dict["AbCip.HsbyPrimaryRole"] = (int)d.PrimaryRole; + dict["AbCip.HsbyPartnerRole"] = (int)d.PartnerRole; + } + else + { + foreach (var d in hsbyDevices) + { + var key = d.Options.HostAddress; + dict[$"AbCip.HsbyActive[{key}]"] = HsbyActiveCode(d); + dict[$"AbCip.HsbyPrimaryRole[{key}]"] = (int)d.PrimaryRole; + dict[$"AbCip.HsbyPartnerRole[{key}]"] = (int)d.PartnerRole; + } + } + return dict; + + static double HsbyActiveCode(DeviceState d) + { + if (d.ActiveAddress is null) return 0; + if (string.Equals(d.ActiveAddress, d.Options.HostAddress, StringComparison.OrdinalIgnoreCase)) + return 1; + if (string.Equals(d.ActiveAddress, d.PartnerAddress, StringComparison.OrdinalIgnoreCase)) + return 2; + return 0; + } + } /// /// Test seam — exposes the live coalescer for unit tests that want to inspect counters @@ -2120,6 +2352,31 
@@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery, public CancellationTokenSource? ProbeCts { get; set; } public bool ProbeInitialized { get; set; } + /// + /// PR abcip-5.1 — currently active chassis address in an HSBY pair, or + /// null when (a) HSBY isn't configured for this device or (b) neither + /// chassis returned HsbyRole.Active on the latest probe tick. + /// PR abcip-5.2 will consult this in AbCipDriver.ResolveHost to + /// route reads / writes; PR 5.1 only reports it through driver diagnostics. + /// + public string? ActiveAddress { get; set; } + + /// + /// PR abcip-5.1 — partner chassis address pulled from + /// AbCipDeviceOptions.PartnerHostAddress at init. null when + /// HSBY isn't configured. + /// + public string? PartnerAddress { get; set; } + + /// PR abcip-5.1 — most-recent role observed on the primary chassis. + public HsbyRole PrimaryRole { get; set; } = HsbyRole.Unknown; + + /// PR abcip-5.1 — most-recent role observed on the partner chassis. + public HsbyRole PartnerRole { get; set; } = HsbyRole.Unknown; + + /// PR abcip-5.1 — cancellation source for the HSBY probe loop. Disposed at shutdown. + public CancellationTokenSource? HsbyCts { get; set; } + /// /// PR abcip-4.3 — wall-clock duration of the most recent /// iteration that touched any tag on this device, in milliseconds. Surfaces as diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverFactoryExtensions.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverFactoryExtensions.cs index 24313ab..38ea3b7 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverFactoryExtensions.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverFactoryExtensions.cs @@ -43,7 +43,15 @@ public static class AbCipDriverFactoryExtensions "AddressingMode", fallback: AddressingMode.Auto), ReadStrategy: ParseEnum(d.ReadStrategy, "device", driverInstanceId, "ReadStrategy", fallback: ReadStrategy.Auto), - MultiPacketSparsityThreshold: d.MultiPacketSparsityThreshold ?? 0.25))] + MultiPacketSparsityThreshold: d.MultiPacketSparsityThreshold ??
0.25, + // PR abcip-5.1 — HSBY paired-IP knobs. Both null / absent = no HSBY. + PartnerHostAddress: d.PartnerHostAddress, + Hsby: d.Hsby is null ? null : new AbCipHsbyOptions + { + Enabled = d.Hsby.Enabled ?? false, + RoleTagAddress = d.Hsby.RoleTagAddress ?? "WallClockTime.SyncStatus", + ProbeInterval = TimeSpan.FromMilliseconds(d.Hsby.ProbeIntervalMs ?? 2_000), + }))] : [], Tags = dto.Tags is { Count: > 0 } ? [.. dto.Tags.Select(t => BuildTag(t, driverInstanceId))] @@ -163,6 +171,32 @@ public static class AbCipDriverFactoryExtensions /// resolves to Auto. Default 0.25; clamped to [0..1]. /// public double? MultiPacketSparsityThreshold { get; init; } + + /// + /// PR abcip-5.1 — canonical AB CIP gateway URI of the partner chassis in a + /// ControlLogix HSBY pair. null = no HSBY partner; the driver behaves + /// exactly like every pre-5.1 build. When set together with + /// .Enabled = true, the driver runs a second probe loop + /// against the partner + reports the active chassis through driver diagnostics. + /// + public string? PartnerHostAddress { get; init; } + + /// + /// PR abcip-5.1 — HSBY (Hot-Standby) sub-options. Defaults to + /// Enabled = false when omitted; pre-5.1 deployments are unaffected. + /// + public AbCipHsbyDto? Hsby { get; init; } + } + + /// + /// PR abcip-5.1 — JSON-mirror of . Off by default; enabled + /// by setting Enabled = true + the parent device's PartnerHostAddress. + /// + internal sealed class AbCipHsbyDto + { + public bool? Enabled { get; init; } + public string? RoleTagAddress { get; init; } + public int? 
ProbeIntervalMs { get; init; } } internal sealed class AbCipTagDto diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverOptions.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverOptions.cs index 08ae38f..ca3ad4d 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverOptions.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverOptions.cs @@ -151,6 +151,23 @@ public sealed class AbCipDriverOptions /// where the wire-cost of one whole-UDT read still beats N member reads on ControlLogix's /// 4002-byte connection size; see docs/drivers/AbCip-Performance.md §"Read strategy". /// Clamped to [0..1] at planner time; values outside the range silently saturate. +/// PR abcip-5.1 — optional canonical AB CIP gateway URI of the +/// partner chassis in a ControlLogix HSBY (Hot-Standby) pair. When set together with +/// .Enabled = true, the driver runs a second probe loop against +/// this partner address + uses the configured role tag (default +/// WallClockTime.SyncStatus, fall-back S:34 for PLC-5 / SLC-style fronts) to +/// determine which chassis is currently Active. PR abcip-5.1 only **discovers + reports** +/// the active chassis through driver diagnostics; PR abcip-5.2 is the follow-up that wires +/// the resolved active address into for live read / +/// write routing. null = no HSBY partner; the driver behaves exactly like every +/// pre-5.1 build. +/// PR abcip-5.1 — HSBY (Hot-Standby) sub-options. Defaults to +/// Enabled = false so back-compat deployments that don't set +/// see no behaviour change. +/// gates the second probe loop + role-tag read; +/// picks WallClockTime.SyncStatus (v20+ ControlLogix) vs S:34 (legacy +/// SLC500 / PLC-5 status byte fallback); +/// controls the role-tag poll cadence. public sealed record AbCipDeviceOptions( string HostAddress, AbCipPlcFamily PlcFamily = AbCipPlcFamily.ControlLogix, @@ -158,7 +175,52 @@ public sealed record AbCipDeviceOptions( int? 
ConnectionSize = null, AddressingMode AddressingMode = AddressingMode.Auto, ReadStrategy ReadStrategy = ReadStrategy.Auto, - double MultiPacketSparsityThreshold = 0.25); + double MultiPacketSparsityThreshold = 0.25, + string? PartnerHostAddress = null, + AbCipHsbyOptions? Hsby = null); + +/// +/// PR abcip-5.1 — HSBY (Hot-Standby) per-device options. Off by default. When +/// = true + the device sets +/// , the driver runs two probe loops +/// concurrently — primary + the partner — +/// reads the configured role tag on each, and reports which chassis is Active through +/// driver diagnostics (AbCip.HsbyActive, AbCip.HsbyPrimaryRole, +/// AbCip.HsbyPartnerRole). PR abcip-5.2 is the follow-up that wires the resolved +/// active address back into for live read / write +/// routing — 5.1 just gathers the role. +/// +/// +/// Role-tag detection matrix: +/// +/// v20 / v24 / v32+ ControlLogix HSBYWallClockTime.SyncStatus +/// (DINT). Values: 0 = Standby (Synchronized but not Active), +/// 1 = Synchronized / Active (active chassis), 2 = Disqualified. +/// PLC-5 / SLC500 fallbackS:34 Module Status word (PLC-5 has a +/// role bit in word 34 of the status file). Bit 0 = "this chassis is Active". This +/// is the legacy fallback for sites that haven't migrated to ControlLogix HSBY. +/// +/// +public sealed record AbCipHsbyOptions +{ + /// Master switch. Default false — no role probing, no second probe loop. + public bool Enabled { get; init; } + + /// + /// Address of the role tag the driver reads on each probe tick. Default + /// WallClockTime.SyncStatus matches v20+ ControlLogix HSBY firmware. Legacy + /// PLC-5 / SLC500 fronts that expose a status-file role bit pass S:34 here + + /// the role prober applies the bit-mask interpretation automatically. + /// + public string RoleTagAddress { get; init; } = "WallClockTime.SyncStatus"; + + /// + /// Cadence the HSBY role probe ticks at. 
Default 2 seconds — tight enough to detect + /// a manual switch-over within one Admin-UI refresh, loose enough to leave headroom + /// for the regular probe loop on the same gateway. + /// + public TimeSpan ProbeInterval { get; init; } = TimeSpan.FromSeconds(2); +} /// /// PR abcip-3.3 — per-device strategy for reading multi-member UDT batches. diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipHsbyRoleProber.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipHsbyRoleProber.cs new file mode 100644 index 0000000..bd50ee9 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipHsbyRoleProber.cs @@ -0,0 +1,124 @@ +namespace ZB.MOM.WW.OtOpcUa.Driver.AbCip; + +/// +/// PR abcip-5.1 — resolved HSBY role for one chassis in a ControlLogix Hot-Standby pair. +/// covers "couldn't read the role tag" (transport failure, tag not +/// found, decode failure); the driver treats it as "no information yet, don't change +/// ActiveAddress" rather than as a vote for Standby. +/// +public enum HsbyRole +{ + /// Read failed or value was not decodable. Surface as "no information". + Unknown = 0, + + /// Chassis is the active member of the HSBY pair (Synchronized + serving I/O). + Active = 1, + + /// Chassis is the standby member — Synchronized but not driving I/O. + Standby = 2, + + /// Chassis has been disqualified by the HSBY module (e.g. firmware mismatch). + Disqualified = 3, +} + +/// +/// PR abcip-5.1 — reads a ControlLogix HSBY role tag from one chassis and maps the value +/// to . Two address formats are supported: +/// +/// v20 / v24 / v32+ ControlLogix HSBYWallClockTime.SyncStatus +/// (DINT-typed). Values: 0 = Standby, 1 = Synchronized / Active, +/// 2 = Disqualified. Other values map to . +/// PLC-5 / SLC500 fallbackS:34 Module Status word. Bit 0 of the +/// integer value indicates "this chassis is Active"; the prober applies the +/// bit-mask interpretation when the address starts with "S:" + maps +/// (value & 1) == 1 → Active, otherwise → Standby. 
+/// +/// Read failure (initialise / read throw, non-zero libplctag status, undecodable buffer) +/// returns — callers (the driver's HSBY probe loop) +/// interpret Unknown as "leave ActiveAddress alone for this tick". +/// +/// +/// The prober is stateless / static — the per-chassis runtime is provided by +/// + drives initialise / read on the runtime +/// before delegating to . Keeping the value-mapping logic isolated +/// here lets unit tests assert the matrix (0 / 1 / 2 / S:34 bit 0 / unknown values) without +/// standing up a probe loop. +/// +public static class AbCipHsbyRoleProber +{ + /// + /// Read on + map the + /// decoded value to a . The runtime is already initialised by + /// the caller ( shares the same lazy-init + /// pattern with the regular probe loop); this method only issues the read + decodes. + /// + public static async Task ProbeAsync( + IAbCipTagRuntime runtime, string roleTagAddress, CancellationToken cancellationToken) + { + ArgumentNullException.ThrowIfNull(runtime); + ArgumentException.ThrowIfNullOrWhiteSpace(roleTagAddress); + try + { + await runtime.ReadAsync(cancellationToken).ConfigureAwait(false); + if (runtime.GetStatus() != 0) return HsbyRole.Unknown; + var raw = runtime.DecodeValue(AbCipDataType.DInt, bitIndex: null); + return MapValueToRole(raw, roleTagAddress); + } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; + } + catch + { + // Wire / init / decode failure — surface as Unknown so the caller doesn't + // misinterpret a transient transport hiccup as "this chassis went Standby". + return HsbyRole.Unknown; + } + } + + /// + /// Pure value-to-role mapper. Exposed for unit tests so the matrix assertions can run + /// without a runtime in scope. is the production entry point. + /// + public static HsbyRole MapValueToRole(object? 
raw, string roleTagAddress) + { + if (raw is null) return HsbyRole.Unknown; + if (!TryToInt64(raw, out var value)) return HsbyRole.Unknown; + + // PLC-5 / SLC500 status-file fallback — bit 0 of S:34 is the role bit. Pattern-match + // on the "S:" prefix because operators do put the file number after it (S:34, S:2, + // etc) + the role bit lives in S:34 specifically on PLC-5 fronts but the bit-mask + // semantics apply to any S:NN address an integration plumbs in. + if (roleTagAddress.StartsWith("S:", StringComparison.OrdinalIgnoreCase)) + return (value & 1) == 1 ? HsbyRole.Active : HsbyRole.Standby; + + // Default — WallClockTime.SyncStatus matrix (v20 / v24 / v32+ ControlLogix HSBY). + return value switch + { + 0 => HsbyRole.Standby, + 1 => HsbyRole.Active, + 2 => HsbyRole.Disqualified, + _ => HsbyRole.Unknown, + }; + } + + private static bool TryToInt64(object raw, out long value) + { + switch (raw) + { + case long l: value = l; return true; + case int i: value = i; return true; + case short s: value = s; return true; + case sbyte sb: value = sb; return true; + case byte b: value = b; return true; + case ushort us: value = us; return true; + case uint ui: value = ui; return true; + case ulong ul when ul <= long.MaxValue: value = (long)ul; return true; + case bool boolean: value = boolean ? 
1 : 0; return true; + case string str when long.TryParse(str, System.Globalization.NumberStyles.Integer, + System.Globalization.CultureInfo.InvariantCulture, out var parsed): + value = parsed; return true; + default: value = 0; return false; + } + } +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/AbCipHsbyRoleProberTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/AbCipHsbyRoleProberTests.cs new file mode 100644 index 0000000..9d0ffd1 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/AbCipHsbyRoleProberTests.cs @@ -0,0 +1,35 @@ +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Driver.AbCip; + +namespace ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests; + +/// +/// PR abcip-5.1 — integration scaffold for HSBY paired-IP role probing. Skipped by default +/// because ab_server cannot emulate a ControlLogix HSBY pair (it has no second-chassis +/// concept + no WallClockTime.SyncStatus tag). Promoted from skipped to active when +/// the Docker fixture grows the hsby-mux sidecar (planned in PR abcip-5.2 follow-up +/// work) or when a real lab rig is available; the unit-level coverage in +/// AbCipHsbyTests exercises the value-mapping + active-resolution rules in the +/// meantime. +/// +/// The skip lives in the test body so the file still compiles + the trait is discoverable +/// by dotnet test --filter "Category=Hsby"; the body never gets to assert anything +/// against ab_server. +/// +/// +[Trait("Category", "Hsby")] +[Trait("Requires", "AbServer")] +public sealed class AbCipHsbyRoleProberTests +{ + [AbServerFact] + public Task Role_prober_resolves_active_chassis_against_paired_fixture() + { + // ab_server cannot emulate an HSBY pair; the paired-fixture compose service + + // hsby-mux sidecar that PR abcip-5.2 ships will let this body do real wire work. 
+ // For PR abcip-5.1 we keep the file as a scaffold so the integration trait is + // discoverable and a future PR can flip the skip into a real assertion. + Assert.Skip("HSBY paired-fixture (controllogix-secondary + hsby-mux sidecar) not yet wired — PR abcip-5.2 follow-up."); + return Task.CompletedTask; + } +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/Docker/docker-compose.yml b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/Docker/docker-compose.yml index a9b56f2..a3c044c 100644 --- a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/Docker/docker-compose.yml +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/Docker/docker-compose.yml @@ -95,3 +95,81 @@ services: "--tag=TestDINT:DINT[1]", "--tag=SafetyDINT_S:DINT[1]" ] + + # ---- PR abcip-5.1 — paired-fixture for HSBY role probing ------------------ + # The "paired" profile spins up two ab_server instances (controllogix-primary + # on :44818, controllogix-secondary on :44819) plus a stub hsby-mux sidecar + # that will eventually flip a role bit on demand. Today the mux is a placeholder — it does NOT + # currently inject role bits because ab_server has no WallClockTime.SyncStatus + # tag concept. PR abcip-5.2 follow-up will land: + # 1. A patched ab_server image (or a separate Python TCP shim) that exposes + # a writable WallClockTime.SyncStatus DINT per chassis. + # 2. A real hsby-mux REST endpoint (POST /flip {"active": "primary"}) that + # writes 1 to the chosen chassis + 0 to the other. + # For now the services exist so the compose file documents the topology + the + # AbCipHsbyRoleProberTests integration test has a place to land its + # [AbServerFact] without breaking the pre-5.1 ab_server profiles. + controllogix-primary: + profiles: ["paired"] + image: otopcua-ab-server:libplctag-release + build: + context: . 
+ dockerfile: Dockerfile + container_name: otopcua-ab-server-controllogix-primary + restart: "no" + ports: + - "44818:44818" + command: [ + "ab_server", + "--plc=ControlLogix", + "--path=1,0", + "--port=44818", + "--tag=TestDINT:DINT[1]", + # Stand-in for WallClockTime.SyncStatus until the patched image lands. + "--tag=SyncStatus:DINT[1]" + ] + + controllogix-secondary: + profiles: ["paired"] + image: otopcua-ab-server:libplctag-release + build: + context: . + dockerfile: Dockerfile + container_name: otopcua-ab-server-controllogix-secondary + restart: "no" + ports: + - "44819:44818" + command: [ + "ab_server", + "--plc=ControlLogix", + "--path=1,0", + "--port=44818", + "--tag=TestDINT:DINT[1]", + "--tag=SyncStatus:DINT[1]" + ] + + # Stub hsby-mux — placeholder. Today's image is a tiny Python script that + # exposes a /health endpoint + nothing else. PR abcip-5.2 will replace this + # with a real role-flip endpoint that writes SyncStatus on either chassis. + hsby-mux: + profiles: ["paired"] + image: python:3.12-alpine + container_name: otopcua-ab-hsby-mux + restart: "no" + ports: + - "8080:8080" + command: + - sh + - -c + - | + python -c " + import http.server, socketserver + class H(http.server.BaseHTTPRequestHandler): + def do_GET(s): + s.send_response(200); s.send_header('Content-Type','text/plain'); s.end_headers() + s.wfile.write(b'hsby-mux stub - PR abcip-5.2 follow-up will wire role flips') + socketserver.TCPServer(('', 8080), H).serve_forever() + " + depends_on: + - controllogix-primary + - controllogix-secondary diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Tests/AbCipHsbyTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Tests/AbCipHsbyTests.cs new file mode 100644 index 0000000..6b5429a --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Tests/AbCipHsbyTests.cs @@ -0,0 +1,301 @@ +using System.Collections.Concurrent; +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Driver.AbCip; + +namespace ZB.MOM.WW.OtOpcUa.Driver.AbCip.Tests; + +/// +/// PR 
abcip-5.1 — unit tests for HSBY paired-IP role probing. Drives two fake-runtime +/// gateways (primary + partner), forces each to return a chosen WallClockTime.SyncStatus +/// value, asserts diagnostics + the device-state +/// ActiveAddress resolves to the expected chassis under each split-state combination. +/// +[Trait("Category", "Unit")] +public sealed class AbCipHsbyTests +{ + // ---- Pure value mapping ---- + + [Theory] + [InlineData(0L, HsbyRole.Standby)] // WallClockTime.SyncStatus matrix + [InlineData(1L, HsbyRole.Active)] + [InlineData(2L, HsbyRole.Disqualified)] + [InlineData(99L, HsbyRole.Unknown)] // out-of-range integer + public void MapValueToRole_handles_WallClockTime_SyncStatus_matrix(long raw, HsbyRole expected) + { + AbCipHsbyRoleProber.MapValueToRole(raw, "WallClockTime.SyncStatus").ShouldBe(expected); + } + + [Theory] + [InlineData(0L, HsbyRole.Standby)] // bit 0 = 0 → Standby + [InlineData(1L, HsbyRole.Active)] // bit 0 = 1 → Active + [InlineData(2L, HsbyRole.Standby)] // bit 0 = 0 → Standby (2 = 0b10) + [InlineData(3L, HsbyRole.Active)] // bit 0 = 1 → Active (3 = 0b11) + public void MapValueToRole_handles_S34_bitmask_fallback(long raw, HsbyRole expected) + { + AbCipHsbyRoleProber.MapValueToRole(raw, "S:34").ShouldBe(expected); + } + + [Fact] + public void MapValueToRole_returns_Unknown_for_null_raw() + { + AbCipHsbyRoleProber.MapValueToRole(null, "WallClockTime.SyncStatus").ShouldBe(HsbyRole.Unknown); + } + + // ---- ProbeAsync against fake runtime ---- + + [Fact] + public async Task ProbeAsync_returns_Active_when_runtime_decodes_one() + { + var rt = new FakeAbCipTag(MakeParams("WallClockTime.SyncStatus")) { Value = 1 }; + var role = await AbCipHsbyRoleProber.ProbeAsync(rt, "WallClockTime.SyncStatus", CancellationToken.None); + role.ShouldBe(HsbyRole.Active); + } + + [Fact] + public async Task ProbeAsync_returns_Unknown_when_read_throws() + { + var rt = new FakeAbCipTag(MakeParams("WallClockTime.SyncStatus")) { ThrowOnRead = true }; + var 
role = await AbCipHsbyRoleProber.ProbeAsync(rt, "WallClockTime.SyncStatus", CancellationToken.None); + role.ShouldBe(HsbyRole.Unknown); + } + + [Fact] + public async Task ProbeAsync_returns_Unknown_on_non_zero_status() + { + var rt = new FakeAbCipTag(MakeParams("WallClockTime.SyncStatus")) { Value = 1, Status = -1 }; + var role = await AbCipHsbyRoleProber.ProbeAsync(rt, "WallClockTime.SyncStatus", CancellationToken.None); + role.ShouldBe(HsbyRole.Unknown); + } + + // ---- End-to-end driver loop ---- + + [Fact] + public async Task Primary_active_partner_standby_resolves_ActiveAddress_to_primary() + { + var (drv, _) = await BuildHsbyDriverAsync(primaryRoleValue: 1, partnerRoleValue: 0); + try + { + await WaitForRoleAsync(drv, "ab://10.0.0.5/1,0"); + var state = drv.GetDeviceState("ab://10.0.0.5/1,0").ShouldNotBeNull(); + state.ActiveAddress.ShouldBe("ab://10.0.0.5/1,0"); + state.PrimaryRole.ShouldBe(HsbyRole.Active); + state.PartnerRole.ShouldBe(HsbyRole.Standby); + } + finally + { + await drv.ShutdownAsync(CancellationToken.None); + } + } + + [Fact] + public async Task Both_active_primary_wins_and_warning_is_emitted() + { + var warnings = new ConcurrentQueue(); + var (drv, _) = await BuildHsbyDriverAsync(primaryRoleValue: 1, partnerRoleValue: 1, + warningSink: warnings.Enqueue); + try + { + await WaitForRoleAsync(drv, "ab://10.0.0.5/1,0"); + var state = drv.GetDeviceState("ab://10.0.0.5/1,0").ShouldNotBeNull(); + state.ActiveAddress.ShouldBe("ab://10.0.0.5/1,0", + "split-brain ties must resolve to primary deterministically"); + state.PrimaryRole.ShouldBe(HsbyRole.Active); + state.PartnerRole.ShouldBe(HsbyRole.Active); + warnings.ShouldContain(w => w.Contains("split-brain", StringComparison.OrdinalIgnoreCase)); + } + finally + { + await drv.ShutdownAsync(CancellationToken.None); + } + } + + [Fact] + public async Task Both_standby_clears_ActiveAddress() + { + var (drv, _) = await BuildHsbyDriverAsync(primaryRoleValue: 0, partnerRoleValue: 0); + try + { + // Let the 
loop tick at least once + sample the role state. + await WaitForAsync(() => drv.GetDeviceState("ab://10.0.0.5/1,0")?.PrimaryRole != HsbyRole.Unknown); + var state = drv.GetDeviceState("ab://10.0.0.5/1,0").ShouldNotBeNull(); + state.ActiveAddress.ShouldBeNull( + "neither chassis Active means no routing target — PR abcip-5.2 will fault writes here"); + state.PrimaryRole.ShouldBe(HsbyRole.Standby); + state.PartnerRole.ShouldBe(HsbyRole.Standby); + } + finally + { + await drv.ShutdownAsync(CancellationToken.None); + } + } + + [Fact] + public async Task Primary_read_fails_and_partner_active_routes_to_partner() + { + var factory = new FakeAbCipTagFactory + { + Customise = p => p.Gateway == "10.0.0.5" + ? new FakeAbCipTag(p) { ThrowOnRead = true } + : new FakeAbCipTag(p) { Value = 1 }, + }; + var drv = BuildDriver(factory); + await drv.InitializeAsync("{}", CancellationToken.None); + try + { + await WaitForRoleAsync(drv, "ab://10.0.0.6/1,0"); + var state = drv.GetDeviceState("ab://10.0.0.5/1,0").ShouldNotBeNull(); + state.ActiveAddress.ShouldBe("ab://10.0.0.6/1,0"); + state.PrimaryRole.ShouldBe(HsbyRole.Unknown); + state.PartnerRole.ShouldBe(HsbyRole.Active); + } + finally + { + await drv.ShutdownAsync(CancellationToken.None); + } + } + + [Fact] + public async Task Hsby_disabled_skips_role_probing_entirely() + { + var factory = new FakeAbCipTagFactory(); + var drv = new AbCipDriver(new AbCipDriverOptions + { + Devices = + [ + new AbCipDeviceOptions( + "ab://10.0.0.5/1,0", + PartnerHostAddress: "ab://10.0.0.6/1,0", + Hsby: new AbCipHsbyOptions { Enabled = false }), + ], + Probe = new AbCipProbeOptions { Enabled = false }, + }, "drv-hsby-off", factory); + try + { + await drv.InitializeAsync("{}", CancellationToken.None); + await Task.Delay(150); + + var state = drv.GetDeviceState("ab://10.0.0.5/1,0").ShouldNotBeNull(); + state.PrimaryRole.ShouldBe(HsbyRole.Unknown); + state.PartnerRole.ShouldBe(HsbyRole.Unknown); + state.ActiveAddress.ShouldBeNull(); + // Factory must not 
have been used since Hsby.Enabled = false + probe disabled. + factory.Tags.ShouldBeEmpty(); + } + finally + { + await drv.ShutdownAsync(CancellationToken.None); + } + } + + [Fact] + public async Task Diagnostics_surface_HsbyActive_and_role_codes() + { + var (drv, _) = await BuildHsbyDriverAsync(primaryRoleValue: 1, partnerRoleValue: 0); + try + { + await WaitForRoleAsync(drv, "ab://10.0.0.5/1,0"); + + var diag = drv.GetHealth().Diagnostics.ShouldNotBeNull(); + diag.ShouldContainKey("AbCip.HsbyActive"); + diag["AbCip.HsbyActive"].ShouldBe(1); // primary is the active chassis + diag["AbCip.HsbyPrimaryRole"].ShouldBe((int)HsbyRole.Active); + diag["AbCip.HsbyPartnerRole"].ShouldBe((int)HsbyRole.Standby); + } + finally + { + await drv.ShutdownAsync(CancellationToken.None); + } + } + + // ---- DTO round-trip ---- + + [Fact] + public async Task DTO_json_round_trip_preserves_PartnerHostAddress_and_Hsby() + { + const string json = """ + { + "Devices": [ + { + "HostAddress": "ab://10.0.0.5/1,0", + "PartnerHostAddress": "ab://10.0.0.6/1,0", + "Hsby": { + "Enabled": true, + "RoleTagAddress": "S:34", + "ProbeIntervalMs": 5000 + } + } + ] + } + """; + var driver = AbCipDriverFactoryExtensions.CreateInstance("drv-roundtrip", json); + try + { + // Initialise so the device map is populated, then read back via GetDeviceState. + await driver.InitializeAsync(json, CancellationToken.None); + var state = driver.GetDeviceState("ab://10.0.0.5/1,0").ShouldNotBeNull(); + state.Options.PartnerHostAddress.ShouldBe("ab://10.0.0.6/1,0"); + state.Options.Hsby.ShouldNotBeNull(); + state.Options.Hsby!.Enabled.ShouldBeTrue(); + state.Options.Hsby.RoleTagAddress.ShouldBe("S:34"); + state.Options.Hsby.ProbeInterval.ShouldBe(TimeSpan.FromMilliseconds(5000)); + } + finally + { + await driver.ShutdownAsync(CancellationToken.None); + } + } + + // ---- Helpers ---- + + private static AbCipDriver BuildDriver(FakeAbCipTagFactory factory, Action? 
warningSink = null) => + new AbCipDriver(new AbCipDriverOptions + { + Devices = + [ + new AbCipDeviceOptions( + "ab://10.0.0.5/1,0", + PartnerHostAddress: "ab://10.0.0.6/1,0", + Hsby: new AbCipHsbyOptions + { + Enabled = true, + RoleTagAddress = "WallClockTime.SyncStatus", + ProbeInterval = TimeSpan.FromMilliseconds(50), + }), + ], + Probe = new AbCipProbeOptions { Enabled = false }, + OnWarning = warningSink, + }, "drv-hsby", factory); + + private static async Task<(AbCipDriver Driver, FakeAbCipTagFactory Factory)> + BuildHsbyDriverAsync(int primaryRoleValue, int partnerRoleValue, Action? warningSink = null) + { + var factory = new FakeAbCipTagFactory + { + Customise = p => p.Gateway == "10.0.0.5" + ? new FakeAbCipTag(p) { Value = primaryRoleValue } + : new FakeAbCipTag(p) { Value = partnerRoleValue }, + }; + var drv = BuildDriver(factory, warningSink); + await drv.InitializeAsync("{}", CancellationToken.None); + return (drv, factory); + } + + private static AbCipTagCreateParams MakeParams(string tagName) => new( + Gateway: "10.0.0.5", + Port: 44818, + CipPath: "1,0", + LibplctagPlcAttribute: "ControlLogix", + TagName: tagName, + Timeout: TimeSpan.FromSeconds(2)); + + private static Task WaitForRoleAsync(AbCipDriver drv, string expectedActive) => + WaitForAsync(() => drv.GetDeviceState("ab://10.0.0.5/1,0")?.ActiveAddress == expectedActive); + + private static async Task WaitForAsync(Func condition, TimeSpan? timeout = null) + { + var deadline = DateTime.UtcNow + (timeout ?? TimeSpan.FromSeconds(2)); + while (!condition() && DateTime.UtcNow < deadline) + await Task.Delay(20); + } +}