From c292dcc1dbc2a2415442d2a53d267b97c467e042 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 03:32:45 -0400 Subject: [PATCH] =?UTF-8?q?Auto:=20ablegacy-9=20=E2=80=94=20per-device=20t?= =?UTF-8?q?imeout=20/=20retry=20overrides?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #252 --- docs/Driver.AbLegacy.Cli.md | 28 +- docs/drivers/AbLegacy-Test-Fixture.md | 27 ++ scripts/smoke/seed-ablegacy-smoke.sql | 10 +- .../AbLegacyDriver.cs | 136 ++++++--- .../AbLegacyDriverFactoryExtensions.cs | 28 +- .../AbLegacyDriverOptions.cs | 24 +- .../AbLegacyPerDeviceTimeoutTests.cs | 68 +++++ .../Docker/README.md | 32 ++ .../AbLegacyPerDeviceTimeoutTests.cs | 283 ++++++++++++++++++ 9 files changed, 585 insertions(+), 51 deletions(-) create mode 100644 tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/AbLegacyPerDeviceTimeoutTests.cs create mode 100644 tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests/AbLegacyPerDeviceTimeoutTests.cs diff --git a/docs/Driver.AbLegacy.Cli.md b/docs/Driver.AbLegacy.Cli.md index 0fc9d8b..44e9f24 100644 --- a/docs/Driver.AbLegacy.Cli.md +++ b/docs/Driver.AbLegacy.Cli.md @@ -19,7 +19,8 @@ dotnet run --project src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Cli -- --help |---|---|---| | `-g` / `--gateway` | **required** | Canonical `ab://host[:port]/cip-path` | | `-P` / `--plc-type` | `Slc500` | Slc500 / MicroLogix / Plc5 / LogixPccc | -| `--timeout-ms` | `5000` | Per-operation timeout | +| `--timeout-ms` | `5000` | Per-operation timeout — see precedence note below | +| `--retries` | `0` | Retry count on transient `BadCommunicationError` (PR 9 / #252) | | `--verbose` | off | Serilog debug output | Family ↔ CIP-path cheat sheet: @@ -29,6 +30,31 @@ Family ↔ CIP-path cheat sheet: - **LogixPccc** — `1,0` (Logix controller accessed via the PCCC compatibility layer; rare) +### Per-device timeout / retry tuning (#252, PR 9) + +The CLI's `--timeout-ms` is the **driver-wide default** when launched as a +one-shot test client. In production (server-side, multi-device deployment) +each `AbLegacyDeviceOptions` row carries its own optional `Timeout` / +`Retries` that override the driver-wide value. + +Precedence (highest → lowest): per-device override → driver-wide default → +hard-coded fallback (2000 ms / 0 retries). + +Tuning cheat sheet — start here, measure, then trim: + +| Family | Recommended `Timeout` | Notes | +|---|---|---| +| SLC 5/01 (RS-232 / DH+ bridge) | **5000 ms** | Slowest of the bunch; serial round-trip plus DH+ hop | +| SLC 5/02 / 5/03 (DH+) | 3000 ms | Bridged Ethernet → DH+ adds ~1 s | +| **SLC 5/04 / 5/05** (Ethernet) | **2000 ms** | Fastest of the SLC family — direct EIP/PCCC | +| MicroLogix 1100 / 1400 | **3000 ms** | Single-CPU, slow scan; no backplane | +| PLC-5 (Ethernet I/F) | 2500 ms | Comparable to SLC 5/05 over EIP | +| LogixPccc compat layer | 2000 ms | Logix CPU is fast; PCCC layer is the floor | + +A small `--retries 1` (or `2` for slow chassis) is generally safe — the retry +loop only fires on transient `BadCommunicationError`; terminal errors +(`BadNodeIdUnknown`, `BadTypeMismatch`, …) surface on the first attempt. + ## PCCC address primer File letters imply data type; type flag still required so the CLI knows how to diff --git a/docs/drivers/AbLegacy-Test-Fixture.md b/docs/drivers/AbLegacy-Test-Fixture.md index 8bfc17e..de488c5 100644 --- a/docs/drivers/AbLegacy-Test-Fixture.md +++ b/docs/drivers/AbLegacy-Test-Fixture.md @@ -123,6 +123,33 @@ cover the common ones but uncommon ones (`R` counters, `S` status files, network; parts are end-of-life but still available. PLC-5 + LogixPccc-mode behaviour + DF1 serial need specific controllers. +## Per-device options (`AbLegacyDeviceOptions`) + +Each entry in `AbLegacyDriverOptions.Devices` carries: + +| Field | Type | Default | Notes | +|---|---|---|---| +| `HostAddress` | string | required | `ab://host[:port]/cip-path` | +| `PlcFamily` | enum | `Slc500` | Slc500 / MicroLogix / Plc5 / LogixPccc | +| `DeviceName` | string | null | Friendly label used in browse + diagnostics | +| `Timeout` | TimeSpan? | null → driver-wide default | **PR 9 / #252** — wins over the driver-wide `Timeout`. Mix-and-match: SLC 5/01 ≈ 5 s, SLC 5/05 ≈ 2 s, MicroLogix 1100 ≈ 3 s | +| `Retries` | int? | null → driver-wide default → 0 | **PR 9 / #252** — retries on transient `BadCommunicationError`; terminal errors surface on the first attempt | + +JSON shape (mirrored on `AbLegacyDeviceDto`): + +```json +{ + "HostAddress": "ab://192.168.1.10/1,0", + "PlcFamily": "Slc500", + "DeviceName": "slc-5-01-line-A", + "TimeoutMs": 5000, + "Retries": 1 +} +``` + +Per-device overrides also flow into the probe loop — slow chassis won't be +falsely marked Stopped just because the driver-wide probe timeout is tight. + ## Key fixture / config files - `tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/AbLegacyServerFixture.cs` diff --git a/scripts/smoke/seed-ablegacy-smoke.sql b/scripts/smoke/seed-ablegacy-smoke.sql index d7daed8..43387be 100644 --- a/scripts/smoke/seed-ablegacy-smoke.sql +++ b/scripts/smoke/seed-ablegacy-smoke.sql @@ -80,15 +80,23 @@ VALUES (@Gen, @EqId, @EqUuid, @DrvId, @LineId, 'slc-sim', 'ablegacy-001', 1); -- AB Legacy DriverInstance — SLC 500 target. Replace the placeholder gateway -- `192.168.1.10` with the real PLC / RSEmulate host before running. +-- +-- PR 9 / #252 demo: the device row carries `"TimeoutMs": 500` + `"Retries": 1`, +-- both overriding the driver-wide `TimeoutMs: 2000` / `Retries: 0` defaults. +-- For real chassis tune per family (SLC 5/01 ≈ 5000, SLC 5/05 ≈ 2000, +-- MicroLogix 1100 ≈ 3000); see docs/Driver.AbLegacy.Cli.md for the cheat sheet. INSERT dbo.DriverInstance(GenerationId, DriverInstanceId, ClusterId, NamespaceId, Name, DriverType, DriverConfig, Enabled) VALUES (@Gen, @DrvId, @ClusterId, @NsId, 'ablegacy-smoke', 'AbLegacy', N'{ "TimeoutMs": 2000, + "Retries": 0, "Devices": [ { "HostAddress": "ab://127.0.0.1:44818/1,0", "PlcFamily": "Slc500", - "DeviceName": "slc-500" + "DeviceName": "slc-500", + "TimeoutMs": 500, + "Retries": 1 } ], "Probe": { "Enabled": true, "IntervalMs": 5000, "TimeoutMs": 2000, "ProbeAddress": "S:0" }, diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriver.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriver.cs index 3f68be6..62640c6 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriver.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriver.cs @@ -209,6 +209,24 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover internal DeviceState? GetDeviceState(string hostAddress) => _devices.TryGetValue(hostAddress, out var s) ? s : null; + /// + /// PR 9 — per-device timeout precedence: device-level override wins, otherwise the + /// driver-wide default. Probe loop has its own timeout knob via + /// but still falls back to the per-device + /// value when the probe override is absent (handled at the call site). + /// + internal TimeSpan ResolveTimeout(DeviceState device) => + device.Options.Timeout ?? _options.Timeout; + + /// + /// PR 9 — per-device retry count: device-level override wins, otherwise the driver-wide + /// default, otherwise zero (single attempt). The driver-wide default itself is + /// null by default so a vanilla AbLegacy config still issues exactly one read per + /// reference, matching pre-PR-9 behaviour. + /// + internal int ResolveRetries(DeviceState device) => + device.Options.Retries ?? _options.Retries ?? 0; + // ---- IReadable ---- public async Task> ReadAsync( @@ -232,57 +250,77 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover continue; } - try + // PR 9 — per-device retry loop: on transient BadCommunicationError (libplctag throw + // OR a non-zero status that maps to BadCommunicationError) retry up to N times. A + // terminal mapped status (e.g. BadNodeIdUnknown for a missing PLC tag, BadTypeMismatch + // for a decoder mismatch) is surfaced as-is — retrying won't fix it. Cancellation + // always rethrows. + var retries = ResolveRetries(device); + DataValueSnapshot? snapshot = null; + for (var attempt = 0; attempt <= retries; attempt++) { - var runtime = await EnsureTagRuntimeAsync(device, def, cancellationToken).ConfigureAwait(false); - await runtime.ReadAsync(cancellationToken).ConfigureAwait(false); - - var status = runtime.GetStatus(); - if (status != 0) + try { - results[i] = new DataValueSnapshot(null, - AbLegacyStatusMapper.MapLibplctagStatus(status), null, now); - _health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, - $"libplctag status {status} reading {reference}"); - continue; - } + var runtime = await EnsureTagRuntimeAsync(device, def, cancellationToken).ConfigureAwait(false); + await runtime.ReadAsync(cancellationToken).ConfigureAwait(false); - var parsed = AbLegacyAddress.TryParse(def.Address, device.Options.PlcFamily); - // PR 7 — array contiguous block. Decode N consecutive elements via the runtime's - // per-index accessor and box the result as a typed .NET array. The parser has - // already rejected array+bit and array+sub-element combinations, so the array - // path can ignore the bit/sub-element decoders entirely. - int arrayCount; - if (parsed is not null && (def.ArrayLength is not null || (parsed.ArrayCount ?? 1) > 1)) - { - arrayCount = ResolveElementCount(def, parsed); - } - else arrayCount = 1; + var status = runtime.GetStatus(); + if (status != 0) + { + var mappedStatus = AbLegacyStatusMapper.MapLibplctagStatus(status); + // Transient: BadCommunicationError → eligible for retry. + if (mappedStatus == AbLegacyStatusMapper.BadCommunicationError && attempt < retries) + { + continue; + } + snapshot = new DataValueSnapshot(null, mappedStatus, null, now); + _health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, + $"libplctag status {status} reading {reference}"); + break; + } - if (arrayCount > 1) - { - var arr = DecodeArrayAs(runtime, def.DataType, arrayCount); - results[i] = new DataValueSnapshot(arr, AbLegacyStatusMapper.Good, now, now); + var parsed = AbLegacyAddress.TryParse(def.Address, device.Options.PlcFamily); + // PR 7 — array contiguous block. Decode N consecutive elements via the runtime's + // per-index accessor and box the result as a typed .NET array. The parser has + // already rejected array+bit and array+sub-element combinations, so the array + // path can ignore the bit/sub-element decoders entirely. + int arrayCount; + if (parsed is not null && (def.ArrayLength is not null || (parsed.ArrayCount ?? 1) > 1)) + { + arrayCount = ResolveElementCount(def, parsed); + } + else arrayCount = 1; + + if (arrayCount > 1) + { + var arr = DecodeArrayAs(runtime, def.DataType, arrayCount); + snapshot = new DataValueSnapshot(arr, AbLegacyStatusMapper.Good, now, now); + _health = new DriverHealth(DriverState.Healthy, now, null); + break; + } + + // Timer/Counter/Control status bits route through GetBit at the parent-word + // address — translate the .DN/.EN/etc. sub-element to its standard bit position + // and pass it down to the runtime as a synthetic bitIndex. + var decodeBit = parsed?.BitIndex + ?? AbLegacyDataTypeExtensions.StatusBitIndex(def.DataType, parsed?.SubElement); + var value = runtime.DecodeValue(def.DataType, decodeBit); + snapshot = new DataValueSnapshot(value, AbLegacyStatusMapper.Good, now, now); _health = new DriverHealth(DriverState.Healthy, now, null); - continue; + break; + } + catch (OperationCanceledException) { throw; } + catch (Exception ex) + { + // Transient — exhaust retries before reporting BadCommunicationError. + if (attempt < retries) continue; + snapshot = new DataValueSnapshot(null, + AbLegacyStatusMapper.BadCommunicationError, null, now); + _health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, ex.Message); } - - // Timer/Counter/Control status bits route through GetBit at the parent-word - // address — translate the .DN/.EN/etc. sub-element to its standard bit position - // and pass it down to the runtime as a synthetic bitIndex. - var decodeBit = parsed?.BitIndex - ?? AbLegacyDataTypeExtensions.StatusBitIndex(def.DataType, parsed?.SubElement); - var value = runtime.DecodeValue(def.DataType, decodeBit); - results[i] = new DataValueSnapshot(value, AbLegacyStatusMapper.Good, now, now); - _health = new DriverHealth(DriverState.Healthy, now, null); - } - catch (OperationCanceledException) { throw; } - catch (Exception ex) - { - results[i] = new DataValueSnapshot(null, - AbLegacyStatusMapper.BadCommunicationError, null, now); - _health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, ex.Message); } + results[i] = snapshot ?? new DataValueSnapshot(null, + AbLegacyStatusMapper.BadCommunicationError, null, now); } return results; @@ -441,13 +479,17 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover private async Task ProbeLoopAsync(DeviceState state, CancellationToken ct) { + // PR 9 — per-device timeout wins over the probe's own timeout. Slow chassis (SLC 5/01 + // RS-232 ~5 s round-trip) need their per-device override to flow into the probe too, + // otherwise the probe times out before the device ever has a chance to respond. + var probeTimeout = state.Options.Timeout ?? _options.Probe.Timeout; var probeParams = new AbLegacyTagCreateParams( Gateway: state.ParsedAddress.Gateway, Port: state.ParsedAddress.Port, CipPath: state.ParsedAddress.CipPath, LibplctagPlcAttribute: state.Profile.LibplctagPlcAttribute, TagName: _options.Probe.ProbeAddress!, - Timeout: _options.Probe.Timeout); + Timeout: probeTimeout); IAbLegacyTagRuntime? probeRuntime = null; while (!ct.IsCancellationRequested) @@ -553,7 +595,7 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover CipPath: device.ParsedAddress.CipPath, LibplctagPlcAttribute: device.Profile.LibplctagPlcAttribute, TagName: parentName, - Timeout: _options.Timeout)); + Timeout: ResolveTimeout(device))); try { await runtime.InitializeAsync(ct).ConfigureAwait(false); @@ -601,7 +643,7 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover CipPath: device.ParsedAddress.CipPath, LibplctagPlcAttribute: device.Profile.LibplctagPlcAttribute, TagName: tagName, - Timeout: _options.Timeout, + Timeout: ResolveTimeout(device), ElementCount: elementCount)); try { diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverFactoryExtensions.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverFactoryExtensions.cs index ce69dab..e7b4e9d 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverFactoryExtensions.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverFactoryExtensions.cs @@ -38,7 +38,10 @@ public static class AbLegacyDriverFactoryExtensions $"AB Legacy config for '{driverInstanceId}' has a device missing HostAddress"), PlcFamily: ParseEnum(d.PlcFamily, driverInstanceId, "PlcFamily", fallback: AbLegacyPlcFamily.Slc500), - DeviceName: d.DeviceName))] + DeviceName: d.DeviceName, + // PR 9 — per-device timeout / retry overrides. Device-level wins over driver-wide. + Timeout: d.TimeoutMs is int devMs ? TimeSpan.FromMilliseconds(devMs) : null, + Retries: d.Retries))] : [], Tags = dto.Tags is { Count: > 0 } ? [.. dto.Tags.Select(t => new AbLegacyTagDefinition( @@ -64,6 +67,9 @@ public static class AbLegacyDriverFactoryExtensions ProbeAddress = dto.Probe?.ProbeAddress ?? "S:0", }, Timeout = TimeSpan.FromMilliseconds(dto.TimeoutMs ?? 2_000), + // PR 9 — driver-wide retry default. null ≡ 0 retries (single attempt). Per-device + // Retries on AbLegacyDeviceOptions still wins. + Retries = dto.Retries, }; return new AbLegacyDriver(options, driverInstanceId); @@ -95,6 +101,12 @@ public static class AbLegacyDriverFactoryExtensions internal sealed class AbLegacyDriverConfigDto { public int? TimeoutMs { get; init; } + /// + /// PR 9 — driver-wide retry count for transient BadCommunicationError reads. + /// null0 (single attempt). A per-device override on + /// wins. + /// + public int? Retries { get; init; } public List? Devices { get; init; } public List? Tags { get; init; } public AbLegacyProbeDto? Probe { get; init; } @@ -105,6 +117,20 @@ public static class AbLegacyDriverFactoryExtensions public string? HostAddress { get; init; } public string? PlcFamily { get; init; } public string? DeviceName { get; init; } + + /// + /// PR 9 — optional per-device timeout in ms. Wins over the driver-wide + /// . Tune this per chassis: SLC 5/01 + /// RS-232 ≈ 5000, SLC 5/05 ≈ 2000, MicroLogix 1100 ≈ 3000. + /// + public int? TimeoutMs { get; init; } + + /// + /// PR 9 — optional per-device retry count for transient BadCommunicationError + /// reads. Wins over the driver-wide . + /// null at both levels = single attempt. + /// + public int? Retries { get; init; } } internal sealed class AbLegacyTagDto diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverOptions.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverOptions.cs index 2c14e04..5aef5ba 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverOptions.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverOptions.cs @@ -13,13 +13,35 @@ public sealed class AbLegacyDriverOptions public IReadOnlyList Devices { get; init; } = []; public IReadOnlyList Tags { get; init; } = []; public AbLegacyProbeOptions Probe { get; init; } = new(); + + /// + /// Driver-wide default per-operation timeout. Applies to every device unless that device + /// overrides it via (PR 9). + /// public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(2); + + /// + /// PR 9 — driver-wide default retry count for transient + /// BadCommunicationError reads. null0 (single attempt). Applies + /// to every device unless that device overrides it via + /// . + /// + public int? Retries { get; init; } } +/// +/// Per-device options for the AB Legacy driver. PR 9 added optional +/// and overrides — chassis families have very different per-operation +/// latency floors (SLC 5/01 RS-232 ~5 s; SLC 5/05 ~2 s; ML1100 ~3 s) so a single driver-wide +/// timeout always misfires on at least one device. Both fields are optional and fall back +/// to the driver-wide default on . +/// public sealed record AbLegacyDeviceOptions( string HostAddress, AbLegacyPlcFamily PlcFamily = AbLegacyPlcFamily.Slc500, - string? DeviceName = null); + string? DeviceName = null, + TimeSpan? Timeout = null, + int? Retries = null); /// /// One PCCC-backed OPC UA variable. Address is the canonical PCCC file-address diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/AbLegacyPerDeviceTimeoutTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/AbLegacyPerDeviceTimeoutTests.cs new file mode 100644 index 0000000..c3496e1 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/AbLegacyPerDeviceTimeoutTests.cs @@ -0,0 +1,68 @@ +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; +using ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.PlcFamilies; + +namespace ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests; + +/// +/// PR 9 — per-device timeout integration scaffold. Build-only at PR 9 time: the ab_server +/// PCCC simulator answers in <100 ms locally so a 500 ms per-device timeout doesn't +/// normally trip. Either an iptables --delay sidecar or a tc qdisc netem +/// filter must be wired up first; until then the test asserts that a generous +/// per-device timeout still completes successfully (the precedence path itself is +/// exercised), with the slow-path failure case expressed in unit tests via +/// . +/// +[Collection(AbLegacyServerCollection.Name)] +[Trait("Category", "Integration")] +[Trait("Simulator", "ab_server-PCCC")] +public sealed class AbLegacyPerDeviceTimeoutTests(AbLegacyServerFixture sim) +{ + [AbLegacyFact] + public async Task Per_device_Timeout_override_flows_into_runtime_against_ab_server() + { + if (sim.SkipReason is not null) Assert.Skip(sim.SkipReason); + + var deviceUri = $"ab://{sim.Host}:{sim.Port}/{sim.CipPath}"; + await using var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + // Driver-wide tight 500 ms; per-device override gives the simulator 5 s headroom + // to demonstrate the precedence rule in a wire-level setting. + Timeout = TimeSpan.FromMilliseconds(500), + Devices = [new AbLegacyDeviceOptions( + deviceUri, + AbLegacyPlcFamily.Slc500, + Timeout: TimeSpan.FromSeconds(5))], + Tags = [new AbLegacyTagDefinition( + Name: "IntCounter", + DeviceHostAddress: deviceUri, + Address: "N7:0", + DataType: AbLegacyDataType.Int)], + Probe = new AbLegacyProbeOptions { Enabled = false }, + }, driverInstanceId: "ablegacy-pr9-timeout"); + + await drv.InitializeAsync("{}", TestContext.Current.CancellationToken); + var snapshots = await drv.ReadAsync(["IntCounter"], TestContext.Current.CancellationToken); + + // Per-device override picked up; the read against the simulator succeeds because the + // 5 s per-device cap supersedes the otherwise-too-tight 500 ms driver-wide default. + snapshots.Single().StatusCode.ShouldBe(AbLegacyStatusMapper.Good); + } + + /// + /// Skeleton for the inverse — slow-link (tc qdisc / iptables --delay) + + /// tight per-device timeout. Skipped pending the netem sidecar work tracked in + /// Docker/README.md. + /// + [AbLegacyFact(Skip = "Pending netem / iptables-delay sidecar — see Docker/README.md")] + public Task Per_device_Timeout_below_simulated_delay_surfaces_BadCommunicationError() + { + // Future shape: + // docker compose --profile slc500-slow up -d (adds netem qdisc on the egress) + // override Timeout: TimeSpan.FromMilliseconds(100) + // ReadAsync ⇒ snapshots.Single().StatusCode == BadCommunicationError + // while a sibling device (no override → 5 s) keeps reading Good. + return Task.CompletedTask; + } +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/Docker/README.md b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/Docker/README.md index 6dcda63..e480c6b 100644 --- a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/Docker/README.md +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/Docker/README.md @@ -139,6 +139,38 @@ bit writes to real hardware or RSEmulate 500 until upstream resolves. See [`docs/drivers/AbLegacy-Test-Fixture.md`](../../../docs/drivers/AbLegacy-Test-Fixture.md) for the full coverage map. +## Per-device timeout fixture (PR 9 / #252) — TODO + +`AbLegacyPerDeviceTimeoutTests.Per_device_Timeout_below_simulated_delay_surfaces_BadCommunicationError` +needs a slow-link sidecar before it can run for real. The simulator answers +in <100 ms locally, so a 500 ms per-device timeout never trips against +the unmodified container. + +Two options, neither wired up at PR 9 time: + +1. **`tc qdisc` netem inside the container** — add to `docker-compose.yml`: + + ```yaml + # services: + # ablegacy-slc500-slow: + # extends: ablegacy-slc500 + # cap_add: [NET_ADMIN] + # command: > + # sh -c "tc qdisc add dev eth0 root netem delay 800ms && + # ab_server --plc=SLC500 --port=44818 --path=1,0 --tag=N7[200]:INT16" + ``` + + `--cap-add=NET_ADMIN` is required because `tc qdisc` mutates the + container's egress queue. Combine with `AB_LEGACY_COMPOSE_PROFILE=slc500-slow` + to point the suite at the slow profile. + +2. **`iptables --delay` shim** — sidecar container that NATs port 44818 and + adds a fixed delay on the SYN/ACK + payload path. More portable than + netem (no `NET_ADMIN` on the simulator itself) but adds a hop. + +When either lands, drop the `Skip = …` on the integration test and assert +the precedence rule end-to-end. + ## References - [libplctag on GitHub](https://github.com/libplctag/libplctag) — `ab_server` diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests/AbLegacyPerDeviceTimeoutTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests/AbLegacyPerDeviceTimeoutTests.cs new file mode 100644 index 0000000..1342416 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests/AbLegacyPerDeviceTimeoutTests.cs @@ -0,0 +1,283 @@ +using System.Text.Json; +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; +using ZB.MOM.WW.OtOpcUa.Driver.AbLegacy; +using ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.PlcFamilies; + +namespace ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests; + +/// +/// PR 9 — per-device Timeout + Retries overrides. SLC 5/01 needs ~5 s, +/// SLC 5/05 ~2 s, MicroLogix 1100 ~3 s — a single driver-wide timeout always misfires on +/// at least one chassis. Verifies the precedence rules (device > driver-wide > default), +/// that the resolved timeout flows into , and +/// that the retry loop honours the per-device count. +/// +[Trait("Category", "Unit")] +public sealed class AbLegacyPerDeviceTimeoutTests +{ + private const string Host = "ab://10.0.0.5/1,0"; + + [Fact] + public async Task Per_device_Timeout_flows_into_AbLegacyTagCreateParams() + { + var factory = new FakeAbLegacyTagFactory(); + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + // Driver-wide default 2 s — the device override below should win. + Timeout = TimeSpan.FromSeconds(2), + Devices = + [ + new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500, + DeviceName: "slc-501", + Timeout: TimeSpan.FromSeconds(5)), + ], + Tags = [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + await drv.ReadAsync(["X"], CancellationToken.None); + + factory.Tags["N7:0"].CreationParams.Timeout.ShouldBe(TimeSpan.FromSeconds(5)); + } + + [Fact] + public async Task Absent_per_device_Timeout_falls_back_to_driver_wide() + { + var factory = new FakeAbLegacyTagFactory(); + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + Timeout = TimeSpan.FromSeconds(7), + Devices = [new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500)], + Tags = [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + await drv.ReadAsync(["X"], CancellationToken.None); + + factory.Tags["N7:0"].CreationParams.Timeout.ShouldBe(TimeSpan.FromSeconds(7)); + } + + [Fact] + public async Task Two_devices_each_use_their_own_Timeout_override() + { + const string fastHost = "ab://10.0.0.5/1,0"; + const string slowHost = "ab://10.0.0.6/"; + var factory = new FakeAbLegacyTagFactory(); + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + Timeout = TimeSpan.FromSeconds(2), + Devices = + [ + new AbLegacyDeviceOptions(fastHost, AbLegacyPlcFamily.Slc500, + Timeout: TimeSpan.FromMilliseconds(500)), + new AbLegacyDeviceOptions(slowHost, AbLegacyPlcFamily.MicroLogix, + Timeout: TimeSpan.FromSeconds(5)), + ], + Tags = + [ + new AbLegacyTagDefinition("Fast", fastHost, "N7:0", AbLegacyDataType.Int), + new AbLegacyTagDefinition("Slow", slowHost, "N7:1", AbLegacyDataType.Int), + ], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + await drv.ReadAsync(["Fast", "Slow"], CancellationToken.None); + + factory.Tags["N7:0"].CreationParams.Timeout.ShouldBe(TimeSpan.FromMilliseconds(500)); + factory.Tags["N7:1"].CreationParams.Timeout.ShouldBe(TimeSpan.FromSeconds(5)); + } + + [Fact] + public async Task Per_device_Retries_2_yields_3_attempts_before_failure() + { + var factory = new FakeAbLegacyTagFactory(); + factory.Customise = p => new FakeAbLegacyTag(p) + { + ThrowOnRead = true, + Exception = new TimeoutException("simulated transient"), + }; + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + Devices = + [ + new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500, Retries: 2), + ], + Tags = [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + var snapshots = await drv.ReadAsync(["X"], CancellationToken.None); + + snapshots.Single().StatusCode.ShouldBe(AbLegacyStatusMapper.BadCommunicationError); + // 1 initial + 2 retries = 3 attempts. + factory.Tags["N7:0"].ReadCount.ShouldBe(3); + } + + [Fact] + public async Task No_Retries_yields_single_attempt() + { + var factory = new FakeAbLegacyTagFactory(); + factory.Customise = p => new FakeAbLegacyTag(p) + { + ThrowOnRead = true, + Exception = new TimeoutException("simulated transient"), + }; + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + // Both null — defaults to 0 retries (single attempt). + Devices = [new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500)], + Tags = [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + await drv.ReadAsync(["X"], CancellationToken.None); + + factory.Tags["N7:0"].ReadCount.ShouldBe(1); + } + + [Fact] + public async Task Driver_wide_Retries_applies_when_device_omits_override() + { + var factory = new FakeAbLegacyTagFactory(); + factory.Customise = p => new FakeAbLegacyTag(p) + { + ThrowOnRead = true, + Exception = new TimeoutException("simulated transient"), + }; + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + Retries = 1, // driver-wide → 2 attempts total + Devices = [new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500)], + Tags = [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + await drv.ReadAsync(["X"], CancellationToken.None); + + factory.Tags["N7:0"].ReadCount.ShouldBe(2); + } + + [Fact] + public async Task Per_device_Retries_overrides_driver_wide_default() + { + var factory = new FakeAbLegacyTagFactory(); + factory.Customise = p => new FakeAbLegacyTag(p) + { + ThrowOnRead = true, + Exception = new TimeoutException("simulated transient"), + }; + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + Retries = 5, // driver-wide + // Per-device says zero retries — should win, single attempt. + Devices = [new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500, Retries: 0)], + Tags = [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + await drv.ReadAsync(["X"], CancellationToken.None); + + factory.Tags["N7:0"].ReadCount.ShouldBe(1); + } + + [Fact] + public async Task Successful_read_after_one_transient_does_not_burn_remaining_retries() + { + // Verifies retries stop once the call succeeds — we shouldn't keep hammering. + var factory = new FakeAbLegacyTagFactory(); + var attemptsBeforeSuccess = 1; + factory.Customise = p => + { + var fake = new FlappyFake(p) + { + FailFirstN = attemptsBeforeSuccess, + FinalValue = 42, + }; + return fake; + }; + var drv = new AbLegacyDriver(new AbLegacyDriverOptions + { + Devices = [new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500, Retries: 3)], + Tags = [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)], + }, "drv-1", factory); + await drv.InitializeAsync("{}", CancellationToken.None); + + var snapshots = await drv.ReadAsync(["X"], CancellationToken.None); + + snapshots.Single().StatusCode.ShouldBe(AbLegacyStatusMapper.Good); + snapshots.Single().Value.ShouldBe(42); + // 1 throw + 1 success = 2 attempts (we should NOT use all 4). + factory.Tags["N7:0"].ReadCount.ShouldBe(2); + } + + [Fact] + public async Task Dto_round_trip_preserves_TimeoutMs_and_Retries_at_both_levels() + { + const string json = """ + { + "TimeoutMs": 4000, + "Retries": 1, + "Devices": [ + { + "HostAddress": "ab://10.0.0.5/1,0", + "PlcFamily": "Slc500", + "TimeoutMs": 5000, + "Retries": 2 + } + ], + "Probe": { "Enabled": false }, + "Tags": [ + { "Name": "X", "DeviceHostAddress": "ab://10.0.0.5/1,0", "Address": "N7:0", "DataType": "Int" } + ] + } + """; + + // Use the static factory so we exercise the deserialisation path used in production. + var drv = AbLegacyDriverFactoryExtensions.CreateInstance("drv-roundtrip", json); + await drv.InitializeAsync(json, CancellationToken.None); + + var state = drv.GetDeviceState("ab://10.0.0.5/1,0").ShouldNotBeNull(); + + state.Options.Timeout.ShouldBe(TimeSpan.FromMilliseconds(5000)); + state.Options.Retries.ShouldBe(2); + + // Per-device override wins over driver-wide. + drv.ResolveRetries(state).ShouldBe(2); + drv.ResolveTimeout(state).ShouldBe(TimeSpan.FromMilliseconds(5000)); + + await drv.ShutdownAsync(CancellationToken.None); + } + + /// + /// A fake that throws the first FailFirstN reads then succeeds. Used to assert + /// the retry loop stops once a call succeeds — it should not exhaust the retry budget. + /// + private sealed class FlappyFake : FakeAbLegacyTag + { + public int FailFirstN { get; set; } + public object? FinalValue { get; set; } + private int _calls; + public FlappyFake(AbLegacyTagCreateParams p) : base(p) { } + + public override Task ReadAsync(CancellationToken ct) + { + _calls++; + // Increment ReadCount via the base accessor (it does its own increment + throw + // bookkeeping). Toggle ThrowOnRead based on the call number so the base helper does + // the throw for us. + if (_calls <= FailFirstN) + { + ThrowOnRead = true; + Exception = new TimeoutException("flap"); + } + else + { + ThrowOnRead = false; + Value = FinalValue; + } + return base.ReadAsync(ct); + } + } +}