Files
lmxopcua/tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/ModbusCoalescingAutoRecoveryTests.cs
Joseph Doherty 9e4aae350b Task #151 — Modbus coalescing: periodic re-probe of auto-prohibitions
#148 introduced auto-prohibited coalesced ranges that persist for the
driver lifetime. Long-running deployments with transient PLC permission
changes (firmware update unlocking a previously-protected register,
operator reconfiguring the device) had no recovery short of operator
restart.

Adds an opt-in background loop that re-probes each prohibition periodically:

- ModbusDriverOptions.AutoProhibitReprobeInterval (TimeSpan?, default null
  = disabled). Set to e.g. TimeSpan.FromHours(1) to opt in.
- _autoProhibited refactored from HashSet<key> to Dictionary<key, DateTime>
  so each entry tracks its last failure / last re-probe timestamp.
- ReprobeLoopAsync runs on the same Task.Run pattern as ProbeLoopAsync;
  cancelled by ShutdownAsync. Each tick snapshots the prohibition set
  and issues a one-shot coalesced read per range. Successful re-probes
  drop the prohibition; failed ones bump the timestamp + leave the
  prohibition in place.
- Communication failures during re-probe (transport-level) are treated
  the same as PLC-exception failures — the prohibition stays, but isn't
  upgraded to "permanent" since transports recover. The driver-instance
  health surface picks up the failure separately.
- ShutdownAsync explicitly clears the prohibition set so a manual restart
  via ReinitializeAsync starts with a clean slate (matches the old
  "restart to clear" semantics).
- Factory DTO + JSON binding extended with AutoProhibitReprobeMs field.

Tests (2 new, additive to the 3 in ModbusCoalescingAutoRecoveryTests):
- Reprobe_Clears_Prohibition_When_Range_Becomes_Healthy — protected
  register at 102 records prohibition; clearing the simulated protection
  + invoking the re-probe drops the prohibition.
- Reprobe_Leaves_Prohibition_When_Range_Is_Still_Bad — re-probe on a
  still-failing range keeps the prohibition in place.

Tests use a new internal RunReprobeOnceForTestAsync helper to fire one
re-probe pass synchronously, so the suite doesn't have to wait on the
background timer (the loop's timer behaviour is exercised implicitly via
the InitializeAsync wire-up + the synchronous helper sharing the actual
re-probe code path).

234 + 2 = 236 unit tests green.
2026-04-25 01:12:48 -04:00

189 lines
10 KiB
C#

using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests;
/// <summary>
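/// #148 — block-coalescing auto-recovery from protected register holes. When a coalesced
/// FC03 fails with a Modbus exception, the planner records the failed range and stops
/// re-coalescing across it on subsequent scans. Healthy tags around the protected hole
/// keep working without operator intervention.
/// #151 — periodic re-probe of those prohibitions: a prohibition is dropped again once a
/// re-probe of its range succeeds, and stays in place while the re-probe keeps failing.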
/// </summary>
[Trait("Category", "Unit")]
public sealed class ModbusCoalescingAutoRecoveryTests
{
/// <summary>
/// Programmable fake transport for the coalescing tests. Every FC03/FC04 request is
/// recorded in <see cref="Reads"/>. When the requested span covers
/// <see cref="ProtectedAddress"/>, the returned task is faulted with a
/// <see cref="ModbusException"/> carrying exception code 0x02 (IllegalDataAddress);
/// otherwise the read is answered with zero-filled data of the requested size.
/// </summary>
private sealed class ProtectedHoleTransport : IModbusTransport
{
    /// <summary>
    /// Register address whose presence in a read span triggers the exception. Defaults to
    /// <see cref="ushort.MaxValue"/>; the tests also reset it to that value to simulate
    /// the protection being lifted (none of them reads that address).
    /// </summary>
    public ushort ProtectedAddress { get; set; } = ushort.MaxValue;

    /// <summary>
    /// FC03/FC04 requests seen so far, in arrival order, including the ones that were
    /// answered with an exception. Tests clear it between scans.
    /// </summary>
    public List<(byte Fc, ushort Address, ushort Quantity)> Reads { get; } = new();

    /// <summary>No-op: the fake has no connection to establish.</summary>
    public Task ConnectAsync(CancellationToken ct) => Task.CompletedTask;

    /// <summary>
    /// Answers a request PDU. Reads (0x03/0x04) are recorded and then either faulted or
    /// answered with zeros; any other function code gets a fixed three-byte reply that
    /// starts with the function code.
    /// </summary>
    /// <param name="unitId">Ignored by the fake.</param>
    /// <param name="pdu">Request PDU: function code, then big-endian address and quantity.</param>
    /// <param name="ct">Ignored by the fake.</param>
    /// <returns>The response PDU, or a task faulted with <see cref="ModbusException"/>.</returns>
    public Task<byte[]> SendAsync(byte unitId, byte[] pdu, CancellationToken ct)
    {
        var functionCode = pdu[0];
        var addr = (ushort)((pdu[1] << 8) | pdu[2]);
        var qty = (ushort)((pdu[3] << 8) | pdu[4]);

        if (functionCode is not (0x03 or 0x04))
        {
            return Task.FromResult(new byte[] { functionCode, 0, 0 });
        }

        // Record before deciding the outcome so failed reads show up in Reads too.
        Reads.Add((functionCode, addr, qty));

        // The failure is signalled by faulting the task with ModbusException, not by
        // returning an exception PDU. addr + qty is evaluated as int, so the upper bound
        // cannot wrap around at the top of the address space.
        if (ProtectedAddress >= addr && ProtectedAddress < addr + qty)
        {
            return Task.FromException<byte[]>(new ModbusException(functionCode, 0x02, "IllegalDataAddress"));
        }

        // Response layout: function code, byte count, then qty zero-valued registers.
        var resp = new byte[2 + qty * 2];
        resp[0] = functionCode;
        resp[1] = (byte)(qty * 2);
        return Task.FromResult(resp);
    }

    /// <summary>No-op: the fake holds no resources.</summary>
    public ValueTask DisposeAsync() => ValueTask.CompletedTask;
}
[Fact]
public async Task First_Failure_Falls_Back_To_PerTag_Same_Scan()
{
    var ct = CancellationToken.None;
    var transport = new ProtectedHoleTransport { ProtectedAddress = 102 };

    // Tags at 100, 102 (protected) and 104. MaxReadGap=5 lets the planner merge them into
    // a single 100..104 block, and that FC03 (quantity 5) fails because it spans 102.
    // Before #148 all three tags came back Bad; with #148 the same scan retries per tag,
    // so the two healthy neighbours still deliver Good values.
    var options = new ModbusDriverOptions
    {
        Host = "f",
        Tags =
        [
            new ModbusTagDefinition("T100", ModbusRegion.HoldingRegisters, 100, ModbusDataType.Int16),
            new ModbusTagDefinition("T102", ModbusRegion.HoldingRegisters, 102, ModbusDataType.Int16),
            new ModbusTagDefinition("T104", ModbusRegion.HoldingRegisters, 104, ModbusDataType.Int16),
        ],
        MaxReadGap = 5,
        Probe = new ModbusProbeOptions { Enabled = false },
    };
    var driver = new ModbusDriver(options, "m1", _ => transport);
    await driver.InitializeAsync("{}", ct);

    var results = await driver.ReadAsync(["T100", "T102", "T104"], ct);

    // Healthy neighbours succeed through the per-tag fallback; the protected register
    // itself still reports its failure, now scoped to that single tag.
    results[0].StatusCode.ShouldBe(0u, "T100 should succeed via per-tag fallback");
    results[2].StatusCode.ShouldBe(0u, "T104 should succeed via per-tag fallback");
    results[1].StatusCode.ShouldNotBe(0u, "T102 is the protected address — single-tag read still surfaces the exception");

    await driver.ShutdownAsync(ct);
}
[Fact]
public async Task Second_Scan_Skips_Coalesced_Read_Of_Prohibited_Range()
{
    // Tags 100, 102 and 104 merge into one 100..104 block (MaxReadGap = 5) that spans the
    // protected register at 102.
    var fake = new ProtectedHoleTransport { ProtectedAddress = 102 };
    var t100 = new ModbusTagDefinition("T100", ModbusRegion.HoldingRegisters, 100, ModbusDataType.Int16);
    var t102 = new ModbusTagDefinition("T102", ModbusRegion.HoldingRegisters, 102, ModbusDataType.Int16);
    var t104 = new ModbusTagDefinition("T104", ModbusRegion.HoldingRegisters, 104, ModbusDataType.Int16);
    var opts = new ModbusDriverOptions
    {
        Host = "f",
        Tags = [t100, t102, t104],
        MaxReadGap = 5,
        Probe = new ModbusProbeOptions { Enabled = false },
    };
    var drv = new ModbusDriver(opts, "m1", _ => fake);
    await drv.InitializeAsync("{}", CancellationToken.None);

    // Scan 1: planner forms 100..104 block, fails, records the prohibition.
    await drv.ReadAsync(["T100", "T102", "T104"], CancellationToken.None);
    drv.AutoProhibitedRangeCount.ShouldBe(1);

    // Scan 2: planner sees the prohibition, doesn't form the 100..104 block, falls back to
    // per-tag for everyone. Expected scan-2 PDUs: 3 (one per tag) — vs 1 failed coalesced
    // read + 3 per-tag fallbacks if the merge were retried.
    fake.Reads.Clear();
    await drv.ReadAsync(["T100", "T102", "T104"], CancellationToken.None);

    // Guard against a vacuous pass: the negative assertion below would also hold if the
    // second scan never reached the transport at all.
    fake.Reads.ShouldNotBeEmpty("scan 2 must still read the tags individually");

    // A read starting at 100 with quantity > 1 can only be the merged block.
    var coalescedAttemptedAgain = fake.Reads.Any(r => r.Address == 100 && r.Quantity > 1);
    coalescedAttemptedAgain.ShouldBeFalse("planner must NOT re-attempt the prohibited block");

    await drv.ShutdownAsync(CancellationToken.None);
}
[Fact]
public async Task Reprobe_Clears_Prohibition_When_Range_Becomes_Healthy()
{
    // #151 — when AutoProhibitReprobeInterval is set, the background loop retries each
    // prohibition periodically. We exercise that via the test-only RunReprobeOnceForTestAsync
    // helper rather than waiting for the timer (which would slow the suite).
    var fake = new ProtectedHoleTransport { ProtectedAddress = 102 };
    var t100 = new ModbusTagDefinition("T100", ModbusRegion.HoldingRegisters, 100, ModbusDataType.Int16);
    var t102 = new ModbusTagDefinition("T102", ModbusRegion.HoldingRegisters, 102, ModbusDataType.Int16);
    var t104 = new ModbusTagDefinition("T104", ModbusRegion.HoldingRegisters, 104, ModbusDataType.Int16);
    var opts = new ModbusDriverOptions
    {
        Host = "f",
        Tags = [t100, t102, t104],
        MaxReadGap = 5,
        // Non-null so the re-probe feature is switched on, but far longer than the test
        // runs: the background timer must not tick on its own and race the explicit
        // re-probe below (it could clear the prohibition before the helper gets to it).
        AutoProhibitReprobeInterval = TimeSpan.FromHours(1),
        Probe = new ModbusProbeOptions { Enabled = false },
    };
    var drv = new ModbusDriver(opts, "m1", _ => fake);
    await drv.InitializeAsync("{}", CancellationToken.None);
    try
    {
        // Scan 1: coalesced read fails, prohibition recorded.
        await drv.ReadAsync(["T100", "T102", "T104"], CancellationToken.None);
        drv.AutoProhibitedRangeCount.ShouldBe(1);

        // Operator unlocks the protected register at the PLC (firmware update etc.). The
        // re-probe should now succeed and clear the prohibition.
        fake.ProtectedAddress = ushort.MaxValue;
        await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
        drv.AutoProhibitedRangeCount.ShouldBe(0, "re-probe must clear the prohibition once the range is healthy");
    }
    finally
    {
        // Always shut down so the background re-probe loop is cancelled even when an
        // assertion above fails.
        await drv.ShutdownAsync(CancellationToken.None);
    }
}
[Fact]
public async Task Reprobe_Leaves_Prohibition_When_Range_Is_Still_Bad()
{
    // #151 — a re-probe of a range that still fails must not drop the prohibition.
    var fake = new ProtectedHoleTransport { ProtectedAddress = 102 };
    var t100 = new ModbusTagDefinition("T100", ModbusRegion.HoldingRegisters, 100, ModbusDataType.Int16);
    var t102 = new ModbusTagDefinition("T102", ModbusRegion.HoldingRegisters, 102, ModbusDataType.Int16);
    var t104 = new ModbusTagDefinition("T104", ModbusRegion.HoldingRegisters, 104, ModbusDataType.Int16);
    var opts = new ModbusDriverOptions
    {
        Host = "f",
        Tags = [t100, t102, t104],
        MaxReadGap = 5,
        // Non-null so the re-probe feature is switched on, but far longer than the test
        // runs: only the explicit RunReprobeOnceForTestAsync call below should re-probe,
        // never the background timer.
        AutoProhibitReprobeInterval = TimeSpan.FromHours(1),
        Probe = new ModbusProbeOptions { Enabled = false },
    };
    var drv = new ModbusDriver(opts, "m1", _ => fake);
    await drv.InitializeAsync("{}", CancellationToken.None);
    try
    {
        // Scan 1: the coalesced 100..104 read spans the protected register and fails,
        // which records the prohibition.
        await drv.ReadAsync(["T100", "T102", "T104"], CancellationToken.None);
        drv.AutoProhibitedRangeCount.ShouldBe(1);

        // Re-probe with the protected register still bad — prohibition stays.
        await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
        drv.AutoProhibitedRangeCount.ShouldBe(1, "re-probe failure must keep the prohibition in place");
    }
    finally
    {
        // Always shut down so the background re-probe loop is cancelled even when an
        // assertion above fails.
        await drv.ShutdownAsync(CancellationToken.None);
    }
}
[Fact]
public async Task Tags_Outside_Prohibited_Range_Still_Coalesce()
{
    var ct = CancellationToken.None;
    var transport = new ProtectedHoleTransport { ProtectedAddress = 102 };

    // Two clusters: 100..104 spans the protected register and is expected to fail, while
    // 200..202 sits well away from it. Prohibiting the first cluster must not stop the
    // second one from being merged on later scans.
    var options = new ModbusDriverOptions
    {
        Host = "f",
        Tags =
        [
            new ModbusTagDefinition("T100", ModbusRegion.HoldingRegisters, 100, ModbusDataType.Int16),
            new ModbusTagDefinition("T102", ModbusRegion.HoldingRegisters, 102, ModbusDataType.Int16),
            new ModbusTagDefinition("T104", ModbusRegion.HoldingRegisters, 104, ModbusDataType.Int16),
            new ModbusTagDefinition("T200", ModbusRegion.HoldingRegisters, 200, ModbusDataType.Int16),
            new ModbusTagDefinition("T202", ModbusRegion.HoldingRegisters, 202, ModbusDataType.Int16),
        ],
        MaxReadGap = 5,
        Probe = new ModbusProbeOptions { Enabled = false },
    };
    var driver = new ModbusDriver(options, "m1", _ => transport);
    await driver.InitializeAsync("{}", ct);

    // First scan runs into the protected register; only the second scan's traffic is
    // inspected, so the recorded reads are wiped in between.
    await driver.ReadAsync(["T100", "T102", "T104", "T200", "T202"], ct);
    transport.Reads.Clear();
    await driver.ReadAsync(["T100", "T102", "T104", "T200", "T202"], ct);

    // A single read of 3 registers starting at 200 is the merged 200..202 block, which
    // does not overlap the 100..104 prohibition.
    var coalesced200Block = transport.Reads.Exists(read => read.Address == 200 && read.Quantity == 3);
    coalesced200Block.ShouldBeTrue("the 200..202 block must keep coalescing — it's outside the prohibited range");

    await driver.ShutdownAsync(ct);
}
}