Task #150 — Modbus coalescing: bisection-style range narrowing

Pre-#150 a coalesced read failure recorded the FULL failed range as
permanently prohibited. Healthy registers around the actual protected
register stayed in per-tag mode forever (until ReinitializeAsync). The
re-probe loop shipped in #151 retried the whole range as a single block,
which would either succeed (clearing everything) or fail (changing
nothing).

Post-#150 the re-probe loop bisects multi-register prohibitions:

- _autoProhibited refactored from Dictionary<key, DateTime> to
  Dictionary<key, ProhibitionState> where ProhibitionState carries
  LastProbedUtc + SplitPending. Multi-register prohibitions enter with
  SplitPending=true; single-register prohibitions enter with
  SplitPending=false (already minimal).
- ReprobeLoopAsync delegates the per-pass work to
  RunReprobeOnceForTestAsync (also exposed for synchronous test driving).
  Each entry routes to BisectAndReprobeAsync (split-pending + multi-reg)
  or StraightReprobeAsync (single-reg / non-split-pending).
- Bisection: split (start, end) at mid = (start+end)/2. Try (start, mid)
  and (mid+1, end) as separate coalesced reads. Each FAILED half re-enters
  the prohibition map with SplitPending = (its end > its start). SUCCEEDED
  halves vanish, freeing the planner to coalesce across them on the next
  scan.
- Convergence: log2(span) re-probe ticks pin the prohibition to the
  actual single offending register(s). For a 100-register block with one
  protected address that's ~7 ticks.

Tests (3 new ModbusCoalescingBisectionTests):
- Bisection_Narrows_Multi_Register_Prohibition_Per_Reprobe — 11 tags
  100..110 with protected address 105. After 4 re-probe passes the
  prohibition collapses from (100..110) → (100..105) → (103..105) →
  (105..105).
- Bisection_Clears_When_Both_Halves_Are_Healthy — transient failure
  scenario; protection lifted before re-probe; both bisection halves
  succeed and the parent vanishes entirely.
- Bisection_Splits_Into_Two_When_Both_Halves_Still_Fail — TwoHoleTransport
  with protected addresses 102 + 108 in the same coalesced range. After
  bisection both halves still fail (each contains one of the protected
  addresses); the prohibition map grows to 2 entries.

236 + 3 = 239 unit tests green. Solution build clean.
This commit is contained in:
Joseph Doherty
2026-04-25 01:16:09 -04:00
parent 9e4aae350b
commit f823c81c96
2 changed files with 291 additions and 58 deletions

View File

@@ -409,7 +409,19 @@ public sealed class ModbusDriver
/// Cleared by ReinitializeAsync (operator restart) or by an explicit re-probe API
/// (not yet shipped).
/// </summary>
private readonly Dictionary<(byte Unit, ModbusRegion Region, ushort Start, ushort End), DateTime> _autoProhibited = new();
/// <summary>
/// #150 — per-prohibition state. <c>SplitPending</c> drives the re-probe loop's
/// bisection: when true and the range spans &gt; 1 register, the next re-probe
/// tries the two halves separately to narrow the actual offending register(s).
/// Single-register prohibitions can't be split further; they stay re-probed as-is.
/// </summary>
private sealed class ProhibitionState
{
public DateTime LastProbedUtc;
public bool SplitPending;
}
private readonly Dictionary<(byte Unit, ModbusRegion Region, ushort Start, ushort End), ProhibitionState> _autoProhibited = new();
private readonly object _autoProhibitedLock = new();
private CancellationTokenSource? _reprobeCts;
@@ -431,7 +443,16 @@ public sealed class ModbusDriver
private void RecordAutoProhibition(byte unit, ModbusRegion region, ushort start, ushort end)
{
lock (_autoProhibitedLock) _autoProhibited[(unit, region, start, end)] = DateTime.UtcNow;
lock (_autoProhibitedLock)
{
// Multi-register prohibitions enter the bisection workflow on the next re-probe;
// single-register prohibitions are already minimal and skip bisection.
_autoProhibited[(unit, region, start, end)] = new ProhibitionState
{
LastProbedUtc = DateTime.UtcNow,
SplitPending = end > start,
};
}
}
/// <summary>Test/diagnostic accessor — returns the current auto-prohibited range count.</summary>
@@ -441,80 +462,121 @@ public sealed class ModbusDriver
}
/// <summary>
/// #151 — periodic re-probe loop. Wakes every <c>AutoProhibitReprobeInterval</c> and
/// retries each auto-prohibited range with a one-shot coalesced read. Successful
/// re-probes drop the prohibition; failed ones leave it in place + bump the
/// last-probed timestamp so the next attempt waits another full interval.
/// Lives for the driver lifetime; cancelled by <c>ShutdownAsync</c>.
/// #151 — periodic re-probe loop, augmented in #150 with bisection-style narrowing.
/// Each tick processes every prohibition: split-pending multi-register ranges get
/// bisected (try left + right halves; replace with whichever halves still fail),
/// single-register or non-split-pending ranges get a straight re-probe. Lives for
/// the driver lifetime; cancelled by <c>ShutdownAsync</c>.
/// </summary>
private async Task ReprobeLoopAsync(CancellationToken ct)
{
var interval = _options.AutoProhibitReprobeInterval!.Value;
var transport = _transport;
while (!ct.IsCancellationRequested)
{
try { await Task.Delay(interval, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
if (transport is null) continue;
// Snapshot the prohibition set so we can release the lock during the wire calls.
(byte Unit, ModbusRegion Region, ushort Start, ushort End)[] candidates;
lock (_autoProhibitedLock)
candidates = _autoProhibited.Keys.ToArray();
foreach (var p in candidates)
{
if (ct.IsCancellationRequested) return;
var fc = p.Region == ModbusRegion.HoldingRegisters ? (byte)0x03 : (byte)0x04;
var qty = (ushort)(p.End - p.Start + 1);
try
{
using var probeCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
probeCts.CancelAfter(_options.Timeout);
_ = await ReadRegisterBlockAsync(transport, p.Unit, fc, p.Start, qty, probeCts.Token).ConfigureAwait(false);
// Range is healthy now — drop the prohibition. Next data scan re-coalesces normally.
lock (_autoProhibitedLock) _autoProhibited.Remove(p);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
catch
{
// Still bad. Bump the timestamp so it shows up on diagnostics as recently
// re-probed — the prohibition stays in place.
lock (_autoProhibitedLock)
{
if (_autoProhibited.ContainsKey(p))
_autoProhibited[p] = DateTime.UtcNow;
}
}
}
try { await RunReprobeOnceForTestAsync(ct).ConfigureAwait(false); }
catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
}
}
/// <summary>Test/diagnostic accessor — fires one re-probe pass synchronously for tests.</summary>
/// <summary>
/// One re-probe pass. Public-but-internal so tests can drive it synchronously rather
/// than wait on the background timer. Iterates a snapshot of the prohibition set; for
/// each entry decides between bisection (multi-register + SplitPending) or straight
/// retry (single-register or already-narrowed).
/// </summary>
internal async Task RunReprobeOnceForTestAsync(CancellationToken ct)
{
var transport = _transport ?? throw new InvalidOperationException("Transport not connected");
(byte Unit, ModbusRegion Region, ushort Start, ushort End)[] candidates;
lock (_autoProhibitedLock) candidates = _autoProhibited.Keys.ToArray();
foreach (var p in candidates)
((byte Unit, ModbusRegion Region, ushort Start, ushort End) Key, bool SplitPending)[] candidates;
lock (_autoProhibitedLock)
candidates = _autoProhibited
.Select(kv => (Key: kv.Key, SplitPending: kv.Value.SplitPending))
.ToArray();
foreach (var (key, splitPending) in candidates)
{
var fc = p.Region == ModbusRegion.HoldingRegisters ? (byte)0x03 : (byte)0x04;
var qty = (ushort)(p.End - p.Start + 1);
try
{
_ = await ReadRegisterBlockAsync(transport, p.Unit, fc, p.Start, qty, ct).ConfigureAwait(false);
lock (_autoProhibitedLock) _autoProhibited.Remove(p);
}
catch
{
lock (_autoProhibitedLock)
if (_autoProhibited.ContainsKey(p))
_autoProhibited[p] = DateTime.UtcNow;
}
if (ct.IsCancellationRequested) return;
if (splitPending && key.End > key.Start)
await BisectAndReprobeAsync(transport, key, ct).ConfigureAwait(false);
else
await StraightReprobeAsync(transport, key, ct).ConfigureAwait(false);
}
}
private async Task StraightReprobeAsync(IModbusTransport transport,
(byte Unit, ModbusRegion Region, ushort Start, ushort End) key, CancellationToken ct)
{
var fc = key.Region == ModbusRegion.HoldingRegisters ? (byte)0x03 : (byte)0x04;
var qty = (ushort)(key.End - key.Start + 1);
try
{
_ = await ReadRegisterBlockAsync(transport, key.Unit, fc, key.Start, qty, ct).ConfigureAwait(false);
lock (_autoProhibitedLock) _autoProhibited.Remove(key);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
catch
{
lock (_autoProhibitedLock)
if (_autoProhibited.TryGetValue(key, out var st)) st.LastProbedUtc = DateTime.UtcNow;
}
}
/// <summary>
/// #150 — bisect a multi-register prohibition. Removes the parent entry and re-adds
/// whichever halves still fail. Over multiple re-probe ticks the prohibition narrows
/// log2(span) times until it pinpoints the actual protected register(s).
/// </summary>
private async Task BisectAndReprobeAsync(IModbusTransport transport,
(byte Unit, ModbusRegion Region, ushort Start, ushort End) key, CancellationToken ct)
{
var fc = key.Region == ModbusRegion.HoldingRegisters ? (byte)0x03 : (byte)0x04;
var mid = (ushort)((key.Start + key.End) / 2);
var leftEnd = mid;
var rightStart = (ushort)(mid + 1);
var leftFailed = await ProbeFailsAsync(transport, fc, key.Unit, key.Start, leftEnd, ct).ConfigureAwait(false);
var rightFailed = await ProbeFailsAsync(transport, fc, key.Unit, rightStart, key.End, ct).ConfigureAwait(false);
lock (_autoProhibitedLock)
{
_autoProhibited.Remove(key);
if (leftFailed)
{
_autoProhibited[(key.Unit, key.Region, key.Start, leftEnd)] = new ProhibitionState
{
LastProbedUtc = DateTime.UtcNow,
SplitPending = leftEnd > key.Start,
};
}
if (rightFailed)
{
_autoProhibited[(key.Unit, key.Region, rightStart, key.End)] = new ProhibitionState
{
LastProbedUtc = DateTime.UtcNow,
SplitPending = key.End > rightStart,
};
}
// Both halves succeeded → entry is just removed. The parent prohibition is gone
// and the next normal scan can re-coalesce across the whole original range.
}
}
private async Task<bool> ProbeFailsAsync(IModbusTransport transport, byte fc, byte unit,
ushort start, ushort end, CancellationToken ct)
{
var qty = (ushort)(end - start + 1);
try
{
_ = await ReadRegisterBlockAsync(transport, unit, fc, start, qty, ct).ConfigureAwait(false);
return false;
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
catch { return true; }
}
/// <summary>
/// #143 block-read coalescing planner. Groups eligible tags by (UnitId, Region), sorts
/// by start address, and merges adjacent / near-adjacent (gap ≤ MaxReadGap) into single

View File

@@ -0,0 +1,171 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests;
/// <summary>
/// #150 — bisection-style range narrowing for coalescing prohibitions. After a coalesced
/// read fails, the re-probe loop bisects the prohibited range over multiple ticks until
/// it pinpoints the actual protected register(s). Healthy halves get cleared as the
/// bisection narrows.
/// </summary>
[Trait("Category", "Unit")]
public sealed class ModbusCoalescingBisectionTests
{
/// <summary>
/// Programmable transport like the one in ModbusCoalescingAutoRecoveryTests but local
/// to keep this test file standalone — having the protection model live next to the
/// bisection assertions makes the test intent easier to read.
/// </summary>
private sealed class ProtectedHoleTransport : IModbusTransport
{
public ushort ProtectedAddress { get; set; } = ushort.MaxValue;
public Task ConnectAsync(CancellationToken ct) => Task.CompletedTask;
public Task<byte[]> SendAsync(byte unitId, byte[] pdu, CancellationToken ct)
{
var addr = (ushort)((pdu[1] << 8) | pdu[2]);
var qty = (ushort)((pdu[3] << 8) | pdu[4]);
if (pdu[0] is 0x03 or 0x04 && ProtectedAddress >= addr && ProtectedAddress < addr + qty)
return Task.FromException<byte[]>(new ModbusException(pdu[0], 0x02, "IllegalDataAddress"));
switch (pdu[0])
{
case 0x03: case 0x04:
{
var resp = new byte[2 + qty * 2];
resp[0] = pdu[0]; resp[1] = (byte)(qty * 2);
return Task.FromResult(resp);
}
default: return Task.FromResult(new byte[] { pdu[0], 0, 0 });
}
}
public ValueTask DisposeAsync() => ValueTask.CompletedTask;
}
[Fact]
public async Task Bisection_Narrows_Multi_Register_Prohibition_Per_Reprobe()
{
var fake = new ProtectedHoleTransport { ProtectedAddress = 105 };
// 11 tags 100..110 with MaxReadGap=10 → coalesce into one block 100..110. The protected
// register is in the middle (105). After the first failure the planner records the
// full 100..110 range as split-pending. Each subsequent re-probe bisects until the
// prohibition is pinned at register 105.
var tags = Enumerable.Range(100, 11)
.Select(i => new ModbusTagDefinition($"T{i}", ModbusRegion.HoldingRegisters, (ushort)i, ModbusDataType.Int16))
.ToArray();
var opts = new ModbusDriverOptions { Host = "f", Tags = tags, MaxReadGap = 10,
AutoProhibitReprobeInterval = TimeSpan.FromMilliseconds(100),
Probe = new ModbusProbeOptions { Enabled = false } };
var drv = new ModbusDriver(opts, "m1", _ => fake);
await drv.InitializeAsync("{}", CancellationToken.None);
await drv.ReadAsync(tags.Select(t => t.Name).ToArray(), CancellationToken.None);
// Initial prohibition: full 100..110 range, split-pending.
drv.AutoProhibitedRangeCount.ShouldBe(1);
// Re-probe pass 1: bisect 100..110 → mid=105 → left=100..105 (fails because 105 is
// protected), right=106..110 (succeeds). Result: prohibition collapses to 100..105.
await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
drv.AutoProhibitedRangeCount.ShouldBe(1, "after pass 1 the prohibition narrows but doesn't disappear");
// Re-probe pass 2: bisect 100..105 → mid=102 → left=100..102 (succeeds), right=103..105 (fails).
// Result: prohibition collapses to 103..105.
await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
// Re-probe pass 3: bisect 103..105 → mid=104 → left=103..104 (succeeds), right=105..105 (fails).
// Result: prohibition collapses to 105..105 (single register, no longer split-pending).
await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
drv.AutoProhibitedRangeCount.ShouldBe(1, "single-register prohibition stays after bisection terminates");
// Re-probe pass 4: 105..105 is single-register; straight-retry path. Still fails;
// prohibition stays.
await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
drv.AutoProhibitedRangeCount.ShouldBe(1);
await drv.ShutdownAsync(CancellationToken.None);
}
[Fact]
public async Task Bisection_Clears_When_Both_Halves_Are_Healthy()
{
// Transient failure scenario: range failed once, but by the next re-probe the PLC has
// unlocked it. Bisection of (100..110) returns success on both halves → entry removed
// entirely.
var fake = new ProtectedHoleTransport { ProtectedAddress = 105 };
var tags = Enumerable.Range(100, 11)
.Select(i => new ModbusTagDefinition($"T{i}", ModbusRegion.HoldingRegisters, (ushort)i, ModbusDataType.Int16))
.ToArray();
var opts = new ModbusDriverOptions { Host = "f", Tags = tags, MaxReadGap = 10,
AutoProhibitReprobeInterval = TimeSpan.FromMilliseconds(100),
Probe = new ModbusProbeOptions { Enabled = false } };
var drv = new ModbusDriver(opts, "m1", _ => fake);
await drv.InitializeAsync("{}", CancellationToken.None);
await drv.ReadAsync(tags.Select(t => t.Name).ToArray(), CancellationToken.None);
drv.AutoProhibitedRangeCount.ShouldBe(1);
// Operator unlocks the protected register before the re-probe runs.
fake.ProtectedAddress = ushort.MaxValue;
await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
drv.AutoProhibitedRangeCount.ShouldBe(0, "both bisected halves succeed → parent prohibition cleared entirely");
await drv.ShutdownAsync(CancellationToken.None);
}
[Fact]
public async Task Bisection_Splits_Into_Two_When_Both_Halves_Still_Fail()
{
// Two protected registers in the same coalesced range: 102 and 108. After bisection,
// both halves of the original (100..110) range still contain a protected address
// (left=100..105 contains 102, right=106..110 contains 108). The prohibition replaces
// the parent with TWO smaller split-pending entries.
var fake = new ProtectedHoleTransport();
// Build a more elaborate transport that protects two addresses.
var twoHole = new TwoHoleTransport { ProtectedAddresses = { 102, 108 } };
var tags = Enumerable.Range(100, 11)
.Select(i => new ModbusTagDefinition($"T{i}", ModbusRegion.HoldingRegisters, (ushort)i, ModbusDataType.Int16))
.ToArray();
var opts = new ModbusDriverOptions { Host = "f", Tags = tags, MaxReadGap = 10,
AutoProhibitReprobeInterval = TimeSpan.FromMilliseconds(100),
Probe = new ModbusProbeOptions { Enabled = false } };
var drv = new ModbusDriver(opts, "m1", _ => twoHole);
await drv.InitializeAsync("{}", CancellationToken.None);
await drv.ReadAsync(tags.Select(t => t.Name).ToArray(), CancellationToken.None);
drv.AutoProhibitedRangeCount.ShouldBe(1);
// Re-probe: bisect 100..110 at mid=105 → left=100..105 (contains 102, fails),
// right=106..110 (contains 108, fails). Result: TWO entries in place of the parent.
await drv.RunReprobeOnceForTestAsync(CancellationToken.None);
drv.AutoProhibitedRangeCount.ShouldBe(2, "both halves still fail → prohibition splits into two");
await drv.ShutdownAsync(CancellationToken.None);
}
private sealed class TwoHoleTransport : IModbusTransport
{
public readonly HashSet<ushort> ProtectedAddresses = new();
public Task ConnectAsync(CancellationToken ct) => Task.CompletedTask;
public Task<byte[]> SendAsync(byte unitId, byte[] pdu, CancellationToken ct)
{
var addr = (ushort)((pdu[1] << 8) | pdu[2]);
var qty = (ushort)((pdu[3] << 8) | pdu[4]);
if (pdu[0] is 0x03 or 0x04)
for (var i = 0; i < qty; i++)
if (ProtectedAddresses.Contains((ushort)(addr + i)))
return Task.FromException<byte[]>(new ModbusException(pdu[0], 0x02, "IllegalDataAddress"));
switch (pdu[0])
{
case 0x03: case 0x04:
{
var resp = new byte[2 + qty * 2];
resp[0] = pdu[0]; resp[1] = (byte)(qty * 2);
return Task.FromResult(resp);
}
default: return Task.FromResult(new byte[] { pdu[0], 0, 0 });
}
}
public ValueTask DisposeAsync() => ValueTask.CompletedTask;
}
}