Task #151 — Modbus coalescing: periodic re-probe of auto-prohibitions

#148 introduced auto-prohibited coalesced ranges that persist for the
driver lifetime. Long-running deployments with transient PLC permission
changes (firmware update unlocking a previously-protected register,
operator reconfiguring the device) had no recovery short of operator
restart.

Adds an opt-in background loop that re-probes each prohibition periodically:

- ModbusDriverOptions.AutoProhibitReprobeInterval (TimeSpan?, default null
  = disabled). Set to e.g. TimeSpan.FromHours(1) to opt in.
- _autoProhibited refactored from HashSet<key> to Dictionary<key, DateTime>
  so each entry tracks its last failure / last re-probe timestamp.
- ReprobeLoopAsync runs on the same Task.Run pattern as ProbeLoopAsync;
  cancelled by ShutdownAsync. Each tick snapshots the prohibition set
  and issues a one-shot coalesced read per range. Successful re-probes
  drop the prohibition; failed ones bump the timestamp + leave the
  prohibition in place.
- Communication failures during re-probe (transport-level) are treated
  the same as PLC-exception failures — the prohibition stays, but isn't
  upgraded to "permanent" since transports recover. The driver-instance
  health surface picks up the failure separately.
- ShutdownAsync explicitly clears the prohibition set so a manual restart
  via ReinitializeAsync starts with a clean slate (matches the old
  "restart to clear" semantics).
- Factory DTO + JSON binding extended with AutoProhibitReprobeMs field.

Tests (2 new, additive to the 3 in ModbusCoalescingAutoRecoveryTests):
- Reprobe_Clears_Prohibition_When_Range_Becomes_Healthy — protected
  register at 102 records prohibition; clearing the simulated protection
  + invoking the re-probe drops the prohibition.
- Reprobe_Leaves_Prohibition_When_Range_Is_Still_Bad — re-probe on a
  still-failing range keeps the prohibition in place.

Tests use a new internal RunReprobeOnceForTestAsync helper to run a single
re-probe pass on demand, so the suite doesn't have to wait on the background
timer. Note that the helper mirrors the loop's per-range probe logic rather
than calling into it (and omits the loop's per-probe timeout), so the
timer-driven loop itself is only wired up via InitializeAsync and is not
directly exercised by these tests.

234 + 2 = 236 unit tests green.
This commit is contained in:
Joseph Doherty
2026-04-25 01:12:48 -04:00
parent 8de152df4f
commit 9e4aae350b
4 changed files with 159 additions and 3 deletions

View File

@@ -136,6 +136,13 @@ public sealed class ModbusDriver
_probeCts = new CancellationTokenSource();
_ = Task.Run(() => ProbeLoopAsync(_probeCts.Token), _probeCts.Token);
}
// #151 — start the auto-prohibition re-probe loop when the operator opted in.
if (_options.AutoProhibitReprobeInterval is not null)
{
_reprobeCts = new CancellationTokenSource();
_ = Task.Run(() => ReprobeLoopAsync(_reprobeCts.Token), _reprobeCts.Token);
}
}
catch (Exception ex)
{
@@ -156,6 +163,15 @@ public sealed class ModbusDriver
_probeCts?.Dispose();
_probeCts = null;
try { _reprobeCts?.Cancel(); } catch { }
_reprobeCts?.Dispose();
_reprobeCts = null;
// #151 — clear the prohibition set on shutdown so an explicit operator restart
// (ReinitializeAsync) starts with a clean slate. The re-probe loop already retries
// automatically when enabled; the restart path is the manual escape hatch.
lock (_autoProhibitedLock) _autoProhibited.Clear();
await _poll.DisposeAsync().ConfigureAwait(false);
if (_transport is not null) await _transport.DisposeAsync().ConfigureAwait(false);
@@ -393,14 +409,15 @@ public sealed class ModbusDriver
/// Cleared wholesale by ShutdownAsync (so a ReinitializeAsync / operator restart starts
/// clean), or per range by the opt-in #151 re-probe loop when a re-probe read succeeds.
/// </summary>
private readonly HashSet<(byte Unit, ModbusRegion Region, ushort Start, ushort End)> _autoProhibited = new();
private readonly Dictionary<(byte Unit, ModbusRegion Region, ushort Start, ushort End), DateTime> _autoProhibited = new();
private readonly object _autoProhibitedLock = new();
private CancellationTokenSource? _reprobeCts;
private bool RangeIsAutoProhibited(byte unit, ModbusRegion region, ushort start, ushort end)
{
lock (_autoProhibitedLock)
{
foreach (var p in _autoProhibited)
foreach (var p in _autoProhibited.Keys)
{
// A candidate (start..end) range is prohibited if it overlaps any recorded
// failure. Overlap rule: max-start ≤ min-end. We don't try to be smart about
@@ -414,7 +431,7 @@ public sealed class ModbusDriver
private void RecordAutoProhibition(byte unit, ModbusRegion region, ushort start, ushort end)
{
lock (_autoProhibitedLock) _autoProhibited.Add((unit, region, start, end));
lock (_autoProhibitedLock) _autoProhibited[(unit, region, start, end)] = DateTime.UtcNow;
}
/// <summary>Test/diagnostic accessor — returns the current auto-prohibited range count.</summary>
@@ -423,6 +440,81 @@ public sealed class ModbusDriver
get { lock (_autoProhibitedLock) return _autoProhibited.Count; }
}
/// <summary>
/// #151 — periodic re-probe loop. Sleeps for <c>AutoProhibitReprobeInterval</c>, then
/// retries every auto-prohibited range with a one-shot block read. A successful read
/// drops the prohibition; a failed one keeps it and refreshes the entry's timestamp.
/// The timestamp is diagnostic only — scheduling is driven purely by the fixed delay,
/// so every recorded range is re-probed on every pass.
/// Runs until <paramref name="ct"/> is cancelled (<c>ShutdownAsync</c> cancels it).
/// </summary>
/// <param name="ct">Cancellation token that ends the loop.</param>
private async Task ReprobeLoopAsync(CancellationToken ct)
{
    // A zero interval would spin and hammer the device; a negative one either makes
    // Task.Delay throw on this fire-and-forget task or (for -1 ms) sleeps forever.
    // Treat any non-positive or missing interval as "re-probe disabled".
    if (_options.AutoProhibitReprobeInterval is not { } interval || interval <= TimeSpan.Zero)
        return;

    while (!ct.IsCancellationRequested)
    {
        try { await Task.Delay(interval, ct).ConfigureAwait(false); }
        catch (OperationCanceledException) { return; }

        // Re-read the transport on every pass. Capturing it once before the loop made the
        // null check below loop-invariant: a transport that was null at start-up (or that
        // the driver later swaps out) would never be noticed.
        var transport = _transport;
        if (transport is null) continue;

        // Snapshot the prohibition set (with timestamps) so the lock is not held during
        // the wire calls.
        KeyValuePair<(byte Unit, ModbusRegion Region, ushort Start, ushort End), DateTime>[] candidates;
        lock (_autoProhibitedLock)
            candidates = _autoProhibited.ToArray();

        foreach (var (p, seenAt) in candidates)
        {
            if (ct.IsCancellationRequested) return;

            // FC03 for holding registers, FC04 for every other region recorded here.
            var fc = p.Region == ModbusRegion.HoldingRegisters ? (byte)0x03 : (byte)0x04;
            var qty = (ushort)(p.End - p.Start + 1);
            try
            {
                // Bound each probe by the driver's request timeout while still honouring shutdown.
                using var probeCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
                probeCts.CancelAfter(_options.Timeout);
                _ = await ReadRegisterBlockAsync(transport, p.Unit, fc, p.Start, qty, probeCts.Token).ConfigureAwait(false);

                // Range is healthy now — drop the prohibition, unless the entry was
                // re-recorded while the probe was in flight (its timestamp moved). In
                // that case a newer failure exists and must not be wiped by this result.
                lock (_autoProhibitedLock)
                {
                    if (_autoProhibited.TryGetValue(p, out var current) && current == seenAt)
                        _autoProhibited.Remove(p);
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
            catch
            {
                // Still bad (device exception, transport failure, or per-probe timeout).
                // Refresh the timestamp so diagnostics show the range was recently
                // re-probed; skip entries that were cleared concurrently.
                lock (_autoProhibitedLock)
                {
                    if (_autoProhibited.ContainsKey(p))
                        _autoProhibited[p] = DateTime.UtcNow;
                }
            }
        }
    }
}
/// <summary>
/// Test/diagnostic hook — performs exactly one re-probe pass over the ranges that are
/// auto-prohibited at call time and completes once each of them has been tried.
/// A range whose read succeeds is removed from the prohibition set; any failure
/// (the catch is unfiltered, so this includes cancellation) keeps the range and
/// refreshes its timestamp.
/// </summary>
/// <param name="ct">Token forwarded to each block read.</param>
/// <exception cref="InvalidOperationException">No transport is currently set.</exception>
internal async Task RunReprobeOnceForTestAsync(CancellationToken ct)
{
    if (_transport is not { } link)
        throw new InvalidOperationException("Transport not connected");

    // Copy the keys under the lock; the reads below run without holding it.
    (byte Unit, ModbusRegion Region, ushort Start, ushort End)[] snapshot;
    lock (_autoProhibitedLock)
    {
        snapshot = _autoProhibited.Keys.ToArray();
    }

    for (var i = 0; i < snapshot.Length; i++)
    {
        var range = snapshot[i];
        byte functionCode = range.Region == ModbusRegion.HoldingRegisters ? (byte)0x03 : (byte)0x04;
        var count = (ushort)(range.End - range.Start + 1);

        bool healthy;
        try
        {
            _ = await ReadRegisterBlockAsync(link, range.Unit, functionCode, range.Start, count, ct).ConfigureAwait(false);
            healthy = true;
        }
        catch
        {
            healthy = false;
        }

        lock (_autoProhibitedLock)
        {
            if (healthy)
            {
                _autoProhibited.Remove(range);
            }
            else if (_autoProhibited.ContainsKey(range))
            {
                // Only touch entries that still exist; never resurrect a cleared range.
                _autoProhibited[range] = DateTime.UtcNow;
            }
        }
    }
}
/// <summary>
/// #143 block-read coalescing planner. Groups eligible tags by (UnitId, Region), sorts
/// by start address, and merges adjacent / near-adjacent (gap ≤ MaxReadGap) into single

View File

@@ -50,6 +50,7 @@ public static class ModbusDriverFactoryExtensions
: ParseEnum<ModbusFamily>(dto.Family, "<driver-level>", driverInstanceId, "Family"),
MelsecSubFamily = dto.MelsecSubFamily is null ? MelsecFamily.Q_L_iQR
: ParseEnum<MelsecFamily>(dto.MelsecSubFamily, "<driver-level>", driverInstanceId, "MelsecSubFamily"),
AutoProhibitReprobeInterval = dto.AutoProhibitReprobeMs is { } reprobeMs ? TimeSpan.FromMilliseconds(reprobeMs) : null,
AutoReconnect = dto.AutoReconnect ?? true,
Tags = dto.Tags is { Count: > 0 }
? [.. dto.Tags.Select(t => BuildTag(
@@ -175,6 +176,7 @@ public static class ModbusDriverFactoryExtensions
public bool? WriteOnChangeOnly { get; init; }
public string? Family { get; init; }
public string? MelsecSubFamily { get; init; }
public int? AutoProhibitReprobeMs { get; init; }
public bool? AutoReconnect { get; init; }
public List<ModbusTagDto>? Tags { get; init; }
public ModbusProbeDto? Probe { get; init; }

View File

@@ -79,6 +79,16 @@ public sealed class ModbusDriverOptions
/// </summary>
public bool DisableFC23 { get; init; } = false;
/// <summary>
/// #151 — interval for the background re-probe loop that retries auto-prohibited
/// coalesced ranges (#148). When non-null, the driver starts the loop during
/// initialization; the loop sleeps for this interval and then attempts each
/// prohibition's coalesced read once per pass. If the re-probe succeeds, the
/// prohibition clears and the planner resumes coalescing across the range on the
/// next scan. Default <c>null</c> = re-probe disabled (prohibitions persist until
/// <c>ReinitializeAsync</c>; preserves pre-#151 behaviour).
/// </summary>
/// <remarks>
/// The factory binds this from the <c>AutoProhibitReprobeMs</c> JSON field via
/// <c>TimeSpan.FromMilliseconds</c>.
/// NOTE(review): no range validation is visible on this property and the value is
/// passed to <c>Task.Delay</c> — confirm configurations only supply positive intervals.
/// </remarks>
public TimeSpan? AutoProhibitReprobeInterval { get; init; } = null;
/// <summary>
/// Block-read coalescing budget (#143). When non-zero, the read planner combines tags
/// in the same (UnitId, Region) group whose addresses are at most this many registers