Task #151 — Modbus coalescing: periodic re-probe of auto-prohibitions
#148 introduced auto-prohibited coalesced ranges that persist for the driver lifetime. Long-running deployments with transient PLC permission changes (a firmware update unlocking a previously protected register, an operator reconfiguring the device) had no recovery short of an operator restart.

Adds an opt-in background loop that re-probes each prohibition periodically:

- ModbusDriverOptions.AutoProhibitReprobeInterval (TimeSpan?, default null = disabled). Set to e.g. TimeSpan.FromHours(1) to opt in.
- _autoProhibited refactored from HashSet<key> to Dictionary<key, DateTime> so each entry tracks its last-failure / last-re-probe timestamp.
- ReprobeLoopAsync runs on the same Task.Run pattern as ProbeLoopAsync and is cancelled by ShutdownAsync. Each tick snapshots the prohibition set and issues a one-shot coalesced read per range. Successful re-probes drop the prohibition; failed ones bump the timestamp and leave the prohibition in place.
- Communication failures during re-probe (transport-level) are treated the same as PLC-exception failures — the prohibition stays, but it isn't upgraded to "permanent", since transports recover. The driver-instance health surface picks up the failure separately.
- ShutdownAsync explicitly clears the prohibition set so a manual restart via ReinitializeAsync starts with a clean slate (matches the old "restart to clear" semantics).
- Factory DTO + JSON binding extended with an AutoProhibitReprobeMs field.

Tests (2 new, additive to the 3 in ModbusCoalescingAutoRecoveryTests):

- Reprobe_Clears_Prohibition_When_Range_Becomes_Healthy — a protected register at 102 records a prohibition; clearing the simulated protection and invoking the re-probe drops the prohibition.
- Reprobe_Leaves_Prohibition_When_Range_Is_Still_Bad — a re-probe on a still-failing range keeps the prohibition in place.
Tests use a new internal RunReprobeOnceForTestAsync helper to fire one re-probe pass on demand, so the suite doesn't have to wait on the background timer (the loop's timer behaviour is exercised only implicitly via the InitializeAsync wire-up; the helper mirrors the loop's per-range re-probe logic rather than calling into it). 234 + 2 = 236 unit tests green.
This commit is contained in:
@@ -136,6 +136,13 @@ public sealed class ModbusDriver
|
||||
_probeCts = new CancellationTokenSource();
|
||||
_ = Task.Run(() => ProbeLoopAsync(_probeCts.Token), _probeCts.Token);
|
||||
}
|
||||
|
||||
// #151 — start the auto-prohibition re-probe loop when the operator opted in.
|
||||
if (_options.AutoProhibitReprobeInterval is not null)
|
||||
{
|
||||
_reprobeCts = new CancellationTokenSource();
|
||||
_ = Task.Run(() => ReprobeLoopAsync(_reprobeCts.Token), _reprobeCts.Token);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
@@ -156,6 +163,15 @@ public sealed class ModbusDriver
|
||||
_probeCts?.Dispose();
|
||||
_probeCts = null;
|
||||
|
||||
try { _reprobeCts?.Cancel(); } catch { }
|
||||
_reprobeCts?.Dispose();
|
||||
_reprobeCts = null;
|
||||
|
||||
// #151 — clear the prohibition set on shutdown so an explicit operator restart
|
||||
// (ReinitializeAsync) starts with a clean slate. The re-probe loop already retries
|
||||
// automatically when enabled; the restart path is the manual escape hatch.
|
||||
lock (_autoProhibitedLock) _autoProhibited.Clear();
|
||||
|
||||
await _poll.DisposeAsync().ConfigureAwait(false);
|
||||
|
||||
if (_transport is not null) await _transport.DisposeAsync().ConfigureAwait(false);
|
||||
@@ -393,14 +409,15 @@ public sealed class ModbusDriver
|
||||
/// Cleared by ReinitializeAsync (operator restart) or by an explicit re-probe API
|
||||
/// (not yet shipped).
|
||||
/// </summary>
|
||||
private readonly HashSet<(byte Unit, ModbusRegion Region, ushort Start, ushort End)> _autoProhibited = new();
|
||||
private readonly Dictionary<(byte Unit, ModbusRegion Region, ushort Start, ushort End), DateTime> _autoProhibited = new();
|
||||
private readonly object _autoProhibitedLock = new();
|
||||
private CancellationTokenSource? _reprobeCts;
|
||||
|
||||
private bool RangeIsAutoProhibited(byte unit, ModbusRegion region, ushort start, ushort end)
|
||||
{
|
||||
lock (_autoProhibitedLock)
|
||||
{
|
||||
foreach (var p in _autoProhibited)
|
||||
foreach (var p in _autoProhibited.Keys)
|
||||
{
|
||||
// A candidate (start..end) range is prohibited if it overlaps any recorded
|
||||
// failure. Overlap rule: max-start ≤ min-end. We don't try to be smart about
|
||||
@@ -414,7 +431,7 @@ public sealed class ModbusDriver
|
||||
|
||||
private void RecordAutoProhibition(byte unit, ModbusRegion region, ushort start, ushort end)
|
||||
{
|
||||
lock (_autoProhibitedLock) _autoProhibited.Add((unit, region, start, end));
|
||||
lock (_autoProhibitedLock) _autoProhibited[(unit, region, start, end)] = DateTime.UtcNow;
|
||||
}
|
||||
|
||||
/// <summary>Test/diagnostic accessor — returns the current auto-prohibited range count.</summary>
|
||||
@@ -423,6 +440,81 @@ public sealed class ModbusDriver
|
||||
get { lock (_autoProhibitedLock) return _autoProhibited.Count; }
|
||||
}
|
||||
|
||||
/// <summary>
/// #151 — periodic re-probe loop. Wakes every <c>AutoProhibitReprobeInterval</c> and
/// retries each auto-prohibited range with a one-shot coalesced read. Successful
/// re-probes drop the prohibition; failed ones leave it in place and bump the
/// last-probed timestamp. Runs until <paramref name="ct"/> is cancelled
/// (by <c>ShutdownAsync</c>).
/// </summary>
/// <remarks>
/// A missing, zero or negative interval is treated as "re-probe disabled" and the loop
/// exits immediately: a zero delay would turn the loop into a busy spin against the
/// device, and a negative delay makes <c>Task.Delay</c> throw, which would fault this
/// fire-and-forget task without anyone observing it.
/// </remarks>
/// <param name="ct">Token that ends the loop; checked between delays and between ranges.</param>
private async Task ReprobeLoopAsync(CancellationToken ct)
{
    if (_options.AutoProhibitReprobeInterval is not { } interval || interval <= TimeSpan.Zero)
    {
        return;
    }

    while (!ct.IsCancellationRequested)
    {
        try
        {
            await Task.Delay(interval, ct).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            return;
        }

        // Re-read the field on every tick. Capturing it once before the loop would pin
        // whatever value (possibly null) it had when the loop started, so the null check
        // below could never change its outcome.
        var transport = _transport;
        if (transport is null)
        {
            continue;
        }

        // Snapshot the prohibition set so the lock is not held during the wire calls.
        (byte Unit, ModbusRegion Region, ushort Start, ushort End)[] candidates;
        lock (_autoProhibitedLock)
        {
            candidates = _autoProhibited.Keys.ToArray();
        }

        foreach (var range in candidates)
        {
            if (ct.IsCancellationRequested)
            {
                return;
            }

            // 0x03 for holding registers; every other region is read with 0x04.
            // NOTE(review): assumes only register regions are ever recorded here — confirm.
            var fc = range.Region == ModbusRegion.HoldingRegisters ? (byte)0x03 : (byte)0x04;
            var qty = (ushort)(range.End - range.Start + 1);

            try
            {
                // Bound each probe by the configured timeout while still honouring shutdown.
                using var probeCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
                probeCts.CancelAfter(_options.Timeout);
                _ = await ReadRegisterBlockAsync(transport, range.Unit, fc, range.Start, qty, probeCts.Token).ConfigureAwait(false);

                // Range is healthy now — drop the prohibition.
                lock (_autoProhibitedLock)
                {
                    _autoProhibited.Remove(range);
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Shutdown, not a probe failure: leave the entry untouched and stop.
                return;
            }
            catch (Exception)
            {
                // Still bad (including a per-probe timeout). Refresh the timestamp only if
                // the entry still exists, so a concurrent Clear() is not undone.
                lock (_autoProhibitedLock)
                {
                    if (_autoProhibited.ContainsKey(range))
                    {
                        _autoProhibited[range] = DateTime.UtcNow;
                    }
                }
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Test/diagnostic accessor — runs a single re-probe pass over the ranges that are
/// auto-prohibited at the time of the call. A range whose read succeeds has its
/// prohibition removed; a range whose read throws keeps its prohibition and gets a
/// fresh timestamp. No per-read timeout is applied beyond <paramref name="ct"/>.
/// </summary>
/// <param name="ct">Token forwarded to each register-block read.</param>
/// <exception cref="InvalidOperationException">The transport field is null.</exception>
internal async Task RunReprobeOnceForTestAsync(CancellationToken ct)
{
    if (_transport is not { } link)
    {
        throw new InvalidOperationException("Transport not connected");
    }

    // Copy the keys under the lock so the reads below run without holding it.
    (byte Unit, ModbusRegion Region, ushort Start, ushort End)[] snapshot;
    lock (_autoProhibitedLock)
    {
        snapshot = _autoProhibited.Keys.ToArray();
    }

    foreach (var range in snapshot)
    {
        byte functionCode = range.Region switch
        {
            ModbusRegion.HoldingRegisters => 0x03,
            _ => 0x04,
        };
        var registerCount = (ushort)(range.End - range.Start + 1);

        bool readSucceeded;
        try
        {
            _ = await ReadRegisterBlockAsync(link, range.Unit, functionCode, range.Start, registerCount, ct).ConfigureAwait(false);
            readSucceeded = true;
        }
        catch
        {
            // Any failure (cancellation included) counts as "still prohibited".
            readSucceeded = false;
        }

        lock (_autoProhibitedLock)
        {
            if (readSucceeded)
            {
                _autoProhibited.Remove(range);
            }
            else if (_autoProhibited.ContainsKey(range))
            {
                // Only refresh entries that still exist; never re-add a cleared one.
                _autoProhibited[range] = DateTime.UtcNow;
            }
        }
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// #143 block-read coalescing planner. Groups eligible tags by (UnitId, Region), sorts
|
||||
/// by start address, and merges adjacent / near-adjacent (gap ≤ MaxReadGap) into single
|
||||
|
||||
@@ -50,6 +50,7 @@ public static class ModbusDriverFactoryExtensions
|
||||
: ParseEnum<ModbusFamily>(dto.Family, "<driver-level>", driverInstanceId, "Family"),
|
||||
MelsecSubFamily = dto.MelsecSubFamily is null ? MelsecFamily.Q_L_iQR
|
||||
: ParseEnum<MelsecFamily>(dto.MelsecSubFamily, "<driver-level>", driverInstanceId, "MelsecSubFamily"),
|
||||
AutoProhibitReprobeInterval = dto.AutoProhibitReprobeMs is { } reprobeMs ? TimeSpan.FromMilliseconds(reprobeMs) : null,
|
||||
AutoReconnect = dto.AutoReconnect ?? true,
|
||||
Tags = dto.Tags is { Count: > 0 }
|
||||
? [.. dto.Tags.Select(t => BuildTag(
|
||||
@@ -175,6 +176,7 @@ public static class ModbusDriverFactoryExtensions
|
||||
public bool? WriteOnChangeOnly { get; init; }
|
||||
public string? Family { get; init; }
|
||||
public string? MelsecSubFamily { get; init; }
|
||||
public int? AutoProhibitReprobeMs { get; init; }
|
||||
public bool? AutoReconnect { get; init; }
|
||||
public List<ModbusTagDto>? Tags { get; init; }
|
||||
public ModbusProbeDto? Probe { get; init; }
|
||||
|
||||
@@ -79,6 +79,16 @@ public sealed class ModbusDriverOptions
|
||||
/// </summary>
|
||||
public bool DisableFC23 { get; init; } = false;
|
||||
|
||||
/// <summary>
|
||||
/// #151 — interval for the background re-probe loop that retries auto-prohibited
|
||||
/// coalesced ranges (#148). When non-null, every <c>AutoProhibitReprobeInterval</c>
|
||||
/// the driver attempts each prohibition's coalesced read once. If the re-probe
|
||||
/// succeeds, the prohibition clears and the planner resumes coalescing across the
|
||||
/// range on the next scan. Default <c>null</c> = re-probe disabled (prohibitions
|
||||
/// persist until <c>ReinitializeAsync</c>; preserves pre-#151 behaviour).
|
||||
/// </summary>
|
||||
public TimeSpan? AutoProhibitReprobeInterval { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Block-read coalescing budget (#143). When non-zero, the read planner combines tags
|
||||
/// in the same (UnitId, Region) group whose addresses are at most this many registers
|
||||
|
||||
Reference in New Issue
Block a user