Files
lmxopcua/src/ZB.MOM.WW.OtOpcUa.Driver.S7/S7Driver.cs
Joseph Doherty f3850f8914 Phase 6.1 Stream A.5/A.6 — WriteIdempotent flag on DriverAttributeInfo + Modbus/S7 tag records + FlakeyDriver integration tests
Per-tag opt-in for write-retry per docs/v2/plan.md decisions #44, #45, #143.
Default is false — writes never auto-retry unless the driver author has marked
the tag as safe to replay.

Core.Abstractions:
- DriverAttributeInfo gains `bool WriteIdempotent = false` at the end of the
  positional record (back-compatible; every existing call site uses the default).

Driver.Modbus:
- ModbusTagDefinition gains `bool WriteIdempotent = false`. Safe candidates
  documented in the param XML: holding-register set-points, configuration
  registers. Unsafe: edge-triggered coils, counter-increment addresses.
- ModbusDriver.DiscoverAsync propagates t.WriteIdempotent into
  DriverAttributeInfo.WriteIdempotent.

Driver.S7:
- S7TagDefinition gains `bool WriteIdempotent = false`. Safe candidates:
  DB word/dword set-points, configuration DBs. Unsafe: M/Q bits that drive
  edge-triggered program routines.
- S7Driver.DiscoverAsync propagates the flag.

Stream A.5 integration tests (FlakeyDriverIntegrationTests, 4 new) exercise
the invoker + flaky-driver contract the plan enumerates:
- Read with 5 transient failures succeeds on the 6th attempt (RetryCount=10).
- Non-idempotent write with RetryCount=5 configured still fails on the first
  failure — no replay (decision #44 guard at the ExecuteWriteAsync surface).
- Idempotent write with 2 transient failures succeeds on the 3rd attempt.
- Two hosts on the same driver have independent breakers — dead-host trips
  its breaker but live-host's first call still succeeds.

Propagation tests:
- ModbusDriverTests: SetPoint WriteIdempotent=true flows into
  DriverAttributeInfo; PulseCoil default=false.
- S7DiscoveryAndSubscribeTests: same pattern for DBx SetPoint vs M-bit.

Full solution dotnet test: 947 passing (baseline 906, +41 net across Stream A
so far). Pre-existing Client.CLI Subscribe flake unchanged.

Stream A's remaining work (wiring CapabilityInvoker into DriverNodeManager's
OnReadValue / OnWriteValue / History / Subscribe dispatch paths) is the
server-side integration piece + needs DI wiring for the pipeline builder —
lands in the next PR on this branch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 07:16:21 -04:00

515 lines
23 KiB
C#

using S7.Net;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Driver.S7;
/// <summary>
/// Siemens S7 native driver — speaks S7comm over ISO-on-TCP (port 102) via the S7netplus
/// library. First implementation of <see cref="IDriver"/> for an in-process .NET Standard
/// PLC protocol that is NOT Modbus, validating that the v2 driver-capability interfaces
/// generalize beyond Modbus + Galaxy.
/// </summary>
/// <remarks>
/// <para>
/// PR 62 ships the scaffold: <see cref="IDriver"/> only (Initialize / Reinitialize /
/// Shutdown / GetHealth). <see cref="ITagDiscovery"/>, <see cref="IReadable"/>,
/// <see cref="IWritable"/>, <see cref="ISubscribable"/>, <see cref="IHostConnectivityProbe"/>
/// land in PRs 63-65 once the address parser (PR 63) is in place.
/// </para>
/// <para>
/// <b>Single-connection policy</b>: S7netplus documented pattern is one
/// <c>Plc</c> instance per PLC, serialized with a <see cref="SemaphoreSlim"/>.
/// Parallelising reads against a single S7 CPU doesn't help — the CPU scans the
/// communication mailbox at most once per cycle (2-10 ms) and queues concurrent
/// requests wire-side anyway. Multiple client-side connections just waste the CPU's
/// 8-64 connection-resource budget.
/// </para>
/// </remarks>
public sealed class S7Driver(S7DriverOptions options, string driverInstanceId)
: IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IDisposable, IAsyncDisposable
{
// ---- ISubscribable + IHostConnectivityProbe state ----
private readonly System.Collections.Concurrent.ConcurrentDictionary<long, SubscriptionState> _subscriptions = new();
private long _nextSubscriptionId;
private readonly object _probeLock = new();
private HostState _hostState = HostState.Unknown;
private DateTime _hostStateChangedUtc = DateTime.UtcNow;
private CancellationTokenSource? _probeCts;
public event EventHandler<DataChangeEventArgs>? OnDataChange;
public event EventHandler<HostStatusChangedEventArgs>? OnHostStatusChanged;
/// <summary>OPC UA StatusCode used when the tag name isn't in the driver's tag map.</summary>
private const uint StatusBadNodeIdUnknown = 0x80340000u;
/// <summary>OPC UA StatusCode used when the tag's data type isn't implemented yet.</summary>
private const uint StatusBadNotSupported = 0x803D0000u;
/// <summary>OPC UA StatusCode used when the tag is declared read-only.</summary>
private const uint StatusBadNotWritable = 0x803B0000u;
/// <summary>OPC UA StatusCode used when write fails validation (e.g. out-of-range value).</summary>
private const uint StatusBadInternalError = 0x80020000u;
/// <summary>OPC UA StatusCode used for socket / timeout / protocol-layer faults.</summary>
private const uint StatusBadCommunicationError = 0x80050000u;
/// <summary>OPC UA StatusCode used when S7 returns <c>ErrorCode.WrongCPU</c> / PUT/GET disabled.</summary>
private const uint StatusBadDeviceFailure = 0x80550000u;
private readonly Dictionary<string, S7TagDefinition> _tagsByName = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, S7ParsedAddress> _parsedByName = new(StringComparer.OrdinalIgnoreCase);
private readonly S7DriverOptions _options = options;
private readonly SemaphoreSlim _gate = new(1, 1);
/// <summary>
/// Per-connection gate. Internal so PRs 63-65 (read/write/subscribe) can serialize on
/// the same semaphore without exposing it publicly. Single-connection-per-PLC is a
/// hard requirement of S7netplus — see class remarks.
/// </summary>
internal SemaphoreSlim Gate => _gate;
/// <summary>
/// Active S7.Net PLC connection. Null until <see cref="InitializeAsync"/> returns; null
/// after <see cref="ShutdownAsync"/>. Read-only outside this class; PR 64's Read/Write
/// will take the <see cref="_gate"/> before touching it.
/// </summary>
internal Plc? Plc { get; private set; }
private DriverHealth _health = new(DriverState.Unknown, null, null);
private bool _disposed;
public string DriverInstanceId => driverInstanceId;
public string DriverType => "S7";
public async Task InitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
_health = new DriverHealth(DriverState.Initializing, null, null);
try
{
var plc = new Plc(_options.CpuType, _options.Host, _options.Rack, _options.Slot);
// S7netplus writes timeouts into the underlying TcpClient via Plc.WriteTimeout /
// Plc.ReadTimeout (milliseconds). Set before OpenAsync so the handshake itself
// honours the bound.
plc.WriteTimeout = (int)_options.Timeout.TotalMilliseconds;
plc.ReadTimeout = (int)_options.Timeout.TotalMilliseconds;
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
cts.CancelAfter(_options.Timeout);
await plc.OpenAsync(cts.Token).ConfigureAwait(false);
Plc = plc;
// Parse every tag's address once at init so config typos fail fast here instead
// of surfacing as BadInternalError on every Read against the bad tag. The parser
// also rejects bit-offset > 7, DB 0, unknown area letters, etc.
_tagsByName.Clear();
_parsedByName.Clear();
foreach (var t in _options.Tags)
{
var parsed = S7AddressParser.Parse(t.Address); // throws FormatException
_tagsByName[t.Name] = t;
_parsedByName[t.Name] = parsed;
}
_health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
// Kick off the probe loop once the connection is up. Initial HostState stays
// Unknown until the first probe tick succeeds — avoids broadcasting a premature
// Running transition before any PDU round-trip has happened.
if (_options.Probe.Enabled)
{
_probeCts = new CancellationTokenSource();
_ = Task.Run(() => ProbeLoopAsync(_probeCts.Token), _probeCts.Token);
}
}
catch (Exception ex)
{
// Clean up a partially-constructed Plc so a retry from the caller doesn't leak
// the TcpClient. S7netplus's Close() is best-effort and idempotent.
try { Plc?.Close(); } catch { }
Plc = null;
_health = new DriverHealth(DriverState.Faulted, null, ex.Message);
throw;
}
}
public async Task ReinitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
await ShutdownAsync(cancellationToken).ConfigureAwait(false);
await InitializeAsync(driverConfigJson, cancellationToken).ConfigureAwait(false);
}
public Task ShutdownAsync(CancellationToken cancellationToken)
{
try { _probeCts?.Cancel(); } catch { }
_probeCts?.Dispose();
_probeCts = null;
foreach (var state in _subscriptions.Values)
{
try { state.Cts.Cancel(); } catch { }
state.Cts.Dispose();
}
_subscriptions.Clear();
try { Plc?.Close(); } catch { /* best-effort — tearing down anyway */ }
Plc = null;
_health = new DriverHealth(DriverState.Unknown, _health.LastSuccessfulRead, null);
return Task.CompletedTask;
}
public DriverHealth GetHealth() => _health;
/// <summary>
/// Approximate memory footprint. The Plc instance + one 240-960 byte PDU buffer is
/// under 4 KB; return 0 because the <see cref="IDriver"/> contract asks for a
/// driver-attributable growth number and S7.Net doesn't expose one.
/// </summary>
public long GetMemoryFootprint() => 0;
public Task FlushOptionalCachesAsync(CancellationToken cancellationToken) => Task.CompletedTask;
// ---- IReadable ----
public async Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(
IReadOnlyList<string> fullReferences, CancellationToken cancellationToken)
{
var plc = RequirePlc();
var now = DateTime.UtcNow;
var results = new DataValueSnapshot[fullReferences.Count];
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
for (var i = 0; i < fullReferences.Count; i++)
{
var name = fullReferences[i];
if (!_tagsByName.TryGetValue(name, out var tag))
{
results[i] = new DataValueSnapshot(null, StatusBadNodeIdUnknown, null, now);
continue;
}
try
{
var value = await ReadOneAsync(plc, tag, cancellationToken).ConfigureAwait(false);
results[i] = new DataValueSnapshot(value, 0u, now, now);
_health = new DriverHealth(DriverState.Healthy, now, null);
}
catch (NotSupportedException)
{
results[i] = new DataValueSnapshot(null, StatusBadNotSupported, null, now);
}
catch (global::S7.Net.PlcException pex)
{
// S7.Net's PlcException carries an ErrorCode; PUT/GET-disabled on
// S7-1200/1500 surfaces here. Map to BadDeviceFailure so operators see a
// device-config problem (toggle PUT/GET in TIA Portal) rather than a
// transient fault — per driver-specs.md §5.
results[i] = new DataValueSnapshot(null, StatusBadDeviceFailure, null, now);
_health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, pex.Message);
}
catch (Exception ex)
{
results[i] = new DataValueSnapshot(null, StatusBadCommunicationError, null, now);
_health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, ex.Message);
}
}
}
finally { _gate.Release(); }
return results;
}
private async Task<object> ReadOneAsync(global::S7.Net.Plc plc, S7TagDefinition tag, CancellationToken ct)
{
var addr = _parsedByName[tag.Name];
// S7.Net's string-based ReadAsync returns object where the boxed .NET type depends on
// the size suffix: DBX=bool, DBB=byte, DBW=ushort, DBD=uint. Our S7DataType enum
// specifies the SEMANTIC type (Int16 vs UInt16 vs Float32 etc.); the reinterpret below
// converts the raw unsigned boxed value into the requested type without issuing an
// extra PLC round-trip.
var raw = await plc.ReadAsync(tag.Address, ct).ConfigureAwait(false)
?? throw new System.IO.InvalidDataException($"S7.Net returned null for '{tag.Address}'");
return (tag.DataType, addr.Size, raw) switch
{
(S7DataType.Bool, S7Size.Bit, bool b) => b,
(S7DataType.Byte, S7Size.Byte, byte by) => by,
(S7DataType.UInt16, S7Size.Word, ushort u16) => u16,
(S7DataType.Int16, S7Size.Word, ushort u16) => unchecked((short)u16),
(S7DataType.UInt32, S7Size.DWord, uint u32) => u32,
(S7DataType.Int32, S7Size.DWord, uint u32) => unchecked((int)u32),
(S7DataType.Float32, S7Size.DWord, uint u32) => BitConverter.UInt32BitsToSingle(u32),
(S7DataType.Int64, _, _) => throw new NotSupportedException("S7 Int64 reads land in a follow-up PR"),
(S7DataType.UInt64, _, _) => throw new NotSupportedException("S7 UInt64 reads land in a follow-up PR"),
(S7DataType.Float64, _, _) => throw new NotSupportedException("S7 Float64 (LReal) reads land in a follow-up PR"),
(S7DataType.String, _, _) => throw new NotSupportedException("S7 STRING reads land in a follow-up PR"),
(S7DataType.DateTime, _, _) => throw new NotSupportedException("S7 DateTime reads land in a follow-up PR"),
_ => throw new System.IO.InvalidDataException(
$"S7 Read type-mismatch: tag '{tag.Name}' declared {tag.DataType} but address '{tag.Address}' " +
$"parsed as Size={addr.Size}; S7.Net returned {raw.GetType().Name}"),
};
}
// ---- IWritable ----
public async Task<IReadOnlyList<WriteResult>> WriteAsync(
IReadOnlyList<WriteRequest> writes, CancellationToken cancellationToken)
{
var plc = RequirePlc();
var results = new WriteResult[writes.Count];
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
for (var i = 0; i < writes.Count; i++)
{
var w = writes[i];
if (!_tagsByName.TryGetValue(w.FullReference, out var tag))
{
results[i] = new WriteResult(StatusBadNodeIdUnknown);
continue;
}
if (!tag.Writable)
{
results[i] = new WriteResult(StatusBadNotWritable);
continue;
}
try
{
await WriteOneAsync(plc, tag, w.Value, cancellationToken).ConfigureAwait(false);
results[i] = new WriteResult(0u);
}
catch (NotSupportedException)
{
results[i] = new WriteResult(StatusBadNotSupported);
}
catch (global::S7.Net.PlcException)
{
results[i] = new WriteResult(StatusBadDeviceFailure);
}
catch (Exception)
{
results[i] = new WriteResult(StatusBadInternalError);
}
}
}
finally { _gate.Release(); }
return results;
}
private async Task WriteOneAsync(global::S7.Net.Plc plc, S7TagDefinition tag, object? value, CancellationToken ct)
{
// S7.Net's Plc.WriteAsync(string address, object value) expects the boxed value to
// match the address's size-suffix type: DBX=bool, DBB=byte, DBW=ushort, DBD=uint.
// Our S7DataType lets the caller pass short/int/float; convert to the unsigned
// wire representation before handing off.
var boxed = tag.DataType switch
{
S7DataType.Bool => (object)Convert.ToBoolean(value),
S7DataType.Byte => (object)Convert.ToByte(value),
S7DataType.UInt16 => (object)Convert.ToUInt16(value),
S7DataType.Int16 => (object)unchecked((ushort)Convert.ToInt16(value)),
S7DataType.UInt32 => (object)Convert.ToUInt32(value),
S7DataType.Int32 => (object)unchecked((uint)Convert.ToInt32(value)),
S7DataType.Float32 => (object)BitConverter.SingleToUInt32Bits(Convert.ToSingle(value)),
S7DataType.Int64 => throw new NotSupportedException("S7 Int64 writes land in a follow-up PR"),
S7DataType.UInt64 => throw new NotSupportedException("S7 UInt64 writes land in a follow-up PR"),
S7DataType.Float64 => throw new NotSupportedException("S7 Float64 (LReal) writes land in a follow-up PR"),
S7DataType.String => throw new NotSupportedException("S7 STRING writes land in a follow-up PR"),
S7DataType.DateTime => throw new NotSupportedException("S7 DateTime writes land in a follow-up PR"),
_ => throw new InvalidOperationException($"Unknown S7DataType {tag.DataType}"),
};
await plc.WriteAsync(tag.Address, boxed, ct).ConfigureAwait(false);
}
private global::S7.Net.Plc RequirePlc() =>
Plc ?? throw new InvalidOperationException("S7Driver not initialized");
// ---- ITagDiscovery ----
public Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(builder);
var folder = builder.Folder("S7", "S7");
foreach (var t in _options.Tags)
{
folder.Variable(t.Name, t.Name, new DriverAttributeInfo(
FullName: t.Name,
DriverDataType: MapDataType(t.DataType),
IsArray: false,
ArrayDim: null,
SecurityClass: t.Writable ? SecurityClassification.Operate : SecurityClassification.ViewOnly,
IsHistorized: false,
IsAlarm: false,
WriteIdempotent: t.WriteIdempotent));
}
return Task.CompletedTask;
}
private static DriverDataType MapDataType(S7DataType t) => t switch
{
S7DataType.Bool => DriverDataType.Boolean,
S7DataType.Byte => DriverDataType.Int32, // no 8-bit in DriverDataType yet
S7DataType.Int16 or S7DataType.UInt16 or S7DataType.Int32 or S7DataType.UInt32 => DriverDataType.Int32,
S7DataType.Int64 or S7DataType.UInt64 => DriverDataType.Int32, // widens; lossy for >2^31-1
S7DataType.Float32 => DriverDataType.Float32,
S7DataType.Float64 => DriverDataType.Float64,
S7DataType.String => DriverDataType.String,
S7DataType.DateTime => DriverDataType.DateTime,
_ => DriverDataType.Int32,
};
// ---- ISubscribable (polling overlay) ----
public Task<ISubscriptionHandle> SubscribeAsync(
IReadOnlyList<string> fullReferences, TimeSpan publishingInterval, CancellationToken cancellationToken)
{
var id = Interlocked.Increment(ref _nextSubscriptionId);
var cts = new CancellationTokenSource();
// Floor at 100 ms — S7 CPUs scan 2-10 ms but the comms mailbox is processed at most
// once per scan; sub-100 ms polling just queues wire-side with worse latency.
var interval = publishingInterval < TimeSpan.FromMilliseconds(100)
? TimeSpan.FromMilliseconds(100)
: publishingInterval;
var handle = new S7SubscriptionHandle(id);
var state = new SubscriptionState(handle, [.. fullReferences], interval, cts);
_subscriptions[id] = state;
_ = Task.Run(() => PollLoopAsync(state, cts.Token), cts.Token);
return Task.FromResult<ISubscriptionHandle>(handle);
}
public Task UnsubscribeAsync(ISubscriptionHandle handle, CancellationToken cancellationToken)
{
if (handle is S7SubscriptionHandle h && _subscriptions.TryRemove(h.Id, out var state))
{
state.Cts.Cancel();
state.Cts.Dispose();
}
return Task.CompletedTask;
}
private async Task PollLoopAsync(SubscriptionState state, CancellationToken ct)
{
// Initial-data push per OPC UA Part 4 convention.
try { await PollOnceAsync(state, forceRaise: true, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
catch { /* first-read error — polling continues */ }
while (!ct.IsCancellationRequested)
{
try { await Task.Delay(state.Interval, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
try { await PollOnceAsync(state, forceRaise: false, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
catch { /* transient polling error — loop continues, health surface reflects it */ }
}
}
private async Task PollOnceAsync(SubscriptionState state, bool forceRaise, CancellationToken ct)
{
var snapshots = await ReadAsync(state.TagReferences, ct).ConfigureAwait(false);
for (var i = 0; i < state.TagReferences.Count; i++)
{
var tagRef = state.TagReferences[i];
var current = snapshots[i];
var lastSeen = state.LastValues.TryGetValue(tagRef, out var prev) ? prev : default;
if (forceRaise || !Equals(lastSeen?.Value, current.Value) || lastSeen?.StatusCode != current.StatusCode)
{
state.LastValues[tagRef] = current;
OnDataChange?.Invoke(this, new DataChangeEventArgs(state.Handle, tagRef, current));
}
}
}
private sealed record SubscriptionState(
S7SubscriptionHandle Handle,
IReadOnlyList<string> TagReferences,
TimeSpan Interval,
CancellationTokenSource Cts)
{
public System.Collections.Concurrent.ConcurrentDictionary<string, DataValueSnapshot> LastValues { get; }
= new(StringComparer.OrdinalIgnoreCase);
}
private sealed record S7SubscriptionHandle(long Id) : ISubscriptionHandle
{
public string DiagnosticId => $"s7-sub-{Id}";
}
// ---- IHostConnectivityProbe ----
/// <summary>
/// Host identifier surfaced in <see cref="GetHostStatuses"/>. <c>host:port</c> format
/// matches the Modbus driver's convention so the Admin UI dashboard renders both
/// family's rows uniformly.
/// </summary>
public string HostName => $"{_options.Host}:{_options.Port}";
public IReadOnlyList<HostConnectivityStatus> GetHostStatuses()
{
lock (_probeLock)
return [new HostConnectivityStatus(HostName, _hostState, _hostStateChangedUtc)];
}
private async Task ProbeLoopAsync(CancellationToken ct)
{
while (!ct.IsCancellationRequested)
{
var success = false;
try
{
// Probe via S7.Net's low-cost GetCpuStatus — returns the CPU state (Run/Stop)
// and is intentionally light on the comms mailbox. Single-word Plc.ReadAsync
// would also work but GetCpuStatus doubles as a "PLC actually up" check.
using var probeCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
probeCts.CancelAfter(_options.Probe.Timeout);
var plc = Plc;
if (plc is null) throw new InvalidOperationException("Plc dropped during probe");
await _gate.WaitAsync(probeCts.Token).ConfigureAwait(false);
try
{
_ = await plc.ReadStatusAsync(probeCts.Token).ConfigureAwait(false);
success = true;
}
finally { _gate.Release(); }
}
catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
catch { /* transport/timeout/exception — treated as Stopped below */ }
TransitionTo(success ? HostState.Running : HostState.Stopped);
try { await Task.Delay(_options.Probe.Interval, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
}
}
private void TransitionTo(HostState newState)
{
HostState old;
lock (_probeLock)
{
old = _hostState;
if (old == newState) return;
_hostState = newState;
_hostStateChangedUtc = DateTime.UtcNow;
}
OnHostStatusChanged?.Invoke(this, new HostStatusChangedEventArgs(HostName, old, newState));
}
public void Dispose() => DisposeAsync().AsTask().GetAwaiter().GetResult();
public async ValueTask DisposeAsync()
{
if (_disposed) return;
_disposed = true;
try { await ShutdownAsync(CancellationToken.None).ConfigureAwait(false); }
catch { /* disposal is best-effort */ }
_gate.Dispose();
}
}