fix(focas): serialize per-device wire I/O + bound reads; tolerate AdminUI config formats
Equipment tags were stuck at Bad_WaitingForInitialData on the deployed driver: the equipment poll, fixed-tree loop, probe and recycle shared one FOCAS/2 socket with no serialization, and the steady-state read had no timeout — concurrent reads collided and a stalled read hung forever, never overwriting the node's initial-data seed.
- SynchronizedFocasClient: per-device SemaphoreSlim gate + per-call timeout around every wire op (Connect/Probe gated, not double-bounded); wired in EnsureConnectedAsync. ReadAsync/WriteAsync map a per-call timeout to BadCommunicationError instead of rethrowing.
- FlexibleStringConverter on FOCAS config Series: the AdminUI persists the enum as a number ("series":6); accept number-or-string instead of throwing, which made the driver fall back to a stub.
- FocasHostAddress.TryParse tolerates a scheme-less {ip}[:{port}] (AdminUI hostAddress form); canonical focas:// unchanged, malformed schemes still rejected.
247 FOCAS tests green; each fix has a regression test. Live-validated on wonder-app-vd03 (tags read Good).
This commit is contained in:
@@ -306,7 +306,16 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
Volatile.Read(ref _health).LastSuccessfulRead,
|
||||
$"FOCAS status 0x{status:X8} reading {reference}"));
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { throw; }
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// Per-call timeout (not external cancellation) — the read stalled past the device
|
||||
// Timeout budget. Surface a recoverable comm error so the BadWaitingForInitialData
|
||||
// seed is overwritten and health degrades, instead of the read hanging forever.
|
||||
results[i] = new DataValueSnapshot(null, FocasStatusMapper.BadCommunicationError, null, now);
|
||||
Volatile.Write(ref _health, new DriverHealth(DriverState.Degraded,
|
||||
Volatile.Read(ref _health).LastSuccessfulRead, $"FOCAS read timed out for {reference}"));
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
results[i] = new DataValueSnapshot(null, FocasStatusMapper.BadCommunicationError, null, now);
|
||||
@@ -356,7 +365,15 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
var status = await client.WriteAsync(parsed, def.DataType, w.Value, cancellationToken).ConfigureAwait(false);
|
||||
results[i] = new WriteResult(status);
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { throw; }
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// Per-call timeout (not external cancellation) — the write stalled past the device
|
||||
// Timeout budget. Surface a recoverable comm error rather than aborting the batch.
|
||||
results[i] = new WriteResult(FocasStatusMapper.BadCommunicationError);
|
||||
Volatile.Write(ref _health, new DriverHealth(DriverState.Degraded,
|
||||
Volatile.Read(ref _health).LastSuccessfulRead, $"FOCAS write timed out for {w.FullReference}"));
|
||||
}
|
||||
catch (NotSupportedException nse)
|
||||
{
|
||||
results[i] = new WriteResult(FocasStatusMapper.BadNotSupported);
|
||||
@@ -1113,7 +1130,11 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
device.Client = null;
|
||||
}
|
||||
|
||||
device.Client = _clientFactory.Create();
|
||||
// Wrap the raw wire client so every operation on the device's single FOCAS/2 socket is
|
||||
// serialized (request→response on one socket cannot interleave) and time-bounded. Without
|
||||
// this, the equipment poll, fixed-tree loop, probe, and recycle loop collide on the shared
|
||||
// socket and a stalled read blocks forever — leaving bound tags at BadWaitingForInitialData.
|
||||
device.Client = new SynchronizedFocasClient(_clientFactory.Create(), _options.Timeout);
|
||||
try
|
||||
{
|
||||
await device.Client.ConnectAsync(device.ParsedAddress, _options.Timeout, ct).ConfigureAwait(false);
|
||||
|
||||
@@ -195,12 +195,41 @@ public static class FocasDriverFactoryExtensions
|
||||
AllowTrailingCommas = true,
|
||||
};
|
||||
|
||||
/// <summary>
/// JSON converter for string-typed properties that also accepts a JSON <b>number</b> token.
/// The AdminUI persists the FOCAS <c>Series</c> enum as its integer value
/// (e.g. <c>"series":6</c>), whereas this DTO models <c>Series</c> as a string that is handed to
/// <see cref="ParseSeries"/> (Enum.TryParse accepts the numeric form). Without this converter,
/// System.Text.Json throws "Cannot get the value of a token type 'Number' as a string" on the
/// bare number and the driver falls back to a stub.
/// </summary>
/// <remarks>
/// Accepted tokens are string, number and null; the result is always a string (or null). Any
/// other token type is rejected with a <see cref="JsonException"/>.
/// </remarks>
internal sealed class FlexibleStringConverter : JsonConverter<string?>
{
    /// <summary>Reads a string, number or null token and returns it as a string.</summary>
    /// <param name="reader">The reader positioned on the token to convert.</param>
    /// <param name="typeToConvert">The declared target type (unused).</param>
    /// <param name="options">The active serializer options (unused).</param>
    /// <returns>
    /// The string value, the invariant-culture text of the number, or <c>null</c> for a null token.
    /// </returns>
    /// <exception cref="JsonException">The token is not a string, number or null.</exception>
    public override string? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.Null:
                return null;

            case JsonTokenType.String:
                return reader.GetString();

            case JsonTokenType.Number:
            {
                // Prefer the integral form so an enum ordinal such as 6 round-trips as "6";
                // anything that is not an Int64 is rendered through its double value instead.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;
                if (reader.TryGetInt64(out long whole))
                {
                    return whole.ToString(invariant);
                }

                return reader.GetDouble().ToString(invariant);
            }

            default:
                throw new JsonException($"Expected string, number, or null but got {reader.TokenType}.");
        }
    }

    /// <summary>Writes the value back as a JSON string, or a JSON null when it is <c>null</c>.</summary>
    /// <param name="writer">The writer to emit to.</param>
    /// <param name="value">The string to write; may be <c>null</c>.</param>
    /// <param name="options">The active serializer options (unused).</param>
    public override void Write(Utf8JsonWriter writer, string? value, JsonSerializerOptions options)
    {
        if (value is not null)
        {
            writer.WriteStringValue(value);
            return;
        }

        writer.WriteNullValue();
    }
}
|
||||
|
||||
internal sealed class FocasDriverConfigDto
|
||||
{
|
||||
/// <summary>Gets or sets the FOCAS client factory backend name (e.g. "wire" or "stub").</summary>
|
||||
public string? Backend { get; init; }
|
||||
|
||||
/// <summary>Gets or sets the CNC series for this driver.</summary>
|
||||
[JsonConverter(typeof(FlexibleStringConverter))]
|
||||
public string? Series { get; init; }
|
||||
|
||||
/// <summary>Gets or sets the operation timeout in milliseconds.</summary>
|
||||
@@ -234,6 +263,7 @@ public static class FocasDriverFactoryExtensions
|
||||
public string? DeviceName { get; init; }
|
||||
|
||||
/// <summary>Gets or sets the CNC series for this device (overrides top-level series if provided).</summary>
|
||||
[JsonConverter(typeof(FlexibleStringConverter))]
|
||||
public string? Series { get; init; }
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -21,9 +21,19 @@ public sealed record FocasHostAddress(string Host, int Port)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(value)) return null;
|
||||
const string prefix = "focas://";
|
||||
if (!value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) return null;
|
||||
|
||||
var body = value[prefix.Length..];
|
||||
// Canonical form is focas://{ip}[:{port}], but the AdminUI persists the device host as a
|
||||
// scheme-less "{ip}[:{port}]" (e.g. "10.201.31.5:8193"). Accept that too: take the body
|
||||
// after focas:// when present, else the whole value when it carries NO other URI scheme
|
||||
// (a "://" that isn't ours — e.g. http:// — is still rejected). The host-contains-colon
|
||||
// guard below then rejects malformed scheme typos like "focas:10.0.0.5:8193".
|
||||
string body;
|
||||
if (value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
|
||||
body = value[prefix.Length..];
|
||||
else if (!value.Contains("://", StringComparison.Ordinal))
|
||||
body = value;
|
||||
else
|
||||
return null;
|
||||
if (string.IsNullOrEmpty(body)) return null;
|
||||
|
||||
var colonIdx = body.LastIndexOf(':');
|
||||
@@ -39,7 +49,9 @@ public sealed record FocasHostAddress(string Host, int Port)
|
||||
{
|
||||
host = body;
|
||||
}
|
||||
if (string.IsNullOrEmpty(host)) return null;
|
||||
// Empty host, or a host still carrying a colon (e.g. the malformed "focas:10.0.0.5" left
|
||||
// when someone wrote "focas:10.0.0.5:8193" without the //), is invalid.
|
||||
if (string.IsNullOrEmpty(host) || host.Contains(':', StringComparison.Ordinal)) return null;
|
||||
return new FocasHostAddress(host, port);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.FOCAS;
|
||||
|
||||
/// <summary>
/// Decorates an <see cref="IFocasClient"/> so that every wire operation on the device's
/// single FOCAS/2 socket is (1) <b>serialized</b> against all other operations and
/// (2) <b>time-bounded</b>.
/// </summary>
/// <remarks>
/// <para>FOCAS/2 over TCP:8193 is a strict request→response protocol on ONE socket. The
/// driver holds a single <see cref="IFocasClient"/> per device, but several independent loops
/// read from it concurrently — the equipment poll (<see cref="FocasDriver.ReadAsync"/>), the
/// fixed-tree loop (<c>FixedTreeLoopAsync</c>), the connectivity probe, and the recycle loop.
/// Without serialization, two reads interleave their <c>send(request); read(response)</c> on the
/// same socket: one reader consumes the other's response PDU and the victim then blocks forever
/// waiting for bytes that never arrive — leaving the bound OPC UA node stuck at
/// <c>BadWaitingForInitialData</c>. This was the root cause of FOCAS equipment tags never
/// surfacing a value while the probe reported HEALTHY (the probe reads work single-threaded on a
/// dev box, but collide deployed once the fixed-tree loop runs concurrently).</para>
///
/// <para>The gate (<see cref="SemaphoreSlim"/> of count 1) makes each request→response atomic on
/// the socket. The per-call timeout ensures a stalled response can never hold the gate — and thus
/// the socket — indefinitely; a hung read surfaces as a recoverable error at the configured
/// <c>Timeout</c> budget instead of permanent silence. The gate and timeout are paired
/// deliberately: a lock around an <i>unbounded</i> read would deadlock all I/O for the device.
/// The timeout is delivered by cancelling the token handed to the wrapped client, so it is only
/// effective when that client observes its cancellation token.</para>
///
/// <para><see cref="ConnectAsync"/> and <see cref="ProbeAsync"/> are serialized but NOT bounded by
/// this decorator's call timeout — they carry their own budgets (the connect timeout argument and
/// the probe's caller-supplied linked token respectively), and double-bounding would shrink them.</para>
///
/// <para><see cref="Dispose"/> may race with in-flight or queued operations (e.g. a recycle while a
/// read is stalled). It therefore only disposes the wrapped client and flags this instance as
/// disposed; the gate itself is intentionally left undisposed so that in-flight operations can
/// still release it and queued operations wake up and fail fast with
/// <see cref="ObjectDisposedException"/>.</para>
/// </remarks>
public sealed class SynchronizedFocasClient : IFocasClient
{
    private readonly IFocasClient _inner;
    private readonly TimeSpan _callTimeout;
    private readonly SemaphoreSlim _gate = new(1, 1);

    // 0 = live, 1 = disposed. Accessed via Interlocked/Volatile because Dispose can run on a
    // different thread from the loops that are using the client.
    private int _disposed;

    /// <summary>Wraps <paramref name="inner"/> with per-device serialization + a per-call timeout.</summary>
    /// <param name="inner">The underlying FOCAS client to serialize access to.</param>
    /// <param name="callTimeout">
    /// The budget applied to each data read/write. <see cref="TimeSpan.Zero"/> or negative disables
    /// the per-call timeout (callers' own cancellation tokens still apply).
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="inner"/> is <c>null</c>.</exception>
    public SynchronizedFocasClient(IFocasClient inner, TimeSpan callTimeout)
    {
        _inner = inner ?? throw new ArgumentNullException(nameof(inner));
        _callTimeout = callTimeout;
    }

    /// <inheritdoc />
    public bool IsConnected => _inner.IsConnected;

    /// <inheritdoc />
    public Task ConnectAsync(FocasHostAddress address, TimeSpan timeout, CancellationToken cancellationToken) =>
        RunGatedAsync(ct => _inner.ConnectAsync(address, timeout, ct), cancellationToken);

    /// <inheritdoc />
    public Task<bool> ProbeAsync(CancellationToken cancellationToken) =>
        RunGatedAsync(ct => _inner.ProbeAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<(object? value, uint status)> ReadAsync(
        FocasAddress address, FocasDataType type, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.ReadAsync(address, type, ct), cancellationToken);

    /// <inheritdoc />
    public Task<uint> WriteAsync(
        FocasAddress address, FocasDataType type, object? value, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.WriteAsync(address, type, value, ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasActiveAlarm>> ReadAlarmsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.ReadAlarmsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasSysInfo> GetSysInfoAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSysInfoAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasAxisName>> GetAxisNamesAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetAxisNamesAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasSpindleName>> GetSpindleNamesAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSpindleNamesAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasDynamicSnapshot> ReadDynamicAsync(int axisIndex, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.ReadDynamicAsync(axisIndex, ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasProgramInfo> GetProgramInfoAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetProgramInfoAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasTimer> GetTimerAsync(FocasTimerKind kind, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetTimerAsync(kind, ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasServoLoad>> GetServoLoadsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetServoLoadsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetSpindleLoadsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSpindleLoadsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetSpindleMaxRpmsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSpindleMaxRpmsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetPositionFiguresAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetPositionFiguresAsync(ct), cancellationToken);

    /// <summary>
    /// Disposes the wrapped client and marks this decorator as disposed. Safe to call more than
    /// once; only the first call reaches the wrapped client.
    /// </summary>
    public void Dispose()
    {
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        _inner.Dispose();

        // The gate is deliberately NOT disposed. SemaphoreSlim.Dispose is not safe to call
        // concurrently with WaitAsync/Release: it would make an in-flight operation's Release()
        // throw ObjectDisposedException (masking the real failure) and does not wake operations
        // already queued on the gate. The semaphore holds no unmanaged resource here because
        // AvailableWaitHandle is never accessed, so leaving it to the GC is safe.
    }

    // Fails fast once Dispose has run, so no new wire operation is started on a disposed client.
    private void ThrowIfDisposed()
    {
        if (Volatile.Read(ref _disposed) != 0)
        {
            throw new ObjectDisposedException(nameof(SynchronizedFocasClient));
        }
    }

    // Gate only — the caller already governs the budget (connect timeout arg / probe linked token).
    private async Task<T> RunGatedAsync<T>(Func<CancellationToken, Task<T>> op, CancellationToken ct)
    {
        ThrowIfDisposed();
        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            // Re-check: Dispose may have run while this call was queued on the gate.
            ThrowIfDisposed();
            return await op(ct).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }

    // Non-generic twin of RunGatedAsync<T> for operations that return no value.
    private async Task RunGatedAsync(Func<CancellationToken, Task> op, CancellationToken ct)
    {
        ThrowIfDisposed();
        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            // Re-check: Dispose may have run while this call was queued on the gate.
            ThrowIfDisposed();
            await op(ct).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }

    // Gate + per-call timeout. The budget starts once the gate is held, so time spent queued
    // behind other operations is bounded only by the caller's token. When the wrapped operation
    // honors its token, a fired timeout surfaces as OperationCanceledException whose token is
    // the linked (not the caller's) token — callers distinguish it from real cancellation by
    // testing their own token's IsCancellationRequested.
    private async Task<T> RunBoundedAsync<T>(Func<CancellationToken, Task<T>> op, CancellationToken ct)
    {
        ThrowIfDisposed();
        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            // Re-check: Dispose may have run while this call was queued on the gate.
            ThrowIfDisposed();

            // Zero or negative budget disables the per-call timeout entirely.
            if (_callTimeout <= TimeSpan.Zero)
            {
                return await op(ct).ConfigureAwait(false);
            }

            using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct);
            linked.CancelAfter(_callTimeout);
            return await op(linked.Token).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }
}
|
||||
Reference in New Issue
Block a user