Close all four stability-review 2026-04-13 findings so a failed runtime probe subscription can no longer leave a phantom entry that Tick() flips to Stopped and fans out false BadOutOfService quality across a host's subtree, a silently-failed dashboard bind no longer lets the service advertise a successful start while an operator-visible endpoint is dead, the seven sync-over-async sites in LmxNodeManager (rebuild probe sync, Read, Write, four HistoryRead overrides) can no longer park the OPC UA stack thread indefinitely on a hung backend, and alarm auto-subscribe + transferred-subscription restore no longer race shutdown as untracked fire-and-forget tasks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-04-14 00:48:07 -04:00
parent 731092595f
commit c76ab8fdee
21 changed files with 869 additions and 53 deletions

View File

@@ -0,0 +1,53 @@
using System;
using System.Threading.Tasks;
namespace ZB.MOM.WW.LmxOpcUa.Host.Utilities
{
/// <summary>
/// Bounded safety wrappers for blocking on async tasks from synchronous OPC UA stack
/// callbacks (Read, Write, HistoryRead*, BuildAddressSpace). These are backstops: the
/// underlying MxAccess / Historian clients already enforce inner timeouts on the async
/// path, but an outer bound is still required so the stack thread cannot be parked
/// indefinitely by a hung scheduler, a slow reconnect, or any other non-returning
/// async path.
/// </summary>
/// <remarks>
/// On timeout, the underlying task is NOT cancelled — it runs to completion on the
/// thread pool and is abandoned. Callers must be comfortable with the fire-forget
/// semantics of the background continuation. This is acceptable for the current call
/// sites because MxAccess and Historian clients are shared singletons whose background
/// work does not capture request-scoped state.
/// </remarks>
internal static class SyncOverAsync
{
    /// <summary>
    /// Blocks the calling thread until <paramref name="task"/> completes or
    /// <paramref name="timeout"/> elapses, whichever happens first.
    /// </summary>
    /// <param name="task">The task to wait on. Must not be null.</param>
    /// <param name="timeout">Upper bound on the wait, handed straight to <see cref="Task.Wait(TimeSpan)"/>.</param>
    /// <param name="operation">Operation label used as the prefix of the timeout message.</param>
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is null.</exception>
    /// <exception cref="TimeoutException">The task did not complete within <paramref name="timeout"/>.</exception>
    /// <remarks>
    /// When the wait surfaces an <see cref="AggregateException"/> holding exactly one inner
    /// exception, that inner exception is rethrown so callers can write natural catch blocks.
    /// An aggregate with any other number of inner exceptions propagates unchanged.
    /// </remarks>
    public static void WaitSync(Task task, TimeSpan timeout, string operation)
    {
        if (task == null) throw new ArgumentNullException(nameof(task));

        try
        {
            if (!task.Wait(timeout))
                throw new TimeoutException($"{operation} exceeded {timeout.TotalSeconds:0.#}s");
        }
        catch (AggregateException ae) when (ae.InnerExceptions.Count == 1)
        {
            // Rethrow via ExceptionDispatchInfo rather than `throw inner;` — a plain throw
            // would reset the inner exception's stack trace to this frame and discard the
            // frames that show where the background operation actually failed.
            System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ae.InnerExceptions[0]).Throw();
            throw; // Unreachable: Throw() never returns. Present only for flow analysis.
        }
    }

    /// <summary>
    /// Blocks the calling thread until <paramref name="task"/> completes or
    /// <paramref name="timeout"/> elapses, then returns the task's result.
    /// </summary>
    /// <typeparam name="T">Result type of the task.</typeparam>
    /// <param name="task">The task to wait on. Must not be null.</param>
    /// <param name="timeout">Upper bound on the wait.</param>
    /// <param name="operation">Operation label used as the prefix of the timeout message.</param>
    /// <returns>The result of the completed task.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is null.</exception>
    /// <exception cref="TimeoutException">The task did not complete within <paramref name="timeout"/>.</exception>
    public static T WaitSync<T>(Task<T> task, TimeSpan timeout, string operation)
    {
        if (task == null) throw new ArgumentNullException(nameof(task));

        // Reuse the non-generic overload so both share one timeout / unwrap policy.
        WaitSync((Task)task, timeout, operation);

        // Reaching this line means Wait returned true without throwing, i.e. the task
        // ran to completion, so reading Result neither blocks nor throws.
        return task.Result;
    }
}
}