Resolve Worker-004, -005, -006, -007, -008 code-review findings

Worker-004: post-watchdog-fault heartbeats reported a non-faulted state.
ReportWatchdogFaultIfNeededAsync now sets _state = Faulted before writing
the StaHung fault.

Worker-005 (re-triaged): the cited OnPoll site was removed by Worker-001;
the real silent-failure bug was in MxAccessStaSession.RunAlarmPollLoopAsync,
which caught only graceful-stop exceptions. A failing PollOnce now records a
WorkerFault on the event queue instead of vanishing on a non-awaited task.

Worker-006: RunAsync's finally skipped runtime disposal when shutdown timed
out, leaking the STA thread and COM object. It now always disposes
(MxAccessStaSession.Dispose is idempotent and bounded).

Worker-007 (re-triaged): replaced MxAccessComServer's Type.InvokeMember
reflection fallback with an IMxAccessServer fast path plus typed
ILMXProxyServer* casts; a non-conforming object now fails fast.

Worker-008: alarm consumer STA affinity was unenforced. MxAccessStaSession
records the alarm consumer's STA thread id and asserts every PollOnce runs
on it via a unit-testable guard.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-18 21:31:23 -04:00
parent 1d9e3afadd
commit 54325343bd
8 changed files with 519 additions and 68 deletions
+14 -7
View File
@@ -36,7 +36,6 @@ public sealed class WorkerPipeSession
private volatile WorkerState _state = WorkerState.Starting;
private bool _acceptingCommands = true;
private bool _watchdogFaultSent;
private bool _shutdownTimedOut;
/// <summary>Initializes a new worker pipe session over the provided stream.</summary>
/// <param name="stream">Network stream for reading and writing frames.</param>
@@ -119,11 +118,14 @@ public sealed class WorkerPipeSession
}
finally
{
if (!_shutdownTimedOut)
{
_runtimeSession?.Dispose();
}
// Always dispose the runtime session, including after a
// shutdown timeout. MxAccessStaSession.Dispose is idempotent and
// bounded (each STA join is capped at 2s), so re-entering it on
// the normal path is a harmless no-op, while on the timed-out
// path it is the only thing that reclaims the STA thread and
// releases the MXAccess COM object — skipping it leaked both and
// left cleanup to rely solely on process exit.
_runtimeSession?.Dispose();
_runtimeSession = null;
_state = WorkerState.Stopped;
}
@@ -480,7 +482,6 @@ public sealed class WorkerPipeSession
}
catch (TimeoutException exception)
{
_shutdownTimedOut = true;
_state = WorkerState.Faulted;
await TryWriteFaultAsync(CreateShutdownTimeoutFault(exception), cancellationToken).ConfigureAwait(false);
throw;
@@ -615,6 +616,12 @@ public sealed class WorkerPipeSession
}
_watchdogFaultSent = true;
// The STA is hung — move the session to Faulted before the next
// heartbeat so the heartbeat's reported State stays consistent with
// the StaHung fault just sent. Without this the heartbeat loop keeps
// advertising a non-faulted state that contradicts the fault.
_state = WorkerState.Faulted;
await TryWriteFaultAsync(
CreateFault(
WorkerFaultCategory.StaHung,
@@ -1,13 +1,22 @@
using System;
using System.Reflection;
using System.Runtime.ExceptionServices;
using ArchestrA.MxAccess;
namespace MxGateway.Worker.MxAccess;
/// <summary>
/// Adapter exposing MXAccess COM object methods through the IMxAccessServer interface.
/// Adapter exposing MXAccess COM object methods through the <see cref="IMxAccessServer"/>
/// interface.
/// </summary>
/// <remarks>
/// The supplied object must implement the typed MXAccess COM interface contract.
/// In production it is the <c>LMXProxyServerClass</c> RCW, which implements
/// <see cref="ILMXProxyServer"/> / <see cref="ILMXProxyServer3"/> /
/// <see cref="ILMXProxyServer4"/>. Tests substitute a typed fake that
/// implements <see cref="IMxAccessServer"/> directly. The earlier late-bound
/// <c>Type.InvokeMember</c> reflection fallback was removed: it bypassed the
/// typed interface contract, boxed value-type handles on every call, and only
/// ever served test doubles — a typed fake is the supported test seam now.
/// </remarks>
public sealed class MxAccessComServer : IMxAccessServer
{
private readonly object mxAccessComObject;
@@ -15,7 +24,11 @@ public sealed class MxAccessComServer : IMxAccessServer
/// <summary>
/// Initializes the adapter with the MXAccess COM object.
/// </summary>
/// <param name="mxAccessComObject">MXAccess COM object instance.</param>
/// <param name="mxAccessComObject">
/// MXAccess COM object instance. Must implement either the typed
/// <see cref="ILMXProxyServer"/> COM interface family (production) or
/// <see cref="IMxAccessServer"/> directly (test fakes).
/// </param>
public MxAccessComServer(object mxAccessComObject)
{
this.mxAccessComObject = mxAccessComObject ?? throw new ArgumentNullException(nameof(mxAccessComObject));
@@ -24,24 +37,24 @@ public sealed class MxAccessComServer : IMxAccessServer
/// <inheritdoc />
public int Register(string clientName)
{
if (mxAccessComObject is ILMXProxyServer mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
return mxAccessServer.Register(clientName);
return typedFake.Register(clientName);
}
return (int)Invoke(nameof(Register), clientName);
return AsProxyServer().Register(clientName);
}
/// <inheritdoc />
public void Unregister(int serverHandle)
{
if (mxAccessComObject is ILMXProxyServer mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
mxAccessServer.Unregister(serverHandle);
typedFake.Unregister(serverHandle);
return;
}
Invoke(nameof(Unregister), serverHandle);
AsProxyServer().Unregister(serverHandle);
}
/// <inheritdoc />
@@ -49,12 +62,12 @@ public sealed class MxAccessComServer : IMxAccessServer
int serverHandle,
string itemDefinition)
{
if (mxAccessComObject is ILMXProxyServer mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
return mxAccessServer.AddItem(serverHandle, itemDefinition);
return typedFake.AddItem(serverHandle, itemDefinition);
}
return (int)Invoke(nameof(AddItem), serverHandle, itemDefinition);
return AsProxyServer().AddItem(serverHandle, itemDefinition);
}
/// <inheritdoc />
@@ -63,12 +76,12 @@ public sealed class MxAccessComServer : IMxAccessServer
string itemDefinition,
string itemContext)
{
if (mxAccessComObject is ILMXProxyServer3 mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
return mxAccessServer.AddItem2(serverHandle, itemDefinition, itemContext);
return typedFake.AddItem2(serverHandle, itemDefinition, itemContext);
}
return (int)Invoke(nameof(AddItem2), serverHandle, itemDefinition, itemContext);
return AsProxyServer3().AddItem2(serverHandle, itemDefinition, itemContext);
}
/// <inheritdoc />
@@ -76,13 +89,13 @@ public sealed class MxAccessComServer : IMxAccessServer
int serverHandle,
int itemHandle)
{
if (mxAccessComObject is ILMXProxyServer mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
mxAccessServer.RemoveItem(serverHandle, itemHandle);
typedFake.RemoveItem(serverHandle, itemHandle);
return;
}
Invoke(nameof(RemoveItem), serverHandle, itemHandle);
AsProxyServer().RemoveItem(serverHandle, itemHandle);
}
/// <inheritdoc />
@@ -90,13 +103,13 @@ public sealed class MxAccessComServer : IMxAccessServer
int serverHandle,
int itemHandle)
{
if (mxAccessComObject is ILMXProxyServer mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
mxAccessServer.Advise(serverHandle, itemHandle);
typedFake.Advise(serverHandle, itemHandle);
return;
}
Invoke(nameof(Advise), serverHandle, itemHandle);
AsProxyServer().Advise(serverHandle, itemHandle);
}
/// <inheritdoc />
@@ -104,13 +117,13 @@ public sealed class MxAccessComServer : IMxAccessServer
int serverHandle,
int itemHandle)
{
if (mxAccessComObject is ILMXProxyServer mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
mxAccessServer.UnAdvise(serverHandle, itemHandle);
typedFake.UnAdvise(serverHandle, itemHandle);
return;
}
Invoke(nameof(UnAdvise), serverHandle, itemHandle);
AsProxyServer().UnAdvise(serverHandle, itemHandle);
}
/// <inheritdoc />
@@ -118,34 +131,36 @@ public sealed class MxAccessComServer : IMxAccessServer
int serverHandle,
int itemHandle)
{
if (mxAccessComObject is ILMXProxyServer4 mxAccessServer)
if (mxAccessComObject is IMxAccessServer typedFake)
{
mxAccessServer.AdviseSupervisory(serverHandle, itemHandle);
typedFake.AdviseSupervisory(serverHandle, itemHandle);
return;
}
Invoke(nameof(AdviseSupervisory), serverHandle, itemHandle);
AsProxyServer4().AdviseSupervisory(serverHandle, itemHandle);
}
private object Invoke(
string methodName,
params object[] arguments)
private ILMXProxyServer AsProxyServer()
{
try
{
return mxAccessComObject
.GetType()
.InvokeMember(
methodName,
BindingFlags.Instance | BindingFlags.Public | BindingFlags.InvokeMethod,
binder: null,
target: mxAccessComObject,
args: arguments);
}
catch (TargetInvocationException exception) when (exception.InnerException is not null)
{
ExceptionDispatchInfo.Capture(exception.InnerException).Throw();
throw;
}
return mxAccessComObject as ILMXProxyServer
?? throw new InvalidOperationException(
$"MXAccess COM object of type '{mxAccessComObject.GetType().FullName}' does not implement "
+ $"{nameof(ILMXProxyServer)} or {nameof(IMxAccessServer)}.");
}
private ILMXProxyServer3 AsProxyServer3()
{
return mxAccessComObject as ILMXProxyServer3
?? throw new InvalidOperationException(
$"MXAccess COM object of type '{mxAccessComObject.GetType().FullName}' does not implement "
+ $"{nameof(ILMXProxyServer3)} or {nameof(IMxAccessServer)}.");
}
private ILMXProxyServer4 AsProxyServer4()
{
return mxAccessComObject as ILMXProxyServer4
?? throw new InvalidOperationException(
$"MXAccess COM object of type '{mxAccessComObject.GetType().FullName}' does not implement "
+ $"{nameof(ILMXProxyServer4)} or {nameof(IMxAccessServer)}.");
}
}
@@ -23,6 +23,7 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
private IAlarmCommandHandler? alarmCommandHandler;
private CancellationTokenSource? alarmPollCts;
private Task? alarmPollTask;
private int? alarmConsumerThreadId;
private bool disposed;
/// <summary>
@@ -180,6 +181,14 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
session = MxAccessSession.Create(factory, eventSink, sessionId);
if (alarmCommandHandlerFactory is not null)
{
// STA-affinity invariant: the alarm consumer factory and
// every IMxAccessAlarmConsumer call must run on the STA
// thread, because the production wnwrap consumer holds an
// Apartment-threaded COM object. The factory runs here
// inside staRuntime.InvokeAsync, so this records the STA
// thread id; RunAlarmPollLoopAsync then asserts each
// PollOnce executes on the same thread.
alarmConsumerThreadId = Environment.CurrentManagedThreadId;
alarmCommandHandler = alarmCommandHandlerFactory(eventQueue);
}
commandDispatcher = new StaCommandDispatcher(
@@ -227,7 +236,11 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
try
{
await staRuntime.InvokeAsync(
() => handler.PollOnce(),
() =>
{
EnsureOnAlarmConsumerThread();
handler.PollOnce();
},
cancellationToken).ConfigureAwait(false);
}
catch (OperationCanceledException)
@@ -244,10 +257,77 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
// STA runtime shutting down — stop the loop gracefully.
return;
}
catch (Exception exception)
{
// A real alarm-poll failure (COMException from
// GetXmlCurrentAlarms2, malformed-XML parse failure, etc.).
// Record it as a fault on the event queue so a broken
// alarm subscription becomes observable on the IPC fault
// path instead of silently faulting this never-awaited
// task. The loop then stops — the subscription is dead.
eventQueue.RecordFault(CreateAlarmPollFault(exception));
return;
}
}
}, CancellationToken.None);
}
private void EnsureOnAlarmConsumerThread()
{
AssertOnAlarmConsumerThread(alarmConsumerThreadId, Environment.CurrentManagedThreadId);
}
/// <summary>
/// Enforces the STA-affinity invariant for the alarm consumer: every
/// <see cref="IMxAccessAlarmConsumer"/> call (and the consumer factory)
/// must run on the same thread the consumer was created on (the worker's
/// STA). Throws <see cref="InvalidOperationException"/> when a caller
/// breaks affinity — a programming error that would otherwise risk a
/// cross-apartment COM deadlock in the production wnwrap consumer, since
/// its CLSID is registered <c>ThreadingModel=Apartment</c>. The check is
/// a no-op until the consumer thread has been recorded (no alarm handler
/// configured, or session not yet started).
/// </summary>
/// <param name="expectedThreadId">
/// The managed thread id the alarm consumer was created on, or
/// <c>null</c> if no alarm consumer is configured.
/// </param>
/// <param name="actualThreadId">The current managed thread id.</param>
internal static void AssertOnAlarmConsumerThread(int? expectedThreadId, int actualThreadId)
{
if (expectedThreadId is not null && actualThreadId != expectedThreadId.Value)
{
throw new InvalidOperationException(
$"Alarm consumer accessed off its owning STA thread. Expected thread {expectedThreadId.Value}, "
+ $"actual {actualThreadId}. All IMxAccessAlarmConsumer calls must run on the STA that "
+ "created the consumer.");
}
}
private static WorkerFault CreateAlarmPollFault(Exception exception)
{
string message =
$"MXAccess alarm poll failed: {exception.Message}";
WorkerFault fault = new()
{
Category = WorkerFaultCategory.MxaccessEventConversionFailed,
ExceptionType = exception.GetType().FullName ?? string.Empty,
DiagnosticMessage = message,
ProtocolStatus = new ProtocolStatus
{
Code = ProtocolStatusCode.WorkerUnavailable,
Message = message,
},
};
if (exception is System.Runtime.InteropServices.COMException comException)
{
fault.Hresult = comException.HResult;
}
return fault;
}
/// <summary>
/// Dispatches a command to the STA thread for execution asynchronously.
/// </summary>