Resolve Worker-004, -005, -006, -007, -008 code-review findings

Worker-004: post-watchdog-fault heartbeats reported a non-faulted state.
ReportWatchdogFaultIfNeededAsync now sets _state = Faulted before writing
the StaHung fault.

Worker-005 (re-triaged): the cited OnPoll site was removed by Worker-001;
the real silent-failure bug was in MxAccessStaSession.RunAlarmPollLoopAsync,
which caught only graceful-stop exceptions. A failing PollOnce now records a
WorkerFault on the event queue instead of vanishing on a non-awaited task.

Worker-006: RunAsync's finally skipped runtime disposal when shutdown timed
out, leaking the STA thread and COM object. It now always disposes
(MxAccessStaSession.Dispose is idempotent and bounded).

Worker-007 (re-triaged): replaced MxAccessComServer's Type.InvokeMember
reflection fallback with an IMxAccessServer fast path plus typed
ILMXProxyServer* casts; a non-conforming object now fails fast.

Worker-008: alarm consumer STA affinity was unenforced. MxAccessStaSession
records the alarm consumer's STA thread id and asserts every PollOnce runs
on it via a unit-testable guard.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-18 21:31:23 -04:00
parent 1d9e3afadd
commit 54325343bd
8 changed files with 519 additions and 68 deletions
@@ -23,6 +23,7 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
private IAlarmCommandHandler? alarmCommandHandler;
private CancellationTokenSource? alarmPollCts;
private Task? alarmPollTask;
private int? alarmConsumerThreadId;
private bool disposed;
/// <summary>
@@ -180,6 +181,14 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
session = MxAccessSession.Create(factory, eventSink, sessionId);
if (alarmCommandHandlerFactory is not null)
{
// STA-affinity invariant: the alarm consumer factory and
// every IMxAccessAlarmConsumer call must run on the STA
// thread, because the production wnwrap consumer holds an
// Apartment-threaded COM object. The factory runs here
// inside staRuntime.InvokeAsync, so this records the STA
// thread id; RunAlarmPollLoopAsync then asserts each
// PollOnce executes on the same thread.
alarmConsumerThreadId = Environment.CurrentManagedThreadId;
alarmCommandHandler = alarmCommandHandlerFactory(eventQueue);
}
commandDispatcher = new StaCommandDispatcher(
@@ -227,7 +236,11 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
try
{
await staRuntime.InvokeAsync(
() => handler.PollOnce(),
() =>
{
EnsureOnAlarmConsumerThread();
handler.PollOnce();
},
cancellationToken).ConfigureAwait(false);
}
catch (OperationCanceledException)
@@ -244,10 +257,77 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
// STA runtime shutting down — stop the loop gracefully.
return;
}
catch (Exception exception)
{
// A real alarm-poll failure (COMException from
// GetXmlCurrentAlarms2, malformed-XML parse failure, etc.).
// Record it as a fault on the event queue so a broken
// alarm subscription becomes observable on the IPC fault
// path instead of silently faulting this never-awaited
// task. The loop then stops — the subscription is dead.
eventQueue.RecordFault(CreateAlarmPollFault(exception));
return;
}
}
}, CancellationToken.None);
}
/// <summary>
/// Instance-level affinity guard: compares the thread id recorded in
/// <c>alarmConsumerThreadId</c> with the calling thread's managed thread id
/// by delegating to <see cref="AssertOnAlarmConsumerThread"/>.
/// </summary>
private void EnsureOnAlarmConsumerThread() =>
    AssertOnAlarmConsumerThread(alarmConsumerThreadId, Environment.CurrentManagedThreadId);
/// <summary>
/// Guard for the alarm consumer's STA-affinity invariant. Every
/// <see cref="IMxAccessAlarmConsumer"/> call (and the consumer factory) is
/// required to execute on the thread that created the consumer, i.e. the
/// worker's STA. A mismatch is a programming error: the production wnwrap
/// consumer's CLSID is registered <c>ThreadingModel=Apartment</c>, so an
/// off-thread call would risk a cross-apartment COM deadlock. When no
/// owning thread has been recorded yet (no alarm handler configured, or the
/// session has not started) the guard does nothing.
/// </summary>
/// <param name="expectedThreadId">
/// Managed thread id on which the alarm consumer was created, or
/// <c>null</c> when no alarm consumer is configured.
/// </param>
/// <param name="actualThreadId">Managed thread id of the caller.</param>
/// <exception cref="InvalidOperationException">
/// An owning thread is recorded and <paramref name="actualThreadId"/>
/// differs from it.
/// </exception>
internal static void AssertOnAlarmConsumerThread(int? expectedThreadId, int actualThreadId)
{
    // Nothing to enforce: either no owner is recorded, or the caller is the owner.
    if (expectedThreadId is not int owningThreadId || owningThreadId == actualThreadId)
    {
        return;
    }

    throw new InvalidOperationException(
        $"Alarm consumer accessed off its owning STA thread. Expected thread {owningThreadId}, actual {actualThreadId}. All IMxAccessAlarmConsumer calls must run on the STA that created the consumer.");
}
/// <summary>
/// Translates an exception thrown by an alarm poll into a
/// <see cref="WorkerFault"/>. The same "MXAccess alarm poll failed: ..."
/// text is used for both the diagnostic message and the protocol status
/// message; the HRESULT is populated only for COM exceptions.
/// </summary>
/// <param name="exception">The exception raised by the failed poll.</param>
/// <returns>The populated fault describing the failure.</returns>
private static WorkerFault CreateAlarmPollFault(Exception exception)
{
    string description = $"MXAccess alarm poll failed: {exception.Message}";

    var status = new ProtocolStatus
    {
        Code = ProtocolStatusCode.WorkerUnavailable,
        Message = description,
    };

    var pollFault = new WorkerFault
    {
        Category = WorkerFaultCategory.MxaccessEventConversionFailed,
        ExceptionType = exception.GetType().FullName ?? string.Empty,
        DiagnosticMessage = description,
        ProtocolStatus = status,
    };

    // Only COM failures contribute an HRESULT; for any other exception
    // the Hresult member is left exactly as the initializer produced it.
    if (exception is not System.Runtime.InteropServices.COMException comFailure)
    {
        return pollFault;
    }

    pollFault.Hresult = comFailure.HResult;
    return pollFault;
}
/// <summary>
/// Dispatches a command to the STA thread for execution asynchronously.
/// </summary>