Improve gateway reliability and dashboard docs

This commit is contained in:
Joseph Doherty
2026-04-28 00:13:22 -04:00
parent bd4a09a35e
commit 4fc355b357
61 changed files with 1722 additions and 150 deletions
+16 -4
View File
@@ -148,10 +148,22 @@ public sealed class WorkerPipeClient : IWorkerPipeClient
})
.Build();
return await pipeline.ExecuteAsync(
async token => await ConnectSingleAttemptAsync(pipeName, token).ConfigureAwait(false),
cancellationToken)
.ConfigureAwait(false);
using CancellationTokenSource connectDeadline =
CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
connectDeadline.CancelAfter(_connectTimeoutMilliseconds);
try
{
return await pipeline.ExecuteAsync(
async token => await ConnectSingleAttemptAsync(pipeName, token).ConfigureAwait(false),
connectDeadline.Token)
.ConfigureAwait(false);
}
catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
{
throw new TimeoutException(
$"Worker pipe {pipeName} did not connect within {_connectTimeoutMilliseconds}ms.");
}
}
private async Task<NamedPipeClientStream> ConnectSingleAttemptAsync(
+68 -1
View File
@@ -14,6 +14,9 @@ namespace MxGateway.Worker.Ipc;
public sealed class WorkerPipeSession
{
private static readonly TimeSpan EventDrainInterval = TimeSpan.FromMilliseconds(25);
private const uint EventDrainBatchSize = 128;
private readonly WorkerFrameProtocolOptions _options;
private readonly Func<int> _processIdProvider;
private readonly Func<IWorkerRuntimeSession> _runtimeSessionFactory;
@@ -206,17 +209,22 @@ public sealed class WorkerPipeSession
using CancellationTokenSource heartbeatCancellation = CancellationTokenSource
.CreateLinkedTokenSource(cancellationToken);
Task heartbeatTask = RunHeartbeatLoopAsync(heartbeatCancellation.Token);
Task eventDrainTask = RunEventDrainLoopAsync(heartbeatCancellation.Token);
try
{
while (!cancellationToken.IsCancellationRequested)
{
Task<WorkerEnvelope> readTask = _reader.ReadAsync(cancellationToken);
Task completedTask = await Task.WhenAny(readTask, heartbeatTask).ConfigureAwait(false);
Task completedTask = await Task.WhenAny(readTask, heartbeatTask, eventDrainTask).ConfigureAwait(false);
if (completedTask == heartbeatTask)
{
await heartbeatTask.ConfigureAwait(false);
}
else if (completedTask == eventDrainTask)
{
await eventDrainTask.ConfigureAwait(false);
}
WorkerEnvelope envelope = await readTask.ConfigureAwait(false);
bool keepReading = await DispatchGatewayEnvelopeAsync(envelope, cancellationToken).ConfigureAwait(false);
@@ -236,6 +244,52 @@ public sealed class WorkerPipeSession
catch (OperationCanceledException)
{
}
try
{
await eventDrainTask.ConfigureAwait(false);
}
catch (OperationCanceledException)
{
}
}
}
/// <summary>
/// Polls the current runtime session for a fault and for queued events until
/// <paramref name="cancellationToken"/> is signaled, writing each drained event to the pipe writer.
/// </summary>
/// <param name="cancellationToken">Token that stops the loop and is passed to every delay and write.</param>
/// <exception cref="InvalidOperationException">
/// Thrown after a drained fault has been recorded and a fault write has been attempted.
/// </exception>
private async Task RunEventDrainLoopAsync(CancellationToken cancellationToken)
{
    while (!cancellationToken.IsCancellationRequested)
    {
        // Snapshot the field once so the whole iteration works against the same session.
        IWorkerRuntimeSession? session = _runtimeSession;
        if (session is not null)
        {
            // A fault is checked before events; when present it ends the loop by throwing.
            WorkerFault? pendingFault = session.DrainFault();
            if (pendingFault is not null)
            {
                _state = WorkerState.Faulted;
                await TryWriteFaultAsync(pendingFault, cancellationToken).ConfigureAwait(false);

                string failureMessage = string.IsNullOrWhiteSpace(pendingFault.DiagnosticMessage)
                    ? $"MXAccess event queue faulted with category {pendingFault.Category}."
                    : pendingFault.DiagnosticMessage;
                throw new InvalidOperationException(failureMessage);
            }

            IReadOnlyList<WorkerEvent> batch = session.DrainEvents(EventDrainBatchSize);
            if (batch.Count > 0)
            {
                foreach (WorkerEvent pendingEvent in batch)
                {
                    await _writer
                        .WriteAsync(CreateEnvelope(pendingEvent), cancellationToken)
                        .ConfigureAwait(false);
                }

                // Something was written; poll again immediately without waiting.
                continue;
            }
        }

        // No session yet, or nothing to send: wait one drain interval before polling again.
        await Task.Delay(EventDrainInterval, cancellationToken).ConfigureAwait(false);
    }
}
@@ -252,6 +306,7 @@ public sealed class WorkerPipeSession
await ShutdownAsync(envelope.WorkerShutdown, cancellationToken).ConfigureAwait(false);
return false;
case WorkerEnvelope.BodyOneofCase.WorkerCancel:
_runtimeSession?.CancelCommand(envelope.CorrelationId);
return true;
default:
throw new WorkerFrameProtocolException(
@@ -461,6 +516,11 @@ public sealed class WorkerPipeSession
return CreateBaseEnvelope(reply);
}
/// <summary>
/// Builds the envelope for a worker event by delegating to the
/// <c>CreateBaseEnvelope</c> overload that accepts a <see cref="WorkerEvent"/>.
/// </summary>
/// <param name="workerEvent">The event to carry in the envelope.</param>
/// <returns>The envelope returned by <c>CreateBaseEnvelope</c>.</returns>
private WorkerEnvelope CreateEnvelope(WorkerEvent workerEvent) =>
    CreateBaseEnvelope(workerEvent);
private WorkerEnvelope CreateEnvelope(WorkerShutdownAck shutdownAck)
{
return CreateBaseEnvelope(shutdownAck);
@@ -500,6 +560,13 @@ public sealed class WorkerPipeSession
return envelope;
}
/// <summary>
/// Creates a base envelope and assigns <paramref name="body"/> to its <c>WorkerEvent</c> member.
/// </summary>
/// <param name="body">The worker event to attach to the envelope.</param>
/// <returns>The envelope with the event attached.</returns>
private WorkerEnvelope CreateBaseEnvelope(WorkerEvent body)
{
    WorkerEnvelope eventEnvelope = CreateBaseEnvelope();
    eventEnvelope.WorkerEvent = body;

    return eventEnvelope;
}
private WorkerEnvelope CreateBaseEnvelope(WorkerShutdownAck body)
{
WorkerEnvelope envelope = CreateBaseEnvelope();
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using MxGateway.Contracts.Proto;
@@ -17,6 +18,12 @@ public interface IWorkerRuntimeSession : IDisposable
WorkerRuntimeHeartbeatSnapshot CaptureHeartbeat();
IReadOnlyList<WorkerEvent> DrainEvents(uint maxEvents);
WorkerFault? DrainFault();
bool CancelCommand(string correlationId);
void RequestShutdown();
Task<MxAccessShutdownResult> ShutdownGracefullyAsync(
@@ -14,6 +14,7 @@ public sealed class MxAccessEventQueue
private readonly object syncRoot = new();
private ulong lastEventSequence;
private WorkerFault? fault;
private bool faultDrained;
public MxAccessEventQueue()
: this(DefaultCapacity)
@@ -163,6 +164,20 @@ public sealed class MxAccessEventQueue
}
}
/// <summary>
/// Returns a clone of the recorded fault the first time it is requested, and
/// <see langword="null"/> when no fault is recorded or it has already been drained.
/// </summary>
/// <returns>A copy of the fault, or <see langword="null"/>.</returns>
public WorkerFault? DrainFault()
{
    lock (syncRoot)
    {
        if (fault is not null && !faultDrained)
        {
            // Mark as handed out so later calls return null for the same fault.
            faultDrained = true;
            return fault.Clone();
        }

        return null;
    }
}
private WorkerFault CreateOverflowFault()
{
string message = $"MXAccess outbound event queue reached capacity {capacity}.";
@@ -79,7 +79,14 @@ public sealed class MxAccessSession : IDisposable
}
catch (Exception exception)
{
eventSink.Detach();
try
{
eventSink.Detach();
}
catch
{
// Preserve the creation failure while still releasing the COM object below.
}
if (mxAccessComObject is not null && Marshal.IsComObject(mxAccessComObject))
{
@@ -535,13 +542,15 @@ public sealed class MxAccessSession : IDisposable
private void DisposeCore(ICollection<MxAccessShutdownFailure>? failures)
{
Exception? detachException = null;
try
{
eventSink.Detach();
}
catch (Exception exception) when (failures is not null)
catch (Exception exception)
{
failures.Add(new MxAccessShutdownFailure(
detachException = exception;
failures?.Add(new MxAccessShutdownFailure(
"DetachEvents",
serverHandle: null,
itemHandle: null,
@@ -565,6 +574,10 @@ public sealed class MxAccessSession : IDisposable
}
disposed = true;
if (detachException is not null && failures is null)
{
throw detachException;
}
}
private void ThrowIfDisposed()
@@ -127,6 +127,16 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
return eventQueue.Drain(maxEvents);
}
/// <summary>
/// Forwards to the event queue's <c>DrainFault</c> and returns its result unchanged.
/// </summary>
/// <returns>The fault reported by the event queue, or <see langword="null"/>.</returns>
public WorkerFault? DrainFault() => eventQueue.DrainFault();
/// <summary>
/// Asks the command dispatcher to cancel the queued command with the given correlation id.
/// </summary>
/// <param name="correlationId">Correlation id of the command to cancel.</param>
/// <returns>
/// The dispatcher's result, or <see langword="false"/> when no dispatcher is available.
/// </returns>
public bool CancelCommand(string correlationId) =>
    commandDispatcher?.CancelQueuedCommand(correlationId) ?? false;
public Task<IReadOnlyList<RegisteredServerHandle>> GetRegisteredServerHandlesAsync(
CancellationToken cancellationToken = default)
{
@@ -207,7 +217,14 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
throw new TimeoutException($"MXAccess graceful shutdown exceeded {timeout}.");
}
result = await cleanupTask.ConfigureAwait(false);
try
{
result = await cleanupTask.ConfigureAwait(false);
}
catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
{
throw new TimeoutException($"MXAccess graceful shutdown exceeded {timeout}.");
}
}
TimeSpan remaining = timeout - stopwatch.Elapsed;
@@ -232,7 +249,17 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
if (session is not null)
{
staRuntime.InvokeAsync(() => session.Dispose()).GetAwaiter().GetResult();
try
{
staRuntime.InvokeAsync(() => session.Dispose())
.Wait(TimeSpan.FromSeconds(2));
}
catch (AggregateException)
{
}
catch (ObjectDisposedException)
{
}
}
staRuntime.Dispose();
@@ -8,10 +8,13 @@ namespace MxGateway.Worker.Sta;
public sealed class StaCommandDispatcher
{
public const int DefaultMaxPendingCommands = 128;
private readonly HResultConverter hresultConverter;
private readonly IStaCommandExecutor commandExecutor;
private readonly Queue<QueuedStaCommand> commandQueue = new();
private readonly StaRuntime staRuntime;
private readonly int maxPendingCommands;
private readonly object gate = new();
private bool drainActive;
private bool shutdownRequested;
@@ -28,10 +31,27 @@ public sealed class StaCommandDispatcher
StaRuntime staRuntime,
IStaCommandExecutor commandExecutor,
HResultConverter hresultConverter)
: this(staRuntime, commandExecutor, hresultConverter, DefaultMaxPendingCommands)
{
}
/// <summary>
/// Creates a dispatcher with an explicit cap on the number of pending commands.
/// </summary>
/// <param name="staRuntime">Runtime stored for use by the dispatcher; must not be null.</param>
/// <param name="commandExecutor">Executor stored for use by the dispatcher; must not be null.</param>
/// <param name="hresultConverter">Converter stored for use by the dispatcher; must not be null.</param>
/// <param name="maxPendingCommands">Pending-command limit; must be greater than zero.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="maxPendingCommands"/> is zero or negative.
/// </exception>
/// <exception cref="ArgumentNullException">Any reference argument is null.</exception>
public StaCommandDispatcher(
    StaRuntime staRuntime,
    IStaCommandExecutor commandExecutor,
    HResultConverter hresultConverter,
    int maxPendingCommands)
{
    // The limit is validated first, so it wins when several arguments are invalid.
    if (maxPendingCommands < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(maxPendingCommands),
            "Max pending STA commands must be greater than zero.");
    }

    if (staRuntime is null)
    {
        throw new ArgumentNullException(nameof(staRuntime));
    }

    if (commandExecutor is null)
    {
        throw new ArgumentNullException(nameof(commandExecutor));
    }

    if (hresultConverter is null)
    {
        throw new ArgumentNullException(nameof(hresultConverter));
    }

    this.staRuntime = staRuntime;
    this.commandExecutor = commandExecutor;
    this.hresultConverter = hresultConverter;
    this.maxPendingCommands = maxPendingCommands;
}
public int PendingCommandCount
@@ -73,6 +93,14 @@ public sealed class StaCommandDispatcher
"The STA command dispatcher is shutting down."));
}
if (commandQueue.Count >= maxPendingCommands)
{
return Task.FromResult(CreateRejectedReply(
command,
ProtocolStatusCode.WorkerUnavailable,
$"The STA command dispatcher already has {maxPendingCommands} pending command(s)."));
}
QueuedStaCommand queuedCommand = new(command);
commandQueue.Enqueue(queuedCommand);
@@ -86,6 +114,51 @@ public sealed class StaCommandDispatcher
}
}
/// <summary>
/// Removes the first queued command whose correlation id matches
/// <paramref name="correlationId"/> (ordinal comparison) and completes it with a
/// <c>Canceled</c> rejection reply. The remaining commands keep their relative order.
/// </summary>
/// <param name="correlationId">Correlation id of the command to cancel.</param>
/// <returns>
/// <see langword="true"/> when a queued command was found and completed; otherwise
/// <see langword="false"/>, including when the id is null/whitespace or the queue is empty.
/// </returns>
public bool CancelQueuedCommand(string correlationId)
{
    if (string.IsNullOrWhiteSpace(correlationId))
    {
        return false;
    }

    lock (gate)
    {
        if (commandQueue.Count == 0)
        {
            return false;
        }

        bool removed = false;
        Queue<QueuedStaCommand> survivors = new(commandQueue.Count);

        // Empty the live queue, setting aside everything except the first match.
        while (commandQueue.Count > 0)
        {
            QueuedStaCommand candidate = commandQueue.Dequeue();
            bool isTarget = !removed
                && string.Equals(
                    candidate.Command.CorrelationId,
                    correlationId,
                    StringComparison.Ordinal);

            if (!isTarget)
            {
                survivors.Enqueue(candidate);
                continue;
            }

            candidate.Complete(CreateRejectedReply(
                candidate.Command,
                ProtocolStatusCode.Canceled,
                "The STA command was canceled before execution."));
            removed = true;
        }

        // Put the survivors back in their original order.
        foreach (QueuedStaCommand survivor in survivors)
        {
            commandQueue.Enqueue(survivor);
        }

        return removed;
    }
}
public void RequestShutdown()
{
lock (gate)