Improve gateway reliability and client e2e coverage
This commit is contained in:
@@ -15,6 +15,7 @@ namespace MxGateway.Worker.Ipc;
|
||||
public sealed class WorkerPipeSession
|
||||
{
|
||||
private static readonly TimeSpan EventDrainInterval = TimeSpan.FromMilliseconds(25);
|
||||
private static readonly TimeSpan BackgroundTaskStopTimeout = TimeSpan.FromSeconds(1);
|
||||
private const uint EventDrainBatchSize = 128;
|
||||
|
||||
private readonly WorkerFrameProtocolOptions _options;
|
||||
@@ -24,9 +25,12 @@ public sealed class WorkerPipeSession
|
||||
private readonly IWorkerLogger? _logger;
|
||||
private readonly WorkerFrameReader _reader;
|
||||
private readonly WorkerFrameWriter _writer;
|
||||
private readonly object _commandTaskGate = new();
|
||||
private readonly HashSet<Task> _activeCommandTasks = new();
|
||||
private IWorkerRuntimeSession? _runtimeSession;
|
||||
private long _nextSequence;
|
||||
private WorkerState _state = WorkerState.Starting;
|
||||
private bool _acceptingCommands = true;
|
||||
private bool _watchdogFaultSent;
|
||||
private bool _shutdownTimedOut;
|
||||
|
||||
@@ -206,18 +210,31 @@ public sealed class WorkerPipeSession
|
||||
|
||||
private async Task RunMessageLoopAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
using CancellationTokenSource loopCancellation = CancellationTokenSource
|
||||
.CreateLinkedTokenSource(cancellationToken);
|
||||
using CancellationTokenSource heartbeatCancellation = CancellationTokenSource
|
||||
.CreateLinkedTokenSource(cancellationToken);
|
||||
Task heartbeatTask = RunHeartbeatLoopAsync(heartbeatCancellation.Token);
|
||||
Task eventDrainTask = RunEventDrainLoopAsync(heartbeatCancellation.Token);
|
||||
Task<WorkerEnvelope> readTask = _reader.ReadAsync(loopCancellation.Token);
|
||||
|
||||
try
|
||||
{
|
||||
while (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
Task<WorkerEnvelope> readTask = _reader.ReadAsync(cancellationToken);
|
||||
Task completedTask = await Task.WhenAny(readTask, heartbeatTask, eventDrainTask).ConfigureAwait(false);
|
||||
if (completedTask == heartbeatTask)
|
||||
if (completedTask == readTask)
|
||||
{
|
||||
WorkerEnvelope envelope = await readTask.ConfigureAwait(false);
|
||||
bool keepReading = await DispatchGatewayEnvelopeAsync(envelope, cancellationToken).ConfigureAwait(false);
|
||||
if (!keepReading)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
readTask = _reader.ReadAsync(loopCancellation.Token);
|
||||
}
|
||||
else if (completedTask == heartbeatTask)
|
||||
{
|
||||
await heartbeatTask.ConfigureAwait(false);
|
||||
}
|
||||
@@ -225,33 +242,52 @@ public sealed class WorkerPipeSession
|
||||
{
|
||||
await eventDrainTask.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
WorkerEnvelope envelope = await readTask.ConfigureAwait(false);
|
||||
bool keepReading = await DispatchGatewayEnvelopeAsync(envelope, cancellationToken).ConfigureAwait(false);
|
||||
if (!keepReading)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
loopCancellation.Cancel();
|
||||
heartbeatCancellation.Cancel();
|
||||
try
|
||||
{
|
||||
await heartbeatTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
}
|
||||
await ObserveBackgroundTaskStopAsync(heartbeatTask, "Heartbeat").ConfigureAwait(false);
|
||||
await ObserveBackgroundTaskStopAsync(eventDrainTask, "EventDrain").ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await eventDrainTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
}
|
||||
/// <summary>
/// Gives a background task up to <see cref="BackgroundTaskStopTimeout"/> to finish, then
/// reports the outcome: a timeout and a non-cancellation failure are each logged as errors,
/// while normal completion and cancellation are silent.
/// </summary>
/// <param name="task">The background task being observed.</param>
/// <param name="taskName">Value written to the "task" field of any log entry.</param>
private async Task ObserveBackgroundTaskStopAsync(
    Task task,
    string taskName)
{
    Task stopDeadline = Task.Delay(BackgroundTaskStopTimeout);
    Task firstFinished = await Task.WhenAny(task, stopDeadline).ConfigureAwait(false);

    if (firstFinished == task)
    {
        // The task has already completed; awaiting it only surfaces its outcome.
        try
        {
            await task.ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // Cancellation is not reported.
        }
        catch (Exception failure)
        {
            _logger?.Error(
                "WorkerPipeSessionBackgroundTaskStopFailed",
                new Dictionary<string, object?>
                {
                    ["task"] = taskName,
                    ["exception"] = failure.ToString(),
                });
        }

        return;
    }

    // The deadline elapsed first: log it and stop waiting; the task itself is not awaited.
    _logger?.Error(
        "WorkerPipeSessionBackgroundTaskStopTimedOut",
        new Dictionary<string, object?>
        {
            ["task"] = taskName,
            ["timeout_ms"] = BackgroundTaskStopTimeout.TotalMilliseconds,
        });
}
|
||||
|
||||
@@ -300,7 +336,7 @@ public sealed class WorkerPipeSession
|
||||
switch (envelope.BodyCase)
|
||||
{
|
||||
case WorkerEnvelope.BodyOneofCase.WorkerCommand:
|
||||
_ = ProcessCommandAsync(envelope, cancellationToken);
|
||||
TryStartCommandTask(envelope, cancellationToken);
|
||||
return true;
|
||||
case WorkerEnvelope.BodyOneofCase.WorkerShutdown:
|
||||
await ShutdownAsync(envelope.WorkerShutdown, cancellationToken).ConfigureAwait(false);
|
||||
@@ -333,6 +369,11 @@ public sealed class WorkerPipeSession
|
||||
try
|
||||
{
|
||||
MxCommandReply reply = await runtimeSession.DispatchAsync(staCommand).ConfigureAwait(false);
|
||||
if (_state is not WorkerState.Ready and not WorkerState.ExecutingCommand)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await _writer
|
||||
.WriteAsync(
|
||||
CreateEnvelope(new WorkerCommandReply
|
||||
@@ -370,11 +411,13 @@ public sealed class WorkerPipeSession
|
||||
}
|
||||
|
||||
TimeSpan gracePeriod = ResolveGracePeriod(shutdown);
|
||||
StopAcceptingCommands();
|
||||
try
|
||||
{
|
||||
MxAccessShutdownResult result = await runtimeSession
|
||||
.ShutdownGracefullyAsync(gracePeriod, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
await WaitForActiveCommandTasksAsync(gracePeriod, cancellationToken).ConfigureAwait(false);
|
||||
LogShutdownFailures(result.Failures);
|
||||
await WriteShutdownAckAsync(CreateShutdownAck(result, shutdown), cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
@@ -387,6 +430,79 @@ public sealed class WorkerPipeSession
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Starts processing a command envelope if the session is still accepting commands.
/// The started task is added to the active-command set under the gate and then handed
/// to <see cref="ObserveCommandTaskAsync"/>; when commands are no longer accepted the
/// envelope is ignored.
/// </summary>
/// <param name="envelope">The command envelope passed to <c>ProcessCommandAsync</c>.</param>
/// <param name="cancellationToken">Token forwarded to <c>ProcessCommandAsync</c>.</param>
private void TryStartCommandTask(
    WorkerEnvelope envelope,
    CancellationToken cancellationToken)
{
    Task? startedTask = null;

    lock (_commandTaskGate)
    {
        // Check, start, and register under one lock so the flag and the set stay consistent.
        if (_acceptingCommands)
        {
            startedTask = ProcessCommandAsync(envelope, cancellationToken);
            _activeCommandTasks.Add(startedTask);
        }
    }

    if (startedTask is not null)
    {
        // Fire-and-forget observer; it removes the task from the set once it finishes.
        _ = ObserveCommandTaskAsync(startedTask);
    }
}
|
||||
|
||||
/// <summary>
/// Awaits a started command task and removes it from the active-command set when it
/// finishes, whatever the outcome.
/// </summary>
/// <remarks>
/// This method is started fire-and-forget, so nothing awaits the task it returns. Any
/// exception escaping it would be lost, so non-cancellation failures are logged here
/// rather than rethrown. The original <paramref name="commandTask"/> is left untouched
/// and still carries its fault for any other awaiter.
/// </remarks>
/// <param name="commandTask">The command task previously added to the active set.</param>
private async Task ObserveCommandTaskAsync(Task commandTask)
{
    try
    {
        await commandTask.ConfigureAwait(false);
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not reported.
    }
    catch (Exception ex)
    {
        // Log instead of faulting the un-awaited observer task.
        _logger?.Error(
            "WorkerPipeSessionCommandTaskFailed",
            new Dictionary<string, object?>
            {
                ["exception"] = ex.ToString(),
            });
    }
    finally
    {
        lock (_commandTaskGate)
        {
            _activeCommandTasks.Remove(commandTask);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Clears the accepting-commands flag under the command-task gate, so the flag change is
/// serialized with the check-and-register step that takes the same lock.
/// </summary>
private void StopAcceptingCommands()
{
    lock (_commandTaskGate) { _acceptingCommands = false; }
}
|
||||
|
||||
/// <summary>
/// Waits for every command task that is active at the time of the call to finish, bounded
/// by <paramref name="timeout"/>.
/// </summary>
/// <param name="timeout">Maximum time to wait for the snapshot of active tasks.</param>
/// <param name="cancellationToken">Token that aborts the wait.</param>
/// <returns>A task that completes once all snapshotted command tasks have completed.</returns>
/// <exception cref="OperationCanceledException">
/// The wait ended without the commands finishing and <paramref name="cancellationToken"/>
/// was cancelled.
/// </exception>
/// <exception cref="TimeoutException">
/// The commands did not finish within <paramref name="timeout"/>.
/// </exception>
private async Task WaitForActiveCommandTasksAsync(
    TimeSpan timeout,
    CancellationToken cancellationToken)
{
    Task[] activeTasks;
    lock (_commandTaskGate)
    {
        // Snapshot under the gate; the set can change while we wait.
        activeTasks = new Task[_activeCommandTasks.Count];
        _activeCommandTasks.CopyTo(activeTasks);
    }

    if (activeTasks.Length == 0)
    {
        return;
    }

    // The delay gets its own linked token so its timer can be released as soon as the
    // wait is over, instead of staying alive for the rest of the grace period.
    using CancellationTokenSource delayCancellation = CancellationTokenSource
        .CreateLinkedTokenSource(cancellationToken);

    Task activeCommandsTask = Task.WhenAll(activeTasks);
    Task timeoutTask = Task.Delay(timeout, delayCancellation.Token);

    try
    {
        Task completedTask = await Task.WhenAny(activeCommandsTask, timeoutTask).ConfigureAwait(false);
        if (completedTask == activeCommandsTask)
        {
            // Propagates a command failure, if any, to the caller.
            await activeCommandsTask.ConfigureAwait(false);
            return;
        }
    }
    finally
    {
        delayCancellation.Cancel();
    }

    // The delay finished first: tell caller cancellation apart from a real timeout.
    cancellationToken.ThrowIfCancellationRequested();
    throw new TimeoutException($"Worker command tasks did not stop within {timeout}.");
}
|
||||
|
||||
private Task WriteShutdownAckAsync(
|
||||
WorkerShutdownAck shutdownAck,
|
||||
CancellationToken cancellationToken)
|
||||
|
||||
@@ -80,7 +80,7 @@ public sealed class MxAccessEventQueue
|
||||
}
|
||||
}
|
||||
|
||||
public WorkerEvent Enqueue(MxEvent mxEvent)
|
||||
public void Enqueue(MxEvent mxEvent)
|
||||
{
|
||||
if (mxEvent is null)
|
||||
{
|
||||
@@ -109,8 +109,6 @@ public sealed class MxAccessEventQueue
|
||||
Event = queuedEvent,
|
||||
};
|
||||
events.Enqueue(workerEvent);
|
||||
|
||||
return workerEvent.Clone();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,7 +122,7 @@ public sealed class MxAccessEventQueue
|
||||
return false;
|
||||
}
|
||||
|
||||
workerEvent = events.Dequeue().Clone();
|
||||
workerEvent = events.Dequeue();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -144,7 +142,7 @@ public sealed class MxAccessEventQueue
|
||||
List<WorkerEvent> drained = new(drainCount);
|
||||
for (int index = 0; index < drainCount; index++)
|
||||
{
|
||||
drained.Add(events.Dequeue().Clone());
|
||||
drained.Add(events.Dequeue());
|
||||
}
|
||||
|
||||
return drained;
|
||||
|
||||
Reference in New Issue
Block a user