Improve gateway reliability and dashboard docs

This commit is contained in:
Joseph Doherty
2026-04-28 00:13:22 -04:00
parent bd4a09a35e
commit 4fc355b357
61 changed files with 1722 additions and 150 deletions
@@ -42,8 +42,15 @@ public sealed class DashboardSnapshotServiceTests
DateTimeOffset.Parse("2026-04-26T10:01:00Z"));
faultedSession.AttachWorkerClient(new FakeWorkerClient("session-faulted", 1202, WorkerClientState.Faulted));
faultedSession.MarkFaulted("worker pipe disconnected");
GatewaySession closedSession = CreateSession(
"session-closed",
"client-three",
DateTimeOffset.Parse("2026-04-26T09:59:00Z"));
closedSession.AttachWorkerClient(new FakeWorkerClient("session-closed", 1203, WorkerClientState.Closed));
closedSession.TransitionTo(SessionState.Closed);
registry.TryAdd(activeSession);
registry.TryAdd(faultedSession);
registry.TryAdd(closedSession);
using GatewayMetrics metrics = new();
metrics.SessionOpened();
metrics.SessionOpened();
@@ -55,10 +62,15 @@ public sealed class DashboardSnapshotServiceTests
DashboardSnapshot snapshot = service.GetSnapshot();
Assert.Equal(2, snapshot.Sessions.Count);
Assert.Equal(3, snapshot.Sessions.Count);
Assert.Equal("session-faulted", snapshot.Sessions[0].SessionId);
Assert.Equal(SessionState.Faulted, snapshot.Sessions[0].State);
DashboardSessionSummary activeSummary = Assert.Single(
snapshot.Sessions,
session => session.SessionId == "session-active");
Assert.Equal(1, activeSummary.EventsReceived);
Assert.Equal(2, snapshot.Workers.Count);
Assert.DoesNotContain(snapshot.Workers, worker => worker.SessionId == "session-closed");
Assert.Contains(snapshot.Metrics, metric => metric.Name == "mxgateway.commands.started" && metric.Value == 1);
Assert.Contains(
snapshot.Metrics,
@@ -32,6 +32,35 @@ public sealed class SessionManagerTests
Assert.Equal(1, metrics.GetSnapshot().SessionsOpened);
}
[Fact]
public async Task OpenSessionAsync_GeneratesClientCorrelationIdFromClientNameAndSessionId()
{
SessionOpenRequest request = CreateOpenRequest() with
{
ClientSessionName = "rust-load-client",
ClientCorrelationId = "caller-provided-correlation",
};
SessionManager manager = CreateManager(new FakeSessionWorkerClientFactory(new FakeWorkerClient()));
GatewaySession session = await manager.OpenSessionAsync(request, "client-1", CancellationToken.None);
Assert.Equal($"rust-load-client-{session.SessionId}", session.ClientCorrelationId);
}
[Fact]
public async Task OpenSessionAsync_WhenClientSessionNameMissing_UsesClientCorrelationPrefix()
{
SessionOpenRequest request = CreateOpenRequest() with
{
ClientSessionName = "",
};
SessionManager manager = CreateManager(new FakeSessionWorkerClientFactory(new FakeWorkerClient()));
GatewaySession session = await manager.OpenSessionAsync(request, "client-1", CancellationToken.None);
Assert.Equal($"client-{session.SessionId}", session.ClientCorrelationId);
}
[Fact]
public async Task InvokeAsync_WhenSessionReady_ForwardsCommandToWorker()
{
@@ -111,7 +140,7 @@ public sealed class SessionManagerTests
}
[Fact]
public async Task CloseSessionAsync_WhenCalledTwice_IsIdempotent()
public async Task CloseSessionAsync_RemovesClosedSession()
{
FakeWorkerClient workerClient = new();
using GatewayMetrics metrics = new();
@@ -119,12 +148,12 @@ public sealed class SessionManagerTests
GatewaySession session = await manager.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
SessionCloseResult firstClose = await manager.CloseSessionAsync(session.SessionId, CancellationToken.None);
SessionCloseResult secondClose = await manager.CloseSessionAsync(session.SessionId, CancellationToken.None);
SessionManagerException secondClose = await Assert.ThrowsAsync<SessionManagerException>(
async () => await manager.CloseSessionAsync(session.SessionId, CancellationToken.None));
Assert.False(firstClose.AlreadyClosed);
Assert.True(secondClose.AlreadyClosed);
Assert.Equal(SessionState.Closed, firstClose.FinalState);
Assert.Equal(SessionState.Closed, secondClose.FinalState);
Assert.Equal(SessionManagerErrorCode.SessionNotFound, secondClose.ErrorCode);
Assert.Equal(1, workerClient.ShutdownCount);
Assert.Equal(1, metrics.GetSnapshot().SessionsClosed);
Assert.Equal(0, metrics.GetSnapshot().OpenSessions);
@@ -2,6 +2,7 @@ using System.IO.Pipes;
using Google.Protobuf.WellKnownTypes;
using MxGateway.Contracts;
using MxGateway.Contracts.Proto;
using MxGateway.Server.Metrics;
using MxGateway.Server.Workers;
namespace MxGateway.Tests.Gateway.Workers;
@@ -152,6 +153,27 @@ public sealed class WorkerClientTests
Assert.Equal(WorkerClientState.Faulted, client.State);
}
[Fact]
public async Task ReadLoop_WhenPipeDisconnects_StopsRunningWorkerMetric()
{
await using PipePair pipePair = await PipePair.CreateAsync();
using GatewayMetrics metrics = new();
await using WorkerClient client = CreateClient(pipePair, metrics: metrics);
await CompleteHandshakeAsync(client, pipePair);
Assert.Equal(1, metrics.GetSnapshot().WorkersRunning);
await pipePair.DisposeWorkerSideAsync();
await WaitUntilAsync(
() => client.State == WorkerClientState.Faulted,
TestTimeout);
GatewayMetricsSnapshot snapshot = metrics.GetSnapshot();
Assert.Equal(0, snapshot.WorkersRunning);
Assert.Equal(1, snapshot.WorkerExits);
}
[Fact]
public async Task ReadLoop_WhenHeartbeatArrives_UpdatesLastHeartbeatAndWorkerProcess()
{
@@ -193,7 +215,8 @@ public sealed class WorkerClientTests
private static WorkerClient CreateClient(
PipePair pipePair,
WorkerClientOptions? options = null)
WorkerClientOptions? options = null,
GatewayMetrics? metrics = null)
{
WorkerFrameProtocolOptions frameOptions = new(SessionId);
WorkerClientConnection connection = new(
@@ -202,7 +225,7 @@ public sealed class WorkerClientTests
pipePair.GatewayStream,
frameOptions);
return new WorkerClient(connection, options);
return new WorkerClient(connection, options, metrics);
}
private static async Task CompleteHandshakeAsync(
@@ -43,7 +43,7 @@ public sealed class WorkerProcessLauncherTests
Assert.DoesNotContain(Nonce, handle.CommandLine.ToString(), StringComparison.Ordinal);
Assert.DoesNotContain(Nonce, string.Join(" ", handle.CommandLine.Arguments), StringComparison.Ordinal);
Assert.False(pipeReservation.DisposeCalled);
Assert.Equal(1, metrics.GetSnapshot().WorkersRunning);
Assert.Equal(0, metrics.GetSnapshot().WorkersRunning);
}
[Fact]
@@ -17,7 +17,8 @@ public sealed class GatewayMetricsTests
metrics.CommandFailed("WriteSecured", "AuthorizationFailed", TimeSpan.FromMilliseconds(12));
metrics.EventReceived("session-1", "OnDataChange");
metrics.EventReceived("session-1", "OnDataChange");
metrics.SetEventQueueDepth(7);
metrics.SetWorkerEventQueueDepth(7);
metrics.SetGrpcEventStreamQueueDepth(3);
metrics.QueueOverflow("session-events");
metrics.Fault("CommandTimeout");
metrics.WorkerKilled("CommandTimeout");
@@ -30,7 +31,8 @@ public sealed class GatewayMetricsTests
Assert.Equal(0, snapshot.OpenSessions);
Assert.Equal(0, snapshot.WorkersRunning);
Assert.Equal(7, snapshot.EventQueueDepth);
Assert.Equal(7, snapshot.WorkerEventQueueDepth);
Assert.Equal(3, snapshot.GrpcEventStreamQueueDepth);
Assert.Equal(1, snapshot.SessionsOpened);
Assert.Equal(1, snapshot.SessionsClosed);
Assert.Equal(2, snapshot.CommandsStarted);
@@ -45,6 +47,7 @@ public sealed class GatewayMetricsTests
Assert.Equal(1, snapshot.StreamDisconnects);
Assert.Equal(1, snapshot.CommandFailuresByMethod["WriteSecured"]);
Assert.Equal(2, snapshot.EventsByFamily["OnDataChange"]);
Assert.Equal(2, snapshot.EventsBySession["session-1"]);
}
[Fact]
@@ -53,7 +56,7 @@ public sealed class GatewayMetricsTests
using GatewayMetrics metrics = new();
ArgumentOutOfRangeException exception = Assert.Throws<ArgumentOutOfRangeException>(
() => metrics.SetEventQueueDepth(-1));
() => metrics.SetWorkerEventQueueDepth(-1));
Assert.Equal("depth", exception.ParamName);
}