Files
mxaccessgw/src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/SessionManagerTests.cs
T
Joseph Doherty 4d77279e7e Resolve Server-044..050: KillWorker accounting + admin service hardening
Server-044  KillWorkerAsync catch path now calls _metrics.SessionRemoved
            so the open-session gauge does not leak when KillWorker throws.
Server-045  KillWorkerAsync routes through a new
            GatewaySession.KillWorkerWithCloseGateAsync that takes the
            per-session close lock, so concurrent kills count SessionsClosed
            exactly once.
Server-046  CloseSessionCoreAsync's SessionCloseStartedException branch and
            ShutdownAsync's kill fallback both increment SessionsClosed (not
            just the gauge), so the counter and gauge stay consistent.
Server-047  ApiKeysPage.ConfirmPendingAsync holds PendingAction across the
            awaited action and clears it in finally, matching the sessions
            pages.
Server-048  Closed: the 044/045 regression tests cover the previously-
            untested kill paths.
Server-049  IDashboardSessionAdminService + DashboardSessionAdminService
            now carry XML docs that pin the Admin gate, missing-session
            return-Fail semantics, and the dashboard-admin-kill reason.
Server-050  CloseSessionAsync and KillWorkerAsync catch unexpected
            exceptions after the SessionManagerException catches and return
            a friendly Fail; OperationCanceledException tied to the caller
            token still propagates.

All resolved at 2026-05-24; 503/503 gateway tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 08:49:34 -04:00

945 lines
40 KiB
C#

using Google.Protobuf.WellKnownTypes;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.MxGateway.Contracts.Proto;
using ZB.MOM.WW.MxGateway.Server.Configuration;
using ZB.MOM.WW.MxGateway.Server.Metrics;
using ZB.MOM.WW.MxGateway.Server.Sessions;
using ZB.MOM.WW.MxGateway.Server.Workers;
using ZB.MOM.WW.MxGateway.Tests.TestSupport;
namespace ZB.MOM.WW.MxGateway.Tests.Gateway.Sessions;
public sealed class SessionManagerTests
{
/// <summary>
/// Opens a session through a factory that applies the lifecycle transitions and checks that the
/// session ends up registered, in the ready state, attributed to the caller, and counted in metrics.
/// </summary>
[Fact]
public async Task OpenSessionAsync_WithWorkerReady_RegistersReadySession()
{
    // Arrange
    FakeSessionWorkerClientFactory lifecycleFactory = new(new FakeWorkerClient())
    {
        ApplyLifecycleTransitions = true,
    };
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(lifecycleFactory, metrics: gatewayMetrics);

    // Act
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Assert: the registry hands back the very same session instance.
    bool isRegistered = sut.TryGetSession(opened.SessionId, out GatewaySession? lookedUp);
    Assert.True(isRegistered);
    Assert.Same(opened, lookedUp);
    Assert.Equal(SessionState.Ready, opened.State);
    Assert.Equal("client-1", opened.ClientIdentity);

    // The factory records the state on entry and after each transition it applies.
    Assert.Equal(["StartingWorker", "WaitingForPipe", "Handshaking", "InitializingWorker"], lifecycleFactory.ObservedStates);

    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();
    Assert.Equal(1, counters.OpenSessions);
    Assert.Equal(1, counters.SessionsOpened);
}
/// <summary>
/// Opens a session with a <c>ManualTimeProvider</c> and a configured default lease and checks
/// that the session's lease expiry equals the provider's current time plus that lease.
/// </summary>
[Fact]
public async Task OpenSessionAsync_SetsInitialDefaultLease()
{
    // Arrange: 1800 seconds is the same 30-minute lease the assertion expects.
    const int leaseSeconds = 1800;
    ManualTimeProvider manualClock = new(
        DateTimeOffset.Parse("2026-04-29T10:00:00Z", System.Globalization.CultureInfo.InvariantCulture));
    SessionManager sut = CreateManager(
        new FakeSessionWorkerClientFactory(new FakeWorkerClient()),
        options: CreateOptions(defaultLeaseSeconds: leaseSeconds),
        timeProvider: manualClock);

    // Act
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Assert
    Assert.Equal(manualClock.GetUtcNow() + TimeSpan.FromSeconds(leaseSeconds), opened.LeaseExpiresAt);
}
/// <summary>
/// Verifies that the session's correlation id is built from the client session name and the
/// session id, even though the request also carries a caller-provided correlation id.
/// </summary>
[Fact]
public async Task OpenSessionAsync_GeneratesClientCorrelationIdFromClientNameAndSessionId()
{
    // Arrange
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(new FakeWorkerClient()));
    SessionOpenRequest namedRequest = CreateOpenRequest() with
    {
        ClientSessionName = "rust-load-client",
        ClientCorrelationId = "caller-provided-correlation",
    };

    // Act
    GatewaySession opened = await sut.OpenSessionAsync(namedRequest, "client-1", CancellationToken.None);

    // Assert
    string expectedCorrelationId = $"rust-load-client-{opened.SessionId}";
    Assert.Equal(expectedCorrelationId, opened.ClientCorrelationId);
}
/// <summary>
/// Opens a session whose request has an empty client session name and checks that the
/// correlation id falls back to the <c>client-</c> prefix followed by the session id.
/// </summary>
[Fact]
public async Task OpenSessionAsync_WhenClientSessionNameMissing_UsesClientCorrelationPrefix()
{
    // Arrange
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(new FakeWorkerClient()));
    SessionOpenRequest unnamedRequest = CreateOpenRequest() with { ClientSessionName = "" };

    // Act
    GatewaySession opened = await sut.OpenSessionAsync(unnamedRequest, "client-1", CancellationToken.None);

    // Assert
    string expectedCorrelationId = $"client-{opened.SessionId}";
    Assert.Equal(expectedCorrelationId, opened.ClientCorrelationId);
}
/// <summary>
/// Invokes a ping through the manager on a freshly opened session and checks that the worker
/// client saw exactly one invoke and that the reply carries the ping kind.
/// </summary>
[Fact]
public async Task InvokeAsync_WhenSessionReady_ForwardsCommandToWorker()
{
    // Arrange
    FakeWorkerClient worker = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    WorkerCommand ping = CreateCommand(MxCommandKind.Ping);

    // Act
    WorkerCommandReply pong = await sut.InvokeAsync(opened.SessionId, ping, CancellationToken.None);

    // Assert
    Assert.Equal(1, worker.InvokeCount);
    Assert.Equal(MxCommandKind.Ping, pong.Reply.Kind);
}
/// <summary>
/// Builds a ready session directly, invokes a ping on it, and checks that the lease expiry
/// moved forward and now lies after the current UTC time.
/// </summary>
[Fact]
public async Task InvokeAsync_WhenSessionReady_RefreshesLease()
{
    // Arrange: the final constructor argument is a timestamp one hour in the past.
    DateTimeOffset oneHourAgo = DateTimeOffset.UtcNow - TimeSpan.FromHours(1);
    GatewaySession readySession = new(
        "session-lease-refresh",
        "mxaccess",
        "mxaccess-gateway-1-session-lease-refresh",
        "nonce",
        "client-1",
        "test-session",
        "client-correlation-1",
        TimeSpan.FromSeconds(30),
        TimeSpan.FromSeconds(5),
        TimeSpan.FromSeconds(5),
        TimeSpan.FromMinutes(30),
        oneHourAgo);
    readySession.AttachWorkerClient(new FakeWorkerClient());
    readySession.MarkReady();
    DateTimeOffset? leaseBeforeInvoke = readySession.LeaseExpiresAt;

    // Act
    WorkerCommand ping = CreateCommand(MxCommandKind.Ping);
    await readySession.InvokeAsync(ping, CancellationToken.None);

    // Assert
    Assert.True(readySession.LeaseExpiresAt > leaseBeforeInvoke);
    Assert.True(readySession.LeaseExpiresAt > DateTimeOffset.UtcNow);
}
/// <summary>
/// Verifies that <c>SubscribeBulkAsync</c> issues a single <c>SubscribeBulk</c> command carrying
/// the requested tag addresses and returns the results from the worker's canned reply.
/// </summary>
[Fact]
public async Task GatewaySessionSubscribeBulkAsync_ForwardsOneBulkCommandAndReturnsResults()
{
    // Arrange: canned worker reply with one successful subscription.
    BulkSubscribeReply cannedBulk = new();
    cannedBulk.Results.Add(new SubscribeResult
    {
        ServerHandle = 12,
        TagAddress = "Galaxy.Tag.Value",
        ItemHandle = 512,
        WasSuccessful = true,
    });
    MxCommandReply cannedReply = new()
    {
        SessionId = "session-1",
        CorrelationId = "correlation-1",
        Kind = MxCommandKind.SubscribeBulk,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
        SubscribeBulk = cannedBulk,
    };
    FakeWorkerClient worker = new()
    {
        InvokeReply = new WorkerCommandReply { Reply = cannedReply },
    };
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Act
    IReadOnlyList<SubscribeResult> subscribed = await opened.SubscribeBulkAsync(
        12,
        ["Galaxy.Tag.Value"],
        CancellationToken.None);

    // Assert
    SubscribeResult only = Assert.Single(subscribed);
    Assert.Equal(512, only.ItemHandle);
    Assert.Equal(1, worker.InvokeCount);
    WorkerCommand? sent = worker.LastCommand;
    Assert.Equal(MxCommandKind.SubscribeBulk, sent?.Command.Kind);
    Assert.Equal(["Galaxy.Tag.Value"], sent?.Command.SubscribeBulk.TagAddresses);
}
/// <summary>
/// Verifies that <c>WriteBulkAsync</c> forwards a <c>WriteBulk</c> command containing both
/// entries and returns the per-item results (one success, one failure) from the worker's
/// canned reply.
/// </summary>
[Fact]
public async Task GatewaySessionWriteBulkAsync_ForwardsOneBulkCommandAndReturnsResults()
{
    // Arrange: canned worker reply with one successful and one failed write.
    BulkWriteReply cannedBulk = new();
    cannedBulk.Results.Add(new BulkWriteResult
    {
        ServerHandle = 12,
        ItemHandle = 901,
        WasSuccessful = true,
    });
    cannedBulk.Results.Add(new BulkWriteResult
    {
        ServerHandle = 12,
        ItemHandle = 902,
        WasSuccessful = false,
        ErrorMessage = "MXAccess invalid handle",
    });
    MxCommandReply cannedReply = new()
    {
        SessionId = "session-1",
        CorrelationId = "correlation-1",
        Kind = MxCommandKind.WriteBulk,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
        WriteBulk = cannedBulk,
    };
    FakeWorkerClient worker = new()
    {
        InvokeReply = new WorkerCommandReply { Reply = cannedReply },
    };
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    WriteBulkEntry[] writes =
    [
        new WriteBulkEntry
        {
            ItemHandle = 901,
            UserId = 5,
            Value = new MxValue { DataType = MxDataType.Integer, Int32Value = 11 },
        },
        new WriteBulkEntry
        {
            ItemHandle = 902,
            UserId = 5,
            Value = new MxValue { DataType = MxDataType.Integer, Int32Value = 22 },
        },
    ];

    // Act
    IReadOnlyList<BulkWriteResult> written = await opened.WriteBulkAsync(12, writes, CancellationToken.None);

    // Assert
    Assert.Equal(2, written.Count);
    Assert.True(written[0].WasSuccessful);
    Assert.False(written[1].WasSuccessful);
    WorkerCommand? sent = worker.LastCommand;
    Assert.Equal(MxCommandKind.WriteBulk, sent?.Command.Kind);
    Assert.Equal(2, sent?.Command.WriteBulk.Entries.Count);
}
/// <summary>
/// Verifies that <c>ReadBulkAsync</c> forwards a <c>ReadBulk</c> command carrying the tag
/// addresses and a 500 ms timeout, and returns the cached read result from the worker's
/// canned reply.
/// </summary>
[Fact]
public async Task GatewaySessionReadBulkAsync_ForwardsOneBulkCommandAndReturnsResults()
{
    // Arrange: canned worker reply with one successful, cached read of the value 42.
    BulkReadReply cannedBulk = new();
    cannedBulk.Results.Add(new BulkReadResult
    {
        ServerHandle = 12,
        TagAddress = "Galaxy.Tag.Value",
        ItemHandle = 512,
        WasSuccessful = true,
        WasCached = true,
        Value = new MxValue { DataType = MxDataType.Integer, Int32Value = 42 },
    });
    MxCommandReply cannedReply = new()
    {
        SessionId = "session-1",
        CorrelationId = "correlation-1",
        Kind = MxCommandKind.ReadBulk,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
        ReadBulk = cannedBulk,
    };
    FakeWorkerClient worker = new()
    {
        InvokeReply = new WorkerCommandReply { Reply = cannedReply },
    };
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Act
    IReadOnlyList<BulkReadResult> reads = await opened.ReadBulkAsync(
        12,
        ["Galaxy.Tag.Value"],
        TimeSpan.FromMilliseconds(500),
        CancellationToken.None);

    // Assert
    BulkReadResult only = Assert.Single(reads);
    Assert.True(only.WasSuccessful);
    Assert.True(only.WasCached);
    Assert.Equal(42, only.Value.Int32Value);
    WorkerCommand? sent = worker.LastCommand;
    Assert.Equal(MxCommandKind.ReadBulk, sent?.Command.Kind);
    Assert.Equal(["Galaxy.Tag.Value"], sent?.Command.ReadBulk.TagAddresses);
    Assert.Equal(500u, sent?.Command.ReadBulk.TimeoutMs);
}
/// <summary>
/// Marks an opened session as faulted and checks that a subsequent invoke fails with
/// <c>SessionNotReady</c> without ever reaching the worker client.
/// </summary>
[Fact]
public async Task InvokeAsync_WhenSessionFaulted_RejectsCommand()
{
    // Arrange
    FakeWorkerClient worker = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    opened.MarkFaulted("test fault");

    // Act
    Func<Task> invokeOnFaulted = async () => await sut.InvokeAsync(
        opened.SessionId,
        CreateCommand(MxCommandKind.Ping),
        CancellationToken.None);
    SessionManagerException rejection = await Assert.ThrowsAsync<SessionManagerException>(invokeOnFaulted);

    // Assert
    Assert.Equal(SessionManagerErrorCode.SessionNotReady, rejection.ErrorCode);
    Assert.Equal(0, worker.InvokeCount);
}
/// <summary>
/// Server-030 regression: with the gateway-side <c>SessionState</c> still <c>Ready</c> but the
/// worker client in a different state, the rejection message has to name both states.
/// Otherwise the caller gets the self-contradictory "Session ... is not ready. Current
/// state is Ready." text, which gives no hint about the actual mismatch.
/// </summary>
[Fact]
public async Task InvokeAsync_WhenWorkerNotReadyButSessionReady_DiagnosticIncludesBothStates()
{
    // Arrange
    FakeWorkerClient worker = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Create the mismatch: only the worker leaves Ready; the session is left untouched.
    worker.State = WorkerClientState.Handshaking;
    Assert.Equal(SessionState.Ready, opened.State);

    // Act
    Func<Task> invokeOnMismatch = async () => await sut.InvokeAsync(
        opened.SessionId,
        CreateCommand(MxCommandKind.Ping),
        CancellationToken.None);
    SessionManagerException rejection = await Assert.ThrowsAsync<SessionManagerException>(invokeOnMismatch);

    // Assert
    Assert.Equal(SessionManagerErrorCode.SessionNotReady, rejection.ErrorCode);
    string diagnostic = rejection.Message;
    Assert.Contains("Session state is Ready", diagnostic);
    Assert.Contains("worker state is Handshaking", diagnostic);
    Assert.Equal(0, worker.InvokeCount);
}
/// <summary>
/// Closes a session twice: the first close reports a fresh close into the <c>Closed</c> state,
/// the second fails with <c>SessionNotFound</c>, and the worker is shut down only once.
/// </summary>
[Fact]
public async Task CloseSessionAsync_RemovesClosedSession()
{
    // Arrange
    FakeWorkerClient worker = new();
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker), metrics: gatewayMetrics);
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Act
    SessionCloseResult initialClose = await sut.CloseSessionAsync(opened.SessionId, CancellationToken.None);
    Func<Task> closeAgain = async () => await sut.CloseSessionAsync(opened.SessionId, CancellationToken.None);
    SessionManagerException repeatedClose = await Assert.ThrowsAsync<SessionManagerException>(closeAgain);

    // Assert
    Assert.False(initialClose.AlreadyClosed);
    Assert.Equal(SessionState.Closed, initialClose.FinalState);
    Assert.Equal(SessionManagerErrorCode.SessionNotFound, repeatedClose.ErrorCode);
    Assert.Equal(1, worker.ShutdownCount);
    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();
    Assert.Equal(1, counters.SessionsClosed);
    Assert.Equal(0, counters.OpenSessions);
}
/// <summary>
/// Configures the worker to throw a shutdown-timeout error and checks that closing the session
/// fails with <c>CloseFailed</c> after one shutdown attempt and one kill.
/// </summary>
[Fact]
public async Task CloseSessionAsync_WhenWorkerShutdownFails_KillsWorker()
{
    // Arrange
    WorkerClientException shutdownTimeout = new(
        WorkerClientErrorCode.ShutdownTimeout,
        "Worker shutdown timed out.");
    FakeWorkerClient worker = new() { ShutdownException = shutdownTimeout };
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Act
    Func<Task> close = async () => await sut.CloseSessionAsync(opened.SessionId, CancellationToken.None);
    SessionManagerException closeFailure = await Assert.ThrowsAsync<SessionManagerException>(close);

    // Assert
    Assert.Equal(SessionManagerErrorCode.CloseFailed, closeFailure.ErrorCode);
    Assert.Equal(1, worker.ShutdownCount);
    Assert.Equal(1, worker.KillCount);
}
/// <summary>
/// With a one-session limit, checks that a close whose worker shutdown fails still removes the
/// session, so that a second session can be opened, and that the metrics reflect the removal.
/// </summary>
[Fact]
public async Task CloseSessionAsync_WhenWorkerShutdownFails_RemovesSessionAndReleasesSlot()
{
    // Arrange: the first queued worker fails on shutdown, the second is healthy.
    WorkerClientException shutdownTimeout = new(
        WorkerClientErrorCode.ShutdownTimeout,
        "Worker shutdown timed out.");
    FakeWorkerClient brokenWorker = new() { ShutdownException = shutdownTimeout };
    FakeWorkerClient healthyWorker = new();
    SessionRegistry sessions = new();
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(
        new QueueingSessionWorkerClientFactory(brokenWorker, healthyWorker),
        sessions,
        gatewayMetrics,
        CreateOptions(maxSessions: 1));
    GatewaySession doomed = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Record an event for the first session so its per-session entry exists before the close.
    gatewayMetrics.EventReceived(doomed.SessionId, MxEventFamily.OnDataChange.ToString());

    // Act
    Func<Task> closeDoomed = async () => await sut.CloseSessionAsync(doomed.SessionId, CancellationToken.None);
    SessionManagerException closeFailure = await Assert.ThrowsAsync<SessionManagerException>(closeDoomed);
    GatewaySession successor = await sut.OpenSessionAsync(CreateOpenRequest(), "client-2", CancellationToken.None);

    // Assert
    Assert.Equal(SessionManagerErrorCode.CloseFailed, closeFailure.ErrorCode);
    Assert.False(sut.TryGetSession(doomed.SessionId, out _));
    Assert.True(sut.TryGetSession(successor.SessionId, out _));
    Assert.Equal(1, sessions.Count);
    Assert.Equal(1, brokenWorker.KillCount);
    Assert.Equal(1, brokenWorker.DisposeCount);

    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();

    // Server-046: a failed close is accounted as SessionClosed (counter += 1), not as
    // SessionRemoved (gauge -= 1 with the counter untouched). This path removes the session
    // from the registry, so the closed count has to include it.
    Assert.Equal(1, counters.SessionsClosed);
    Assert.False(counters.EventsBySession.ContainsKey(doomed.SessionId));
    Assert.Equal(1, counters.OpenSessions);
}
/// <summary>
/// Verifies that when the second close is canceled, the session is not removed if owned by the first close.
/// The first close is held inside the worker's shutdown while a second close on the same session
/// is started and canceled; the session must stay registered and uncounted until the first close
/// is released and completes.
/// </summary>
[Fact]
public async Task CloseSessionAsync_WhenSecondCloseIsCanceled_DoesNotRemoveSessionOwnedByFirstClose()
{
// BlockShutdown makes the fake worker's ShutdownAsync wait until ReleaseShutdown is called.
FakeWorkerClient workerClient = new()
{
BlockShutdown = true,
};
SessionRegistry registry = new();
using GatewayMetrics metrics = new();
SessionManager manager = CreateManager(
new FakeSessionWorkerClientFactory(workerClient),
registry,
metrics,
CreateOptions(maxSessions: 1));
GatewaySession session = await manager.OpenSessionAsync(
CreateOpenRequest(),
"client-1",
CancellationToken.None);
// Start the first close without awaiting it, then wait until it has entered the worker shutdown.
Task<SessionCloseResult> firstClose = manager.CloseSessionAsync(session.SessionId, CancellationToken.None);
await workerClient.WaitForShutdownStartAsync();
// Start a second close with its own token and cancel it while the first close is still blocked.
using CancellationTokenSource secondCloseCancellation = new();
Task<SessionCloseResult> secondClose = manager.CloseSessionAsync(
session.SessionId,
secondCloseCancellation.Token);
await secondCloseCancellation.CancelAsync();
await Assert.ThrowsAnyAsync<OperationCanceledException>(
async () => await secondClose);
// The canceled second close must have left everything as it was: session still registered,
// worker not disposed, nothing counted as closed.
Assert.True(manager.TryGetSession(session.SessionId, out _));
Assert.Equal(1, registry.Count);
Assert.Equal(0, workerClient.DisposeCount);
Assert.Equal(0, metrics.GetSnapshot().SessionsClosed);
Assert.Equal(1, metrics.GetSnapshot().OpenSessions);
// Unblock the worker shutdown so the first close can finish and do the removal itself.
workerClient.ReleaseShutdown();
SessionCloseResult closeResult = await firstClose;
Assert.Equal(SessionState.Closed, closeResult.FinalState);
Assert.False(manager.TryGetSession(session.SessionId, out _));
Assert.Equal(0, registry.Count);
Assert.Equal(1, workerClient.DisposeCount);
Assert.Equal(1, metrics.GetSnapshot().SessionsClosed);
Assert.Equal(0, metrics.GetSnapshot().OpenSessions);
}
/// <summary>
/// Kills the worker of an open session and checks that the worker was killed rather than shut
/// down, the session is gone from the manager, and the close is reflected in the metrics.
/// </summary>
[Fact]
public async Task KillWorkerAsync_KillsWorkerAndRemovesSession()
{
    // Arrange
    FakeWorkerClient worker = new();
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker), metrics: gatewayMetrics);
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    // Act
    SessionCloseResult killResult = await sut.KillWorkerAsync(opened.SessionId, "test-kill", CancellationToken.None);

    // Assert
    Assert.False(killResult.AlreadyClosed);
    Assert.Equal(SessionState.Closed, killResult.FinalState);
    Assert.Equal(1, worker.KillCount);
    Assert.Equal(0, worker.ShutdownCount);
    Assert.False(sut.TryGetSession(opened.SessionId, out _));
    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();
    Assert.Equal(1, counters.SessionsClosed);
    Assert.Equal(0, counters.OpenSessions);
}
/// <summary>
/// Checks that asking the manager to kill the worker of a session id it never opened fails
/// with <c>SessionNotFound</c>.
/// </summary>
[Fact]
public async Task KillWorkerAsync_WhenSessionMissing_ThrowsSessionNotFound()
{
    // Arrange
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(new FakeWorkerClient()));

    // Act
    Func<Task> killUnknown = async () => await sut.KillWorkerAsync("session-missing", "test-kill", CancellationToken.None);
    SessionManagerException notFound = await Assert.ThrowsAsync<SessionManagerException>(killUnknown);

    // Assert
    Assert.Equal(SessionManagerErrorCode.SessionNotFound, notFound.ErrorCode);
}
/// <summary>
/// Regression for Server-044: if <c>session.KillWorker</c> throws, the catch path still has to
/// decrement <c>mxgateway.sessions.open</c> (parity with the Server-006 fix in
/// <c>OpenSessionAsync</c>). Before the fix, every failed kill leaked one open session on the gauge.
/// </summary>
[Fact]
public async Task KillWorkerAsync_WhenSessionKillThrows_DecrementsOpenSessionGauge()
{
    // Arrange: a worker whose Kill always throws.
    FakeWorkerClient unkillableWorker = new()
    {
        KillException = new InvalidOperationException("worker kill failed"),
    };
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(unkillableWorker), metrics: gatewayMetrics);
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    Assert.Equal(1, gatewayMetrics.GetSnapshot().OpenSessions);

    // Act
    Func<Task> kill = async () => await sut.KillWorkerAsync(opened.SessionId, "test-kill", CancellationToken.None);
    SessionManagerException killFailure = await Assert.ThrowsAsync<SessionManagerException>(kill);

    // Assert
    Assert.Equal(SessionManagerErrorCode.CloseFailed, killFailure.ErrorCode);
    Assert.False(sut.TryGetSession(opened.SessionId, out _));
    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();
    Assert.Equal(0, counters.OpenSessions);
    Assert.True(counters.Faults > 0);
}
/// <summary>
/// Regression for Server-045 / Server-048: concurrent kills on the same session must not
/// double-increment <c>mxgateway.sessions.closed</c>. Whichever kill wins does the accounting;
/// the other either gets a result back or finds the session already removed
/// (<c>SessionNotFound</c>), and neither outcome may count a second close.
/// </summary>
/// <remarks>
/// Both callers are started on the thread pool and parked on a shared gate, then released
/// together. Invoking the first kill inline on the test thread would let it run to completion
/// before the second caller is even scheduled, because the fake worker's kill and dispose
/// complete synchronously, so the two kills would never overlap.
/// </remarks>
[Fact]
public async Task KillWorkerAsync_ConcurrentCallsOnSameSession_CountClosedExactlyOnce()
{
    // Arrange
    FakeWorkerClient workerClient = new();
    using GatewayMetrics metrics = new();
    SessionManager manager = CreateManager(
        new FakeSessionWorkerClientFactory(workerClient),
        metrics: metrics);
    GatewaySession session = await manager.OpenSessionAsync(
        CreateOpenRequest(),
        "client-1",
        CancellationToken.None);

    // RunContinuationsAsynchronously keeps SetResult from running a waiter inline on the
    // releasing thread, so each caller resumes on its own thread-pool work item.
    TaskCompletionSource startGate = new(TaskCreationOptions.RunContinuationsAsynchronously);

    // Either caller can lose the race, so both tolerate the session already being gone.
    async Task<SessionCloseResult> KillWhenReleasedAsync(string reason)
    {
        await startGate.Task;
        try
        {
            return await manager.KillWorkerAsync(session.SessionId, reason, CancellationToken.None);
        }
        catch (SessionManagerException missing) when (missing.ErrorCode == SessionManagerErrorCode.SessionNotFound)
        {
            return new SessionCloseResult(session.SessionId, SessionState.Closed, AlreadyClosed: true);
        }
    }

    // Act
    Task<SessionCloseResult> first = Task.Run(() => KillWhenReleasedAsync("kill-a"));
    Task<SessionCloseResult> second = Task.Run(() => KillWhenReleasedAsync("kill-b"));
    startGate.SetResult();
    await Task.WhenAll(first, second);

    // Assert
    Assert.Equal(1, metrics.GetSnapshot().SessionsClosed);
    Assert.Equal(0, metrics.GetSnapshot().OpenSessions);
    Assert.False(manager.TryGetSession(session.SessionId, out _));
}
/// <summary>
/// Regression for Server-046: when <c>CloseSessionCoreAsync</c> throws, <c>ShutdownAsync</c>
/// falls back to <c>KillWorker</c> + <c>RemoveSessionAsync</c>. That fallback must still account
/// for a close: the open-session gauge drops to zero AND <c>mxgateway.sessions.closed</c> is
/// incremented. Before the fix the fallback under-counted the closed counter.
/// </summary>
[Fact]
public async Task ShutdownAsync_WhenSessionCloseThrows_StillDecrementsOpenSessionGaugeAndIncrementsClosedCounter()
{
    // Arrange: a worker whose graceful shutdown always throws.
    FakeWorkerClient brokenWorker = new()
    {
        ShutdownException = new InvalidOperationException("worker shutdown failed"),
    };
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(brokenWorker), metrics: gatewayMetrics);
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    Assert.Equal(1, gatewayMetrics.GetSnapshot().OpenSessions);

    // Act
    await sut.ShutdownAsync(CancellationToken.None);

    // Assert: whichever path ran (graceful close or kill fallback), the gauge is back at
    // zero and exactly one close has been counted.
    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();
    Assert.Equal(0, counters.OpenSessions);
    Assert.Equal(1, counters.SessionsClosed);
    Assert.False(sut.TryGetSession(opened.SessionId, out _));
}
/// <summary>
/// Uses a factory that throws on creation and checks that opening a session fails with
/// <c>OpenFailed</c>, leaves the registry empty, counts no opened session, and records one fault.
/// </summary>
[Fact]
public async Task OpenSessionAsync_WhenWorkerCreationFails_RemovesSessionFromRegistry()
{
    // Arrange
    SessionRegistry sessions = new();
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(new FailingSessionWorkerClientFactory(), sessions, gatewayMetrics);

    // Act
    Func<Task> open = async () => await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    SessionManagerException openFailure = await Assert.ThrowsAsync<SessionManagerException>(open);

    // Assert
    Assert.Equal(SessionManagerErrorCode.OpenFailed, openFailure.ErrorCode);
    Assert.Equal(0, sessions.Count);
    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();
    Assert.Equal(0, counters.SessionsOpened);
    Assert.Equal(1, counters.Faults);
}
/// <summary>
/// Opens two sessions, puts one lease a second in the past and the other five minutes in the
/// future, and checks that the sweep closes only the session whose lease has lapsed.
/// </summary>
[Fact]
public async Task CloseExpiredLeasesAsync_ClosesExpiredSessionsOnly()
{
    // Arrange: workers are handed out in queue order, lapsed first.
    FakeWorkerClient lapsedWorker = new();
    FakeWorkerClient liveWorker = new();
    SessionManager sut = CreateManager(new QueueingSessionWorkerClientFactory(lapsedWorker, liveWorker));
    GatewaySession lapsed = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    GatewaySession live = await sut.OpenSessionAsync(CreateOpenRequest(), "client-2", CancellationToken.None);
    DateTimeOffset sweepTime = DateTimeOffset.UtcNow;
    lapsed.ExtendLease(sweepTime.AddSeconds(-1));
    live.ExtendLease(sweepTime.AddMinutes(5));

    // Act
    int sweptCount = await sut.CloseExpiredLeasesAsync(sweepTime, CancellationToken.None);

    // Assert
    Assert.Equal(1, sweptCount);
    Assert.Equal(SessionState.Closed, lapsed.State);
    Assert.Equal(SessionState.Ready, live.State);
    Assert.Equal(1, lapsedWorker.ShutdownCount);
    Assert.Equal(0, liveWorker.ShutdownCount);
}
/// <summary>
/// Expires a session's lease but keeps an event subscriber attached for the duration of the
/// sweep, and checks that the sweep closes nothing and the session stays ready.
/// </summary>
[Fact]
public async Task CloseExpiredLeasesAsync_DoesNotCloseActiveEventSubscriber()
{
    // Arrange
    FakeWorkerClient worker = new();
    SessionManager sut = CreateManager(new FakeSessionWorkerClientFactory(worker));
    GatewaySession opened = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    DateTimeOffset sweepTime = DateTimeOffset.UtcNow;
    opened.ExtendLease(sweepTime.AddSeconds(-1));

    // The subscriber handle is disposed only when the test method exits, i.e. after the sweep.
    using IDisposable subscription = opened.AttachEventSubscriber(allowMultipleSubscribers: false);

    // Act
    int sweptCount = await sut.CloseExpiredLeasesAsync(sweepTime, CancellationToken.None);

    // Assert
    Assert.Equal(0, sweptCount);
    Assert.Equal(SessionState.Ready, opened.State);
    Assert.Equal(0, worker.ShutdownCount);
}
/// <summary>
/// Opens two sessions and checks that shutting the manager down closes both, shuts each worker
/// down exactly once, and leaves the metrics at two closed and zero open sessions.
/// </summary>
[Fact]
public async Task ShutdownAsync_ClosesAllRegisteredSessions()
{
    // Arrange
    FakeWorkerClient workerA = new();
    FakeWorkerClient workerB = new();
    using GatewayMetrics gatewayMetrics = new();
    SessionManager sut = CreateManager(
        new QueueingSessionWorkerClientFactory(workerA, workerB),
        metrics: gatewayMetrics);
    GatewaySession sessionA = await sut.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
    GatewaySession sessionB = await sut.OpenSessionAsync(CreateOpenRequest(), "client-2", CancellationToken.None);

    // Act
    await sut.ShutdownAsync(CancellationToken.None);

    // Assert
    Assert.Equal(SessionState.Closed, sessionA.State);
    Assert.Equal(SessionState.Closed, sessionB.State);
    Assert.Equal(1, workerA.ShutdownCount);
    Assert.Equal(1, workerB.ShutdownCount);
    GatewayMetricsSnapshot counters = gatewayMetrics.GetSnapshot();
    Assert.Equal(2, counters.SessionsClosed);
    Assert.Equal(0, counters.OpenSessions);
}
/// <summary>Builds a <see cref="SessionManager"/> for a test, filling in defaults for anything not supplied.</summary>
/// <param name="factory">Worker client factory the manager uses to create workers.</param>
/// <param name="registry">Session registry; a new <see cref="SessionRegistry"/> is used when omitted.</param>
/// <param name="metrics">
/// Metrics collector; a new <see cref="GatewayMetrics"/> is used when omitted.
/// NOTE(review): this helper never disposes that fallback instance; pass a caller-owned
/// instance if disposal matters.
/// </param>
/// <param name="options">Gateway options; the <see cref="CreateOptions"/> defaults are used when omitted.</param>
/// <param name="timeProvider">Time provider handed to the manager unchanged; <c>null</c> is passed through when omitted.</param>
/// <returns>The configured session manager.</returns>
private static SessionManager CreateManager(
    ISessionWorkerClientFactory factory,
    ISessionRegistry? registry = null,
    GatewayMetrics? metrics = null,
    GatewayOptions? options = null,
    TimeProvider? timeProvider = null)
{
    ISessionRegistry effectiveRegistry = registry ?? new SessionRegistry();
    IOptions<GatewayOptions> wrappedOptions = Options.Create(options ?? CreateOptions());
    GatewayMetrics effectiveMetrics = metrics ?? new GatewayMetrics();

    return new SessionManager(effectiveRegistry, factory, wrappedOptions, effectiveMetrics, timeProvider);
}
/// <summary>Builds gateway options for a test with fixed timeouts and adjustable session limits.</summary>
/// <param name="maxSessions">Value for <c>Sessions.MaxSessions</c>.</param>
/// <param name="defaultLeaseSeconds">Value for <c>Sessions.DefaultLeaseSeconds</c>.</param>
/// <returns>
/// Options with a 30-second default command timeout, a 30-second worker startup timeout, and a
/// 10-second worker shutdown timeout.
/// </returns>
private static GatewayOptions CreateOptions(
    int maxSessions = 64,
    int defaultLeaseSeconds = 1800)
{
    SessionOptions sessionSettings = new SessionOptions
    {
        DefaultCommandTimeoutSeconds = 30,
        MaxSessions = maxSessions,
        DefaultLeaseSeconds = defaultLeaseSeconds,
    };
    WorkerOptions workerSettings = new WorkerOptions
    {
        StartupTimeoutSeconds = 30,
        ShutdownTimeoutSeconds = 10,
    };

    return new GatewayOptions
    {
        Sessions = sessionSettings,
        Worker = workerSettings,
    };
}
/// <summary>
/// Builds the default open request used by the tests: no requested backend, session name
/// <c>test-session</c>, correlation id <c>client-correlation-1</c>, and a 5-second command timeout.
/// </summary>
/// <returns>A new request with those values.</returns>
private static SessionOpenRequest CreateOpenRequest()
{
    Duration fiveSecondTimeout = Duration.FromTimeSpan(TimeSpan.FromSeconds(5));

    return new SessionOpenRequest(
        RequestedBackend: null,
        ClientSessionName: "test-session",
        ClientCorrelationId: "client-correlation-1",
        CommandTimeout: fiveSecondTimeout);
}
/// <summary>Wraps an <see cref="MxCommand"/> of the given kind in a <see cref="WorkerCommand"/>.</summary>
/// <param name="kind">Command kind to set on the inner command.</param>
/// <returns>A worker command whose inner command has only its kind set.</returns>
private static WorkerCommand CreateCommand(MxCommandKind kind)
{
    MxCommand inner = new MxCommand { Kind = kind };
    return new WorkerCommand { Command = inner };
}
/// <summary>
/// Factory that always hands out the one worker client it was constructed with and records the
/// session states it observes while doing so.
/// </summary>
private sealed class FakeSessionWorkerClientFactory(IWorkerClient workerClient) : ISessionWorkerClientFactory
{
    /// <summary>Gets the session states recorded during worker creation, in the order they were seen.</summary>
    public List<string> ObservedStates { get; } = [];

    /// <summary>
    /// Gets a value indicating whether creation should move the session through the
    /// waiting-for-pipe, handshaking, and initializing-worker states.
    /// </summary>
    public bool ApplyLifecycleTransitions { get; init; }

    /// <inheritdoc />
    public Task<IWorkerClient> CreateAsync(
        GatewaySession session,
        CancellationToken cancellationToken)
    {
        // Always record the state the session arrives in.
        RecordState(session);

        if (!ApplyLifecycleTransitions)
        {
            return Task.FromResult(workerClient);
        }

        SessionState[] lifecycle =
        [
            SessionState.WaitingForPipe,
            SessionState.Handshaking,
            SessionState.InitializingWorker,
        ];
        foreach (SessionState next in lifecycle)
        {
            session.TransitionTo(next);
            RecordState(session);
        }

        return Task.FromResult(workerClient);
    }

    /// <summary>Appends the session's current state name to <see cref="ObservedStates"/>.</summary>
    private void RecordState(GatewaySession session) => ObservedStates.Add(session.State.ToString());
}
/// <summary>
/// Factory that hands out pre-built worker clients one per call, in the order they were supplied.
/// </summary>
private sealed class QueueingSessionWorkerClientFactory : ISessionWorkerClientFactory
{
    private readonly Queue<IWorkerClient> _pending;

    /// <summary>Initializes a new instance of the <see cref="QueueingSessionWorkerClientFactory"/> class.</summary>
    /// <param name="workerClients">Worker clients to hand out, first to last.</param>
    public QueueingSessionWorkerClientFactory(params IWorkerClient[] workerClients)
        => _pending = new Queue<IWorkerClient>(workerClients);

    /// <inheritdoc />
    public Task<IWorkerClient> CreateAsync(
        GatewaySession session,
        CancellationToken cancellationToken)
    {
        // Dequeue throws if more sessions are opened than worker clients were supplied.
        IWorkerClient next = _pending.Dequeue();
        return Task.FromResult(next);
    }
}
/// <summary>Factory whose creation call always throws, used to simulate a worker that cannot start.</summary>
private sealed class FailingSessionWorkerClientFactory : ISessionWorkerClientFactory
{
    /// <inheritdoc />
    public Task<IWorkerClient> CreateAsync(
        GatewaySession session,
        CancellationToken cancellationToken)
        => throw new InvalidOperationException("worker startup failed");
}
/// <summary>
/// In-memory <see cref="IWorkerClient"/> for tests. It counts the calls made to it, can be
/// configured to throw from shutdown or kill, and can hold shutdown open until released.
/// </summary>
private sealed class FakeWorkerClient : IWorkerClient
{
    // Completed once a blocking shutdown has been entered; see WaitForShutdownStartAsync.
    private readonly TaskCompletionSource _shutdownEntered = new(TaskCreationOptions.RunContinuationsAsynchronously);

    // Completed by ReleaseShutdown to let a blocking shutdown continue.
    private readonly TaskCompletionSource _shutdownGate = new(TaskCreationOptions.RunContinuationsAsynchronously);

    /// <summary>Gets the session ID reported by this fake and stamped on its default replies.</summary>
    public string SessionId { get; init; } = "session-1";

    /// <summary>Gets the process ID reported by this fake.</summary>
    public int? ProcessId { get; init; } = 1234;

    /// <summary>Gets or sets the worker state; shutdown sets it to closed and a successful kill to faulted.</summary>
    public WorkerClientState State { get; set; } = WorkerClientState.Ready;

    /// <summary>Gets the last heartbeat timestamp; defaults to the UTC time at construction.</summary>
    public DateTimeOffset LastHeartbeatAt { get; init; } = DateTimeOffset.UtcNow;

    /// <summary>Gets how many times <see cref="InvokeAsync"/> was called.</summary>
    public int InvokeCount { get; private set; }

    /// <summary>Gets how many times <see cref="ShutdownAsync"/> was called, including calls that threw.</summary>
    public int ShutdownCount { get; private set; }

    /// <summary>Gets how many times <see cref="Kill"/> was called, including calls that threw.</summary>
    public int KillCount { get; private set; }

    /// <summary>Gets how many times <see cref="DisposeAsync"/> was called.</summary>
    public int DisposeCount { get; private set; }

    /// <summary>Gets the exception <see cref="ShutdownAsync"/> throws, or <c>null</c> to shut down normally.</summary>
    public Exception? ShutdownException { get; init; }

    /// <summary>Gets the exception <see cref="Kill"/> throws, or <c>null</c> to kill normally.</summary>
    public Exception? KillException { get; init; }

    /// <summary>Gets a value indicating whether shutdown waits for <see cref="ReleaseShutdown"/> before completing.</summary>
    public bool BlockShutdown { get; init; }

    /// <summary>Gets the most recent command passed to <see cref="InvokeAsync"/>.</summary>
    public WorkerCommand? LastCommand { get; private set; }

    /// <summary>Gets the canned reply returned by <see cref="InvokeAsync"/>; when <c>null</c> a default reply is built.</summary>
    public WorkerCommandReply? InvokeReply { get; init; }

    /// <inheritdoc />
    public Task StartAsync(CancellationToken cancellationToken) => Task.CompletedTask;

    /// <inheritdoc />
    public Task<WorkerCommandReply> InvokeAsync(
        WorkerCommand command,
        TimeSpan timeout,
        CancellationToken cancellationToken)
    {
        InvokeCount++;
        LastCommand = command;

        // Without a canned reply, echo the command kind back (Unspecified if there is no inner command).
        WorkerCommandReply reply = InvokeReply ?? new WorkerCommandReply
        {
            Reply = new MxCommandReply
            {
                SessionId = SessionId,
                CorrelationId = "correlation-1",
                Kind = command.Command?.Kind ?? MxCommandKind.Unspecified,
            },
        };
        return Task.FromResult(reply);
    }

    /// <inheritdoc />
    public async IAsyncEnumerable<WorkerEvent> ReadEventsAsync(
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
    {
        // The fake never produces events; the await only satisfies the async iterator signature.
        await Task.CompletedTask;
        yield break;
    }

    /// <inheritdoc />
    public async Task ShutdownAsync(
        TimeSpan timeout,
        CancellationToken cancellationToken)
    {
        ShutdownCount++;

        if (ShutdownException is { } configuredFailure)
        {
            throw configuredFailure;
        }

        if (BlockShutdown)
        {
            // Signal that shutdown was entered, then wait for the test to release it.
            _shutdownEntered.TrySetResult();
            await _shutdownGate.Task.WaitAsync(cancellationToken);
        }

        State = WorkerClientState.Closed;
    }

    /// <inheritdoc />
    public void Kill(string reason)
    {
        KillCount++;

        if (KillException is { } configuredFailure)
        {
            throw configuredFailure;
        }

        State = WorkerClientState.Faulted;
    }

    /// <inheritdoc />
    public ValueTask DisposeAsync()
    {
        DisposeCount++;
        return ValueTask.CompletedTask;
    }

    /// <summary>Waits up to five seconds for a blocking shutdown to be entered.</summary>
    public Task WaitForShutdownStartAsync() => _shutdownEntered.Task.WaitAsync(TimeSpan.FromSeconds(5));

    /// <summary>Lets a shutdown that is blocked on <see cref="BlockShutdown"/> continue.</summary>
    public void ReleaseShutdown() => _shutdownGate.TrySetResult();
}
}