Resolve Server-044..050: KillWorker accounting + admin service hardening
Server-044 KillWorkerAsync catch path now calls _metrics.SessionRemoved
so the open-session gauge does not leak when KillWorker throws.
Server-045 KillWorkerAsync routes through a new
GatewaySession.KillWorkerWithCloseGateAsync that takes the
per-session close lock, so concurrent kills count SessionsClosed
exactly once.
Server-046 CloseSessionCoreAsync's SessionCloseStartedException branch and
ShutdownAsync's kill fallback both increment SessionsClosed (not
just the gauge), so the counter and gauge stay consistent.
Server-047 ApiKeysPage.ConfirmPendingAsync holds PendingAction across the
awaited action and clears it in finally, matching the sessions
pages.
Server-048 Closed: the 044/045 regression tests now cover the previously
untested kill paths.
Server-049 IDashboardSessionAdminService + DashboardSessionAdminService
now carry XML docs that pin the Admin gate, missing-session
return-Fail semantics, and the dashboard-admin-kill reason.
Server-050 CloseSessionAsync and KillWorkerAsync now catch unexpected
exceptions (after the SessionManagerException handlers) and return
a friendly Fail; an OperationCanceledException tied to the caller
token still propagates.
All resolved at 2026-05-24; 503/503 gateway tests pass.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -115,6 +115,52 @@ public sealed class DashboardSessionAdminServiceTests
|
||||
Assert.True(service.CanManage(CreateUser(DashboardRoles.Admin)));
|
||||
}
|
||||
|
||||
/// <summary>
/// Server-050 regression: when the session manager's close call fails with an exception that is
/// not a <see cref="SessionManagerException"/> (an <see cref="InvalidOperationException"/> in this
/// test), <c>CloseSessionAsync</c> must return a failed <see cref="DashboardSessionAdminResult"/>
/// with a non-blank message rather than propagating the raw exception into Blazor's error boundary.
/// </summary>
[Fact]
public async Task CloseSessionAsync_WhenManagerThrowsUnexpected_ReturnsFriendlyFail()
{
    // Arrange: a fake manager configured to throw a non-SessionManagerException on close.
    FakeSessionManager throwingManager = new() { CloseThrowsUnexpected = new InvalidOperationException("unexpected") };
    DashboardSessionAdminService adminService = CreateService(throwingManager);

    // Act: an Admin user asks to close the session.
    DashboardSessionAdminResult outcome = await adminService.CloseSessionAsync(CreateUser(DashboardRoles.Admin), "session-1", CancellationToken.None);

    // Assert: the failure is reported through the result and carries some user-facing text.
    Assert.False(outcome.Succeeded);
    Assert.False(string.IsNullOrWhiteSpace(outcome.Message));
}
|
||||
|
||||
/// <summary>
/// Server-050 regression for the kill path: when the session manager's kill call fails with an
/// exception that is not a <see cref="SessionManagerException"/> (an <see cref="IOException"/> in
/// this test), <c>KillWorkerAsync</c> must return a failed <see cref="DashboardSessionAdminResult"/>
/// with a non-blank message instead of letting the exception escape.
/// </summary>
[Fact]
public async Task KillWorkerAsync_WhenManagerThrowsUnexpected_ReturnsFriendlyFail()
{
    // Arrange: a fake manager configured to throw a non-SessionManagerException on kill.
    FakeSessionManager throwingManager = new() { KillThrowsUnexpected = new IOException("pipe broken") };
    DashboardSessionAdminService adminService = CreateService(throwingManager);

    // Act: an Admin user asks to kill the session's worker.
    DashboardSessionAdminResult outcome = await adminService.KillWorkerAsync(CreateUser(DashboardRoles.Admin), "session-1", CancellationToken.None);

    // Assert: the failure is reported through the result and carries some user-facing text.
    Assert.False(outcome.Succeeded);
    Assert.False(string.IsNullOrWhiteSpace(outcome.Message));
}
|
||||
|
||||
private static DashboardSessionAdminService CreateService(ISessionManager sessionManager)
|
||||
{
|
||||
DefaultHttpContext httpContext = new();
|
||||
@@ -150,6 +196,10 @@ public sealed class DashboardSessionAdminServiceTests
|
||||
|
||||
/// <summary>
/// Gets a value configuring the fake's close behaviour.
/// NOTE(review): the code that reads this flag is outside the visible hunks; presumably, when
/// <see langword="true"/>, the fake throws a "session was not found" error from its close path — confirm.
/// </summary>
public bool CloseThrowsNotFound { get; init; }
|
||||
|
||||
/// <summary>
/// Gets the exception the fake throws from its close path, if any. When non-null, this exact
/// instance is thrown instead of returning a successful <see cref="SessionCloseResult"/>.
/// Tests use it to simulate a failure that is not a <see cref="SessionManagerException"/>.
/// </summary>
public Exception? CloseThrowsUnexpected { get; init; }
|
||||
|
||||
/// <summary>
/// Gets the exception the fake throws from its kill path, if any. When non-null, this exact
/// instance is thrown after the kill count, session id, and reason have been recorded, instead of
/// returning a successful <see cref="SessionCloseResult"/>.
/// </summary>
public Exception? KillThrowsUnexpected { get; init; }
|
||||
|
||||
public Task<GatewaySession> OpenSessionAsync(
|
||||
SessionOpenRequest request,
|
||||
string? clientIdentity,
|
||||
@@ -194,6 +244,11 @@ public sealed class DashboardSessionAdminServiceTests
|
||||
$"Session {sessionId} was not found.");
|
||||
}
|
||||
|
||||
if (CloseThrowsUnexpected is not null)
|
||||
{
|
||||
throw CloseThrowsUnexpected;
|
||||
}
|
||||
|
||||
return Task.FromResult(new SessionCloseResult(sessionId, SessionState.Closed, AlreadyClosed: false));
|
||||
}
|
||||
|
||||
@@ -205,6 +260,11 @@ public sealed class DashboardSessionAdminServiceTests
|
||||
KillCount++;
|
||||
LastKilledSessionId = sessionId;
|
||||
LastKillReason = reason;
|
||||
if (KillThrowsUnexpected is not null)
|
||||
{
|
||||
throw KillThrowsUnexpected;
|
||||
}
|
||||
|
||||
return Task.FromResult(new SessionCloseResult(sessionId, SessionState.Closed, AlreadyClosed: false));
|
||||
}
|
||||
|
||||
|
||||
@@ -410,7 +410,10 @@ public sealed class SessionManagerTests
|
||||
Assert.Equal(1, failingWorkerClient.KillCount);
|
||||
Assert.Equal(1, failingWorkerClient.DisposeCount);
|
||||
GatewayMetricsSnapshot snapshot = metrics.GetSnapshot();
|
||||
Assert.Equal(0, snapshot.SessionsClosed);
|
||||
// Server-046: a close-that-failed now accounts as SessionClosed (counter += 1) rather
|
||||
// than SessionRemoved (gauge -= 1, counter unchanged). The session is being removed
|
||||
// from the registry on this path, so it must show up in the closed count.
|
||||
Assert.Equal(1, snapshot.SessionsClosed);
|
||||
Assert.False(snapshot.EventsBySession.ContainsKey(firstSession.SessionId));
|
||||
Assert.Equal(1, snapshot.OpenSessions);
|
||||
}
|
||||
@@ -495,6 +498,110 @@ public sealed class SessionManagerTests
|
||||
Assert.Equal(SessionManagerErrorCode.SessionNotFound, exception.ErrorCode);
|
||||
}
|
||||
|
||||
/// <summary>
/// Server-044 regression: if the worker client's kill throws, <c>KillWorkerAsync</c> must still
/// bring the open-session gauge (<c>mxgateway.sessions.open</c>) back down, matching the
/// Server-006 fix in <c>OpenSessionAsync</c>. Otherwise every failed kill would leak one open
/// session in the gauge.
/// </summary>
[Fact]
public async Task KillWorkerAsync_WhenSessionKillThrows_DecrementsOpenSessionGauge()
{
    // Arrange: one open session whose worker client fails on Kill.
    FakeWorkerClient failingClient = new() { KillException = new InvalidOperationException("worker kill failed") };
    using GatewayMetrics metrics = new();
    SessionManager manager = CreateManager(new FakeSessionWorkerClientFactory(failingClient), metrics: metrics);
    GatewaySession opened = await manager.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    Assert.Equal(1, metrics.GetSnapshot().OpenSessions);

    // Act: the kill is expected to surface as a SessionManagerException.
    SessionManagerException failure = await Assert.ThrowsAsync<SessionManagerException>(
        async () => await manager.KillWorkerAsync(opened.SessionId, "test-kill", CancellationToken.None));

    // Assert: reported as a close failure, session gone from the registry, gauge back at zero,
    // and at least one fault recorded.
    Assert.Equal(SessionManagerErrorCode.CloseFailed, failure.ErrorCode);
    Assert.False(manager.TryGetSession(opened.SessionId, out _));

    GatewayMetricsSnapshot afterKill = metrics.GetSnapshot();
    Assert.Equal(0, afterKill.OpenSessions);
    Assert.True(afterKill.Faults > 0);
}
|
||||
|
||||
/// <summary>
/// Regression for Server-045 / Server-048: concurrent kills on the same session must not
/// double-increment <c>mxgateway.sessions.closed</c>. Whichever kill wins performs the close; the
/// other observes <c>wasClosed == true</c> (or a missing session after removal) and short-circuits.
/// </summary>
/// <remarks>
/// Both kills are started from the thread pool and released together through a shared gate. If the
/// first kill were invoked inline on the test thread it could run to completion before the second
/// was even scheduled (the fake worker client's <c>Kill</c> is synchronous), and the test would only
/// exercise two sequential kills.
/// </remarks>
[Fact]
public async Task KillWorkerAsync_ConcurrentCallsOnSameSession_CountClosedExactlyOnce()
{
    FakeWorkerClient workerClient = new();
    using GatewayMetrics metrics = new();
    SessionManager manager = CreateManager(
        new FakeSessionWorkerClientFactory(workerClient),
        metrics: metrics);
    GatewaySession session = await manager.OpenSessionAsync(
        CreateOpenRequest(),
        "client-1",
        CancellationToken.None);

    // Shared start signal so neither kill gets a head start on the other.
    TaskCompletionSource startGate = new(TaskCreationOptions.RunContinuationsAsynchronously);

    // Either call may lose the race and find the session already removed; that is an accepted
    // outcome for the loser and is mapped to an "already closed" result.
    async Task<SessionCloseResult> KillWhenReleasedAsync(string reason)
    {
        await startGate.Task;
        try
        {
            return await manager.KillWorkerAsync(session.SessionId, reason, CancellationToken.None);
        }
        catch (SessionManagerException missing) when (missing.ErrorCode == SessionManagerErrorCode.SessionNotFound)
        {
            return new SessionCloseResult(session.SessionId, SessionState.Closed, AlreadyClosed: true);
        }
    }

    Task<SessionCloseResult> first = Task.Run(() => KillWhenReleasedAsync("kill-a"));
    Task<SessionCloseResult> second = Task.Run(() => KillWhenReleasedAsync("kill-b"));
    startGate.SetResult();

    await Task.WhenAll(first, second);

    Assert.Equal(1, metrics.GetSnapshot().SessionsClosed);
    Assert.Equal(0, metrics.GetSnapshot().OpenSessions);
    Assert.False(manager.TryGetSession(session.SessionId, out _));
}
|
||||
|
||||
/// <summary>
/// Server-046 regression: when the graceful close inside <c>ShutdownAsync</c> throws and the
/// fallback (<c>KillWorker</c> + <c>RemoveSessionAsync</c>) takes over, the session must still be
/// accounted as closed. The open-session gauge has to reach zero and the
/// <c>mxgateway.sessions.closed</c> counter has to go up by one; before the fix this path
/// under-counted the closed counter.
/// </summary>
[Fact]
public async Task ShutdownAsync_WhenSessionCloseThrows_StillDecrementsOpenSessionGaugeAndIncrementsClosedCounter()
{
    // Arrange: one open session whose worker client fails on shutdown.
    FakeWorkerClient failingClient = new() { ShutdownException = new InvalidOperationException("worker shutdown failed") };
    using GatewayMetrics metrics = new();
    SessionManager manager = CreateManager(new FakeSessionWorkerClientFactory(failingClient), metrics: metrics);
    GatewaySession opened = await manager.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);

    Assert.Equal(1, metrics.GetSnapshot().OpenSessions);

    // Act
    await manager.ShutdownAsync(CancellationToken.None);

    // Assert: whichever path ran (graceful close or kill fallback), the gauge is back at zero,
    // the closed counter moved by one, and the session is no longer registered.
    GatewayMetricsSnapshot afterShutdown = metrics.GetSnapshot();
    Assert.Equal(0, afterShutdown.OpenSessions);
    Assert.Equal(1, afterShutdown.SessionsClosed);
    Assert.False(manager.TryGetSession(opened.SessionId, out _));
}
|
||||
|
||||
/// <summary>Verifies that when worker creation fails, the session is removed from the registry.</summary>
|
||||
[Fact]
|
||||
public async Task OpenSessionAsync_WhenWorkerCreationFails_RemovesSessionFromRegistry()
|
||||
@@ -726,6 +833,9 @@ public sealed class SessionManagerTests
|
||||
/// <summary>Gets the exception to throw when shutdown is called, if any.</summary>
|
||||
public Exception? ShutdownException { get; init; }
|
||||
|
||||
/// <summary>Gets the exception to throw when kill is called, if any.</summary>
|
||||
public Exception? KillException { get; init; }
|
||||
|
||||
/// <summary>Gets a value indicating whether to block shutdown on the fake worker client.</summary>
|
||||
public bool BlockShutdown { get; init; }
|
||||
|
||||
@@ -803,6 +913,11 @@ public sealed class SessionManagerTests
|
||||
public void Kill(string reason)
{
    // Count the attempt first so the tally also includes kills that go on to throw.
    KillCount += 1;

    // A configured KillException simulates a failing kill; State is left untouched in that case.
    if (KillException is { } configuredFailure)
    {
        throw configuredFailure;
    }

    State = WorkerClientState.Faulted;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user