Resolve Server-044..050: KillWorker accounting + admin service hardening

Server-044  KillWorkerAsync catch path now calls _metrics.SessionRemoved
            so the open-session gauge does not leak when KillWorker throws.
Server-045  KillWorkerAsync routes through a new
            GatewaySession.KillWorkerWithCloseGateAsync that takes the
            per-session close lock, so concurrent kills count SessionsClosed
            exactly once.
Server-046  CloseSessionCoreAsync's SessionCloseStartedException branch and
            ShutdownAsync's kill fallback both increment SessionsClosed (not
            just the gauge), so the counter and gauge stay consistent.
Server-047  ApiKeysPage.ConfirmPendingAsync holds PendingAction across the
            awaited action and clears it in finally, matching the sessions
            pages.
Server-048  Closed: the 044/045 regression tests cover the previously-
            untested kill paths.
Server-049  IDashboardSessionAdminService + DashboardSessionAdminService
            now carry XML docs that pin the Admin gate, missing-session
            return-Fail semantics, and the dashboard-admin-kill reason.
Server-050  CloseSessionAsync and KillWorkerAsync now catch unexpected
            exceptions (after the existing SessionManagerException handlers)
            and return a friendly Fail; an OperationCanceledException tied to
            the caller's token still propagates.

All resolved on 2026-05-24; 503/503 gateway tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-24 08:49:34 -04:00
parent 6079c62709
commit 4d77279e7e
8 changed files with 403 additions and 16 deletions
@@ -5,6 +5,19 @@ using ZB.MOM.WW.MxGateway.Server.Sessions;
namespace ZB.MOM.WW.MxGateway.Server.Dashboard;
/// <summary>
/// Default implementation of <see cref="IDashboardSessionAdminService"/>: gates
/// destructive session actions on the <see cref="DashboardRoles.Admin"/> role,
/// audit-logs successful operations, and converts <see cref="SessionManagerException"/>
/// (and any other unexpected exceptions) into <see cref="DashboardSessionAdminResult.Fail(string)"/>
/// so the Blazor pages never see a raw exception.
/// </summary>
/// <remarks>
/// The constant <c>dashboard-admin-kill</c> is the reason passed to
/// <see cref="ISessionManager.KillWorkerAsync"/> and forwarded as the
/// <c>reason</c> tag on the <c>mxgateway.workers.killed</c> counter and in
/// the worker-kill audit log entries.
/// </remarks>
public sealed class DashboardSessionAdminService(
ISessionManager sessionManager,
IHttpContextAccessor httpContextAccessor,
@@ -16,6 +29,7 @@ public sealed class DashboardSessionAdminService(
private readonly ILogger<DashboardSessionAdminService> _logger =
logger ?? NullLogger<DashboardSessionAdminService>.Instance;
/// <inheritdoc />
public bool CanManage(ClaimsPrincipal user)
{
ArgumentNullException.ThrowIfNull(user);
@@ -24,6 +38,7 @@ public sealed class DashboardSessionAdminService(
&& user.IsInRole(DashboardRoles.Admin);
}
/// <inheritdoc />
public async Task<DashboardSessionAdminResult> CloseSessionAsync(
ClaimsPrincipal user,
string sessionId,
@@ -72,8 +87,27 @@ public sealed class DashboardSessionAdminService(
return DashboardSessionAdminResult.Fail(
$"Close failed: {exception.Message}");
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
throw;
}
catch (Exception exception)
{
// Server-050: any non-SessionManagerException (e.g. an IOException or
// InvalidOperationException from the session DisposeAsync / pipe teardown
// path) used to propagate raw into Blazor's error boundary. Convert it to
// a friendly failure so the Razor pages see only DashboardSessionAdminResult.
_logger.LogWarning(
exception,
"Dashboard admin {Actor} close failed unexpectedly for session {SessionId}.",
actor,
sessionId);
return DashboardSessionAdminResult.Fail(
$"Close failed unexpectedly for session {sessionId}. See the gateway log for details.");
}
}
/// <inheritdoc />
public async Task<DashboardSessionAdminResult> KillWorkerAsync(
ClaimsPrincipal user,
string sessionId,
@@ -122,6 +156,26 @@ public sealed class DashboardSessionAdminService(
return DashboardSessionAdminResult.Fail(
$"Kill failed: {exception.Message}");
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
throw;
}
catch (Exception exception)
{
// Server-050: any non-SessionManagerException (e.g. an IOException from
// worker pipe teardown surfacing through session.DisposeAsync, or an
// InvalidOperationException from a corrupted worker handle) used to
// propagate raw into Blazor's error boundary. Convert it to a friendly
// failure so the page renders the ResultMessage rather than the circuit
// error page.
_logger.LogWarning(
exception,
"Dashboard admin {Actor} kill failed unexpectedly for session {SessionId}.",
actor,
sessionId);
return DashboardSessionAdminResult.Fail(
$"Kill failed unexpectedly for session {sessionId}. See the gateway log for details.");
}
}
private static string ResolveActor(ClaimsPrincipal user)