Resolve Server-002, -004, -005, -006 code-review findings
Server-002: the gateway never terminated leftover MxGateway.Worker.exe processes at startup, contradicting gateway.md and CLAUDE.md. Added IRunningProcessInspector/SystemRunningProcessInspector, OrphanWorkerTerminator, and OrphanWorkerCleanupHostedService (best-effort, runs before sessions are accepted); updated gateway.md to describe the implemented behavior.

Server-004: API-key scopes were persisted verbatim with no validation. Added GatewayScopes.All/IsKnown; the CLI parser and dashboard create path now reject unknown scope strings.

Server-005: a non-SqlException/InvalidOperationException fault on the initial Galaxy hierarchy load faulted the BackgroundService. ExecuteAsync now catches all non-cancellation exceptions on first load and RefreshCoreAsync broadens its catch so the cache records Stale/Unavailable instead.

Server-006: OpenSessionAsync incremented the open-sessions gauge before alarm auto-subscribe; an auto-subscribe failure leaked the gauge. The catch path now calls SessionRemoved() when the gauge was incremented.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -112,6 +112,33 @@ public sealed class DashboardApiKeyManagementServiceTests
|
||||
&& entry.Details == "rotated");
|
||||
}
|
||||
|
||||
/// <summary>
/// Server-004 regression: creating a key through the dashboard with a scope
/// string that is not one of the canonical gateway scopes must fail, and the
/// admin store must never be asked to persist it. A key stored with such a
/// scope would never be matched by the authorization resolver.
/// </summary>
[Fact]
public async Task CreateAsync_UnknownScope_DoesNotCallStore()
{
    // Arrange: one canonical scope mixed with two bare, non-canonical strings.
    FakeApiKeyAdminStore store = new();
    DashboardApiKeyManagementService sut = CreateService(store);
    DashboardApiKeyManagementRequest template = CreateRequest();
    HashSet<string> requestedScopes = new(
        [GatewayScopes.SessionOpen, "invoke", "metadata"],
        StringComparer.Ordinal);
    DashboardApiKeyManagementRequest invalidRequest = template with { Scopes = requestedScopes };

    // Act
    DashboardApiKeyManagementResult outcome = await sut.CreateAsync(
        CreateAuthorizedUser(),
        invalidRequest,
        CancellationToken.None);

    // Assert: the request is rejected before anything reaches the store.
    Assert.False(outcome.Succeeded);
    Assert.Equal(0, store.CreateCount);
}
|
||||
|
||||
private static DashboardApiKeyManagementService CreateService(
|
||||
FakeApiKeyAdminStore? adminStore = null,
|
||||
FakeApiKeyAuditStore? auditStore = null,
|
||||
|
||||
@@ -125,6 +125,44 @@ public sealed class SessionManagerAlarmAutoSubscribeTests
|
||||
CreateOpenRequest(), "client-1", CancellationToken.None));
|
||||
}
|
||||
|
||||
/// <summary>
/// Server-006 regression: <c>SessionOpened()</c> bumps the open-session gauge
/// before alarm auto-subscribe runs. If auto-subscribe then throws, the
/// failed open must not leave <c>mxgateway.sessions.open</c> over-counted.
/// </summary>
[Fact]
public async Task OpenSessionAsync_DoesNotLeakOpenSessionGauge_WhenAutoSubscribeFailsWithRequireOn()
{
    // Builds a fresh failure reply on every call, as the factory is invoked per command.
    static MxCommandReply SubscribeFailedReply() => new()
    {
        Kind = MxCommandKind.SubscribeAlarms,
        ProtocolStatus = new ProtocolStatus
        {
            Code = ProtocolStatusCode.MxaccessFailure,
            Message = "wnwrap subscribe failed",
        },
    };

    // Arrange: a worker whose SubscribeAlarms reply always carries a failure status.
    AlarmAutoSubscribeWorkerClient failingWorker = new()
    {
        SubscribeAlarmsReplyFactory = _ => SubscribeFailedReply(),
    };
    using GatewayMetrics gauge = new();
    AlarmsOptions requireSubscribe = new()
    {
        Enabled = true,
        SubscriptionExpression = @"\\HOST\Galaxy!Area1",
        RequireSubscribeOnOpen = true,
    };
    SessionManager manager = NewManager(failingWorker, alarms: requireSubscribe, metrics: gauge);

    // Act
    Func<Task> openSession = async () => await manager.OpenSessionAsync(
        CreateOpenRequest(), "client-1", CancellationToken.None);

    // Assert: the open fails and the gauge is back at zero.
    await Assert.ThrowsAsync<SessionManagerException>(openSession);
    Assert.Equal(0, gauge.GetSnapshot().OpenSessions);
}
|
||||
|
||||
[Fact]
|
||||
public async Task OpenSessionAsync_Throws_WhenEnabledButNoExpressionAndRequireOn()
|
||||
{
|
||||
@@ -161,7 +199,8 @@ public sealed class SessionManagerAlarmAutoSubscribeTests
|
||||
|
||||
private static SessionManager NewManager(
|
||||
AlarmAutoSubscribeWorkerClient worker,
|
||||
AlarmsOptions alarms)
|
||||
AlarmsOptions alarms,
|
||||
GatewayMetrics? metrics = null)
|
||||
{
|
||||
FakeSessionWorkerClientFactory factory = new(worker);
|
||||
GatewayOptions options = new GatewayOptions
|
||||
@@ -183,7 +222,7 @@ public sealed class SessionManagerAlarmAutoSubscribeTests
|
||||
new SessionRegistry(),
|
||||
factory,
|
||||
Options.Create(options),
|
||||
new GatewayMetrics());
|
||||
metrics ?? new GatewayMetrics());
|
||||
}
|
||||
|
||||
private static SessionOpenRequest CreateOpenRequest()
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
using MxGateway.Server.Configuration;
|
||||
using MxGateway.Server.Metrics;
|
||||
using MxGateway.Server.Workers;
|
||||
|
||||
namespace MxGateway.Tests.Gateway.Workers;
|
||||
|
||||
/// <summary>
/// Server-002 regression: per <c>gateway.md</c> the gateway must terminate
/// orphaned worker processes on startup. These tests pin that the terminator
/// kills leftover workers (matched by executable path, or by image name when
/// the path is unreadable) without touching unrelated processes or itself.
/// </summary>
/// <remarks>
/// The <see cref="GatewayMetrics"/> handed to each terminator is owned by the
/// test-class instance and released in <see cref="Dispose"/>, so a test run
/// does not leave undisposed metrics instances behind.
/// </remarks>
public sealed class OrphanWorkerTerminatorTests : IDisposable
{
    private const string WorkerExecutablePath = @"C:\app\src\MxGateway.Worker\bin\x86\Release\MxGateway.Worker.exe";

    // Shared by every terminator created through CreateTerminator on this instance.
    private readonly GatewayMetrics _metrics = new();

    /// <summary>Releases the metrics instance created for this test-class instance.</summary>
    public void Dispose() => _metrics.Dispose();

    /// <summary>
    /// Every listed process whose executable path equals the configured worker
    /// path is killed and counted.
    /// </summary>
    [Fact]
    public void TerminateOrphans_KillsWorkerProcessesMatchingConfiguredExecutablePath()
    {
        FakeProcessInspector inspector = new(
        [
            new RunningProcessInfo(101, WorkerExecutablePath),
            new RunningProcessInfo(102, WorkerExecutablePath),
        ]);
        OrphanWorkerTerminator terminator = CreateTerminator(inspector);

        int killed = terminator.TerminateOrphans();

        Assert.Equal(2, killed);
        // Kill order is not part of the contract, so compare sorted ids.
        Assert.Equal([101, 102], inspector.KilledProcessIds.Order());
    }

    /// <summary>
    /// A process reported with a <c>null</c> executable path is still killed.
    /// </summary>
    [Fact]
    public void TerminateOrphans_KillsImageNameMatchWhenExecutablePathUnreadable()
    {
        // The x64 gateway cannot introspect the x86 worker's main module, so the
        // path comes back null. Image-name match is the only signal — and it is
        // exactly the orphan worker case, so the process must still be killed.
        FakeProcessInspector inspector = new(
        [
            new RunningProcessInfo(201, ExecutablePath: null),
        ]);
        OrphanWorkerTerminator terminator = CreateTerminator(inspector);

        int killed = terminator.TerminateOrphans();

        Assert.Equal(1, killed);
        Assert.Equal([201], inspector.KilledProcessIds);
    }

    /// <summary>
    /// A process with a readable executable path that differs from the
    /// configured worker path is left running.
    /// </summary>
    [Fact]
    public void TerminateOrphans_DoesNotKillUnrelatedProcessSharingImageName()
    {
        // A process with the same image name but a different executable path is
        // not our worker and must be left alone.
        FakeProcessInspector inspector = new(
        [
            new RunningProcessInfo(301, @"C:\other\place\MxGateway.Worker.exe"),
        ]);
        OrphanWorkerTerminator terminator = CreateTerminator(inspector);

        int killed = terminator.TerminateOrphans();

        Assert.Equal(0, killed);
        Assert.Empty(inspector.KilledProcessIds);
    }

    /// <summary>
    /// An entry carrying the current process id is never killed, even when its
    /// executable path matches the configured worker path.
    /// </summary>
    [Fact]
    public void TerminateOrphans_DoesNotKillCurrentProcess()
    {
        FakeProcessInspector inspector = new(
        [
            new RunningProcessInfo(Environment.ProcessId, WorkerExecutablePath),
        ]);
        OrphanWorkerTerminator terminator = CreateTerminator(inspector);

        int killed = terminator.TerminateOrphans();

        Assert.Equal(0, killed);
        Assert.Empty(inspector.KilledProcessIds);
    }

    /// <summary>
    /// A kill that throws for one process does not stop the remaining matches
    /// from being killed, and the failed kill is not counted.
    /// </summary>
    [Fact]
    public void TerminateOrphans_ContinuesWhenOneKillThrows()
    {
        FakeProcessInspector inspector = new(
        [
            new RunningProcessInfo(401, WorkerExecutablePath),
            new RunningProcessInfo(402, WorkerExecutablePath),
        ])
        {
            ThrowOnKillProcessId = 401,
        };
        OrphanWorkerTerminator terminator = CreateTerminator(inspector);

        int killed = terminator.TerminateOrphans();

        Assert.Equal(1, killed);
        Assert.Contains(402, inspector.KilledProcessIds);
    }

    /// <summary>
    /// Builds a terminator configured with <see cref="WorkerExecutablePath"/>
    /// as the worker executable, the supplied inspector, and this instance's
    /// metrics.
    /// </summary>
    /// <param name="inspector">Process inspector the terminator should use.</param>
    /// <returns>A terminator ready for <c>TerminateOrphans()</c>.</returns>
    private OrphanWorkerTerminator CreateTerminator(IRunningProcessInspector inspector)
    {
        GatewayOptions options = new()
        {
            Worker = new WorkerOptions
            {
                ExecutablePath = WorkerExecutablePath,
            },
        };

        return new OrphanWorkerTerminator(
            Options.Create(options),
            inspector,
            _metrics);
    }

    /// <summary>
    /// In-memory <see cref="IRunningProcessInspector"/>. It returns the fixed
    /// process list it was constructed with and records kill requests instead
    /// of touching real processes.
    /// </summary>
    private sealed class FakeProcessInspector(IReadOnlyList<RunningProcessInfo> processes)
        : IRunningProcessInspector
    {
        /// <summary>Ids passed to <see cref="Kill"/> that did not throw, in call order.</summary>
        public List<int> KilledProcessIds { get; } = [];

        /// <summary>When set, <see cref="Kill"/> throws for this process id.</summary>
        public int? ThrowOnKillProcessId { get; init; }

        /// <summary>
        /// Returns the configured list unchanged. <paramref name="processName"/>
        /// is ignored, so these tests do not pin which name the terminator asks for.
        /// </summary>
        public IReadOnlyList<RunningProcessInfo> GetProcessesByName(string processName) => processes;

        /// <summary>
        /// Records <paramref name="processId"/> as killed, or throws
        /// <see cref="InvalidOperationException"/> when it equals
        /// <see cref="ThrowOnKillProcessId"/>.
        /// </summary>
        public void Kill(int processId)
        {
            if (ThrowOnKillProcessId == processId)
            {
                throw new InvalidOperationException("Process has already exited.");
            }

            KilledProcessIds.Add(processId);
        }
    }
}
|
||||
Reference in New Issue
Block a user