Resolve Server-002, -004, -005, -006 code-review findings

Server-002: the gateway never terminated leftover MxGateway.Worker.exe
processes at startup, contradicting gateway.md and CLAUDE.md. Added
IRunningProcessInspector/SystemRunningProcessInspector, OrphanWorkerTerminator,
and OrphanWorkerCleanupHostedService (best-effort, runs before sessions are
accepted); updated gateway.md to describe the implemented behavior.

Server-004: API-key scopes were persisted verbatim with no validation. Added
GatewayScopes.All/IsKnown; the CLI parser and dashboard create path now
reject unknown scope strings.

Server-005: a non-SqlException/InvalidOperationException fault on the initial
Galaxy hierarchy load faulted the BackgroundService. ExecuteAsync now catches
all non-cancellation exceptions on first load and RefreshCoreAsync broadens
its catch so the cache records Stale/Unavailable instead.

Server-006: OpenSessionAsync incremented the open-sessions gauge before
alarm auto-subscribe; an auto-subscribe failure leaked the gauge. The catch
path now calls SessionRemoved() when the gauge was incremented.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-18 21:31:10 -04:00
parent 5e795aeeb8
commit 1d9e3afadd
18 changed files with 676 additions and 15 deletions
@@ -0,0 +1,137 @@
using Microsoft.Extensions.Options;
using MxGateway.Server.Configuration;
using MxGateway.Server.Metrics;
using MxGateway.Server.Workers;
namespace MxGateway.Tests.Gateway.Workers;
/// <summary>
/// Server-002 regression: per <c>gateway.md</c> the gateway must terminate
/// orphaned worker processes on startup. These tests pin that the terminator
/// kills leftover workers (matched by executable path, or by image name when
/// the path is unreadable) without touching unrelated processes or itself.
/// </summary>
public sealed class OrphanWorkerTerminatorTests
{
private const string WorkerExecutablePath = @"C:\app\src\MxGateway.Worker\bin\x86\Release\MxGateway.Worker.exe";
[Fact]
public void TerminateOrphans_KillsWorkerProcessesMatchingConfiguredExecutablePath()
{
FakeProcessInspector inspector = new(
[
new RunningProcessInfo(101, WorkerExecutablePath),
new RunningProcessInfo(102, WorkerExecutablePath),
]);
OrphanWorkerTerminator terminator = CreateTerminator(inspector);
int killed = terminator.TerminateOrphans();
Assert.Equal(2, killed);
Assert.Equal([101, 102], inspector.KilledProcessIds.Order());
}
[Fact]
public void TerminateOrphans_KillsImageNameMatchWhenExecutablePathUnreadable()
{
// The x64 gateway cannot introspect the x86 worker's main module, so the
// path comes back null. Image-name match is the only signal — and it is
// exactly the orphan worker case, so the process must still be killed.
FakeProcessInspector inspector = new(
[
new RunningProcessInfo(201, ExecutablePath: null),
]);
OrphanWorkerTerminator terminator = CreateTerminator(inspector);
int killed = terminator.TerminateOrphans();
Assert.Equal(1, killed);
Assert.Equal([201], inspector.KilledProcessIds);
}
[Fact]
public void TerminateOrphans_DoesNotKillUnrelatedProcessSharingImageName()
{
// A process with the same image name but a different executable path is
// not our worker and must be left alone.
FakeProcessInspector inspector = new(
[
new RunningProcessInfo(301, @"C:\other\place\MxGateway.Worker.exe"),
]);
OrphanWorkerTerminator terminator = CreateTerminator(inspector);
int killed = terminator.TerminateOrphans();
Assert.Equal(0, killed);
Assert.Empty(inspector.KilledProcessIds);
}
[Fact]
public void TerminateOrphans_DoesNotKillCurrentProcess()
{
FakeProcessInspector inspector = new(
[
new RunningProcessInfo(Environment.ProcessId, WorkerExecutablePath),
]);
OrphanWorkerTerminator terminator = CreateTerminator(inspector);
int killed = terminator.TerminateOrphans();
Assert.Equal(0, killed);
Assert.Empty(inspector.KilledProcessIds);
}
[Fact]
public void TerminateOrphans_ContinuesWhenOneKillThrows()
{
FakeProcessInspector inspector = new(
[
new RunningProcessInfo(401, WorkerExecutablePath),
new RunningProcessInfo(402, WorkerExecutablePath),
])
{
ThrowOnKillProcessId = 401,
};
OrphanWorkerTerminator terminator = CreateTerminator(inspector);
int killed = terminator.TerminateOrphans();
Assert.Equal(1, killed);
Assert.Contains(402, inspector.KilledProcessIds);
}
private static OrphanWorkerTerminator CreateTerminator(IRunningProcessInspector inspector)
{
GatewayOptions options = new()
{
Worker = new WorkerOptions
{
ExecutablePath = WorkerExecutablePath,
},
};
return new OrphanWorkerTerminator(
Options.Create(options),
inspector,
new GatewayMetrics());
}
private sealed class FakeProcessInspector(IReadOnlyList<RunningProcessInfo> processes)
: IRunningProcessInspector
{
public List<int> KilledProcessIds { get; } = [];
public int? ThrowOnKillProcessId { get; init; }
public IReadOnlyList<RunningProcessInfo> GetProcessesByName(string processName) => processes;
public void Kill(int processId)
{
if (ThrowOnKillProcessId == processId)
{
throw new InvalidOperationException("Process has already exited.");
}
KilledProcessIds.Add(processId);
}
}
}