Resolve Server-002, -004, -005, -006 code-review findings
Server-002: the gateway never terminated leftover MxGateway.Worker.exe processes at startup, contradicting gateway.md and CLAUDE.md. Added IRunningProcessInspector/SystemRunningProcessInspector, OrphanWorkerTerminator, and OrphanWorkerCleanupHostedService (best-effort, runs before sessions are accepted); updated gateway.md to describe the implemented behavior.

Server-004: API-key scopes were persisted verbatim with no validation. Added GatewayScopes.All/IsKnown; the CLI parser and dashboard create path now reject unknown scope strings.

Server-005: a non-SqlException/InvalidOperationException fault on the initial Galaxy hierarchy load faulted the BackgroundService. ExecuteAsync now catches all non-cancellation exceptions on first load and RefreshCoreAsync broadens its catch so the cache records Stale/Unavailable instead.

Server-006: OpenSessionAsync incremented the open-sessions gauge before alarm auto-subscribe; an auto-subscribe failure leaked the gauge. The catch path now calls SessionRemoved() when the gauge was incremented.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,137 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
using MxGateway.Server.Configuration;
|
||||
using MxGateway.Server.Metrics;
|
||||
using MxGateway.Server.Workers;
|
||||
|
||||
namespace MxGateway.Tests.Gateway.Workers;
|
||||
|
||||
/// <summary>
/// Regression coverage for finding Server-002: <c>gateway.md</c> requires the
/// gateway to terminate orphaned worker processes when it starts. Each test
/// hands <see cref="OrphanWorkerTerminator"/> a scripted process list through a
/// fake <see cref="IRunningProcessInspector"/> and checks which process ids it
/// asks to kill: leftover workers are killed (matched by executable path, or by
/// image name alone when the path is unreadable), while unrelated processes and
/// the current process are left untouched.
/// </summary>
public sealed class OrphanWorkerTerminatorTests
{
    /// <summary>Executable path configured as the worker binary for every test.</summary>
    private const string WorkerExecutablePath = @"C:\app\src\MxGateway.Worker\bin\x86\Release\MxGateway.Worker.exe";

    /// <summary>Two processes at the configured path are both killed and counted.</summary>
    [Fact]
    public void TerminateOrphans_KillsWorkerProcessesMatchingConfiguredExecutablePath()
    {
        var fake = new FakeProcessInspector(
        [
            new RunningProcessInfo(101, WorkerExecutablePath),
            new RunningProcessInfo(102, WorkerExecutablePath),
        ]);

        int terminatedCount = TerminateUsing(fake);

        Assert.Equal(2, terminatedCount);
        Assert.Equal([101, 102], fake.KilledProcessIds.Order());
    }

    /// <summary>A process whose executable path is null is still killed.</summary>
    [Fact]
    public void TerminateOrphans_KillsImageNameMatchWhenExecutablePathUnreadable()
    {
        // An x64 gateway cannot read the main module of the x86 worker, so the
        // executable path is reported as null. The image name is then the only
        // evidence available, and this is precisely the orphan-worker scenario,
        // so the process still has to be terminated.
        var fake = new FakeProcessInspector(
        [
            new RunningProcessInfo(201, ExecutablePath: null),
        ]);

        int terminatedCount = TerminateUsing(fake);

        Assert.Equal(1, terminatedCount);
        Assert.Equal([201], fake.KilledProcessIds);
    }

    /// <summary>A same-named executable at a different path is not killed.</summary>
    [Fact]
    public void TerminateOrphans_DoesNotKillUnrelatedProcessSharingImageName()
    {
        // Same image name, different executable path: this is somebody else's
        // process, not our worker, so it must survive.
        var fake = new FakeProcessInspector(
        [
            new RunningProcessInfo(301, @"C:\other\place\MxGateway.Worker.exe"),
        ]);

        int terminatedCount = TerminateUsing(fake);

        Assert.Equal(0, terminatedCount);
        Assert.Empty(fake.KilledProcessIds);
    }

    /// <summary>An entry carrying the current process id is never killed.</summary>
    [Fact]
    public void TerminateOrphans_DoesNotKillCurrentProcess()
    {
        var fake = new FakeProcessInspector(
        [
            new RunningProcessInfo(Environment.ProcessId, WorkerExecutablePath),
        ]);

        int terminatedCount = TerminateUsing(fake);

        Assert.Equal(0, terminatedCount);
        Assert.Empty(fake.KilledProcessIds);
    }

    /// <summary>A kill that throws does not stop the remaining kills.</summary>
    [Fact]
    public void TerminateOrphans_ContinuesWhenOneKillThrows()
    {
        var fake = new FakeProcessInspector(
        [
            new RunningProcessInfo(401, WorkerExecutablePath),
            new RunningProcessInfo(402, WorkerExecutablePath),
        ])
        {
            // Killing 401 throws; 402 must still be attempted afterwards.
            ThrowOnKillProcessId = 401,
        };

        int terminatedCount = TerminateUsing(fake);

        Assert.Equal(1, terminatedCount);
        Assert.Contains(402, fake.KilledProcessIds);
    }

    /// <summary>
    /// Builds a terminator around <paramref name="inspector"/> and runs one
    /// orphan sweep, returning the count reported by <c>TerminateOrphans</c>.
    /// </summary>
    private static int TerminateUsing(IRunningProcessInspector inspector) =>
        CreateTerminator(inspector).TerminateOrphans();

    /// <summary>
    /// Creates an <see cref="OrphanWorkerTerminator"/> whose worker executable
    /// path is <see cref="WorkerExecutablePath"/>, wired to the supplied
    /// inspector and a fresh <see cref="GatewayMetrics"/> instance.
    /// </summary>
    /// <param name="inspector">Process inspector the terminator should query and kill through.</param>
    private static OrphanWorkerTerminator CreateTerminator(IRunningProcessInspector inspector)
    {
        var workerOptions = new WorkerOptions
        {
            ExecutablePath = WorkerExecutablePath,
        };
        var gatewayOptions = new GatewayOptions
        {
            Worker = workerOptions,
        };

        return new OrphanWorkerTerminator(Options.Create(gatewayOptions), inspector, new GatewayMetrics());
    }

    /// <summary>
    /// In-memory <see cref="IRunningProcessInspector"/> that serves a fixed
    /// process list and records kill requests instead of touching real processes.
    /// </summary>
    private sealed class FakeProcessInspector : IRunningProcessInspector
    {
        private readonly IReadOnlyList<RunningProcessInfo> _processes;

        /// <param name="processes">The list returned from every <see cref="GetProcessesByName"/> call.</param>
        public FakeProcessInspector(IReadOnlyList<RunningProcessInfo> processes)
        {
            _processes = processes;
        }

        /// <summary>Process ids passed to <see cref="Kill"/> that did not throw, in call order.</summary>
        public List<int> KilledProcessIds { get; } = [];

        /// <summary>When set, <see cref="Kill"/> throws for this process id instead of recording it.</summary>
        public int? ThrowOnKillProcessId { get; init; }

        /// <summary>Returns the scripted list; the requested name is not consulted.</summary>
        public IReadOnlyList<RunningProcessInfo> GetProcessesByName(string processName)
        {
            return _processes;
        }

        /// <summary>Records the kill, or throws when the id is the configured failing one.</summary>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="processId"/> equals <see cref="ThrowOnKillProcessId"/>.
        /// </exception>
        public void Kill(int processId)
        {
            if (ThrowOnKillProcessId is int failingId && failingId == processId)
            {
                throw new InvalidOperationException("Process has already exited.");
            }

            KilledProcessIds.Add(processId);
        }
    }
}
|
||||
Reference in New Issue
Block a user