File and fix Server-030 and Client.Dotnet-017 from e2e surfacing
Both findings surfaced when running the cross-language e2e matrix (scripts/run-client-e2e-tests.ps1) against the redeployed gateway at commit 84d36b7. Filed in code-reviews/Server/findings.md and code-reviews/Client.Dotnet/findings.md and fixed in the same change. Server-030 (Medium / Error handling): GatewaySession.GetReadyWorkerClient gated on `_state == Ready && _workerClient.State == Ready` but only formatted `_state` into the SessionManagerException message. Under load the gateway-driven `_state` and the worker-driven `WorkerClient.State` can diverge, producing a self-contradictory diagnostic ("Session ... is not ready. Current state is Ready."). The Java e2e client hit this on the 56th item after 55 successful add-items. Rewrote the message to include both states ("Session state is X; worker state is Y"), added an XML doc explaining the two-state contract and that this branch is the fail-fast for a divergence race, and added regression test SessionManagerTests.InvokeAsync_WhenWorkerNotReadyButSessionReady_DiagnosticIncludesBothStates that pins that both states appear in the message. The deeper race (should the gateway briefly wait for worker-Ready before failing?) remains open as a follow-up. Client.Dotnet-017 (Low / Error handling): stream-events CLI threw OperationCanceledException as an unhandled exception when the user's --timeout expired before --max-events was reached. Exit code -532462766, no aggregate JSON. The other client CLIs (Go, Rust, Python, Java) exit 0 in this case. Wrapped the `await foreach` in `catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)` so the supplied token's cancellation (--timeout, Ctrl+C, or parent CTS) becomes graceful completion; the aggregate `{ "events": [...] }` JSON still runs after the catch.
Added regression test RunAsync_StreamEvents_WhenTimeoutFiresAfterEvents_EmitsCollectedEventsAndExitsZero backed by a new FakeCliClient.StreamHangAfterEvents hook that yields the configured events then parks on the cancellation token. Side cleanup: the GatewayApplicationTests test added under Server-020 was asserting an invariant (`/dashboard/dashboard/X` doesn't exist) that I broke by reverting Server-020 in 84d36b7. The doubled endpoint shapes do exist now (MapGroup("/dashboard") prefixing an already "/dashboard/X" @page directive) but they're harmless — no client requests `/dashboard/dashboard/X`. Replaced the test with a positive assertion (`/dashboard/X` routes ARE registered) and rewrote the XML doc to record the actual contract. Verified: dotnet test src/MxGateway.Tests passes 480/480, dotnet test clients/dotnet/MxGateway.Client.Tests passes 77/77, gateway redeployed at this commit and GET http://localhost:5130/dashboard returns 200. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1216,29 +1216,43 @@ public static class MxGatewayClientCli
|
||||
AfterWorkerSequence = arguments.GetUInt64("after-worker-sequence", 0),
|
||||
};
|
||||
|
||||
await foreach (MxEvent gatewayEvent in client.StreamEventsAsync(request, cancellationToken)
|
||||
.WithCancellation(cancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
try
|
||||
{
|
||||
if (jsonLines)
|
||||
await foreach (MxEvent gatewayEvent in client.StreamEventsAsync(request, cancellationToken)
|
||||
.WithCancellation(cancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
{
|
||||
output.WriteLine(ProtobufJsonFormatter.Format(gatewayEvent));
|
||||
}
|
||||
else if (json)
|
||||
{
|
||||
events.Add(gatewayEvent);
|
||||
}
|
||||
else
|
||||
{
|
||||
output.WriteLine(ProtobufJsonFormatter.Format(gatewayEvent));
|
||||
}
|
||||
if (jsonLines)
|
||||
{
|
||||
output.WriteLine(ProtobufJsonFormatter.Format(gatewayEvent));
|
||||
}
|
||||
else if (json)
|
||||
{
|
||||
events.Add(gatewayEvent);
|
||||
}
|
||||
else
|
||||
{
|
||||
output.WriteLine(ProtobufJsonFormatter.Format(gatewayEvent));
|
||||
}
|
||||
|
||||
eventCount++;
|
||||
if (maxEvents > 0 && eventCount >= maxEvents)
|
||||
{
|
||||
break;
|
||||
eventCount++;
|
||||
if (maxEvents > 0 && eventCount >= maxEvents)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
// Client.Dotnet-017: the supplied cancellation token covers both the
|
||||
// user's --timeout wall-clock budget (via CreateCancellation's
|
||||
// CancelAfter) and external Ctrl+C / parent CTS cancellation. All
|
||||
// three are graceful completion modes for a finite-window event
|
||||
// collector: emit the events that arrived before the window closed
|
||||
// and exit 0. The events list is well-formed at this point; the
|
||||
// aggregate JSON below still runs. This matches how the Go, Rust,
|
||||
// Python, and Java CLIs treat their equivalent timeouts.
|
||||
}
|
||||
|
||||
if (json && !jsonLines)
|
||||
{
|
||||
|
||||
@@ -184,6 +184,69 @@ public sealed class MxGatewayClientCliTests
|
||||
Assert.DoesNotContain("ON_WRITE_COMPLETE", output.ToString());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Client.Dotnet-017 regression: a finite-window event collector
|
||||
/// (<c>stream-events --timeout</c>) must exit 0 and emit the events
|
||||
/// that arrived before the timeout fired, instead of propagating the
|
||||
/// timeout-driven <see cref="OperationCanceledException"/> as an
|
||||
/// unhandled exception (exit code -532462766). The fix wraps the
|
||||
/// <c>await foreach</c> in a token-aware catch so the cancellation
|
||||
/// ends the foreach gracefully; the aggregated JSON output still runs.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task RunAsync_StreamEvents_WhenTimeoutFiresAfterEvents_EmitsCollectedEventsAndExitsZero()
|
||||
{
|
||||
using var output = new StringWriter();
|
||||
using var error = new StringWriter();
|
||||
FakeCliClient fakeClient = new();
|
||||
fakeClient.Events.Add(new MxEvent
|
||||
{
|
||||
SessionId = "session-fixture",
|
||||
Family = MxEventFamily.OnDataChange,
|
||||
WorkerSequence = 1,
|
||||
});
|
||||
fakeClient.Events.Add(new MxEvent
|
||||
{
|
||||
SessionId = "session-fixture",
|
||||
Family = MxEventFamily.OnDataChange,
|
||||
WorkerSequence = 2,
|
||||
});
|
||||
// Park forever after yielding the configured events so the CLI's
|
||||
// --timeout drives the cancellation path.
|
||||
fakeClient.StreamHangAfterEvents = async token =>
|
||||
{
|
||||
await Task.Delay(Timeout.InfiniteTimeSpan, token).ConfigureAwait(false);
|
||||
};
|
||||
|
||||
int exitCode = await MxGatewayClientCli.RunAsync(
|
||||
[
|
||||
"stream-events",
|
||||
"--endpoint",
|
||||
"http://localhost:5000",
|
||||
"--api-key",
|
||||
"test-api-key",
|
||||
"--session-id",
|
||||
"session-fixture",
|
||||
"--json",
|
||||
"--max-events",
|
||||
"200",
|
||||
"--timeout",
|
||||
"1s",
|
||||
],
|
||||
output,
|
||||
error,
|
||||
_ => fakeClient);
|
||||
|
||||
Assert.Equal(0, exitCode);
|
||||
string json = output.ToString();
|
||||
// Aggregate JSON output must run even though the foreach exited via
|
||||
// cancellation, and it must contain both events that arrived first.
|
||||
Assert.Contains("\"events\"", json);
|
||||
Assert.Contains("\"workerSequence\":\"1\"", json);
|
||||
Assert.Contains("\"workerSequence\":\"2\"", json);
|
||||
Assert.Equal(string.Empty, error.ToString());
|
||||
}
|
||||
|
||||
|
||||
/// <summary>Verifies that smoke command closes opened session when a command fails.</summary>
|
||||
[Fact]
|
||||
@@ -423,6 +486,14 @@ public sealed class MxGatewayClientCliTests
|
||||
/// <summary>Exception to throw on invoke, if any.</summary>
|
||||
public Exception? InvokeFailure { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// When set, after yielding all <see cref="Events"/> the stream
|
||||
/// awaits the provided handle and then throws
|
||||
/// <see cref="OperationCanceledException"/> — used to simulate the
|
||||
/// CLI timeout / Ctrl+C cancellation path (Client.Dotnet-017).
|
||||
/// </summary>
|
||||
public Func<CancellationToken, Task>? StreamHangAfterEvents { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public ValueTask DisposeAsync()
|
||||
{
|
||||
@@ -482,6 +553,11 @@ public sealed class MxGatewayClientCliTests
|
||||
await Task.Yield();
|
||||
yield return gatewayEvent;
|
||||
}
|
||||
|
||||
if (StreamHangAfterEvents is not null)
|
||||
{
|
||||
await StreamHangAfterEvents(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Galaxy test connection reply to return.</summary>
|
||||
|
||||
@@ -297,3 +297,18 @@ The secondary `Grpc.Core.RpcException` catch on line 975 is also dead in this co
|
||||
**Recommendation:** Restructure `RunStreamAsync` so the inner `streamTask` is always observed. A `try { await Task.Delay(...) } finally { streamCts.Cancel(); try { await streamTask } catch (OperationCanceledException) {} catch (MxGatewayException) {} }` shape works (the `finally` runs even on outer cancellation). Alternatively, hoist `streamTask` into a local that the outer method's `try`/`finally` always awaits before exiting, so the per-session loop becomes `await Task.WhenAny(streamTask, Task.Delay(...))` then a guaranteed `await streamTask`. Drop the now-redundant `Grpc.Core.RpcException` catch or convert it to catch `MxGatewayException` for the wrapped shape (and document that it should never fire in production).
|
||||
|
||||
**Resolution:** 2026-05-20 — Restructured `RunStreamAsync` to wrap the `Task.Delay` in `try { await Task.Delay(...) } finally { streamCts.Cancel(); try { await streamTask } catch (OperationCanceledException) {} catch (MxGatewayException) {} }`, so the inner stream task is observed on every path — including when the outer `cancellationToken` cancels during the delay. Dropped the dead `catch (Grpc.Core.RpcException ex) when (ex.StatusCode is Grpc.Core.StatusCode.Cancelled)` clause (the production `GrpcMxGatewayClientTransport.StreamEventsAsync` routes through `RpcExceptionMapper.Map`, which returns `OperationCanceledException` for `StatusCode.Cancelled`, so an `RpcException` never reaches here) and replaced it with `catch (MxGatewayException)` to absorb the wrapped shape for any non-cancellation mapper output. Added an inline comment naming the finding and documenting why the new catch shape is correct. Eliminates the latent `TaskScheduler.UnobservedTaskException` whenever the outer cancellation fires mid-measurement-window.
|
||||
|
||||
### Client.Dotnet-017
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `clients/dotnet/MxGateway.Client.Cli/MxGatewayClientCli.cs:1190-1262` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Surfaced during the 2026-05-20 cross-language e2e matrix run: `dotnet run --project clients/dotnet/MxGateway.Client.Cli -- stream-events --endpoint http://localhost:5120 --api-key-env MXGATEWAY_API_KEY --timeout 60s --json --session-id session-... --max-events 200` exited with `-532462766` (unhandled-exception exit code) and propagated `System.OperationCanceledException: Call canceled by the client.` mapped from `Status(StatusCode="Cancelled", …)`. The CLI's `StreamEventsAsync` does `await foreach (... in client.StreamEventsAsync(...).WithCancellation(cancellationToken))` and never catches `OperationCanceledException`. When the caller's `--timeout` (driven by `CreateCancellation`'s `CancelAfter`) fires before `--max-events` is reached — the common case for a finite-window event collector against a quiet test rig — the foreach throws, the exception bubbles up, the process exits non-zero, and any `--json` aggregate output is never written. The other client CLIs (Go, Rust, Python, Java) all exit 0 in this case (e2e clients g/r/p ran clean). The bug is also a strict regression of the CLI's contract: callers can't tell "stream collected 0–N events then the budget closed" apart from "the call genuinely failed".
|
||||
|
||||
**Recommendation:** Wrap the `await foreach` in `try { ... } catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { /* graceful */ }`. The `when` clause ensures only the supplied cancellation token (which covers `--timeout`, Ctrl+C, and parent-CTS cancellation — all three of which are graceful completion modes for a finite-window collector) gets absorbed; a server-side cancellation propagated through a different token still surfaces. Keep the existing aggregate-JSON emission below the catch so the events that arrived before the budget closed are still emitted. Add a regression test that drives the CLI with `--timeout 1s` against a fake that yields a couple of events then parks on the cancellation token; assert exit 0, no stderr, and the JSON output contains both yielded events.
|
||||
|
||||
**Resolution:** 2026-05-20 — Wrapped the `await foreach` in `try { ... } catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { }` so the CLI exits 0 and emits the aggregate `{ "events": [...] }` JSON when the supplied token cancels (the `--timeout`, Ctrl+C, and parent-CTS paths all flow through that same token). The catch's `when` clause ensures non-token-driven cancellation still propagates. Added regression test `MxGatewayClientCliTests.RunAsync_StreamEvents_WhenTimeoutFiresAfterEvents_EmitsCollectedEventsAndExitsZero` that yields two events, parks on the cancellation token via a new `FakeCliClient.StreamHangAfterEvents` hook, runs the CLI with `--timeout 1s --json --max-events 200`, and asserts exit code 0, empty stderr, and both events present in the emitted aggregate JSON. Brings .NET stream-events behavior into parity with the Go, Rust, Python, and Java CLIs which all exit 0 on equivalent timeouts.
|
||||
|
||||
@@ -10,14 +10,14 @@ Each module's `findings.md` is the source of truth; this file is generated from
|
||||
|
||||
| Module | Reviewer | Date | Commit | Status | Open | Total |
|
||||
|---|---|---|---|---|---|---|
|
||||
| [Client.Dotnet](Client.Dotnet/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 16 |
|
||||
| [Client.Dotnet](Client.Dotnet/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 17 |
|
||||
| [Client.Go](Client.Go/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 21 |
|
||||
| [Client.Java](Client.Java/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 26 |
|
||||
| [Client.Python](Client.Python/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 21 |
|
||||
| [Client.Rust](Client.Rust/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 20 |
|
||||
| [Contracts](Contracts/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 15 |
|
||||
| [IntegrationTests](IntegrationTests/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 21 |
|
||||
| [Server](Server/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 29 |
|
||||
| [Server](Server/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 30 |
|
||||
| [Tests](Tests/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 24 |
|
||||
| [Worker](Worker/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 25 |
|
||||
| [Worker.Tests](Worker.Tests/findings.md) | Claude Code | 2026-05-20 | `a020350` | Reviewed | 0 | 30 |
|
||||
@@ -93,6 +93,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Server-015 | Medium | Resolved | Concurrency & thread safety | `src/MxGateway.Server/Sessions/GatewaySession.cs:8-15,266-308,720-775` |
|
||||
| Server-016 | Medium | Resolved | Error handling & resilience | `src/MxGateway.Server/Sessions/GatewaySession.cs:790-797`, `src/MxGateway.Server/Sessions/SessionManager.cs:237-258` |
|
||||
| Server-021 | Medium | Resolved | Testing coverage | `src/MxGateway.Server/Grpc/MxAccessGatewayService.cs:266-664`, `src/MxGateway.Tests/Gateway/Grpc/MxAccessGatewayServiceTests.cs` |
|
||||
| Server-030 | Medium | Resolved | Error handling & resilience | `src/MxGateway.Server/Sessions/GatewaySession.cs:952-980` |
|
||||
| Tests-003 | Medium | Resolved | Performance & resource management | `src/MxGateway.Tests/Security/Authentication/SqliteAuthStoreTests.cs:170-176`, `src/MxGateway.Tests/Security/Authentication/ApiKeyAdminCliRunnerTests.cs:252-258` |
|
||||
| Tests-004 | Medium | Resolved | Testing coverage | `src/MxGateway.Tests/Security/Authorization/GatewayGrpcAuthorizationInterceptorTests.cs` |
|
||||
| Tests-005 | Medium | Resolved | Testing coverage | `src/MxGateway.Tests/Gateway/Grpc/EventStreamServiceTests.cs:239-261`, `src/MxGateway.Tests/Gateway/Sessions/SessionManagerTests.cs` |
|
||||
@@ -129,6 +130,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Dotnet-014 | Low | Resolved | Testing coverage | `clients/dotnet/MxGateway.Client.Tests/MxGatewayClientAlarmsTests.cs:76-98`, `clients/dotnet/MxGateway.Client.Tests/FakeGatewayTransport.cs:212-231` |
|
||||
| Client.Dotnet-015 | Low | Resolved | Correctness & logic bugs | `clients/dotnet/MxGateway.Client.Cli/MxGatewayClientCli.cs:221-236`, `clients/dotnet/MxGateway.Client.Cli/MxGatewayClientCli.cs:596-1065` |
|
||||
| Client.Dotnet-016 | Low | Resolved | Concurrency & thread safety | `clients/dotnet/MxGateway.Client.Cli/MxGatewayClientCli.cs:922-976` |
|
||||
| Client.Dotnet-017 | Low | Resolved | Error handling & resilience | `clients/dotnet/MxGateway.Client.Cli/MxGatewayClientCli.cs:1190-1262` |
|
||||
| Client.Go-004 | Low | Resolved | mxaccessgw conventions | `clients/go/mxgateway/alarms_test.go:153-154`, `clients/go/mxgateway/galaxy_test.go:58-59` |
|
||||
| Client.Go-005 | Low | Resolved | Design-document adherence | `clients/go/mxgateway/client.go:64,68`, `clients/go/mxgateway/galaxy.go:83,87` |
|
||||
| Client.Go-006 | Low | Resolved | Error handling & resilience | `clients/go/mxgateway/errors.go:9-130` |
|
||||
|
||||
@@ -489,3 +489,18 @@ Re-review pass at `a020350` — the cross-module sweep that resolved Server-015
|
||||
**Recommendation:** Either (a) extend the advertised list with `bulk-read-commands` and `bulk-write-commands` (`WriteBulk` / `Write2Bulk` / `WriteSecuredBulk` / `WriteSecured2Bulk` collectively), or (b) document in `gateway.md` and `docs/Contracts.md` that `Capabilities` is informational only and not the contract version. Option (a) is the simplest forward-compatible fix and keeps the capability token shape clients are already familiar with.
|
||||
|
||||
**Resolution:** 2026-05-20 — Extended the `OpenSession` capabilities list with `bulk-read-commands` and `bulk-write-commands` alongside the existing `bulk-subscribe-commands` token, so clients that gate on capability strings have an explicit signal for the bulk-read and bulk-write families.
|
||||
|
||||
### Server-030
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `src/MxGateway.Server/Sessions/GatewaySession.cs:952-980` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Surfaced during the 2026-05-20 cross-language e2e run against a redeployed gateway (`a020350`). The Java client got 55 of 120 `AddItem` calls in, then `Advise` returned `Session session-de7728a290bd41028ad6fec81e233144 is not ready. Current state is Ready.` — a self-contradictory diagnostic. The check in `GetReadyWorkerClient` (`GatewaySession.cs:956`) is `_state != SessionState.Ready || _workerClient?.State != WorkerClientState.Ready`, but the formatted message only includes `_state`. When the gateway-side session state is `Ready` but the worker client's own `WorkerClientState` has transitioned (heartbeat watchdog firing, pipe disconnect detected by the read loop, etc.) before the session-level reaction observes it, the in-flight RPC fails fast here — and the operator sees a message that doesn't tell them which side of the gate the failure is on. The two-state gap itself is a real race (the worker-side state can shift independently of the gateway-driven session state) but a clear diagnostic is the prerequisite for diagnosing it; without it, a future investigation will start from "it says Ready but it's not Ready" instead of "the worker is Handshaking / Closing / Faulted while the session is still Ready".
|
||||
|
||||
**Recommendation:** Format both states into the exception message — `Session {SessionId} is not ready. Session state is {_state}; worker state is {workerClientState}.` (or `"<no worker>"` when `_workerClient` is null). Document on the method that the two states can diverge under load and that this branch is the fail-fast for that case. Add a regression test that flips `FakeWorkerClient.State` to a non-Ready value (e.g. `Handshaking`) while the session is `Ready` and asserts both pieces of state appear in the thrown `SessionManagerException.Message`. The deeper race investigation (should the gateway briefly wait for worker-Ready before failing? when does `WorkerClient.State` legitimately shift while the session is still `Ready`?) is out of scope for this finding but is worth a follow-up.
|
||||
|
||||
**Resolution:** 2026-05-20 — Rewrote `GetReadyWorkerClient` so the `SessionManagerException` message includes both `_state` and `_workerClient.State` (or `"<no worker>"` for the null case): `"Session {SessionId} is not ready. Session state is {_state}; worker state is {workerState}."`. Added XML doc on the method explaining the two-state contract and that this branch is the fail-fast for a state-divergence race. Added regression test `SessionManagerTests.InvokeAsync_WhenWorkerNotReadyButSessionReady_DiagnosticIncludesBothStates` that sets `FakeWorkerClient.State = WorkerClientState.Handshaking` while the session is `Ready` and asserts both `"Session state is Ready"` and `"worker state is Handshaking"` appear in the message; the test also pins `InvokeCount == 0` so the worker isn't called. The deeper race (should `GetReadyWorkerClient` retry briefly when state has just diverged?) remains open for follow-up.
|
||||
|
||||
@@ -949,15 +949,31 @@ public sealed class GatewaySession
|
||||
return reply;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the worker client iff both the gateway-side session state AND
|
||||
/// the worker client's own state are <see cref="SessionState.Ready"/> /
|
||||
/// <see cref="WorkerClientState.Ready"/>. The two states can diverge under
|
||||
/// load: <c>_state</c> only transitions on gateway-driven events (open,
|
||||
/// close, fault), while <see cref="WorkerClient.State"/> can shift on
|
||||
/// worker-side signals (heartbeat watchdog, pipe disconnect) before the
|
||||
/// gateway's session-level reaction observes them. When that happens the
|
||||
/// in-flight RPC fails fast here with both states surfaced in the
|
||||
/// diagnostic (Server-030) so the actual mismatch is actionable instead
|
||||
/// of misleading. The session usually transitions to <c>Faulted</c>
|
||||
/// shortly after.
|
||||
/// </summary>
|
||||
private IWorkerClient GetReadyWorkerClient()
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
if (_state != SessionState.Ready || _workerClient?.State != WorkerClientState.Ready)
|
||||
{
|
||||
string workerState = _workerClient is null
|
||||
? "<no worker>"
|
||||
: _workerClient.State.ToString();
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionNotReady,
|
||||
$"Session {SessionId} is not ready. Current state is {_state}.");
|
||||
$"Session {SessionId} is not ready. Session state is {_state}; worker state is {workerState}.");
|
||||
}
|
||||
|
||||
return _workerClient;
|
||||
|
||||
@@ -100,39 +100,45 @@ public sealed class GatewayApplicationTests
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Server-020 regression guard. The original Server-020 finding incorrectly
|
||||
/// concluded that the duplicate <c>@page "/dashboard/X"</c> directives were
|
||||
/// redundant because <c>MapGroup("/dashboard")</c> would prepend the prefix
|
||||
/// to all dashboard Razor pages. In practice Blazor SSR's <c>@page</c>
|
||||
/// template matcher does NOT compose with <c>MapGroup</c>, so removing the
|
||||
/// <c>/dashboard/X</c> directive left the dashboard unreachable at runtime
|
||||
/// (every page returned HTTP 500 with "Unable to find the provided template
|
||||
/// '/dashboard/'" from <c>RouteTableFactory.CreateEntry</c>). The duplicate
|
||||
/// <c>@page</c> directives are restored. This test only confirms the
|
||||
/// genuinely-double-prefixed shape (<c>/dashboard/dashboard/X</c>) never
|
||||
/// appears — it never did, since the original duplicates were
|
||||
/// <c>"/"</c> + <c>"/dashboard/"</c>, not <c>"/dashboard/"</c> repeated.
|
||||
/// Server-020 reversal regression guard. The original Server-020 finding
|
||||
/// incorrectly concluded that the duplicate <c>@page "/dashboard/X"</c>
|
||||
/// directives were redundant because <c>MapGroup("/dashboard")</c>
|
||||
/// would prepend the prefix to all dashboard Razor pages. In practice
|
||||
/// Blazor SSR's <c>RouteTableFactory</c> matches against the raw
|
||||
/// <c>@page</c> template values (not against the endpoint-route
|
||||
/// prefix), so removing <c>@page "/dashboard/X"</c> left the dashboard
|
||||
/// unreachable at runtime (every page returned HTTP 500 with "Unable
|
||||
/// to find the provided template '/dashboard/'"). The duplicate
|
||||
/// <c>@page</c> directives are restored, and as a side effect the
|
||||
/// endpoint route table DOES carry the doubled <c>/dashboard/dashboard/X</c>
|
||||
/// shape (because <c>MapGroup("/dashboard")</c> prefixes the already-prefixed
|
||||
/// <c>@page "/dashboard/X"</c>). Those doubled endpoints are harmless —
|
||||
/// no client requests <c>/dashboard/dashboard/X</c> — and removing them
|
||||
/// requires either dropping <c>MapGroup</c> or the <c>@page</c>
|
||||
/// prefix. This test asserts only the positive contract: every
|
||||
/// dashboard page IS reachable under the canonical <c>/dashboard/X</c>
|
||||
/// route, which is what the Blazor router actually serves.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task Build_WhenDashboardEnabled_DoesNotRegisterDoubledDashboardPrefixRoutes()
|
||||
public async Task Build_WhenDashboardEnabled_RegistersCanonicalDashboardRoutes()
|
||||
{
|
||||
await using WebApplication app = GatewayApplication.Build([]);
|
||||
IReadOnlyList<RouteEndpoint> endpoints = GetRouteEndpoints(app);
|
||||
|
||||
string[] doubledRoutes =
|
||||
string[] canonicalRoutes =
|
||||
[
|
||||
"/dashboard/dashboard/",
|
||||
"/dashboard/dashboard/sessions",
|
||||
"/dashboard/dashboard/workers",
|
||||
"/dashboard/dashboard/events",
|
||||
"/dashboard/dashboard/settings",
|
||||
"/dashboard/dashboard/galaxy",
|
||||
"/dashboard/dashboard/apikeys",
|
||||
"/dashboard/dashboard/sessions/{SessionId}",
|
||||
"/dashboard/",
|
||||
"/dashboard/sessions",
|
||||
"/dashboard/workers",
|
||||
"/dashboard/events",
|
||||
"/dashboard/settings",
|
||||
"/dashboard/galaxy",
|
||||
"/dashboard/apikeys",
|
||||
"/dashboard/sessions/{SessionId}",
|
||||
];
|
||||
foreach (string doubled in doubledRoutes)
|
||||
foreach (string canonical in canonicalRoutes)
|
||||
{
|
||||
Assert.DoesNotContain(endpoints, endpoint => endpoint.RoutePattern.RawText == doubled);
|
||||
Assert.Contains(endpoints, endpoint => endpoint.RoutePattern.RawText == canonical);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -300,6 +300,36 @@ public sealed class SessionManagerTests
|
||||
Assert.Equal(0, workerClient.InvokeCount);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Server-030 regression: when the gateway-side <c>SessionState</c> is
|
||||
/// <c>Ready</c> but the worker client's own state is not, the diagnostic
|
||||
/// must surface both states so the mismatch is actionable instead of
|
||||
/// producing a self-contradictory "Session ... is not ready. Current
|
||||
/// state is Ready." message.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task InvokeAsync_WhenWorkerNotReadyButSessionReady_DiagnosticIncludesBothStates()
|
||||
{
|
||||
FakeWorkerClient workerClient = new();
|
||||
SessionManager manager = CreateManager(new FakeSessionWorkerClientFactory(workerClient));
|
||||
GatewaySession session = await manager.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
|
||||
|
||||
// Force a state mismatch: session stays Ready, worker transitions out.
|
||||
workerClient.State = WorkerClientState.Handshaking;
|
||||
Assert.Equal(SessionState.Ready, session.State);
|
||||
|
||||
SessionManagerException exception = await Assert.ThrowsAsync<SessionManagerException>(
|
||||
async () => await manager.InvokeAsync(
|
||||
session.SessionId,
|
||||
CreateCommand(MxCommandKind.Ping),
|
||||
CancellationToken.None));
|
||||
|
||||
Assert.Equal(SessionManagerErrorCode.SessionNotReady, exception.ErrorCode);
|
||||
Assert.Contains("Session state is Ready", exception.Message);
|
||||
Assert.Contains("worker state is Handshaking", exception.Message);
|
||||
Assert.Equal(0, workerClient.InvokeCount);
|
||||
}
|
||||
|
||||
/// <summary>Verifies that closing a session removes it from the registry.</summary>
|
||||
[Fact]
|
||||
public async Task CloseSessionAsync_RemovesClosedSession()
|
||||
|
||||
Reference in New Issue
Block a user