fix: resolve code-review findings (locally verified)

Server-054/055/056, Contracts-020/021/022, Tests-036/038/039,
IntegrationTests-030/031/032 (+033 deferred to live rig),
Client.Dotnet-026/028/029 (+027 won't-fix), Client.Go-030..034,
Client.Python-032..036, Client.Rust-033..038.

Key fix: SessionEventDistributor orphaned a subscriber that registered after
the pump completed but before disposal (Server-056) -> register paths now
complete late registrants under _lifecycleLock; regression test added. The
racy dashboard-mirror gRPC test made deterministic (Tests-039).

Verified green locally: gateway Tests targeted classes (GatewaySession,
SessionEventDistributor, GatewayOptionsValidator, ProtobufContractRoundTrip,
GatewaySessionDashboardMirror) + dotnet/go/python/rust client suites.
This commit is contained in:
Joseph Doherty
2026-06-17 05:23:14 -04:00
parent 25d04ec37e
commit 6b5fe6aa82
37 changed files with 1049 additions and 211 deletions
@@ -3,6 +3,13 @@ using ZB.MOM.WW.MxGateway.Contracts.Proto.Galaxy;
namespace ZB.MOM.WW.MxGateway.Client.Cli;
/// <summary>
/// Transport seam used by the CLI to drive gateway and Galaxy Repository
/// RPCs, exposing only the operations the command surface needs. The
/// production binding is <see cref="MxGatewayCliClientAdapter"/> (wrapping a
/// real <c>MxGatewayClient</c>); tests substitute an in-memory fake so the
/// command routing can be exercised without a live gateway.
/// </summary>
public interface IMxGatewayCliClient : IAsyncDisposable
{
/// <summary>
@@ -153,7 +153,10 @@ public static class MxGatewayClientCli
}
catch (Exception exception) when (exception is not OperationCanceledException)
{
string? apiKey = arguments.GetOptional("api-key");
// Client.Dotnet-028: redact the *effective* key — from --api-key or the
// --api-key-env environment variable — so an env-var-sourced key echoed
// in a transport error never reaches stderr unredacted.
string? apiKey = TryResolveApiKey(arguments);
string message = MxGatewayCliSecretRedactor.Redact(exception.Message, apiKey);
if (forceJsonErrors || arguments.HasFlag("json"))
@@ -278,6 +281,29 @@ public static class MxGatewayClientCli
}
private static string ResolveApiKey(CliArguments arguments)
{
string? apiKey = TryResolveApiKey(arguments);
if (!string.IsNullOrWhiteSpace(apiKey))
{
return apiKey;
}
string apiKeyEnvironmentName = arguments.GetOptional("api-key-env")
?? "MXGATEWAY_API_KEY";
throw new ArgumentException(
$"Gateway API key is required. Pass --api-key or set {apiKeyEnvironmentName}.");
}
/// <summary>
/// Resolves the effective API key from <c>--api-key</c> or, failing that,
/// the <c>--api-key-env</c>-named environment variable (default
/// <c>MXGATEWAY_API_KEY</c>), returning <see langword="null"/> when neither
/// is set. Unlike <see cref="ResolveApiKey"/> this never throws, so the
/// error-redaction catch block can strip the env-var-sourced key from
/// output (Client.Dotnet-028) without re-raising on the absent-key path.
/// </summary>
private static string? TryResolveApiKey(CliArguments arguments)
{
string? apiKey = arguments.GetOptional("api-key");
if (!string.IsNullOrWhiteSpace(apiKey))
@@ -288,14 +314,7 @@ public static class MxGatewayClientCli
string apiKeyEnvironmentName = arguments.GetOptional("api-key-env")
?? "MXGATEWAY_API_KEY";
apiKey = Environment.GetEnvironmentVariable(apiKeyEnvironmentName);
if (!string.IsNullOrWhiteSpace(apiKey))
{
return apiKey;
}
throw new ArgumentException(
$"Gateway API key is required. Pass --api-key or set {apiKeyEnvironmentName}.");
return Environment.GetEnvironmentVariable(apiKeyEnvironmentName);
}
private static CancellationTokenSource CreateCancellation(CliArguments arguments, string command)
@@ -303,7 +322,7 @@ public static class MxGatewayClientCli
var cancellation = new CancellationTokenSource();
// Long-running streaming commands run until Ctrl+C / cancellation by default;
// a caller-supplied --timeout still applies if present.
bool isLongRunning = command is "galaxy-watch";
bool isLongRunning = command is "galaxy-watch" or "galaxy-browse";
string? rawTimeout = arguments.GetOptional("timeout");
if (isLongRunning && string.IsNullOrWhiteSpace(rawTimeout))
{
@@ -106,6 +106,48 @@ public sealed class MxGatewayClientCliTests
Assert.Contains("[redacted]", error.ToString());
}
/// <summary>
/// Client.Dotnet-028: when the API key is sourced from the env var
/// (<c>--api-key-env</c> path, no <c>--api-key</c> flag), the error-redaction
/// catch block must still resolve and redact the effective key. Regression
/// guard for the catch block reverting to <c>GetOptional("api-key")</c> only,
/// which is null on the env-var path and leaves the key unredacted.
/// </summary>
[Fact]
public async Task RunAsync_ErrorOutput_RedactsApiKey_WhenSourcedFromEnvironmentVariable()
{
const string envName = "MXGATEWAY_TEST_API_KEY_028";
const string secret = "env-sourced-secret-key";
string? previousKey = Environment.GetEnvironmentVariable(envName);
Environment.SetEnvironmentVariable(envName, secret);
try
{
using var output = new StringWriter();
using var error = new StringWriter();
int exitCode = await MxGatewayClientCli.RunAsync(
[
"open-session",
"--endpoint",
"http://localhost:5000",
"--api-key-env",
envName,
],
output,
error,
_ => throw new InvalidOperationException($"boom {secret}"));
Assert.Equal(1, exitCode);
Assert.DoesNotContain(secret, error.ToString());
Assert.Contains("[redacted]", error.ToString());
}
finally
{
Environment.SetEnvironmentVariable(envName, previousKey);
}
}
/// <summary>Verifies that stream-events with max-events limit stops output in non-JSON format.</summary>
[Fact]
public async Task RunAsync_StreamEvents_WithMaxEventsStopsNonJsonOutput()
@@ -12,6 +12,12 @@ public sealed class LazyBrowseNode
{
private readonly GalaxyRepositoryClient _client;
private readonly BrowseChildrenOptions _options;
// Client.Dotnet-027 (Won't Fix): this gate is used only via WaitAsync/Release and
// never via AvailableWaitHandle, so SemaphoreSlim allocates no kernel wait handle —
// it holds no unmanaged/OS handle to leak. It is pure managed memory whose lifetime
// is the node's, so the type is intentionally not IDisposable: making it disposable
// would push per-node disposal onto every tree consumer (thousands of nodes) for no
// resource benefit.
private readonly SemaphoreSlim _expandLock = new(1, 1);
// Published once, under _expandLock, when expansion completes. Lock-free readers