Resolve Client.Dotnet-018..021: README + bench-read-bulk hardening

Client.Dotnet-018  README CLI examples for stream-alarms / acknowledge-alarm
                   replaced with parser-correct flags; new theory test
                   parses each documented README example through the CLI.
Client.Dotnet-019  BenchReadBulkAsync routes through new
                   RequireRegisterServerHandle helper that fails loudly when
                   the OK register reply has no typed payload.
Client.Dotnet-020  Bench steady-state catch is now
                   catch (Exception ex) when (ex is not OperationCanceledException)
                   so user-driven cancellation exits promptly.
Client.Dotnet-021  --timeout-ms now flows through ParseTimeoutMs which
                   rejects negatives with a clear error in both read-bulk
                   and bench-read-bulk.

All resolved on 2026-05-24; 67/67 .NET client tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-24 08:49:45 -04:00
parent 4d77279e7e
commit 712cb06442
4 changed files with 423 additions and 11 deletions
+2 -2
View File
@@ -134,8 +134,8 @@ dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- advise --s
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- write --session-id <id> --server-handle 1 --item-handle 1 --type int32 --value 123 --json
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- write2 --session-id <id> --server-handle 1 --item-handle 1 --type int32 --value 123 --timestamp 2026-01-01T00:00:00Z --json
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- stream-events --session-id <id> --max-events 1 --json
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- stream-alarms --session-id <id> --max-messages 1 --json
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- acknowledge-alarm --session-id <id> --alarm-reference "\\Galaxy\Area001.Pump001.PumpFault" --json
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- stream-alarms --filter-prefix Area001 --max-events 1 --json
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- acknowledge-alarm --reference "\\Galaxy\Area001.Pump001.PumpFault" --comment "ack from cli" --operator operator1 --json
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- smoke --endpoint http://localhost:5000 --api-key-env MXGATEWAY_API_KEY --item Area001.Pump001.Speed --json
```
@@ -487,7 +487,7 @@ public static class MxGatewayClientCli
ReadBulkCommand command = new()
{
ServerHandle = arguments.GetInt32("server-handle"),
TimeoutMs = (uint)arguments.GetInt32("timeout-ms", 0),
TimeoutMs = ParseTimeoutMs(arguments, defaultValue: 0),
};
command.TagAddresses.Add(ParseStringList(arguments.GetRequired("items")));
@@ -692,6 +692,49 @@ public static class MxGatewayClientCli
}
}
/// <summary>
/// Reads the optional <c>--timeout-ms</c> option and converts it to the
/// unsigned millisecond count used by the read-bulk commands. The sign is
/// checked before the cast because <c>(uint)-1</c> does not fail: it wraps
/// to 4,294,967,295 ms (about 49.7 days), turning a typo into an
/// effectively unbounded wait. Resolves Client.Dotnet-021.
/// </summary>
/// <param name="arguments">Parsed CLI arguments the option is read from.</param>
/// <param name="defaultValue">
/// Fallback handed to <c>GetInt32</c>; presumably the value returned when
/// <c>--timeout-ms</c> is absent (confirm against <c>CliArguments</c>).
/// </param>
/// <returns>The validated timeout in milliseconds.</returns>
/// <exception cref="ArgumentException">The resolved value is negative.</exception>
private static uint ParseTimeoutMs(CliArguments arguments, int defaultValue)
{
    int requestedMs = arguments.GetInt32("timeout-ms", defaultValue);

    return requestedMs >= 0
        ? (uint)requestedMs
        : throw new ArgumentException(
            "--timeout-ms must be a non-negative integer (use 0 for the gateway default).");
}
/// <summary>
/// Returns the <c>ServerHandle</c> carried by the typed <c>Register</c>
/// payload of a Register reply. A reply without that payload is treated as
/// a contract violation and reported with a descriptive
/// <see cref="MxGatewayException"/>; the handle is never taken from
/// <c>ReturnValue.Int32Value</c>, which the previous code fell back to.
/// Resolves Client.Dotnet-019.
/// </summary>
/// <param name="reply">Reply received for the Register command.</param>
/// <param name="sessionId">Session identifier, used only in the error message.</param>
/// <returns>The server handle from the typed payload.</returns>
/// <exception cref="MxGatewayException">
/// <paramref name="reply"/> has no typed <c>Register</c> payload.
/// </exception>
private static int RequireRegisterServerHandle(MxCommandReply reply, string sessionId)
{
    if (reply.Register is { } registerPayload)
    {
        return registerPayload.ServerHandle;
    }

    throw new MxGatewayException(
        $"Gateway reply for Register on session '{sessionId}' (correlation '{reply.CorrelationId}') "
        + "succeeded but is missing the typed 'register' payload required to read ServerHandle.");
}
/// <summary>
/// Cross-language stress benchmark for ReadBulk. Opens its own session,
/// subscribes to N tags so the worker's MxAccessValueCache populates from
@@ -712,7 +755,7 @@ public static class MxGatewayClientCli
int tagStart = arguments.GetInt32("tag-start", 1);
string tagPrefix = arguments.GetOptional("tag-prefix") ?? "TestMachine_";
string tagAttribute = arguments.GetOptional("tag-attribute") ?? "TestChangingInt";
uint timeoutMs = (uint)arguments.GetInt32("timeout-ms", 1500);
uint timeoutMs = ParseTimeoutMs(arguments, defaultValue: 1500);
string clientName = arguments.GetOptional("client-name") ?? "mxgw-dotnet-bench";
string[] tags = new string[bulkSize];
@@ -742,7 +785,7 @@ public static class MxGatewayClientCli
}),
cancellationToken)
.ConfigureAwait(false);
int serverHandle = registerReply.Register?.ServerHandle ?? registerReply.ReturnValue.Int32Value;
int serverHandle = RequireRegisterServerHandle(registerReply, sessionId);
SubscribeBulkCommand subscribe = new() { ServerHandle = serverHandle };
subscribe.TagAddresses.Add(tags);
@@ -801,8 +844,13 @@ public static class MxGatewayClientCli
.ConfigureAwait(false);
sw.Stop();
}
catch
catch (Exception ex) when (ex is not OperationCanceledException)
{
// Client.Dotnet-020: never swallow OperationCanceledException
// here. A bare `catch` would let Ctrl+C / parent CTS /
// wall-clock timeouts keep spinning until --duration-seconds
// elapsed, burning CPU and skewing the p99/max latency numbers
// with hundreds of immediate-OCE iterations.
sw.Stop();
failedCalls++;
latencyMillis.Add(sw.Elapsed.TotalMilliseconds);
@@ -509,6 +509,354 @@ public sealed class MxGatewayClientCliTests
Assert.Contains("gateway-protocol=", text);
}
/// <summary>
/// Client.Dotnet-018: the alarm-subcommand examples documented in
/// `clients/dotnet/README.md` must be accepted by the production CLI. The
/// example line for <paramref name="command"/> is pulled out of the README,
/// tokenized, and run through <see cref="MxGatewayClientCli.RunAsync"/>
/// against a fake client; the run must exit with code 0 and stderr must not
/// mention "Unknown command" or "Missing required option". The earlier
/// README text used flags (`--session-id`, `--max-messages`,
/// `--alarm-reference`) that these commands do not define.
/// </summary>
/// <param name="command">Subcommand whose README example is exercised.</param>
[Theory]
[InlineData("stream-alarms")]
[InlineData("acknowledge-alarm")]
public async Task RunAsync_ReadmeExamples_ForAlarmCommands_ParseSuccessfully(string command)
{
    string[] documentedArgs = ExtractReadmeCommandLine(LocateClientReadme(), command);

    // The README examples carry no --api-key; they rely on the environment
    // variable instead. Provide one for the duration of the run so option
    // creation succeeds, and put the caller's value back afterwards.
    const string apiKeyVariable = "MXGATEWAY_API_KEY";
    string? savedApiKey = Environment.GetEnvironmentVariable(apiKeyVariable);
    Environment.SetEnvironmentVariable(apiKeyVariable, "test-api-key");
    try
    {
        // One canned response per alarm command, so whichever example is
        // under test has something to consume.
        FakeCliClient fakeClient = new();
        fakeClient.AlarmFeedMessages.Add(new AlarmFeedMessage
        {
            ActiveAlarm = new ActiveAlarmSnapshot { AlarmFullReference = "fixture" },
        });
        fakeClient.AcknowledgeAlarmReplies.Enqueue(new AcknowledgeAlarmReply
        {
            CorrelationId = "ack-fixture",
            ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
        });

        using StringWriter stdout = new();
        using StringWriter stderr = new();

        int exitCode = await MxGatewayClientCli.RunAsync(
            documentedArgs,
            stdout,
            stderr,
            _ => fakeClient);

        string stderrText = stderr.ToString();
        Assert.True(
            exitCode == 0,
            $"README example for '{command}' exited {exitCode}; stderr=<<{stderrText}>>");
        Assert.DoesNotContain("Unknown command", stderrText);
        Assert.DoesNotContain("Missing required option", stderrText);
    }
    finally
    {
        Environment.SetEnvironmentVariable(apiKeyVariable, savedApiKey);
    }
}
/// <summary>
/// Client.Dotnet-019: `bench-read-bulk` must refuse to continue when the
/// Register reply reports success but carries no typed <c>Register</c>
/// payload. The old code fell back to <c>ReturnValue.Int32Value</c> and
/// benchmarked against whatever handle that produced; the expected
/// behaviour is exit code 1, an error that names the Register payload, and
/// no benchmark summary on stdout.
/// </summary>
[Fact]
public async Task RunAsync_BenchReadBulk_WhenRegisterReplyMissingTypedPayload_FailsLoudly()
{
    // Protocol status is OK, but the typed `Register` sub-message is
    // deliberately left unset.
    MxCommandReply payloadlessRegisterReply = new()
    {
        SessionId = "session-fixture",
        Kind = MxCommandKind.Register,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
    };
    FakeCliClient fakeClient = new();
    fakeClient.InvokeReplies.Enqueue(payloadlessRegisterReply);

    string[] benchArgs =
    [
        "bench-read-bulk",
        "--endpoint",
        "http://localhost:5000",
        "--api-key",
        "test-api-key",
        "--duration-seconds",
        "1",
        "--warmup-seconds",
        "0",
        "--bulk-size",
        "1",
    ];

    using StringWriter stdout = new();
    using StringWriter stderr = new();

    int exitCode = await MxGatewayClientCli.RunAsync(
        benchArgs,
        stdout,
        stderr,
        _ => fakeClient);

    Assert.Equal(1, exitCode);

    // The error text has to point at the missing typed payload.
    Assert.Contains("Register", stderr.ToString());

    // No aggregate stats may be emitted for a run that never started.
    Assert.DoesNotContain("bench-read-bulk", stdout.ToString());
}
/// <summary>
/// Client.Dotnet-020: the steady-state loop of `bench-read-bulk` used a
/// bare `catch` that also swallowed
/// <see cref="OperationCanceledException"/>, so a cancelled run kept
/// iterating until <c>--duration-seconds</c> ran out. This test scripts a
/// fake client whose invocations start throwing
/// <see cref="OperationCanceledException"/> after the first successful
/// ReadBulk, asks for a 30 s run, and requires the exception to surface
/// from <see cref="MxGatewayClientCli.RunAsync"/> in under 10 s.
/// </summary>
[Fact]
public async Task RunAsync_BenchReadBulk_WhenSteadyStateLoopReceivesCancellation_ExitsPromptly()
{
    // Canned successful replies, one per command kind the bench issues.
    static MxCommandReply RegisterOk() => new()
    {
        SessionId = "session-fixture",
        Kind = MxCommandKind.Register,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
        Register = new RegisterReply { ServerHandle = 1 },
    };

    static MxCommandReply SubscribeBulkOk() => new()
    {
        SessionId = "session-fixture",
        Kind = MxCommandKind.SubscribeBulk,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
        SubscribeBulk = new BulkSubscribeReply(),
    };

    static MxCommandReply ReadBulkOk() => new()
    {
        SessionId = "session-fixture",
        Kind = MxCommandKind.ReadBulk,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
        ReadBulk = new BulkReadReply(),
    };

    int callsSeen = 0;
    FakeCliClient fakeClient = new()
    {
        InvokeHandler = (request, _) =>
        {
            // Counts every invocation, whatever its kind.
            int callNumber = Interlocked.Increment(ref callsSeen);
            var kind = request.Command.Kind;

            if (kind == MxCommandKind.Register)
            {
                return Task.FromResult(RegisterOk());
            }

            if (kind == MxCommandKind.SubscribeBulk)
            {
                return Task.FromResult(SubscribeBulkOk());
            }

            // With Register and SubscribeBulk taking calls 1 and 2, the
            // `<= 3` bound lets exactly one ReadBulk succeed so the
            // steady-state loop is entered before cancellation starts.
            if (kind == MxCommandKind.ReadBulk && callNumber <= 3)
            {
                return Task.FromResult(ReadBulkOk());
            }

            // Everything after that simulates cancellation; the loop must
            // let it propagate instead of counting it as a failed call.
            throw new OperationCanceledException();
        },
    };

    string[] benchArgs =
    [
        "bench-read-bulk",
        "--endpoint",
        "http://localhost:5000",
        "--api-key",
        "test-api-key",
        "--duration-seconds",
        "30",
        "--warmup-seconds",
        "0",
        "--bulk-size",
        "1",
    ];

    using StringWriter stdout = new();
    using StringWriter stderr = new();

    var elapsed = System.Diagnostics.Stopwatch.StartNew();
    await Assert.ThrowsAsync<OperationCanceledException>(async () =>
        await MxGatewayClientCli.RunAsync(
            benchArgs,
            stdout,
            stderr,
            _ => fakeClient));
    elapsed.Stop();

    // A loop that swallows the cancellation would run for the full 30 s;
    // 10 s is a deliberately loose ceiling so a loaded machine still passes.
    Assert.True(
        elapsed.Elapsed < TimeSpan.FromSeconds(10),
        $"Bench did not exit promptly on cancellation; took {elapsed.Elapsed}.");
}
/// <summary>
/// Client.Dotnet-021: `read-bulk` and `bench-read-bulk` used to cast the
/// user-supplied <c>--timeout-ms</c> straight to <see cref="uint"/>, so
/// <c>-1</c> wrapped to roughly 49.7 days. Both commands must now reject a
/// negative value: non-zero exit code and an error mentioning both
/// "timeout-ms" and "non-negative".
/// </summary>
/// <param name="command">Subcommand to invoke with <c>--timeout-ms -1</c>.</param>
[Theory]
[InlineData("read-bulk")]
[InlineData("bench-read-bulk")]
public async Task RunAsync_TimeoutMs_NegativeValue_RejectsWithClearError(string command)
{
    bool isReadBulk = command is "read-bulk";

    // Options that only one of the two commands takes; the endpoint,
    // API key and the negative timeout are shared.
    string verb = isReadBulk ? "read-bulk" : "bench-read-bulk";
    string[] commandSpecific = isReadBulk
        ?
        [
            "--session-id",
            "session-fixture",
            "--server-handle",
            "1",
            "--items",
            "Area001.Pump001.Speed",
        ]
        :
        [
            "--duration-seconds",
            "1",
            "--warmup-seconds",
            "0",
            "--bulk-size",
            "1",
        ];
    string[] args =
    [
        verb,
        "--endpoint",
        "http://localhost:5000",
        "--api-key",
        "test-api-key",
        .. commandSpecific,
        "--timeout-ms",
        "-1",
    ];

    FakeCliClient fakeClient = new();
    using StringWriter stdout = new();
    using StringWriter stderr = new();

    int exitCode = await MxGatewayClientCli.RunAsync(
        args,
        stdout,
        stderr,
        _ => fakeClient);

    Assert.NotEqual(0, exitCode);
    string stderrText = stderr.ToString();
    Assert.Contains("timeout-ms", stderrText);
    Assert.Contains("non-negative", stderrText);
}
/// <summary>
/// Finds the .NET client README by starting at
/// <see cref="AppContext.BaseDirectory"/> and probing each ancestor
/// directory for <c>clients/dotnet/README.md</c>, so the lookup does not
/// depend on the process working directory.
/// </summary>
/// <returns>Full path of the first matching README found while walking up.</returns>
/// <exception cref="FileNotFoundException">
/// No ancestor of the base directory contains the README.
/// </exception>
private static string LocateClientReadme()
{
    // Path.GetDirectoryName yields null once the root is passed, which
    // terminates the walk.
    for (string? ancestor = AppContext.BaseDirectory;
         !string.IsNullOrEmpty(ancestor);
         ancestor = Path.GetDirectoryName(ancestor))
    {
        string readmePath = Path.Combine(ancestor, "clients", "dotnet", "README.md");
        if (File.Exists(readmePath))
        {
            return readmePath;
        }
    }

    throw new FileNotFoundException("clients/dotnet/README.md not found above test assembly base directory.");
}
/// <summary>
/// Pulls the documented CLI invocation for <paramref name="command"/> out
/// of the README and returns it as argv tokens (subcommand first) that can
/// be passed straight to <see cref="MxGatewayClientCli.RunAsync"/>.
/// </summary>
/// <remarks>
/// A line matches when it contains <c>-- &lt;command&gt;</c> followed by
/// whitespace or the end of the line. The boundary check matters: a plain
/// prefix search for <c>write</c> would also accept a
/// <c>-- write2 ...</c> example and return the wrong command line.
/// </remarks>
/// <param name="readmePath">Path of the README file to scan.</param>
/// <param name="command">Exact subcommand name to look for.</param>
/// <returns>Tokens of the first matching example, starting at the subcommand.</returns>
/// <exception cref="InvalidOperationException">
/// The README contains no example for <paramref name="command"/>.
/// </exception>
private static string[] ExtractReadmeCommandLine(string readmePath, string command)
{
    // The documented shape is `dotnet run ... -- <command> ...`.
    string marker = "-- " + command;

    foreach (string line in File.ReadLines(readmePath))
    {
        int markerStart = line.IndexOf(marker, StringComparison.Ordinal);
        while (markerStart >= 0)
        {
            int markerEnd = markerStart + marker.Length;

            // Accept only a whole-word match of the subcommand name.
            if (markerEnd == line.Length || char.IsWhiteSpace(line[markerEnd]))
            {
                // Skip the "-- " separator (3 chars) so the subcommand is token 0.
                string invocation = line[(markerStart + 3)..].Trim();
                return TokenizeCommandLine(invocation);
            }

            markerStart = line.IndexOf(marker, markerEnd, StringComparison.Ordinal);
        }
    }

    throw new InvalidOperationException(
        $"README at '{readmePath}' has no documented example for subcommand '{command}'.");
}
/// <summary>
/// Splits a single command-line string into argv tokens. Unquoted
/// whitespace separates tokens; a double quote toggles quoted mode and is
/// itself dropped, so quoted segments keep their embedded spaces.
/// </summary>
/// <remarks>
/// An explicitly empty quoted argument (<c>""</c>) is kept as an empty
/// token. Dropping it would shift every following argument by one slot,
/// so a flag's value would be read from the next flag's name. No escape
/// sequences are interpreted: a backslash is an ordinary character. An
/// unterminated quote runs to the end of the input.
/// </remarks>
/// <param name="input">Command line to split.</param>
/// <returns>The tokens in order; empty when the input has no tokens.</returns>
private static string[] TokenizeCommandLine(string input)
{
    var tokens = new List<string>();
    var current = new System.Text.StringBuilder();
    bool inQuotes = false;

    // True once the pending token has seen a character or a quote; lets
    // `""` register as a token even though the builder stays empty.
    bool tokenStarted = false;

    foreach (char ch in input)
    {
        if (ch == '"')
        {
            inQuotes = !inQuotes;
            tokenStarted = true;
            continue;
        }

        if (!inQuotes && char.IsWhiteSpace(ch))
        {
            if (tokenStarted)
            {
                tokens.Add(current.ToString());
                current.Clear();
                tokenStarted = false;
            }

            continue;
        }

        current.Append(ch);
        tokenStarted = true;
    }

    // Flush the trailing token, if any.
    if (tokenStarted)
    {
        tokens.Add(current.ToString());
    }

    return tokens.ToArray();
}
/// <summary>Fake CLI client for testing.</summary>
private sealed class FakeCliClient : IMxGatewayCliClient
{
@@ -527,6 +875,9 @@ public sealed class MxGatewayClientCliTests
/// <summary>Exception to throw on invoke, if any.</summary>
public Exception? InvokeFailure { get; init; }
/// <summary>Optional per-call handler that overrides queue-based behaviour.</summary>
public Func<MxCommandRequest, CancellationToken, Task<MxCommandReply>>? InvokeHandler { get; init; }
/// <inheritdoc />
public ValueTask DisposeAsync()
{
@@ -572,6 +923,11 @@ public sealed class MxGatewayClientCliTests
throw InvokeFailure;
}
if (InvokeHandler is not null)
{
return InvokeHandler(request, cancellationToken);
}
return Task.FromResult(InvokeReplies.Dequeue());
}