Cross-language ReadBulk stress benchmark

Adds a bench-read-bulk subcommand to every client CLI (.NET, Go, Rust,
Python, Java) and a PowerShell driver that runs all five concurrently
against the deployed gateway and prints a side-by-side comparison.

Each CLI's bench:

  - Opens its own session, registers, subscribes to bulk-size tags so the
    worker's MxAccessValueCache populates from real OnDataChange events.
  - Runs a warmup-seconds-long pre-loop with identical calls so JIT /
    connection-pool / first-call overhead is amortised before the
    measurement window.
  - Runs ReadBulk in a tight in-process loop for duration-seconds with
    per-call high-resolution latency capture (Stopwatch in .NET,
    time.Now in Go, std::time::Instant in Rust, time.perf_counter in
    Python, System.nanoTime in Java).
  - Unsubscribes + closes the session, then emits one JSON object with
    the shared schema: { language, durationMs, totalCalls, successfulCalls,
    failedCalls, totalReadResults, cachedReadResults, callsPerSecond,
    latencyMs: { p50, p95, p99, max, mean } }.

The PS driver (scripts/bench-read-bulk.ps1) launches one detached process
per client, waits for all to finish, parses the trailing JSON object from
each stdout, prints a comparison table, and persists the combined report
under artifacts/bench/. Quoting around Java's `gradle --args="..."` is
handled by writing a one-shot .bat that cmd.exe runs; the .NET CLI's
per-call gRPC timeout is auto-scaled to (Duration + Warmup + 30s) so the
channel-wide timeout doesn't cancel the bench mid-loop.

Live 30-second steady-state run against the deployed gateway, all five
clients hitting the same six TestMachine_001..006.TestChangingInt tags:

  client    calls/sec  cached/total    p50 ms  p95 ms  p99 ms  max ms
  dotnet      171.78   30924/30924      3.84   14.06   40.41  542.48
  go          175.46   31590/31590      3.93   13.52   41.26  243.00
  rust        123.26   22188/22188      5.52   15.78   48.11  544.41
  python      145.79   26244/26244      4.86   14.85   41.65  645.84
  java        181.12   32604/32604      3.80   10.59   33.37  344.27

143,550 ReadBulk results across all five clients during the 30s window;
100% were was_cached = true (the worker's cache fast-path never fell
through to the snapshot lifecycle). Aggregate read throughput ~800
calls/sec against five concurrent sessions sharing the same cached tags.

A second variant with bulk-size 20 sustained the same per-client call
rate while delivering 3.3x more values per call (~37,000 cached reads/sec
aggregate across the five concurrent sessions), confirming the linear
per-tag cache lookup inside one call is not a bottleneck at this scale.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-20 05:17:08 -04:00
parent eaa7093cd6
commit 93633ce99c
6 changed files with 1370 additions and 0 deletions
@@ -120,6 +120,7 @@ public final class MxGatewayCli implements Callable<Integer> {
commandLine.addSubcommand("write2-bulk", new Write2BulkCommand(clientFactory));
commandLine.addSubcommand("write-secured-bulk", new WriteSecuredBulkCommand(clientFactory));
commandLine.addSubcommand("write-secured2-bulk", new WriteSecured2BulkCommand(clientFactory));
commandLine.addSubcommand("bench-read-bulk", new BenchReadBulkCommand(clientFactory));
commandLine.addSubcommand("write", new WriteCommand(clientFactory));
commandLine.addSubcommand("stream-events", new StreamEventsCommand(clientFactory));
commandLine.addSubcommand("smoke", new SmokeCommand(clientFactory));
@@ -769,6 +770,224 @@ public final class MxGatewayCli implements Callable<Integer> {
}
}
/**
 * Cross-language ReadBulk stress benchmark — mirrors the .NET / Go / Rust /
 * Python implementations so the PS driver collates one JSON schema across
 * all five clients.
 *
 * <p>The command opens a session, registers, subscribes to the generated tags,
 * runs an unmeasured warm-up loop followed by a measured loop of
 * {@code readBulk} calls, then unsubscribes, closes the session and passes the
 * collected counters and per-call latencies to {@code writeBenchOutput}.
 */
@Command(name = "bench-read-bulk", description = "Cross-language ReadBulk stress benchmark.")
static final class BenchReadBulkCommand extends GatewayCommand {
    /** Cap on the pre-sized latency buffer; the buffer still doubles on demand. */
    private static final int MAX_INITIAL_LATENCY_SAMPLES = 1 << 22;

    /** Name used both for the opened session and for the register call. */
    @Option(names = "--client-name", defaultValue = "mxgw-java-bench")
    String clientName;

    /** Length of the measured window, in seconds. */
    @Option(names = "--duration-seconds", defaultValue = "30")
    int durationSeconds;

    /** Length of the unmeasured warm-up window, in seconds. */
    @Option(names = "--warmup-seconds", defaultValue = "3")
    int warmupSeconds;

    /** Number of tags generated and read per call; must be at least 1. */
    @Option(names = "--bulk-size", defaultValue = "6")
    int bulkSize;

    /** Index of the first generated tag; later tags count up from it. */
    @Option(names = "--tag-start", defaultValue = "1")
    int tagStart;

    /** Text placed before the zero-padded tag index. */
    @Option(names = "--tag-prefix", defaultValue = "TestMachine_")
    String tagPrefix;

    /** Attribute name appended after the tag index and a dot. */
    @Option(names = "--tag-attribute", defaultValue = "TestChangingInt")
    String tagAttribute;

    /** Timeout value handed to every {@code readBulk} call. */
    @Option(names = "--timeout-ms", defaultValue = "1500")
    int timeoutMs;

    BenchReadBulkCommand(MxGatewayCliClientFactory clientFactory) {
        super(clientFactory);
    }

    /**
     * Runs the benchmark and writes the report.
     *
     * <p>A failed {@code readBulk} call inside the measured window is counted
     * and timed rather than aborting the run. A failure during warm-up,
     * register or subscribe propagates to the caller after the session has
     * been cleaned up.
     *
     * @return {@code 0} when the benchmark ran to completion
     * @throws IllegalArgumentException if {@code --bulk-size} is less than 1
     */
    @Override
    public Integer call() {
        if (bulkSize < 1) {
            throw new IllegalArgumentException("bulk-size must be positive");
        }
        List<String> tags = new ArrayList<>(bulkSize);
        for (int i = 0; i < bulkSize; i++) {
            // Locale.ROOT keeps the zero-padded index in ASCII digits; the
            // default locale may substitute its own digit characters and
            // produce tag names the gateway does not know.
            tags.add(String.format(
                    java.util.Locale.ROOT, "%s%03d.%s", tagPrefix, tagStart + i, tagAttribute));
        }
        try (MxGatewayCliClient client = clientFactory.connect(common.resolved())) {
            var openReply = client.openSession(
                    mxaccess_gateway.v1.MxaccessGateway.OpenSessionRequest.newBuilder()
                            .setClientSessionName(clientName)
                            .build());
            String sessionId = openReply.getSessionId();
            MxGatewayCliSession session = client.session(sessionId);
            List<Integer> itemHandles = new ArrayList<>();
            long steadyElapsedNanos;
            long[] latenciesNanos;
            int latencyCount = 0;
            long successful = 0;
            long failed = 0;
            long totalResults = 0;
            long cachedResults = 0;
            // The outer try owns the session: everything after openSession,
            // including register, runs inside it so the session is closed on
            // every exit path.
            try {
                int serverHandle = session.register(clientName);
                try {
                    List<SubscribeResult> subscribeResults = session.subscribeBulk(serverHandle, tags);
                    for (SubscribeResult r : subscribeResults) {
                        if (r.getWasSuccessful()) {
                            itemHandles.add(r.getItemHandle());
                        }
                    }

                    // Warm-up window — drives identical calls so JIT / connection
                    // pool effects are amortised before the measurement window.
                    long warmupDeadline = System.nanoTime() + warmupSeconds * 1_000_000_000L;
                    while (System.nanoTime() < warmupDeadline) {
                        session.readBulk(serverHandle, tags, timeoutMs);
                    }

                    latenciesNanos = new long[initialLatencyCapacity(durationSeconds)];
                    long steadyStart = System.nanoTime();
                    long steadyDeadline = steadyStart + durationSeconds * 1_000_000_000L;
                    while (System.nanoTime() < steadyDeadline) {
                        long callStart = System.nanoTime();
                        List<BulkReadResult> results = null;
                        try {
                            results = session.readBulk(serverHandle, tags, timeoutMs);
                        } catch (Exception ex) {
                            // Counted as a failed call below; the benchmark keeps going.
                        }
                        long elapsed = System.nanoTime() - callStart;

                        // Exactly one latency sample per call, success or failure.
                        latenciesNanos = ensureRoom(latenciesNanos, latencyCount);
                        latenciesNanos[latencyCount++] = elapsed;

                        if (results == null) {
                            failed++;
                            continue;
                        }
                        successful++;
                        for (BulkReadResult r : results) {
                            totalResults++;
                            if (r.getWasCached()) {
                                cachedResults++;
                            }
                        }
                    }
                    steadyElapsedNanos = System.nanoTime() - steadyStart;
                } finally {
                    if (!itemHandles.isEmpty()) {
                        // Best-effort cleanup: a failure here must not mask the primary outcome.
                        try {
                            session.unsubscribeBulk(serverHandle, itemHandles);
                        } catch (Exception ignored) {
                        }
                    }
                }
            } finally {
                // Best-effort cleanup: a failure here must not mask the primary outcome.
                try {
                    client.closeSession(mxaccess_gateway.v1.MxaccessGateway.CloseSessionRequest.newBuilder()
                            .setSessionId(sessionId).build());
                } catch (Exception ignored) {
                }
            }
            long totalCalls = successful + failed;
            double steadyElapsedSeconds = steadyElapsedNanos / 1_000_000_000.0;
            double callsPerSecond = steadyElapsedSeconds > 0 ? totalCalls / steadyElapsedSeconds : 0.0;
            writeBenchOutput(common, json, tags, clientName, bulkSize, durationSeconds, warmupSeconds,
                    steadyElapsedNanos, totalCalls, successful, failed, totalResults, cachedResults,
                    callsPerSecond, latenciesNanos, latencyCount);
        }
        return 0;
    }

    /** Picks the starting latency-buffer size without int overflow or an oversized allocation. */
    private static int initialLatencyCapacity(int durationSeconds) {
        long estimate = Math.max(1024L, durationSeconds * 1000L);
        return (int) Math.min(estimate, MAX_INITIAL_LATENCY_SAMPLES);
    }

    /** Returns {@code samples} if it can hold one more entry, otherwise a copy of twice the length. */
    private static long[] ensureRoom(long[] samples, int used) {
        if (used < samples.length) {
            return samples;
        }
        return java.util.Arrays.copyOf(samples, samples.length * 2);
    }
}
/**
 * Prints the benchmark report to the command's output writer.
 *
 * <p>With {@code json} set, one JSON object is printed carrying the run
 * parameters, the call and result counters, the rounded call rate and the
 * latency summary. Otherwise only the raw calls-per-second value is printed.
 *
 * @param common shared CLI options; supplies the output writer and endpoint
 * @param json whether to emit the JSON report
 * @param tags tag names that were read on every call
 * @param clientName client name used for the run
 * @param bulkSize number of tags per call
 * @param durationSeconds requested measurement window, in seconds
 * @param warmupSeconds requested warm-up window, in seconds
 * @param steadyElapsedNanos measured window length, in nanoseconds
 * @param totalCalls number of calls issued in the measured window
 * @param successful number of calls that returned
 * @param failed number of calls that threw
 * @param totalResults number of read results received
 * @param cachedResults number of read results flagged as cached
 * @param callsPerSecond call rate over the measured window
 * @param latenciesNanos per-call latency samples, in nanoseconds
 * @param latencyCount number of valid entries in {@code latenciesNanos}
 */
private static void writeBenchOutput(
        CommonOptions common,
        boolean json,
        List<String> tags,
        String clientName,
        int bulkSize,
        int durationSeconds,
        int warmupSeconds,
        long steadyElapsedNanos,
        long totalCalls,
        long successful,
        long failed,
        long totalResults,
        long cachedResults,
        double callsPerSecond,
        long[] latenciesNanos,
        int latencyCount) {
    PrintWriter writer = common.spec.commandLine().getOut();
    Map<String, Object> latencySummary = percentileSummaryMs(latenciesNanos, latencyCount);

    // Plain mode: just the headline number.
    if (!json) {
        writer.println(callsPerSecond);
        return;
    }

    // Insertion order is the field order of the emitted JSON object.
    Map<String, Object> report = new LinkedHashMap<>();
    report.put("language", "java");
    report.put("command", "bench-read-bulk");
    report.put("endpoint", common.endpoint);
    report.put("clientName", clientName);
    report.put("bulkSize", bulkSize);
    report.put("durationSeconds", durationSeconds);
    report.put("warmupSeconds", warmupSeconds);
    long elapsedMillis = steadyElapsedNanos / 1_000_000L;
    report.put("durationMs", elapsedMillis);
    report.put("tags", tags);
    report.put("totalCalls", totalCalls);
    report.put("successfulCalls", successful);
    report.put("failedCalls", failed);
    report.put("totalReadResults", totalResults);
    report.put("cachedReadResults", cachedResults);
    double roundedRate = roundTo(callsPerSecond, 2);
    report.put("callsPerSecond", roundedRate);
    report.put("latencyMs", latencySummary);
    writer.println(jsonObject(report));
}
/**
 * Summarises latency samples as millisecond statistics.
 *
 * @param latenciesNanos latency samples in nanoseconds; only the first
 *     {@code count} entries are read and the array itself is not modified
 * @param count number of valid samples
 * @return an insertion-ordered map with keys {@code p50}, {@code p95},
 *     {@code p99}, {@code max} and {@code mean}, each rounded to three
 *     decimals; every value is {@code 0.0} when {@code count} is zero
 */
private static Map<String, Object> percentileSummaryMs(long[] latenciesNanos, int count) {
    Map<String, Object> summary = new LinkedHashMap<>();
    if (count == 0) {
        for (String key : new String[] {"p50", "p95", "p99", "max", "mean"}) {
            summary.put(key, 0.0);
        }
        return summary;
    }

    // Sort a private copy so the caller's sample order is left untouched.
    long[] ascending = new long[count];
    System.arraycopy(latenciesNanos, 0, ascending, 0, count);
    java.util.Arrays.sort(ascending);

    double totalMs = 0.0;
    for (long nanos : ascending) {
        totalMs += nanos / 1_000_000.0;
    }
    double maxMs = ascending[count - 1] / 1_000_000.0;
    double meanMs = totalMs / count;

    summary.put("p50", roundTo(percentileMs(ascending, 0.50), 3));
    summary.put("p95", roundTo(percentileMs(ascending, 0.95), 3));
    summary.put("p99", roundTo(percentileMs(ascending, 0.99), 3));
    summary.put("max", roundTo(maxMs, 3));
    summary.put("mean", roundTo(meanMs, 3));
    return summary;
}
/**
 * Returns the requested quantile of an ascending array of nanosecond
 * samples, in milliseconds, linearly interpolating between the two
 * neighbouring samples.
 *
 * @param sorted samples in ascending order, in nanoseconds
 * @param quantile fraction of the distribution; the visible callers pass
 *     0.50, 0.95 and 0.99
 * @return the interpolated value in milliseconds, or {@code 0.0} for an
 *     empty array
 */
private static double percentileMs(long[] sorted, double quantile) {
    final int size = sorted.length;
    switch (size) {
        case 0:
            return 0.0;
        case 1:
            return sorted[0] / 1_000_000.0;
        default:
            break;
    }

    // Fractional position of the quantile within the index range 0..size-1.
    final double position = quantile * (size - 1);
    final int below = (int) Math.floor(position);
    final int above = Math.min(below + 1, size - 1);
    final double weight = position - below;

    final double belowMs = sorted[below] / 1_000_000.0;
    final double aboveMs = sorted[above] / 1_000_000.0;
    return belowMs + (aboveMs - belowMs) * weight;
}
/**
 * Rounds {@code value} to {@code digits} decimal places, with ties going
 * towards positive infinity as {@link Math#round(double)} does.
 *
 * <p>Values that cannot be rounded through a {@code long} are returned
 * unchanged: NaN and the infinities, and magnitudes so large that the
 * scaled value has no fractional part left. {@code Math.round} would
 * otherwise turn NaN into 0 and clamp large values to
 * {@code Long.MAX_VALUE}, yielding an unrelated number.
 *
 * @param value the number to round
 * @param digits number of decimal places to keep
 * @return the rounded value, or {@code value} itself when rounding is not
 *     meaningful
 */
private static double roundTo(double value, int digits) {
    double shift = Math.pow(10, digits);
    double scaled = value * shift;
    // From 2^52 upwards every double is already an integer, so there is nothing to round.
    if (!Double.isFinite(scaled) || Math.abs(scaled) >= 0x1p52) {
        return value;
    }
    return Math.round(scaled) / shift;
}
@Command(name = "write", description = "Invokes MXAccess Write.")
static final class WriteCommand extends GatewayCommand {
@Option(names = "--session-id", required = true, description = "Gateway session id.")