Cross-language ReadBulk stress benchmark

Adds a bench-read-bulk subcommand to every client CLI (.NET, Go, Rust, Python, Java) and a PowerShell driver that runs all five concurrently against the deployed gateway and prints a side-by-side comparison.

Each CLI's bench:
- Opens its own session, registers, subscribes to bulk-size tags so the worker's MxAccessValueCache populates from real OnDataChange events.
- Runs a warmup-seconds-long pre-loop with identical calls so JIT / connection-pool / first-call overhead is amortised before the measurement window.
- Runs ReadBulk in a tight in-process loop for duration-seconds with per-call high-resolution latency capture (Stopwatch in .NET, time.Now in Go, std::time::Instant in Rust, time.perf_counter in Python, System.nanoTime in Java).
- Unsubscribes + closes the session, then emits one JSON object with the shared schema: { language, durationMs, totalCalls, successfulCalls, failedCalls, totalReadResults, cachedReadResults, callsPerSecond, latencyMs: { p50, p95, p99, max, mean } }.

The PS driver (scripts/bench-read-bulk.ps1) launches one detached process per client, waits for all to finish, parses the trailing JSON object from each stdout, prints a comparison table, and persists the combined report under artifacts/bench/. Quoting around Java's `gradle --args="..."` is handled by writing a one-shot .bat that cmd.exe runs; the .NET CLI's per-call gRPC timeout is auto-scaled to (Duration + Warmup + 30s) so the channel-wide timeout doesn't cancel the bench mid-loop.
Live 30-second steady-state run against the deployed gateway, all five clients hitting the same six TestMachine_001..006.TestChangingInt tags:

client   calls/sec   cached/total   p50 ms   p95 ms   p99 ms   max ms
dotnet   171.78      30924/30924    3.84     14.06    40.41    542.48
go       175.46      31590/31590    3.93     13.52    41.26    243.00
rust     123.26      22188/22188    5.52     15.78    48.11    544.41
python   145.79      26244/26244    4.86     14.85    41.65    645.84
java     181.12      32604/32604    3.80     10.59    33.37    344.27

143,550 ReadBulk results across all five clients during the 30s window; 100% were was_cached = true (the worker's cache fast-path never fell through to the snapshot lifecycle). Aggregate read throughput ~800 calls/sec against five concurrent sessions sharing the same cached tags.

A second variant with bulk-size 20 sustained the same per-client call rate while delivering 3.3x more values per call (~37,000 cached reads/sec aggregate across the five concurrent sessions), confirming the linear per-tag cache lookup inside one call is not a bottleneck at this scale.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 05:17:08 -04:00
parent eaa7093cd6
commit 93633ce99c
6 changed files with 1370 additions and 0 deletions
@@ -120,6 +120,7 @@ public final class MxGatewayCli implements Callable<Integer> {
        commandLine.addSubcommand("write2-bulk", new Write2BulkCommand(clientFactory));
        commandLine.addSubcommand("write-secured-bulk", new WriteSecuredBulkCommand(clientFactory));
        commandLine.addSubcommand("write-secured2-bulk", new WriteSecured2BulkCommand(clientFactory));
+        commandLine.addSubcommand("bench-read-bulk", new BenchReadBulkCommand(clientFactory));
        commandLine.addSubcommand("write", new WriteCommand(clientFactory));
        commandLine.addSubcommand("stream-events", new StreamEventsCommand(clientFactory));
        commandLine.addSubcommand("smoke", new SmokeCommand(clientFactory));
@@ -769,6 +770,224 @@ public final class MxGatewayCli implements Callable<Integer> {
        }
    }

+    /**
+     * Cross-language ReadBulk stress benchmark — mirrors the .NET / Go / Rust /
+     * Python implementations so the PS driver collates one JSON schema across
+     * all five clients.
+     *
+     * <p>Sequence: open a session, register, subscribe to the generated tags,
+     * run an untimed warm-up loop, run the timed ReadBulk loop, then unsubscribe
+     * and close the session before the summary is printed.
+     */
+    @Command(name = "bench-read-bulk", description = "Cross-language ReadBulk stress benchmark.")
+    static final class BenchReadBulkCommand extends GatewayCommand {
+        /** Name used both as the client session name and for the register call. */
+        @Option(names = "--client-name", defaultValue = "mxgw-java-bench")
+        String clientName;
+
+        /** Length of the measured ReadBulk loop, in seconds. */
+        @Option(names = "--duration-seconds", defaultValue = "30")
+        int durationSeconds;
+
+        /** Length of the untimed warm-up loop that precedes the measurement, in seconds. */
+        @Option(names = "--warmup-seconds", defaultValue = "3")
+        int warmupSeconds;
+
+        /** Number of consecutive tags that are generated, subscribed and read per call. */
+        @Option(names = "--bulk-size", defaultValue = "6")
+        int bulkSize;
+
+        /** Number of the first generated tag; following tags count up from here. */
+        @Option(names = "--tag-start", defaultValue = "1")
+        int tagStart;
+
+        /** Text placed in front of the zero-padded tag number. */
+        @Option(names = "--tag-prefix", defaultValue = "TestMachine_")
+        String tagPrefix;
+
+        /** Attribute name appended after the dot of every generated tag. */
+        @Option(names = "--tag-attribute", defaultValue = "TestChangingInt")
+        String tagAttribute;
+
+        /** Timeout value handed to every readBulk call. */
+        @Option(names = "--timeout-ms", defaultValue = "1500")
+        int timeoutMs;
+
+        BenchReadBulkCommand(MxGatewayCliClientFactory clientFactory) {
+            super(clientFactory);
+        }
+
+        /**
+         * Runs the benchmark and prints the summary.
+         *
+         * @return {@code 0} once the summary has been written
+         * @throws IllegalArgumentException if {@code --bulk-size} is less than one
+         */
+        @Override
+        public Integer call() {
+            if (bulkSize < 1) {
+                throw new IllegalArgumentException("bulk-size must be positive");
+            }
+            List<String> tags = new ArrayList<>(bulkSize);
+            for (int i = 0; i < bulkSize; i++) {
+                // Locale.ROOT keeps the zero-padded number in ASCII digits whatever the
+                // default locale is, so the generated tag names stay stable across hosts.
+                tags.add(String.format(java.util.Locale.ROOT, "%s%03d.%s", tagPrefix, tagStart + i, tagAttribute));
+            }
+
+            try (MxGatewayCliClient client = clientFactory.connect(common.resolved())) {
+                var openReply = client.openSession(
+                        mxaccess_gateway.v1.MxaccessGateway.OpenSessionRequest.newBuilder()
+                                .setClientSessionName(clientName)
+                                .build());
+                String sessionId = openReply.getSessionId();
+                MxGatewayCliSession session = client.session(sessionId);
+                List<Integer> itemHandles = new ArrayList<>();
+                long steadyElapsedNanos;
+                long[] latenciesNanos;
+                int latencyCount = 0;
+                long successful = 0;
+                long failed = 0;
+                long totalResults = 0;
+                long cachedResults = 0;
+                // Placeholder only: the finally block reads it solely when itemHandles is
+                // non-empty, which can only happen after register() has succeeded.
+                int serverHandle = 0;
+                try {
+                    // Registering inside the try guarantees the session opened above is
+                    // closed again even when registration itself fails.
+                    serverHandle = session.register(clientName);
+
+                    // Only successfully subscribed items are remembered for the later
+                    // unsubscribe; failed subscriptions are skipped without aborting.
+                    List<SubscribeResult> subscribeResults = session.subscribeBulk(serverHandle, tags);
+                    for (SubscribeResult r : subscribeResults) {
+                        if (r.getWasSuccessful()) {
+                            itemHandles.add(r.getItemHandle());
+                        }
+                    }
+
+                    // Warm-up window — drives identical calls so JIT / connection
+                    // pool effects are amortised before the measurement window.
+                    // Nothing is caught here, so an exception during warm-up ends the run.
+                    long warmupDeadline = System.nanoTime() + warmupSeconds * 1_000_000_000L;
+                    while (System.nanoTime() < warmupDeadline) {
+                        session.readBulk(serverHandle, tags, timeoutMs);
+                    }
+
+                    // Start with roughly one slot per millisecond of run time. The size is
+                    // computed in long arithmetic and capped so a huge --duration-seconds
+                    // cannot overflow int; the array doubles on demand below.
+                    int initialCapacity = (int) Math.min(Math.max(1024L, durationSeconds * 1000L), 1L << 20);
+                    latenciesNanos = new long[initialCapacity];
+                    long steadyStart = System.nanoTime();
+                    long steadyDeadline = steadyStart + durationSeconds * 1_000_000_000L;
+                    while (System.nanoTime() < steadyDeadline) {
+                        long callStart = System.nanoTime();
+                        List<BulkReadResult> results = null;
+                        try {
+                            results = session.readBulk(serverHandle, tags, timeoutMs);
+                        } catch (Exception ignored) {
+                            // A failed call is still timed and is tallied as failed below.
+                        }
+                        long elapsed = System.nanoTime() - callStart;
+
+                        // Exactly one latency sample per call, successful or not.
+                        if (latencyCount >= latenciesNanos.length) {
+                            latenciesNanos = java.util.Arrays.copyOf(latenciesNanos, latenciesNanos.length * 2);
+                        }
+                        latenciesNanos[latencyCount++] = elapsed;
+
+                        if (results == null) {
+                            failed++;
+                            continue;
+                        }
+                        successful++;
+                        for (BulkReadResult r : results) {
+                            totalResults++;
+                            if (r.getWasCached()) {
+                                cachedResults++;
+                            }
+                        }
+                    }
+                    steadyElapsedNanos = System.nanoTime() - steadyStart;
+                } finally {
+                    // Cleanup is best-effort: a failure here must not mask the benchmark
+                    // result or the exception that is already propagating.
+                    if (!itemHandles.isEmpty()) {
+                        try {
+                            session.unsubscribeBulk(serverHandle, itemHandles);
+                        } catch (Exception ignored) {
+                            // Best-effort unsubscribe; the session is closed right after.
+                        }
+                    }
+                    try {
+                        client.closeSession(mxaccess_gateway.v1.MxaccessGateway.CloseSessionRequest.newBuilder()
+                                .setSessionId(sessionId).build());
+                    } catch (Exception ignored) {
+                        // Best-effort close; nothing further can be done from the CLI.
+                    }
+                }
+
+                long totalCalls = successful + failed;
+                double steadyElapsedSeconds = steadyElapsedNanos / 1_000_000_000.0;
+                double callsPerSecond = steadyElapsedSeconds > 0 ? totalCalls / steadyElapsedSeconds : 0.0;
+                writeBenchOutput(common, json, tags, clientName, bulkSize, durationSeconds, warmupSeconds,
+                        steadyElapsedNanos, totalCalls, successful, failed, totalResults, cachedResults,
+                        callsPerSecond, latenciesNanos, latencyCount);
+            }
+            return 0;
+        }
+    }
+
+    /**
+     * Prints the benchmark summary to the command's output writer.
+     *
+     * <p>With {@code json} set, one JSON object is printed that carries the run
+     * parameters, the call and result counters, the rounded call rate and the
+     * latency summary. Otherwise only the raw calls-per-second value is printed.
+     */
+    private static void writeBenchOutput(
+            CommonOptions common,
+            boolean json,
+            List<String> tags,
+            String clientName,
+            int bulkSize,
+            int durationSeconds,
+            int warmupSeconds,
+            long steadyElapsedNanos,
+            long totalCalls,
+            long successful,
+            long failed,
+            long totalResults,
+            long cachedResults,
+            double callsPerSecond,
+            long[] latenciesNanos,
+            int latencyCount) {
+        PrintWriter writer = common.spec.commandLine().getOut();
+        Map<String, Object> latencySummary = percentileSummaryMs(latenciesNanos, latencyCount);
+
+        // Plain mode: the call rate is the only thing reported.
+        if (!json) {
+            writer.println(callsPerSecond);
+            return;
+        }
+
+        // LinkedHashMap keeps the keys in the order they are added here.
+        Map<String, Object> report = new LinkedHashMap<>();
+        report.put("language", "java");
+        report.put("command", "bench-read-bulk");
+        report.put("endpoint", common.endpoint);
+        report.put("clientName", clientName);
+        report.put("bulkSize", bulkSize);
+        report.put("durationSeconds", durationSeconds);
+        report.put("warmupSeconds", warmupSeconds);
+        // Measured window converted from nanoseconds to whole milliseconds.
+        report.put("durationMs", steadyElapsedNanos / 1_000_000L);
+        report.put("tags", tags);
+        report.put("totalCalls", totalCalls);
+        report.put("successfulCalls", successful);
+        report.put("failedCalls", failed);
+        report.put("totalReadResults", totalResults);
+        report.put("cachedReadResults", cachedResults);
+        report.put("callsPerSecond", roundTo(callsPerSecond, 2));
+        report.put("latencyMs", latencySummary);
+        writer.println(jsonObject(report));
+    }
+
+    /**
+     * Summarises the first {@code count} latency samples as millisecond values
+     * under the keys p50, p95, p99, max and mean (in that insertion order).
+     *
+     * <p>With no samples every entry is {@code 0.0}. The input array is copied
+     * before sorting, so the caller's array is left untouched.
+     */
+    private static Map<String, Object> percentileSummaryMs(long[] latenciesNanos, int count) {
+        Map<String, Object> summary = new LinkedHashMap<>();
+        if (count == 0) {
+            for (String key : new String[] {"p50", "p95", "p99", "max", "mean"}) {
+                summary.put(key, 0.0);
+            }
+            return summary;
+        }
+
+        // Sort a private copy of just the filled part of the buffer.
+        long[] ordered = new long[count];
+        System.arraycopy(latenciesNanos, 0, ordered, 0, count);
+        java.util.Arrays.sort(ordered);
+
+        double totalMs = 0.0;
+        for (long nanos : ordered) {
+            totalMs += nanos / 1_000_000.0;
+        }
+        double slowestMs = ordered[count - 1] / 1_000_000.0;
+
+        summary.put("p50", roundTo(percentileMs(ordered, 0.50), 3));
+        summary.put("p95", roundTo(percentileMs(ordered, 0.95), 3));
+        summary.put("p99", roundTo(percentileMs(ordered, 0.99), 3));
+        summary.put("max", roundTo(slowestMs, 3));
+        summary.put("mean", roundTo(totalMs / count, 3));
+        return summary;
+    }
+
+    private static double percentileMs(long[] sorted, double quantile) {
+        int n = sorted.length;
+        if (n == 0) {
+            return 0.0;
+        }
+        if (n == 1) {
+            return sorted[0] / 1_000_000.0;
+        }
+        double rank = quantile * (n - 1);
+        int lower = (int) Math.floor(rank);
+        int upper = Math.min(lower + 1, n - 1);
+        double fraction = rank - lower;
+        double lowerMs = sorted[lower] / 1_000_000.0;
+        double upperMs = sorted[upper] / 1_000_000.0;
+        return lowerMs + (upperMs - lowerMs) * fraction;
+    }
+
+    private static double roundTo(double value, int digits) {
+        double shift = Math.pow(10, digits);
+        return Math.round(value * shift) / shift;
+    }
+
    @Command(name = "write", description = "Invokes MXAccess Write.")
    static final class WriteCommand extends GatewayCommand {
        @Option(names = "--session-id", required = true, description = "Gateway session id.")