From 7db4bffa30ba9a958a5e8e31de212d4efff20119 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Wed, 20 May 2026 05:25:17 -0400 Subject: [PATCH] bench-read-bulk driver: invoke .NET in -c Release and Rust in --release Rust's debug profile costs the bench ~31% of solo throughput (release is ~45% faster) and ~3x the p99 latency vs release (184 vs 267 solo calls/sec, p99 16 vs 5.7ms). Debug disables inlining, runs overflow checks on every arithmetic op, keeps Future state machines un-collapsed, and lets every Vec allocation through unoptimized. Other clients in the matrix don't see this gap: Go always builds optimized, Python is interpreted, and the JIT-tiered runtimes (HotSpot for Java, CoreCLR Tier 1 for .NET) close most of the gap during the warmup window. The driver now requests `cargo run --release` for Rust and `dotnet run -c Release --no-build` for .NET, so the two clients with a separate debug/release build configuration race under their production-equivalent profiles. Callers must `cargo build --release -p mxgw-cli` and `dotnet build ... -c Release` once before running the bench; `--no-build` then keeps each measurement window free of compilation overhead. Live re-run (5-way concurrent, 30s, bulkSize 6) after the switch: rust: 145.35 calls/sec (was 123.26 in debug; 18% gain under contention) go: 185.59 calls/sec java: 171.80 calls/sec dotnet: 172.31 calls/sec python: 140.52 calls/sec Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/bench-read-bulk.ps1 | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/bench-read-bulk.ps1 b/scripts/bench-read-bulk.ps1 index 7aa9cdc..0a4568c 100644 --- a/scripts/bench-read-bulk.ps1 +++ b/scripts/bench-read-bulk.ps1 @@ -126,8 +126,13 @@ function Get-ClientCommand { switch ($Client) { "dotnet" { + # -c Release matches the rest of the matrix: CoreCLR's Tier 1 JIT + # closes most of the debug/release gap for .NET on its own, but + # Release also disables JIT inline thresholds that hurt Stopwatch- + # bracketed measurement noise. 
The same project must have been built + # in Release at least once before this --no-build invocation. $cliArgs = @( - "run", "--project", "clients/dotnet/MxGateway.Client.Cli", "--no-build", "--", + "run", "--project", "clients/dotnet/MxGateway.Client.Cli", "-c", "Release", "--no-build", "--", "bench-read-bulk", "--endpoint", $httpEndpoint, "--api-key-env", $ApiKeyEnv, @@ -163,8 +168,13 @@ function Get-ClientCommand { return [pscustomobject]@{ file = "go"; args = $cliArgs; cwd = (Join-Path $repoRoot "clients/go") } } "rust" { + # --release is essential: Rust debug builds disable inlining and + # add overflow checks, which costs the bench ~31% of throughput + # (release is ~45% faster) and ~3x the p99 latency. The other compiled clients + # don't have this gap (go run is optimized, dotnet/java run JIT- + # optimized after the warmup window). $cliArgs = @( - "run", "--quiet", "-p", "mxgw-cli", "--", + "run", "--release", "--quiet", "-p", "mxgw-cli", "--", "bench-read-bulk", "--endpoint", $httpEndpoint, "--api-key-env", $ApiKeyEnv,