e2e: drive each client CLI through one long-lived batch process

The cross-language e2e matrix spawned one CLI process per operation — ~250 per client — paying a process (and, for the Java CLI, a full JVM) cold-start every time. The Java leg alone ran ~16 minutes. Each client CLI (dotnet, go, rust, python, java) gains a `batch` subcommand: a single process that reads one command line from stdin, runs it through the normal subcommand dispatch, writes the JSON result, then a line containing exactly `__MXGW_BATCH_EOR__`. A failing command writes its `{"error":...}` envelope and the loop continues. run-client-e2e-tests.ps1 now launches one batch process per client and pipes every operation through its stdin/stdout, so startup is paid once per client. The orchestration and assertions are unchanged; the parity and auth phases now read the `{"error":...}` envelope instead of a process exit code. Full 5-client matrix with -VerifyWrite: ~15 min, down from ~35; the Java leg dropped from ~16 min to ~2-3. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 06:20:13 -04:00
parent c1ff8c94e8
commit 6126099cdb
10 changed files with 970 additions and 47 deletions
@@ -9,6 +9,7 @@
 #![warn(missing_docs)]

 use std::env;
+use std::io::{self, BufRead, Write};
 use std::path::PathBuf;
 use std::process::ExitCode;
 use std::time::Duration;
@@ -319,6 +320,13 @@ enum Command {
        #[arg(long)]
        json: bool,
    },
+    /// Read commands from stdin, one per line, execute each in sequence, and
+    /// write `__MXGW_BATCH_EOR__` to stdout after every result.  Errors are
+    /// written as `{"error":"…","type":"error"}` JSON to stdout (not stderr)
+    /// so the harness can parse them without interleaving stderr.  The loop
+    /// never terminates on command error; only stdin EOF (or an empty line)
+    /// ends the session.
+    Batch,
    #[command(subcommand)]
    Galaxy(GalaxyCommand),
 }
@@ -427,7 +435,11 @@ enum CliValueType {
 #[tokio::main]
 async fn main() -> ExitCode {
    let cli = Cli::parse();
-    match run(cli).await {
+    let result = match cli.command {
+        Command::Batch => run_batch().await,
+        command => dispatch(command).await,
+    };
+    match result {
        Ok(()) => ExitCode::SUCCESS,
        Err(error) => {
            eprintln!("{error}");
@@ -436,8 +448,17 @@ async fn main() -> ExitCode {
    }
 }

-async fn run(cli: Cli) -> Result<(), Error> {
-    match cli.command {
+/// Dispatch a parsed [`Command`] to its handler.  [`Command::Batch`] is run
+/// from `main` (avoiding mutual recursion between `dispatch` and `run_batch`);
+/// a `Batch` that reaches this function is rejected as a nested batch session.
+async fn dispatch(command: Command) -> Result<(), Error> {
+    match command {
+        Command::Batch => {
+            return Err(Error::InvalidArgument {
+                name: "batch".to_owned(),
+                detail: "batch cannot be nested inside another batch session".to_owned(),
+            });
+        }
        Command::Version { json, .. } => print_version(json),
        Command::Ping {
            connection,
@@ -996,6 +1017,76 @@ async fn session_for(
    Ok(client.session(session_id))
 }

+/// End-of-result sentinel printed to stdout, on its own line, after every batch result.
+const BATCH_EOR: &str = "__MXGW_BATCH_EOR__";
+
+/// Run the batch loop: read one command line at a time from stdin, parse it
+/// as a fresh [`Cli`], run it through [`dispatch`], and print [`BATCH_EOR`] on
+/// its own line after every result.  Arguments are split on ASCII whitespace
+/// only (no quoting).  A failed command prints a `{"error":…,"type":"error"}`
+/// JSON line to stdout and the loop continues; only stdin EOF, an empty line,
+/// or a stdin read error ends the session.
+async fn run_batch() -> Result<(), Error> {
+    let stdin = io::stdin();
+    let stdout = io::stdout();
+
+    for line in stdin.lock().lines() {
+        let line = line.map_err(|e| Error::InvalidArgument {
+            name: "stdin".to_owned(),
+            detail: e.to_string(),
+        })?;
+
+        if line.is_empty() {
+            break;
+        }
+
+        let parts: Vec<&str> = line.split_ascii_whitespace().collect();
+        if parts.is_empty() { // whitespace-only line: nothing to run, still emit the sentinel
+            println!("{BATCH_EOR}");
+            stdout.lock().flush().ok();
+            continue;
+        }
+
+        // Re-parse the split arguments under a fresh Cli, prepending the
+        // program-name placeholder so clap sees a complete argv[].
+        let parse_result =
+            Cli::try_parse_from(std::iter::once("mxgw-cli").chain(parts.iter().copied()));
+
+        let outcome: Result<(), Error> = match parse_result {
+            Ok(cli) => {
+                // Spawn on a new tokio task so the large dispatch future gets a fresh stack
+                // (avoiding stack overflow); a join failure becomes an ordinary command error.
+                tokio::task::spawn(dispatch(cli.command))
+                    .await
+                    .unwrap_or_else(|join_err| {
+                        Err(Error::InvalidArgument {
+                            name: "task".to_owned(),
+                            detail: join_err.to_string(),
+                        })
+                    })
+            }
+            Err(clap_err) => Err(Error::InvalidArgument {
+                name: "args".to_owned(),
+                detail: clap_err.to_string(),
+            }),
+        };
+
+        if let Err(err) = outcome {
+            // Write error as JSON to stdout so the harness sees it in the
+            // same stream as normal output, framed by the EOR sentinel.
+            println!(
+                "{}",
+                serde_json::json!({ "error": err.to_string(), "type": "error" })
+            );
+        }
+
+        println!("{BATCH_EOR}");
+        stdout.lock().flush().ok();
+    }
+
+    Ok(())
+}
+
 /// Cross-language ReadBulk stress benchmark — mirrors the .NET / Go / Python /
 /// Java implementations so the PS driver collates one JSON schema across all
 /// five clients.