e2e: drive each client CLI through one long-lived batch process

The cross-language e2e matrix spawned one CLI process per operation — ~250 per client — paying a process (and, for the Java CLI, a full JVM) cold-start every time. The Java leg alone ran ~16 minutes. Each client CLI (dotnet, go, rust, python, java) gains a `batch` subcommand: a single process that reads one command line at a time from stdin, runs it through the normal subcommand dispatch, writes the JSON result, then a line containing exactly `__MXGW_BATCH_EOR__`. A failing command writes its `{"error":...}` envelope and the loop continues. run-client-e2e-tests.ps1 now launches one batch process per client and pipes every operation through its stdin/stdout, so startup is paid once per client. The orchestration and assertions are unchanged; the parity and auth phases now read the `{"error":...}` envelope instead of a process exit code. Full 5-client matrix with -VerifyWrite: ~15 min, down from ~35; the Java leg dropped from ~16 min to ~2-3. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 06:20:13 -04:00
parent c1ff8c94e8
commit 6126099cdb
10 changed files with 970 additions and 47 deletions
@@ -6,6 +6,7 @@
 package main

 import (
+	"bufio"
 	"context"
 	"encoding/json"
 	"errors"
@@ -116,6 +117,8 @@ func runWithIO(ctx context.Context, args []string, stdout, stderr io.Writer) err
 		return runGalaxyDiscover(ctx, args[1:], stdout, stderr)
 	case "galaxy-watch":
 		return runGalaxyWatch(ctx, args[1:], stdout, stderr)
+	case "batch":
+		return runBatch(ctx, os.Stdin, stdout, stderr)
 	default:
 		writeUsage(stderr)
 		return fmt.Errorf("unknown command %q", args[0])
@@ -1080,8 +1083,44 @@ type protojsonMessage interface {
 	ProtoReflect() protoreflect.Message
 }

+// batchEOR is the end-of-result sentinel line that runBatch writes to stdout
+// after every dispatched command, whether that command succeeded or failed.
+const batchEOR = "__MXGW_BATCH_EOR__"
+
+// runBatch reads newline-delimited commands from in, splits each on whitespace
+// with strings.Fields (no quoting support), and dispatches it via runWithIO.
+// Output is buffered; after each dispatched command, and after the JSON error
+// object written on failure, batchEOR is emitted and the buffer is flushed.
+// The loop ends at EOF or the first empty line and returns scanner.Err().
+func runBatch(ctx context.Context, in io.Reader, stdout, stderr io.Writer) error {
+	bw := bufio.NewWriter(stdout)
+	scanner := bufio.NewScanner(in) // default Scanner limit: a line over ~64 KiB ends the loop with bufio.ErrTooLong
+	for scanner.Scan() {
+		line := scanner.Text()
+		if line == "" {
+			break // a completely empty line ends the session, same as EOF
+		}
+		args := strings.Fields(line)
+		if len(args) == 0 {
+			continue // whitespace-only line: skipped and NO batchEOR is written — NOTE(review): confirm the harness never waits on one
+		}
+		if err := runWithIO(ctx, args, bw, stderr); err != nil {
+			// Report the failure on stdout (via bw) rather than stderr so it is
+			// framed by the same batchEOR sentinel as a successful result.
+			errPayload := map[string]string{
+				"error": err.Error(),
+				"type":  "error",
+			}
+			_ = writeJSON(bw, errPayload) // write error deliberately dropped; the loop keeps going
+		}
+		_, _ = fmt.Fprintln(bw, batchEOR) // sentinel follows both the success and the failure path
+		_ = bw.Flush() // flush once per command so the frame reaches stdout before the next read; flush error is dropped
+	}
+	return scanner.Err()
+}
+
 func writeUsage(writer io.Writer) {
-	fmt.Fprintln(writer, "usage: mxgw-go <version|open-session|close-session|register|add-item|advise|subscribe-bulk|unsubscribe-bulk|read-bulk|write-bulk|write2-bulk|write-secured-bulk|write-secured2-bulk|bench-read-bulk|write|stream-events|smoke|galaxy-test-connection|galaxy-last-deploy|galaxy-discover|galaxy-watch>")
+	fmt.Fprintln(writer, "usage: mxgw-go <version|open-session|close-session|register|add-item|advise|subscribe-bulk|unsubscribe-bulk|read-bulk|write-bulk|write2-bulk|write-secured-bulk|write-secured2-bulk|bench-read-bulk|write|stream-events|smoke|galaxy-test-connection|galaxy-last-deploy|galaxy-discover|galaxy-watch|batch>") // NOTE(review): hand-maintained list; keep in sync with the command switch in runWithIO
 }

 func dialGalaxyForCommand(ctx context.Context, common *commonOptions) (*mxgateway.GalaxyClient, commonOptions, error) {