e2e: drive each client CLI through one long-lived batch process

The cross-language e2e matrix spawned one CLI process per operation —
~250 per client — paying a process (and, for the Java CLI, a full JVM)
cold-start every time. The Java leg alone ran ~16 minutes.

Each client CLI (dotnet, go, rust, python, java) gains a `batch`
subcommand: a single process that reads one command line from stdin,
runs it through the normal subcommand dispatch, writes the JSON result,
then a line containing exactly `__MXGW_BATCH_EOR__`. A failing command
writes its `{"error":...}` envelope and the loop continues.

run-client-e2e-tests.ps1 now launches one batch process per client and
pipes every operation through its stdin/stdout, so startup is paid once
per client. The orchestration and assertions are unchanged; the parity
and auth phases now read the `{"error":...}` envelope instead of a
process exit code.

Full 5-client matrix with -VerifyWrite: ~15 min, down from ~35; the Java
leg dropped from ~16 min to ~2-3.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-21 06:20:13 -04:00
parent c1ff8c94e8
commit 6126099cdb
10 changed files with 970 additions and 47 deletions
+40 -1
View File
@@ -6,6 +6,7 @@
package main
import (
"bufio"
"context"
"encoding/json"
"errors"
@@ -116,6 +117,8 @@ func runWithIO(ctx context.Context, args []string, stdout, stderr io.Writer) err
return runGalaxyDiscover(ctx, args[1:], stdout, stderr)
case "galaxy-watch":
return runGalaxyWatch(ctx, args[1:], stdout, stderr)
case "batch":
return runBatch(ctx, os.Stdin, stdout, stderr)
default:
writeUsage(stderr)
return fmt.Errorf("unknown command %q", args[0])
@@ -1080,8 +1083,44 @@ type protojsonMessage interface {
ProtoReflect() protoreflect.Message
}
// batchEOR is the end-of-result sentinel emitted to stdout after every command
// in batch mode, regardless of success or failure. runBatch writes it on a
// line of its own, after whatever output (result or error JSON) the command
// produced.
const batchEOR = "__MXGW_BATCH_EOR__"
// runBatch runs a batch session: it reads one command line at a time from in,
// splits it on whitespace, dispatches it through the normal runWithIO routing,
// and writes a batchEOR sentinel line to stdout after every result.
//
// A failing command does not end the session. Its error is serialised as a
// JSON object with "error" and "type" keys to stdout (not stderr) so the
// harness can parse it in the same stream as normal output, framed by the
// sentinel. EOF on in, or an empty line, ends the session. A line that
// contains only whitespace is skipped and produces no sentinel.
//
// It returns nil on a clean end of session, the scanner's error if reading
// from in failed, or a wrapped write error if the buffered output could not
// be flushed to stdout.
func runBatch(ctx context.Context, in io.Reader, stdout, stderr io.Writer) error {
	// bufio.Scanner rejects lines longer than 64 KiB by default, which would
	// end the whole session with bufio.ErrTooLong. Allow much longer command
	// lines while still bounding memory use.
	const maxLineBytes = 4 << 20

	bw := bufio.NewWriter(stdout)
	scanner := bufio.NewScanner(in)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			// An empty line is the explicit end-of-session marker.
			break
		}
		args := strings.Fields(line)
		if len(args) == 0 {
			// Whitespace-only line: nothing to dispatch.
			continue
		}
		if err := runWithIO(ctx, args, bw, stderr); err != nil {
			// Write error as JSON to stdout (bw) so the harness sees it in the
			// same stream as normal output, framed by the EOR sentinel.
			errPayload := map[string]string{
				"error": err.Error(),
				"type":  "error",
			}
			// A write failure here is sticky on bw and is reported by the
			// Flush below, so it is safe to ignore at this point.
			_ = writeJSON(bw, errPayload)
		}
		// Same as above: any write error resurfaces from Flush.
		_, _ = fmt.Fprintln(bw, batchEOR)
		if err := bw.Flush(); err != nil {
			// stdout is no longer writable; running further commands would
			// perform work whose results can never be delivered.
			return fmt.Errorf("writing batch output: %w", err)
		}
	}
	return scanner.Err()
}
// writeUsage prints the one-line usage summary, listing every supported
// subcommand (including batch), to writer.
func writeUsage(writer io.Writer) {
	// The summary is emitted exactly once; an older copy of this line that
	// omitted the batch subcommand must not be printed alongside it.
	fmt.Fprintln(writer, "usage: mxgw-go <version|open-session|close-session|register|add-item|advise|subscribe-bulk|unsubscribe-bulk|read-bulk|write-bulk|write2-bulk|write-secured-bulk|write-secured2-bulk|bench-read-bulk|write|stream-events|smoke|galaxy-test-connection|galaxy-last-deploy|galaxy-discover|galaxy-watch|batch>")
}
func dialGalaxyForCommand(ctx context.Context, common *commonOptions) (*mxgateway.GalaxyClient, commonOptions, error) {