e2e: drive each client CLI through one long-lived batch process

The cross-language e2e matrix spawned one CLI process per operation — ~250 per client — paying a process (and, for the Java CLI, a full JVM) cold-start every time. The Java leg alone ran ~16 minutes. Each client CLI (dotnet, go, rust, python, java) gains a `batch` subcommand: a single process that reads one command line from stdin, runs it through the normal subcommand dispatch, writes the JSON result, then a line containing exactly `__MXGW_BATCH_EOR__`. A failing command writes its `{"error":...}` envelope and the loop continues. run-client-e2e-tests.ps1 now launches one batch process per client and pipes every operation through its stdin/stdout, so startup is paid once per client. The orchestration and assertions are unchanged; the parity and auth phases now read the `{"error":...}` envelope instead of a process exit code. Full 5-client matrix with -VerifyWrite: ~15 min, down from ~35; the Java leg dropped from ~16 min to ~2-3. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 06:20:13 -04:00
parent c1ff8c94e8
commit 6126099cdb
10 changed files with 970 additions and 47 deletions
@@ -5,11 +5,13 @@ from __future__ import annotations
 import asyncio
 import json
 import os
+import sys
 from collections.abc import Awaitable, Callable
 from datetime import datetime, timezone
 from typing import Any

 import click
+from click.testing import CliRunner
 from google.protobuf.json_format import MessageToDict

 from mxgateway import __version__
@@ -23,6 +25,8 @@ from mxgateway.values import MxValueInput, to_mx_value

 MAX_AGGREGATE_EVENTS = 10_000

+# Sentinel line the batch subcommand writes after each command's output to mark the
+# end of that record.
+_BATCH_EOR = "__MXGW_BATCH_EOR__"
+

@click.group()
 def main() -> None:
@@ -42,6 +46,80 @@ def version(output_json: bool) -> None:
    _emit(payload, output_json=output_json, text=f"mxgw-py {__version__}")


+@main.command()
+def batch() -> None:
+    """Read commands from stdin and execute each, writing output + __MXGW_BATCH_EOR__ after each.
+
+    Each non-empty line of stdin is a complete argument string (no quoting support — the
+    harness never passes whitespace-containing arguments). Lines are split on runs of ASCII
+    whitespace and dispatched through the normal CLI parser. On EOF or an empty line, exit 0.
+
+    Errors do NOT terminate the loop. Each command's output (including any error JSON) is
+    written to stdout followed by a line containing exactly ``__MXGW_BATCH_EOR__``, then
+    stdout is flushed. Error output is formatted as ``{"error": "...", "type": "..."}``.
+    """
+
+    runner = CliRunner()
+
+    for raw_line in sys.stdin:
+        line = raw_line.rstrip("\n").rstrip("\r")
+        if not line:
+            # Empty line signals clean exit (matches the spec and .NET behaviour).
+            break
+
+        args = line.split()
+
+        try:
+            result = runner.invoke(main, args, catch_exceptions=True)
+        except Exception as exc:  # noqa: BLE001 — be safe; never let batch loop die
+            _batch_write_error(exc.__class__.__name__, str(exc))
+            _batch_flush_eor()
+            continue
+
+        if result.exit_code == 0:
+            # Normal success — write captured output as-is.
+            sys.stdout.write(result.output)
+        else:
+            # Something went wrong.  If the command already emitted a JSON object
+            # (e.g. the output starts with '{'), trust that and relay it verbatim.
+            # Otherwise synthesise the standard {"error": ..., "type": ...} shape.
+            output = result.output or ""
+            exc = result.exception
+
+            if output.lstrip().startswith("{"):
+                # Already JSON — relay verbatim (may or may not end with newline).
+                sys.stdout.write(output)
+                if not output.endswith("\n"):
+                    sys.stdout.write("\n")
+            elif exc is not None and not isinstance(exc, SystemExit):
+                _batch_write_error(type(exc).__name__, str(exc))
+            else:
+                # Click's default error format is "Error: <message>\n"; extract the
+                # message so the harness gets clean JSON.
+                msg = output.strip()
+                if msg.startswith("Error: "):
+                    msg = msg[len("Error: "):]
+                exc_type = (
+                    type(exc).__name__
+                    if exc is not None and not isinstance(exc, SystemExit)
+                    else "CliError"
+                )
+                _batch_write_error(exc_type, msg)
+
+        _batch_flush_eor()
+
+
+def _batch_write_error(exc_type: str, message: str) -> None:
+    """Write a JSON error record to stdout in the standard batch error shape."""
+    sys.stdout.write(json.dumps({"error": message, "type": exc_type}) + "\n")
+
+
+def _batch_flush_eor() -> None:
+    """Write the end-of-record sentinel and flush stdout."""
+    sys.stdout.write(_BATCH_EOR + "\n")
+    sys.stdout.flush()
+
+
 def gateway_options(command: Callable[..., Any]) -> Callable[..., Any]:
    """Apply the shared gateway connection options to a Click command."""
    command = click.option("--endpoint", default="localhost:5000", show_default=True)(command)
@@ -1,13 +1,15 @@
 """Tests for the Python CLI."""

+import io
 import json
+from typing import Any

 import click
 import pytest
 from click.testing import CliRunner

 from mxgateway import __version__
-from mxgateway_cli.commands import _use_plaintext, main
+from mxgateway_cli.commands import _BATCH_EOR, _use_plaintext, main


 def test_version_json_is_deterministic() -> None:
@@ -216,3 +218,142 @@ def test_cli_localhost_endpoint_with_plaintext_flag_uses_plaintext(

    assert result.exit_code != 0
    assert captured.get("plaintext") is True
+
+
+# ---------------------------------------------------------------------------
+# batch subcommand tests
+# ---------------------------------------------------------------------------
+
+
+def _run_batch(lines: list[str]) -> tuple[int, list[str]]:
+    """Invoke ``batch`` with the given stdin lines; return (exit_code, stdout_lines)."""
+    runner = CliRunner()
+    stdin_text = "\n".join(lines) + "\n"
+    result = runner.invoke(main, ["batch"], input=stdin_text)
+    stdout_lines = result.output.splitlines()
+    return result.exit_code, stdout_lines
+
+
+def _split_records(stdout_lines: list[str]) -> list[list[str]]:
+    """Split stdout lines on ``__MXGW_BATCH_EOR__`` sentinels into per-command records."""
+    records: list[list[str]] = []
+    current: list[str] = []
+    for line in stdout_lines:
+        if line == _BATCH_EOR:
+            records.append(current)
+            current = []
+        else:
+            current.append(line)
+    # Any trailing lines without a sentinel are ignored (shouldn't occur).
+    return records
+
+
+def test_batch_version_json_produces_eor_sentinel() -> None:
+    """A single ``version --json`` line produces the version JSON followed by the EOR sentinel."""
+    exit_code, lines = _run_batch(["version --json"])
+
+    assert exit_code == 0
+    records = _split_records(lines)
+    assert len(records) == 1
+    payload = json.loads(records[0][0])
+    assert payload == {
+        "client": "mxgw-py",
+        "package": "mxaccess-gateway-client",
+        "version": __version__,
+    }
+
+
+def test_batch_two_commands_produce_two_delimited_records() -> None:
+    """Two input lines produce exactly two EOR-delimited records."""
+    exit_code, lines = _run_batch(["version --json", "version --json"])
+
+    assert exit_code == 0
+    records = _split_records(lines)
+    assert len(records) == 2
+    for record in records:
+        payload = json.loads(record[0])
+        assert payload["client"] == "mxgw-py"
+
+
+def test_batch_eof_exits_zero() -> None:
+    """EOF on stdin exits with code 0."""
+    runner = CliRunner()
+    result = runner.invoke(main, ["batch"], input="")
+    assert result.exit_code == 0
+
+
+def test_batch_empty_line_exits_zero() -> None:
+    """An empty line signals a clean exit with code 0."""
+    exit_code, lines = _run_batch([""])
+    assert exit_code == 0
+    # No EOR sentinels should have been emitted.
+    assert _BATCH_EOR not in lines
+
+
+def test_batch_empty_line_stops_processing_subsequent_commands() -> None:
+    """Commands after the first empty line must not be executed."""
+    exit_code, lines = _run_batch(["", "version --json"])
+
+    assert exit_code == 0
+    # No records should appear because the empty line stopped the loop.
+    records = _split_records(lines)
+    assert records == []
+
+
+def test_batch_failure_does_not_terminate_loop() -> None:
+    """A failing command (bad parse) must not terminate the batch loop."""
+    exit_code, lines = _run_batch([
+        "open-session --unknown-flag",
+        "version --json",
+    ])
+
+    assert exit_code == 0
+    records = _split_records(lines)
+    # Two records: one error + one success.
+    assert len(records) == 2
+    # First record must be a JSON error object.
+    error_payload = json.loads(records[0][0])
+    assert "error" in error_payload
+    assert "type" in error_payload
+    # Second record must be the version JSON.
+    version_payload = json.loads(records[1][0])
+    assert version_payload["client"] == "mxgw-py"
+
+
+def test_batch_error_record_has_required_json_shape() -> None:
+    """A failing command must produce ``{"error": "...", "type": "..."}`` JSON."""
+    exit_code, lines = _run_batch(["open-session --unknown-flag"])
+
+    assert exit_code == 0
+    records = _split_records(lines)
+    assert len(records) == 1
+    payload = json.loads(records[0][0])
+    assert isinstance(payload.get("error"), str)
+    assert isinstance(payload.get("type"), str)
+
+
+def test_batch_network_error_produces_error_json_not_terminates(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A network-level failure (MxGatewayError) on one command must not stop the loop."""
+
+    async def _fake_connect(kwargs: dict[str, Any]) -> Any:
+        raise RuntimeError("injected-network-failure")
+
+    monkeypatch.setattr("mxgateway_cli.commands.GatewayClient.connect", _fake_connect)
+
+    exit_code, lines = _run_batch([
+        "open-session --endpoint localhost:5000 --api-key mxgw_test --plaintext --json",
+        "version --json",
+    ])
+
+    assert exit_code == 0
+    records = _split_records(lines)
+    assert len(records) == 2
+    # First record is an error.
+    error_payload = json.loads(records[0][0])
+    assert "error" in error_payload
+    assert "type" in error_payload
+    # Second record is success.
+    version_payload = json.loads(records[1][0])
+    assert version_payload["client"] == "mxgw-py"