fix(client/python): reachable cert-validation flag; bounded off-loop TOFU probe; license/marker fixes (Client.Python-027..031)
This commit is contained in:
@@ -238,7 +238,11 @@ left `False`, the client fetches the gateway's presented certificate once
|
||||
to `localhost` (the generated certificate always carries a `localhost` SAN) when
|
||||
none was supplied. To verify instead, pass `ca_file` to verify against a specific
|
||||
CA, or set `require_certificate_validation=True` to verify against the system
|
||||
trust roots. See
|
||||
trust roots. The strict posture is reachable through every documented entry
|
||||
point: the `require_certificate_validation=True` keyword on
|
||||
`GatewayClient.connect(...)` / `GalaxyRepositoryClient.connect(...)`, the
|
||||
`ClientOptions(require_certificate_validation=True)` struct, and the
|
||||
`--require-certificate-validation` CLI flag. See
|
||||
[Gateway Configuration](../../docs/GatewayConfiguration.md#automatic-self-signed-certificate).
|
||||
|
||||
## CLI
|
||||
@@ -267,6 +271,13 @@ Use TLS options for a secured gateway:
|
||||
mxgw-py smoke --endpoint mxgateway.example.local:5001 --tls --ca-file C:\certs\mxgateway-ca.pem --server-name-override mxgateway.example.local --api-key-env MXGATEWAY_API_KEY --item Object.Attribute --json
|
||||
```
|
||||
|
||||
To force certificate validation against the system trust store instead of the
|
||||
lenient trust-on-first-use default, add `--require-certificate-validation`:
|
||||
|
||||
```powershell
|
||||
mxgw-py smoke --endpoint mxgateway.example.local:5001 --tls --require-certificate-validation --api-key-env MXGATEWAY_API_KEY --item Object.Attribute --json
|
||||
```
|
||||
|
||||
## Integration Checks
|
||||
|
||||
Run live checks only when a gateway and MXAccess-backed worker are available:
|
||||
|
||||
@@ -16,11 +16,10 @@ dependencies = [
|
||||
authors = [
|
||||
{ name = "Joseph Doherty" },
|
||||
]
|
||||
license = { text = "Proprietary" }
|
||||
license = "LicenseRef-Proprietary"
|
||||
keywords = ["mxaccess", "mxgateway", "grpc", "client", "archestra"]
|
||||
classifiers = [
|
||||
"Development Status :: 3 - Alpha",
|
||||
"License :: Other/Proprietary License",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
@@ -54,3 +53,6 @@ where = ["src"]
|
||||
addopts = "-ra"
|
||||
pythonpath = ["src"]
|
||||
testpaths = ["tests"]
|
||||
markers = [
|
||||
"tls: loopback TLS tests, opt-in via MXGATEWAY_RUN_TLS_TESTS=1",
|
||||
]
|
||||
|
||||
@@ -40,6 +40,7 @@ class GatewayClient:
|
||||
api_key: str | None = None,
|
||||
plaintext: bool = False,
|
||||
ca_file: str | None = None,
|
||||
require_certificate_validation: bool = False,
|
||||
server_name_override: str | None = None,
|
||||
stub: Any | None = None,
|
||||
) -> "GatewayClient":
|
||||
@@ -50,13 +51,16 @@ class GatewayClient:
|
||||
api_key=api_key,
|
||||
plaintext=plaintext,
|
||||
ca_file=ca_file,
|
||||
require_certificate_validation=require_certificate_validation,
|
||||
server_name_override=server_name_override,
|
||||
)
|
||||
|
||||
if stub is not None:
|
||||
return cls(options=resolved, stub=stub)
|
||||
|
||||
channel = create_channel(resolved)
|
||||
# create_channel may perform a blocking TLS certificate probe (TOFU
|
||||
# default); run it off the event loop so connect never freezes the loop.
|
||||
channel = await asyncio.to_thread(create_channel, resolved)
|
||||
return cls(
|
||||
options=resolved,
|
||||
stub=pb_grpc.MxAccessGatewayStub(channel),
|
||||
|
||||
@@ -52,6 +52,7 @@ class GalaxyRepositoryClient:
|
||||
api_key: str | None = None,
|
||||
plaintext: bool = False,
|
||||
ca_file: str | None = None,
|
||||
require_certificate_validation: bool = False,
|
||||
server_name_override: str | None = None,
|
||||
stub: Any | None = None,
|
||||
) -> "GalaxyRepositoryClient":
|
||||
@@ -62,13 +63,16 @@ class GalaxyRepositoryClient:
|
||||
api_key=api_key,
|
||||
plaintext=plaintext,
|
||||
ca_file=ca_file,
|
||||
require_certificate_validation=require_certificate_validation,
|
||||
server_name_override=server_name_override,
|
||||
)
|
||||
|
||||
if stub is not None:
|
||||
return cls(options=resolved, stub=stub)
|
||||
|
||||
channel = create_channel(resolved)
|
||||
# create_channel may perform a blocking TLS certificate probe (TOFU
|
||||
# default); run it off the event loop so connect never freezes the loop.
|
||||
channel = await asyncio.to_thread(create_channel, resolved)
|
||||
return cls(
|
||||
options=resolved,
|
||||
stub=galaxy_pb_grpc.GalaxyRepositoryStub(channel),
|
||||
|
||||
@@ -12,6 +12,10 @@ import grpc
|
||||
from .auth import REDACTED, ApiKey
|
||||
from .errors import MxGatewayTransportError
|
||||
|
||||
# Fallback bound for the TOFU certificate probe when no call_timeout is set, so a
|
||||
# black-holed host fails fast instead of hanging on the OS default connect timeout.
|
||||
_TOFU_PROBE_TIMEOUT_SECONDS = 10.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ClientOptions:
|
||||
@@ -88,8 +92,17 @@ def _split_authority(endpoint: str) -> tuple[str, int]:
|
||||
remainder = target[bracket_end + 1 :] # ":5120" or ""
|
||||
port_str = remainder.lstrip(":")
|
||||
return (host, int(port_str) if port_str else 443)
|
||||
host, _, port = target.rpartition(":")
|
||||
return (host or "localhost", int(port) if port else 443)
|
||||
host, sep, port = target.rpartition(":")
|
||||
if not sep:
|
||||
# No colon at all (e.g. a bare hostname "mygateway"): the whole target
|
||||
# is the host; default the port rather than raising on int("mygateway").
|
||||
return (target or "localhost", 443)
|
||||
if not port.isdigit():
|
||||
# A colon with a non-numeric / empty tail (e.g. a trailing ":") is not
|
||||
# an explicit port — keep the left side as the host and default the
|
||||
# port so a typo cannot raise an uncaught ValueError on the TOFU path.
|
||||
return (host or "localhost", 443)
|
||||
return (host or "localhost", int(port))
|
||||
|
||||
|
||||
def create_channel(options: ClientOptions) -> grpc.aio.Channel:
|
||||
@@ -120,9 +133,15 @@ def create_channel(options: ClientOptions) -> grpc.aio.Channel:
|
||||
else:
|
||||
# Lenient default: grpc-python has no per-channel skip-verify, so fetch the
|
||||
# server's certificate (unverified) and pin it for this channel (TOFU).
|
||||
# The probe opens a real blocking TCP+TLS socket, so it MUST be bounded —
|
||||
# a black-holed / firewall-drop host would otherwise hang on the OS default
|
||||
# connect timeout (minutes). Bound it by call_timeout (or a short fixed
|
||||
# fallback) so the dial fails fast as a transport error. The async
|
||||
# `connect` classmethods run this off the event loop (asyncio.to_thread).
|
||||
host, port = _split_authority(options.endpoint)
|
||||
probe_timeout = options.call_timeout if options.call_timeout else _TOFU_PROBE_TIMEOUT_SECONDS
|
||||
try:
|
||||
presented = ssl.get_server_certificate((host, port))
|
||||
presented = ssl.get_server_certificate((host, port), timeout=probe_timeout)
|
||||
except OSError as error:
|
||||
raise MxGatewayTransportError(
|
||||
f"failed to fetch TLS certificate from {options.endpoint}: {error}"
|
||||
|
||||
@@ -170,6 +170,13 @@ def gateway_options(command: Callable[..., Any]) -> Callable[..., Any]:
|
||||
command = click.option("--plaintext", is_flag=True, help="Use plaintext gRPC.")(command)
|
||||
command = click.option("--tls", "use_tls", is_flag=True, help="Use TLS gRPC.")(command)
|
||||
command = click.option("--ca-file", default=None, help="Custom root certificate file.")(command)
|
||||
command = click.option(
|
||||
"--require-certificate-validation",
|
||||
"require_certificate_validation",
|
||||
is_flag=True,
|
||||
help="Verify the TLS certificate against the system trust store "
|
||||
"instead of the lenient trust-on-first-use default.",
|
||||
)(command)
|
||||
command = click.option(
|
||||
"--server-name-override",
|
||||
default=None,
|
||||
@@ -923,6 +930,7 @@ async def _connect(kwargs: dict[str, Any]) -> GatewayClient:
|
||||
api_key=api_key,
|
||||
plaintext=_use_plaintext(kwargs),
|
||||
ca_file=kwargs.get("ca_file"),
|
||||
require_certificate_validation=bool(kwargs.get("require_certificate_validation")),
|
||||
server_name_override=kwargs.get("server_name_override"),
|
||||
call_timeout=kwargs.get("call_timeout"),
|
||||
stream_timeout=kwargs.get("stream_timeout"),
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
"""Tests for auth metadata and connection options."""
|
||||
|
||||
import socket
|
||||
|
||||
import pytest
|
||||
|
||||
from zb_mom_ww_mxgateway.auth import REDACTED, ApiKey, auth_metadata, redact_secret
|
||||
from zb_mom_ww_mxgateway import options as options_module
|
||||
from zb_mom_ww_mxgateway.errors import MxGatewayTransportError
|
||||
from zb_mom_ww_mxgateway.options import ClientOptions, create_channel
|
||||
|
||||
|
||||
@@ -80,7 +83,9 @@ def test_create_channel_uses_tls_channel_tofu_default(monkeypatch: pytest.Monkey
|
||||
_DUMMY_PEM = "-----BEGIN CERTIFICATE-----\nZmFrZQ==\n-----END CERTIFICATE-----\n"
|
||||
get_cert_calls: list[tuple[str, int]] = []
|
||||
|
||||
def fake_get_server_certificate(addr: tuple[str, int]) -> str:
|
||||
def fake_get_server_certificate(
|
||||
addr: tuple[str, int], *, timeout: float | None = None
|
||||
) -> str:
|
||||
get_cert_calls.append(addr)
|
||||
return _DUMMY_PEM
|
||||
|
||||
@@ -133,7 +138,7 @@ def test_create_channel_uses_tls_channel_tofu_respects_server_name_override(
|
||||
monkeypatch.setattr(
|
||||
options_module.ssl,
|
||||
"get_server_certificate",
|
||||
lambda addr: _DUMMY_PEM,
|
||||
lambda addr, *, timeout=None: _DUMMY_PEM,
|
||||
)
|
||||
cred_calls: list[object] = []
|
||||
|
||||
@@ -276,3 +281,46 @@ def test_create_channel_uses_tls_channel_ca_file(
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def test_tofu_probe_passes_a_bounded_timeout(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that the TOFU certificate probe is given a finite, positive timeout.

    ``ssl.get_server_certificate`` and the gRPC channel constructors are replaced
    with fakes so no network traffic happens; the fake probe records the
    ``timeout`` keyword it receives.
    """
    dummy_pem = "-----BEGIN CERTIFICATE-----\nZmFrZQ==\n-----END CERTIFICATE-----\n"
    probe_kwargs: dict[str, object] = {}

    def recording_probe(addr: object, *, timeout: float | None = None) -> str:
        # Remember the timeout the code under test supplied.
        probe_kwargs["timeout"] = timeout
        return dummy_pem

    def fake_credentials(**_ignored: object) -> str:
        return "creds"

    def fake_secure_channel(endpoint: object, credentials: object, *, options: object) -> str:
        return "tls-channel"

    monkeypatch.setattr(options_module.ssl, "get_server_certificate", recording_probe)
    monkeypatch.setattr(options_module.grpc, "ssl_channel_credentials", fake_credentials)
    monkeypatch.setattr(options_module.grpc.aio, "secure_channel", fake_secure_channel)

    create_channel(ClientOptions(endpoint="gateway.example:5001", call_timeout=7.5))

    # The probe must have been bounded: a real number, positive, and no larger
    # than the call_timeout configured above.
    observed_timeout = probe_kwargs["timeout"]
    assert isinstance(observed_timeout, (int, float))
    assert 0 < observed_timeout <= 7.5
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "raised",
    [
        socket.timeout("timed out"),
        TimeoutError("timed out"),
        OSError("connection refused"),
    ],
)
def test_tofu_probe_timeout_raises_transport_error(
    monkeypatch: pytest.MonkeyPatch, raised: Exception
) -> None:
    """Check that a failing probe is reported as ``MxGatewayTransportError``.

    Each parametrized exception is raised from the fake certificate probe; the
    resulting error must mention the endpoint that was being dialled.
    """

    def failing_probe(addr: object, *, timeout: float | None = None) -> str:
        raise raised

    monkeypatch.setattr(options_module.ssl, "get_server_certificate", failing_probe)
    client_options = ClientOptions(endpoint="gateway.example:5001")

    with pytest.raises(MxGatewayTransportError) as failure:
        create_channel(client_options)

    assert client_options.endpoint in str(failure.value)
|
||||
|
||||
@@ -2,14 +2,79 @@
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
|
||||
from zb_mom_ww_mxgateway import __version__
|
||||
from zb_mom_ww_mxgateway_cli import commands as commands_module
|
||||
from zb_mom_ww_mxgateway_cli.commands import main
|
||||
|
||||
_BATCH_EOR = "__MXGW_BATCH_EOR__"
|
||||
|
||||
|
||||
def test_require_certificate_validation_flag_flows_through_connect(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``--require-certificate-validation`` reaches the connect options (Client.Python-027)."""
    seen: dict[str, object] = {}

    async def fake_connect(options, **_kwargs):
        # Record what the CLI built, then hand back a fake client that supports
        # the ``async with`` protocol the command body relies on.
        seen["options"] = options
        return _FakeAsyncClient()

    monkeypatch.setattr(commands_module.GatewayClient, "connect", fake_connect)

    cli_args = [
        "open-session",
        "--endpoint",
        "gateway.example:5001",
        "--require-certificate-validation",
        "--json",
    ]
    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0, outcome.output
    assert seen["options"].require_certificate_validation is True
|
||||
|
||||
|
||||
def test_require_certificate_validation_defaults_off(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that strict validation stays disabled when the CLI flag is omitted."""
    seen: dict[str, object] = {}

    async def fake_connect(options, **_kwargs):
        # Capture the options the CLI passed to connect.
        seen["options"] = options
        return _FakeAsyncClient()

    monkeypatch.setattr(commands_module.GatewayClient, "connect", fake_connect)

    cli_args = ["open-session", "--endpoint", "gateway.example:5001", "--plaintext", "--json"]
    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0, outcome.output
    assert seen["options"].require_certificate_validation is False
|
||||
|
||||
|
||||
class _FakeAsyncClient:
    """Stand-in client for CLI tests: an async context manager with ``open_session_raw``."""

    async def __aenter__(self) -> "_FakeAsyncClient":
        # Entering the context yields the fake itself.
        return self

    async def __aexit__(self, *_exc: object) -> None:
        # Nothing to release; returning None never suppresses an exception.
        return None

    async def open_session_raw(self, *_args, **_kwargs):
        """Return a canned ``OpenSessionReply`` regardless of the arguments given."""
        # Imported lazily so the generated module is only loaded when this runs.
        from zb_mom_ww_mxgateway.generated import mxaccess_gateway_pb2 as gateway_pb

        reply = gateway_pb.OpenSessionReply(session_id="cli-test-session")
        return reply
|
||||
|
||||
|
||||
def test_version_json_is_deterministic() -> None:
|
||||
runner = CliRunner()
|
||||
|
||||
|
||||
@@ -8,9 +8,107 @@ from typing import Any
|
||||
import pytest
|
||||
|
||||
from zb_mom_ww_mxgateway import ClientOptions, GatewayClient, MxAccessError
|
||||
from zb_mom_ww_mxgateway import client as client_module
|
||||
from zb_mom_ww_mxgateway import galaxy as galaxy_module
|
||||
from zb_mom_ww_mxgateway.galaxy import GalaxyRepositoryClient
|
||||
from zb_mom_ww_mxgateway.generated import mxaccess_gateway_pb2 as pb
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_gateway_connect_forwards_require_certificate_validation(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``GatewayClient.connect`` passes the strict-validation kwarg on (Client.Python-027)."""
    seen: dict[str, Any] = {}

    def recording_create_channel(options: ClientOptions) -> object:
        # Capture the options handed to the channel factory.
        seen["options"] = options
        return object()

    def fake_stub(channel: object) -> object:
        return object()

    monkeypatch.setattr(client_module, "create_channel", recording_create_channel)
    monkeypatch.setattr(client_module.pb_grpc, "MxAccessGatewayStub", fake_stub)

    await GatewayClient.connect(
        endpoint="gateway.example:5001",
        require_certificate_validation=True,
    )

    forwarded = seen["options"]
    assert forwarded.require_certificate_validation is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_galaxy_connect_forwards_require_certificate_validation(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``GalaxyRepositoryClient.connect`` passes the strict-validation kwarg on (Client.Python-027)."""
    seen: dict[str, Any] = {}

    def recording_create_channel(options: ClientOptions) -> object:
        # Capture the options handed to the channel factory.
        seen["options"] = options
        return object()

    def fake_stub(channel: object) -> object:
        return object()

    monkeypatch.setattr(galaxy_module, "create_channel", recording_create_channel)
    monkeypatch.setattr(galaxy_module.galaxy_pb_grpc, "GalaxyRepositoryStub", fake_stub)

    await GalaxyRepositoryClient.connect(
        endpoint="gateway.example:5001",
        require_certificate_validation=True,
    )

    forwarded = seen["options"]
    assert forwarded.require_certificate_validation is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_gateway_connect_runs_create_channel_off_the_event_loop(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``GatewayClient.connect`` calls the channel factory off the event loop (Client.Python-028)."""
    observed: dict[str, bool] = {}

    def probing_create_channel(options: ClientOptions) -> object:
        # asyncio.get_running_loop() only succeeds on a thread that is running
        # an event loop, so a RuntimeError here means we are off the loop.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            observed["off_loop"] = True
        else:
            observed["off_loop"] = False
        return object()

    def fake_stub(channel: object) -> object:
        return object()

    monkeypatch.setattr(client_module, "create_channel", probing_create_channel)
    monkeypatch.setattr(client_module.pb_grpc, "MxAccessGatewayStub", fake_stub)

    await GatewayClient.connect(endpoint="gateway.example:5001")

    assert observed["off_loop"] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_galaxy_connect_runs_create_channel_off_the_event_loop(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``GalaxyRepositoryClient.connect`` calls the channel factory off the event loop (Client.Python-028)."""
    observed: dict[str, bool] = {}

    def probing_create_channel(options: ClientOptions) -> object:
        # asyncio.get_running_loop() only succeeds on a thread that is running
        # an event loop, so a RuntimeError here means we are off the loop.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            observed["off_loop"] = True
        else:
            observed["off_loop"] = False
        return object()

    def fake_stub(channel: object) -> object:
        return object()

    monkeypatch.setattr(galaxy_module, "create_channel", probing_create_channel)
    monkeypatch.setattr(galaxy_module.galaxy_pb_grpc, "GalaxyRepositoryStub", fake_stub)

    await GalaxyRepositoryClient.connect(endpoint="gateway.example:5001")

    assert observed["off_loop"] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_session_helpers_send_auth_metadata_and_preserve_raw_replies() -> None:
|
||||
stub = FakeGatewayStub()
|
||||
|
||||
@@ -134,6 +134,17 @@ def test_split_authority_parses_host_and_port() -> None:
|
||||
assert _split_authority(":5120") == ("localhost", 5120)
|
||||
|
||||
|
||||
def test_split_authority_defaults_port_for_portless_endpoint() -> None:
    """Check that endpoints without a usable port parse to the host with port 443."""
    from zb_mom_ww_mxgateway.options import _split_authority

    host_with_default_port = ("mygateway", 443)

    # Bare hostname with no ":port" suffix at all.
    assert _split_authority("mygateway") == host_with_default_port
    # The same bare hostname behind a scheme prefix.
    assert _split_authority("https://mygateway") == host_with_default_port
    # Trailing colon with nothing after it: not an explicit port.
    assert _split_authority("mygateway:") == host_with_default_port
|
||||
|
||||
|
||||
def test_split_authority_strips_ipv6_brackets() -> None:
|
||||
from zb_mom_ww_mxgateway.options import _split_authority
|
||||
|
||||
|
||||
@@ -4,16 +4,48 @@
|
||||
|---|---|
|
||||
| Module | `clients/python` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-05-24 |
|
||||
| Commit reviewed | `42b0037` |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
### 2026-06-15 re-review (commit 410acc9)
|
||||
|
||||
Re-review pass at `410acc9`. The diff against the previous review base
|
||||
`42b0037` covers: PyPI metadata + Gitea PyPI-feed install instructions in
|
||||
`pyproject.toml` / `README.md`; a new lazy Galaxy browse surface
|
||||
(`GalaxyRepositoryClient.browse_children_raw` / `browse` / `_iter_browse_children`,
|
||||
the `LazyBrowseNode` walker, and `BrowseChildrenOptions`); a TLS
|
||||
trust-on-first-use (TOFU) default in `options.py` gated by a new
|
||||
`ClientOptions.require_certificate_validation` flag; the `_use_plaintext`
|
||||
TLS-default contract carried forward; and the `batch` `CliRunner`-removal
|
||||
follow-through. The new browse / TOFU surface is well tested
|
||||
(`tests/test_galaxy.py`, `tests/test_auth_options.py`, `tests/test_tls.py`).
|
||||
|
||||
`python -m pytest` passes (80 passed, 1 skipped — the loopback-TLS test is
|
||||
opt-in via `MXGATEWAY_RUN_TLS_TESTS=1`). `python -m pip wheel .` builds the
|
||||
wheel cleanly against the installed setuptools 82.0.1.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Issue found: `_split_authority` raises an uncaught `ValueError` for a port-less endpoint instead of a transport error (Client.Python-029). |
|
||||
| 2 | mxaccessgw conventions | No new issues found — secrets still redacted, generated code untouched, no committed tokens in the new Gitea feed URLs (placeholders only). |
|
||||
| 3 | Concurrency & thread safety | No new issues found — `LazyBrowseNode.expand` uses a per-node `asyncio.Lock` with a double-checked guard and is verified concurrent-safe by `test_browse_expand_concurrent_callers_only_fire_one_rpc`. |
|
||||
| 4 | Error handling & resilience | Issue found: the TOFU branch calls the blocking `ssl.get_server_certificate` with no timeout from inside the `async def connect` path, blocking the event loop and hanging indefinitely on a black-holed host (Client.Python-028). |
|
||||
| 5 | Security | Issue found: the new `require_certificate_validation` security flag is not reachable through the documented `connect(...)` convenience kwargs or any CLI flag, so callers using those paths are locked into TOFU and cannot force certificate validation (Client.Python-027). TOFU itself is design-sanctioned (`docs/GatewayConfiguration.md` line 470). |
|
||||
| 6 | Performance & resource management | No new issues found beyond the blocking TLS probe captured in Client.Python-028. |
|
||||
| 7 | Design-document adherence | No new issues found — TOFU default, `require_certificate_validation` naming, and the BrowseChildren surface match `docs/GatewayConfiguration.md` / `docs/GalaxyRepository.md`; both README doc anchors resolve. |
|
||||
| 8 | Code organization & conventions | Issue found: `pyproject.toml` uses the PEP 639-deprecated `license = { text = ... }` table form (Client.Python-030). pyproject metadata is otherwise correct and the wheel builds. |
|
||||
| 9 | Testing coverage | Issue found: the `tls` pytest mark used by `tests/test_tls.py` is not registered in `[tool.pytest.ini_options]`, emitting a `PytestUnknownMarkWarning` (Client.Python-031). New browse / TOFU paths are otherwise well covered. |
|
||||
| 10 | Documentation & comments | No new issues found — README TLS/browse/Gitea-feed prose matches the code; the alarm-CLI README examples corrected under Client.Python-022 remain correct. |
|
||||
|
||||
### Prior coverage (commit a020350)
|
||||
|
||||
A re-review at commit `a020350` over the same module. Prior findings
|
||||
(Client.Python-001 — Client.Python-017) remain closed and are kept as
|
||||
history. This section reflects categories evaluated in this pass.
|
||||
history. This section reflects categories evaluated in that pass.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
@@ -1171,3 +1203,238 @@ scope; `test_commands_module_bench_read_bulk_does_not_use_bare_except_pass`
|
||||
greps the function source for the `except Exception:\n pass` pattern
|
||||
and rejects it. Both tests failed against the pre-fix source and pass
|
||||
against the fix.
|
||||
|
||||
### Client.Python-027
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Security |
|
||||
| Location | `clients/python/src/zb_mom_ww_mxgateway/client.py:36-54`, `clients/python/src/zb_mom_ww_mxgateway/galaxy.py:47-66`, `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:165-172,918-930` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** This commit adds `ClientOptions.require_certificate_validation`
|
||||
(default `False`) so a caller can force system-trust certificate verification
|
||||
instead of the new lenient trust-on-first-use (TOFU) default. The flag is
|
||||
honoured inside `create_channel`, but it is not surfaced through either of the
|
||||
two documented ways a normal caller dials the gateway:
|
||||
|
||||
1. `GatewayClient.connect(...)` and `GalaxyRepositoryClient.connect(...)` accept
|
||||
the convenience kwargs `endpoint` / `api_key` / `plaintext` / `ca_file` /
|
||||
`server_name_override` and build the `ClientOptions` internally, but do **not**
|
||||
accept or forward `require_certificate_validation`. The README's high-level
|
||||
examples (e.g. the lazy-browse walker) use exactly this kwarg form
|
||||
(`GalaxyRepositoryClient.connect(endpoint=..., api_key=..., plaintext=True)`),
|
||||
so the kwarg path is the primary documented entry point.
|
||||
2. The CLI exposes `--plaintext`, `--tls`, and `--ca-file` but no
|
||||
`--require-certificate-validation` flag, and `_connect` constructs
|
||||
`ClientOptions(...)` without setting the field. A CLI user connecting to a
|
||||
TLS gateway is therefore locked into TOFU.
|
||||
|
||||
The net effect is that the *only* way to opt into real certificate validation is
|
||||
to construct a `ClientOptions` instance directly and pass it as the positional
|
||||
`options=` argument — a path neither the README nor the CLI documents. A
|
||||
security-sensitive deployment that wants the strict (verify-against-system-trust)
|
||||
posture cannot select it through the documented surface, so it silently stays on
|
||||
TOFU. TOFU itself is design-sanctioned (`docs/GatewayConfiguration.md` line 470
|
||||
explicitly says "Python uses trust-on-first-use"), so this is an opt-in-to-strict
|
||||
reachability gap rather than an insecure default — hence Medium severity, since a workaround exists.
|
||||
|
||||
**Recommendation:** Add a `require_certificate_validation: bool = False` kwarg to
|
||||
both `GatewayClient.connect` and `GalaxyRepositoryClient.connect` and forward it
|
||||
into the constructed `ClientOptions`. Add a `--require-certificate-validation`
|
||||
(or `--verify-tls`) flag to the shared CLI option set and wire it through
|
||||
`_connect`. Add a test asserting the flag flows through to
|
||||
`ClientOptions.require_certificate_validation` and a README note documenting how
|
||||
to select the strict posture.
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed: `connect` built `ClientOptions` from a
|
||||
fixed kwarg set that omitted `require_certificate_validation`, and the CLI had no
|
||||
flag, so the strict posture was only reachable via a hand-built `options=`. Added
|
||||
a `require_certificate_validation: bool = False` kwarg to both
|
||||
`GatewayClient.connect` and `GalaxyRepositoryClient.connect` (forwarded into the
|
||||
constructed `ClientOptions`), a `--require-certificate-validation` flag to the
|
||||
shared `gateway_options` CLI option set, and wired it through `_connect`. README
|
||||
TLS section now documents that the strict posture is reachable via the connect kwarg,
|
||||
the options struct, and the CLI flag. Tests:
|
||||
`tests/test_client_session.py::test_gateway_connect_forwards_require_certificate_validation`,
|
||||
`::test_galaxy_connect_forwards_require_certificate_validation`,
|
||||
`tests/test_cli.py::test_require_certificate_validation_flag_flows_through_connect`,
|
||||
`::test_require_certificate_validation_defaults_off` — all failed before the fix
|
||||
and pass after.
|
||||
|
||||
### Client.Python-028
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `clients/python/src/zb_mom_ww_mxgateway/options.py:120-130`, `clients/python/src/zb_mom_ww_mxgateway/client.py:59`, `clients/python/src/zb_mom_ww_mxgateway/galaxy.py:71` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The TOFU branch of `create_channel` calls
|
||||
`ssl.get_server_certificate((host, port))` to pre-fetch the server certificate.
|
||||
`create_channel` is a synchronous function, but it is invoked exclusively from
|
||||
inside the `async def connect` classmethods of `GatewayClient` and
|
||||
`GalaxyRepositoryClient` (`client.py:59`, `galaxy.py:71`). `ssl.get_server_certificate`
|
||||
opens a real blocking TCP+TLS socket on the calling thread, so:
|
||||
|
||||
1. It **blocks the asyncio event loop** for the full duration of the connect/handshake.
|
||||
This is at odds with the rest of the client, which is fully `async`.
|
||||
2. It passes **no `timeout`** to `ssl.get_server_certificate`. The `test_tofu_connect_failure_raises_transport_error`
|
||||
test only proves the *connection-refused* case (a closed port returns fast).
|
||||
A black-holed / firewall-drop host (packets silently dropped) makes the
|
||||
underlying `socket.create_connection` hang on the OS default connect timeout,
|
||||
which can be minutes, with the event loop frozen the whole time. A caller that
|
||||
wrapped `connect` in `asyncio.wait_for(...)` cannot cancel it because the block
|
||||
is in synchronous C, not at an `await` point.
|
||||
|
||||
The other TLS branches (`ca_file`, `require_certificate_validation`) build the
|
||||
channel lazily and return immediately, so only the lenient default — the most
|
||||
common path — has this hazard.
|
||||
|
||||
**Recommendation:** Pass an explicit `timeout=` to `ssl.get_server_certificate`
|
||||
(it accepts one), bounded by `options.call_timeout` or a short fixed value, so a
|
||||
black-holed host fails fast as a `MxGatewayTransportError` instead of hanging.
|
||||
Better, run the synchronous probe off the event loop — make the TOFU pre-fetch
|
||||
path awaitable (e.g. wrap it in `asyncio.get_running_loop().run_in_executor(...)`
|
||||
from an `async` channel factory), or at minimum document that `connect` must not be called
|
||||
from a running loop. Add a regression test that asserts the probe honours a
|
||||
timeout.
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed: the TOFU branch called
|
||||
`ssl.get_server_certificate((host, port))` with no timeout from the synchronous
|
||||
`create_channel`, which both `connect` classmethods invoked directly on the event
|
||||
loop. Fix is two-part: (1) `create_channel` now passes
|
||||
`timeout=options.call_timeout` (falling back to a fixed
|
||||
`_TOFU_PROBE_TIMEOUT_SECONDS = 10.0` when no `call_timeout` is set) to
|
||||
`ssl.get_server_certificate`, and the existing `except OSError` wraps a
|
||||
timeout/connect failure into `MxGatewayTransportError` (`TimeoutError`/`socket.timeout`
|
||||
are `OSError` subclasses); (2) both `GatewayClient.connect` and
|
||||
`GalaxyRepositoryClient.connect` now run the blocking factory off the loop via
|
||||
`await asyncio.to_thread(create_channel, resolved)`, so the event loop is never
|
||||
frozen and a caller's `asyncio.wait_for` can cancel the awaiting `connect` (the worker thread itself still runs until the bounded probe returns). Tests:
|
||||
`tests/test_auth_options.py::test_tofu_probe_passes_a_bounded_timeout`,
|
||||
`::test_tofu_probe_timeout_raises_transport_error` (parametrized over
|
||||
`socket.timeout` / `TimeoutError` / `OSError`), and
|
||||
`tests/test_client_session.py::test_gateway_connect_runs_create_channel_off_the_event_loop`,
|
||||
`::test_galaxy_connect_runs_create_channel_off_the_event_loop`. The timeout and
|
||||
off-loop tests failed before the fix and pass after.
|
||||
|
||||
### Client.Python-029
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/python/src/zb_mom_ww_mxgateway/options.py:78-90` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `_split_authority` parses a non-bracketed target with
|
||||
`host, _, port = target.rpartition(":")` and returns
|
||||
`(host or "localhost", int(port) if port else 443)`. For a port-less endpoint
|
||||
such as `"mygateway"`, `rpartition(":")` returns `("", "", "mygateway")`, so
|
||||
`host` becomes `""` (→ `"localhost"`) and `port` becomes `"mygateway"`, and
|
||||
`int("mygateway")` raises an uncaught `ValueError: invalid literal for int()`.
|
||||
Because `_split_authority` is called *before* the `try/except OSError` guard in
|
||||
`create_channel`, the failure escapes as a raw `ValueError` rather than the
|
||||
intended `MxGatewayTransportError`, and the message does not name the endpoint.
|
||||
Verified at runtime:
|
||||
`_split_authority("mygateway")` → `ValueError: invalid literal for int() with base 10: 'mygateway'`.
|
||||
gRPC targets normally carry an explicit port (`host:port`), so impact is narrow,
|
||||
but a typo or a bare-hostname endpoint produces a confusing crash on the TOFU
|
||||
default path. The bracketed-IPv6 and `host:port` cases are covered by tests; the
|
||||
port-less case is not.
|
||||
|
||||
**Recommendation:** Treat a non-numeric / missing port as the default (443) and
|
||||
keep the whole string as the host, e.g. detect a trailing `:<digits>` explicitly
|
||||
rather than assuming the `rpartition` tail is numeric, or wrap the `int(port)`
|
||||
conversion so a non-numeric tail falls back to host-only with the default port.
|
||||
Add a `_split_authority("mygateway")` case to `tests/test_tls.py`.
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed: `_split_authority("mygateway")` raised
|
||||
`ValueError: invalid literal for int() with base 10: 'mygateway'` because
|
||||
`rpartition(":")` put the whole string in the port slot. Rewrote the
|
||||
non-bracketed branch to inspect the `rpartition` separator and the tail: no colon
|
||||
→ whole target is the host with default port 443; a colon with a non-digit/empty
|
||||
tail → left side is the host with default port 443; a digit tail → parse the
|
||||
port. The bare-hostname case now returns `("mygateway", 443)` instead of raising,
|
||||
and the existing `":5120"` / `"localhost:5120"` / IPv6 cases are unchanged. Test:
|
||||
`tests/test_tls.py::test_split_authority_defaults_port_for_portless_endpoint`
|
||||
(covers `"mygateway"`, `"https://mygateway"`, and `"mygateway:"`) — failed before
|
||||
the fix and passes after.
|
||||
|
||||
### Client.Python-030
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/python/pyproject.toml:17` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** This commit re-adds a `license` key to `pyproject.toml` as the
|
||||
table form `license = { text = "Proprietary" }`. Under PEP 639 (active in the
|
||||
installed setuptools 82.0.1), the `[project.license]` **table** forms (`text` and
|
||||
`file`) are deprecated in favour of the SPDX string expression, and a future
|
||||
setuptools major may reject them — the same class of regression that
|
||||
Client.Python-018 (the earlier `license = "Proprietary"` string, rejected because
|
||||
`Proprietary` is not a valid SPDX identifier) recorded for this exact field. The
|
||||
build currently succeeds (verified: `python -m pip wheel .` produces
|
||||
`zb_mom_ww_mxaccess_gateway_client-0.1.0-py3-none-any.whl` and the metadata
|
||||
carries `License: Proprietary` plus the `License :: Other/Proprietary License`
|
||||
classifier), so this is a forward-looking maintainability flag, not a present
|
||||
breakage. Note that pairing a `license` table with a `License ::` trove
|
||||
classifier is also flagged by PyPI/twine as redundant under the new metadata
|
||||
rules.
|
||||
|
||||
**Recommendation:** Prefer the PEP 639 SPDX-string form with a `LicenseRef-*`
|
||||
custom identifier for an unlisted licence (`license = "LicenseRef-Proprietary"`)
|
||||
— this is the future-proof equivalent of the intent and avoids the deprecated
|
||||
table form — or drop the `license` key entirely and rely on the existing
|
||||
`License :: Other/Proprietary License` classifier (the Client.Python-018
|
||||
resolution chose this). The `tests/test_packaging.py::test_pip_wheel_build_succeeds`
|
||||
guard (added under Client.Python-020) will catch the day a setuptools upgrade
|
||||
turns the deprecation into a hard error.
|
||||
|
||||
**Resolution:** 2026-06-15 — Switched the deprecated `license = { text =
|
||||
"Proprietary" }` table form to the PEP 639 SPDX-string form
|
||||
`license = "LicenseRef-Proprietary"` (the future-proof custom identifier for an
|
||||
unlisted/proprietary licence). Also removed the now-redundant
|
||||
`License :: Other/Proprietary License` trove classifier, which setuptools >= 77
|
||||
flags as conflicting when a `License-Expression` is present. The built wheel
|
||||
metadata now carries `License-Expression: LicenseRef-Proprietary` and no
|
||||
`Classifier: License ::` line. Verified by `python -m pip wheel . --no-deps`,
|
||||
which builds cleanly; the existing
|
||||
`tests/test_packaging.py::test_pip_wheel_build_succeeds` guard exercises the same
|
||||
build and passes.
|
||||
|
||||
### Client.Python-031
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/python/tests/test_tls.py:34`, `clients/python/pyproject.toml:53-56` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `tests/test_tls.py` applies a module-level
|
||||
`pytestmark = pytest.mark.tls`, but the `tls` marker is not registered in
|
||||
`[tool.pytest.ini_options]` (which declares only `addopts`, `pythonpath`, and
|
||||
`testpaths`). Every run emits a `PytestUnknownMarkWarning: Unknown
|
||||
pytest.mark.tls - is this a typo?`. The warning is benign today, but (a) this
|
||||
warning exists precisely to catch mistyped marker names, so a future genuinely
|
||||
mistyped marker would be lost in the noise, and (b) if the suite ever adopts
|
||||
`filterwarnings = ["error"]` (a common hardening step), the unregistered marker
|
||||
would turn into a hard collection failure.
|
||||
|
||||
**Recommendation:** Register the marker, e.g.
|
||||
`markers = ["tls: loopback TLS tests, opt-in via MXGATEWAY_RUN_TLS_TESTS=1"]`
|
||||
under `[tool.pytest.ini_options]` in `clients/python/pyproject.toml`.
|
||||
|
||||
**Resolution:** 2026-06-15 — Registered the `tls` marker by adding
|
||||
`markers = ["tls: loopback TLS tests, opt-in via MXGATEWAY_RUN_TLS_TESTS=1"]`
|
||||
under `[tool.pytest.ini_options]` in `clients/python/pyproject.toml`.
|
||||
`python -m pytest` now reports no `PytestUnknownMarkWarning` (full run: 91
|
||||
passed, 1 skipped, 0 warnings; previously 1 warning). The `tls`-marked
|
||||
`tests/test_tls.py` module is the guard — its run is now warning-free.
|
||||
|
||||
Reference in New Issue
Block a user