fix(client/rust): handle provider_status arm (build break); real system-roots TLS; design doc (Client.Rust-030..032)

This commit is contained in:
Joseph Doherty
2026-06-15 02:39:11 -04:00
parent 47062c1a6e
commit b57d02cc4d
7 changed files with 442 additions and 65 deletions
+67
View File
@@ -207,6 +207,22 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
[[package]]
name = "core-foundation"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "either"
version = "1.15.0"
@@ -597,6 +613,12 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "openssl-probe"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
[[package]]
name = "percent-encoding"
version = "2.3.2"
@@ -796,6 +818,18 @@ dependencies = [
"zeroize",
]
[[package]]
name = "rustls-native-certs"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
dependencies = [
"openssl-probe",
"rustls-pki-types",
"schannel",
"security-framework",
]
[[package]]
name = "rustls-pki-types"
version = "1.14.1"
@@ -816,6 +850,38 @@ dependencies = [
"untrusted",
]
[[package]]
name = "schannel"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "security-framework"
version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
dependencies = [
"bitflags",
"core-foundation",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework-sys"
version = "2.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "semver"
version = "1.0.28"
@@ -1056,6 +1122,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"prost",
"rustls-native-certs",
"socket2 0.5.10",
"tokio",
"tokio-rustls",
+1 -1
View File
@@ -37,7 +37,7 @@ serde_json = "1.0.145"
thiserror = "2.0.17"
tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread", "sync", "time"] }
tokio-stream = { version = "0.1.17", features = ["net"] }
tonic = { version = "0.13.1", features = ["transport", "tls-ring"] }
tonic = { version = "0.13.1", features = ["transport", "tls-ring", "tls-native-roots"] }
tonic-build = "0.13.1"
[dependencies]
+13 -5
View File
@@ -81,11 +81,19 @@ cargo run -p mxgw-cli -- smoke --endpoint https://mxgateway.example.local:5001 -
The gateway can auto-generate its own self-signed certificate (it has no PKI).
Unlike the other clients, the Rust client is **not** lenient: tonic 0.13.1
exposes no public hook to inject a custom certificate verifier, so TLS over Rust
is pin-only. A TLS connection requires either `--ca-file` /
`ClientOptions::with_ca_file(...)` to pin a CA (export the gateway's self-signed
certificate and pin it), or `--require-certificate-validation` /
`with_require_certificate_validation(true)` to verify against the system trust
roots. TLS with neither set fails `connect` with a clear, actionable error rather
cannot accept an *arbitrary* self-signed certificate. A TLS connection requires
one of two trust paths:
- `--ca-file` / `ClientOptions::with_ca_file(...)` to pin a CA (export the
gateway's self-signed certificate and pin it). This is the path for a
self-signed gateway.
- `--require-certificate-validation` / `with_require_certificate_validation(true)`
to verify against the operating system's trust roots (`tls-native-roots`). This
only succeeds for a certificate that chains to a root the host already trusts —
i.e. a gateway fronted by a publicly- or enterprise-CA-issued certificate, not a
bare self-signed one.
TLS with neither set fails `connect` with a clear, actionable error rather
than accepting the certificate. See
[Gateway Configuration](../../docs/GatewayConfiguration.md#automatic-self-signed-certificate).
+80 -12
View File
@@ -162,12 +162,73 @@ impl GatewayClient {
`stream_alarms` opens with one `active_alarm` per currently-active alarm
(the ConditionRefresh snapshot), then a single `snapshot_complete`, then a
`transition` for every subsequent raise / acknowledge / clear. The feed is
served by the gateway's always-on alarm monitor — no worker session is
opened — so any number of clients may attach. Dropping the stream cancels
the gRPC call cooperatively. `acknowledge_alarm` is idempotent at the
MxAccess layer; the returned `AcknowledgeAlarmReply` carries the native
MxStatus from the worker.
`transition` for every subsequent raise / acknowledge / clear. A fourth
`provider_status` oneof case (`AlarmProviderStatus`: `mode`, `degraded`,
`reason`, `since`) is emitted once on stream open and again on every
failover/failback so late joiners learn the current alarm-provider mode.
The CLI renders all four cases in both its one-line summary and its
protobuf-JSON output (`alarm_feed_message_summary` /
`alarm_feed_message_to_json`). The feed is served by the gateway's always-on
alarm monitor — no worker session is opened — so any number of clients may
attach. Dropping the stream cancels the gRPC call cooperatively.
`acknowledge_alarm` is idempotent at the MxAccess layer; the returned
`AcknowledgeAlarmReply` carries the native MxStatus from the worker.
## Galaxy Repository
`GalaxyClient` is a session-less metadata client (requires the
`metadata:read` API-key scope). Alongside `test_connection`,
`get_last_deploy_time`, `discover_hierarchy`, and `watch_deploy_events`, it
exposes a lazy hierarchy walker built on the `BrowseChildren` RPC:
```rust
impl GalaxyClient {
pub async fn browse(&mut self, options: Option<BrowseChildrenOptions>) -> Result<Vec<LazyBrowseNode>, Error>;
pub async fn browse_children_raw(&mut self, request: BrowseChildrenRequest) -> Result<BrowseChildrenReply, Error>;
}
pub struct BrowseChildrenOptions {
pub category_ids: Vec<i32>,
pub template_chain_contains: Vec<String>,
pub tag_name_glob: Option<String>,
pub include_attributes: Option<bool>,
pub alarm_bearing_only: bool,
pub historized_only: bool,
}
impl LazyBrowseNode {
pub fn object(&self) -> &GalaxyObject;
pub fn has_children_hint(&self) -> bool;
pub async fn children(&self) -> Vec<LazyBrowseNode>;
pub async fn is_expanded(&self) -> bool;
pub async fn expand(&self) -> Result<(), Error>;
}
```
- `browse(options)` returns the root objects as `LazyBrowseNode`s. The
supplied `BrowseChildrenOptions` filter is captured and reused when any
returned node is expanded, so a single filter set scopes the entire walk.
- `BrowseChildrenOptions` mirrors the request-level filters on the wire and
combines them with **AND**: a child appears only when it satisfies every
populated criterion (`category_ids` membership, every
`template_chain_contains` substring, the `tag_name_glob`, plus the
`alarm_bearing_only` / `historized_only` flags). `include_attributes` is a
tri-state (`None` = server default). Empty/`None` fields impose no
restriction. See
[Galaxy Repository — BrowseChildren](../../docs/GalaxyRepository.md#browsechildren)
for the wire-level semantics.
- `LazyBrowseNode` is cheap to clone — clones share state through an internal
`Arc`, so expanding one clone makes the children visible to every clone.
`has_children_hint()` exposes the server's `child_has_children` hint so a UI
can draw an expand affordance without issuing an RPC. `expand()` is
idempotent: the first call issues a paged `BrowseChildren` walk (page size
500) under an async mutex held across the await, sets the `is_expanded`
flag, and caches the children; subsequent calls are no-ops and issue no
further RPCs. The internal paged loop guards against a server returning a
repeated `next_page_token` by failing with `Error::InvalidArgument` rather
than looping forever.
- `browse_children_raw` issues a single `BrowseChildren` RPC and returns the
raw reply for callers that want to drive paging themselves.
## Authentication
@@ -200,13 +261,20 @@ Rust client is therefore **pin-only** — it requires either:
- `ClientOptions::with_ca_file(...)` to pin a CA (the supported path for the
gateway's self-signed certificate; export the certificate and pin it), or
- `ClientOptions::with_require_certificate_validation(true)` to verify against the
system trust roots.
operating system's trust roots. This enables the `tonic` `tls-native-roots`
feature and calls `ClientTlsConfig::with_native_roots()`, so the handshake
validates a certificate that chains to a root the host already trusts. It does
**not** accept a bare self-signed gateway certificate — that still needs
`with_ca_file`.
With TLS enabled (`with_plaintext(false)`), no pinned CA, and certificate
validation not required, `GatewayClient::connect` rejects the connection with a
clear, actionable error pointing at `with_ca_file` /
`require_certificate_validation` rather than silently accepting the certificate.
The CLI exposes `--ca-file` and `--require-certificate-validation`.
`build_tls_config` computes the trust posture with the pure `tls_trust_decision`
helper (`None` / `PinnedCa` / `SystemRoots` / `RejectNoCa`) so the posture is
unit-testable without a live handshake. With TLS enabled (`with_plaintext(false)`),
no pinned CA, and certificate validation not required (`RejectNoCa`),
`GatewayClient::connect` rejects the connection with a clear, actionable error
pointing at `with_ca_file` / `require_certificate_validation` rather than building
a config with zero trust anchors. The CLI exposes `--ca-file` and
`--require-certificate-validation`.
## Streaming
+63 -1
View File
@@ -1726,7 +1726,7 @@ fn event_value_to_json(value: &ProtoMxValue) -> Value {
}
/// Render a streamed [`AlarmFeedMessage`] as a terse one-line summary that
/// distinguishes the three `payload` oneof cases.
/// distinguishes the four `payload` oneof cases.
fn alarm_feed_message_summary(message: &AlarmFeedMessage) -> String {
match &message.payload {
Some(alarm_feed_message::Payload::ActiveAlarm(snapshot)) => {
@@ -1746,6 +1746,14 @@ fn alarm_feed_message_summary(message: &AlarmFeedMessage) -> String {
AlarmEnumName::transition_kind(transition.transition_kind)
)
}
Some(alarm_feed_message::Payload::ProviderStatus(status)) => {
format!(
"provider-status mode={} degraded={} reason={:?}",
AlarmEnumName::provider_mode(status.mode),
status.degraded,
status.reason
)
}
None => "(empty)".to_owned(),
}
}
@@ -1784,6 +1792,17 @@ fn alarm_feed_message_to_json(message: &AlarmFeedMessage) -> Value {
"description": transition.description,
}
}),
Some(alarm_feed_message::Payload::ProviderStatus(status)) => json!({
"providerStatus": {
"mode": AlarmEnumName::provider_mode(status.mode),
"degraded": status.degraded,
"reason": status.reason,
"since": status.since.as_ref().map(|ts| json!({
"seconds": ts.seconds,
"nanos": ts.nanos,
})),
}
}),
None => Value::Null,
}
}
@@ -1806,6 +1825,13 @@ impl AlarmEnumName {
.map(|kind| kind.as_str_name().to_owned())
.unwrap_or_else(|_| value.to_string())
}
/// Name a raw `AlarmProviderMode` wire value.
///
/// Returns the enum's `as_str_name()` when `value` converts to a known
/// `AlarmProviderMode`; otherwise returns the number itself rendered as a
/// decimal string so an unrecognised value is still visible in the output.
fn provider_mode(value: i32) -> String {
    use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::AlarmProviderMode;

    match AlarmProviderMode::try_from(value) {
        Ok(known) => known.as_str_name().to_owned(),
        Err(_) => value.to_string(),
    }
}
}
/// Render an [`AcknowledgeAlarmReply`] as a terse line or a JSON document.
@@ -2165,4 +2191,40 @@ mod tests {
assert_eq!(frac.seconds, utc.seconds);
assert_eq!(frac.nanos, 250_000_000);
}
#[test]
fn alarm_feed_provider_status_renders_in_summary_and_json() {
    use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::{
        alarm_feed_message, AlarmFeedMessage, AlarmProviderMode, AlarmProviderStatus,
    };

    // A degraded provider status carrying every field the renderers read.
    let status = AlarmProviderStatus {
        mode: AlarmProviderMode::Subtag as i32,
        degraded: true,
        reason: "alarmmgr unavailable".to_owned(),
        since: Some(prost_types::Timestamp {
            seconds: 1_777_995_000,
            nanos: 0,
        }),
    };
    let message = AlarmFeedMessage {
        payload: Some(alarm_feed_message::Payload::ProviderStatus(status)),
    };

    // One-line summary: the case tag, the symbolic mode name and the
    // degraded flag must all be present.
    let summary = super::alarm_feed_message_summary(&message);
    for fragment in [
        "provider-status",
        "ALARM_PROVIDER_MODE_SUBTAG",
        "degraded=true",
    ] {
        assert!(summary.contains(fragment), "summary: {summary}");
    }

    // JSON rendering: every field is nested under the `providerStatus` key.
    let document = super::alarm_feed_message_to_json(&message);
    let provider = &document["providerStatus"];
    assert_eq!(provider["mode"], "ALARM_PROVIDER_MODE_SUBTAG");
    assert_eq!(provider["degraded"], true);
    assert_eq!(provider["reason"], "alarmmgr unavailable");
    assert_eq!(provider["since"]["seconds"], 1_777_995_000_i64);
}
}
+141 -44
View File
@@ -74,16 +74,22 @@ impl ClientOptions {
}
/// Require TLS certificate verification even without a pinned CA. Default
/// false: the gateway's self-signed certificate is accepted (internal-tool
/// posture). Setting a CA file always verifies.
/// false. Setting a CA file always verifies against that CA.
///
/// Note for Rust: tonic 0.13's `ClientTlsConfig` exposes no hook for a
/// custom rustls verifier, so the Rust client cannot accept an arbitrary
/// custom rustls verifier, so the Rust client cannot accept an *arbitrary*
/// self-signed certificate the way the other clients do. With the default
/// (false) and no pinned CA, [`crate::client::GatewayClient::connect`]
/// rejects the TLS connection and asks for a CA file. Either pin a CA via
/// [`ClientOptions::with_ca_file`] (the supported lenient path on Rust) or
/// set this `true` to verify against the system trust roots.
/// rejects the TLS connection and asks for a CA file. There are two
/// supported TLS paths:
///
/// - Pin the gateway certificate with [`ClientOptions::with_ca_file`] (the
/// lenient pin-only path; works for a self-signed gateway cert).
/// - Set this `true` to verify against the operating system's trust roots
/// (`tls-native-roots`). This only succeeds for a certificate that chains
/// to a root the host already trusts, so it is for gateways fronted by a
/// publicly- or enterprise-CA-issued certificate, not a bare self-signed
/// one.
pub fn with_require_certificate_validation(mut self, require: bool) -> Self {
self.require_certificate_validation = require;
self
@@ -175,26 +181,63 @@ impl ClientOptions {
}
}
/// The source of trust anchors for a connection, derived purely from the
/// client options.
///
/// [`tls_trust_decision`] computes the value and [`build_tls_config`] acts on
/// it. Keeping the decision as a plain value lets the trust posture be asserted
/// in unit tests without performing a real TLS handshake.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TlsTrustDecision {
    /// The transport is plaintext, so there is no TLS and nothing to trust.
    None,
    /// Trust the CA supplied through [`ClientOptions::with_ca_file`].
    PinnedCa,
    /// Trust the operating system's root store: selected when
    /// `require_certificate_validation` is `true` and no CA is pinned.
    SystemRoots,
    /// Refuse before connecting: TLS was requested but neither a pinned CA nor
    /// strict verification was configured (the lenient default, which on Rust
    /// is pin-only).
    RejectNoCa,
}
/// Classify `options` into a [`TlsTrustDecision`].
///
/// Only the option accessors are consulted — no file is read and no
/// connection is attempted. The checks are ordered by precedence:
///
/// 1. plaintext → [`TlsTrustDecision::None`]
/// 2. a CA file is configured → [`TlsTrustDecision::PinnedCa`]
/// 3. certificate validation is required → [`TlsTrustDecision::SystemRoots`]
/// 4. anything else → [`TlsTrustDecision::RejectNoCa`]
pub(crate) fn tls_trust_decision(options: &ClientOptions) -> TlsTrustDecision {
    if options.plaintext() {
        return TlsTrustDecision::None;
    }
    // A pinned CA wins over the strict flag.
    if options.ca_file().is_some() {
        return TlsTrustDecision::PinnedCa;
    }
    if options.require_certificate_validation() {
        TlsTrustDecision::SystemRoots
    } else {
        TlsTrustDecision::RejectNoCa
    }
}
/// Build the [`ClientTlsConfig`] for a non-plaintext connection described by
/// `options`, applying the lenient-default guard that is the **Rust
/// pin-only exception**.
///
/// Returns `Ok(None)` when `options.plaintext()` is `true` (no TLS needed).
/// Returns `Ok(Some(tls))` when a valid TLS config can be assembled.
/// Returns `Ok(Some(tls))` when a valid TLS config can be assembled — either
/// pinned to the CA from [`ClientOptions::with_ca_file`], or, when
/// `require_certificate_validation` is set with no pinned CA, verifying against
/// the operating system's trust roots (`tls-native-roots`).
/// Returns `Err(Error::InvalidEndpoint)` when TLS is requested but no pinned
/// CA was provided and `require_certificate_validation` is `false`.
///
/// # Why this guard exists
/// # Why the no-CA guard exists
///
/// `tonic` 0.13's `ClientTlsConfig` builds its rustls verifier inside a
/// crate-private connector and exposes no hook for a custom
/// `ServerCertVerifier`. The Rust client therefore cannot accept an arbitrary
/// `ServerCertVerifier`. The Rust client therefore cannot accept an *arbitrary*
/// self-signed certificate the way the other language clients do. Rather than
/// silently falling back to system-root verification (which always fails
/// against a self-signed gateway certificate), we reject the configuration
/// early with an actionable error.
/// silently falling back to a verifier with no trust anchors (which rejects
/// every certificate with a confusing handshake error), the lenient default
/// rejects the configuration early with an actionable error. The strict opt-in
/// instead loads the system trust roots so a certificate chaining to an
/// already-trusted root validates.
pub(crate) fn build_tls_config(options: &ClientOptions) -> Result<Option<ClientTlsConfig>, Error> {
if options.plaintext() {
let decision = tls_trust_decision(options);
if decision == TlsTrustDecision::None {
return Ok(None);
}
@@ -202,37 +245,46 @@ pub(crate) fn build_tls_config(options: &ClientOptions) -> Result<Option<ClientT
if let Some(server_name) = options.server_name_override() {
tls = tls.domain_name(server_name.to_owned());
}
if let Some(ca_file) = options.ca_file() {
let certificate = fs::read(ca_file).map_err(|source| Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: format!("failed to read CA file {}: {source}", ca_file.display()),
})?;
tls = tls.ca_certificate(Certificate::from_pem(certificate));
} else if !options.require_certificate_validation() {
// Lenient-default fallback (Rust pin-only exception): tonic
// 0.13's `ClientTlsConfig` builds its rustls verifier inside a
// crate-private connector and exposes no hook for a custom
// `ServerCertVerifier`, so — unlike the other clients — the
// Rust client cannot accept an arbitrary self-signed cert. Pin
// the gateway's CA instead, or opt into strict verification
// against the system trust roots. We reject here rather than
// silently verifying against system roots (which would fail a
// self-signed gateway with a confusing handshake error).
//
// Note: a server-name override affects SNI (the hostname sent
// in the TLS ClientHello) but does NOT pin trust. Overriding
// the server name alone does not bypass certificate validation.
return Err(Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: "TLS requested without a pinned CA. The Rust client cannot accept an \
arbitrary self-signed certificate (tonic 0.13 exposes no custom \
rustls verifier). Pin the gateway certificate with \
ClientOptions::with_ca_file, or call \
ClientOptions::with_require_certificate_validation(true) to verify \
against the system trust roots. Note: a server-name override \
affects SNI but does not pin trust."
.to_owned(),
});
match decision {
TlsTrustDecision::PinnedCa => {
let ca_file = options.ca_file().expect("PinnedCa implies a CA file");
let certificate = fs::read(ca_file).map_err(|source| Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: format!("failed to read CA file {}: {source}", ca_file.display()),
})?;
tls = tls.ca_certificate(Certificate::from_pem(certificate));
}
TlsTrustDecision::SystemRoots => {
// Strict opt-in with no pinned CA: verify against the OS trust
// store. Without this the bare `ClientTlsConfig` carries zero
// trust anchors and rejects every certificate, so the documented
// "verify against the system trust roots" behaviour would be
// unreachable. Only a certificate chaining to an already-trusted
// root validates — a bare self-signed gateway cert still needs
// `with_ca_file`.
tls = tls.with_native_roots();
}
TlsTrustDecision::RejectNoCa => {
// Lenient-default fallback (Rust pin-only exception): the Rust
// client cannot accept an arbitrary self-signed cert. Pin the
// gateway's CA, or opt into strict verification against the
// system trust roots.
//
// Note: a server-name override affects SNI (the hostname sent in
// the TLS ClientHello) but does NOT pin trust.
return Err(Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: "TLS requested without a pinned CA. The Rust client cannot accept an \
arbitrary self-signed certificate (tonic 0.13 exposes no custom \
rustls verifier). Pin the gateway certificate with \
ClientOptions::with_ca_file, or call \
ClientOptions::with_require_certificate_validation(true) to verify \
against the system trust roots. Note: a server-name override \
affects SNI but does not pin trust."
.to_owned(),
});
}
TlsTrustDecision::None => unreachable!("handled above"),
}
Ok(Some(tls))
}
@@ -269,6 +321,8 @@ mod tests {
use super::ClientOptions;
use crate::auth::ApiKey;
use super::{build_tls_config, tls_trust_decision, TlsTrustDecision};
#[test]
fn debug_redacts_api_key() {
let options =
@@ -279,4 +333,47 @@ mod tests {
assert!(debug.contains("<redacted>"));
assert!(!debug.contains("mxgw_secret"));
}
#[test]
fn plaintext_needs_no_tls() {
    // Plaintext transport: the decision is `None` and no TLS config is built.
    let plaintext_options = ClientOptions::new("http://127.0.0.1:5000").with_plaintext(true);

    let decision = tls_trust_decision(&plaintext_options);
    assert_eq!(decision, TlsTrustDecision::None);

    let tls = build_tls_config(&plaintext_options).unwrap();
    assert!(tls.is_none());
}
#[test]
fn pinned_ca_uses_pinned_trust() {
    // Only the pure decision is checked here, so the CA path does not need to
    // exist on disk.
    let pinned_options = ClientOptions::new("https://127.0.0.1:5000")
        .with_plaintext(false)
        .with_ca_file("/some/ca.pem");

    let decision = tls_trust_decision(&pinned_options);
    assert_eq!(decision, TlsTrustDecision::PinnedCa);
}
#[test]
fn strict_without_ca_uses_system_roots() {
    // Regression for Client.Rust-031. Strict verification without a pinned CA
    // has to resolve to the system trust roots rather than a config holding
    // zero trust anchors. First the decision is checked, then that the build
    // passes the no-CA guard and yields a config.
    let strict_options = ClientOptions::new("https://127.0.0.1:5000")
        .with_plaintext(false)
        .with_require_certificate_validation(true);

    let decision = tls_trust_decision(&strict_options);
    assert_eq!(
        decision,
        TlsTrustDecision::SystemRoots,
        "strict-no-CA must request the system trust roots"
    );

    let built = build_tls_config(&strict_options).unwrap();
    assert!(
        built.is_some(),
        "strict-no-CA must build a usable TLS config"
    );
}
#[test]
fn lenient_without_ca_is_rejected() {
    // TLS with neither a pinned CA nor strict verification: the decision is
    // `RejectNoCa` and building the TLS config returns an error.
    let lenient_options = ClientOptions::new("https://127.0.0.1:5000").with_plaintext(false);

    let decision = tls_trust_decision(&lenient_options);
    assert_eq!(decision, TlsTrustDecision::RejectNoCa);

    let outcome = build_tls_config(&lenient_options);
    assert!(outcome.is_err());
}
}