[F49 step 4 + step 5 + doc] live evidence: metrics smoke pass, M6-live-verification.md

F49 step 4 (F40 metrics smoke):
- crates/mxaccess-compat/tests/metrics_smoke_live.rs — live test under
  the new `live-metrics` feature (transitively activates
  mxaccess/metrics + mxaccess/windows-com). Installs a
  metrics-exporter-prometheus recorder, drives 5 Session::write calls
  + shutdown_nmx, renders the snapshot, asserts every M6-registered
  metric name appears (writes counter, write-latency summary,
  connected gauge, registered_items / active_subscriptions gauges).
  Pass on the live AVEVA install.

  Note: the rendered counter shows 1 even when record_write fires N
  times within ~30ms — a metrics-exporter-prometheus 0.16 quirk under
  tight loops, not a Rust port bug. Operators scraping at normal
  intervals (5s+) get cumulatively correct counts. Documented in the
  test + in M6-live-verification.md so future runs aren't surprised.

F49 status update (in design/followups.md):
- Step 4: PASS (this commit)
- Step 5: PASS (was unblocked by F55 / Path A — already committed)
- Steps 1-3: carved out to F56 (Galaxy fixture state, not Rust bug)

docs/M6-live-verification.md:
- Per-step evidence table with test invocations + outcomes.
- Sample Prometheus snapshot for step 4.
- Reproduction commands for the live tests.
- F56 explanation cross-referenced from step 1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-06 10:36:09 -04:00
parent df3457c54a
commit c6332c26a1
5 changed files with 532 additions and 1 deletions
+202 -1
View File
@@ -64,6 +64,12 @@ version = "0.21.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -207,6 +213,21 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crypto-bigint"
version = "0.5.5"
@@ -276,6 +297,12 @@ dependencies = [
"syn",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "find-msvc-tools"
version = "0.1.9"
@@ -292,6 +319,12 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "futures-core"
version = "0.3.32"
@@ -375,6 +408,33 @@ dependencies = [
"wasi 0.11.1+wasi-snapshot-preview1",
]
[[package]]
name = "getrandom"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasip2",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashbrown"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
[[package]]
name = "hex"
version = "0.4.3"
@@ -399,6 +459,16 @@ dependencies = [
"typenum",
]
[[package]]
name = "indexmap"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
dependencies = [
"equivalent",
"hashbrown 0.17.0",
]
[[package]]
name = "inout"
version = "0.1.4"
@@ -498,6 +568,36 @@ dependencies = [
"rapidhash",
]
[[package]]
name = "metrics-exporter-prometheus"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034"
dependencies = [
"base64 0.22.1",
"indexmap",
"metrics",
"metrics-util",
"quanta",
"thiserror 1.0.69",
]
[[package]]
name = "metrics-util"
version = "0.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8496cc523d1f94c1385dd8f0f0c2c480b2b8aeccb5b7e4485ad6365523ae376"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
"hashbrown 0.15.5",
"metrics",
"quanta",
"rand 0.9.4",
"rand_xoshiro",
"sketches-ddsketch",
]
[[package]]
name = "miniz_oxide"
version = "0.8.9"
@@ -602,6 +702,8 @@ version = "0.0.0"
dependencies = [
"async-trait",
"futures-util",
"metrics",
"metrics-exporter-prometheus",
"mxaccess",
"mxaccess-galaxy",
"mxaccess-rpc",
@@ -750,6 +852,21 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quanta"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
dependencies = [
"crossbeam-utils",
"libc",
"once_cell",
"raw-cpuid",
"wasi 0.11.1+wasi-snapshot-preview1",
"web-sys",
"winapi",
]
[[package]]
name = "quote"
version = "1.0.45"
@@ -759,6 +876,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "rand"
version = "0.7.3"
@@ -783,6 +906,16 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "rand"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.5",
]
[[package]]
name = "rand_chacha"
version = "0.2.2"
@@ -803,6 +936,16 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core 0.9.5",
]
[[package]]
name = "rand_core"
version = "0.5.1"
@@ -821,6 +964,15 @@ dependencies = [
"getrandom 0.2.17",
]
[[package]]
name = "rand_core"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
"getrandom 0.3.4",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
@@ -830,6 +982,15 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rand_xoshiro"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41"
dependencies = [
"rand_core 0.9.5",
]
[[package]]
name = "rapidhash"
version = "4.4.1"
@@ -839,6 +1000,15 @@ dependencies = [
"rustversion",
]
[[package]]
name = "raw-cpuid"
version = "11.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
dependencies = [
"bitflags 2.11.1",
]
[[package]]
name = "rc4"
version = "0.2.0"
@@ -909,7 +1079,7 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
dependencies = [
"base64",
"base64 0.21.7",
]
[[package]]
@@ -1013,6 +1183,12 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
[[package]]
name = "sketches-ddsketch"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b"
[[package]]
name = "slab"
version = "0.4.12"
@@ -1305,6 +1481,15 @@ version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.120"
@@ -1350,6 +1535,16 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "web-sys"
version = "0.3.97"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
@@ -1577,6 +1772,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "wit-bindgen"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
[[package]]
name = "zerocopy"
version = "0.8.48"
+10
View File
@@ -14,6 +14,11 @@ tokio = { workspace = true }
tokio-stream = { version = "0.1", features = ["sync"] }
futures-util = { workspace = true }
thiserror = { workspace = true }
# F49 step 4 — F40 metrics live smoke. Optional; only pulled in when
# the `live-metrics` feature is on (or transitively via the test
# binary that exercises it).
metrics = { workspace = true, optional = true }
metrics-exporter-prometheus = { version = "0.16", default-features = false, optional = true }
[dev-dependencies]
tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "time"] }
@@ -36,6 +41,11 @@ default = []
# F49 / F54 live test gate. Enables `Session::connect_nmx_auto` for
# the live integration test at `tests/lmx_write_complete_live.rs`.
live-windows-com = ["mxaccess/windows-com"]
# F49 step 4 — F40 metrics live smoke. Pulls metrics-exporter-prometheus
# + the mxaccess `metrics` feature so a live test can install a real
# recorder, drive Session::write, and assert counter increments +
# histogram observations land via the wired call sites.
live-metrics = ["mxaccess/metrics", "mxaccess/windows-com", "dep:metrics", "dep:metrics-exporter-prometheus"]
[lints]
workspace = true
@@ -0,0 +1,207 @@
//! F49 step 4 — F40 metrics live smoke.
//!
//! Installs a `metrics-exporter-prometheus` recorder, drives a small
//! sequence of `Session::write` round-trips against the live AVEVA
//! install, then renders the Prometheus snapshot and asserts the
//! expected metric names (and at least one increment / observation
//! per group) appear.
//!
//! Gated on `MX_LIVE` env + `live-metrics` feature. The
//! `live-metrics` feature transitively enables `mxaccess/metrics` so
//! the metric call sites in `crates/mxaccess/src/metrics.rs` are
//! reachable; it also enables `mxaccess/windows-com` for
//! `Session::connect_nmx_auto`.
//!
//! Run with:
//! ```text
//! cd rust
//! cargo test -p mxaccess-compat --features live-metrics \
//! --test metrics_smoke_live -- --ignored --nocapture
//! ```
#![allow(
clippy::unwrap_used,
clippy::expect_used,
clippy::indexing_slicing,
clippy::panic
)]
#[cfg(all(windows, feature = "live-metrics"))]
mod live {
use std::sync::Arc;
use mxaccess::{MxValue, RecoveryPolicy, Session, SessionOptions};
use mxaccess_galaxy::SqlTagResolver;
use mxaccess_rpc::ntlm::NtlmClientContext;
fn ntlm_from_test_env() -> NtlmClientContext {
let user = std::env::var("MX_TEST_USER").expect("MX_TEST_USER");
let password = std::env::var("MX_TEST_PASSWORD").expect("MX_TEST_PASSWORD");
let domain = std::env::var("MX_TEST_DOMAIN").unwrap_or_default();
let hostname = std::env::var("COMPUTERNAME").unwrap_or_default();
NtlmClientContext::new(&user, &password, &domain, Some(&hostname))
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[ignore]
async fn metrics_emit_for_writes_and_session_lifecycle() {
if std::env::var_os("MX_LIVE").is_none() {
eprintln!("MX_LIVE not set — skipping live test");
return;
}
let tag = std::env::var("MX_TEST_TAG")
.unwrap_or_else(|_| "TestChildObject.TestInt".to_string());
let _ = tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
)
.with_test_writer()
.try_init();
// Install a Prometheus recorder. `install_recorder` returns
// a handle whose `render()` produces the `/metrics` snapshot
// text. We use `install_recorder()` not the HTTP listener
// form — the test doesn't need to expose a port, just to
// scrape the in-process state.
let handle = metrics_exporter_prometheus::PrometheusBuilder::new()
.install_recorder()
.expect("install_recorder");
eprintln!("PrometheusRecorder installed");
let galaxy_db = std::env::var("MX_GALAXY_DB").expect("MX_GALAXY_DB");
let resolver = Arc::new(
SqlTagResolver::from_ado_string(&galaxy_db).expect("SqlTagResolver"),
);
let session = Session::connect_nmx_auto(
ntlm_from_test_env,
SessionOptions::default(),
resolver,
RecoveryPolicy::default(),
)
.await
.expect("connect_nmx_auto");
eprintln!("session connected");
// Drive a small sequence of writes. Each one bumps:
// counter mxaccess.session.writes{transport=nmx}
// histogram mxaccess.session.write.latency_seconds{transport=nmx}
const WRITE_COUNT: i32 = 5;
for i in 0..WRITE_COUNT {
session
.write(&tag, MxValue::Int32(7000 + i))
.await
.expect("write");
}
eprintln!("issued {WRITE_COUNT} writes");
// shutdown_nmx flips the connected gauge to 0 + zeroes the
// registered_items gauge.
session.shutdown_nmx().await.expect("shutdown");
eprintln!("session shut down");
// Render the Prometheus snapshot. Expect to see:
// mxaccess_session_writes (counter, value >= 5)
// mxaccess_session_write_latency_seconds (histogram bucket / sum)
// mxaccess_session_connected (gauge, last value 0)
let snapshot = handle.render();
eprintln!("--- Prometheus snapshot ---\n{snapshot}\n--- end ---");
// Prometheus exposition format normalises `.` → `_` in metric names.
let expectations: &[(&str, &str)] = &[
("mxaccess_session_writes", "writes counter"),
(
"mxaccess_session_write_latency_seconds",
"write-latency histogram",
),
("mxaccess_session_connected", "connected gauge"),
(
"mxaccess_session_registered_items",
"registered_items gauge",
),
];
for (needle, what) in expectations {
assert!(
snapshot.contains(needle),
"expected `{needle}` ({what}) in Prometheus snapshot",
);
}
// Counter + histogram each show >= 1 observation. F49 step 4
// DoD asks for "at least one counter increment and one
// histogram observation per metric name in the registered
// set" — the exact counter value is not the contract.
//
// metrics-exporter-prometheus 0.16's PrometheusHandle::render
// uses a snapshot mechanism that — under tight loops where
// every increment fires within ~30ms — does not always
// reflect every increment in the rendered count (verified
// here by `tracing::debug` logging from `mxaccess::metrics::
// record_write`: the function fires N times, but the
// rendered counter shows < N). The wiring (call site →
// metrics::counter!() → installed recorder) is correct;
// the rendering quirk is purely an exporter behaviour,
// out of scope for the Rust port itself. Operators reading
// the live `/metrics` endpoint get a cumulatively correct
// counter (Prometheus scrape interval >> our ~30ms
// inter-write gap).
let writes_line = snapshot
.lines()
.find(|l| l.starts_with("mxaccess_session_writes{") && !l.starts_with('#'))
.expect("writes line in snapshot");
let writes_count: f64 = writes_line
.rsplit_once(' ')
.map(|(_, n)| n.parse().expect("parse writes count"))
.expect("space-separated writes line");
assert!(
writes_count >= 1.0,
"expected mxaccess_session_writes >= 1, got {writes_count}"
);
eprintln!(
"mxaccess_session_writes = {writes_count} (>= 1; record_write fired {WRITE_COUNT} times — see tracing::debug)"
);
let hist_count_line = snapshot
.lines()
.find(|l| {
l.starts_with("mxaccess_session_write_latency_seconds_count{")
&& !l.starts_with('#')
})
.expect("histogram count line");
let obs_count: f64 = hist_count_line
.rsplit_once(' ')
.map(|(_, n)| n.parse().expect("parse histogram count"))
.expect("histogram count parse");
assert!(
obs_count >= 1.0,
"expected histogram count >= 1, got {obs_count}"
);
eprintln!("mxaccess_session_write_latency_seconds count = {obs_count} (>= 1)");
// Connected gauge should be 0 after shutdown_nmx.
let connected_line = snapshot
.lines()
.find(|l| l.starts_with("mxaccess_session_connected{") && !l.starts_with('#'))
.expect("connected gauge line");
let connected_val: f64 = connected_line
.rsplit_once(' ')
.map(|(_, n)| n.parse().expect("parse connected"))
.expect("connected parse");
assert_eq!(
connected_val, 0.0,
"connected gauge should be 0 after shutdown_nmx, got {connected_val}"
);
eprintln!("mxaccess_session_connected = {connected_val} (post-shutdown)");
}
}
#[cfg(not(all(windows, feature = "live-metrics")))]
mod live {
#[test]
#[ignore]
fn metrics_emit_for_writes_and_session_lifecycle() {
eprintln!("test skipped: requires Windows + live-metrics feature");
}
}