//! F38 — counting-allocator bench for `mxaccess-codec`. //! //! Measures allocation count + bytes-allocated for the proven //! encode/decode matrix per `design/70-risks-and-open-questions.md` //! R12 (< 5 allocs per write at steady state). The harness wraps the //! global allocator with a [`CountingAllocator`] that tracks //! per-call counts; each scenario records pre-state, runs N //! iterations, and reports `(alloc_count, bytes_allocated) / N`. //! //! Output is the source of truth for `design/M6-bench-baseline.md`. //! //! ## Why hand-rolled (not `dhat` / `criterion`) //! //! - `dhat` is heap-profiling oriented (snapshots, call-stack //! attribution); for "did this op allocate < 5 times?" the simpler //! approach is a thin `GlobalAlloc` wrapper that increments two //! atomics. No call-stack capture, no JSON output to post-process. //! - `criterion` measures wall-clock latency; per `60-roadmap.md:104`, //! latency is reported but not gating in V1. Allocation count IS //! the gating metric for M6 DoD bullet 3. //! //! ## Run //! //! ```text //! cargo bench -p mxaccess-codec //! ``` //! //! Each scenario runs in release mode by default (cargo bench //! profile = `bench` which inherits release). #![allow( clippy::unwrap_used, clippy::expect_used, clippy::cast_possible_truncation, clippy::cast_sign_loss )] use std::alloc::{GlobalAlloc, Layout, System}; use std::sync::atomic::{AtomicU64, Ordering}; use bytes::BytesMut; use mxaccess_codec::{ write_message, write_message::WriteValue, MxReferenceHandle, NmxSubscriptionMessage, }; // ---- counting allocator ------------------------------------------------- struct CountingAllocator; static ALLOC_COUNT: AtomicU64 = AtomicU64::new(0); static ALLOC_BYTES: AtomicU64 = AtomicU64::new(0); static DEALLOC_COUNT: AtomicU64 = AtomicU64::new(0); unsafe impl GlobalAlloc for CountingAllocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { ALLOC_COUNT.fetch_add(1, Ordering::Relaxed); ALLOC_BYTES.fetch_add(layout.size() as u64, Ordering::Relaxed); // SAFETY: forwarding to the system allocator with the same layout. unsafe { System.alloc(layout) } } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { DEALLOC_COUNT.fetch_add(1, Ordering::Relaxed); // SAFETY: forwarding to the system allocator with the same ptr+layout. unsafe { System.dealloc(ptr, layout) } } } #[global_allocator] static GLOBAL: CountingAllocator = CountingAllocator; // ---- scenario harness --------------------------------------------------- #[derive(Debug, Clone, Copy)] struct Snapshot { allocs: u64, bytes: u64, deallocs: u64, } fn snapshot() -> Snapshot { Snapshot { allocs: ALLOC_COUNT.load(Ordering::Relaxed), bytes: ALLOC_BYTES.load(Ordering::Relaxed), deallocs: DEALLOC_COUNT.load(Ordering::Relaxed), } } fn diff(start: Snapshot, end: Snapshot, iterations: u64) -> (f64, f64, f64) { ( (end.allocs - start.allocs) as f64 / iterations as f64, (end.bytes - start.bytes) as f64 / iterations as f64, (end.deallocs - start.deallocs) as f64 / iterations as f64, ) } /// Run `op` `iterations` times and return per-op alloc/bytes/dealloc /// counts. The hint is passed through `std::hint::black_box` to keep /// the compiler from optimising the work away. fn measure(name: &str, iterations: u64, mut op: F) -> Row where F: FnMut(), { // Warm-up: 1k iterations to settle any one-time setup state. for _ in 0..1024 { op(); } let start = snapshot(); for _ in 0..iterations { op(); } let end = snapshot(); let (allocs, bytes, deallocs) = diff(start, end, iterations); Row { name: name.to_string(), iterations, allocs_per_op: allocs, bytes_per_op: bytes, deallocs_per_op: deallocs, } } struct Row { name: String, iterations: u64, allocs_per_op: f64, bytes_per_op: f64, deallocs_per_op: f64, } fn print_table(rows: &[Row]) { println!(); println!( "| {:40} | {:>10} | {:>10} | {:>10} | {:>10} |", "scenario", "iters", "allocs/op", "bytes/op", "deallocs/op" ); println!( "| {:40} | {:>10} | {:>10} | {:>10} | {:>10} |", "-".repeat(40), "-".repeat(10), "-".repeat(10), "-".repeat(10), "-".repeat(10) ); for row in rows { println!( "| {:40} | {:>10} | {:>10.2} | {:>10.0} | {:>10.2} |", row.name, row.iterations, row.allocs_per_op, row.bytes_per_op, row.deallocs_per_op ); } println!(); } // ---- scenarios ---------------------------------------------------------- fn make_handle() -> MxReferenceHandle { MxReferenceHandle::from_names(0, 1, 2, 3, "TestObject", 0, 1, 0, "TestAttr", false) .expect("handle") } fn bench_write_int32() -> Row { let handle = make_handle(); let value = WriteValue::Int32(42); measure("write_message::encode (Int32)", 10_000, || { let bytes = write_message::encode(&handle, &value, 0, 0).unwrap(); std::hint::black_box(bytes); }) } fn bench_write_float() -> Row { let handle = make_handle(); let value = WriteValue::Float32(1.5); measure("write_message::encode (Float32)", 10_000, || { let bytes = write_message::encode(&handle, &value, 0, 0).unwrap(); std::hint::black_box(bytes); }) } fn bench_write_double() -> Row { let handle = make_handle(); let value = WriteValue::Float64(3.25); measure("write_message::encode (Float64)", 10_000, || { let bytes = write_message::encode(&handle, &value, 0, 0).unwrap(); std::hint::black_box(bytes); }) } fn bench_write_bool() -> Row { let handle = make_handle(); let value = WriteValue::Boolean(true); measure("write_message::encode (Boolean)", 10_000, || { let bytes = write_message::encode(&handle, &value, 0, 0).unwrap(); std::hint::black_box(bytes); }) } fn bench_write_string() -> Row { let handle = make_handle(); let value = WriteValue::String("hello".to_string()); measure("write_message::encode (String, 5 chars)", 10_000, || { let bytes = write_message::encode(&handle, &value, 0, 0).unwrap(); std::hint::black_box(bytes); }) } // F52.1 — `BytesMut` output. Same alloc count as `encode`; the benefit is // downstream zero-copy (consumers can `split_to` / `freeze` without copying // the body bytes). fn bench_write_int32_bytes_mut() -> Row { let handle = make_handle(); let value = WriteValue::Int32(42); measure("write_message::encode_to_bytes_mut (Int32)", 10_000, || { let bytes = write_message::encode_to_bytes_mut(&handle, &value, 0, 0).unwrap(); std::hint::black_box(bytes); }) } // F52.3 — session-level scratch buffer. The caller supplies a `BytesMut` // that is cleared and resized in place, so the body allocation is amortised // across a session's writes. Drops the per-write count from 2 → 1 for // fixed-width scalars (the remaining alloc is the per-value scratch buffer // inside `encode_scalar_value`) and 1 → 0 for Boolean (no scalar scratch). fn bench_write_int32_into_pooled() -> Row { let handle = make_handle(); let value = WriteValue::Int32(42); let mut buf = BytesMut::new(); measure( "write_message::encode_into_bytes_mut (Int32, pooled)", 10_000, || { write_message::encode_into_bytes_mut(&handle, &value, 0, 0, &mut buf).unwrap(); std::hint::black_box(&buf); }, ) } fn bench_write_bool_into_pooled() -> Row { let handle = make_handle(); let value = WriteValue::Boolean(true); let mut buf = BytesMut::new(); measure( "write_message::encode_into_bytes_mut (Boolean, pooled)", 10_000, || { write_message::encode_into_bytes_mut(&handle, &value, 0, 0, &mut buf).unwrap(); std::hint::black_box(&buf); }, ) } fn bench_subscription_decode() -> Row { // Build a single-record DataUpdate body once; decode N times. let body = build_data_update_int32_body(42); measure( "NmxSubscriptionMessage::parse_inner (DataUpdate, Int32)", 10_000, || { let msg = NmxSubscriptionMessage::parse_inner(&body).unwrap(); std::hint::black_box(msg); }, ) } fn bench_handle_from_names() -> Row { measure("MxReferenceHandle::from_names", 10_000, || { let h = MxReferenceHandle::from_names(0, 1, 2, 3, "TestChildObject", 0, 1, 0, "TestInt", false) .unwrap(); std::hint::black_box(h); }) } // ---- helpers (mirror the test fixtures in subscription_message.rs) ----- fn build_data_update_int32_body(value: i32) -> Vec { // Operation id + correlation id are arbitrary 16-byte sequences for // a synthetic body; the codec doesn't reject any GUID shape. const DATA_UPDATE_COMMAND: u8 = 0x33; let operation_id = [0x11u8; 16]; // 15-byte record prefix: status(4) + quality(2) + filetime(8) + wire_kind(1). // wire_kind = 0x02 = Int32. Then the 4-byte i32 LE payload. let mut record = Vec::with_capacity(15 + 4); record.extend_from_slice(&0i32.to_le_bytes()); // status record.extend_from_slice(&0x00C0u16.to_le_bytes()); // quality record.extend_from_slice(&0i64.to_le_bytes()); // filetime record.push(0x02); // wire_kind = Int32 record.extend_from_slice(&value.to_le_bytes()); let mut out = Vec::with_capacity(23 + record.len()); out.push(DATA_UPDATE_COMMAND); out.extend_from_slice(&1u16.to_le_bytes()); // version out.extend_from_slice(&1i32.to_le_bytes()); // record_count = 1 out.extend_from_slice(&operation_id); out.extend_from_slice(&record); out } // ---- main -------------------------------------------------------------- fn main() { println!("F38 — mxaccess-codec allocation-count baseline"); println!("Counting allocator: thin GlobalAlloc wrapper around System."); println!("R12 target: < 5 allocations per write at steady state."); let rows = vec![ bench_write_int32(), bench_write_float(), bench_write_double(), bench_write_bool(), bench_write_string(), bench_write_int32_bytes_mut(), bench_write_int32_into_pooled(), bench_write_bool_into_pooled(), bench_handle_from_names(), bench_subscription_decode(), ]; print_table(&rows); // R12 gate: emit a non-zero exit code if any encode-write scenario // exceeds the 5-allocs threshold. Decoders are reported but not // gated (the sweep below explicitly excludes them). let mut violations = 0; for row in &rows { if row.name.starts_with("write_message::encode") && row.allocs_per_op >= 5.0 { eprintln!( "R12 violation: {} allocates {:.2}/op (>= 5)", row.name, row.allocs_per_op ); violations += 1; } } if violations > 0 { std::process::exit(1); } }