From a0fa5bedfdd944760a7500c01044337484470313 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Wed, 6 May 2026 22:50:07 -0400 Subject: [PATCH] [F52.2] mxaccess-codec: thread-local name-signature cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a thread-local `HashMap` cache inside `compute_name_signature`. Repeated calls with the same name (the hot path inside `MxReferenceHandle::from_names`) skip the `to_lowercase` allocation and the CRC-16/IBM walk entirely. Bounded at 1024 entries per thread; on overflow the cache is cleared rather than evicted LRU — any sane workload re-fills only the names it actively uses. `MxReferenceHandle::from_names` drops from 2 → 0 allocs/op once warm (bench delta in `design/M6-bench-baseline.md` § F52.2). Cold-path behaviour is unchanged: first call with a new name still pays the `to_lowercase` + cache-key `String` allocations. Two new tests pin the cache: cache-hit returns the same value as cold-compute, and cache overflow doesn't break correctness. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- design/M6-bench-baseline.md | 19 ++++- design/followups.md | 2 +- .../mxaccess-codec/src/reference_handle.rs | 78 ++++++++++++++++++- 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/design/M6-bench-baseline.md b/design/M6-bench-baseline.md index cc95747..72b947f 100644 --- a/design/M6-bench-baseline.md +++ b/design/M6-bench-baseline.md @@ -23,7 +23,7 @@ The bench gates on this: any `write_message::encode` scenario at | `write_message::encode` (Boolean) | 10,000 | 1.00 | 37 | 1.00 | | `write_message::encode` (String, 5 chars) | 10,000 | 4.00 | 92 | 4.00 | | `write_message::encode_to_bytes_mut` (Int32) | 10,000 | 2.00 | 44 | 2.00 | -| `MxReferenceHandle::from_names` | 10,000 | 2.00 | 22 | 2.00 | +| `MxReferenceHandle::from_names` (F52.2) | 10,000 | 0.00 | 0 | 0.00 | | `NmxSubscriptionMessage::parse_inner` | 10,000 | 1.00 | 72 | 1.00 | | (DataUpdate, Int32) | | | | | @@ -82,6 +82,23 @@ to fill a pre-sized `&mut [u8]` rather than each allocating their own `*_body_size` helpers and resizes the destination buffer (Vec or BytesMut) once. This is also the prerequisite refactor for F52.3. +### F52.2 — Per-handle name-signature cache + +Adds a thread-local `HashMap` cache inside +`compute_name_signature`. Repeated calls with the same name (the hot +path inside `MxReferenceHandle::from_names` when handles are +constructed many times) skip the `to_lowercase` allocation entirely. +Capped at 1024 entries; on overflow the thread's cache is cleared. + +| scenario | before (allocs/op) | after (allocs/op) | +|-----------------------------------|-------------------:|------------------:| +| `MxReferenceHandle::from_names` | 2.00 | 0.00 | + +Cold-path (first call with a new name) still pays the +`to_lowercase` + cache-key `String` allocations — the cache only helps +on repeats. The 1k-iter warmup in the F38 harness is enough to prime +the cache, so the measurement loop sees pure cache hits. 
+ ## Reproducing ```powershell diff --git a/design/followups.md b/design/followups.md index a7af885..1e88cad 100644 --- a/design/followups.md +++ b/design/followups.md @@ -65,7 +65,7 @@ Array tags (`TestIntArray`, `TestBoolArray`, etc.) read live as `type_id=0 lengt **Scope.** Three independent codec tightenings, each measurable via the F38 bench harness: 1. **`bytes::BytesMut` output buffer** on the encoder side. Doesn't reduce alloc count but enables downstream zero-copy splits when the consumer wants to send the encoded body without copying. ✅ Landed 2026-05-06 — `write_message::encode_to_bytes_mut` (and `encode_timestamped_to_bytes_mut`); body builders refactored to fill a pre-sized `&mut [u8]`. Bench delta in `design/M6-bench-baseline.md` § F52.1. -2. **Per-handle name-signature cache** in `MxReferenceHandle::from_names`. Currently allocates twice (one UTF-16LE conversion per `compute_name_signature` call); cache by `(name, hasher_state)` to elide both on repeated calls with the same names. +2. **Per-handle name-signature cache** in `MxReferenceHandle::from_names`. Currently allocates twice (one UTF-16LE conversion per `compute_name_signature` call); cache by `(name, hasher_state)` to elide both on repeated calls with the same names. ✅ Landed 2026-05-06 — thread-local `HashMap` keyed by raw name; bounded at 1024 entries. `MxReferenceHandle::from_names` drops 2 → 0 allocs/op once warm. Bench delta in `design/M6-bench-baseline.md` § F52.2. 3. **Session-level scratch pool** for the per-write encode buffer. Drops the per-write count from 2 → 1 by amortising the output buffer allocation across a session's writes. **Definition of done:** diff --git a/rust/crates/mxaccess-codec/src/reference_handle.rs b/rust/crates/mxaccess-codec/src/reference_handle.rs index 61e0db3..2845117 100644 --- a/rust/crates/mxaccess-codec/src/reference_handle.rs +++ b/rust/crates/mxaccess-codec/src/reference_handle.rs @@ -10,6 +10,9 @@ // `.get(n)?` would obscure the byte map. 
#![allow(clippy::indexing_slicing)] +use std::cell::RefCell; +use std::collections::HashMap; + use crate::error::CodecError; const CRC16_IBM_POLYNOMIAL: u16 = 0xa001; @@ -191,6 +194,13 @@ impl MxReferenceHandle { /// mappings (e.g. Turkish dotless-i) may diverge — see /// `design/10-raw-layer.md` L37 for the path forward via `icu_casemap`. /// +/// **Caching**: Results are memoised in a thread-local +/// [`HashMap`]<[`String`], `u16`> so repeated calls with the same name (the +/// hot path inside [`MxReferenceHandle::from_names`] when the same handles +/// are constructed many times) skip the UTF-16LE conversion and CRC walk. +/// The cache is bounded ([`SIGNATURE_CACHE_CAP`] entries); on overflow the +/// thread's cache is cleared. (F52.2 from `design/M6-bench-baseline.md`.) +/// /// # Errors /// /// Returns [`CodecError::InvalidName`] if `name` is empty or whitespace-only. @@ -198,6 +208,35 @@ pub fn compute_name_signature(name: &str) -> Result<u16, CodecError> { if name.trim().is_empty() { return Err(CodecError::InvalidName); } + + // Fast path: thread-local cache lookup. Repeated calls with the same name + // skip the `to_lowercase` allocation entirely. + if let Some(cached) = SIGNATURE_CACHE.with(|c| c.borrow().get(name).copied()) { + return Ok(cached); + } + + let signature = compute_name_signature_uncached(name); + SIGNATURE_CACHE.with(|c| { + let mut cache = c.borrow_mut(); + if cache.len() >= SIGNATURE_CACHE_CAP { + cache.clear(); + } + cache.insert(name.to_string(), signature); + }); + Ok(signature) +} + +/// Soft cap on the per-thread name → signature cache. Keeps memory bounded +/// when a workload churns through unique names (e.g. dynamic discovery). On +/// overflow the cache is cleared rather than evicted LRU — any sane workload +/// re-fills only the names it actively uses. +pub const SIGNATURE_CACHE_CAP: usize = 1024; + +thread_local! 
{ + static SIGNATURE_CACHE: RefCell<HashMap<String, u16>> = RefCell::new(HashMap::new()); +} + +fn compute_name_signature_uncached(name: &str) -> u16 { let lower = name.to_lowercase(); let mut crc: u16 = 0; for ch in lower.chars() { @@ -212,7 +251,16 @@ pub fn compute_name_signature(name: &str) -> Result<u16, CodecError> { crc = update_crc16_ibm(crc, (*unit >> 8) as u8); } } - Ok(crc) + crc +} + +/// Clear the current thread's name → signature cache. Used by tests that +/// want to measure cold-path behaviour; not exposed publicly because the +/// cache is otherwise transparent to callers. +#[cfg(test)] +#[allow(dead_code)] +pub(crate) fn clear_signature_cache_for_tests() { + SIGNATURE_CACHE.with(|c| c.borrow_mut().clear()); } /// One iteration of the CRC-16/IBM update loop (poly `0xa001`, right-shifted @@ -333,6 +381,34 @@ mod tests { assert_eq!(update_crc16_ibm(0, 0), 0); } + /// F52.2 — the thread-local cache must return the same value for cold + /// (cache-miss) and hot (cache-hit) calls. Walking the cache twice with + /// the same name should be a no-op as far as the result goes. + #[test] + fn signature_cache_hit_matches_cold_compute() { + clear_signature_cache_for_tests(); + let cold = compute_name_signature("TestObject").unwrap(); + // Second call should hit the cache. + let hot = compute_name_signature("TestObject").unwrap(); + assert_eq!(cold, hot); + // And match the well-known dotnet-parity vector. + assert_eq!(cold, 0x0B25); + } + + #[test] + fn signature_cache_overflow_clears() { + clear_signature_cache_for_tests(); + // Exceed the cap by one to trigger a clear. + for i in 0..=SIGNATURE_CACHE_CAP { + let name = format!("Tag{i}"); + compute_name_signature(&name).unwrap(); + } + // After overflow, recompute against a known vector should still + // produce the right value (cache hit-or-miss, doesn't matter — the + // returned u16 is what we assert on). 
+ assert_eq!(compute_name_signature("TestObject").unwrap(), 0x0B25); + } + #[test] fn round_trip_zero_handle() { let handle = MxReferenceHandle::default();