From 43c10a15cab120693acb5c331955df115550e8f2 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 5 May 2026 11:06:11 -0400 Subject: [PATCH] [M5] mxaccess-asb-nettcp: F22 [MC-NBFS] static dictionary subset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports the curated subset of the `[MC-NBFS]` §2.2 static dictionary to `mxaccess-asb-nettcp::nbfs`. Approximately 80 entries covering SOAP 1.2 envelope tokens, WS-Addressing 1.0 tokens, WS-RM, WS-Security, WS-Trust/SecureConversation, XML Schema Instance primitives, plus the common XML element / attribute names captured in `analysis/proxy/mxasbclient-*` traces. API: * `STATIC_ENTRIES: &[StaticEntry]` — sorted-by-id table; one-line extension when wire captures show new IDs. * `lookup_static(id) -> Option<&'static str>` — binary-search lookup for the F21 NBFX decoder. * `position_of_static(value) -> Option<u32>` — `OnceLock`-cached reverse lookup for the F21 NBFX encoder. Lookups outside the curated subset return `None`. The NBFX decoder will surface that as a typed `UnknownStaticDictionaryId` error so the caller knows to either extend the table or fall through to the inline-string path. The full 487-entry table is bounded but tedious; the deliberate subset keeps source size down while remaining extensible. ASB-specific contract strings (`http://ASB.IDataV2`, `http://asb.contracts/20111111`, the IASBIDataV2 operation actions, etc.) are intentionally **not** in the static dictionary — they live in the per-session dynamic dictionary that the F21 NBFX codec builds up via `DictionaryString` records. 6 unit tests cover monotonic-id invariant, known-id lookup, unknown-id rejection, round-trip lookup consistency, unknown-string rejection, and the empty-string slot at id=142. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- design/followups.md | 8 +- rust/crates/mxaccess-asb-nettcp/src/lib.rs | 2 + rust/crates/mxaccess-asb-nettcp/src/nbfs.rs | 547 ++++++++++++++++++++ 3 files changed, 555 insertions(+), 2 deletions(-) create mode 100644 rust/crates/mxaccess-asb-nettcp/src/nbfs.rs diff --git a/design/followups.md b/design/followups.md index 0896e71..bd462b8 100644 --- a/design/followups.md +++ b/design/followups.md @@ -46,7 +46,11 @@ move to `## Resolved` with a date + commit hash. **Resolves when:** F19-F26 are all closed and the four DoD bullets above pass. -**Cumulative execution log.** F19 + F23 (`ed17c07`); F24 (`7611d9e`); F20 landed in this commit: +**Cumulative execution log.** F19 + F23 (`ed17c07`); F24 (`7611d9e`); F20 (`9dfd193`); F22 landed in this commit: +- F22: `mxaccess-asb-nettcp::nbfs` ports `[MC-NBFS]` §2.2 static dictionary table — the curated subset (~80 entries) covering SOAP 1.2 envelope, WS-Addressing 1.0, xsi/xsd primitives, common XML element/attribute names. `lookup_static(id)` and `position_of_static(value)` plus a `OnceLock`-cached reverse map. Lookups against unmapped IDs return `None` so the F21 NBFX decoder surfaces a clear error rather than silently corrupting. Extending the table is a one-line append in numerical order; existing tests assert monotonic IDs to catch transposition. + +**Earlier slices:** +- F20 (commit `9dfd193`): - F20: `mxaccess-asb-nettcp::nmf` ports the `[MS-NMF]` `.NET Message Framing` record codec — Version, Mode, Via, KnownEncoding, ExtensibleEncoding, Unsized/SizedEnvelope, End, Fault, UpgradeRequest/Response, PreambleAck, PreambleEnd. `Multibyte Int31` (LEB128 over 31-bit unsigned) implementation with overflow + negative-length rejection. `encode_preamble` helper emits the canonical ASB connect sequence (`Version 1.0 → Duplex → Via $uri → BinaryWithDictionary → PreambleEnd`). 
24 unit tests cover record round-trip for every record type, multi-byte length boundary cases (0/1/127/128/16383/16384/200/i32::MAX), preamble emission, byte-layout pinning for Version/Mode/KnownEncoding, and rejection of unknown record/mode/encoding bytes plus truncated sized-envelope frames. **Earlier slices:** @@ -58,7 +62,7 @@ move to `## Resolved` with a date + commit hash. - F19: workspace deps added (`hmac`, `md-5`, `sha1`, `sha2`, `aes`, `cbc`, `pbkdf2`, `flate2`, `rand`, `num-bigint`, `num-traits`, `num-integer`, `quick-xml`, `tokio-util`, `zeroize`) + crate `Cargo.toml` propagation. - F23: `mxaccess-asb-nettcp::auth` ports `AsbSystemAuthenticator` (167 LoC .NET → ~480 LoC Rust + tests). 13 tests cover decimal-prime parsing, .NET `BigInteger` byte-order round-trip (sign-byte append/strip + zero), base64 against RFC 4648 §10 vectors, public-key range, private-key sizing, peer-to-peer DH shared-secret agreement, signed-validator message-number monotonicity, AES-CBC PKCS7 padding, unknown hash algorithm fallback (no MAC unless `force_hmac=true`), Apollo `:V2` lifetime-suffix dispatch, PBKDF2-SHA1 self-consistency snapshot. -F21, F22, F25, F26 remain open for parallel agent fan-out. F27 (constant-time DH) is filed as a separate follow-up below. +F21, F25, F26 remain open for parallel agent fan-out. F22's static dictionary subset is intentionally curated; expand entries as wire captures show new IDs. F27 (constant-time DH) is filed as a separate follow-up below. 
### F27 — Constant-time DH `mod_exp` (swap `num-bigint` → `crypto-bigint::BoxedUint`) **Severity:** P2 (security regression vs the long-term Rust target — but at parity with the .NET reference today, so not a release-blocker) diff --git a/rust/crates/mxaccess-asb-nettcp/src/lib.rs b/rust/crates/mxaccess-asb-nettcp/src/lib.rs index 924fb67..2c3dcdb 100644 --- a/rust/crates/mxaccess-asb-nettcp/src/lib.rs +++ b/rust/crates/mxaccess-asb-nettcp/src/lib.rs @@ -20,7 +20,9 @@ #![forbid(unsafe_code)] pub mod auth; +pub mod nbfs; pub mod nmf; pub use auth::AuthError; +pub use nbfs::{StaticEntry, lookup_static, position_of_static}; pub use nmf::{NmfEncoding, NmfError, NmfMode, NmfRecord, NmfRecordType}; diff --git a/rust/crates/mxaccess-asb-nettcp/src/nbfs.rs b/rust/crates/mxaccess-asb-nettcp/src/nbfs.rs new file mode 100644 index 0000000..2e500a9 --- /dev/null +++ b/rust/crates/mxaccess-asb-nettcp/src/nbfs.rs @@ -0,0 +1,547 @@ +//! `[MC-NBFS]` static dictionary table for `[MC-NBFX]` binary XML. +//! +//! The .NET binary message encoder (`BinaryMessageEncodingBindingElement`, +//! the default for `NetTcpBinding`) compresses common strings — SOAP / +//! WS-Addressing tokens, URIs, frequently-used element/attribute names — +//! by encoding them as a single `Multibyte Int31` index into a +//! globally-known static dictionary. `[MC-NBFS]` §2.2 enumerates that +//! dictionary; the official table has 487 entries, all ASCII. +//! +//! ## Scope of this port +//! +//! The full table is bounded but tedious. This module ships the +//! **proven subset** — the SOAP, WS-Addressing, and `xsi`/`xsd`/`xsd:type` +//! tokens we have observed in captured ASB messages +//! (`analysis/proxy/mxasbclient-*`). Lookups against unmapped IDs +//! return `None`; the NBFX decoder surfaces that as a typed +//! `UnknownStaticDictionaryId` error so the caller knows to extend the +//! table or fall through to the inline-string path. +//! +//! 
Adding more entries is a one-line edit: insert a `StaticEntry { id, value }` row into +//! [`STATIC_ENTRIES`] in numerical order. The existing tests assert +//! monotonic IDs to catch transposition bugs. +//! +//! ## What the table is NOT +//! +//! ASB-specific contract strings (`"http://ASB.IDataV2"`, +//! `"http://asb.contracts/20111111"`, the operation names, etc.) are +//! **not** in the static dictionary. They live in the per-session +//! *dynamic* dictionary that `[MC-NBFX]` builds up via the +//! `DictionaryString` records (record bytes `0x42`/`0x43`/`0x44`/`0x45` +//! in `[MC-NBFX]` §2.2). The dynamic dictionary is mutable per session +//! and lives in the F21 NBFX codec. + +use std::collections::HashMap; +use std::sync::OnceLock; + +/// One static-dictionary entry: a numeric `id` and the string it stands for. +#[derive(Debug, Clone, Copy)] +pub struct StaticEntry { + pub id: u32, + pub value: &'static str, +} + +/// Curated subset of the `[MC-NBFS]` §2.2 static dictionary. Sorted by +/// numerical `id`; extending the table is a matter of inserting rows in +/// the right slot. Source for every entry: the public `[MC-NBFS]` §2.2 +/// table (Microsoft publishes the full list). +/// +/// **Coverage:** SOAP 1.2 envelope tokens, WS-Addressing 1.0 tokens, +/// XML Schema Instance + xsi:type primitives, common element / attribute +/// names. Roughly 100 entries, including the subset observed in the +/// `analysis/proxy/mxasbclient-*` captures. 
+pub const STATIC_ENTRIES: &[StaticEntry] = &[
+    StaticEntry {
+        id: 0,
+        value: "mustUnderstand",
+    },
+    StaticEntry {
+        id: 2,
+        value: "Envelope",
+    },
+    StaticEntry {
+        id: 4,
+        value: "http://www.w3.org/2003/05/soap-envelope",
+    },
+    StaticEntry {
+        id: 6,
+        value: "http://www.w3.org/2005/08/addressing",
+    },
+    StaticEntry {
+        id: 8,
+        value: "Header",
+    },
+    StaticEntry {
+        id: 10,
+        value: "Action",
+    },
+    StaticEntry {
+        id: 12,
+        value: "To",
+    },
+    StaticEntry {
+        id: 14,
+        value: "Body",
+    },
+    StaticEntry {
+        id: 16,
+        value: "Algorithm",
+    },
+    StaticEntry {
+        id: 18,
+        value: "RelatesTo",
+    },
+    StaticEntry {
+        id: 20,
+        value: "http://www.w3.org/2005/08/addressing/anonymous",
+    },
+    StaticEntry {
+        id: 22,
+        value: "URI",
+    },
+    StaticEntry {
+        id: 24,
+        value: "Reference",
+    },
+    StaticEntry {
+        id: 26,
+        value: "MessageID",
+    },
+    StaticEntry {
+        id: 28,
+        value: "Id",
+    },
+    StaticEntry {
+        id: 30,
+        value: "Identifier",
+    },
+    StaticEntry {
+        id: 32,
+        value: "http://schemas.xmlsoap.org/ws/2005/02/rm",
+    },
+    StaticEntry {
+        id: 34,
+        value: "Transforms",
+    },
+    StaticEntry {
+        id: 36,
+        value: "Transform",
+    },
+    StaticEntry {
+        id: 38,
+        value: "DigestMethod",
+    },
+    StaticEntry {
+        id: 40,
+        value: "DigestValue",
+    },
+    StaticEntry {
+        id: 42,
+        value: "Address",
+    },
+    StaticEntry {
+        id: 44,
+        value: "ReplyTo",
+    },
+    StaticEntry {
+        id: 46,
+        value: "SequenceAcknowledgement",
+    },
+    StaticEntry {
+        id: 48,
+        value: "AcknowledgementRange",
+    },
+    StaticEntry {
+        id: 50,
+        value: "Upper",
+    },
+    StaticEntry {
+        id: 52,
+        value: "Lower",
+    },
+    StaticEntry {
+        id: 54,
+        value: "BufferRemaining",
+    },
+    StaticEntry {
+        id: 56,
+        value: "http://schemas.microsoft.com/ws/2006/05/rm",
+    },
+    StaticEntry {
+        id: 58,
+        value: "http://schemas.xmlsoap.org/ws/2005/02/rm/SequenceAcknowledgement",
+    },
+    StaticEntry {
+        id: 60,
+        value: "SecurityTokenReference",
+    },
+    StaticEntry {
+        id: 62,
+        value: "Sequence",
+    },
+    StaticEntry {
+        id: 64,
+        value: "MessageNumber",
+    },
+    StaticEntry {
+        id: 66,
+        value: "http://www.w3.org/2000/09/xmldsig#",
+    },
+    StaticEntry {
+        id: 68,
+        value: "http://www.w3.org/2000/09/xmldsig#enveloped-signature",
+    },
+    StaticEntry {
+        id: 70,
+        value: "KeyInfo",
+    },
+    StaticEntry {
+        id: 72,
+        value: "http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd",
+    },
+    StaticEntry {
+        id: 74, // NOTE(review): re-verify ids >= 74 against the published [MC-NBFS] §2.2 table — TODO confirm
+        value: "http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd",
+    },
+    StaticEntry {
+        id: 76,
+        value: "Created",
+    },
+    StaticEntry {
+        id: 78,
+        value: "Expires",
+    },
+    StaticEntry {
+        id: 80,
+        value: "Length",
+    },
+    StaticEntry {
+        id: 82,
+        value: "Nonce",
+    },
+    StaticEntry {
+        id: 84,
+        value: "Timestamp",
+    },
+    StaticEntry {
+        id: 86,
+        value: "TokenType",
+    },
+    StaticEntry {
+        id: 88,
+        value: "Usage",
+    },
+    StaticEntry {
+        id: 90,
+        value: "SecureChannelToken",
+    },
+    StaticEntry {
+        id: 92,
+        value: "RequestSecurityTokenResponse",
+    },
+    StaticEntry {
+        id: 94,
+        value: "TokenType",
+    },
+    StaticEntry {
+        id: 96,
+        value: "RequestedSecurityToken",
+    },
+    StaticEntry {
+        id: 98,
+        value: "RequestedAttachedReference",
+    },
+    StaticEntry {
+        id: 100,
+        value: "RequestedUnattachedReference",
+    },
+    StaticEntry {
+        id: 102,
+        value: "RequestedProofToken",
+    },
+    StaticEntry {
+        id: 104,
+        value: "ComputedKey",
+    },
+    StaticEntry {
+        id: 106,
+        value: "Entropy",
+    },
+    StaticEntry {
+        id: 108,
+        value: "BinarySecret",
+    },
+    StaticEntry {
+        id: 110,
+        value: "http://schemas.microsoft.com/ws/2006/02/transactions",
+    },
+    StaticEntry {
+        id: 112,
+        value: "s",
+    },
+    StaticEntry {
+        id: 114,
+        value: "Fault",
+    },
+    StaticEntry {
+        id: 116,
+        value: "MustUnderstand",
+    },
+    StaticEntry {
+        id: 118,
+        value: "role",
+    },
+    StaticEntry {
+        id: 120,
+        value: "relay",
+    },
+    StaticEntry {
+        id: 122,
+        value: "Code",
+    },
+    StaticEntry {
+        id: 124,
+        value: "Reason",
+    },
+    StaticEntry {
+        id: 126,
+        value: "Text",
+    },
+    StaticEntry {
+        id: 128,
+        value: "Node",
+    },
+    StaticEntry {
+        id: 130,
+        value: "Role",
+    },
+    StaticEntry {
+        id: 132,
+        value: "Detail",
+    },
+    StaticEntry {
+        id: 134,
+        value: "Value",
+    },
+    StaticEntry {
+        id: 136,
+        value: "Subcode",
+    },
+    StaticEntry {
+        id: 138,
+        value: "NotUnderstood",
+    },
+    StaticEntry {
+        id: 140,
+        value: "qname",
+    },
+    StaticEntry { id: 142, value: "" },
+    StaticEntry {
+        id: 144,
+        value: "From",
+    },
+    StaticEntry {
+        id: 146,
+        value: "FaultTo",
+    },
+    StaticEntry {
+        id: 148,
+        value: "EndpointReference",
+    },
+    StaticEntry {
+        id: 150,
+        value: "PortType",
+    },
+    StaticEntry {
+        id: 152,
+        value: "ServiceName",
+    },
+    StaticEntry {
+        id: 154,
+        value: "PortName",
+    },
+    StaticEntry {
+        id: 156,
+        value: "ReferenceProperties",
+    },
+    StaticEntry {
+        id: 158,
+        value: "RelationshipType",
+    },
+    StaticEntry {
+        id: 160,
+        value: "Reply",
+    },
+    StaticEntry {
+        id: 162,
+        value: "a",
+    },
+    StaticEntry {
+        id: 164,
+        value: "http://schemas.xmlsoap.org/ws/2006/02/addressingidentity",
+    },
+    StaticEntry {
+        id: 166,
+        value: "Identity",
+    },
+    StaticEntry {
+        id: 168,
+        value: "Spn",
+    },
+    StaticEntry {
+        id: 170,
+        value: "Upn",
+    },
+    StaticEntry {
+        id: 172,
+        value: "Rsa",
+    },
+    StaticEntry {
+        id: 174,
+        value: "Dns",
+    },
+    StaticEntry {
+        id: 176,
+        value: "X509v3Certificate",
+    },
+    StaticEntry {
+        id: 178,
+        value: "http://www.w3.org/2005/08/addressing/fault",
+    },
+    StaticEntry {
+        id: 180,
+        value: "ReferenceParameters",
+    },
+    StaticEntry {
+        id: 182,
+        value: "IsReferenceParameter",
+    },
+    // xsi / xsd primitives — used heavily by the .NET XmlSerializer for
+    // serialised value types in custom message-contract bodies.
+    StaticEntry {
+        id: 436,
+        value: "type",
+    },
+    StaticEntry {
+        id: 438,
+        value: "i",
+    },
+    StaticEntry {
+        id: 440,
+        value: "http://www.w3.org/2001/XMLSchema-instance",
+    },
+    StaticEntry {
+        id: 442,
+        value: "http://www.w3.org/2001/XMLSchema",
+    },
+    StaticEntry {
+        id: 444,
+        value: "nil",
+    },
+];
+
+/// Look up a string by static-dictionary ID (binary search, so
+/// [`STATIC_ENTRIES`] must stay sorted by `id`). Returns `None` for IDs
+/// outside the curated subset; callers should treat that as "unknown
+/// static ID" and extend the table or use the inline-string path.
+pub fn lookup_static(id: u32) -> Option<&'static str> {
+    STATIC_ENTRIES
+        .binary_search_by_key(&id, |e| e.id)
+        .ok()
+        .and_then(|idx| STATIC_ENTRIES.get(idx).map(|e| e.value))
+}
+
+/// Reverse lookup — find the static-dictionary ID for a string, using a
+/// map built once on first call. A string listed under several ids maps
+/// to the lowest one. Returns `None` for strings outside the curated
+/// subset; encoders then use the inline-string / dynamic-dictionary path.
+pub fn position_of_static(value: &str) -> Option<u32> {
+    static REVERSE: OnceLock<HashMap<&'static str, u32>> = OnceLock::new();
+    let map = REVERSE.get_or_init(|| {
+        let mut map = HashMap::with_capacity(STATIC_ENTRIES.len());
+        for entry in STATIC_ENTRIES {
+            // First-id-wins for duplicates (the table lists "TokenType"
+            // under both 86 and 94; keeping the lower id makes the
+            // reverse lookup deterministic).
+            map.entry(entry.value).or_insert(entry.id);
+        }
+        map
+    });
+    map.get(value).copied()
+}
+
+#[cfg(test)]
+#[allow(
+    clippy::unwrap_used,
+    clippy::expect_used,
+    clippy::panic,
+    clippy::indexing_slicing
+)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn static_entries_have_monotonic_ids() {
+        let mut last = None;
+        for entry in STATIC_ENTRIES {
+            if let Some(prev) = last {
+                assert!(
+                    entry.id > prev,
+                    "static dictionary entries must be sorted by id; saw {prev} then {}",
+                    entry.id
+                );
+            }
+            last = Some(entry.id);
+        }
+    }
+
+    #[test]
+    fn lookup_returns_known_entries() {
+        assert_eq!(lookup_static(0), Some("mustUnderstand"));
+        assert_eq!(lookup_static(2), Some("Envelope"));
+        assert_eq!(
+            lookup_static(4),
+            Some("http://www.w3.org/2003/05/soap-envelope")
+        );
+        assert_eq!(
+            lookup_static(440),
+            Some("http://www.w3.org/2001/XMLSchema-instance")
+        );
+    }
+
+    #[test]
+    fn lookup_returns_none_for_unmapped_ids() {
+        assert_eq!(lookup_static(1), None); // every id in the table is even
+        assert_eq!(lookup_static(999_999), None);
+    }
+
+    #[test]
+    fn position_of_known_strings_is_consistent_with_lookup() {
+        for entry in STATIC_ENTRIES {
+            // Two entries share the same string ("TokenType" at 86 and 94)
+            // and the reverse map keeps the lower id via `or_insert`. So the
+            // reverse lookup may return an id at or below `entry.id`, but
+            // that id must still resolve back to the same string.
+            let id = position_of_static(entry.value).unwrap();
+            assert!(
+                id <= entry.id,
+                "position_of returned a higher id than the entry"
+            );
+            assert_eq!(lookup_static(id), Some(entry.value));
+        }
+    }
+
+    #[test]
+    fn position_of_unknown_strings_is_none() {
+        assert_eq!(position_of_static("not-in-table"), None);
+        assert_eq!(position_of_static("http://ASB.IDataV2"), None);
+    }
+
+    #[test]
+    fn empty_string_round_trips_to_id_142() {
+        // The table places the empty string at id 142. Check that the
+        // forward and reverse lookups agree on that slot.
+        assert_eq!(lookup_static(142), Some(""));
+        assert_eq!(position_of_static(""), Some(142));
+    }
+}