//! `[MC-NBFX]` `.NET Binary XML Format` token codec.
//!
//! `[MC-NBFX]` §2.2 defines a record-based binary XML serialisation.
//! Element names, attribute names, and namespace strings can be carried
//! either inline (UTF-8 length-prefixed strings) or by reference into
//! the [`crate::nbfs`] static dictionary or a per-session dynamic
//! dictionary that the codec builds up.
//!
//! ## Scope of this port
//!
//! This module ships the **proven subset** of the spec — the records that
//! `analysis/proxy/mxasbclient-*` traces show on-the-wire for the ASB
//! `IASBIDataV2` operations:
//!
//! * Element records (`0x40` ShortElement, `0x41` Element with
//!   prefix string, `0x42` ShortDictionaryElement, `0x43`
//!   DictionaryElement) plus the single-letter prefix families
//!   `0x44-0x5D` PrefixDictionaryElement\_a..z and `0x5E-0x77`
//!   PrefixElement\_a..z. The encoder picks a prefix-letter record
//!   when the prefix is a single lowercase ASCII letter and emits the
//!   long form (`0x41`/`0x43`) for any other prefix.
//! * Attribute (`0x04` ShortAttribute, `0x05` Attribute, `0x06`
//!   ShortDictionaryAttribute, `0x07` DictionaryAttribute), the
//!   single-letter prefix families `0x0C-0x25`
//!   PrefixDictionaryAttribute\_a..z and `0x26-0x3F`
//!   PrefixAttribute\_a..z, plus xmlns variants
//!   (`0x08`/`0x09`/`0x0A`/`0x0B`).
//! * Text records: `0x80/0x81` Zero, `0x82/0x83` One, `0x84/0x85`
//!   False, `0x86/0x87` True, `0x88/0x89` Int8, `0x8A/0x8B` Int16,
//!   `0x8C/0x8D` Int32, `0x8E/0x8F` Int64, `0x98/0x99` Chars8,
//!   `0x9A/0x9B` Chars16, `0x9C/0x9D` Chars32, `0x9E/0x9F` Bytes8,
//!   `0xA0/0xA1` Bytes16, `0xA2/0xA3` Bytes32, `0xA8/0xA9` EmptyText,
//!   `0xAA/0xAB` DictionaryText, `0xAC/0xAD` UniqueIdText,
//!   `0xB4/0xB5` BoolText.
//! * `0x01` EndElement (the explicit form, for elements not closed by
//!   a `*WithEndElement` text variant).
//!
//! Each text record has a `*WithEndElement` form whose record byte is
//! `+1` (e.g. `0x99` = `Chars8TextWithEndElement`). Both variants are
//! supported.
//!
//! Records left for a follow-up: `Decimal`, `TimeSpan`,
//! `Float`/`Double` text, `DateTime` text, and `QNameDictionary`.
These are observable in some //! WCF traffic but not currently exercised by ASB on the proven path. //! //! ## What lives where //! //! * Static-dictionary lookup — [`crate::nbfs`] (separate F22 module). //! * Dynamic-dictionary state — [`DynamicDictionary`] in this module; //! the encoder/decoder threads it through every call. //! * Higher-level SOAP envelope construction — left to the F25 ASB //! client crate (`mxaccess-asb`). This codec is the byte-shovelling //! layer. use std::collections::HashMap; use thiserror::Error; use crate::nbfs; use crate::nmf::{decode_multibyte_int31, encode_multibyte_int31}; /// Per-session dynamic dictionary. WCF builds it up as elements/attributes /// are encountered: the first time a string is seen, it gets added with /// a fresh ID; subsequent occurrences reference the ID. IDs always start /// at `0` and increment by 1 (distinct from the static-dictionary IDs in /// `[MC-NBFS]` which are even-only by spec convention). #[derive(Debug, Default, Clone)] pub struct DynamicDictionary { forward: Vec, reverse: HashMap, } impl DynamicDictionary { pub fn new() -> Self { Self::default() } /// Insert `value` if absent; return its ID. Existing entries are /// idempotent. pub fn intern(&mut self, value: &str) -> u32 { if let Some(&id) = self.reverse.get(value) { return id; } let id = self.forward.len() as u32; self.forward.push(value.to_string()); self.reverse.insert(value.to_string(), id); id } pub fn lookup(&self, id: u32) -> Option<&str> { self.forward.get(id as usize).map(String::as_str) } pub fn position_of(&self, value: &str) -> Option { self.reverse.get(value).copied() } pub fn len(&self) -> usize { self.forward.len() } pub fn is_empty(&self) -> bool { self.forward.is_empty() } } /// Token-level NBFX events. Encode/decode operate on streams of these. #[derive(Debug, Clone, PartialEq)] pub enum NbfxToken { /// Open element. 
`prefix=None` is the un-prefixed form (`0x40`/`0x42`); /// `prefix=Some("a")` produces the long `0x41`/`0x43` form (the /// short single-letter family `0x5E-0x77` is a future optimisation). Element { prefix: Option, name: NbfxName, }, /// Explicit `0x00` end-element record. `*WithEndElement` text records /// imply this; emit `EndElement` only when the element is empty or /// closed without trailing text. EndElement, /// Attribute on the currently-open element. xmlns and dict variants /// are separate cases below. Attribute { prefix: Option, name: NbfxName, value: NbfxText, }, /// `xmlns="..."` (no prefix) — record `0x08`. DefaultNamespace { value: NbfxText }, /// `xmlns:prefix="..."` — record `0x09`. NamespaceDeclaration { prefix: String, value: NbfxText }, /// Standalone text content between an Element open and its EndElement /// (or a `*WithEndElement` text variant which closes the element /// inline). Text(NbfxText), } /// Element / attribute name reference. Inline carries a UTF-8 string; /// `Static` references the `[MC-NBFS]` table; `Dynamic` references the /// per-session [`DynamicDictionary`]. #[derive(Debug, Clone, PartialEq)] pub enum NbfxName { Inline(String), Static(u32), Dynamic(u32), } /// Text-record payload. The `with_end_element` flag toggles the /// `*WithEndElement` variant on encode; decoded text records record /// the inline EndElement implicitly by emitting an [`NbfxToken::EndElement`] /// after the Text token. (i.e. consumers see the same token stream /// regardless of whether the wire used the inline form.) #[derive(Debug, Clone, PartialEq)] pub enum NbfxText { Empty, Zero, One, Bool(bool), Int8(i8), Int16(i16), Int32(i32), Int64(i64), /// UTF-8 chars (length-prefixed, three width variants on the wire). Chars(String), /// Static-dictionary reference (`0xAA` DictionaryText). Decoders /// resolve this to the underlying string when their consumer asks /// for the text via [`Self::resolve`]. 
DictionaryStatic(u32), /// Dynamic-dictionary reference. Same record byte (`0xAA`) — the /// codec disambiguates by which dictionary owns the ID. Encoders /// pick `Static` when [`crate::nbfs::lookup_static`] succeeds and /// fall back to `Dynamic` otherwise. DictionaryDynamic(u32), /// Raw bytes (records `0x9E` Bytes8 / `0xA0` Bytes16 / `0xA2` /// Bytes32 — width chosen automatically by length on encode). Used /// by `XmlDictionaryWriter.WriteBase64` for the `ASBIData` /// content of `IAsbCustomSerializableType`-decorated fields. Bytes(Vec), /// 16-byte UUID (record `0xAC` UniqueIdText). WCF emits `` /// values via this record, with the 16 raw UUID bytes (NOT the /// `urn:uuid:...` text form). UniqueId([u8; 16]), } impl NbfxText { /// Resolve any dictionary reference to a concrete string. Returns /// `None` if the resolution targets an unmapped ID. pub fn resolve<'a>(&'a self, dynamic: &'a DynamicDictionary) -> Option { match self { Self::Empty => Some(String::new()), Self::Zero => Some("0".to_string()), Self::One => Some("1".to_string()), Self::Bool(true) => Some("true".to_string()), Self::Bool(false) => Some("false".to_string()), Self::Int8(v) => Some(v.to_string()), Self::Int16(v) => Some(v.to_string()), Self::Int32(v) => Some(v.to_string()), Self::Int64(v) => Some(v.to_string()), Self::Chars(s) => Some(s.clone()), Self::DictionaryStatic(id) => nbfs::lookup_static(*id).map(String::from), Self::DictionaryDynamic(id) => dynamic.lookup(*id).map(String::from), // Raw bytes have no canonical text representation; .NET's // `XmlDictionaryReader.ReadElementContentAsBase64` returns // them as `byte[]`. Consumers should match on the variant. Self::Bytes(_) => None, // UniqueId surfaces as the .NET `Guid.ToString("D")` form // (mixed-endian per [MS-DTYP]). Used for ``. 
Self::UniqueId(bytes) => Some(format_uuid_dotnet_style(bytes)), } } } /// Format a 16-byte UUID using .NET's `Guid.ToString("D")` mixed-endian /// convention (first 4 bytes little-endian, next 2x2 little-endian, /// last 2+6 big-endian). This is the same format `` uses /// when emitted as text. fn format_uuid_dotnet_style(bytes: &[u8; 16]) -> String { let d1 = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); let d2 = u16::from_le_bytes([bytes[4], bytes[5]]); let d3 = u16::from_le_bytes([bytes[6], bytes[7]]); format!( "{d1:08x}-{d2:04x}-{d3:04x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15] ) } #[derive(Debug, Error)] #[non_exhaustive] pub enum NbfxError { #[error("truncated frame at {stage}: need {need} bytes, have {have}")] Truncated { need: usize, have: usize, stage: &'static str, }, #[error("unknown NBFX record byte 0x{0:02x}")] UnknownRecord(u8), #[error("invalid UTF-8 in NBFX {stage} payload")] InvalidUtf8 { stage: &'static str }, #[error("payload too large: {len} bytes (max {max})")] PayloadTooLarge { len: usize, max: u64 }, #[error("unknown static dictionary id {0}")] UnknownStaticDictionaryId(u32), #[error("unknown dynamic dictionary id {0}")] UnknownDynamicDictionaryId(u32), #[error("expected length non-negative, got {0}")] NegativeLength(i32), #[error("multibyte int overflow")] IntOverflow, } // ---- record byte constants ---------------------------------------------- // // Naming matches `[MC-NBFX]` §2.2 record names. Records ending `WithEnd` // are the `*WithEndElement` variants whose byte is the base record's // byte + 1. /// `[MC-NBFX]` §2.2.3 EndElementRecord. Spec value is **0x01** (NOT /// 0x00 — verified against .NET probe wire capture). Earlier /// iterations had this wrong; round-trip tests passed because encode /// and decode used the same wrong value, but interop with WCF parsers /// failed silently (TCP RST on every envelope). 
const REC_END_ELEMENT: u8 = 0x01; const REC_SHORT_ATTRIBUTE: u8 = 0x04; const REC_ATTRIBUTE: u8 = 0x05; const REC_SHORT_DICT_ATTRIBUTE: u8 = 0x06; const REC_DICT_ATTRIBUTE: u8 = 0x07; const REC_SHORT_XMLNS_ATTRIBUTE: u8 = 0x08; const REC_XMLNS_ATTRIBUTE: u8 = 0x09; // Reserved for the dictionary-keyed xmlns variants — recognised on the // wire as future work, but not yet emitted or decoded. #[allow(dead_code)] const REC_SHORT_DICT_XMLNS_ATTRIBUTE: u8 = 0x0A; #[allow(dead_code)] const REC_DICT_XMLNS_ATTRIBUTE: u8 = 0x0B; const REC_SHORT_ELEMENT: u8 = 0x40; const REC_ELEMENT: u8 = 0x41; const REC_SHORT_DICT_ELEMENT: u8 = 0x42; const REC_DICT_ELEMENT: u8 = 0x43; const REC_ZERO_TEXT: u8 = 0x80; const REC_ONE_TEXT: u8 = 0x82; const REC_FALSE_TEXT: u8 = 0x84; const REC_TRUE_TEXT: u8 = 0x86; const REC_INT8_TEXT: u8 = 0x88; const REC_INT16_TEXT: u8 = 0x8A; const REC_INT32_TEXT: u8 = 0x8C; const REC_INT64_TEXT: u8 = 0x8E; const REC_CHARS8_TEXT: u8 = 0x98; const REC_CHARS16_TEXT: u8 = 0x9A; const REC_CHARS32_TEXT: u8 = 0x9C; const REC_EMPTY_TEXT: u8 = 0xA8; const REC_BYTES8_TEXT: u8 = 0x9E; const REC_BYTES16_TEXT: u8 = 0xA0; const REC_BYTES32_TEXT: u8 = 0xA2; const REC_DICTIONARY_TEXT: u8 = 0xAA; const REC_UNIQUE_ID_TEXT: u8 = 0xAC; const REC_BOOL_TEXT: u8 = 0xB4; // ---- encoder ------------------------------------------------------------ /// Encode a stream of tokens to an NBFX byte buffer. Threads the dynamic /// dictionary through; new strings get interned automatically when /// encoded as `NbfxName::Inline` and the encoder chooses a static or /// inline form based on `[MC-NBFS]` lookup. pub fn encode_tokens( tokens: &[NbfxToken], dynamic: &mut DynamicDictionary, out: &mut Vec, ) -> Result<(), NbfxError> { // Collapse `Text` immediately followed by `EndElement` into a single // `*WithEndElement` text record where possible. `WCF` emits this // form by default when an element has a single text child, so // matching it is required for byte parity. 
let mut idx = 0; while let Some(cur) = tokens.get(idx) { let next = tokens.get(idx + 1); let with_end = matches!( (cur, next), (NbfxToken::Text(_), Some(NbfxToken::EndElement)) ); encode_one(cur, dynamic, with_end, out)?; idx += if with_end { 2 } else { 1 }; } Ok(()) } fn encode_one( token: &NbfxToken, dynamic: &mut DynamicDictionary, with_end: bool, out: &mut Vec, ) -> Result<(), NbfxError> { match token { NbfxToken::Element { prefix, name } => encode_element(prefix.as_deref(), name, out), NbfxToken::EndElement => { out.push(REC_END_ELEMENT); Ok(()) } NbfxToken::Attribute { prefix, name, value, } => encode_attribute(prefix.as_deref(), name, value, dynamic, out), NbfxToken::DefaultNamespace { value } => { // Per `[MC-NBFX]` §2.2.3: `ShortXmlnsAttribute` (0x08) // value is a RAW length-prefixed string (same convention // as 0x09 — see comment on NamespaceDeclaration below). match value { NbfxText::Chars(s) => { out.push(REC_SHORT_XMLNS_ATTRIBUTE); encode_string(s.as_bytes(), out)?; Ok(()) } _ => Err(NbfxError::InvalidUtf8 { stage: "default-xmlns-must-be-Chars", }), } } NbfxToken::NamespaceDeclaration { prefix, value } => { // Per `[MC-NBFX]` §2.2.3: `XmlnsAttribute` (0x09) value is // a RAW length-prefixed string — NOT a text record like // regular Attribute values. `DictionaryXmlnsAttribute` // (0x0B) value is a raw multibyte-int31 dict id. Either // form omits the text-record byte and width-tag. // // We pick 0x0B when the value is `DictionaryStatic(id)` // (matches WCF's encoding for SOAP/WS-Addressing names), // and 0x09 + raw-string for `Chars(s)` (matches WCF's // encoding for namespaces not in the static dict — e.g. // xsi/xsd, or operation-specific URIs). 
match value { NbfxText::DictionaryStatic(id) => { out.push(REC_DICT_XMLNS_ATTRIBUTE); encode_string(prefix.as_bytes(), out)?; encode_multibyte_int31_to_nbfx(out, *id)?; } NbfxText::Chars(s) => { out.push(REC_XMLNS_ATTRIBUTE); encode_string(prefix.as_bytes(), out)?; encode_string(s.as_bytes(), out)?; } _ => { return Err(NbfxError::InvalidUtf8 { stage: "xmlns-value-must-be-Chars-or-DictionaryStatic", }); } } Ok(()) } NbfxToken::Text(text) => encode_text(text, with_end, out), } } /// If `prefix` is a single lowercase ASCII letter (a-z), return its /// alphabet offset (0..26). Otherwise return `None`. WCF emits /// short-form prefix-letter records (PrefixDictionaryElement_a..z = /// 0x44..0x5D, etc.) for these prefixes, and stricter parsers may /// reject the long forms when a short form would suffice. fn prefix_letter_offset(prefix: &str) -> Option { let mut chars = prefix.chars(); let c = chars.next()?; if chars.next().is_some() { return None; } if c.is_ascii_lowercase() { Some(c as u8 - b'a') } else { None } } fn encode_element( prefix: Option<&str>, name: &NbfxName, out: &mut Vec, ) -> Result<(), NbfxError> { match (prefix, name) { (None, NbfxName::Inline(s)) => { out.push(REC_SHORT_ELEMENT); encode_string(s.as_bytes(), out) } (None, NbfxName::Static(id) | NbfxName::Dynamic(id)) => { out.push(REC_SHORT_DICT_ELEMENT); encode_multibyte_int31_to_nbfx(out, *id) } // Short-form: single-letter prefix + dict-id name. Records // 0x44..0x5D (PrefixDictionaryElement_a..z). (Some(prefix), NbfxName::Static(id) | NbfxName::Dynamic(id)) if prefix_letter_offset(prefix).is_some() => { // SAFETY: is_some check above; unwrap_or here keeps clippy // happy without a panic on the unreachable None branch. let off = prefix_letter_offset(prefix).unwrap_or(0); out.push(0x44 + off); encode_multibyte_int31_to_nbfx(out, *id) } // Short-form: single-letter prefix + inline name. Records // 0x5E..0x77 (PrefixElement_a..z). 
(Some(prefix), NbfxName::Inline(s)) if prefix_letter_offset(prefix).is_some() => { let off = prefix_letter_offset(prefix).unwrap_or(0); out.push(0x5E + off); encode_string(s.as_bytes(), out) } (Some(prefix), NbfxName::Inline(s)) => { out.push(REC_ELEMENT); encode_string(prefix.as_bytes(), out)?; encode_string(s.as_bytes(), out) } (Some(prefix), NbfxName::Static(id) | NbfxName::Dynamic(id)) => { out.push(REC_DICT_ELEMENT); encode_string(prefix.as_bytes(), out)?; encode_multibyte_int31_to_nbfx(out, *id) } } } fn encode_attribute( prefix: Option<&str>, name: &NbfxName, value: &NbfxText, dynamic: &mut DynamicDictionary, out: &mut Vec, ) -> Result<(), NbfxError> { match (prefix, name) { (None, NbfxName::Inline(s)) => { out.push(REC_SHORT_ATTRIBUTE); encode_string(s.as_bytes(), out)?; } (None, NbfxName::Static(id) | NbfxName::Dynamic(id)) => { out.push(REC_SHORT_DICT_ATTRIBUTE); encode_multibyte_int31_to_nbfx(out, *id)?; } // Short-form: single-letter prefix + dict-id name. Records // 0x0C..0x25 (PrefixDictionaryAttribute_a..z). (Some(prefix), NbfxName::Static(id) | NbfxName::Dynamic(id)) if prefix_letter_offset(prefix).is_some() => { let off = prefix_letter_offset(prefix).unwrap_or(0); out.push(0x0C + off); encode_multibyte_int31_to_nbfx(out, *id)?; } // Short-form: single-letter prefix + inline name. Records // 0x26..0x3F (PrefixAttribute_a..z). (Some(prefix), NbfxName::Inline(s)) if prefix_letter_offset(prefix).is_some() => { let off = prefix_letter_offset(prefix).unwrap_or(0); out.push(0x26 + off); encode_string(s.as_bytes(), out)?; } (Some(prefix), NbfxName::Inline(s)) => { out.push(REC_ATTRIBUTE); encode_string(prefix.as_bytes(), out)?; encode_string(s.as_bytes(), out)?; } (Some(prefix), NbfxName::Static(id) | NbfxName::Dynamic(id)) => { out.push(REC_DICT_ATTRIBUTE); encode_string(prefix.as_bytes(), out)?; encode_multibyte_int31_to_nbfx(out, *id)?; } } encode_text_string_or_dict(value, dynamic, out) } /// Encode an attribute value or namespace value. 
Attribute values use /// the same text records as element content but are NOT followed by an /// EndElement; the `with_end_element` bit must be cleared. fn encode_text_string_or_dict( value: &NbfxText, _dynamic: &mut DynamicDictionary, out: &mut Vec, ) -> Result<(), NbfxError> { encode_text(value, false, out) } fn encode_text(text: &NbfxText, with_end: bool, out: &mut Vec) -> Result<(), NbfxError> { let bump = if with_end { 1 } else { 0 }; match text { NbfxText::Empty => out.push(REC_EMPTY_TEXT + bump), NbfxText::Zero => out.push(REC_ZERO_TEXT + bump), NbfxText::One => out.push(REC_ONE_TEXT + bump), NbfxText::Bool(false) => out.push(REC_FALSE_TEXT + bump), NbfxText::Bool(true) => out.push(REC_TRUE_TEXT + bump), NbfxText::Int8(v) => { out.push(REC_INT8_TEXT + bump); out.push(*v as u8); } NbfxText::Int16(v) => { out.push(REC_INT16_TEXT + bump); out.extend_from_slice(&v.to_le_bytes()); } NbfxText::Int32(v) => { out.push(REC_INT32_TEXT + bump); out.extend_from_slice(&v.to_le_bytes()); } NbfxText::Int64(v) => { out.push(REC_INT64_TEXT + bump); out.extend_from_slice(&v.to_le_bytes()); } NbfxText::Chars(s) => { let bytes = s.as_bytes(); let len = bytes.len(); if len <= u8::MAX as usize { out.push(REC_CHARS8_TEXT + bump); out.push(len as u8); } else if len <= u16::MAX as usize { out.push(REC_CHARS16_TEXT + bump); out.extend_from_slice(&(len as u16).to_le_bytes()); } else if len <= u32::MAX as usize { out.push(REC_CHARS32_TEXT + bump); out.extend_from_slice(&(len as u32).to_le_bytes()); } else { return Err(NbfxError::PayloadTooLarge { len, max: u32::MAX as u64, }); } out.extend_from_slice(bytes); } NbfxText::DictionaryStatic(id) | NbfxText::DictionaryDynamic(id) => { out.push(REC_DICTIONARY_TEXT + bump); encode_multibyte_int31_to_nbfx(out, *id)?; } NbfxText::Bytes(bytes) => { let len = bytes.len(); if len <= u8::MAX as usize { out.push(REC_BYTES8_TEXT + bump); out.push(len as u8); } else if len <= u16::MAX as usize { out.push(REC_BYTES16_TEXT + bump); 
out.extend_from_slice(&(len as u16).to_le_bytes()); } else if len <= u32::MAX as usize { out.push(REC_BYTES32_TEXT + bump); out.extend_from_slice(&(len as u32).to_le_bytes()); } else { return Err(NbfxError::PayloadTooLarge { len, max: u32::MAX as u64, }); } out.extend_from_slice(bytes); } NbfxText::UniqueId(bytes) => { out.push(REC_UNIQUE_ID_TEXT + bump); out.extend_from_slice(bytes); } } Ok(()) } fn encode_string(bytes: &[u8], out: &mut Vec) -> Result<(), NbfxError> { let len = i32::try_from(bytes.len()).map_err(|_| NbfxError::PayloadTooLarge { len: bytes.len(), max: i32::MAX as u64, })?; encode_multibyte_int31(out, len).map_err(|_| NbfxError::IntOverflow)?; out.extend_from_slice(bytes); Ok(()) } fn encode_multibyte_int31_to_nbfx(out: &mut Vec, value: u32) -> Result<(), NbfxError> { let signed = i32::try_from(value).map_err(|_| NbfxError::IntOverflow)?; encode_multibyte_int31(out, signed).map_err(|_| NbfxError::IntOverflow) } // ---- decoder ------------------------------------------------------------ /// Decode all NBFX tokens from `input`. Returns the token stream plus /// the number of bytes consumed. /// /// Threads the dynamic dictionary through; the codec doesn't auto-intern /// because `[MC-NBFX]` doesn't define a built-in `intern this string` /// record. Callers that need the dynamic dictionary populated (e.g. /// matching the WCF behavior of interning element names) intern from /// the inline-name tokens after the decode. 
pub fn decode_tokens( input: &[u8], _dynamic: &mut DynamicDictionary, ) -> Result<(Vec, usize), NbfxError> { let mut cursor = 0usize; let mut tokens = Vec::new(); while let Some(&kind) = input.get(cursor) { cursor += 1; match kind { REC_END_ELEMENT => tokens.push(NbfxToken::EndElement), REC_SHORT_ELEMENT => { let name = decode_string(input, &mut cursor, "short-element")?; tokens.push(NbfxToken::Element { prefix: None, name: NbfxName::Inline(name), }); } REC_ELEMENT => { let prefix = decode_string(input, &mut cursor, "element-prefix")?; let name = decode_string(input, &mut cursor, "element-name")?; tokens.push(NbfxToken::Element { prefix: Some(prefix), name: NbfxName::Inline(name), }); } REC_SHORT_DICT_ELEMENT => { let id = decode_int31(input, &mut cursor)?; tokens.push(NbfxToken::Element { prefix: None, name: NbfxName::Static(id), }); } REC_DICT_ELEMENT => { let prefix = decode_string(input, &mut cursor, "dict-element-prefix")?; let id = decode_int31(input, &mut cursor)?; tokens.push(NbfxToken::Element { prefix: Some(prefix), name: NbfxName::Static(id), }); } // PrefixDictionaryElement_a..z: 0x44..0x5D — single-letter // prefix + dict-id name. Inverse of the encoder's // short-form path above. byte if (0x44..=0x5D).contains(&byte) => { let prefix_letter = char::from(b'a' + (byte - 0x44)); let id = decode_int31(input, &mut cursor)?; tokens.push(NbfxToken::Element { prefix: Some(prefix_letter.to_string()), name: NbfxName::Static(id), }); } // PrefixElement_a..z: 0x5E..0x77 — single-letter prefix + // inline element name. WCF emits these on the response side // when the element name is not in either dictionary (e.g. // dynamically-named DataContract members). 
byte if (0x5E..=0x77).contains(&byte) => { let prefix_letter = char::from(b'a' + (byte - 0x5E)); let name = decode_string(input, &mut cursor, "prefix-element-name")?; tokens.push(NbfxToken::Element { prefix: Some(prefix_letter.to_string()), name: NbfxName::Inline(name), }); } REC_SHORT_ATTRIBUTE => { let name = decode_string(input, &mut cursor, "short-attribute")?; let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::Attribute { prefix: None, name: NbfxName::Inline(name), value, }); } REC_ATTRIBUTE => { let prefix = decode_string(input, &mut cursor, "attribute-prefix")?; let name = decode_string(input, &mut cursor, "attribute-name")?; let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::Attribute { prefix: Some(prefix), name: NbfxName::Inline(name), value, }); } REC_SHORT_DICT_ATTRIBUTE => { let id = decode_int31(input, &mut cursor)?; let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::Attribute { prefix: None, name: NbfxName::Static(id), value, }); } REC_DICT_ATTRIBUTE => { let prefix = decode_string(input, &mut cursor, "dict-attribute-prefix")?; let id = decode_int31(input, &mut cursor)?; let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::Attribute { prefix: Some(prefix), name: NbfxName::Static(id), value, }); } // PrefixDictionaryAttribute_a..z: 0x0C..0x25 — // single-letter prefix + dict-id name + text-record value. byte if (0x0C..=0x25).contains(&byte) => { let prefix_letter = char::from(b'a' + (byte - 0x0C)); let id = decode_int31(input, &mut cursor)?; let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::Attribute { prefix: Some(prefix_letter.to_string()), name: NbfxName::Static(id), value, }); } // PrefixAttribute_a..z: 0x26..0x3F — single-letter prefix + // inline attribute name + text-record value. 
byte if (0x26..=0x3F).contains(&byte) => { let prefix_letter = char::from(b'a' + (byte - 0x26)); let name = decode_string(input, &mut cursor, "prefix-attribute-name")?; let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::Attribute { prefix: Some(prefix_letter.to_string()), name: NbfxName::Inline(name), value, }); } REC_SHORT_XMLNS_ATTRIBUTE => { let value_str = decode_string(input, &mut cursor, "default-xmlns-value")?; tokens.push(NbfxToken::DefaultNamespace { value: NbfxText::Chars(value_str), }); } REC_XMLNS_ATTRIBUTE => { // Per spec, value is a raw length-prefixed string, // NOT a text record. let prefix = decode_string(input, &mut cursor, "xmlns-prefix")?; let value_str = decode_string(input, &mut cursor, "xmlns-value")?; tokens.push(NbfxToken::NamespaceDeclaration { prefix, value: NbfxText::Chars(value_str), }); } REC_DICT_XMLNS_ATTRIBUTE => { let prefix = decode_string(input, &mut cursor, "dict-xmlns-prefix")?; let id = decode_int31(input, &mut cursor)?; tokens.push(NbfxToken::NamespaceDeclaration { prefix, value: NbfxText::DictionaryStatic(id), }); } // 0x0A — no-prefix (default xmlns) variant of 0x0B. Sets // the default namespace to a dict-resolved string. WCF // emits this on the response side when the default ns is // a well-known string (e.g. urn:invensys.schemas). REC_SHORT_DICT_XMLNS_ATTRIBUTE => { let id = decode_int31(input, &mut cursor)?; tokens.push(NbfxToken::DefaultNamespace { value: NbfxText::DictionaryStatic(id), }); } // Text records — directly produce a Text token, plus an // implicit EndElement when the `*WithEndElement` variant was // used (record byte LSB = 1). 
byte if (REC_ZERO_TEXT..=0xBF).contains(&byte) => { let with_end = byte & 0x01 != 0; let base = byte & !0x01; let text = decode_text_body(input, &mut cursor, base)?; tokens.push(NbfxToken::Text(text)); if with_end { tokens.push(NbfxToken::EndElement); } } other => return Err(NbfxError::UnknownRecord(other)), } } Ok((tokens, cursor)) } fn decode_text_record(input: &[u8], cursor: &mut usize) -> Result { let byte = *input.get(*cursor).ok_or(NbfxError::Truncated { need: 1, have: 0, stage: "text-record-byte", })?; *cursor += 1; let base = byte & !0x01; decode_text_body(input, cursor, base) } fn decode_text_body(input: &[u8], cursor: &mut usize, base: u8) -> Result { Ok(match base { REC_ZERO_TEXT => NbfxText::Zero, REC_ONE_TEXT => NbfxText::One, REC_FALSE_TEXT => NbfxText::Bool(false), REC_TRUE_TEXT => NbfxText::Bool(true), REC_INT8_TEXT => { let b = *input.get(*cursor).ok_or(NmfTrunc("int8-text"))?; *cursor += 1; NbfxText::Int8(b as i8) } REC_INT16_TEXT => { let v = read_le::<2>(input, cursor, "int16-text")?; NbfxText::Int16(i16::from_le_bytes(v)) } REC_INT32_TEXT => { let v = read_le::<4>(input, cursor, "int32-text")?; NbfxText::Int32(i32::from_le_bytes(v)) } REC_INT64_TEXT => { let v = read_le::<8>(input, cursor, "int64-text")?; NbfxText::Int64(i64::from_le_bytes(v)) } REC_CHARS8_TEXT => { let len = *input.get(*cursor).ok_or(NmfTrunc("chars8-len"))? as usize; *cursor += 1; NbfxText::Chars(read_utf8(input, cursor, len, "chars8")?) } REC_CHARS16_TEXT => { let len_bytes = read_le::<2>(input, cursor, "chars16-len")?; let len = u16::from_le_bytes(len_bytes) as usize; NbfxText::Chars(read_utf8(input, cursor, len, "chars16")?) } REC_CHARS32_TEXT => { let len_bytes = read_le::<4>(input, cursor, "chars32-len")?; let len = u32::from_le_bytes(len_bytes) as usize; NbfxText::Chars(read_utf8(input, cursor, len, "chars32")?) 
} REC_EMPTY_TEXT => NbfxText::Empty, REC_DICTIONARY_TEXT => NbfxText::DictionaryStatic(decode_int31(input, cursor)?), REC_UNIQUE_ID_TEXT => { let bytes = read_le::<16>(input, cursor, "unique-id-text")?; NbfxText::UniqueId(bytes) } REC_BOOL_TEXT => { let b = *input.get(*cursor).ok_or(NmfTrunc("bool-text"))?; *cursor += 1; NbfxText::Bool(b != 0) } REC_BYTES8_TEXT => { let len = *input.get(*cursor).ok_or(NmfTrunc("bytes8-len"))? as usize; *cursor += 1; NbfxText::Bytes(read_bytes(input, cursor, len, "bytes8")?) } REC_BYTES16_TEXT => { let len_bytes = read_le::<2>(input, cursor, "bytes16-len")?; let len = u16::from_le_bytes(len_bytes) as usize; NbfxText::Bytes(read_bytes(input, cursor, len, "bytes16")?) } REC_BYTES32_TEXT => { let len_bytes = read_le::<4>(input, cursor, "bytes32-len")?; let len = u32::from_le_bytes(len_bytes) as usize; NbfxText::Bytes(read_bytes(input, cursor, len, "bytes32")?) } other => return Err(NbfxError::UnknownRecord(other)), }) } #[allow(non_snake_case)] fn NmfTrunc(stage: &'static str) -> NbfxError { NbfxError::Truncated { need: 1, have: 0, stage, } } fn read_le( input: &[u8], cursor: &mut usize, stage: &'static str, ) -> Result<[u8; N], NbfxError> { let slice = input .get(*cursor..*cursor + N) .ok_or(NbfxError::Truncated { need: N, have: input.len().saturating_sub(*cursor), stage, })?; let mut out = [0u8; N]; out.copy_from_slice(slice); *cursor += N; Ok(out) } fn read_utf8( input: &[u8], cursor: &mut usize, len: usize, stage: &'static str, ) -> Result { let raw = read_bytes(input, cursor, len, stage)?; String::from_utf8(raw).map_err(|_| NbfxError::InvalidUtf8 { stage }) } fn read_bytes( input: &[u8], cursor: &mut usize, len: usize, stage: &'static str, ) -> Result, NbfxError> { let slice = input .get(*cursor..*cursor + len) .ok_or(NbfxError::Truncated { need: len, have: input.len().saturating_sub(*cursor), stage, })?; let out = slice.to_vec(); *cursor += len; Ok(out) } fn decode_string( input: &[u8], cursor: &mut usize, stage: &'static str, ) 
-> Result { let len_i = decode_multibyte_int31(input, cursor).map_err(|_| NbfxError::IntOverflow)?; let len = usize::try_from(len_i).map_err(|_| NbfxError::NegativeLength(len_i))?; read_utf8(input, cursor, len, stage) } fn decode_int31(input: &[u8], cursor: &mut usize) -> Result { let signed = decode_multibyte_int31(input, cursor).map_err(|_| NbfxError::IntOverflow)?; u32::try_from(signed).map_err(|_| NbfxError::NegativeLength(signed)) } #[cfg(test)] #[allow( clippy::unwrap_used, clippy::expect_used, clippy::panic, clippy::indexing_slicing )] mod tests { use super::*; fn round_trip(tokens: Vec) { let mut dyn_w = DynamicDictionary::new(); let mut bytes = Vec::new(); encode_tokens(&tokens, &mut dyn_w, &mut bytes).unwrap(); let mut dyn_r = DynamicDictionary::new(); let (decoded, consumed) = decode_tokens(&bytes, &mut dyn_r).unwrap(); assert_eq!(consumed, bytes.len(), "decode left bytes"); assert_eq!(decoded, tokens); } #[test] fn dynamic_dictionary_interns_idempotently() { let mut d = DynamicDictionary::new(); assert_eq!(d.intern("a"), 0); assert_eq!(d.intern("b"), 1); assert_eq!(d.intern("a"), 0); assert_eq!(d.lookup(0), Some("a")); assert_eq!(d.lookup(1), Some("b")); assert_eq!(d.lookup(2), None); assert_eq!(d.position_of("a"), Some(0)); assert_eq!(d.position_of("missing"), None); assert_eq!(d.len(), 2); } #[test] fn short_element_round_trip_with_end() { round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Inline("Body".to_string()), }, NbfxToken::EndElement, ]); } #[test] fn long_element_with_prefix_round_trip() { round_trip(vec![ NbfxToken::Element { prefix: Some("a".to_string()), name: NbfxName::Inline("Action".to_string()), }, NbfxToken::EndElement, ]); } #[test] fn dict_element_round_trip() { round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Static(2), }, // "Envelope" NbfxToken::EndElement, ]); } #[test] fn attribute_round_trip_inline_name() { round_trip(vec![ NbfxToken::Element { prefix: None, name: 
NbfxName::Inline("e".to_string()), }, NbfxToken::Attribute { prefix: None, name: NbfxName::Inline("attr".to_string()), value: NbfxText::Chars("value".to_string()), }, NbfxToken::EndElement, ]); } #[test] fn attribute_round_trip_dict_name() { round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::Attribute { prefix: Some("a".to_string()), name: NbfxName::Static(10), // "Action" value: NbfxText::Chars("doSomething".to_string()), }, NbfxToken::EndElement, ]); } #[test] fn xmlns_default_round_trip() { round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::DefaultNamespace { value: NbfxText::Chars("urn:test".to_string()), }, NbfxToken::EndElement, ]); } #[test] fn xmlns_prefix_round_trip() { round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::NamespaceDeclaration { prefix: "a".to_string(), value: NbfxText::DictionaryStatic(6), // WS-Addressing }, NbfxToken::EndElement, ]); } #[test] fn text_records_round_trip_and_collapse_with_end_element() { // The encoder collapses Text + EndElement into the // *WithEndElement variant; the decoder splits them back out. 
for text in [ NbfxText::Empty, NbfxText::Zero, NbfxText::One, NbfxText::Bool(true), NbfxText::Bool(false), NbfxText::Int8(-1), NbfxText::Int16(-12345), NbfxText::Int32(0xDEAD_BEEFu32 as i32), NbfxText::Int64(i64::MIN), NbfxText::Chars("hello".to_string()), NbfxText::Chars("a".repeat(300)), // forces Chars16 NbfxText::DictionaryStatic(2), ] { round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::Text(text), NbfxToken::EndElement, ]); } } #[test] fn bytes_records_round_trip_all_widths() { for payload in [ vec![], vec![0xAB; 5], vec![0xCD; 300], // forces Bytes16 vec![0xEF; 70_000], // forces Bytes32 ] { round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::Text(NbfxText::Bytes(payload)), NbfxToken::EndElement, ]); } } #[test] fn chars32_handled_for_payloads_above_u16_max() { let big = "x".repeat(70_000); round_trip(vec![ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::Text(NbfxText::Chars(big)), NbfxToken::EndElement, ]); } #[test] fn collapse_emits_with_end_record_byte() { // Verify that the *WithEndElement variant is actually used on // the wire when text precedes EndElement. 
let mut bytes = Vec::new(); let mut d = DynamicDictionary::new(); encode_tokens( &[ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::Text(NbfxText::Bool(true)), NbfxToken::EndElement, ], &mut d, &mut bytes, ) .unwrap(); // Tail bytes: TrueTextWithEndElement = 0x87 assert_eq!(*bytes.last().unwrap(), 0x87); } #[test] fn empty_text_with_end_element_is_one_byte() { let mut bytes = Vec::new(); let mut d = DynamicDictionary::new(); encode_tokens( &[ NbfxToken::Element { prefix: None, name: NbfxName::Inline("e".to_string()), }, NbfxToken::Text(NbfxText::Empty), NbfxToken::EndElement, ], &mut d, &mut bytes, ) .unwrap(); // Last byte = EmptyTextWithEndElement = 0xA9 assert_eq!(*bytes.last().unwrap(), 0xA9); } #[test] fn unknown_record_byte_rejected() { let bytes = vec![0xFFu8]; let mut d = DynamicDictionary::new(); let err = decode_tokens(&bytes, &mut d).unwrap_err(); assert!(matches!(err, NbfxError::UnknownRecord(0xFF))); } #[test] fn truncated_chars_record_rejected() { // Chars8: byte 0x98, length 5, but only 2 payload bytes. let bytes = vec![REC_CHARS8_TEXT, 5, b'a', b'b']; let mut d = DynamicDictionary::new(); // The decoder has to be inside an element to make a Text token // useful, but it doesn't reject text-without-element — it just // surfaces the truncation. let err = decode_tokens(&bytes, &mut d).unwrap_err(); assert!(matches!( err, NbfxError::Truncated { stage: "chars8", .. } )); } #[test] fn nbfx_text_resolve_uses_dictionaries() { let dynamic = DynamicDictionary::new(); assert_eq!(NbfxText::Empty.resolve(&dynamic).as_deref(), Some("")); assert_eq!(NbfxText::Zero.resolve(&dynamic).as_deref(), Some("0")); assert_eq!( NbfxText::Bool(true).resolve(&dynamic).as_deref(), Some("true") ); assert_eq!(NbfxText::Int32(42).resolve(&dynamic).as_deref(), Some("42")); assert_eq!( NbfxText::DictionaryStatic(2).resolve(&dynamic).as_deref(), Some("Envelope") ); assert_eq!(NbfxText::DictionaryStatic(99_999).resolve(&dynamic), None); } }