mxaccess/rust/crates/mxaccess-asb-nettcp/src/nbfx.rs

//! `[MC-NBFX]` `.NET Binary XML Format` token codec.
//!
//! `[MC-NBFX]` §2.2 defines a record-based binary XML serialisation.
//! Element names, attribute names, and namespace strings can be carried
//! either inline (UTF-8 length-prefixed strings) or by reference into
//! the [`crate::nbfs`] static dictionary or a per-session dynamic
//! dictionary that the codec builds up.
//!
//! ## Scope of this port
//!
//! This module ships the **proven subset** of the spec — the records that
//! `analysis/proxy/mxasbclient-*` traces show on-the-wire for the ASB
//! `IASBIDataV2` operations:
//!
//! * Element / EndElement (`0x40` ShortElement, `0x41` Element with
//!   prefix string, `0x42` ShortDictionaryElement, `0x43`
//!   DictionaryElement) plus the single-letter prefix families
//!   `0x44-0x5D` PrefixDictionaryElement\_a..z and `0x5E-0x77`
//!   PrefixElement\_a..z. Encode prefixed elements as
//!   `Element { prefix, name }`; the codec emits the short family
//!   record when the prefix is a single lowercase ASCII letter and the
//!   long form (`0x41`/`0x43`) otherwise.
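//! * Attribute (`0x04` ShortAttribute, `0x05` Attribute, `0x06`
//!   ShortDictionaryAttribute, `0x07` DictionaryAttribute, and the
//!   single-letter prefix families `0x0C-0x25`
//!   PrefixDictionaryAttribute\_a..z / `0x26-0x3F` PrefixAttribute\_a..z)
//!   plus xmlns variants (`0x08`/`0x09`/`0x0A`/`0x0B`).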
//! * Text records: `0x80/0x81` Zero, `0x82/0x83` One, `0x84/0x85`
//!   False, `0x86/0x87` True, `0x88/0x89` Int8, `0x8A/0x8B` Int16,
//!   `0x8C/0x8D` Int32, `0x8E/0x8F` Int64, `0x98/0x99` Chars8,
//!   `0x9A/0x9B` Chars16, `0x9C/0x9D` Chars32, `0xA8/0xA9` EmptyText,
//!   `0xAA/0xAB` DictionaryText, `0xB4/0xB5` BoolText.
//! * `0x01` EndElement (the explicit form, for elements not closed by
//!   a `*WithEndElement` text variant).
//!
//! Each text record has a `*WithEndElement` form whose record byte is
//! `+1` (e.g. `0x99` = `Chars8TextWithEndElement`). Both variants are
//! supported.
//!
//! Records left for a follow-up: `Decimal`, `TimeSpan`, `Float`/`Double`
//! text, `DateTime` text, and `QNameDictionary`. These are observable in
//! some WCF traffic but not currently exercised by ASB on the proven
//! path. (`UniqueId` and `Bytes8/16/32` are modelled by [`NbfxText`] and
//! written by the encoder; the single-letter prefix-attribute and
//! prefix-element families are handled by both encoder and decoder.)
//!
//! ## What lives where
//!
//! * Static-dictionary lookup — [`crate::nbfs`] (separate F22 module).
//! * Dynamic-dictionary state — [`DynamicDictionary`] in this module;
//!   the encoder/decoder threads it through every call.
//! * Higher-level SOAP envelope construction — left to the F25 ASB
//!   client crate (`mxaccess-asb`). This codec is the byte-shovelling
//!   layer.

use std::collections::HashMap;

use thiserror::Error;

use crate::nbfs;
use crate::nmf::{decode_multibyte_int31, encode_multibyte_int31};

/// Session-scoped string table backing dynamic-dictionary references.
///
/// WCF grows this table as element/attribute strings are encountered:
/// a string seen for the first time is appended and receives the next
/// free ID, and later occurrences refer to that ID. IDs start at `0`
/// and increase by 1 (unlike the static-dictionary IDs in `[MC-NBFS]`,
/// which are even-only by spec convention).
#[derive(Debug, Default, Clone)]
pub struct DynamicDictionary {
    /// ID -> string, indexed by ID.
    forward: Vec<String>,
    /// string -> ID, the inverse of `forward`.
    reverse: HashMap<String, u32>,
}

impl DynamicDictionary {
    /// Create an empty dictionary.
    pub fn new() -> Self {
        Self {
            forward: Vec::new(),
            reverse: HashMap::new(),
        }
    }

    /// Return the ID of `value`, adding it to the table first when it
    /// is not present yet. Calling this again with the same string
    /// yields the same ID and leaves the table unchanged.
    pub fn intern(&mut self, value: &str) -> u32 {
        match self.position_of(value) {
            Some(existing) => existing,
            None => {
                // The next free ID is the current entry count.
                let fresh = self.len() as u32;
                let owned = value.to_owned();
                self.reverse.insert(owned.clone(), fresh);
                self.forward.push(owned);
                fresh
            }
        }
    }

    /// String stored under `id`, or `None` when no such ID was assigned.
    pub fn lookup(&self, id: u32) -> Option<&str> {
        let entry = self.forward.get(id as usize)?;
        Some(entry.as_str())
    }

    /// ID previously assigned to `value`, or `None` when it was never
    /// interned.
    pub fn position_of(&self, value: &str) -> Option<u32> {
        let id = *self.reverse.get(value)?;
        Some(id)
    }

    /// Number of interned strings.
    pub fn len(&self) -> usize {
        self.forward.len()
    }

    /// `true` when nothing has been interned yet.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

/// Token-level NBFX events. Encode/decode operate on streams of these.
#[derive(Debug, Clone, PartialEq)]
pub enum NbfxToken {
    /// Open element. `prefix=None` is the un-prefixed form (`0x40`/`0x42`).
    /// A prefix that is a single lowercase ASCII letter is written with
    /// the short prefix-letter families (`0x44-0x5D` for dictionary
    /// names, `0x5E-0x77` for inline names); any other prefix produces
    /// the long `0x41`/`0x43` form.
    Element {
        prefix: Option<String>,
        name: NbfxName,
    },
    /// Explicit end-element record (`REC_END_ELEMENT`, byte `0x01`).
    /// `*WithEndElement` text records imply this; the encoder folds a
    /// `Text` token directly followed by `EndElement` into that inline
    /// form, so an explicit record is only written when the element is
    /// empty or closed without trailing text.
    EndElement,
    /// Attribute on the currently-open element. xmlns declarations are
    /// separate cases below; dictionary-coded names are expressed via
    /// the `name` field.
    Attribute {
        prefix: Option<String>,
        name: NbfxName,
        value: NbfxText,
    },
    /// `xmlns="..."` (no prefix) — record `0x08` for an inline value.
    /// The decoder also yields this token, carrying a
    /// `DictionaryStatic` value, for the dictionary form `0x0A`.
    DefaultNamespace { value: NbfxText },
    /// `xmlns:prefix="..."` — record `0x09` for an inline value, or
    /// `0x0B` when the value is a dictionary id.
    NamespaceDeclaration { prefix: String, value: NbfxText },
    /// Standalone text content between an Element open and its EndElement
    /// (or a `*WithEndElement` text variant which closes the element
    /// inline).
    Text(NbfxText),
}

/// Element / attribute name reference. Inline carries a UTF-8 string;
/// `Static` references the `[MC-NBFS]` table; `Dynamic` references the
/// per-session [`DynamicDictionary`].
///
/// On the wire `Static` and `Dynamic` are written identically (the
/// dictionary record byte followed by the id). The decoder in this
/// module always reports dictionary-coded names as `Static`.
#[derive(Debug, Clone, PartialEq)]
pub enum NbfxName {
    /// Name carried inline as a length-prefixed UTF-8 string.
    Inline(String),
    /// Id into the static dictionary.
    Static(u32),
    /// Id into the per-session dynamic dictionary.
    Dynamic(u32),
}

/// Text-record payload. The payload carries no end-element marker of
/// its own: the encoder selects the `*WithEndElement` record byte when
/// a `Text` token is directly followed by an [`NbfxToken::EndElement`],
/// and the decoder reports a `*WithEndElement` record as a `Text` token
/// followed by an [`NbfxToken::EndElement`]. (i.e. consumers see the
/// same token stream regardless of whether the wire used the inline
/// form.)
#[derive(Debug, Clone, PartialEq)]
pub enum NbfxText {
    /// Empty text (record `0xA8`).
    Empty,
    /// The literal `0` (record `0x80`).
    Zero,
    /// The literal `1` (record `0x82`).
    One,
    /// Boolean text (`0x84` for false, `0x86` for true).
    Bool(bool),
    /// 8-bit signed integer (record `0x88`).
    Int8(i8),
    /// 16-bit signed integer, little-endian on the wire (record `0x8A`).
    Int16(i16),
    /// 32-bit signed integer, little-endian on the wire (record `0x8C`).
    Int32(i32),
    /// 64-bit signed integer, little-endian on the wire (record `0x8E`).
    Int64(i64),
    /// UTF-8 chars (length-prefixed, three width variants on the wire).
    Chars(String),
    /// Static-dictionary reference (`0xAA` DictionaryText). Decoders
    /// resolve this to the underlying string when their consumer asks
    /// for the text via [`Self::resolve`].
    DictionaryStatic(u32),
    /// Dynamic-dictionary reference. Same record byte (`0xAA`); the
    /// encoder in this module writes both dictionary variants
    /// identically, and [`Self::resolve`] looks the id up in the
    /// dictionary matching the variant. Producers are expected to pick
    /// `Static` when [`crate::nbfs::lookup_static`] succeeds and fall
    /// back to `Dynamic` otherwise.
    DictionaryDynamic(u32),
    /// Raw bytes (records `0x9E` Bytes8 / `0xA0` Bytes16 / `0xA2`
    /// Bytes32 — width chosen automatically by length on encode). Used
    /// by `XmlDictionaryWriter.WriteBase64` for the `ASBIData`
    /// content of `IAsbCustomSerializableType`-decorated fields.
    Bytes(Vec<u8>),
    /// 16-byte UUID (record `0xAC` UniqueIdText). WCF emits `<a:MessageID>`
    /// values via this record, with the 16 raw UUID bytes (NOT the
    /// `urn:uuid:...` text form).
    UniqueId([u8; 16]),
}

impl NbfxText {
    /// Render this payload as the string a consumer would read.
    ///
    /// Literal variants are formatted directly, `Chars` is cloned, and
    /// dictionary references are looked up: `DictionaryStatic` in the
    /// `[MC-NBFS]` static table, `DictionaryDynamic` in `dynamic`.
    ///
    /// Returns `None` when a dictionary id is not mapped, and for
    /// `Bytes`, which has no canonical text form.
    ///
    /// The result is owned, so the borrows of `self` and `dynamic` are
    /// independent and need no shared named lifetime.
    pub fn resolve(&self, dynamic: &DynamicDictionary) -> Option<String> {
        match self {
            Self::Empty => Some(String::new()),
            Self::Zero => Some("0".to_string()),
            Self::One => Some("1".to_string()),
            Self::Bool(true) => Some("true".to_string()),
            Self::Bool(false) => Some("false".to_string()),
            Self::Int8(v) => Some(v.to_string()),
            Self::Int16(v) => Some(v.to_string()),
            Self::Int32(v) => Some(v.to_string()),
            Self::Int64(v) => Some(v.to_string()),
            Self::Chars(s) => Some(s.clone()),
            Self::DictionaryStatic(id) => nbfs::lookup_static(*id).map(String::from),
            Self::DictionaryDynamic(id) => dynamic.lookup(*id).map(String::from),
            // Raw bytes have no canonical text representation; .NET's
            // `XmlDictionaryReader.ReadElementContentAsBase64` returns
            // them as `byte[]`. Consumers should match on the variant.
            Self::Bytes(_) => None,
            // UniqueId surfaces as the .NET `Guid.ToString("D")` form
            // (mixed-endian per [MS-DTYP]). Used for `<a:MessageID>`.
            Self::UniqueId(bytes) => Some(format_uuid_dotnet_style(bytes)),
        }
    }
}

/// Render 16 raw UUID bytes the way .NET's `Guid.ToString("D")` does.
///
/// The layout is mixed-endian: the first group (4 bytes) and the next
/// two groups (2 bytes each) are read little-endian, while the final
/// two groups (2 + 6 bytes) are printed in wire order. Output is
/// lowercase hex in the usual `8-4-4-4-12` shape, matching what
/// `<a:MessageID>` uses when emitted as text.
fn format_uuid_dotnet_style(bytes: &[u8; 16]) -> String {
    let [a0, a1, a2, a3, b0, b1, c0, c1, rest @ ..] = *bytes;

    let group1 = u32::from_le_bytes([a0, a1, a2, a3]);
    let group2 = u16::from_le_bytes([b0, b1]);
    let group3 = u16::from_le_bytes([c0, c1]);

    // The trailing eight bytes are emitted byte-by-byte, unswapped.
    let hex = |chunk: &[u8]| -> String { chunk.iter().map(|b| format!("{b:02x}")).collect() };
    let group4 = hex(&rest[..2]);
    let group5 = hex(&rest[2..]);

    format!("{group1:08x}-{group2:04x}-{group3:04x}-{group4}-{group5}")
}

/// Errors produced by the NBFX encoder and decoder in this module.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum NbfxError {
    /// Input ended before a complete record could be read. `stage`
    /// names the field being read; `need`/`have` are byte counts.
    #[error("truncated frame at {stage}: need {need} bytes, have {have}")]
    Truncated {
        need: usize,
        have: usize,
        stage: &'static str,
    },
    /// The decoder met a record byte it has no handler for.
    #[error("unknown NBFX record byte 0x{0:02x}")]
    UnknownRecord(u8),
    /// A string payload was not valid UTF-8. The encoder also returns
    /// this variant when a namespace token carries a value variant it
    /// cannot write; `stage` then names the accepted variants.
    #[error("invalid UTF-8 in NBFX {stage} payload")]
    InvalidUtf8 { stage: &'static str },
    /// A payload is longer than the widest length prefix can express
    /// (`max` is the limit that was exceeded).
    #[error("payload too large: {len} bytes (max {max})")]
    PayloadTooLarge { len: usize, max: u64 },
    /// A static-dictionary id had no entry.
    #[error("unknown static dictionary id {0}")]
    UnknownStaticDictionaryId(u32),
    /// A dynamic-dictionary id had no entry.
    #[error("unknown dynamic dictionary id {0}")]
    UnknownDynamicDictionaryId(u32),
    /// A length field held a negative value.
    #[error("expected length non-negative, got {0}")]
    NegativeLength(i32),
    /// A value could not be represented as a multibyte int31 (for
    /// example a dictionary id above `i32::MAX`).
    #[error("multibyte int overflow")]
    IntOverflow,
}

// ---- record byte constants ----------------------------------------------
//
// Naming matches `[MC-NBFX]` §2.2 record names. Records ending `WithEnd`
// are the `*WithEndElement` variants whose byte is the base record's
// byte + 1. Every base text record below is therefore an even byte.

/// `[MC-NBFX]` §2.2.3 EndElementRecord. Spec value is **0x01** (NOT
/// 0x00 — verified against .NET probe wire capture). Earlier
/// iterations had this wrong; round-trip tests passed because encode
/// and decode used the same wrong value, but interop with WCF parsers
/// failed silently (TCP RST on every envelope).
const REC_END_ELEMENT: u8 = 0x01;

// Attribute records. Values are text records unless noted otherwise.
const REC_SHORT_ATTRIBUTE: u8 = 0x04;
const REC_ATTRIBUTE: u8 = 0x05;
const REC_SHORT_DICT_ATTRIBUTE: u8 = 0x06;
const REC_DICT_ATTRIBUTE: u8 = 0x07;

// Namespace declarations. The value is a raw string (0x08/0x09) or a
// raw dictionary id (0x0A/0x0B), not a text record. Both dictionary
// forms are referenced by the codec, so no `dead_code` allowance is
// needed on them.
const REC_SHORT_XMLNS_ATTRIBUTE: u8 = 0x08;
const REC_XMLNS_ATTRIBUTE: u8 = 0x09;
const REC_SHORT_DICT_XMLNS_ATTRIBUTE: u8 = 0x0A;
const REC_DICT_XMLNS_ATTRIBUTE: u8 = 0x0B;

// Element records.
const REC_SHORT_ELEMENT: u8 = 0x40;
const REC_ELEMENT: u8 = 0x41;
const REC_SHORT_DICT_ELEMENT: u8 = 0x42;
const REC_DICT_ELEMENT: u8 = 0x43;

// Text records (base forms), in ascending byte order.
const REC_ZERO_TEXT: u8 = 0x80;
const REC_ONE_TEXT: u8 = 0x82;
const REC_FALSE_TEXT: u8 = 0x84;
const REC_TRUE_TEXT: u8 = 0x86;
const REC_INT8_TEXT: u8 = 0x88;
const REC_INT16_TEXT: u8 = 0x8A;
const REC_INT32_TEXT: u8 = 0x8C;
const REC_INT64_TEXT: u8 = 0x8E;
const REC_CHARS8_TEXT: u8 = 0x98;
const REC_CHARS16_TEXT: u8 = 0x9A;
const REC_CHARS32_TEXT: u8 = 0x9C;
const REC_BYTES8_TEXT: u8 = 0x9E;
const REC_BYTES16_TEXT: u8 = 0xA0;
const REC_BYTES32_TEXT: u8 = 0xA2;
const REC_EMPTY_TEXT: u8 = 0xA8;
const REC_DICTIONARY_TEXT: u8 = 0xAA;
const REC_UNIQUE_ID_TEXT: u8 = 0xAC;
const REC_BOOL_TEXT: u8 = 0xB4;

// ---- encoder ------------------------------------------------------------

/// Serialise `tokens` as NBFX records appended to `out`.
///
/// `dynamic` is handed through to the per-token encoder. Tokens are
/// written in order; on error the bytes already appended stay in `out`.
///
/// A `Text` token directly followed by `EndElement` is written as one
/// `*WithEndElement` text record and the `EndElement` token is
/// consumed with it. WCF emits that form by default when an element
/// has a single text child, so matching it is required for byte parity.
pub fn encode_tokens(
    tokens: &[NbfxToken],
    dynamic: &mut DynamicDictionary,
    out: &mut Vec<u8>,
) -> Result<(), NbfxError> {
    let mut stream = tokens.iter().peekable();
    while let Some(token) = stream.next() {
        let closes_inline = matches!(token, NbfxToken::Text(_))
            && matches!(stream.peek().copied(), Some(NbfxToken::EndElement));
        encode_one(token, dynamic, closes_inline, out)?;
        if closes_inline {
            // The end-element was folded into the text record byte.
            stream.next();
        }
    }
    Ok(())
}

fn encode_one(
    token: &NbfxToken,
    dynamic: &mut DynamicDictionary,
    with_end: bool,
    out: &mut Vec<u8>,
) -> Result<(), NbfxError> {
    match token {
        NbfxToken::Element { prefix, name } => encode_element(prefix.as_deref(), name, out),
        NbfxToken::EndElement => {
            out.push(REC_END_ELEMENT);
            Ok(())
        }
        NbfxToken::Attribute {
            prefix,
            name,
            value,
        } => encode_attribute(prefix.as_deref(), name, value, dynamic, out),
        NbfxToken::DefaultNamespace { value } => {
            // Per `[MC-NBFX]` §2.2.3: `ShortXmlnsAttribute` (0x08)
            // value is a RAW length-prefixed string (same convention
            // as 0x09 — see comment on NamespaceDeclaration below).
            match value {
                NbfxText::Chars(s) => {
                    out.push(REC_SHORT_XMLNS_ATTRIBUTE);
                    encode_string(s.as_bytes(), out)?;
                    Ok(())
                }
                _ => Err(NbfxError::InvalidUtf8 {
                    stage: "default-xmlns-must-be-Chars",
                }),
            }
        }
        NbfxToken::NamespaceDeclaration { prefix, value } => {
            // Per `[MC-NBFX]` §2.2.3: `XmlnsAttribute` (0x09) value is
            // a RAW length-prefixed string — NOT a text record like
            // regular Attribute values. `DictionaryXmlnsAttribute`
            // (0x0B) value is a raw multibyte-int31 dict id. Either
            // form omits the text-record byte and width-tag.
            //
            // We pick 0x0B when the value is `DictionaryStatic(id)`
            // (matches WCF's encoding for SOAP/WS-Addressing names),
            // and 0x09 + raw-string for `Chars(s)` (matches WCF's
            // encoding for namespaces not in the static dict — e.g.
            // xsi/xsd, or operation-specific URIs).
            match value {
                NbfxText::DictionaryStatic(id) => {
                    out.push(REC_DICT_XMLNS_ATTRIBUTE);
                    encode_string(prefix.as_bytes(), out)?;
                    encode_multibyte_int31_to_nbfx(out, *id)?;
                }
                NbfxText::Chars(s) => {
                    out.push(REC_XMLNS_ATTRIBUTE);
                    encode_string(prefix.as_bytes(), out)?;
                    encode_string(s.as_bytes(), out)?;
                }
                _ => {
                    return Err(NbfxError::InvalidUtf8 {
                        stage: "xmlns-value-must-be-Chars-or-DictionaryStatic",
                    });
                }
            }
            Ok(())
        }
        NbfxToken::Text(text) => encode_text(text, with_end, out),
    }
}

/// Map a one-letter prefix to its position in the alphabet.
///
/// Returns `Some(0)` for `"a"` through `Some(25)` for `"z"`, and `None`
/// for anything else (empty, longer than one character, uppercase,
/// digits, non-ASCII). WCF emits short-form prefix-letter records
/// (PrefixDictionaryElement_a..z = 0x44..0x5D, etc.) for these
/// prefixes, and stricter parsers may reject the long forms when a
/// short form would suffice.
fn prefix_letter_offset(prefix: &str) -> Option<u8> {
    // A string whose UTF-8 encoding is exactly one byte is exactly one
    // ASCII character, so matching on bytes covers the single-char rule.
    match prefix.as_bytes() {
        [letter @ b'a'..=b'z'] => Some(*letter - b'a'),
        _ => None,
    }
}

/// Write an element-open record for `name` with an optional `prefix`.
///
/// The record byte depends on two things:
///
/// * name kind — inline string vs dictionary id (`Static` and `Dynamic`
///   are written identically);
/// * prefix — absent (short record), a single lowercase ASCII letter
///   (one of the 26-record prefix-letter families), or anything else
///   (long record carrying the prefix string).
///
/// The prefix-letter offset is computed once and matched on, rather
/// than being tested in a guard and recomputed in the arm body.
fn encode_element(
    prefix: Option<&str>,
    name: &NbfxName,
    out: &mut Vec<u8>,
) -> Result<(), NbfxError> {
    // First record of PrefixDictionaryElement_a..z (0x44..=0x5D).
    const PREFIX_DICT_ELEMENT_A: u8 = 0x44;
    // First record of PrefixElement_a..z (0x5E..=0x77).
    const PREFIX_ELEMENT_A: u8 = 0x5E;

    let (short_rec, long_rec, letter_base) = match name {
        NbfxName::Inline(_) => (REC_SHORT_ELEMENT, REC_ELEMENT, PREFIX_ELEMENT_A),
        NbfxName::Static(_) | NbfxName::Dynamic(_) => {
            (REC_SHORT_DICT_ELEMENT, REC_DICT_ELEMENT, PREFIX_DICT_ELEMENT_A)
        }
    };

    // Record byte, plus the prefix string for the long form only.
    match prefix {
        None => out.push(short_rec),
        Some(p) => match prefix_letter_offset(p) {
            Some(offset) => out.push(letter_base + offset),
            None => {
                out.push(long_rec);
                encode_string(p.as_bytes(), out)?;
            }
        },
    }

    // Name payload.
    match name {
        NbfxName::Inline(local) => encode_string(local.as_bytes(), out),
        NbfxName::Static(id) | NbfxName::Dynamic(id) => encode_multibyte_int31_to_nbfx(out, *id),
    }
}

/// Write an attribute record: header, name, then the value as a text
/// record.
///
/// Record selection mirrors element encoding: no prefix gives the short
/// record, a single lowercase ASCII letter gives a prefix-letter family
/// record, and any other prefix gives the long record carrying the
/// prefix string. `Static` and `Dynamic` names are written identically.
/// The value is written through `encode_text_string_or_dict`, i.e. never
/// in a `*WithEndElement` form.
///
/// The prefix-letter offset is computed once and matched on, rather
/// than being tested in a guard and recomputed in the arm body.
fn encode_attribute(
    prefix: Option<&str>,
    name: &NbfxName,
    value: &NbfxText,
    dynamic: &mut DynamicDictionary,
    out: &mut Vec<u8>,
) -> Result<(), NbfxError> {
    // First record of PrefixDictionaryAttribute_a..z (0x0C..=0x25).
    const PREFIX_DICT_ATTRIBUTE_A: u8 = 0x0C;
    // First record of PrefixAttribute_a..z (0x26..=0x3F).
    const PREFIX_ATTRIBUTE_A: u8 = 0x26;

    let (short_rec, long_rec, letter_base) = match name {
        NbfxName::Inline(_) => (REC_SHORT_ATTRIBUTE, REC_ATTRIBUTE, PREFIX_ATTRIBUTE_A),
        NbfxName::Static(_) | NbfxName::Dynamic(_) => (
            REC_SHORT_DICT_ATTRIBUTE,
            REC_DICT_ATTRIBUTE,
            PREFIX_DICT_ATTRIBUTE_A,
        ),
    };

    // Record byte, plus the prefix string for the long form only.
    match prefix {
        None => out.push(short_rec),
        Some(p) => match prefix_letter_offset(p) {
            Some(offset) => out.push(letter_base + offset),
            None => {
                out.push(long_rec);
                encode_string(p.as_bytes(), out)?;
            }
        },
    }

    // Name payload.
    match name {
        NbfxName::Inline(local) => encode_string(local.as_bytes(), out)?,
        NbfxName::Static(id) | NbfxName::Dynamic(id) => encode_multibyte_int31_to_nbfx(out, *id)?,
    }

    encode_text_string_or_dict(value, dynamic, out)
}

/// Encode an attribute value. Attribute values use the same text
/// records as element content but are NOT followed by an EndElement,
/// so the value is always written in its base (non-`WithEndElement`)
/// form.
///
/// `_dynamic` is accepted for signature symmetry with the other
/// encoders but is not read; this is a thin wrapper over `encode_text`
/// with `with_end` fixed to `false`.
fn encode_text_string_or_dict(
    value: &NbfxText,
    _dynamic: &mut DynamicDictionary,
    out: &mut Vec<u8>,
) -> Result<(), NbfxError> {
    encode_text(value, false, out)
}

fn encode_text(text: &NbfxText, with_end: bool, out: &mut Vec<u8>) -> Result<(), NbfxError> {
    let bump = if with_end { 1 } else { 0 };
    match text {
        NbfxText::Empty => out.push(REC_EMPTY_TEXT + bump),
        NbfxText::Zero => out.push(REC_ZERO_TEXT + bump),
        NbfxText::One => out.push(REC_ONE_TEXT + bump),
        NbfxText::Bool(false) => out.push(REC_FALSE_TEXT + bump),
        NbfxText::Bool(true) => out.push(REC_TRUE_TEXT + bump),
        NbfxText::Int8(v) => {
            out.push(REC_INT8_TEXT + bump);
            out.push(*v as u8);
        }
        NbfxText::Int16(v) => {
            out.push(REC_INT16_TEXT + bump);
            out.extend_from_slice(&v.to_le_bytes());
        }
        NbfxText::Int32(v) => {
            out.push(REC_INT32_TEXT + bump);
            out.extend_from_slice(&v.to_le_bytes());
        }
        NbfxText::Int64(v) => {
            out.push(REC_INT64_TEXT + bump);
            out.extend_from_slice(&v.to_le_bytes());
        }
        NbfxText::Chars(s) => {
            let bytes = s.as_bytes();
            let len = bytes.len();
            if len <= u8::MAX as usize {
                out.push(REC_CHARS8_TEXT + bump);
                out.push(len as u8);
            } else if len <= u16::MAX as usize {
                out.push(REC_CHARS16_TEXT + bump);
                out.extend_from_slice(&(len as u16).to_le_bytes());
            } else if len <= u32::MAX as usize {
                out.push(REC_CHARS32_TEXT + bump);
                out.extend_from_slice(&(len as u32).to_le_bytes());
            } else {
                return Err(NbfxError::PayloadTooLarge {
                    len,
                    max: u32::MAX as u64,
                });
            }
            out.extend_from_slice(bytes);
        }
        NbfxText::DictionaryStatic(id) | NbfxText::DictionaryDynamic(id) => {
            out.push(REC_DICTIONARY_TEXT + bump);
            encode_multibyte_int31_to_nbfx(out, *id)?;
        }
        NbfxText::Bytes(bytes) => {
            let len = bytes.len();
            if len <= u8::MAX as usize {
                out.push(REC_BYTES8_TEXT + bump);
                out.push(len as u8);
            } else if len <= u16::MAX as usize {
                out.push(REC_BYTES16_TEXT + bump);
                out.extend_from_slice(&(len as u16).to_le_bytes());
            } else if len <= u32::MAX as usize {
                out.push(REC_BYTES32_TEXT + bump);
                out.extend_from_slice(&(len as u32).to_le_bytes());
            } else {
                return Err(NbfxError::PayloadTooLarge {
                    len,
                    max: u32::MAX as u64,
                });
            }
            out.extend_from_slice(bytes);
        }
        NbfxText::UniqueId(bytes) => {
            out.push(REC_UNIQUE_ID_TEXT + bump);
            out.extend_from_slice(bytes);
        }
    }
    Ok(())
}

/// Append `bytes` to `out` preceded by their length as a multibyte
/// int31.
///
/// # Errors
///
/// `NbfxError::PayloadTooLarge` when the length does not fit in `i32`;
/// `NbfxError::IntOverflow` when the length encoder rejects the value.
fn encode_string(bytes: &[u8], out: &mut Vec<u8>) -> Result<(), NbfxError> {
    let byte_count = bytes.len();
    let prefix = match i32::try_from(byte_count) {
        Ok(n) => n,
        Err(_) => {
            return Err(NbfxError::PayloadTooLarge {
                len: byte_count,
                max: i32::MAX as u64,
            })
        }
    };
    if encode_multibyte_int31(out, prefix).is_err() {
        return Err(NbfxError::IntOverflow);
    }
    out.extend_from_slice(bytes);
    Ok(())
}

/// Append `value` to `out` as a multibyte int31.
///
/// Any failure — `value` not fitting in `i32`, or the underlying
/// encoder rejecting it — is reported as `NbfxError::IntOverflow`.
fn encode_multibyte_int31_to_nbfx(out: &mut Vec<u8>, value: u32) -> Result<(), NbfxError> {
    match i32::try_from(value) {
        Ok(signed) => match encode_multibyte_int31(out, signed) {
            Ok(()) => Ok(()),
            Err(_) => Err(NbfxError::IntOverflow),
        },
        Err(_) => Err(NbfxError::IntOverflow),
    }
}

// ---- decoder ------------------------------------------------------------

/// Decode all NBFX tokens from `input`. Returns the token stream plus
/// the number of bytes consumed.
///
/// Threads the dynamic dictionary through; the codec doesn't auto-intern
/// because `[MC-NBFX]` doesn't define a built-in `intern this string`
/// record. Callers that need the dynamic dictionary populated (e.g.
/// matching the WCF behavior of interning element names) intern from
/// the inline-name tokens after the decode.
pub fn decode_tokens(
    input: &[u8],
    _dynamic: &mut DynamicDictionary,
) -> Result<(Vec<NbfxToken>, usize), NbfxError> {
    let mut cursor = 0usize;
    let mut tokens = Vec::new();
    while let Some(&kind) = input.get(cursor) {
        cursor += 1;

        match kind {
            REC_END_ELEMENT => tokens.push(NbfxToken::EndElement),
            REC_SHORT_ELEMENT => {
                let name = decode_string(input, &mut cursor, "short-element")?;
                tokens.push(NbfxToken::Element {
                    prefix: None,
                    name: NbfxName::Inline(name),
                });
            }
            REC_ELEMENT => {
                let prefix = decode_string(input, &mut cursor, "element-prefix")?;
                let name = decode_string(input, &mut cursor, "element-name")?;
                tokens.push(NbfxToken::Element {
                    prefix: Some(prefix),
                    name: NbfxName::Inline(name),
                });
            }
            REC_SHORT_DICT_ELEMENT => {
                let id = decode_int31(input, &mut cursor)?;
                tokens.push(NbfxToken::Element {
                    prefix: None,
                    name: NbfxName::Static(id),
                });
            }
            REC_DICT_ELEMENT => {
                let prefix = decode_string(input, &mut cursor, "dict-element-prefix")?;
                let id = decode_int31(input, &mut cursor)?;
                tokens.push(NbfxToken::Element {
                    prefix: Some(prefix),
                    name: NbfxName::Static(id),
                });
            }
            // PrefixDictionaryElement_a..z: 0x44..0x5D — single-letter
            // prefix + dict-id name. Inverse of the encoder's
            // short-form path above.
            byte if (0x44..=0x5D).contains(&byte) => {
                let prefix_letter = char::from(b'a' + (byte - 0x44));
                let id = decode_int31(input, &mut cursor)?;
                tokens.push(NbfxToken::Element {
                    prefix: Some(prefix_letter.to_string()),
                    name: NbfxName::Static(id),
                });
            }
            // PrefixElement_a..z: 0x5E..0x77 — single-letter prefix +
            // inline element name. WCF emits these on the response side
            // when the element name is not in either dictionary (e.g.
            // dynamically-named DataContract members).
            byte if (0x5E..=0x77).contains(&byte) => {
                let prefix_letter = char::from(b'a' + (byte - 0x5E));
                let name = decode_string(input, &mut cursor, "prefix-element-name")?;
                tokens.push(NbfxToken::Element {
                    prefix: Some(prefix_letter.to_string()),
                    name: NbfxName::Inline(name),
                });
            }
            REC_SHORT_ATTRIBUTE => {
                let name = decode_string(input, &mut cursor, "short-attribute")?;
                let value = decode_text_record(input, &mut cursor)?;
                tokens.push(NbfxToken::Attribute {
                    prefix: None,
                    name: NbfxName::Inline(name),
                    value,
                });
            }
            REC_ATTRIBUTE => {
                let prefix = decode_string(input, &mut cursor, "attribute-prefix")?;
                let name = decode_string(input, &mut cursor, "attribute-name")?;
                let value = decode_text_record(input, &mut cursor)?;
                tokens.push(NbfxToken::Attribute {
                    prefix: Some(prefix),
                    name: NbfxName::Inline(name),
                    value,
                });
            }
            REC_SHORT_DICT_ATTRIBUTE => {
                let id = decode_int31(input, &mut cursor)?;
                let value = decode_text_record(input, &mut cursor)?;
                tokens.push(NbfxToken::Attribute {
                    prefix: None,
                    name: NbfxName::Static(id),
                    value,
                });
            }
            REC_DICT_ATTRIBUTE => {
                let prefix = decode_string(input, &mut cursor, "dict-attribute-prefix")?;
                let id = decode_int31(input, &mut cursor)?;
                let value = decode_text_record(input, &mut cursor)?;
                tokens.push(NbfxToken::Attribute {
                    prefix: Some(prefix),
                    name: NbfxName::Static(id),
                    value,
                });
            }
            // PrefixDictionaryAttribute_a..z: 0x0C..0x25 —
            // single-letter prefix + dict-id name + text-record value.
            byte if (0x0C..=0x25).contains(&byte) => {
                let prefix_letter = char::from(b'a' + (byte - 0x0C));
                let id = decode_int31(input, &mut cursor)?;
                let value = decode_text_record(input, &mut cursor)?;
                tokens.push(NbfxToken::Attribute {
                    prefix: Some(prefix_letter.to_string()),
                    name: NbfxName::Static(id),
                    value,
                });
            }
            // PrefixAttribute_a..z: 0x26..0x3F — single-letter prefix +
            // inline attribute name + text-record value.
            byte if (0x26..=0x3F).contains(&byte) => {
                let prefix_letter = char::from(b'a' + (byte - 0x26));
                let name = decode_string(input, &mut cursor, "prefix-attribute-name")?;
                let value = decode_text_record(input, &mut cursor)?;
                tokens.push(NbfxToken::Attribute {
                    prefix: Some(prefix_letter.to_string()),
                    name: NbfxName::Inline(name),
                    value,
                });
            }
            REC_SHORT_XMLNS_ATTRIBUTE => {
                let value_str = decode_string(input, &mut cursor, "default-xmlns-value")?;
                tokens.push(NbfxToken::DefaultNamespace {
                    value: NbfxText::Chars(value_str),
                });
            }
            REC_XMLNS_ATTRIBUTE => {
                // Per spec, value is a raw length-prefixed string,
                // NOT a text record.
                let prefix = decode_string(input, &mut cursor, "xmlns-prefix")?;
                let value_str = decode_string(input, &mut cursor, "xmlns-value")?;
                tokens.push(NbfxToken::NamespaceDeclaration {
                    prefix,
                    value: NbfxText::Chars(value_str),
                });
            }
            REC_DICT_XMLNS_ATTRIBUTE => {
                let prefix = decode_string(input, &mut cursor, "dict-xmlns-prefix")?;
                let id = decode_int31(input, &mut cursor)?;
                tokens.push(NbfxToken::NamespaceDeclaration {
                    prefix,
                    value: NbfxText::DictionaryStatic(id),
                });
            }
            // 0x0A — no-prefix (default xmlns) variant of 0x0B. Sets
            // the default namespace to a dict-resolved string. WCF
            // emits this on the response side when the default ns is
            // a well-known string (e.g. urn:invensys.schemas).
            REC_SHORT_DICT_XMLNS_ATTRIBUTE => {
                let id = decode_int31(input, &mut cursor)?;
                tokens.push(NbfxToken::DefaultNamespace {
                    value: NbfxText::DictionaryStatic(id),
                });
            }
            // Text records — directly produce a Text token, plus an
            // implicit EndElement when the `*WithEndElement` variant was
            // used (record byte LSB = 1).
            byte if (REC_ZERO_TEXT..=0xBF).contains(&byte) => {
                let with_end = byte & 0x01 != 0;
                let base = byte & !0x01;
                let text = decode_text_body(input, &mut cursor, base)?;
                tokens.push(NbfxToken::Text(text));
                if with_end {
                    tokens.push(NbfxToken::EndElement);
                }
            }
            other => return Err(NbfxError::UnknownRecord(other)),
        }
    }
    Ok((tokens, cursor))
}

/// Decodes one whole text record — the record byte plus its payload —
/// beginning at `*cursor`.
///
/// The low bit of the record byte (the `*WithEndElement` marker) is masked
/// off before the payload is decoded, so both forms of a record produce the
/// same [`NbfxText`] and no end-of-element signal is returned to the caller.
///
/// # Errors
///
/// Returns [`NbfxError::Truncated`] with stage `"text-record-byte"` when no
/// byte is available at the cursor; otherwise propagates whatever
/// `decode_text_body` reports for the payload.
fn decode_text_record(input: &[u8], cursor: &mut usize) -> Result<NbfxText, NbfxError> {
    let record = match input.get(*cursor) {
        Some(&byte) => byte,
        None => {
            return Err(NbfxError::Truncated {
                need: 1,
                have: 0,
                stage: "text-record-byte",
            })
        }
    };
    *cursor += 1;
    // 0xFE == !0x01: drop the `*WithEndElement` bit.
    decode_text_body(input, cursor, record & 0xFE)
}

/// Decodes the payload of a text record whose record byte has already been
/// consumed.
///
/// `base` is the record byte with its low (`*WithEndElement`) bit cleared.
/// On success the cursor is left immediately after the payload.
///
/// # Errors
///
/// * [`NbfxError::UnknownRecord`] carrying `base` when it is not one of the
///   text records handled below.
/// * Any truncation, UTF-8, or integer error reported by the read helpers.
fn decode_text_body(input: &[u8], cursor: &mut usize, base: u8) -> Result<NbfxText, NbfxError> {
    /// Consumes a single byte, naming `stage` in the error if none is left.
    fn take_byte(
        input: &[u8],
        cursor: &mut usize,
        stage: &'static str,
    ) -> Result<u8, NbfxError> {
        let byte = *input.get(*cursor).ok_or(NmfTrunc(stage))?;
        *cursor += 1;
        Ok(byte)
    }

    match base {
        // Records whose value is implied by the record byte: no payload.
        REC_ZERO_TEXT => Ok(NbfxText::Zero),
        REC_ONE_TEXT => Ok(NbfxText::One),
        REC_FALSE_TEXT => Ok(NbfxText::Bool(false)),
        REC_TRUE_TEXT => Ok(NbfxText::Bool(true)),
        REC_EMPTY_TEXT => Ok(NbfxText::Empty),

        // Fixed-width little-endian integers.
        REC_INT8_TEXT => {
            take_byte(input, cursor, "int8-text").map(|raw| NbfxText::Int8(i8::from_le_bytes([raw])))
        }
        REC_INT16_TEXT => read_le::<2>(input, cursor, "int16-text")
            .map(|raw| NbfxText::Int16(i16::from_le_bytes(raw))),
        REC_INT32_TEXT => read_le::<4>(input, cursor, "int32-text")
            .map(|raw| NbfxText::Int32(i32::from_le_bytes(raw))),
        REC_INT64_TEXT => read_le::<8>(input, cursor, "int64-text")
            .map(|raw| NbfxText::Int64(i64::from_le_bytes(raw))),

        // UTF-8 strings prefixed by an 8/16/32-bit little-endian byte count.
        REC_CHARS8_TEXT => {
            let byte_count = usize::from(take_byte(input, cursor, "chars8-len")?);
            read_utf8(input, cursor, byte_count, "chars8").map(NbfxText::Chars)
        }
        REC_CHARS16_TEXT => {
            let prefix = read_le::<2>(input, cursor, "chars16-len")?;
            let byte_count = usize::from(u16::from_le_bytes(prefix));
            read_utf8(input, cursor, byte_count, "chars16").map(NbfxText::Chars)
        }
        REC_CHARS32_TEXT => {
            let prefix = read_le::<4>(input, cursor, "chars32-len")?;
            let byte_count = u32::from_le_bytes(prefix) as usize;
            read_utf8(input, cursor, byte_count, "chars32").map(NbfxText::Chars)
        }

        // Reference into the string dictionary by id.
        REC_DICTIONARY_TEXT => decode_int31(input, cursor).map(NbfxText::DictionaryStatic),

        // 16 raw bytes, passed through without reinterpretation.
        REC_UNIQUE_ID_TEXT => {
            read_le::<16>(input, cursor, "unique-id-text").map(NbfxText::UniqueId)
        }

        // Explicit boolean byte: any non-zero value counts as true.
        REC_BOOL_TEXT => {
            take_byte(input, cursor, "bool-text").map(|flag| NbfxText::Bool(flag != 0))
        }

        // Opaque byte blobs prefixed by an 8/16/32-bit little-endian count.
        REC_BYTES8_TEXT => {
            let byte_count = usize::from(take_byte(input, cursor, "bytes8-len")?);
            read_bytes(input, cursor, byte_count, "bytes8").map(NbfxText::Bytes)
        }
        REC_BYTES16_TEXT => {
            let prefix = read_le::<2>(input, cursor, "bytes16-len")?;
            let byte_count = usize::from(u16::from_le_bytes(prefix));
            read_bytes(input, cursor, byte_count, "bytes16").map(NbfxText::Bytes)
        }
        REC_BYTES32_TEXT => {
            let prefix = read_le::<4>(input, cursor, "bytes32-len")?;
            let byte_count = u32::from_le_bytes(prefix) as usize;
            read_bytes(input, cursor, byte_count, "bytes32").map(NbfxText::Bytes)
        }

        other => Err(NbfxError::UnknownRecord(other)),
    }
}

/// Shorthand for the [`NbfxError::Truncated`] raised when a single-byte read
/// finds nothing at the cursor.
///
/// The error always reports `need: 1, have: 0`; `stage` names the field that
/// was being read. The non-snake-case name is kept for the existing call
/// sites, which is why the lint is allowed here.
#[allow(non_snake_case)]
fn NmfTrunc(stage: &'static str) -> NbfxError {
    let (need, have) = (1, 0);
    NbfxError::Truncated { need, have, stage }
}

/// Copies exactly `N` bytes starting at `*cursor` into a fixed-size array and
/// moves the cursor past them.
///
/// No byte-order conversion happens here: the bytes are returned in wire
/// order and the integer callers apply `from_le_bytes` themselves.
///
/// # Errors
///
/// Returns [`NbfxError::Truncated`] with `need = N` and `have` set to the
/// number of bytes remaining after the cursor. The cursor is not advanced
/// when this happens.
fn read_le<const N: usize>(
    input: &[u8],
    cursor: &mut usize,
    stage: &'static str,
) -> Result<[u8; N], NbfxError> {
    let start = *cursor;
    let end = start + N;
    match input.get(start..end) {
        Some(window) => {
            let mut fixed = [0u8; N];
            fixed.copy_from_slice(window);
            *cursor = end;
            Ok(fixed)
        }
        None => Err(NbfxError::Truncated {
            need: N,
            have: input.len().saturating_sub(start),
            stage,
        }),
    }
}

/// Reads `len` bytes at the cursor and interprets them as a UTF-8 string.
///
/// The bytes are consumed through `read_bytes`, so the cursor has already
/// moved past them even when the UTF-8 check subsequently fails.
///
/// # Errors
///
/// * Whatever `read_bytes` reports when fewer than `len` bytes remain.
/// * [`NbfxError::InvalidUtf8`] tagged with `stage` when the bytes are not
///   valid UTF-8.
fn read_utf8(
    input: &[u8],
    cursor: &mut usize,
    len: usize,
    stage: &'static str,
) -> Result<String, NbfxError> {
    match String::from_utf8(read_bytes(input, cursor, len, stage)?) {
        Ok(text) => Ok(text),
        Err(_) => Err(NbfxError::InvalidUtf8 { stage }),
    }
}

/// Copies `len` bytes starting at `*cursor` into a new `Vec` and moves the
/// cursor past them.
///
/// `len` is taken from the wire (up to a 32-bit length prefix), so the end
/// offset is computed with `checked_add`: a length that would overflow
/// `usize` — reachable on 32-bit targets — is reported as truncated input
/// instead of panicking or wrapping.
///
/// # Errors
///
/// Returns [`NbfxError::Truncated`] with `need = len` and `have` set to the
/// number of bytes remaining after the cursor. The cursor is not advanced
/// when this happens.
fn read_bytes(
    input: &[u8],
    cursor: &mut usize,
    len: usize,
    stage: &'static str,
) -> Result<Vec<u8>, NbfxError> {
    let start = *cursor;
    let truncated = || NbfxError::Truncated {
        need: len,
        have: input.len().saturating_sub(start),
        stage,
    };
    // An overflowing end offset can never lie inside `input`, so treat it
    // exactly like a range that runs off the end of the buffer.
    let end = start.checked_add(len).ok_or_else(truncated)?;
    let payload = input.get(start..end).ok_or_else(truncated)?;
    *cursor = end;
    Ok(payload.to_vec())
}

/// Decodes a length-prefixed UTF-8 string: a multi-byte integer length
/// followed by that many bytes of text.
///
/// `stage` labels the payload read in any truncation or UTF-8 error.
///
/// # Errors
///
/// * [`NbfxError::IntOverflow`] for any failure of the length decoder (its
///   own error value is discarded).
/// * [`NbfxError::NegativeLength`] when the decoded length cannot be
///   represented as a `usize`.
/// * Whatever `read_utf8` reports for the payload.
fn decode_string(
    input: &[u8],
    cursor: &mut usize,
    stage: &'static str,
) -> Result<String, NbfxError> {
    let wire_len = match decode_multibyte_int31(input, cursor) {
        Ok(value) => value,
        Err(_) => return Err(NbfxError::IntOverflow),
    };
    let byte_count = match usize::try_from(wire_len) {
        Ok(count) => count,
        Err(_) => return Err(NbfxError::NegativeLength(wire_len)),
    };
    read_utf8(input, cursor, byte_count, stage)
}

/// Decodes a multi-byte integer at the cursor and returns it as a `u32`.
///
/// # Errors
///
/// * [`NbfxError::IntOverflow`] for any failure of the underlying decoder
///   (its own error value is discarded).
/// * [`NbfxError::NegativeLength`] carrying the decoded value when it does
///   not fit in a `u32`.
fn decode_int31(input: &[u8], cursor: &mut usize) -> Result<u32, NbfxError> {
    match decode_multibyte_int31(input, cursor) {
        Ok(raw) => u32::try_from(raw).map_err(|_| NbfxError::NegativeLength(raw)),
        Err(_) => Err(NbfxError::IntOverflow),
    }
}

#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    /// Start token for an unprefixed element whose name is carried inline.
    fn open(name: &str) -> NbfxToken {
        NbfxToken::Element {
            prefix: None,
            name: NbfxName::Inline(name.to_owned()),
        }
    }

    /// Wraps `inner` between an `<e>` start token and an explicit
    /// `EndElement`.
    fn in_element(inner: Vec<NbfxToken>) -> Vec<NbfxToken> {
        let mut tokens = vec![open("e")];
        tokens.extend(inner);
        tokens.push(NbfxToken::EndElement);
        tokens
    }

    /// Encodes `tokens` against a fresh dynamic dictionary and returns the
    /// wire bytes.
    fn encode(tokens: &[NbfxToken]) -> Vec<u8> {
        let mut writer_dict = DynamicDictionary::new();
        let mut wire = Vec::new();
        encode_tokens(tokens, &mut writer_dict, &mut wire).unwrap();
        wire
    }

    /// Encodes, decodes with an independent dictionary, and checks that the
    /// whole buffer was consumed and the tokens came back unchanged.
    fn round_trip(tokens: Vec<NbfxToken>) {
        let wire = encode(&tokens);
        let mut reader_dict = DynamicDictionary::new();
        let (decoded, consumed) = decode_tokens(&wire, &mut reader_dict).unwrap();
        assert_eq!(consumed, wire.len(), "decode left bytes");
        assert_eq!(decoded, tokens);
    }

    #[test]
    fn dynamic_dictionary_interns_idempotently() {
        let mut dict = DynamicDictionary::new();

        // New strings get consecutive ids; a repeat returns the first id.
        assert_eq!(dict.intern("a"), 0);
        assert_eq!(dict.intern("b"), 1);
        assert_eq!(dict.intern("a"), 0);

        // Forward lookups by id.
        assert_eq!(dict.lookup(0), Some("a"));
        assert_eq!(dict.lookup(1), Some("b"));
        assert_eq!(dict.lookup(2), None);

        // Reverse lookups by string.
        assert_eq!(dict.position_of("a"), Some(0));
        assert_eq!(dict.position_of("missing"), None);

        assert_eq!(dict.len(), 2);
    }

    #[test]
    fn short_element_round_trip_with_end() {
        round_trip(vec![open("Body"), NbfxToken::EndElement]);
    }

    #[test]
    fn long_element_with_prefix_round_trip() {
        let prefixed = NbfxToken::Element {
            prefix: Some("a".to_owned()),
            name: NbfxName::Inline("Action".to_owned()),
        };
        round_trip(vec![prefixed, NbfxToken::EndElement]);
    }

    #[test]
    fn dict_element_round_trip() {
        // Static id 2 is "Envelope".
        let envelope = NbfxToken::Element {
            prefix: None,
            name: NbfxName::Static(2),
        };
        round_trip(vec![envelope, NbfxToken::EndElement]);
    }

    #[test]
    fn attribute_round_trip_inline_name() {
        let attribute = NbfxToken::Attribute {
            prefix: None,
            name: NbfxName::Inline("attr".to_owned()),
            value: NbfxText::Chars("value".to_owned()),
        };
        round_trip(in_element(vec![attribute]));
    }

    #[test]
    fn attribute_round_trip_dict_name() {
        // Static id 10 is "Action".
        let attribute = NbfxToken::Attribute {
            prefix: Some("a".to_owned()),
            name: NbfxName::Static(10),
            value: NbfxText::Chars("doSomething".to_owned()),
        };
        round_trip(in_element(vec![attribute]));
    }

    #[test]
    fn xmlns_default_round_trip() {
        let default_ns = NbfxToken::DefaultNamespace {
            value: NbfxText::Chars("urn:test".to_owned()),
        };
        round_trip(in_element(vec![default_ns]));
    }

    #[test]
    fn xmlns_prefix_round_trip() {
        // Static id 6 is the WS-Addressing namespace.
        let declaration = NbfxToken::NamespaceDeclaration {
            prefix: "a".to_owned(),
            value: NbfxText::DictionaryStatic(6),
        };
        round_trip(in_element(vec![declaration]));
    }

    #[test]
    fn text_records_round_trip_and_collapse_with_end_element() {
        // Text immediately followed by EndElement is written as the
        // *WithEndElement record; decoding must split it back into two
        // tokens.
        let samples = [
            NbfxText::Empty,
            NbfxText::Zero,
            NbfxText::One,
            NbfxText::Bool(true),
            NbfxText::Bool(false),
            NbfxText::Int8(-1),
            NbfxText::Int16(-12345),
            NbfxText::Int32(0xDEAD_BEEFu32 as i32),
            NbfxText::Int64(i64::MIN),
            NbfxText::Chars("hello".to_owned()),
            NbfxText::Chars("a".repeat(300)), // too long for Chars8
            NbfxText::DictionaryStatic(2),
        ];
        for sample in samples {
            round_trip(in_element(vec![NbfxToken::Text(sample)]));
        }
    }

    #[test]
    fn bytes_records_round_trip_all_widths() {
        // (fill byte, length): empty, Bytes8, Bytes16 and Bytes32 sizes.
        for (fill, len) in [
            (0x00u8, 0usize),
            (0xAB, 5),
            (0xCD, 300),
            (0xEF, 70_000),
        ] {
            let payload = vec![fill; len];
            round_trip(in_element(vec![NbfxToken::Text(NbfxText::Bytes(payload))]));
        }
    }

    #[test]
    fn chars32_handled_for_payloads_above_u16_max() {
        let oversized = "x".repeat(70_000);
        round_trip(in_element(vec![NbfxToken::Text(NbfxText::Chars(
            oversized,
        ))]));
    }

    #[test]
    fn collapse_emits_with_end_record_byte() {
        // The *WithEndElement form must actually appear on the wire when a
        // text token directly precedes EndElement.
        let wire = encode(&in_element(vec![NbfxToken::Text(NbfxText::Bool(true))]));
        // TrueTextWithEndElement = 0x87
        assert_eq!(wire.last().copied(), Some(0x87));
    }

    #[test]
    fn empty_text_with_end_element_is_one_byte() {
        let wire = encode(&in_element(vec![NbfxToken::Text(NbfxText::Empty)]));
        // EmptyTextWithEndElement = 0xA9
        assert_eq!(wire.last().copied(), Some(0xA9));
    }

    #[test]
    fn unknown_record_byte_rejected() {
        let wire: &[u8] = &[0xFF];
        let mut dict = DynamicDictionary::new();
        let err = decode_tokens(wire, &mut dict).unwrap_err();
        assert!(matches!(err, NbfxError::UnknownRecord(0xFF)));
    }

    #[test]
    fn truncated_chars_record_rejected() {
        // Chars8 record announcing 5 bytes but carrying only 2.
        let wire: &[u8] = &[REC_CHARS8_TEXT, 5, b'a', b'b'];
        let mut dict = DynamicDictionary::new();
        // A bare text record outside any element is not rejected as such;
        // the decoder simply reports the truncation.
        let err = decode_tokens(wire, &mut dict).unwrap_err();
        assert!(matches!(
            err,
            NbfxError::Truncated {
                stage: "chars8",
                ..
            }
        ));
    }

    #[test]
    fn nbfx_text_resolve_uses_dictionaries() {
        let dynamic = DynamicDictionary::new();
        let cases = [
            (NbfxText::Empty, Some("")),
            (NbfxText::Zero, Some("0")),
            (NbfxText::Bool(true), Some("true")),
            (NbfxText::Int32(42), Some("42")),
            (NbfxText::DictionaryStatic(2), Some("Envelope")),
            // An id that is in no dictionary resolves to nothing.
            (NbfxText::DictionaryStatic(99_999), None),
        ];
        for (text, expected) in cases {
            assert_eq!(text.resolve(&dynamic).as_deref(), expected);
        }
    }
}