From c2222b16b0ce6103787f207cf7ecee534cbd1d9a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 5 May 2026 15:48:03 -0400 Subject: [PATCH] [M5] mxaccess-asb-nettcp/asb: F21 short forms + EndElement fix + UniqueIdText MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four NBFX-spec corrections discovered by diffing our wire output against the .NET probe's capture: 1. **EndElement is 0x01, NOT 0x00**. Our F21 had this wrong since the first iteration. Our round-trip tests passed because encode and decode used the same wrong value, but interop with WCF's parser silently failed (TCP RST on every request). Fixed by changing `REC_END_ELEMENT` to 0x01 — all 702 tests pass on the new value. 2. **Single-letter prefix short forms**. WCF uses `PrefixDictionaryElement_a..z` (records 0x44-0x5D) and `PrefixDictionaryAttribute_a..z` (records 0x0C-0x25) for single-character prefixes. Our F21 always used the long forms (0x43 prefix-string + dict-id, etc.). The encoder now emits the short form when the prefix is a single ASCII lowercase letter; the decoder accepts both. New `prefix_letter_offset(prefix)` helper. 3. **`DictionaryXmlnsAttribute` (0x0B)** for xmlns:prefix declarations whose value is a static-dict id. The long form (0x09 + prefix-string + text-record) is still emitted when the value is an inline string, but for `xmlns:s="...soap-envelope"` (dict id 4) we now emit the short `0b 01 73 04` form WCF uses. 4. **UniqueIdText (0xAC)** added to `NbfxText` enum + encode/decode. WCF emits `<a:MessageID>` as a UniqueIdText carrying the 16 raw UUID bytes (NOT the `urn:uuid:...` text form). Updated `encode_envelope` to use this for MessageID. Combined wire-byte impact: our envelope body section now matches the .NET probe byte-for-byte through ``, ``, `<a:MessageID>` (UniqueId), ``, ``, and ``. The trailing `01 01 01 01` = 4 EndElements is now the correct record byte. Tests pass (702 total). Live status: still TCP RST after the SizedEnvelope.
Remaining unknown is in the body section — the .NET capture shows xmlns:xsi / xmlns:xsd declarations on the operation-specific request element (ConnectRequest etc.) that we don't emit, plus possibly different field encoding inside ConnectRequest. Next iteration will re-capture through the relay and diff our body bytes against the new .NET-byte-equivalent we now produce. Co-Authored-By: Claude Opus 4.7 (1M context) --- rust/crates/mxaccess-asb-nettcp/src/nbfx.rs | 123 +++++++++++++++++++- rust/crates/mxaccess-asb/src/envelope.rs | 22 ++-- 2 files changed, 134 insertions(+), 11 deletions(-) diff --git a/rust/crates/mxaccess-asb-nettcp/src/nbfx.rs b/rust/crates/mxaccess-asb-nettcp/src/nbfx.rs index d65df90..2efd3a2 100644 --- a/rust/crates/mxaccess-asb-nettcp/src/nbfx.rs +++ b/rust/crates/mxaccess-asb-nettcp/src/nbfx.rs @@ -172,6 +172,10 @@ pub enum NbfxText { /// by `XmlDictionaryWriter.WriteBase64` for the `ASBIData` /// content of `IAsbCustomSerializableType`-decorated fields. Bytes(Vec), + /// 16-byte UUID (record `0xAC` UniqueIdText). WCF emits `` + /// values via this record, with the 16 raw UUID bytes (NOT the + /// `urn:uuid:...` text form). + UniqueId([u8; 16]), } impl NbfxText { @@ -195,10 +199,27 @@ impl NbfxText { // `XmlDictionaryReader.ReadElementContentAsBase64` returns // them as `byte[]`. Consumers should match on the variant. Self::Bytes(_) => None, + // UniqueId surfaces as the .NET `Guid.ToString("D")` form + // (mixed-endian per [MS-DTYP]). Used for ``. + Self::UniqueId(bytes) => Some(format_uuid_dotnet_style(bytes)), } } } +/// Format a 16-byte UUID using .NET's `Guid.ToString("D")` mixed-endian +/// convention (first 4 bytes little-endian, next 2x2 little-endian, +/// last 2+6 big-endian). This is the same format `` uses +/// when emitted as text. 
+fn format_uuid_dotnet_style(bytes: &[u8; 16]) -> String { + let d1 = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + let d2 = u16::from_le_bytes([bytes[4], bytes[5]]); + let d3 = u16::from_le_bytes([bytes[6], bytes[7]]); + format!( + "{d1:08x}-{d2:04x}-{d3:04x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", + bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15] + ) +} + #[derive(Debug, Error)] #[non_exhaustive] pub enum NbfxError { @@ -230,7 +251,12 @@ pub enum NbfxError { // are the `*WithEndElement` variants whose byte is the base record's // byte + 1. -const REC_END_ELEMENT: u8 = 0x00; +/// `[MC-NBFX]` §2.2.3 EndElementRecord. Spec value is **0x01** (NOT +/// 0x00 — verified against .NET probe wire capture). Earlier +/// iterations had this wrong; round-trip tests passed because encode +/// and decode used the same wrong value, but interop with WCF parsers +/// failed silently (TCP RST on every envelope). +const REC_END_ELEMENT: u8 = 0x01; const REC_SHORT_ATTRIBUTE: u8 = 0x04; const REC_ATTRIBUTE: u8 = 0x05; const REC_SHORT_DICT_ATTRIBUTE: u8 = 0x06; @@ -265,6 +291,7 @@ const REC_BYTES8_TEXT: u8 = 0x9E; const REC_BYTES16_TEXT: u8 = 0xA0; const REC_BYTES32_TEXT: u8 = 0xA2; const REC_DICTIONARY_TEXT: u8 = 0xAA; +const REC_UNIQUE_ID_TEXT: u8 = 0xAC; const REC_BOOL_TEXT: u8 = 0xB4; // ---- encoder ------------------------------------------------------------ @@ -317,14 +344,43 @@ fn encode_one( encode_text_string_or_dict(value, dynamic, out) } NbfxToken::NamespaceDeclaration { prefix, value } => { - out.push(REC_XMLNS_ATTRIBUTE); - encode_string(prefix.as_bytes(), out)?; - encode_text_string_or_dict(value, dynamic, out) + // WCF emits `DictionaryXmlnsAttribute` (0x0B) when the + // value is a static-dictionary id, and `XmlnsAttribute` + // (0x09) when it's an inline string. Stricter parsers + // reject the long form when a dict-id is available. 
+ if let NbfxText::DictionaryStatic(id) = value { + out.push(REC_DICT_XMLNS_ATTRIBUTE); + encode_string(prefix.as_bytes(), out)?; + encode_multibyte_int31_to_nbfx(out, *id)?; + Ok(()) + } else { + out.push(REC_XMLNS_ATTRIBUTE); + encode_string(prefix.as_bytes(), out)?; + encode_text_string_or_dict(value, dynamic, out) + } } NbfxToken::Text(text) => encode_text(text, with_end, out), } } +/// If `prefix` is a single lowercase ASCII letter (a-z), return its +/// alphabet offset (0..26). Otherwise return `None`. WCF emits +/// short-form prefix-letter records (PrefixDictionaryElement_a..z = +/// 0x44..0x5D, etc.) for these prefixes, and stricter parsers may +/// reject the long forms when a short form would suffice. +fn prefix_letter_offset(prefix: &str) -> Option { + let mut chars = prefix.chars(); + let c = chars.next()?; + if chars.next().is_some() { + return None; + } + if c.is_ascii_lowercase() { + Some(c as u8 - b'a') + } else { + None + } +} + fn encode_element( prefix: Option<&str>, name: &NbfxName, @@ -339,6 +395,17 @@ fn encode_element( out.push(REC_SHORT_DICT_ELEMENT); encode_multibyte_int31_to_nbfx(out, *id) } + // Short-form: single-letter prefix + dict-id name. Records + // 0x44..0x5D (PrefixDictionaryElement_a..z). + (Some(prefix), NbfxName::Static(id) | NbfxName::Dynamic(id)) + if prefix_letter_offset(prefix).is_some() => + { + // SAFETY: is_some check above; unwrap_or here keeps clippy + // happy without a panic on the unreachable None branch. + let off = prefix_letter_offset(prefix).unwrap_or(0); + out.push(0x44 + off); + encode_multibyte_int31_to_nbfx(out, *id) + } (Some(prefix), NbfxName::Inline(s)) => { out.push(REC_ELEMENT); encode_string(prefix.as_bytes(), out)?; @@ -368,6 +435,15 @@ fn encode_attribute( out.push(REC_SHORT_DICT_ATTRIBUTE); encode_multibyte_int31_to_nbfx(out, *id)?; } + // Short-form: single-letter prefix + dict-id name. Records + // 0x0C..0x25 (PrefixDictionaryAttribute_a..z). 
+ (Some(prefix), NbfxName::Static(id) | NbfxName::Dynamic(id)) + if prefix_letter_offset(prefix).is_some() => + { + let off = prefix_letter_offset(prefix).unwrap_or(0); + out.push(0x0C + off); + encode_multibyte_int31_to_nbfx(out, *id)?; + } (Some(prefix), NbfxName::Inline(s)) => { out.push(REC_ATTRIBUTE); encode_string(prefix.as_bytes(), out)?; @@ -460,6 +536,10 @@ fn encode_text(text: &NbfxText, with_end: bool, out: &mut Vec) -> Result<(), } out.extend_from_slice(bytes); } + NbfxText::UniqueId(bytes) => { + out.push(REC_UNIQUE_ID_TEXT + bump); + out.extend_from_slice(bytes); + } } Ok(()) } @@ -530,6 +610,17 @@ pub fn decode_tokens( name: NbfxName::Static(id), }); } + // PrefixDictionaryElement_a..z: 0x44..0x5D — single-letter + // prefix + dict-id name. Inverse of the encoder's + // short-form path above. + byte if (0x44..=0x5D).contains(&byte) => { + let prefix_letter = char::from(b'a' + (byte - 0x44)); + let id = decode_int31(input, &mut cursor)?; + tokens.push(NbfxToken::Element { + prefix: Some(prefix_letter.to_string()), + name: NbfxName::Static(id), + }); + } REC_SHORT_ATTRIBUTE => { let name = decode_string(input, &mut cursor, "short-attribute")?; let value = decode_text_record(input, &mut cursor)?; @@ -568,6 +659,18 @@ pub fn decode_tokens( value, }); } + // PrefixDictionaryAttribute_a..z: 0x0C..0x25 — + // single-letter prefix + dict-id name + text-record value. 
+ byte if (0x0C..=0x25).contains(&byte) => { + let prefix_letter = char::from(b'a' + (byte - 0x0C)); + let id = decode_int31(input, &mut cursor)?; + let value = decode_text_record(input, &mut cursor)?; + tokens.push(NbfxToken::Attribute { + prefix: Some(prefix_letter.to_string()), + name: NbfxName::Static(id), + value, + }); + } REC_SHORT_XMLNS_ATTRIBUTE => { let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::DefaultNamespace { value }); @@ -577,6 +680,14 @@ pub fn decode_tokens( let value = decode_text_record(input, &mut cursor)?; tokens.push(NbfxToken::NamespaceDeclaration { prefix, value }); } + REC_DICT_XMLNS_ATTRIBUTE => { + let prefix = decode_string(input, &mut cursor, "dict-xmlns-prefix")?; + let id = decode_int31(input, &mut cursor)?; + tokens.push(NbfxToken::NamespaceDeclaration { + prefix, + value: NbfxText::DictionaryStatic(id), + }); + } // Text records — directly produce a Text token, plus an // implicit EndElement when the `*WithEndElement` variant was // used (record byte LSB = 1). @@ -646,6 +757,10 @@ fn decode_text_body(input: &[u8], cursor: &mut usize, base: u8) -> Result NbfxText::Empty, REC_DICTIONARY_TEXT => NbfxText::DictionaryStatic(decode_int31(input, cursor)?), + REC_UNIQUE_ID_TEXT => { + let bytes = read_le::<16>(input, cursor, "unique-id-text")?; + NbfxText::UniqueId(bytes) + } REC_BOOL_TEXT => { let b = *input.get(*cursor).ok_or(NmfTrunc("bool-text"))?; *cursor += 1; diff --git a/rust/crates/mxaccess-asb/src/envelope.rs b/rust/crates/mxaccess-asb/src/envelope.rs index d940b37..ae54845 100644 --- a/rust/crates/mxaccess-asb/src/envelope.rs +++ b/rust/crates/mxaccess-asb/src/envelope.rs @@ -260,13 +260,16 @@ pub fn encode_envelope( encode_validator(&mut tokens, v, dynamic); } - // urn:uuid:{uuid} - let message_id = format!("urn:uuid:{}", make_random_uuid_v4()); + // {16-byte UUID via UniqueIdText} + // WCF emits MessageID as a UniqueIdText record (0xAC) carrying the + // 16 raw UUID bytes — NOT as Chars text. 
Verified against .NET + // probe wire capture. + let message_id_bytes = make_random_uuid_v4_bytes(); tokens.push(NbfxToken::Element { prefix: Some("a".to_string()), name: NbfxName::Static(26), }); - tokens.push(NbfxToken::Text(NbfxText::Chars(message_id))); + tokens.push(NbfxToken::Text(NbfxText::UniqueId(message_id_bytes))); tokens.push(NbfxToken::EndElement); // // {anonymous} @@ -587,15 +590,20 @@ fn push_dc_field(out: &mut Vec, name: &str, dc_ns: &str, value: &str) out.push(NbfxToken::EndElement); } -/// Random RFC 4122 v4-shaped UUID (without pulling the `uuid` crate). -/// Used by `encode_envelope` for the `urn:uuid:...` -/// header. The output is a hyphenated lowercase 36-char string. -fn make_random_uuid_v4() -> String { +/// Random RFC 4122 v4 UUID raw bytes. Used by `encode_envelope` for +/// the `` UniqueIdText record (16 raw bytes on the wire). +fn make_random_uuid_v4_bytes() -> [u8; 16] { use rand::RngCore; let mut bytes = [0u8; 16]; rand::thread_rng().fill_bytes(&mut bytes); bytes[6] = (bytes[6] & 0x0F) | 0x40; // version 4 bytes[8] = (bytes[8] & 0x3F) | 0x80; // variant 1 (RFC 4122) + bytes +} + +#[allow(dead_code)] // kept for callers that need the textual form +fn _unused_make_random_uuid_v4() -> String { + let bytes = make_random_uuid_v4_bytes(); format!( "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", bytes[0],