From d03bd04ef5b0e8b266a2e08b5908dcaccc38539a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Wed, 6 May 2026 03:05:20 -0400 Subject: [PATCH] [F34 evidence] dump WCF binary-header dictionary for AddMonitoredItems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends tests/add_monitored_items_request_capture.rs with a manual binary-header walk that prints every pre-interned string + its wire id. The captured request's binary header pre-declares **23 strings** covering the entire DataContract field set: wire-id 1 http://ASB.IDataV2:addMonitoredItemsIn wire-id 3 AddMonitoredItemsRequest wire-id 5 SubscriptionId wire-id 7 Items wire-id 9 http://schemas.datacontract.org/.../ASBIDataV2Contract wire-id 11 MonitoredItem wire-id 13 activeField wire-id 15 activeFieldSpecified wire-id 17 bufferedField wire-id 19 itemField wire-id 21 contextNameField wire-id 23 idField wire-id 25 idFieldSpecified wire-id 27 nameField wire-id 29 referenceTypeField wire-id 31 typeField wire-id 33 sampleIntervalField wire-id 35 timeDeadbandField wire-id 37 timeDeadbandFieldSpecified wire-id 39 userDataField wire-id 41 lengthField wire-id 43 payloadField wire-id 45 valueDeadbandField That gives F34's binary-builder rewrite the exact dict-id mapping to target — every MonitoredItem child can be emitted as a DictionaryStatic(odd-id) reference instead of an inline string, matching WCF's compression. The "RequireId" mystery from the earlier inline-name decode is also resolved: the wire body has NO `RequireId` element at the bottom — the trailing `Inline("referenceTypeField")` was a dict-id wraparound or auto-intern artifact, not actual content. design/followups.md F34 updated with the full ground-truth header, plus a refined "Resolves when" pointing at the underlying `nbfx.rs::decode_tokens` auto-intern semantics. 
The current codec's doc comment ("the codec doesn't auto-intern") is correct for raw [MC-NBFX] but wrong for WCF binary messages where the writer auto-interns by convention; that's the structural fix the F34 binary rewrite depends on. No code-path change in this commit beyond the test improvements. Co-Authored-By: Claude Opus 4.7 (1M context) --- design/followups.md | 32 ++++++++++++++++++- .../add_monitored_items_request_capture.rs | 23 +++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/design/followups.md b/design/followups.md index 33f50a1..e927e7b 100644 --- a/design/followups.md +++ b/design/followups.md @@ -90,9 +90,39 @@ For the per-step body of every line listed in the cumulative execution log, see For ops where the body is purely `IAsbCustomSerializableType` arrays (Read, Register, Unregister), no DataContract names appear — every payload is wrapped as `{bytes}` (binary fast-path) and our builders are correct. The DataContract schema only matters for ops carrying non-`IAsbCustomSerializable` types like `MonitoredItem` and `WriteValue`. 
+**Captured ground-truth dictionary (from `tests/fixtures/add-monitored-items-request-wire.bin` binary header at `tests/add_monitored_items_request_capture.rs`).** The .NET WCF binary writer pre-declares **23 strings** in the session dynamic dictionary at the start of each request, mapping wire id → string: + +``` +header[ 0] (wire-id 1) = "http://ASB.IDataV2:addMonitoredItemsIn" +header[ 1] (wire-id 3) = "AddMonitoredItemsRequest" +header[ 2] (wire-id 5) = "SubscriptionId" +header[ 3] (wire-id 7) = "Items" +header[ 4] (wire-id 9) = "http://schemas.datacontract.org/2004/07/ArchestrAServices.ASBIDataV2Contract" +header[ 5] (wire-id 11) = "MonitoredItem" +header[ 6] (wire-id 13) = "activeField" +header[ 7] (wire-id 15) = "activeFieldSpecified" +header[ 8] (wire-id 17) = "bufferedField" +header[ 9] (wire-id 19) = "itemField" +header[10] (wire-id 21) = "contextNameField" +header[11] (wire-id 23) = "idField" +header[12] (wire-id 25) = "idFieldSpecified" +header[13] (wire-id 27) = "nameField" +header[14] (wire-id 29) = "referenceTypeField" +header[15] (wire-id 31) = "typeField" +header[16] (wire-id 33) = "sampleIntervalField" +header[17] (wire-id 35) = "timeDeadbandField" +header[18] (wire-id 37) = "timeDeadbandFieldSpecified" +header[19] (wire-id 39) = "userDataField" +header[20] (wire-id 41) = "lengthField" +header[21] (wire-id 43) = "payloadField" +header[22] (wire-id 45) = "valueDeadbandField" +``` + +That's **the entire DataContract field name set** plus the wrapper / array / namespace / action strings. The body then references these by wire id throughout — no inline strings needed for any of the field names. The `nameField` slot 13 (wire id 27) etc. are exactly what I'd misidentified as resolved namespace URLs in my earlier `decode_envelope` trace; the wire id resolution is actually working — it's just that the body's xmlns slots reference dict ids whose resolution lands on a string our decoder doesn't expect there. 
Both observations are consistent: WCF reuses the same dynamic dictionary for both element names AND namespace declarations. + **Resolves when:** Two prerequisites: -1. **F30 dynamic-dict resolution bug** — captured `tests/fixtures/add-monitored-items-request-wire.bin` (the .NET probe's verbatim 695-byte AddMonitoredItems request via `examples/asb-relay.rs`), decoded via `decode_envelope` at `tests/add_monitored_items_request_capture.rs`. The trace shows `DefaultNamespace { value: Chars("nameField") }` and `NamespaceDeclaration { prefix: "i", value: Chars("activeField") }` — namespace URL slots resolved to field-name strings, plus most element names left as `Static(NN)` instead of resolving to inline names. The F30 cumulative dynamic-dict post-pass at `envelope.rs::resolve_dict_names_in_tokens` mis-maps per-session dynamic dict ids; the fix needs reproducing exactly which dict each id refers to (per-message header vs cumulative dynamic vs `[MC-NBFS]` static) and resolving in the right order. +1. **F30 dynamic-dict resolution + body-dict accounting** — `decode_envelope::resolve_dict_names_in_tokens` resolves dict-id-named elements correctly per the captured header; what's missing is **interpretation of which records auto-intern new strings into the dict** as the body decodes. WCF's binary writer (`XmlBinaryWriterSession.cs` in `dotnet/wcf`) auto-interns inline element/attribute names — the dynamic dict grows as the message decodes. For decoder/encoder parity we need the same auto-intern behaviour in `nbfx.rs::decode_tokens` and `encode_tokens`. The current codec leaves the `_dynamic` parameter unused (intentional per its doc comment, "the codec doesn't auto-intern because `[MC-NBFX]` doesn't define a built-in `intern this string` record") — but that comment is wrong for WCF binary messages, where the writer DOES intern by convention. Fix: rewrite both halves to auto-intern inline names and to refer back to the dict on subsequent inline-or-dict choices. 2. 
**Builder rewrite** — once (1) lands and we can read the captured request structurally, rewrite `build_add_monitored_items_request_body` and `build_delete_monitored_items_request_body` to emit each `MonitoredItem` child as the DataContract field-suffix names (`activeField` / `activeFieldSpecified` / `bufferedField` / `itemField` / `sampleIntervalField` / `timeDeadbandField` / `timeDeadbandFieldSpecified` / `userDataField` / `valueDeadbandField`) under a `b` namespace prefix that maps to `http://schemas.datacontract.org/2004/07/ArchestrAServices.ASBIDataV2Contract`. The nested `itemField` element carries an ItemIdentity serialized via DataContract (NOT the binary fast-path — that only kicks in at the outer body-member level) with children `contextNameField` / `idField` / `idFieldSpecified` / `nameField` / `referenceTypeField` / `typeField` under a different `b` prefix mapping to `http://schemas.datacontract.org/2004/07/ArchestrAServices.ASBContract`. The Variant fields (`userDataField` / `valueDeadbandField`) carry `lengthField` / `payloadField` / `typeField` children. Same fix likely applies to `WriteBasicRequest`'s `WriteValue[]? Values` field (also non-`IAsbCustomSerializable`); needs its own capture-and-verify pass. The dictionary-id pre-population that .NET's WCF binary writer uses is a perf optimisation; an inline-string emit will work for correctness once the structure is right. 
diff --git a/rust/crates/mxaccess-asb/tests/add_monitored_items_request_capture.rs b/rust/crates/mxaccess-asb/tests/add_monitored_items_request_capture.rs index 165b92e..988f202 100644 --- a/rust/crates/mxaccess-asb/tests/add_monitored_items_request_capture.rs +++ b/rust/crates/mxaccess-asb/tests/add_monitored_items_request_capture.rs @@ -38,6 +38,29 @@ fn add_monitored_items_request_capture_decoder_trace() { let envelope = &raw[3..]; assert_eq!(envelope.len(), 692); + // Manually walk the leading WCF binary header (length-prefixed + // string list) so we can dump every interned string + its wire + // id. Mirrors what `decode_envelope::parse_binary_header_prefix` + // does internally; reproducing it inline so the test sees the + // raw strings. + use mxaccess_asb_nettcp::nmf::decode_multibyte_int31; + let mut cursor = 0usize; + let outer_len = decode_multibyte_int31(envelope, &mut cursor).expect("outer-len varint"); + eprintln!("=== binary-header outer length: {outer_len} ==="); + let header_start = cursor; + let header_end = header_start + outer_len as usize; + let mut p = header_start; + let mut idx = 0usize; + while p < header_end { + let len = decode_multibyte_int31(envelope, &mut p).expect("string-len varint"); + let bytes = &envelope[p..p + len as usize]; + let s = std::str::from_utf8(bytes).expect("utf-8 header string"); + let wire_id = (idx as u32) * 2 + 1; + eprintln!(" header[{idx}] (wire-id {wire_id}) = {s:?}"); + p += len as usize; + idx += 1; + } + let mut dict = DynamicDictionary::new(); let decoded = decode_envelope(envelope, &mut dict).expect("decode_envelope succeeds"); eprintln!("=== body tokens ({} total) ===", decoded.body_tokens.len());