//! `NmxObservedFrame` — tolerant transfer-envelope + inner-message parser.
//!
//! Direct port of `src/MxNativeCodec/NmxObservedFrame.cs`.
//!
//! Where [`crate::NmxTransferEnvelope`] strictly validates the typed fields
//! of the 46-byte transfer header, the *observed* envelope path is a
//! permissive analyser used by probes and replay:
//!
//! - Splits a `TransferData`-shaped or `ProcessDataReceived`-shaped buffer
//!   into a 46-byte header plus an inner body.
//! - Surfaces the optional 4-byte length prefix that wraps
//!   `ProcessDataReceived` bodies on the wire.
//! - Parses the inner body's leading `cmd + version` bytes plus, for the
//!   recognised opcodes `0x1f` and `0x21`, a 16-byte item-correlation GUID.
//! - Walks the body looking for runs of printable UTF-16LE strings and
//!   surfaces them with their offsets. Unknown opcodes round-trip cleanly
//!   — the parser never rejects them, it just gives them a synthetic
//!   `Unknown0xNN` name (`NmxObservedFrame.cs:148`).
//!
//! ## hasDetailStatus audit (Q7 follow-up)
//!
//! `NmxObservedFrame.cs:122-126` reads `itemCorrelationId` **conditionally**:
//!
//! ```csharp
//! if (command is 0x1f or 0x21 && body.Length >= 19)
//! {
//!     itemCorrelationId = new Guid(body.Slice(3, 16));
//! }
//! ```
//!
//! That is a `has_*`-style conditional read in the .NET source — it depends
//! on both the opcode and the buffer length. **Audit: the Rust port mirrors
//! the same conditional exactly** (it MUST stay conditional — making it
//! unconditional would either crash on shorter unknown-opcode bodies or
//! attach a meaningless GUID to bodies that have no correlation slot). No
//! other field in this file is read conditionally.

// Direct byte indexing — see reference_handle.rs for rationale.
#![allow(clippy::indexing_slicing)]

use crate::error::CodecError;

/// Header length in bytes (`NmxObservedFrame.cs:14`).
pub const HEADER_LENGTH: usize = 46; /// Inner-length field offset in the transfer header /// (`NmxObservedFrame.cs:15`). pub const INNER_LENGTH_OFFSET: usize = 2; /// Tolerant parse of a `TransferData`-style envelope body. Mirrors /// [`NmxObservedEnvelope`] returned by `ParseTransferDataBody` /// (`NmxObservedFrame.cs:17-38`). #[derive(Debug, Clone, PartialEq, Eq)] pub struct NmxObservedEnvelope { /// Whether the body began with a 4-byte total-length prefix /// (set on `ProcessDataReceived` payloads). pub has_length_prefix: bool, /// The captured 4-byte total-length prefix, or `None` if absent. pub total_length_prefix: Option, /// `inner_length` field at offset 2 of the 46-byte header. pub declared_inner_length: i32, /// Actual inner-body length in bytes (`body.len() - 46` after stripping /// any optional length prefix). pub actual_inner_length: usize, /// The captured 46-byte header. pub header: Vec, /// The inner body that follows the header. pub inner_body: Vec, } impl NmxObservedEnvelope { /// Parse a `TransferData` body (no leading 4-byte length prefix). /// Mirrors `ParseTransferDataBody` (`NmxObservedFrame.cs:17-38`). /// /// # Errors /// /// - [`CodecError::ShortRead`] if `body.len() < 46`. /// - [`CodecError::InnerLengthMismatch`] if the declared inner length /// doesn't match the actual inner body length. 
pub fn parse_transfer_data_body(body: &[u8]) -> Result { if body.len() < HEADER_LENGTH { return Err(CodecError::ShortRead { expected: HEADER_LENGTH, actual: body.len(), }); } let declared_inner_length = read_i32_le(body, INNER_LENGTH_OFFSET); let actual_inner_length = body.len() - HEADER_LENGTH; if declared_inner_length != actual_inner_length as i32 { return Err(CodecError::InnerLengthMismatch { declared: declared_inner_length, actual: actual_inner_length, }); } Ok(Self { has_length_prefix: false, total_length_prefix: None, declared_inner_length, actual_inner_length, header: body[..HEADER_LENGTH].to_vec(), inner_body: body[HEADER_LENGTH..].to_vec(), }) } /// Parse a `ProcessDataReceived` body — strict form with leading /// 4-byte total-length prefix. Mirrors `ParseProcessDataReceivedBody` /// (`NmxObservedFrame.cs:40-69`). /// /// # Errors /// /// - [`CodecError::ShortRead`] if `body.len() < 50`. /// - [`CodecError::InnerLengthMismatch`] if either the total-length /// prefix or the declared inner length doesn't reconcile with the /// buffer size. pub fn parse_process_data_received_body(body: &[u8]) -> Result { if body.len() < 4 + HEADER_LENGTH { return Err(CodecError::ShortRead { expected: 4 + HEADER_LENGTH, actual: body.len(), }); } // `.cs:47` — total length prefix at offset 0. let total_length_prefix = read_i32_le(body, 0); if total_length_prefix as usize != body.len() { return Err(CodecError::InnerLengthMismatch { declared: total_length_prefix, actual: body.len(), }); } let header_offset = 4; // `.cs:54-55` — inner length sits at headerOffset + InnerLengthOffset. let declared_inner_length = read_i32_le(body, header_offset + INNER_LENGTH_OFFSET); // `.cs:56` — actualInnerLength = declared - sizeof(int). 
let actual_inner_length = declared_inner_length - 4; if actual_inner_length < 0 || header_offset + HEADER_LENGTH + actual_inner_length as usize != body.len() { return Err(CodecError::InnerLengthMismatch { declared: declared_inner_length, actual: body.len() - header_offset - HEADER_LENGTH, }); } let actual_inner_length = actual_inner_length as usize; Ok(Self { has_length_prefix: true, total_length_prefix: Some(total_length_prefix), declared_inner_length, actual_inner_length, header: body[header_offset..header_offset + HEADER_LENGTH].to_vec(), inner_body: body[header_offset + HEADER_LENGTH ..header_offset + HEADER_LENGTH + actual_inner_length] .to_vec(), }) } /// Flexible `ProcessDataReceived` parse — tries the strict /// length-prefixed form first; falls back to the `TransferData`-style /// header-only form. Mirrors `ParseProcessDataReceivedBodyFlexible` /// (`NmxObservedFrame.cs:71-101`). pub fn parse_process_data_received_body_flexible(body: &[u8]) -> Result { // `.cs:73-80` — try the strict path if and only if the leading // i32 == body length. if body.len() >= 4 + HEADER_LENGTH { let total_length_prefix = read_i32_le(body, 0); if total_length_prefix as usize == body.len() { return Self::parse_process_data_received_body(body); } } if body.len() < HEADER_LENGTH { return Err(CodecError::ShortRead { expected: HEADER_LENGTH, actual: body.len(), }); } // `.cs:87-92` — fall back to header-only inner-length validation. 
let declared_inner_length = read_i32_le(body, INNER_LENGTH_OFFSET); let actual_inner_length = body.len() - HEADER_LENGTH; if declared_inner_length != actual_inner_length as i32 { return Err(CodecError::InnerLengthMismatch { declared: declared_inner_length, actual: actual_inner_length, }); } Ok(Self { has_length_prefix: false, total_length_prefix: None, declared_inner_length, actual_inner_length, header: body[..HEADER_LENGTH].to_vec(), inner_body: body[HEADER_LENGTH..].to_vec(), }) } } /// A printable UTF-16LE string discovered at a specific offset inside the /// observed body. Mirrors the .NET `NmxObservedString` record /// (`NmxObservedFrame.cs:104`). #[derive(Debug, Clone, PartialEq, Eq)] pub struct NmxObservedString { pub offset: usize, pub value: String, } /// Tolerant parse of an inner NMX message body. Mirrors /// `NmxObservedMessage` (`NmxObservedFrame.cs:106-192`). /// /// "Tolerant" means: the parser does NOT validate the body shape against /// any specific opcode — it simply records the leading `cmd`, `version` u16 /// (split into major/minor bytes), and (for `0x1f` / `0x21`) a 16-byte item /// correlation GUID. Unknown opcodes get a synthetic name (`Unknown0xNN`) /// per `.cs:148`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct NmxObservedMessage { pub command: u8, pub command_name: &'static str, /// Synthetic name for unknown commands (`Unknown0xNN`). When the command /// is recognised, this is empty and [`Self::command_name`] is used. pub synthetic_name: Option, pub version_major: u8, pub version_minor: u8, /// Item-correlation GUID for `AdviseSupervisory` (`0x1f`) and /// `UnAdvise` (`0x21`) bodies. **Read conditionally** — mirroring /// `NmxObservedFrame.cs:122-126`. See module-level Q7 audit. /// /// The GUID is 16 raw bytes from `body[3..19]`. The .NET source uses /// `new Guid(byte[])` which interprets the first three groups as /// little-endian (mixed-endian on the wire). 
The Rust port keeps the /// raw 16-byte form to avoid pulling in a `Guid`/`uuid` dependency at /// the codec level — consumers can re-interpret if needed. pub item_correlation_id: Option<[u8; 16]>, /// Printable UTF-16LE strings discovered in the body, with their /// starting byte offsets. pub strings: Vec, } impl NmxObservedMessage { /// Parse the body. Mirrors `NmxObservedMessage.Parse` /// (`NmxObservedFrame.cs:114-135`). /// /// # Errors /// /// - [`CodecError::ShortRead`] if the body has fewer than 3 bytes (the /// minimum needed to read `cmd + version`). pub fn parse(body: &[u8]) -> Result { // `.cs:116-119` — minimum length 3. if body.len() < 3 { return Err(CodecError::ShortRead { expected: 3, actual: body.len(), }); } let command = body[0]; // `.cs:122-126` — CONDITIONAL read of itemCorrelationId. // Audit Q7: this stays conditional in the Rust port. let item_correlation_id = if (command == 0x1f || command == 0x21) && body.len() >= 19 { let mut guid = [0u8; 16]; guid.copy_from_slice(&body[3..19]); Some(guid) } else { None }; let (command_name, synthetic_name) = command_name(command); Ok(Self { command, command_name, synthetic_name, // `.cs:131` — body[1] is the major byte of the u16 version. version_major: body[1], // `.cs:132` — body[2] is the minor byte. version_minor: body[2], item_correlation_id, strings: extract_utf16_strings(body), }) } } /// Map a command byte to its declared name. Mirrors `GetCommandName` /// (`NmxObservedFrame.cs:137-150`). /// /// Returns `(known_name, synthetic_name_for_unknown)`. For known commands, /// the synthetic-name slot is `None`; for unknown commands, the known-name /// slot is `"Unknown"` and the synthetic slot carries the formatted name. 
fn command_name(command: u8) -> (&'static str, Option) { match command { 0x17 => ("MetadataQuery", None), 0x1f => ("AdviseSupervisory", None), 0x21 => ("UnAdvise", None), 0x32 => ("SubscriptionStatus", None), 0x33 => ("DataUpdate", None), 0x37 => ("Write", None), 0x40 => ("MetadataResponse", None), // `.cs:148` — synthesised name for everything else. other => ("Unknown", Some(format!("Unknown0x{other:02X}"))), } } /// Walk the body looking for runs of printable UTF-16LE characters /// terminated by a 2-byte NUL. Mirrors `ExtractUtf16Strings` /// (`NmxObservedFrame.cs:152-191`). /// /// A "string" is at least 3 printable ASCII characters (low byte in /// `0x20..=0x7e`, high byte zero) followed by a `00 00` terminator. The /// scanner's appetite is intentionally narrow: arbitrary binary that /// happens to look like UTF-16 won't trip it. fn extract_utf16_strings(body: &[u8]) -> Vec { let mut strings = Vec::new(); let mut offset = 0usize; // `.cs:156` — outer guard `offset + 8 <= body.length`. while offset + 8 <= body.len() { let start = offset; let mut chars: usize = 0; // `.cs:160-177` — inner scan loop. while offset + 1 < body.len() { let lo = body[offset]; let hi = body[offset + 1]; // `.cs:162-167` — null terminator ends the run. if lo == 0 && hi == 0 { break; } // `.cs:169-173` — non-printable / non-ASCII byte invalidates // the candidate run. if hi != 0 || !(0x20..=0x7e).contains(&lo) { chars = 0; break; } chars += 1; offset += 2; } // `.cs:179-186` — accept the run if it had at least 3 chars and // is followed by the 00 00 terminator. if chars >= 3 && offset + 1 < body.len() && body[offset] == 0 && body[offset + 1] == 0 { let raw = &body[start..start + chars * 2]; let utf16: Vec = raw .chunks_exact(2) .map(|c| u16::from_le_bytes([c[0], c[1]])) .collect(); // The scan accepted only printable ASCII, so the conversion // can't fail in practice. If it does, we silently drop the run. 
if let Ok(value) = String::from_utf16(&utf16) { strings.push(NmxObservedString { offset: start, value, }); } offset += 2; continue; } // `.cs:187` — failed match: advance by 1 byte and retry. offset = start + 1; } strings } // ---- LE primitive helpers ------------------------------------------------- #[inline] fn read_i32_le(bytes: &[u8], offset: usize) -> i32 { i32::from_le_bytes([ bytes[offset], bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], ]) } // =========================================================================== // Tests // =========================================================================== #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used, clippy::indexing_slicing)] mod tests { use super::*; fn synthesise_envelope(inner: &[u8]) -> Vec { let mut out = vec![0u8; HEADER_LENGTH + inner.len()]; // Pack the header with a recognisable pattern so we can verify // round-trip preservation. for (i, b) in out[..HEADER_LENGTH].iter_mut().enumerate() { *b = 0xA0u8.wrapping_add(i as u8); } // Patch the inner-length field at offset 2. out[INNER_LENGTH_OFFSET..INNER_LENGTH_OFFSET + 4] .copy_from_slice(&(inner.len() as i32).to_le_bytes()); out[HEADER_LENGTH..].copy_from_slice(inner); out } fn synthesise_pdr_body(inner: &[u8]) -> Vec { // ProcessDataReceived strict layout: 4 (total) + 46 (header) + inner. // Total-length prefix == body.len(), inner-length field == inner.len() + 4. let total_len = 4 + HEADER_LENGTH + inner.len(); let mut out = vec![0u8; total_len]; out[..4].copy_from_slice(&(total_len as i32).to_le_bytes()); for (i, b) in out[4..4 + HEADER_LENGTH].iter_mut().enumerate() { *b = 0xC0u8.wrapping_add(i as u8); } // inner length field at offset 4 + 2 = 6, value = inner.len() + 4. out[6..10].copy_from_slice(&((inner.len() + 4) as i32).to_le_bytes()); out[4 + HEADER_LENGTH..].copy_from_slice(inner); out } #[test] fn header_constants_match_dotnet() { // `NmxObservedFrame.cs:14-15`. 
assert_eq!(HEADER_LENGTH, 46); assert_eq!(INNER_LENGTH_OFFSET, 2); } // ---- Envelope parsing ----------------------------------------------- #[test] fn parse_transfer_data_body_round_trip() { let inner = [0x37u8, 0x01, 0x00, 0xAB, 0xCD]; let body = synthesise_envelope(&inner); let env = NmxObservedEnvelope::parse_transfer_data_body(&body).unwrap(); assert!(!env.has_length_prefix); assert_eq!(env.total_length_prefix, None); assert_eq!(env.declared_inner_length, inner.len() as i32); assert_eq!(env.actual_inner_length, inner.len()); assert_eq!(env.inner_body, inner); assert_eq!(env.header.len(), HEADER_LENGTH); // Header preserved verbatim. assert_eq!(&env.header, &body[..HEADER_LENGTH]); } #[test] fn parse_transfer_data_body_rejects_short_buffer() { let err = NmxObservedEnvelope::parse_transfer_data_body(&[0u8; 45]).unwrap_err(); assert!(matches!(err, CodecError::ShortRead { .. })); } #[test] fn parse_transfer_data_body_rejects_inner_length_mismatch() { let mut body = synthesise_envelope(&[0u8; 8]); // Clobber inner-length field to a wrong value. body[INNER_LENGTH_OFFSET..INNER_LENGTH_OFFSET + 4].copy_from_slice(&100i32.to_le_bytes()); let err = NmxObservedEnvelope::parse_transfer_data_body(&body).unwrap_err(); assert!(matches!(err, CodecError::InnerLengthMismatch { .. })); } #[test] fn parse_pdr_body_strict_round_trip() { let inner = [0x33u8, 0x01, 0x00]; let body = synthesise_pdr_body(&inner); let env = NmxObservedEnvelope::parse_process_data_received_body(&body).unwrap(); assert!(env.has_length_prefix); assert_eq!(env.total_length_prefix, Some(body.len() as i32)); assert_eq!(env.actual_inner_length, inner.len()); assert_eq!(env.inner_body, inner); } #[test] fn parse_pdr_body_strict_rejects_bad_total_length() { let inner = [0u8; 4]; let mut body = synthesise_pdr_body(&inner); // Corrupt the total-length prefix (compute the corrupt value first // to avoid borrowing `body` mutably and immutably in the same expr). 
let bad_total = body.len() as i32 + 1; body[0..4].copy_from_slice(&bad_total.to_le_bytes()); let err = NmxObservedEnvelope::parse_process_data_received_body(&body).unwrap_err(); assert!(matches!(err, CodecError::InnerLengthMismatch { .. })); } #[test] fn parse_pdr_flexible_uses_strict_when_possible() { let inner = [0x32u8, 0x01, 0x00]; let body = synthesise_pdr_body(&inner); let env = NmxObservedEnvelope::parse_process_data_received_body_flexible(&body).unwrap(); assert!(env.has_length_prefix); } #[test] fn parse_pdr_flexible_falls_back_to_header_only() { // No leading 4-byte length prefix — flexible parser falls back. let inner = [0x32u8, 0x01, 0x00]; let body = synthesise_envelope(&inner); let env = NmxObservedEnvelope::parse_process_data_received_body_flexible(&body).unwrap(); assert!(!env.has_length_prefix); assert_eq!(env.inner_body, inner); } // ---- Inner-message parsing ------------------------------------------ #[test] fn parse_message_minimum_length_3() { let err = NmxObservedMessage::parse(&[0x37u8, 0x01]).unwrap_err(); assert!(matches!(err, CodecError::ShortRead { .. })); } #[test] fn parse_recognised_command_yields_known_name() { let body = [0x37u8, 0x01, 0x00]; let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.command, 0x37); assert_eq!(msg.command_name, "Write"); assert_eq!(msg.synthetic_name, None); assert_eq!(msg.version_major, 0x01); assert_eq!(msg.version_minor, 0x00); assert_eq!(msg.item_correlation_id, None); } #[test] fn parse_unknown_command_yields_synthetic_name() { let body = [0xAAu8, 0x01, 0x00]; let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.command, 0xAA); // Known-name slot is "Unknown" and synthetic_name carries the // formatted string ("Unknown0xAA"). assert_eq!(msg.command_name, "Unknown"); assert_eq!(msg.synthetic_name.as_deref(), Some("Unknown0xAA")); } #[test] fn advise_supervisory_carries_correlation_id_when_long_enough() { // 0x1f + version 1 + 16-byte GUID + a couple of stuffer bytes. 
let mut body = vec![0x1fu8, 0x01, 0x00]; let guid = [ 0x11u8, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, ]; body.extend_from_slice(&guid); body.extend_from_slice(&[0xDE, 0xAD]); let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.command_name, "AdviseSupervisory"); assert_eq!(msg.item_correlation_id, Some(guid)); } #[test] fn unadvise_carries_correlation_id_when_long_enough() { let mut body = vec![0x21u8, 0x01, 0x00]; let guid = [0x42u8; 16]; body.extend_from_slice(&guid); let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.command_name, "UnAdvise"); assert_eq!(msg.item_correlation_id, Some(guid)); } #[test] fn correlation_id_only_for_advise_or_unadvise_opcodes() { // Q7 audit: the conditional read is opcode-gated. Even with 19+ // bytes available, opcodes other than 0x1f / 0x21 do NOT extract // the GUID slot. let mut body = vec![0x37u8, 0x01, 0x00]; body.extend_from_slice(&[0xFFu8; 16]); let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.item_correlation_id, None); } #[test] fn correlation_id_omitted_when_buffer_too_short() { // Q7 audit: even 0x1f / 0x21 don't get a GUID if the buffer is < 19. let body = [0x1fu8, 0x01, 0x00, 0x42]; let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.command_name, "AdviseSupervisory"); assert_eq!(msg.item_correlation_id, None); } // ---- UTF-16 string scanner ------------------------------------------ #[test] fn extract_strings_finds_simple_run() { // "Hello" UTF-16LE + 00 00 terminator, embedded in a larger body. let mut body = vec![0u8; 8]; let utf16 = "Hello".encode_utf16().collect::>(); for u in &utf16 { body.extend_from_slice(&u.to_le_bytes()); } body.extend_from_slice(&[0x00, 0x00]); body.extend_from_slice(&[0u8; 4]); // Prefix the body with cmd+version so we can call parse(). 
let mut full = vec![0x17u8, 0x01, 0x00]; full.extend_from_slice(&body); let msg = NmxObservedMessage::parse(&full).unwrap(); let found: Vec<_> = msg.strings.iter().map(|s| s.value.as_str()).collect(); assert!( found.contains(&"Hello"), "did not find 'Hello' in {found:?}" ); } #[test] fn extract_strings_skips_short_runs() { // "ab\0\0" — only 2 chars, below the 3-char minimum. let mut body = vec![0x17u8, 0x01, 0x00, 0u8, 0u8]; let utf16 = "ab".encode_utf16().collect::>(); for u in &utf16 { body.extend_from_slice(&u.to_le_bytes()); } body.extend_from_slice(&[0x00, 0x00, 0u8, 0u8]); let msg = NmxObservedMessage::parse(&body).unwrap(); assert!(msg.strings.is_empty()); } #[test] fn extract_strings_ignores_non_printable() { // A byte sequence that looks UTF-16-ish but contains a control // character (0x07) — must NOT be reported as a string. let mut body = vec![0x17u8, 0x01, 0x00]; body.extend_from_slice(&[0x41, 0x00, 0x07, 0x00, 0x42, 0x00, 0x00, 0x00]); let msg = NmxObservedMessage::parse(&body).unwrap(); assert!(msg.strings.is_empty()); } #[test] fn extract_strings_reports_offset_relative_to_body() { // Two trailing strings; verify the second's offset is correct. let mut body = vec![0x17u8, 0x01, 0x00, 0u8, 0u8, 0u8]; let prefix_len = body.len(); for u in "abcdef".encode_utf16() { body.extend_from_slice(&u.to_le_bytes()); } body.extend_from_slice(&[0x00, 0x00]); let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.strings.len(), 1); assert_eq!(msg.strings[0].value, "abcdef"); assert_eq!(msg.strings[0].offset, prefix_len); } // ---- Round-trip preservation across malformed bodies ---------------- #[test] fn malformed_body_does_not_panic() { // A body of all 0xFF bytes is structurally invalid for any opcode // but parse() must not panic. let body = [0xFFu8; 64]; let msg = NmxObservedMessage::parse(&body).unwrap(); // 0xFF is unknown; synthetic name should reflect that. 
assert_eq!(msg.command, 0xFF); assert_eq!(msg.synthetic_name.as_deref(), Some("Unknown0xFF")); } #[test] fn version_bytes_are_split_major_minor() { // body[1] = major, body[2] = minor, regardless of endianness. let body = [0x37u8, 0xAB, 0xCD]; let msg = NmxObservedMessage::parse(&body).unwrap(); assert_eq!(msg.version_major, 0xAB); assert_eq!(msg.version_minor, 0xCD); } }