From 7611d9e215b67b6d33a2f3df9df5d85ddd6100b2 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 5 May 2026 10:47:11 -0400 Subject: [PATCH] [M5] mxaccess-codec: F24 ASB Variant + AsbStatus + RuntimeValue codec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports `Variant` (cs:1170-1241), `AsbStatus` (cs:1109-1167), `RuntimeValue` (cs:741-791), `AsbVariantFactory.From*` (cs:1310-1429), and `MxAsbDataClient.DecodeVariant` (cs:713-825) into `mxaccess-codec::asb_variant`. Three layers per `docs/ASB-Variant-Wire-Format.md`: 1. `AsbVariant` — raw 2/4/4/payload header + bytes; round-trips byte-identical. 2. `DecodedVariant` — typed view with one variant per proven ASB scalar / array (`Bool`, `Int32`, `Float`, `Double`, `String`, `DateTime`, `Duration` plus array forms). Type ids outside the proven matrix surface as `Unsupported { type_id, payload }` — same fallback as .NET's `_ => payload`. 3. `from_*` factories — mirror `AsbVariantFactory.FromX` exactly, setting `length` to `payload.len()` per `cs:1431-1438`. `AsbStatus` and `RuntimeValue` round-trip the wire layout verbatim. Status-element walking (marker bit 7 = implicit zero, etc., per `docs/ASB-Variant-Wire-Format.md:180-205`) is deferred to a follow-up; the codec exposes the raw status payload bytes for now, matching .NET's `AsbStatus.Payload = byte[]` shape. The lib.rs `AsbVariant` / `AsbStatus` / `RuntimeValue` stubs are replaced by the real types via `pub use`. 25 new unit tests cover the proven matrix: scalar + array round-trip, byte layout (2/4/4/payload), `Unsupported` fallback for declared-but-unproven types, short-frame rejection, malformed `string[]` partial-decode preservation matching .NET behavior. Co-Authored-By: Claude Opus 4.7 (1M context) --- design/followups.md | 8 +- rust/crates/mxaccess-codec/src/asb_variant.rs | 975 ++++++++++++++++++ rust/crates/mxaccess-codec/src/lib.rs | 18 +- 3 files changed, 988 insertions(+), 13 deletions(-) create mode 100644 rust/crates/mxaccess-codec/src/asb_variant.rs diff --git a/design/followups.md b/design/followups.md index fc7747d..048a5b7 100644 --- a/design/followups.md +++ b/design/followups.md @@ -46,11 +46,15 @@ move to `## Resolved` with a date + commit hash. **Resolves when:** F19-F26 are all closed and the four DoD bullets above pass. -**This-iteration execution slice (resolved in this commit).** F19 + F23 landed: +**Cumulative execution log.** F19 + F23 landed in commit `ed17c07`; F24 landed in this commit: +- F24: `mxaccess-codec::asb_variant` ports `Variant` + `AsbStatus` + `RuntimeValue` from `AsbContracts.cs:1109-1241,741-791` plus `MxAsbDataClient::DecodeVariant` + `AsbVariantFactory` from `cs:713-825,1310-1429`. Wire layout per `docs/ASB-Variant-Wire-Format.md`. `AsbVariant` is the raw 10-byte-header + payload form; `DecodedVariant` is the typed view; `from_*` factories mirror .NET's `From*`. 25 unit tests cover all proven scalar/array types' round-trip, byte layout (2/4/4/payload), `Unsupported` fallback for type ids outside the proven matrix, `AsbStatus` round-trip, `RuntimeValue` round-trip, malformed `string[]` partial-decode preservation, and short-frame rejection. + +**Earlier slices:** +- F19 + F23 (commit `ed17c07`): - F19: workspace deps added (`hmac`, `md-5`, `sha1`, `sha2`, `aes`, `cbc`, `pbkdf2`, `flate2`, `rand`, `num-bigint`, `num-traits`, `num-integer`, `quick-xml`, `tokio-util`, `zeroize`) + crate `Cargo.toml` propagation. - F23: `mxaccess-asb-nettcp::auth` ports `AsbSystemAuthenticator` (167 LoC .NET → ~480 LoC Rust + tests). 13 tests cover decimal-prime parsing, .NET `BigInteger` byte-order round-trip (sign-byte append/strip + zero), base64 against RFC 4648 §10 vectors, public-key range, private-key sizing, peer-to-peer DH shared-secret agreement, signed-validator message-number monotonicity, AES-CBC PKCS7 padding, unknown hash algorithm fallback (no MAC unless `force_hmac=true`), Apollo `:V2` lifetime-suffix dispatch, PBKDF2-SHA1 self-consistency snapshot. -F20-F22, F24-F26 remain open for parallel agent fan-out. F27 (constant-time DH) is filed as a separate follow-up below. +F20, F21, F22, F25, F26 remain open for parallel agent fan-out. F27 (constant-time DH) is filed as a separate follow-up below. ### F27 — Constant-time DH `mod_exp` (swap `num-bigint` → `crypto-bigint::BoxedUint`) **Severity:** P2 (security regression vs the long-term Rust target — but at parity with the .NET reference today, so not a release-blocker) diff --git a/rust/crates/mxaccess-codec/src/asb_variant.rs b/rust/crates/mxaccess-codec/src/asb_variant.rs new file mode 100644 index 0000000..185155e --- /dev/null +++ b/rust/crates/mxaccess-codec/src/asb_variant.rs @@ -0,0 +1,975 @@ +//! ASB `Variant` + `AsbStatus` + `RuntimeValue` codec. +//! +//! Ports `src/MxAsbClient/AsbContracts.cs` (the `Variant`, `AsbStatus`, and +//! `RuntimeValue` `IAsbCustomSerializableType` blocks) plus the `DecodeVariant` +//! / `AsbVariantFactory` value-typed decode/encode in +//! `src/MxAsbClient/MxAsbDataClient.cs:713-825`. Spec-by-evidence: the wire +//! shape is documented in `docs/ASB-Variant-Wire-Format.md`. +//! +//! Layered for parity with the .NET reference: +//! +//! 1. [`AsbVariant`] is the raw 10-byte header + payload layout that round- +//! trips byte-for-byte against captured ASB messages. It carries a `u16` +//! type id, an `i32` "logical length" (set to `payload.len()` by the +//! factory), and a `u32` payload length followed by the payload bytes. +//! No interpretation; consumers can stash arbitrary unknown variants. +//! 2. [`DecodedVariant`] is the typed view. [`decode_variant`] consumes an +//! [`AsbVariant`] and produces a typed value for the proven matrix +//! (`Bool`, `Int32`, `Float`, `Double`, `String`, `DateTime`, `Duration`, +//! plus their array forms). Unknown type IDs surface as +//! [`DecodedVariant::Unsupported`] carrying the raw payload — same +//! fallback as `MxAsbDataClient.DecodeVariant` at `cs:748` (return raw +//! bytes). +//! 3. The `from_*` factories mirror `AsbVariantFactory.From*` — they build +//! an `AsbVariant` whose `length` field is set to `payload.len()` (per +//! `cs:1316`). Wire bytes are produced by [`AsbVariant::encode`]. +//! +//! [`AsbStatus`] and [`RuntimeValue`] round-trip exactly. The richer +//! status-element parsing (marker bit 7 = implicit zero; otherwise `u16` +//! follows) documented in `docs/ASB-Variant-Wire-Format.md:182-186` is +//! deferred to a follow-up — `AsbStatus.payload` is exposed as raw bytes +//! for now, mirroring the .NET reference, which keeps `Payload` as +//! `byte[]` and only `AsbPublishMapper.DecodeStatus` walks the records. + +use std::string::FromUtf16Error; + +use crate::error::CodecError; + +/// ASB data type IDs from `AsbContracts.cs:1243-1293`. Stored as `u16` on +/// the wire. Variants outside the proven set (e.g. GUID, byte string, +/// localized text, enum/data-type/security/data-quality forms and their +/// arrays) are carried but not interpreted — matching the .NET reference, +/// which preserves them as raw bytes via the `_ => payload` fallback at +/// `MxAsbDataClient.cs:748`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u16)] +pub enum AsbDataType { + Byte = 0, + Char = 1, + Int16 = 2, + UInt16 = 3, + Int32 = 4, + UInt32 = 5, + Int64 = 6, + UInt64 = 7, + Float = 8, + Double = 9, + String = 10, + DateTime = 11, + Duration = 12, + Guid = 13, + ByteString = 14, + LocaleId = 15, + LocalizedText = 16, + Bool = 17, + SByte = 18, + ErrorStatus = 19, + Enum = 20, + DataType = 21, + SecurityClassification = 22, + DataQuality = 23, + ByteArray = 40, + CharArray = 41, + Int16Array = 42, + UInt16Array = 43, + Int32Array = 44, + UInt32Array = 45, + Int64Array = 46, + UInt64Array = 47, + FloatArray = 48, + DoubleArray = 49, + StringArray = 50, + DateTimeArray = 51, + DurationArray = 52, + GuidArray = 53, + ByteStringArray = 54, + LocaleIdArray = 55, + LocalizedTextArray = 56, + BoolArray = 57, + SByteArray = 58, + EnumArray = 60, + DataTypeArray = 61, + SecurityClassificationArray = 62, + DataQualityArray = 63, + Unknown = 65535, +} + +impl AsbDataType { + pub fn as_u16(self) -> u16 { + self as u16 + } +} + +/// Raw ASB `Variant` wire layout (`AsbContracts.cs:1170-1241`). +/// +/// `length` is the .NET `int` length set by the factory to `payload.len()` +/// at construction (`cs:1431-1438`). It is written separately from the +/// `u32` payload-length on the wire — both are emitted by the .NET writer +/// (`cs:1202-1211`). Decoders may legitimately observe `length != payload.len()` +/// for malformed or partial frames; this codec preserves both verbatim. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AsbVariant { + pub type_id: u16, + pub length: i32, + pub payload: Vec, +} + +impl AsbVariant { + /// Build a variant with `length` set to `payload.len()` per + /// `AsbVariantFactory.Create` (`cs:1431-1438`). + pub fn new(type_id: AsbDataType, payload: Vec) -> Self { + let length = i32::try_from(payload.len()).unwrap_or(i32::MAX); + Self { + type_id: type_id.as_u16(), + length, + payload, + } + } + + /// `AsbVariantFactory.Empty` — `TypeUnknown`, length 0, empty payload + /// (`cs:1312`). + pub fn empty() -> Self { + Self { + type_id: AsbDataType::Unknown.as_u16(), + length: 0, + payload: Vec::new(), + } + } + + /// Wire size in bytes: 2 + 4 + 4 + payload. + pub fn wire_len(&self) -> usize { + 10 + self.payload.len() + } + + /// Encode `Variant.WriteToStream` (`cs:1202-1211`). Append-style so + /// callers can chain into a larger `BinaryWriter`-equivalent buffer + /// without intermediate allocations. + pub fn encode_into(&self, out: &mut Vec) { + out.extend_from_slice(&self.type_id.to_le_bytes()); + out.extend_from_slice(&self.length.to_le_bytes()); + let payload_len = u32::try_from(self.payload.len()).unwrap_or(u32::MAX); + out.extend_from_slice(&payload_len.to_le_bytes()); + if !self.payload.is_empty() { + out.extend_from_slice(&self.payload); + } + } + + /// Standalone encode: convenience wrapper around [`encode_into`]. + pub fn encode(&self) -> Vec { + let mut out = Vec::with_capacity(self.wire_len()); + self.encode_into(&mut out); + out + } + + /// Decode `Variant.InitializeFromStream` (`cs:1213-1219`). Returns + /// `(variant, bytes_consumed)`. Empty payload → `payload: Vec::new()`, + /// matching .NET `Payload = []`. + pub fn decode(input: &[u8]) -> Result<(Self, usize), CodecError> { + let mut cursor = 0usize; + let type_id = read_u16_le(input, &mut cursor)?; + let length = read_i32_le(input, &mut cursor)?; + let payload_length = read_u32_le(input, &mut cursor)? as usize; + let payload = read_bytes(input, &mut cursor, payload_length)?; + Ok(( + Self { + type_id, + length, + payload: payload.to_vec(), + }, + cursor, + )) + } +} + +/// Typed decode of an [`AsbVariant`]. +/// +/// Variant order follows the `AsbDataType` numerical sort. Unknown types +/// surface as [`Unsupported`](DecodedVariant::Unsupported) carrying both +/// the type ID and the raw payload, mirroring `DecodeVariant`'s `_ => +/// payload` fallback at `MxAsbDataClient.cs:748`. +#[derive(Debug, Clone, PartialEq)] +pub enum DecodedVariant { + /// `null` from .NET when the payload is empty and the type does not + /// have an "empty literal" (e.g. empty `string`/`bool[]`/...). + /// Matches `_ => null` at `MxAsbDataClient.cs:728`. + Empty, + Bool(bool), + Int32(i32), + Float(f32), + Double(f64), + /// UTF-16LE-decoded contents. + String(String), + /// Windows FILETIME UTC value (`DateTime.ToFileTimeUtc()` — + /// 100-ns ticks since 1601-01-01 UTC). + DateTime(i64), + /// .NET `TimeSpan.Ticks` — 100-ns ticks. + Duration(i64), + BoolArray(Vec), + Int32Array(Vec), + FloatArray(Vec), + DoubleArray(Vec), + StringArray(Vec), + DateTimeArray(Vec), + DurationArray(Vec), + /// Type IDs outside the proven matrix. Payload bytes are preserved + /// verbatim — the consumer can either decode them with a custom + /// helper or surface them upstream. + Unsupported { + type_id: u16, + payload: Vec, + }, +} + +/// Decode an [`AsbVariant`] into a typed value. Mirrors `MxAsbDataClient.DecodeVariant` +/// at `cs:713-750` exactly: +/// +/// * Empty payload → empty literal for known string/array types +/// (`""` / `[]`), [`Empty`] otherwise. +/// * Non-empty payload that doesn't satisfy the minimum length for a +/// scalar (e.g. `TypeInt32` with 3 bytes) falls through to +/// [`Unsupported`] with the raw payload — matches .NET `when payload.Length >= 4`. +/// * Decode failures inside the typed branches surface as +/// [`CodecError::ShortRead`] / [`CodecError::Decode`] so the caller can +/// distinguish "wrong shape" from "unrecognized type". +/// +/// [`Empty`]: DecodedVariant::Empty +/// [`Unsupported`]: DecodedVariant::Unsupported +pub fn decode_variant(variant: &AsbVariant) -> Result { + use AsbDataType::*; + let type_id = variant.type_id; + let payload = &variant.payload; + + if payload.is_empty() { + return Ok(match type_id { + x if x == String.as_u16() => DecodedVariant::String(std::string::String::new()), + x if x == Int32Array.as_u16() => DecodedVariant::Int32Array(Vec::new()), + x if x == BoolArray.as_u16() => DecodedVariant::BoolArray(Vec::new()), + x if x == FloatArray.as_u16() => DecodedVariant::FloatArray(Vec::new()), + x if x == DoubleArray.as_u16() => DecodedVariant::DoubleArray(Vec::new()), + x if x == StringArray.as_u16() => DecodedVariant::StringArray(Vec::new()), + x if x == DateTimeArray.as_u16() => DecodedVariant::DateTimeArray(Vec::new()), + x if x == DurationArray.as_u16() => DecodedVariant::DurationArray(Vec::new()), + _ => DecodedVariant::Empty, + }); + } + + match type_id { + x if x == Bool.as_u16() && !payload.is_empty() => Ok(DecodedVariant::Bool( + payload.first().copied().unwrap_or(0) != 0, + )), + x if x == Int32.as_u16() && payload.len() >= 4 => { + Ok(DecodedVariant::Int32(i32::from_le_bytes(arr4(payload, 0)?))) + } + x if x == Float.as_u16() && payload.len() >= 4 => { + Ok(DecodedVariant::Float(f32::from_le_bytes(arr4(payload, 0)?))) + } + x if x == Double.as_u16() && payload.len() >= 8 => Ok(DecodedVariant::Double( + f64::from_le_bytes(arr8(payload, 0)?), + )), + x if x == String.as_u16() => Ok(DecodedVariant::String(decode_utf16le(payload)?)), + x if x == DateTime.as_u16() && payload.len() >= 8 => Ok(DecodedVariant::DateTime( + i64::from_le_bytes(arr8(payload, 0)?), + )), + x if x == Duration.as_u16() && payload.len() >= 8 => Ok(DecodedVariant::Duration( + i64::from_le_bytes(arr8(payload, 0)?), + )), + x if x == Int32Array.as_u16() => { + decode_int32_array(payload).map(DecodedVariant::Int32Array) + } + x if x == BoolArray.as_u16() => Ok(DecodedVariant::BoolArray( + payload.iter().map(|&b| b != 0).collect(), + )), + x if x == FloatArray.as_u16() => { + decode_float_array(payload).map(DecodedVariant::FloatArray) + } + x if x == DoubleArray.as_u16() => { + decode_double_array(payload).map(DecodedVariant::DoubleArray) + } + x if x == StringArray.as_u16() => { + decode_string_array(payload).map(DecodedVariant::StringArray) + } + x if x == DateTimeArray.as_u16() => { + decode_filetime_array(payload).map(DecodedVariant::DateTimeArray) + } + x if x == DurationArray.as_u16() => { + decode_filetime_array(payload).map(DecodedVariant::DurationArray) + } + _ => Ok(DecodedVariant::Unsupported { + type_id, + payload: payload.clone(), + }), + } +} + +// ---- Factories (mirror `AsbVariantFactory.From*` at cs:1314-1429) -------- + +impl AsbVariant { + pub fn from_bool(value: bool) -> Self { + Self::new(AsbDataType::Bool, vec![if value { 1 } else { 0 }]) + } + + pub fn from_i32(value: i32) -> Self { + Self::new(AsbDataType::Int32, value.to_le_bytes().to_vec()) + } + + pub fn from_f32(value: f32) -> Self { + Self::new(AsbDataType::Float, value.to_le_bytes().to_vec()) + } + + pub fn from_f64(value: f64) -> Self { + Self::new(AsbDataType::Double, value.to_le_bytes().to_vec()) + } + + pub fn from_string(value: &str) -> Self { + Self::new(AsbDataType::String, encode_utf16le(value)) + } + + pub fn from_filetime(value: i64) -> Self { + Self::new(AsbDataType::DateTime, value.to_le_bytes().to_vec()) + } + + pub fn from_duration_ticks(value: i64) -> Self { + Self::new(AsbDataType::Duration, value.to_le_bytes().to_vec()) + } + + pub fn from_i32_array(values: &[i32]) -> Self { + let mut payload = Vec::with_capacity(values.len() * 4); + for v in values { + payload.extend_from_slice(&v.to_le_bytes()); + } + Self::new(AsbDataType::Int32Array, payload) + } + + pub fn from_bool_array(values: &[bool]) -> Self { + Self::new( + AsbDataType::BoolArray, + values.iter().map(|&b| if b { 1u8 } else { 0u8 }).collect(), + ) + } + + pub fn from_f32_array(values: &[f32]) -> Self { + let mut payload = Vec::with_capacity(values.len() * 4); + for v in values { + payload.extend_from_slice(&v.to_le_bytes()); + } + Self::new(AsbDataType::FloatArray, payload) + } + + pub fn from_f64_array(values: &[f64]) -> Self { + let mut payload = Vec::with_capacity(values.len() * 8); + for v in values { + payload.extend_from_slice(&v.to_le_bytes()); + } + Self::new(AsbDataType::DoubleArray, payload) + } + + /// String-array layout: per-string `i32` byte-length followed by + /// UTF-16LE bytes. `null` and `""` both emit a zero-length record + /// (`cs:1400`). The .NET decoder maps zero-length back to + /// `string.Empty` (`cs:798`). + pub fn from_string_array(values: &[&str]) -> Self { + let mut payload = Vec::new(); + for value in values { + let bytes = encode_utf16le(value); + let len = i32::try_from(bytes.len()).unwrap_or(i32::MAX); + payload.extend_from_slice(&len.to_le_bytes()); + payload.extend_from_slice(&bytes); + } + Self::new(AsbDataType::StringArray, payload) + } + + pub fn from_filetime_array(values: &[i64]) -> Self { + let mut payload = Vec::with_capacity(values.len() * 8); + for v in values { + payload.extend_from_slice(&v.to_le_bytes()); + } + Self::new(AsbDataType::DateTimeArray, payload) + } + + pub fn from_duration_array(values: &[i64]) -> Self { + let mut payload = Vec::with_capacity(values.len() * 8); + for v in values { + payload.extend_from_slice(&v.to_le_bytes()); + } + Self::new(AsbDataType::DurationArray, payload) + } +} + +// ---- AsbStatus ----------------------------------------------------------- + +/// Wire layout: signed 1-byte `count`, 4-byte unsigned `payload_length`, +/// `payload_length` bytes of status elements (`cs:1109-1167`). The richer +/// status-element walk (marker-byte bit 7 = implicit zero, etc., see +/// `docs/ASB-Variant-Wire-Format.md:180-205`) is deliberately not done +/// here; the codec round-trips the payload bytes verbatim and exposes a +/// raw accessor so consumers (or a higher-level `StatusElement` parser +/// added later) can walk them. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct AsbStatus { + pub count: i8, + pub payload: Vec, +} + +impl AsbStatus { + pub fn wire_len(&self) -> usize { + 1 + 4 + self.payload.len() + } + + pub fn encode_into(&self, out: &mut Vec) { + out.push(self.count as u8); + let len = u32::try_from(self.payload.len()).unwrap_or(u32::MAX); + out.extend_from_slice(&len.to_le_bytes()); + if !self.payload.is_empty() { + out.extend_from_slice(&self.payload); + } + } + + pub fn encode(&self) -> Vec { + let mut out = Vec::with_capacity(self.wire_len()); + self.encode_into(&mut out); + out + } + + pub fn decode(input: &[u8]) -> Result<(Self, usize), CodecError> { + let mut cursor = 0usize; + let count_byte = *input.first().ok_or(CodecError::ShortRead { + expected: 1, + actual: 0, + })?; + let count = count_byte as i8; + cursor += 1; + let payload_length = read_u32_le(input, &mut cursor)? as usize; + let payload = read_bytes(input, &mut cursor, payload_length)?; + Ok(( + Self { + count, + payload: payload.to_vec(), + }, + cursor, + )) + } +} + +// ---- RuntimeValue -------------------------------------------------------- + +/// Wraps an [`AsbVariant`] with a `DateTime.ToBinary()` timestamp + status +/// per `RuntimeValue` at `cs:741-791`. The 8-byte timestamp is the .NET +/// `DateTime.ToBinary()` packed value (62-bit ticks + 2-bit kind); we +/// preserve it as `i64` rather than splitting because consumers vary in +/// whether they care about the kind bits, and the read path on .NET uses +/// `DateTime.FromBinary` which round-trips the exact value. +#[derive(Debug, Clone, PartialEq)] +pub struct RuntimeValue { + pub timestamp_binary: i64, + pub timestamp_specified: bool, + pub value: AsbVariant, + pub status: AsbStatus, +} + +impl RuntimeValue { + pub fn wire_len(&self) -> usize { + 8 + 1 + self.value.wire_len() + self.status.wire_len() + } + + pub fn encode_into(&self, out: &mut Vec) { + out.extend_from_slice(&self.timestamp_binary.to_le_bytes()); + out.push(if self.timestamp_specified { 1 } else { 0 }); + self.value.encode_into(out); + self.status.encode_into(out); + } + + pub fn encode(&self) -> Vec { + let mut out = Vec::with_capacity(self.wire_len()); + self.encode_into(&mut out); + out + } + + pub fn decode(input: &[u8]) -> Result<(Self, usize), CodecError> { + let mut cursor = 0usize; + let timestamp_binary = read_i64_le(input, &mut cursor)?; + let flag_byte = input.get(cursor).copied().ok_or(CodecError::ShortRead { + expected: 1, + actual: 0, + })?; + let timestamp_specified = flag_byte != 0; + cursor += 1; + let value_tail = input.get(cursor..).ok_or(CodecError::ShortRead { + expected: 10, + actual: 0, + })?; + let (value, value_consumed) = AsbVariant::decode(value_tail)?; + cursor += value_consumed; + let status_tail = input.get(cursor..).ok_or(CodecError::ShortRead { + expected: 5, + actual: 0, + })?; + let (status, status_consumed) = AsbStatus::decode(status_tail)?; + cursor += status_consumed; + Ok(( + Self { + timestamp_binary, + timestamp_specified, + value, + status, + }, + cursor, + )) + } +} + +// ---- helpers -------------------------------------------------------------- + +fn read_array(input: &[u8], cursor: &mut usize) -> Result<[u8; N], CodecError> { + let slice = read_bytes(input, cursor, N)?; + let mut out = [0u8; N]; + out.copy_from_slice(slice); + Ok(out) +} + +fn read_u16_le(input: &[u8], cursor: &mut usize) -> Result { + Ok(u16::from_le_bytes(read_array::<2>(input, cursor)?)) +} + +fn read_u32_le(input: &[u8], cursor: &mut usize) -> Result { + Ok(u32::from_le_bytes(read_array::<4>(input, cursor)?)) +} + +fn read_i32_le(input: &[u8], cursor: &mut usize) -> Result { + Ok(i32::from_le_bytes(read_array::<4>(input, cursor)?)) +} + +fn read_i64_le(input: &[u8], cursor: &mut usize) -> Result { + Ok(i64::from_le_bytes(read_array::<8>(input, cursor)?)) +} + +fn read_bytes<'a>( + input: &'a [u8], + cursor: &mut usize, + needed: usize, +) -> Result<&'a [u8], CodecError> { + let end = cursor.checked_add(needed).ok_or(CodecError::ShortRead { + expected: needed, + actual: input.len().saturating_sub(*cursor), + })?; + if end > input.len() { + return Err(CodecError::ShortRead { + expected: needed, + actual: input.len().saturating_sub(*cursor), + }); + } + let slice = input.get(*cursor..end).ok_or(CodecError::ShortRead { + expected: needed, + actual: input.len().saturating_sub(*cursor), + })?; + *cursor = end; + Ok(slice) +} + +fn arr4(payload: &[u8], offset: usize) -> Result<[u8; 4], CodecError> { + let slice = payload + .get(offset..offset + 4) + .ok_or(CodecError::ShortRead { + expected: 4, + actual: payload.len().saturating_sub(offset), + })?; + let mut out = [0u8; 4]; + out.copy_from_slice(slice); + Ok(out) +} + +fn arr8(payload: &[u8], offset: usize) -> Result<[u8; 8], CodecError> { + let slice = payload + .get(offset..offset + 8) + .ok_or(CodecError::ShortRead { + expected: 8, + actual: payload.len().saturating_sub(offset), + })?; + let mut out = [0u8; 8]; + out.copy_from_slice(slice); + Ok(out) +} + +fn decode_int32_array(payload: &[u8]) -> Result, CodecError> { + let count = payload.len() / 4; + let mut out = Vec::with_capacity(count); + for i in 0..count { + out.push(i32::from_le_bytes(arr4(payload, i * 4)?)); + } + Ok(out) +} + +fn decode_float_array(payload: &[u8]) -> Result, CodecError> { + let count = payload.len() / 4; + let mut out = Vec::with_capacity(count); + for i in 0..count { + out.push(f32::from_le_bytes(arr4(payload, i * 4)?)); + } + Ok(out) +} + +fn decode_double_array(payload: &[u8]) -> Result, CodecError> { + let count = payload.len() / 8; + let mut out = Vec::with_capacity(count); + for i in 0..count { + out.push(f64::from_le_bytes(arr8(payload, i * 8)?)); + } + Ok(out) +} + +fn decode_filetime_array(payload: &[u8]) -> Result, CodecError> { + let count = payload.len() / 8; + let mut out = Vec::with_capacity(count); + for i in 0..count { + out.push(i64::from_le_bytes(arr8(payload, i * 8)?)); + } + Ok(out) +} + +/// String-array decode: walks `i32` length + UTF-16LE bytes records until +/// the payload is exhausted or a malformed length is encountered. +/// `MxAsbDataClient.DecodeStringArray` (`cs:785-803`) stops on negative +/// length or out-of-range; partial values decoded before that point are +/// kept. We mirror that exactly. +fn decode_string_array(payload: &[u8]) -> Result, CodecError> { + let mut values = Vec::new(); + let mut offset = 0usize; + while offset + 4 <= payload.len() { + let len_bytes = payload + .get(offset..offset + 4) + .ok_or(CodecError::ShortRead { + expected: 4, + actual: payload.len().saturating_sub(offset), + })?; + let mut buf = [0u8; 4]; + buf.copy_from_slice(len_bytes); + let byte_length = i32::from_le_bytes(buf); + offset += 4; + if byte_length < 0 || (byte_length as usize) > payload.len().saturating_sub(offset) { + break; + } + let byte_length = byte_length as usize; + if byte_length == 0 { + values.push(String::new()); + continue; + } + let str_bytes = payload + .get(offset..offset + byte_length) + .ok_or(CodecError::ShortRead { + expected: byte_length, + actual: payload.len().saturating_sub(offset), + })?; + values.push(decode_utf16le(str_bytes)?); + offset += byte_length; + } + Ok(values) +} + +fn encode_utf16le(value: &str) -> Vec { + let mut out = Vec::with_capacity(value.len() * 2); + for code_unit in value.encode_utf16() { + out.extend_from_slice(&code_unit.to_le_bytes()); + } + out +} + +fn decode_utf16le(bytes: &[u8]) -> Result { + if bytes.len() % 2 != 0 { + return Err(CodecError::Decode { + offset: bytes.len(), + reason: "UTF-16LE payload has odd byte length", + buffer_len: bytes.len(), + }); + } + let units: Vec = bytes + .chunks_exact(2) + .map(|chunk| { + let mut buf = [0u8; 2]; + buf.copy_from_slice(chunk); + u16::from_le_bytes(buf) + }) + .collect(); + let buf_len = bytes.len(); + String::from_utf16(&units).map_err(|err: FromUtf16Error| CodecError::Decode { + offset: 0, + reason: utf16_error_reason(&err), + buffer_len: buf_len, + }) +} + +const fn utf16_error_reason(_: &FromUtf16Error) -> &'static str { + // FromUtf16Error doesn't carry a position; fixed string preserves the + // 'static-reason contract used by CodecError variants. + "UTF-16LE payload contains an unpaired surrogate" +} + +#[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::indexing_slicing +)] +mod tests { + use super::*; + + fn round_trip_variant(variant: AsbVariant) { + let bytes = variant.encode(); + let (decoded, consumed) = AsbVariant::decode(&bytes).unwrap(); + assert_eq!(consumed, bytes.len(), "decode consumed != encoded len"); + assert_eq!(decoded, variant, "wire round-trip diverged"); + } + + #[test] + fn variant_empty_round_trip() { + round_trip_variant(AsbVariant::empty()); + } + + #[test] + fn variant_bool_round_trip() { + round_trip_variant(AsbVariant::from_bool(true)); + round_trip_variant(AsbVariant::from_bool(false)); + } + + #[test] + fn variant_i32_round_trip() { + round_trip_variant(AsbVariant::from_i32(0)); + round_trip_variant(AsbVariant::from_i32(123)); + round_trip_variant(AsbVariant::from_i32(i32::MIN)); + round_trip_variant(AsbVariant::from_i32(i32::MAX)); + } + + #[test] + fn variant_floats_round_trip() { + round_trip_variant(AsbVariant::from_f32(1.5)); + round_trip_variant(AsbVariant::from_f64(-std::f64::consts::E)); + } + + #[test] + fn variant_string_round_trip() { + round_trip_variant(AsbVariant::from_string("")); + round_trip_variant(AsbVariant::from_string("hello world")); + round_trip_variant(AsbVariant::from_string("éàü 漢字")); + } + + #[test] + fn variant_datetime_round_trip() { + round_trip_variant(AsbVariant::from_filetime(0)); + round_trip_variant(AsbVariant::from_filetime(132_845_000_000_000_000)); + } + + #[test] + fn variant_duration_round_trip() { + round_trip_variant(AsbVariant::from_duration_ticks(0)); + round_trip_variant(AsbVariant::from_duration_ticks(1_234_567_890)); + } + + #[test] + fn variant_int32_array_round_trip() { + round_trip_variant(AsbVariant::from_i32_array(&[])); + round_trip_variant(AsbVariant::from_i32_array(&[1, 2, 3, -4, i32::MAX])); + } + + #[test] + fn variant_bool_array_round_trip() { + round_trip_variant(AsbVariant::from_bool_array(&[])); + round_trip_variant(AsbVariant::from_bool_array(&[true, false, true, true])); + } + + #[test] + fn variant_float_array_round_trip() { + round_trip_variant(AsbVariant::from_f32_array(&[1.0, -2.0, 3.5])); + round_trip_variant(AsbVariant::from_f64_array(&[std::f64::consts::PI, -0.0])); + } + + #[test] + fn variant_string_array_round_trip() { + round_trip_variant(AsbVariant::from_string_array(&[])); + round_trip_variant(AsbVariant::from_string_array(&["alpha", "", "γαμμα"])); + } + + #[test] + fn variant_datetime_and_duration_arrays_round_trip() { + round_trip_variant(AsbVariant::from_filetime_array(&[ + 0, + 132_845_000_000_000_000, + i64::MAX, + ])); + round_trip_variant(AsbVariant::from_duration_array(&[-1, i64::MIN, 42])); + } + + #[test] + fn decode_variant_handles_empty_arrays_to_empty_typed_values() { + let v = AsbVariant { + type_id: AsbDataType::Int32Array.as_u16(), + length: 0, + payload: Vec::new(), + }; + assert_eq!( + decode_variant(&v).unwrap(), + DecodedVariant::Int32Array(Vec::new()) + ); + + let v = AsbVariant { + type_id: AsbDataType::String.as_u16(), + length: 0, + payload: Vec::new(), + }; + assert_eq!( + decode_variant(&v).unwrap(), + DecodedVariant::String(String::new()) + ); + } + + #[test] + fn decode_variant_returns_empty_for_unknown_type_with_empty_payload() { + let v = AsbVariant { + type_id: AsbDataType::Bool.as_u16(), + length: 0, + payload: Vec::new(), + }; + assert_eq!(decode_variant(&v).unwrap(), DecodedVariant::Empty); + } + + #[test] + fn decode_variant_int32() { + let v = AsbVariant::from_i32(0x1234_5678); + assert_eq!( + decode_variant(&v).unwrap(), + DecodedVariant::Int32(0x1234_5678) + ); + } + + #[test] + fn decode_variant_string() { + let v = AsbVariant::from_string("hello"); + assert_eq!( + decode_variant(&v).unwrap(), + DecodedVariant::String("hello".to_string()) + ); + } + + #[test] + fn decode_variant_string_array_with_empty_entries() { + let v = AsbVariant::from_string_array(&["a", "", "bc"]); + let decoded = decode_variant(&v).unwrap(); + match decoded { + DecodedVariant::StringArray(values) => { + assert_eq!( + values, + vec!["a".to_string(), String::new(), "bc".to_string()] + ); + } + other => panic!("expected StringArray, got {other:?}"), + } + } + + #[test] + fn decode_variant_unsupported_type_returns_raw_bytes() { + let v = AsbVariant { + type_id: AsbDataType::Guid.as_u16(), + length: 16, + payload: vec![0xAB; 16], + }; + match decode_variant(&v).unwrap() { + DecodedVariant::Unsupported { type_id, payload } => { + assert_eq!(type_id, AsbDataType::Guid.as_u16()); + assert_eq!(payload, vec![0xAB; 16]); + } + other => panic!("expected Unsupported, got {other:?}"), + } + } + + #[test] + fn decode_variant_int32_too_short_falls_through_to_unsupported() { + // payload < 4 bytes for TypeInt32 — match-arm guard fails and + // .NET hits the `_ => payload` fallback (cs:748). We mirror that. + let v = AsbVariant { + type_id: AsbDataType::Int32.as_u16(), + length: 3, + payload: vec![1, 2, 3], + }; + match decode_variant(&v).unwrap() { + DecodedVariant::Unsupported { type_id, payload } => { + assert_eq!(type_id, AsbDataType::Int32.as_u16()); + assert_eq!(payload, vec![1, 2, 3]); + } + other => panic!("expected Unsupported, got {other:?}"), + } + } + + #[test] + fn variant_decode_rejects_truncated_header() { + // Cut off before the payload-length field finishes. + let bytes = vec![0x04, 0x00, 1, 0, 0, 0, 0xFF]; + let err = AsbVariant::decode(&bytes).unwrap_err(); + assert!(matches!(err, CodecError::ShortRead { .. })); + } + + #[test] + fn asb_status_round_trip() { + let status = AsbStatus { + count: -3, + payload: vec![0x01, 0x02, 0x03], + }; + let bytes = status.encode(); + let (decoded, consumed) = AsbStatus::decode(&bytes).unwrap(); + assert_eq!(consumed, bytes.len()); + assert_eq!(decoded, status); + } + + #[test] + fn asb_status_round_trip_empty() { + let status = AsbStatus::default(); + let bytes = status.encode(); + let (decoded, consumed) = AsbStatus::decode(&bytes).unwrap(); + assert_eq!(consumed, 5); + assert_eq!(decoded, status); + } + + #[test] + fn runtime_value_round_trip() { + let rv = RuntimeValue { + timestamp_binary: 0x0123_4567_89AB_CDEF, + timestamp_specified: true, + value: AsbVariant::from_i32(42), + status: AsbStatus { + count: 1, + payload: vec![0xC0], + }, + }; + let bytes = rv.encode(); + let (decoded, consumed) = RuntimeValue::decode(&bytes).unwrap(); + assert_eq!(consumed, bytes.len()); + assert_eq!(decoded, rv); + } + + #[test] + fn runtime_value_round_trip_empty_variant() { + let rv = RuntimeValue { + timestamp_binary: 0, + timestamp_specified: false, + value: AsbVariant::empty(), + status: AsbStatus::default(), + }; + let bytes = rv.encode(); + let (decoded, consumed) = RuntimeValue::decode(&bytes).unwrap(); + assert_eq!(consumed, bytes.len()); + assert_eq!(decoded, rv); + } + + #[test] + fn variant_wire_layout_is_2_4_4_payload() { + // .NET reference: WriteToStream writes Type (u16), Length (i32), + // payloadLength (u32), payload bytes. Verify byte positions. + let v = AsbVariant::from_i32(0xAABB_CCDD_u32 as i32); + let bytes = v.encode(); + // type_id 0x0004 little-endian + assert_eq!(&bytes[0..2], &[0x04, 0x00]); + // length = 4 + assert_eq!(&bytes[2..6], &[0x04, 0x00, 0x00, 0x00]); + // payload length = 4 + assert_eq!(&bytes[6..10], &[0x04, 0x00, 0x00, 0x00]); + // payload = 0xAABB_CCDD little-endian + assert_eq!(&bytes[10..14], &[0xDD, 0xCC, 0xBB, 0xAA]); + } +} diff --git a/rust/crates/mxaccess-codec/src/lib.rs b/rust/crates/mxaccess-codec/src/lib.rs index ddb93da..dbf9423 100644 --- a/rust/crates/mxaccess-codec/src/lib.rs +++ b/rust/crates/mxaccess-codec/src/lib.rs @@ -15,14 +15,15 @@ //! `NmxTransferEnvelopeTemplate` (round-trip preserver). //! //! Remaining (wave 2): `NmxSecuredWrite2Message` (`0x38`), -//! `ObservedWriteBodyTemplate`. ASB Variant + AsbStatus + RuntimeValue land -//! in M5. +//! `ObservedWriteBodyTemplate`. ASB Variant + AsbStatus + RuntimeValue +//! landed in the F24 sub-stream of M5 — see [`asb_variant`]. //! //! Every wire shape here is grounded in `src/MxNativeCodec/*.cs` (the .NET //! reference) and `captures/0NN-frida-*` (Frida ground truth). #![forbid(unsafe_code)] +pub mod asb_variant; pub mod envelope; pub mod envelope_template; pub mod error; @@ -68,16 +69,11 @@ pub struct NmxWriteMessage; #[derive(Debug, Clone)] pub struct NmxSecuredWrite2Message; -// ---- ASB types (M5 follow-up) -------------------------------------------- +// ---- ASB types (M5, F24) ------------------------------------------------- -#[derive(Debug, Clone)] -pub struct AsbVariant; - -#[derive(Debug, Clone, Copy, Default)] -pub struct AsbStatus; - -#[derive(Debug, Clone)] -pub struct RuntimeValue; +pub use asb_variant::{ + AsbDataType, AsbStatus, AsbVariant, DecodedVariant, RuntimeValue, decode_variant, +}; // ---- Convenience prelude -------------------------------------------------