Files
mxaccess/rust/crates/mxaccess-codec/src/reference_handle.rs
T
Joseph Doherty fe2a6db786
rust / build / test / clippy / fmt (push) Has been cancelled
Initial project state: .NET reference, design, Rust port (M0+M1), evidence
Layout:
- src/                    .NET 10 x64 reference: MxNativeCodec, MxNativeClient,
                          MxAsbClient, probes, tests, harnesses. Executable spec.
- design/                 Architectural plan for the Rust port (M0–M6), error
                          model, protocol invariants, risks (R1–R16), adversarial
                          review log (review.md).
- rust/                   Rust workspace. M0 skeleton + M1 codec parity.
                          mxaccess-codec: 215 unit tests + 2 cross-implementation
                          parity tests (byte-identical against .NET reference).
                          Other crates are M0 stubs awaiting M2+.
- captures/               Frida + netsh + pcap evidence per CLAUDE.md
                          ("captures are evidence, not throwaway logs").
- analysis/               Decompiled C# (frida/proxy/decompiled-*),
                          Ghidra exports for native DLLs (`exports/` only —
                          working state at `projects/` and AVEVA's input
                          binaries at `input/` are gitignored).
- docs/                   Reverse-engineering reference docs.
- tools/                  Setup-LiveProbeEnv.ps1 (Infisical credential fetcher),
                          Compute-Crc.ps1 (.NET parity helper).
- .github/workflows/      Rust CI: fmt + build + test + clippy on Windows.
- LICENSE                 MIT (Joseph Doherty, 2026).

Verified:
- cargo test --workspace → 217 passed (215 unit + 2 .NET parity), 0 failed
- cargo clippy --workspace -- -D warnings → clean
- cargo fmt --all -- --check → clean
- cargo publish --dry-run -p mxaccess-codec → packages cleanly

Excluded from history (see .gitignore):
- **/bin, **/obj, **/target — build artifacts
- analysis/ghidra/projects/ — Ghidra working state (regenerable)
- analysis/ghidra/input/ — AVEVA proprietary DLLs (vendor IP)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 06:21:00 -04:00

423 lines
15 KiB
Rust

//! `MxReferenceHandle` — 20-byte reference handle.
//!
//! Direct port of `src/MxNativeCodec/MxReferenceHandle.cs`. CRC-16/IBM
//! (poly `0xa001`, initial `0`) computed over lowercase UTF-16LE name bytes
//! (low byte then high byte per char), per `MxReferenceHandle.cs:51,47-59`.
// Direct byte indexing is the right pattern for fixed-layout codec code:
// every byte access is preceded by an explicit length check, and the resulting
// code reads as a 1:1 mirror of the .NET source's `BinaryPrimitives` calls.
// `.get(n)?` would obscure the byte map.
#![allow(clippy::indexing_slicing)]
use crate::error::CodecError;
/// CRC-16/IBM polynomial in the form consumed by the right-shifting update
/// loop in [`update_crc16_ibm`], which XORs it into the running CRC whenever
/// the bit shifted out is set.
const CRC16_IBM_POLYNOMIAL: u16 = 0xa001;
/// 20-byte reference handle. Encoded layout matches the .NET reference
/// (`MxReferenceHandle.cs:88-106`):
///
/// ```text
/// offset  size  field
/// 0       1     galaxy_id
/// 1       1     reserved (always 0; not exposed publicly)
/// 2       2     platform_id          u16 LE
/// 4       2     engine_id            u16 LE
/// 6       2     object_id            u16 LE
/// 8       2     object_signature     u16 LE (CRC-16/IBM of object tag name)
/// 10      2     primitive_id         i16 LE
/// 12      2     attribute_id         i16 LE
/// 14      2     property_id          i16 LE
/// 16      2     attribute_signature  u16 LE (CRC-16/IBM of attribute name)
/// 18      2     attribute_index      i16 LE (-1 array, 0 scalar)
/// ```
///
/// `object_signature` and `attribute_signature` are derived values. The Rust
/// port keeps them private — the only constructor that produces a handle from
/// names is [`from_names`](Self::from_names); the only mutators that update
/// one signature are [`with_object_tag_name`](Self::with_object_tag_name) and
/// [`with_attribute_name`](Self::with_attribute_name), which both
/// recompute. This is a deliberate tightening over the .NET reference (which
/// is a record with public init-only signature fields).
///
/// [`parse`](Self::parse) is the one path that takes both signatures verbatim
/// from the encoded bytes instead of computing them; the derived `Default`
/// leaves every field, signatures included, at zero.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct MxReferenceHandle {
/// Galaxy identifier; encoded as the single byte at offset 0.
pub galaxy_id: u8,
/// Platform identifier; `u16` little-endian at offset 2.
pub platform_id: u16,
/// Engine identifier; `u16` little-endian at offset 4.
pub engine_id: u16,
/// Object identifier; `u16` little-endian at offset 6.
pub object_id: u16,
/// CRC-16/IBM of the object tag name; `u16` little-endian at offset 8.
/// Private so it can only change through a recomputing method; read it with
/// [`object_signature`](Self::object_signature).
object_signature: u16,
/// Primitive identifier; `i16` little-endian at offset 10.
pub primitive_id: i16,
/// Attribute identifier; `i16` little-endian at offset 12.
pub attribute_id: i16,
/// Property identifier; `i16` little-endian at offset 14.
pub property_id: i16,
/// CRC-16/IBM of the attribute name; `u16` little-endian at offset 16.
/// Private so it can only change through a recomputing method; read it with
/// [`attribute_signature`](Self::attribute_signature).
attribute_signature: u16,
/// `-1` for an array attribute, `0` for a scalar; `i16` little-endian at
/// offset 18.
pub attribute_index: i16,
}
impl MxReferenceHandle {
    /// Size in bytes of the encoded form read by [`parse`](Self::parse) and
    /// produced by [`encode`](Self::encode) / [`write_to`](Self::write_to).
    pub const ENCODED_LEN: usize = 20;

    /// Builds a handle from its identifiers, deriving both signatures from the
    /// supplied names. Mirrors `MxReferenceHandle.Create`.
    ///
    /// `is_array` selects the stored `attribute_index`: `-1` when `true`,
    /// `0` when `false`.
    ///
    /// # Errors
    ///
    /// Returns [`CodecError::InvalidName`] if either name is empty or
    /// whitespace-only — matching the .NET `ArgumentException.ThrowIfNullOrWhiteSpace`
    /// contract at `MxReferenceHandle.cs:49`. The object tag name is checked
    /// before the attribute name.
    // One argument per caller-supplied field plus the two names.
    #[allow(clippy::too_many_arguments)]
    pub fn from_names(
        galaxy_id: u8,
        platform_id: u16,
        engine_id: u16,
        object_id: u16,
        object_tag_name: &str,
        primitive_id: i16,
        attribute_id: i16,
        property_id: i16,
        attribute_name: &str,
        is_array: bool,
    ) -> Result<Self, CodecError> {
        let object_signature = compute_name_signature(object_tag_name)?;
        let attribute_signature = compute_name_signature(attribute_name)?;
        let attribute_index = if is_array { -1 } else { 0 };

        Ok(Self {
            galaxy_id,
            platform_id,
            engine_id,
            object_id,
            object_signature,
            primitive_id,
            attribute_id,
            property_id,
            attribute_signature,
            attribute_index,
        })
    }

    /// Stored object signature: computed from the object tag name by
    /// [`from_names`](Self::from_names) /
    /// [`with_object_tag_name`](Self::with_object_tag_name), or taken from
    /// bytes 8..10 by [`parse`](Self::parse).
    pub fn object_signature(self) -> u16 {
        self.object_signature
    }

    /// Stored attribute signature: computed from the attribute name by
    /// [`from_names`](Self::from_names) /
    /// [`with_attribute_name`](Self::with_attribute_name), or taken from
    /// bytes 16..18 by [`parse`](Self::parse).
    pub fn attribute_signature(self) -> u16 {
        self.attribute_signature
    }

    /// Copy of this handle whose object signature is recomputed from
    /// `object_tag_name`; all other fields are carried over unchanged.
    ///
    /// # Errors
    ///
    /// Returns [`CodecError::InvalidName`] if `object_tag_name` is empty or
    /// whitespace-only.
    pub fn with_object_tag_name(self, object_tag_name: &str) -> Result<Self, CodecError> {
        compute_name_signature(object_tag_name).map(|object_signature| Self {
            object_signature,
            ..self
        })
    }

    /// Copy of this handle whose attribute signature is recomputed from
    /// `attribute_name`; all other fields are carried over unchanged.
    ///
    /// # Errors
    ///
    /// Returns [`CodecError::InvalidName`] if `attribute_name` is empty or
    /// whitespace-only.
    pub fn with_attribute_name(self, attribute_name: &str) -> Result<Self, CodecError> {
        compute_name_signature(attribute_name).map(|attribute_signature| Self {
            attribute_signature,
            ..self
        })
    }

    /// Decodes a 20-byte encoded handle. Mirrors `MxReferenceHandle.Parse`
    /// (`MxReferenceHandle.cs:61-79`). The reserved byte at offset 1 is not
    /// validated and not retained; both signatures are taken as-is from the
    /// input rather than recomputed.
    ///
    /// # Errors
    ///
    /// Returns [`CodecError::ShortRead`] if `bytes` is not exactly 20 bytes
    /// long — this covers inputs that are too long as well as too short.
    pub fn parse(bytes: &[u8]) -> Result<Self, CodecError> {
        let actual = bytes.len();
        if actual != Self::ENCODED_LEN {
            return Err(CodecError::ShortRead {
                expected: Self::ENCODED_LEN,
                actual,
            });
        }

        // The length check above makes every fixed offset below in-bounds.
        let galaxy_id = bytes[0];
        // Offset 1 is the reserved byte: skipped, as in the .NET Parse.
        let platform_id = read_u16_le(bytes, 2);
        let engine_id = read_u16_le(bytes, 4);
        let object_id = read_u16_le(bytes, 6);
        let object_signature = read_u16_le(bytes, 8);
        let primitive_id = read_i16_le(bytes, 10);
        let attribute_id = read_i16_le(bytes, 12);
        let property_id = read_i16_le(bytes, 14);
        let attribute_signature = read_u16_le(bytes, 16);
        let attribute_index = read_i16_le(bytes, 18);

        Ok(Self {
            galaxy_id,
            platform_id,
            engine_id,
            object_id,
            object_signature,
            primitive_id,
            attribute_id,
            property_id,
            attribute_signature,
            attribute_index,
        })
    }

    /// Returns the 20-byte encoded form as a new array.
    pub fn encode(self) -> [u8; Self::ENCODED_LEN] {
        let mut encoded = [0u8; Self::ENCODED_LEN];
        self.write_to(&mut encoded);
        encoded
    }

    /// Writes the encoded form into the first 20 bytes of `destination`;
    /// any bytes beyond that are left untouched. Mirrors
    /// `MxReferenceHandle.WriteTo` (`MxReferenceHandle.cs:88-106`); byte 1 is
    /// always written as 0.
    ///
    /// # Panics
    ///
    /// Panics if `destination.len() < 20`. Use a 20-byte slice or call
    /// [`encode`](Self::encode) for a fresh buffer.
    pub fn write_to(self, destination: &mut [u8]) {
        assert!(
            destination.len() >= Self::ENCODED_LEN,
            "destination must be at least {} bytes",
            Self::ENCODED_LEN
        );

        // Exhaustive destructuring: a field added to the struct later fails
        // to compile here until it is given a place in the byte layout.
        let Self {
            galaxy_id,
            platform_id,
            engine_id,
            object_id,
            object_signature,
            primitive_id,
            attribute_id,
            property_id,
            attribute_signature,
            attribute_index,
        } = self;

        destination[0] = galaxy_id;
        destination[1] = 0; // reserved
        write_u16_le(destination, 2, platform_id);
        write_u16_le(destination, 4, engine_id);
        write_u16_le(destination, 6, object_id);
        write_u16_le(destination, 8, object_signature);
        write_i16_le(destination, 10, primitive_id);
        write_i16_le(destination, 12, attribute_id);
        write_i16_le(destination, 14, property_id);
        write_u16_le(destination, 16, attribute_signature);
        write_i16_le(destination, 18, attribute_index);
    }
}
/// CRC-16/IBM signature of a name.
///
/// Each `char` of `name` is lowercased and re-encoded as UTF-16; every
/// resulting code unit is then run through [`update_crc16_ibm`], low byte
/// first and high byte second, starting from a CRC of `0`.
///
/// Mirrors `MxReferenceHandle.ComputeNameSignature` (`MxReferenceHandle.cs:47-59`).
///
/// **Unicode caveat**: lowercasing is applied per `char` with
/// [`char::to_lowercase`] rather than to the whole string with
/// [`str::to_lowercase`]. The `str` method is context-sensitive — a
/// word-final `Σ` becomes `ς` instead of `σ` — whereas the .NET
/// `String.ToLowerInvariant()` this must match maps each character on its
/// own. For every input without a word-final `Σ` the two Rust forms produce
/// the same characters. Mappings that expand to more than one `char` (for
/// example `İ`, U+0130) still follow Rust's tables and may diverge from
/// .NET — see `design/10-raw-layer.md` L37 for the path forward via
/// `icu_casemap`.
///
/// # Errors
///
/// Returns [`CodecError::InvalidName`] if `name` is empty or whitespace-only.
pub fn compute_name_signature(name: &str) -> Result<u16, CodecError> {
    if name.trim().is_empty() {
        return Err(CodecError::InvalidName);
    }

    let mut crc: u16 = 0;
    // Scratch space for one `char`'s UTF-16 form (two units for chars at or
    // above U+10000, one otherwise).
    let mut units = [0u16; 2];
    for ch in name.chars().flat_map(char::to_lowercase) {
        // The .NET side enumerates UTF-16 code units (a C# `char` is a u16),
        // so a surrogate pair contributes two low/high byte pairs.
        for unit in ch.encode_utf16(&mut units) {
            let [low, high] = unit.to_le_bytes();
            crc = update_crc16_ibm(crc, low);
            crc = update_crc16_ibm(crc, high);
        }
    }
    Ok(crc)
}
/// Folds one byte into a running CRC-16/IBM value (poly `0xa001`,
/// right-shifted variant) and returns the updated CRC. Mirrors
/// `UpdateCrc16Ibm` (`MxReferenceHandle.cs:108-119`).
///
/// `value` is XORed into the low byte of `crc`; the register is then shifted
/// right eight times, XORing in the polynomial after each shift whose
/// shifted-out bit was set.
pub const fn update_crc16_ibm(mut crc: u16, value: u8) -> u16 {
    crc ^= value as u16;
    // `while` rather than `for`: iterator loops are unavailable in `const fn`.
    let mut remaining_bits = 8u8;
    while remaining_bits != 0 {
        let shifted_out = crc & 1 == 1;
        crc >>= 1;
        if shifted_out {
            crc ^= CRC16_IBM_POLYNOMIAL;
        }
        remaining_bits -= 1;
    }
    crc
}
/// Reads `bytes[offset]` and `bytes[offset + 1]` as a little-endian `u16`.
///
/// Panics if either index is out of bounds; callers check the length first.
#[inline]
fn read_u16_le(bytes: &[u8], offset: usize) -> u16 {
    let low = bytes[offset];
    let high = bytes[offset + 1];
    u16::from_le_bytes([low, high])
}
/// Reads `bytes[offset]` and `bytes[offset + 1]` as a little-endian `i16`.
///
/// Panics if either index is out of bounds; callers check the length first.
#[inline]
fn read_i16_le(bytes: &[u8], offset: usize) -> i16 {
    let low = bytes[offset];
    let high = bytes[offset + 1];
    i16::from_le_bytes([low, high])
}
/// Stores `value` little-endian into `bytes[offset]` (low byte) and
/// `bytes[offset + 1]` (high byte).
///
/// Panics if either index is out of bounds; callers check the length first.
#[inline]
fn write_u16_le(bytes: &mut [u8], offset: usize, value: u16) {
    let [low, high] = value.to_le_bytes();
    bytes[offset] = low;
    bytes[offset + 1] = high;
}
/// Stores `value` little-endian into `bytes[offset]` (low byte) and
/// `bytes[offset + 1]` (high byte).
///
/// Panics if either index is out of bounds; callers check the length first.
#[inline]
fn write_i16_le(bytes: &mut [u8], offset: usize, value: i16) {
    let [low, high] = value.to_le_bytes();
    bytes[offset] = low;
    bytes[offset + 1] = high;
}
#[cfg(test)]
// Test code unwraps and indexes freely: a panic here is a test failure, which
// is the signal we want.
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::indexing_slicing)]
mod tests {
    use super::*;

    /// **Cross-implementation parity**: the values on the right are the exact
    /// CRC-16/IBM outputs of `MxNativeCodec.MxReferenceHandle.ComputeNameSignature`
    /// in the .NET reference, captured via `tools/Compute-Crc.ps1`. If the
    /// Rust port ever diverges, these tests catch it. Regenerate with
    /// `pwsh -NoProfile -File tools\Compute-Crc.ps1` after adding new vectors.
    #[test]
    fn dotnet_reference_parity_vectors() {
        let cases = [
            ("TestObject", 0x0B25),
            ("TestInt", 0xDA3E),
            ("$Object", 0x22A4),
            ("a", 0x9029),
            ("TestChildObject", 0xD736),
            // Case-insensitivity: these collapse to the same CRC as
            // "TestObject" because names are lowercased before hashing.
            ("testobject", 0x0B25),
            ("TESTOBJECT", 0x0B25),
        ];
        for (name, expected) in cases {
            assert_eq!(
                compute_name_signature(name).unwrap(),
                expected,
                "CRC for {name:?} diverged from .NET reference"
            );
        }
    }

    /// Empty and whitespace-only names are rejected, matching the .NET
    /// behaviour of throwing on such input.
    #[test]
    fn empty_name_rejected() {
        assert!(compute_name_signature("").is_err());
        assert!(compute_name_signature(" ").is_err());
        assert!(matches!(
            compute_name_signature("\t\n"),
            Err(CodecError::InvalidName)
        ));
    }

    #[test]
    fn lowercasing_is_invariant() {
        // Same name in different cases produces the same signature.
        let a = compute_name_signature("TestObject").unwrap();
        let b = compute_name_signature("testobject").unwrap();
        let c = compute_name_signature("TESTOBJECT").unwrap();
        assert_eq!(a, b);
        assert_eq!(a, c);
    }

    #[test]
    fn distinct_names_distinct_signatures() {
        // Different names should hash to different values for any reasonable
        // hash. (CRC-16 collisions exist, but these short distinct strings
        // shouldn't collide.)
        let a = compute_name_signature("TestObject").unwrap();
        let b = compute_name_signature("TestInt").unwrap();
        let c = compute_name_signature("$Object").unwrap();
        assert_ne!(a, b);
        assert_ne!(a, c);
        assert_ne!(b, c);
    }

    #[test]
    fn crc_init_is_zero() {
        // CRC of a single null byte under poly 0xa001 with init 0:
        // crc = 0 XOR 0 = 0; eight right-shifts on 0 stay 0.
        // So CRC of [0u8] under update_crc16_ibm is 0.
        assert_eq!(update_crc16_ibm(0, 0), 0);
    }

    #[test]
    fn crc_single_byte_hand_traced() {
        // Byte 0x01 from a zero CRC: the set bit is shifted out on the first
        // step, and the polynomial's own low bit keeps re-triggering the XOR
        // on each of the remaining seven steps, ending at 0xC0C1.
        assert_eq!(update_crc16_ibm(0, 1), 0xC0C1);
    }

    #[test]
    fn crc_matches_standard_check_value() {
        // Published check value for this CRC (reflected poly 0xa001, init 0,
        // no final XOR) over the ASCII bytes "123456789".
        let crc = b"123456789"
            .iter()
            .fold(0u16, |crc, &byte| update_crc16_ibm(crc, byte));
        assert_eq!(crc, 0xBB3D);
    }

    #[test]
    fn round_trip_zero_handle() {
        let handle = MxReferenceHandle::default();
        let encoded = handle.encode();
        let decoded = MxReferenceHandle::parse(&encoded).unwrap();
        assert_eq!(handle, decoded);
        assert_eq!(encoded, [0u8; 20]);
    }

    #[test]
    fn round_trip_populated_handle() {
        let handle = MxReferenceHandle::from_names(
            1,                 // galaxy_id
            42,                // platform_id
            17,                // engine_id
            300,               // object_id
            "TestChildObject", // object_tag_name
            -1,                // primitive_id
            7,                 // attribute_id
            0,                 // property_id
            "TestInt",         // attribute_name
            false,             // is_array
        )
        .unwrap();
        let encoded = handle.encode();
        let decoded = MxReferenceHandle::parse(&encoded).unwrap();
        assert_eq!(handle, decoded);
        assert_eq!(decoded.galaxy_id, 1);
        assert_eq!(decoded.platform_id, 42);
        assert_eq!(decoded.engine_id, 17);
        assert_eq!(decoded.object_id, 300);
        assert_eq!(decoded.primitive_id, -1);
        assert_eq!(decoded.attribute_id, 7);
        assert_eq!(decoded.property_id, 0);
        assert_eq!(decoded.attribute_index, 0);
        assert_eq!(decoded.object_signature(), handle.object_signature());
        assert_eq!(decoded.attribute_signature(), handle.attribute_signature());
    }

    #[test]
    fn array_flag_is_minus_one() {
        let handle =
            MxReferenceHandle::from_names(1, 1, 1, 1, "X", 0, 0, 0, "Y", true).unwrap();
        assert_eq!(handle.attribute_index, -1);
    }

    #[test]
    fn byte_1_always_zero_on_encode() {
        let handle = MxReferenceHandle {
            galaxy_id: 0xff,
            ..MxReferenceHandle::default()
        };
        let encoded = handle.encode();
        assert_eq!(encoded[0], 0xff);
        assert_eq!(encoded[1], 0x00);
    }

    #[test]
    fn parse_discards_reserved_byte() {
        // A non-zero reserved byte is accepted on input and does not survive
        // a re-encode.
        let mut raw = [0u8; 20];
        raw[1] = 0x7f;
        let handle = MxReferenceHandle::parse(&raw).unwrap();
        assert_eq!(handle, MxReferenceHandle::default());
        assert_eq!(handle.encode()[1], 0x00);
    }

    /// `parse` demands exactly 20 bytes: both a short and an over-long buffer
    /// are rejected, and the error reports the lengths involved.
    #[test]
    fn parse_rejects_short_buffer() {
        assert!(MxReferenceHandle::parse(&[0u8; 19]).is_err());
        assert!(MxReferenceHandle::parse(&[0u8; 21]).is_err());
        assert!(matches!(
            MxReferenceHandle::parse(&[0u8; 19]),
            Err(CodecError::ShortRead {
                expected: 20,
                actual: 19
            })
        ));
        assert!(matches!(
            MxReferenceHandle::parse(&[0u8; 21]),
            Err(CodecError::ShortRead {
                expected: 20,
                actual: 21
            })
        ));
        assert!(matches!(
            MxReferenceHandle::parse(&[]),
            Err(CodecError::ShortRead {
                expected: 20,
                actual: 0
            })
        ));
    }

    #[test]
    fn with_attribute_name_recomputes_signature() {
        let h1 =
            MxReferenceHandle::from_names(1, 1, 1, 1, "Obj", 0, 0, 0, "AttrA", false).unwrap();
        let h2 = h1.with_attribute_name("AttrB").unwrap();
        assert_ne!(h1.attribute_signature(), h2.attribute_signature());
        // Object signature unchanged.
        assert_eq!(h1.object_signature(), h2.object_signature());
        // Other fields preserved.
        assert_eq!(h1.galaxy_id, h2.galaxy_id);
        assert_eq!(h1.platform_id, h2.platform_id);
    }

    #[test]
    fn with_object_tag_name_recomputes_signature() {
        let h1 =
            MxReferenceHandle::from_names(1, 2, 3, 4, "ObjA", 5, 6, 7, "Attr", true).unwrap();
        let h2 = h1.with_object_tag_name("ObjB").unwrap();
        assert_ne!(h1.object_signature(), h2.object_signature());
        assert_eq!(
            h2.object_signature(),
            compute_name_signature("ObjB").unwrap()
        );
        // Everything except the object signature is preserved, so swapping
        // the original name back in restores the original handle.
        assert_eq!(h1.attribute_signature(), h2.attribute_signature());
        assert_eq!(h2.with_object_tag_name("ObjA").unwrap(), h1);
    }

    #[test]
    fn with_name_mutators_reject_blank_names() {
        let handle =
            MxReferenceHandle::from_names(1, 1, 1, 1, "Obj", 0, 0, 0, "Attr", false).unwrap();
        assert!(handle.with_object_tag_name("").is_err());
        assert!(handle.with_attribute_name(" ").is_err());
    }

    #[test]
    fn endianness_is_little() {
        // Verify that platform_id 0x1234 ends up as bytes [0x34, 0x12] at
        // offset 2..4.
        let h = MxReferenceHandle {
            platform_id: 0x1234,
            ..MxReferenceHandle::default()
        };
        let encoded = h.encode();
        assert_eq!(encoded[2], 0x34);
        assert_eq!(encoded[3], 0x12);
    }

    #[test]
    fn write_to_leaves_trailing_bytes_untouched() {
        let handle =
            MxReferenceHandle::from_names(9, 8, 7, 6, "Obj", 5, 4, 3, "Attr", true).unwrap();
        let mut buffer = [0xAAu8; 24];
        handle.write_to(&mut buffer);
        assert_eq!(buffer[..20], handle.encode());
        assert_eq!(buffer[20..], [0xAA; 4]);
    }

    #[test]
    #[should_panic(expected = "destination must be at least 20 bytes")]
    fn write_to_panics_on_short_destination() {
        let mut short = [0u8; 19];
        MxReferenceHandle::default().write_to(&mut short);
    }
}