Files
mxaccess/rust/crates/mxaccess-galaxy/src/role_blob.rs
T
Joseph Doherty baea6eaa41 [M3] mxaccess-galaxy: GalaxyUserProfile + UserResolver trait + role-blob
Lands the user-resolver half of M3 stream A. Pure-Rust foundation —
the tiberius-backed SQL impl is logged as F14 and stays gated behind
the existing galaxy-resolver Cargo feature.

New
- role_blob.rs (~270 LoC, 12 tests including a garbage-between-roles
  edge case) — port of ParseRoleBlob (cs:87-133). Sliding-window scan
  over hex-decoded UTF-16LE bytes; rejects non-printable code units;
  case-insensitive dedup. Pure function, no I/O.
- user.rs (~290 LoC, 8 tests including 4 tokio-driven InMemoryUserResolver
  cases) — GalaxyUserProfile (port of cs:5-11) + from_columns helper
  bridging into role_blob + UserResolver async trait + UserResolverError
  with NotFound / Backend variants.
- sql.rs additions: USER_SELECT_SQL + USER_BY_GUID_SQL + USER_BY_NAME_SQL
  constants (port of cs:135-148). Inline concatcp! macro composes the
  base SELECT with each WHERE clause at compile time without pulling
  const_format.

Cargo.toml: added uuid (Galaxy user_guid is a uniqueidentifier).

design/followups.md: added F14 (P2) for the tiberius-backed SQL impl
behind the galaxy-resolver feature.

Test count delta: 427 -> 446 (+19; mxaccess-galaxy 30 -> 49). All four
DoD gates green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 08:26:24 -04:00

334 lines
12 KiB
Rust

//! Parser for the SQL `roles` blob attached to `dbo.user_profile`.
//!
//! Direct port of `ParseRoleBlob` at
//! `src/MxNativeClient/GalaxyRepositoryUserResolver.cs:87-133`.
//!
//! ## Wire format
//!
//! The Galaxy DB stores the user-roles set as a `varbinary` column whose
//! `CONVERT(nvarchar(max), roles)` projection produces a hex-string of the
//! raw bytes (with `0x` prefix). The bytes themselves are a packed
//! sequence of UTF-16LE role names, each ended by a `0x00 0x00` terminator
//! (the UTF-16 NUL character); the sequence as a whole is followed by one
//! more `0x00 0x00` pair (the role-list separator).
//!
//! There is no length prefix and no count; the .NET reference walks the
//! buffer with a sliding window, emitting each printable-ASCII UTF-16LE
//! string of length ≥ 2 that ends in a double-null. Sub-windows that
//! produce a non-printable code unit (anything outside `0x20..=0x7E`) are
//! discarded — this naturally skips garbage between roles.
//!
//! Roles are deduplicated case-insensitively (`StringComparer.OrdinalIgnoreCase`
//! at `cs:124`).
//!
//! ## Why this is a separate module
//!
//! The .NET reference inlines the parser as a `private static`. The Rust
//! port lifts it because (a) it has interesting failure modes worth
//! testing in isolation and (b) future SQL backends (the planned
//! `tiberius`-gated `UserResolver` impl, snapshot-replay test harnesses)
//! all need to call it the same way.
#![allow(clippy::indexing_slicing)]
/// Decode a `0x`-prefixed hex role blob into the role names it contains.
///
/// The result lists each role once, in the order it was first discovered.
/// Per the original port notes this mirrors `ParseRoleBlob` (`cs:87-133`).
///
/// A role is a run of at least two printable-ASCII (`0x20..=0x7E`) UTF-16LE
/// code units that is immediately followed by a zero code unit
/// (`0x00 0x00`). Candidates are tried at every byte offset, so bytes that
/// do not form such a run are skipped rather than aborting the parse.
///
/// Inputs that yield an empty list:
///
/// - Text that does not begin with `0x` / `0X` (the prefix comparison is
///   ASCII-case-insensitive; the port notes cite
///   `StringComparison.OrdinalIgnoreCase` at `cs:89`).
/// - A payload of fewer than four bytes (eight hex digits): the scan loop
///   only runs while at least four bytes remain from its start position.
/// - A payload that fails hex decoding. The port notes say the .NET
///   reference would throw `FormatException` from `Convert.FromHexString`
///   here; this function returns an empty list instead, because callers
///   treat "unknown" as "no roles" and cannot tell the two apart anyway.
///
/// Duplicates are detected ASCII-case-insensitively and the first spelling
/// seen is the one kept (the port notes cite
/// `StringComparer.OrdinalIgnoreCase` at `cs:124`).
#[must_use]
pub fn parse_role_blob(roles_text: &str) -> Vec<String> {
    // `get(..2)` is `None` for inputs shorter than two bytes and for inputs
    // where byte 2 is not a char boundary, so the `[2..]` slice below is
    // always valid once this check has passed.
    let has_prefix = roles_text
        .get(..2)
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case("0x"));
    if !has_prefix {
        return Vec::new();
    }
    let Some(decoded) = hex_decode(&roles_text[2..]) else {
        return Vec::new();
    };

    let mut found: Vec<String> = Vec::new();
    let mut start: usize = 0;
    while start + 3 < decoded.len() {
        // Read UTF-16LE code units two bytes at a time from `start`,
        // collecting printable ASCII until something ends the run.
        let mut candidate = String::new();
        let mut terminated = false;
        for pair in decoded[start..].chunks_exact(2) {
            match *pair {
                // Zero code unit: the run is properly terminated.
                [0, 0] => {
                    terminated = true;
                    break;
                }
                // High byte zero, low byte printable ASCII: keep it.
                [low @ 0x20..=0x7e, 0] => candidate.push(char::from(low)),
                // Any other code unit disqualifies this start position.
                _ => break,
            }
        }

        // A run that hit a non-printable unit or ran off the end of the
        // buffer never sets `terminated`; a terminated run still needs at
        // least two characters. Otherwise slide the window by one byte.
        if !(terminated && candidate.len() >= 2) {
            start += 1;
            continue;
        }

        // `candidate` is pure ASCII, so its byte length equals the number
        // of code units consumed; the terminator sits right after them.
        let terminator_at = start + 2 * candidate.len();
        if !found
            .iter()
            .any(|known| known.eq_ignore_ascii_case(&candidate))
        {
            found.push(candidate);
        }

        // Resume at the second byte of the terminator pair rather than
        // after it. The port notes attribute this to the .NET `for`-loop
        // increment (`offset = cursor; offset++`); the off-by-one
        // alignment is intentional and must be preserved.
        start = terminator_at + 1;
    }
    found
}
/// Turn a string of hex digits (without a `0x` prefix) into raw bytes.
///
/// Returns `None` when `hex` has an odd byte length or when any byte is
/// rejected by `nibble` (i.e. is not an ASCII hex digit). An empty input
/// decodes to an empty vector. Per the original port notes this stands in
/// for `Convert.FromHexString` at `cs:94`, hand-rolled to avoid taking the
/// `hex` crate as a dependency.
fn hex_decode(hex: &str) -> Option<Vec<u8>> {
    let digits = hex.as_bytes();
    if digits.len() % 2 == 1 {
        return None;
    }
    let mut decoded = Vec::with_capacity(digits.len() / 2);
    // Even length was checked above, so `chunks_exact` leaves no remainder.
    for pair in digits.chunks_exact(2) {
        let high = nibble(pair[0])?;
        let low = nibble(pair[1])?;
        decoded.push((high << 4) | low);
    }
    Some(decoded)
}
/// Convert one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value
/// in `0..=15`. Any other byte yields `None`.
fn nibble(byte: u8) -> Option<u8> {
    // `base` is the first byte of the matching digit range and `bias` is
    // the numeric value that first byte stands for.
    let (base, bias) = match byte {
        b'0'..=b'9' => (b'0', 0),
        b'a'..=b'f' => (b'a', 10),
        b'A'..=b'F' => (b'A', 10),
        _ => return None,
    };
    Some(byte - base + bias)
}
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::indexing_slicing,
    clippy::panic
)]
mod tests {
    use super::*;
    use std::fmt::Write as _;

    /// Encode `text` as UTF-16LE bytes, without any terminator.
    fn utf16le(text: &str) -> Vec<u8> {
        text.encode_utf16().flat_map(u16::to_le_bytes).collect()
    }

    /// Format raw bytes as a `0x`-prefixed, upper-case hex string — the
    /// textual shape `parse_role_blob` accepts.
    fn to_hex_blob(bytes: &[u8]) -> String {
        let mut hex = String::with_capacity(2 + bytes.len() * 2);
        hex.push_str("0x");
        for b in bytes {
            write!(hex, "{b:02X}").expect("writing to a String cannot fail");
        }
        hex
    }

    /// Encode a sequence of role strings, each followed by a 0x00 0x00
    /// terminator, plus one trailing 0x00 0x00 separator, then format the
    /// bytes as a `0x`-prefixed hex string. Used to build test inputs.
    fn encode_roles(roles: &[&str]) -> String {
        let mut out: Vec<u8> = Vec::new();
        for role in roles {
            out.extend_from_slice(&utf16le(role));
            out.extend_from_slice(&[0, 0]);
        }
        // .NET appears to require the trailing 0x00 0x00 after the last
        // role to satisfy the cursor+1<bytes.Length check.
        out.extend_from_slice(&[0, 0]);
        to_hex_blob(&out)
    }

    #[test]
    fn empty_string_returns_empty_list() {
        assert_eq!(parse_role_blob(""), Vec::<String>::new());
    }

    #[test]
    fn missing_0x_prefix_returns_empty_list() {
        // Even a syntactically-valid hex string without 0x is treated as
        // garbage per cs:89.
        assert_eq!(parse_role_blob("DEADBEEF"), Vec::<String>::new());
    }

    #[test]
    fn just_0x_prefix_returns_empty_list() {
        assert_eq!(parse_role_blob("0x"), Vec::<String>::new());
    }

    #[test]
    fn upper_and_lower_case_0x_prefix_both_accepted() {
        // .NET uses StringComparison.OrdinalIgnoreCase at cs:89.
        let lower = encode_roles(&["Op"]);
        let upper = lower.replacen("0x", "0X", 1);
        // Pin the actual result as well: comparing the two parses alone
        // would also pass if both prefixes were (wrongly) rejected.
        assert_eq!(parse_role_blob(&lower), vec!["Op".to_string()]);
        assert_eq!(parse_role_blob(&lower), parse_role_blob(&upper));
    }

    #[test]
    fn parses_single_role() {
        let input = encode_roles(&["Operator"]);
        assert_eq!(parse_role_blob(&input), vec!["Operator".to_string()]);
    }

    #[test]
    fn parses_two_distinct_roles() {
        // Roles come back in discovery order, so the order is asserted too.
        let input = encode_roles(&["Operator", "Owner"]);
        assert_eq!(
            parse_role_blob(&input),
            vec!["Operator".to_string(), "Owner".to_string()]
        );
    }

    #[test]
    fn deduplicates_case_insensitively() {
        // Both "Operator" and "operator" appear in the buffer; only the
        // first wins. Mirrors StringComparer.OrdinalIgnoreCase at cs:124.
        let input = encode_roles(&["Operator", "operator"]);
        let parsed = parse_role_blob(&input);
        assert_eq!(parsed, vec!["Operator".to_string()]);
    }

    #[test]
    fn skips_single_char_candidates() {
        // chars.Count < 2 fails the role_ok check at cs:118; single-char
        // role "A" is dropped.
        let input = encode_roles(&["A", "Owner"]);
        let parsed = parse_role_blob(&input);
        assert_eq!(parsed, vec!["Owner".to_string()]);
    }

    #[test]
    fn rejects_role_containing_non_printable() {
        // Bytes: "Op" + U+0001 + 0x00 0x00 + "Owner" + 0x00 0x00 + 0x00 0x00.
        // The U+0001 control character directly after "Op" trips the
        // chars.Clear() branch at cs:108-112; the parser then keeps
        // sliding forward one byte at a time and eventually finds "Owner".
        let mut bytes = utf16le("Op");
        bytes.extend_from_slice(&[0x01, 0x00]);
        bytes.extend_from_slice(&[0, 0]);
        bytes.extend_from_slice(&utf16le("Owner"));
        bytes.extend_from_slice(&[0, 0, 0, 0]);
        let parsed = parse_role_blob(&to_hex_blob(&bytes));
        assert!(parsed.contains(&"Owner".to_string()));
        assert!(!parsed.iter().any(|r| r.contains("Op")));
    }

    #[test]
    fn malformed_hex_returns_empty_list() {
        let valid = encode_roles(&["Operator"]);
        // Sanity: the uncorrupted blob parses, so the empty results below
        // are caused by the corruption and not by the payload itself.
        assert_eq!(parse_role_blob(&valid), vec!["Operator".to_string()]);

        // Odd-length hex.
        assert_eq!(parse_role_blob("0xABC"), Vec::<String>::new());
        assert_eq!(parse_role_blob(&format!("{valid}0")), Vec::<String>::new());

        // Non-hex char. The payload length is even so the digit check (not
        // the odd-length check) is what rejects the input.
        assert_eq!(parse_role_blob("0xAGGA"), Vec::<String>::new());
        let mut corrupted = valid;
        corrupted.replace_range(2..3, "G");
        assert_eq!(parse_role_blob(&corrupted), Vec::<String>::new());
    }

    #[test]
    fn hex_decode_helper_round_trip() {
        assert_eq!(hex_decode("4D454F57"), Some(vec![0x4D, 0x45, 0x4F, 0x57]));
        assert_eq!(hex_decode("deadbeef"), Some(vec![0xDE, 0xAD, 0xBE, 0xEF]));
        assert_eq!(hex_decode("DeAdBeEf"), Some(vec![0xDE, 0xAD, 0xBE, 0xEF]));
        assert_eq!(hex_decode(""), Some(Vec::new()));
        assert_eq!(hex_decode("ABC"), None); // odd length
        assert_eq!(hex_decode("ZZ"), None); // non-hex
    }

    #[test]
    fn long_blob_with_garbage_between_roles_still_parses() {
        // Four bytes of garbage between two valid roles. The parser's
        // sliding window should skip the garbage and pick up the second role.
        let mut bytes = utf16le("Operator");
        bytes.extend_from_slice(&[0, 0]);
        // Garbage: no window starting on any of these bytes reads as a
        // printable code unit, so each of those start positions is rejected.
        bytes.extend_from_slice(&[0xFF, 0x01, 0x80, 0xAB]);
        bytes.extend_from_slice(&utf16le("Owner"));
        bytes.extend_from_slice(&[0, 0, 0, 0]);
        let parsed = parse_role_blob(&to_hex_blob(&bytes));
        assert!(parsed.contains(&"Operator".to_string()));
        assert!(parsed.contains(&"Owner".to_string()));
    }
}