//! Parser for the SQL `roles` blob attached to `dbo.user_profile`.
//!
//! Direct port of `ParseRoleBlob` at
//! `src/MxNativeClient/GalaxyRepositoryUserResolver.cs:87-133`.
//!
//! ## Wire format
//!
//! The Galaxy DB stores the user-roles set as a `varbinary` column whose
//! `CONVERT(nvarchar(max), roles)` projection produces a hex-string of the
//! raw bytes (with `0x` prefix). The bytes themselves are a packed
//! sequence of UTF-16LE role names, each terminated by `0x00 0x00` (the
//! UTF-16 NUL character), followed by another `0x00 0x00` (the role-list
//! separator).
//!
//! There is no length prefix and no count; the .NET reference walks the
//! buffer with a sliding window, emitting each printable-ASCII UTF-16LE
//! string of length ≥ 2 that ends in a double-null. Sub-windows that
//! produce a non-printable code unit (anything outside `0x20..=0x7E`) are
//! discarded — this naturally skips garbage between roles.
//!
//! Roles are deduplicated case-insensitively (`StringComparer.OrdinalIgnoreCase`
//! at `cs:124`).
//!
//! ## Why this is a separate module
//!
//! The .NET reference inlines the parser as a `private static`. The Rust
//! port lifts it because (a) it has interesting failure modes worth
//! testing in isolation and (b) future SQL backends (the planned
//! `tiberius`-gated `UserResolver` impl, snapshot-replay test harnesses)
//! all need to call it the same way.

/// Parse a hex-encoded role blob. Returns the deduplicated list of role
/// names in discovery order. Mirrors `ParseRoleBlob` (`cs:87-133`).
///
/// Behavior:
///
/// - Input that doesn't start with `0x`/`0X` (case-insensitive per
///   `StringComparison.OrdinalIgnoreCase` at `cs:89`) returns `[]`.
/// - Input that decodes to fewer than 4 bytes (8 hex chars after the
///   prefix) returns `[]`: the scan loop needs at least four bytes to
///   start, and an actual role needs at least six (two UTF-16LE code units
///   plus the `0x00 0x00` terminator).
/// - Hex-decoding failures (odd length, non-hex character) return `[]`.
///   The .NET reference would throw `FormatException` from
///   `Convert.FromHexString` at that point; the Rust port reports the
///   failure as an empty list because every caller expects "unknown" to
///   mean "no roles" — there's no way to distinguish "user has no roles"
///   from "user has malformed roles" upstream.
///
/// The function never panics, including on non-ASCII input.
#[must_use]
pub fn parse_role_blob(roles_text: &str) -> Vec<String> {
    // `get(..2)` is `None` both when the input is shorter than two bytes and
    // when byte 2 is not a char boundary, so no separate length check is
    // needed. Once the prefix matched it is ASCII, hence `get(2..)` succeeds.
    let decoded = roles_text
        .get(..2)
        .filter(|prefix| prefix.eq_ignore_ascii_case("0x"))
        .and_then(|_| roles_text.get(2..))
        .and_then(hex_decode);
    let Some(bytes) = decoded else {
        return Vec::new();
    };

    let mut roles: Vec<String> = Vec::new();
    let mut offset: usize = 0;
    // Same bound as the .NET outer loop: at least four bytes must remain.
    while offset + 3 < bytes.len() {
        let Some((role, terminator)) = scan_candidate(&bytes, offset) else {
            // No role starts here; slide the window by a single byte so a
            // role at the other 2-byte alignment is still found.
            offset += 1;
            continue;
        };

        // Deduplicate case-insensitively (`StringComparer.OrdinalIgnoreCase`
        // at cs:124); the first spelling seen wins.
        if !roles.iter().any(|seen| seen.eq_ignore_ascii_case(&role)) {
            roles.push(role);
        }

        // Jump the outer offset past the matched role. The .NET reference
        // does `offset = cursor; offset++` (the `++` is the `for`-loop
        // increment) — net effect: the next iteration starts at
        // `cursor + 1`, which is the second byte of the terminator. This
        // deliberately re-scans starting from the "wrong" alignment so the
        // parser tolerates packed bytes that happen to look like a partial
        // role on the offset-by-one slot.
        offset = terminator + 1;
    }
    roles
}

/// Scan one candidate role beginning at byte `start` of `bytes`.
///
/// Reads UTF-16LE code units in 2-byte steps (the inner loop at
/// cs:100-116) and returns `Some((role, terminator))` — where `terminator`
/// is the index of the first byte of the closing `0x00 0x00` pair — only
/// when at least two printable-ASCII characters are followed by that pair
/// (the check at cs:118-121).
///
/// Returns `None` when a code unit outside `0x20..=0x7E` is met, when the
/// buffer ends before a terminator is seen, or when fewer than two
/// characters precede the terminator.
fn scan_candidate(bytes: &[u8], start: usize) -> Option<(String, usize)> {
    let mut role = String::new();
    for (index, pair) in bytes.get(start..)?.chunks_exact(2).enumerate() {
        match *pair {
            // UTF-16 NUL: end of the candidate.
            [0, 0] => {
                let terminator = start + 2 * index;
                return (role.len() >= 2).then_some((role, terminator));
            }
            // Printable ASCII in UTF-16LE: low byte in range, high byte zero.
            [ascii @ 0x20..=0x7e, 0] => role.push(char::from(ascii)),
            // Anything else discards the whole candidate.
            _ => return None,
        }
    }
    // Ran off the end of the buffer without seeing a terminator.
    None
}

/// Hex-decode `hex` (no `0x` prefix). Returns `None` on odd length or on
/// any non-hex character; the empty string decodes to an empty vector.
/// Mirrors `Convert.FromHexString` at `cs:94`. Pure-Rust to avoid pulling
/// `hex` as a dep.
fn hex_decode(hex: &str) -> Option<Vec<u8>> {
    let digits = hex.as_bytes();
    if digits.len() % 2 != 0 {
        return None;
    }
    digits
        .chunks_exact(2)
        .map(|pair| match *pair {
            [hi, lo] => Some((nibble(hi)? << 4) | nibble(lo)?),
            // `chunks_exact(2)` only yields two-element chunks.
            _ => None,
        })
        .collect()
}

/// Value of a single ASCII hex digit (either case), or `None` for any
/// other byte.
fn nibble(byte: u8) -> Option<u8> {
    match byte {
        b'0'..=b'9' => Some(byte - b'0'),
        b'a'..=b'f' => Some(byte - b'a' + 10),
        b'A'..=b'F' => Some(byte - b'A' + 10),
        _ => None,
    }
}

#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::indexing_slicing,
    clippy::panic
)]
mod tests {
    use super::*;
    use std::fmt::Write as _;

    /// UTF-16LE bytes of `text`, without any terminator.
    fn utf16le(text: &str) -> Vec<u8> {
        text.encode_utf16().flat_map(u16::to_le_bytes).collect()
    }

    /// Format `bytes` as a `0x`-prefixed upper-case hex string.
    fn to_hex(bytes: &[u8]) -> String {
        let mut hex = String::from("0x");
        for byte in bytes {
            write!(hex, "{byte:02X}").unwrap();
        }
        hex
    }

    /// Encode a sequence of role strings + a trailing 0x00 0x00 separator
    /// into the on-wire byte format, then format as a `0x`-prefixed hex
    /// string. Used to build test inputs.
    fn encode_roles(roles: &[&str]) -> String {
        let mut out: Vec<u8> = Vec::new();
        for role in roles {
            out.extend(utf16le(role));
            out.extend_from_slice(&[0, 0]);
        }
        // Trailing 0x00 0x00 role-list separator after the last role.
        out.extend_from_slice(&[0, 0]);
        to_hex(&out)
    }

    #[test]
    fn empty_input_returns_empty_list() {
        assert_eq!(parse_role_blob(""), Vec::<String>::new());
    }

    #[test]
    fn missing_0x_prefix_returns_empty_list() {
        // Even a syntactically-valid hex string without 0x is treated as
        // garbage per cs:89.
        assert_eq!(parse_role_blob("DEADBEEF"), Vec::<String>::new());
    }

    #[test]
    fn just_0x_prefix_returns_empty_list() {
        assert_eq!(parse_role_blob("0x"), Vec::<String>::new());
    }

    #[test]
    fn upper_and_lower_case_0x_prefix_both_accepted() {
        // .NET uses StringComparison.OrdinalIgnoreCase at cs:89.
        let lower = encode_roles(&["Op"]);
        let upper = lower.replacen("0x", "0X", 1);
        assert_eq!(parse_role_blob(&lower), parse_role_blob(&upper));
    }

    #[test]
    fn parses_single_role() {
        let input = encode_roles(&["Operator"]);
        assert_eq!(parse_role_blob(&input), vec!["Operator".to_string()]);
    }

    #[test]
    fn parses_two_distinct_roles() {
        let input = encode_roles(&["Operator", "Owner"]);
        let parsed = parse_role_blob(&input);
        assert!(parsed.contains(&"Operator".to_string()));
        assert!(parsed.contains(&"Owner".to_string()));
    }

    #[test]
    fn deduplicates_case_insensitively() {
        // Both "Operator" and "operator" appear in the buffer; only the
        // first wins. Mirrors StringComparer.OrdinalIgnoreCase at cs:124.
        let input = encode_roles(&["Operator", "operator"]);
        let parsed = parse_role_blob(&input);
        assert_eq!(parsed, vec!["Operator".to_string()]);
    }

    #[test]
    fn skips_single_char_candidates() {
        // chars.Count < 2 fails the role_ok check at cs:118; single-char
        // role "A" is dropped.
        let input = encode_roles(&["A", "Owner"]);
        let parsed = parse_role_blob(&input);
        assert_eq!(parsed, vec!["Owner".to_string()]);
    }

    #[test]
    fn rejects_role_containing_non_printable() {
        // Build bytes manually: "Op\x01" + 0x00 0x00 + "Owner" + 0x00 0x00.
        // The 0x01 in the first role (a control character) trips the
        // chars.Clear() branch at cs:108-112; the parser then continues
        // scanning offset+1 forward and eventually finds "Owner".
        let mut bytes = utf16le("Op");
        // \x01 (non-printable u16 = 0x0001).
        bytes.extend_from_slice(&[0x01, 0x00]);
        bytes.extend_from_slice(&[0, 0]);
        bytes.extend(utf16le("Owner"));
        bytes.extend_from_slice(&[0, 0, 0, 0]);

        let parsed = parse_role_blob(&to_hex(&bytes));
        assert!(parsed.contains(&"Owner".to_string()));
        assert!(!parsed.iter().any(|r| r.contains("Op")));
    }

    #[test]
    fn malformed_hex_returns_empty_list() {
        // Odd-length hex.
        assert_eq!(parse_role_blob("0xABC"), Vec::<String>::new());
        // Non-hex char.
        assert_eq!(parse_role_blob("0xAGG"), Vec::<String>::new());
    }

    #[test]
    fn hex_decode_helper_round_trip() {
        assert_eq!(hex_decode("4D454F57"), Some(vec![0x4D, 0x45, 0x4F, 0x57]));
        assert_eq!(hex_decode("deadbeef"), Some(vec![0xDE, 0xAD, 0xBE, 0xEF]));
        assert_eq!(hex_decode("DeAdBeEf"), Some(vec![0xDE, 0xAD, 0xBE, 0xEF]));
        assert_eq!(hex_decode(""), Some(Vec::new()));
        assert_eq!(hex_decode("ABC"), None); // odd length
        assert_eq!(hex_decode("ZZ"), None); // non-hex
    }

    #[test]
    fn long_blob_with_garbage_between_roles_still_parses() {
        // 4 bytes of garbage between two valid roles. The parser's
        // sliding window should skip the garbage and pick up the second role.
        let mut bytes = utf16le("Operator");
        bytes.extend_from_slice(&[0, 0]);
        // Garbage: it still gets scanned at every byte alignment, but no
        // alignment yields printable code units that pass the role check.
        bytes.extend_from_slice(&[0xFF, 0x01, 0x80, 0xAB]);
        bytes.extend(utf16le("Owner"));
        bytes.extend_from_slice(&[0, 0, 0, 0]);

        let parsed = parse_role_blob(&to_hex(&bytes));
        assert!(parsed.contains(&"Operator".to_string()));
        assert!(parsed.contains(&"Owner".to_string()));
    }
}