mxaccess/rust/crates/mxaccess-galaxy/src/role_blob.rs

//! Parser for the SQL `roles` blob attached to `dbo.user_profile`.
//!
//! Direct port of `ParseRoleBlob` at
//! `src/MxNativeClient/GalaxyRepositoryUserResolver.cs:87-133`.
//!
//! ## Wire format
//!
//! The Galaxy DB stores the user-roles set as a `varbinary` column whose
//! `CONVERT(nvarchar(max), roles)` projection produces a hex-string of the
//! raw bytes (with `0x` prefix). The bytes themselves are a packed
//! sequence of UTF-16LE role names separated by `0x00 0x00` terminators
//! (the UTF-16 NUL character) followed by another `0x00 0x00` (the role-list
//! separator).
//!
//! There is no length prefix and no count; the .NET reference walks the
//! buffer with a sliding window, emitting each printable-ASCII UTF-16LE
//! string of length ≥ 2 that ends in a double-null. Sub-windows that
//! produce a non-printable code unit (anything outside `0x20..=0x7E`) are
//! discarded — this naturally skips garbage between roles.
//!
//! Roles are deduplicated case-insensitively (`StringComparer.OrdinalIgnoreCase`
//! at `cs:124`).
//!
//! ## Why this is a separate module
//!
//! The .NET reference inlines the parser as a `private static`. The Rust
//! port lifts it because (a) it has interesting failure modes worth
//! testing in isolation and (b) future SQL backends (the planned
//! `tiberius`-gated `UserResolver` impl, snapshot-replay test harnesses)
//! all need to call it the same way.

#![allow(clippy::indexing_slicing)]

/// Decode a `0x`-prefixed hex role blob into its role names.
///
/// Each distinct role appears once in the result, in the order it is first
/// discovered in the buffer. Mirrors `ParseRoleBlob` (`cs:87-133`).
///
/// The decoded bytes are scanned with a sliding window: at every byte
/// offset the parser tries to read a run of printable-ASCII UTF-16LE code
/// units (`0x20..=0x7E`) closed by a `0x00 0x00` terminator. Runs shorter
/// than two characters, runs interrupted by any other code unit, and runs
/// that hit the end of the buffer without a terminator are rejected, and
/// the window then advances by a single byte.
///
/// An empty list is returned when:
///
/// - the input does not begin with `0x` / `0X` (the .NET reference compares
///   with `StringComparison.OrdinalIgnoreCase` at `cs:89`);
/// - the payload decodes to fewer than 4 bytes (8 hex chars), which is too
///   short to be scanned at all;
/// - the payload is not valid hex. The .NET reference would throw
///   `FormatException` from `Convert.FromHexString` here; this port
///   returns an empty list instead, because callers treat "unknown" as
///   "no roles" and cannot tell "user has no roles" apart from "user has
///   malformed roles" anyway.
#[must_use]
pub fn parse_role_blob(roles_text: &str) -> Vec<String> {
    // `get` yields `None` both when the input is shorter than two bytes and
    // when byte 2 is not a char boundary, so no separate length test is
    // required before splitting off the prefix.
    let (Some(prefix), Some(payload)) = (roles_text.get(..2), roles_text.get(2..)) else {
        return Vec::new();
    };
    if !prefix.eq_ignore_ascii_case("0x") {
        return Vec::new();
    }
    let Some(bytes) = hex_decode(payload) else {
        return Vec::new();
    };

    let mut found: Vec<String> = Vec::new();
    let mut start: usize = 0;
    while start + 3 < bytes.len() {
        // Read UTF-16LE code units from `start` (the inner loop at
        // cs:100-116). `chunks_exact` stops by itself once fewer than two
        // bytes remain, which is the "ran off the end" case.
        let mut candidate = String::new();
        let mut terminated = false;
        for unit in bytes[start..].chunks_exact(2) {
            match *unit {
                // 0x0000: the terminator that closes a role name.
                [0, 0] => {
                    terminated = true;
                    break;
                }
                // High byte zero and low byte printable ASCII: keep it.
                [ascii @ 0x20..=0x7e, 0] => candidate.push(char::from(ascii)),
                // Any other code unit disqualifies the whole candidate.
                _ => break,
            }
        }

        // Acceptance rule (cs:118-121): at least two characters, closed by
        // a terminator that lies inside the buffer.
        if terminated && candidate.len() >= 2 {
            // The terminator sits `2 * len` bytes after `start` (the name is
            // pure ASCII, so its byte length equals its character count).
            // Resume one byte past the terminator's first byte, matching
            // the reference's `offset = cursor` followed by the loop's own
            // `offset++`; the scan deliberately restarts on the "wrong"
            // alignment and re-synchronises from there.
            start += 2 * candidate.len() + 1;

            // Case-insensitive de-duplication
            // (`StringComparer.OrdinalIgnoreCase` at cs:124); the first
            // spelling seen wins.
            let is_new = found
                .iter()
                .all(|known| !known.eq_ignore_ascii_case(&candidate));
            if is_new {
                found.push(candidate);
            }
        } else {
            start += 1;
        }
    }

    found
}

/// Decode a string of hex digits (without the `0x` prefix) into bytes.
///
/// Returns `None` when the input has an odd number of bytes or contains
/// anything other than `0-9`, `a-f`, `A-F`. An empty input decodes to an
/// empty vector. Mirrors `Convert.FromHexString` at `cs:94`. Pure-Rust to
/// avoid pulling `hex` as a dep.
fn hex_decode(hex: &str) -> Option<Vec<u8>> {
    let digits = hex.as_bytes();
    if digits.len() % 2 == 1 {
        return None;
    }

    let mut decoded = Vec::with_capacity(digits.len() / 2);
    for pair in digits.chunks_exact(2) {
        // `chunks_exact(2)` only ever yields two-byte slices; the `else`
        // arm exists solely to satisfy the refutable pattern.
        let &[high, low] = pair else {
            return None;
        };
        decoded.push((nibble(high)? << 4) | nibble(low)?);
    }
    Some(decoded)
}

/// Map one ASCII hex digit to its numeric value (`0..=15`).
///
/// Accepts `0-9`, `a-f` and `A-F`; every other byte yields `None`.
fn nibble(byte: u8) -> Option<u8> {
    if byte.is_ascii_digit() {
        return Some(byte - b'0');
    }
    // Fold `A-F` onto `a-f` so a single range check covers both cases.
    let folded = byte.to_ascii_lowercase();
    // `then` (lazy) rather than `then_some`: the subtraction must not be
    // evaluated for bytes below `b'a'`, where it would underflow.
    (b'a'..=b'f')
        .contains(&folded)
        .then(|| folded - b'a' + 10)
}

#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::indexing_slicing,
    clippy::panic
)]
mod tests {
    use super::*;
    use std::fmt::Write as _;

    /// UTF-16LE bytes of `text` followed by the `0x00 0x00` terminator that
    /// closes a role name on the wire.
    fn terminated_utf16le(text: &str) -> Vec<u8> {
        text.encode_utf16()
            .chain(std::iter::once(0))
            .flat_map(u16::to_le_bytes)
            .collect()
    }

    /// Format raw bytes as the `0x`-prefixed upper-case hex string that
    /// `parse_role_blob` takes as input.
    fn to_hex(bytes: &[u8]) -> String {
        let mut hex = String::with_capacity(2 + bytes.len() * 2);
        hex.push_str("0x");
        for b in bytes {
            write!(hex, "{b:02X}").expect("writing to a String cannot fail");
        }
        hex
    }

    /// Encode a sequence of role strings, each with its own terminator,
    /// plus the trailing `0x00 0x00` role-list separator, as a
    /// `0x`-prefixed hex string. Used to build test inputs.
    fn encode_roles(roles: &[&str]) -> String {
        let mut out: Vec<u8> = roles
            .iter()
            .flat_map(|role| terminated_utf16le(role))
            .collect();
        // Role-list separator that follows the last role on the wire.
        out.extend_from_slice(&[0, 0]);
        to_hex(&out)
    }

    #[test]
    fn empty_string_returns_empty_list() {
        assert_eq!(parse_role_blob(""), Vec::<String>::new());
    }

    #[test]
    fn missing_0x_prefix_returns_empty_list() {
        // Even a syntactically-valid hex string without 0x is treated as
        // garbage per cs:89.
        assert_eq!(parse_role_blob("DEADBEEF"), Vec::<String>::new());
    }

    #[test]
    fn just_0x_prefix_returns_empty_list() {
        assert_eq!(parse_role_blob("0x"), Vec::<String>::new());
    }

    #[test]
    fn upper_and_lower_case_0x_prefix_both_accepted() {
        // .NET uses StringComparison.OrdinalIgnoreCase at cs:89.
        let lower = encode_roles(&["Op"]);
        let upper = lower.replacen("0x", "0X", 1);
        // Pin the expected value for both spellings; comparing the two
        // results only to each other would also pass if both were empty.
        assert_eq!(parse_role_blob(&lower), vec!["Op".to_string()]);
        assert_eq!(parse_role_blob(&upper), vec!["Op".to_string()]);
    }

    #[test]
    fn parses_single_role() {
        let input = encode_roles(&["Operator"]);
        assert_eq!(parse_role_blob(&input), vec!["Operator".to_string()]);
    }

    #[test]
    fn parses_two_distinct_roles() {
        // Exact comparison: roles must come back in discovery order.
        let input = encode_roles(&["Operator", "Owner"]);
        assert_eq!(
            parse_role_blob(&input),
            vec!["Operator".to_string(), "Owner".to_string()]
        );
    }

    #[test]
    fn deduplicates_case_insensitively() {
        // Both "Operator" and "operator" appear in the buffer; only the
        // first wins. Mirrors StringComparer.OrdinalIgnoreCase at cs:124.
        let input = encode_roles(&["Operator", "operator"]);
        let parsed = parse_role_blob(&input);
        assert_eq!(parsed, vec!["Operator".to_string()]);
    }

    #[test]
    fn skips_single_char_candidates() {
        // chars.Count < 2 fails the role_ok check at cs:118; single-char
        // role "A" is dropped.
        let input = encode_roles(&["A", "Owner"]);
        let parsed = parse_role_blob(&input);
        assert_eq!(parsed, vec!["Owner".to_string()]);
    }

    #[test]
    fn rejects_role_containing_non_printable() {
        // Layout: "Op\x01" + 0x00 0x00 + "Owner" + 0x00 0x00 + 0x00 0x00.
        // The U+0001 control character in the first candidate trips the
        // chars.Clear() branch at cs:108-112; the parser then keeps sliding
        // forward one byte at a time and eventually finds "Owner".
        let mut bytes = terminated_utf16le("Op\u{1}");
        bytes.extend(terminated_utf16le("Owner"));
        bytes.extend_from_slice(&[0, 0]);
        let parsed = parse_role_blob(&to_hex(&bytes));
        assert!(parsed.contains(&"Owner".to_string()));
        assert!(!parsed.iter().any(|r| r.contains("Op")));
    }

    #[test]
    fn malformed_hex_returns_empty_list() {
        // Odd-length hex.
        assert_eq!(parse_role_blob("0xABC"), Vec::<String>::new());
        // Non-hex char in an even-length payload, so the odd-length check
        // cannot mask the digit check.
        assert_eq!(parse_role_blob("0xAG"), Vec::<String>::new());
        // Odd length and a non-hex char at the same time.
        assert_eq!(parse_role_blob("0xAGG"), Vec::<String>::new());
    }

    #[test]
    fn hex_decode_helper_round_trip() {
        assert_eq!(hex_decode("4D454F57"), Some(vec![0x4D, 0x45, 0x4F, 0x57]));
        assert_eq!(hex_decode("deadbeef"), Some(vec![0xDE, 0xAD, 0xBE, 0xEF]));
        assert_eq!(hex_decode("DeAdBeEf"), Some(vec![0xDE, 0xAD, 0xBE, 0xEF]));
        assert_eq!(hex_decode(""), Some(Vec::new()));
        assert_eq!(hex_decode("ABC"), None); // odd length
        assert_eq!(hex_decode("ZZ"), None); // non-hex
    }

    #[test]
    fn long_blob_with_garbage_between_roles_still_parses() {
        // Four bytes of garbage between two valid roles. None of the
        // windows that start inside the garbage begins with a printable
        // code unit, so the sliding window skips it and picks up the
        // second role.
        let mut bytes = terminated_utf16le("Operator");
        bytes.extend_from_slice(&[0xFF, 0x01, 0x80, 0xAB]);
        bytes.extend(terminated_utf16le("Owner"));
        bytes.extend_from_slice(&[0, 0]);
        let parsed = parse_role_blob(&to_hex(&bytes));
        assert!(parsed.contains(&"Operator".to_string()));
        assert!(parsed.contains(&"Owner".to_string()));
    }
}