using System.Text;

namespace AVEVA.Historian.ReverseEngineering.Capture;

/// <summary>A sensitive value to scrub from a captured buffer before it can be committed.</summary>
/// <param name="Name">Stable label (e.g. "host", "tag", "user") recorded in the scrub report.</param>
/// <param name="Value">The literal value to redact wherever it appears in the buffer.</param>
public sealed record CaptureSecret(string Name, string Value);

/// <summary>How many times a secret was found and redacted, per encoding.</summary>
/// <param name="Name">Label of the secret this count belongs to.</param>
/// <param name="AsciiMatches">
/// Occurrences found in the single-byte (ASCII/UTF-8) encoding. The name is kept for
/// compatibility; the pattern is UTF-8, which is byte-identical to ASCII for ASCII-only secrets.
/// </param>
/// <param name="Utf16Matches">Occurrences found in the UTF-16LE encoding.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Sum of the matches across both encodings.</summary>
    public int Total => AsciiMatches + Utf16Matches;
}

/// <summary>Result of sanitizing a captured buffer: the redacted copy plus a per-secret report.</summary>
/// <param name="Sanitized">Redacted copy of the input; same length as the input.</param>
/// <param name="Report">One entry per supplied secret, in the order the secrets were given.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Total number of redactions across every secret in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int total = 0;
            foreach (ScrubCount count in Report)
            {
                total += count.Total;
            }

            return total;
        }
    }
}

/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
/// </summary>
/// <remarks>
/// <para>
/// Each secret is matched in both ASCII/UTF-8 and UTF-16LE (the two encodings AVEVA's
/// native buffers use for embedded strings) and overwritten in place with a fixed fill byte. The
/// redaction preserves the buffer's exact length and every field offset, so the sanitized fixture
/// remains useful for byte-layout reverse engineering while carrying none of the original identity.
/// </para>
/// <para>
/// ASCII-letter matching is case-insensitive (servers may echo a tag/host in a different case than
/// requested); other bytes match exactly. Secrets shorter than <see cref="MinSecretLength"/> are
/// ignored to avoid corrupting unrelated bytes that coincidentally collide with a short value.
/// </para>
/// <para>
/// Occurrences are located in the original (unredacted) input, including overlapping ones, so a
/// secret that contains or overlaps another secret (e.g. host "plant1" and tag "plant1.tag") is
/// still redacted in full rather than leaving a fragment behind.
/// </para>
/// </remarks>
public static class ProtocolCaptureSanitizer
{
    /// <summary>Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.</summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Returns a redacted copy of <paramref name="buffer"/> in which every occurrence of every
    /// scrubbable secret (UTF-8 or UTF-16LE) is overwritten with <see cref="FillByte"/>.
    /// </summary>
    /// <param name="buffer">Captured bytes; never modified.</param>
    /// <param name="secrets">Values to redact. Empty or too-short values are reported with zero matches.</param>
    /// <returns>The redacted copy and one <see cref="ScrubCount"/> per secret, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();
        List<ScrubCount> report = new(secrets.Count);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                report.Add(new ScrubCount(secret.Name, 0, 0));
                continue;
            }

            // UTF-8 rather than Encoding.ASCII: the ASCII encoder replaces every non-ASCII
            // character with '?', so such a secret would never match its real bytes.
            // Matching runs against the pristine input so an earlier redaction cannot hide
            // an occurrence of a later (overlapping) secret.
            int narrow = RedactPattern(buffer, working, Encoding.UTF8.GetBytes(secret.Value));
            int wide = RedactPattern(buffer, working, Encoding.Unicode.GetBytes(secret.Value));
            report.Add(new ScrubCount(secret.Name, narrow, wide));
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <param name="sanitized">Buffer that is about to be written out.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A scrubbable secret is still present.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                continue;
            }

            // Same encodings as Sanitize, so the check can see everything Sanitize targets.
            bool survives =
                IndexOf(sanitized, Encoding.UTF8.GetBytes(secret.Value), 0) >= 0
                || IndexOf(sanitized, Encoding.Unicode.GetBytes(secret.Value), 0) >= 0;

            if (survives)
            {
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>True when the secret has a value of at least <see cref="MinSecretLength"/> characters.</summary>
    private static bool IsScrubbable(CaptureSecret secret) =>
        !string.IsNullOrEmpty(secret.Value) && secret.Value.Length >= MinSecretLength;

    /// <summary>
    /// Finds every occurrence of <paramref name="pattern"/> in <paramref name="original"/>
    /// (overlapping occurrences included) and fills the same range in
    /// <paramref name="destination"/> with <see cref="FillByte"/>.
    /// </summary>
    /// <returns>Number of occurrences found.</returns>
    private static int RedactPattern(ReadOnlySpan<byte> original, Span<byte> destination, ReadOnlySpan<byte> pattern)
    {
        if (pattern.IsEmpty)
        {
            return 0;
        }

        int matches = 0;
        int index = 0;
        while ((index = IndexOf(original, pattern, index)) >= 0)
        {
            destination.Slice(index, pattern.Length).Fill(FillByte);
            matches++;

            // Advance one byte, not pattern.Length, so a self-overlapping secret
            // ("abab" inside "ababab") does not leave a trailing fragment unredacted.
            index++;
        }

        return matches;
    }

    /// <summary>
    /// Returns the first offset at or after <paramref name="start"/> where
    /// <paramref name="needle"/> occurs in <paramref name="haystack"/>, or -1 if there is none.
    /// ASCII letters compare case-insensitively.
    /// </summary>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.Length == 0 || haystack.Length - start < needle.Length)
        {
            return -1;
        }

        int lastStart = haystack.Length - needle.Length;
        for (int i = start; i <= lastStart; i++)
        {
            bool match = true;
            for (int j = 0; j < needle.Length; j++)
            {
                if (!BytesEqualCaseInsensitive(haystack[i + j], needle[j]))
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>
    /// Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.
    /// The fold is applied to every byte position (including the second byte of a UTF-16 code
    /// unit), so it can over-match slightly; that errs on the side of redacting.
    /// </summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b)
    {
        if (a == b)
        {
            return true;
        }

        return ToLowerAscii(a) == ToLowerAscii(b);
    }

    /// <summary>Maps 'A'..'Z' to 'a'..'z'; every other byte is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}