using System.Text;
namespace AVEVA.Historian.ReverseEngineering.Capture;
/// <summary>A sensitive value to scrub from a captured buffer before it can be committed.</summary>
/// <param name="Name">Stable label (e.g. "host", "tag", "user") recorded in the scrub report.</param>
/// <param name="Value">The literal value to redact wherever it appears in the buffer.</param>
public sealed record CaptureSecret(string Name, string Value);
/// <summary>How many times a secret was found and redacted, per encoding.</summary>
/// <param name="Name">Label of the secret these counts belong to.</param>
/// <param name="AsciiMatches">Matches of the secret's ASCII/UTF-8 byte form.</param>
/// <param name="Utf16Matches">Matches of the secret's UTF-16LE byte form.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Combined number of matches across both encodings.</summary>
    public int Total
    {
        get { return AsciiMatches + Utf16Matches; }
    }
}
/// <summary>Result of sanitizing a captured buffer: the redacted copy plus a per-secret report.</summary>
/// <param name="Sanitized">The redacted copy of the captured buffer.</param>
/// <param name="Report">Per-secret match counts.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Sum of <see cref="ScrubCount.Total"/> over every entry in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int sum = 0;
            for (int i = 0; i < Report.Count; i++)
            {
                sum += Report[i].Total;
            }

            return sum;
        }
    }
}
/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
/// </summary>
/// <remarks>
/// <para>
/// Each secret is matched in both ASCII/UTF-8 and UTF-16LE (the two encodings AVEVA's
/// native buffers use for embedded strings) and overwritten in place with a fixed fill byte. The
/// redaction preserves the buffer's exact length and every field offset, so the sanitized fixture
/// remains useful for byte-layout reverse engineering while carrying none of the original identity.
/// </para>
/// <para>
/// ASCII-letter matching is case-insensitive (servers may echo a tag/host in a different case than
/// requested); other bytes match exactly. Secrets shorter than <see cref="MinSecretLength"/> are
/// ignored to avoid corrupting unrelated bytes that coincidentally collide with a short value.
/// </para>
/// <para>
/// Matching is always performed against the caller's original bytes rather than the partially
/// redacted copy, so the order of the secrets does not matter and one secret's redaction can never
/// hide the remainder of another, overlapping secret.
/// </para>
/// </remarks>
public static class ProtocolCaptureSanitizer
{
    /// <summary>Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.</summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Produces a redacted copy of <paramref name="buffer"/> in which every occurrence of every
    /// eligible secret is overwritten with <see cref="FillByte"/>.
    /// </summary>
    /// <param name="buffer">The captured bytes. They are copied; the input itself is not modified.</param>
    /// <param name="secrets">The values to redact.</param>
    /// <returns>
    /// The redacted copy and one <see cref="ScrubCount"/> per entry of <paramref name="secrets"/>, in
    /// the same order. Secrets that are too short to scrub are reported with zero matches.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is <c>null</c>.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();
        List<ScrubCount> report = new(secrets.Count);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                report.Add(new ScrubCount(secret.Name, 0, 0));
                continue;
            }

            // UTF-8 is byte-identical to ASCII for ASCII-only values, but unlike Encoding.ASCII it
            // does not collapse non-ASCII characters to '?', so such secrets are actually found.
            int narrow = RedactPattern(buffer, working, Encoding.UTF8.GetBytes(secret.Value));
            int wide = RedactPattern(buffer, working, Encoding.Unicode.GetBytes(secret.Value));
            report.Add(new ScrubCount(secret.Name, narrow, wide));
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <param name="sanitized">The buffer that is about to be emitted.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">An eligible secret is still present in <paramref name="sanitized"/>.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                continue;
            }

            // Must use the same encodings as Sanitize, otherwise the check has a blind spot.
            bool survives =
                IndexOf(sanitized, Encoding.UTF8.GetBytes(secret.Value), 0) >= 0
                || IndexOf(sanitized, Encoding.Unicode.GetBytes(secret.Value), 0) >= 0;

            if (survives)
            {
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>True when the secret has a value of at least <see cref="MinSecretLength"/> characters.</summary>
    private static bool IsScrubbable(CaptureSecret secret) =>
        !string.IsNullOrEmpty(secret.Value) && secret.Value.Length >= MinSecretLength;

    /// <summary>
    /// Overwrites, in <paramref name="target"/>, every region where <paramref name="pattern"/> occurs in
    /// <paramref name="pristine"/>. Both spans must have the same length.
    /// </summary>
    /// <returns>
    /// The number of non-overlapping occurrences (leftmost first). Occurrences that overlap an
    /// already counted one are still filled, but not counted again.
    /// </returns>
    private static int RedactPattern(ReadOnlySpan<byte> pristine, Span<byte> target, ReadOnlySpan<byte> pattern)
    {
        if (pattern.IsEmpty)
        {
            return 0;
        }

        int matches = 0;
        int countedUntil = 0;
        int from = 0;
        while ((from = IndexOf(pristine, pattern, from)) >= 0)
        {
            target.Slice(from, pattern.Length).Fill(FillByte);

            if (from >= countedUntil)
            {
                matches++;
                countedUntil = from + pattern.Length;
            }

            // Advance by one byte, not by the pattern length, so a self-overlapping occurrence
            // (e.g. "abab" inside "ababab") does not leave its tail behind.
            from++;
        }

        return matches;
    }

    /// <summary>
    /// Returns the first offset at or after <paramref name="start"/> where <paramref name="needle"/>
    /// occurs in <paramref name="haystack"/> (ASCII letters compared case-insensitively), or -1.
    /// </summary>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.IsEmpty || haystack.Length - start < needle.Length)
        {
            return -1;
        }

        int lastStart = haystack.Length - needle.Length;
        for (int offset = start; offset <= lastStart; offset++)
        {
            if (MatchesAt(haystack, needle, offset))
            {
                return offset;
            }
        }

        return -1;
    }

    /// <summary>True when <paramref name="needle"/> matches <paramref name="haystack"/> starting at <paramref name="offset"/>.</summary>
    private static bool MatchesAt(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int offset)
    {
        for (int j = 0; j < needle.Length; j++)
        {
            if (!BytesEqualCaseInsensitive(haystack[offset + j], needle[j]))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.</summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b) =>
        a == b || ToLowerAscii(a) == ToLowerAscii(b);

    /// <summary>Maps 'A'..'Z' to 'a'..'z'; every other byte is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}