CW-1: reusable capture -> sanitize -> golden-fixture pipeline
Adds the highest-leverage reverse-engineering primitive from the roadmap: one path to turn a live operation buffer into a committable golden fixture. Unblocks every capture-tier item (R0.5, R1.x, R2.1).

- ProtocolCaptureSanitizer: redacts identity-bearing values (host, tag, user, machine) from a native buffer in BOTH ASCII and UTF-16LE, overwriting in place with an 'X' fill so length and every field offset are preserved (keeps the fixture useful for byte-layout RE). ASCII-letter matching is case-insensitive; secrets < 3 chars are skipped to avoid collision corruption. AssertNoSecretsRemain is a fail-closed safety net that refuses to emit if any value survives.
- ProtocolFixtureWriter: serializes a capture to fixtures/protocol/<op>/<name>.json with sanitized hex, length, SHA-256 of the sanitized bytes, and a scrub report. Timestamps are passed in (deterministic / testable).
- capture-tag-info CLI command: captures a live GetTagInfoFromName response and writes the fixture. The same native bytes ride inside 2023 R2 gRPC GetTagInfosFromName, so the fixture is transport-agnostic.
- 11 unit tests for the sanitizer/writer (test project now references the RE tool).
- First real fixture: get-tag-info/analog-*.json — a 98-byte Int4 CTagMetadata buffer captured live from the local Historian 2020 server, tag name redacted, verified to contain no identity (descriptor 03 c3 00 31 = Int4, as documented).

180 non-live unit tests green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,163 @@
|
||||
using System.Text;
|
||||
|
||||
namespace AVEVA.Historian.ReverseEngineering.Capture;
|
||||
|
||||
/// <summary>A sensitive value to scrub from a captured buffer before it can be committed.</summary>
/// <param name="Name">Stable label (e.g. "host", "tag", "user") recorded in the scrub report.</param>
/// <param name="Value">The literal value to redact wherever it appears in the buffer.</param>
public sealed record CaptureSecret(string Name, string Value)
{
    /// <summary>
    /// Renders the label only. The compiler-generated record <c>ToString</c> would print
    /// <see cref="Value"/> verbatim, so interpolating a secret into a log line or an exception
    /// message would leak the very value this type exists to keep out of committed output.
    /// Equality, deconstruction and the properties themselves are unaffected.
    /// </summary>
    /// <returns>A display string containing <see cref="Name"/> and a fixed placeholder for the value.</returns>
    public override string ToString() => $"CaptureSecret {{ Name = {Name}, Value = [redacted] }}";
}
|
||||
|
||||
/// <summary>Per-secret redaction tally, split by the encoding in which the value was matched.</summary>
/// <param name="Name">Label of the secret this tally belongs to.</param>
/// <param name="AsciiMatches">Occurrences redacted in the narrow, ASCII-compatible byte form.</param>
/// <param name="Utf16Matches">Occurrences redacted in the UTF-16 byte form.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Combined number of redactions across both encodings.</summary>
    public int Total
    {
        get { return Utf16Matches + AsciiMatches; }
    }
}
|
||||
|
||||
/// <summary>Outcome of a sanitize pass: the redacted copy of the buffer and a per-secret tally.</summary>
/// <param name="Sanitized">The redacted bytes.</param>
/// <param name="Report">Redaction counts, one <see cref="ScrubCount"/> entry per reported secret.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Sum of <see cref="ScrubCount.Total"/> over every entry in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int sum = 0;
            for (int i = 0; i < Report.Count; i++)
            {
                sum += Report[i].Total;
            }

            return sum;
        }
    }
}
|
||||
|
||||
/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
/// </summary>
/// <remarks>
/// <para>
/// Each secret is matched in both ASCII/UTF-8 and UTF-16LE (the two encodings AVEVA's native
/// buffers use for embedded strings) and overwritten in place with <see cref="FillByte"/>. The
/// redaction preserves the buffer's exact length and every field offset, so the sanitized fixture
/// remains useful for byte-layout reverse engineering while carrying none of the original identity.
/// </para>
/// <para>
/// ASCII-letter matching is case-insensitive (servers may echo a tag/host in a different case than
/// requested); other bytes match exactly. Secrets shorter than <see cref="MinSecretLength"/> are
/// ignored to avoid corrupting unrelated bytes that coincidentally collide with a short value.
/// </para>
/// <para>
/// The narrow pattern is produced with UTF-8 rather than <see cref="Encoding.ASCII"/>. The ASCII
/// encoder substitutes '?' for every non-ASCII character, which would let a UTF-8 encoded secret
/// slip past both redaction and the safety net while redacting unrelated runs of '?'. For secrets
/// made only of ASCII characters the two encodings yield identical bytes.
/// </para>
/// </remarks>
public static class ProtocolCaptureSanitizer
{
    /// <summary>
    /// Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.
    /// Because letter matching ignores case, a secret consisting solely of 'x'/'X' characters cannot
    /// be told apart from an already-redacted region.
    /// </summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Produces a redacted copy of <paramref name="buffer"/> in which every occurrence of every
    /// scrubbable secret has been overwritten with <see cref="FillByte"/>.
    /// </summary>
    /// <param name="buffer">Captured bytes. The input is copied and never modified.</param>
    /// <param name="secrets">Values to redact. Null, empty or too-short values are reported with zero matches.</param>
    /// <returns>The redacted copy plus one <see cref="ScrubCount"/> per entry of <paramref name="secrets"/>, in order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();
        List<ScrubCount> report = new(secrets.Count);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret.Value))
            {
                // Still emit a row so the report lines up one-to-one with the requested secrets.
                report.Add(new ScrubCount(secret.Name, 0, 0));
                continue;
            }

            // Narrow pass first, then wide; both operate on the same working copy.
            int narrow = RedactPattern(working, EncodeNarrow(secret.Value));
            int wide = RedactPattern(working, EncodeWide(secret.Value));
            report.Add(new ScrubCount(secret.Name, narrow, wide));
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <param name="sanitized">The bytes about to be emitted.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A scrubbable secret is still present in <paramref name="sanitized"/>.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret.Value))
            {
                continue;
            }

            if (IndexOf(sanitized, EncodeNarrow(secret.Value), 0) >= 0
                || IndexOf(sanitized, EncodeWide(secret.Value), 0) >= 0)
            {
                // Only the label goes into the message; the value itself must never be echoed.
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>Single source of truth for the skip rule shared by redaction and verification.</summary>
    private static bool IsScrubbable(string value) =>
        !string.IsNullOrEmpty(value) && value.Length >= MinSecretLength;

    /// <summary>Byte pattern for a secret stored as ASCII/UTF-8.</summary>
    private static byte[] EncodeNarrow(string value) => Encoding.UTF8.GetBytes(value);

    /// <summary>Byte pattern for a secret stored as UTF-16LE.</summary>
    private static byte[] EncodeWide(string value) => Encoding.Unicode.GetBytes(value);

    /// <summary>
    /// Overwrites every non-overlapping occurrence of <paramref name="pattern"/> in
    /// <paramref name="buffer"/> with <see cref="FillByte"/>, scanning left to right.
    /// </summary>
    /// <returns>The number of occurrences that were overwritten.</returns>
    private static int RedactPattern(byte[] buffer, byte[] pattern)
    {
        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;
        int index = 0;
        while ((index = IndexOf(buffer, pattern, index)) >= 0)
        {
            buffer.AsSpan(index, pattern.Length).Fill(FillByte);

            // Resume after the filled region; the bytes just written are not rescanned.
            index += pattern.Length;
            matches++;
        }

        return matches;
    }

    /// <summary>
    /// Finds the first offset at or after <paramref name="start"/> where <paramref name="needle"/>
    /// occurs in <paramref name="haystack"/>, comparing ASCII letters case-insensitively.
    /// </summary>
    /// <returns>The zero-based offset of the match, or -1 when there is none.</returns>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.Length == 0 || haystack.Length - start < needle.Length)
        {
            return -1;
        }

        for (int i = start; i <= haystack.Length - needle.Length; i++)
        {
            bool match = true;
            for (int j = 0; j < needle.Length; j++)
            {
                if (!BytesEqualCaseInsensitive(haystack[i + j], needle[j]))
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.</summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b)
    {
        if (a == b)
        {
            return true;
        }

        return ToLowerAscii(a) == ToLowerAscii(b);
    }

    /// <summary>Maps 'A'..'Z' to 'a'..'z'; every other byte value is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}
|
||||
Reference in New Issue
Block a user