fa9cde3e2f
Adds the highest-leverage reverse-engineering primitive from the roadmap: one path to turn a live operation buffer into a committable golden fixture. Unblocks every capture-tier item (R0.5, R1.x, R2.1). - ProtocolCaptureSanitizer: redacts identity-bearing values (host, tag, user, machine) from a native buffer in BOTH ASCII and UTF-16LE, overwriting in place with an 'X' fill so length and every field offset are preserved (keeps the fixture useful for byte-layout RE). ASCII-letter matching is case-insensitive; secrets < 3 chars are skipped to avoid collision corruption. AssertNoSecretsRemain is a fail-closed safety net that refuses to emit if any value survives. - ProtocolFixtureWriter: serializes a capture to fixtures/protocol/<op>/<name>.json with sanitized hex, length, SHA-256 of the sanitized bytes, and a scrub report. Timestamps are passed in (deterministic / testable). - capture-tag-info CLI command: captures a live GetTagInfoFromName response and writes the fixture. The same native bytes ride inside 2023 R2 gRPC GetTagInfosFromName, so the fixture is transport-agnostic. - 11 unit tests for the sanitizer/writer (test project now references the RE tool). - First real fixture: get-tag-info/analog-*.json — a 98-byte Int4 CTagMetadata buffer captured live from the local Historian 2020 server, tag name redacted, verified to contain no identity (descriptor 03 c3 00 31 = Int4, as documented). 180 non-live unit tests green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
164 lines
5.7 KiB
C#
164 lines
5.7 KiB
C#
using System.Text;
|
|
|
|
namespace AVEVA.Historian.ReverseEngineering.Capture;
|
|
|
|
/// <summary>A sensitive value to scrub from a captured buffer before it can be committed.</summary>
/// <param name="Name">Stable label (e.g. "host", "tag", "user") recorded in the scrub report.</param>
/// <param name="Value">The literal value to redact wherever it appears in the buffer.</param>
public sealed record CaptureSecret(string Name, string Value)
{
    /// <summary>
    /// Returns a printable form that carries the label but never the secret itself.
    /// The compiler-synthesized record <c>ToString()</c> would print <see cref="Value"/>, which
    /// would expose the identity through any log line, interpolated string, or assertion message
    /// that formats this record. Equality and deconstruction are unaffected.
    /// </summary>
    public override string ToString() =>
        $"{nameof(CaptureSecret)} {{ {nameof(Name)} = {Name}, {nameof(Value)} = <redacted> }}";
}
|
|
|
|
/// <summary>Per-secret tally of redactions, split by the encoding in which the value was found.</summary>
/// <param name="Name">Label of the secret this tally belongs to.</param>
/// <param name="AsciiMatches">Number of single-byte-encoded occurrences that were redacted.</param>
/// <param name="Utf16Matches">Number of UTF-16LE occurrences that were redacted.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Combined number of redactions across both encodings.</summary>
    public int Total
    {
        get { return Utf16Matches + AsciiMatches; }
    }
}
|
|
|
|
/// <summary>Outcome of a sanitize pass: the redacted copy of the buffer and one report entry per secret.</summary>
/// <param name="Sanitized">The redacted bytes.</param>
/// <param name="Report">Per-secret redaction tallies.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Sum of <see cref="ScrubCount.Total"/> over every entry in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int sum = 0;
            for (int i = 0; i < Report.Count; i++)
            {
                sum += Report[i].Total;
            }

            return sum;
        }
    }
}
|
|
|
|
/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
///
/// Each secret is matched in both <b>ASCII/UTF-8</b> and <b>UTF-16LE</b> (the two encodings AVEVA's
/// native buffers use for embedded strings) and overwritten in place with a fixed fill byte. The
/// redaction preserves the buffer's exact length and every field offset, so the sanitized fixture
/// remains useful for byte-layout reverse engineering while carrying none of the original identity.
///
/// ASCII-letter matching is case-insensitive (servers may echo a tag/host in a different case than
/// requested); other bytes match exactly. Secrets shorter than <see cref="MinSecretLength"/> are
/// ignored to avoid corrupting unrelated bytes that coincidentally collide with a short value.
///
/// Every secret is located in the <i>original</i> bytes, not in the partially redacted copy, so the
/// outcome does not depend on the order of the secret list: a short secret that is a substring of a
/// longer one cannot hide the longer match and leave its tail behind.
/// </summary>
public static class ProtocolCaptureSanitizer
{
    /// <summary>Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.</summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Produces a redacted copy of <paramref name="buffer"/> in which every occurrence of every
    /// eligible secret is overwritten with <see cref="FillByte"/>. The input is not modified.
    /// </summary>
    /// <param name="buffer">The captured bytes to sanitize.</param>
    /// <param name="secrets">Values to redact. Entries that are empty or too short are reported with zero matches.</param>
    /// <returns>The redacted copy plus one <see cref="ScrubCount"/> per secret, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is <c>null</c>.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();
        List<ScrubCount> report = new(secrets.Count);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret.Value))
            {
                report.Add(new ScrubCount(secret.Name, 0, 0));
                continue;
            }

            // UTF-8 rather than ASCII: identical bytes for pure-ASCII values, but non-ASCII
            // characters keep their real encoding instead of collapsing to '?'.
            int narrow = RedactPattern(buffer, working, Encoding.UTF8.GetBytes(secret.Value));
            int wide = RedactPattern(buffer, working, Encoding.Unicode.GetBytes(secret.Value));
            report.Add(new ScrubCount(secret.Name, narrow, wide));
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <remarks>
    /// Matching uses the same case-insensitive rule as <see cref="Sanitize"/>, so a secret made up
    /// solely of the letter 'x'/'X' is indistinguishable from the fill and is reported as surviving.
    /// </remarks>
    /// <param name="sanitized">The bytes about to be emitted.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">A secret is still present in <paramref name="sanitized"/>.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret.Value))
            {
                continue;
            }

            bool survives =
                IndexOf(sanitized, Encoding.UTF8.GetBytes(secret.Value), 0) >= 0
                || IndexOf(sanitized, Encoding.Unicode.GetBytes(secret.Value), 0) >= 0;

            if (survives)
            {
                // Only the label goes into the message; the value itself must never be echoed.
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>True when a secret value is present and long enough to be scrubbed safely.</summary>
    private static bool IsScrubbable(string value) =>
        !string.IsNullOrEmpty(value) && value.Length >= MinSecretLength;

    /// <summary>
    /// Finds every non-overlapping occurrence of <paramref name="pattern"/> in
    /// <paramref name="source"/> and overwrites the same byte range in <paramref name="target"/>.
    /// Searching the untouched source keeps earlier redactions from masking later matches.
    /// </summary>
    /// <returns>The number of occurrences redacted.</returns>
    private static int RedactPattern(ReadOnlySpan<byte> source, byte[] target, byte[] pattern)
    {
        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;
        int index = 0;
        while ((index = IndexOf(source, pattern, index)) >= 0)
        {
            target.AsSpan(index, pattern.Length).Fill(FillByte);
            index += pattern.Length;
            matches++;
        }

        return matches;
    }

    /// <summary>
    /// Returns the first offset at or after <paramref name="start"/> where <paramref name="needle"/>
    /// occurs in <paramref name="haystack"/>, or -1 when there is none (or the needle is empty).
    /// </summary>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.IsEmpty)
        {
            return -1;
        }

        // Last offset at which the needle still fits; the loop is skipped when start is past it.
        int lastCandidate = haystack.Length - needle.Length;
        for (int candidate = start; candidate <= lastCandidate; candidate++)
        {
            if (MatchesAt(haystack, needle, candidate))
            {
                return candidate;
            }
        }

        return -1;
    }

    /// <summary>Checks whether <paramref name="needle"/> occurs in <paramref name="haystack"/> at <paramref name="offset"/>.</summary>
    private static bool MatchesAt(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int offset)
    {
        for (int i = 0; i < needle.Length; i++)
        {
            if (!BytesEqualCaseInsensitive(haystack[offset + i], needle[i]))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.</summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b) =>
        a == b || ToLowerAscii(a) == ToLowerAscii(b);

    /// <summary>Maps 'A'..'Z' to 'a'..'z'; every other byte value is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}
|