Files
histsdk/tools/AVEVA.Historian.ReverseEngineering/Capture/ProtocolCaptureSanitizer.cs
T
Joseph Doherty fa9cde3e2f CW-1: reusable capture -> sanitize -> golden-fixture pipeline
Adds the highest-leverage reverse-engineering primitive from the roadmap: one
path to turn a live operation buffer into a committable golden fixture. Unblocks
every capture-tier item (R0.5, R1.x, R2.1).

- ProtocolCaptureSanitizer: redacts identity-bearing values (host, tag, user,
  machine) from a native buffer in BOTH ASCII and UTF-16LE, overwriting in place
  with an 'X' fill so length and every field offset are preserved (keeps the
  fixture useful for byte-layout RE). ASCII-letter matching is case-insensitive;
  secrets < 3 chars are skipped to avoid collision corruption. AssertNoSecretsRemain
  is a fail-closed safety net that refuses to emit if any value survives.
- ProtocolFixtureWriter: serializes a capture to fixtures/protocol/<op>/<name>.json
  with sanitized hex, length, SHA-256 of the sanitized bytes, and a scrub report.
  Timestamps are passed in (deterministic / testable).
- capture-tag-info CLI command: captures a live GetTagInfoFromName response and
  writes the fixture. The same native bytes ride inside 2023 R2 gRPC
  GetTagInfosFromName, so the fixture is transport-agnostic.
- 11 unit tests for the sanitizer/writer (test project now references the RE tool).
- First real fixture: get-tag-info/analog-*.json — a 98-byte Int4 CTagMetadata
  buffer captured live from the local Historian 2020 server, tag name redacted,
  verified to contain no identity (descriptor 03 c3 00 31 = Int4, as documented).

180 non-live unit tests green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-19 14:56:48 -04:00

164 lines
5.7 KiB
C#

using System.Text;
namespace AVEVA.Historian.ReverseEngineering.Capture;
/// <summary>
/// A sensitive value to scrub from a captured buffer before it can be committed.
/// </summary>
/// <remarks>
/// <see cref="ProtocolCaptureSanitizer"/> skips values that are null, empty, or shorter than
/// <see cref="ProtocolCaptureSanitizer.MinSecretLength"/> characters; such entries still appear in
/// the scrub report with zero matches.
/// NOTE(review): this is a positional record, so the compiler-generated <c>ToString()</c> prints
/// <see cref="Value"/>. Avoid logging instances directly; confirm no caller does.
/// </remarks>
/// <param name="Name">Stable label (e.g. "host", "tag", "user") recorded in the scrub report.</param>
/// <param name="Value">The literal value to redact wherever it appears in the buffer.</param>
public sealed record CaptureSecret(string Name, string Value);
/// <summary>Per-secret tally of redactions, split by the encoding in which the match was found.</summary>
/// <param name="Name">Label of the secret this tally belongs to.</param>
/// <param name="AsciiMatches">Redactions counted for the secret's 8-bit (ASCII-compatible) encoding.</param>
/// <param name="Utf16Matches">Redactions counted for the secret's UTF-16LE encoding.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Combined number of redactions across both encodings.</summary>
    public int Total
    {
        get { return Utf16Matches + AsciiMatches; }
    }
}
/// <summary>Outcome of a sanitize pass: the redacted copy of the buffer and one report entry per secret.</summary>
/// <param name="Sanitized">The redacted bytes.</param>
/// <param name="Report">Per-secret redaction tallies.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Sum of <see cref="ScrubCount.Total"/> over every entry in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int sum = 0;
            for (int i = 0; i < Report.Count; i++)
            {
                sum += Report[i].Total;
            }

            return sum;
        }
    }
}
/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
///
/// Each secret is matched in both <b>UTF-8</b> (byte-identical to ASCII for ASCII-only values) and
/// <b>UTF-16LE</b>, and every occurrence is overwritten in place with a fixed fill byte. The
/// redaction preserves the buffer's exact length and every field offset, so the sanitized fixture
/// remains useful for byte-layout reverse engineering while carrying none of the original identity.
///
/// Matching always runs against the <i>original</i> capture, never against partially redacted
/// bytes. This means a secret that contains, or overlaps with, another secret (or with another
/// occurrence of itself) is still redacted in full instead of leaving an unredacted tail behind.
///
/// ASCII-letter matching is case-insensitive (servers may echo a tag/host in a different case than
/// requested); other bytes match exactly. The case fold is applied byte-wise, so it also applies to
/// bytes in the A-Z range that belong to a multi-byte UTF-16 code unit. Secrets shorter than
/// <see cref="MinSecretLength"/> are ignored to avoid corrupting unrelated bytes that coincidentally
/// collide with a short value.
/// </summary>
public static class ProtocolCaptureSanitizer
{
    /// <summary>Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.</summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Produces a redacted copy of <paramref name="buffer"/> in which every occurrence of every
    /// scrubbable secret is overwritten with <see cref="FillByte"/>. The input is not modified.
    /// </summary>
    /// <param name="buffer">The captured bytes to sanitize.</param>
    /// <param name="secrets">Values to redact. Null, empty, or too-short values are reported with zero matches.</param>
    /// <returns>
    /// The redacted copy plus one <see cref="ScrubCount"/> per secret, in the same order as
    /// <paramref name="secrets"/>. Counts are non-overlapping occurrences in the original buffer.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();
        List<ScrubCount> report = new(secrets.Count);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                report.Add(new ScrubCount(secret.Name, 0, 0));
                continue;
            }

            // UTF-8 rather than Encoding.ASCII: the ASCII encoder replaces every non-ASCII character
            // with '?', which would both miss the real bytes and redact an unrelated literal.
            // Patterns are located in the pristine input and blanked in the working copy, so an
            // earlier redaction can never hide (or fabricate) a later match.
            int eightBit = RedactPattern(buffer, working, Encoding.UTF8.GetBytes(secret.Value));
            int utf16 = RedactPattern(buffer, working, Encoding.Unicode.GetBytes(secret.Value));
            report.Add(new ScrubCount(secret.Name, eightBit, utf16));
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <remarks>
    /// Only complete occurrences are detected. Because the fill byte is 'X' and letters match
    /// case-insensitively, a secret made up solely of 'x'/'X' characters is indistinguishable from
    /// fill and will be reported as remaining.
    /// </remarks>
    /// <param name="sanitized">The bytes about to be emitted.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A secret is still present in <paramref name="sanitized"/>.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                continue;
            }

            bool survives =
                IndexOf(sanitized, Encoding.UTF8.GetBytes(secret.Value), 0) >= 0
                || IndexOf(sanitized, Encoding.Unicode.GetBytes(secret.Value), 0) >= 0;

            if (survives)
            {
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>True when the secret has a value long enough to be matched safely.</summary>
    private static bool IsScrubbable(CaptureSecret secret) =>
        !string.IsNullOrEmpty(secret.Value) && secret.Value.Length >= MinSecretLength;

    /// <summary>
    /// Finds every occurrence of <paramref name="pattern"/> in <paramref name="source"/> (including
    /// overlapping ones) and overwrites the same byte range in <paramref name="target"/>.
    /// </summary>
    /// <param name="source">Pristine bytes to search; never modified.</param>
    /// <param name="target">Working copy of <paramref name="source"/> (same length) that receives the fill.</param>
    /// <param name="pattern">Encoded secret to look for.</param>
    /// <returns>The number of non-overlapping occurrences, counted greedily from the left.</returns>
    private static int RedactPattern(ReadOnlySpan<byte> source, byte[] target, byte[] pattern)
    {
        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;
        int countedUntil = 0; // end (exclusive) of the last occurrence that was counted
        int index = 0;

        while ((index = IndexOf(source, pattern, index)) >= 0)
        {
            target.AsSpan(index, pattern.Length).Fill(FillByte);

            // Overlapping hits are still blanked above, but only hits that start past the previous
            // counted one are tallied, so the report stays a non-overlapping occurrence count.
            if (index >= countedUntil)
            {
                matches++;
                countedUntil = index + pattern.Length;
            }

            // Step by one byte (not by pattern length) so overlapping occurrences are visited too.
            index++;
        }

        return matches;
    }

    /// <summary>
    /// Returns the first offset at or after <paramref name="start"/> where <paramref name="needle"/>
    /// occurs in <paramref name="haystack"/> (ASCII letters compared case-insensitively), or -1.
    /// </summary>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.Length == 0 || haystack.Length - start < needle.Length)
        {
            return -1;
        }

        int lastCandidate = haystack.Length - needle.Length;
        for (int i = start; i <= lastCandidate; i++)
        {
            bool match = true;
            for (int j = 0; j < needle.Length; j++)
            {
                if (!BytesEqualCaseInsensitive(haystack[i + j], needle[j]))
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.</summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b) =>
        a == b || ToLowerAscii(a) == ToLowerAscii(b);

    /// <summary>Maps 'A'-'Z' to 'a'-'z'; every other byte value is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}