CW-1: reusable capture -> sanitize -> golden-fixture pipeline

Adds the highest-leverage reverse-engineering primitive from the roadmap: one
path to turn a live operation buffer into a committable golden fixture. Unblocks
every capture-tier item (R0.5, R1.x, R2.1).

- ProtocolCaptureSanitizer: redacts identity-bearing values (host, tag, user,
  machine) from a native buffer in BOTH ASCII and UTF-16LE, overwriting in place
  with an 'X' fill so length and every field offset are preserved (keeps the
  fixture useful for byte-layout RE). ASCII-letter matching is case-insensitive;
  secrets < 3 chars are skipped to avoid collision corruption. AssertNoSecretsRemain
  is a fail-closed safety net that refuses to emit if any value survives.
- ProtocolFixtureWriter: serializes a capture to fixtures/protocol/<op>/<name>.json
  with sanitized hex, length, SHA-256 of the sanitized bytes, and a scrub report.
  Timestamps are passed in (deterministic / testable).
- capture-tag-info CLI command: captures a live GetTagInfoFromName response and
  writes the fixture. The same native bytes ride inside 2023 R2 gRPC
  GetTagInfosFromName, so the fixture is transport-agnostic.
- 11 unit tests for the sanitizer/writer (test project now references the RE tool).
- First real fixture: get-tag-info/analog-*.json — a 98-byte Int4 CTagMetadata
  buffer captured live from the local Historian 2020 server, tag name redacted,
  verified to contain no identity (descriptor 03 c3 00 31 = Int4, as documented).

180 non-live unit tests green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-06-19 14:56:48 -04:00
parent 6b892b69ba
commit fa9cde3e2f
6 changed files with 502 additions and 0 deletions
@@ -0,0 +1,163 @@
using System.Text;
namespace AVEVA.Historian.ReverseEngineering.Capture;
/// <summary>
/// A named sensitive value that must be scrubbed from a captured buffer before the capture can be
/// committed as a fixture.
/// </summary>
/// <param name="Name">
/// Stable label (e.g. "host", "tag", "user"). It is copied into the scrub report and into the error
/// message raised when the value is still found after sanitizing; the value itself is never reported.
/// </param>
/// <param name="Value">
/// The literal value to redact wherever it appears in the buffer. Null/empty values and values shorter
/// than <see cref="ProtocolCaptureSanitizer.MinSecretLength"/> characters are skipped by the sanitizer.
/// </param>
public sealed record CaptureSecret(string Name, string Value);
/// <summary>Per-secret redaction tally, split by the encoding in which the secret was found.</summary>
/// <param name="Name">Label of the secret this tally belongs to.</param>
/// <param name="AsciiMatches">Number of single-byte-encoded occurrences that were redacted.</param>
/// <param name="Utf16Matches">Number of UTF-16LE occurrences that were redacted.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Combined number of redactions across both encodings.</summary>
    public int Total
    {
        get
        {
            return Utf16Matches + AsciiMatches;
        }
    }
}
/// <summary>Outcome of a sanitize pass: the redacted copy of the buffer and one tally per secret.</summary>
/// <param name="Sanitized">The redacted copy of the captured buffer.</param>
/// <param name="Report">One <see cref="ScrubCount"/> per secret that was supplied.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Sum of <see cref="ScrubCount.Total"/> over every entry in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int sum = 0;
            for (int i = 0; i < Report.Count; i++)
            {
                sum += Report[i].Total;
            }

            return sum;
        }
    }
}
/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
///
/// Each secret is matched in both <b>UTF-8</b> (byte-identical to ASCII for ASCII-only values) and
/// <b>UTF-16LE</b> and overwritten with a fixed fill byte. The redaction preserves the buffer's exact
/// length and every field offset, so the sanitized fixture remains useful for byte-layout reverse
/// engineering while carrying none of the original identity.
///
/// Matches are located in the <i>original</i> bytes and filled in a separate working copy, so the
/// outcome does not depend on the order of the secrets: when one secret is a prefix/substring of
/// another, or two secrets overlap, every byte covered by any match is still redacted.
///
/// ASCII-letter matching is case-insensitive (servers may echo a tag/host in a different case than
/// requested); other bytes match exactly. Secrets shorter than <see cref="MinSecretLength"/> are
/// ignored to avoid corrupting unrelated bytes that coincidentally collide with a short value.
/// </summary>
public static class ProtocolCaptureSanitizer
{
    /// <summary>Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.</summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Returns a redacted copy of <paramref name="buffer"/> plus a report with one entry per secret
    /// (in the same order as <paramref name="secrets"/>). The input buffer is never modified.
    /// </summary>
    /// <param name="buffer">Raw captured bytes.</param>
    /// <param name="secrets">Values to redact; skipped entries are reported with zero matches.</param>
    /// <returns>The redacted copy and the per-secret match counts.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();
        List<ScrubCount> report = new(secrets.Count);
        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                report.Add(new ScrubCount(secret.Name, 0, 0));
                continue;
            }

            // UTF-8 rather than Encoding.ASCII: ASCII encoding replaces every non-ASCII character
            // with '?', which would neither match the real bytes nor be caught by the safety net.
            // Searching the pristine input (not the working copy) keeps overlapping secrets from
            // hiding each other's matches.
            int singleByte = RedactPattern(buffer, working, Encoding.UTF8.GetBytes(secret.Value));
            int utf16 = RedactPattern(buffer, working, Encoding.Unicode.GetBytes(secret.Value));
            report.Add(new ScrubCount(secret.Name, singleByte, utf16));
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <param name="sanitized">The buffer that is about to be emitted.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A secret is still present in the buffer.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                continue;
            }

            // Must use the same encodings as Sanitize, otherwise the check has the same blind spots.
            if (IndexOf(sanitized, Encoding.UTF8.GetBytes(secret.Value), 0) >= 0
                || IndexOf(sanitized, Encoding.Unicode.GetBytes(secret.Value), 0) >= 0)
            {
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>True when the secret has a value long enough to be scrubbed safely.</summary>
    private static bool IsScrubbable(CaptureSecret secret) =>
        !string.IsNullOrEmpty(secret.Value) && secret.Value.Length >= MinSecretLength;

    /// <summary>
    /// Finds every non-overlapping occurrence of <paramref name="pattern"/> in <paramref name="source"/>
    /// and overwrites the same byte range in <paramref name="destination"/> with <see cref="FillByte"/>.
    /// </summary>
    /// <returns>The number of occurrences found.</returns>
    private static int RedactPattern(ReadOnlySpan<byte> source, byte[] destination, byte[] pattern)
    {
        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;
        int index = 0;
        while ((index = IndexOf(source, pattern, index)) >= 0)
        {
            destination.AsSpan(index, pattern.Length).Fill(FillByte);
            index += pattern.Length;
            matches++;
        }

        return matches;
    }

    /// <summary>
    /// Returns the first offset at or after <paramref name="start"/> where <paramref name="needle"/>
    /// occurs in <paramref name="haystack"/> (ASCII letters compared case-insensitively), or -1.
    /// </summary>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.Length == 0 || haystack.Length - start < needle.Length)
        {
            return -1;
        }

        for (int i = start; i <= haystack.Length - needle.Length; i++)
        {
            bool match = true;
            for (int j = 0; j < needle.Length; j++)
            {
                if (!BytesEqualCaseInsensitive(haystack[i + j], needle[j]))
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.</summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b)
    {
        if (a == b)
        {
            return true;
        }

        return ToLowerAscii(a) == ToLowerAscii(b);
    }

    /// <summary>Maps 'A'-'Z' to 'a'-'z'; every other byte value is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}
@@ -0,0 +1,89 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
namespace AVEVA.Historian.ReverseEngineering.Capture;
/// <summary>One captured operation: the (optional) request buffer and the response buffer, raw.</summary>
/// <param name="Op">
/// Operation identifier. The fixture writer emits it as the <c>op</c> field and uses it as the
/// sub-directory name under the fixture root.
/// </param>
/// <param name="Request">Raw request bytes, or null when no request was captured (the fixture section is then null).</param>
/// <param name="Response">Raw response bytes, or null when no response was captured (the fixture section is then null).</param>
/// <param name="Notes">
/// Optional free-text note emitted as the <c>notes</c> field. NOTE: the writer serializes this (and
/// <paramref name="Op"/>) as-is without passing it through the sanitizer, so it must not contain
/// identity-bearing values.
/// </param>
public sealed record ProtocolCapture(string Op, byte[]? Request, byte[]? Response, string? Notes = null);
/// <summary>
/// CW-1 fixture writer: takes a live <see cref="ProtocolCapture"/>, redacts it with
/// <see cref="ProtocolCaptureSanitizer"/>, and writes a committable JSON fixture under
/// <c>fixtures/protocol/&lt;op&gt;/</c>. The fixture records sanitized hex, lengths, SHA-256 of the
/// sanitized bytes, and the scrub report — never the original identity-bearing bytes.
///
/// Timestamps are passed in (never generated here) so the writer stays deterministic and testable.
/// </summary>
public static class ProtocolFixtureWriter
{
    // Cached: JsonSerializerOptions holds per-instance metadata caches, so a fresh instance per call
    // repeats that warm-up for no benefit (analyzer rule CA1869).
    private static readonly JsonSerializerOptions FixtureJsonOptions = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    };

    /// <summary>
    /// Builds the fixture JSON document (op, capturedUtc, notes, request, response) for a capture.
    /// Each non-null buffer is sanitized and then verified to contain none of the secrets.
    /// </summary>
    /// <param name="capture">The raw capture; <c>Op</c> and <c>Notes</c> are emitted verbatim.</param>
    /// <param name="secrets">Values to redact from the request/response buffers.</param>
    /// <param name="capturedUtcIso">Capture timestamp, emitted verbatim as <c>capturedUtc</c>.</param>
    /// <returns>The indented JSON text of the fixture.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="capture"/> or <paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A secret survived sanitizing.</exception>
    public static string BuildFixtureJson(
        ProtocolCapture capture,
        IReadOnlyList<CaptureSecret> secrets,
        string capturedUtcIso)
    {
        ArgumentNullException.ThrowIfNull(capture);
        // Checked up front so a missing secret list fails even when both buffers are null.
        ArgumentNullException.ThrowIfNull(secrets);

        BufferSection? request = BuildSection(capture.Request, secrets);
        BufferSection? response = BuildSection(capture.Response, secrets);

        var document = new
        {
            op = capture.Op,
            capturedUtc = capturedUtcIso,
            notes = capture.Notes,
            request,
            response,
        };

        return JsonSerializer.Serialize(document, FixtureJsonOptions);
    }

    /// <summary>Serializes the fixture and writes it to <paramref name="fixtureRoot"/>/&lt;op&gt;/&lt;name&gt;.json. Returns the path.</summary>
    /// <param name="fixtureRoot">Root directory of the fixture tree.</param>
    /// <param name="name">File name without extension; must be a single path segment.</param>
    /// <param name="capture">The raw capture; its <c>Op</c> must be a single path segment.</param>
    /// <param name="secrets">Values to redact from the request/response buffers.</param>
    /// <param name="capturedUtcIso">Capture timestamp, emitted verbatim as <c>capturedUtc</c>.</param>
    /// <returns>The full path of the file that was written (UTF-8, no BOM).</returns>
    /// <exception cref="ArgumentException">
    /// An argument is null/blank, or <paramref name="name"/> / <c>capture.Op</c> is not a single path segment.
    /// </exception>
    /// <exception cref="InvalidOperationException">A secret survived sanitizing.</exception>
    public static string Write(
        string fixtureRoot,
        string name,
        ProtocolCapture capture,
        IReadOnlyList<CaptureSecret> secrets,
        string capturedUtcIso)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(fixtureRoot);
        ArgumentException.ThrowIfNullOrWhiteSpace(name);
        ArgumentNullException.ThrowIfNull(capture);
        ArgumentException.ThrowIfNullOrWhiteSpace(capture.Op);

        // Path.Combine silently drops everything before a rooted argument, and separators or ".."
        // would let the file land outside <fixtureRoot>/<op>/. Reject both before touching the disk.
        EnsureSinglePathSegment(name, nameof(name));
        EnsureSinglePathSegment(capture.Op, nameof(capture));

        string json = BuildFixtureJson(capture, secrets, capturedUtcIso);

        string directory = Path.Combine(fixtureRoot, capture.Op);
        Directory.CreateDirectory(directory);

        string path = Path.Combine(directory, name + ".json");
        File.WriteAllText(path, json, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
        return path;
    }

    /// <summary>Throws unless <paramref name="value"/> is a plain file/directory name with no path parts.</summary>
    private static void EnsureSinglePathSegment(string value, string paramName)
    {
        bool isPlainName = value is not ("." or "..")
            && string.Equals(Path.GetFileName(value), value, StringComparison.Ordinal);
        if (!isPlainName)
        {
            throw new ArgumentException(
                $"'{value}' must be a single path segment (no directory separators, roots, or relative components).",
                paramName);
        }
    }

    /// <summary>
    /// Sanitizes one buffer and packages it for serialization; returns null when the buffer is absent.
    /// Only secrets that actually matched are listed in the section's redactions.
    /// </summary>
    private static BufferSection? BuildSection(byte[]? raw, IReadOnlyList<CaptureSecret> secrets)
    {
        if (raw is null)
        {
            return null;
        }

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(raw, secrets);
        // Fail closed: never hand back a section that still contains a secret.
        ProtocolCaptureSanitizer.AssertNoSecretsRemain(result.Sanitized, secrets);

        return new BufferSection(
            Length: raw.Length,
            Sha256: Convert.ToHexString(SHA256.HashData(result.Sanitized)).ToLowerInvariant(),
            Hex: Convert.ToHexString(result.Sanitized).ToLowerInvariant(),
            Redactions: result.Report
                .Where(r => r.Total > 0)
                .Select(r => new RedactionEntry(r.Name, r.AsciiMatches, r.Utf16Matches))
                .ToArray());
    }

    /// <summary>Serialized form of one sanitized buffer (hash and hex are of the sanitized bytes).</summary>
    private sealed record BufferSection(int Length, string Sha256, string Hex, IReadOnlyList<RedactionEntry> Redactions);

    /// <summary>Serialized form of one scrub-report entry; carries the secret's label, never its value.</summary>
    private sealed record RedactionEntry(string Secret, int AsciiMatches, int Utf16Matches);
}
@@ -12,8 +12,10 @@ using System.Security.Cryptography;
using System.Runtime.Versioning;
using System.Text;
using System.Text.Json;
using AVEVA.Historian.Client;
using AVEVA.Historian.Client.Wcf;
using AVEVA.Historian.Client.Wcf.Contracts;
using AVEVA.Historian.ReverseEngineering.Capture;
using dnlib.DotNet;
using dnlib.DotNet.Emit;
@@ -68,6 +70,7 @@ try
"wcf-start-event-query" => StartWcfEventQuery(args),
"wcf-register-event-tag" => RegisterEventTagAndStartQuery(args),
"wcf-add-event-tag" => AddEventTagAndStartQuery(args),
"capture-tag-info" => CaptureTagInfo(args),
_ => UnknownCommand(args[0])
};
}
@@ -3605,6 +3608,90 @@ static int ProbeWcfTagInfo(string[] args)
return result.Success ? 0 : 1;
}
// CW-1: capture a live GetTagInfoFromName response buffer and persist it as a sanitized,
// committable golden fixture under fixtures/protocol/get-tag-info/. The same native byte blob
// travels inside the 2023 R2 gRPC RetrievalService.GetTagInfosFromName response, so the fixture
// is transport-agnostic. Usage: capture-tag-info [host] [port] [tag] [fixture-root]
// Returns 0 after the fixture is written, 1 when the server returned no bytes for the tag.
static int CaptureTagInfo(string[] args)
{
    string host = args.Length > 1 ? args[1] : "localhost";
    int port = args.Length > 2 && int.TryParse(args[2], out int parsedPort)
        ? parsedPort
        : HistorianWcfBindingFactory.DefaultPort;
    string tag = args.Length > 3 ? args[3] : "OtOpcUaParityTest_001.Counter";
    string fixtureRoot = args.Length > 4 ? args[4] : ResolveFixtureRoot();

    var options = new HistorianClientOptions
    {
        Host = host,
        Port = port,
        IntegratedSecurity = true,
    };

    IReadOnlyDictionary<string, byte[]?> raw = HistorianWcfTagClient.GetTagInfoRawBytesForProbe(options, [tag]);
    byte[]? response = raw.TryGetValue(tag, out byte[]? bytes) ? bytes : null;
    if (response is null || response.Length == 0)
    {
        Console.Error.WriteLine($"GetTagInfoFromName returned no bytes for the requested tag against {host}:{port}.");
        return 1;
    }

    // Redact every identity-bearing value that could appear in the buffer: the requested tag,
    // the host/machine name, and the captured user. The sanitizer scrubs both encodings and
    // refuses to emit if any value survives.
    var secrets = new List<CaptureSecret>
    {
        new("tag", tag),
        new("host", host),
        new("machine", Environment.MachineName),
        new("user", Environment.UserName),
    };
    string? envUser = Environment.GetEnvironmentVariable("HISTORIAN_USER");
    if (!string.IsNullOrWhiteSpace(envUser))
    {
        secrets.Add(new CaptureSecret("env-user", envUser));
    }

    var capture = new ProtocolCapture(
        Op: "get-tag-info",
        Request: null,
        Response: response,
        Notes: "RetrievalService.GetTagInfoFromName response (CTagMetadata buffer); identical bytes on 2023 R2 gRPC GetTagInfosFromName.");

    // Read the clock once so the file name and the capturedUtc field always describe the same
    // instant, and format with the invariant culture so a non-Gregorian default calendar on the
    // capture machine cannot change the year/month digits in the file name.
    DateTime capturedAt = DateTime.UtcNow;
    string capturedUtc = capturedAt.ToString("o", System.Globalization.CultureInfo.InvariantCulture);
    string fixtureName = "analog-" + capturedAt.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);
    string path = ProtocolFixtureWriter.Write(fixtureRoot, fixtureName, capture, secrets, capturedUtc);

    // The writer does not return its scrub report, so it is recomputed here for the console summary.
    var summary = new
    {
        Op = capture.Op,
        ResponseLength = response.Length,
        FixturePath = path,
        Redactions = ProtocolCaptureSanitizer.Sanitize(response, secrets).Report
            .Where(r => r.Total > 0)
            .Select(r => new { r.Name, r.AsciiMatches, r.Utf16Matches }),
    };
    Console.WriteLine(JsonSerializer.Serialize(summary, CreateJsonOptions()));
    return 0;
}
// Locate the fixture root: starting at the working directory, climb parent by parent until a
// directory containing Histsdk.slnx (the repo root) is found and return <repo>/fixtures/protocol.
// When no ancestor holds the solution file, use fixtures/protocol beneath the working directory.
static string ResolveFixtureRoot()
{
    for (DirectoryInfo? candidate = new(Directory.GetCurrentDirectory());
         candidate is not null;
         candidate = candidate.Parent)
    {
        string solutionMarker = Path.Combine(candidate.FullName, "Histsdk.slnx");
        if (!File.Exists(solutionMarker))
        {
            continue;
        }

        return Path.Combine(candidate.FullName, "fixtures", "protocol");
    }

    string workingDirectory = Directory.GetCurrentDirectory();
    return Path.Combine(workingDirectory, "fixtures", "protocol");
}
static int ProbeWcfLikeTagBrowse(string[] args)
{
string host = args.Length > 1 ? args[1] : "localhost";
@@ -6370,6 +6457,9 @@ static void PrintHelp()
instrument-tagquery-gettaginfo [dll-path] [output-path]
Write a reverse-only wrapper copy that logs TagQuery CTagMetadata vectors.
mark <scenario-name> Emit a timestamp marker for Wireshark/API Monitor notes.
capture-tag-info [host] [port] [tag] [fixture-root]
CW-1: capture a live GetTagInfoFromName buffer and write a
sanitized golden fixture to fixtures/protocol/get-tag-info/.
wcf-probe [host] [port] Probe Hist/Retr/Stat WCF GetV endpoints with MDAS encoding.
wcf-cert-probe [host] [port] [dns]
Probe HistCert GetV with MDAS over TLS transport security.