CW-1: reusable capture -> sanitize -> golden-fixture pipeline

Adds the highest-leverage reverse-engineering primitive from the roadmap: one
path to turn a live operation buffer into a committable golden fixture. Unblocks
every capture-tier item (R0.5, R1.x, R2.1).

- ProtocolCaptureSanitizer: redacts identity-bearing values (host, tag, user,
  machine) from a native buffer in BOTH ASCII and UTF-16LE, overwriting in place
  with an 'X' fill so length and every field offset are preserved (keeps the
  fixture useful for byte-layout RE). ASCII-letter matching is case-insensitive;
  secrets < 3 chars are skipped to avoid collision corruption. AssertNoSecretsRemain
  is a fail-closed safety net that refuses to emit if any value survives.
- ProtocolFixtureWriter: serializes a capture to fixtures/protocol/<op>/<name>.json
  with sanitized hex, length, SHA-256 of the sanitized bytes, and a scrub report.
  Timestamps are passed in (deterministic / testable).
- capture-tag-info CLI command: captures a live GetTagInfoFromName response and
  writes the fixture. The same native bytes ride inside 2023 R2 gRPC
  GetTagInfosFromName, so the fixture is transport-agnostic.
- 10 unit tests for the sanitizer/writer (test project now references the RE tool).
- First real fixture: get-tag-info/analog-*.json — a 98-byte Int4 CTagMetadata
  buffer captured live from the local Historian 2020 server, tag name redacted,
  verified to contain no identity (descriptor 03 c3 00 31 = Int4, as documented).

180 non-live unit tests green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-06-19 14:56:48 -04:00
parent 6b892b69ba
commit fa9cde3e2f
6 changed files with 502 additions and 0 deletions
@@ -0,0 +1,18 @@
{
"op": "get-tag-info",
"capturedUtc": "2026-06-19T18:55:46.5988258Z",
"notes": "RetrievalService.GetTagInfoFromName response (CTagMetadata buffer); identical bytes on 2023 R2 gRPC GetTagInfosFromName.",
"request": null,
"response": {
"length": 98,
"sha256": "cdda36baa869355b52ccb4be2735ccacfa2da69f0cafe62e88b807f1a05089fd",
"hex": "03c3003184228c4058e1874a984b3dbecbe0aa42ee000000091d0058585858585858585858585858585858585858585858585858585858580904004d44415302030102000000d057f49465d8dc010a0000000000000024400000000000002440fe00",
"redactions": [
{
"secret": "tag",
"asciiMatches": 1,
"utf16Matches": 0
}
]
}
}
@@ -21,6 +21,8 @@
<ItemGroup>
<ProjectReference Include="..\..\src\AVEVA.Historian.Client\AVEVA.Historian.Client.csproj" />
<!-- Reverse-engineering tooling: unit-tests the CW-1 capture sanitizer / fixture writer. -->
<ProjectReference Include="..\..\tools\AVEVA.Historian.ReverseEngineering\AVEVA.Historian.ReverseEngineering.csproj" />
</ItemGroup>
</Project>
@@ -0,0 +1,140 @@
using System.Text;
using System.Text.Json;
using AVEVA.Historian.ReverseEngineering.Capture;
namespace AVEVA.Historian.Client.Tests;
/// <summary>
/// Unit coverage for the CW-1 capture sanitizer and fixture writer — the reusable
/// "redact identity → emit committable fixture" core that all capture-tier work depends on.
/// </summary>
public sealed class ProtocolCaptureSanitizerTests
{
    /// <summary>Encodes <paramref name="s"/> as single-byte ASCII.</summary>
    private static byte[] Ascii(string s) => Encoding.ASCII.GetBytes(s);

    /// <summary>Encodes <paramref name="s"/> as UTF-16LE.</summary>
    private static byte[] Utf16(string s) => Encoding.Unicode.GetBytes(s);

    [Fact]
    public void Sanitize_RedactsAsciiOccurrence_PreservingLength()
    {
        byte[] buffer = [0x01, 0x02, .. Ascii("SECRETTAG"), 0x03];

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(buffer, [new CaptureSecret("tag", "SECRETTAG")]);

        Assert.Equal(buffer.Length, result.Sanitized.Length);
        Assert.Equal(0x01, result.Sanitized[0]);
        Assert.Equal(0x03, result.Sanitized[^1]);
        Assert.DoesNotContain(Ascii("SECRETTAG"), result.Sanitized); // value gone
        Assert.Equal(1, result.Report[0].AsciiMatches);
        Assert.Equal(0, result.Report[0].Utf16Matches);
    }

    [Fact]
    public void Sanitize_RedactsUtf16Occurrence()
    {
        byte[] buffer = [0xAA, .. Utf16("HostName"), 0xBB];

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(buffer, [new CaptureSecret("host", "HostName")]);

        Assert.Equal(0, result.Report[0].AsciiMatches);
        Assert.Equal(1, result.Report[0].Utf16Matches);
        Assert.Equal(buffer.Length, result.Sanitized.Length);
        Assert.Equal(0xAA, result.Sanitized[0]);
        Assert.Equal(0xBB, result.Sanitized[^1]);

        // Counting a match is not enough: everything between the two sentinels was the
        // UTF-16LE secret, so every one of those bytes must now be the fill byte.
        Assert.All(result.Sanitized[1..^1], b => Assert.Equal(ProtocolCaptureSanitizer.FillByte, b));
    }

    [Fact]
    public void Sanitize_IsCaseInsensitiveForAsciiLetters()
    {
        byte[] buffer = Ascii("myserver01");

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(buffer, [new CaptureSecret("host", "MyServer01")]);

        Assert.Equal(1, result.Report[0].AsciiMatches);
        Assert.All(result.Sanitized, b => Assert.Equal(ProtocolCaptureSanitizer.FillByte, b));
    }

    [Fact]
    public void Sanitize_RedactsMultipleOccurrences()
    {
        byte[] buffer = [.. Ascii("TagA"), 0x00, .. Ascii("TagA"), 0x00, .. Ascii("TagA")];

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(buffer, [new CaptureSecret("tag", "TagA")]);

        Assert.Equal(3, result.Report[0].AsciiMatches);
        Assert.Equal(3, result.TotalRedactions);
    }

    [Fact]
    public void Sanitize_IgnoresShortSecrets_ToAvoidCollisionCorruption()
    {
        byte[] buffer = [0x41, 0x42, 0x43]; // "ABC"

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(buffer, [new CaptureSecret("x", "AB")]); // length 2 < MinSecretLength

        Assert.Equal(buffer, result.Sanitized); // untouched
        Assert.Equal(0, result.TotalRedactions);
    }

    [Fact]
    public void Sanitize_LeavesUnrelatedBytesUntouched()
    {
        byte[] buffer = [.. Ascii("keepme"), .. Ascii("DROPME"), .. Ascii("keepme")];

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(buffer, [new CaptureSecret("s", "DROPME")]);

        Assert.Equal(Ascii("keepme"), result.Sanitized[..6]);
        Assert.Equal(Ascii("keepme"), result.Sanitized[^6..]);
    }

    [Fact]
    public void AssertNoSecretsRemain_Passes_WhenRedacted()
    {
        byte[] buffer = Ascii("prefix-SECRET-suffix");
        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(buffer, [new CaptureSecret("s", "SECRET")]);

        // Passing means "does not throw".
        ProtocolCaptureSanitizer.AssertNoSecretsRemain(result.Sanitized, [new CaptureSecret("s", "SECRET")]);
    }

    [Fact]
    public void AssertNoSecretsRemain_Throws_WhenSecretSurvives()
    {
        byte[] buffer = Ascii("prefix-SECRET-suffix");

        Assert.Throws<InvalidOperationException>(
            () => ProtocolCaptureSanitizer.AssertNoSecretsRemain(buffer, [new CaptureSecret("s", "SECRET")]));
    }

    [Fact]
    public void FixtureWriter_BuildJson_OmitsRawIdentity_AndRecordsScrubReport()
    {
        byte[] response = [0x4E, .. Utf16("CustomerTag.PV"), 0xFE, 0x00];
        var capture = new ProtocolCapture("get-tag-info", Request: null, Response: response, Notes: "live 2020 server");
        var secrets = new[] { new CaptureSecret("tag", "CustomerTag.PV") };

        string json = ProtocolFixtureWriter.BuildFixtureJson(capture, secrets, "2026-06-19T00:00:00Z");

        // Guards the plain-text fields (op, notes, redaction labels) against carrying the identity.
        Assert.DoesNotContain("CustomerTag", json);

        using JsonDocument doc = JsonDocument.Parse(json);
        JsonElement root = doc.RootElement;
        Assert.Equal("get-tag-info", root.GetProperty("op").GetString());
        Assert.Equal("2026-06-19T00:00:00Z", root.GetProperty("capturedUtc").GetString());
        Assert.Equal(JsonValueKind.Null, root.GetProperty("request").ValueKind);

        JsonElement resp = root.GetProperty("response");
        Assert.Equal(response.Length, resp.GetProperty("length").GetInt32());
        Assert.Equal(64, resp.GetProperty("sha256").GetString()!.Length);

        // The buffer is stored hex-encoded, so the plain-text check above can never see it.
        // Compare against the hex of the secret's byte encodings to prove the scrub happened.
        string hex = resp.GetProperty("hex").GetString()!;
        Assert.DoesNotContain(Convert.ToHexString(Utf16("CustomerTag.PV")), hex, StringComparison.OrdinalIgnoreCase);
        Assert.DoesNotContain(Convert.ToHexString(Ascii("CustomerTag.PV")), hex, StringComparison.OrdinalIgnoreCase);

        JsonElement redaction = resp.GetProperty("redactions")[0];
        Assert.Equal("tag", redaction.GetProperty("secret").GetString());
        Assert.Equal(1, redaction.GetProperty("utf16Matches").GetInt32());
    }

    [Fact]
    public void FixtureWriter_Write_CreatesOpSubdirectoryFile()
    {
        // Unique temp root so parallel test runs cannot collide; removed in the finally block.
        string root = Path.Combine(Path.GetTempPath(), "histsdk-fixture-test-" + Guid.NewGuid().ToString("N"));
        try
        {
            var capture = new ProtocolCapture("get-tag-info", Request: null, Response: [0x01, 0x02, 0x03], Notes: null);

            string path = ProtocolFixtureWriter.Write(root, "sample", capture, [], "2026-06-19T00:00:00Z");

            Assert.True(File.Exists(path));
            Assert.EndsWith(Path.Combine("get-tag-info", "sample.json"), path);
        }
        finally
        {
            if (Directory.Exists(root))
            {
                Directory.Delete(root, recursive: true);
            }
        }
    }
}
@@ -0,0 +1,163 @@
using System.Text;
namespace AVEVA.Historian.ReverseEngineering.Capture;
/// <summary>A sensitive value to scrub from a captured buffer before it can be committed.</summary>
/// <remarks>
/// Values that are null, empty, or shorter than <see cref="ProtocolCaptureSanitizer.MinSecretLength"/>
/// characters are skipped by <see cref="ProtocolCaptureSanitizer"/> rather than redacted.
/// </remarks>
/// <param name="Name">Stable label (e.g. "host", "tag", "user") recorded in the scrub report.</param>
/// <param name="Value">The literal value to redact wherever it appears in the buffer.</param>
public sealed record CaptureSecret(string Name, string Value);
/// <summary>Per-secret redaction tally, split by the encoding the value was found in.</summary>
/// <param name="Name">Label of the secret this tally belongs to.</param>
/// <param name="AsciiMatches">Number of single-byte-encoded occurrences that were redacted.</param>
/// <param name="Utf16Matches">Number of UTF-16LE occurrences that were redacted.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Combined number of redactions across both encodings.</summary>
    public int Total
    {
        get { return AsciiMatches + Utf16Matches; }
    }
}
/// <summary>Outcome of a sanitize pass: the redacted copy of the buffer and one report entry per secret.</summary>
/// <param name="Sanitized">The redacted buffer.</param>
/// <param name="Report">Per-secret match counts.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Sum of <see cref="ScrubCount.Total"/> over every entry in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int sum = 0;
            for (int i = 0; i < Report.Count; i++)
            {
                sum += Report[i].Total;
            }

            return sum;
        }
    }
}
/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
///
/// Each secret is matched in both <b>ASCII/UTF-8</b> and <b>UTF-16LE</b> and overwritten in place
/// with a fixed fill byte. The redaction preserves the buffer's exact length and every field
/// offset, so the sanitized fixture remains useful for byte-layout reverse engineering while
/// carrying none of the original identity.
///
/// ASCII-letter matching is case-insensitive (servers may echo a tag/host in a different case than
/// requested); other bytes match exactly. Secrets shorter than <see cref="MinSecretLength"/> are
/// ignored to avoid corrupting unrelated bytes that coincidentally collide with a short value.
///
/// Secrets are scrubbed longest-first. If a short secret (e.g. a host name) is contained in a
/// longer one (e.g. a host-qualified tag name), scrubbing the short one first would break the
/// longer match and leave the remainder of the long value in the buffer undetected.
/// </summary>
public static class ProtocolCaptureSanitizer
{
    /// <summary>Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.</summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Returns a redacted copy of <paramref name="buffer"/>; the input itself is never modified.
    /// </summary>
    /// <param name="buffer">The raw captured bytes.</param>
    /// <param name="secrets">Values to redact. Entries that are too short or empty are reported with zero matches.</param>
    /// <returns>
    /// The redacted bytes (same length as the input) and one <see cref="ScrubCount"/> per secret,
    /// in the same order as <paramref name="secrets"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();

        // Filled by original index so the report order matches the caller's order even though
        // the scrubbing itself runs longest-first.
        ScrubCount[] report = new ScrubCount[secrets.Count];

        foreach (int slot in LongestFirst(secrets))
        {
            CaptureSecret secret = secrets[slot];
            if (!IsScrubbable(secret))
            {
                report[slot] = new ScrubCount(secret.Name, 0, 0);
                continue;
            }

            // UTF-8 is byte-identical to ASCII for pure-ASCII values, and unlike Encoding.ASCII it
            // does not collapse non-ASCII characters into '?', which would miss the real bytes.
            int narrow = RedactPattern(working, Encoding.UTF8.GetBytes(secret.Value));
            int wide = RedactPattern(working, Encoding.Unicode.GetBytes(secret.Value));
            report[slot] = new ScrubCount(secret.Name, narrow, wide);
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <param name="sanitized">The buffer to verify.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A secret is still present in <paramref name="sanitized"/>.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret))
            {
                continue;
            }

            // Must use exactly the encodings Sanitize uses, otherwise this check has blind spots.
            if (IndexOf(sanitized, Encoding.UTF8.GetBytes(secret.Value), 0) >= 0
                || IndexOf(sanitized, Encoding.Unicode.GetBytes(secret.Value), 0) >= 0)
            {
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>True when the secret has a value long enough to be scrubbed safely.</summary>
    private static bool IsScrubbable(CaptureSecret secret) =>
        !string.IsNullOrEmpty(secret.Value) && secret.Value.Length >= MinSecretLength;

    /// <summary>Length of the secret's value in characters; 0 for a null or empty value.</summary>
    private static int ValueLength(CaptureSecret secret) =>
        string.IsNullOrEmpty(secret.Value) ? 0 : secret.Value.Length;

    /// <summary>
    /// Returns the indices of <paramref name="secrets"/> ordered by descending value length.
    /// Ties keep the caller's order so the result is deterministic.
    /// </summary>
    private static int[] LongestFirst(IReadOnlyList<CaptureSecret> secrets)
    {
        int[] order = new int[secrets.Count];
        for (int i = 0; i < order.Length; i++)
        {
            order[i] = i;
        }

        Array.Sort(order, (left, right) =>
        {
            int byLength = ValueLength(secrets[right]).CompareTo(ValueLength(secrets[left]));
            return byLength != 0 ? byLength : left.CompareTo(right);
        });

        return order;
    }

    /// <summary>
    /// Overwrites every non-overlapping occurrence of <paramref name="pattern"/> in
    /// <paramref name="buffer"/> with <see cref="FillByte"/> and returns how many were found.
    /// </summary>
    private static int RedactPattern(byte[] buffer, byte[] pattern)
    {
        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;
        int index = 0;
        while ((index = IndexOf(buffer, pattern, index)) >= 0)
        {
            buffer.AsSpan(index, pattern.Length).Fill(FillByte);

            // Resume after the region just filled; this also guarantees forward progress
            // when the pattern itself consists of fill bytes.
            index += pattern.Length;
            matches++;
        }

        return matches;
    }

    /// <summary>
    /// Finds the first occurrence of <paramref name="needle"/> in <paramref name="haystack"/> at or
    /// after <paramref name="start"/>, comparing ASCII letters case-insensitively. Returns -1 when
    /// there is no match or the needle is empty.
    /// </summary>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.Length == 0 || haystack.Length - start < needle.Length)
        {
            return -1;
        }

        for (int i = start; i <= haystack.Length - needle.Length; i++)
        {
            bool match = true;
            for (int j = 0; j < needle.Length; j++)
            {
                if (!BytesEqualCaseInsensitive(haystack[i + j], needle[j]))
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.</summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b)
    {
        if (a == b)
        {
            return true;
        }

        return ToLowerAscii(a) == ToLowerAscii(b);
    }

    /// <summary>Maps 'A'-'Z' to 'a'-'z'; every other byte value is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}
@@ -0,0 +1,89 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
namespace AVEVA.Historian.ReverseEngineering.Capture;
/// <summary>One captured operation: the (optional) request buffer and the response buffer, raw.</summary>
/// <param name="Op">Operation label; written as the fixture's <c>op</c> value and used by <see cref="ProtocolFixtureWriter.Write"/> as the subdirectory name.</param>
/// <param name="Request">Raw request bytes, or null when no request was captured (the fixture then records <c>request</c> as null).</param>
/// <param name="Response">Raw response bytes, or null when no response was captured (the fixture then records <c>response</c> as null).</param>
/// <param name="Notes">Free-text note written to the fixture as-is. It is not passed through the sanitizer, so it must not contain identity-bearing values.</param>
public sealed record ProtocolCapture(string Op, byte[]? Request, byte[]? Response, string? Notes = null);
/// <summary>
/// CW-1 fixture writer: takes a live <see cref="ProtocolCapture"/>, redacts it with
/// <see cref="ProtocolCaptureSanitizer"/>, and writes a committable JSON fixture under
/// <c>fixtures/protocol/&lt;op&gt;/</c>. The fixture records sanitized hex, lengths, SHA-256 of the
/// sanitized bytes, and the scrub report — never the original identity-bearing bytes.
///
/// Timestamps are passed in (never generated here) so the writer stays deterministic and testable.
/// </summary>
public static class ProtocolFixtureWriter
{
    // Built once and reused: constructing JsonSerializerOptions per call discards the
    // serializer's cached type metadata every time (analyzer rule CA1869).
    private static readonly JsonSerializerOptions FixtureJsonOptions = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    };

    // UTF-8 without a byte-order mark.
    private static readonly UTF8Encoding Utf8NoBom = new(encoderShouldEmitUTF8Identifier: false);

    /// <summary>
    /// Sanitizes both buffers of <paramref name="capture"/> and returns the fixture as indented JSON.
    /// </summary>
    /// <param name="capture">The raw capture. A null request or response is emitted as JSON null.</param>
    /// <param name="secrets">Values to redact from the buffers.</param>
    /// <param name="capturedUtcIso">Timestamp text written verbatim as <c>capturedUtc</c>.</param>
    /// <returns>The fixture document as a JSON string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="capture"/> or <paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A secret survived sanitization.</exception>
    public static string BuildFixtureJson(
        ProtocolCapture capture,
        IReadOnlyList<CaptureSecret> secrets,
        string capturedUtcIso)
    {
        ArgumentNullException.ThrowIfNull(capture);

        // Validate up front so a null list fails the same way whether or not a buffer is present.
        ArgumentNullException.ThrowIfNull(secrets);

        BufferSection? request = BuildSection(capture.Request, secrets);
        BufferSection? response = BuildSection(capture.Response, secrets);

        var document = new
        {
            op = capture.Op,
            capturedUtc = capturedUtcIso,
            notes = capture.Notes,
            request,
            response,
        };

        return JsonSerializer.Serialize(document, FixtureJsonOptions);
    }

    /// <summary>Serializes the fixture and writes it to <paramref name="fixtureRoot"/>/&lt;op&gt;/&lt;name&gt;.json. Returns the path.</summary>
    /// <param name="fixtureRoot">Root directory for fixtures; the op subdirectory is created if missing.</param>
    /// <param name="name">File name without the <c>.json</c> extension.</param>
    /// <param name="capture">The raw capture to sanitize and persist.</param>
    /// <param name="secrets">Values to redact from the buffers.</param>
    /// <param name="capturedUtcIso">Timestamp text written verbatim as <c>capturedUtc</c>.</param>
    /// <exception cref="ArgumentException"><paramref name="fixtureRoot"/> or <paramref name="name"/> is null, empty, or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="capture"/> or <paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A secret survived sanitization; nothing is written.</exception>
    public static string Write(
        string fixtureRoot,
        string name,
        ProtocolCapture capture,
        IReadOnlyList<CaptureSecret> secrets,
        string capturedUtcIso)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(fixtureRoot);
        ArgumentException.ThrowIfNullOrWhiteSpace(name);
        ArgumentNullException.ThrowIfNull(capture);

        // Build the JSON before touching the file system so a sanitization failure leaves no trace.
        string json = BuildFixtureJson(capture, secrets, capturedUtcIso);

        string directory = Path.Combine(fixtureRoot, capture.Op);
        Directory.CreateDirectory(directory);

        string path = Path.Combine(directory, name + ".json");
        File.WriteAllText(path, json, Utf8NoBom);
        return path;
    }

    /// <summary>
    /// Sanitizes one buffer and describes it for the fixture; returns null when there is no buffer.
    /// Throws instead of returning if any secret is still present after sanitization.
    /// </summary>
    private static BufferSection? BuildSection(byte[]? raw, IReadOnlyList<CaptureSecret> secrets)
    {
        if (raw is null)
        {
            return null;
        }

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(raw, secrets);
        ProtocolCaptureSanitizer.AssertNoSecretsRemain(result.Sanitized, secrets);

        return new BufferSection(
            Length: raw.Length,
            Sha256: Convert.ToHexString(SHA256.HashData(result.Sanitized)).ToLowerInvariant(),
            Hex: Convert.ToHexString(result.Sanitized).ToLowerInvariant(),
            // Only secrets that actually matched are listed.
            Redactions: result.Report
                .Where(r => r.Total > 0)
                .Select(r => new RedactionEntry(r.Name, r.AsciiMatches, r.Utf16Matches))
                .ToArray());
    }

    /// <summary>JSON shape of one sanitized buffer.</summary>
    private sealed record BufferSection(int Length, string Sha256, string Hex, IReadOnlyList<RedactionEntry> Redactions);

    /// <summary>JSON shape of one scrub-report entry.</summary>
    private sealed record RedactionEntry(string Secret, int AsciiMatches, int Utf16Matches);
}
@@ -12,8 +12,10 @@ using System.Security.Cryptography;
using System.Runtime.Versioning;
using System.Text;
using System.Text.Json;
using AVEVA.Historian.Client;
using AVEVA.Historian.Client.Wcf;
using AVEVA.Historian.Client.Wcf.Contracts;
using AVEVA.Historian.ReverseEngineering.Capture;
using dnlib.DotNet;
using dnlib.DotNet.Emit;
@@ -68,6 +70,7 @@ try
"wcf-start-event-query" => StartWcfEventQuery(args),
"wcf-register-event-tag" => RegisterEventTagAndStartQuery(args),
"wcf-add-event-tag" => AddEventTagAndStartQuery(args),
"capture-tag-info" => CaptureTagInfo(args),
_ => UnknownCommand(args[0])
};
}
@@ -3605,6 +3608,90 @@ static int ProbeWcfTagInfo(string[] args)
return result.Success ? 0 : 1;
}
// CW-1: capture a live GetTagInfoFromName response buffer and persist it as a sanitized,
// committable golden fixture under fixtures/protocol/get-tag-info/. The same native byte blob
// travels inside the 2023 R2 gRPC RetrievalService.GetTagInfosFromName response, so the fixture
// is transport-agnostic. Usage: capture-tag-info [host] [port] [tag] [fixture-root]
// Returns 0 when the fixture was written, 1 when the server returned no bytes for the tag.
static int CaptureTagInfo(string[] args)
{
    // args[0] is the command name; every positional argument after it is optional.
    string host = args.Length > 1 ? args[1] : "localhost";
    int port = args.Length > 2 && int.TryParse(args[2], out int parsedPort)
        ? parsedPort
        : HistorianWcfBindingFactory.DefaultPort;
    string tag = args.Length > 3 ? args[3] : "OtOpcUaParityTest_001.Counter";
    string fixtureRoot = args.Length > 4 ? args[4] : ResolveFixtureRoot();

    var options = new HistorianClientOptions
    {
        Host = host,
        Port = port,
        IntegratedSecurity = true,
    };

    IReadOnlyDictionary<string, byte[]?> raw = HistorianWcfTagClient.GetTagInfoRawBytesForProbe(options, [tag]);
    byte[]? response = raw.TryGetValue(tag, out byte[]? bytes) ? bytes : null;
    if (response is null || response.Length == 0)
    {
        Console.Error.WriteLine($"GetTagInfoFromName returned no bytes for the requested tag against {host}:{port}.");
        return 1;
    }

    // Redact every identity-bearing value that could appear in the buffer: the requested tag,
    // the host/machine name, and the captured user. The sanitizer scrubs each value in a
    // single-byte encoding and in UTF-16LE, and refuses to emit if any value survives.
    var secrets = new List<CaptureSecret>
    {
        new("tag", tag),
        new("host", host),
        new("machine", Environment.MachineName),
        new("user", Environment.UserName),
    };

    string? envUser = Environment.GetEnvironmentVariable("HISTORIAN_USER");
    if (!string.IsNullOrWhiteSpace(envUser))
    {
        secrets.Add(new CaptureSecret("env-user", envUser));
    }

    var capture = new ProtocolCapture(
        Op: "get-tag-info",
        Request: null,
        Response: response,
        Notes: "RetrievalService.GetTagInfoFromName response (CTagMetadata buffer); identical bytes on 2023 R2 gRPC GetTagInfosFromName.");

    // Read the clock once so the file name and the recorded capturedUtc always describe the same
    // instant. Format with the invariant culture: a custom format string otherwise uses the
    // current culture's calendar, which would change the digits in the file name.
    DateTime capturedAt = DateTime.UtcNow;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string capturedUtc = capturedAt.ToString("o", invariant);
    string fixtureName = "analog-" + capturedAt.ToString("yyyyMMddHHmmss", invariant);

    string path = ProtocolFixtureWriter.Write(fixtureRoot, fixtureName, capture, secrets, capturedUtc);

    // Write returns only the path, so the scrub report for the console summary is recomputed here.
    var summary = new
    {
        Op = capture.Op,
        ResponseLength = response.Length,
        FixturePath = path,
        Redactions = ProtocolCaptureSanitizer.Sanitize(response, secrets).Report
            .Where(r => r.Total > 0)
            .Select(r => new { r.Name, r.AsciiMatches, r.Utf16Matches }),
    };

    Console.WriteLine(JsonSerializer.Serialize(summary, CreateJsonOptions()));
    return 0;
}
// Locate the fixture root: climb from the working directory towards the file-system root
// looking for the directory that holds Histsdk.slnx, and return fixtures/protocol beneath it.
// When no such directory exists, use fixtures/protocol under the working directory instead.
static string ResolveFixtureRoot()
{
    for (DirectoryInfo? candidate = new(Directory.GetCurrentDirectory());
         candidate is not null;
         candidate = candidate.Parent)
    {
        string marker = Path.Combine(candidate.FullName, "Histsdk.slnx");
        if (!File.Exists(marker))
        {
            continue;
        }

        return Path.Combine(candidate.FullName, "fixtures", "protocol");
    }

    return Path.Combine(Directory.GetCurrentDirectory(), "fixtures", "protocol");
}
static int ProbeWcfLikeTagBrowse(string[] args)
{
string host = args.Length > 1 ? args[1] : "localhost";
@@ -6370,6 +6457,9 @@ static void PrintHelp()
instrument-tagquery-gettaginfo [dll-path] [output-path]
Write a reverse-only wrapper copy that logs TagQuery CTagMetadata vectors.
mark <scenario-name> Emit a timestamp marker for Wireshark/API Monitor notes.
capture-tag-info [host] [port] [tag] [fixture-root]
CW-1: capture a live GetTagInfoFromName buffer and write a
sanitized golden fixture to fixtures/protocol/get-tag-info/.
wcf-probe [host] [port] Probe Hist/Retr/Stat WCF GetV endpoints with MDAS encoding.
wcf-cert-probe [host] [port] [dns]
Probe HistCert GetV with MDAS over TLS transport security.