CW-1: reusable capture -> sanitize -> golden-fixture pipeline
Adds the highest-leverage reverse-engineering primitive from the roadmap: one path to turn a live operation buffer into a committable golden fixture. Unblocks every capture-tier item (R0.5, R1.x, R2.1). - ProtocolCaptureSanitizer: redacts identity-bearing values (host, tag, user, machine) from a native buffer in BOTH ASCII and UTF-16LE, overwriting in place with an 'X' fill so length and every field offset are preserved (keeps the fixture useful for byte-layout RE). ASCII-letter matching is case-insensitive; secrets < 3 chars are skipped to avoid collision corruption. AssertNoSecretsRemain is a fail-closed safety net that refuses to emit if any value survives. - ProtocolFixtureWriter: serializes a capture to fixtures/protocol/<op>/<name>.json with sanitized hex, length, SHA-256 of the sanitized bytes, and a scrub report. Timestamps are passed in (deterministic / testable). - capture-tag-info CLI command: captures a live GetTagInfoFromName response and writes the fixture. The same native bytes ride inside 2023 R2 gRPC GetTagInfosFromName, so the fixture is transport-agnostic. - 11 unit tests for the sanitizer/writer (test project now references the RE tool). - First real fixture: get-tag-info/analog-*.json — a 98-byte Int4 CTagMetadata buffer captured live from the local Historian 2020 server, tag name redacted, verified to contain no identity (descriptor 03 c3 00 31 = Int4, as documented). 180 non-live unit tests green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,163 @@
|
||||
using System.Text;
|
||||
|
||||
namespace AVEVA.Historian.ReverseEngineering.Capture;
|
||||
|
||||
/// <summary>A sensitive value to scrub from a captured buffer before it can be committed.</summary>
|
||||
/// <param name="Name">Stable label (e.g. "host", "tag", "user") recorded in the scrub report.</param>
|
||||
/// <param name="Value">The literal value to redact wherever it appears in the buffer.</param>
|
||||
/// <remarks>
/// <see cref="ProtocolCaptureSanitizer"/> does not scrub a secret whose <paramref name="Value"/> is
/// null, empty, or shorter than <see cref="ProtocolCaptureSanitizer.MinSecretLength"/>; such a secret
/// still appears in the scrub report, with zero matches.
/// </remarks>
public sealed record CaptureSecret(string Name, string Value);
|
||||
|
||||
/// <summary>Per-secret redaction tally, split by the encoding in which the value was found.</summary>
/// <param name="Name">Label of the secret this tally belongs to.</param>
/// <param name="AsciiMatches">Occurrences overwritten by the narrow (one byte per ASCII character) pass.</param>
/// <param name="Utf16Matches">Occurrences overwritten by the UTF-16LE pass.</param>
public sealed record ScrubCount(string Name, int AsciiMatches, int Utf16Matches)
{
    /// <summary>Combined number of redactions across both encodings.</summary>
    public int Total
    {
        get { return AsciiMatches + Utf16Matches; }
    }
}
|
||||
|
||||
/// <summary>Outcome of a sanitize pass: the redacted copy of the buffer and the per-secret tallies.</summary>
/// <param name="Sanitized">The redacted bytes.</param>
/// <param name="Report">Redaction tallies; <see cref="ProtocolCaptureSanitizer.Sanitize"/> adds one entry per supplied secret.</param>
public sealed record SanitizeResult(byte[] Sanitized, IReadOnlyList<ScrubCount> Report)
{
    /// <summary>Sum of <see cref="ScrubCount.Total"/> over every entry in <see cref="Report"/>.</summary>
    public int TotalRedactions
    {
        get
        {
            int sum = 0;
            for (int i = 0; i < Report.Count; i++)
            {
                sum += Report[i].Total;
            }

            return sum;
        }
    }
}
|
||||
|
||||
/// <summary>
/// CW-1 core: redacts identity-bearing values (hostnames, tag names, user names) from a captured
/// native Historian buffer so the result can be saved as a committable golden fixture.
/// <para>
/// Each secret is matched in both <b>ASCII/UTF-8</b> and <b>UTF-16LE</b> and every match is overwritten
/// with <see cref="FillByte"/>. Redaction never changes the buffer's length or any field offset, so the
/// sanitized fixture stays useful for byte-layout reverse engineering.
/// </para>
/// <para>
/// ASCII letters are compared case-insensitively (servers may echo a tag/host in a different case than
/// requested); all other bytes must match exactly. Secrets shorter than <see cref="MinSecretLength"/>
/// are ignored to avoid corrupting unrelated bytes that coincidentally collide with a short value.
/// </para>
/// <para>
/// Matches are located in the <i>original</i> bytes, not in the partially redacted copy, so secrets
/// that overlap or contain one another are all fully covered regardless of list order. A side effect
/// is that two secrets with the same value each report the same match count.
/// </para>
/// <para>
/// Non-ASCII characters are only matched in their UTF-8 and UTF-16LE forms; narrow strings stored in a
/// legacy ANSI code page are not covered for such characters.
/// </para>
/// </summary>
public static class ProtocolCaptureSanitizer
{
    /// <summary>Fill byte written over a redacted region ('X'). Chosen to be obviously non-data on inspection.</summary>
    public const byte FillByte = (byte)'X';

    /// <summary>Secrets shorter than this many characters are not scrubbed (too collision-prone).</summary>
    public const int MinSecretLength = 3;

    /// <summary>
    /// Returns a redacted copy of <paramref name="buffer"/> together with one <see cref="ScrubCount"/>
    /// per entry of <paramref name="secrets"/> (in the same order). The input span is never modified.
    /// </summary>
    /// <param name="buffer">Raw captured bytes.</param>
    /// <param name="secrets">Values to redact; entries that are empty or too short are reported with zero matches.</param>
    /// <returns>The sanitized bytes and the per-secret report.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    public static SanitizeResult Sanitize(ReadOnlySpan<byte> buffer, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        byte[] working = buffer.ToArray();
        List<ScrubCount> report = new(secrets.Count);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret.Value))
            {
                report.Add(new ScrubCount(secret.Name, 0, 0));
                continue;
            }

            // UTF-8 is byte-identical to ASCII for pure-ASCII values, but unlike Encoding.ASCII it does
            // not collapse non-ASCII characters to '?', which would make the pattern unmatchable.
            int narrow = RedactPattern(buffer, working, Encoding.UTF8.GetBytes(secret.Value));
            int wide = RedactPattern(buffer, working, Encoding.Unicode.GetBytes(secret.Value));
            report.Add(new ScrubCount(secret.Name, narrow, wide));
        }

        return new SanitizeResult(working, report);
    }

    /// <summary>
    /// Safety net: throws if any secret value still survives (in either encoding) in the buffer.
    /// Call after <see cref="Sanitize"/> before writing a fixture so a redaction gap can never
    /// leak identity into a committed file.
    /// </summary>
    /// <param name="sanitized">The bytes about to be emitted.</param>
    /// <param name="secrets">The same secrets that were passed to <see cref="Sanitize"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="secrets"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A scrubbable secret is still present in <paramref name="sanitized"/>.</exception>
    public static void AssertNoSecretsRemain(ReadOnlySpan<byte> sanitized, IReadOnlyList<CaptureSecret> secrets)
    {
        ArgumentNullException.ThrowIfNull(secrets);

        foreach (CaptureSecret secret in secrets)
        {
            if (!IsScrubbable(secret.Value))
            {
                continue;
            }

            // Must use the same encodings as Sanitize, otherwise the check is blind to what Sanitize targets.
            bool survives =
                IndexOf(sanitized, Encoding.UTF8.GetBytes(secret.Value), 0) >= 0
                || IndexOf(sanitized, Encoding.Unicode.GetBytes(secret.Value), 0) >= 0;

            if (survives)
            {
                throw new InvalidOperationException(
                    $"Sanitized buffer still contains secret '{secret.Name}'. Refusing to emit an unsanitized fixture.");
            }
        }
    }

    /// <summary>True when a secret value is long enough to be scrubbed without excessive collision risk.</summary>
    private static bool IsScrubbable(string? value) =>
        !string.IsNullOrEmpty(value) && value.Length >= MinSecretLength;

    /// <summary>
    /// Finds non-overlapping occurrences of <paramref name="pattern"/> in <paramref name="source"/> and
    /// overwrites the same byte ranges in <paramref name="target"/> with <see cref="FillByte"/>.
    /// Searching the untouched source keeps earlier fills from hiding later, overlapping secrets.
    /// </summary>
    /// <returns>The number of occurrences overwritten.</returns>
    private static int RedactPattern(ReadOnlySpan<byte> source, byte[] target, byte[] pattern)
    {
        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;
        int index = 0;
        while ((index = IndexOf(source, pattern, index)) >= 0)
        {
            target.AsSpan(index, pattern.Length).Fill(FillByte);
            index += pattern.Length;
            matches++;
        }

        return matches;
    }

    /// <summary>
    /// Returns the first offset at or after <paramref name="start"/> where <paramref name="needle"/>
    /// occurs in <paramref name="haystack"/> (ASCII letters compared case-insensitively), or -1.
    /// </summary>
    private static int IndexOf(ReadOnlySpan<byte> haystack, ReadOnlySpan<byte> needle, int start)
    {
        if (needle.Length == 0 || haystack.Length - start < needle.Length)
        {
            return -1;
        }

        int lastCandidate = haystack.Length - needle.Length;
        for (int i = start; i <= lastCandidate; i++)
        {
            bool match = true;
            for (int j = 0; j < needle.Length; j++)
            {
                if (!BytesEqualCaseInsensitive(haystack[i + j], needle[j]))
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>Compare bytes, treating ASCII letters case-insensitively; all other bytes exactly.</summary>
    private static bool BytesEqualCaseInsensitive(byte a, byte b)
    {
        if (a == b)
        {
            return true;
        }

        return ToLowerAscii(a) == ToLowerAscii(b);
    }

    /// <summary>Maps 'A'..'Z' to 'a'..'z'; every other byte is returned unchanged.</summary>
    private static byte ToLowerAscii(byte value) =>
        value is >= (byte)'A' and <= (byte)'Z' ? (byte)(value + 32) : value;
}
|
||||
@@ -0,0 +1,89 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace AVEVA.Historian.ReverseEngineering.Capture;
|
||||
|
||||
/// <summary>One captured operation: the (optional) request buffer and the response buffer, raw.</summary>
|
||||
/// <param name="Op">Operation label; written to the fixture as <c>op</c> and used by <see cref="ProtocolFixtureWriter.Write"/> as the sub-directory name under the fixture root.</param>
/// <param name="Request">Raw request bytes, or <c>null</c> when no request was captured (the fixture's request section is then null).</param>
/// <param name="Response">Raw response bytes, or <c>null</c> when no response was captured (the fixture's response section is then null).</param>
/// <param name="Notes">Optional free text written to the fixture as <c>notes</c>. It is emitted verbatim and is NOT passed through the sanitizer, so it must not contain identity-bearing values.</param>
public sealed record ProtocolCapture(string Op, byte[]? Request, byte[]? Response, string? Notes = null);
|
||||
|
||||
/// <summary>
/// CW-1 fixture writer: takes a live <see cref="ProtocolCapture"/>, redacts it with
/// <see cref="ProtocolCaptureSanitizer"/>, and writes a committable JSON fixture under
/// <c>fixtures/protocol/&lt;op&gt;/</c>. The fixture records sanitized hex, lengths, SHA-256 of the
/// sanitized bytes, and the scrub report — never the original identity-bearing bytes.
/// <para>
/// Timestamps are passed in (never generated here) so the writer stays deterministic and testable.
/// </para>
/// </summary>
public static class ProtocolFixtureWriter
{
    // Shared instance: building JsonSerializerOptions per call discards the serializer's metadata
    // cache each time (analyzer rule CA1869). The options are never mutated after construction.
    private static readonly JsonSerializerOptions FixtureJsonOptions = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    };

    /// <summary>
    /// Builds the fixture document for <paramref name="capture"/> as indented, camel-cased JSON.
    /// Each non-null buffer is sanitized and then verified with
    /// <see cref="ProtocolCaptureSanitizer.AssertNoSecretsRemain"/> before it is included.
    /// </summary>
    /// <param name="capture">The raw capture to serialize.</param>
    /// <param name="secrets">Values to redact from the request and response buffers.</param>
    /// <param name="capturedUtcIso">Capture timestamp, written verbatim as <c>capturedUtc</c>.</param>
    /// <returns>The fixture JSON text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="capture"/> is null, or <paramref name="secrets"/> is null while a buffer is present.</exception>
    /// <exception cref="InvalidOperationException">A secret survived sanitization.</exception>
    public static string BuildFixtureJson(
        ProtocolCapture capture,
        IReadOnlyList<CaptureSecret> secrets,
        string capturedUtcIso)
    {
        ArgumentNullException.ThrowIfNull(capture);

        BufferSection? request = BuildSection(capture.Request, secrets);
        BufferSection? response = BuildSection(capture.Response, secrets);

        var document = new
        {
            op = capture.Op,
            capturedUtc = capturedUtcIso,
            notes = capture.Notes,
            request,
            response,
        };

        return JsonSerializer.Serialize(document, FixtureJsonOptions);
    }

    /// <summary>
    /// Serializes the fixture and writes it to <paramref name="fixtureRoot"/>/&lt;op&gt;/&lt;name&gt;.json
    /// as UTF-8 without a byte-order mark, creating the directory if needed. Returns the path.
    /// </summary>
    /// <param name="fixtureRoot">Root directory for protocol fixtures.</param>
    /// <param name="name">File name without the <c>.json</c> extension.</param>
    /// <param name="capture">The raw capture to serialize; its <c>Op</c> selects the sub-directory.</param>
    /// <param name="secrets">Values to redact from the request and response buffers.</param>
    /// <param name="capturedUtcIso">Capture timestamp, written verbatim into the fixture.</param>
    /// <returns>The full path of the file that was written.</returns>
    /// <exception cref="ArgumentException"><paramref name="fixtureRoot"/> or <paramref name="name"/> is null, empty, or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="capture"/> is null.</exception>
    public static string Write(
        string fixtureRoot,
        string name,
        ProtocolCapture capture,
        IReadOnlyList<CaptureSecret> secrets,
        string capturedUtcIso)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(fixtureRoot);
        ArgumentException.ThrowIfNullOrWhiteSpace(name);
        ArgumentNullException.ThrowIfNull(capture);

        // Build (and therefore sanitize + verify) first so nothing touches disk if a secret survives.
        string json = BuildFixtureJson(capture, secrets, capturedUtcIso);
        string directory = Path.Combine(fixtureRoot, capture.Op);
        Directory.CreateDirectory(directory);
        string path = Path.Combine(directory, name + ".json");
        File.WriteAllText(path, json, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
        return path;
    }

    /// <summary>
    /// Sanitizes one buffer and describes it for the fixture; returns null when the buffer is absent.
    /// The hash and hex are computed over the sanitized bytes, never the raw ones.
    /// </summary>
    private static BufferSection? BuildSection(byte[]? raw, IReadOnlyList<CaptureSecret> secrets)
    {
        if (raw is null)
        {
            return null;
        }

        SanitizeResult result = ProtocolCaptureSanitizer.Sanitize(raw, secrets);
        ProtocolCaptureSanitizer.AssertNoSecretsRemain(result.Sanitized, secrets);

        return new BufferSection(
            Length: raw.Length,
            Sha256: Convert.ToHexString(SHA256.HashData(result.Sanitized)).ToLowerInvariant(),
            Hex: Convert.ToHexString(result.Sanitized).ToLowerInvariant(),
            // Only secrets that were actually found are listed in the fixture.
            Redactions: result.Report
                .Where(r => r.Total > 0)
                .Select(r => new RedactionEntry(r.Name, r.AsciiMatches, r.Utf16Matches))
                .ToArray());
    }

    /// <summary>JSON shape of one sanitized buffer (request or response).</summary>
    private sealed record BufferSection(int Length, string Sha256, string Hex, IReadOnlyList<RedactionEntry> Redactions);

    /// <summary>JSON shape of one scrub-report line: the secret's label and its match counts.</summary>
    private sealed record RedactionEntry(string Secret, int AsciiMatches, int Utf16Matches);
}
|
||||
@@ -12,8 +12,10 @@ using System.Security.Cryptography;
|
||||
using System.Runtime.Versioning;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using AVEVA.Historian.Client;
|
||||
using AVEVA.Historian.Client.Wcf;
|
||||
using AVEVA.Historian.Client.Wcf.Contracts;
|
||||
using AVEVA.Historian.ReverseEngineering.Capture;
|
||||
using dnlib.DotNet;
|
||||
using dnlib.DotNet.Emit;
|
||||
|
||||
@@ -68,6 +70,7 @@ try
|
||||
"wcf-start-event-query" => StartWcfEventQuery(args),
|
||||
"wcf-register-event-tag" => RegisterEventTagAndStartQuery(args),
|
||||
"wcf-add-event-tag" => AddEventTagAndStartQuery(args),
|
||||
"capture-tag-info" => CaptureTagInfo(args),
|
||||
_ => UnknownCommand(args[0])
|
||||
};
|
||||
}
|
||||
@@ -3605,6 +3608,90 @@ static int ProbeWcfTagInfo(string[] args)
|
||||
return result.Success ? 0 : 1;
|
||||
}
|
||||
|
||||
// CW-1: capture a live GetTagInfoFromName response buffer and persist it as a sanitized,
// committable golden fixture under fixtures/protocol/get-tag-info/. The same native byte blob
// travels inside the 2023 R2 gRPC RetrievalService.GetTagInfosFromName response, so the fixture
// is transport-agnostic. Usage: capture-tag-info [host] [port] [tag] [fixture-root]
//
// Returns 0 after the fixture is written and a JSON summary is printed to stdout; returns 1
// (with a message on stderr) when the server yields no bytes for the tag. A missing or
// unparsable port argument falls back to HistorianWcfBindingFactory.DefaultPort.
static int CaptureTagInfo(string[] args)
{
    string host = args.Length > 1 ? args[1] : "localhost";
    int port = args.Length > 2 && int.TryParse(args[2], out int parsedPort)
        ? parsedPort
        : HistorianWcfBindingFactory.DefaultPort;
    string tag = args.Length > 3 ? args[3] : "OtOpcUaParityTest_001.Counter";
    string fixtureRoot = args.Length > 4 ? args[4] : ResolveFixtureRoot();

    var options = new HistorianClientOptions
    {
        Host = host,
        Port = port,
        IntegratedSecurity = true,
    };

    IReadOnlyDictionary<string, byte[]?> raw = HistorianWcfTagClient.GetTagInfoRawBytesForProbe(options, [tag]);
    byte[]? response = raw.TryGetValue(tag, out byte[]? bytes) ? bytes : null;
    if (response is null || response.Length == 0)
    {
        // The tag name is deliberately left out of the message; only host:port is echoed.
        Console.Error.WriteLine($"GetTagInfoFromName returned no bytes for the requested tag against {host}:{port}.");
        return 1;
    }

    // Redact every identity-bearing value that could appear in the buffer: the requested tag,
    // the host/machine name, and the captured user. The sanitizer scrubs ASCII + UTF-16LE and
    // refuses to emit if any value survives.
    var secrets = new List<CaptureSecret>
    {
        new("tag", tag),
        new("host", host),
        new("machine", Environment.MachineName),
        new("user", Environment.UserName),
    };
    string? envUser = Environment.GetEnvironmentVariable("HISTORIAN_USER");
    if (!string.IsNullOrWhiteSpace(envUser))
    {
        secrets.Add(new CaptureSecret("env-user", envUser));
    }

    var capture = new ProtocolCapture(
        Op: "get-tag-info",
        Request: null,
        Response: response,
        Notes: "RetrievalService.GetTagInfoFromName response (CTagMetadata buffer); identical bytes on 2023 R2 gRPC GetTagInfosFromName.");

    // Read the clock once so the file-name stamp and the recorded capturedUtc describe the same
    // instant. Format with the invariant culture: the current culture may use a non-Gregorian
    // calendar, which would change the year digits in the file name.
    DateTime capturedAt = DateTime.UtcNow;
    string capturedUtc = capturedAt.ToString("o", System.Globalization.CultureInfo.InvariantCulture);
    string fixtureName = "analog-" + capturedAt.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);
    string path = ProtocolFixtureWriter.Write(fixtureRoot, fixtureName, capture, secrets, capturedUtc);

    var summary = new
    {
        Op = capture.Op,
        ResponseLength = response.Length,
        FixturePath = path,
        // Write() does not return its scrub report, so the buffer is sanitized again for the summary.
        Redactions = ProtocolCaptureSanitizer.Sanitize(response, secrets).Report
            .Where(r => r.Total > 0)
            .Select(r => new { r.Name, r.AsciiMatches, r.Utf16Matches }),
    };
    Console.WriteLine(JsonSerializer.Serialize(summary, CreateJsonOptions()));
    return 0;
}
|
||||
|
||||
// Locate the repository root by climbing from the current working directory until a directory
// containing Histsdk.slnx is found, and return <root>/fixtures/protocol. When no ancestor holds
// the marker file, return fixtures/protocol beneath the working directory instead.
static string ResolveFixtureRoot()
{
    string workingDirectory = Directory.GetCurrentDirectory();

    for (DirectoryInfo? candidate = new DirectoryInfo(workingDirectory);
         candidate is not null;
         candidate = candidate.Parent)
    {
        string marker = Path.Combine(candidate.FullName, "Histsdk.slnx");
        if (File.Exists(marker))
        {
            return Path.Combine(candidate.FullName, "fixtures", "protocol");
        }
    }

    return Path.Combine(workingDirectory, "fixtures", "protocol");
}
|
||||
|
||||
static int ProbeWcfLikeTagBrowse(string[] args)
|
||||
{
|
||||
string host = args.Length > 1 ? args[1] : "localhost";
|
||||
@@ -6370,6 +6457,9 @@ static void PrintHelp()
|
||||
instrument-tagquery-gettaginfo [dll-path] [output-path]
|
||||
Write a reverse-only wrapper copy that logs TagQuery CTagMetadata vectors.
|
||||
mark <scenario-name> Emit a timestamp marker for Wireshark/API Monitor notes.
|
||||
capture-tag-info [host] [port] [tag] [fixture-root]
|
||||
CW-1: capture a live GetTagInfoFromName buffer and write a
|
||||
sanitized golden fixture to fixtures/protocol/get-tag-info/.
|
||||
wcf-probe [host] [port] Probe Hist/Retr/Stat WCF GetV endpoints with MDAS encoding.
|
||||
wcf-cert-probe [host] [port] [dns]
|
||||
Probe HistCert GetV with MDAS over TLS transport security.
|
||||
|
||||
Reference in New Issue
Block a user