Files
mxaccess/analysis/scripts/decode_mixed_local_stream.py
T
Joseph Doherty fe2a6db786
rust / build / test / clippy / fmt (push) Has been cancelled
Initial project state: .NET reference, design, Rust port (M0+M1), evidence
Layout:
- src/                    .NET 10 x64 reference: MxNativeCodec, MxNativeClient,
                          MxAsbClient, probes, tests, harnesses. Executable spec.
- design/                 Architectural plan for the Rust port (M0–M6), error
                          model, protocol invariants, risks (R1–R16), adversarial
                          review log (review.md).
- rust/                   Rust workspace. M0 skeleton + M1 codec parity.
                          mxaccess-codec: 215 unit tests + 2 cross-implementation
                          parity tests (byte-identical against .NET reference).
                          Other crates are M0 stubs awaiting M2+.
- captures/               Frida + netsh + pcap evidence per CLAUDE.md
                          ("captures are evidence, not throwaway logs").
- analysis/               Decompiled C# (frida/proxy/decompiled-*),
                          Ghidra exports for native DLLs (`exports/` only —
                          working state at `projects/` and AVEVA's input
                          binaries at `input/` are gitignored).
- docs/                   Reverse-engineering reference docs.
- tools/                  Setup-LiveProbeEnv.ps1 (Infisical credential fetcher),
                          Compute-Crc.ps1 (.NET parity helper).
- .github/workflows/      Rust CI: fmt + build + test + clippy on Windows.
- LICENSE                 MIT (Joseph Doherty, 2026).

Verified:
- cargo test --workspace → 217 passed (215 unit + 2 .NET parity), 0 failed
- cargo clippy --workspace -- -D warnings → clean
- cargo fmt --all -- --check → clean
- cargo publish --dry-run -p mxaccess-codec → packages cleanly

Excluded from history (see .gitignore):
- **/bin, **/obj, **/target — build artifacts
- analysis/ghidra/projects/ — Ghidra working state (regenerable)
- analysis/ghidra/input/ — AVEVA proprietary DLLs (vendor IP)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 06:21:00 -04:00

139 lines
4.2 KiB
Python

from __future__ import annotations
import argparse
import csv
import struct
from pathlib import Path
def i32(data: bytes, offset: int) -> int | None:
    """Read a signed little-endian 32-bit integer from *data* at *offset*.

    Returns:
        The decoded value, or ``None`` when ``offset + 4`` runs past the end
        of *data* (so callers can probe near the tail without catching
        ``struct.error``).
    """
    end = offset + 4
    if end <= len(data):
        (value,) = struct.unpack_from("<i", data, offset)
        return value
    return None
def u32(data: bytes, offset: int) -> int | None:
    """Read an unsigned little-endian 32-bit integer from *data* at *offset*.

    Returns:
        The decoded value, or ``None`` when ``offset + 4`` runs past the end
        of *data*.
    """
    in_bounds = offset + 4 <= len(data)
    return struct.unpack_from("<I", data, offset)[0] if in_bounds else None
def ascii_preview(data: bytes, limit: int = 48) -> str:
    """Render the first *limit* bytes of *data* as printable ASCII.

    Bytes in the range 32..126 are shown as their character; every other
    byte is shown as ``"."``.
    """
    chars = []
    for byte in data[:limit]:
        printable = 0x20 <= byte <= 0x7E
        chars.append(chr(byte) if printable else ".")
    return "".join(chars)
def announced_data_records_match(
    data: bytes,
    offset: int,
    announced_size: int,
    max_record_size: int = 1024 * 1024,
) -> bool:
    """Check that the records following a 12-byte header add up to *announced_size*.

    Starting at ``offset + 12``, the buffer is walked as a sequence of
    records, each made of a little-endian unsigned 32-bit length followed by
    that many bytes. The walk stops once the accumulated size (length
    prefixes included) reaches *announced_size* or fewer than four bytes
    remain.

    Args:
        data: Buffer being examined.
        offset: Position of the 12-byte header that precedes the records.
        announced_size: Total byte count (prefixes plus payloads) the
            records are expected to occupy.
        max_record_size: Largest payload length accepted for a single
            record. Defaults to 1 MiB.

    Returns:
        ``True`` only when the walked records total exactly
        *announced_size*. ``False`` when *announced_size* is negative, a
        record is empty, exceeds *max_record_size*, or runs past the end of
        *data*, or when the total under- or overshoots.
    """
    if announced_size < 0:
        return False
    end = len(data)
    total = 0
    cursor = offset + 12  # records begin right after the three-i32 header
    while total < announced_size and cursor + 4 <= end:
        # The loop condition guarantees four readable bytes at ``cursor``.
        (record_length,) = struct.unpack_from("<I", data, cursor)
        if record_length == 0 or record_length > max_record_size:
            return False
        record_size = record_length + 4
        if cursor + record_size > end:
            return False
        total += record_size
        cursor += record_size
    return total == announced_size
def looks_like_control(data: bytes, offset: int) -> bool:
    """Decide whether the 12 bytes at *offset* form a control header.

    The three signed 32-bit words at *offset* are inspected:

    * If any of them cannot be read, the answer is ``False``.
    * A first word of ``-1`` or ``-2`` is accepted outright.
    * Otherwise the third word must be ``0`` and the second non-negative,
      and the first word is treated as an announced size that the records
      following the header must add up to exactly.
    """
    words = [i32(data, offset + delta) for delta in (0, 4, 8)]
    if any(word is None for word in words):
        return False
    word0, word1, word2 = words
    if word0 == -1 or word0 == -2:
        return True
    if word2 == 0 and word1 >= 0:
        return announced_data_records_match(data, offset, word0)
    return False
def parse(data: bytes, max_record_size: int) -> list[dict[str, str]]:
    """Walk *data* from offset 0 and describe each record as a row of strings.

    At every position the record is classified as one of:

    * ``control`` - ``looks_like_control`` accepts the position; the record
      is 12 bytes and its body is those 12 bytes.
    * ``data`` - the leading signed 32-bit word is a length in
      ``0..max_record_size`` that fits in the buffer; the record is the
      4-byte length plus the payload, and its body is the payload only.
    * ``unknown`` - anything else; up to 32 bytes are reported and parsing
      stops after this row.

    Parsing also stops, without emitting a row, once fewer than four bytes
    remain.

    Args:
        data: Raw stream contents.
        max_record_size: Largest length prefix accepted for a data record.

    Returns:
        One dict per record, with every value already converted to ``str``.
    """

    def cell(buffer: bytes, position: int) -> str:
        # Empty string marks a word that lies (partly) outside the buffer.
        value = i32(buffer, position)
        return "" if value is None else str(value)

    total = len(data)
    rows: list[dict[str, str]] = []
    position = 0
    while position < total:
        leading = i32(data, position)
        if leading is None:
            break

        if looks_like_control(data, position):
            kind, span = "control", 12
            payload = data[position:position + span]
        elif 0 <= leading <= max_record_size and position + 4 + leading <= total:
            kind, span = "data", 4 + leading
            payload = data[position + 4:position + span]
        else:
            kind, span = "unknown", min(32, total - position)
            payload = data[position:position + span]

        # Raw bytes from the record start (length prefix included), capped at 80.
        head = data[position:position + min(span, 80)]
        rows.append({
            "index": str(len(rows)),
            "offset": f"0x{position:08x}",
            "record_type": kind,
            "record_size": str(span),
            "first_i32": str(leading),
            "second_i32": cell(data, position + 4),
            "third_i32": cell(data, position + 8),
            "body_i32_0": cell(payload, 0),
            "body_i32_1": cell(payload, 4),
            "body_i32_2": cell(payload, 8),
            "body_i32_3": cell(payload, 12),
            "hex_prefix": head.hex(" "),
            "ascii_preview": ascii_preview(head),
        })

        if kind == "unknown":
            break
        position += span
    return rows
def main() -> int:
    """Command-line entry point: decode a stream file into a tab-separated table.

    Arguments:
        stream: path of the binary stream to read.
        --out: path of the table to write (required); missing parent
            directories are created.
        --max-record-size: length limit passed to ``parse`` (default 1 MiB).

    Returns:
        ``0`` once the table has been written.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("stream", type=Path)
    cli.add_argument("--out", type=Path, required=True)
    cli.add_argument("--max-record-size", type=int, default=1024 * 1024)
    options = cli.parse_args()

    # Read and parse first so a bad input path fails before anything is created.
    records = parse(options.stream.read_bytes(), options.max_record_size)

    columns = [
        "index",
        "offset",
        "record_type",
        "record_size",
        "first_i32",
        "second_i32",
        "third_i32",
        "body_i32_0",
        "body_i32_1",
        "body_i32_2",
        "body_i32_3",
        "hex_prefix",
        "ascii_preview",
    ]
    destination = options.out
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8", newline="") as sink:
        table = csv.DictWriter(
            sink,
            fieldnames=columns,
            delimiter="\t",
            lineterminator="\n",
        )
        table.writeheader()
        for record in records:
            table.writerow(record)
    return 0
if __name__ == "__main__":
    # Run the CLI only when executed as a script; main()'s return value
    # becomes the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)