"""Summarize DCERPC packet rows from tab-separated exports.

Each input file is grouped by (stream, packet_type, context_id, opnum); for
every group the number of rows and the distribution of fragment lengths are
reported as one tab-separated output row.
"""

from __future__ import annotations

import argparse
import csv
import sys
from collections import Counter, defaultdict
from pathlib import Path

HEADER = [
    "capture",
    "stream",
    "packet_type",
    "context_id",
    "opnum",
    "count",
    "frag_lengths",
]

# Rows with fewer fields than this are skipped.
# NOTE(review): only columns up to index 8 are read below, yet 10 fields are
# required -- presumably the export always carries at least 10 columns; confirm.
_MIN_FIELDS = 10


def _frag_length_sort_key(length: str) -> tuple[int, int, str]:
    """Order numeric fragment lengths by value; non-numeric text sorts last."""
    try:
        return (0, int(length), length)
    except ValueError:
        return (1, 0, length)


def _write_rows(handle, rows: list[list[str]]) -> None:
    """Write *rows* to *handle* as tab-separated lines terminated by "\\n"."""
    csv.writer(handle, delimiter="\t", lineterminator="\n").writerows(rows)


def summarize(path: Path) -> list[list[str]]:
    """Return one summary row per (stream, packet_type, context_id, opnum).

    Args:
        path: Tab-separated input file. It is decoded as UTF-8 (a leading BOM
            is tolerated). The name of its parent directory is used as the
            ``capture`` column of every returned row.

    Returns:
        Rows laid out like ``HEADER``, ordered by descending count and then by
        the grouping key. ``frag_lengths`` is a comma-separated list of
        ``length:occurrences`` pairs in ascending numeric length order; values
        that are not integers are listed after the numeric ones.
    """
    counts: Counter[tuple[str, str, str, str]] = Counter()
    lengths: defaultdict[tuple[str, str, str, str], Counter[str]] = defaultdict(Counter)

    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        for fields in csv.reader(handle, delimiter="\t"):
            if len(fields) < _MIN_FIELDS:
                continue
            # Columns: 2 = stream, 3 = packet type, 5 = context id, 6 = opnum.
            key = (fields[2], fields[3], fields[5], fields[6])
            counts[key] += 1
            lengths[key][fields[8]] += 1  # column 8 = fragment length

    capture = path.parent.name
    rows: list[list[str]] = []
    # Most frequent groups first; ties are broken by the key itself.
    for key, count in sorted(counts.items(), key=lambda item: (-item[1], item[0])):
        # Sort lengths numerically so that e.g. "64" precedes "1024"; a plain
        # string sort would order them lexicographically.
        frag_lengths = ",".join(
            f"{length}:{lengths[key][length]}"
            for length in sorted(lengths[key], key=_frag_length_sort_key)
        )
        rows.append([capture, *key, str(count), frag_lengths])
    return rows


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Process exit status (always 0 when no exception is raised).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dcerpc_tsv", type=Path, nargs="+")
    parser.add_argument("--out", type=Path)
    args = parser.parse_args(argv)

    output_rows = [HEADER]
    for path in args.dcerpc_tsv:
        output_rows.extend(summarize(path))

    if args.out is not None:
        args.out.parent.mkdir(parents=True, exist_ok=True)
        with args.out.open("w", encoding="utf-8", newline="") as handle:
            _write_rows(handle, output_rows)
    else:
        _write_rows(sys.stdout, output_rows)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())