# mxaccess/analysis/scripts/summarize_dcerpc.py

from __future__ import annotations

import argparse
import csv
from collections import Counter
from pathlib import Path


# Column names of the summary table, in output order. main() emits this list
# as the first output row, and every row built by summarize() carries the same
# seven columns in the same order.
HEADER = [
    "capture",
    "stream",
    "packet_type",
    "context_id",
    "opnum",
    "count",
    "frag_lengths",
]


def _numeric_first_key(value: str) -> tuple[int, int, str]:
    """Return a sort key that orders decimal strings by numeric value.

    Strings consisting only of decimal digits sort first, by integer value, so
    "20" comes before "100". Any other text (for example an empty field) sorts
    after them in plain string order. The raw text is the last component so
    that values such as "7" and "007" still order deterministically.
    """
    if value.isdecimal():
        return (0, int(value), value)
    return (1, 0, value)


def summarize(path: Path) -> list[list[str]]:
    """Aggregate one tab-separated DCERPC export into summary rows.

    The file is read as UTF-8 (a leading BOM is tolerated). Each row is
    grouped by the zero-based columns 2, 3, 5 and 6 (stream, packet type,
    context id, opnum), and column 8 (fragment length) is tallied per group.

    Args:
        path: Tab-separated input file. The name of its parent directory is
            used as the capture name in every returned row.

    Returns:
        One row per group, as
        ``[capture, stream, packet_type, context_id, opnum, count, frag_lengths]``
        with every element a string. ``frag_lengths`` is a comma-separated
        list of ``length:occurrences`` pairs in ascending length order. Rows
        are ordered by descending count; ties are broken by the group fields,
        comparing decimal fields numerically. An input with no usable rows
        yields an empty list.
    """
    # Per-group tally of fragment lengths; the group total is derived from it.
    lengths: dict[tuple[str, str, str, str], Counter[str]] = {}

    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        for fields in csv.reader(handle, delimiter="\t"):
            # Rows with fewer than 10 columns are skipped even though only
            # columns up to index 8 are read.
            # NOTE(review): presumably 10 is the exporter's full column
            # count, so shorter rows are truncated/garbage -- confirm.
            if len(fields) < 10:
                continue

            key = (fields[2], fields[3], fields[5], fields[6])
            per_length = lengths.get(key)
            if per_length is None:
                per_length = lengths[key] = Counter()
            per_length[fields[8]] += 1

    capture = path.parent.name
    totals = {key: sum(per_length.values()) for key, per_length in lengths.items()}

    # Most frequent groups first; numeric-aware comparison keeps e.g. stream
    # "2" ahead of stream "10" among groups with equal counts.
    ordered_keys = sorted(
        lengths,
        key=lambda key: (-totals[key], tuple(_numeric_first_key(part) for part in key)),
    )

    rows: list[list[str]] = []
    for key in ordered_keys:
        per_length = lengths[key]
        frag_lengths = ",".join(
            f"{length}:{per_length[length]}"
            for length in sorted(per_length, key=_numeric_first_key)
        )
        rows.append([capture, *key, str(totals[key]), frag_lengths])

    return rows


def main(argv: list[str] | None = None) -> int:
    """Summarize the given DCERPC TSV files and emit one combined table.

    Args:
        argv: Command-line arguments, excluding the program name. When
            ``None`` (the default) argparse reads them from ``sys.argv``.

    Returns:
        Always 0; intended to be used as the process exit status.

    The output is tab-separated with ``\\n`` line endings: the ``HEADER`` row
    first, followed by the rows from ``summarize()`` for each input file in
    the order given. With ``--out`` the table is written to that file as
    UTF-8 (missing parent directories are created); otherwise it goes to
    standard output.
    """
    # Function-scope import: the module does not import sys at the top level.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dcerpc_tsv",
        type=Path,
        nargs="+",
        help="tab-separated DCERPC export(s); the parent directory name is used as the capture name",
    )
    parser.add_argument(
        "--out",
        type=Path,
        help="write the summary TSV to this file instead of standard output",
    )
    args = parser.parse_args(argv)

    output_rows = [HEADER]
    for path in args.dcerpc_tsv:
        output_rows.extend(summarize(path))

    if args.out is None:
        csv.writer(sys.stdout, delimiter="\t", lineterminator="\n").writerows(output_rows)
        return 0

    args.out.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv writer control line endings itself.
    with args.out.open("w", encoding="utf-8", newline="") as handle:
        csv.writer(handle, delimiter="\t", lineterminator="\n").writerows(output_rows)

    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())