lmxopcua/code-reviews/regen-readme.py

#!/usr/bin/env python3
"""Regenerate code-reviews/README.md from the per-module findings.md files.

The per-module findings.md files are the source of truth. This script aggregates
them into the single cross-module README.md (module status + pending/closed
finding tables).

Usage:
    python code-reviews/regen-readme.py          # rewrite README.md
    python code-reviews/regen-readme.py --check  # exit 1 if stale or inconsistent

`--check` fails when README.md is out of date OR when a module's header
`Open findings` count disagrees with its finding statuses, or a finding
carries an unrecognised Status value.
"""
from __future__ import annotations

import re
import sys
from pathlib import Path

# Paths are anchored on this script's own location rather than the current
# working directory. README.md sits next to the script; ROOT is the directory
# whose sub-directories are scanned for findings.md files.
README = Path(__file__).resolve().with_name("README.md")
ROOT = README.parent

# Statuses that place a finding in the "pending" table and count as open.
PENDING_STATUSES = {"Open", "In Progress"}
# Every Status value accepted by the consistency check.
KNOWN_STATUSES = PENDING_STATUSES | {"Resolved", "Won't Fix", "Deferred"}
# Sort rank per severity (lower sorts first).
SEVERITY_ORDER = {
    name: rank
    for rank, name in enumerate(("Critical", "High", "Medium", "Low"))
}

# HTML comment placed near the top of the generated README.
GENERATED_NOTE = " ".join(
    (
        "<!-- GENERATED FILE - do not edit by hand.",
        "Regenerate with: python code-reviews/regen-readme.py -->",
    )
)


def cell(value: str) -> str:
    """Make *value* safe to drop into a markdown table cell.

    Every ``|`` is prefixed with a backslash so it is not read as a column
    delimiter, and surrounding whitespace is removed.
    """
    pieces = value.split("|")
    escaped = "\\|".join(pieces)
    return escaped.strip()


def summarize(value: str, limit: int = 240) -> str:
    """Shorten *value* so it fits in a single table cell.

    The text is stripped first. When it is longer than *limit* characters it
    is cut to ``limit - 1`` characters, trailing whitespace left by the cut is
    dropped, and an ellipsis character is appended; otherwise the stripped
    text is returned unchanged.
    """
    text = value.strip()
    if len(text) > limit:
        head = text[: limit - 1]
        return f"{head.rstrip()}…"
    return text


def first_table(text: str) -> dict[str, str]:
    """Parse the first contiguous block of '| key | value |' rows into a dict.

    Scanning starts at the first line whose stripped form begins with ``|``
    and stops at the first later line that does not. For each row the first
    two cells become the key and the value; any further cells are ignored.
    Rows with an empty key, separator rows (key made only of ``-`` / ``:``)
    and the ``Field`` header row are skipped. If a key repeats, the last
    value wins.

    A pipe written as ``\\|`` is treated as literal cell content instead of a
    column delimiter and is returned unescaped (as ``|``), so a value such as
    ``a \\| b`` is no longer cut off at the escaped pipe.

    Args:
        text: Markdown text to scan.

    Returns:
        Mapping of key cell to value cell; empty when *text* has no table rows.
    """
    # A pipe only delimits columns when it is not preceded by a backslash.
    unescaped_pipe = re.compile(r"(?<!\\)\|")
    # Closing delimiter(s) of a row; an escaped final pipe is cell content.
    trailing_pipes = re.compile(r"(?<!\\)\|+$")

    rows: dict[str, str] = {}
    started = False
    for line in text.splitlines():
        stripped = line.strip()
        if stripped.startswith("|"):
            started = True
            inner = trailing_pipes.sub("", stripped.lstrip("|"))
            cells = [
                c.strip().replace("\\|", "|")
                for c in unescaped_pipe.split(inner)
            ]
            if len(cells) >= 2:
                key, value = cells[0], cells[1]
                if key and not set(key) <= {"-", ":"} and key != "Field":
                    rows[key] = value
        elif started:
            # First non-table line after the table ends the block.
            break
    return rows


def parse_module(findings_path: Path) -> dict:
    """Parse one module's findings.md into its header and finding list.

    The file is split at the first ``## Findings`` heading. The first table
    before that heading becomes the module header. After it, every ``###``
    heading starts one finding: the rest of the heading line is the finding
    id, the first table in the section supplies Severity / Category /
    Location / Status, and the text following ``**Description:**`` (up to the
    next line starting with ``**`` or the end of the section) is the
    description with all whitespace runs collapsed to single spaces.

    Args:
        findings_path: Path to a module's ``findings.md``; the name of its
            parent directory is used as the module name.

    Returns:
        A dict with keys ``module`` (str), ``header`` (dict of header table
        rows) and ``findings`` (list of dicts with keys ``id``, ``severity``,
        ``category``, ``location``, ``status`` and ``description``). Missing
        table fields and a missing description are returned as ``""``.
    """
    text = findings_path.read_text(encoding="utf-8")
    module = findings_path.parent.name
    parts = re.split(r"^##\s+Findings\s*$", text, maxsplit=1, flags=re.M)
    header = first_table(parts[0])
    findings: list[dict] = []
    if len(parts) > 1:
        for chunk in re.split(r"^###\s+", parts[1], flags=re.M)[1:]:
            # A bare '###' at the very end of the file yields an empty chunk,
            # and "".splitlines() is []. Fall back to an empty id instead of
            # raising IndexError; such a finding has an empty Status, which
            # find_inconsistencies reports as unrecognised.
            heading_lines = chunk.splitlines()
            fid = heading_lines[0].strip() if heading_lines else ""
            tbl = first_table(chunk)
            desc_m = re.search(
                r"\*\*Description:\*\*\s*(.*?)(?=\n\*\*|\Z)", chunk, re.S
            )
            desc = re.sub(r"\s+", " ", desc_m.group(1)).strip() if desc_m else ""
            findings.append(
                {
                    "id": fid,
                    "severity": tbl.get("Severity", ""),
                    "category": tbl.get("Category", ""),
                    "location": tbl.get("Location", ""),
                    "status": tbl.get("Status", ""),
                    "description": desc,
                }
            )
    return {"module": module, "header": header, "findings": findings}


def build_readme(modules: list[dict]) -> str:
    """Render the full README.md text for the parsed *modules*.

    The output has three sections: a per-module status table (modules sorted
    by name), a table of pending findings (status in ``PENDING_STATUSES``)
    and a table of closed findings (any other non-empty status). Both finding
    tables are ordered by severity rank, then by finding id. Findings with an
    empty status appear in neither finding table but still count towards a
    module's total.

    Args:
        modules: Parsed modules as returned by ``parse_module``.

    Returns:
        The README content, newline-joined and ending with a single newline.
    """

    def row(*columns: str) -> str:
        # One markdown table row: "| a | b | c |".
        return "| " + " | ".join(columns) + " |"

    def is_pending(finding: dict) -> bool:
        return finding["status"] in PENDING_STATUSES

    def by_severity(finding: dict) -> tuple:
        # Severities missing from SEVERITY_ORDER get rank 9 and so sort last.
        rank = SEVERITY_ORDER.get(finding["severity"], 9)
        return (rank, finding["id"])

    ordered = sorted(modules, key=lambda mod: mod["module"])
    flattened = [
        dict(finding, module=mod["module"])
        for mod in ordered
        for finding in mod["findings"]
    ]
    pending = sorted(filter(is_pending, flattened), key=by_severity)
    closed = sorted(
        (
            finding
            for finding in flattened
            if finding["status"] and not is_pending(finding)
        ),
        key=by_severity,
    )

    lines: list[str] = [
        "# Code Reviews",
        "",
        GENERATED_NOTE,
        "",
        "Cross-module code review index for the OtOpcUa server codebase "
        "(`lmxopcua`). The review process is defined in "
        "[../REVIEW-PROCESS.md](../REVIEW-PROCESS.md).",
        "",
        "Each module's `findings.md` is the source of truth; this file is generated "
        "from them by `regen-readme.py` and must not be edited by hand.",
        "",
        "## Module status",
        "",
        "| Module | Reviewer | Date | Commit | Status | Open | Total |",
        "|---|---|---|---|---|---|---|",
    ]

    # --- Module status table -------------------------------------------
    if not ordered:
        lines.append("| _no modules reviewed yet_ |  |  |  |  |  |  |")
    for mod in ordered:
        header = mod["header"]
        found = mod["findings"]
        open_count = len([finding for finding in found if is_pending(finding)])
        lines.append(
            row(
                f"[{mod['module']}]({mod['module']}/findings.md)",
                cell(header.get("Reviewer", "")),
                cell(header.get("Review date", "")),
                cell(header.get("Commit reviewed", "")),
                cell(header.get("Status", "")),
                str(open_count),
                str(len(found)),
            )
        )

    # --- Pending findings ------------------------------------------------
    lines.extend(
        [
            "",
            "## Pending findings",
            "",
            "Findings with status `Open` or `In Progress`, ordered by severity.",
            "",
        ]
    )
    if not pending:
        lines.append("_No pending findings._")
    else:
        lines.append("| ID | Severity | Category | Location | Description |")
        lines.append("|---|---|---|---|---|")
        lines.extend(
            row(
                cell(finding["id"]),
                cell(finding["severity"]),
                cell(finding["category"]),
                cell(finding["location"]),
                cell(summarize(finding["description"])),
            )
            for finding in pending
        )

    # --- Closed findings -------------------------------------------------
    lines.extend(
        [
            "",
            "## Closed findings",
            "",
            "Findings with status `Resolved`, `Won't Fix`, or `Deferred`.",
            "",
        ]
    )
    if not closed:
        lines.append("_No closed findings._")
    else:
        lines.append("| ID | Severity | Status | Category | Location |")
        lines.append("|---|---|---|---|---|")
        lines.extend(
            row(
                cell(finding["id"]),
                cell(finding["severity"]),
                cell(finding["status"]),
                cell(finding["category"]),
                cell(finding["location"]),
            )
            for finding in closed
        )

    return "\n".join(lines) + "\n"


def find_inconsistencies(modules: list[dict]) -> list[str]:
    """List human-readable problems found in the parsed findings.md data.

    Two checks run per module, in this order: the header's ``Open findings``
    value (stripped) must equal the number of findings whose status is in
    ``PENDING_STATUSES``, and every finding's status must be one of
    ``KNOWN_STATUSES``.

    Args:
        modules: Parsed modules as returned by ``parse_module``.

    Returns:
        One message per problem; an empty list when everything is consistent.
    """
    problems: list[str] = []
    for mod in modules:
        pending_count = len(
            [
                finding
                for finding in mod["findings"]
                if finding["status"] in PENDING_STATUSES
            ]
        )
        declared = mod["header"].get("Open findings", "").strip()
        if str(pending_count) != declared:
            problems.append(
                f"{mod['module']}: header 'Open findings' = '{declared}' but "
                f"{pending_count} finding(s) are Open/In Progress"
            )
        problems.extend(
            f"{mod['module']}: finding {finding['id']} has unrecognised "
            f"Status '{finding['status']}'"
            for finding in mod["findings"]
            if finding["status"] not in KNOWN_STATUSES
        )
    return problems


def main(argv: list[str]) -> int:
    """Regenerate README.md, or verify it when ``--check`` is given.

    Every sub-directory of ``ROOT`` (other than ``_template``) that contains
    a ``findings.md`` is parsed, in sorted order.

    Without ``--check`` the README is rewritten, any inconsistencies are
    printed to stderr as warnings, and 0 is returned.

    With ``--check`` nothing is written: inconsistencies and staleness are
    reported on stderr and 1 is returned if either is present, otherwise 0.

    Args:
        argv: Full argument vector; ``argv[0]`` is ignored.

    Returns:
        Process exit code.
    """
    check_only = "--check" in argv[1:]

    findings_files = [
        entry / "findings.md"
        for entry in sorted(ROOT.iterdir())
        if entry.is_dir()
        and entry.name != "_template"
        and (entry / "findings.md").is_file()
    ]
    modules = [parse_module(path) for path in findings_files]
    content = build_readme(modules)
    issues = find_inconsistencies(modules)

    if not check_only:
        for issue in issues:
            print(f"warning: {issue}", file=sys.stderr)
        README.write_text(content, encoding="utf-8", newline="\n")
        print(f"Wrote {README} ({len(modules)} modules).")
        return 0

    # Check mode: a missing README compares as empty text, i.e. stale.
    current = README.read_text(encoding="utf-8") if README.exists() else ""
    stale = current != content
    for issue in issues:
        print(f"inconsistent: {issue}", file=sys.stderr)
    if stale:
        print(
            "code-reviews/README.md is stale - run regen-readme.py",
            file=sys.stderr,
        )
    if not (stale or issues):
        print("code-reviews/README.md is up to date and consistent.")
        return 0
    return 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))