adr: add INDEX.md (auto-generated by tools/generate_adr_index.py)

Adds a section-based table of contents for the 46-ADR corpus, mirroring the /report skill's classification (Design Principles / High-level Architecture / Detailed Architecture by component / Implementation Decisions by topic). Generated for both docs/adr/ (EN titles) and docs/adr-ko/ (KO titles) from one tool. tools/generate_adr_index.py: - Single CLASSIFICATION dict per ADR — add an entry when introducing a new ADR; the script fails loud if any file is missing from the table. - DETAILED_COMPONENTS lists each builtin component and the ADR(s) that cover it (ADR-0014 appears under six PE engines; ADR-0023 under pe_dma + pe_ipcq). - Accepts both ":" and "—" title separators (matching ADR-0033's existing format). - --check mode for CI: exits 1 if INDEX.md is stale. Also includes the docs/report/architecture-2026-1H.md generated by the prior /report write (the public-facing architecture document; 836 lines, 76 source-attribution comments). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 11:15:37 -07:00
parent bd49c93703
commit e33e76f2d1
4 changed files with 1517 additions and 0 deletions
@@ -0,0 +1,333 @@
+"""Generate docs/adr/INDEX.md (and docs/adr-ko/INDEX.md) from the ADR corpus.
+
+Auto-derives a section-based index following the same classification as
+the /report skill — Design Principles / High-level Architecture /
+Detailed Architecture (by component) / Implementation Decisions
+(by topic). Run before publishing to refresh INDEX.md.
+
+The classification table below is the single source of truth. When a new
+ADR is added under docs/adr/, append an entry to ``CLASSIFICATION``. The
+script exits 1 if any ADR file is missing from the table or any title
+cannot be parsed, so omissions surface in CI.
+
+Usage:
+    python tools/generate_adr_index.py [--root <repo-root>] [--check]
+
+  --check : exit 1 if the generated INDEX differs from the on-disk file
+            (used by CI to detect un-regenerated indexes).
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+# ADR file names look like ``ADR-0014-some-slug.md``: group 1 is the
+# four-digit number, group 2 the slug (lowercase letters, digits, "_", "-").
+ADR_FILENAME_RE = re.compile(r"^ADR-(\d{4})-([a-z0-9_-]+)\.md$")
+# Title separator may be ":" (most ADRs) or "—" (em-dash; ADR-0033 uses
+# this). The verifier (tools/verify_adr_lang_pairs.py) only checks the
+# number, so both styles already coexist in the corpus.
+# Group 1 is the ADR number, group 2 the title with surrounding
+# whitespace trimmed.
+TITLE_RE = re.compile(r"^# ADR-(\d{4})\s*[:—]\s*(.+?)\s*$")
+
+# Section names; each CLASSIFICATION value carries one of these as its
+# first element.
+DESIGN_PRINCIPLES = "Design Principles"
+HIGH_LEVEL = "High-level Architecture"
+DETAILED = "Detailed Architecture"
+IMPL_DECISIONS = "Implementation Decisions"
+
+
+# (section, subgroup) per ADR, keyed by ADR number.
+# How _build_index uses the subgroup depends on the section:
+#   - Design Principles: unused (None).
+#   - High-level Architecture: appended to the entry as an italic note.
+#   - Detailed Architecture: not read; the per-component grouping is
+#     taken from DETAILED_COMPONENTS instead.
+#   - Implementation Decisions: the topic heading the ADR is listed under.
+# Add a line here when introducing a new ADR.
+CLASSIFICATION: dict[int, tuple[str, str | None]] = {
+    # Design Principles
+    13: (DESIGN_PRINCIPLES, None),
+    33: (DESIGN_PRINCIPLES, None),
+
+    # High-level Architecture
+    3:  (HIGH_LEVEL, "System hierarchy (Tray / SIP / CUBE / PE)"),
+    7:  (HIGH_LEVEL, "Runtime API ↔ sim_engine boundaries"),
+    16: (HIGH_LEVEL, "IOChiplet NOC and memory data path"),
+    17: (HIGH_LEVEL, "Cube NOC and HBM connectivity"),
+
+    # Detailed Architecture (the subgroup is a descriptive label only; which
+    # component headings an ADR appears under is set in DETAILED_COMPONENTS)
+    14: (DETAILED, "pe_pipeline"),  # covers pe_cpu/pe_dma/pe_fetch_store/pe_gemm/pe_math/pe_scheduler
+    23: (DETAILED, "pe_ipcq"),
+    34: (DETAILED, "hbm_ctrl"),
+    35: (DETAILED, "m_cpu"),
+    36: (DETAILED, "io_cpu"),
+    37: (DETAILED, "forwarding"),
+    38: (DETAILED, "pcie_ep"),
+    39: (DETAILED, "pe_mmu"),
+    40: (DETAILED, "pe_tcm"),
+    41: (DETAILED, "sram"),
+    42: (DETAILED, "tiling"),
+
+    # Implementation Decisions
+    1:  (IMPL_DECISIONS, "Address Scheme"),
+    2:  (IMPL_DECISIONS, "Routing & Helper API"),
+    4:  (IMPL_DECISIONS, "Memory Semantics & Local-HBM Bandwidth"),
+    5:  (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"),
+    6:  (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"),
+    8:  (IMPL_DECISIONS, "Tensor Deployment and Allocation"),
+    9:  (IMPL_DECISIONS, "Kernel Execution and Host-Device Messaging"),
+    10: (IMPL_DECISIONS, "CLI Surface and Semantics"),
+    11: (IMPL_DECISIONS, "Address Scheme"),
+    12: (IMPL_DECISIONS, "Kernel Execution and Host-Device Messaging"),
+    15: (IMPL_DECISIONS, "Component Port/Wire Fabric Model"),
+    20: (IMPL_DECISIONS, "Two-Pass Data Execution"),
+    22: (IMPL_DECISIONS, "2D Grid Program Identity"),
+    24: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
+    25: (IMPL_DECISIONS, "IPCQ Direction Addressing"),
+    26: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
+    27: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
+    32: (IMPL_DECISIONS, "Intercube All-Reduce"),
+    43: (IMPL_DECISIONS, "Evaluation Harnesses"),
+    44: (IMPL_DECISIONS, "Evaluation Harnesses"),
+    45: (IMPL_DECISIONS, "Bench Module Contract"),
+    46: (IMPL_DECISIONS, "Kernel-side tl.* API (TLContext)"),
+    47: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
+    48: (IMPL_DECISIONS, "Memory Allocator Algorithms"),
+    49: (IMPL_DECISIONS, "Probe Subcommand"),
+    50: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
+    51: (IMPL_DECISIONS, "Routing & Helper API"),
+    52: (IMPL_DECISIONS, "Sim-engine Op Log and Memory Store Schemas"),
+    53: (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"),
+}
+
+# Canonical component order for the Detailed Architecture section.
+# Each entry: (component_name, list[ADR-numbers that cover it]).
+# Order matches src/kernbench/components/builtin/*.py alphabetical
+# (the same order /report uses).
+# One ADR may be listed under several components (ADR-0014 and ADR-0023
+# are). _build_index emits one "###" heading per entry in this order,
+# raises if a listed ADR has no file in the directory being indexed, and
+# renders "_(no ADR coverage)_" for an entry whose list is empty.
+DETAILED_COMPONENTS: list[tuple[str, list[int]]] = [
+    ("forwarding",      [37]),
+    ("hbm_ctrl",        [34]),
+    ("io_cpu",          [36]),
+    ("m_cpu",           [35]),
+    ("pcie_ep",         [38]),
+    ("pe_cpu",          [14]),
+    ("pe_dma",          [14, 23]),
+    ("pe_fetch_store",  [14]),
+    ("pe_gemm",         [14]),
+    ("pe_ipcq",         [23]),
+    ("pe_math",         [14]),
+    ("pe_mmu",          [39]),
+    ("pe_scheduler",    [14]),
+    ("pe_tcm",          [40]),
+    ("sram",            [41]),
+    ("tiling",          [42]),
+]
+
+
+def _strip_bom(text: str) -> str:
+    """Return ``text`` without a single leading U+FEFF byte-order mark.
+
+    Text that does not start with a BOM (including the empty string) is
+    returned unchanged; only the first character is ever removed.
+    """
+    # startswith() is False for "", so no separate emptiness test is needed.
+    return text[1:] if text.startswith("\ufeff") else text
+
+
+def _find_adrs(adr_dir: Path) -> list[tuple[int, str, Path]]:
+    """Collect the ADR files located directly inside ``adr_dir``.
+
+    Only regular files whose name matches ``ADR_FILENAME_RE`` are kept;
+    every other directory entry is ignored.
+
+    Returns:
+        ``(number, slug, path)`` tuples ordered by ADR number. Entries
+        sharing a number keep their path-sorted relative order.
+    """
+    candidates = (
+        (entry, ADR_FILENAME_RE.match(entry.name))
+        for entry in sorted(adr_dir.iterdir())
+        if entry.is_file()
+    )
+    found = [
+        (int(hit.group(1)), hit.group(2), entry)
+        for entry, hit in candidates
+        if hit
+    ]
+    # sorted() is stable, so the path order above breaks ties on the number.
+    return sorted(found, key=lambda item: item[0])
+
+
+def _extract_title(path: Path) -> str:
+    """Return the title text taken from the first line of an ADR file.
+
+    The file is read as UTF-8, a leading BOM is dropped, and the first
+    line is matched against ``TITLE_RE`` (``# ADR-NNNN: <title>`` or the
+    em-dash variant).
+
+    Raises:
+        ValueError: the first line does not match ``TITLE_RE``.
+    """
+    content = _strip_bom(path.read_text(encoding="utf-8"))
+    # partition() yields "" for empty content, same as an empty first line.
+    heading, _, _ = content.partition("\n")
+    parsed = TITLE_RE.match(heading)
+    if parsed is None:
+        raise ValueError(
+            f"{path.name}: cannot parse title from first line: {heading!r}"
+        )
+    return parsed.group(2)
+
+
+def _build_index(adr_dir: Path, link_prefix: str) -> str:
+    """Build the INDEX.md text for adr_dir.
+
+    Args:
+        adr_dir: directory holding the ``ADR-NNNN-<slug>.md`` files.
+        link_prefix: relative href prepended to each ADR file name
+            (e.g., ``./`` so links resolve relative to the INDEX file
+            location).
+
+    Returns:
+        The complete Markdown document, ending in exactly one newline.
+
+    Raises:
+        RuntimeError: no ADR files were found; an ADR file is missing
+            from ``CLASSIFICATION``; two files share one ADR number;
+            ``DETAILED_COMPONENTS`` names an ADR that has no file here;
+            or a classified ADR would not appear in any section.
+        ValueError: propagated from ``_extract_title`` when a title line
+            cannot be parsed.
+    """
+    adrs = _find_adrs(adr_dir)
+    if not adrs:
+        raise RuntimeError(f"No ADR files found under {adr_dir}")
+
+    # Validate every ADR is classified.
+    missing = sorted({num for num, _slug, _ in adrs if num not in CLASSIFICATION})
+    if missing:
+        raise RuntimeError(
+            "ADR(s) missing from CLASSIFICATION table in "
+            "tools/generate_adr_index.py: "
+            + ", ".join(f"ADR-{n:04d}" for n in missing)
+            + ". Add an entry for each."
+        )
+
+    # Map: num → (filename, title). Two files with the same number would
+    # silently overwrite each other here (and inflate the total below), so
+    # reject that instead of dropping one of them from the index.
+    num_to_meta: dict[int, tuple[str, str]] = {}
+    for num, _slug, path in adrs:
+        if num in num_to_meta:
+            raise RuntimeError(
+                f"Duplicate ADR number ADR-{num:04d}: "
+                f"{num_to_meta[num][0]} and {path.name}"
+            )
+        num_to_meta[num] = (path.name, _extract_title(path))
+
+    # ── Section assembly ────────────────────────────────────────────
+    lines: list[str] = []
+    lines.append("# ADR Index")
+    lines.append("")
+    lines.append(
+        f"Auto-generated by `tools/generate_adr_index.py`. "
+        f"Total ADRs: **{len(adrs)}**."
+    )
+    lines.append("")
+    lines.append(
+        "Classification mirrors the `/report` skill's section assignment. "
+        "When adding a new ADR, also add an entry to the "
+        "`CLASSIFICATION` table in `tools/generate_adr_index.py`."
+    )
+    lines.append("")
+
+    # Every ADR number that ends up in the output; checked at the end so an
+    # ADR can never be counted in the total yet absent from all sections.
+    emitted: set[int] = set()
+
+    def fmt_entry(num: int, note: str = "") -> str:
+        """Format one bullet; ``note`` (if any) is appended in italics."""
+        fname, title = num_to_meta[num]
+        emitted.add(num)
+        entry = f"- [ADR-{num:04d}]({link_prefix}{fname}) — {title}"
+        return f"{entry}  _({note})_" if note else entry
+
+    def in_section(section: str) -> list[int]:
+        """ADR numbers classified under ``section`` that have a file here."""
+        return sorted(
+            n for n, (sec, _) in CLASSIFICATION.items()
+            if sec == section and n in num_to_meta
+        )
+
+    # Design Principles
+    lines.append(f"## {DESIGN_PRINCIPLES}")
+    lines.append("")
+    lines.extend(fmt_entry(n) for n in in_section(DESIGN_PRINCIPLES))
+    lines.append("")
+
+    # High-level Architecture (ascending ADR number; the subgroup text is
+    # shown as an italic note after the title)
+    lines.append(f"## {HIGH_LEVEL}")
+    lines.append("")
+    for n in in_section(HIGH_LEVEL):
+        lines.append(fmt_entry(n, CLASSIFICATION[n][1] or ""))
+    lines.append("")
+
+    # Detailed Architecture (canonical component order)
+    lines.append(f"## {DETAILED}")
+    lines.append("")
+    lines.append("One subsection per component file under `src/kernbench/components/builtin/`.")
+    lines.append("")
+    for comp, adr_nums in DETAILED_COMPONENTS:
+        lines.append(f"### {comp}")
+        lines.append("")
+        if adr_nums:
+            for n in adr_nums:
+                if n not in num_to_meta:
+                    raise RuntimeError(
+                        f"DETAILED_COMPONENTS references ADR-{n:04d} for "
+                        f"'{comp}' but no such ADR file exists."
+                    )
+                lines.append(fmt_entry(n))
+        else:
+            lines.append("_(no ADR coverage)_")
+        lines.append("")
+
+    # Implementation Decisions — one subsection per topic.
+    lines.append(f"## {IMPL_DECISIONS}")
+    lines.append("")
+    topic_to_nums: dict[str, list[int]] = {}
+    for n in in_section(IMPL_DECISIONS):
+        topic = CLASSIFICATION[n][1] or "Uncategorized"
+        topic_to_nums.setdefault(topic, []).append(n)
+    # Stable order: by smallest ADR-number in topic, so older infra appears first.
+    for topic in sorted(topic_to_nums, key=lambda t: min(topic_to_nums[t])):
+        lines.append(f"### {topic}")
+        lines.append("")
+        # in_section() is already sorted, so each topic's list is ascending.
+        lines.extend(fmt_entry(n) for n in topic_to_nums[topic])
+        lines.append("")
+
+    # Fail loud on ADRs that were classified but never rendered: a Detailed
+    # ADR that DETAILED_COMPONENTS does not list, or a section value that is
+    # not one of the four known section names.
+    unrendered = sorted(set(num_to_meta) - emitted)
+    if unrendered:
+        raise RuntimeError(
+            "ADR(s) classified but not rendered in any section (check the "
+            "section name in CLASSIFICATION and, for Detailed Architecture, "
+            "that DETAILED_COMPONENTS lists the ADR): "
+            + ", ".join(f"ADR-{n:04d}" for n in unrendered)
+        )
+
+    return "\n".join(lines).rstrip() + "\n"
+
+
+def _check_or_write(path: Path, content: str, check: bool) -> bool:
+    """Write ``content`` to ``path``, or only compare when ``check`` is set.
+
+    A missing file is treated as having empty content for the comparison.
+
+    Args:
+        path: target file (read and written as UTF-8).
+        content: the text the file should contain.
+        check: when True nothing is written; the file is only compared.
+
+    Returns:
+        In check mode, True if the file differs from ``content``
+        (a ``[diff]`` line is printed), else False. In write mode always
+        False; ``[wrote]`` or ``[unchanged]`` is printed.
+    """
+    present = path.exists()
+    existing = path.read_text(encoding="utf-8") if present else ""
+    changed = existing != content
+    if check:
+        if changed:
+            print(f"[diff] {path} would change.")
+        return changed
+    # Skip the write when the file is already up to date so its mtime is
+    # not touched on a no-op run.
+    if changed or not present:
+        path.write_text(content, encoding="utf-8")
+    if changed:
+        print(f"[wrote] {path}")
+    else:
+        print(f"[unchanged] {path}")
+    return False
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Command-line entry point; returns the process exit code.
+
+    Builds the index for ``<root>/docs/adr`` and, when that directory
+    exists, ``<root>/docs/adr-ko``. All indexes are built before any file
+    is written, so a failure in one directory leaves the other untouched.
+
+    Args:
+        argv: argument list; ``None`` makes argparse read ``sys.argv``.
+
+    Returns:
+        0 on success. 1 if ``docs/adr`` is missing, an index cannot be
+        built, or ``--check`` finds an out-of-date INDEX.md.
+    """
+    p = argparse.ArgumentParser(
+        description=__doc__,
+        # Keep the module docstring's usage block laid out as written.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p.add_argument(
+        "--root", type=Path, default=Path.cwd(),
+        help="Repository root (default: cwd)",
+    )
+    p.add_argument(
+        "--check", action="store_true",
+        help="Exit 1 if generated INDEX would differ from disk",
+    )
+    args = p.parse_args(argv)
+
+    en_dir = args.root / "docs" / "adr"
+    ko_dir = args.root / "docs" / "adr-ko"
+
+    if not en_dir.is_dir():
+        print(f"error: {en_dir} does not exist", file=sys.stderr)
+        return 1
+
+    # The KO tree is optional; EN is always processed first.
+    targets = [("EN", en_dir)]
+    if ko_dir.is_dir():
+        targets.append(("KO", ko_dir))
+
+    # Phase 1: build everything, bailing out before anything is written.
+    rendered: list[tuple[Path, str]] = []
+    for label, adr_dir in targets:
+        try:
+            text = _build_index(adr_dir, link_prefix="./")
+        except (RuntimeError, ValueError) as e:
+            print(f"error ({label}): {e}", file=sys.stderr)
+            return 1
+        rendered.append((adr_dir / "INDEX.md", text))
+
+    # Phase 2: compare or write each index.
+    any_diff = False
+    for index_path, text in rendered:
+        any_diff |= _check_or_write(index_path, text, args.check)
+
+    if args.check and any_diff:
+        print(
+            "INDEX.md is out of date. "
+            "Run `python tools/generate_adr_index.py` to refresh.",
+            file=sys.stderr,
+        )
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())