"""Generate docs/adr/INDEX.md (and docs/adr-ko/INDEX.md) from the ADR corpus. Auto-derives a section-based index following the same classification as the /report skill — Design Principles / High-level Architecture / Detailed Architecture (by component) / Implementation Decisions (by topic). Run before publishing to refresh INDEX.md. The classification table below is the single source of truth. When a new ADR is added under docs/adr/, append an entry to ``CLASSIFICATION``. The script exits 1 if any ADR file is missing from the table or any title cannot be parsed, so omissions surface in CI. Usage: python tools/generate_adr_index.py [--root ] [--check] --check : exit 1 if the generated INDEX differs from the on-disk file (used by CI to detect un-regenerated indexes). """ from __future__ import annotations import argparse import re import sys from pathlib import Path ADR_FILENAME_RE = re.compile(r"^ADR-(\d{4})-([a-z0-9_-]+)\.md$") # Title separator may be ":" (most ADRs) or "—" (em-dash; ADR-0033 uses # this). The verifier (tools/verify_adr_lang_pairs.py) only checks the # number, so both styles already coexist in the corpus. TITLE_RE = re.compile(r"^# ADR-(\d{4})\s*[:—]\s*(.+?)\s*$") DESIGN_PRINCIPLES = "Design Principles" HIGH_LEVEL = "High-level Architecture" DETAILED = "Detailed Architecture" IMPL_DECISIONS = "Implementation Decisions" # (section, subgroup) per ADR. subgroup is used to sub-divide Detailed # (by component, see DETAILED_COMPONENTS) and Implementation (by topic). # Add a line here when introducing a new ADR. CLASSIFICATION: dict[int, tuple[str, str | None]] = { # Design Principles 13: (DESIGN_PRINCIPLES, None), 33: (DESIGN_PRINCIPLES, None), # High-level Architecture 3: (HIGH_LEVEL, "System hierarchy (Tray / SIP / CUBE / PE)"), 7: (HIGH_LEVEL, "Runtime API ↔ sim_engine boundaries"), 16: (HIGH_LEVEL, "IOChiplet NOC and memory data path"), 17: (HIGH_LEVEL, "Cube NOC and HBM connectivity"), # Detailed Architecture (subgroup matches DETAILED_COMPONENTS entries) 14: (DETAILED, "pe_pipeline"), # covers pe_cpu/pe_dma/pe_fetch_store/pe_gemm/pe_math/pe_scheduler 23: (DETAILED, "pe_ipcq"), 34: (DETAILED, "hbm_ctrl"), 35: (DETAILED, "m_cpu"), 36: (DETAILED, "io_cpu"), 37: (DETAILED, "forwarding"), 38: (DETAILED, "pcie_ep"), 39: (DETAILED, "pe_mmu"), 40: (DETAILED, "pe_tcm"), 41: (DETAILED, "sram"), 42: (DETAILED, "tiling"), # Implementation Decisions 1: (IMPL_DECISIONS, "Address Scheme"), 2: (IMPL_DECISIONS, "Routing & Helper API"), 4: (IMPL_DECISIONS, "Memory Semantics & Local-HBM Bandwidth"), 5: (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"), 6: (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"), 8: (IMPL_DECISIONS, "Tensor Deployment and Allocation"), 9: (IMPL_DECISIONS, "Kernel Execution and Host-Device Messaging"), 10: (IMPL_DECISIONS, "CLI Surface and Semantics"), 11: (IMPL_DECISIONS, "Address Scheme"), 12: (IMPL_DECISIONS, "Kernel Execution and Host-Device Messaging"), 15: (IMPL_DECISIONS, "Component Port/Wire Fabric Model"), 20: (IMPL_DECISIONS, "Two-Pass Data Execution"), 22: (IMPL_DECISIONS, "2D Grid Program Identity"), 24: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"), 25: (IMPL_DECISIONS, "IPCQ Direction Addressing"), 26: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"), 27: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"), 32: (IMPL_DECISIONS, "Intercube All-Reduce"), 43: (IMPL_DECISIONS, "Evaluation Harnesses"), 44: (IMPL_DECISIONS, "Evaluation Harnesses"), 45: (IMPL_DECISIONS, "Bench Module Contract"), 46: (IMPL_DECISIONS, "Kernel-side tl.* API (TLContext)"), 47: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"), 48: (IMPL_DECISIONS, "Memory Allocator Algorithms"), 49: (IMPL_DECISIONS, "Probe Subcommand"), 50: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"), 51: (IMPL_DECISIONS, "Routing & Helper API"), 52: (IMPL_DECISIONS, "Sim-engine Op Log and Memory Store Schemas"), 53: (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"), 54: (IMPL_DECISIONS, "Evaluation Harnesses"), } # Canonical component order for the Detailed Architecture section. # Each entry: (component_name, list[ADR-numbers that cover it]). # Order matches src/kernbench/components/builtin/*.py alphabetical # (the same order /report uses). DETAILED_COMPONENTS: list[tuple[str, list[int]]] = [ ("forwarding", [37]), ("hbm_ctrl", [34]), ("io_cpu", [36]), ("m_cpu", [35]), ("pcie_ep", [38]), ("pe_cpu", [14]), ("pe_dma", [14, 23]), ("pe_fetch_store", [14]), ("pe_gemm", [14]), ("pe_ipcq", [23]), ("pe_math", [14]), ("pe_mmu", [39]), ("pe_scheduler", [14]), ("pe_tcm", [40]), ("sram", [41]), ("tiling", [42]), ] def _strip_bom(text: str) -> str: """Strip leading UTF-8 BOM if present.""" if text and ord(text[0]) == 0xFEFF: return text[1:] return text def _find_adrs(adr_dir: Path) -> list[tuple[int, str, Path]]: """Return [(num, slug, path), ...] for ADR files in adr_dir, sorted by num.""" out: list[tuple[int, str, Path]] = [] for p in sorted(adr_dir.iterdir()): if not p.is_file(): continue m = ADR_FILENAME_RE.match(p.name) if not m: continue out.append((int(m.group(1)), m.group(2), p)) out.sort(key=lambda t: t[0]) return out def _extract_title(path: Path) -> str: """Parse the title from the first line `# ADR-NNNN: `. Strips BOM.""" text = _strip_bom(path.read_text(encoding="utf-8")) first_line = text.split("\n", 1)[0] if text else "" m = TITLE_RE.match(first_line) if not m: raise ValueError( f"{path.name}: cannot parse title from first line: {first_line!r}" ) return m.group(2) def _build_index(adr_dir: Path, link_prefix: str) -> str: """Build the INDEX.md text for adr_dir. link_prefix is the relative href used for ADR links (e.g., ``./`` so links resolve relative to the INDEX file location). """ adrs = _find_adrs(adr_dir) if not adrs: raise RuntimeError(f"No ADR files found under {adr_dir}") # Validate every ADR is classified. missing = sorted(num for num, _slug, _ in adrs if num not in CLASSIFICATION) if missing: raise RuntimeError( "ADR(s) missing from CLASSIFICATION table in " "tools/generate_adr_index.py: " + ", ".join(f"ADR-{n:04d}" for n in missing) + ". Add an entry for each." ) # Map: num → (filename, title) num_to_meta: dict[int, tuple[str, str]] = {} for num, _slug, path in adrs: num_to_meta[num] = (path.name, _extract_title(path)) # ── Section assembly ──────────────────────────────────────────── lines: list[str] = [] lines.append("# ADR Index") lines.append("") lines.append( f"Auto-generated by `tools/generate_adr_index.py`. " f"Total ADRs: **{len(adrs)}**." ) lines.append("") lines.append( "Classification mirrors the `/report` skill's section assignment. " "When adding a new ADR, also add an entry to the " "`CLASSIFICATION` table in `tools/generate_adr_index.py`." ) lines.append("") def fmt_entry(num: int) -> str: fname, title = num_to_meta[num] return f"- [ADR-{num:04d}]({link_prefix}{fname}) — {title}" # Design Principles lines.append("## Design Principles") lines.append("") nums = sorted(n for n, (sec, _) in CLASSIFICATION.items() if sec == DESIGN_PRINCIPLES and n in num_to_meta) for n in nums: lines.append(fmt_entry(n)) lines.append("") # High-level Architecture (preserve declaration order via CLASSIFICATION dict's insertion order) lines.append("## High-level Architecture") lines.append("") nums = sorted(n for n, (sec, _) in CLASSIFICATION.items() if sec == HIGH_LEVEL and n in num_to_meta) for n in nums: sub = CLASSIFICATION[n][1] or "" fname, title = num_to_meta[n] if sub: lines.append( f"- [ADR-{n:04d}]({link_prefix}{fname}) — {title}" f" _({sub})_" ) else: lines.append(fmt_entry(n)) lines.append("") # Detailed Architecture (canonical component order) lines.append("## Detailed Architecture") lines.append("") lines.append("One subsection per component file under `src/kernbench/components/builtin/`.") lines.append("") for comp, adr_nums in DETAILED_COMPONENTS: lines.append(f"### {comp}") lines.append("") if adr_nums: for n in adr_nums: if n not in num_to_meta: raise RuntimeError( f"DETAILED_COMPONENTS references ADR-{n:04d} for " f"'{comp}' but no such ADR file exists." ) lines.append(fmt_entry(n)) else: lines.append("_(no ADR coverage)_") lines.append("") # Implementation Decisions — group by subgroup, preserving first-appearance order. lines.append("## Implementation Decisions") lines.append("") topic_order: list[str] = [] topic_to_nums: dict[str, list[int]] = {} for n, (sec, sub) in CLASSIFICATION.items(): if sec != IMPL_DECISIONS or n not in num_to_meta: continue topic = sub or "Uncategorized" if topic not in topic_to_nums: topic_order.append(topic) topic_to_nums[topic] = [] topic_to_nums[topic].append(n) # Stable order: by smallest ADR-number in topic, so older infra appears first. topic_order.sort(key=lambda t: min(topic_to_nums[t])) for topic in topic_order: lines.append(f"### {topic}") lines.append("") for n in sorted(topic_to_nums[topic]): lines.append(fmt_entry(n)) lines.append("") return "\n".join(lines).rstrip() + "\n" def _check_or_write(path: Path, content: str, check: bool) -> bool: """Write content to path, or compare in --check mode. Returns True on diff.""" existing = path.read_text(encoding="utf-8") if path.exists() else "" if check: if existing != content: print(f"[diff] {path} would change.") return True return False path.write_text(content, encoding="utf-8") if existing != content: print(f"[wrote] {path}") else: print(f"[unchanged] {path}") return False def main(argv: list[str] | None = None) -> int: p = argparse.ArgumentParser(description=__doc__) p.add_argument( "--root", type=Path, default=Path.cwd(), help="Repository root (default: cwd)", ) p.add_argument( "--check", action="store_true", help="Exit 1 if generated INDEX would differ from disk", ) args = p.parse_args(argv) en_dir = args.root / "docs" / "adr" ko_dir = args.root / "docs" / "adr-ko" if not en_dir.is_dir(): print(f"error: {en_dir} does not exist", file=sys.stderr) return 1 any_diff = False try: en_index = _build_index(en_dir, link_prefix="./") except (RuntimeError, ValueError) as e: print(f"error (EN): {e}", file=sys.stderr) return 1 any_diff |= _check_or_write(en_dir / "INDEX.md", en_index, args.check) if ko_dir.is_dir(): try: ko_index = _build_index(ko_dir, link_prefix="./") except (RuntimeError, ValueError) as e: print(f"error (KO): {e}", file=sys.stderr) return 1 any_diff |= _check_or_write(ko_dir / "INDEX.md", ko_index, args.check) if args.check and any_diff: print( "INDEX.md is out of date. " "Run `python tools/generate_adr_index.py` to refresh.", file=sys.stderr, ) return 1 return 0 if __name__ == "__main__": sys.exit(main())