adr: add INDEX.md (auto-generated by tools/generate_adr_index.py)

Adds a section-based table of contents for the 46-ADR corpus, mirroring
the /report skill's classification (Design Principles / High-level
Architecture / Detailed Architecture by component / Implementation
Decisions by topic). Generated for both docs/adr/ (EN titles) and
docs/adr-ko/ (KO titles) from one tool.

tools/generate_adr_index.py:
- A single CLASSIFICATION dict with one entry per ADR — add an entry when
  introducing a new ADR; the script fails loudly if any ADR file is missing
  from the table.
- DETAILED_COMPONENTS lists each builtin component and the ADR(s) that
  cover it (ADR-0014 appears under six PE engines; ADR-0023 under
  pe_dma + pe_ipcq).
- Accepts both ":" and "—" title separators (matching ADR-0033's
  existing format).
- --check mode for CI: exits 1 if INDEX.md is stale.

Also includes the docs/report/architecture-2026-1H.md generated by the
prior /report write (the public-facing architecture document; 836 lines,
76 source-attribution comments).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-22 11:15:37 -07:00
parent bd49c93703
commit e33e76f2d1
4 changed files with 1517 additions and 0 deletions
+333
View File
@@ -0,0 +1,333 @@
"""Generate docs/adr/INDEX.md (and docs/adr-ko/INDEX.md) from the ADR corpus.
Auto-derives a section-based index following the same classification as
the /report skill — Design Principles / High-level Architecture /
Detailed Architecture (by component) / Implementation Decisions
(by topic). Run before publishing to refresh INDEX.md.
The classification table below is the single source of truth. When a new
ADR is added under docs/adr/, append an entry to ``CLASSIFICATION``. The
script exits 1 if any ADR file is missing from the table or any title
cannot be parsed, so omissions surface in CI.
Usage:
python tools/generate_adr_index.py [--root <repo-root>] [--check]
--check : exit 1 if the generated INDEX differs from the on-disk file
(used by CI to detect un-regenerated indexes).
"""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
# Matches ADR file names of the form ``ADR-NNNN-<slug>.md``.
# Group 1 is the four-digit ADR number; group 2 is the slug (lowercase
# letters, digits, "_" and "-").
ADR_FILENAME_RE = re.compile(r"^ADR-(\d{4})-([a-z0-9_-]+)\.md$")
# Title separator may be ":" (most ADRs) or "—" (em-dash; ADR-0033 uses
# this). The verifier (tools/verify_adr_lang_pairs.py) only checks the
# number, so both styles already coexist in the corpus.
# Group 1 is the ADR number; group 2 is the title text with surrounding
# whitespace trimmed.
TITLE_RE = re.compile(r"^# ADR-(\d{4})\s*[:—]\s*(.+?)\s*$")
# Section names. Each CLASSIFICATION value starts with one of these, and
# _build_index compares against them when grouping ADRs into sections.
DESIGN_PRINCIPLES = "Design Principles"
HIGH_LEVEL = "High-level Architecture"
DETAILED = "Detailed Architecture"
IMPL_DECISIONS = "Implementation Decisions"
# ADR number -> (section, subgroup). How _build_index uses the subgroup
# depends on the section:
#   * Design Principles: no subgroup (None).
#   * High-level Architecture: rendered as an italic note after the entry.
#   * Detailed Architecture: not read by _build_index; the per-component
#     grouping comes from DETAILED_COMPONENTS, so the value here is
#     informational only.
#   * Implementation Decisions: the topic heading the ADR is listed under.
# Add a line here when introducing a new ADR.
CLASSIFICATION: dict[int, tuple[str, str | None]] = {
    # Design Principles
    13: (DESIGN_PRINCIPLES, None),
    33: (DESIGN_PRINCIPLES, None),
    # High-level Architecture
    3: (HIGH_LEVEL, "System hierarchy (Tray / SIP / CUBE / PE)"),
    7: (HIGH_LEVEL, "Runtime API ↔ sim_engine boundaries"),
    16: (HIGH_LEVEL, "IOChiplet NOC and memory data path"),
    17: (HIGH_LEVEL, "Cube NOC and HBM connectivity"),
    # Detailed Architecture (subgroup is a descriptive label; an ADR may span
    # several components, which only DETAILED_COMPONENTS can express)
    14: (DETAILED, "pe_pipeline"),  # covers pe_cpu/pe_dma/pe_fetch_store/pe_gemm/pe_math/pe_scheduler
    23: (DETAILED, "pe_ipcq"),  # also listed under pe_dma in DETAILED_COMPONENTS
    34: (DETAILED, "hbm_ctrl"),
    35: (DETAILED, "m_cpu"),
    36: (DETAILED, "io_cpu"),
    37: (DETAILED, "forwarding"),
    38: (DETAILED, "pcie_ep"),
    39: (DETAILED, "pe_mmu"),
    40: (DETAILED, "pe_tcm"),
    41: (DETAILED, "sram"),
    42: (DETAILED, "tiling"),
    # Implementation Decisions (ADRs sharing a topic string share a heading)
    1: (IMPL_DECISIONS, "Address Scheme"),
    2: (IMPL_DECISIONS, "Routing & Helper API"),
    4: (IMPL_DECISIONS, "Memory Semantics & Local-HBM Bandwidth"),
    5: (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"),
    6: (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"),
    8: (IMPL_DECISIONS, "Tensor Deployment and Allocation"),
    9: (IMPL_DECISIONS, "Kernel Execution and Host-Device Messaging"),
    10: (IMPL_DECISIONS, "CLI Surface and Semantics"),
    11: (IMPL_DECISIONS, "Address Scheme"),
    12: (IMPL_DECISIONS, "Kernel Execution and Host-Device Messaging"),
    15: (IMPL_DECISIONS, "Component Port/Wire Fabric Model"),
    20: (IMPL_DECISIONS, "Two-Pass Data Execution"),
    22: (IMPL_DECISIONS, "2D Grid Program Identity"),
    24: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
    25: (IMPL_DECISIONS, "IPCQ Direction Addressing"),
    26: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
    27: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
    32: (IMPL_DECISIONS, "Intercube All-Reduce"),
    43: (IMPL_DECISIONS, "Evaluation Harnesses"),
    44: (IMPL_DECISIONS, "Evaluation Harnesses"),
    45: (IMPL_DECISIONS, "Bench Module Contract"),
    46: (IMPL_DECISIONS, "Kernel-side tl.* API (TLContext)"),
    47: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
    48: (IMPL_DECISIONS, "Memory Allocator Algorithms"),
    49: (IMPL_DECISIONS, "Probe Subcommand"),
    50: (IMPL_DECISIONS, "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"),
    51: (IMPL_DECISIONS, "Routing & Helper API"),
    52: (IMPL_DECISIONS, "Sim-engine Op Log and Memory Store Schemas"),
    53: (IMPL_DECISIONS, "Topology Compilation, Diagrams & Builder Algorithms"),
}
# Canonical component order for the Detailed Architecture section.
# Each entry: (component_name, list[ADR-numbers that cover it]).
# Order matches src/kernbench/components/builtin/*.py alphabetical
# (the same order /report uses).
# Rendering rules applied by _build_index:
#   * every component gets its own "### <component>" subsection;
#   * an ADR may be listed under several components and is then repeated;
#   * an empty list renders the placeholder "_(no ADR coverage)_";
#   * a listed ADR with no file in the directory being indexed raises
#     RuntimeError.
DETAILED_COMPONENTS: list[tuple[str, list[int]]] = [
    ("forwarding", [37]),
    ("hbm_ctrl", [34]),
    ("io_cpu", [36]),
    ("m_cpu", [35]),
    ("pcie_ep", [38]),
    ("pe_cpu", [14]),
    ("pe_dma", [14, 23]),
    ("pe_fetch_store", [14]),
    ("pe_gemm", [14]),
    ("pe_ipcq", [23]),
    ("pe_math", [14]),
    ("pe_mmu", [39]),
    ("pe_scheduler", [14]),
    ("pe_tcm", [40]),
    ("sram", [41]),
    ("tiling", [42]),
]
def _strip_bom(text: str) -> str:
"""Strip leading UTF-8 BOM if present."""
if text and ord(text[0]) == 0xFEFF:
return text[1:]
return text
def _find_adrs(adr_dir: Path) -> list[tuple[int, str, Path]]:
"""Return [(num, slug, path), ...] for ADR files in adr_dir, sorted by num."""
out: list[tuple[int, str, Path]] = []
for p in sorted(adr_dir.iterdir()):
if not p.is_file():
continue
m = ADR_FILENAME_RE.match(p.name)
if not m:
continue
out.append((int(m.group(1)), m.group(2), p))
out.sort(key=lambda t: t[0])
return out
def _extract_title(path: Path) -> str:
    """Return the title found on the first line of an ADR file.

    The file is read as UTF-8 and a leading BOM is ignored. The first
    line must match ``TITLE_RE`` (``# ADR-NNNN: <title>`` or the same
    with an em-dash separator).

    Raises:
        ValueError: if the first line is not a recognizable ADR heading.
    """
    content = _strip_bom(path.read_text(encoding="utf-8"))
    # partition() yields "" for an empty file, same as an empty first line.
    first_line = content.partition("\n")[0]
    parsed = TITLE_RE.match(first_line)
    if parsed is None:
        raise ValueError(
            f"{path.name}: cannot parse title from first line: {first_line!r}"
        )
    return parsed.group(2)
def _build_index(adr_dir: Path, link_prefix: str) -> str:
    """Build the INDEX.md text for ``adr_dir``.

    Args:
        adr_dir: Directory holding the ``ADR-NNNN-<slug>.md`` files.
        link_prefix: Relative href prepended to each ADR file name in the
            generated links (e.g., ``./`` so links resolve relative to the
            INDEX file location).

    Returns:
        The complete Markdown text, ending with exactly one newline.

    Raises:
        RuntimeError: if no ADR files are found, an ADR is missing from
            ``CLASSIFICATION``, an ADR is classified but would not be
            rendered in any section, or ``DETAILED_COMPONENTS`` references
            an ADR that has no file in ``adr_dir``.
        ValueError: propagated from ``_extract_title`` when a title line
            cannot be parsed.
    """
    adrs = _find_adrs(adr_dir)
    if not adrs:
        raise RuntimeError(f"No ADR files found under {adr_dir}")

    # Validate every ADR is classified.
    missing = sorted(num for num, _slug, _path in adrs if num not in CLASSIFICATION)
    if missing:
        raise RuntimeError(
            "ADR(s) missing from CLASSIFICATION table in "
            "tools/generate_adr_index.py: "
            + ", ".join(f"ADR-{n:04d}" for n in missing)
            + ". Add an entry for each."
        )

    # Validate every ADR will actually be rendered. The three flat sections
    # list their members straight from CLASSIFICATION; anything else
    # (Detailed Architecture, or a mistyped section name) only shows up if
    # DETAILED_COMPONENTS lists it. Without this check such an ADR would be
    # counted in the total but silently left out of the index.
    flat_sections = (DESIGN_PRINCIPLES, HIGH_LEVEL, IMPL_DECISIONS)
    in_components = {n for _comp, nums in DETAILED_COMPONENTS for n in nums}
    unplaced = sorted(
        num
        for num, _slug, _path in adrs
        if CLASSIFICATION[num][0] not in flat_sections and num not in in_components
    )
    if unplaced:
        raise RuntimeError(
            "ADR(s) classified in tools/generate_adr_index.py but not "
            "rendered in any index section (Detailed Architecture ADRs must "
            "also be listed in DETAILED_COMPONENTS): "
            + ", ".join(f"ADR-{n:04d}" for n in unplaced)
            + "."
        )

    # Map: num → (filename, title)
    num_to_meta: dict[int, tuple[str, str]] = {
        num: (path.name, _extract_title(path)) for num, _slug, path in adrs
    }

    def fmt_entry(num: int, note: str = "") -> str:
        """Format one bullet; ``note`` is appended in italics when given."""
        fname, title = num_to_meta[num]
        entry = f"- [ADR-{num:04d}]({link_prefix}{fname}) — {title}"
        return f"{entry} _({note})_" if note else entry

    def section_nums(section: str) -> list[int]:
        """ADR numbers present in adr_dir for ``section``, ascending."""
        return sorted(n for n in num_to_meta if CLASSIFICATION[n][0] == section)

    # ── Section assembly ────────────────────────────────────────────
    lines: list[str] = [
        "# ADR Index",
        "",
        "Auto-generated by `tools/generate_adr_index.py`. "
        f"Total ADRs: **{len(adrs)}**.",
        "",
        "Classification mirrors the `/report` skill's section assignment. "
        "When adding a new ADR, also add an entry to the "
        "`CLASSIFICATION` table in `tools/generate_adr_index.py`.",
        "",
    ]

    # Design Principles (ascending ADR number)
    lines += [f"## {DESIGN_PRINCIPLES}", ""]
    lines += [fmt_entry(n) for n in section_nums(DESIGN_PRINCIPLES)]
    lines.append("")

    # High-level Architecture (ascending ADR number; the subgroup, when
    # present, is shown as an italic note after the title)
    lines += [f"## {HIGH_LEVEL}", ""]
    lines += [
        fmt_entry(n, CLASSIFICATION[n][1] or "") for n in section_nums(HIGH_LEVEL)
    ]
    lines.append("")

    # Detailed Architecture (canonical component order)
    lines += [
        f"## {DETAILED}",
        "",
        "One subsection per component file under `src/kernbench/components/builtin/`.",
        "",
    ]
    for comp, adr_nums in DETAILED_COMPONENTS:
        lines += [f"### {comp}", ""]
        if adr_nums:
            for n in adr_nums:
                if n not in num_to_meta:
                    raise RuntimeError(
                        f"DETAILED_COMPONENTS references ADR-{n:04d} for "
                        f"'{comp}' but no such ADR file exists."
                    )
                lines.append(fmt_entry(n))
        else:
            lines.append("_(no ADR coverage)_")
        lines.append("")

    # Implementation Decisions — group by topic. Topics are ordered by their
    # smallest ADR number, so older infra appears first; within a topic the
    # ADRs are ascending because section_nums() is already sorted.
    lines += [f"## {IMPL_DECISIONS}", ""]
    topic_to_nums: dict[str, list[int]] = {}
    for n in section_nums(IMPL_DECISIONS):
        topic = CLASSIFICATION[n][1] or "Uncategorized"
        topic_to_nums.setdefault(topic, []).append(n)
    for topic in sorted(topic_to_nums, key=lambda t: min(topic_to_nums[t])):
        lines += [f"### {topic}", ""]
        lines += [fmt_entry(n) for n in topic_to_nums[topic]]
        lines.append("")

    # Collapse trailing blank lines into a single final newline.
    return "\n".join(lines).rstrip() + "\n"
def _check_or_write(path: Path, content: str, check: bool) -> bool:
"""Write content to path, or compare in --check mode. Returns True on diff."""
existing = path.read_text(encoding="utf-8") if path.exists() else ""
if check:
if existing != content:
print(f"[diff] {path} would change.")
return True
return False
path.write_text(content, encoding="utf-8")
if existing != content:
print(f"[wrote] {path}")
else:
print(f"[unchanged] {path}")
return False
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point; returns the process exit status.

    Regenerates (or, with ``--check``, verifies) ``INDEX.md`` under
    ``<root>/docs/adr`` and, when that directory exists,
    ``<root>/docs/adr-ko``.

    Returns:
        0 on success. 1 if ``docs/adr`` is missing, an index cannot be
        built, or ``--check`` finds a stale index.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--root",
        type=Path,
        default=Path.cwd(),
        help="Repository root (default: cwd)",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Exit 1 if generated INDEX would differ from disk",
    )
    options = parser.parse_args(argv)

    docs_dir = options.root / "docs"
    english_dir = docs_dir / "adr"
    if not english_dir.is_dir():
        print(f"error: {english_dir} does not exist", file=sys.stderr)
        return 1

    # (label, directory, required). The English corpus is mandatory and was
    # checked above; the Korean one is processed only when it is present.
    targets = (
        ("EN", english_dir, True),
        ("KO", docs_dir / "adr-ko", False),
    )
    stale = False
    for label, adr_dir, required in targets:
        if not required and not adr_dir.is_dir():
            continue
        try:
            rendered = _build_index(adr_dir, link_prefix="./")
        except (RuntimeError, ValueError) as exc:
            print(f"error ({label}): {exc}", file=sys.stderr)
            return 1
        stale = _check_or_write(adr_dir / "INDEX.md", rendered, options.check) or stale

    if not (options.check and stale):
        return 0
    print(
        "INDEX.md is out of date. "
        "Run `python tools/generate_adr_index.py` to refresh.",
        file=sys.stderr,
    )
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())