e33e76f2d1
Adds a section-based table of contents for the 46-ADR corpus, mirroring the /report skill's classification (Design Principles / High-level Architecture / Detailed Architecture by component / Implementation Decisions by topic). Generated for both docs/adr/ (EN titles) and docs/adr-ko/ (KO titles) from one tool. tools/generate_adr_index.py: - Single CLASSIFICATION dict per ADR — add an entry when introducing a new ADR; the script fails loud if any file is missing from the table. - DETAILED_COMPONENTS lists each builtin component and the ADR(s) that cover it (ADR-0014 appears under six PE engines; ADR-0023 under pe_dma + pe_ipcq). - Accepts both ":" and "—" title separators (matching ADR-0033's existing format). - --check mode for CI: exits 1 if INDEX.md is stale. Also includes the docs/report/architecture-2026-1H.md generated by the prior /report write (the public-facing architecture document; 836 lines, 76 source-attribution comments). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
334 lines
12 KiB
Python
334 lines
12 KiB
Python
"""Generate docs/adr/INDEX.md (and docs/adr-ko/INDEX.md) from the ADR corpus.

Auto-derives a section-based index following the same classification as
the /report skill — Design Principles / High-level Architecture /
Detailed Architecture (by component) / Implementation Decisions
(by topic). Run before publishing to refresh INDEX.md.

The classification table below is the single source of truth. When a new
ADR is added under docs/adr/, append an entry to ``CLASSIFICATION``. The
script exits 1 if any ADR file is missing from the table or any title
cannot be parsed, so omissions surface in CI.

Usage:
    python tools/generate_adr_index.py [--root <repo-root>] [--check]

    --check : exit 1 if the generated INDEX differs from the on-disk file
              (used by CI to detect un-regenerated indexes).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# ADR files are named "ADR-<4 digits>-<slug>.md"; group 1 is the number,
# group 2 the slug.
ADR_FILENAME_RE = re.compile(r"^ADR-(\d{4})-([a-z0-9_-]+)\.md$")
# First-line heading "# ADR-NNNN<sep> <title>". The separator is ":" in
# most ADRs and an em-dash ("—") in ADR-0033. The verifier
# (tools/verify_adr_lang_pairs.py) only checks the number, so both
# styles already coexist in the corpus.
TITLE_RE = re.compile(r"^# ADR-(\d{4})\s*[:—]\s*(.+?)\s*$")

# Top-level section names of the generated index.
DESIGN_PRINCIPLES = "Design Principles"
HIGH_LEVEL = "High-level Architecture"
DETAILED = "Detailed Architecture"
IMPL_DECISIONS = "Implementation Decisions"

# Implementation-Decisions topics shared by more than one ADR. Naming
# them once keeps a typo from splitting a topic into two headings.
_TOPIC_ADDRESS = "Address Scheme"
_TOPIC_ROUTING = "Routing & Helper API"
_TOPIC_TOPOLOGY = "Topology Compilation, Diagrams & Builder Algorithms"
_TOPIC_KERNEL_EXEC = "Kernel Execution and Host-Device Messaging"
_TOPIC_PARALLELISM = "Parallelism (Launcher, DP, TP, AHBM backend, CCL algorithm)"
_TOPIC_EVAL = "Evaluation Harnesses"

# ADR number -> (section, subgroup). The subgroup is rendered as a
# trailing note for High-level Architecture entries and as the topic
# heading for Implementation Decisions. For Detailed Architecture the
# placement comes from DETAILED_COMPONENTS below, not from the subgroup.
# Add a line here when introducing a new ADR.
CLASSIFICATION: dict[int, tuple[str, str | None]] = {
    # -- Design Principles --
    13: (DESIGN_PRINCIPLES, None),
    33: (DESIGN_PRINCIPLES, None),

    # -- High-level Architecture --
    3: (HIGH_LEVEL, "System hierarchy (Tray / SIP / CUBE / PE)"),
    7: (HIGH_LEVEL, "Runtime API ↔ sim_engine boundaries"),
    16: (HIGH_LEVEL, "IOChiplet NOC and memory data path"),
    17: (HIGH_LEVEL, "Cube NOC and HBM connectivity"),

    # -- Detailed Architecture (subgroup names the component) --
    # ADR-0014 covers pe_cpu/pe_dma/pe_fetch_store/pe_gemm/pe_math/pe_scheduler,
    # so its subgroup is an umbrella name rather than a single component.
    14: (DETAILED, "pe_pipeline"),
    23: (DETAILED, "pe_ipcq"),
    34: (DETAILED, "hbm_ctrl"),
    35: (DETAILED, "m_cpu"),
    36: (DETAILED, "io_cpu"),
    37: (DETAILED, "forwarding"),
    38: (DETAILED, "pcie_ep"),
    39: (DETAILED, "pe_mmu"),
    40: (DETAILED, "pe_tcm"),
    41: (DETAILED, "sram"),
    42: (DETAILED, "tiling"),

    # -- Implementation Decisions --
    1: (IMPL_DECISIONS, _TOPIC_ADDRESS),
    2: (IMPL_DECISIONS, _TOPIC_ROUTING),
    4: (IMPL_DECISIONS, "Memory Semantics & Local-HBM Bandwidth"),
    5: (IMPL_DECISIONS, _TOPIC_TOPOLOGY),
    6: (IMPL_DECISIONS, _TOPIC_TOPOLOGY),
    8: (IMPL_DECISIONS, "Tensor Deployment and Allocation"),
    9: (IMPL_DECISIONS, _TOPIC_KERNEL_EXEC),
    10: (IMPL_DECISIONS, "CLI Surface and Semantics"),
    11: (IMPL_DECISIONS, _TOPIC_ADDRESS),
    12: (IMPL_DECISIONS, _TOPIC_KERNEL_EXEC),
    15: (IMPL_DECISIONS, "Component Port/Wire Fabric Model"),
    20: (IMPL_DECISIONS, "Two-Pass Data Execution"),
    22: (IMPL_DECISIONS, "2D Grid Program Identity"),
    24: (IMPL_DECISIONS, _TOPIC_PARALLELISM),
    25: (IMPL_DECISIONS, "IPCQ Direction Addressing"),
    26: (IMPL_DECISIONS, _TOPIC_PARALLELISM),
    27: (IMPL_DECISIONS, _TOPIC_PARALLELISM),
    32: (IMPL_DECISIONS, "Intercube All-Reduce"),
    43: (IMPL_DECISIONS, _TOPIC_EVAL),
    44: (IMPL_DECISIONS, _TOPIC_EVAL),
    45: (IMPL_DECISIONS, "Bench Module Contract"),
    46: (IMPL_DECISIONS, "Kernel-side tl.* API (TLContext)"),
    47: (IMPL_DECISIONS, _TOPIC_PARALLELISM),
    48: (IMPL_DECISIONS, "Memory Allocator Algorithms"),
    49: (IMPL_DECISIONS, "Probe Subcommand"),
    50: (IMPL_DECISIONS, _TOPIC_PARALLELISM),
    51: (IMPL_DECISIONS, _TOPIC_ROUTING),
    52: (IMPL_DECISIONS, "Sim-engine Op Log and Memory Store Schemas"),
    53: (IMPL_DECISIONS, _TOPIC_TOPOLOGY),
}

# Components of the Detailed Architecture section, in display order.
# Each entry pairs a component name with the ADR numbers that cover it;
# an ADR may appear under several components.
# Order matches src/kernbench/components/builtin/*.py alphabetical
# (the same order /report uses).
DETAILED_COMPONENTS: list[tuple[str, list[int]]] = [
    ("forwarding", [37]),
    ("hbm_ctrl", [34]),
    ("io_cpu", [36]),
    ("m_cpu", [35]),
    ("pcie_ep", [38]),
    ("pe_cpu", [14]),
    ("pe_dma", [14, 23]),
    ("pe_fetch_store", [14]),
    ("pe_gemm", [14]),
    ("pe_ipcq", [23]),
    ("pe_math", [14]),
    ("pe_mmu", [39]),
    ("pe_scheduler", [14]),
    ("pe_tcm", [40]),
    ("sram", [41]),
    ("tiling", [42]),
]
|
|
|
|
|
|
def _strip_bom(text: str) -> str:
    """Return text without its leading U+FEFF byte-order mark, if it has one."""
    return text[1:] if text.startswith("\ufeff") else text
|
|
|
|
|
|
def _find_adrs(adr_dir: Path) -> list[tuple[int, str, Path]]:
    """List the ADR files directly inside adr_dir.

    Only regular files whose name matches ADR_FILENAME_RE are kept;
    everything else in the directory is ignored.

    Returns:
        ``(number, slug, path)`` tuples ordered by ADR number. Files that
        share a number keep their path order, because the directory
        listing is path-sorted first and the sort by number is stable.
    """
    found: list[tuple[int, str, Path]] = []
    for entry in sorted(adr_dir.iterdir()):
        matched = ADR_FILENAME_RE.match(entry.name) if entry.is_file() else None
        if matched is None:
            continue
        number, slug = matched.groups()
        found.append((int(number), slug, entry))
    return sorted(found, key=lambda item: item[0])
|
|
|
|
|
|
def _extract_title(path: Path) -> str:
    """Return the title taken from the first line of an ADR file.

    The file is read as UTF-8, a leading BOM is dropped, and the first
    line must match TITLE_RE (``# ADR-NNNN: <title>``).

    Raises:
        ValueError: if the first line does not match TITLE_RE.
    """
    content = _strip_bom(path.read_text(encoding="utf-8"))
    # partition() yields "" for empty content, so no separate empty check.
    heading = content.partition("\n")[0]
    parsed = TITLE_RE.match(heading)
    if parsed is None:
        raise ValueError(
            f"{path.name}: cannot parse title from first line: {heading!r}"
        )
    return parsed.group(2)
|
|
|
|
|
|
def _build_index(adr_dir: Path, link_prefix: str) -> str:
    """Build the INDEX.md text for adr_dir.

    link_prefix is the relative href prepended to each ADR filename
    (e.g., ``./`` so links resolve relative to the INDEX file location).

    Returns:
        The Markdown document, terminated by exactly one newline.

    Raises:
        RuntimeError: if adr_dir holds no ADR files, an ADR file has no
            ``CLASSIFICATION`` entry, two files share an ADR number,
            ``DETAILED_COMPONENTS`` names an ADR that has no file, or an
            ADR file would not be listed in any section of the index.
        ValueError: propagated from ``_extract_title`` when a title
            cannot be parsed.
    """
    adrs = _find_adrs(adr_dir)
    if not adrs:
        raise RuntimeError(f"No ADR files found under {adr_dir}")

    # Validate every ADR is classified.
    missing = sorted({num for num, _slug, _path in adrs if num not in CLASSIFICATION})
    if missing:
        raise RuntimeError(
            "ADR(s) missing from CLASSIFICATION table in "
            "tools/generate_adr_index.py: "
            + ", ".join(f"ADR-{n:04d}" for n in missing)
            + ". Add an entry for each."
        )

    # Map: num → (filename, title). Two files sharing a number would
    # silently overwrite each other here while still being counted in
    # the total, so reject them explicitly.
    num_to_meta: dict[int, tuple[str, str]] = {}
    for num, _slug, path in adrs:
        if num in num_to_meta:
            raise RuntimeError(
                f"Duplicate ADR number ADR-{num:04d} under {adr_dir}: "
                f"{num_to_meta[num][0]} and {path.name}."
            )
        num_to_meta[num] = (path.name, _extract_title(path))

    # Every ADR number rendered at least once; checked at the end so an
    # ADR can never be dropped from the index silently.
    placed: set[int] = set()

    def fmt_entry(num: int, note: str = "") -> str:
        """Render one bullet; a non-empty note is appended in italics."""
        fname, title = num_to_meta[num]
        placed.add(num)
        suffix = f" _({note})_" if note else ""
        return f"- [ADR-{num:04d}]({link_prefix}{fname}) — {title}{suffix}"

    def in_section(section: str) -> list[int]:
        """ADR numbers present on disk and classified under section, ascending."""
        return sorted(n for n in num_to_meta if CLASSIFICATION[n][0] == section)

    # ── Section assembly ────────────────────────────────────────────
    lines: list[str] = [
        "# ADR Index",
        "",
        "Auto-generated by `tools/generate_adr_index.py`. "
        f"Total ADRs: **{len(adrs)}**.",
        "",
        "Classification mirrors the `/report` skill's section assignment. "
        "When adding a new ADR, also add an entry to the "
        "`CLASSIFICATION` table in `tools/generate_adr_index.py`.",
        "",
    ]

    # Design Principles (ascending ADR number)
    lines.append("## Design Principles")
    lines.append("")
    lines.extend(fmt_entry(n) for n in in_section(DESIGN_PRINCIPLES))
    lines.append("")

    # High-level Architecture (ascending ADR number; the subgroup, when
    # set, is shown as a trailing italic note)
    lines.append("## High-level Architecture")
    lines.append("")
    lines.extend(
        fmt_entry(n, CLASSIFICATION[n][1] or "") for n in in_section(HIGH_LEVEL)
    )
    lines.append("")

    # Detailed Architecture (canonical component order)
    lines.append("## Detailed Architecture")
    lines.append("")
    lines.append("One subsection per component file under `src/kernbench/components/builtin/`.")
    lines.append("")
    for comp, adr_nums in DETAILED_COMPONENTS:
        lines.append(f"### {comp}")
        lines.append("")
        for n in adr_nums:
            if n not in num_to_meta:
                raise RuntimeError(
                    f"DETAILED_COMPONENTS references ADR-{n:04d} for "
                    f"'{comp}' but no such ADR file exists."
                )
            lines.append(fmt_entry(n))
        if not adr_nums:
            lines.append("_(no ADR coverage)_")
        lines.append("")

    # Implementation Decisions — one subsection per topic. Walking the
    # ADRs in ascending order means each topic is first seen at its
    # smallest ADR number, so dict insertion order already puts older
    # infra first and each topic's list is already sorted.
    lines.append("## Implementation Decisions")
    lines.append("")
    topic_to_nums: dict[str, list[int]] = {}
    for n in in_section(IMPL_DECISIONS):
        topic = CLASSIFICATION[n][1] or "Uncategorized"
        topic_to_nums.setdefault(topic, []).append(n)
    for topic, topic_nums in topic_to_nums.items():
        lines.append(f"### {topic}")
        lines.append("")
        lines.extend(fmt_entry(n) for n in topic_nums)
        lines.append("")

    # An ADR can be classified yet never rendered: a misspelt section
    # name, or a Detailed Architecture ADR that was not added to
    # DETAILED_COMPONENTS. Fail loudly instead of omitting it.
    unplaced = sorted(set(num_to_meta) - placed)
    if unplaced:
        raise RuntimeError(
            "ADR(s) classified but not listed in any index section: "
            + ", ".join(f"ADR-{n:04d}" for n in unplaced)
            + ". Check the section name in CLASSIFICATION; Detailed "
            "Architecture ADRs must also appear in DETAILED_COMPONENTS."
        )

    return "\n".join(lines).rstrip() + "\n"
|
|
|
|
|
|
def _check_or_write(path: Path, content: str, check: bool) -> bool:
    """Write content to path, or only compare against it in --check mode.

    A missing file is treated as empty text for the comparison.

    In check mode nothing is written; a ``[diff]`` line is printed when
    the on-disk text differs from content. In write mode the file is
    written only when it is missing or its text differs, so a no-op run
    does not bump the modification time; a ``[wrote]`` or
    ``[unchanged]`` line is printed.

    Returns:
        True only in check mode when the file differs from content;
        write mode always returns False.
    """
    present = path.exists()
    existing = path.read_text(encoding="utf-8") if present else ""
    changed = existing != content
    if check:
        if changed:
            print(f"[diff] {path} would change.")
        return changed
    # ``not present`` keeps the file being created even for empty content.
    if changed or not present:
        path.write_text(content, encoding="utf-8")
    print(f"[wrote] {path}" if changed else f"[unchanged] {path}")
    return False
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Regenerate, or with --check verify, the EN and KO INDEX.md files.

    docs/adr must exist under the root; docs/adr-ko is processed only
    when present.

    Returns:
        0 on success; 1 when docs/adr is missing, an index cannot be
        built, or --check finds an index that would change.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--root",
        type=Path,
        default=Path.cwd(),
        help="Repository root (default: cwd)",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Exit 1 if generated INDEX would differ from disk",
    )
    opts = parser.parse_args(argv)

    docs_dir = opts.root / "docs"
    en_dir = docs_dir / "adr"
    ko_dir = docs_dir / "adr-ko"

    if not en_dir.is_dir():
        print(f"error: {en_dir} does not exist", file=sys.stderr)
        return 1

    # (label, directory, optional): EN was verified above, KO may be absent.
    targets = (("EN", en_dir, False), ("KO", ko_dir, True))
    stale = False
    for label, adr_dir, optional in targets:
        if optional and not adr_dir.is_dir():
            continue
        try:
            index_text = _build_index(adr_dir, link_prefix="./")
        except (RuntimeError, ValueError) as exc:
            print(f"error ({label}): {exc}", file=sys.stderr)
            return 1
        if _check_or_write(adr_dir / "INDEX.md", index_text, opts.check):
            stale = True

    if not (opts.check and stale):
        return 0
    print(
        "INDEX.md is out of date. "
        "Run `python tools/generate_adr_index.py` to refresh.",
        file=sys.stderr,
    )
    return 1
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|