"""Verify ADR language pair invariants.

Policy (see CLAUDE.md Part 2 -> ADR Translation Discipline):
  docs/adr/          : English canonical
  docs/adr-ko/       : Korean translation (1:1 mirror)
  docs/adr-history/  : frozen, not checked (transitional)
  docs/adr-proposed/ : language-free, not checked

Checks:
  - every docs/adr/*.md has a matching docs/adr-ko/*.md
  - every docs/adr-ko/*.md has a matching docs/adr/*.md (no orphans)
  - title line `# ADR-NNNN:` of each pair matches the filename's NNNN
  - `## Status` block content is byte-equal (after CRLF/LF normalization)
    between EN and KO

Only files named `ADR-NNNN-<slug>.md` (lowercase slug) are considered.

Exit code: 0 if all OK, 1 if any mismatch.
"""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path

# File names that count as ADRs; group 1 is the four-digit ADR number.
ADR_FILENAME_RE = re.compile(r"^ADR-(\d{4})-[a-z0-9_-]+\.md$")
# Expected shape of the first line of an ADR; group 1 is the ADR number.
TITLE_RE = re.compile(r"^# ADR-(\d{4}):")


def _normalize(text: str) -> str:
    """Return *text* with CRLF and lone CR line endings converted to LF."""
    return text.replace("\r\n", "\n").replace("\r", "\n")


def _read_text(path: Path) -> str:
    """Read *path* as UTF-8 text, dropping a leading byte-order mark.

    A BOM is an encoding artifact rather than document content; left in
    place it would stop the first line from matching ``TITLE_RE``.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeError: if the file is not valid UTF-8.
    """
    return path.read_text(encoding="utf-8-sig")


def find_adr_files(adr_dir: Path) -> dict[str, Path]:
    """Map ADR file names to their paths for the regular files in *adr_dir*.

    Only direct children whose name matches ``ADR_FILENAME_RE`` are
    included; entries are inserted in sorted path order. Returns an empty
    dict when *adr_dir* is not a directory.
    """
    if not adr_dir.is_dir():
        return {}
    return {
        p.name: p
        for p in sorted(adr_dir.iterdir())
        if p.is_file() and ADR_FILENAME_RE.match(p.name)
    }


def extract_title_id(text: str) -> str | None:
    """Return the NNNN from a first line of the form `# ADR-NNNN:`.

    Returns None when *text* has no lines or its first line does not match
    ``TITLE_RE``.
    """
    lines = _normalize(text).splitlines()
    if not lines:
        return None
    m = TITLE_RE.match(lines[0])
    return m.group(1) if m else None


def extract_status_block(text: str) -> str | None:
    """Return content between `## Status` and the next `## ` heading, stripped.

    Returns None if no `## Status` heading exists.
    """
    in_status = False
    collected: list[str] = []
    for line in _normalize(text).splitlines():
        if line.strip() == "## Status":
            # The heading line itself is never part of the block.
            in_status = True
        elif in_status:
            if line.startswith("## "):
                break
            collected.append(line)
    if not in_status:
        return None
    return "\n".join(collected).strip()


def _check_pair(name: str, expected_id: str, en_text: str, ko_text: str) -> list[str]:
    """Return the title and Status errors for one EN/KO pair named *name*."""
    errors: list[str] = []

    en_id = extract_title_id(en_text)
    ko_id = extract_title_id(ko_text)
    if en_id != expected_id:
        errors.append(
            f"{name}: EN title ADR-ID {en_id!r} != filename {expected_id!r}"
        )
    if ko_id != expected_id:
        errors.append(
            f"{name}: KO title ADR-ID {ko_id!r} != filename {expected_id!r}"
        )

    en_status = extract_status_block(en_text)
    ko_status = extract_status_block(ko_text)
    if en_status is None:
        errors.append(f"{name}: EN missing `## Status` section")
    if ko_status is None:
        errors.append(f"{name}: KO missing `## Status` section")
    if en_status is not None and ko_status is not None and en_status != ko_status:
        errors.append(
            f"{name}: Status block mismatch\n"
            f"      EN: {en_status!r}\n"
            f"      KO: {ko_status!r}"
        )
    return errors


def verify(root: Path) -> list[str]:
    """Check the EN/KO ADR pairs under *root* and return error messages.

    Compares ``root/docs/adr`` with ``root/docs/adr-ko``: reports EN files
    without a KO counterpart, KO files without an EN counterpart, and, for
    each file present on both sides, title-ID and `## Status` mismatches.
    A file that cannot be read or decoded is reported as an error for that
    file and the remaining pairs are still checked.

    Returns:
        A list of human-readable messages; empty when everything matches.
    """
    errors: list[str] = []
    en_files = find_adr_files(root / "docs" / "adr")
    ko_files = find_adr_files(root / "docs" / "adr-ko")

    for name in en_files:
        if name not in ko_files:
            errors.append(f"missing KO translation: docs/adr-ko/{name}")
    for name in ko_files:
        if name not in en_files:
            errors.append(f"orphan KO (no canonical EN): docs/adr-ko/{name}")

    for name in sorted(en_files.keys() & ko_files.keys()):
        m = ADR_FILENAME_RE.match(name)
        # find_adr_files only returns names that match the pattern.
        assert m is not None
        expected_id = m.group(1)

        texts: dict[str, str] = {}
        for label, path in (("EN", en_files[name]), ("KO", ko_files[name])):
            try:
                texts[label] = _read_text(path)
            except (OSError, UnicodeError) as exc:
                errors.append(f"{name}: cannot read {label} file: {exc}")
        if len(texts) < 2:
            # Content checks need both sides; the read error is already recorded.
            continue

        errors.extend(_check_pair(name, expected_id, texts["EN"], texts["KO"]))
    return errors


def main(argv: list[str] | None = None) -> int:
    """Run the verification from the command line.

    Args:
        argv: argument list to parse; None lets argparse use ``sys.argv``.

    Returns:
        0 when no errors were found, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the docstring's line layout in --help instead of re-wrapping it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--root",
        type=Path,
        default=Path.cwd(),
        help="Repository root (default: cwd)",
    )
    args = parser.parse_args(argv)

    errors = verify(args.root)
    if errors:
        print("ADR language pair verification FAILED:")
        for e in errors:
            print(f"  - {e}")
        return 1
    print("ADR language pair verification OK")
    return 0


if __name__ == "__main__":
    sys.exit(main())