benches: package as kernbench.benches, add @bench registry + list subcommand

Move benches/ -> src/kernbench/benches/ and src/kernbench/cli/probe.py -> src/kernbench/probes/probe.py. Each bench now self-registers via @bench(name=..., description=...). `kernbench list` enumerates the registered benches with auto-assigned indices, and `--bench` accepts either a kebab-case name (e.g. `qkv-gemm`) or a numeric index. An audit that runs at package-import time fails if any non-underscore module is missing the decorator. ADR-0010 (EN + KO) is updated to reflect the new resolver path, the list subcommand, and the separation of the probes package.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 14:42:10 -07:00
parent 168b0c89f0
commit 049e3d8bb3
28 changed files with 398 additions and 79 deletions
@@ -0,0 +1,95 @@
+"""Tests for kernbench.benches.registry — @bench decorator + resolve/list."""
+from __future__ import annotations
+
+import pytest
+
+from kernbench.benches import registry
+
+
+EXPECTED_NAMES = [
+    "ccl-allreduce",
+    "gemm-single-pe",
+    "gpt3-qkv",
+    "ipcq-allreduce",
+    "matmul-composite",
+    "qkv-gemm",
+    "qkv-gemm-multi-pe",
+    "va-offset-verify",
+]
+
+
+def test_registry_lists_all_benches():
+    specs = registry.list_all()
+    names = [s.name for s in specs]
+    assert names == EXPECTED_NAMES
+
+
+def test_registry_indices_are_1_based_sorted_by_name():
+    specs = registry.list_all()
+    assert [s.index for s in specs] == list(range(1, len(EXPECTED_NAMES) + 1))
+    assert sorted(s.name for s in specs) == [s.name for s in specs]
+
+
+def test_resolve_by_name_returns_spec():
+    spec = registry.resolve("gemm-single-pe")
+    assert spec.name == "gemm-single-pe"
+    assert callable(spec.run)
+    assert spec.description.strip()
+
+
+def test_resolve_by_index_string_matches_list_order():
+    specs = registry.list_all()
+    third = specs[2]
+    resolved = registry.resolve(str(third.index))
+    assert resolved is third
+
+
+def test_resolve_unknown_name_raises():
+    with pytest.raises(ValueError, match="kernbench list"):
+        registry.resolve("does-not-exist")
+
+
+def test_resolve_unknown_index_raises():
+    with pytest.raises(ValueError, match="kernbench list"):
+        registry.resolve("99")
+
+
+def test_resolve_empty_identifier_raises():
+    with pytest.raises(ValueError):
+        registry.resolve("")
+
+
+def test_bench_decorator_rejects_invalid_name():
+    with pytest.raises(ValueError, match="kebab-case"):
+        registry.bench(name="Invalid_Name", description="x")
+
+
+def test_bench_decorator_rejects_empty_description():
+    with pytest.raises(ValueError, match="non-empty"):
+        registry.bench(name="ok-name", description="   ")
+
+
+def test_audit_raises_on_missing_decorator():
+    with pytest.raises(RuntimeError, match="missing @bench decorator"):
+        registry._audit_modules(
+            imported=["kernbench.benches.fake_no_dec", "kernbench.benches.real"],
+            registered={"kernbench.benches.real"},
+        )
+
+
+def test_audit_passes_when_all_registered():
+    registry._audit_modules(
+        imported=["kernbench.benches.a", "kernbench.benches.b"],
+        registered={"kernbench.benches.a", "kernbench.benches.b"},
+    )
+
+
+def test_duplicate_name_at_finalize_fails(monkeypatch):
+    """_finalize() rejects two pending entries with the same name."""
+    monkeypatch.setattr(registry, "_PENDING", [
+        ("dup", "d1", lambda: None),
+        ("dup", "d2", lambda: None),
+    ])
+    monkeypatch.setattr(registry, "_REGISTRY", {})
+    with pytest.raises(RuntimeError, match="duplicate bench name"):
+        registry._finalize()
@@ -6,17 +6,17 @@ def test_cli_main_arg_parsing(monkeypatch):
    def fake_cmd_run(args) -> int:
        assert args.cmd == "run"
        assert args.topology == "topology.yaml"
-        assert args.bench == "qkv_gemm"
+        assert args.bench == "qkv-gemm"
        assert args.device == None
        return 0

    # monkey patch the handler to test arg parsing without running the actual bench
    monkeypatch.setattr(cli_main, "cmd_run", fake_cmd_run)
-    rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv_gemm"])
+    rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv-gemm"])
    assert rc == 0


 def test_cli_main():
    """CLI bench run on single SIP device."""
-    rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv_gemm", "--device", "sip:0"])
+    rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv-gemm", "--device", "sip:0"])
    assert rc == 0
@@ -0,0 +1,44 @@
+"""Tests for `kernbench list` subcommand and `--bench <index>` resolution."""
+from __future__ import annotations
+
+import kernbench.cli.main as cli_main
+from kernbench.benches import registry
+
+
+def test_cli_list_outputs_all_benches(capsys):
+    rc = cli_main.main(["list"])
+    assert rc == 0
+    out = capsys.readouterr().out
+    for spec in registry.list_all():
+        assert spec.name in out
+    assert "DESCRIPTION" in out
+
+
+def test_cli_run_by_index(monkeypatch):
+    """CLI accepts numeric index for --bench; same callable as the name."""
+    qkv_spec = registry.resolve("qkv-gemm")
+
+    captured = {}
+
+    def fake_run_bench(*, topology, bench_fn, device, engine_factory):
+        captured["bench_fn"] = bench_fn
+
+        class _R:
+            traces = []
+            engine = None
+
+            class completion:
+                ok = True
+
+            def summary_text(self):
+                return ""
+        return _R()
+
+    monkeypatch.setattr(cli_main, "run_bench", fake_run_bench)
+    rc = cli_main.main([
+        "run", "--topology", "topology.yaml",
+        "--bench", str(qkv_spec.index),
+        "--device", "sip:0",
+    ])
+    assert rc == 0
+    assert captured["bench_fn"] is qkv_spec.run
@@ -11,7 +11,7 @@ def test_cli_verify_data_flag_parsed(monkeypatch):

    monkeypatch.setattr(cli_main, "cmd_run", fake_cmd_run)
    rc = cli_main.main([
-        "run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
+        "run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
        "--verify-data",
    ])
    assert rc == 0
@@ -26,7 +26,7 @@ def test_cli_verify_data_flag_default(monkeypatch):

    monkeypatch.setattr(cli_main, "cmd_run", fake_cmd_run)
    rc = cli_main.main([
-        "run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
+        "run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
    ])
    assert rc == 0

@@ -34,7 +34,7 @@ def test_cli_verify_data_flag_default(monkeypatch):
 def test_cmd_run_verify_data_enables_engine():
    """--verify-data runs full pipeline with enable_data=True and DataExecutor."""
    rc = cli_main.main([
-        "run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
+        "run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
        "--device", "sip:0", "--verify-data",
    ])
    assert rc == 0
@@ -43,7 +43,7 @@ def test_cmd_run_verify_data_enables_engine():
 def test_cmd_run_without_verify_data_no_op_log():
    """Without --verify-data, engine runs in timing-only mode (no op_log)."""
    rc = cli_main.main([
-        "run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
+        "run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
        "--device", "sip:0",
    ])
    assert rc == 0
@@ -235,7 +235,7 @@ def test_qkv_gemm_still_passes():
        correlation_id="test_regression",
        spec=graph.spec,
    )
-    from benches.qkv_gemm import run as bench_run
+    from kernbench.benches.qkv_gemm import run as bench_run
    bench_run(ctx)
    ctx.wait_all()
    # If we get here without exception, the benchmark succeeded
@@ -864,7 +864,7 @@ def test_mcpu_kernel_launch_composite():
 def test_qkv_gemm_bench_completes():
    """The qkv_gemm benchmark runs to completion without error."""
    clear_registry()
-    from benches.qkv_gemm import run as bench_run
+    from kernbench.benches.qkv_gemm import run as bench_run
    from kernbench.runtime_api.context import RuntimeContext

    graph = load_topology(TOPOLOGY_PATH)
@@ -958,7 +958,7 @@ def test_mcpu_multi_pe_kernel_launch():
 def test_qkv_gemm_bench_multi_pe_completes():
    """The qkv_gemm_multi_pe benchmark runs to completion without error."""
    clear_registry()
-    from benches.qkv_gemm_multi_pe import run as bench_run
+    from kernbench.benches.qkv_gemm_multi_pe import run as bench_run
    from kernbench.runtime_api.context import RuntimeContext

    graph = load_topology(TOPOLOGY_PATH)
@@ -263,7 +263,7 @@ def test_pe_cross_cube_best_worst():

 def test_probe_timestamp_trace():
    """_hop_timestamps must return monotonically increasing cumulative timestamps."""
-    from kernbench.cli.probe import _hop_timestamps, _build_edge_map
+    from kernbench.probes.probe import _hop_timestamps, _build_edge_map
    graph = _graph()
    edge_map = _build_edge_map(graph)
    resolver = AddressResolver(graph)
@@ -341,7 +341,7 @@ def test_hbm_efficiency_applied():

 def test_probe_sweep_saturation():
    """Utilization at 1MB must exceed utilization at 4KB for pe-local-hbm."""
-    from kernbench.cli.probe import _sweep_util
+    from kernbench.probes.probe import _sweep_util
    # pe-local-hbm: ovhd=2ns (router), wire~0.03ns, bn from topology
    bn = _hbm_effective_bw()
    u = _sweep_util(2.0, 0.03, bn)
@@ -143,7 +143,7 @@ def test_2d_bench_completes():
        engine=engine, target_device=DeviceSelector("sip:0"),
        correlation_id="vo3", spec=graph.spec,
    )
-    from benches.va_offset_verify import run as bench_run
+    from kernbench.benches.va_offset_verify import run as bench_run
    bench_run(ctx)
    ctx.wait_all()