benches: package as kernbench.benches, add @bench registry + list subcommand

Move benches/ -> src/kernbench/benches/ and src/kernbench/cli/probe.py ->
src/kernbench/probes/probe.py. Each bench self-registers via
@bench(name=..., description=...). `kernbench list` enumerates benches
with auto-assigned indices, and --bench accepts either a kebab-case name
or a numeric index. An audit at package-import time fails if any
non-underscore module forgets the decorator. ADR-0010 (EN + KO) is
updated to reflect the new resolver path, list subcommand, and probes
package separation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-20 14:42:10 -07:00
parent 168b0c89f0
commit 049e3d8bb3
28 changed files with 398 additions and 79 deletions
+95
View File
@@ -0,0 +1,95 @@
"""Tests for kernbench.benches.registry — @bench decorator + resolve/list."""
from __future__ import annotations
import pytest
from kernbench.benches import registry
# Bench names the registry is expected to expose. Kept in sorted order
# because the tests below compare this list directly against the order
# returned by registry.list_all().
EXPECTED_NAMES = [
    "ccl-allreduce",
    "gemm-single-pe",
    "gpt3-qkv",
    "ipcq-allreduce",
    "matmul-composite",
    "qkv-gemm",
    "qkv-gemm-multi-pe",
    "va-offset-verify",
]
def test_registry_lists_all_benches():
    """list_all() yields exactly the expected bench names, in order."""
    listed = [entry.name for entry in registry.list_all()]
    assert listed == EXPECTED_NAMES
def test_registry_indices_are_1_based_sorted_by_name():
    """Indices run 1..N in list order, and list order is name-sorted."""
    entries = registry.list_all()
    indices = [entry.index for entry in entries]
    names = [entry.name for entry in entries]
    assert indices == list(range(1, len(EXPECTED_NAMES) + 1))
    assert sorted(names) == names
def test_resolve_by_name_returns_spec():
    """Resolving by name gives a spec with that name, a callable run, and a description."""
    wanted = "gemm-single-pe"
    found = registry.resolve(wanted)
    assert found.name == wanted
    assert callable(found.run)
    # A whitespace-only description counts as missing.
    assert found.description.strip()
def test_resolve_by_index_string_matches_list_order():
    """A stringified index resolves to the same spec object that list_all() returned."""
    expected = registry.list_all()[2]
    assert registry.resolve(str(expected.index)) is expected
def test_resolve_unknown_name_raises():
    """An unregistered name raises ValueError whose message mentions `kernbench list`."""
    unknown = "does-not-exist"
    with pytest.raises(ValueError, match="kernbench list"):
        registry.resolve(unknown)
def test_resolve_unknown_index_raises():
    """A numeric identifier with no matching bench raises ValueError mentioning `kernbench list`."""
    unknown_index = "99"
    with pytest.raises(ValueError, match="kernbench list"):
        registry.resolve(unknown_index)
def test_resolve_empty_identifier_raises():
    """The empty string is not a valid bench identifier."""
    blank = ""
    with pytest.raises(ValueError):
        registry.resolve(blank)
def test_bench_decorator_rejects_invalid_name():
    """bench() raises ValueError mentioning kebab-case for a non-kebab-case name."""
    bad_kwargs = {"name": "Invalid_Name", "description": "x"}
    with pytest.raises(ValueError, match="kebab-case"):
        registry.bench(**bad_kwargs)
def test_bench_decorator_rejects_empty_description():
    """bench() raises ValueError mentioning non-empty for a whitespace-only description."""
    bad_kwargs = {"name": "ok-name", "description": " "}
    with pytest.raises(ValueError, match="non-empty"):
        registry.bench(**bad_kwargs)
def test_audit_raises_on_missing_decorator():
    """_audit_modules() raises when an imported module is absent from the registered set."""
    decorated = "kernbench.benches.real"
    undecorated = "kernbench.benches.fake_no_dec"
    with pytest.raises(RuntimeError, match="missing @bench decorator"):
        registry._audit_modules(
            imported=[undecorated, decorated],
            registered={decorated},
        )
def test_audit_passes_when_all_registered():
    """_audit_modules() does not raise when every imported module is registered."""
    modules = ["kernbench.benches.a", "kernbench.benches.b"]
    registry._audit_modules(imported=modules, registered=set(modules))
def test_duplicate_name_at_finalize_fails(monkeypatch):
    """Two pending entries sharing one name make _finalize() raise."""
    # Start from an empty registry so only the injected entries are in play.
    monkeypatch.setattr(registry, "_REGISTRY", {})
    clashing = [("dup", text, lambda: None) for text in ("d1", "d2")]
    monkeypatch.setattr(registry, "_PENDING", clashing)

    with pytest.raises(RuntimeError, match="duplicate bench name"):
        registry._finalize()
+3 -3
View File
@@ -6,17 +6,17 @@ def test_cli_main_arg_parsing(monkeypatch):
def fake_cmd_run(args) -> int:
assert args.cmd == "run"
assert args.topology == "topology.yaml"
assert args.bench == "qkv_gemm"
assert args.bench == "qkv-gemm"
assert args.device == None
return 0
# monkey patch the handler to test arg parsing without running the actual bench
monkeypatch.setattr(cli_main, "cmd_run", fake_cmd_run)
rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv_gemm"])
rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv-gemm"])
assert rc == 0
def test_cli_main():
"""CLI bench run on single SIP device."""
rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv_gemm", "--device", "sip:0"])
rc = cli_main.main(["run", "--topology", "topology.yaml", "--bench", "qkv-gemm", "--device", "sip:0"])
assert rc == 0
+44
View File
@@ -0,0 +1,44 @@
"""Tests for `kernbench list` subcommand and `--bench <index>` resolution."""
from __future__ import annotations
import kernbench.cli.main as cli_main
from kernbench.benches import registry
def test_cli_list_outputs_all_benches(capsys):
    """`list` exits 0 and its stdout contains every bench name and the text DESCRIPTION."""
    exit_code = cli_main.main(["list"])
    assert exit_code == 0

    printed = capsys.readouterr().out
    absent = [spec.name for spec in registry.list_all() if spec.name not in printed]
    assert not absent
    assert "DESCRIPTION" in printed
def test_cli_run_by_index(monkeypatch):
    """`--bench <index>` hands run_bench the same callable the bench name resolves to."""
    target = registry.resolve("qkv-gemm")
    seen = {}

    def fake_run_bench(*, topology, bench_fn, device, engine_factory):
        # Record which bench callable the CLI picked, then return a
        # minimal stand-in result object.
        seen["bench_fn"] = bench_fn

        # NOTE(review): summary_text is placed on the result class rather
        # than on `completion`, since it takes `self` and only the result
        # is instantiated — confirm against the original file's layout.
        class _FakeResult:
            traces = []
            engine = None

            class completion:
                ok = True

            def summary_text(self):
                return ""

        return _FakeResult()

    monkeypatch.setattr(cli_main, "run_bench", fake_run_bench)

    argv = [
        "run", "--topology", "topology.yaml",
        "--bench", str(target.index),
        "--device", "sip:0",
    ]
    exit_code = cli_main.main(argv)

    assert exit_code == 0
    assert seen["bench_fn"] is target.run
+4 -4
View File
@@ -11,7 +11,7 @@ def test_cli_verify_data_flag_parsed(monkeypatch):
monkeypatch.setattr(cli_main, "cmd_run", fake_cmd_run)
rc = cli_main.main([
"run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
"run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
"--verify-data",
])
assert rc == 0
@@ -26,7 +26,7 @@ def test_cli_verify_data_flag_default(monkeypatch):
monkeypatch.setattr(cli_main, "cmd_run", fake_cmd_run)
rc = cli_main.main([
"run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
"run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
])
assert rc == 0
@@ -34,7 +34,7 @@ def test_cli_verify_data_flag_default(monkeypatch):
def test_cmd_run_verify_data_enables_engine():
"""--verify-data runs full pipeline with enable_data=True and DataExecutor."""
rc = cli_main.main([
"run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
"run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
"--device", "sip:0", "--verify-data",
])
assert rc == 0
@@ -43,7 +43,7 @@ def test_cmd_run_verify_data_enables_engine():
def test_cmd_run_without_verify_data_no_op_log():
"""Without --verify-data, engine runs in timing-only mode (no op_log)."""
rc = cli_main.main([
"run", "--topology", "topology.yaml", "--bench", "qkv_gemm",
"run", "--topology", "topology.yaml", "--bench", "qkv-gemm",
"--device", "sip:0",
])
assert rc == 0
+1 -1
View File
@@ -235,7 +235,7 @@ def test_qkv_gemm_still_passes():
correlation_id="test_regression",
spec=graph.spec,
)
from benches.qkv_gemm import run as bench_run
from kernbench.benches.qkv_gemm import run as bench_run
bench_run(ctx)
ctx.wait_all()
# If we get here without exception, the benchmark succeeded
+2 -2
View File
@@ -864,7 +864,7 @@ def test_mcpu_kernel_launch_composite():
def test_qkv_gemm_bench_completes():
"""The qkv_gemm benchmark runs to completion without error."""
clear_registry()
from benches.qkv_gemm import run as bench_run
from kernbench.benches.qkv_gemm import run as bench_run
from kernbench.runtime_api.context import RuntimeContext
graph = load_topology(TOPOLOGY_PATH)
@@ -958,7 +958,7 @@ def test_mcpu_multi_pe_kernel_launch():
def test_qkv_gemm_bench_multi_pe_completes():
"""The qkv_gemm_multi_pe benchmark runs to completion without error."""
clear_registry()
from benches.qkv_gemm_multi_pe import run as bench_run
from kernbench.benches.qkv_gemm_multi_pe import run as bench_run
from kernbench.runtime_api.context import RuntimeContext
graph = load_topology(TOPOLOGY_PATH)
+2 -2
View File
@@ -263,7 +263,7 @@ def test_pe_cross_cube_best_worst():
def test_probe_timestamp_trace():
"""_hop_timestamps must return monotonically increasing cumulative timestamps."""
from kernbench.cli.probe import _hop_timestamps, _build_edge_map
from kernbench.probes.probe import _hop_timestamps, _build_edge_map
graph = _graph()
edge_map = _build_edge_map(graph)
resolver = AddressResolver(graph)
@@ -341,7 +341,7 @@ def test_hbm_efficiency_applied():
def test_probe_sweep_saturation():
"""Utilization at 1MB must exceed utilization at 4KB for pe-local-hbm."""
from kernbench.cli.probe import _sweep_util
from kernbench.probes.probe import _sweep_util
# pe-local-hbm: ovhd=2ns (router), wire~0.03ns, bn from topology
bn = _hbm_effective_bw()
u = _sweep_util(2.0, 0.03, bn)
+1 -1
View File
@@ -143,7 +143,7 @@ def test_2d_bench_completes():
engine=engine, target_device=DeviceSelector("sip:0"),
correlation_id="vo3", spec=graph.spec,
)
from benches.va_offset_verify import run as bench_run
from kernbench.benches.va_offset_verify import run as bench_run
bench_run(ctx)
ctx.wait_all()