Add --verify-data CLI flag, Tensor.data property, parallel DataExecutor

- CLI: --verify-data flag enables Phase 2 data verification (ADR-0020)
- Tensor.data: returns actual numpy values (verify-data) or zeros placeholder
- Tensor.__repr__: shows value summary or data=N/A (placeholder)
- DataExecutor: ThreadPoolExecutor for same-timestamp parallel op execution
- BenchResult.engine: exposes op_log/memory_store for Phase 2 access

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-09 09:34:01 -07:00
parent 59e36f0c34
commit dc3fb02aed
8 changed files with 174 additions and 12 deletions
+49
View File
@@ -0,0 +1,49 @@
"""Tests for --verify-data CLI flag (Phase 1 verification)."""
import kernbench.cli.main as cli_main
def test_cli_verify_data_flag_parsed(monkeypatch):
    """Passing ``--verify-data`` yields ``args.verify_data is True``.

    ``cli_main.cmd_run`` is swapped for a stub that inspects the namespace
    it receives and reports success, so the real run command is not invoked
    by this test.
    """
    argv = [
        "run",
        "--topology", "topology.yaml",
        "--bench", "qkv_gemm",
        "--verify-data",
    ]

    def _stub_cmd_run(args) -> int:
        # The flag was supplied, so it must come through as the bool True.
        assert args.verify_data is True
        return 0

    monkeypatch.setattr(cli_main, "cmd_run", _stub_cmd_run)

    exit_code = cli_main.main(argv)
    assert exit_code == 0
def test_cli_verify_data_flag_default(monkeypatch):
    """Omitting ``--verify-data`` leaves ``args.verify_data`` as False.

    ``cli_main.cmd_run`` is swapped for a stub that inspects the namespace
    it receives and reports success, so the real run command is not invoked
    by this test.
    """
    argv = [
        "run",
        "--topology", "topology.yaml",
        "--bench", "qkv_gemm",
    ]

    def _stub_cmd_run(args) -> int:
        # No flag on the command line: the default must be the bool False.
        assert args.verify_data is False
        return 0

    monkeypatch.setattr(cli_main, "cmd_run", _stub_cmd_run)

    exit_code = cli_main.main(argv)
    assert exit_code == 0
def test_cmd_run_verify_data_enables_engine():
    """A real ``run`` on device ``sip:0`` with ``--verify-data`` exits with 0.

    Nothing is stubbed here: the CLI entry point is invoked as-is.

    NOTE(review): only the exit code is asserted; this test does not inspect
    ``enable_data`` or ``DataExecutor`` itself -- confirm that is sufficient.
    """
    argv = [
        "run",
        "--topology", "topology.yaml",
        "--bench", "qkv_gemm",
        "--device", "sip:0",
        "--verify-data",
    ]
    exit_code = cli_main.main(argv)
    assert exit_code == 0
def test_cmd_run_without_verify_data_no_op_log():
    """A real ``run`` on device ``sip:0`` without ``--verify-data`` exits with 0.

    Nothing is stubbed here: the CLI entry point is invoked as-is.

    NOTE(review): only the exit code is asserted; the absence of an op_log
    (timing-only mode) is not checked directly -- confirm that is sufficient.
    """
    argv = [
        "run",
        "--topology", "topology.yaml",
        "--bench", "qkv_gemm",
        "--device", "sip:0",
    ]
    exit_code = cli_main.main(argv)
    assert exit_code == 0
+38
View File
@@ -186,3 +186,41 @@ def test_sequential_gemm_then_math():
exp_result = store.read("tcm", 0x300)
assert np.allclose(exp_result, np.exp(expected_gemm))
def test_parallel_same_timestamp_ops():
    """Independent GEMMs that share one ``t_start`` each produce the right output.

    Eight ops, all spanning t=0.0..100.0, each multiply a constant-filled
    4x4 float16 matrix by the identity and write to their own destination
    address, so every destination must equal that op's own input matrix.
    """
    store = MemoryStore()
    n_ops = 8
    shape = (4, 4)

    def _addrs(idx):
        # Per-op (src_a, src_b, dst) addresses; regions never overlap.
        base = 0x1000 * idx
        return base, base + 0x800, 0x80000 + base

    # Seed the inputs: A is filled with (idx + 1), B is the identity.
    for idx in range(n_ops):
        a_addr, b_addr, _ = _addrs(idx)
        store.write("tcm", a_addr, np.full(shape, float(idx + 1), dtype=np.float16))
        store.write("tcm", b_addr, np.eye(4, dtype=np.float16))

    ops = []
    for idx in range(n_ops):
        a_addr, b_addr, out_addr = _addrs(idx)
        gemm_params = {
            "src_a_addr": a_addr,
            "src_b_addr": b_addr,
            "dst_addr": out_addr,
            "shape_a": shape,
            "shape_b": shape,
            "shape_out": shape,
            "dtype_in": "f16",
            "dtype_acc": "f32",
            "dtype_out": "f16",
            "addr_space": "tcm",
        }
        ops.append(
            OpRecord(
                t_start=0.0,
                t_end=100.0,
                component_id=f"pe{idx}.pe_gemm",
                op_kind="gemm",
                op_name="gemm_f16",
                params=gemm_params,
            )
        )

    DataExecutor(ops, store).run()

    # A @ I == A, so each destination must hold its op's constant matrix.
    for i in range(n_ops):
        result = store.read("tcm", _addrs(i)[2])
        expected = np.full(shape, float(i + 1), dtype=np.float16)
        assert np.allclose(result, expected), f"op {i}: expected {expected}, got {result}"