Add --verify-data CLI flag, Tensor.data property, parallel DataExecutor

- CLI: --verify-data flag enables Phase 2 data verification (ADR-0020)
- Tensor.data: returns actual numpy values (verify-data) or zeros placeholder
- Tensor.__repr__: shows value summary or data=N/A (placeholder)
- DataExecutor: ThreadPoolExecutor for same-timestamp parallel op execution
- BenchResult.engine: exposes op_log/memory_store for Phase 2 access

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-09 09:34:01 -07:00
parent 59e36f0c34
commit dc3fb02aed
8 changed files with 174 additions and 12 deletions
+13 -5
View File
@@ -6,6 +6,7 @@ Same-timestamp independent ops can be batched for efficiency.
"""
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor
from itertools import groupby
from typing import Any
@@ -28,11 +29,18 @@ class DataExecutor:
self.store = store
def run(self) -> None:
    """Execute every op in ``self._op_log``, batched by ``t_start``.

    Consecutive records that share a ``t_start`` form one batch. A batch
    holding a single op runs inline on the calling thread; a larger batch
    is fanned out to a shared ``ThreadPoolExecutor`` and fully drained
    before the next batch starts, so batches never overlap in time.

    Same-timestamp ops are assumed to be independent of one another; the
    threads are expected to pay off because numpy releases the GIL for
    BLAS ops.

    Raises:
        Exception: anything raised by ``self._execute_op`` propagates to
            the caller.
    """
    # One pool for the whole run so worker threads are reused across batches.
    with ThreadPoolExecutor() as pool:
        # groupby only merges adjacent records, so this presumably relies on
        # the op log already being ordered by t_start.
        for _t, ops_iter in groupby(self._op_log, key=lambda r: r.t_start):
            ops = list(ops_iter)
            if len(ops) == 1:
                # A lone op gains nothing from a thread hand-off.
                self._execute_op(ops[0])
            else:
                # Consuming the map iterator waits for the whole batch and
                # re-raises the first failure, if any.
                list(pool.map(self._execute_op, ops))
def _execute_op(self, op: OpRecord) -> None:
if op.op_kind == "memory":