Add Phase 1→Phase 2 e2e data tests + GraphEngine enable_data mode

GraphEngine(enable_data=True):
- Creates MemoryStore + OpLogger
- Injects op_logger into all components
- Exposes engine.op_log and engine.memory_store properties

E2E tests (test_e2e_data.py):
- Engine data mode creates store + logger
- Default engine (enable_data=False) has no store
- PeDmaMsg completes successfully with data mode
- DataExecutor GEMM accuracy: random f16 matmul with f32 accumulation
- DataExecutor chain: GEMM → exp correctness
- DataExecutor verify API: pass/fail per tensor
- MemoryStore snapshot isolation between Phase 1 and Phase 2

382 tests passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-08 23:49:28 -07:00
parent f5d1606f9d
commit 95d583ef9f
2 changed files with 211 additions and 0 deletions
+27
View File
@@ -31,6 +31,7 @@ class GraphEngine:
graph: TopologyGraph,
*,
component_overrides: dict[str, type[ComponentBase]] | None = None,
enable_data: bool = False,
) -> None:
self._env = simpy.Environment()
self._resolver = AddressResolver(graph)
@@ -44,6 +45,15 @@ class GraphEngine:
self._events: dict[str, simpy.Event] = {}
self._counter = 0
overrides = component_overrides or {}
# ADR-0020: optional data execution support
self._op_logger = None
self._memory_store = None
if enable_data:
from kernbench.sim_engine.memory_store import MemoryStore
from kernbench.sim_engine.op_log import OpLogger
self._op_logger = OpLogger()
self._memory_store = MemoryStore()
ctx = ComponentContext(
router=self._router,
resolver=self._resolver,
@@ -51,6 +61,8 @@ class GraphEngine:
ns_per_mm=self._ns_per_mm,
edge_map=self._edge_map,
spec=graph.spec,
memory_store=self._memory_store,
op_logger=self._op_logger,
)
self._components: dict[str, ComponentBase] = {
node_id: ComponentRegistry.create(node, overrides, ctx)
@@ -108,10 +120,25 @@ class GraphEngine:
if mmu_comp is not None and hasattr(mmu_comp, "mmu"):
self._components[node_id]._mmu = mmu_comp.mmu
# Inject op_logger into all components (ADR-0020 D2)
if self._op_logger:
for comp in self._components.values():
comp._op_logger = self._op_logger
# Start components after all ports are wired (ADR-0015 D3)
for comp in self._components.values():
comp.start(self._env)
@property
def op_log(self):
    """Return the op-log records from Phase 1 (ADR-0020).

    The engine only holds an op logger when it was constructed with
    ``enable_data=True``; otherwise ``_op_logger`` is ``None``.

    Returns:
        ``self._op_logger.records`` when a logger is attached, otherwise a
        new empty list.
    """
    logger = self._op_logger
    # Compare against None instead of relying on truthiness: a logger object
    # that evaluates falsy (e.g. one defining ``__len__`` while it is still
    # empty) is nevertheless attached, and its records must be returned.
    if logger is None:
        return []
    return logger.records
@property
def memory_store(self):
    """Expose the engine's Phase 1 memory store (ADR-0020).

    Returns:
        Whatever is held in ``self._memory_store``; this is ``None`` when
        the engine was built without ``enable_data=True``.
    """
    store = self._memory_store
    return store
def submit(self, request: Any) -> RequestHandle:
self._counter += 1
handle = RequestHandle(f"h{self._counter}")