"""Tests for the SimPy component model and DI registry (ADR-0007 D3). Phase 1 verification: all tests FAIL until Phase 2 implements production code. Latency invariant after refactor: total_ns = Σ(wire propagation) + Σ(component.run() overhead_ns) + nbytes / bottleneck_bw This is identical to the current formula for Phase 0 (no contention). """ import pytest import simpy from pathlib import Path from kernbench.components.base import ComponentBase, ComponentRegistry from kernbench.components.builtin.forwarding import TransitComponent from kernbench.policy.address.phyaddr import PhysAddr from kernbench.runtime_api.kernel import MemoryReadMsg from kernbench.sim_engine.engine import GraphEngine from kernbench.topology.builder import load_topology from kernbench.topology.types import Node TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml" def _graph(): return load_topology(TOPOLOGY_PATH) def _hbm_pa(pe_id: int = 0) -> int: slice_bytes = 48 * (1 << 30) // 8 pa = PhysAddr.pe_hbm_addr( rack_id=0, sip_id=0, cube_id=0, pe_id=pe_id, pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes, ) return pa.encode() def _node(impl: str, overhead_ns: float = 0.0) -> Node: return Node(id="test", kind="xbar", impl=impl, attrs={"overhead_ns": overhead_ns}, pos_mm=None) # ── 1. unknown impl → error ────────────────────────────────────────── def test_registry_unknown_impl_raises_error(): """Unregistered impl raises ValueError (no fallback).""" node = _node("totally_unknown_v99", overhead_ns=5.0) with pytest.raises(ValueError, match="No component registered"): ComponentRegistry.create(node) # ── 2. TransitComponent yields exactly overhead_ns via simpy timeout ── def test_transit_component_yields_overhead_ns(): """TransitComponent.run() yields exactly node.attrs['overhead_ns'] ns.""" node = _node("xbar_v1", overhead_ns=3.0) comp = TransitComponent(node) env = simpy.Environment() def proc(): yield from comp.run(env, nbytes=4096) env.process(proc()) env.run() assert env.now == pytest.approx(3.0) def test_transit_component_zero_overhead_ns(): """TransitComponent with overhead_ns=0 still yields (no infinite loop).""" node = _node("noc_v1", overhead_ns=0.0) comp = TransitComponent(node) env = simpy.Environment() done = [] def proc(): yield from comp.run(env, nbytes=1024) done.append(True) env.process(proc()) env.run() assert done == [True] assert env.now == pytest.approx(0.0) # ── 3. DI override: custom component is invoked by engine ──────────── def test_engine_component_override_is_called(): """Custom component injected via component_overrides is invoked during simulation.""" class SpyXbar(ComponentBase): calls = 0 def run(self, env, nbytes): SpyXbar.calls += 1 yield env.timeout(0) SpyXbar.calls = 0 graph = _graph() engine = GraphEngine(graph, component_overrides={"xbar_v1": SpyXbar}) msg = MemoryReadMsg( correlation_id="c", request_id="r", src_sip=0, src_cube=0, src_pe=0, src_pa=_hbm_pa(pe_id=0), nbytes=4096, ) h = engine.submit(msg) engine.wait(h) # Path passes through xbar_top (impl=xbar_v1) assert SpyXbar.calls > 0 # ── 4. behavior unchanged: total_ns matches existing formula ───────── def test_engine_component_model_latency(): """MemoryRead D2H latency for local cube0 (4096B). Bypass path (m_cpu bypass): pcie_ep → io_noc → conn → io_ucie → cube_ucie → conn → noc → xbar_top → hbm_ctrl.slice0 Path goes through xbar_top (overhead_ns=2.0) instead of per-PE xbar. Latency must be positive and reasonable. """ graph = _graph() engine = GraphEngine(graph) msg = MemoryReadMsg( correlation_id="c", request_id="r", src_sip=0, src_cube=0, src_pe=0, src_pa=_hbm_pa(pe_id=0), nbytes=4096, ) h = engine.submit(msg) engine.wait(h) _, trace = engine.get_completion(h) # Verify positive latency; exact value depends on path through xbar_top assert trace["total_ns"] > 0 # ── 5. override is scoped: only targeted impl is replaced ──────────── def test_engine_override_is_scoped_to_impl(): """xbar_v1 override (ZeroXbar, no overhead_ns) reduces total_ns. xbar_top has overhead_ns=2.0 base + position-dependent distance. It is traversed on both the forward path and the reverse response path, so replacing it with a zero-latency impl removes all XBAR latency. With position-aware XBAR, the diff is >= 4.0ns (base) + distance contribution. """ class ZeroXbar(ComponentBase): def run(self, env, nbytes): yield env.timeout(0) graph = _graph() engine_default = GraphEngine(graph) engine_override = GraphEngine(graph, component_overrides={"xbar_v1": ZeroXbar}) msg = MemoryReadMsg( correlation_id="c", request_id="r", src_sip=0, src_cube=0, src_pe=0, src_pa=_hbm_pa(pe_id=0), nbytes=4096, ) h_d = engine_default.submit(msg) engine_default.wait(h_d) _, t_default = engine_default.get_completion(h_d) h_o = engine_override.submit(msg) engine_override.wait(h_o) _, t_override = engine_override.get_completion(h_o) # ZeroXbar removes base overhead_ns=2.0 + distance-based latency per traversal. # Forward + response = 2 traversals, so diff >= 4.0ns (base only). diff = t_default["total_ns"] - t_override["total_ns"] assert t_override["total_ns"] < t_default["total_ns"] assert diff >= 4.0 - 0.01, f"Expected diff >= 4.0ns, got {diff:.4f}ns"