commit - release 1

2026-03-18 11:47:48 -07:00
commit 6f43807900
109 changed files with 14909 additions and 0 deletions
@@ -0,0 +1,187 @@
+"""Tests for the SimPy component model and DI registry (ADR-0007 D3).
+
+Phase 1 verification: all tests FAIL until Phase 2 implements production code.
+
+Latency invariant after refactor:
+  total_ns = Σ(wire propagation) + Σ(component.run() overhead_ns) + nbytes / bottleneck_bw
+  This is identical to the current formula for Phase 0 (no contention).
+"""
+
+import pytest
+import simpy
+
+from pathlib import Path
+
+from kernbench.components.base import ComponentBase, ComponentRegistry
+from kernbench.components.impls.forwarding import TransitComponent
+from kernbench.policy.address.phyaddr import PhysAddr
+from kernbench.runtime_api.kernel import MemoryReadMsg
+from kernbench.sim_engine.engine import GraphEngine
+from kernbench.topology.builder import load_topology
+from kernbench.topology.types import Node
+
+TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
+
+
+def _graph():
+    """Return the topology graph loaded from ``TOPOLOGY_PATH``."""
+    topology = load_topology(TOPOLOGY_PATH)
+    return topology
+
+
+def _hbm_pa(pe_id: int = 0) -> int:
+    """Return the encoded physical address of a fixed HBM location for ``pe_id``.
+
+    The address targets rack 0 / SIP 0 / cube 0 at PE-local HBM offset 0x1000.
+    """
+    # 48 GiB divided by 8 -> 6 GiB per slice.
+    # NOTE(review): presumably total HBM capacity / number of slices -- confirm
+    # against the topology definition.
+    per_slice_bytes = (48 << 30) // 8
+    address = PhysAddr.pe_hbm_addr(
+        rack_id=0,
+        sip_id=0,
+        cube_id=0,
+        pe_id=pe_id,
+        pe_local_hbm_offset=0x1000,
+        slice_size_bytes=per_slice_bytes,
+    )
+    return address.encode()
+
+
+def _node(impl: str, overhead_ns: float = 0.0) -> Node:
+    """Build a throwaway ``xbar`` Node (id ``"test"``) for component tests.
+
+    ``impl`` selects the component implementation under test; ``overhead_ns``
+    is stored in the node's ``attrs`` under the ``"overhead_ns"`` key.
+    """
+    attrs = {"overhead_ns": overhead_ns}
+    return Node(id="test", kind="xbar", impl=impl, attrs=attrs, pos_mm=None)
+
+
+# ── 1. unknown impl → error ──────────────────────────────────────────
+
+
+def test_registry_unknown_impl_raises_error():
+    """ComponentRegistry.create() raises ValueError for an unregistered impl (no fallback)."""
+    unregistered = _node("totally_unknown_v99", overhead_ns=5.0)
+
+    with pytest.raises(ValueError, match="No component registered"):
+        ComponentRegistry.create(unregistered)
+
+
+# ── 2. TransitComponent yields exactly overhead_ns via simpy timeout ──
+
+
+def test_transit_component_yields_overhead_ns():
+    """TransitComponent.run() advances sim time by exactly node.attrs['overhead_ns']."""
+    expected_ns = 3.0
+    transit = TransitComponent(_node("xbar_v1", overhead_ns=expected_ns))
+    env = simpy.Environment()
+
+    def _drive():
+        # Delegate to the component so its timeouts run inside one process.
+        yield from transit.run(env, nbytes=4096)
+
+    env.process(_drive())
+    env.run()
+
+    assert env.now == pytest.approx(expected_ns)
+
+
+def test_transit_component_zero_overhead_ns():
+    """With overhead_ns=0 the component still yields and finishes at sim time 0."""
+    transit = TransitComponent(_node("noc_v1", overhead_ns=0.0))
+    env = simpy.Environment()
+    finished = []
+
+    def _drive():
+        yield from transit.run(env, nbytes=1024)
+        # Only reached once run() has terminated (i.e. no infinite loop).
+        finished.append(True)
+
+    env.process(_drive())
+    env.run()
+
+    assert finished == [True]
+    assert env.now == pytest.approx(0.0)
+
+
+# ── 3. DI override: custom component is invoked by engine ────────────
+
+
+def test_engine_component_override_is_called():
+    """A class injected via component_overrides is invoked during simulation."""
+    run_log = []
+
+    class SpyXbar(ComponentBase):
+        """Zero-latency stand-in that records each run() invocation."""
+
+        def run(self, env, nbytes):
+            run_log.append(nbytes)
+            yield env.timeout(0)
+
+    engine = GraphEngine(_graph(), component_overrides={"xbar_v1": SpyXbar})
+    read_msg = MemoryReadMsg(
+        correlation_id="c",
+        request_id="r",
+        src_sip=0,
+        src_cube=0,
+        src_pe=0,
+        src_pa=_hbm_pa(pe_id=0),
+        nbytes=4096,
+    )
+    handle = engine.submit(read_msg)
+    engine.wait(handle)
+
+    # PE0→slice0 path passes through xbar.pe0 (impl=xbar_v1)
+    assert len(run_log) > 0
+
+
+# ── 4. behavior unchanged: total_ns matches existing formula ─────────
+
+
+def test_engine_component_model_same_latency_as_before():
+    """PE0→slice0 local-HBM read (4096 B): component-model total_ns is unchanged.
+
+    Wire model is cut-through (wormhole): each wire contributes propagation
+    delay only; serialization (drain) is computed per path and applied once
+    at the terminal.
+
+    Forward legs (ns):
+      Path 1: pcie_ep(5.0) + wire(1.0mm=0.01) + io_cpu(10.0)
+              = 15.010
+      Path 2: wire(3.5mm=0.035) + ucie-N(1.0)
+              + 2DMeshNOC(ucie-N→m_cpu: Manhattan 10.9mm=0.109) + m_cpu(5.0)
+              = 6.144
+      Path 3, DMA (m_cpu→noc→xbar.pe0→hbm_ctrl.slice0):
+              2DMeshNOC(m_cpu→xbar.pe0: Manhattan 15.0mm=0.15)
+              + xbar.pe0(2.0) + wire(2.5mm=0.025) + hbm_ctrl(0.0)
+              + drain_ns(4096/128 = 32.0, bottleneck = noc_to_xbar 128 GB/s)
+              = 34.175
+
+    Response legs (reverse direction, nbytes=0, drain=0):
+      DMA response:     hbm_ctrl→xbar.pe0→noc→m_cpu (propagation + xbar overhead_ns)
+      Command response: m_cpu→noc→ucie-N→io_cpu (propagation + ucie overhead_ns)
+      Together these account for the remaining ~3.319 ns.
+
+    Expected total: ~58.648 ns
+    """
+    expected_total_ns = 58.648
+
+    engine = GraphEngine(_graph())
+    read_msg = MemoryReadMsg(
+        correlation_id="c",
+        request_id="r",
+        src_sip=0,
+        src_cube=0,
+        src_pe=0,
+        src_pa=_hbm_pa(pe_id=0),
+        nbytes=4096,
+    )
+    handle = engine.submit(read_msg)
+    engine.wait(handle)
+    _result, trace = engine.get_completion(handle)
+
+    assert trace["total_ns"] == pytest.approx(expected_total_ns, rel=1e-4)
+
+
+# ── 5. override is scoped: only targeted impl is replaced ────────────
+
+
+def test_engine_override_is_scoped_to_impl():
+    """Overriding only xbar_v1 with a zero-latency impl cuts total_ns by exactly 4.0 ns.
+
+    xbar.pe0 (overhead_ns=2.0) is traversed on both the forward DMA path and
+    the reverse response path, so ZeroXbar removes 2.0 ns × 2 = 4.0 ns; every
+    other component keeps its default behavior.
+    """
+
+    class ZeroXbar(ComponentBase):
+        """Replacement xbar that adds no simulated latency."""
+
+        def run(self, env, nbytes):
+            yield env.timeout(0)
+
+    def _total_ns(engine, message):
+        # Drive one message to completion and read total_ns from its trace.
+        handle = engine.submit(message)
+        engine.wait(handle)
+        _result, trace = engine.get_completion(handle)
+        return trace["total_ns"]
+
+    graph = _graph()
+    baseline_engine = GraphEngine(graph)
+    zeroed_engine = GraphEngine(graph, component_overrides={"xbar_v1": ZeroXbar})
+
+    read_msg = MemoryReadMsg(
+        correlation_id="c",
+        request_id="r",
+        src_sip=0,
+        src_cube=0,
+        src_pe=0,
+        src_pa=_hbm_pa(pe_id=0),
+        nbytes=4096,
+    )
+
+    baseline_ns = _total_ns(baseline_engine, read_msg)
+    zeroed_ns = _total_ns(zeroed_engine, read_msg)
+
+    # ZeroXbar removes overhead_ns=2.0 from xbar.pe0 on forward + response = 4.0 ns faster
+    assert zeroed_ns < baseline_ns
+    assert baseline_ns - zeroed_ns == pytest.approx(4.0, rel=1e-6)