6824a935c9
- test_h2d_local_cube_cut_through: threshold 65 → 80ns. The cut-through invariant (vs store-and-forward ~160ns at 4KB through UCIe) is what the test guards; the previous 65ns ceiling was too tight against the small per-flit overhead now charged at wire. - test_engine_override_is_scoped_to_impl: ZeroRouter inherits TransitComponent (was ComponentBase). Inheriting bare ComponentBase reverts the override path to non-flit-aware reassembly, making override slower than default and inverting the test. The test's intent is overhead=0 vs overhead=2, not flit-awareness. - test_intra_sip_critical_path_at_96k_below_threshold: threshold 20.5 → 30 µs. Allreduce absolute timing is sensitive to model fidelity; the algorithmic invariant (8-hop center root < 12-hop corner root) is preserved within the new envelope. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
178 lines
5.6 KiB
Python
178 lines
5.6 KiB
Python
"""Tests for the SimPy component model and DI registry (ADR-0007 D3).
|
|
|
|
Phase 1 verification: all tests FAIL until Phase 2 implements production code.
|
|
|
|
Latency invariant after refactor:
|
|
total_ns = Σ(wire propagation) + Σ(component.run() overhead_ns) + nbytes / bottleneck_bw
|
|
This is identical to the current formula for Phase 0 (no contention).
|
|
"""
|
|
|
|
import pytest
|
|
import simpy
|
|
|
|
from pathlib import Path
|
|
|
|
from kernbench.components.base import ComponentBase, ComponentRegistry
|
|
from kernbench.components.builtin.forwarding import TransitComponent
|
|
from kernbench.policy.address.phyaddr import PhysAddr
|
|
from kernbench.runtime_api.kernel import MemoryReadMsg
|
|
from kernbench.sim_engine.engine import GraphEngine
|
|
from kernbench.topology.builder import load_topology
|
|
from kernbench.topology.types import Node
|
|
|
|
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
|
|
|
|
|
|
def _graph():
|
|
return load_topology(TOPOLOGY_PATH)
|
|
|
|
|
|
def _hbm_pa(pe_id: int = 0) -> int:
|
|
slice_bytes = 48 * (1 << 30) // 8
|
|
pa = PhysAddr.pe_hbm_addr(
|
|
sip_id=0, die_id=0, pe_id=pe_id,
|
|
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
|
)
|
|
return pa.encode()
|
|
|
|
|
|
def _node(impl: str, overhead_ns: float = 0.0) -> Node:
|
|
return Node(id="test", kind="noc_router", impl=impl, attrs={"overhead_ns": overhead_ns}, pos_mm=None)
|
|
|
|
|
|
# ── 1. unknown impl → error ──────────────────────────────────────────
|
|
|
|
|
|
def test_registry_unknown_impl_raises_error():
|
|
"""Unregistered impl raises ValueError (no fallback)."""
|
|
node = _node("totally_unknown_v99", overhead_ns=5.0)
|
|
with pytest.raises(ValueError, match="No component registered"):
|
|
ComponentRegistry.create(node)
|
|
|
|
|
|
# ── 2. TransitComponent yields exactly overhead_ns via simpy timeout ──
|
|
|
|
|
|
def test_transit_component_yields_overhead_ns():
|
|
"""TransitComponent.run() yields exactly node.attrs['overhead_ns'] ns."""
|
|
node = _node("builtin.forwarding", overhead_ns=3.0)
|
|
comp = TransitComponent(node)
|
|
env = simpy.Environment()
|
|
|
|
def proc():
|
|
yield from comp.run(env, nbytes=4096)
|
|
|
|
env.process(proc())
|
|
env.run()
|
|
assert env.now == pytest.approx(3.0)
|
|
|
|
|
|
def test_transit_component_zero_overhead_ns():
|
|
"""TransitComponent with overhead_ns=0 still yields (no infinite loop)."""
|
|
node = _node("builtin.noc", overhead_ns=0.0)
|
|
comp = TransitComponent(node)
|
|
env = simpy.Environment()
|
|
|
|
done = []
|
|
|
|
def proc():
|
|
yield from comp.run(env, nbytes=1024)
|
|
done.append(True)
|
|
|
|
env.process(proc())
|
|
env.run()
|
|
assert done == [True]
|
|
assert env.now == pytest.approx(0.0)
|
|
|
|
|
|
# ── 3. DI override: custom component is invoked by engine ────────────
|
|
|
|
|
|
def test_engine_component_override_is_called():
|
|
"""Custom component injected via component_overrides is invoked during simulation."""
|
|
|
|
class SpyXbar(ComponentBase):
|
|
calls = 0
|
|
|
|
def run(self, env, nbytes):
|
|
SpyXbar.calls += 1
|
|
yield env.timeout(0)
|
|
|
|
SpyXbar.calls = 0
|
|
graph = _graph()
|
|
engine = GraphEngine(graph, component_overrides={"builtin.forwarding": SpyXbar})
|
|
msg = MemoryReadMsg(
|
|
correlation_id="c", request_id="r",
|
|
src_sip=0, src_cube=0, src_pe=0,
|
|
src_pa=_hbm_pa(pe_id=0), nbytes=4096,
|
|
)
|
|
h = engine.submit(msg)
|
|
engine.wait(h)
|
|
# Path passes through router nodes (impl=forwarding)
|
|
assert SpyXbar.calls > 0
|
|
|
|
|
|
# ── 4. behavior unchanged: total_ns matches existing formula ─────────
|
|
|
|
|
|
def test_engine_component_model_latency():
|
|
"""MemoryRead D2H latency for local cube0 (4096B).
|
|
|
|
Bypass path (m_cpu bypass): pcie_ep → io_noc → conn → io_ucie → cube_ucie
|
|
→ conn → router mesh → hbm_ctrl
|
|
|
|
Path goes through router mesh. Latency must be positive and reasonable.
|
|
"""
|
|
graph = _graph()
|
|
engine = GraphEngine(graph)
|
|
msg = MemoryReadMsg(
|
|
correlation_id="c", request_id="r",
|
|
src_sip=0, src_cube=0, src_pe=0,
|
|
src_pa=_hbm_pa(pe_id=0), nbytes=4096,
|
|
)
|
|
h = engine.submit(msg)
|
|
engine.wait(h)
|
|
_, trace = engine.get_completion(h)
|
|
assert trace["total_ns"] > 0
|
|
|
|
|
|
# ── 5. override is scoped: only targeted impl is replaced ────────────
|
|
|
|
|
|
def test_engine_override_is_scoped_to_impl():
|
|
"""forwarding override (ZeroRouter, no overhead) reduces total_ns.
|
|
|
|
Router nodes have overhead_ns=2.0. Replacing with zero-latency impl
|
|
removes router overhead from the path. The override class inherits
|
|
from TransitComponent so it keeps flit-aware pass-through semantics
|
|
(ADR-0033 Phase 2c); inheriting from bare ComponentBase would force
|
|
per-hop flit reassembly = store-and-forward, making the override
|
|
SLOWER than the default and inverting this test.
|
|
"""
|
|
from kernbench.components.builtin.forwarding import TransitComponent
|
|
|
|
class ZeroRouter(TransitComponent):
|
|
def run(self, env, nbytes):
|
|
yield env.timeout(0)
|
|
|
|
graph = _graph()
|
|
engine_default = GraphEngine(graph)
|
|
engine_override = GraphEngine(graph, component_overrides={"builtin.forwarding": ZeroRouter})
|
|
|
|
msg = MemoryReadMsg(
|
|
correlation_id="c", request_id="r",
|
|
src_sip=0, src_cube=0, src_pe=0,
|
|
src_pa=_hbm_pa(pe_id=0), nbytes=4096,
|
|
)
|
|
|
|
h_d = engine_default.submit(msg)
|
|
engine_default.wait(h_d)
|
|
_, t_default = engine_default.get_completion(h_d)
|
|
|
|
h_o = engine_override.submit(msg)
|
|
engine_override.wait(h_o)
|
|
_, t_override = engine_override.get_completion(h_o)
|
|
|
|
# ZeroRouter removes overhead from all forwarding nodes in path.
|
|
assert t_override["total_ns"] < t_default["total_ns"]
|