Files
kernbench2/tests/test_noc_mesh.py
T
ywkang 81cc32c46b ADR-0001 Rev 2: 51-bit PhysAddr layout with concrete sub-unit tables
Remove rack_id (4 bits), rename sip_seg→die_id, shift fields to enable
42-bit local_offset (4 TB per die). Define PE_LOCAL/MCPU_LOCAL/CUBE_SRAM
sub-unit tables for AHBM dies and IOCPU sub-unit table for IOCHIPLET
dies (1 TB window). Supersedes ADR-0031.

Also fixes latent VA/PA confusion in pe_dma pipeline DMA path where
virtual addresses were decoded as physical addresses without MMU
translation — previously masked by coincidental bit-position alignment.

529 passed (+6 recovered), 10 pre-existing failures unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 15:52:29 -07:00

553 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for CUBE NOC Explicit Router Mesh (ADR-0019).
Key changes verified:
- Explicit router nodes per cube from cube_mesh.yaml (6×6 grid)
- Auto-layout generates cube_mesh.yaml with PE/UCIe/M_CPU/SRAM attachments
- Mesh file caching with source_hash change detection
- Path routing: PE_DMA → router mesh → HBM_CTRL
Latency invariant:
Local HBM: PE_DMA → Router(overhead) → HBM_CTRL
Cross-row: PE_DMA → Router → mesh hops → Router → HBM_CTRL
Cross-cube: PE_DMA → Router → mesh → UCIe → ... → mesh → HBM_CTRL
"""
import pytest
import yaml
from pathlib import Path
from kernbench.policy.address.phyaddr import PhysAddr
from kernbench.policy.routing.router import AddressResolver, PathRouter
from kernbench.runtime_api.kernel import MemoryReadMsg, PeDmaMsg
from kernbench.sim_engine.engine import GraphEngine
from kernbench.topology.builder import load_topology
# Topology description passed to load_topology(); lives in the parent of this file's directory.
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
# Mesh layout file next to topology.yaml; the tests below expect load_topology() to create it.
MESH_PATH = Path(__file__).parent.parent / "cube_mesh.yaml"
def _graph():
    """Load and return the topology graph described by ``TOPOLOGY_PATH``."""
    topology = load_topology(TOPOLOGY_PATH)
    return topology
def _engine():
    """Return a new ``GraphEngine`` built on a freshly loaded topology graph."""
    topology = _graph()
    return GraphEngine(topology)
def _hbm_pa(sip=0, cube=0, pe_id=0):
    """Return the encoded address at offset 0x1000 inside a PE's HBM slice.

    ``cube`` is forwarded to ``PhysAddr.pe_hbm_addr`` as ``die_id``; the slice
    size passed along is 48 GiB divided by 8.
    """
    total_bytes = 48 * (1 << 30)
    per_slice = total_bytes // 8
    addr = PhysAddr.pe_hbm_addr(
        sip_id=sip,
        die_id=cube,
        pe_id=pe_id,
        pe_local_hbm_offset=0x1000,
        slice_size_bytes=per_slice,
    )
    return addr.encode()
# ══════════════════════════════════════════════════════════════════
# 1. Mesh File Generation
# ══════════════════════════════════════════════════════════════════
def test_mesh_file_generated_on_load():
    """Loading the topology must (re)create cube_mesh.yaml after it is deleted."""
    # Remove any previous mesh file so the existence check below is meaningful.
    stale_file_present = MESH_PATH.exists()
    if stale_file_present:
        MESH_PATH.unlink()

    _graph()

    generated = MESH_PATH.exists()
    assert generated, "cube_mesh.yaml not generated"
def test_mesh_file_has_source_hash():
    """The generated cube_mesh.yaml text must include a ``source_hash:`` entry."""
    _graph()
    mesh_text = MESH_PATH.read_text()
    has_hash = "source_hash:" in mesh_text
    assert has_hash
def test_mesh_file_grid_dimensions():
    """The generated mesh must declare a 6-row by 6-column grid."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    grid = parsed["mesh"]
    assert grid["rows"] == 6
    assert grid["cols"] == 6
def test_mesh_file_router_count():
    """Exactly 32 router entries in the mesh file must be non-null."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    # Null entries are skipped; only populated router slots are counted.
    active_count = sum(
        1 for entry in parsed["routers"].values() if entry is not None
    )
    assert active_count == 32
def test_mesh_file_hbm_exclusion():
    """Router slots at rows 2-3 x cols 2-3 must be absent or null in the mesh file."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    routers = parsed["routers"]
    excluded_cells = [(2, 2), (2, 3), (3, 2), (3, 3)]
    for row, col in excluded_cells:
        key = f"r{row}c{col}"
        entry = routers.get(key)
        assert entry is None, (
            f"{key} should be HBM excluded"
        )
def test_mesh_file_pe_attachments():
    """Router r0c0 must list both ``pe0.dma`` and ``pe0.cpu`` in its attach list."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    corner_router = parsed["routers"]["r0c0"]
    for endpoint in ("pe0.dma", "pe0.cpu"):
        assert endpoint in corner_router["attach"]
def test_mesh_file_pe_corner_positions():
    """Each PE's DMA endpoint must be attached to its expected corner router.

    Expected placement:
        NW: pe0 -> r0c0, pe1 -> r0c1
        NE: pe2 -> r1c4, pe3 -> r1c5
        SW: pe4 -> r4c0, pe5 -> r4c1
        SE: pe6 -> r5c4, pe7 -> r5c5
    """
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    placements = [
        ("r0c0", "pe0"), ("r0c1", "pe1"),  # NW
        ("r1c4", "pe2"), ("r1c5", "pe3"),  # NE
        ("r4c0", "pe4"), ("r4c1", "pe5"),  # SW
        ("r5c4", "pe6"), ("r5c5", "pe7"),  # SE
    ]
    for router_id, pe_name in placements:
        attached = parsed["routers"][router_id]["attach"]
        dma_endpoint = f"{pe_name}.dma"
        assert dma_endpoint in attached, (
            f"{pe_name} should be attached to {router_id}"
        )
def test_mesh_file_no_xbar_section():
    """The parsed mesh file must have no top-level ``xbar`` key (ADR-0019 D2)."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    xbar_present = "xbar" in parsed
    assert not xbar_present, "xbar section should be removed from cube_mesh.yaml"
def test_mesh_file_pe_hbm_attached():
    """Every ``<pe>.dma`` attachment must have a matching ``<pe>.hbm`` on the same router (ADR-0019 D1)."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    active = (
        (rid, entry) for rid, entry in parsed["routers"].items()
        if entry is not None
    )
    for rid, entry in active:
        dma_items = [a for a in entry["attach"] if a.endswith(".dma")]
        for item in dma_items:
            # Strip the trailing ".dma" to recover the PE prefix.
            hbm_item = item[: -len(".dma")] + ".hbm"
            assert hbm_item in entry["attach"], (
                f"{rid} has {item} but missing {hbm_item}"
            )
def test_mesh_file_ucie_distribution():
    """UCIe-E connections c0..c3 must sit on routers r0c5, r1c5, r4c5, r5c5 respectively."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    east_placement = (
        (0, "r0c5"),
        (1, "r1c5"),
        (2, "r4c5"),
        (3, "r5c5"),
    )
    for conn_idx, rid in east_placement:
        attached = parsed["routers"][rid]["attach"]
        endpoint = f"ucie_e.c{conn_idx}"
        assert endpoint in attached, (
            f"UCIe-E conn {conn_idx} should be on {rid}"
        )
def test_mesh_not_regenerated_if_unchanged():
    """A second load with unchanged topology must not rewrite cube_mesh.yaml.

    The modification time is compared as integer nanoseconds
    (``st_mtime_ns``) rather than the float ``st_mtime``: the float form can
    round two distinct timestamps to the same value and hide a rewrite.
    """
    _graph()  # first load: ensures the mesh file exists
    mtime_before = MESH_PATH.stat().st_mtime_ns
    _graph()  # second load: must leave the file untouched
    mtime_after = MESH_PATH.stat().st_mtime_ns
    assert mtime_before == mtime_after, (
        "mesh file regenerated despite no topology changes"
    )
def test_mesh_ucie_w_attached_to_pe_rows():
    """UCIe-W connections c0..c3 must be attached to r0c0, r1c0, r4c0, r5c0 (column 0)."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    west_placement = (
        ("r0c0", "ucie_w.c0"),
        ("r1c0", "ucie_w.c1"),
        ("r4c0", "ucie_w.c2"),
        ("r5c0", "ucie_w.c3"),
    )
    for rid, attach_name in west_placement:
        attach = parsed["routers"][rid]["attach"]
        found = attach_name in attach
        assert found, (
            f"UCIe-W {attach_name} should be on {rid}, got attach={attach}"
        )
def test_mesh_ucie_n_attached_to_pe_cols():
    """UCIe-N connections c0..c3 must be attached to r0c0, r0c1, r0c4, r0c5 (row 0)."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    north_placement = (
        ("r0c0", "ucie_n.c0"),
        ("r0c1", "ucie_n.c1"),
        ("r0c4", "ucie_n.c2"),
        ("r0c5", "ucie_n.c3"),
    )
    for rid, attach_name in north_placement:
        attach = parsed["routers"][rid]["attach"]
        found = attach_name in attach
        assert found, (
            f"UCIe-N {attach_name} should be on {rid}, got attach={attach}"
        )
def test_mesh_ucie_s_attached_to_pe_cols():
    """UCIe-S connections c0..c3 must be attached to r5c0, r5c1, r5c4, r5c5 (row 5)."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    south_placement = (
        ("r5c0", "ucie_s.c0"),
        ("r5c1", "ucie_s.c1"),
        ("r5c4", "ucie_s.c2"),
        ("r5c5", "ucie_s.c3"),
    )
    for rid, attach_name in south_placement:
        attach = parsed["routers"][rid]["attach"]
        found = attach_name in attach
        assert found, (
            f"UCIe-S {attach_name} should be on {rid}, got attach={attach}"
        )
def test_mesh_ucie_all_four_directions():
    """Each of ucie_n / ucie_s / ucie_e / ucie_w must appear exactly 4 times across all router attach lists."""
    _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    # Flatten the attach lists of every non-null router into one list.
    all_attach = [
        item
        for entry in parsed["routers"].values()
        if entry is not None
        for item in entry["attach"]
    ]
    for direction in ("ucie_n", "ucie_s", "ucie_e", "ucie_w"):
        dir_conns = [item for item in all_attach if item.startswith(direction)]
        n_found = len(dir_conns)
        assert n_found == 4, (
            f"{direction} should have 4 connections, found {n_found}: {dir_conns}"
        )
# ══════════════════════════════════════════════════════════════════
# 2. Topology Graph: Explicit Router Mesh (ADR-0019)
# ══════════════════════════════════════════════════════════════════
def test_router_nodes_exist():
    """A sample of cube0 router nodes (r0c0, r0c1, r1c4, r5c5) must be present in the graph."""
    topo = _graph()
    sample_routers = ("r0c0", "r0c1", "r1c4", "r5c5")
    for rkey in sample_routers:
        node_name = f"sip0.cube0.{rkey}"
        assert node_name in topo.nodes, f"Router {rkey} missing"
def test_no_xbar_or_bridge_nodes():
    """No graph node name may contain ``xbar`` or ``bridge`` (ADR-0019 D2)."""
    topo = _graph()
    bad = []
    for node in topo.nodes:
        if "xbar" in node or "bridge" in node:
            bad.append(node)
    # Only the first few offenders are shown to keep the failure message short.
    assert not bad, f"Old xbar/bridge nodes found: {bad[:5]}"
def test_no_single_noc_node():
    """The graph must not contain a ``sip0.cube0.noc`` node."""
    topo = _graph()
    noc_present = "sip0.cube0.noc" in topo.nodes
    assert not noc_present
def test_single_hbm_ctrl_node():
    """cube0 must expose ``hbm_ctrl`` and no node name may contain ``hbm_ctrl.slice``."""
    topo = _graph()
    assert "sip0.cube0.hbm_ctrl" in topo.nodes
    slices = [node for node in topo.nodes if "hbm_ctrl.slice" in node]
    assert not slices, f"HBM slices should not exist: {slices[:3]}"
def test_router_mesh_edges():
    """Routers r0c0 and r0c1 must be linked by edges in both directions."""
    topo = _graph()
    links = set()
    for edge in topo.edges:
        links.add((edge.src, edge.dst))
    left, right = "sip0.cube0.r0c0", "sip0.cube0.r0c1"
    # Horizontal neighbours: check left -> right first, then the reverse.
    assert (left, right) in links
    assert (right, left) in links
def test_pe_dma_connects_to_router():
    """PE0's pe_dma must have exactly one ``pe_to_router`` edge, ending at r0c0."""
    topo = _graph()
    dma_edges = []
    for edge in topo.edges:
        if edge.src == "sip0.cube0.pe0.pe_dma" and edge.kind == "pe_to_router":
            dma_edges.append(edge)
    n_edges = len(dma_edges)
    assert n_edges == 1, f"PE0 DMA should connect to 1 router, got {n_edges}"
    (only_edge,) = dma_edges
    assert only_edge.dst == "sip0.cube0.r0c0"
def test_hbm_connects_to_all_routers():
    """hbm_ctrl must have one ``hbm_to_router`` edge per non-null router in the mesh file."""
    topo = _graph()
    n_hbm_edges = sum(
        1
        for edge in topo.edges
        if edge.src == "sip0.cube0.hbm_ctrl" and edge.kind == "hbm_to_router"
    )
    parsed = yaml.safe_load(MESH_PATH.read_text())
    n_active = len([v for v in parsed["routers"].values() if v is not None])
    assert n_hbm_edges == n_active, (
        f"HBM should connect to {n_active} routers, got {n_hbm_edges}"
    )
# ══════════════════════════════════════════════════════════════════
# 3. Path Routing
# ══════════════════════════════════════════════════════════════════
def test_local_hbm_path_through_router():
    """The pe0 -> hbm_ctrl path must contain router r0c0 and end at hbm_ctrl."""
    topo = _graph()
    path_router = PathRouter(topo)
    hbm = "sip0.cube0.hbm_ctrl"
    path = path_router.find_path("sip0.cube0.pe0", hbm)
    via_r0c0 = "sip0.cube0.r0c0" in path
    assert via_r0c0, f"PE0's router r0c0 missing from path: {path}"
    assert path[-1] == hbm, f"Path should end at hbm_ctrl: {path}"
def test_remote_pe_hbm_has_more_hops():
    """Paths from pe0 and from pe4 to cube0's hbm_ctrl must each have at least two entries.

    NOTE(review): despite the test name, no hop-count comparison between the
    two paths is asserted; only a minimum length of 2 is checked for each.
    """
    topo = _graph()
    path_router = PathRouter(topo)
    hbm = "sip0.cube0.hbm_ctrl"
    pe0_path = path_router.find_path("sip0.cube0.pe0", hbm)
    pe4_path = path_router.find_path("sip0.cube0.pe4", hbm)
    for candidate in (pe0_path, pe4_path):
        assert len(candidate) >= 2
def test_mcpu_dma_path_through_router_mesh():
    """M_CPU DMA to local HBM: m_cpu -> router mesh -> hbm_ctrl.

    The path must start at m_cpu, end at hbm_ctrl, and contain at least one
    node with an ``r<row>c<col>`` name component (e.g. ``sip0.cube0.r0c0``).
    """
    # Local import keeps this block self-contained; `re` is standard library.
    import re

    graph = _graph()
    router = PathRouter(graph)
    path = router.find_mcpu_dma_path(
        "sip0.cube0.m_cpu", "sip0.cube0.hbm_ctrl"
    )
    assert path[0] == "sip0.cube0.m_cpu"
    assert path[-1] == "sip0.cube0.hbm_ctrl"
    # A plain `"r" in n and "c" in n` check is always satisfied by the final
    # "hbm_ctrl" node, so match the router naming pattern explicitly instead.
    router_component = re.compile(r"(?:^|\.)r\d+c\d+(?:\.|$)")
    assert any(router_component.search(n) for n in path), (
        f"Router missing from path: {path}"
    )
def test_cross_cube_path_through_ucie():
    """The pe0 (cube0) -> cube4 hbm_ctrl path must include a UCIe node and end at cube4's hbm_ctrl."""
    topo = _graph()
    path_router = PathRouter(topo)
    destination = "sip0.cube4.hbm_ctrl"
    path = path_router.find_path("sip0.cube0.pe0", destination)
    # Case-insensitive match on the node name.
    ucie_nodes = [node for node in path if "ucie" in node.lower()]
    assert ucie_nodes, f"UCIe missing: {path}"
    assert path[-1] == destination
def test_h2d_bypass_path_through_router():
    """The memory path from SIP 0's PCIe endpoint to pe0's HBM must end at hbm_ctrl and cross a row-0 or row-1 router."""
    topo = _graph()
    resolver = AddressResolver(topo)
    path_router = PathRouter(topo)
    source = resolver.find_pcie_ep(0)
    encoded_pa = _hbm_pa(sip=0, cube=0, pe_id=0)
    decoded_pa = PhysAddr.decode(encoded_pa)
    target = resolver.resolve(decoded_pa)
    path = path_router.find_memory_path(source, target)
    assert path[-1] == "sip0.cube0.hbm_ctrl", f"Path should end at hbm_ctrl: {path}"
    # Router names in rows 0 and 1 contain "r0c" / "r1c".
    hits_router = any(("r0c" in node) or ("r1c" in node) for node in path)
    assert hits_router, f"Router missing: {path}"
# ══════════════════════════════════════════════════════════════════
# 4. BW Configuration
# ══════════════════════════════════════════════════════════════════
def test_pe_dma_to_router_bw():
    """The first ``pe_to_router`` edge leaving PE0's pe_dma must have ``bw_gbs == 256.0``."""
    topo = _graph()
    first_match = next(
        (
            edge for edge in topo.edges
            if edge.src == "sip0.cube0.pe0.pe_dma" and edge.kind == "pe_to_router"
        ),
        None,
    )
    if first_match is None:
        pytest.fail("PE_DMA → router edge not found")
    assert first_match.bw_gbs == 256.0, (
        f"PE_DMA→router BW should be 256 GB/s, got {first_match.bw_gbs}"
    )
def test_router_mesh_bw():
    """The first ``router_mesh`` edge whose source contains ``cube0`` must have ``bw_gbs == 256.0``."""
    topo = _graph()
    first_match = next(
        (
            edge for edge in topo.edges
            if edge.kind == "router_mesh" and "cube0" in edge.src
        ),
        None,
    )
    if first_match is None:
        pytest.fail("Router mesh edge not found")
    assert first_match.bw_gbs == 256.0, (
        f"Router mesh BW should be 256 GB/s, got {first_match.bw_gbs}"
    )
# ══════════════════════════════════════════════════════════════════
# 5. Latency
# ══════════════════════════════════════════════════════════════════
def test_local_hbm_read_completes():
    """A 4096-byte read of pe0's own HBM must complete with ``ok is True`` and ``total_ns > 0``."""
    sim = _engine()
    request = MemoryReadMsg(
        correlation_id="mesh",
        request_id="local",
        src_sip=0,
        src_cube=0,
        src_pe=0,
        src_pa=_hbm_pa(pe_id=0),
        nbytes=4096,
    )
    handle = sim.submit(request)
    sim.wait(handle)
    completion, trace = sim.get_completion(handle)
    assert completion.ok is True
    assert trace["total_ns"] > 0
def test_remote_pe_latency_greater_than_local():
    """Reading pe5's HBM from pe0 must take at least as long as reading pe0's own HBM.

    Each read runs on its own freshly built engine.
    """

    def _run_read(request_id, target_pe):
        # One engine per request so the two measurements do not share state.
        sim = _engine()
        request = MemoryReadMsg(
            correlation_id="mesh", request_id=request_id,
            src_sip=0, src_cube=0, src_pe=0,
            src_pa=_hbm_pa(pe_id=target_pe), nbytes=4096,
        )
        handle = sim.submit(request)
        sim.wait(handle)
        _, trace = sim.get_completion(handle)
        return trace

    t_local = _run_read("local", 0)
    t_remote = _run_read("remote", 5)

    remote_ns = t_remote["total_ns"]
    local_ns = t_local["total_ns"]
    assert remote_ns >= local_ns, (
        f"Remote ({remote_ns:.2f}ns) must be >= "
        f"local ({local_ns:.2f}ns)"
    )
def test_latency_deterministic():
    """Submitting the same read message to two separate engines must yield equal ``total_ns``."""
    request = MemoryReadMsg(
        correlation_id="mesh",
        request_id="det",
        src_sip=0,
        src_cube=0,
        src_pe=0,
        src_pa=_hbm_pa(pe_id=0),
        nbytes=4096,
    )
    # Both engines are built up front, then driven one after the other.
    engines = [_engine(), _engine()]
    traces = []
    for sim in engines:
        handle = sim.submit(request)
        sim.wait(handle)
        _, trace = sim.get_completion(handle)
        traces.append(trace)
    first, second = traces
    assert first["total_ns"] == second["total_ns"]
# ══════════════════════════════════════════════════════════════════
# 6. NOC Component reads cube_mesh.yaml (Change 1)
# ══════════════════════════════════════════════════════════════════
def test_mesh_data_in_context_spec():
    """The loaded graph's ``spec`` must carry the mesh data under the ``_mesh`` key.

    The stored dict must have ``routers`` and ``mesh`` entries, and the mesh
    must report 6 rows and 6 columns.
    """
    topo = _graph()
    has_mesh = "_mesh" in topo.spec
    assert has_mesh, (
        "spec['_mesh'] missing: builder must store mesh data in spec"
    )
    mesh_data = topo.spec["_mesh"]
    for required_key in ("routers", "mesh"):
        assert required_key in mesh_data
    grid = mesh_data["mesh"]
    assert grid["rows"] == 6
    assert grid["cols"] == 6
def test_router_nodes_match_mesh():
    """Every non-null router in cube_mesh.yaml must exist as a ``sip0.cube0.<key>`` graph node."""
    topo = _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    for rkey, entry in parsed["routers"].items():
        if entry is None:
            continue
        node_name = f"sip0.cube0.{rkey}"
        assert node_name in topo.nodes, f"Router {rkey} missing from graph"
def test_null_routers_excluded():
    """Router keys that are null in cube_mesh.yaml must not appear as ``sip0.cube0.<key>`` graph nodes."""
    topo = _graph()
    parsed = yaml.safe_load(MESH_PATH.read_text())
    for rkey, entry in parsed["routers"].items():
        if entry is not None:
            continue
        node_name = f"sip0.cube0.{rkey}"
        assert node_name not in topo.nodes, f"Null router {rkey} in graph"
# ══════════════════════════════════════════════════════════════════
# 7. Router Mesh Latency (ADR-0019)
# ══════════════════════════════════════════════════════════════════
def _pe_dma_latency(pe_id: int, target_pe_id: int, nbytes: int = 4096) -> float:
    """Submit one ``PeDmaMsg`` on a fresh engine and return the trace's ``total_ns``.

    The message originates from ``pe_id`` on sip 0 / cube 0 and its destination
    address is built by ``_hbm_pa`` for ``target_pe_id``.
    """
    sim = _engine()
    request = PeDmaMsg(
        correlation_id="mesh_lat",
        request_id=f"pe{pe_id}_t{target_pe_id}",
        src_sip=0,
        src_cube=0,
        src_pe=pe_id,
        dst_pa=_hbm_pa(pe_id=target_pe_id),
        nbytes=nbytes,
    )
    handle = sim.submit(request)
    sim.wait(handle)
    completion_and_trace = sim.get_completion(handle)
    _, trace = completion_and_trace
    return trace["total_ns"]
def test_local_hbm_latency_positive():
    """A PE DMA from pe0 to its own HBM must report a strictly positive latency."""
    latency_ns = _pe_dma_latency(pe_id=0, target_pe_id=0)
    assert latency_ns > 0, f"Local HBM latency must be > 0, got {latency_ns}"
def test_pe_dma_latency_deterministic():
    """Two identical pe1 -> pe1 DMA runs must return exactly the same latency."""
    first, second = (
        _pe_dma_latency(pe_id=1, target_pe_id=1),
        _pe_dma_latency(pe_id=1, target_pe_id=1),
    )
    assert first == second, f"Non-deterministic latency: {first} vs {second}"
def test_remote_pe_dma_latency_greater():
    """A pe0 DMA targeting pe5's HBM must take at least as long as one targeting pe0's own HBM."""
    local_ns = _pe_dma_latency(pe_id=0, target_pe_id=0)
    remote_ns = _pe_dma_latency(pe_id=0, target_pe_id=5)
    not_faster = remote_ns >= local_ns
    assert not_faster, (
        f"Remote ({remote_ns:.4f}ns) must be >= local ({local_ns:.4f}ns)"
    )
# ══════════════════════════════════════════════════════════════════
# 8. PE-to-NOC Distance from Physical Position
# ══════════════════════════════════════════════════════════════════
def test_pe_router_edges_exist():
    """Exactly 8 ``pe_to_router`` edges must originate from sources containing ``sip0.cube0``."""
    topo = _graph()
    n_edges = 0
    for edge in topo.edges:
        if edge.kind == "pe_to_router" and "sip0.cube0" in edge.src:
            n_edges += 1
    assert n_edges == 8, (
        f"Expected 8 PE→router edges, got {n_edges}"
    )