"""Tests for CUBE NOC Explicit Router Mesh (ADR-0017). Key changes verified: - Explicit router nodes per cube from cube_mesh.yaml (6×6 grid) - Auto-layout generates cube_mesh.yaml with PE/UCIe/M_CPU/SRAM attachments - Mesh file caching with source_hash change detection - Path routing: PE_DMA → router mesh → HBM_CTRL Latency invariant: Local HBM: PE_DMA → Router(overhead) → HBM_CTRL Cross-row: PE_DMA → Router → mesh hops → Router → HBM_CTRL Cross-cube: PE_DMA → Router → mesh → UCIe → ... → mesh → HBM_CTRL """ import pytest import yaml from pathlib import Path from kernbench.policy.address.phyaddr import PhysAddr from kernbench.policy.routing.router import AddressResolver, PathRouter from kernbench.runtime_api.kernel import MemoryReadMsg, PeDmaMsg from kernbench.sim_engine.engine import GraphEngine from kernbench.topology.builder import load_topology TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml" MESH_PATH = Path(__file__).parent.parent / "cube_mesh.yaml" def _graph(): return load_topology(TOPOLOGY_PATH) def _engine(): return GraphEngine(_graph()) def _hbm_pa(sip=0, cube=0, pe_id=0): slice_bytes = 48 * (1 << 30) // 8 pa = PhysAddr.pe_hbm_addr( sip_id=sip, die_id=cube, pe_id=pe_id, pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes, ) return pa.encode() # ══════════════════════════════════════════════════════════════════ # 1. 
# Mesh File Generation
# ══════════════════════════════════════════════════════════════════


def _load_mesh():
    """Load the topology, then return the parsed contents of cube_mesh.yaml."""
    _graph()
    return yaml.safe_load(MESH_PATH.read_text())


def _assert_ucie_side(side, router_ids):
    """Assert that ``ucie_<side>.c<i>`` is attached to the i-th router in *router_ids*.

    ``side`` is the upper-case direction letter used in the failure message
    ("W", "N" or "S"); its lower-case form builds the attachment name.
    """
    routers = _load_mesh()["routers"]
    for idx, rid in enumerate(router_ids):
        attach_name = f"ucie_{side.lower()}.c{idx}"
        attach = routers[rid]["attach"]
        assert attach_name in attach, (
            f"UCIe-{side} {attach_name} should be on {rid}, got attach={attach}"
        )


def test_mesh_file_generated_on_load():
    """Loading the topology must create cube_mesh.yaml at ``MESH_PATH``."""
    # Remove any stale copy first so the final check proves generation happened.
    if MESH_PATH.exists():
        MESH_PATH.unlink()

    _graph()

    assert MESH_PATH.exists(), "cube_mesh.yaml not generated"


def test_mesh_file_has_source_hash():
    """The generated file must carry a ``source_hash:`` entry (change detection)."""
    _graph()
    assert "source_hash:" in MESH_PATH.read_text()


def test_mesh_file_grid_dimensions():
    """Current config (n_connections=4, pe_per_corner=2) must yield a 6x6 grid."""
    grid = _load_mesh()["mesh"]
    assert grid["rows"] == 6
    assert grid["cols"] == 6


def test_mesh_file_router_count():
    """A 6x6 grid minus the 4 HBM-excluded cells leaves 32 routers."""
    active = [rid for rid, entry in _load_mesh()["routers"].items() if entry is not None]
    assert len(active) == 32


def test_mesh_file_hbm_exclusion():
    """Cells at rows 2-3 / cols 2-3 (HBM zone) must have no router entry."""
    routers = _load_mesh()["routers"]
    hbm_cells = [f"r{row}c{col}" for row in (2, 3) for col in (2, 3)]
    for key in hbm_cells:
        assert routers.get(key) is None, (
            f"{key} should be HBM excluded"
        )


def test_mesh_file_pe_attachments():
    """Router r0c0 must list both the DMA and CPU ports of PE0 (NW corner)."""
    attached = _load_mesh()["routers"]["r0c0"]["attach"]
    assert "pe0.dma" in attached
    assert "pe0.cpu" in attached


def test_mesh_file_pe_corner_positions():
    """PEs must be at correct corner positions in the grid.

    NW (PE0,PE1) → row 0, cols 0,1 (left)
    NE (PE2,PE3) → row 1, cols 4,5 (right)
    SW (PE4,PE5) → row 4, cols 0,1 (left)
    SE (PE6,PE7) → row 5, cols 4,5 (right)
    """
    placement = [
        ("r0c0", "pe0"), ("r0c1", "pe1"),  # NW
        ("r1c4", "pe2"), ("r1c5", "pe3"),  # NE
        ("r4c0", "pe4"), ("r4c1", "pe5"),  # SW
        ("r5c4", "pe6"), ("r5c5", "pe7"),  # SE
    ]
    routers = _load_mesh()["routers"]
    for router_id, pe_name in placement:
        assert f"{pe_name}.dma" in routers[router_id]["attach"], (
            f"{pe_name} should be attached to {router_id}"
        )


def test_mesh_file_no_xbar_section():
    """The mesh file must have no top-level ``xbar`` key (ADR-0017 D1)."""
    top_level = _load_mesh()
    assert "xbar" not in top_level, "xbar section should be removed from cube_mesh.yaml"


def test_mesh_file_pe_hbm_attached():
    """Any router listing ``pe{idx}.dma`` must also list ``pe{idx}.hbm`` (ADR-0017 D4)."""
    for rid, rdata in _load_mesh()["routers"].items():
        if rdata is not None:
            attach = rdata["attach"]
            for item in attach:
                if not item.endswith(".dma"):
                    continue
                # Strip the ".dma" suffix to recover the PE prefix.
                hbm_item = f"{item[:-len('.dma')]}.hbm"
                assert hbm_item in attach, (
                    f"{rid} has {item} but missing {hbm_item}"
                )


def test_mesh_file_ucie_distribution():
    """UCIe-E connections must be distributed 1 per PE row.

    E: c0=R(0,5), c1=R(1,5), c2=R(4,5), c3=R(5,5)
    """
    routers = _load_mesh()["routers"]
    for conn_idx, rid in enumerate(("r0c5", "r1c5", "r4c5", "r5c5")):
        assert f"ucie_e.c{conn_idx}" in routers[rid]["attach"], (
            f"UCIe-E conn {conn_idx} should be on {rid}"
        )


def test_mesh_not_regenerated_if_unchanged():
    """A second load with unchanged topology must leave the file's mtime alone."""
    stamps = []
    for _ in range(2):  # first load, then second load
        _graph()
        stamps.append(MESH_PATH.stat().st_mtime)
    assert stamps[0] == stamps[1], "mesh file regenerated despite no topology changes"


def test_mesh_ucie_w_attached_to_pe_rows():
    """UCIe-W connections must be distributed 1 per PE row on leftmost column.

    W: c0=r0c0, c1=r1c0, c2=r4c0, c3=r5c0 (mirror of UCIe-E on col 0).
    """
    _assert_ucie_side("W", ("r0c0", "r1c0", "r4c0", "r5c0"))


def test_mesh_ucie_n_attached_to_pe_cols():
    """UCIe-N connections must be distributed across PE columns on top row.

    N: c0=r0c0, c1=r0c1, c2=r0c4, c3=r0c5 (PE column positions on row 0).
    """
    _assert_ucie_side("N", ("r0c0", "r0c1", "r0c4", "r0c5"))


def test_mesh_ucie_s_attached_to_pe_cols():
    """UCIe-S connections must be distributed across PE columns on bottom row.

    S: c0=r5c0, c1=r5c1, c2=r5c4, c3=r5c5 (PE column positions on row 5).
    """
    _assert_ucie_side("S", ("r5c0", "r5c1", "r5c4", "r5c5"))


def test_mesh_ucie_all_four_directions():
    """Each UCIe direction (N, S, E, W) must have exactly 4 router attachments."""
    attached = [
        item
        for router in _load_mesh()["routers"].values()
        if router is not None
        for item in router["attach"]
    ]
    for direction in ("ucie_n", "ucie_s", "ucie_e", "ucie_w"):
        dir_conns = [name for name in attached if name.startswith(direction)]
        assert len(dir_conns) == 4, (
            f"{direction} should have 4 connections, found {len(dir_conns)}: {dir_conns}"
        )


# ══════════════════════════════════════════════════════════════════
# 2. Topology Graph: Explicit Router Mesh (ADR-0017)
# ══════════════════════════════════════════════════════════════════


def test_router_nodes_exist():
    """A sample of explicit router nodes must be present under sip0.cube0."""
    graph = _graph()
    for rkey in ("r0c0", "r0c1", "r1c4", "r5c5"):
        node_id = f"sip0.cube0.{rkey}"
        assert node_id in graph.nodes, f"Router {rkey} missing"


def test_no_xbar_or_bridge_nodes():
    """No node id may contain "xbar" or "bridge" (ADR-0017 D1)."""
    graph = _graph()
    bad = [
        node
        for node in graph.nodes
        if any(tag in node for tag in ("xbar", "bridge"))
    ]
    assert not bad, f"Old xbar/bridge nodes found: {bad[:5]}"


def test_no_single_noc_node():
    """The cube-level ``noc`` node must be gone (replaced by explicit routers)."""
    graph = _graph()
    assert "sip0.cube0.noc" not in graph.nodes


def test_per_pe_hbm_ctrl_nodes():
    """Each cube has 8 per-PE HBM CTRL instances (ADR-0017 D4).

    Every PE gets its own ``hbm_ctrl.pe{X}`` node, and the cube-wide single
    ``hbm_ctrl`` node must not exist.
    """
    graph = _graph()
    for pe_idx in range(8):
        assert f"sip0.cube0.hbm_ctrl.pe{pe_idx}" in graph.nodes

    # The legacy single hbm_ctrl must be absent.
    legacy_id = "sip0.cube0.hbm_ctrl"
    assert legacy_id not in graph.nodes, (
        f"legacy {legacy_id} must not exist (per-PE partitioning, ADR-0017 D4)"
    )


def test_router_mesh_edges():
    """Horizontally adjacent routers r0c0 and r0c1 must be linked both ways."""
    graph = _graph()
    links = {(edge.src, edge.dst) for edge in graph.edges}
    left, right = "sip0.cube0.r0c0", "sip0.cube0.r0c1"
    assert (left, right) in links
    assert (right, left) in links


def test_pe_dma_connects_to_router():
    """PE0's DMA must have exactly one ``pe_to_router`` edge, ending at r0c0."""
    graph = _graph()
    pe0_edges = [
        edge
        for edge in graph.edges
        if edge.kind == "pe_to_router" and edge.src == "sip0.cube0.pe0.pe_dma"
    ]
    assert len(pe0_edges) == 1, f"PE0 DMA should connect to 1 router, got {len(pe0_edges)}"
    (only_edge,) = pe0_edges
    assert only_edge.dst == "sip0.cube0.r0c0"


def test_each_hbm_ctrl_connects_only_to_owning_router():
    """Each ``hbm_ctrl.pe{X}`` must have exactly one out-edge and one in-edge,
    both with its owning PE's attaching router (ADR-0017 D7).
    """
    graph = _graph()
    # Index in this tuple is the PE id; the value is that PE's router.
    owning_routers = ("r0c0", "r0c1", "r1c4", "r1c5", "r4c0", "r4c1", "r5c4", "r5c5")
    for pe, rkey in enumerate(owning_routers):
        nid = f"sip0.cube0.hbm_ctrl.pe{pe}"
        owner = f"sip0.cube0.{rkey}"
        outs = [edge.dst for edge in graph.edges if edge.src == nid]
        ins = [edge.src for edge in graph.edges if edge.dst == nid]
        assert outs == [owner], f"{nid} must out-edge only to {owner}; got {outs}"
        assert ins == [owner], f"{nid} must in-edge only from {owner}; got {ins}"


# ══════════════════════════════════════════════════════════════════
# 3.
# Path Routing
# ══════════════════════════════════════════════════════════════════


def _is_router_node(node_id):
    """Return True when the last dotted segment of *node_id* is ``r<row>c<col>``.

    A plain ``"r" in n and "c" in n`` substring test also matches ids such as
    ``sip0.cube0.hbm_ctrl.pe0``, so the leaf name is matched exactly instead.
    """
    import re  # function-scope import keeps this block self-contained

    leaf = node_id.rsplit(".", 1)[-1]
    return re.fullmatch(r"r\d+c\d+", leaf) is not None


def test_local_hbm_path_through_router():
    """PE0 local HBM: path must go through PE0's router and end at its hbm_ctrl."""
    graph = _graph()
    router = PathRouter(graph)
    path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
    assert "sip0.cube0.r0c0" in path, f"PE0's router r0c0 missing from path: {path}"
    assert "sip0.cube0.hbm_ctrl.pe0" == path[-1], f"Path should end at hbm_ctrl: {path}"


def test_remote_pe_hbm_has_more_hops():
    """PE4 → PE0's HBM (remote) must take more hops than PE0 → its own HBM."""
    graph = _graph()
    router = PathRouter(graph)
    hbm_pe0 = "sip0.cube0.hbm_ctrl.pe0"
    local_path = router.find_path("sip0.cube0.pe0", hbm_pe0)
    # PE4 is at r4c0, PE0 at r0c0 — must traverse mesh
    remote_path = router.find_path("sip0.cube0.pe4", hbm_pe0)
    # Both routes must exist ...
    assert len(local_path) >= 2
    assert len(remote_path) >= 2
    # ... and the remote one must be strictly longer: hbm_ctrl.pe0 hangs off
    # r0c0 only, so reaching it from r4c0 adds router-to-router hops.
    assert len(remote_path) > len(local_path), (
        f"Remote path ({len(remote_path)} nodes) must be longer than "
        f"local path ({len(local_path)} nodes): {remote_path}"
    )


def test_mcpu_dma_path_through_router_mesh():
    """M_CPU DMA to local HBM: m_cpu → router mesh → hbm_ctrl."""
    graph = _graph()
    router = PathRouter(graph)
    path = router.find_mcpu_dma_path(
        "sip0.cube0.m_cpu", "sip0.cube0.hbm_ctrl.pe0"
    )
    assert path[0] == "sip0.cube0.m_cpu"
    assert path[-1] == "sip0.cube0.hbm_ctrl.pe0"
    # Match real router ids only; the endpoints must not satisfy this check.
    assert any(_is_router_node(n) for n in path), f"Router missing from path: {path}"


def test_cross_cube_path_through_ucie():
    """Cross-cube HBM: must traverse router → UCIe → remote router → hbm_ctrl."""
    graph = _graph()
    router = PathRouter(graph)
    path = router.find_path("sip0.cube0.pe0", "sip0.cube4.hbm_ctrl.pe0")
    assert any("ucie" in n.lower() for n in path), f"UCIe missing: {path}"
    assert path[-1] == "sip0.cube4.hbm_ctrl.pe0"


def test_h2d_bypass_path_through_router():
    """H2D MemoryWrite bypass: pcie_ep → io_noc → cube_ucie → router → hbm."""
    graph = _graph()
    resolver = AddressResolver(graph)
    router = PathRouter(graph)
    pcie_ep = resolver.find_pcie_ep(0)
    pa = _hbm_pa(sip=0, cube=0, pe_id=0)
    hbm_target = resolver.resolve(PhysAddr.decode(pa))
    path = router.find_memory_path(pcie_ep, hbm_target)
    assert path[-1] == "sip0.cube0.hbm_ctrl.pe0", f"Path should end at hbm_ctrl: {path}"
    assert any("r0c" in n or "r1c" in n for n in path), f"Router missing: {path}"


# ══════════════════════════════════════════════════════════════════
# 4. BW Configuration
# ══════════════════════════════════════════════════════════════════


def test_pe_dma_to_router_bw():
    """PE_DMA → router edge BW must be 256 GB/s."""
    graph = _graph()
    # Only the first matching edge is inspected.
    edge = next(
        (
            e
            for e in graph.edges
            if e.src == "sip0.cube0.pe0.pe_dma" and e.kind == "pe_to_router"
        ),
        None,
    )
    if edge is None:
        pytest.fail("PE_DMA → router edge not found")
    assert edge.bw_gbs == 256.0, (
        f"PE_DMA→router BW should be 256 GB/s, got {edge.bw_gbs}"
    )


def test_router_mesh_bw():
    """Router-router mesh edge BW must be 256 GB/s."""
    graph = _graph()
    # Only the first cube0 mesh edge is inspected.
    edge = next(
        (e for e in graph.edges if e.kind == "router_mesh" and "cube0" in e.src),
        None,
    )
    if edge is None:
        pytest.fail("Router mesh edge not found")
    assert edge.bw_gbs == 256.0, (
        f"Router mesh BW should be 256 GB/s, got {edge.bw_gbs}"
    )


# ══════════════════════════════════════════════════════════════════
# 5.
# Latency
# ══════════════════════════════════════════════════════════════════


def _run_hbm_read(request_id, target_pe):
    """Submit one 4096-byte ``MemoryReadMsg`` on a fresh engine.

    The request originates from sip0/cube0/pe0 and targets ``target_pe``'s HBM.
    Returns whatever ``engine.get_completion`` yields, unpacked by callers as
    ``(completion, trace)``.
    """
    engine = _engine()
    request = MemoryReadMsg(
        correlation_id="mesh",
        request_id=request_id,
        src_sip=0,
        src_cube=0,
        src_pe=0,
        src_pa=_hbm_pa(pe_id=target_pe),
        nbytes=4096,
    )
    handle = engine.submit(request)
    engine.wait(handle)
    return engine.get_completion(handle)


def test_local_hbm_read_completes():
    """A local HBM read must finish with ``ok`` True and a positive total latency."""
    completion, trace = _run_hbm_read("local", target_pe=0)
    assert completion.ok is True
    assert trace["total_ns"] > 0


def test_remote_pe_latency_greater_than_local():
    """Reading PE5's HBM from PE0 must take at least as long as a local read."""
    _, t_local = _run_hbm_read("local", target_pe=0)
    # PE0 accessing PE5's HBM (remote, more mesh hops)
    _, t_remote = _run_hbm_read("remote", target_pe=5)

    assert t_remote["total_ns"] >= t_local["total_ns"], (
        f"Remote ({t_remote['total_ns']:.2f}ns) must be >= "
        f"local ({t_local['total_ns']:.2f}ns)"
    )


def test_latency_deterministic():
    """The same request on two separate engines must report identical latency."""
    msg = MemoryReadMsg(
        correlation_id="mesh",
        request_id="det",
        src_sip=0,
        src_cube=0,
        src_pe=0,
        src_pa=_hbm_pa(pe_id=0),
        nbytes=4096,
    )
    first_engine, second_engine = _engine(), _engine()

    totals = []
    # The same message object is submitted to each engine in turn.
    for engine in (first_engine, second_engine):
        handle = engine.submit(msg)
        engine.wait(handle)
        _, trace = engine.get_completion(handle)
        totals.append(trace["total_ns"])

    assert totals[0] == totals[1]


# ══════════════════════════════════════════════════════════════════
# 6.
# NOC Component reads cube_mesh.yaml (Change 1)
# ══════════════════════════════════════════════════════════════════


def test_mesh_data_in_context_spec():
    """``graph.spec`` must expose the parsed cube_mesh.yaml under ``'_mesh'``.

    The builder is expected to store the mesh dict in spec['_mesh'] so that
    components can read the router layout without opening the file themselves.
    """
    spec = _graph().spec
    assert "_mesh" in spec, (
        "spec['_mesh'] missing: builder must store mesh data in spec"
    )

    mesh = spec["_mesh"]
    for section in ("routers", "mesh"):
        assert section in mesh

    grid = mesh["mesh"]
    assert grid["rows"] == 6
    assert grid["cols"] == 6


def test_router_nodes_match_mesh():
    """Every non-null router in cube_mesh.yaml must exist as a sip0.cube0 node."""
    graph = _graph()
    routers = yaml.safe_load(MESH_PATH.read_text())["routers"]
    for rkey, entry in routers.items():
        if entry is None:
            continue
        assert f"sip0.cube0.{rkey}" in graph.nodes, f"Router {rkey} missing from graph"


def test_null_routers_excluded():
    """Routers that are null in the mesh file (HBM zone) must not become nodes."""
    graph = _graph()
    routers = yaml.safe_load(MESH_PATH.read_text())["routers"]
    for rkey, entry in routers.items():
        if entry is not None:
            continue
        assert f"sip0.cube0.{rkey}" not in graph.nodes, f"Null router {rkey} in graph"


# ══════════════════════════════════════════════════════════════════
# 7.
# Router Mesh Latency (ADR-0017)
# ══════════════════════════════════════════════════════════════════


def _pe_dma_latency(pe_id: int, target_pe_id: int, nbytes: int = 4096) -> float:
    """Issue one ``PeDmaMsg`` on a fresh engine and return its ``total_ns``.

    The request is sent from ``pe_id`` on sip0/cube0 to the HBM address of
    ``target_pe_id`` and transfers ``nbytes`` bytes.
    """
    engine = _engine()
    request = PeDmaMsg(
        correlation_id="mesh_lat",
        request_id=f"pe{pe_id}_t{target_pe_id}",
        src_sip=0,
        src_cube=0,
        src_pe=pe_id,
        dst_pa=_hbm_pa(pe_id=target_pe_id),
        nbytes=nbytes,
    )
    handle = engine.submit(request)
    engine.wait(handle)
    completion_and_trace = engine.get_completion(handle)
    return completion_and_trace[1]["total_ns"]


def test_local_hbm_latency_positive():
    """A PE DMA to the PE's own HBM must report a latency above zero."""
    t = _pe_dma_latency(pe_id=0, target_pe_id=0)
    assert t > 0, f"Local HBM latency must be > 0, got {t}"


def test_pe_dma_latency_deterministic():
    """Two identical PE DMA runs must report exactly the same latency."""
    t1, t2 = (_pe_dma_latency(pe_id=1, target_pe_id=1) for _ in range(2))
    assert t1 == t2, f"Non-deterministic latency: {t1} vs {t2}"


def test_remote_pe_dma_latency_greater():
    """PE0 → PE5's HBM (more mesh hops) must be at least as slow as PE0 → own HBM."""
    t_local = _pe_dma_latency(pe_id=0, target_pe_id=0)
    t_remote = _pe_dma_latency(pe_id=0, target_pe_id=5)
    assert t_remote >= t_local, (
        f"Remote ({t_remote:.4f}ns) must be >= local ({t_local:.4f}ns)"
    )


# ══════════════════════════════════════════════════════════════════
# 8. PE-to-NOC Distance from Physical Position
# ══════════════════════════════════════════════════════════════════


def test_pe_router_edges_exist():
    """sip0.cube0 must contain exactly 8 ``pe_to_router`` edges."""
    graph = _graph()
    pe_router_edges = [
        edge
        for edge in graph.edges
        if "sip0.cube0" in edge.src and edge.kind == "pe_to_router"
    ]
    assert len(pe_router_edges) == 8, (
        f"Expected 8 PE→router edges, got {len(pe_router_edges)}"
    )