Add probe CLI improvements, D2H read, UCIe/HBM tuning, BW sweep

- Probe CLI: restructured output (tables first, routes below), per-hop
  timestamps, split cross-cube into best/worst cases, D2H read section
- UCIe overhead: 1ns -> 8ns per port (16ns per crossing) to fix
  cross-cube-best < cross-half latency inversion
- HBM efficiency: added efficiency=0.8 factor to hbm_ctrl, reducing
  effective BW from 256 to 204.8 GB/s
- Multi-size BW sweep: saturation tables (4KB-1MB) for all probe cases
- Probe default data size: 4KB -> 32KB for more realistic measurements
- IOChiplet NOC + D2H topology and tests
- NOC mesh, xbar, BW occupancy components and tests
- Cube mesh visualization diagram

278 tests pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-19 01:16:18 -07:00
parent 6f43807900
commit d75da439c6
24 changed files with 3456 additions and 501 deletions
+753
View File
@@ -0,0 +1,753 @@
"""Tests for #5+#6 CUBE NOC Router Mesh + Position-Aware XBAR.
Phase 1 verification: all tests FAIL until Phase 2 implements production code.
Key changes verified:
- Single NOC node per cube with internal router mesh simulation
- Auto-layout generates cube_mesh.yaml (6x6 grid for n_connections=4)
- Position-aware XBAR (top/bottom) replaces per-PE xbar chaining
- Mesh file caching with source_hash change detection
- Path routing: PE_DMA → NOC → XBAR_top/bot → HBM_CTRL
Latency invariant after refactor:
Local HBM: PE_DMA → Router(overhead) → XBAR → HBM_CTRL
Cross-row: PE_DMA → Router → mesh traverse → Router → XBAR → bridge → XBAR → HBM_CTRL
Cross-cube: PE_DMA → Router → mesh → UCIe → ... → mesh → XBAR → HBM_CTRL
"""
import pytest
import yaml
from pathlib import Path
from kernbench.policy.address.phyaddr import PhysAddr
from kernbench.policy.routing.router import AddressResolver, PathRouter
from kernbench.runtime_api.kernel import MemoryReadMsg, PeDmaMsg
from kernbench.sim_engine.engine import GraphEngine
from kernbench.topology.builder import load_topology
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
MESH_PATH = Path(__file__).parent.parent / "cube_mesh.yaml"
def _graph():
return load_topology(TOPOLOGY_PATH)
def _engine():
return GraphEngine(_graph())
def _hbm_pa(sip=0, cube=0, pe_id=0):
slice_bytes = 48 * (1 << 30) // 8
pa = PhysAddr.pe_hbm_addr(
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
)
return pa.encode()
# ══════════════════════════════════════════════════════════════════
# 1. Mesh File Generation
# ══════════════════════════════════════════════════════════════════
def test_mesh_file_generated_on_load():
"""load_topology must generate cube_mesh.yaml at project root."""
if MESH_PATH.exists():
MESH_PATH.unlink()
_graph()
assert MESH_PATH.exists(), "cube_mesh.yaml not generated"
def test_mesh_file_has_source_hash():
"""cube_mesh.yaml must contain source_hash for change detection."""
_graph()
content = MESH_PATH.read_text()
assert "source_hash:" in content
def test_mesh_file_grid_dimensions():
"""Current config (n_connections=4, pe_per_corner=2) must produce 6x6 grid."""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
assert mesh["mesh"]["rows"] == 6
assert mesh["mesh"]["cols"] == 6
def test_mesh_file_router_count():
"""6x6 grid minus 4 HBM exclusions = 32 routers."""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
routers = {k: v for k, v in mesh["routers"].items() if v is not None}
assert len(routers) == 32
def test_mesh_file_hbm_exclusion():
"""Middle rows (2,3), middle cols (2,3) must be excluded (HBM zone)."""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
for r in [2, 3]:
for c in [2, 3]:
key = f"r{r}c{c}"
assert mesh["routers"].get(key) is None, (
f"{key} should be HBM excluded"
)
def test_mesh_file_pe_attachments():
"""PE0 (NW corner) must be attached to router r0c0."""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
r0c0 = mesh["routers"]["r0c0"]
assert "pe0.dma" in r0c0["attach"]
assert "pe0.cpu" in r0c0["attach"]
def test_mesh_file_pe_corner_positions():
"""PEs must be at correct corner positions in the grid.
NW (PE0,PE1) → row 0, cols 0,1 (left)
NE (PE2,PE3) → row 1, cols 4,5 (right)
SW (PE4,PE5) → row 4, cols 0,1 (left)
SE (PE6,PE7) → row 5, cols 4,5 (right)
"""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
expected = {
"r0c0": "pe0", "r0c1": "pe1", # NW
"r1c4": "pe2", "r1c5": "pe3", # NE
"r4c0": "pe4", "r4c1": "pe5", # SW
"r5c4": "pe6", "r5c5": "pe7", # SE
}
for router_id, pe_name in expected.items():
attach = mesh["routers"][router_id]["attach"]
assert f"{pe_name}.dma" in attach, (
f"{pe_name} should be attached to {router_id}"
)
def test_mesh_file_xbar_top_routers():
"""xbar_top must list top-half PE routers."""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
top_routers = mesh["xbar"]["top"]["routers"]
for rid in ["r0c0", "r0c1", "r1c4", "r1c5"]:
assert rid in top_routers, f"{rid} should connect to xbar_top"
def test_mesh_file_xbar_bot_routers():
"""xbar_bot must list bottom-half PE routers."""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
bot_routers = mesh["xbar"]["bottom"]["routers"]
for rid in ["r4c0", "r4c1", "r5c4", "r5c5"]:
assert rid in bot_routers, f"{rid} should connect to xbar_bot"
def test_mesh_file_ucie_distribution():
"""UCIe-E connections must be distributed 1 per PE row.
E: c0=R(0,5), c1=R(1,5), c2=R(4,5), c3=R(5,5)
"""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
e_routers = ["r0c5", "r1c5", "r4c5", "r5c5"]
for i, rid in enumerate(e_routers):
attach = mesh["routers"][rid]["attach"]
assert f"ucie_e.c{i}" in attach, (
f"UCIe-E conn {i} should be on {rid}"
)
def test_mesh_not_regenerated_if_unchanged():
"""If topology params unchanged, cube_mesh.yaml must not be regenerated."""
_graph() # first load
mtime1 = MESH_PATH.stat().st_mtime
_graph() # second load
mtime2 = MESH_PATH.stat().st_mtime
assert mtime1 == mtime2, "mesh file regenerated despite no topology changes"
def test_mesh_ucie_w_attached_to_pe_rows():
"""UCIe-W connections must be distributed 1 per PE row on leftmost column.
W: c0=r0c0, c1=r1c0, c2=r4c0, c3=r5c0 (mirror of UCIe-E on col 0).
"""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
w_expected = {"r0c0": "ucie_w.c0", "r1c0": "ucie_w.c1",
"r4c0": "ucie_w.c2", "r5c0": "ucie_w.c3"}
for rid, attach_name in w_expected.items():
attach = mesh["routers"][rid]["attach"]
assert attach_name in attach, (
f"UCIe-W {attach_name} should be on {rid}, got attach={attach}"
)
def test_mesh_ucie_n_attached_to_pe_cols():
"""UCIe-N connections must be distributed across PE columns on top row.
N: c0=r0c0, c1=r0c1, c2=r0c4, c3=r0c5 (PE column positions on row 0).
"""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
n_expected = {"r0c0": "ucie_n.c0", "r0c1": "ucie_n.c1",
"r0c4": "ucie_n.c2", "r0c5": "ucie_n.c3"}
for rid, attach_name in n_expected.items():
attach = mesh["routers"][rid]["attach"]
assert attach_name in attach, (
f"UCIe-N {attach_name} should be on {rid}, got attach={attach}"
)
def test_mesh_ucie_s_attached_to_pe_cols():
"""UCIe-S connections must be distributed across PE columns on bottom row.
S: c0=r5c0, c1=r5c1, c2=r5c4, c3=r5c5 (PE column positions on row 5).
"""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
s_expected = {"r5c0": "ucie_s.c0", "r5c1": "ucie_s.c1",
"r5c4": "ucie_s.c2", "r5c5": "ucie_s.c3"}
for rid, attach_name in s_expected.items():
attach = mesh["routers"][rid]["attach"]
assert attach_name in attach, (
f"UCIe-S {attach_name} should be on {rid}, got attach={attach}"
)
def test_mesh_ucie_all_four_directions():
"""All four UCIe directions (N, S, E, W) must have router attachments."""
_graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
all_attach = []
for key, router in mesh["routers"].items():
if router is not None:
all_attach.extend(router["attach"])
for direction in ("ucie_n", "ucie_s", "ucie_e", "ucie_w"):
dir_conns = [a for a in all_attach if a.startswith(direction)]
assert len(dir_conns) == 4, (
f"{direction} should have 4 connections, found {len(dir_conns)}: {dir_conns}"
)
# ══════════════════════════════════════════════════════════════════
# 2. Topology Graph: XBAR Top/Bottom (replaces per-PE chaining)
# ══════════════════════════════════════════════════════════════════
def test_xbar_top_node_exists():
"""Each cube must have an xbar_top node."""
graph = _graph()
assert "sip0.cube0.xbar_top" in graph.nodes
def test_xbar_bot_node_exists():
"""Each cube must have an xbar_bot node."""
graph = _graph()
assert "sip0.cube0.xbar_bot" in graph.nodes
def test_no_per_pe_xbar_nodes():
"""Per-PE xbar nodes (xbar.pe0..pe7) must not exist."""
graph = _graph()
for i in range(8):
assert f"sip0.cube0.xbar.pe{i}" not in graph.nodes, (
f"xbar.pe{i} should not exist in new topology"
)
def test_no_xbar_chain_edges():
"""xbar_chain kind edges must not exist."""
graph = _graph()
chain_edges = [e for e in graph.edges if e.kind == "xbar_chain"]
assert len(chain_edges) == 0, (
f"Found {len(chain_edges)} xbar_chain edges; chaining is replaced by XBAR top/bot"
)
def test_xbar_top_to_hbm_slices_0_3():
"""xbar_top must connect to hbm_ctrl.slice0..3 (top HBM slices)."""
graph = _graph()
edge_set = {(e.src, e.dst) for e in graph.edges}
for i in range(4):
assert ("sip0.cube0.xbar_top", f"sip0.cube0.hbm_ctrl.slice{i}") in edge_set, (
f"xbar_top → hbm_ctrl.slice{i} edge missing"
)
def test_xbar_bot_to_hbm_slices_4_7():
"""xbar_bot must connect to hbm_ctrl.slice4..7 (bottom HBM slices)."""
graph = _graph()
edge_set = {(e.src, e.dst) for e in graph.edges}
for i in range(4, 8):
assert ("sip0.cube0.xbar_bot", f"sip0.cube0.hbm_ctrl.slice{i}") in edge_set, (
f"xbar_bot → hbm_ctrl.slice{i} edge missing"
)
def test_xbar_bridge_left():
"""bridge.left must connect xbar_top ↔ xbar_bot (bidirectional)."""
graph = _graph()
assert "sip0.cube0.bridge.left" in graph.nodes
edge_set = {(e.src, e.dst) for e in graph.edges}
assert ("sip0.cube0.xbar_top", "sip0.cube0.bridge.left") in edge_set
assert ("sip0.cube0.bridge.left", "sip0.cube0.xbar_bot") in edge_set
assert ("sip0.cube0.xbar_bot", "sip0.cube0.bridge.left") in edge_set
assert ("sip0.cube0.bridge.left", "sip0.cube0.xbar_top") in edge_set
def test_xbar_bridge_right():
"""bridge.right must connect xbar_top ↔ xbar_bot (bidirectional)."""
graph = _graph()
assert "sip0.cube0.bridge.right" in graph.nodes
edge_set = {(e.src, e.dst) for e in graph.edges}
assert ("sip0.cube0.xbar_top", "sip0.cube0.bridge.right") in edge_set
assert ("sip0.cube0.bridge.right", "sip0.cube0.xbar_bot") in edge_set
def test_noc_to_xbar_top_edge():
"""NOC must have edge to xbar_top (router attachment)."""
graph = _graph()
edge_set = {(e.src, e.dst) for e in graph.edges}
assert ("sip0.cube0.noc", "sip0.cube0.xbar_top") in edge_set
def test_noc_to_xbar_bot_edge():
"""NOC must have edge to xbar_bot (router attachment)."""
graph = _graph()
edge_set = {(e.src, e.dst) for e in graph.edges}
assert ("sip0.cube0.noc", "sip0.cube0.xbar_bot") in edge_set
def test_pe_dma_no_direct_xbar_edge():
"""PE_DMA must NOT have direct edge to any xbar node.
All HBM access goes through NOC (router attachment to XBAR).
"""
graph = _graph()
pe_to_xbar = [
e for e in graph.edges
if e.src == "sip0.cube0.pe0.pe_dma" and "xbar" in e.dst
]
assert len(pe_to_xbar) == 0, (
f"PE_DMA should not connect directly to XBAR. "
f"Found: {[(e.src, e.dst) for e in pe_to_xbar]}"
)
# ══════════════════════════════════════════════════════════════════
# 3. Path Routing
# ══════════════════════════════════════════════════════════════════
def test_local_hbm_path_includes_noc_and_xbar_top():
"""PE0 local HBM (slice0): path must include noc and xbar_top."""
graph = _graph()
router = PathRouter(graph)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice0")
assert "sip0.cube0.noc" in path, f"NOC missing from path: {path}"
assert "sip0.cube0.xbar_top" in path, f"xbar_top missing from path: {path}"
def test_cross_pe_same_row_stays_in_xbar_top():
"""PE0 → slice3 (both top row): xbar_top only, no bridge needed."""
graph = _graph()
router = PathRouter(graph)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice3")
assert "sip0.cube0.xbar_top" in path
assert "sip0.cube0.xbar_bot" not in path, (
f"Cross-PE same row should not use xbar_bot. Path: {path}"
)
assert not any("bridge" in n for n in path), (
f"Cross-PE same row should not use bridge. Path: {path}"
)
def test_cross_row_hbm_uses_bridge():
"""PE0 → slice5 (top→bottom): must traverse xbar_top → bridge → xbar_bot."""
graph = _graph()
router = PathRouter(graph)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice5")
assert "sip0.cube0.xbar_top" in path, f"xbar_top missing: {path}"
assert "sip0.cube0.xbar_bot" in path, f"xbar_bot missing: {path}"
assert any("bridge" in n for n in path), f"bridge missing: {path}"
def test_mcpu_dma_path_through_noc():
"""M_CPU DMA to local HBM: m_cpu → noc → xbar_top → hbm_ctrl."""
graph = _graph()
router = PathRouter(graph)
path = router.find_mcpu_dma_path(
"sip0.cube0.m_cpu", "sip0.cube0.hbm_ctrl.slice0"
)
assert "sip0.cube0.noc" in path, f"NOC missing: {path}"
assert "sip0.cube0.xbar_top" in path, f"xbar_top missing: {path}"
def test_cross_cube_path_through_mesh():
"""Cross-cube HBM: must traverse noc → UCIe → remote noc → xbar."""
graph = _graph()
router = PathRouter(graph)
path = router.find_path("sip0.cube0.pe0", "sip0.cube4.hbm_ctrl.slice0")
assert "sip0.cube0.noc" in path, f"Source NOC missing: {path}"
assert any("ucie" in n.lower() for n in path), f"UCIe missing: {path}"
assert "sip0.cube4.xbar_top" in path, f"Dest xbar_top missing: {path}"
def test_h2d_bypass_path_through_noc():
"""H2D MemoryWrite bypass: pcie_ep → io_noc → cube_ucie → noc → xbar → hbm."""
graph = _graph()
resolver = AddressResolver(graph)
router = PathRouter(graph)
pcie_ep = resolver.find_pcie_ep(0)
pa = _hbm_pa(sip=0, cube=0, pe_id=0)
hbm_target = resolver.resolve(PhysAddr.decode(pa))
path = router.find_memory_path(pcie_ep, hbm_target)
assert "sip0.cube0.noc" in path, f"NOC missing from H2D path: {path}"
assert "sip0.cube0.xbar_top" in path, f"xbar_top missing from H2D path: {path}"
# ══════════════════════════════════════════════════════════════════
# 4. BW Configuration
# ══════════════════════════════════════════════════════════════════
def test_pe_dma_to_noc_bw():
"""PE_DMA → NOC edge BW must be 256 GB/s (= HBM slice BW, no bottleneck)."""
graph = _graph()
for e in graph.edges:
if e.src == "sip0.cube0.pe0.pe_dma" and e.dst == "sip0.cube0.noc":
assert e.bw_gbs == 256.0, (
f"PE_DMA→NOC BW should be 256 GB/s, got {e.bw_gbs}"
)
return
pytest.fail("PE_DMA → NOC edge not found")
def test_noc_to_xbar_bw():
"""NOC → xbar_top edge BW must be 256 GB/s (= HBM slice BW)."""
graph = _graph()
for e in graph.edges:
if e.src == "sip0.cube0.noc" and e.dst == "sip0.cube0.xbar_top":
assert e.bw_gbs == 256.0, (
f"NOC→xbar_top BW should be 256 GB/s, got {e.bw_gbs}"
)
return
pytest.fail("NOC → xbar_top edge not found")
# ══════════════════════════════════════════════════════════════════
# 5. Latency
# ══════════════════════════════════════════════════════════════════
def test_local_hbm_read_completes():
"""Local HBM read must complete with ok=True and positive latency."""
engine = _engine()
msg = MemoryReadMsg(
correlation_id="mesh", request_id="local",
src_sip=0, src_cube=0, src_pe=0,
src_pa=_hbm_pa(pe_id=0), nbytes=4096,
)
h = engine.submit(msg)
engine.wait(h)
comp, trace = engine.get_completion(h)
assert comp.ok is True
assert trace["total_ns"] > 0
def test_cross_row_latency_greater_than_local():
"""Cross-row HBM access (PE0→slice5) must be slower than local (PE0→slice0).
Cross-row traverses mesh + bridge, local goes directly through router to XBAR.
"""
engine_local = _engine()
msg_local = MemoryReadMsg(
correlation_id="mesh", request_id="local",
src_sip=0, src_cube=0, src_pe=0,
src_pa=_hbm_pa(pe_id=0), nbytes=4096,
)
h_l = engine_local.submit(msg_local)
engine_local.wait(h_l)
_, t_local = engine_local.get_completion(h_l)
engine_cross = _engine()
msg_cross = MemoryReadMsg(
correlation_id="mesh", request_id="cross",
src_sip=0, src_cube=0, src_pe=0,
src_pa=_hbm_pa(pe_id=5), nbytes=4096,
)
h_c = engine_cross.submit(msg_cross)
engine_cross.wait(h_c)
_, t_cross = engine_cross.get_completion(h_c)
assert t_cross["total_ns"] > t_local["total_ns"], (
f"Cross-row ({t_cross['total_ns']:.2f}ns) must be > "
f"local ({t_local['total_ns']:.2f}ns)"
)
def test_latency_deterministic():
"""Same request on two engines must produce identical latency."""
msg = MemoryReadMsg(
correlation_id="mesh", request_id="det",
src_sip=0, src_cube=0, src_pe=0,
src_pa=_hbm_pa(pe_id=0), nbytes=4096,
)
e1, e2 = _engine(), _engine()
h1 = e1.submit(msg)
e1.wait(h1)
_, t1 = e1.get_completion(h1)
h2 = e2.submit(msg)
e2.wait(h2)
_, t2 = e2.get_completion(h2)
assert t1["total_ns"] == t2["total_ns"]
# ══════════════════════════════════════════════════════════════════
# 6. NOC Component reads cube_mesh.yaml (Change 1)
# ══════════════════════════════════════════════════════════════════
def test_mesh_data_in_context_spec():
"""ComponentContext.spec must contain '_mesh' key with parsed cube_mesh.yaml data.
The builder must store the mesh dict in spec['_mesh'] so that NOC and XBAR
components can access router layout without reading the file directly.
"""
graph = _graph()
assert "_mesh" in graph.spec, (
"spec['_mesh'] missing: builder must store mesh data in spec"
)
mesh = graph.spec["_mesh"]
assert "routers" in mesh
assert "mesh" in mesh
assert mesh["mesh"]["rows"] == 6
assert mesh["mesh"]["cols"] == 6
def test_noc_grid_from_mesh_routers():
"""NOC x_grid/y_grid must be derived from mesh router positions, not all nodes.
Mesh routers have 6 unique X values and 6 unique Y values.
The old approach (scanning all node positions) would produce many more grid lines
from UCIe, HBM, SRAM, etc. positions.
"""
graph = _graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
# Extract unique X and Y values from mesh routers (excluding HBM exclusions)
mesh_xs = set()
mesh_ys = set()
for key, router in mesh["routers"].items():
if router is not None:
mesh_xs.add(router["pos_mm"][0])
mesh_ys.add(router["pos_mm"][1])
# The NOC component should use exactly these grid positions
# Access through engine internals for verification
engine = _engine()
noc_comp = engine._components["sip0.cube0.noc"]
assert len(noc_comp._x_grid) == len(mesh_xs), (
f"NOC x_grid has {len(noc_comp._x_grid)} values, "
f"expected {len(mesh_xs)} from mesh routers"
)
assert len(noc_comp._y_grid) == len(mesh_ys), (
f"NOC y_grid has {len(noc_comp._y_grid)} values, "
f"expected {len(mesh_ys)} from mesh routers"
)
def test_noc_grid_excludes_hbm_zone():
"""NOC grid must not include positions from HBM-excluded routers.
HBM exclusion zone routers (r2c2, r2c3, r3c2, r3c3) are None in the mesh.
Their positions must not appear as router grid points in the NOC.
"""
graph = _graph()
mesh = yaml.safe_load(MESH_PATH.read_text())
# Get positions of active routers only
active_positions = set()
for key, router in mesh["routers"].items():
if router is not None:
active_positions.add(tuple(router["pos_mm"]))
# NOC should only use active router positions
engine = _engine()
noc_comp = engine._components["sip0.cube0.noc"]
noc_grid_points = {(x, y) for x in noc_comp._x_grid for y in noc_comp._y_grid}
# All active router positions should be representable in the grid
for pos in active_positions:
x, y = pos
assert any(abs(gx - x) < 0.01 for gx in noc_comp._x_grid), (
f"Active router X={x} not in NOC x_grid"
)
assert any(abs(gy - y) < 0.01 for gy in noc_comp._y_grid), (
f"Active router Y={y} not in NOC y_grid"
)
# ══════════════════════════════════════════════════════════════════
# 7. XBAR Position-Aware Latency (Change 2)
# ══════════════════════════════════════════════════════════════════
def _pe_dma_latency(pe_id: int, target_pe_id: int, nbytes: int = 4096) -> float:
"""Run PeDmaMsg from pe_id targeting target_pe_id's HBM slice, return total_ns."""
engine = _engine()
msg = PeDmaMsg(
correlation_id="xbar", request_id=f"pe{pe_id}_slice{target_pe_id}",
src_sip=0, src_cube=0, src_pe=pe_id,
dst_pa=_hbm_pa(pe_id=target_pe_id), nbytes=nbytes,
)
h = engine.submit(msg)
engine.wait(h)
_, trace = engine.get_completion(h)
return trace["total_ns"]
def test_xbar_pe0_slice0_lower_than_pe0_slice3():
"""PE0 (NW, left) → slice0 (left) must be faster than PE0 → slice3 (right).
Position-aware XBAR: PE0's router (r0c0, x=1.5) is closer to slice0 (left end)
than slice3 (right end). The XBAR internal latency should reflect this distance.
"""
t_near = _pe_dma_latency(pe_id=0, target_pe_id=0) # PE0 → slice0
t_far = _pe_dma_latency(pe_id=0, target_pe_id=3) # PE0 → slice3
assert t_near < t_far, (
f"PE0→slice0 ({t_near:.4f}ns) should be < PE0→slice3 ({t_far:.4f}ns) "
f"with position-aware XBAR"
)
def test_xbar_pe2_slice3_lower_than_pe2_slice0():
"""PE2 (NE, right) → slice3 (right) must be faster than PE2 → slice0 (left).
Mirror of test_xbar_pe0_slice0_lower_than_pe0_slice3.
PE2's router (r1c4, x=12.5) is closer to slice3 (right end).
"""
t_near = _pe_dma_latency(pe_id=2, target_pe_id=3) # PE2 → slice3
t_far = _pe_dma_latency(pe_id=2, target_pe_id=0) # PE2 → slice0
assert t_near < t_far, (
f"PE2→slice3 ({t_near:.4f}ns) should be < PE2→slice0 ({t_far:.4f}ns) "
f"with position-aware XBAR"
)
def test_xbar_symmetric_latency():
"""PE0→slice0 ≈ PE2→slice3 (symmetric positions in the crossbar).
PE0 (NW, x=1.5) distance to slice0 (left) should equal
PE2 (NE, x=12.5) distance to slice3 (right), within tolerance.
"""
t_pe0_s0 = _pe_dma_latency(pe_id=0, target_pe_id=0)
t_pe2_s3 = _pe_dma_latency(pe_id=2, target_pe_id=3)
diff = abs(t_pe0_s0 - t_pe2_s3)
# Allow small tolerance for different NOC paths
assert diff < 1.0, (
f"Symmetric latency mismatch: PE0→slice0={t_pe0_s0:.4f}ns, "
f"PE2→slice3={t_pe2_s3:.4f}ns, diff={diff:.4f}ns"
)
def test_xbar_position_aware_latency_positive():
"""All XBAR-routed paths must have positive latency (ADR-0002 D4)."""
for pe_id in range(4):
for target in range(4):
t = _pe_dma_latency(pe_id=pe_id, target_pe_id=target)
assert t > 0, (
f"PE{pe_id}→slice{target} latency must be > 0, got {t}"
)
def test_xbar_latency_deterministic():
"""Same (pe, slice) pair must always produce the same XBAR latency."""
t1 = _pe_dma_latency(pe_id=1, target_pe_id=2)
t2 = _pe_dma_latency(pe_id=1, target_pe_id=2)
assert t1 == t2, (
f"Non-deterministic XBAR latency: {t1} vs {t2}"
)
def test_xbar_cross_row_still_greater():
"""Cross-row HBM (PE0→slice5, via bridge) must still be > local (PE0→slice0).
Position-aware XBAR must not break the cross-row > local invariant.
"""
t_local = _pe_dma_latency(pe_id=0, target_pe_id=0) # same-half
t_cross = _pe_dma_latency(pe_id=0, target_pe_id=5) # cross-half via bridge
assert t_cross > t_local, (
f"Cross-row ({t_cross:.4f}ns) must be > local ({t_local:.4f}ns)"
)
# ══════════════════════════════════════════════════════════════════
# 8. PE-to-NOC Distance from Physical Position
# ══════════════════════════════════════════════════════════════════
def test_pe_noc_distance_reflects_physical_position():
"""PE→NOC edge distance must reflect actual PE-to-router physical distance.
NW PE0 (y=1.5) → router r0c0 (y=1.5): distance ≈ 0
NE PE2 (y=1.5) → router r1c4 (y=5.5): distance ≈ 4.0mm
SW PE4 (y=12.5) → router r4c0 (y=8.5): distance ≈ 4.0mm
SE PE6 (y=12.5) → router r5c4 (y=12.5): distance ≈ 0
"""
graph = _graph()
pe_noc_edges = {}
for e in graph.edges:
if e.kind == "pe_to_noc" and "cube0" in e.src:
# Extract pe index from "sip0.cube0.pe2.pe_dma"
pe_name = e.src.split(".")[-2] # "pe2"
pe_noc_edges[pe_name] = e.distance_mm
# NW (PE0,1) and SE (PE6,7): router at same position → distance ≈ 0
assert pe_noc_edges["pe0"] < 0.1, (
f"NW PE0 should be near its router, got distance={pe_noc_edges['pe0']}"
)
assert pe_noc_edges["pe1"] < 0.1, (
f"NW PE1 should be near its router, got distance={pe_noc_edges['pe1']}"
)
assert pe_noc_edges["pe6"] < 0.1, (
f"SE PE6 should be near its router, got distance={pe_noc_edges['pe6']}"
)
assert pe_noc_edges["pe7"] < 0.1, (
f"SE PE7 should be near its router, got distance={pe_noc_edges['pe7']}"
)
# NE (PE2,3) and SW (PE4,5): 4.0mm from router → distance > 3.5
assert pe_noc_edges["pe2"] > 3.5, (
f"NE PE2 should be ~4mm from router, got distance={pe_noc_edges['pe2']}"
)
assert pe_noc_edges["pe3"] > 3.5, (
f"NE PE3 should be ~4mm from router, got distance={pe_noc_edges['pe3']}"
)
assert pe_noc_edges["pe4"] > 3.5, (
f"SW PE4 should be ~4mm from router, got distance={pe_noc_edges['pe4']}"
)
assert pe_noc_edges["pe5"] > 3.5, (
f"SW PE5 should be ~4mm from router, got distance={pe_noc_edges['pe5']}"
)
def test_ne_pe_latency_greater_than_nw_pe():
"""NE PE2 → local HBM must be slower than NW PE0 → local HBM.
PE2 has 4mm extra wire to its router vs PE0 (0mm).
Both access their respective local HBM slice.
"""
t_nw = _pe_dma_latency(pe_id=0, target_pe_id=0) # PE0 → slice0
t_ne = _pe_dma_latency(pe_id=2, target_pe_id=2) # PE2 → slice2
assert t_ne > t_nw, (
f"NE PE2→slice2 ({t_ne:.4f}ns) should be > "
f"NW PE0→slice0 ({t_nw:.4f}ns) due to extra wire distance"
)