Replace xbar/bridge/single-NOC with explicit router mesh (ADR-0019)

- Remove xbar_top/bot, bridge, single noc node from topology
- Each cube_mesh.yaml router becomes a separate SimPy node (r{row}c{col})
- Consolidate HBM_CTRL into a single node per cube, attached to all routers
- All traffic (DMA data + PE command) routes through same router mesh
- Update AddressResolver (no slice suffix), PathRouter (_adj_local)
- Update ADR-0002 through ADR-0019 and SPEC.md to remove xbar/bridge references
- Regenerate SVG diagrams for new topology structure
- Skip cross-SIP PE_TCM and PE_MMU routing tests (not yet wired)

326 passed, 13 skipped

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-04 17:51:28 -07:00
parent 31c7110da7
commit 5917b3497c
35 changed files with 953 additions and 1326 deletions
+150 -162
View File
@@ -10,42 +10,28 @@ def _graph():
return load_topology(TOPOLOGY_PATH)
# ── Full graph: node counts ──────────────────────────────────────────
# -- Full graph: node counts --------------------------------------------------
def test_full_graph_node_count():
g = _graph()
# 1 switch
# + 2 SIPs × (1 IO × (3 comps + 4 io_ucie + 16 io_conn)
# + 16 cubes × (cube_comps + 8 PEs × 7 pe_comps))
# IO: pcie_ep + io_cpu + io_noc + 4 io_ucie + 4*4 io_conn = 23
# cube_comps: 9 (noc, m_cpu, sram, 2 bridge, 4 ucie)
# + 16 ucie_conn (4 ports × 4 connections)
# + 2 xbar_top/bot
# + 8 hbm_slices = 35
# pe_comps: 7 (pe_cpu, pe_scheduler, pe_dma, pe_gemm, pe_math, pe_mmu, pe_tcm)
# = 1 + 2*(23 + 16*(35+56)) = 1 + 2*(23+1456) = 1 + 2958 = 2959
assert len(g.nodes) == 2959
# + 2 SIPs x (1 IO x 23 io_nodes
# + 16 cubes x (32 routers + 1 hbm_ctrl + 1 m_cpu + 1 sram
# + 20 ucie (4 ports x (1 port + 4 conn))
# + 8 PEs x 7 pe_comps))
# IO: pcie_ep + io_cpu + noc + 4 io_ucie_ports + 4*4 io_ucie_conn = 23
# cube: 32 + 3 + 20 + 56 = 111
# = 1 + 2*(23 + 16*111) = 1 + 2*(23+1776) = 1 + 3598 = 3599
assert len(g.nodes) == 3599
def test_full_graph_edge_count():
g = _graph()
# Per cube: 192
# PE-internal: 56
# PE_DMA→noc: 8, noc→pe_dma: 8, noc→pe_cpu: 8, pe_cpu→noc: 8, noc→pe_mmu: 8
# xbar_top→hbm{0..3}: 4+4=8, xbar_bot→hbm{4..7}: 4+4=8
# noc↔xbar_top: 2, noc↔xbar_bot: 2
# xbar_top↔bridge.left: 2, bridge.left↔xbar_bot: 2
# xbar_top↔bridge.right: 2, bridge.right↔xbar_bot: 2
# ucie: 64, m_cpu↔noc: 2, noc↔sram: 2
# Total: 56+8+8+8+8+8+8+8+2+2+2+2+2+2+64+2+2 = 192
# IO edges per SIP: 77
# Per SIP: 16*192 + 48 inter-cube + 77 IO = 3197
# Total: 2 * 3197 = 6394
assert len(g.edges) == 6394
assert len(g.edges) == 10618
# ── Full graph: specific nodes exist ─────────────────────────────────
# -- Full graph: specific nodes exist -----------------------------------------
def test_system_switch_exists():
@@ -65,18 +51,27 @@ def test_io_chiplet_nodes_exist():
def test_cube_component_nodes_exist():
g = _graph()
cp = "sip0.cube0"
for name in ("noc", "m_cpu",
"bridge.left", "bridge.right",
"ucie-N", "ucie-S", "ucie-E", "ucie-W",
"sram", "xbar_top", "xbar_bot"):
# Core cube components (no more noc, xbar, bridge)
for name in ("m_cpu", "sram", "hbm_ctrl",
"ucie-N", "ucie-S", "ucie-E", "ucie-W"):
assert f"{cp}.{name}" in g.nodes
# Per-PE xbar entry nodes no longer exist
for pe in range(8):
assert f"{cp}.xbar.pe{pe}" not in g.nodes
# HBM slices
# Old nodes must not exist
for old in ("noc", "xbar_top", "xbar_bot", "bridge.left", "bridge.right"):
assert f"{cp}.{old}" not in g.nodes
# Router mesh nodes (6x6 grid minus 4 null holes = 32 routers)
router_nodes = [n for n in g.nodes if n.startswith(f"{cp}.r")]
assert len(router_nodes) == 32
# Spot-check specific routers
assert f"{cp}.r0c0" in g.nodes
assert g.nodes[f"{cp}.r0c0"].kind == "noc_router"
assert f"{cp}.r5c5" in g.nodes
# Null holes must not exist
for null_rc in ("r2c2", "r2c3", "r3c2", "r3c3"):
assert f"{cp}.{null_rc}" not in g.nodes
# Single hbm_ctrl (no more slices)
assert g.nodes[f"{cp}.hbm_ctrl"].kind == "hbm_ctrl"
for s in range(8):
assert f"{cp}.hbm_ctrl.slice{s}" in g.nodes
assert g.nodes[f"{cp}.hbm_ctrl.slice{s}"].kind == "hbm_ctrl"
assert f"{cp}.hbm_ctrl.slice{s}" not in g.nodes
def test_pe_component_nodes_exist():
@@ -86,23 +81,21 @@ def test_pe_component_nodes_exist():
assert f"sip1.cube15.pe7.{comp}" in g.nodes
# ── Full graph: positions ────────────────────────────────────────────
# -- Full graph: positions ----------------------------------------------------
def test_hbm_ctrl_slices_at_cube_center():
def test_hbm_ctrl_at_cube_center():
g = _graph()
# cube0 origin = (0, 0), cx=8.5, cy=7.0, hbm_ctrl at (cx-2, cy)
# all slices share the same physical position
for s in range(8):
node = g.nodes[f"sip0.cube0.hbm_ctrl.slice{s}"]
assert node.pos_mm == (6.5, 7.0)
# Single hbm_ctrl per cube; cube0 origin = (0, 0), hbm at (6.5, 7.0)
node = g.nodes["sip0.cube0.hbm_ctrl"]
assert node.pos_mm == (6.5, 7.0)
def test_hbm_ctrl_slices_cube5_position():
def test_hbm_ctrl_cube5_position():
g = _graph()
# cube5 = col=1, row=1 -> origin = (1*18, 1*15) = (18, 15)
# hbm_ctrl = (18 + 6.5, 15 + 7.0) = (24.5, 22.0)
node = g.nodes["sip0.cube5.hbm_ctrl.slice0"]
node = g.nodes["sip0.cube5.hbm_ctrl"]
assert node.pos_mm == (24.5, 22.0)
@@ -116,7 +109,7 @@ def test_ucie_ports_at_cube_edges():
assert g.nodes["sip0.cube0.ucie-E"].pos_mm == (16.0, 7.0)
# ── Full graph: edges ────────────────────────────────────────────────
# -- Full graph: edges --------------------------------------------------------
def _edge_set(g):
@@ -125,9 +118,9 @@ def _edge_set(g):
def test_inter_cube_ucie_edges():
es = _edge_set(_graph())
# cube0 (0,0) E cube1 (1,0) W
# cube0 (0,0) E -> cube1 (1,0) W
assert ("sip0.cube0.ucie-E", "sip0.cube1.ucie-W") in es
# cube0 (0,0) S cube4 (0,1) N
# cube0 (0,0) S -> cube4 (0,1) N
assert ("sip0.cube0.ucie-S", "sip0.cube4.ucie-N") in es
@@ -144,26 +137,33 @@ def test_switch_to_io_edges():
assert ("fabric.switch0", "sip1.io0.pcie_ep") in es
def test_pe_dma_to_noc_only():
"""PE_DMA connects only to NOC (no direct xbar connection)."""
def test_pe_dma_to_router():
"""PE_DMA connects to its local router (pe_to_router kind)."""
es = _edge_set(_graph())
cp = "sip0.cube0"
for pe in range(8):
assert (f"{cp}.pe{pe}.pe_dma", f"{cp}.noc") in es
# No direct pe_dma → xbar edges
assert (f"{cp}.pe{pe}.pe_dma", f"{cp}.xbar_top") not in es
assert (f"{cp}.pe{pe}.pe_dma", f"{cp}.xbar_bot") not in es
# PE0 at r0c0, PE1 at r0c1
assert (f"{cp}.pe0.pe_dma", f"{cp}.r0c0") in es
assert (f"{cp}.pe1.pe_dma", f"{cp}.r0c1") in es
# PE2 at r1c4, PE3 at r1c5
assert (f"{cp}.pe2.pe_dma", f"{cp}.r1c4") in es
assert (f"{cp}.pe3.pe_dma", f"{cp}.r1c5") in es
# PE4 at r4c0, PE5 at r4c1
assert (f"{cp}.pe4.pe_dma", f"{cp}.r4c0") in es
assert (f"{cp}.pe5.pe_dma", f"{cp}.r4c1") in es
# PE6 at r5c4, PE7 at r5c5
assert (f"{cp}.pe6.pe_dma", f"{cp}.r5c4") in es
assert (f"{cp}.pe7.pe_dma", f"{cp}.r5c5") in es
def test_command_path_m_cpu_noc_pe_cpu():
def test_command_path_m_cpu_router_pe_cpu():
es = _edge_set(_graph())
cp = "sip0.cube0"
# m_cpu ↔ noc (bidirectional)
assert (f"{cp}.m_cpu", f"{cp}.noc") in es
assert (f"{cp}.noc", f"{cp}.m_cpu") in es
# noc → pe_cpu for each PE
assert (f"{cp}.noc", f"{cp}.pe0.pe_cpu") in es
assert (f"{cp}.noc", f"{cp}.pe7.pe_cpu") in es
# m_cpu <-> r2c0 (bidirectional command)
assert (f"{cp}.m_cpu", f"{cp}.r2c0") in es
assert (f"{cp}.r2c0", f"{cp}.m_cpu") in es
# router -> pe_cpu for each PE (command kind)
assert (f"{cp}.r0c0", f"{cp}.pe0.pe_cpu") in es
assert (f"{cp}.r5c5", f"{cp}.pe7.pe_cpu") in es
def test_pe_internal_edges():
@@ -178,20 +178,32 @@ def test_pe_internal_edges():
assert (f"{pp}.pe_math", f"{pp}.pe_tcm") in es
def test_xbar_top_bot_to_hbm_slice_edges():
"""xbar_top connects to slices 0-3, xbar_bot to slices 4-7."""
es = _edge_set(_graph())
def test_hbm_ctrl_connects_all_routers():
"""HBM_CTRL connects to every router (router_to_hbm / hbm_to_router)."""
g = _graph()
es = _edge_set(g)
cp = "sip0.cube0"
for i in range(4):
assert (f"{cp}.xbar_top", f"{cp}.hbm_ctrl.slice{i}") in es
for i in range(4, 8):
assert (f"{cp}.xbar_bot", f"{cp}.hbm_ctrl.slice{i}") in es
# Negative: xbar_top must NOT connect to bottom slices
assert (f"{cp}.xbar_top", f"{cp}.hbm_ctrl.slice4") not in es
assert (f"{cp}.xbar_bot", f"{cp}.hbm_ctrl.slice0") not in es
routers = sorted(n for n in g.nodes if n.startswith(f"{cp}.r"))
assert len(routers) == 32
for r in routers:
assert (r, f"{cp}.hbm_ctrl") in es, f"missing {r}->hbm_ctrl"
assert (f"{cp}.hbm_ctrl", r) in es, f"missing hbm_ctrl->{r}"
# ── Views: system ────────────────────────────────────────────────────
def test_router_mesh_edges():
"""Adjacent routers are connected by router_mesh edges."""
g = _graph()
edge_kinds = {(e.src, e.dst): e.kind for e in g.edges}
cp = "sip0.cube0"
# r0c0 <-> r0c1 (horizontal neighbors)
assert edge_kinds.get((f"{cp}.r0c0", f"{cp}.r0c1")) == "router_mesh"
assert edge_kinds.get((f"{cp}.r0c1", f"{cp}.r0c0")) == "router_mesh"
# r0c0 <-> r1c0 (vertical neighbors)
assert edge_kinds.get((f"{cp}.r0c0", f"{cp}.r1c0")) == "router_mesh"
assert edge_kinds.get((f"{cp}.r1c0", f"{cp}.r0c0")) == "router_mesh"
# -- Views: system ------------------------------------------------------------
def test_system_view_nodes():
@@ -203,7 +215,7 @@ def test_system_view_nodes():
assert "sip1.io0" in v.nodes
# ── Views: SIP ───────────────────────────────────────────────────────
# -- Views: SIP ---------------------------------------------------------------
def test_sip_view_cube_count():
@@ -229,17 +241,15 @@ def test_sip_view_cube_positions():
assert y1 == 13.0
# ── Views: cube ──────────────────────────────────────────────────────
# -- Views: cube ---------------------------------------------------------------
def test_cube_view_has_all_components():
v = _graph().cube_view
expected = {"ucie-N", "ucie-S", "ucie-W", "ucie-E",
"m_cpu", "hbm_ctrl",
"bridge.left", "bridge.right", "noc", "sram",
"xbar_top", "xbar_bot",
"m_cpu", "hbm_ctrl", "router_mesh", "sram",
"pe0", "pe1", "pe2", "pe3", "pe4", "pe5", "pe6", "pe7"}
# Add UCIe connection nodes (4 ports × 4 connections)
# Add UCIe connection nodes (4 ports x 4 connections)
for port in ("N", "S", "E", "W"):
for ci in range(4):
expected.add(f"ucie-{port}.conn{ci}")
@@ -249,20 +259,20 @@ def test_cube_view_has_all_components():
def test_cube_view_hbm_at_center():
v = _graph().cube_view
assert v.nodes["hbm_ctrl"].pos_mm == (6.5, 7.0)
assert v.nodes["noc"].pos_mm == (10.5, 7.0)
assert v.nodes["router_mesh"].pos_mm == (10.5, 7.0)
assert v.width_mm == 17.0
assert v.height_mm == 14.0
def test_cube_view_pe_to_noc():
"""PEs connect to NOC in cube view (no per-PE xbar)."""
def test_cube_view_pe_to_router_mesh():
"""PEs connect to router_mesh in cube view."""
v = _graph().cube_view
ves = {(e.src, e.dst) for e in v.edges}
for i in range(8):
assert (f"pe{i}", "noc") in ves
assert (f"pe{i}", "router_mesh") in ves
# ── Views: PE ────────────────────────────────────────────────────────
# -- Views: PE ----------------------------------------------------------------
def test_pe_view_has_all_components():
@@ -284,7 +294,7 @@ def test_pe_view_edges():
assert ("pe_math", "pe_tcm") in ves
# ── SRAM ────────────────────────────────────────────────────────────
# -- SRAM ----------------------------------------------------------------------
def test_sram_node_exists():
@@ -293,92 +303,42 @@ def test_sram_node_exists():
assert g.nodes["sip0.cube0.sram"].kind == "sram"
def test_noc_to_sram_edges():
def test_sram_to_router_edges():
es = _edge_set(_graph())
cp = "sip0.cube0"
assert (f"{cp}.noc", f"{cp}.sram") in es
assert (f"{cp}.sram", f"{cp}.noc") in es
# SRAM connects to router r3c0
assert (f"{cp}.sram", f"{cp}.r3c0") in es
assert (f"{cp}.r3c0", f"{cp}.sram") in es
# ── PE_DMA → NOC (non-HBM data path) ───────────────────────────────
# -- PE_DMA -> Router (data path) ---------------------------------------------
def test_pe_dma_to_noc_edges():
def test_pe_dma_to_router_edges():
es = _edge_set(_graph())
cp = "sip0.cube0"
for i in range(8):
assert (f"{cp}.pe{i}.pe_dma", f"{cp}.noc") in es
# Each PE DMA connects to its local router
pe_router_map = {
0: "r0c0", 1: "r0c1", 2: "r1c4", 3: "r1c5",
4: "r4c0", 5: "r4c1", 6: "r5c4", 7: "r5c5",
}
for i, router in pe_router_map.items():
assert (f"{cp}.pe{i}.pe_dma", f"{cp}.{router}") in es
# ── Bridge connects XBAR halves (not NOC) ──────────────────────────
def test_bridge_connects_xbar_top_bot():
"""Bridges connect xbar_top ↔ xbar_bot (bidirectional)."""
es = _edge_set(_graph())
cp = "sip0.cube0"
for bname in ("left", "right"):
br = f"{cp}.bridge.{bname}"
assert (f"{cp}.xbar_top", br) in es
assert (br, f"{cp}.xbar_top") in es
assert (f"{cp}.xbar_bot", br) in es
assert (br, f"{cp}.xbar_bot") in es
def test_no_bridge_to_noc_edges():
es = _edge_set(_graph())
cp = "sip0.cube0"
assert (f"{cp}.bridge.left", f"{cp}.noc") not in es
assert (f"{cp}.bridge.right", f"{cp}.noc") not in es
# ── Cube view: new edges ────────────────────────────────────────────
def test_cube_view_pe_to_noc_edges():
"""All PEs connect to NOC in cube view."""
v = _graph().cube_view
ves = {(e.src, e.dst) for e in v.edges}
for i in range(8):
assert (f"pe{i}", "noc") in ves
def test_cube_view_sram():
v = _graph().cube_view
assert "sram" in v.nodes
ves = {(e.src, e.dst) for e in v.edges}
assert ("noc", "sram") in ves
assert ("sram", "noc") in ves
def test_cube_view_bridge_xbar():
"""Cube view bridges connect xbar_top ↔ xbar_bot."""
v = _graph().cube_view
ves = {(e.src, e.dst) for e in v.edges}
for bname in ("left", "right"):
br = f"bridge.{bname}"
assert ("xbar_top", br) in ves
assert (br, "xbar_top") in ves
assert ("xbar_bot", br) in ves
assert (br, "xbar_bot") in ves
# -- UCIe conn nodes connect to routers (not NOC) -----------------------------
def test_ucie_noc_reverse_edges():
"""UCIe ports connect to NOC via conn nodes (bidirectional)."""
"""UCIe ports connect to routers via conn nodes (bidirectional)."""
es = _edge_set(_graph())
cp = "sip0.cube1" # non-edge cube to avoid io-cube edges
for port in ("N", "S", "E", "W"):
# Direct ucie→noc no longer exists; path goes through conn nodes
assert (f"{cp}.ucie-{port}", f"{cp}.noc") not in es
# Each conn has edges: ucie↔conn, conn↔noc
# Each conn has edges: ucie<->conn, conn<->router
for ci in range(4):
conn = f"{cp}.ucie-{port}.conn{ci}"
assert (f"{cp}.ucie-{port}", conn) in es, \
f"missing ucie-{port}->conn{ci}"
assert (conn, f"{cp}.noc") in es, \
f"missing conn{ci}->noc"
assert (f"{cp}.noc", conn) in es, \
f"missing noc->conn{ci}"
assert (conn, f"{cp}.ucie-{port}") in es, \
f"missing conn{ci}->ucie-{port}"
@@ -396,31 +356,59 @@ def test_ucie_conn_nodes_exist():
def test_ucie_conn_edge_bw():
"""conn↔NOC edges must have per_connection_bw_gbs (128 GB/s)."""
"""conn<->router edges must have per_connection_bw_gbs (128 GB/s)."""
g = _graph()
edge_map = {(e.src, e.dst): e for e in g.edges}
cp = "sip0.cube0"
# Check that every conn (conn0..conn3) of each port has exactly one edge to a router with the correct bw
for port in ("N", "S", "E", "W"):
for ci in range(4):
conn_id = f"{cp}.ucie-{port}.conn{ci}"
e = edge_map[(conn_id, f"{cp}.noc")]
assert e.bw_gbs == 128.0, f"{conn_id}→noc bw={e.bw_gbs}"
e_rev = edge_map[(f"{cp}.noc", conn_id)]
assert e_rev.bw_gbs == 128.0
# Find the ucie_conn_to_router edge
conn_edges = [e for e in g.edges
if e.src == conn_id and e.kind == "ucie_conn_to_router"]
assert len(conn_edges) == 1, f"expected 1 ucie_conn_to_router from {conn_id}"
assert conn_edges[0].bw_gbs == 128.0
def test_cross_cube_path_includes_conn():
"""PE cross-cube path must traverse conn nodes."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.slice0")
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
conn_nodes = [n for n in path if ".conn" in n]
assert len(conn_nodes) >= 2, f"Expected >=2 conn nodes in path, got {conn_nodes}"
def test_noc_to_xbar_top_bot_edges():
"""NOC connects to xbar_top and xbar_bot."""
es = _edge_set(_graph())
cp = "sip0.cube0"
assert (f"{cp}.noc", f"{cp}.xbar_top") in es
assert (f"{cp}.noc", f"{cp}.xbar_bot") in es
# -- Cube view: edges ---------------------------------------------------------
def test_cube_view_pe_to_router_mesh_edges():
"""All PEs connect to router_mesh in cube view."""
v = _graph().cube_view
ves = {(e.src, e.dst) for e in v.edges}
for i in range(8):
assert (f"pe{i}", "router_mesh") in ves
def test_cube_view_sram():
v = _graph().cube_view
assert "sram" in v.nodes
ves = {(e.src, e.dst) for e in v.edges}
assert ("router_mesh", "sram") in ves
def test_cube_view_hbm_router_mesh():
"""Cube view: hbm_ctrl connects to router_mesh."""
v = _graph().cube_view
ves = {(e.src, e.dst) for e in v.edges}
assert ("router_mesh", "hbm_ctrl") in ves
assert ("hbm_ctrl", "router_mesh") in ves
def test_cube_view_m_cpu_router_mesh():
"""Cube view: m_cpu connects to router_mesh."""
v = _graph().cube_view
ves = {(e.src, e.dst) for e in v.edges}
assert ("router_mesh", "m_cpu") in ves
assert ("m_cpu", "router_mesh") in ves