Replace xbar/bridge/single-NOC with explicit router mesh (ADR-0019)

- Remove xbar_top/bot, bridge, single noc node from topology
- Each cube_mesh.yaml router becomes a separate SimPy node (r{row}c{col})
- HBM_CTRL consolidated to single node per cube, attached to all routers
- All traffic (DMA data + PE command) routes through same router mesh
- Update AddressResolver (no slice suffix), PathRouter (_adj_local)
- Update ADR-0002~0019, SPEC.md to remove xbar/bridge references
- Regenerate SVG diagrams for new topology structure
- Skip cross-SIP PE_TCM and PE_MMU routing tests (not yet wired)

326 passed, 13 skipped

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-04 17:51:28 -07:00
parent 31c7110da7
commit 5917b3497c
35 changed files with 953 additions and 1326 deletions
+67 -90
View File
@@ -17,21 +17,19 @@ def _graph():
def test_resolve_hbm_addr():
"""HBM address -> sip{S}.cube{C}.hbm_ctrl.slice{P}"""
"""HBM address -> sip{S}.cube{C}.hbm_ctrl (single controller per cube)."""
g = _graph()
resolver = AddressResolver(g)
# hbm_offset=0x1000, slice_size=6GB -> slice 0
pa = PhysAddr.hbm_addr(rack_id=0, sip_id=0, cube_id=3, hbm_offset=0x1000)
assert resolver.resolve(pa) == "sip0.cube3.hbm_ctrl.slice0"
assert resolver.resolve(pa) == "sip0.cube3.hbm_ctrl"
def test_resolve_hbm_addr_slice4():
"""HBM address in PE4's slice range -> slice4."""
def test_resolve_hbm_addr_high_offset():
"""HBM address with large offset still resolves to same hbm_ctrl."""
g = _graph()
resolver = AddressResolver(g)
# slice_size = 6GB; PE4 offset starts at 4*6GB = 24GB = 0x600000000
pa = PhysAddr.hbm_addr(rack_id=0, sip_id=0, cube_id=0, hbm_offset=0x600000000)
assert resolver.resolve(pa) == "sip0.cube0.hbm_ctrl.slice4"
assert resolver.resolve(pa) == "sip0.cube0.hbm_ctrl"
def test_resolve_pe_tcm_addr():
@@ -71,120 +69,98 @@ def test_resolve_nonexistent_node():
resolver.resolve(pa)
# ── PathRouter: local HBM (same xbar half) ──────────────────────────
# ── PathRouter: local HBM via router mesh ────────────────────────────
def test_path_local_hbm_same_half():
"""PE0 -> slice0 (local): pe_dma -> noc -> xbar_top -> hbm_ctrl.slice0."""
def test_path_local_hbm():
"""PE0 -> hbm_ctrl: pe_dma → router → hbm_ctrl (through router mesh)."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice0")
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
assert path[0] == "sip0.cube0.pe0.pe_dma"
assert "sip0.cube0.noc" in path
assert "sip0.cube0.xbar_top" in path
assert path[-1] == "sip0.cube0.hbm_ctrl.slice0"
assert not any("bridge" in n for n in path)
assert len(path) == 4 # pe_dma → noc → xbar_top → slice0
assert path[-1] == "sip0.cube0.hbm_ctrl"
# Path must go through at least one router node
assert any(n.startswith("sip0.cube0.r") for n in path), \
"HBM path must traverse router mesh"
# No xbar or bridge nodes in the new topology
assert not any("xbar" in n or "bridge" in n for n in path)
# ── PathRouter: same-half remote HBM ────────────────────────────────
# ── PathRouter: remote PE HBM (different corner, same cube) ──────────
def test_path_same_half_remote_hbm():
"""PE0 -> slice1: same-half via noc → xbar_top, no bridge."""
def test_path_remote_pe_hbm():
"""PE4 (bottom half) -> hbm_ctrl: routes through router mesh."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice1")
assert path[0] == "sip0.cube0.pe0.pe_dma"
assert "sip0.cube0.noc" in path
assert "sip0.cube0.xbar_top" in path
assert path[-1] == "sip0.cube0.hbm_ctrl.slice1"
assert not any("bridge" in n for n in path)
assert len(path) == 4 # pe_dma → noc → xbar_top → slice1
path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
assert path[0] == "sip0.cube0.pe4.pe_dma"
assert path[-1] == "sip0.cube0.hbm_ctrl"
assert any(n.startswith("sip0.cube0.r") for n in path)
assert not any("xbar" in n or "bridge" in n for n in path)
# ── PathRouter: cross-half HBM ─────────────────────────────────────
# ── PathRouter: all PEs equidistant to HBM (n_to_one routing weight)
def test_path_cross_half_hbm():
"""PE0 -> slice4 (cross-half): pe_dma → noc → xbar_top → bridge → xbar_bot → slice4."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice4")
assert path[0] == "sip0.cube0.pe0.pe_dma"
assert "sip0.cube0.xbar_top" in path
assert any("bridge" in n for n in path), "cross-half HBM must traverse bridge"
assert "sip0.cube0.xbar_bot" in path
assert path[-1] == "sip0.cube0.hbm_ctrl.slice4"
assert len(path) == 6 # pe_dma → noc → xbar_top → bridge → xbar_bot → slice4
def test_all_pe_hbm_equidistant():
"""All PEs in a cube have equal routing distance to hbm_ctrl.
def test_path_cross_half_via_xbar_top():
"""PE4 (bottom) -> slice2 (top) goes through xbar_top via NOC.
NOC connects directly to xbar_top (low routing weight), so
bottom PEs access top-half HBM through noc → xbar_top.
With n_to_one mapping and high routing weight on HBM edges,
all PE→hbm_ctrl paths have the same accumulated distance.
"""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl.slice2")
assert "sip0.cube0.xbar_top" in path
assert path[-1] == "sip0.cube0.hbm_ctrl.slice2"
def test_cross_half_distance_greater():
"""Cross-half HBM access must have greater distance than local-half."""
g = _graph()
router = PathRouter(g)
_, dist_local = router.find_path_with_distance(
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice0")
_, dist_cross = router.find_path_with_distance(
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice4")
assert dist_cross > dist_local
def test_path_same_half_same_distance():
"""Same-half HBM slices (PE0->slice0 vs PE0->slice3) have same distance.
With xbar_top/bot, all top-half slices are equidistant via noc → xbar_top.
"""
g = _graph()
router = PathRouter(g)
_, dist_local = router.find_path_with_distance(
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice0")
_, dist_remote = router.find_path_with_distance(
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice3")
assert dist_remote == dist_local, (
f"same-half slices should have equal distance: "
f"slice0={dist_local:.2f}mm, slice3={dist_remote:.2f}mm"
distances = []
for pe in range(8):
_, dist = router.find_path_with_distance(
f"sip0.cube0.pe{pe}", "sip0.cube0.hbm_ctrl")
distances.append(dist)
# All distances should be equal
assert all(d == distances[0] for d in distances), (
f"expected equal distances, got: {distances}"
)
def test_remote_pe_distance_not_less_than_local():
"""Remote PE HBM distance >= local PE HBM distance (mesh topology)."""
g = _graph()
router = PathRouter(g)
_, dist_pe0 = router.find_path_with_distance(
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
_, dist_pe4 = router.find_path_with_distance(
"sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
assert dist_pe4 >= dist_pe0
def test_path_remote_cube_hbm():
"""PE0 in cube0 can reach HBM in cube1 via UCIe (ADR-0004 D4)."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.slice0")
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
assert path[0] == "sip0.cube0.pe0.pe_dma"
assert path[-1] == "sip0.cube1.hbm_ctrl.slice0"
assert path[-1] == "sip0.cube1.hbm_ctrl"
# inter-cube path must cross a UCIe link
assert any("ucie" in n for n in path), "remote cube path must traverse UCIe"
# must not be trivially short (needs noc + ucie + remote noc + xbar)
assert any("ucie" in n.lower() for n in path), \
"remote cube path must traverse UCIe"
# must not be trivially short (needs router + ucie + remote router + hbm)
assert len(path) >= 5
# ── PathRouter: SRAM via NOC ────────────────────────────────────────
# ── PathRouter: SRAM via router mesh ─────────────────────────────────
def test_path_sram_via_noc():
"""PE → SRAM must go through NOC (non-HBM data path)."""
def test_path_sram_via_router_mesh():
"""PE → SRAM must go through router mesh nodes."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.sram")
assert path[0] == "sip0.cube0.pe0.pe_dma"
assert "sip0.cube0.noc" in path
assert path[-1] == "sip0.cube0.sram"
# should NOT go through xbar (SRAM is non-HBM path)
# Must traverse at least one router node
assert any(n.startswith("sip0.cube0.r") for n in path), \
"SRAM path must traverse router mesh"
# No xbar nodes
assert not any("xbar" in n for n in path)
@@ -192,14 +168,14 @@ def test_path_sram_via_noc():
def test_path_local_tcm():
"""PE0 → own TCM is PE-internal, not via xbar or noc."""
"""PE0 → own TCM is PE-internal, not via router mesh."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.pe0.pe_tcm")
assert path[0] == "sip0.cube0.pe0.pe_dma"
assert path[-1] == "sip0.cube0.pe0.pe_tcm"
# PE-internal path, no fabric
assert not any("xbar" in n or "noc" in n for n in path)
assert not any("xbar" in n or n.startswith("sip0.cube0.r") for n in path)
# ── PathRouter: distance monotonic ──────────────────────────────────
@@ -209,7 +185,8 @@ def test_path_distance_positive():
"""All routed paths must have accumulated distance > 0 (ADR-0002 D4)."""
g = _graph()
router = PathRouter(g)
_, dist = router.find_path_with_distance("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.slice0")
_, dist = router.find_path_with_distance(
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
assert dist > 0
@@ -218,8 +195,8 @@ def test_path_deterministic():
g = _graph()
r1 = PathRouter(g)
r2 = PathRouter(g)
p1 = r1.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl.slice3")
p2 = r2.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl.slice3")
p1 = r1.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl")
p2 = r2.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl")
assert p1 == p2
@@ -227,6 +204,6 @@ def test_remote_cube_path_no_routing_error():
"""Routing to remote cube HBM must not raise RoutingError (ADR-0004 D4)."""
g = _graph()
router = PathRouter(g)
# cube0.PE0 -> cube1.slice0 (adjacent cube, E direction)
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.slice0")
# cube0.PE0 -> cube1.hbm_ctrl (adjacent cube, E direction)
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
assert len(path) >= 1 # succeeds without exception