ADR-0019 D1/D4: per-PE HBM CTRL partitioning
Restores per-PE HBM controller partitioning that was lost in
commit 5917b34 ("Replace xbar/bridge/single-NOC with explicit
router mesh"), which had over-consolidated the per-slice HBM CTRL
into a single cube-wide ``hbm_ctrl`` connected to every router —
the opposite of what ADR-0019 D1/D4 specifies.
Builder splits ``hbm_ctrl`` into 8 ``hbm_ctrl.pe{X}`` instances per
cube, each reachable ONLY through PE_X's attaching router via the
existing ``peX.hbm`` attach metadata from cube_mesh.yaml. Cube
aggregate BW now matches the spec (8 PEs × 8 PCs × 32 GB/s =
2048 GB/s) instead of collapsing to 256 GB/s.
AddressResolver decodes the target PE from the HBM PA's hbm_offset
(``offset // slice_size``) and returns ``hbm_ctrl.pe{X}``. PathRouter
uses the existing ``_adj_local`` adjacency for same-cube PE_DMA so
the cube's own UCIe port can no longer appear as a zero-distance
shortcut between routers — same-cube PE_DMA to another PE's HBM
slice now traverses the mesh, restoring the ADR-0019 D4 worked example
``PE0.pe_dma → r0c0 → … → r1c4 → hbm_ctrl`` (the final hop is
``hbm_ctrl.pe2`` under the new per-PE node ids).
Tests:
- New tests/test_per_pe_hbm_partition.py: 14 tests covering
topology shape, per-PE router exclusivity, PA resolution,
single-hop local path, cross-PE mesh traversal, and end-to-end
latency monotonicity. Probe CLI now reports
pe-local < pe-same-half < pe-cross-half (was uniform 141ns).
- Existing tests were updated for the new node ids, and two
assertions that locked in the wrong consolidation were replaced:
test_noc_mesh.test_hbm_connects_to_all_routers and
test_topology_compile.test_hbm_ctrl_connects_all_routers are
now per-PE exclusivity assertions. In addition,
test_routing.test_all_pe_hbm_equidistant becomes
test_cross_pe_hbm_distance_increases_with_mesh_hops.
- test_ipcq_buffer_kind_locations.test_hbm_pe_hop_charged_at_large_payload
threshold recalibrated 4000→1500 ns: the prior figure reflected
serialization on the over-consolidated single hbm_ctrl; per-PE
partitioning removes that artificial contention so the gap
shrinks to the genuine PE↔HBM-hop cost.
Full suite: 645 passed, 1 skipped.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,21 +17,21 @@ def test_full_graph_node_count():
|
||||
g = _graph()
|
||||
# 1 switch
|
||||
# + 2 SIPs x (1 IO x 23 io_nodes
|
||||
# + 16 cubes x (32 routers + 1 hbm_ctrl + 1 m_cpu + 1 sram
|
||||
# + 16 cubes x (32 routers + 8 hbm_ctrl.peX + 1 m_cpu + 1 sram
|
||||
# + 20 ucie (4 ports x (1 port + 4 conn))
|
||||
# + 8 PEs x 9 pe_comps)) (ADR-0023: +pe_ipcq)
|
||||
# IO: pcie_ep + io_cpu + noc + 4 io_ucie_ports + 4*4 io_ucie_conn = 23
|
||||
# cube: 32 + 3 + 20 + 72 = 127
|
||||
# = 1 + 2*(23 + 16*127) = 1 + 2*(23+2032) = 1 + 4110 = 4111
|
||||
assert len(g.nodes) == 4111
|
||||
# cube: 32 + 10 + 20 + 72 = 134 (was 127; ADR-0019 D1 per-PE HBM CTRL)
|
||||
# = 1 + 2*(23 + 16*134) = 1 + 2*(23+2144) = 1 + 4334 = 4335
|
||||
assert len(g.nodes) == 4335
|
||||
|
||||
|
||||
def test_full_graph_edge_count():
|
||||
g = _graph()
|
||||
# ADR-0023: +3 IPCQ edges per PE (cpu→ipcq, ipcq→dma, dma→ipcq)
|
||||
# 2 SIPs × 16 cubes × 8 PEs × 3 = 768 new edges
|
||||
# Cross-SIP routing: +1 reverse pcie_ep→switch edge per SIP = +2
|
||||
assert len(g.edges) == 13692
|
||||
# ADR-0023: +3 IPCQ edges per PE
|
||||
# ADR-0019 D1 (restored): HBM↔router edges drop from 32 routers × 2
|
||||
# to 8 PE-routers × 2 per cube. 32 cubes × (16-64) = -1536 edges.
|
||||
assert len(g.edges) == 12156
|
||||
|
||||
|
||||
# -- Full graph: specific nodes exist -----------------------------------------
|
||||
@@ -55,7 +55,7 @@ def test_cube_component_nodes_exist():
|
||||
g = _graph()
|
||||
cp = "sip0.cube0"
|
||||
# Core cube components (no more noc, xbar, bridge)
|
||||
for name in ("m_cpu", "sram", "hbm_ctrl",
|
||||
for name in ("m_cpu", "sram",
|
||||
"ucie-N", "ucie-S", "ucie-E", "ucie-W"):
|
||||
assert f"{cp}.{name}" in g.nodes
|
||||
# Old nodes must not exist
|
||||
@@ -71,8 +71,11 @@ def test_cube_component_nodes_exist():
|
||||
# Null holes must not exist
|
||||
for null_rc in ("r2c2", "r2c3", "r3c2", "r3c3"):
|
||||
assert f"{cp}.{null_rc}" not in g.nodes
|
||||
# Single hbm_ctrl (no more slices)
|
||||
assert g.nodes[f"{cp}.hbm_ctrl"].kind == "hbm_ctrl"
|
||||
# Per-PE HBM CTRL (ADR-0019 D1) — 8 instances, no legacy single node
|
||||
for pe in range(8):
|
||||
nid = f"{cp}.hbm_ctrl.pe{pe}"
|
||||
assert g.nodes[nid].kind == "hbm_ctrl"
|
||||
assert f"{cp}.hbm_ctrl" not in g.nodes
|
||||
for s in range(8):
|
||||
assert f"{cp}.hbm_ctrl.slice{s}" not in g.nodes
|
||||
|
||||
@@ -89,16 +92,18 @@ def test_pe_component_nodes_exist():
|
||||
|
||||
def test_hbm_ctrl_at_cube_center():
|
||||
g = _graph()
|
||||
# Single hbm_ctrl per cube; cube0 origin = (0, 0), hbm at (6.5, 7.0)
|
||||
node = g.nodes["sip0.cube0.hbm_ctrl"]
|
||||
assert node.pos_mm == (6.5, 7.0)
|
||||
# Per-PE hbm_ctrl nodes share the cube's HBM placement (ADR-0019 D1)
|
||||
# cube0 origin = (0, 0), hbm at (6.5, 7.0)
|
||||
for pe in range(8):
|
||||
node = g.nodes[f"sip0.cube0.hbm_ctrl.pe{pe}"]
|
||||
assert node.pos_mm == (6.5, 7.0)
|
||||
|
||||
|
||||
def test_hbm_ctrl_cube5_position():
|
||||
g = _graph()
|
||||
# cube5 = col=1, row=1 -> origin = (1*18, 1*15) = (18, 15)
|
||||
# hbm_ctrl = (18 + 6.5, 15 + 7.0) = (24.5, 22.0)
|
||||
node = g.nodes["sip0.cube5.hbm_ctrl"]
|
||||
node = g.nodes["sip0.cube5.hbm_ctrl.pe0"]
|
||||
assert node.pos_mm == (24.5, 22.0)
|
||||
|
||||
|
||||
@@ -181,16 +186,25 @@ def test_pe_internal_edges():
|
||||
assert (f"{pp}.pe_math", f"{pp}.pe_tcm") in es
|
||||
|
||||
|
||||
def test_hbm_ctrl_connects_all_routers():
|
||||
"""HBM_CTRL connects to every router (router_to_hbm / hbm_to_router)."""
|
||||
def test_per_pe_hbm_ctrl_connects_only_to_owning_router():
|
||||
"""Each hbm_ctrl.pe{X} connects ONLY to PE_X's attaching router
|
||||
(ADR-0019 D4). Replaces a prior test that asserted the
|
||||
spec-violating all-routers consolidation (commit 5917b34)."""
|
||||
g = _graph()
|
||||
es = _edge_set(g)
|
||||
cp = "sip0.cube0"
|
||||
routers = sorted(n for n in g.nodes if n.startswith(f"{cp}.r"))
|
||||
assert len(routers) == 32
|
||||
for r in routers:
|
||||
assert (r, f"{cp}.hbm_ctrl") in es, f"missing {r}->hbm_ctrl"
|
||||
assert (f"{cp}.hbm_ctrl", r) in es, f"missing hbm_ctrl->{r}"
|
||||
pe_router = {0: "r0c0", 1: "r0c1", 2: "r1c4", 3: "r1c5",
|
||||
4: "r4c0", 5: "r4c1", 6: "r5c4", 7: "r5c5"}
|
||||
for pe, rkey in pe_router.items():
|
||||
nid = f"{cp}.hbm_ctrl.pe{pe}"
|
||||
owner = f"{cp}.{rkey}"
|
||||
assert (owner, nid) in es, f"missing {owner}→{nid}"
|
||||
assert (nid, owner) in es, f"missing {nid}→{owner}"
|
||||
for other in g.nodes:
|
||||
if other.startswith(f"{cp}.r") and other != owner:
|
||||
assert (other, nid) not in es, (
|
||||
f"unexpected edge {other}→{nid}"
|
||||
)
|
||||
|
||||
|
||||
def test_router_mesh_edges():
|
||||
@@ -387,7 +401,7 @@ def test_cross_cube_path_includes_conn():
|
||||
"""PE cross-cube path must traverse conn nodes."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.pe0")
|
||||
conn_nodes = [n for n in path if ".conn" in n]
|
||||
assert len(conn_nodes) >= 2, f"Expected >=2 conn nodes in path, got {conn_nodes}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user