ADR-0019 D1/D4: per-PE HBM CTRL partitioning

Restores per-PE HBM controller partitioning that was lost in
commit 5917b34 ("Replace xbar/bridge/single-NOC with explicit
router mesh"), which had over-consolidated the per-slice HBM CTRL
into a single cube-wide ``hbm_ctrl`` connected to every router —
the opposite of what ADR-0019 D1/D4 specifies.

Builder splits ``hbm_ctrl`` into 8 ``hbm_ctrl.pe{X}`` instances per
cube, each reachable ONLY through PE_X's attaching router via the
existing ``peX.hbm`` attach metadata from cube_mesh.yaml. Cube
aggregate BW now matches the spec (8 PEs × 8 PCs × 32 GB/s =
2048 GB/s) instead of collapsing to 256 GB/s.

AddressResolver decodes the target PE from the HBM PA's hbm_offset
(``offset // slice_size``) and returns ``hbm_ctrl.pe{X}``. PathRouter
uses the existing ``_adj_local`` adjacency for same-cube PE_DMA so
the cube's own UCIe port can no longer appear as a zero-distance
shortcut between routers — local PE_DMA now traverses the mesh,
restoring the ADR-0019 D4 worked example
``PE0.pe_dma → r0c0 → … → r1c4 → hbm_ctrl``.

Tests:
- New tests/test_per_pe_hbm_partition.py: 14 tests covering
  topology shape, per-PE router exclusivity, PA resolution,
  single-hop local path, cross-PE mesh traversal, and end-to-end
  latency monotonicity. The probe CLI now reports latencies ordered
  pe-local < pe-same-half < pe-cross-half (previously a uniform
  141 ns for all three).
- Existing tests are updated for the new node ids, and the tests
  that locked in the wrong consolidation are replaced:
  test_noc_mesh.test_hbm_connects_to_all_routers and
  test_topology_compile.test_hbm_ctrl_connects_all_routers are
  now per-PE exclusivity assertions, and
  test_routing.test_all_pe_hbm_equidistant becomes
  test_cross_pe_hbm_distance_increases_with_mesh_hops.
- test_ipcq_buffer_kind_locations.test_hbm_pe_hop_charged_at_large_payload
  threshold recalibrated 4000→1500 ns: the prior figure reflected
  serialization on the over-consolidated single hbm_ctrl; per-PE
  partitioning removes that artificial contention so the gap
  shrinks to the genuine PE↔HBM-hop cost.

Full suite: 645 passed, 1 skipped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-15 01:04:30 -07:00
parent aaa1cbfaf6
commit b8213d43a9
17 changed files with 486 additions and 168 deletions
+37 -23
View File
@@ -17,21 +17,21 @@ def test_full_graph_node_count():
g = _graph()
# 1 switch
# + 2 SIPs x (1 IO x 23 io_nodes
# + 16 cubes x (32 routers + 1 hbm_ctrl + 1 m_cpu + 1 sram
# + 16 cubes x (32 routers + 8 hbm_ctrl.peX + 1 m_cpu + 1 sram
# + 20 ucie (4 ports x (1 port + 4 conn))
# + 8 PEs x 9 pe_comps)) (ADR-0023: +pe_ipcq)
# IO: pcie_ep + io_cpu + noc + 4 io_ucie_ports + 4*4 io_ucie_conn = 23
# cube: 32 + 3 + 20 + 72 = 127
# = 1 + 2*(23 + 16*127) = 1 + 2*(23+2032) = 1 + 4110 = 4111
assert len(g.nodes) == 4111
# cube: 32 + 10 + 20 + 72 = 134 (was 127; ADR-0019 D1 per-PE HBM CTRL)
# = 1 + 2*(23 + 16*134) = 1 + 2*(23+2144) = 1 + 4334 = 4335
assert len(g.nodes) == 4335
def test_full_graph_edge_count():
g = _graph()
# ADR-0023: +3 IPCQ edges per PE (cpu→ipcq, ipcq→dma, dma→ipcq)
# 2 SIPs × 16 cubes × 8 PEs × 3 = 768 new edges
# Cross-SIP routing: +1 reverse pcie_ep→switch edge per SIP = +2
assert len(g.edges) == 13692
# ADR-0023: +3 IPCQ edges per PE
# ADR-0019 D1 (restored): HBM↔router edges drop from 32 routers × 2
# to 8 PE-routers × 2 per cube. 32 cubes × (16-64) = -1536 edges.
assert len(g.edges) == 12156
# -- Full graph: specific nodes exist -----------------------------------------
@@ -55,7 +55,7 @@ def test_cube_component_nodes_exist():
g = _graph()
cp = "sip0.cube0"
# Core cube components (no more noc, xbar, bridge)
for name in ("m_cpu", "sram", "hbm_ctrl",
for name in ("m_cpu", "sram",
"ucie-N", "ucie-S", "ucie-E", "ucie-W"):
assert f"{cp}.{name}" in g.nodes
# Old nodes must not exist
@@ -71,8 +71,11 @@ def test_cube_component_nodes_exist():
# Null holes must not exist
for null_rc in ("r2c2", "r2c3", "r3c2", "r3c3"):
assert f"{cp}.{null_rc}" not in g.nodes
# Single hbm_ctrl (no more slices)
assert g.nodes[f"{cp}.hbm_ctrl"].kind == "hbm_ctrl"
# Per-PE HBM CTRL (ADR-0019 D1) — 8 instances, no legacy single node
for pe in range(8):
nid = f"{cp}.hbm_ctrl.pe{pe}"
assert g.nodes[nid].kind == "hbm_ctrl"
assert f"{cp}.hbm_ctrl" not in g.nodes
for s in range(8):
assert f"{cp}.hbm_ctrl.slice{s}" not in g.nodes
@@ -89,16 +92,18 @@ def test_pe_component_nodes_exist():
def test_hbm_ctrl_at_cube_center():
g = _graph()
# Single hbm_ctrl per cube; cube0 origin = (0, 0), hbm at (6.5, 7.0)
node = g.nodes["sip0.cube0.hbm_ctrl"]
assert node.pos_mm == (6.5, 7.0)
# Per-PE hbm_ctrl nodes share the cube's HBM placement (ADR-0019 D1)
# cube0 origin = (0, 0), hbm at (6.5, 7.0)
for pe in range(8):
node = g.nodes[f"sip0.cube0.hbm_ctrl.pe{pe}"]
assert node.pos_mm == (6.5, 7.0)
def test_hbm_ctrl_cube5_position():
g = _graph()
# cube5 = col=1, row=1 -> origin = (1*18, 1*15) = (18, 15)
# hbm_ctrl = (18 + 6.5, 15 + 7.0) = (24.5, 22.0)
node = g.nodes["sip0.cube5.hbm_ctrl"]
node = g.nodes["sip0.cube5.hbm_ctrl.pe0"]
assert node.pos_mm == (24.5, 22.0)
@@ -181,16 +186,25 @@ def test_pe_internal_edges():
assert (f"{pp}.pe_math", f"{pp}.pe_tcm") in es
def test_hbm_ctrl_connects_all_routers():
"""HBM_CTRL connects to every router (router_to_hbm / hbm_to_router)."""
def test_per_pe_hbm_ctrl_connects_only_to_owning_router():
"""Each hbm_ctrl.pe{X} connects ONLY to PE_X's attaching router
(ADR-0019 D4). Replaces a prior test that asserted the
spec-violating all-routers consolidation (commit 5917b34)."""
g = _graph()
es = _edge_set(g)
cp = "sip0.cube0"
routers = sorted(n for n in g.nodes if n.startswith(f"{cp}.r"))
assert len(routers) == 32
for r in routers:
assert (r, f"{cp}.hbm_ctrl") in es, f"missing {r}->hbm_ctrl"
assert (f"{cp}.hbm_ctrl", r) in es, f"missing hbm_ctrl->{r}"
pe_router = {0: "r0c0", 1: "r0c1", 2: "r1c4", 3: "r1c5",
4: "r4c0", 5: "r4c1", 6: "r5c4", 7: "r5c5"}
for pe, rkey in pe_router.items():
nid = f"{cp}.hbm_ctrl.pe{pe}"
owner = f"{cp}.{rkey}"
assert (owner, nid) in es, f"missing {owner}{nid}"
assert (nid, owner) in es, f"missing {nid}{owner}"
for other in g.nodes:
if other.startswith(f"{cp}.r") and other != owner:
assert (other, nid) not in es, (
f"unexpected edge {other}{nid}"
)
def test_router_mesh_edges():
@@ -387,7 +401,7 @@ def test_cross_cube_path_includes_conn():
"""PE cross-cube path must traverse conn nodes."""
g = _graph()
router = PathRouter(g)
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.pe0")
conn_nodes = [n for n in path if ".conn" in n]
assert len(conn_nodes) >= 2, f"Expected >=2 conn nodes in path, got {conn_nodes}"