ADR-0019 D1/D4: per-PE HBM CTRL partitioning

Restores per-PE HBM controller partitioning that was lost in
commit 5917b34 ("Replace xbar/bridge/single-NOC with explicit
router mesh"), which had over-consolidated the per-slice HBM CTRL
into a single cube-wide ``hbm_ctrl`` connected to every router —
the opposite of what ADR-0019 D1/D4 specifies.

Builder splits ``hbm_ctrl`` into 8 ``hbm_ctrl.pe{X}`` instances per
cube, each reachable ONLY through PE_X's attaching router via the
existing ``peX.hbm`` attach metadata from cube_mesh.yaml. Cube
aggregate BW now matches the spec (8 PEs × 8 PCs × 32 GB/s =
2048 GB/s) instead of collapsing to 256 GB/s.

AddressResolver decodes the target PE from the HBM PA's hbm_offset
(``offset // slice_size``) and returns ``hbm_ctrl.pe{X}``. PathRouter
uses the existing ``_adj_local`` adjacency for same-cube PE_DMA so
the cube's own UCIe port can no longer appear as a zero-distance
shortcut between routers — local PE_DMA now traverses the mesh,
restoring the ADR-0019 D4 worked example
``PE0.pe_dma → r0c0 → … → r1c4 → hbm_ctrl``.

Tests:
- New tests/test_per_pe_hbm_partition.py: 14 tests covering
  topology shape, per-PE router exclusivity, PA resolution,
  single-hop local path, cross-PE mesh traversal, and end-to-end
  latency monotonicity. Probe CLI now reports
  pe-local < pe-same-half < pe-cross-half (was uniform 141ns).
- Existing tests updated for the new node ids. Two assertions that
  locked in the wrong consolidation are replaced:
  test_noc_mesh.test_hbm_connects_to_all_routers and
  test_topology_compile.test_hbm_ctrl_connects_all_routers are
  now per-PE exclusivity assertions. In addition,
  test_routing.test_all_pe_hbm_equidistant becomes
  test_cross_pe_hbm_distance_increases_with_mesh_hops.
- test_ipcq_buffer_kind_locations.test_hbm_pe_hop_charged_at_large_payload
  threshold recalibrated 4000→1500 ns: the prior figure reflected
  serialization on the over-consolidated single hbm_ctrl; per-PE
  partitioning removes that artificial contention so the gap
  shrinks to the genuine PE↔HBM-hop cost.

Full suite: 645 passed, 1 skipped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-15 01:04:30 -07:00
parent aaa1cbfaf6
commit b8213d43a9
17 changed files with 486 additions and 168 deletions
+39 -23
View File
@@ -259,12 +259,21 @@ def test_no_single_noc_node():
assert "sip0.cube0.noc" not in graph.nodes
def test_single_hbm_ctrl_node():
"""Each cube must have single hbm_ctrl (no slices)."""
def test_per_pe_hbm_ctrl_nodes():
"""Each cube has 8 per-PE HBM CTRL instances (ADR-0019 D1).
Restored from over-consolidation in commit 5917b34. The legacy
single ``sip0.cube0.hbm_ctrl`` is gone; each PE owns its own
``hbm_ctrl.pe{X}`` reachable through that PE's attaching router.
"""
graph = _graph()
assert "sip0.cube0.hbm_ctrl" in graph.nodes
slices = [n for n in graph.nodes if "hbm_ctrl.slice" in n]
assert len(slices) == 0, f"HBM slices should not exist: {slices[:3]}"
for pe in range(8):
assert f"sip0.cube0.hbm_ctrl.pe{pe}" in graph.nodes
# Legacy single hbm_ctrl must not exist
legacy_id = "sip0.cube0.hbm_ctrl"
assert legacy_id not in graph.nodes, (
f"legacy {legacy_id} must be removed (per-PE partitioning, ADR-0019 D1)"
)
def test_router_mesh_edges():
@@ -285,16 +294,23 @@ def test_pe_dma_connects_to_router():
assert pe0_edges[0].dst == "sip0.cube0.r0c0"
def test_hbm_connects_to_all_routers():
"""HBM_CTRL must have edges to all non-null routers."""
def test_each_hbm_ctrl_connects_only_to_owning_router():
"""Each ``hbm_ctrl.pe{X}`` must have exactly one router edge
(router_to_hbm + hbm_to_router) to its owning PE's attaching
router (ADR-0019 D4). Replaces a prior test that required the
single hbm_ctrl to be connected to all routers, which locked in the
spec-violating consolidation introduced in commit 5917b34.
"""
graph = _graph()
hbm_out = [e for e in graph.edges
if e.src == "sip0.cube0.hbm_ctrl" and e.kind == "hbm_to_router"]
mesh = yaml.safe_load(MESH_PATH.read_text())
n_active = sum(1 for v in mesh["routers"].values() if v is not None)
assert len(hbm_out) == n_active, (
f"HBM should connect to {n_active} routers, got {len(hbm_out)}"
)
pe_router = {0: "r0c0", 1: "r0c1", 2: "r1c4", 3: "r1c5",
4: "r4c0", 5: "r4c1", 6: "r5c4", 7: "r5c5"}
for pe, rkey in pe_router.items():
nid = f"sip0.cube0.hbm_ctrl.pe{pe}"
owner = f"sip0.cube0.{rkey}"
outs = [e.dst for e in graph.edges if e.src == nid]
ins = [e.src for e in graph.edges if e.dst == nid]
assert outs == [owner], f"{nid} must out-edge only to {owner}; got {outs}"
assert ins == [owner], f"{nid} must in-edge only from {owner}; got {ins}"
# ══════════════════════════════════════════════════════════════════
@@ -306,18 +322,18 @@ def test_local_hbm_path_through_router():
"""PE0 local HBM: path must go through PE's router to hbm_ctrl."""
graph = _graph()
router = PathRouter(graph)
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
assert "sip0.cube0.r0c0" in path, f"PE0's router r0c0 missing from path: {path}"
assert "sip0.cube0.hbm_ctrl" == path[-1], f"Path should end at hbm_ctrl: {path}"
assert "sip0.cube0.hbm_ctrl.pe0" == path[-1], f"Path should end at hbm_ctrl: {path}"
def test_remote_pe_hbm_has_more_hops():
"""PE4 → PE0's HBM (remote) must have more hops than PE0's local access."""
graph = _graph()
router = PathRouter(graph)
local_path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
local_path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
# PE4 is at r4c0, PE0 at r0c0 — must traverse mesh
remote_path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
remote_path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl.pe0")
# Both should work, local should be shorter or equal
assert len(local_path) >= 2
assert len(remote_path) >= 2
@@ -328,10 +344,10 @@ def test_mcpu_dma_path_through_router_mesh():
graph = _graph()
router = PathRouter(graph)
path = router.find_mcpu_dma_path(
"sip0.cube0.m_cpu", "sip0.cube0.hbm_ctrl"
"sip0.cube0.m_cpu", "sip0.cube0.hbm_ctrl.pe0"
)
assert path[0] == "sip0.cube0.m_cpu"
assert path[-1] == "sip0.cube0.hbm_ctrl"
assert path[-1] == "sip0.cube0.hbm_ctrl.pe0"
assert any("r" in n and "c" in n for n in path), f"Router missing from path: {path}"
@@ -339,9 +355,9 @@ def test_cross_cube_path_through_ucie():
"""Cross-cube HBM: must traverse router → UCIe → remote router → hbm_ctrl."""
graph = _graph()
router = PathRouter(graph)
path = router.find_path("sip0.cube0.pe0", "sip0.cube4.hbm_ctrl")
path = router.find_path("sip0.cube0.pe0", "sip0.cube4.hbm_ctrl.pe0")
assert any("ucie" in n.lower() for n in path), f"UCIe missing: {path}"
assert path[-1] == "sip0.cube4.hbm_ctrl"
assert path[-1] == "sip0.cube4.hbm_ctrl.pe0"
def test_h2d_bypass_path_through_router():
@@ -355,7 +371,7 @@ def test_h2d_bypass_path_through_router():
hbm_target = resolver.resolve(PhysAddr.decode(pa))
path = router.find_memory_path(pcie_ep, hbm_target)
assert path[-1] == "sip0.cube0.hbm_ctrl", f"Path should end at hbm_ctrl: {path}"
assert path[-1] == "sip0.cube0.hbm_ctrl.pe0", f"Path should end at hbm_ctrl: {path}"
assert any("r0c" in n or "r1c" in n for n in path), f"Router missing: {path}"