ADR-0019 D1/D4: per-PE HBM CTRL partitioning
Restores per-PE HBM controller partitioning that was lost in
commit 5917b34 ("Replace xbar/bridge/single-NOC with explicit
router mesh"), which had over-consolidated the per-slice HBM CTRL
into a single cube-wide ``hbm_ctrl`` connected to every router —
the opposite of what ADR-0019 D1/D4 specifies.
Builder splits ``hbm_ctrl`` into 8 ``hbm_ctrl.pe{X}`` instances per
cube, each reachable ONLY through PE_X's attaching router via the
existing ``peX.hbm`` attach metadata from cube_mesh.yaml. Cube
aggregate BW now matches the spec (8 PEs × 8 PCs × 32 GB/s =
2048 GB/s) instead of collapsing to 256 GB/s.
AddressResolver decodes the target PE from the HBM PA's hbm_offset
(``offset // slice_size``) and returns ``hbm_ctrl.pe{X}``. PathRouter
uses the existing ``_adj_local`` adjacency for same-cube PE_DMA so
the cube's own UCIe port can no longer appear as a zero-distance
shortcut between routers — local PE_DMA now traverses the mesh,
restoring the ADR-0019 D4 worked example
``PE0.pe_dma → r0c0 → … → r1c4 → hbm_ctrl``.
Tests:
- New tests/test_per_pe_hbm_partition.py: 14 tests covering
topology shape, per-PE router exclusivity, PA resolution,
single-hop local path, cross-PE mesh traversal, and end-to-end
latency monotonicity. Probe CLI now reports latency ordered as
pe-local < pe-same-half < pe-cross-half (was a uniform 141 ns).
- Existing tests updated for the new node ids, and the assertions
that locked in the wrong consolidation were replaced:
test_noc_mesh.test_hbm_connects_to_all_routers and
test_topology_compile.test_hbm_ctrl_connects_all_routers are
now per-PE exclusivity assertions;
test_routing.test_all_pe_hbm_equidistant becomes
test_cross_pe_hbm_distance_increases_with_mesh_hops.
- test_ipcq_buffer_kind_locations.test_hbm_pe_hop_charged_at_large_payload
threshold recalibrated 4000→1500 ns: the prior figure reflected
serialization on the over-consolidated single hbm_ctrl; per-PE
partitioning removes that artificial contention so the gap
shrinks to the genuine PE↔HBM-hop cost.
Full suite: 645 passed, 1 skipped.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -404,20 +404,26 @@ def _instantiate_cube(
|
||||
label=name.upper().replace("_", " "),
|
||||
)
|
||||
|
||||
# ── HBM controller (single node, ADR-0019 D1, ADR-0033) ──
|
||||
# ── Per-PE HBM controller (ADR-0019 D1/D4) ──
|
||||
# Each PE owns one slice of the cube's HBM. The slice has its own
|
||||
# set of pseudo-channels and is reachable ONLY through that PE's
|
||||
# attaching router (see cube_mesh.yaml ``peX.hbm`` attach lists).
|
||||
# Restored after the ADR-0019 over-consolidation in commit 5917b34.
|
||||
hbm_spec = cube["components"]["hbm_ctrl"]
|
||||
hbm_lx, hbm_ly = local_pos["hbm_ctrl"]
|
||||
hbm_id = f"{cp}.hbm_ctrl"
|
||||
hbm_attrs = dict(hbm_spec["attrs"])
|
||||
_hbm_total_bw = float(cube["links"].get("hbm_to_router_bw_gbs", 256.0))
|
||||
_num_pcs = int(hbm_attrs.get("num_pcs", 8))
|
||||
hbm_attrs["num_pcs"] = _num_pcs
|
||||
hbm_attrs["pc_bw_gbs"] = _hbm_total_bw / _num_pcs
|
||||
nodes[hbm_id] = Node(
|
||||
id=hbm_id, kind=hbm_spec["kind"], impl=hbm_spec["impl"],
|
||||
attrs=hbm_attrs, pos_mm=(ox + hbm_lx, oy + hbm_ly),
|
||||
label="HBM CTRL",
|
||||
)
|
||||
_num_pcs = int(hbm_spec["attrs"].get("num_pcs", 8))
|
||||
pes_per_cube = int(cube["memory_map"].get("hbm_slices_per_cube", 8))
|
||||
for pe_idx in range(pes_per_cube):
|
||||
pe_hbm_id = f"{cp}.hbm_ctrl.pe{pe_idx}"
|
||||
pe_hbm_attrs = dict(hbm_spec["attrs"])
|
||||
pe_hbm_attrs["num_pcs"] = _num_pcs
|
||||
pe_hbm_attrs["pc_bw_gbs"] = _hbm_total_bw / _num_pcs
|
||||
nodes[pe_hbm_id] = Node(
|
||||
id=pe_hbm_id, kind=hbm_spec["kind"], impl=hbm_spec["impl"],
|
||||
attrs=pe_hbm_attrs, pos_mm=(ox + hbm_lx, oy + hbm_ly),
|
||||
label=f"HBM CTRL pe{pe_idx}",
|
||||
)
|
||||
|
||||
# ── Router mesh from cube_mesh.yaml (ADR-0019 D3) ──
|
||||
routers = mesh_data["routers"]
|
||||
@@ -566,7 +572,22 @@ def _instantiate_cube(
|
||||
kind="command",
|
||||
))
|
||||
elif item.endswith(".hbm"):
|
||||
pass # HBM edges handled below (all routers)
|
||||
# peX.hbm: router rXcY owns the entry to hbm_ctrl.peX.
|
||||
# (ADR-0019 D1/D4 — per-PE HBM partitioning.)
|
||||
pe_prefix = item.rsplit(".", 1)[0]
|
||||
pe_idx = int(pe_prefix.replace("pe", ""))
|
||||
pe_hbm_id = f"{cp}.hbm_ctrl.pe{pe_idx}"
|
||||
if pe_hbm_id in nodes:
|
||||
edges.append(Edge(
|
||||
src=rid, dst=pe_hbm_id,
|
||||
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
||||
kind="router_to_hbm",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=pe_hbm_id, dst=rid,
|
||||
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
||||
kind="hbm_to_router",
|
||||
))
|
||||
elif item == "m_cpu":
|
||||
# M_CPU ↔ router
|
||||
mcpu_id = f"{cp}.m_cpu"
|
||||
@@ -623,24 +644,10 @@ def _instantiate_cube(
|
||||
kind="router_to_ucie_conn",
|
||||
))
|
||||
|
||||
# ── HBM_CTRL ↔ all routers (ADR-0019 D1) ──
|
||||
# High routing weight prevents Dijkstra from using HBM as transit shortcut
|
||||
for rkey, rval in routers.items():
|
||||
if rval is None:
|
||||
continue
|
||||
rid = f"{cp}.{rkey}"
|
||||
edges.append(Edge(
|
||||
src=rid, dst=hbm_id,
|
||||
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
||||
routing_weight_mm=1000.0,
|
||||
kind="router_to_hbm",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=hbm_id, dst=rid,
|
||||
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
||||
routing_weight_mm=1000.0,
|
||||
kind="hbm_to_router",
|
||||
))
|
||||
# NOTE: HBM↔router edges are created in the per-router attach loop
|
||||
# above (peX.hbm items map router → hbm_ctrl.peX). Removed the
|
||||
# legacy "all routers → single hbm_ctrl" loop that bypassed the
|
||||
# ADR-0019 D4 per-PE partition.
|
||||
|
||||
|
||||
def _add_pe_internal_edges(edges: list[Edge], pp: str, pe_links: dict) -> None:
|
||||
|
||||
Reference in New Issue
Block a user