ADR-0019 D1/D4: per-PE HBM CTRL partitioning
Restores per-PE HBM controller partitioning that was lost in
commit 5917b34 ("Replace xbar/bridge/single-NOC with explicit
router mesh"), which had over-consolidated the per-slice HBM CTRL
into a single cube-wide ``hbm_ctrl`` connected to every router —
the opposite of what ADR-0019 D1/D4 specifies.
Builder splits ``hbm_ctrl`` into 8 ``hbm_ctrl.pe{X}`` instances per
cube, each reachable ONLY through PE_X's attaching router via the
existing ``peX.hbm`` attach metadata from cube_mesh.yaml. Cube
aggregate BW now matches the spec (8 PEs × 8 PCs × 32 GB/s =
2048 GB/s) instead of collapsing to 256 GB/s.
AddressResolver decodes the target PE from the HBM PA's hbm_offset
(``offset // slice_size``) and returns ``hbm_ctrl.pe{X}``. PathRouter
uses the existing ``_adj_local`` adjacency for same-cube PE_DMA so
the cube's own UCIe port can no longer appear as a zero-distance
shortcut between routers — local PE_DMA now traverses the mesh,
restoring the ADR-0019 D4 worked example
``PE0.pe_dma → r0c0 → … → r1c4 → hbm_ctrl``.
Tests:
- New tests/test_per_pe_hbm_partition.py: 14 tests covering
topology shape, per-PE router exclusivity, PA resolution,
single-hop local path, cross-PE mesh traversal, and end-to-end
latency monotonicity. Probe CLI now reports latencies ordered
pe-local < pe-same-half < pe-cross-half (previously a uniform 141 ns).
- Existing tests updated for the new node ids; two assertions that
locked in the wrong consolidation were replaced:
test_noc_mesh.test_hbm_connects_to_all_routers and
test_topology_compile.test_hbm_ctrl_connects_all_routers are
now per-PE exclusivity assertions, and
test_routing.test_all_pe_hbm_equidistant becomes
test_cross_pe_hbm_distance_increases_with_mesh_hops.
- test_ipcq_buffer_kind_locations.test_hbm_pe_hop_charged_at_large_payload
threshold recalibrated 4000→1500 ns: the prior figure reflected
serialization on the over-consolidated single hbm_ctrl; per-PE
partitioning removes that artificial contention so the gap
shrinks to the genuine PE↔HBM-hop cost.
Full suite: 645 passed, 1 skipped.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+45
-34
@@ -17,19 +17,21 @@ def _graph():
|
||||
|
||||
|
||||
def test_resolve_hbm_addr():
|
||||
"""HBM address -> sip{S}.cube{C}.hbm_ctrl (single controller per cube)."""
|
||||
"""HBM address -> sip{S}.cube{C}.hbm_ctrl.pe{X} (per-PE controller, ADR-0019 D1)."""
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
# offset 0x1000 falls inside PE0's slice (slice_size = 6 GB)
|
||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=3, hbm_offset=0x1000)
|
||||
assert resolver.resolve(pa) == "sip0.cube3.hbm_ctrl"
|
||||
assert resolver.resolve(pa) == "sip0.cube3.hbm_ctrl.pe0"
|
||||
|
||||
|
||||
def test_resolve_hbm_addr_high_offset():
|
||||
"""HBM address with large offset still resolves to same hbm_ctrl."""
|
||||
"""HBM offset that lands in PE4's slice must resolve to hbm_ctrl.pe4."""
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
# 0x600000000 / (6 GB) = 4
|
||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=0, hbm_offset=0x600000000)
|
||||
assert resolver.resolve(pa) == "sip0.cube0.hbm_ctrl"
|
||||
assert resolver.resolve(pa) == "sip0.cube0.hbm_ctrl.pe4"
|
||||
|
||||
|
||||
def test_resolve_pe_tcm_addr():
|
||||
@@ -73,12 +75,12 @@ def test_resolve_nonexistent_node():
|
||||
|
||||
|
||||
def test_path_local_hbm():
|
||||
"""PE0 -> hbm_ctrl: pe_dma -> router -> hbm_ctrl (through router mesh)."""
|
||||
"""PE0 -> own slice: pe_dma -> r0c0 -> hbm_ctrl.pe0 (1 mesh hop)."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||
assert path[0] == "sip0.cube0.pe0.pe_dma"
|
||||
assert path[-1] == "sip0.cube0.hbm_ctrl"
|
||||
assert path[-1] == "sip0.cube0.hbm_ctrl.pe0"
|
||||
# Path must go through at least one router node
|
||||
assert any(n.startswith("sip0.cube0.r") for n in path), \
|
||||
"HBM path must traverse router mesh"
|
||||
@@ -90,56 +92,61 @@ def test_path_local_hbm():
|
||||
|
||||
|
||||
def test_path_remote_pe_hbm():
|
||||
"""PE4 (bottom half) -> hbm_ctrl: routes through router mesh."""
|
||||
"""PE4 (bottom half) -> its own slice: routes through router mesh."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
|
||||
path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl.pe4")
|
||||
assert path[0] == "sip0.cube0.pe4.pe_dma"
|
||||
assert path[-1] == "sip0.cube0.hbm_ctrl"
|
||||
assert path[-1] == "sip0.cube0.hbm_ctrl.pe4"
|
||||
assert any(n.startswith("sip0.cube0.r") for n in path)
|
||||
assert not any("xbar" in n or "bridge" in n for n in path)
|
||||
|
||||
|
||||
# ── PathRouter: all PEs equidistant to HBM (n_to_one routing weight) ─
|
||||
# ── PathRouter: cross-PE HBM distance reflects mesh hops (ADR-0019 D4) ─
|
||||
|
||||
|
||||
def test_all_pe_hbm_equidistant():
|
||||
"""All PEs in a cube have equal routing distance to hbm_ctrl.
|
||||
def test_cross_pe_hbm_distance_increases_with_mesh_hops():
|
||||
"""Restored ADR-0019 D4 behavior: accessing another PE's HBM slice
|
||||
must take more routing distance than accessing one's own slice,
|
||||
because each per-PE hbm_ctrl is reachable only via its PE's router.
|
||||
|
||||
With n_to_one mapping and high routing weight on HBM edges,
|
||||
all PE->hbm_ctrl paths have the same accumulated distance.
|
||||
Replaces a previous ``test_all_pe_hbm_equidistant`` that asserted the
|
||||
over-consolidated (spec-violating) behavior introduced in 5917b34.
|
||||
"""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
distances = []
|
||||
for pe in range(8):
|
||||
_, dist = router.find_path_with_distance(
|
||||
f"sip0.cube0.pe{pe}", "sip0.cube0.hbm_ctrl")
|
||||
distances.append(dist)
|
||||
# All distances should be equal
|
||||
assert all(d == distances[0] for d in distances), (
|
||||
f"expected equal distances, got: {distances}"
|
||||
_, dist_local = router.find_path_with_distance(
|
||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||
_, dist_to_pe7 = router.find_path_with_distance(
|
||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe7")
|
||||
assert dist_to_pe7 > dist_local, (
|
||||
f"pe0→pe7_slice should require more mesh distance than pe0→pe0_slice; "
|
||||
f"got local={dist_local}, to_pe7={dist_to_pe7}"
|
||||
)
|
||||
|
||||
|
||||
def test_remote_pe_distance_not_less_than_local():
|
||||
"""Remote PE HBM distance >= local PE HBM distance (mesh topology)."""
|
||||
"""PE4 -> pe0_slice distance >= PE0 -> pe0_slice distance.
|
||||
|
||||
Both access pe0's slice (hbm_ctrl.pe0). PE0's path is shortest; PE4
|
||||
must mesh-route up to r0c0 before entering the slice.
|
||||
"""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
_, dist_pe0 = router.find_path_with_distance(
|
||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||
_, dist_pe4 = router.find_path_with_distance(
|
||||
"sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
|
||||
"sip0.cube0.pe4", "sip0.cube0.hbm_ctrl.pe0")
|
||||
assert dist_pe4 >= dist_pe0
|
||||
|
||||
|
||||
def test_path_remote_cube_hbm():
|
||||
"""PE0 in cube0 can reach HBM in cube1 via UCIe (ADR-0004 D4)."""
|
||||
"""PE0 in cube0 can reach pe0's HBM in cube1 via UCIe (ADR-0004 D4)."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.pe0")
|
||||
assert path[0] == "sip0.cube0.pe0.pe_dma"
|
||||
assert path[-1] == "sip0.cube1.hbm_ctrl"
|
||||
assert path[-1] == "sip0.cube1.hbm_ctrl.pe0"
|
||||
# inter-cube path must cross a UCIe link
|
||||
assert any("ucie" in n.lower() for n in path), \
|
||||
"remote cube path must traverse UCIe"
|
||||
@@ -182,11 +189,15 @@ def test_path_local_tcm():
|
||||
|
||||
|
||||
def test_path_distance_positive():
|
||||
"""All routed paths must have accumulated distance > 0 (ADR-0002 D4)."""
|
||||
"""Routed paths that traverse the mesh must have positive accumulated
|
||||
distance (ADR-0002 D4). Use a cross-PE target so the path includes
|
||||
inter-router mesh edges (which have non-zero distance_mm). The
|
||||
single-hop pe0→pe0_slice path stays at 0 because PE_DMA↔router and
|
||||
router↔hbm_ctrl are zero-length placements within the same corner."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
_, dist = router.find_path_with_distance(
|
||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe7")
|
||||
assert dist > 0
|
||||
|
||||
|
||||
@@ -195,8 +206,8 @@ def test_path_deterministic():
|
||||
g = _graph()
|
||||
r1 = PathRouter(g)
|
||||
r2 = PathRouter(g)
|
||||
p1 = r1.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl")
|
||||
p2 = r2.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl")
|
||||
p1 = r1.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl.pe0")
|
||||
p2 = r2.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl.pe0")
|
||||
assert p1 == p2
|
||||
|
||||
|
||||
@@ -205,5 +216,5 @@ def test_remote_cube_path_no_routing_error():
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
# cube0.PE0 -> cube1.hbm_ctrl (adjacent cube, E direction)
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.pe0")
|
||||
assert len(path) >= 1 # succeeds without exception
|
||||
|
||||
Reference in New Issue
Block a user