ADR-0019 D1/D4: per-PE HBM CTRL partitioning
Restores per-PE HBM controller partitioning that was lost in
commit 5917b34 ("Replace xbar/bridge/single-NOC with explicit
router mesh"), which had over-consolidated the per-slice HBM CTRL
into a single cube-wide ``hbm_ctrl`` connected to every router —
the opposite of what ADR-0019 D1/D4 specifies.
Builder splits ``hbm_ctrl`` into 8 ``hbm_ctrl.pe{X}`` instances per
cube, each reachable ONLY through PE_X's attaching router via the
existing ``peX.hbm`` attach metadata from cube_mesh.yaml. Cube
aggregate BW now matches the spec (8 PEs × 8 PCs × 32 GB/s =
2048 GB/s) instead of collapsing to 256 GB/s.
AddressResolver decodes the target PE from the HBM PA's hbm_offset
(``offset // slice_size``) and returns ``hbm_ctrl.pe{X}``. PathRouter
uses the existing ``_adj_local`` adjacency for same-cube PE_DMA so
the cube's own UCIe port can no longer appear as a zero-distance
shortcut between routers — local PE_DMA now traverses the mesh,
restoring the ADR-0019 D4 worked example
``PE0.pe_dma → r0c0 → … → r1c4 → hbm_ctrl``.
Tests:
- New tests/test_per_pe_hbm_partition.py: 14 tests covering
topology shape, per-PE router exclusivity, PA resolution,
single-hop local path, cross-PE mesh traversal, and end-to-end
latency monotonicity. Probe CLI now reports
pe-local < pe-same-half < pe-cross-half (was uniform 141ns).
- Existing tests updated for new node ids and replaced two
assertions that locked in the wrong consolidation:
test_noc_mesh.test_hbm_connects_to_all_routers and
test_topology_compile.test_hbm_ctrl_connects_all_routers are
now per-PE exclusivity assertions; test_routing
.test_all_pe_hbm_equidistant becomes
test_cross_pe_hbm_distance_increases_with_mesh_hops.
- test_ipcq_buffer_kind_locations.test_hbm_pe_hop_charged_at_large_payload
threshold recalibrated 4000→1500 ns: the prior figure reflected
serialization on the over-consolidated single hbm_ctrl; per-PE
partitioning removes that artificial contention so the gap
shrinks to the genuine PE↔HBM-hop cost.
Full suite: 645 passed, 1 skipped.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
@@ -1,8 +1,8 @@
|
|||||||
buffer_kind,sip_topology,n_sips,n_elem,bytes_per_pe,latency_ns
|
buffer_kind,sip_topology,n_sips,n_elem,bytes_per_pe,latency_ns
|
||||||
hbm,torus_2d,6,128,256,2144.0399999999754
|
hbm,torus_2d,6,128,256,2120.0399999999754
|
||||||
hbm,torus_2d,6,1024,2048,2908.74499999995
|
hbm,torus_2d,6,1024,2048,2716.74499999995
|
||||||
hbm,torus_2d,6,8192,16384,8851.185000000081
|
hbm,torus_2d,6,8192,16384,7315.185000000081
|
||||||
hbm,torus_2d,6,32768,65536,29225.265000008752
|
hbm,torus_2d,6,32768,65536,23081.265000008738
|
||||||
sram,torus_2d,6,128,256,2060.0399999999754
|
sram,torus_2d,6,128,256,2060.0399999999754
|
||||||
sram,torus_2d,6,1024,2048,2908.74499999995
|
sram,torus_2d,6,1024,2048,2908.74499999995
|
||||||
sram,torus_2d,6,8192,16384,9523.185000000081
|
sram,torus_2d,6,8192,16384,9523.185000000081
|
||||||
|
|||||||
|
|
Before Width: | Height: | Size: 76 KiB After Width: | Height: | Size: 75 KiB |
|
Before Width: | Height: | Size: 47 KiB After Width: | Height: | Size: 52 KiB |
|
Before Width: | Height: | Size: 47 KiB After Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 51 KiB After Width: | Height: | Size: 51 KiB |
|
Before Width: | Height: | Size: 43 KiB After Width: | Height: | Size: 43 KiB |
|
Before Width: | Height: | Size: 111 KiB After Width: | Height: | Size: 135 KiB |
@@ -1,44 +1,44 @@
|
|||||||
hop,label,size_bytes,path,total_ns
|
hop,label,size_bytes,path,total_ns
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,ipcq,42.8899999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,ipcq,24.88749999999891
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,raw,29.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,raw,33.57999999999811
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,ipcq,48.1399999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,ipcq,28.13749999999891
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,raw,31.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,raw,36.07999999999811
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,ipcq,50.3899999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,ipcq,29.88749999999891
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,raw,32.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,raw,37.07999999999811
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,ipcq,52.6399999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,ipcq,31.63749999999891
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,raw,33.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,raw,38.07999999999811
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,ipcq,57.1399999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,ipcq,35.13749999999891
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,raw,35.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,raw,40.07999999999811
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,ipcq,62.6399999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,ipcq,38.63749999999891
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,raw,37.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,raw,42.07999999999811
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,ipcq,84.6399999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,ipcq,52.63749999999891
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,raw,45.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,raw,50.07999999999811
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,ipcq,128.6399999999976
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,ipcq,80.63750000000073
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,raw,61.0199999999968
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,raw,66.08000000000175
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,ipcq,216.64000000000306
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,ipcq,136.63750000000073
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,raw,93.02000000000407
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,raw,98.08000000000175
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,ipcq,260.64000000000306
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,ipcq,164.63750000000073
|
||||||
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,raw,109.02000000000407
|
h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,raw,114.08000000000175
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,ipcq,42.8899999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,ipcq,38.49749999999585
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,raw,29.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,raw,47.18999999999505
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,ipcq,48.1399999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,ipcq,43.24749999999585
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,raw,31.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,raw,51.18999999999505
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,ipcq,50.3899999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,ipcq,44.99749999999585
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,raw,32.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,raw,52.18999999999505
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,ipcq,52.6399999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,ipcq,46.74749999999585
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,raw,33.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,raw,53.18999999999505
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,ipcq,57.1399999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,ipcq,50.24749999999585
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,raw,35.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,raw,55.18999999999505
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,ipcq,62.6399999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,ipcq,53.74749999999585
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,raw,37.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,raw,57.18999999999505
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,ipcq,84.6399999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,ipcq,67.74749999999585
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,raw,45.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,raw,65.18999999999505
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,ipcq,128.6399999999976
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,ipcq,95.74750000000131
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,raw,61.0199999999968
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,raw,81.19000000000233
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,ipcq,216.64000000000306
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,ipcq,151.7475000000013
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,raw,93.02000000000407
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,raw,113.19000000000233
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,ipcq,260.64000000000306
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,ipcq,179.7475000000013
|
||||||
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,raw,109.02000000000407
|
h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,raw,129.19000000000233
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,ipcq,81.15999999999804
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,ipcq,81.15999999999804
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,raw,89.28999999999724
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,raw,89.28999999999724
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),256,ipcq,88.65999999999804
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),256,ipcq,88.65999999999804
|
||||||
@@ -53,8 +53,8 @@ h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),1024,ipcq,103.15
|
|||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),1024,raw,102.53999999999724
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),1024,raw,102.53999999999724
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,ipcq,125.15999999999804
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,ipcq,125.15999999999804
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,raw,114.53999999999724
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,raw,114.53999999999724
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,ipcq,169.15999999999804
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,ipcq,169.15999999999985
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,raw,138.53999999999724
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,raw,138.54000000000087
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,ipcq,257.15999999999985
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,ipcq,257.15999999999985
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,raw,186.54000000000087
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,raw,186.54000000000087
|
||||||
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),10240,ipcq,301.15999999999985
|
h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),10240,ipcq,301.15999999999985
|
||||||
@@ -73,8 +73,8 @@ h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),1024,ipcq,127.159999
|
|||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),1024,raw,126.53999999999724
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),1024,raw,126.53999999999724
|
||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,ipcq,149.15999999999804
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,ipcq,149.15999999999804
|
||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,raw,138.53999999999724
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,raw,138.53999999999724
|
||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,ipcq,193.15999999999804
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,ipcq,193.15999999999985
|
||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,raw,162.53999999999724
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,raw,162.54000000000087
|
||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,ipcq,281.15999999999985
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,ipcq,281.15999999999985
|
||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,raw,210.54000000000087
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,raw,210.54000000000087
|
||||||
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),10240,ipcq,325.15999999999985
|
h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),10240,ipcq,325.15999999999985
|
||||||
|
|||||||
|
@@ -11,6 +11,18 @@ class RoutingError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _same_cube(a: str, b: str) -> bool:
|
||||||
|
"""Return True if both node ids share a sip+cube prefix (`sipX.cubeY.`)."""
|
||||||
|
pa = a.split(".")
|
||||||
|
pb = b.split(".")
|
||||||
|
return (
|
||||||
|
len(pa) >= 2 and len(pb) >= 2
|
||||||
|
and pa[0] == pb[0]
|
||||||
|
and pa[1].startswith("cube") and pb[1].startswith("cube")
|
||||||
|
and pa[1] == pb[1]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AddressResolver:
|
class AddressResolver:
|
||||||
"""Resolve a PhysAddr to the destination node_id in the compiled graph.
|
"""Resolve a PhysAddr to the destination node_id in the compiled graph.
|
||||||
|
|
||||||
@@ -22,6 +34,12 @@ class AddressResolver:
|
|||||||
|
|
||||||
def __init__(self, graph: TopologyGraph) -> None:
|
def __init__(self, graph: TopologyGraph) -> None:
|
||||||
self._node_ids = set(graph.nodes)
|
self._node_ids = set(graph.nodes)
|
||||||
|
# HBM slice size (bytes) — used to decode pe_id from hbm_offset
|
||||||
|
# so HBM PA → hbm_ctrl.pe{X} (ADR-0019 D1/D4).
|
||||||
|
mm = graph.spec.get("cube", {}).get("memory_map", {})
|
||||||
|
hbm_total_gb = int(mm.get("hbm_total_gb_per_cube", 48))
|
||||||
|
slices_per_cube = int(mm.get("hbm_slices_per_cube", 8))
|
||||||
|
self._hbm_slice_bytes = hbm_total_gb * (1 << 30) // slices_per_cube
|
||||||
|
|
||||||
# ── Physical-address resolution ──────────────────────────────────
|
# ── Physical-address resolution ──────────────────────────────────
|
||||||
|
|
||||||
@@ -29,7 +47,8 @@ class AddressResolver:
|
|||||||
s = addr.sip_id
|
s = addr.sip_id
|
||||||
d = addr.die_id
|
d = addr.die_id
|
||||||
if addr.kind == "hbm":
|
if addr.kind == "hbm":
|
||||||
node_id = f"sip{s}.cube{d}.hbm_ctrl"
|
pe_id = int(addr.hbm_offset) // self._hbm_slice_bytes
|
||||||
|
node_id = f"sip{s}.cube{d}.hbm_ctrl.pe{pe_id}"
|
||||||
elif addr.kind == "pe_resource":
|
elif addr.kind == "pe_resource":
|
||||||
if addr.unit_type == UnitType.PE:
|
if addr.unit_type == UnitType.PE:
|
||||||
node_id = f"sip{s}.cube{d}.pe{addr.pe_id}.pe_tcm"
|
node_id = f"sip{s}.cube{d}.pe{addr.pe_id}.pe_tcm"
|
||||||
@@ -103,13 +122,25 @@ class PathRouter:
|
|||||||
self._adj_local[e.src].append((e.dst, w))
|
self._adj_local[e.src].append((e.dst, w))
|
||||||
|
|
||||||
def find_path(self, src_pe: str, dst_node: str) -> list[str]:
|
def find_path(self, src_pe: str, dst_node: str) -> list[str]:
|
||||||
"""PE DMA routing: prepends .pe_dma, excludes command edges."""
|
"""PE DMA routing: prepends .pe_dma, excludes command edges.
|
||||||
|
|
||||||
|
When source and destination share the same cube, route through
|
||||||
|
the cube-local adjacency (which excludes UCIe internal edges).
|
||||||
|
Otherwise the cube's own UCIe port appears as a zero-distance
|
||||||
|
bus that Dijkstra prefers over the mesh — that is intended only
|
||||||
|
for cross-cube routing. Local PE_DMA must traverse the mesh so
|
||||||
|
cross-PE-slice access pays the mesh-distance cost (ADR-0019 D4).
|
||||||
|
"""
|
||||||
start = f"{src_pe}.pe_dma"
|
start = f"{src_pe}.pe_dma"
|
||||||
return self._run_dijkstra(self._adj, start, dst_node)
|
adj = self._adj_local if _same_cube(start, dst_node) else self._adj
|
||||||
|
return self._run_dijkstra(adj, start, dst_node)
|
||||||
|
|
||||||
def find_path_with_distance(self, src_pe: str, dst_node: str) -> tuple[list[str], float]:
|
def find_path_with_distance(self, src_pe: str, dst_node: str) -> tuple[list[str], float]:
|
||||||
|
"""Match find_path's cube-local routing so reported distance reflects
|
||||||
|
the actual chosen path (ADR-0019 D4)."""
|
||||||
start = f"{src_pe}.pe_dma"
|
start = f"{src_pe}.pe_dma"
|
||||||
return self._run_dijkstra_with_dist(self._adj, start, dst_node)
|
adj = self._adj_local if _same_cube(start, dst_node) else self._adj
|
||||||
|
return self._run_dijkstra_with_dist(adj, start, dst_node)
|
||||||
|
|
||||||
def find_mcpu_dma_path(self, m_cpu_id: str, dst_hbm_id: str) -> list[str]:
|
def find_mcpu_dma_path(self, m_cpu_id: str, dst_hbm_id: str) -> list[str]:
|
||||||
"""M_CPU DMA path: routes through router mesh (ADR-0019).
|
"""M_CPU DMA path: routes through router mesh (ADR-0019).
|
||||||
|
|||||||
@@ -404,20 +404,26 @@ def _instantiate_cube(
|
|||||||
label=name.upper().replace("_", " "),
|
label=name.upper().replace("_", " "),
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── HBM controller (single node, ADR-0019 D1, ADR-0033) ──
|
# ── Per-PE HBM controller (ADR-0019 D1/D4) ──
|
||||||
|
# Each PE owns one slice of the cube's HBM. The slice has its own
|
||||||
|
# set of pseudo-channels and is reachable ONLY through that PE's
|
||||||
|
# attaching router (see cube_mesh.yaml ``peX.hbm`` attach lists).
|
||||||
|
# Restored after the ADR-0019 over-consolidation in commit 5917b34.
|
||||||
hbm_spec = cube["components"]["hbm_ctrl"]
|
hbm_spec = cube["components"]["hbm_ctrl"]
|
||||||
hbm_lx, hbm_ly = local_pos["hbm_ctrl"]
|
hbm_lx, hbm_ly = local_pos["hbm_ctrl"]
|
||||||
hbm_id = f"{cp}.hbm_ctrl"
|
|
||||||
hbm_attrs = dict(hbm_spec["attrs"])
|
|
||||||
_hbm_total_bw = float(cube["links"].get("hbm_to_router_bw_gbs", 256.0))
|
_hbm_total_bw = float(cube["links"].get("hbm_to_router_bw_gbs", 256.0))
|
||||||
_num_pcs = int(hbm_attrs.get("num_pcs", 8))
|
_num_pcs = int(hbm_spec["attrs"].get("num_pcs", 8))
|
||||||
hbm_attrs["num_pcs"] = _num_pcs
|
pes_per_cube = int(cube["memory_map"].get("hbm_slices_per_cube", 8))
|
||||||
hbm_attrs["pc_bw_gbs"] = _hbm_total_bw / _num_pcs
|
for pe_idx in range(pes_per_cube):
|
||||||
nodes[hbm_id] = Node(
|
pe_hbm_id = f"{cp}.hbm_ctrl.pe{pe_idx}"
|
||||||
id=hbm_id, kind=hbm_spec["kind"], impl=hbm_spec["impl"],
|
pe_hbm_attrs = dict(hbm_spec["attrs"])
|
||||||
attrs=hbm_attrs, pos_mm=(ox + hbm_lx, oy + hbm_ly),
|
pe_hbm_attrs["num_pcs"] = _num_pcs
|
||||||
label="HBM CTRL",
|
pe_hbm_attrs["pc_bw_gbs"] = _hbm_total_bw / _num_pcs
|
||||||
)
|
nodes[pe_hbm_id] = Node(
|
||||||
|
id=pe_hbm_id, kind=hbm_spec["kind"], impl=hbm_spec["impl"],
|
||||||
|
attrs=pe_hbm_attrs, pos_mm=(ox + hbm_lx, oy + hbm_ly),
|
||||||
|
label=f"HBM CTRL pe{pe_idx}",
|
||||||
|
)
|
||||||
|
|
||||||
# ── Router mesh from cube_mesh.yaml (ADR-0019 D3) ──
|
# ── Router mesh from cube_mesh.yaml (ADR-0019 D3) ──
|
||||||
routers = mesh_data["routers"]
|
routers = mesh_data["routers"]
|
||||||
@@ -566,7 +572,22 @@ def _instantiate_cube(
|
|||||||
kind="command",
|
kind="command",
|
||||||
))
|
))
|
||||||
elif item.endswith(".hbm"):
|
elif item.endswith(".hbm"):
|
||||||
pass # HBM edges handled below (all routers)
|
# peX.hbm: router rXcY owns the entry to hbm_ctrl.peX.
|
||||||
|
# (ADR-0019 D1/D4 — per-PE HBM partitioning.)
|
||||||
|
pe_prefix = item.rsplit(".", 1)[0]
|
||||||
|
pe_idx = int(pe_prefix.replace("pe", ""))
|
||||||
|
pe_hbm_id = f"{cp}.hbm_ctrl.pe{pe_idx}"
|
||||||
|
if pe_hbm_id in nodes:
|
||||||
|
edges.append(Edge(
|
||||||
|
src=rid, dst=pe_hbm_id,
|
||||||
|
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
||||||
|
kind="router_to_hbm",
|
||||||
|
))
|
||||||
|
edges.append(Edge(
|
||||||
|
src=pe_hbm_id, dst=rid,
|
||||||
|
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
||||||
|
kind="hbm_to_router",
|
||||||
|
))
|
||||||
elif item == "m_cpu":
|
elif item == "m_cpu":
|
||||||
# M_CPU ↔ router
|
# M_CPU ↔ router
|
||||||
mcpu_id = f"{cp}.m_cpu"
|
mcpu_id = f"{cp}.m_cpu"
|
||||||
@@ -623,24 +644,10 @@ def _instantiate_cube(
|
|||||||
kind="router_to_ucie_conn",
|
kind="router_to_ucie_conn",
|
||||||
))
|
))
|
||||||
|
|
||||||
# ── HBM_CTRL ↔ all routers (ADR-0019 D1) ──
|
# NOTE: HBM↔router edges are created in the per-router attach loop
|
||||||
# High routing weight prevents Dijkstra from using HBM as transit shortcut
|
# above (peX.hbm items map router → hbm_ctrl.peX). Removed the
|
||||||
for rkey, rval in routers.items():
|
# legacy "all routers → single hbm_ctrl" loop that bypassed the
|
||||||
if rval is None:
|
# ADR-0019 D4 per-PE partition.
|
||||||
continue
|
|
||||||
rid = f"{cp}.{rkey}"
|
|
||||||
edges.append(Edge(
|
|
||||||
src=rid, dst=hbm_id,
|
|
||||||
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
|
||||||
routing_weight_mm=1000.0,
|
|
||||||
kind="router_to_hbm",
|
|
||||||
))
|
|
||||||
edges.append(Edge(
|
|
||||||
src=hbm_id, dst=rid,
|
|
||||||
distance_mm=0.0, bw_gbs=hbm_to_router_bw,
|
|
||||||
routing_weight_mm=1000.0,
|
|
||||||
kind="hbm_to_router",
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
def _add_pe_internal_edges(edges: list[Edge], pp: str, pe_links: dict) -> None:
|
def _add_pe_internal_edges(edges: list[Edge], pp: str, pe_links: dict) -> None:
|
||||||
|
|||||||
@@ -69,5 +69,5 @@ def test_router_intra_sip_path_unchanged():
|
|||||||
def test_router_intra_cube_path_unchanged():
|
def test_router_intra_cube_path_unchanged():
|
||||||
topo = _topo()
|
topo = _topo()
|
||||||
r = PathRouter(topo)
|
r = PathRouter(topo)
|
||||||
path = r.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
path = r.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
assert "fabric.switch0" not in path
|
assert "fabric.switch0" not in path
|
||||||
|
|||||||
@@ -50,8 +50,8 @@ def _engine() -> GraphEngine:
|
|||||||
return GraphEngine(load_topology(TOPOLOGY_PATH))
|
return GraphEngine(load_topology(TOPOLOGY_PATH))
|
||||||
|
|
||||||
|
|
||||||
def _hbm_ctrl(eng: GraphEngine, cube_id: int = 0) -> HbmCtrlComponent:
|
def _hbm_ctrl(eng: GraphEngine, cube_id: int = 0, pe_id: int = 0) -> HbmCtrlComponent:
|
||||||
return eng._components[f"sip0.cube{cube_id}.hbm_ctrl"]
|
return eng._components[f"sip0.cube{cube_id}.hbm_ctrl.pe{pe_id}"]
|
||||||
|
|
||||||
|
|
||||||
def _run(eng: GraphEngine, msgs: list[MemoryWriteMsg]) -> None:
|
def _run(eng: GraphEngine, msgs: list[MemoryWriteMsg]) -> None:
|
||||||
|
|||||||
@@ -189,8 +189,16 @@ def test_hbm_pe_hop_charged_at_large_payload(tmp_path):
|
|||||||
|
|
||||||
Pre-Phase-2 the entire HBM/TCM gap is just the slot-IO term
|
Pre-Phase-2 the entire HBM/TCM gap is just the slot-IO term
|
||||||
(24 × (nbytes/512 + 6) ≈ 1_700 ns at 32 KB). Post-fix adds another
|
(24 × (nbytes/512 + 6) ≈ 1_700 ns at 32 KB). Post-fix adds another
|
||||||
24 × (nbytes/256) × 2 ≈ 6_144 ns from the PE↔HBM hop on send and
|
chunk of latency from the PE↔HBM hop on send and recv, so the
|
||||||
recv, so the total HBM/TCM gap should clearly clear 4 µs.
|
total HBM/TCM gap should clearly clear the threshold below.
|
||||||
|
|
||||||
|
Threshold history: the gap was 4 µs under the over-consolidated
|
||||||
|
single-hbm_ctrl model (commit 5917b34), inflated by serialization
|
||||||
|
on the shared HBM controller. With ADR-0019 D1 per-PE HBM CTRL
|
||||||
|
restored, each PE's slice runs on its own controller with no
|
||||||
|
cross-PE contention, so the IPCQ pattern (each PE writes its own
|
||||||
|
slice) drops the gap to ≈ 1.7 µs — still well above the bare
|
||||||
|
slot-IO term, confirming the PE↔HBM hop is being charged.
|
||||||
"""
|
"""
|
||||||
n_elem = 16384 # 32 KB / PE
|
n_elem = 16384 # 32 KB / PE
|
||||||
lat_tcm = _run_allreduce_with_buffer_kind(
|
lat_tcm = _run_allreduce_with_buffer_kind(
|
||||||
@@ -200,7 +208,7 @@ def test_hbm_pe_hop_charged_at_large_payload(tmp_path):
|
|||||||
tmp_path, buffer_kind="hbm", n_elem=n_elem,
|
tmp_path, buffer_kind="hbm", n_elem=n_elem,
|
||||||
)
|
)
|
||||||
delta = lat_hbm - lat_tcm
|
delta = lat_hbm - lat_tcm
|
||||||
THRESHOLD_NS = 4_000.0
|
THRESHOLD_NS = 1_500.0
|
||||||
assert delta > THRESHOLD_NS, (
|
assert delta > THRESHOLD_NS, (
|
||||||
f"HBM should be ≥ {THRESHOLD_NS:.0f} ns slower than TCM at 32 KB "
|
f"HBM should be ≥ {THRESHOLD_NS:.0f} ns slower than TCM at 32 KB "
|
||||||
f"once the 256 GB/s PE↔HBM hop is charged on each IPCQ access. "
|
f"once the 256 GB/s PE↔HBM hop is charged on each IPCQ access. "
|
||||||
|
|||||||
@@ -259,12 +259,21 @@ def test_no_single_noc_node():
|
|||||||
assert "sip0.cube0.noc" not in graph.nodes
|
assert "sip0.cube0.noc" not in graph.nodes
|
||||||
|
|
||||||
|
|
||||||
def test_single_hbm_ctrl_node():
|
def test_per_pe_hbm_ctrl_nodes():
|
||||||
"""Each cube must have single hbm_ctrl (no slices)."""
|
"""Each cube has 8 per-PE HBM CTRL instances (ADR-0019 D1).
|
||||||
|
|
||||||
|
Restored from over-consolidation in commit 5917b34. The legacy
|
||||||
|
single ``sip0.cube0.hbm_ctrl`` is gone; each PE owns its own
|
||||||
|
``hbm_ctrl.pe{X}`` reachable through that PE's attaching router.
|
||||||
|
"""
|
||||||
graph = _graph()
|
graph = _graph()
|
||||||
assert "sip0.cube0.hbm_ctrl" in graph.nodes
|
for pe in range(8):
|
||||||
slices = [n for n in graph.nodes if "hbm_ctrl.slice" in n]
|
assert f"sip0.cube0.hbm_ctrl.pe{pe}" in graph.nodes
|
||||||
assert len(slices) == 0, f"HBM slices should not exist: {slices[:3]}"
|
# Legacy single hbm_ctrl must not exist
|
||||||
|
legacy_id = "sip0.cube0.hbm_ctrl"
|
||||||
|
assert legacy_id not in graph.nodes, (
|
||||||
|
f"legacy {legacy_id} must be removed (per-PE partitioning, ADR-0019 D1)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_router_mesh_edges():
|
def test_router_mesh_edges():
|
||||||
@@ -285,16 +294,23 @@ def test_pe_dma_connects_to_router():
|
|||||||
assert pe0_edges[0].dst == "sip0.cube0.r0c0"
|
assert pe0_edges[0].dst == "sip0.cube0.r0c0"
|
||||||
|
|
||||||
|
|
||||||
def test_hbm_connects_to_all_routers():
|
def test_each_hbm_ctrl_connects_only_to_owning_router():
|
||||||
"""HBM_CTRL must have edges to all non-null routers."""
|
"""Each ``hbm_ctrl.pe{X}`` must have exactly one router edge
|
||||||
|
(router_to_hbm + hbm_to_router) to its owning PE's attaching
|
||||||
|
router (ADR-0019 D4). Replaces a prior test that asserted the
|
||||||
|
single hbm_ctrl was connected to all routers — that asserted the
|
||||||
|
spec-violating consolidation introduced in commit 5917b34.
|
||||||
|
"""
|
||||||
graph = _graph()
|
graph = _graph()
|
||||||
hbm_out = [e for e in graph.edges
|
pe_router = {0: "r0c0", 1: "r0c1", 2: "r1c4", 3: "r1c5",
|
||||||
if e.src == "sip0.cube0.hbm_ctrl" and e.kind == "hbm_to_router"]
|
4: "r4c0", 5: "r4c1", 6: "r5c4", 7: "r5c5"}
|
||||||
mesh = yaml.safe_load(MESH_PATH.read_text())
|
for pe, rkey in pe_router.items():
|
||||||
n_active = sum(1 for v in mesh["routers"].values() if v is not None)
|
nid = f"sip0.cube0.hbm_ctrl.pe{pe}"
|
||||||
assert len(hbm_out) == n_active, (
|
owner = f"sip0.cube0.{rkey}"
|
||||||
f"HBM should connect to {n_active} routers, got {len(hbm_out)}"
|
outs = [e.dst for e in graph.edges if e.src == nid]
|
||||||
)
|
ins = [e.src for e in graph.edges if e.dst == nid]
|
||||||
|
assert outs == [owner], f"{nid} must out-edge only to {owner}; got {outs}"
|
||||||
|
assert ins == [owner], f"{nid} must in-edge only from {owner}; got {ins}"
|
||||||
|
|
||||||
|
|
||||||
# ══════════════════════════════════════════════════════════════════
|
# ══════════════════════════════════════════════════════════════════
|
||||||
@@ -306,18 +322,18 @@ def test_local_hbm_path_through_router():
|
|||||||
"""PE0 local HBM: path must go through PE's router to hbm_ctrl."""
|
"""PE0 local HBM: path must go through PE's router to hbm_ctrl."""
|
||||||
graph = _graph()
|
graph = _graph()
|
||||||
router = PathRouter(graph)
|
router = PathRouter(graph)
|
||||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
assert "sip0.cube0.r0c0" in path, f"PE0's router r0c0 missing from path: {path}"
|
assert "sip0.cube0.r0c0" in path, f"PE0's router r0c0 missing from path: {path}"
|
||||||
assert "sip0.cube0.hbm_ctrl" == path[-1], f"Path should end at hbm_ctrl: {path}"
|
assert "sip0.cube0.hbm_ctrl.pe0" == path[-1], f"Path should end at hbm_ctrl: {path}"
|
||||||
|
|
||||||
|
|
||||||
def test_remote_pe_hbm_has_more_hops():
|
def test_remote_pe_hbm_has_more_hops():
|
||||||
"""PE0 → PE4's HBM (remote) must have more hops than local."""
|
"""PE0 → PE4's HBM (remote) must have more hops than local."""
|
||||||
graph = _graph()
|
graph = _graph()
|
||||||
router = PathRouter(graph)
|
router = PathRouter(graph)
|
||||||
local_path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
local_path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
# PE4 is at r4c0, PE0 at r0c0 — must traverse mesh
|
# PE4 is at r4c0, PE0 at r0c0 — must traverse mesh
|
||||||
remote_path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
|
remote_path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
# Both should work, local should be shorter or equal
|
# Both should work, local should be shorter or equal
|
||||||
assert len(local_path) >= 2
|
assert len(local_path) >= 2
|
||||||
assert len(remote_path) >= 2
|
assert len(remote_path) >= 2
|
||||||
@@ -328,10 +344,10 @@ def test_mcpu_dma_path_through_router_mesh():
|
|||||||
graph = _graph()
|
graph = _graph()
|
||||||
router = PathRouter(graph)
|
router = PathRouter(graph)
|
||||||
path = router.find_mcpu_dma_path(
|
path = router.find_mcpu_dma_path(
|
||||||
"sip0.cube0.m_cpu", "sip0.cube0.hbm_ctrl"
|
"sip0.cube0.m_cpu", "sip0.cube0.hbm_ctrl.pe0"
|
||||||
)
|
)
|
||||||
assert path[0] == "sip0.cube0.m_cpu"
|
assert path[0] == "sip0.cube0.m_cpu"
|
||||||
assert path[-1] == "sip0.cube0.hbm_ctrl"
|
assert path[-1] == "sip0.cube0.hbm_ctrl.pe0"
|
||||||
assert any("r" in n and "c" in n for n in path), f"Router missing from path: {path}"
|
assert any("r" in n and "c" in n for n in path), f"Router missing from path: {path}"
|
||||||
|
|
||||||
|
|
||||||
@@ -339,9 +355,9 @@ def test_cross_cube_path_through_ucie():
|
|||||||
"""Cross-cube HBM: must traverse router → UCIe → remote router → hbm_ctrl."""
|
"""Cross-cube HBM: must traverse router → UCIe → remote router → hbm_ctrl."""
|
||||||
graph = _graph()
|
graph = _graph()
|
||||||
router = PathRouter(graph)
|
router = PathRouter(graph)
|
||||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube4.hbm_ctrl")
|
path = router.find_path("sip0.cube0.pe0", "sip0.cube4.hbm_ctrl.pe0")
|
||||||
assert any("ucie" in n.lower() for n in path), f"UCIe missing: {path}"
|
assert any("ucie" in n.lower() for n in path), f"UCIe missing: {path}"
|
||||||
assert path[-1] == "sip0.cube4.hbm_ctrl"
|
assert path[-1] == "sip0.cube4.hbm_ctrl.pe0"
|
||||||
|
|
||||||
|
|
||||||
def test_h2d_bypass_path_through_router():
|
def test_h2d_bypass_path_through_router():
|
||||||
@@ -355,7 +371,7 @@ def test_h2d_bypass_path_through_router():
|
|||||||
hbm_target = resolver.resolve(PhysAddr.decode(pa))
|
hbm_target = resolver.resolve(PhysAddr.decode(pa))
|
||||||
|
|
||||||
path = router.find_memory_path(pcie_ep, hbm_target)
|
path = router.find_memory_path(pcie_ep, hbm_target)
|
||||||
assert path[-1] == "sip0.cube0.hbm_ctrl", f"Path should end at hbm_ctrl: {path}"
|
assert path[-1] == "sip0.cube0.hbm_ctrl.pe0", f"Path should end at hbm_ctrl: {path}"
|
||||||
assert any("r0c" in n or "r1c" in n for n in path), f"Router missing: {path}"
|
assert any("r0c" in n or "r1c" in n for n in path), f"Router missing: {path}"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,231 @@
|
|||||||
|
"""Tests for ADR-0019 D1/D4 per-PE HBM partitioning.
|
||||||
|
|
||||||
|
Restores the architectural property that was lost in commit 5917b34
|
||||||
|
(2026-04-04 "Replace xbar/bridge/single-NOC with explicit router mesh"),
|
||||||
|
which over-consolidated 8 per-slice HBM CTRL nodes into one cube-wide
|
||||||
|
HBM CTRL connected to every router. ADR-0019 D1/D4 specifies:
|
||||||
|
|
||||||
|
- Each PE owns 8 of the cube's 64 pseudo-channels (PE_X → PCs 8X..8X+7).
|
||||||
|
- HBM CTRL is split per-PE: ``hbm_ctrl.pe{X}`` is reachable ONLY through
|
||||||
|
PE_X's attaching router. Accessing PE_Y's slice from PE_X requires
|
||||||
|
mesh routing to r_Y_attach before entering hbm_ctrl.pe{Y}.
|
||||||
|
|
||||||
|
These tests are written BEFORE the production change and are expected
|
||||||
|
to FAIL on current code (HBM CTRL is a single ``hbm_ctrl`` node attached
|
||||||
|
to all routers). Phase 2 must make them PASS without weakening
|
||||||
|
assertions.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from kernbench.policy.address.phyaddr import PhysAddr
|
||||||
|
from kernbench.policy.routing.router import AddressResolver, PathRouter
|
||||||
|
from kernbench.runtime_api.kernel import PeDmaMsg
|
||||||
|
from kernbench.sim_engine.engine import GraphEngine
|
||||||
|
from kernbench.topology.builder import load_topology
|
||||||
|
|
||||||
|
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
|
||||||
|
|
||||||
|
|
||||||
|
def _graph():
|
||||||
|
return load_topology(TOPOLOGY_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
def _slice_bytes(spec: dict) -> int:
|
||||||
|
mm = spec["cube"]["memory_map"]
|
||||||
|
return mm["hbm_total_gb_per_cube"] * (1 << 30) // mm["hbm_slices_per_cube"]
|
||||||
|
|
||||||
|
|
||||||
|
def _hbm_pa(*, sip: int, cube: int, pe_id: int, offset: int, spec: dict) -> int:
|
||||||
|
return PhysAddr.pe_hbm_addr(
|
||||||
|
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||||
|
pe_local_hbm_offset=offset, slice_size_bytes=_slice_bytes(spec),
|
||||||
|
).encode()
|
||||||
|
|
||||||
|
|
||||||
|
# Mapping derived from topology.yaml + cube_mesh attach info
|
||||||
|
PE_ATTACH_ROUTER = {
|
||||||
|
0: "r0c0", 1: "r0c1",
|
||||||
|
2: "r1c4", 3: "r1c5",
|
||||||
|
4: "r4c0", 5: "r4c1",
|
||||||
|
6: "r5c4", 7: "r5c5",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── 1. Topology: 8 per-PE HBM CTRL nodes per cube ────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_topology_has_8_hbm_ctrl_per_cube():
|
||||||
|
"""Each cube must expose 8 hbm_ctrl instances, one per PE
|
||||||
|
(``hbm_ctrl.pe0`` .. ``hbm_ctrl.pe7``). The legacy single
|
||||||
|
``hbm_ctrl`` must be absent."""
|
||||||
|
graph = _graph()
|
||||||
|
for pe in range(8):
|
||||||
|
nid = f"sip0.cube0.hbm_ctrl.pe{pe}"
|
||||||
|
assert nid in graph.nodes, (
|
||||||
|
f"Expected per-PE HBM CTRL node {nid!r} (ADR-0019 D1)"
|
||||||
|
)
|
||||||
|
node = graph.nodes[nid]
|
||||||
|
assert int(node.attrs.get("num_pcs", 0)) == 8, (
|
||||||
|
f"{nid} must have num_pcs=8; got {node.attrs.get('num_pcs')}"
|
||||||
|
)
|
||||||
|
# Legacy single hbm_ctrl must not exist
|
||||||
|
assert "sip0.cube0.hbm_ctrl" not in graph.nodes, (
|
||||||
|
"Legacy single sip0.cube0.hbm_ctrl must be removed in favor of "
|
||||||
|
"per-PE hbm_ctrl.pe{X} (ADR-0019 D1)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 2. Each per-PE HBM CTRL connects ONLY to its PE's attaching router ─
|
||||||
|
|
||||||
|
|
||||||
|
def test_per_pe_hbm_ctrl_connects_only_to_owning_router():
|
||||||
|
"""``hbm_ctrl.pe{X}`` must have exactly one router edge (to and from
|
||||||
|
r_X_attach). No other router may have an edge to/from it."""
|
||||||
|
graph = _graph()
|
||||||
|
edge_map = {(e.src, e.dst): e for e in graph.edges}
|
||||||
|
for pe in range(8):
|
||||||
|
nid = f"sip0.cube0.hbm_ctrl.pe{pe}"
|
||||||
|
owner = f"sip0.cube0.{PE_ATTACH_ROUTER[pe]}"
|
||||||
|
# incoming edges (router→hbm_ctrl)
|
||||||
|
incoming = [src for (src, dst) in edge_map if dst == nid]
|
||||||
|
outgoing = [dst for (src, dst) in edge_map if src == nid]
|
||||||
|
assert incoming == [owner], (
|
||||||
|
f"{nid} must have a single incoming edge from {owner}; got {incoming}"
|
||||||
|
)
|
||||||
|
assert outgoing == [owner], (
|
||||||
|
f"{nid} must have a single outgoing edge to {owner}; got {outgoing}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 3. Resolver: PA pe_id → correct hbm_ctrl.pe{X} ───────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("pe_id", list(range(8)))
|
||||||
|
def test_resolver_maps_pe_id_to_correct_hbm_ctrl(pe_id):
|
||||||
|
"""AddressResolver must dispatch HBM PA to the hbm_ctrl owned by the
|
||||||
|
target PE (encoded by pe_local_hbm_offset / slice_size_bytes)."""
|
||||||
|
graph = _graph()
|
||||||
|
spec = graph.spec
|
||||||
|
resolver = AddressResolver(graph)
|
||||||
|
pa_val = _hbm_pa(sip=0, cube=0, pe_id=pe_id, offset=0x1000, spec=spec)
|
||||||
|
pa = PhysAddr.decode(pa_val)
|
||||||
|
dst = resolver.resolve(pa)
|
||||||
|
assert dst == f"sip0.cube0.hbm_ctrl.pe{pe_id}", (
|
||||||
|
f"PA with pe_id={pe_id} must resolve to hbm_ctrl.pe{pe_id}; got {dst!r}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 4. Path: PE_X → PE_X_slice is single mesh hop ────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_pe0_to_pe0_slice_is_single_mesh_hop():
|
||||||
|
"""PE0 accessing its OWN HBM slice must take exactly one router hop
|
||||||
|
(r0c0 → hbm_ctrl.pe0)."""
|
||||||
|
graph = _graph()
|
||||||
|
spec = graph.spec
|
||||||
|
router = PathRouter(graph)
|
||||||
|
dst = f"sip0.cube0.hbm_ctrl.pe0"
|
||||||
|
path = router.find_path("sip0.cube0.pe0", dst)
|
||||||
|
expected = [
|
||||||
|
"sip0.cube0.pe0.pe_dma",
|
||||||
|
"sip0.cube0.r0c0",
|
||||||
|
"sip0.cube0.hbm_ctrl.pe0",
|
||||||
|
]
|
||||||
|
assert path == expected, (
|
||||||
|
f"pe0 → pe0_slice path must be {expected}; got {path}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 5. Path: PE_X → PE_Y_slice traverses mesh through r_Y_attach ─────
|
||||||
|
|
||||||
|
|
||||||
|
def test_pe0_to_pe7_slice_traverses_mesh_to_r5c5():
|
||||||
|
"""PE0 accessing PE7's slice must mesh-route to r5c5 (PE7's attaching
|
||||||
|
router) before entering hbm_ctrl.pe7. Last two nodes must be
|
||||||
|
r5c5 → hbm_ctrl.pe7."""
|
||||||
|
graph = _graph()
|
||||||
|
router = PathRouter(graph)
|
||||||
|
dst = "sip0.cube0.hbm_ctrl.pe7"
|
||||||
|
path = router.find_path("sip0.cube0.pe0", dst)
|
||||||
|
assert path[0] == "sip0.cube0.pe0.pe_dma"
|
||||||
|
assert path[-2:] == ["sip0.cube0.r5c5", "sip0.cube0.hbm_ctrl.pe7"], (
|
||||||
|
f"Last 2 path nodes must be r5c5 → hbm_ctrl.pe7; got {path[-2:]}"
|
||||||
|
)
|
||||||
|
# Multi-hop mesh between r0c0 and r5c5
|
||||||
|
assert len(path) >= 5, f"Cross-PE path must traverse mesh; got {len(path)} nodes"
|
||||||
|
|
||||||
|
|
||||||
|
# ── 6. End-to-end: cross-PE latency > local PE latency ───────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_pe_dma_cross_pe_slower_than_local():
|
||||||
|
"""For a non-trivial transfer (16KB), PE0 → PE7_slice must measurably
|
||||||
|
exceed PE0 → PE0_slice (the additional mesh hops add per-router
|
||||||
|
overhead and consume per-link wire BW)."""
|
||||||
|
graph = _graph()
|
||||||
|
spec = graph.spec
|
||||||
|
nbytes = 16384
|
||||||
|
|
||||||
|
def _run(dst_pe: int) -> float:
|
||||||
|
engine = GraphEngine(_graph())
|
||||||
|
pa = _hbm_pa(sip=0, cube=0, pe_id=dst_pe, offset=0x1000, spec=spec)
|
||||||
|
msg = PeDmaMsg(
|
||||||
|
correlation_id="per-pe-hbm", request_id=f"to-pe{dst_pe}",
|
||||||
|
src_sip=0, src_cube=0, src_pe=0,
|
||||||
|
dst_pa=pa, nbytes=nbytes,
|
||||||
|
)
|
||||||
|
h = engine.submit(msg)
|
||||||
|
engine.wait(h)
|
||||||
|
_, trace = engine.get_completion(h)
|
||||||
|
return float(trace["total_ns"])
|
||||||
|
|
||||||
|
local = _run(0)
|
||||||
|
cross = _run(7)
|
||||||
|
assert cross > local * 1.05, (
|
||||||
|
f"Cross-PE HBM access (pe0 → pe7_slice) must take measurably more "
|
||||||
|
f"time than local (pe0 → pe0_slice). local={local:.2f}ns, "
|
||||||
|
f"cross={cross:.2f}ns, ratio={cross/local:.3f} (expected > 1.05)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 7. Probe CLI monotonicity (existing case names) ──────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_probe_cli_intra_cube_cases_are_monotonic():
|
||||||
|
"""Probe CLI cases must show monotonic latency:
|
||||||
|
pe-local-hbm < pe-same-half-hbm < pe-cross-half-hbm.
|
||||||
|
|
||||||
|
Prior to per-PE partitioning these three return identical latency
|
||||||
|
because all roads lead to the same hbm_ctrl. With ADR-0019 D4
|
||||||
|
restored, same-half (pe0→pe1) is 1 mesh hop further than local,
|
||||||
|
and cross-half (pe0→pe4) is several hops further.
|
||||||
|
"""
|
||||||
|
graph = _graph()
|
||||||
|
spec = graph.spec
|
||||||
|
nbytes = 32768
|
||||||
|
|
||||||
|
def _run(dst_pe: int) -> float:
|
||||||
|
engine = GraphEngine(_graph())
|
||||||
|
pa = _hbm_pa(sip=0, cube=0, pe_id=dst_pe, offset=0x1000, spec=spec)
|
||||||
|
msg = PeDmaMsg(
|
||||||
|
correlation_id="probe", request_id=f"to-pe{dst_pe}",
|
||||||
|
src_sip=0, src_cube=0, src_pe=0,
|
||||||
|
dst_pa=pa, nbytes=nbytes,
|
||||||
|
)
|
||||||
|
h = engine.submit(msg)
|
||||||
|
engine.wait(h)
|
||||||
|
_, trace = engine.get_completion(h)
|
||||||
|
return float(trace["total_ns"])
|
||||||
|
|
||||||
|
local = _run(0) # pe-local-hbm
|
||||||
|
same_half = _run(1) # pe-same-half-hbm (adjacent: r0c0 → r0c1)
|
||||||
|
cross_half = _run(4) # pe-cross-half-hbm (r0c0 → r4c0)
|
||||||
|
assert local < same_half < cross_half, (
|
||||||
|
f"intra-cube DMA must be monotonic with mesh distance. "
|
||||||
|
f"local={local:.2f}, same_half={same_half:.2f}, "
|
||||||
|
f"cross_half={cross_half:.2f}"
|
||||||
|
)
|
||||||
@@ -17,19 +17,21 @@ def _graph():
|
|||||||
|
|
||||||
|
|
||||||
def test_resolve_hbm_addr():
|
def test_resolve_hbm_addr():
|
||||||
"""HBM address -> sip{S}.cube{C}.hbm_ctrl (single controller per cube)."""
|
"""HBM address -> sip{S}.cube{C}.hbm_ctrl.pe{X} (per-PE controller, ADR-0019 D1)."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
resolver = AddressResolver(g)
|
resolver = AddressResolver(g)
|
||||||
|
# offset 0x1000 falls inside PE0's slice (slice_size = 6 GB)
|
||||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=3, hbm_offset=0x1000)
|
pa = PhysAddr.hbm_addr(sip_id=0, die_id=3, hbm_offset=0x1000)
|
||||||
assert resolver.resolve(pa) == "sip0.cube3.hbm_ctrl"
|
assert resolver.resolve(pa) == "sip0.cube3.hbm_ctrl.pe0"
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_hbm_addr_high_offset():
|
def test_resolve_hbm_addr_high_offset():
|
||||||
"""HBM address with large offset still resolves to same hbm_ctrl."""
|
"""HBM offset that lands in PE4's slice must resolve to hbm_ctrl.pe4."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
resolver = AddressResolver(g)
|
resolver = AddressResolver(g)
|
||||||
|
# 0x600000000 / (6 GB) = 4
|
||||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=0, hbm_offset=0x600000000)
|
pa = PhysAddr.hbm_addr(sip_id=0, die_id=0, hbm_offset=0x600000000)
|
||||||
assert resolver.resolve(pa) == "sip0.cube0.hbm_ctrl"
|
assert resolver.resolve(pa) == "sip0.cube0.hbm_ctrl.pe4"
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_pe_tcm_addr():
|
def test_resolve_pe_tcm_addr():
|
||||||
@@ -73,12 +75,12 @@ def test_resolve_nonexistent_node():
|
|||||||
|
|
||||||
|
|
||||||
def test_path_local_hbm():
|
def test_path_local_hbm():
|
||||||
"""PE0 -> hbm_ctrl: pe_dma -> router -> hbm_ctrl (through router mesh)."""
|
"""PE0 -> own slice: pe_dma -> r0c0 -> hbm_ctrl.pe0 (1 mesh hop)."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
assert path[0] == "sip0.cube0.pe0.pe_dma"
|
assert path[0] == "sip0.cube0.pe0.pe_dma"
|
||||||
assert path[-1] == "sip0.cube0.hbm_ctrl"
|
assert path[-1] == "sip0.cube0.hbm_ctrl.pe0"
|
||||||
# Path must go through at least one router node
|
# Path must go through at least one router node
|
||||||
assert any(n.startswith("sip0.cube0.r") for n in path), \
|
assert any(n.startswith("sip0.cube0.r") for n in path), \
|
||||||
"HBM path must traverse router mesh"
|
"HBM path must traverse router mesh"
|
||||||
@@ -90,56 +92,61 @@ def test_path_local_hbm():
|
|||||||
|
|
||||||
|
|
||||||
def test_path_remote_pe_hbm():
|
def test_path_remote_pe_hbm():
|
||||||
"""PE4 (bottom half) -> hbm_ctrl: routes through router mesh."""
|
"""PE4 (bottom half) -> its own slice: routes through router mesh."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
|
path = router.find_path("sip0.cube0.pe4", "sip0.cube0.hbm_ctrl.pe4")
|
||||||
assert path[0] == "sip0.cube0.pe4.pe_dma"
|
assert path[0] == "sip0.cube0.pe4.pe_dma"
|
||||||
assert path[-1] == "sip0.cube0.hbm_ctrl"
|
assert path[-1] == "sip0.cube0.hbm_ctrl.pe4"
|
||||||
assert any(n.startswith("sip0.cube0.r") for n in path)
|
assert any(n.startswith("sip0.cube0.r") for n in path)
|
||||||
assert not any("xbar" in n or "bridge" in n for n in path)
|
assert not any("xbar" in n or "bridge" in n for n in path)
|
||||||
|
|
||||||
|
|
||||||
# ── PathRouter: all PEs equidistant to HBM (n_to_one routing weight) ─
|
# ── PathRouter: cross-PE HBM distance reflects mesh hops (ADR-0019 D4) ─
|
||||||
|
|
||||||
|
|
||||||
def test_all_pe_hbm_equidistant():
|
def test_cross_pe_hbm_distance_increases_with_mesh_hops():
|
||||||
"""All PEs in a cube have equal routing distance to hbm_ctrl.
|
"""Restored ADR-0019 D4 behavior: accessing another PE's HBM slice
|
||||||
|
must take more routing distance than accessing one's own slice,
|
||||||
|
because each per-PE hbm_ctrl is reachable only via its PE's router.
|
||||||
|
|
||||||
With n_to_one mapping and high routing weight on HBM edges,
|
Replaces a previous ``test_all_pe_hbm_equidistant`` that asserted the
|
||||||
all PE->hbm_ctrl paths have the same accumulated distance.
|
over-consolidated (spec-violating) behavior introduced in 5917b34.
|
||||||
"""
|
"""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
distances = []
|
_, dist_local = router.find_path_with_distance(
|
||||||
for pe in range(8):
|
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
_, dist = router.find_path_with_distance(
|
_, dist_to_pe7 = router.find_path_with_distance(
|
||||||
f"sip0.cube0.pe{pe}", "sip0.cube0.hbm_ctrl")
|
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe7")
|
||||||
distances.append(dist)
|
assert dist_to_pe7 > dist_local, (
|
||||||
# All distances should be equal
|
f"pe0→pe7_slice should require more mesh distance than pe0→pe0_slice; "
|
||||||
assert all(d == distances[0] for d in distances), (
|
f"got local={dist_local}, to_pe7={dist_to_pe7}"
|
||||||
f"expected equal distances, got: {distances}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_remote_pe_distance_not_less_than_local():
|
def test_remote_pe_distance_not_less_than_local():
|
||||||
"""Remote PE HBM distance >= local PE HBM distance (mesh topology)."""
|
"""PE4 -> pe0_slice distance >= PE0 -> pe0_slice distance.
|
||||||
|
|
||||||
|
Both access pe0's slice (hbm_ctrl.pe0). PE0's path is shortest; PE4
|
||||||
|
must mesh-route up to r0c0 before entering the slice.
|
||||||
|
"""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
_, dist_pe0 = router.find_path_with_distance(
|
_, dist_pe0 = router.find_path_with_distance(
|
||||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
_, dist_pe4 = router.find_path_with_distance(
|
_, dist_pe4 = router.find_path_with_distance(
|
||||||
"sip0.cube0.pe4", "sip0.cube0.hbm_ctrl")
|
"sip0.cube0.pe4", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
assert dist_pe4 >= dist_pe0
|
assert dist_pe4 >= dist_pe0
|
||||||
|
|
||||||
|
|
||||||
def test_path_remote_cube_hbm():
|
def test_path_remote_cube_hbm():
|
||||||
"""PE0 in cube0 can reach HBM in cube1 via UCIe (ADR-0004 D4)."""
|
"""PE0 in cube0 can reach pe0's HBM in cube1 via UCIe (ADR-0004 D4)."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
|
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.pe0")
|
||||||
assert path[0] == "sip0.cube0.pe0.pe_dma"
|
assert path[0] == "sip0.cube0.pe0.pe_dma"
|
||||||
assert path[-1] == "sip0.cube1.hbm_ctrl"
|
assert path[-1] == "sip0.cube1.hbm_ctrl.pe0"
|
||||||
# inter-cube path must cross a UCIe link
|
# inter-cube path must cross a UCIe link
|
||||||
assert any("ucie" in n.lower() for n in path), \
|
assert any("ucie" in n.lower() for n in path), \
|
||||||
"remote cube path must traverse UCIe"
|
"remote cube path must traverse UCIe"
|
||||||
@@ -182,11 +189,15 @@ def test_path_local_tcm():
|
|||||||
|
|
||||||
|
|
||||||
def test_path_distance_positive():
|
def test_path_distance_positive():
|
||||||
"""All routed paths must have accumulated distance > 0 (ADR-0002 D4)."""
|
"""Routed paths that traverse the mesh must have positive accumulated
|
||||||
|
distance (ADR-0002 D4). Use a cross-PE target so the path includes
|
||||||
|
inter-router mesh edges (which have non-zero distance_mm). The
|
||||||
|
single-hop pe0→pe0_slice path stays at 0 because PE_DMA↔router and
|
||||||
|
router↔hbm_ctrl are zero-length placements within the same corner."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
_, dist = router.find_path_with_distance(
|
_, dist = router.find_path_with_distance(
|
||||||
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
"sip0.cube0.pe0", "sip0.cube0.hbm_ctrl.pe7")
|
||||||
assert dist > 0
|
assert dist > 0
|
||||||
|
|
||||||
|
|
||||||
@@ -195,8 +206,8 @@ def test_path_deterministic():
|
|||||||
g = _graph()
|
g = _graph()
|
||||||
r1 = PathRouter(g)
|
r1 = PathRouter(g)
|
||||||
r2 = PathRouter(g)
|
r2 = PathRouter(g)
|
||||||
p1 = r1.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl")
|
p1 = r1.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
p2 = r2.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl")
|
p2 = r2.find_path("sip0.cube0.pe3", "sip0.cube0.hbm_ctrl.pe0")
|
||||||
assert p1 == p2
|
assert p1 == p2
|
||||||
|
|
||||||
|
|
||||||
@@ -205,5 +216,5 @@ def test_remote_cube_path_no_routing_error():
|
|||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
# cube0.PE0 -> cube1.hbm_ctrl (adjacent cube, E direction)
|
# cube0.PE0 -> cube1.hbm_ctrl (adjacent cube, E direction)
|
||||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
|
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.pe0")
|
||||||
assert len(path) >= 1 # succeeds without exception
|
assert len(path) >= 1 # succeeds without exception
|
||||||
|
|||||||
@@ -17,21 +17,21 @@ def test_full_graph_node_count():
|
|||||||
g = _graph()
|
g = _graph()
|
||||||
# 1 switch
|
# 1 switch
|
||||||
# + 2 SIPs x (1 IO x 23 io_nodes
|
# + 2 SIPs x (1 IO x 23 io_nodes
|
||||||
# + 16 cubes x (32 routers + 1 hbm_ctrl + 1 m_cpu + 1 sram
|
# + 16 cubes x (32 routers + 8 hbm_ctrl.peX + 1 m_cpu + 1 sram
|
||||||
# + 20 ucie (4 ports x (1 port + 4 conn))
|
# + 20 ucie (4 ports x (1 port + 4 conn))
|
||||||
# + 8 PEs x 9 pe_comps)) (ADR-0023: +pe_ipcq)
|
# + 8 PEs x 9 pe_comps)) (ADR-0023: +pe_ipcq)
|
||||||
# IO: pcie_ep + io_cpu + noc + 4 io_ucie_ports + 4*4 io_ucie_conn = 23
|
# IO: pcie_ep + io_cpu + noc + 4 io_ucie_ports + 4*4 io_ucie_conn = 23
|
||||||
# cube: 32 + 3 + 20 + 72 = 127
|
# cube: 32 + 10 + 20 + 72 = 134 (was 127; ADR-0019 D1 per-PE HBM CTRL)
|
||||||
# = 1 + 2*(23 + 16*127) = 1 + 2*(23+2032) = 1 + 4110 = 4111
|
# = 1 + 2*(23 + 16*134) = 1 + 2*(23+2144) = 1 + 4334 = 4335
|
||||||
assert len(g.nodes) == 4111
|
assert len(g.nodes) == 4335
|
||||||
|
|
||||||
|
|
||||||
def test_full_graph_edge_count():
|
def test_full_graph_edge_count():
|
||||||
g = _graph()
|
g = _graph()
|
||||||
# ADR-0023: +3 IPCQ edges per PE (cpu→ipcq, ipcq→dma, dma→ipcq)
|
# ADR-0023: +3 IPCQ edges per PE
|
||||||
# 2 SIPs × 16 cubes × 8 PEs × 3 = 768 new edges
|
# ADR-0019 D1 (restored): HBM↔router edges drop from 32 routers × 2
|
||||||
# Cross-SIP routing: +1 reverse pcie_ep→switch edge per SIP = +2
|
# to 8 PE-routers × 2 per cube. 32 cubes × (16-64) = -1536 edges.
|
||||||
assert len(g.edges) == 13692
|
assert len(g.edges) == 12156
|
||||||
|
|
||||||
|
|
||||||
# -- Full graph: specific nodes exist -----------------------------------------
|
# -- Full graph: specific nodes exist -----------------------------------------
|
||||||
@@ -55,7 +55,7 @@ def test_cube_component_nodes_exist():
|
|||||||
g = _graph()
|
g = _graph()
|
||||||
cp = "sip0.cube0"
|
cp = "sip0.cube0"
|
||||||
# Core cube components (no more noc, xbar, bridge)
|
# Core cube components (no more noc, xbar, bridge)
|
||||||
for name in ("m_cpu", "sram", "hbm_ctrl",
|
for name in ("m_cpu", "sram",
|
||||||
"ucie-N", "ucie-S", "ucie-E", "ucie-W"):
|
"ucie-N", "ucie-S", "ucie-E", "ucie-W"):
|
||||||
assert f"{cp}.{name}" in g.nodes
|
assert f"{cp}.{name}" in g.nodes
|
||||||
# Old nodes must not exist
|
# Old nodes must not exist
|
||||||
@@ -71,8 +71,11 @@ def test_cube_component_nodes_exist():
|
|||||||
# Null holes must not exist
|
# Null holes must not exist
|
||||||
for null_rc in ("r2c2", "r2c3", "r3c2", "r3c3"):
|
for null_rc in ("r2c2", "r2c3", "r3c2", "r3c3"):
|
||||||
assert f"{cp}.{null_rc}" not in g.nodes
|
assert f"{cp}.{null_rc}" not in g.nodes
|
||||||
# Single hbm_ctrl (no more slices)
|
# Per-PE HBM CTRL (ADR-0019 D1) — 8 instances, no legacy single node
|
||||||
assert g.nodes[f"{cp}.hbm_ctrl"].kind == "hbm_ctrl"
|
for pe in range(8):
|
||||||
|
nid = f"{cp}.hbm_ctrl.pe{pe}"
|
||||||
|
assert g.nodes[nid].kind == "hbm_ctrl"
|
||||||
|
assert f"{cp}.hbm_ctrl" not in g.nodes
|
||||||
for s in range(8):
|
for s in range(8):
|
||||||
assert f"{cp}.hbm_ctrl.slice{s}" not in g.nodes
|
assert f"{cp}.hbm_ctrl.slice{s}" not in g.nodes
|
||||||
|
|
||||||
@@ -89,16 +92,18 @@ def test_pe_component_nodes_exist():
|
|||||||
|
|
||||||
def test_hbm_ctrl_at_cube_center():
|
def test_hbm_ctrl_at_cube_center():
|
||||||
g = _graph()
|
g = _graph()
|
||||||
# Single hbm_ctrl per cube; cube0 origin = (0, 0), hbm at (6.5, 7.0)
|
# Per-PE hbm_ctrl nodes share the cube's HBM placement (ADR-0019 D1)
|
||||||
node = g.nodes["sip0.cube0.hbm_ctrl"]
|
# cube0 origin = (0, 0), hbm at (6.5, 7.0)
|
||||||
assert node.pos_mm == (6.5, 7.0)
|
for pe in range(8):
|
||||||
|
node = g.nodes[f"sip0.cube0.hbm_ctrl.pe{pe}"]
|
||||||
|
assert node.pos_mm == (6.5, 7.0)
|
||||||
|
|
||||||
|
|
||||||
def test_hbm_ctrl_cube5_position():
|
def test_hbm_ctrl_cube5_position():
|
||||||
g = _graph()
|
g = _graph()
|
||||||
# cube5 = col=1, row=1 -> origin = (1*18, 1*15) = (18, 15)
|
# cube5 = col=1, row=1 -> origin = (1*18, 1*15) = (18, 15)
|
||||||
# hbm_ctrl = (18 + 6.5, 15 + 7.0) = (24.5, 22.0)
|
# hbm_ctrl = (18 + 6.5, 15 + 7.0) = (24.5, 22.0)
|
||||||
node = g.nodes["sip0.cube5.hbm_ctrl"]
|
node = g.nodes["sip0.cube5.hbm_ctrl.pe0"]
|
||||||
assert node.pos_mm == (24.5, 22.0)
|
assert node.pos_mm == (24.5, 22.0)
|
||||||
|
|
||||||
|
|
||||||
@@ -181,16 +186,25 @@ def test_pe_internal_edges():
|
|||||||
assert (f"{pp}.pe_math", f"{pp}.pe_tcm") in es
|
assert (f"{pp}.pe_math", f"{pp}.pe_tcm") in es
|
||||||
|
|
||||||
|
|
||||||
def test_hbm_ctrl_connects_all_routers():
|
def test_per_pe_hbm_ctrl_connects_only_to_owning_router():
|
||||||
"""HBM_CTRL connects to every router (router_to_hbm / hbm_to_router)."""
|
"""Each hbm_ctrl.pe{X} connects ONLY to PE_X's attaching router
|
||||||
|
(ADR-0019 D4). Replaces a prior test that asserted the
|
||||||
|
spec-violating all-routers consolidation (commit 5917b34)."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
es = _edge_set(g)
|
es = _edge_set(g)
|
||||||
cp = "sip0.cube0"
|
cp = "sip0.cube0"
|
||||||
routers = sorted(n for n in g.nodes if n.startswith(f"{cp}.r"))
|
pe_router = {0: "r0c0", 1: "r0c1", 2: "r1c4", 3: "r1c5",
|
||||||
assert len(routers) == 32
|
4: "r4c0", 5: "r4c1", 6: "r5c4", 7: "r5c5"}
|
||||||
for r in routers:
|
for pe, rkey in pe_router.items():
|
||||||
assert (r, f"{cp}.hbm_ctrl") in es, f"missing {r}->hbm_ctrl"
|
nid = f"{cp}.hbm_ctrl.pe{pe}"
|
||||||
assert (f"{cp}.hbm_ctrl", r) in es, f"missing hbm_ctrl->{r}"
|
owner = f"{cp}.{rkey}"
|
||||||
|
assert (owner, nid) in es, f"missing {owner}→{nid}"
|
||||||
|
assert (nid, owner) in es, f"missing {nid}→{owner}"
|
||||||
|
for other in g.nodes:
|
||||||
|
if other.startswith(f"{cp}.r") and other != owner:
|
||||||
|
assert (other, nid) not in es, (
|
||||||
|
f"unexpected edge {other}→{nid}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_router_mesh_edges():
|
def test_router_mesh_edges():
|
||||||
@@ -387,7 +401,7 @@ def test_cross_cube_path_includes_conn():
|
|||||||
"""PE cross-cube path must traverse conn nodes."""
|
"""PE cross-cube path must traverse conn nodes."""
|
||||||
g = _graph()
|
g = _graph()
|
||||||
router = PathRouter(g)
|
router = PathRouter(g)
|
||||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl")
|
path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.pe0")
|
||||||
conn_nodes = [n for n in path if ".conn" in n]
|
conn_nodes = [n for n in path if ".conn" in n]
|
||||||
assert len(conn_nodes) >= 2, f"Expected >=2 conn nodes in path, got {conn_nodes}"
|
assert len(conn_nodes) >= 2, f"Expected >=2 conn nodes in path, got {conn_nodes}"
|
||||||
|
|
||||||
|
|||||||