Add probe CLI improvements, D2H read, UCIe/HBM tuning, BW sweep
- Probe CLI: restructured output (tables first, routes below), per-hop timestamps, split cross-cube into best/worst cases, D2H read section
- UCIe overhead: 1ns -> 8ns per port (16ns per crossing) to fix cross-cube-best < cross-half latency inversion
- HBM efficiency: added efficiency=0.8 factor to hbm_ctrl, reducing effective BW from 256 to 204.8 GB/s
- Multi-size BW sweep: saturation tables (4KB-1MB) for all probe cases
- Probe default data size: 4KB -> 32KB for more realistic measurements
- IOChiplet NOC + D2H topology and tests
- NOC mesh, xbar, BW occupancy components and tests
- Cube mesh visualization diagram

278 tests pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+122
-106
@@ -1,5 +1,6 @@
|
||||
from pathlib import Path
|
||||
|
||||
from kernbench.policy.routing.router import PathRouter
|
||||
from kernbench.topology.builder import load_topology
|
||||
|
||||
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
|
||||
@@ -15,28 +16,32 @@ def _graph():
|
||||
def test_full_graph_node_count():
|
||||
g = _graph()
|
||||
# 1 switch
|
||||
# + 2 SIPs × (1 IO × 2 comps + 16 cubes × (cube_comps + 8 PEs × 6 pe_comps))
|
||||
# + 2 SIPs × (1 IO × (3 comps + 4 io_ucie + 16 io_conn)
|
||||
# + 16 cubes × (cube_comps + 8 PEs × 6 pe_comps))
|
||||
# IO: pcie_ep + io_cpu + io_noc + 4 io_ucie + 4*4 io_conn = 23
|
||||
# cube_comps: 9 (noc, m_cpu, sram, 2 bridge, 4 ucie)
|
||||
# + 8 xbar.pe{0..7} [replaced xbar.top/xbar.bottom]
|
||||
# + 8 hbm_slices = 25
|
||||
# = 1 + 2*(2 + 16*(25+48)) = 1 + 2*(2+1168) = 1 + 2340 = 2341
|
||||
assert len(g.nodes) == 2341
|
||||
# + 16 ucie_conn (4 ports × 4 connections)
|
||||
# + 2 xbar_top/bot
|
||||
# + 8 hbm_slices = 35
|
||||
# = 1 + 2*(23 + 16*(35+48)) = 1 + 2*(23+1328) = 1 + 2702 = 2703
|
||||
assert len(g.nodes) == 2703
|
||||
|
||||
|
||||
def test_full_graph_edge_count():
|
||||
g = _graph()
|
||||
# Per cube: 144 (88 cube-fabric + 56 PE-internal)
|
||||
# cube-fabric: 8 pe→xbar.pe + 8 pe→noc + 8 noc→pe_cpu
|
||||
# + 8 xbar.pe→slice + 8 slice→xbar.pe (bidirectional for response)
|
||||
# + 12 xbar chain (3 pairs × 2 dir × 2 halves)
|
||||
# + 8 xbar.pe↔bridge (pe0↔bL, pe4↔bL, pe3↔bR, pe7↔bR, ×2 dir each)
|
||||
# + 4 noc→ucie + 4 ucie→noc (bidirectional)
|
||||
# + 8 noc→xbar.pe + 8 xbar.pe→noc (bidirectional for response)
|
||||
# + 1 m_cpu→noc + 1 noc→m_cpu + 1 noc→sram + 1 sram→noc = 88
|
||||
# Per SIP: 16*144 + 48 inter-cube(bidirectional) + 8 io↔cube(bidirectional)
|
||||
# + 1 io_internal + 1 switch→io = 2362
|
||||
# Total: 2 * 2362 = 4724
|
||||
assert len(g.edges) == 4724
|
||||
# Per cube: 168
|
||||
# PE-internal: 56
|
||||
# PE_DMA→noc: 8, noc→pe_cpu: 8
|
||||
# xbar_top→hbm{0..3}: 4+4=8, xbar_bot→hbm{4..7}: 4+4=8
|
||||
# noc↔xbar_top: 2, noc↔xbar_bot: 2
|
||||
# xbar_top↔bridge.left: 2, bridge.left↔xbar_bot: 2
|
||||
# xbar_top↔bridge.right: 2, bridge.right↔xbar_bot: 2
|
||||
# ucie: 64, m_cpu↔noc: 2, noc↔sram: 2
|
||||
# Total: 56+8+8+8+8+2+2+2+2+2+2+64+2+2 = 168
|
||||
# IO edges per SIP: 77
|
||||
# Per SIP: 16*168 + 48 inter-cube + 77 IO = 2813
|
||||
# Total: 2 * 2813 = 5626
|
||||
assert len(g.edges) == 5626
|
||||
|
||||
|
||||
# ── Full graph: specific nodes exist ─────────────────────────────────
|
||||
@@ -62,16 +67,12 @@ def test_cube_component_nodes_exist():
|
||||
for name in ("noc", "m_cpu",
|
||||
"bridge.left", "bridge.right",
|
||||
"ucie-N", "ucie-S", "ucie-E", "ucie-W",
|
||||
"sram"):
|
||||
"sram", "xbar_top", "xbar_bot"):
|
||||
assert f"{cp}.{name}" in g.nodes
|
||||
# xbar.top/xbar.bottom replaced by per-PE xbar entry nodes
|
||||
assert "sip0.cube0.xbar.top" not in g.nodes
|
||||
assert "sip0.cube0.xbar.bottom" not in g.nodes
|
||||
# Per-PE xbar entry nodes no longer exist
|
||||
for pe in range(8):
|
||||
node_id = f"{cp}.xbar.pe{pe}"
|
||||
assert node_id in g.nodes, f"{node_id} missing"
|
||||
assert g.nodes[node_id].kind == "xbar"
|
||||
# HBM slices (one per PE)
|
||||
assert f"{cp}.xbar.pe{pe}" not in g.nodes
|
||||
# HBM slices
|
||||
for s in range(8):
|
||||
assert f"{cp}.hbm_ctrl.slice{s}" in g.nodes
|
||||
assert g.nodes[f"{cp}.hbm_ctrl.slice{s}"].kind == "hbm_ctrl"
|
||||
@@ -131,9 +132,9 @@ def test_inter_cube_ucie_edges():
|
||||
|
||||
def test_io_to_cube_edges():
|
||||
es = _edge_set(_graph())
|
||||
# io0 connects to cubes (0,0)..(3,0) on N side
|
||||
assert ("sip0.io0.io_cpu", "sip0.cube0.ucie-N") in es
|
||||
assert ("sip0.io0.io_cpu", "sip0.cube3.ucie-N") in es
|
||||
# io0 connects io_ucie PHYs to cube UCIe ports on N side
|
||||
assert ("sip0.io0.ucie-P0", "sip0.cube0.ucie-N") in es
|
||||
assert ("sip0.io0.ucie-P3", "sip0.cube3.ucie-N") in es
|
||||
|
||||
|
||||
def test_switch_to_io_edges():
|
||||
@@ -142,15 +143,15 @@ def test_switch_to_io_edges():
|
||||
assert ("fabric.switch0", "sip1.io0.pcie_ep") in es
|
||||
|
||||
|
||||
def test_pe_to_xbar_edges():
|
||||
def test_pe_dma_to_noc_only():
|
||||
"""PE_DMA connects only to NOC (no direct xbar connection)."""
|
||||
es = _edge_set(_graph())
|
||||
cp = "sip0.cube0"
|
||||
# Each PE connects to its own xbar entry (per-PE chain model)
|
||||
for pe in range(8):
|
||||
assert (f"{cp}.pe{pe}.pe_dma", f"{cp}.xbar.pe{pe}") in es
|
||||
# Old shared xbar.top/bottom edges must NOT exist
|
||||
assert (f"{cp}.pe0.pe_dma", f"{cp}.xbar.top") not in es
|
||||
assert (f"{cp}.pe4.pe_dma", f"{cp}.xbar.bottom") not in es
|
||||
assert (f"{cp}.pe{pe}.pe_dma", f"{cp}.noc") in es
|
||||
# No direct pe_dma → xbar edges
|
||||
assert (f"{cp}.pe{pe}.pe_dma", f"{cp}.xbar_top") not in es
|
||||
assert (f"{cp}.pe{pe}.pe_dma", f"{cp}.xbar_bot") not in es
|
||||
|
||||
|
||||
def test_command_path_m_cpu_noc_pe_cpu():
|
||||
@@ -176,17 +177,17 @@ def test_pe_internal_edges():
|
||||
assert (f"{pp}.pe_math", f"{pp}.pe_tcm") in es
|
||||
|
||||
|
||||
def test_xbar_to_hbm_slice_edges():
|
||||
"""Each xbar.pe{i} connects only to its own (local) HBM slice."""
|
||||
def test_xbar_top_bot_to_hbm_slice_edges():
|
||||
"""xbar_top connects to slices 0-3, xbar_bot to slices 4-7."""
|
||||
es = _edge_set(_graph())
|
||||
cp = "sip0.cube0"
|
||||
# xbar.pe_i -> slice_i only (local Y-direction access)
|
||||
for pe in range(8):
|
||||
assert (f"{cp}.xbar.pe{pe}", f"{cp}.hbm_ctrl.slice{pe}") in es
|
||||
# Negative: xbar.pe_i must NOT directly connect to a different slice
|
||||
assert (f"{cp}.xbar.pe0", f"{cp}.hbm_ctrl.slice1") not in es
|
||||
assert (f"{cp}.xbar.pe0", f"{cp}.hbm_ctrl.slice4") not in es
|
||||
assert (f"{cp}.xbar.pe4", f"{cp}.hbm_ctrl.slice0") not in es
|
||||
for i in range(4):
|
||||
assert (f"{cp}.xbar_top", f"{cp}.hbm_ctrl.slice{i}") in es
|
||||
for i in range(4, 8):
|
||||
assert (f"{cp}.xbar_bot", f"{cp}.hbm_ctrl.slice{i}") in es
|
||||
# Negative: xbar_top must NOT connect to bottom slices
|
||||
assert (f"{cp}.xbar_top", f"{cp}.hbm_ctrl.slice4") not in es
|
||||
assert (f"{cp}.xbar_bot", f"{cp}.hbm_ctrl.slice0") not in es
|
||||
|
||||
|
||||
# ── Views: system ────────────────────────────────────────────────────
|
||||
@@ -235,9 +236,12 @@ def test_cube_view_has_all_components():
|
||||
expected = {"ucie-N", "ucie-S", "ucie-W", "ucie-E",
|
||||
"m_cpu", "hbm_ctrl",
|
||||
"bridge.left", "bridge.right", "noc", "sram",
|
||||
"xbar.pe0", "xbar.pe1", "xbar.pe2", "xbar.pe3",
|
||||
"xbar.pe4", "xbar.pe5", "xbar.pe6", "xbar.pe7",
|
||||
"xbar_top", "xbar_bot",
|
||||
"pe0", "pe1", "pe2", "pe3", "pe4", "pe5", "pe6", "pe7"}
|
||||
# Add UCIe connection nodes (4 ports × 4 connections)
|
||||
for port in ("N", "S", "E", "W"):
|
||||
for ci in range(4):
|
||||
expected.add(f"ucie-{port}.conn{ci}")
|
||||
assert set(v.nodes.keys()) == expected
|
||||
|
||||
|
||||
@@ -249,15 +253,12 @@ def test_cube_view_hbm_at_center():
|
||||
assert v.height_mm == 14.0
|
||||
|
||||
|
||||
def test_cube_view_pe_corner_mapping():
|
||||
def test_cube_view_pe_to_noc():
|
||||
"""PEs connect to NOC in cube view (no per-PE xbar)."""
|
||||
v = _graph().cube_view
|
||||
ves = {(e.src, e.dst) for e in v.edges}
|
||||
# Each PE connects to its own xbar entry (chain model)
|
||||
for i in range(8):
|
||||
assert (f"pe{i}", f"xbar.pe{i}") in ves
|
||||
# Old shared xbar.top/bottom mapping must not exist
|
||||
assert ("pe0", "xbar.top") not in ves
|
||||
assert ("pe4", "xbar.bottom") not in ves
|
||||
assert (f"pe{i}", "noc") in ves
|
||||
|
||||
|
||||
# ── Views: PE ────────────────────────────────────────────────────────
|
||||
@@ -311,24 +312,16 @@ def test_pe_dma_to_noc_edges():
|
||||
# ── Bridge connects XBAR halves (not NOC) ──────────────────────────
|
||||
|
||||
|
||||
def test_bridge_connects_xbar_halves():
|
||||
"""bridge.left connects leftmost PE nodes (pe0 top, pe4 bottom).
|
||||
bridge.right connects rightmost PE nodes (pe3 top, pe7 bottom)."""
|
||||
def test_bridge_connects_xbar_top_bot():
|
||||
"""Bridges connect xbar_top ↔ xbar_bot (bidirectional)."""
|
||||
es = _edge_set(_graph())
|
||||
cp = "sip0.cube0"
|
||||
# bridge.left ↔ pe0 (top-left) and pe4 (bottom-left)
|
||||
assert (f"{cp}.xbar.pe0", f"{cp}.bridge.left") in es
|
||||
assert (f"{cp}.bridge.left", f"{cp}.xbar.pe0") in es
|
||||
assert (f"{cp}.xbar.pe4", f"{cp}.bridge.left") in es
|
||||
assert (f"{cp}.bridge.left", f"{cp}.xbar.pe4") in es
|
||||
# bridge.right ↔ pe3 (top-right) and pe7 (bottom-right)
|
||||
assert (f"{cp}.xbar.pe3", f"{cp}.bridge.right") in es
|
||||
assert (f"{cp}.bridge.right", f"{cp}.xbar.pe3") in es
|
||||
assert (f"{cp}.xbar.pe7", f"{cp}.bridge.right") in es
|
||||
assert (f"{cp}.bridge.right", f"{cp}.xbar.pe7") in es
|
||||
# Old xbar.top/bottom ↔ bridge edges must NOT exist
|
||||
assert (f"{cp}.xbar.top", f"{cp}.bridge.left") not in es
|
||||
assert (f"{cp}.xbar.bottom", f"{cp}.bridge.left") not in es
|
||||
for bname in ("left", "right"):
|
||||
br = f"{cp}.bridge.{bname}"
|
||||
assert (f"{cp}.xbar_top", br) in es
|
||||
assert (br, f"{cp}.xbar_top") in es
|
||||
assert (f"{cp}.xbar_bot", br) in es
|
||||
assert (br, f"{cp}.xbar_bot") in es
|
||||
|
||||
|
||||
def test_no_bridge_to_noc_edges():
|
||||
@@ -341,7 +334,8 @@ def test_no_bridge_to_noc_edges():
|
||||
# ── Cube view: new edges ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_cube_view_pe_to_noc():
|
||||
def test_cube_view_pe_to_noc_edges():
|
||||
"""All PEs connect to NOC in cube view."""
|
||||
v = _graph().cube_view
|
||||
ves = {(e.src, e.dst) for e in v.edges}
|
||||
for i in range(8):
|
||||
@@ -357,53 +351,75 @@ def test_cube_view_sram():
|
||||
|
||||
|
||||
def test_cube_view_bridge_xbar():
|
||||
"""Cube view bridges connect xbar_top ↔ xbar_bot."""
|
||||
v = _graph().cube_view
|
||||
ves = {(e.src, e.dst) for e in v.edges}
|
||||
# bridge.left connects pe0 (top-left) ↔ pe4 (bottom-left)
|
||||
assert ("xbar.pe0", "bridge.left") in ves
|
||||
assert ("bridge.left", "xbar.pe0") in ves
|
||||
assert ("xbar.pe4", "bridge.left") in ves
|
||||
assert ("bridge.left", "xbar.pe4") in ves
|
||||
# bridge.right connects pe3 (top-right) ↔ pe7 (bottom-right)
|
||||
assert ("xbar.pe3", "bridge.right") in ves
|
||||
assert ("bridge.right", "xbar.pe3") in ves
|
||||
assert ("xbar.pe7", "bridge.right") in ves
|
||||
assert ("bridge.right", "xbar.pe7") in ves
|
||||
|
||||
|
||||
# ── Chain xbar: new topology edges ──────────────────────────────────
|
||||
|
||||
|
||||
def test_xbar_chain_edges():
|
||||
"""Adjacent xbar.pe nodes within each half are bidirectionally connected."""
|
||||
es = _edge_set(_graph())
|
||||
cp = "sip0.cube0"
|
||||
# Top chain: pe0 ↔ pe1 ↔ pe2 ↔ pe3 (NW→NE direction)
|
||||
for a, b in [(0, 1), (1, 2), (2, 3)]:
|
||||
assert (f"{cp}.xbar.pe{a}", f"{cp}.xbar.pe{b}") in es, f"missing pe{a}→pe{b}"
|
||||
assert (f"{cp}.xbar.pe{b}", f"{cp}.xbar.pe{a}") in es, f"missing pe{b}→pe{a}"
|
||||
# Bottom chain: pe4 ↔ pe5 ↔ pe6 ↔ pe7
|
||||
for a, b in [(4, 5), (5, 6), (6, 7)]:
|
||||
assert (f"{cp}.xbar.pe{a}", f"{cp}.xbar.pe{b}") in es, f"missing pe{a}→pe{b}"
|
||||
assert (f"{cp}.xbar.pe{b}", f"{cp}.xbar.pe{a}") in es, f"missing pe{b}→pe{a}"
|
||||
# Negative: no cross-chain direct edges
|
||||
assert (f"{cp}.xbar.pe0", f"{cp}.xbar.pe2") not in es
|
||||
assert (f"{cp}.xbar.pe0", f"{cp}.xbar.pe4") not in es
|
||||
for bname in ("left", "right"):
|
||||
br = f"bridge.{bname}"
|
||||
assert ("xbar_top", br) in ves
|
||||
assert (br, "xbar_top") in ves
|
||||
assert ("xbar_bot", br) in ves
|
||||
assert (br, "xbar_bot") in ves
|
||||
|
||||
|
||||
def test_ucie_noc_reverse_edges():
|
||||
"""UCIe ports must have reverse edges back to NOC (bidirectional)."""
|
||||
"""UCIe ports connect to NOC via conn nodes (bidirectional)."""
|
||||
es = _edge_set(_graph())
|
||||
cp = "sip0.cube1" # non-edge cube to avoid io-cube edges
|
||||
for port in ("N", "S", "E", "W"):
|
||||
assert (f"{cp}.ucie-{port}", f"{cp}.noc") in es, \
|
||||
f"missing ucie-{port}->noc reverse edge"
|
||||
# Direct ucie→noc no longer exists; path goes through conn nodes
|
||||
assert (f"{cp}.ucie-{port}", f"{cp}.noc") not in es
|
||||
# Each conn has edges: ucie↔conn, conn↔noc
|
||||
for ci in range(4):
|
||||
conn = f"{cp}.ucie-{port}.conn{ci}"
|
||||
assert (f"{cp}.ucie-{port}", conn) in es, \
|
||||
f"missing ucie-{port}->conn{ci}"
|
||||
assert (conn, f"{cp}.noc") in es, \
|
||||
f"missing conn{ci}->noc"
|
||||
assert (f"{cp}.noc", conn) in es, \
|
||||
f"missing noc->conn{ci}"
|
||||
assert (conn, f"{cp}.ucie-{port}") in es, \
|
||||
f"missing conn{ci}->ucie-{port}"
|
||||
|
||||
|
||||
def test_noc_to_xbar_pe_edges():
|
||||
"""NOC connects to all xbar.pe nodes (for remote cube HBM access)."""
|
||||
def test_ucie_conn_nodes_exist():
    """Check that every UCIe port of sip0.cube0 has four conn nodes.

    Each ``ucie-{N,S,E,W}.conn{0..3}`` node must be present in the graph,
    be of kind ``ucie_conn`` and carry an ``overhead_ns`` attribute of 0.0.
    """
    nodes = _graph().nodes
    prefix = "sip0.cube0"
    # Enumerate ports first, then connection indices, so the first failure
    # reported is the lowest-numbered missing connection of the first port.
    expected_ids = [
        f"{prefix}.ucie-{side}.conn{idx}"
        for side in ("N", "S", "E", "W")
        for idx in range(4)
    ]
    for conn_id in expected_ids:
        assert conn_id in nodes, f"missing {conn_id}"
        node = nodes[conn_id]
        assert node.kind == "ucie_conn"
        assert node.attrs["overhead_ns"] == 0.0
|
||||
|
||||
|
||||
def test_ucie_conn_edge_bw():
    """Check the bandwidth on conn<->NOC edges of sip0.cube0.

    For every UCIe port and each of its four conn nodes, both the
    conn->noc edge and the noc->conn edge must report ``bw_gbs == 128.0``.
    """
    by_endpoints = {(edge.src, edge.dst): edge for edge in _graph().edges}
    prefix = "sip0.cube0"
    noc = f"{prefix}.noc"
    for side in ("N", "S", "E", "W"):
        for idx in range(4):
            conn_id = f"{prefix}.ucie-{side}.conn{idx}"
            # A missing edge surfaces as KeyError from the lookup itself.
            forward = by_endpoints[(conn_id, noc)]
            assert forward.bw_gbs == 128.0, f"{conn_id}→noc bw={forward.bw_gbs}"
            backward = by_endpoints[(noc, conn_id)]
            assert backward.bw_gbs == 128.0
|
||||
|
||||
|
||||
def test_cross_cube_path_includes_conn():
    """Check that a cross-cube route passes through conn nodes.

    Routes from ``sip0.cube0.pe0`` to ``sip0.cube1.hbm_ctrl.slice0`` and
    requires at least two hops whose id contains ``.conn``.
    """
    route = PathRouter(_graph()).find_path(
        "sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.slice0"
    )
    conn_nodes = [hop for hop in route if ".conn" in hop]
    assert len(conn_nodes) >= 2, f"Expected >=2 conn nodes in path, got {conn_nodes}"
|
||||
|
||||
|
||||
def test_noc_to_xbar_top_bot_edges():
|
||||
"""NOC connects to xbar_top and xbar_bot."""
|
||||
es = _edge_set(_graph())
|
||||
cp = "sip0.cube0"
|
||||
for pe in range(8):
|
||||
assert (f"{cp}.noc", f"{cp}.xbar.pe{pe}") in es, \
|
||||
f"missing noc->xbar.pe{pe}"
|
||||
assert (f"{cp}.noc", f"{cp}.xbar_top") in es
|
||||
assert (f"{cp}.noc", f"{cp}.xbar_bot") in es
|
||||
|
||||
Reference in New Issue
Block a user