d75da439c6
- Probe CLI: restructured output (tables first, routes below), per-hop timestamps, split cross-cube into best/worst cases, D2H read section - UCIe overhead: 1ns -> 8ns per port (16ns per crossing) to fix cross-cube-best < cross-half latency inversion - HBM efficiency: added efficiency=0.8 factor to hbm_ctrl, reducing effective BW from 256 to 204.8 GB/s - Multi-size BW sweep: saturation tables (4KB-1MB) for all probe cases - Probe default data size: 4KB -> 32KB for more realistic measurements - IOChiplet NOC + D2H topology and tests - NOC mesh, xbar, BW occupancy components and tests - Cube mesh visualization diagram 278 tests pass. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
426 lines
15 KiB
Python
426 lines
15 KiB
Python
from pathlib import Path
|
||
|
||
from kernbench.policy.routing.router import PathRouter
|
||
from kernbench.topology.builder import load_topology
|
||
|
||
# Location of the topology description loaded by every test in this module:
# ``topology.yaml`` in the directory one level above this file's directory.
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
|
||
|
||
|
||
def _graph():
    """Load the topology file at ``TOPOLOGY_PATH`` and return the result.

    Each call re-invokes ``load_topology``; nothing is cached here.
    """
    topology = load_topology(TOPOLOGY_PATH)
    return topology
|
||
|
||
|
||
# ── Full graph: node counts ──────────────────────────────────────────
|
||
|
||
|
||
def test_full_graph_node_count():
    """The full graph contains exactly the hand-derived number of nodes.

    Breakdown the expected total is built from:
      * 1 switch
      * 2 SIPs, each with
          - 1 IO chiplet: pcie_ep + io_cpu + io_noc + 4 io_ucie
            + 4*4 io_conn                                        = 23
          - 16 cubes, each with
              cube comps: 9 (noc, m_cpu, sram, 2 bridge, 4 ucie)
                          + 16 ucie_conn (4 ports x 4 connections)
                          + 2 xbar_top/bot
                          + 8 hbm slices                         = 35
              PEs: 8 PEs x 6 pe comps                            = 48
      * total = 1 + 2 * (23 + 16 * (35 + 48)) = 2703
    """
    topology = _graph()

    io_nodes = 3 + 4 + 4 * 4
    cube_nodes = 9 + 16 + 2 + 8
    pe_nodes = 8 * 6
    nodes_per_sip = io_nodes + 16 * (cube_nodes + pe_nodes)
    expected_total = 1 + 2 * nodes_per_sip  # == 2703

    assert len(topology.nodes) == expected_total
|
||
|
||
|
||
def test_full_graph_edge_count():
    """The full graph contains exactly the hand-derived number of edges.

    Per SIP: 16 cubes x 168 edges + 48 inter-cube + 77 IO = 2813.
    Two SIPs give 5626 in total.
    """
    topology = _graph()

    edges_per_cube = sum((
        56,      # PE-internal
        8, 8,    # pe_dma -> noc, noc -> pe_cpu
        8, 8,    # xbar_top <-> hbm{0..3} (4+4), xbar_bot <-> hbm{4..7} (4+4)
        2, 2,    # noc <-> xbar_top, noc <-> xbar_bot
        2, 2,    # xbar_top <-> bridge.left, bridge.left <-> xbar_bot
        2, 2,    # xbar_top <-> bridge.right, bridge.right <-> xbar_bot
        64,      # ucie
        2, 2,    # m_cpu <-> noc, noc <-> sram
    ))  # == 168
    inter_cube_edges = 48
    io_edges = 77
    edges_per_sip = 16 * edges_per_cube + inter_cube_edges + io_edges  # == 2813
    expected_total = 2 * edges_per_sip  # == 5626

    assert len(topology.edges) == expected_total
|
||
|
||
|
||
# ── Full graph: specific nodes exist ─────────────────────────────────
|
||
|
||
|
||
def test_system_switch_exists():
    """``fabric.switch0`` exists, has kind ``switch`` and carries no position."""
    nodes = _graph().nodes
    switch_id = "fabric.switch0"

    assert switch_id in nodes
    switch = nodes[switch_id]
    assert switch.kind == "switch"
    # The switch is abstract, so it has no physical placement.
    assert switch.pos_mm is None
|
||
|
||
|
||
def test_io_chiplet_nodes_exist():
    """Both SIPs expose ``pcie_ep`` and ``io_cpu`` nodes under ``io0``."""
    nodes = _graph().nodes
    for sip_idx in (0, 1):
        for component in ("pcie_ep", "io_cpu"):
            assert f"sip{sip_idx}.io0.{component}" in nodes
|
||
|
||
|
||
def test_cube_component_nodes_exist():
    """``sip0.cube0`` has its cube-level components and eight HBM slices."""
    nodes = _graph().nodes
    prefix = "sip0.cube0"

    cube_components = (
        "noc", "m_cpu",
        "bridge.left", "bridge.right",
        "ucie-N", "ucie-S", "ucie-E", "ucie-W",
        "sram", "xbar_top", "xbar_bot",
    )
    for component in cube_components:
        assert f"{prefix}.{component}" in nodes

    # Per-PE xbar entry nodes must be absent.
    for pe_idx in range(8):
        assert f"{prefix}.xbar.pe{pe_idx}" not in nodes

    # Every HBM slice is present and typed as an hbm_ctrl node.
    for slice_idx in range(8):
        slice_id = f"{prefix}.hbm_ctrl.slice{slice_idx}"
        assert slice_id in nodes
        assert nodes[slice_id].kind == "hbm_ctrl"
|
||
|
||
|
||
def test_pe_component_nodes_exist():
    """The six PE components exist on the first and the last PE of the system."""
    nodes = _graph().nodes
    pe_components = (
        "pe_cpu", "pe_scheduler", "pe_dma", "pe_gemm", "pe_math", "pe_tcm",
    )
    for component in pe_components:
        for pe_prefix in ("sip0.cube0.pe0", "sip1.cube15.pe7"):
            assert f"{pe_prefix}.{component}" in nodes
|
||
|
||
|
||
# ── Full graph: positions ────────────────────────────────────────────
|
||
|
||
|
||
def test_hbm_ctrl_slices_at_cube_center():
    """All eight HBM slices of ``sip0.cube0`` share one position.

    cube0 has origin (0, 0) with cx=8.5, cy=7.0; hbm_ctrl is placed at
    (cx - 2, cy), i.e. (6.5, 7.0).
    """
    nodes = _graph().nodes
    expected_pos = (6.5, 7.0)
    for slice_idx in range(8):
        slice_node = nodes[f"sip0.cube0.hbm_ctrl.slice{slice_idx}"]
        assert slice_node.pos_mm == expected_pos
|
||
|
||
|
||
def test_hbm_ctrl_slices_cube5_position():
    """HBM slice 0 of cube5 is offset by the cube's grid origin.

    cube5 is col=1, row=1, so its origin is (1*18, 1*15) = (18, 15) and
    hbm_ctrl lands at (18 + 6.5, 15 + 7.0) = (24.5, 22.0).
    """
    slice_node = _graph().nodes["sip0.cube5.hbm_ctrl.slice0"]
    expected_pos = (24.5, 22.0)
    assert slice_node.pos_mm == expected_pos
|
||
|
||
|
||
def test_ucie_ports_at_cube_edges():
    """UCIe ports of ``sip0.cube0`` sit just inside the cube boundary.

    cube0 has origin (0, 0), cube_w=17, cube_h=14; each UCIe node is inset
    by half its own size so that its edge touches the boundary.
    """
    nodes = _graph().nodes
    expected_positions = {
        "N": (8.5, 0.6),
        "S": (8.5, 13.4),
        "W": (1.0, 7.0),
        "E": (16.0, 7.0),
    }
    for side, position in expected_positions.items():
        assert nodes[f"sip0.cube0.ucie-{side}"].pos_mm == position
|
||
|
||
|
||
# ── Full graph: edges ────────────────────────────────────────────────
|
||
|
||
|
||
def _edge_set(g):
    """Return the set of ``(src, dst)`` pairs taken from every edge in ``g.edges``."""
    pairs = set()
    for edge in g.edges:
        pairs.add((edge.src, edge.dst))
    return pairs
|
||
|
||
|
||
def test_inter_cube_ucie_edges():
    """cube0 links to its east and south neighbours through UCIe ports."""
    edge_pairs = _edge_set(_graph())
    expected_links = (
        # cube0 (0,0) E -> cube1 (1,0) W
        ("sip0.cube0.ucie-E", "sip0.cube1.ucie-W"),
        # cube0 (0,0) S -> cube4 (0,1) N
        ("sip0.cube0.ucie-S", "sip0.cube4.ucie-N"),
    )
    for link in expected_links:
        assert link in edge_pairs
|
||
|
||
|
||
def test_io_to_cube_edges():
    """io0's UCIe PHYs P0 and P3 connect to the N ports of cube0 and cube3."""
    edge_pairs = _edge_set(_graph())
    for idx in (0, 3):
        link = (f"sip0.io0.ucie-P{idx}", f"sip0.cube{idx}.ucie-N")
        assert link in edge_pairs
|
||
|
||
|
||
def test_switch_to_io_edges():
    """The fabric switch has an edge to the PCIe endpoint of each SIP."""
    edge_pairs = _edge_set(_graph())
    for sip_idx in (0, 1):
        assert ("fabric.switch0", f"sip{sip_idx}.io0.pcie_ep") in edge_pairs
|
||
|
||
|
||
def test_pe_dma_to_noc_only():
    """Each PE_DMA in ``sip0.cube0`` feeds the NOC and neither xbar directly."""
    edge_pairs = _edge_set(_graph())
    cube = "sip0.cube0"
    for pe_idx in range(8):
        dma = f"{cube}.pe{pe_idx}.pe_dma"
        assert (dma, f"{cube}.noc") in edge_pairs
        # A direct pe_dma -> xbar edge must not exist for either half.
        for xbar in ("xbar_top", "xbar_bot"):
            assert (dma, f"{cube}.{xbar}") not in edge_pairs
|
||
|
||
|
||
def test_command_path_m_cpu_noc_pe_cpu():
    """The command path m_cpu <-> noc -> pe_cpu exists in ``sip0.cube0``."""
    edge_pairs = _edge_set(_graph())
    cube = "sip0.cube0"
    m_cpu = f"{cube}.m_cpu"
    noc = f"{cube}.noc"

    # m_cpu and noc are linked in both directions.
    assert (m_cpu, noc) in edge_pairs
    assert (noc, m_cpu) in edge_pairs

    # noc reaches pe_cpu; spot-checked on the first and last PE.
    for pe_idx in (0, 7):
        assert (noc, f"{cube}.pe{pe_idx}.pe_cpu") in edge_pairs
|
||
|
||
|
||
def test_pe_internal_edges():
    """``sip0.cube0.pe0`` contains the expected intra-PE edges."""
    edge_pairs = _edge_set(_graph())
    pe = "sip0.cube0.pe0"
    internal_links = (
        ("pe_cpu", "pe_scheduler"),
        ("pe_scheduler", "pe_dma"),
        ("pe_scheduler", "pe_gemm"),
        ("pe_scheduler", "pe_math"),
        ("pe_dma", "pe_tcm"),
        ("pe_gemm", "pe_tcm"),
        ("pe_math", "pe_tcm"),
    )
    for src, dst in internal_links:
        assert (f"{pe}.{src}", f"{pe}.{dst}") in edge_pairs
|
||
|
||
|
||
def test_xbar_top_bot_to_hbm_slice_edges():
    """xbar_top serves HBM slices 0-3 and xbar_bot serves slices 4-7."""
    edge_pairs = _edge_set(_graph())
    cube = "sip0.cube0"
    top = f"{cube}.xbar_top"
    bot = f"{cube}.xbar_bot"

    for xbar, slice_range in ((top, range(4)), (bot, range(4, 8))):
        for slice_idx in slice_range:
            assert (xbar, f"{cube}.hbm_ctrl.slice{slice_idx}") in edge_pairs

    # Negative spot checks: neither xbar reaches into the other half.
    assert (top, f"{cube}.hbm_ctrl.slice4") not in edge_pairs
    assert (bot, f"{cube}.hbm_ctrl.slice0") not in edge_pairs
|
||
|
||
|
||
# ── Views: system ────────────────────────────────────────────────────
|
||
|
||
|
||
def test_system_view_nodes():
    """The system view lists the switch, both SIPs and both IO chiplets."""
    view_nodes = _graph().system_view.nodes
    for node_id in ("fabric.switch0", "sip0", "sip1", "sip0.io0", "sip1.io0"):
        assert node_id in view_nodes
|
||
|
||
|
||
# ── Views: SIP ───────────────────────────────────────────────────────
|
||
|
||
|
||
def test_sip_view_cube_count():
    """The SIP view holds 16 nodes whose name starts with ``cube``."""
    view = _graph().sip_view
    cube_total = sum(1 for name in view.nodes if name.startswith("cube"))
    assert cube_total == 16
|
||
|
||
|
||
def test_sip_view_io_chiplets():
    """The SIP view includes the ``io0`` chiplet node."""
    view_nodes = _graph().sip_view.nodes
    assert "io0" in view_nodes
|
||
|
||
|
||
def test_sip_view_cube_positions():
    """cube0 and cube1 are centred where the SIP-view layout puts them.

    With io_margin=6, cube0 (0,0) is centred at (8.5, 6 + 7.0) = (8.5, 13.0);
    cube1 (1,0) is one column over at (18 + 8.5, 13.0) = (26.5, 13.0).
    """
    view_nodes = _graph().sip_view.nodes

    cube0_x, cube0_y = view_nodes["cube0"].pos_mm
    assert cube0_x == 8.5
    assert cube0_y == 13.0

    cube1_x, cube1_y = view_nodes["cube1"].pos_mm
    assert cube1_x == 26.5
    assert cube1_y == 13.0
|
||
|
||
|
||
# ── Views: cube ──────────────────────────────────────────────────────
|
||
|
||
|
||
def test_cube_view_has_all_components():
    """The cube view's node names are exactly the expected component set."""
    view = _graph().cube_view

    ucie_ports = {f"ucie-{side}" for side in ("N", "S", "W", "E")}
    # Four connection nodes hang off each UCIe port (4 ports x 4 connections).
    ucie_conns = {
        f"ucie-{side}.conn{conn_idx}"
        for side in ("N", "S", "E", "W")
        for conn_idx in range(4)
    }
    fixed_components = {
        "m_cpu", "hbm_ctrl",
        "bridge.left", "bridge.right", "noc", "sram",
        "xbar_top", "xbar_bot",
    }
    pes = {f"pe{pe_idx}" for pe_idx in range(8)}
    expected = ucie_ports | ucie_conns | fixed_components | pes

    assert set(view.nodes.keys()) == expected
|
||
|
||
|
||
def test_cube_view_hbm_at_center():
    """Cube view places hbm_ctrl and noc as expected on a 17 x 14 canvas."""
    view = _graph().cube_view
    expected_positions = (("hbm_ctrl", (6.5, 7.0)), ("noc", (10.5, 7.0)))
    for name, position in expected_positions:
        assert view.nodes[name].pos_mm == position
    assert view.width_mm == 17.0
    assert view.height_mm == 14.0
|
||
|
||
|
||
def test_cube_view_pe_to_noc():
    """In the cube view every PE has an edge to the NOC (no per-PE xbar)."""
    view = _graph().cube_view
    view_pairs = set()
    for edge in view.edges:
        view_pairs.add((edge.src, edge.dst))
    for pe_idx in range(8):
        assert (f"pe{pe_idx}", "noc") in view_pairs
|
||
|
||
|
||
# ── Views: PE ────────────────────────────────────────────────────────
|
||
|
||
|
||
def test_pe_view_has_all_components():
    """The PE view's node names are exactly the six PE components."""
    view = _graph().pe_view
    expected = {
        "pe_cpu",
        "pe_scheduler",
        "pe_dma",
        "pe_gemm",
        "pe_math",
        "pe_tcm",
    }
    assert set(view.nodes.keys()) == expected
|
||
|
||
|
||
def test_pe_view_edges():
    """The PE view carries the cpu -> scheduler -> engines -> tcm edges."""
    view = _graph().pe_view
    view_pairs = {(edge.src, edge.dst) for edge in view.edges}
    expected_links = (
        ("pe_cpu", "pe_scheduler"),
        ("pe_scheduler", "pe_dma"),
        ("pe_scheduler", "pe_gemm"),
        ("pe_scheduler", "pe_math"),
        ("pe_dma", "pe_tcm"),
        ("pe_gemm", "pe_tcm"),
        ("pe_math", "pe_tcm"),
    )
    for link in expected_links:
        assert link in view_pairs
|
||
|
||
|
||
# ── SRAM ────────────────────────────────────────────────────────────
|
||
|
||
|
||
def test_sram_node_exists():
    """``sip0.cube0.sram`` exists and has kind ``sram``."""
    nodes = _graph().nodes
    sram_id = "sip0.cube0.sram"
    assert sram_id in nodes
    assert nodes[sram_id].kind == "sram"
|
||
|
||
|
||
def test_noc_to_sram_edges():
    """NOC and SRAM of ``sip0.cube0`` are linked in both directions."""
    edge_pairs = _edge_set(_graph())
    noc = "sip0.cube0.noc"
    sram = "sip0.cube0.sram"
    assert (noc, sram) in edge_pairs
    assert (sram, noc) in edge_pairs
|
||
|
||
|
||
# ── PE_DMA → NOC (non-HBM data path) ───────────────────────────────
|
||
|
||
|
||
def test_pe_dma_to_noc_edges():
    """Every PE_DMA in ``sip0.cube0`` has an edge into the cube's NOC."""
    edge_pairs = _edge_set(_graph())
    cube = "sip0.cube0"
    noc = f"{cube}.noc"
    for pe_idx in range(8):
        assert (f"{cube}.pe{pe_idx}.pe_dma", noc) in edge_pairs
|
||
|
||
|
||
# ── Bridge connects XBAR halves (not NOC) ──────────────────────────
|
||
|
||
|
||
def test_bridge_connects_xbar_top_bot():
    """Both bridges link to xbar_top and xbar_bot in each direction."""
    edge_pairs = _edge_set(_graph())
    cube = "sip0.cube0"
    xbars = (f"{cube}.xbar_top", f"{cube}.xbar_bot")
    for side in ("left", "right"):
        bridge = f"{cube}.bridge.{side}"
        for xbar in xbars:
            assert (xbar, bridge) in edge_pairs
            assert (bridge, xbar) in edge_pairs
|
||
|
||
|
||
def test_no_bridge_to_noc_edges():
    """Neither bridge of ``sip0.cube0`` has an edge going to the NOC."""
    edge_pairs = _edge_set(_graph())
    cube = "sip0.cube0"
    for side in ("left", "right"):
        assert (f"{cube}.bridge.{side}", f"{cube}.noc") not in edge_pairs
|
||
|
||
|
||
# ── Cube view: new edges ────────────────────────────────────────────
|
||
|
||
|
||
def test_cube_view_pe_to_noc_edges():
    """Each of the eight PEs has a ``pe -> noc`` edge in the cube view."""
    view_edges = _graph().cube_view.edges
    view_pairs = set(map(lambda edge: (edge.src, edge.dst), view_edges))
    for pe_idx in range(8):
        assert (f"pe{pe_idx}", "noc") in view_pairs
|
||
|
||
|
||
def test_cube_view_sram():
    """The cube view has an ``sram`` node linked to ``noc`` both ways."""
    view = _graph().cube_view
    assert "sram" in view.nodes

    view_pairs = {(edge.src, edge.dst) for edge in view.edges}
    for link in (("noc", "sram"), ("sram", "noc")):
        assert link in view_pairs
|
||
|
||
|
||
def test_cube_view_bridge_xbar():
    """In the cube view both bridges link to xbar_top and xbar_bot both ways."""
    view = _graph().cube_view
    view_pairs = {(edge.src, edge.dst) for edge in view.edges}
    for side in ("left", "right"):
        bridge = f"bridge.{side}"
        for xbar in ("xbar_top", "xbar_bot"):
            assert (xbar, bridge) in view_pairs
            assert (bridge, xbar) in view_pairs
|
||
|
||
|
||
def test_ucie_noc_reverse_edges():
    """UCIe ports reach the NOC only through conn nodes, in both directions."""
    edge_pairs = _edge_set(_graph())
    # Only intra-cube edges of cube1 are inspected here.
    # NOTE(review): the original rationale called cube1 a "non-edge cube",
    # but other tests in this file place it at grid position (1,0) - confirm
    # the intended cube.
    cp = "sip0.cube1"
    noc = f"{cp}.noc"
    for port in ("N", "S", "E", "W"):
        ucie = f"{cp}.ucie-{port}"
        # There is no direct ucie -> noc edge; traffic goes via conn nodes.
        assert (ucie, noc) not in edge_pairs
        # Each conn node is wired ucie <-> conn and conn <-> noc.
        for ci in range(4):
            conn = f"{ucie}.conn{ci}"
            assert (ucie, conn) in edge_pairs, f"missing ucie-{port}->conn{ci}"
            assert (conn, noc) in edge_pairs, f"missing conn{ci}->noc"
            assert (noc, conn) in edge_pairs, f"missing noc->conn{ci}"
            assert (conn, ucie) in edge_pairs, f"missing conn{ci}->ucie-{port}"
|
||
|
||
|
||
def test_ucie_conn_nodes_exist():
    """Every UCIe port of ``sip0.cube0`` owns four ``ucie_conn`` nodes.

    Each conn node must exist, have kind ``ucie_conn`` and carry an
    ``overhead_ns`` attribute of 0.0.
    """
    nodes = _graph().nodes
    cube = "sip0.cube0"
    conn_ids = [
        f"{cube}.ucie-{port}.conn{ci}"
        for port in ("N", "S", "E", "W")
        for ci in range(4)
    ]
    for conn_id in conn_ids:
        assert conn_id in nodes, f"missing {conn_id}"
        conn_node = nodes[conn_id]
        assert conn_node.kind == "ucie_conn"
        assert conn_node.attrs["overhead_ns"] == 0.0
|
||
|
||
|
||
def test_ucie_conn_edge_bw():
    """conn<->NOC edges must have per_connection_bw_gbs (128 GB/s).

    Both directions (conn -> noc and noc -> conn) are checked for every
    conn node of every UCIe port in ``sip0.cube0``. A missing edge is
    reported as an assertion failure naming the edge rather than as a bare
    ``KeyError``, and both directions report the offending bandwidth.
    """
    g = _graph()
    edge_map = {(e.src, e.dst): e for e in g.edges}
    cp = "sip0.cube0"
    noc = f"{cp}.noc"
    for port in ("N", "S", "E", "W"):
        for ci in range(4):
            conn_id = f"{cp}.ucie-{port}.conn{ci}"
            directions = (
                ((conn_id, noc), f"{conn_id}→noc"),
                ((noc, conn_id), f"noc→{conn_id}"),
            )
            for key, label in directions:
                # Check presence first so a missing edge fails with context.
                assert key in edge_map, f"missing edge {label}"
                bw = edge_map[key].bw_gbs
                assert bw == 128.0, f"{label} bw={bw}"
|
||
|
||
|
||
def test_cross_cube_path_includes_conn():
    """A PE-to-remote-HBM path crosses at least two conn nodes.

    Routes from ``sip0.cube0.pe0`` to HBM slice 0 of ``sip0.cube1`` and
    counts the hops whose name contains ``.conn``.
    """
    router = PathRouter(_graph())
    path = router.find_path("sip0.cube0.pe0", "sip0.cube1.hbm_ctrl.slice0")
    conn_nodes = list(filter(lambda hop: ".conn" in hop, path))
    assert len(conn_nodes) >= 2, f"Expected >=2 conn nodes in path, got {conn_nodes}"
|
||
|
||
|
||
def test_noc_to_xbar_top_bot_edges():
    """The NOC of ``sip0.cube0`` has an edge to each xbar half."""
    edge_pairs = _edge_set(_graph())
    cube = "sip0.cube0"
    noc = f"{cube}.noc"
    for xbar in ("xbar_top", "xbar_bot"):
        assert (noc, f"{cube}.{xbar}") in edge_pairs
|