Add probe CLI improvements, D2H read, UCIe/HBM tuning, BW sweep
- Probe CLI: restructured output (tables first, routes below), per-hop timestamps, split cross-cube into best/worst cases, D2H read section
- UCIe overhead: 1ns -> 8ns per port (16ns per crossing) to fix cross-cube-best < cross-half latency inversion
- HBM efficiency: added efficiency=0.8 factor to hbm_ctrl, reducing effective BW from 256 to 204.8 GB/s
- Multi-size BW sweep: saturation tables (4KB-1MB) for all probe cases
- Probe default data size: 4KB -> 32KB for more realistic measurements
- IOChiplet NOC + D2H topology and tests
- NOC mesh, xbar, BW occupancy components and tests
- Cube mesh visualization diagram

278 tests pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+337
-201
@@ -5,11 +5,13 @@ TopologyGraph with nodes, edges, and representative view projections.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from .mesh_gen import ensure_mesh_file
|
||||
from .types import Edge, Node, TopologyGraph, TopologyHandle, ViewGraph
|
||||
|
||||
|
||||
@@ -42,6 +44,10 @@ def load_topology(path: Path) -> TopologyGraph:
|
||||
"""Load topology spec from file and compile into a topology graph."""
|
||||
spec = _read_spec(path)
|
||||
_validate_spec(spec)
|
||||
# Generate cube_mesh.yaml alongside the topology file
|
||||
mesh_path = path.parent / "cube_mesh.yaml"
|
||||
mesh_data = ensure_mesh_file(spec["cube"], mesh_path)
|
||||
spec["_mesh"] = mesh_data
|
||||
return _compile_graph(spec)
|
||||
|
||||
|
||||
@@ -110,7 +116,7 @@ def _compile_graph(spec: dict) -> TopologyGraph:
|
||||
cid = row * mesh_w + col
|
||||
cp = f"{sp}.cube{cid}"
|
||||
origin = (col * stride_x, row * stride_y)
|
||||
_instantiate_cube(nodes, edges, cp, cube_spec, origin)
|
||||
_instantiate_cube(nodes, edges, cp, cube_spec, origin, spec["_mesh"])
|
||||
|
||||
# Inter-cube UCIe mesh
|
||||
_add_inter_cube_edges(edges, sp, mesh_w, mesh_h, sip_spec)
|
||||
@@ -148,9 +154,9 @@ def _cube_local_positions(cube_w: float, cube_h: float) -> dict[str, tuple[float
|
||||
"ucie-W": (uw, cy),
|
||||
"ucie-E": (cube_w - uw, cy),
|
||||
"m_cpu": (cube_w - 2.5, cy - 1.5),
|
||||
"xbar.top": (cx, 3.5), # Y reference for top-half xbar.pe nodes
|
||||
"xbar_top": (cx, 3.5),
|
||||
"hbm_ctrl": (cx - 2.0, cy),
|
||||
"xbar.bottom": (cx, cube_h - 3.5), # Y reference for bottom-half xbar.pe nodes
|
||||
"xbar_bot": (cx, cube_h - 3.5),
|
||||
"bridge.left": (2.5, cy + 2.0),
|
||||
"bridge.right": (cube_w - 2.5, cy + 2.0),
|
||||
"noc": (cx + 2.0, cy),
|
||||
@@ -195,10 +201,11 @@ def _instantiate_io_chiplets(
|
||||
mesh_h: int,
|
||||
seam: float,
|
||||
) -> None:
|
||||
"""Add IO chiplet nodes and internal pcie_ep → io_cpu edges."""
|
||||
"""Add IO chiplet nodes: pcie_ep, io_cpu, io_noc, io_ucie PHYs, conn nodes."""
|
||||
io_spec = sip_spec["iochiplet"]
|
||||
comp = io_spec["components"]
|
||||
links = io_spec["links"]
|
||||
ucie_cfg = io_spec.get("ucie", {})
|
||||
mesh_total_w = mesh_w * cube_w + (mesh_w - 1) * seam
|
||||
mesh_total_h = mesh_h * cube_h + (mesh_h - 1) * seam
|
||||
|
||||
@@ -208,9 +215,9 @@ def _instantiate_io_chiplets(
|
||||
side = inst["place"]["side"]
|
||||
cx = mesh_total_w / 2
|
||||
if side == "N":
|
||||
pcie_y, cpu_y = -5.0, -3.0
|
||||
pcie_y, cpu_y, noc_y = -5.0, -3.0, -4.0
|
||||
else:
|
||||
pcie_y, cpu_y = mesh_total_h + 5.0, mesh_total_h + 3.0
|
||||
pcie_y, cpu_y, noc_y = mesh_total_h + 5.0, mesh_total_h + 3.0, mesh_total_h + 4.0
|
||||
|
||||
# pcie_ep
|
||||
ep = comp["pcie_ep"]
|
||||
@@ -228,13 +235,114 @@ def _instantiate_io_chiplets(
|
||||
attrs=cpu["attrs"], pos_mm=(cx, cpu_y), label="IO CPU",
|
||||
)
|
||||
|
||||
# Internal edge
|
||||
# io_noc (central switch inside IOChiplet)
|
||||
noc = comp["io_noc"]
|
||||
noc_id = f"{prefix}.noc"
|
||||
nodes[noc_id] = Node(
|
||||
id=noc_id, kind=noc["kind"], impl=noc["impl"],
|
||||
attrs=noc["attrs"], pos_mm=(cx, noc_y), label="IO NOC",
|
||||
)
|
||||
|
||||
# pcie_ep ↔ io_noc (bidirectional)
|
||||
edges.append(Edge(
|
||||
src=ep_id, dst=cpu_id,
|
||||
distance_mm=links["pcie_ep_to_io_cpu_mm"],
|
||||
bw_gbs=links["pcie_ep_to_io_cpu_bw_gbs"],
|
||||
src=ep_id, dst=noc_id,
|
||||
distance_mm=links["pcie_ep_to_noc_mm"],
|
||||
bw_gbs=links["pcie_ep_to_noc_bw_gbs"],
|
||||
kind="io_internal",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=noc_id, dst=ep_id,
|
||||
distance_mm=links["pcie_ep_to_noc_mm"],
|
||||
bw_gbs=links["pcie_ep_to_noc_bw_gbs"],
|
||||
kind="io_internal",
|
||||
))
|
||||
|
||||
# io_cpu ↔ io_noc (bidirectional)
|
||||
edges.append(Edge(
|
||||
src=cpu_id, dst=noc_id,
|
||||
distance_mm=links["io_cpu_to_noc_mm"],
|
||||
bw_gbs=links["io_cpu_to_noc_bw_gbs"],
|
||||
kind="io_internal",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=noc_id, dst=cpu_id,
|
||||
distance_mm=links["io_cpu_to_noc_mm"],
|
||||
bw_gbs=links["io_cpu_to_noc_bw_gbs"],
|
||||
kind="io_internal",
|
||||
))
|
||||
|
||||
# io_ucie PHY nodes + conn nodes per PHY
|
||||
io_ucie_ns = float(ucie_cfg.get("overhead_ns", 1.0))
|
||||
io_n_conn = int(ucie_cfg.get("n_connections", 4))
|
||||
io_conn_bw = float(ucie_cfg.get("per_connection_bw_gbs", 128.0))
|
||||
io_noc_to_ucie_mm = float(ucie_cfg.get("noc_to_ucie_mm", 0.5))
|
||||
|
||||
for phy in inst["ucie"]["phys"]:
|
||||
phy_id = f"{prefix}.ucie-{phy}"
|
||||
nodes[phy_id] = Node(
|
||||
id=phy_id, kind="io_ucie", impl="ucie_v1",
|
||||
attrs={"overhead_ns": io_ucie_ns},
|
||||
pos_mm=(cx, noc_y), label=f"IO UCIe-{phy}",
|
||||
)
|
||||
|
||||
for ci in range(io_n_conn):
|
||||
conn_id = f"{phy_id}.conn{ci}"
|
||||
nodes[conn_id] = Node(
|
||||
id=conn_id, kind="io_ucie_conn", impl="ucie_v1",
|
||||
attrs={"overhead_ns": 0.0},
|
||||
pos_mm=(cx, noc_y), label=f"IO UCIe-{phy} C{ci}",
|
||||
)
|
||||
# io_noc ↔ conn (per-connection BW)
|
||||
edges.append(Edge(
|
||||
src=noc_id, dst=conn_id,
|
||||
distance_mm=io_noc_to_ucie_mm,
|
||||
bw_gbs=io_conn_bw,
|
||||
kind="io_noc_to_conn",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=conn_id, dst=noc_id,
|
||||
distance_mm=io_noc_to_ucie_mm,
|
||||
bw_gbs=io_conn_bw,
|
||||
kind="conn_to_io_noc",
|
||||
))
|
||||
# conn ↔ io_ucie (internal, no BW limit)
|
||||
edges.append(Edge(
|
||||
src=conn_id, dst=phy_id,
|
||||
distance_mm=0.0, kind="io_ucie_internal",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=phy_id, dst=conn_id,
|
||||
distance_mm=0.0, kind="io_ucie_internal",
|
||||
))
|
||||
|
||||
|
||||
# ── PE-to-router distance ─────────────────────────────────────────
|
||||
|
||||
|
||||
def _compute_pe_noc_distances(
|
||||
mesh_data: dict,
|
||||
corner_pos: dict[str, list[tuple[float, float]]],
|
||||
corners: list[str],
|
||||
pe_per_corner: int,
|
||||
) -> dict[int, float]:
|
||||
"""Compute per-PE Euclidean distance from physical position to assigned router."""
|
||||
distances: dict[int, float] = {}
|
||||
routers = mesh_data["routers"]
|
||||
pe_idx = 0
|
||||
for corner in corners:
|
||||
for ci in range(pe_per_corner):
|
||||
pe_cx, pe_cy = corner_pos[corner][ci]
|
||||
target = f"pe{pe_idx}.dma"
|
||||
for _rkey, rval in routers.items():
|
||||
if rval is not None and target in rval.get("attach", []):
|
||||
rx, ry = rval["pos_mm"]
|
||||
dist = math.sqrt((pe_cx - rx) ** 2 + (pe_cy - ry) ** 2)
|
||||
distances[pe_idx] = round(dist, 2)
|
||||
break
|
||||
else:
|
||||
distances[pe_idx] = 0.0
|
||||
pe_idx += 1
|
||||
return distances
|
||||
|
||||
|
||||
# ── Instantiation: cube + PEs ───────────────────────────────────────
|
||||
@@ -246,18 +354,26 @@ def _instantiate_cube(
|
||||
cp: str,
|
||||
cube: dict,
|
||||
origin: tuple[float, float],
|
||||
mesh_data: dict,
|
||||
) -> None:
|
||||
"""Add all cube-internal nodes and edges, including PE instances."""
|
||||
"""Add all cube-internal nodes and edges, including PE instances.
|
||||
|
||||
Topology: PE_DMA → NOC → xbar_top/bot → HBM_CTRL.
|
||||
No per-PE xbar nodes; position-aware XBAR top/bottom replaces chaining.
|
||||
"""
|
||||
cube_w = cube["geometry"]["cube_mm"]["w"]
|
||||
cube_h = cube["geometry"]["cube_mm"]["h"]
|
||||
ox, oy = origin
|
||||
local_pos = _cube_local_positions(cube_w, cube_h)
|
||||
clinks = cube["links"]
|
||||
n_slices = cube["memory_map"]["hbm_slices_per_cube"]
|
||||
half = n_slices // 2
|
||||
|
||||
# ── UCIe ports ──
|
||||
ucie_ns = cube["ucie"]["overhead_ns"]
|
||||
for port in cube["ucie"]["ports"]:
|
||||
# ── UCIe ports + connection nodes ──
|
||||
ucie_cfg = cube["ucie"]
|
||||
ucie_ns = ucie_cfg["overhead_ns"]
|
||||
ucie_n_conn = ucie_cfg.get("n_connections", 1)
|
||||
for port in ucie_cfg["ports"]:
|
||||
pid = f"{cp}.ucie-{port}"
|
||||
lx, ly = local_pos[f"ucie-{port}"]
|
||||
nodes[pid] = Node(
|
||||
@@ -265,6 +381,14 @@ def _instantiate_cube(
|
||||
attrs={"overhead_ns": ucie_ns}, pos_mm=(ox + lx, oy + ly),
|
||||
label=f"UCIe-{port}",
|
||||
)
|
||||
for ci in range(ucie_n_conn):
|
||||
conn_id = f"{cp}.ucie-{port}.conn{ci}"
|
||||
nodes[conn_id] = Node(
|
||||
id=conn_id, kind="ucie_conn", impl="ucie_v1",
|
||||
attrs={"overhead_ns": 0.0},
|
||||
pos_mm=(ox + lx, oy + ly),
|
||||
label=f"UCIe-{port} C{ci}",
|
||||
)
|
||||
|
||||
# ── Named components: noc, m_cpu, sram ──
|
||||
for name in ("noc", "m_cpu", "sram"):
|
||||
@@ -277,7 +401,19 @@ def _instantiate_cube(
|
||||
label=name.upper().replace("_", " "),
|
||||
)
|
||||
|
||||
# ── HBM controller slices (one per PE) ──
|
||||
# ── xbar_top and xbar_bot (position-aware XBAR) ──
|
||||
xbar_spec = cube["components"]["xbar"]
|
||||
for xbar_name, xbar_cfg in [("xbar_top", xbar_spec["top"]),
|
||||
("xbar_bot", xbar_spec["bottom"])]:
|
||||
nid = f"{cp}.{xbar_name}"
|
||||
lx, ly = local_pos[xbar_name]
|
||||
nodes[nid] = Node(
|
||||
id=nid, kind=xbar_cfg["kind"], impl=xbar_cfg["impl"],
|
||||
attrs=xbar_cfg["attrs"], pos_mm=(ox + lx, oy + ly),
|
||||
label=xbar_name.upper().replace("_", " "),
|
||||
)
|
||||
|
||||
# ── HBM controller slices ──
|
||||
hbm_spec = cube["components"]["hbm_ctrl"]
|
||||
hbm_lx, hbm_ly = local_pos["hbm_ctrl"]
|
||||
for sl in range(n_slices):
|
||||
@@ -289,7 +425,7 @@ def _instantiate_cube(
|
||||
)
|
||||
|
||||
# ── Bridges ──
|
||||
for br in cube["components"]["xbar"]["bridges"]:
|
||||
for br in xbar_spec["bridges"]:
|
||||
bname = br["id"]
|
||||
nid = f"{cp}.bridge.{bname}"
|
||||
lx, ly = local_pos[f"bridge.{bname}"]
|
||||
@@ -299,34 +435,22 @@ def _instantiate_cube(
|
||||
label=f"Bridge {bname.upper()}",
|
||||
)
|
||||
|
||||
# ── PE instances + per-PE xbar entry nodes ──
|
||||
# ── PE instances (no per-PE xbar nodes) ──
|
||||
corners = cube["pe_layout"]["corners"]
|
||||
pe_per_corner = cube["pe_layout"]["pe_per_corner"]
|
||||
corner_pos = _corner_pe_positions(cube_w, cube_h)
|
||||
pe_tmpl = cube["pe_template"]
|
||||
pe_links = pe_tmpl["links"]
|
||||
|
||||
xbar_pe_spec = cube["components"]["xbar"]["pe"]
|
||||
xbar_top_y = local_pos["xbar.top"][1]
|
||||
xbar_bot_y = local_pos["xbar.bottom"][1]
|
||||
pe_noc_distances = _compute_pe_noc_distances(
|
||||
mesh_data, corner_pos, corners, pe_per_corner,
|
||||
)
|
||||
|
||||
pe_idx = 0
|
||||
for corner in corners:
|
||||
is_top = corner in ("NW", "NE")
|
||||
xbar_y = xbar_top_y if is_top else xbar_bot_y
|
||||
mm_key = "pe_to_xbar_row_n_mm" if is_top else "pe_to_xbar_row_s_mm"
|
||||
for ci in range(pe_per_corner):
|
||||
pp = f"{cp}.pe{pe_idx}"
|
||||
pe_cx, pe_cy = corner_pos[corner][ci]
|
||||
|
||||
# Per-PE xbar entry node
|
||||
xbar_nid = f"{cp}.xbar.pe{pe_idx}"
|
||||
nodes[xbar_nid] = Node(
|
||||
id=xbar_nid, kind=xbar_pe_spec["kind"], impl=xbar_pe_spec["impl"],
|
||||
attrs=xbar_pe_spec["attrs"], pos_mm=(ox + pe_cx, oy + xbar_y),
|
||||
label=f"XBAR PE{pe_idx}",
|
||||
)
|
||||
|
||||
# PE template components
|
||||
for comp_name, comp_spec in pe_tmpl["components"].items():
|
||||
cid = f"{pp}.{comp_name}"
|
||||
@@ -341,18 +465,10 @@ def _instantiate_cube(
|
||||
# PE-internal edges
|
||||
_add_pe_internal_edges(edges, pp, pe_links)
|
||||
|
||||
# PE_DMA → xbar.pe_i (HBM data path)
|
||||
edges.append(Edge(
|
||||
src=f"{pp}.pe_dma", dst=xbar_nid,
|
||||
distance_mm=clinks[mm_key],
|
||||
bw_gbs=clinks["pe_to_xbar_bw_gbs"],
|
||||
kind="pe_to_xbar",
|
||||
))
|
||||
|
||||
# PE_DMA → noc (non-HBM data path: SRAM, inter-cube, etc.)
|
||||
# PE_DMA → noc (distance auto-computed from PE physical position)
|
||||
edges.append(Edge(
|
||||
src=f"{pp}.pe_dma", dst=f"{cp}.noc",
|
||||
distance_mm=clinks["pe_dma_to_noc_mm"],
|
||||
distance_mm=pe_noc_distances.get(pe_idx, 0.0),
|
||||
bw_gbs=clinks["pe_dma_to_noc_bw_gbs"],
|
||||
kind="pe_to_noc",
|
||||
))
|
||||
@@ -366,97 +482,96 @@ def _instantiate_cube(
|
||||
|
||||
pe_idx += 1
|
||||
|
||||
# ── Cube fabric edges ──
|
||||
|
||||
# xbar.pe_i ↔ hbm_ctrl.slice_i (local Y-path, bidirectional for response)
|
||||
for i in range(n_slices):
|
||||
# ── xbar_top/bot → HBM slices ──
|
||||
hbm_eff = float(hbm_spec.get("attrs", {}).get("efficiency", 1.0))
|
||||
hbm_bw = clinks["xbar_to_hbm_bw_gbs"] * hbm_eff
|
||||
for i in range(half):
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.xbar.pe{i}", dst=f"{cp}.hbm_ctrl.slice{i}",
|
||||
src=f"{cp}.xbar_top", dst=f"{cp}.hbm_ctrl.slice{i}",
|
||||
distance_mm=clinks["xbar_to_hbm_mm"],
|
||||
bw_gbs=clinks["xbar_to_hbm_bw_gbs"],
|
||||
bw_gbs=hbm_bw,
|
||||
kind="xbar_to_hbm",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.hbm_ctrl.slice{i}", dst=f"{cp}.xbar.pe{i}",
|
||||
src=f"{cp}.hbm_ctrl.slice{i}", dst=f"{cp}.xbar_top",
|
||||
distance_mm=clinks["xbar_to_hbm_mm"],
|
||||
bw_gbs=clinks["xbar_to_hbm_bw_gbs"],
|
||||
bw_gbs=hbm_bw,
|
||||
kind="hbm_to_xbar",
|
||||
))
|
||||
for i in range(half, n_slices):
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.xbar_bot", dst=f"{cp}.hbm_ctrl.slice{i}",
|
||||
distance_mm=clinks["xbar_to_hbm_mm"],
|
||||
bw_gbs=hbm_bw,
|
||||
kind="xbar_to_hbm",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.hbm_ctrl.slice{i}", dst=f"{cp}.xbar_bot",
|
||||
distance_mm=clinks["xbar_to_hbm_mm"],
|
||||
bw_gbs=hbm_bw,
|
||||
kind="hbm_to_xbar",
|
||||
))
|
||||
|
||||
# xbar chain: pe0↔pe1↔pe2↔pe3 (top), pe4↔pe5↔pe6↔pe7 (bottom)
|
||||
half = n_slices // 2
|
||||
for half_start in (0, half):
|
||||
for i in range(half_start, half_start + half - 1):
|
||||
intra = ((i - half_start) % pe_per_corner) != (pe_per_corner - 1)
|
||||
x_dist = clinks["xbar_chain_intra_corner_mm"] if intra else clinks["xbar_chain_inter_corner_mm"]
|
||||
for a, b in [(i, i + 1), (i + 1, i)]:
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.xbar.pe{a}", dst=f"{cp}.xbar.pe{b}",
|
||||
distance_mm=x_dist,
|
||||
bw_gbs=clinks["xbar_x_bw_gbs"],
|
||||
kind="xbar_chain",
|
||||
))
|
||||
# ── NOC ↔ xbar_top/bot ──
|
||||
# xbar_top: primary (low routing weight), xbar_bot: secondary (high routing weight
|
||||
# steers Dijkstra through xbar_top→bridge→xbar_bot for cross-half access)
|
||||
noc_xbar_bw = clinks.get("noc_to_xbar_bw_gbs", 256.0)
|
||||
noc_xbar_mm = clinks.get("noc_to_xbar_mm", 0.0)
|
||||
for xbar_name, rw in [("xbar_top", None), ("xbar_bot", 100.0)]:
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.noc", dst=f"{cp}.{xbar_name}",
|
||||
distance_mm=noc_xbar_mm, bw_gbs=noc_xbar_bw,
|
||||
routing_weight_mm=rw, kind="noc_to_xbar",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.{xbar_name}", dst=f"{cp}.noc",
|
||||
distance_mm=noc_xbar_mm, bw_gbs=noc_xbar_bw,
|
||||
routing_weight_mm=rw, kind="xbar_to_noc",
|
||||
))
|
||||
|
||||
# bridge connections: pe0↔bridge.left↔pe4, pe3↔bridge.right↔pe7
|
||||
for bname, pe_top, pe_bot in [("left", 0, half), ("right", half - 1, n_slices - 1)]:
|
||||
# ── Bridge connections: xbar_top ↔ bridge ↔ xbar_bot ──
|
||||
bridge_mm = clinks.get("xbar_to_bridge_mm", 3.0)
|
||||
bridge_bw = clinks.get("xbar_to_bridge_bw_gbs", 128.0)
|
||||
for bname in ("left", "right"):
|
||||
br_node = f"{cp}.bridge.{bname}"
|
||||
for pe_i, br_mm_key in [(pe_top, "xbar_row_n_to_bridge_mm"),
|
||||
(pe_bot, "xbar_row_s_to_bridge_mm")]:
|
||||
xbar_node = f"{cp}.xbar.pe{pe_i}"
|
||||
for xbar_name in ("xbar_top", "xbar_bot"):
|
||||
edges.append(Edge(
|
||||
src=xbar_node, dst=br_node,
|
||||
distance_mm=clinks[br_mm_key],
|
||||
bw_gbs=clinks["xbar_to_bridge_bw_gbs"],
|
||||
src=f"{cp}.{xbar_name}", dst=br_node,
|
||||
distance_mm=bridge_mm, bw_gbs=bridge_bw,
|
||||
kind="xbar_to_bridge",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=br_node, dst=xbar_node,
|
||||
distance_mm=clinks[br_mm_key],
|
||||
bw_gbs=clinks["xbar_to_bridge_bw_gbs"],
|
||||
src=br_node, dst=f"{cp}.{xbar_name}",
|
||||
distance_mm=bridge_mm, bw_gbs=bridge_bw,
|
||||
kind="bridge_to_xbar",
|
||||
))
|
||||
|
||||
# ucie ↔ noc (UCIe-NOC boundary; per_connection_bw_gbs = 128 GB/s, n_connections = 4)
|
||||
_noc_ucie = clinks["noc_to_ucie"]
|
||||
for port in cube["ucie"]["ports"]:
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.ucie-{port}", dst=f"{cp}.noc",
|
||||
distance_mm=0.0,
|
||||
bw_gbs=_noc_ucie["per_connection_bw_gbs"],
|
||||
n_connections=_noc_ucie["n_connections"],
|
||||
kind="ucie_to_noc",
|
||||
))
|
||||
# ── UCIe ↔ conn ↔ NOC ──
|
||||
ucie_conn_bw = ucie_cfg.get("per_connection_bw_gbs", 128.0)
|
||||
for port in ucie_cfg["ports"]:
|
||||
ucie_id = f"{cp}.ucie-{port}"
|
||||
for ci in range(ucie_n_conn):
|
||||
conn_id = f"{cp}.ucie-{port}.conn{ci}"
|
||||
edges.append(Edge(
|
||||
src=ucie_id, dst=conn_id,
|
||||
distance_mm=0.0, kind="ucie_internal",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=conn_id, dst=ucie_id,
|
||||
distance_mm=0.0, kind="ucie_internal",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=conn_id, dst=f"{cp}.noc",
|
||||
distance_mm=0.0, bw_gbs=ucie_conn_bw,
|
||||
kind="ucie_conn_to_noc",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.noc", dst=conn_id,
|
||||
distance_mm=0.0, bw_gbs=ucie_conn_bw,
|
||||
kind="noc_to_ucie_conn",
|
||||
))
|
||||
|
||||
for port in cube["ucie"]["ports"]:
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.noc", dst=f"{cp}.ucie-{port}",
|
||||
distance_mm=0.0,
|
||||
bw_gbs=_noc_ucie["per_connection_bw_gbs"],
|
||||
n_connections=_noc_ucie["n_connections"],
|
||||
kind="noc_to_ucie",
|
||||
))
|
||||
|
||||
# noc ↔ xbar.pe{i}: wire delay is 0 (NOC traversal latency computed by TwoDMeshNocComponent);
|
||||
# routing_weight_mm=50.0 steers PE DMA Dijkstra away from this path (prefer direct pe_dma→xbar)
|
||||
_noc_xbar = clinks.get("noc_to_xbar", {})
|
||||
_noc_xbar_bw = _noc_xbar.get("per_connection_bw_gbs")
|
||||
for i in range(n_slices):
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.noc", dst=f"{cp}.xbar.pe{i}",
|
||||
distance_mm=0.0,
|
||||
bw_gbs=_noc_xbar_bw,
|
||||
routing_weight_mm=50.0,
|
||||
kind="noc_to_xbar",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.xbar.pe{i}", dst=f"{cp}.noc",
|
||||
distance_mm=0.0,
|
||||
bw_gbs=_noc_xbar_bw,
|
||||
routing_weight_mm=50.0,
|
||||
kind="xbar_to_noc",
|
||||
))
|
||||
|
||||
# m_cpu ↔ noc (command dispatch, both directions)
|
||||
# ── m_cpu ↔ noc (command dispatch) ──
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.m_cpu", dst=f"{cp}.noc",
|
||||
distance_mm=clinks["m_cpu_to_noc_mm"],
|
||||
@@ -468,7 +583,7 @@ def _instantiate_cube(
|
||||
kind="command",
|
||||
))
|
||||
|
||||
# noc ↔ sram (shared SRAM access; per_connection_bw_gbs = 128 GB/s, n_connections = 4)
|
||||
# ── noc ↔ sram ──
|
||||
_noc_sram = clinks["noc_to_sram"]
|
||||
edges.append(Edge(
|
||||
src=f"{cp}.noc", dst=f"{cp}.sram",
|
||||
@@ -550,28 +665,27 @@ def _add_inter_cube_edges(
|
||||
def _add_io_to_cube_edges(
|
||||
edges: list[Edge], sp: str, sip_spec: dict, mesh_w: int,
|
||||
) -> None:
|
||||
"""Add IO chiplet io_cpu ↔ cube UCIe edges (bidirectional for response)."""
|
||||
io_links = sip_spec["iochiplet"]["links"]
|
||||
io_to_ucie_mm = io_links["io_cpu_to_ucie_mm"]
|
||||
io_to_ucie_bw = io_links["io_cpu_to_ucie_bw_gbs"]
|
||||
"""Add IO chiplet io_ucie ↔ cube UCIe edges (bidirectional)."""
|
||||
for inst in sip_spec["iochiplet"]["instances"]:
|
||||
iid = inst["id"]
|
||||
io_cpu_id = f"{sp}.{iid}.io_cpu"
|
||||
phy_bw = float(inst["ucie"]["phy_bw_gbs"])
|
||||
for port in inst["cube_ports"]:
|
||||
cube_col, cube_row = port["cube"]["xy"]
|
||||
cube_id = cube_row * mesh_w + cube_col
|
||||
cube_side = port["cube_side"]
|
||||
ucie_id = f"{sp}.cube{cube_id}.ucie-{cube_side}"
|
||||
phy = port["phy"]
|
||||
io_ucie_id = f"{sp}.{iid}.ucie-{phy}"
|
||||
cube_ucie_id = f"{sp}.cube{cube_id}.ucie-{cube_side}"
|
||||
edges.append(Edge(
|
||||
src=io_cpu_id, dst=ucie_id,
|
||||
distance_mm=io_to_ucie_mm + port["distance_mm"],
|
||||
bw_gbs=io_to_ucie_bw,
|
||||
src=io_ucie_id, dst=cube_ucie_id,
|
||||
distance_mm=port["distance_mm"],
|
||||
bw_gbs=phy_bw,
|
||||
kind="io_to_cube",
|
||||
))
|
||||
edges.append(Edge(
|
||||
src=ucie_id, dst=io_cpu_id,
|
||||
distance_mm=io_to_ucie_mm + port["distance_mm"],
|
||||
bw_gbs=io_to_ucie_bw,
|
||||
src=cube_ucie_id, dst=io_ucie_id,
|
||||
distance_mm=port["distance_mm"],
|
||||
bw_gbs=phy_bw,
|
||||
kind="cube_to_io",
|
||||
))
|
||||
|
||||
@@ -704,11 +818,13 @@ def _build_sip_view(spec: dict) -> ViewGraph:
|
||||
))
|
||||
|
||||
# IO chiplets
|
||||
io_links = sip_spec["iochiplet"]["links"]
|
||||
io_ucie_cfg = sip_spec["iochiplet"].get("ucie", {})
|
||||
io_noc_to_ucie_mm = float(io_ucie_cfg.get("noc_to_ucie_mm", 0.5))
|
||||
for inst in sip_spec["iochiplet"]["instances"]:
|
||||
iid = inst["id"]
|
||||
side = inst["place"]["side"]
|
||||
iy = 2.0 if side == "N" else canvas_h - 2.0
|
||||
phy_bw = float(inst["ucie"]["phy_bw_gbs"])
|
||||
nodes[iid] = Node(
|
||||
id=iid, kind="iochiplet", impl="",
|
||||
attrs={}, pos_mm=(mesh_total_w / 2, iy), label=f"IO {iid}",
|
||||
@@ -718,8 +834,8 @@ def _build_sip_view(spec: dict) -> ViewGraph:
|
||||
cube_id = cube_row * mesh_w + cube_col
|
||||
view_edges.append(Edge(
|
||||
src=iid, dst=f"cube{cube_id}",
|
||||
distance_mm=io_links["io_cpu_to_ucie_mm"] + port["distance_mm"],
|
||||
bw_gbs=io_links["io_cpu_to_ucie_bw_gbs"],
|
||||
distance_mm=io_noc_to_ucie_mm + port["distance_mm"],
|
||||
bw_gbs=phy_bw,
|
||||
kind="io_to_cube",
|
||||
))
|
||||
|
||||
@@ -737,31 +853,52 @@ def _build_cube_view(spec: dict) -> ViewGraph:
|
||||
local_pos = _cube_local_positions(cube_w, cube_h)
|
||||
clinks = cube["links"]
|
||||
n_slices = cube["memory_map"]["hbm_slices_per_cube"]
|
||||
half = n_slices // 2
|
||||
|
||||
nodes: dict[str, Node] = {}
|
||||
view_edges: list[Edge] = []
|
||||
|
||||
# UCIe ports
|
||||
for port in cube["ucie"]["ports"]:
|
||||
# UCIe ports + connection nodes
|
||||
ucie_cfg = cube["ucie"]
|
||||
ucie_n_conn = ucie_cfg.get("n_connections", 1)
|
||||
for port in ucie_cfg["ports"]:
|
||||
pid = f"ucie-{port}"
|
||||
lx, ly = local_pos[pid]
|
||||
nodes[pid] = Node(
|
||||
id=pid, kind="ucie_port", impl="ucie_v1",
|
||||
attrs={}, pos_mm=(lx, ly), label=f"UCIe-{port}",
|
||||
)
|
||||
for ci in range(ucie_n_conn):
|
||||
conn_id = f"ucie-{port}.conn{ci}"
|
||||
nodes[conn_id] = Node(
|
||||
id=conn_id, kind="ucie_conn", impl="ucie_v1",
|
||||
attrs={"overhead_ns": 0.0}, pos_mm=(lx, ly),
|
||||
label=f"UCIe-{port} C{ci}",
|
||||
)
|
||||
|
||||
# Named components (hbm_ctrl as single representative node in view)
|
||||
for name in ("noc", "m_cpu", "hbm_ctrl", "sram"):
|
||||
c = cube["components"][name]
|
||||
lx, ly = local_pos[name]
|
||||
lx, ly = local_pos.get(name, local_pos.get("hbm_ctrl"))
|
||||
nodes[name] = Node(
|
||||
id=name, kind=c["kind"], impl=c["impl"],
|
||||
attrs=c["attrs"], pos_mm=(lx, ly),
|
||||
label=name.upper().replace("_", " "),
|
||||
)
|
||||
|
||||
# xbar_top, xbar_bot
|
||||
xbar_spec = cube["components"]["xbar"]
|
||||
for xbar_name, xbar_cfg in [("xbar_top", xbar_spec["top"]),
|
||||
("xbar_bot", xbar_spec["bottom"])]:
|
||||
lx, ly = local_pos[xbar_name]
|
||||
nodes[xbar_name] = Node(
|
||||
id=xbar_name, kind=xbar_cfg["kind"], impl=xbar_cfg["impl"],
|
||||
attrs=xbar_cfg["attrs"], pos_mm=(lx, ly),
|
||||
label=xbar_name.upper().replace("_", " "),
|
||||
)
|
||||
|
||||
# Bridges
|
||||
for br in cube["components"]["xbar"]["bridges"]:
|
||||
for br in xbar_spec["bridges"]:
|
||||
bname = br["id"]
|
||||
bid = f"bridge.{bname}"
|
||||
lx, ly = local_pos[bid]
|
||||
@@ -771,46 +908,29 @@ def _build_cube_view(spec: dict) -> ViewGraph:
|
||||
label=f"Bridge {bname.upper()}",
|
||||
)
|
||||
|
||||
# PEs as opaque blocks + per-PE xbar entry nodes
|
||||
# PEs as opaque blocks (no per-PE xbar nodes)
|
||||
corners = cube["pe_layout"]["corners"]
|
||||
pe_per_corner = cube["pe_layout"]["pe_per_corner"]
|
||||
corner_pos = _corner_pe_positions(cube_w, cube_h)
|
||||
xbar_pe_spec = cube["components"]["xbar"]["pe"]
|
||||
xbar_top_y = local_pos["xbar.top"][1]
|
||||
xbar_bot_y = local_pos["xbar.bottom"][1]
|
||||
mesh_data = spec.get("_mesh", {})
|
||||
pe_noc_distances = _compute_pe_noc_distances(
|
||||
mesh_data, corner_pos, corners, pe_per_corner,
|
||||
) if mesh_data else {}
|
||||
|
||||
pe_idx = 0
|
||||
for corner in corners:
|
||||
is_top = corner in ("NW", "NE")
|
||||
xbar_y = xbar_top_y if is_top else xbar_bot_y
|
||||
mm_key = "pe_to_xbar_row_n_mm" if is_top else "pe_to_xbar_row_s_mm"
|
||||
for ci in range(pe_per_corner):
|
||||
pid = f"pe{pe_idx}"
|
||||
xbar_id = f"xbar.pe{pe_idx}"
|
||||
px, py = corner_pos[corner][ci]
|
||||
|
||||
nodes[pid] = Node(
|
||||
id=pid, kind="pe", impl="",
|
||||
attrs={"corner": corner}, pos_mm=(px, py),
|
||||
label=f"PE{pe_idx}",
|
||||
)
|
||||
nodes[xbar_id] = Node(
|
||||
id=xbar_id, kind=xbar_pe_spec["kind"], impl=xbar_pe_spec["impl"],
|
||||
attrs=xbar_pe_spec["attrs"], pos_mm=(px, xbar_y),
|
||||
label=f"XBAR PE{pe_idx}",
|
||||
)
|
||||
|
||||
# PE → xbar.pe_i (HBM data path)
|
||||
view_edges.append(Edge(
|
||||
src=pid, dst=xbar_id,
|
||||
distance_mm=clinks[mm_key],
|
||||
bw_gbs=clinks["pe_to_xbar_bw_gbs"],
|
||||
kind="pe_to_xbar",
|
||||
))
|
||||
# PE → noc (non-HBM data path)
|
||||
# PE → noc (distance auto-computed from PE physical position)
|
||||
view_edges.append(Edge(
|
||||
src=pid, dst="noc",
|
||||
distance_mm=clinks["pe_dma_to_noc_mm"],
|
||||
distance_mm=pe_noc_distances.get(pe_idx, 0.0),
|
||||
bw_gbs=clinks["pe_dma_to_noc_bw_gbs"],
|
||||
kind="pe_to_noc",
|
||||
))
|
||||
@@ -822,60 +942,76 @@ def _build_cube_view(spec: dict) -> ViewGraph:
|
||||
))
|
||||
pe_idx += 1
|
||||
|
||||
# Cube fabric edges
|
||||
# xbar.pe_i → hbm_ctrl (single representative node in view)
|
||||
for i in range(n_slices):
|
||||
# xbar_top/bot → hbm_ctrl
|
||||
view_edges.append(Edge(
|
||||
src="xbar_top", dst="hbm_ctrl",
|
||||
distance_mm=clinks["xbar_to_hbm_mm"],
|
||||
bw_gbs=clinks["xbar_to_hbm_bw_gbs"],
|
||||
kind="xbar_to_hbm",
|
||||
))
|
||||
view_edges.append(Edge(
|
||||
src="xbar_bot", dst="hbm_ctrl",
|
||||
distance_mm=clinks["xbar_to_hbm_mm"],
|
||||
bw_gbs=clinks["xbar_to_hbm_bw_gbs"],
|
||||
kind="xbar_to_hbm",
|
||||
))
|
||||
|
||||
# noc ↔ xbar_top/bot
|
||||
noc_xbar_bw = clinks.get("noc_to_xbar_bw_gbs", 256.0)
|
||||
noc_xbar_mm = clinks.get("noc_to_xbar_mm", 0.0)
|
||||
for xbar_name in ("xbar_top", "xbar_bot"):
|
||||
view_edges.append(Edge(
|
||||
src=f"xbar.pe{i}", dst="hbm_ctrl",
|
||||
distance_mm=clinks["xbar_to_hbm_mm"],
|
||||
bw_gbs=clinks["xbar_to_hbm_bw_gbs"],
|
||||
kind="xbar_to_hbm",
|
||||
src="noc", dst=xbar_name,
|
||||
distance_mm=noc_xbar_mm, bw_gbs=noc_xbar_bw,
|
||||
kind="noc_to_xbar",
|
||||
))
|
||||
view_edges.append(Edge(
|
||||
src=xbar_name, dst="noc",
|
||||
distance_mm=noc_xbar_mm, bw_gbs=noc_xbar_bw,
|
||||
kind="xbar_to_noc",
|
||||
))
|
||||
|
||||
# xbar chain
|
||||
half = n_slices // 2
|
||||
for half_start in (0, half):
|
||||
for i in range(half_start, half_start + half - 1):
|
||||
intra = ((i - half_start) % pe_per_corner) != (pe_per_corner - 1)
|
||||
x_dist = clinks["xbar_chain_intra_corner_mm"] if intra else clinks["xbar_chain_inter_corner_mm"]
|
||||
for a, b in [(i, i + 1), (i + 1, i)]:
|
||||
view_edges.append(Edge(
|
||||
src=f"xbar.pe{a}", dst=f"xbar.pe{b}",
|
||||
distance_mm=x_dist,
|
||||
bw_gbs=clinks["xbar_x_bw_gbs"],
|
||||
kind="xbar_chain",
|
||||
))
|
||||
|
||||
# bridge connections
|
||||
for bname, pe_top, pe_bot in [("left", 0, half), ("right", half - 1, n_slices - 1)]:
|
||||
# bridge connections: xbar_top ↔ bridge ↔ xbar_bot
|
||||
bridge_mm = clinks.get("xbar_to_bridge_mm", 3.0)
|
||||
bridge_bw = clinks.get("xbar_to_bridge_bw_gbs", 128.0)
|
||||
for bname in ("left", "right"):
|
||||
br_id = f"bridge.{bname}"
|
||||
for pe_i, br_mm_key in [(pe_top, "xbar_row_n_to_bridge_mm"),
|
||||
(pe_bot, "xbar_row_s_to_bridge_mm")]:
|
||||
xbar_id = f"xbar.pe{pe_i}"
|
||||
for xbar_name in ("xbar_top", "xbar_bot"):
|
||||
view_edges.append(Edge(
|
||||
src=xbar_id, dst=br_id,
|
||||
distance_mm=clinks[br_mm_key],
|
||||
bw_gbs=clinks["xbar_to_bridge_bw_gbs"],
|
||||
src=xbar_name, dst=br_id,
|
||||
distance_mm=bridge_mm, bw_gbs=bridge_bw,
|
||||
kind="xbar_to_bridge",
|
||||
))
|
||||
view_edges.append(Edge(
|
||||
src=br_id, dst=xbar_id,
|
||||
distance_mm=clinks[br_mm_key],
|
||||
bw_gbs=clinks["xbar_to_bridge_bw_gbs"],
|
||||
src=br_id, dst=xbar_name,
|
||||
distance_mm=bridge_mm, bw_gbs=bridge_bw,
|
||||
kind="bridge_to_xbar",
|
||||
))
|
||||
|
||||
_noc_ucie_v = clinks["noc_to_ucie"]
|
||||
for port in cube["ucie"]["ports"]:
|
||||
view_edges.append(Edge(
|
||||
src="noc", dst=f"ucie-{port}",
|
||||
distance_mm=0.0,
|
||||
bw_gbs=_noc_ucie_v["per_connection_bw_gbs"],
|
||||
n_connections=_noc_ucie_v["n_connections"],
|
||||
kind="noc_to_ucie",
|
||||
))
|
||||
ucie_conn_bw_v = ucie_cfg.get("per_connection_bw_gbs", 128.0)
|
||||
for port in ucie_cfg["ports"]:
|
||||
for ci in range(ucie_n_conn):
|
||||
conn_id = f"ucie-{port}.conn{ci}"
|
||||
view_edges.append(Edge(
|
||||
src="noc", dst=conn_id,
|
||||
distance_mm=0.0, bw_gbs=ucie_conn_bw_v,
|
||||
kind="noc_to_ucie_conn",
|
||||
))
|
||||
view_edges.append(Edge(
|
||||
src=conn_id, dst=f"ucie-{port}",
|
||||
distance_mm=0.0, kind="ucie_internal",
|
||||
))
|
||||
view_edges.append(Edge(
|
||||
src=f"ucie-{port}", dst=conn_id,
|
||||
distance_mm=0.0, kind="ucie_internal",
|
||||
))
|
||||
view_edges.append(Edge(
|
||||
src=conn_id, dst="noc",
|
||||
distance_mm=0.0, bw_gbs=ucie_conn_bw_v,
|
||||
kind="ucie_conn_to_noc",
|
||||
))
|
||||
|
||||
# m_cpu ↔ noc (command dispatch, both directions)
|
||||
# m_cpu ↔ noc
|
||||
view_edges.append(Edge(
|
||||
src="m_cpu", dst="noc",
|
||||
distance_mm=clinks["m_cpu_to_noc_mm"],
|
||||
@@ -887,7 +1023,7 @@ def _build_cube_view(spec: dict) -> ViewGraph:
|
||||
kind="command",
|
||||
))
|
||||
|
||||
# noc ↔ sram (shared SRAM access, bidirectional)
|
||||
# noc ↔ sram
|
||||
_noc_sram_v = clinks["noc_to_sram"]
|
||||
view_edges.append(Edge(
|
||||
src="noc", dst="sram",
|
||||
|
||||
Reference in New Issue
Block a user