Remove xbar/noc remnants, rule-based cube-view connectors
- Delete xbar.py and noc.py (TwoDMeshNocComponent) — unused since router mesh
- Remove xbar_v1/noc_2d_mesh_v1 from components.yaml
- Fix pe_to_xbar → pe_to_router in routing exclusion set
- Fix xbar_to_hbm_bw_gbs → hbm_to_router_bw_gbs in report.py
- Update all docstrings/comments referencing xbar/bridge → router mesh
- Cube-view connectors: rule-based _connector_points helper
- PE↔router: single diagonal line (not chevron)
- UCIe N/S: 45°→horizontal→45°
- UCIe E/W: 45°→vertical→45°
- HBM ports: 45°→horizontal→45°

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -116,7 +116,7 @@ def _fmt_util(eff: float, bn: float | None) -> str:
|
||||
|
||||
|
||||
def _short_name(node_id: str) -> str:
|
||||
"""Shorten node id: keep last 2 segments to avoid ambiguity (xbar.pe0 vs pe0)."""
|
||||
"""Shorten node id: keep last 2 segments to avoid ambiguity (router.pe0 vs pe0)."""
|
||||
parts = node_id.split(".")
|
||||
return ".".join(parts[-2:]) if len(parts) >= 2 else node_id
|
||||
|
||||
@@ -366,7 +366,7 @@ def run_probe(topology_path: str, case_filter: str | None = None) -> int:
|
||||
|
||||
# --- PE DMA Summary Table ---
|
||||
print()
|
||||
print(f"=== PE DMA Latency (pe_dma -> xbar -> HBM, data={nbytes}B) ===")
|
||||
print(f"=== PE DMA Latency (pe_dma -> router -> HBM, data={nbytes}B) ===")
|
||||
print(f" {'Case':<26} {'Target':<28} {'Actual':>8}"
|
||||
f" {'Ovhd':>6} {'Drain':>6} {'Wire':>5} {'Ovhd%':>6} {'Drain%':>7}"
|
||||
f" {'Eff.BW':>8} {'BN.BW':>8} {'Util%':>6}")
|
||||
|
||||
@@ -137,7 +137,7 @@ def _extract_peaks(spec: dict | None) -> tuple[float, float]:
|
||||
gemm_attrs = comps.get("pe_gemm", {}).get("attrs", {})
|
||||
peak_tflops = float(gemm_attrs.get("peak_tflops_f16", 0.0))
|
||||
cube_links = cube.get("links", {})
|
||||
hbm_bw = float(cube_links.get("xbar_to_hbm_bw_gbs", 0.0))
|
||||
hbm_bw = float(cube_links.get("hbm_to_router_bw_gbs", 0.0))
|
||||
return peak_tflops, hbm_bw
|
||||
|
||||
|
||||
|
||||
@@ -1,224 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Generator
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import simpy
|
||||
|
||||
from kernbench.components.base import ComponentBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from kernbench.components.context import ComponentContext
|
||||
from kernbench.topology.types import Node
|
||||
|
||||
|
||||
class TwoDMeshNocComponent(ComponentBase):
    """2D mesh NOC modeled as a single smart node.

    Latency model:
    - Traversal latency = Manhattan distance between prev_hop and next_hop
      node positions, split into XY segments, traversed with pipeline.
    - overhead_ns (from node.attrs) is added once per traversal.

    Contention model:
    - Each directed XY segment is a simpy.Resource(capacity=1).
    - Pipeline: next segment's resource is requested before the current
      segment's timeout completes, so a free downstream segment is acquired
      immediately (wormhole-style cut-through).
    - Two transactions sharing a segment (same row or column band) contend.

    Concurrency:
    - _worker spawns an independent SimPy process per transaction, so the
      NOC is never serialized at the node level — only at segment resources.
    """

    def __init__(self, node: Node, ctx: ComponentContext | None = None) -> None:
        super().__init__(node, ctx)
        # Bound in start(); required before any segment resource is created.
        self._env: simpy.Environment | None = None
        # Directed segment key -> capacity-1 resource, created on first use.
        self._links: dict[tuple, simpy.Resource] = {}
        # Sorted, de-duplicated grid coordinates (rounded to 2 decimals).
        self._x_grid: list[float] = []
        self._y_grid: list[float] = []

    def start(self, env: simpy.Environment) -> None:
        """Store *env*, build the waypoint grid, then run the base-class start."""
        self._env = env
        self._build_grid()
        super().start(env)

    def run(self, env: simpy.Environment, nbytes: int) -> Generator:
        """Yield a zero-length timeout; *nbytes* is not used."""
        yield env.timeout(0)

    # ── Grid construction ────────────────────────────────────────────

    def _build_grid(self) -> None:
        """Populate the X/Y grids from the ``_mesh`` spec entry, else from positions.

        Does nothing when the component has no context.
        """
        if not self.ctx:
            return
        mesh = self.ctx.spec.get("_mesh") if self.ctx.spec else None
        if mesh:
            self._build_grid_from_mesh(mesh)
        else:
            self._build_grid_from_positions()

    def _build_grid_from_mesh(self, mesh: dict) -> None:
        """Build XY grid from cube_mesh.yaml router positions (authoritative)."""
        origin_x, origin_y = self._cube_origin()
        xs: set[float] = set()
        ys: set[float] = set()
        # Only the router records matter here; ``None`` entries are skipped.
        for router in mesh.get("routers", {}).values():
            if router is not None:
                xs.add(round(origin_x + router["pos_mm"][0], 2))
                ys.add(round(origin_y + router["pos_mm"][1], 2))
        self._x_grid = sorted(xs)
        self._y_grid = sorted(ys)

    def _build_grid_from_positions(self) -> None:
        """Fallback: infer grid from all node positions in the cube."""
        # Everything before the last "." of this node's id is treated as the
        # cube prefix; nodes whose id starts with "<prefix>." contribute.
        cube_prefix = self.node.id.rsplit(".", 1)[0]
        xs: set[float] = set()
        ys: set[float] = set()
        for node_id, pos in self.ctx.positions.items():
            if node_id.startswith(cube_prefix + ".") and pos is not None:
                xs.add(round(pos[0], 2))
                ys.add(round(pos[1], 2))
        self._x_grid = sorted(xs)
        self._y_grid = sorted(ys)

    def _cube_origin(self) -> tuple[float, float]:
        """Compute absolute origin (top-left) of this cube from cube_id.

        The cube id is parsed from the first dotted segment of the node id
        that starts with ``"cube"``; an IndexError is raised if none exists.
        """
        parts = self.node.id.split(".")
        cube_str = [p for p in parts if p.startswith("cube")][0]
        cube_id = int(cube_str[4:])
        spec = self.ctx.spec
        sip_spec = spec.get("sip", {})
        cube_spec = spec.get("cube", {})
        mesh_w = sip_spec.get("cube_mesh", {}).get("w", 4)
        # Look the geometry dict up once and read both extents from it.
        cube_mm = cube_spec.get("geometry", {}).get("cube_mm", {})
        cube_w = cube_mm.get("w", 17.0)
        cube_h = cube_mm.get("h", 14.0)
        seam = sip_spec.get("links", {}).get("inter_cube_mesh", {}).get(
            "distance_mm_across_seam", 1.0)
        # Cubes are laid out row-major in a grid that is ``mesh_w`` wide.
        col = cube_id % mesh_w
        row = cube_id // mesh_w
        return (col * (cube_w + seam), row * (cube_h + seam))

    def _get_link(self, key: tuple) -> simpy.Resource:
        """Return the capacity-1 resource for segment *key*, creating it on demand."""
        if key not in self._links:
            assert self._env is not None
            self._links[key] = simpy.Resource(self._env, capacity=1)
        return self._links[key]

    # ── Worker ───────────────────────────────────────────────────────

    def _worker(self, env: simpy.Environment) -> Generator:
        """Take transactions from the inbox and route each in its own process."""
        while True:
            txn: Any = yield self._inbox.get()
            env.process(self._route(env, txn))

    def _route(self, env: simpy.Environment, txn: Any) -> Generator:
        """Delay *txn* for the mesh traversal, then forward or complete it.

        When both neighbouring hops have known positions the transaction
        walks the XY segments between them; otherwise only ``overhead_ns``
        is charged.  With no next hop the transaction waits its optional
        ``drain_ns`` and its ``done`` event is triggered.
        """
        prev_hop = txn.path[txn.step - 1] if txn.step > 0 else None
        next_hop = txn.next_hop
        overhead_ns = float(self.node.attrs.get("overhead_ns", 0.0))

        links: list[tuple[tuple, float]] = []
        if prev_hop and next_hop and self.ctx:
            src_pos = self.ctx.positions.get(prev_hop)
            dst_pos = self.ctx.positions.get(next_hop)
            if src_pos and dst_pos:
                links = self._xy_links(src_pos, dst_pos)

        if links:
            yield from self._traverse(env, links, overhead_ns)
        else:
            yield env.timeout(overhead_ns)

        if next_hop:
            yield self.out_ports[next_hop].put(txn.advance())
        else:
            drain = getattr(txn, "drain_ns", 0.0)
            if drain > 0:
                yield env.timeout(drain)
            txn.done.succeed()

    # ── XY routing and pipelined link traversal ──────────────────────

    def _traverse(
        self,
        env: simpy.Environment,
        links: list[tuple[tuple, float]],
        overhead_ns: float,
    ) -> Generator:
        """Pipeline: request next segment before current timeout finishes.

        *links* must be non-empty.  ``overhead_ns`` is added to the first
        segment's delay only.
        """
        ns_per_mm = self.ctx.ns_per_mm  # type: ignore[union-attr]

        # Acquire first link
        current_resource = self._get_link(links[0][0])
        current_req = current_resource.request()
        yield current_req

        last_index = len(links) - 1
        for i, (_, dist_mm) in enumerate(links):
            # Request next link before current timeout (pipeline)
            pending = None
            if i < last_index:
                upcoming = self._get_link(links[i + 1][0])
                pending = (upcoming, upcoming.request())

            yield env.timeout(dist_mm * ns_per_mm + (overhead_ns if i == 0 else 0.0))
            # The current segment is freed before waiting on the next one.
            current_resource.release(current_req)

            if pending is not None:
                current_resource, current_req = pending
                yield current_req  # usually already fulfilled (pipeline)

    def _xy_links(
        self,
        src: tuple[float, float],
        dst: tuple[float, float],
    ) -> list[tuple[tuple, float]]:
        """XY routing: horizontal segment first, then vertical.

        Returns list of (link_key, dist_mm) pairs, where link_key uniquely
        identifies a directed segment shared across concurrent transactions.
        """
        x0, y0 = src
        x1, y1 = dst
        links: list[tuple[tuple, float]] = []

        # Horizontal segment at y≈y0
        if abs(x0 - x1) > 1e-9:
            y_band = self._snap(y0, self._y_grid)
            for xa, xb in self._segments(x0, x1, self._x_grid):
                d = abs(xb - xa)
                if d > 1e-9:
                    lo, hi = (xa, xb) if xa < xb else (xb, xa)
                    dir_h = "E" if xb > xa else "W"
                    links.append((("H", round(y_band, 2), round(lo, 2), round(hi, 2), dir_h), d))

        # Vertical segment at x≈x1
        if abs(y0 - y1) > 1e-9:
            x_band = self._snap(x1, self._x_grid)
            for ya, yb in self._segments(y0, y1, self._y_grid):
                d = abs(yb - ya)
                if d > 1e-9:
                    lo, hi = (ya, yb) if ya < yb else (yb, ya)
                    dir_v = "S" if yb > ya else "N"
                    links.append((("V", round(x_band, 2), round(lo, 2), round(hi, 2), dir_v), d))

        return links

    @staticmethod
    def _snap(val: float, grid: list[float]) -> float:
        """Return the grid value closest to *val*, or *val* itself if *grid* is empty."""
        if not grid:
            return val
        return min(grid, key=lambda g: abs(g - val))

    @staticmethod
    def _segments(a: float, b: float, grid: list[float]) -> list[tuple[float, float]]:
        """Consecutive (p_i, p_{i+1}) pairs covering range [a, b] using grid waypoints."""
        if abs(a - b) < 1e-9:
            return []
        lo, hi = (a, b) if a < b else (b, a)
        # Only grid points strictly inside (lo, hi) become intermediate waypoints.
        pts = [lo] + [g for g in grid if lo + 1e-9 < g < hi - 1e-9] + [hi]
        pairs = [(pts[i], pts[i + 1]) for i in range(len(pts) - 1)]
        if a > b:
            # Travelling towards smaller coordinates: reverse order and orientation.
            pairs = [(p2, p1) for p1, p2 in reversed(pairs)]
        return pairs
|
||||
@@ -96,7 +96,7 @@ class PeDmaComponent(PeEngineBase):
|
||||
request=sub_request, path=path, step=0,
|
||||
nbytes=cmd.nbytes, done=sub_done, drain_ns=drain_ns,
|
||||
)
|
||||
# Send to next hop (path[0] is pe_dma itself, path[1] is xbar)
|
||||
# Send to next hop (path[0] is pe_dma itself, path[1] is router)
|
||||
if len(path) > 1:
|
||||
yield self.out_ports[path[1]].put(sub_txn.advance())
|
||||
# DMA channel released after issue
|
||||
|
||||
@@ -1,168 +0,0 @@
|
||||
"""Position-aware XBAR component.
|
||||
|
||||
Models crossbar latency as base_overhead_ns + internal_distance * ns_per_mm,
|
||||
where internal_distance is the Manhattan distance between the entry port
|
||||
(PE router attachment) and exit port (HBM slice logical position) within
|
||||
the crossbar matrix.
|
||||
|
||||
PE router positions come from cube_mesh.yaml (via ctx.spec["_mesh"]).
|
||||
HBM slice positions are uniformly distributed across the HBM physical width.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Generator
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import simpy
|
||||
|
||||
from kernbench.components.base import ComponentBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from kernbench.components.context import ComponentContext
|
||||
from kernbench.topology.types import Node
|
||||
|
||||
|
||||
class PositionAwareXbarComponent(ComponentBase):
    """XBAR with position-dependent latency based on PE-to-slice distance.

    Latency = base_overhead_ns + |entry_port_x - exit_port_x| * ns_per_mm

    Entry/exit port X positions are determined from the transaction path:
    - PE_DMA nodes: router X from cube_mesh.yaml
    - HBM slices: uniformly distributed across HBM physical width
    - Bridge nodes: physical X from topology positions
    - NOC: resolved by scanning path for PE_DMA node
    """

    def __init__(self, node: Node, ctx: ComponentContext | None = None) -> None:
        super().__init__(node, ctx)
        self._base_overhead_ns = float(node.attrs.get("overhead_ns", 0.0))
        # Node id -> X coordinate, filled in by _build_position_map().
        self._pe_router_xs: dict[str, float] = {}
        self._slice_xs: dict[str, float] = {}
        self._bridge_xs: dict[str, float] = {}
        # Stays 0.0 (distance term disabled) until a mesh spec is found.
        self._ns_per_mm: float = 0.0

    def start(self, env: simpy.Environment) -> None:
        """Build the port position maps, then run the base-class start."""
        self._build_position_map()
        super().start(env)

    def run(self, env: simpy.Environment, nbytes: int) -> Generator:
        """Wait the base overhead only; *nbytes* is not used."""
        yield env.timeout(self._base_overhead_ns)

    # ── Position map construction ─────────────────────────────────

    def _build_position_map(self) -> None:
        """Fill the PE-router, HBM-slice and bridge X-position lookups.

        Returns without touching anything when there is no context, no spec,
        or no ``_mesh`` entry in the spec.
        """
        if not self.ctx or not self.ctx.spec:
            return
        mesh = self.ctx.spec.get("_mesh")
        if not mesh:
            return

        self._ns_per_mm = self.ctx.ns_per_mm
        id_parts = self.node.id.rsplit(".", 1)
        cube_prefix = id_parts[0]
        xbar_name = id_parts[1]
        is_top = xbar_name == "xbar_top"
        xbar_key = "top" if is_top else "bottom"

        # PE router X positions from mesh attachments
        routers_list = mesh.get("xbar", {}).get(xbar_key, {}).get("routers", [])
        routers = mesh.get("routers", {})
        for router_id in routers_list:
            router_data = routers.get(router_id)
            if router_data is None:
                continue
            router_x = router_data["pos_mm"][0]
            for attach in router_data.get("attach", []):
                if attach.endswith(".dma"):
                    # "<pe>.dma" attachment -> "<cube>.<pe>.pe_dma" node id.
                    pe_name = attach.split(".")[0]
                    pe_dma_id = f"{cube_prefix}.{pe_name}.pe_dma"
                    self._pe_router_xs[pe_dma_id] = router_x

        # HBM slice X positions: uniformly distributed across HBM width
        cube_spec = self.ctx.spec.get("cube", {})
        geometry = cube_spec.get("geometry", {})
        cube_w = geometry.get("cube_mm", {}).get("w", 17.0)
        hbm_w = geometry.get("hbm_mm", {}).get("w", 9.0)
        n_slices = cube_spec.get("memory_map", {}).get("hbm_slices_per_cube", 8)
        half = n_slices // 2
        hbm_left = (cube_w - hbm_w) / 2

        # The top xbar gets the first half of the slices, the bottom the rest.
        slice_range = range(half) if is_top else range(half, n_slices)

        n = len(slice_range)
        for i, sl in enumerate(slice_range):
            if n > 1:
                x = hbm_left + i * hbm_w / (n - 1)
            else:
                # A single slice is placed at the cube's horizontal centre.
                x = cube_w / 2
            self._slice_xs[f"{cube_prefix}.hbm_ctrl.slice{sl}"] = x

        # Bridge X positions from topology positions
        bridge_prefix = cube_prefix + ".bridge."
        bridge_positions = {
            node_id: pos
            for node_id, pos in self.ctx.positions.items()
            if node_id.startswith(bridge_prefix) and pos is not None
        }
        if bridge_positions:
            # The origin is the same for every bridge; compute it once, and
            # only when at least one bridge exists.
            origin_x = self._cube_origin_x()
            for node_id, pos in bridge_positions.items():
                self._bridge_xs[node_id] = pos[0] - origin_x

    def _cube_origin_x(self) -> float:
        """Compute absolute X origin of this cube.

        The cube id is parsed from the first dotted segment of the node id
        that starts with ``"cube"``; an IndexError is raised if none exists.
        """
        parts = self.node.id.split(".")
        cube_str = [p for p in parts if p.startswith("cube")][0]
        cube_id = int(cube_str[4:])
        spec = self.ctx.spec
        sip_spec = spec.get("sip", {})
        cube_spec = spec.get("cube", {})
        mesh_w = sip_spec.get("cube_mesh", {}).get("w", 4)
        cube_w = cube_spec.get("geometry", {}).get("cube_mm", {}).get("w", 17.0)
        seam = sip_spec.get("links", {}).get("inter_cube_mesh", {}).get(
            "distance_mm_across_seam", 1.0)
        col = cube_id % mesh_w
        return col * (cube_w + seam)

    # ── Worker override ───────────────────────────────────────────

    def _worker(self, env: simpy.Environment) -> Generator:
        """Take transactions from the inbox and forward each in its own process."""
        while True:
            txn: Any = yield self._inbox.get()
            env.process(self._position_aware_forward(env, txn))

    def _position_aware_forward(
        self, env: simpy.Environment, txn: Any,
    ) -> Generator:
        """Delay *txn* by the position-dependent overhead, then forward or finish it.

        Falls back to the base overhead when either port position cannot be
        resolved or no ``ns_per_mm`` was configured.
        """
        prev_hop = txn.path[txn.step - 1] if txn.step > 0 else None
        next_hop = txn.next_hop

        overhead = self._base_overhead_ns
        if prev_hop and next_hop and self._ns_per_mm > 0:
            entry_x = self._get_port_x(prev_hop, txn.path)
            exit_x = self._get_port_x(next_hop, txn.path)
            if entry_x is not None and exit_x is not None:
                overhead = self._base_overhead_ns + abs(entry_x - exit_x) * self._ns_per_mm

        yield env.timeout(overhead)

        if next_hop:
            yield self.out_ports[next_hop].put(txn.advance())
        else:
            drain = getattr(txn, "drain_ns", 0.0)
            if drain > 0:
                yield env.timeout(drain)
            txn.done.succeed()

    def _get_port_x(self, node_id: str, path: list[str]) -> float | None:
        """Resolve the X position of an XBAR port from node context.

        Returns ``None`` when *node_id* is not a known PE DMA, HBM slice or
        bridge node and no PE DMA node can be found via the NOC fallback.
        """
        # Direct lookup: PE DMA
        if node_id in self._pe_router_xs:
            return self._pe_router_xs[node_id]
        # Direct lookup: HBM slice
        if node_id in self._slice_xs:
            return self._slice_xs[node_id]
        # Direct lookup: bridge
        if node_id in self._bridge_xs:
            return self._bridge_xs[node_id]
        # NOC: scan path for PE DMA node
        if "noc" in node_id:
            for p in path:
                if p in self._pe_router_xs:
                    return self._pe_router_xs[p]
        return None
|
||||
@@ -81,7 +81,7 @@ class PathRouter:
|
||||
|
||||
# Edge kinds excluded from M_CPU DMA adjacency: prevents routing through
|
||||
# PE-internal pipeline nodes when computing DMA paths.
|
||||
_MCPU_DMA_EXCLUDE = {"pe_internal", "pe_to_xbar"}
|
||||
_MCPU_DMA_EXCLUDE = {"pe_internal", "pe_to_router"}
|
||||
|
||||
_UCIE_KINDS = {"ucie_internal", "ucie_conn_to_router", "router_to_ucie_conn",
|
||||
"ucie_conn_to_noc", "noc_to_ucie_conn", "ucie_mesh",
|
||||
@@ -124,9 +124,9 @@ class PathRouter:
|
||||
return self._run_dijkstra(self._adj_all, m_cpu_id, dst_hbm_id)
|
||||
|
||||
def find_memory_path(self, src: str, dst: str) -> list[str]:
|
||||
"""Direct memory path: pcie_ep → io_noc → cube → xbar → hbm_ctrl.
|
||||
"""Direct memory path: pcie_ep → io_noc → cube → router mesh → hbm_ctrl.
|
||||
|
||||
Uses _adj_mcpu_dma which excludes pe_internal and pe_to_xbar edges,
|
||||
Uses _adj_mcpu_dma which excludes pe_internal and pe_to_router edges,
|
||||
preventing routing through PE pipeline nodes.
|
||||
"""
|
||||
return self._run_dijkstra(self._adj_mcpu_dma, src, dst)
|
||||
|
||||
@@ -19,9 +19,9 @@ class GraphEngine:
|
||||
"""simpy-based discrete-event simulation engine.
|
||||
|
||||
Request routing:
|
||||
MemoryWrite/Read: pcie_ep → io_noc → cube → xbar → hbm_ctrl (m_cpu bypass)
|
||||
MemoryWrite/Read: pcie_ep → io_noc → cube → router mesh → hbm_ctrl (m_cpu bypass)
|
||||
KernelLaunch: pcie_ep → io_noc → io_cpu → io_noc → cube → m_cpu → PE
|
||||
PeDmaMsg: pe_dma → xbar → hbm_ctrl (direct probe)
|
||||
PeDmaMsg: pe_dma → router mesh → hbm_ctrl (direct probe)
|
||||
|
||||
Component implementations are DI-injectable via component_overrides (ADR-0007 D3).
|
||||
"""
|
||||
@@ -261,7 +261,7 @@ class GraphEngine:
|
||||
done.succeed()
|
||||
|
||||
def _process_memory_direct(self, key: str, request: Any, done: simpy.Event):
|
||||
"""Direct memory path: pcie_ep → io_noc → cube → xbar → hbm_ctrl.
|
||||
"""Direct memory path: pcie_ep → io_noc → cube → router mesh → hbm_ctrl.
|
||||
|
||||
MemoryWrite: data flows forward (nbytes on wires), drain at hbm_ctrl terminal.
|
||||
MemoryRead: command flows forward (nbytes=0), hbm_ctrl sends data back on
|
||||
|
||||
@@ -287,7 +287,7 @@ def _generate_probe_d2h(graph, edge_map) -> list[dict]:
|
||||
|
||||
|
||||
def _generate_probe_pe_dma(graph, edge_map) -> list[dict]:
|
||||
"""PE DMA probes: pe_dma → xbar → HBM."""
|
||||
"""PE DMA probes: pe_dma → router mesh → HBM."""
|
||||
from kernbench.policy.address.phyaddr import PhysAddr
|
||||
from kernbench.policy.routing.router import AddressResolver, PathRouter
|
||||
|
||||
|
||||
@@ -385,6 +385,55 @@ def _escape(text: str) -> str:
|
||||
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
|
||||
|
||||
|
||||
# ── Connector helper ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _connector_points(
|
||||
rx: float, ry: float, cx: float, cy: float
|
||||
) -> str:
|
||||
"""Return SVG polyline points for a rule-based connector.
|
||||
|
||||
Horizontal-dominant (|dx| >= |dy|): 45° → horizontal straight → 45°.
|
||||
Vertical-dominant (|dy| > |dx|): 45° → vertical straight → 45°.
|
||||
Near-equal or tiny distance: single straight line.
|
||||
"""
|
||||
dx = cx - rx
|
||||
dy = cy - ry
|
||||
adx, ady = abs(dx), abs(dy)
|
||||
|
||||
# Trivial distance → single line
|
||||
# Near-45° diagonal for short distances only (e.g. PE↔router)
|
||||
if adx + ady < 4 or (abs(adx - ady) < 4 and adx + ady < 80):
|
||||
return f"{rx:.0f},{ry:.0f} {cx:.0f},{cy:.0f}"
|
||||
|
||||
sx = 1 if dx >= 0 else -1
|
||||
sy = 1 if dy >= 0 else -1
|
||||
|
||||
if adx >= ady:
|
||||
# Horizontal-dominant: stubs handle vertical, straight is horizontal
|
||||
stub = ady / 2
|
||||
if stub < 2:
|
||||
return f"{rx:.0f},{ry:.0f} {cx:.0f},{cy:.0f}"
|
||||
r45x = rx + sx * stub
|
||||
r45y = ry + sy * stub
|
||||
c45x = cx - sx * stub
|
||||
c45y = cy - sy * stub # r45y == c45y (horizontal)
|
||||
else:
|
||||
# Vertical-dominant: stubs handle horizontal, straight is vertical
|
||||
stub = adx / 2
|
||||
if stub < 2:
|
||||
return f"{rx:.0f},{ry:.0f} {cx:.0f},{cy:.0f}"
|
||||
r45x = rx + sx * stub
|
||||
r45y = ry + sy * stub
|
||||
c45x = cx - sx * stub
|
||||
c45y = cy - sy * stub # r45x == c45x (vertical)
|
||||
|
||||
return (
|
||||
f"{rx:.0f},{ry:.0f} {r45x:.0f},{r45y:.0f} "
|
||||
f"{c45x:.0f},{c45y:.0f} {cx:.0f},{cy:.0f}"
|
||||
)
|
||||
|
||||
|
||||
# ── Cube-specific renderer ──────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -637,55 +686,39 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str:
|
||||
f'text-anchor="middle" font-family="monospace" font-size="{font_sz}" '
|
||||
f'font-weight="bold" fill="{style["text"]}">{_escape(label)}</text>'
|
||||
)
|
||||
# Connector: router ─45°─ straight ─45°─ component
|
||||
# Connector: rule-based (short → 45° line, long → 45°-straight-45°)
|
||||
sc = style["stroke"]
|
||||
d = 12 # 45° stub length (px)
|
||||
|
||||
# Determine start (router edge) and end (component edge) points
|
||||
bxc = bx + blk_w / 2 # component center x
|
||||
if kind == "mcpu":
|
||||
# Router top → 45° NW stub → vertical → 45° into block bottom
|
||||
rx2, ry2 = px, py - r_size
|
||||
bxc, byc = bx + blk_w / 2, by + blk_h
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx2:.0f},{ry2:.0f} {rx2 - d:.0f},{ry2 - d:.0f} '
|
||||
f'{rx2 - d:.0f},{byc + d:.0f} {bxc:.0f},{byc:.0f}" '
|
||||
f'fill="none" stroke="{sc}" stroke-width="1" opacity="0.6"/>'
|
||||
)
|
||||
rx0, ry0 = px, py - r_size # router top
|
||||
cx0, cy0 = bxc, by + blk_h # component bottom
|
||||
elif kind == "sram":
|
||||
# Router bottom → 45° SW stub → vertical → 45° into block top
|
||||
rx2, ry2 = px, py + r_size
|
||||
bxc, byc = bx + blk_w / 2, by
|
||||
rx0, ry0 = px, py + r_size # router bottom
|
||||
cx0, cy0 = bxc, by # component top
|
||||
elif is_top:
|
||||
rx0, ry0 = px, py - r_size # router top
|
||||
cx0, cy0 = bx + blk_w / 2 + offset_x, by + blk_h # component bottom
|
||||
else:
|
||||
rx0, ry0 = px, py + r_size # router bottom
|
||||
cx0, cy0 = bx + blk_w / 2 + offset_x, by # component top
|
||||
|
||||
# PE/M_CPU/SRAM directly above/below router (same X):
|
||||
# single diagonal line from router center to component right edge
|
||||
if abs(cx0 - rx0) < 2 and abs(cy0 - ry0) > 4:
|
||||
cx0 = bx + blk_w - 2
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx2:.0f},{ry2:.0f} {rx2 - d:.0f},{ry2 + d:.0f} '
|
||||
f'{rx2 - d:.0f},{byc - d:.0f} {bxc:.0f},{byc:.0f}" '
|
||||
f'fill="none" stroke="{sc}" stroke-width="1" opacity="0.6"/>'
|
||||
f' <line x1="{rx0:.0f}" y1="{ry0:.0f}" '
|
||||
f'x2="{cx0:.0f}" y2="{cy0:.0f}" '
|
||||
f'stroke="{sc}" stroke-width="1" opacity="0.6"/>'
|
||||
)
|
||||
else:
|
||||
# PE: vertical direction
|
||||
bxc = bx + blk_w / 2 + offset_x
|
||||
if is_top:
|
||||
rx2, ry2 = px, py - r_size # router top
|
||||
byc = by + blk_h # block bottom
|
||||
# 45° stub from router, vertical, 45° into block
|
||||
sx = bxc - px # horizontal shift direction
|
||||
sd = d if sx >= 0 else -d
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx2:.0f},{ry2:.0f} {rx2 + sd:.0f},{ry2 - d:.0f} '
|
||||
f'{rx2 + sd:.0f},{byc + d:.0f} {bxc:.0f},{byc:.0f}" '
|
||||
f'fill="none" stroke="{sc}" stroke-width="1" opacity="0.6"/>'
|
||||
)
|
||||
else:
|
||||
rx2, ry2 = px, py + r_size # router bottom
|
||||
byc = by # block top
|
||||
sx = bxc - px
|
||||
sd = d if sx >= 0 else -d
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx2:.0f},{ry2:.0f} {rx2 + sd:.0f},{ry2 + d:.0f} '
|
||||
f'{rx2 + sd:.0f},{byc - d:.0f} {bxc:.0f},{byc:.0f}" '
|
||||
f'fill="none" stroke="{sc}" stroke-width="1" opacity="0.6"/>'
|
||||
)
|
||||
pts = _connector_points(rx0, ry0, cx0, cy0)
|
||||
parts.append(
|
||||
f' <polyline points="{pts}" '
|
||||
f'fill="none" stroke="{sc}" stroke-width="1" opacity="0.6"/>'
|
||||
)
|
||||
|
||||
# (PE→HBM BW annotation drawn in the PE→HBM port group section above)
|
||||
|
||||
@@ -705,26 +738,13 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str:
|
||||
rpx, rpy = mm2px(rx, ry)
|
||||
tgx, tgy = _pe_hbm_targets[pe_id]
|
||||
r_edge_y = rpy + r_size if rpy < hbm_y else rpy - r_size
|
||||
# 45° stub from router → vertical → 45° into HBM port
|
||||
d = 12 # stub length
|
||||
sx = tgx - rpx
|
||||
sd = d if sx >= 0 else -d
|
||||
if rpy < hbm_y:
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rpx:.0f},{r_edge_y:.0f} {rpx + sd:.0f},{r_edge_y + d:.0f} '
|
||||
f'{rpx + sd:.0f},{tgy - d:.0f} {tgx:.0f},{tgy:.0f}" '
|
||||
f'fill="none" stroke="#10b981" stroke-width="1.5" opacity="0.6" '
|
||||
f'stroke-dasharray="4,3"/>'
|
||||
)
|
||||
else:
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rpx:.0f},{r_edge_y:.0f} {rpx + sd:.0f},{r_edge_y - d:.0f} '
|
||||
f'{rpx + sd:.0f},{tgy + d:.0f} {tgx:.0f},{tgy:.0f}" '
|
||||
f'fill="none" stroke="#10b981" stroke-width="1.5" opacity="0.6" '
|
||||
f'stroke-dasharray="4,3"/>'
|
||||
)
|
||||
# Rule-based connector: router → HBM port group
|
||||
pts = _connector_points(rpx, r_edge_y, tgx, tgy)
|
||||
parts.append(
|
||||
f' <polyline points="{pts}" '
|
||||
f'fill="none" stroke="#10b981" stroke-width="1.5" opacity="0.6" '
|
||||
f'stroke-dasharray="4,3"/>'
|
||||
)
|
||||
# BW annotation at midpoint
|
||||
mx = (rpx + tgx) / 2 + 10
|
||||
my = (r_edge_y + tgy) / 2
|
||||
@@ -818,53 +838,27 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str:
|
||||
f'{conn}</text>'
|
||||
)
|
||||
|
||||
# Connector: router ─45°stub─ straight ─45°stub─ UCIe port
|
||||
# Connector: rule-based router → UCIe port
|
||||
rpx, rpy = mm2px(crx, cry)
|
||||
d = 10
|
||||
if direction == "N":
|
||||
rx, ry = rpx, rpy - r_size
|
||||
tx, ty = lx, cy_box + ch
|
||||
sx = tx - rx
|
||||
sd = d if sx >= 0 else -d
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx:.0f},{ry:.0f} {rx + sd:.0f},{ry - d:.0f} '
|
||||
f'{rx + sd:.0f},{ty + d:.0f} {tx:.0f},{ty:.0f}" '
|
||||
f'fill="none" stroke="{c_color}" stroke-width="1" opacity="0.5"/>'
|
||||
)
|
||||
elif direction == "S":
|
||||
rx, ry = rpx, rpy + r_size
|
||||
tx, ty = lx, cy_box
|
||||
sx = tx - rx
|
||||
sd = d if sx >= 0 else -d
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx:.0f},{ry:.0f} {rx + sd:.0f},{ry + d:.0f} '
|
||||
f'{rx + sd:.0f},{ty - d:.0f} {tx:.0f},{ty:.0f}" '
|
||||
f'fill="none" stroke="{c_color}" stroke-width="1" opacity="0.5"/>'
|
||||
)
|
||||
elif direction == "W":
|
||||
rx, ry = rpx - r_size, rpy
|
||||
tx, ty = cx + cw, cy_box + ch / 2
|
||||
sy = ty - ry
|
||||
sd = d if sy >= 0 else -d
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx:.0f},{ry:.0f} {rx - d:.0f},{ry + sd:.0f} '
|
||||
f'{tx + d:.0f},{ry + sd:.0f} {tx:.0f},{ty:.0f}" '
|
||||
f'fill="none" stroke="{c_color}" stroke-width="1" opacity="0.5"/>'
|
||||
)
|
||||
elif direction == "E":
|
||||
rx, ry = rpx + r_size, rpy
|
||||
tx, ty = cx, cy_box + ch / 2
|
||||
sy = ty - ry
|
||||
sd = d if sy >= 0 else -d
|
||||
parts.append(
|
||||
f' <polyline points="'
|
||||
f'{rx:.0f},{ry:.0f} {rx + d:.0f},{ry + sd:.0f} '
|
||||
f'{tx - d:.0f},{ry + sd:.0f} {tx:.0f},{ty:.0f}" '
|
||||
f'fill="none" stroke="{c_color}" stroke-width="1" opacity="0.5"/>'
|
||||
)
|
||||
else:
|
||||
continue
|
||||
pts = _connector_points(rx, ry, tx, ty)
|
||||
parts.append(
|
||||
f' <polyline points="{pts}" '
|
||||
f'fill="none" stroke="{c_color}" stroke-width="1" opacity="0.5"/>'
|
||||
)
|
||||
|
||||
# ── Legend ──
|
||||
ly = h_px - 35
|
||||
|
||||
Reference in New Issue
Block a user