Remove xbar/noc remnants; add rule-based cube-view connectors

- Delete xbar.py and noc.py (TwoDMeshNocComponent) — unused since router mesh
- Remove xbar_v1/noc_2d_mesh_v1 from components.yaml
- Fix pe_to_xbar → pe_to_router in routing exclusion set
- Fix xbar_to_hbm_bw_gbs → hbm_to_router_bw_gbs in report.py
- Update all docstrings/comments referencing xbar/bridge → router mesh
- Cube-view connectors: rule-based _connector_points helper
  - PE↔router: single diagonal line (not chevron)
  - UCIe N/S: 45°→horizontal→45°
  - UCIe E/W: 45°→vertical→45°
  - HBM ports: 45°→horizontal→45°

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-06 23:59:12 -07:00
parent 7640635f90
commit eb792e6212
17 changed files with 163 additions and 571 deletions
-224
View File
@@ -1,224 +0,0 @@
from __future__ import annotations
from collections.abc import Generator
from typing import TYPE_CHECKING, Any
import simpy
from kernbench.components.base import ComponentBase
if TYPE_CHECKING:
from kernbench.components.context import ComponentContext
from kernbench.topology.types import Node
class TwoDMeshNocComponent(ComponentBase):
    """A 2D mesh NOC represented by one routing-aware node.

    Latency:
        A traversal is the Manhattan path between the positions of the
        previous and next hop, cut into XY segments at grid lines. Each
        segment costs ``dist_mm * ns_per_mm``; ``overhead_ns`` (read from
        ``node.attrs``) is charged once, on the first segment.

    Contention:
        Every directed XY segment is backed by its own
        ``simpy.Resource(capacity=1)``, created lazily. The request for the
        following segment is issued before the current segment's timeout
        starts, so an idle downstream segment is already held when the
        current one finishes.

    Concurrency:
        ``_worker`` starts a separate SimPy process for each transaction it
        pulls from the inbox, so transactions only wait on segment
        resources, not on one another at the node.
    """

    def __init__(self, node: Node, ctx: ComponentContext | None = None) -> None:
        super().__init__(node, ctx)
        self._env: simpy.Environment | None = None
        # Lazily created per-segment resources, keyed by the tuples that
        # _xy_links builds.
        self._links: dict[tuple, simpy.Resource] = {}
        # Sorted, de-duplicated grid coordinates (rounded to 2 decimals).
        self._x_grid: list[float] = []
        self._y_grid: list[float] = []

    def start(self, env: simpy.Environment) -> None:
        """Remember the environment, build the grid, then defer to the base class."""
        self._env = env
        self._build_grid()
        super().start(env)

    def run(self, env: simpy.Environment, nbytes: int) -> Generator:
        """Yield a zero-length timeout; ``nbytes`` is not used here."""
        yield env.timeout(0)

    # ── Grid construction ────────────────────────────────────────────

    def _build_grid(self) -> None:
        """Populate the X/Y grids from the mesh spec if present, else from positions."""
        ctx = self.ctx
        if not ctx:
            return
        mesh = ctx.spec.get("_mesh") if ctx.spec else None
        if mesh:
            self._build_grid_from_mesh(mesh)
            return
        self._build_grid_from_positions()

    def _build_grid_from_mesh(self, mesh: dict) -> None:
        """Derive the grid from the ``routers`` entries of the mesh spec.

        Router ``pos_mm`` values are offset by this cube's origin; entries
        whose value is ``None`` are skipped.
        """
        origin_x, origin_y = self._cube_origin()
        routers = [r for r in mesh.get("routers", {}).values() if r is not None]
        x_coords = {round(origin_x + r["pos_mm"][0], 2) for r in routers}
        y_coords = {round(origin_y + r["pos_mm"][1], 2) for r in routers}
        self._x_grid = sorted(x_coords)
        self._y_grid = sorted(y_coords)

    def _build_grid_from_positions(self) -> None:
        """Fallback: collect grid lines from every positioned node in this cube."""
        # Nodes of this cube share the component id minus its last dotted part.
        prefix = self.node.id.rsplit(".", 1)[0] + "."
        in_cube = [
            pos
            for node_id, pos in self.ctx.positions.items()
            if node_id.startswith(prefix) and pos is not None
        ]
        x_coords = {round(pos[0], 2) for pos in in_cube}
        y_coords = {round(pos[1], 2) for pos in in_cube}
        self._x_grid = sorted(x_coords)
        self._y_grid = sorted(y_coords)

    def _cube_origin(self) -> tuple[float, float]:
        """Return this cube's absolute (x, y) origin, derived from its cube index.

        The index is parsed from the first dotted id part starting with
        ``"cube"``; column/row follow from the cube-mesh width in the spec.
        """
        id_parts = self.node.id.split(".")
        cube_token = [part for part in id_parts if part.startswith("cube")][0]
        cube_id = int(cube_token[4:])

        spec = self.ctx.spec
        sip_spec = spec.get("sip", {})
        cube_spec = spec.get("cube", {})

        mesh_w = sip_spec.get("cube_mesh", {}).get("w", 4)
        cube_w = cube_spec.get("geometry", {}).get("cube_mm", {}).get("w", 17.0)
        cube_h = cube_spec.get("geometry", {}).get("cube_mm", {}).get("h", 14.0)
        seam = (
            sip_spec.get("links", {})
            .get("inter_cube_mesh", {})
            .get("distance_mm_across_seam", 1.0)
        )

        col = cube_id % mesh_w
        row = cube_id // mesh_w
        return (col * (cube_w + seam), row * (cube_h + seam))

    def _get_link(self, key: tuple) -> simpy.Resource:
        """Return the capacity-1 resource for ``key``, creating it on first use."""
        resource = self._links.get(key)
        if resource is None:
            assert self._env is not None
            resource = simpy.Resource(self._env, capacity=1)
            self._links[key] = resource
        return resource

    # ── Worker ───────────────────────────────────────────────────────

    def _worker(self, env: simpy.Environment) -> Generator:
        """Pull transactions from the inbox forever, routing each in its own process."""
        while True:
            txn: Any = yield self._inbox.get()
            env.process(self._route(env, txn))

    def _route(self, env: simpy.Environment, txn: Any) -> Generator:
        """Carry one transaction across the mesh and pass it on (or complete it)."""
        prev_hop = txn.path[txn.step - 1] if txn.step > 0 else None
        next_hop = txn.next_hop
        overhead_ns = float(self.node.attrs.get("overhead_ns", 0.0))

        links: list[tuple[tuple, float]] = []
        if prev_hop and next_hop and self.ctx:
            src_pos = self.ctx.positions.get(prev_hop)
            dst_pos = self.ctx.positions.get(next_hop)
            if src_pos and dst_pos:
                links = self._xy_links(src_pos, dst_pos)

        if links:
            yield from self._traverse(env, links, overhead_ns)
        else:
            # No segments to cross: only the fixed overhead applies.
            yield env.timeout(overhead_ns)

        if next_hop:
            yield self.out_ports[next_hop].put(txn.advance())
        else:
            # Last hop: wait out any drain time, then signal completion.
            drain = getattr(txn, "drain_ns", 0.0)
            if drain > 0:
                yield env.timeout(drain)
            txn.done.succeed()

    # ── XY routing and pipelined link traversal ──────────────────────

    def _traverse(
        self,
        env: simpy.Environment,
        links: list[tuple[tuple, float]],
        overhead_ns: float,
    ) -> Generator:
        """Cross ``links`` in order, requesting each next segment ahead of time."""
        ns_per_mm = self.ctx.ns_per_mm  # type: ignore[union-attr]

        # Take the first segment before entering the loop.
        held_res = self._get_link(links[0][0])
        held_req = held_res.request()
        yield held_req

        last_idx = len(links) - 1
        for idx, (_, dist_mm) in enumerate(links):
            has_next = idx < last_idx
            if has_next:
                # Queue for the following segment before spending time on
                # this one, so it can be granted while we are in flight.
                upcoming_res = self._get_link(links[idx + 1][0])
                upcoming_req = upcoming_res.request()

            yield env.timeout(dist_mm * ns_per_mm + (overhead_ns if idx == 0 else 0.0))
            held_res.release(held_req)

            if has_next:
                yield upcoming_req  # typically granted already
                held_res = upcoming_res
                held_req = upcoming_req

    def _xy_links(
        self,
        src: tuple[float, float],
        dst: tuple[float, float],
    ) -> list[tuple[tuple, float]]:
        """Build the XY route from ``src`` to ``dst``: horizontal leg, then vertical.

        Each returned item is ``(link_key, dist_mm)``. The key holds the
        orientation, the snapped band coordinate, the ordered segment
        endpoints and the travel direction, so concurrent transactions on
        the same directed segment resolve to the same resource.
        """
        x0, y0 = src
        x1, y1 = dst
        route: list[tuple[tuple, float]] = []

        # Horizontal leg, on the grid row closest to the source y.
        if abs(x0 - x1) > 1e-9:
            row_y = self._snap(y0, self._y_grid)
            for start, end in self._segments(x0, x1, self._x_grid):
                length = abs(end - start)
                if length > 1e-9:
                    lo, hi = min(start, end), max(start, end)
                    heading = "E" if end > start else "W"
                    key = ("H", round(row_y, 2), round(lo, 2), round(hi, 2), heading)
                    route.append((key, length))

        # Vertical leg, on the grid column closest to the destination x.
        if abs(y0 - y1) > 1e-9:
            col_x = self._snap(x1, self._x_grid)
            for start, end in self._segments(y0, y1, self._y_grid):
                length = abs(end - start)
                if length > 1e-9:
                    lo, hi = min(start, end), max(start, end)
                    heading = "S" if end > start else "N"
                    key = ("V", round(col_x, 2), round(lo, 2), round(hi, 2), heading)
                    route.append((key, length))

        return route

    @staticmethod
    def _snap(val: float, grid: list[float]) -> float:
        """Return the grid value nearest to ``val``; ``val`` itself if the grid is empty."""
        return min(grid, key=lambda g: abs(g - val)) if grid else val

    @staticmethod
    def _segments(a: float, b: float, grid: list[float]) -> list[tuple[float, float]]:
        """Split the span from ``a`` to ``b`` at interior grid points.

        Pairs are ordered and oriented in the direction of travel
        (``a`` towards ``b``). A span shorter than 1e-9 yields no pairs.
        """
        if abs(a - b) < 1e-9:
            return []
        lo, hi = (a, b) if a < b else (b, a)
        waypoints = [lo, *(g for g in grid if lo + 1e-9 < g < hi - 1e-9), hi]
        pairs = list(zip(waypoints, waypoints[1:]))
        if a > b:
            # Travelling towards smaller coordinates: flip order and orientation.
            pairs = [(end, start) for start, end in reversed(pairs)]
        return pairs
+1 -1
View File
@@ -96,7 +96,7 @@ class PeDmaComponent(PeEngineBase):
request=sub_request, path=path, step=0,
nbytes=cmd.nbytes, done=sub_done, drain_ns=drain_ns,
)
# Send to next hop (path[0] is pe_dma itself, path[1] is xbar)
# Send to next hop (path[0] is pe_dma itself, path[1] is router)
if len(path) > 1:
yield self.out_ports[path[1]].put(sub_txn.advance())
# DMA channel released after issue
-168
View File
@@ -1,168 +0,0 @@
"""Position-aware XBAR component.
Models crossbar latency as base_overhead_ns + internal_distance * ns_per_mm,
where internal_distance is the Manhattan distance between the entry port
(PE router attachment) and exit port (HBM slice logical position) within
the crossbar matrix.
PE router positions come from cube_mesh.yaml (via ctx.spec["_mesh"]).
HBM slice positions are uniformly distributed across the HBM physical width.
"""
from __future__ import annotations
from collections.abc import Generator
from typing import TYPE_CHECKING, Any
import simpy
from kernbench.components.base import ComponentBase
if TYPE_CHECKING:
from kernbench.components.context import ComponentContext
from kernbench.topology.types import Node
class PositionAwareXbarComponent(ComponentBase):
    """XBAR whose latency depends on the distance between entry and exit port.

    Latency = base_overhead_ns + |entry_port_x - exit_port_x| * ns_per_mm

    Port X positions are resolved from the transaction path:
      - PE_DMA nodes: X of the router they attach to in the mesh spec
      - HBM slices: spread uniformly across the HBM width
      - Bridge nodes: topology X position, made relative to the cube origin
      - NOC nodes: X of the first PE_DMA node found in the path

    When either port cannot be resolved (or no mesh spec was available at
    start), only the base overhead is charged.
    """

    def __init__(self, node: Node, ctx: ComponentContext | None = None) -> None:
        super().__init__(node, ctx)
        self._base_overhead_ns = float(node.attrs.get("overhead_ns", 0.0))
        # node id -> X position lookup tables, filled by _build_position_map.
        self._pe_router_xs: dict[str, float] = {}
        self._slice_xs: dict[str, float] = {}
        self._bridge_xs: dict[str, float] = {}
        # Stays 0.0 (position-aware latency disabled) until a mesh spec is seen.
        self._ns_per_mm: float = 0.0

    def start(self, env: simpy.Environment) -> None:
        """Build the port position tables, then defer to the base class."""
        self._build_position_map()
        super().start(env)

    def run(self, env: simpy.Environment, nbytes: int) -> Generator:
        """Yield a timeout of the base overhead; ``nbytes`` is not used here."""
        yield env.timeout(self._base_overhead_ns)

    # ── Position map construction ─────────────────────────────────

    def _build_position_map(self) -> None:
        """Fill the PE-router, HBM-slice and bridge X lookup tables.

        Does nothing when there is no context, no spec, or no ``_mesh``
        entry in the spec.
        """
        if not self.ctx or not self.ctx.spec:
            return
        mesh = self.ctx.spec.get("_mesh")
        if not mesh:
            return

        self._ns_per_mm = self.ctx.ns_per_mm
        cube_prefix = self.node.id.rsplit(".", 1)[0]
        xbar_name = self.node.id.rsplit(".", 1)[1]
        is_top = xbar_name == "xbar_top"
        xbar_key = "top" if is_top else "bottom"

        # PE router X positions from mesh attachments.
        routers_list = mesh.get("xbar", {}).get(xbar_key, {}).get("routers", [])
        for router_id in routers_list:
            router_data = mesh["routers"].get(router_id)
            if router_data is None:
                continue
            router_x = router_data["pos_mm"][0]
            for attach in router_data.get("attach", []):
                if attach.endswith(".dma"):
                    pe_name = attach.split(".")[0]
                    self._pe_router_xs[f"{cube_prefix}.{pe_name}.pe_dma"] = router_x

        # HBM slice X positions: uniformly distributed across the HBM width.
        # The top XBAR takes the first half of the slices, the bottom the rest.
        cube_spec = self.ctx.spec.get("cube", {})
        geometry = cube_spec.get("geometry", {})
        cube_w = geometry.get("cube_mm", {}).get("w", 17.0)
        hbm_w = geometry.get("hbm_mm", {}).get("w", 9.0)
        n_slices = cube_spec.get("memory_map", {}).get("hbm_slices_per_cube", 8)
        half = n_slices // 2
        hbm_left = (cube_w - hbm_w) / 2
        slice_range = range(half) if is_top else range(half, n_slices)
        n = len(slice_range)  # range supports len(); no need to materialize it
        for i, sl in enumerate(slice_range):
            if n > 1:
                x = hbm_left + i * hbm_w / (n - 1)
            else:
                # A single slice sits at the cube's horizontal centre.
                x = cube_w / 2
            self._slice_xs[f"{cube_prefix}.hbm_ctrl.slice{sl}"] = x

        # Bridge X positions from topology positions, relative to the cube.
        bridge_prefix = cube_prefix + ".bridge."
        bridge_positions = {
            node_id: pos
            for node_id, pos in self.ctx.positions.items()
            if node_id.startswith(bridge_prefix) and pos is not None
        }
        if bridge_positions:
            # The origin is the same for every bridge of this cube: compute
            # it once, and only when there is at least one bridge to place.
            origin_x = self._cube_origin_x()
            for node_id, pos in bridge_positions.items():
                self._bridge_xs[node_id] = pos[0] - origin_x

    def _cube_origin_x(self) -> float:
        """Return the absolute X origin of this cube.

        The cube index is parsed from the first dotted id part starting
        with ``"cube"``; its column follows from the cube-mesh width.
        """
        parts = self.node.id.split(".")
        cube_str = [p for p in parts if p.startswith("cube")][0]
        cube_id = int(cube_str[4:])
        spec = self.ctx.spec
        sip_spec = spec.get("sip", {})
        cube_spec = spec.get("cube", {})
        mesh_w = sip_spec.get("cube_mesh", {}).get("w", 4)
        cube_w = cube_spec.get("geometry", {}).get("cube_mm", {}).get("w", 17.0)
        seam = sip_spec.get("links", {}).get("inter_cube_mesh", {}).get(
            "distance_mm_across_seam", 1.0)
        col = cube_id % mesh_w
        return col * (cube_w + seam)

    # ── Worker override ───────────────────────────────────────────

    def _worker(self, env: simpy.Environment) -> Generator:
        """Pull transactions from the inbox forever, forwarding each in its own process."""
        while True:
            txn: Any = yield self._inbox.get()
            env.process(self._position_aware_forward(env, txn))

    def _position_aware_forward(
        self, env: simpy.Environment, txn: Any,
    ) -> Generator:
        """Delay one transaction by its position-dependent latency, then pass it on."""
        prev_hop = txn.path[txn.step - 1] if txn.step > 0 else None
        next_hop = txn.next_hop
        overhead = self._base_overhead_ns
        if prev_hop and next_hop and self._ns_per_mm > 0:
            entry_x = self._get_port_x(prev_hop, txn.path)
            exit_x = self._get_port_x(next_hop, txn.path)
            if entry_x is not None and exit_x is not None:
                overhead = self._base_overhead_ns + abs(entry_x - exit_x) * self._ns_per_mm
        yield env.timeout(overhead)
        if next_hop:
            yield self.out_ports[next_hop].put(txn.advance())
        else:
            # Last hop: wait out any drain time, then signal completion.
            drain = getattr(txn, "drain_ns", 0.0)
            if drain > 0:
                yield env.timeout(drain)
            txn.done.succeed()

    def _get_port_x(self, node_id: str, path: list[str]) -> float | None:
        """Resolve the X position of the XBAR port facing ``node_id``.

        Returns ``None`` when the node is in none of the lookup tables and
        cannot be resolved through the NOC rule.
        """
        # Direct lookups, in priority order: PE DMA, HBM slice, bridge.
        for table in (self._pe_router_xs, self._slice_xs, self._bridge_xs):
            if node_id in table:
                return table[node_id]
        # NOC: use the first PE DMA node that appears in the path.
        if "noc" in node_id:
            for p in path:
                if p in self._pe_router_xs:
                    return self._pe_router_xs[p]
        return None