diff --git a/docs/diagrams/cube_view.svg b/docs/diagrams/cube_view.svg
index 1900de0..a51b065 100644
--- a/docs/diagrams/cube_view.svg
+++ b/docs/diagrams/cube_view.svg
@@ -7,71 +7,78 @@
HBM_CTRL | 64 pseudo channels
Total BW: 2048 GB/s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 64 ports | 8 per PE (color-coded)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ PE0×8ch
+ PE1×8ch
+ PE2×8ch
+ PE3×8ch
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ PE4×8ch
+ PE5×8ch
+ PE6×8ch
+ PE7×8ch
@@ -129,44 +136,42 @@
PE0
-
- UCIe-W.c0
-
-
- UCIe-N.c0
-
- 256GB/s
+
+ UCIe-W.c0
+
+
+ UCIe-N.c0
+
r0c1
-
- PE1
-
-
- UCIe-N.c1
-
- 256GB/s
+
+ PE1
+
+
+ UCIe-N.c1
+
r0c2
r0c3
r0c4
-
- UCIe-N.c2
-
+
+ UCIe-N.c2
+
r0c5
-
- UCIe-E.c0
-
-
- UCIe-N.c3
-
+
+ UCIe-E.c0
+
+
+ UCIe-N.c3
+
r1c0
-
- UCIe-W.c1
-
+
+ UCIe-W.c1
+
r1c1
@@ -175,23 +180,21 @@
r1c3
r1c4
-
- PE2
+
+ PE2
- 256GB/s
r1c5
-
- PE3
-
-
- UCIe-E.c1
-
- 256GB/s
+
+ PE3
+
+
+ UCIe-E.c1
+
r2c0
-
- M_CPU
+
+ M_CPU
r2c1
@@ -201,8 +204,8 @@
r2c5
r3c0
-
- SRAM
+
+ SRAM
r3c1
@@ -212,19 +215,17 @@
r3c5
r4c0
-
- PE4
-
-
- UCIe-W.c2
-
- 256GB/s
+
+ PE4
+
+
+ UCIe-W.c2
+
r4c1
-
- PE5
+
+ PE5
- 256GB/s
r4c2
@@ -233,79 +234,61 @@
r4c4
r4c5
-
- UCIe-E.c2
-
+
+ UCIe-E.c2
+
r5c0
-
- UCIe-W.c3
-
-
- UCIe-S.c0
-
+
+ UCIe-W.c3
+
+
+ UCIe-S.c0
+
r5c1
-
- UCIe-S.c1
-
+
+ UCIe-S.c1
+
r5c2
r5c3
r5c4
-
- PE6
-
-
- UCIe-S.c2
-
- 256GB/s
+
+ PE6
+
+
+ UCIe-S.c2
+
r5c5
-
- PE7
-
-
- UCIe-E.c3
-
-
- UCIe-S.c3
-
- 256GB/s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ PE7
+
+
+ UCIe-E.c3
+
+
+ UCIe-S.c3
+
+
+ 256GB/s
+
+ 256GB/s
+
+ 256GB/s
+
+ 256GB/s
+
+ 256GB/s
+
+ 256GB/s
+
+ 256GB/s
+
+ 256GB/s
PE Router
diff --git a/src/kernbench/topology/visualizer.py b/src/kernbench/topology/visualizer.py
index e61a4fe..e1df01d 100644
--- a/src/kernbench/topology/visualizer.py
+++ b/src/kernbench/topology/visualizer.py
@@ -468,29 +468,52 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str:
f'Total BW: {total_ch * channel_bw:.0f} GB/s'
)
- # ── Pseudo channel port indicators (horizontal bar inside HBM zone) ──
- port_bar_y = hcy + 15
- port_bar_w = 8.0 * scale # slightly narrower than HBM zone
- port_bar_x = hcx - port_bar_w / 2
- port_w = port_bar_w / total_ch
- for i in range(total_ch):
- pe_owner = i // channels_per_pe
- # Color by PE owner
- colors = ["#3b82f6", "#60a5fa", "#8b5cf6", "#a78bfa",
- "#f59e0b", "#fbbf24", "#ef4444", "#f87171"]
- c = colors[pe_owner % len(colors)]
- px = port_bar_x + i * port_w
- parts.append(
- f' '
- )
- # Port bar label
- parts.append(
- f' '
- f'{total_ch} ports | {channels_per_pe} per PE (color-coded)'
- )
+ # ── Pseudo channel ports on HBM top/bottom edges ──
+ # Top edge: first half of the ports, bottom edge: second half (e.g. 64 ch at 8 per PE -> 32 ports each, PE0..PE3 on top, PE4..PE7 on bottom)
+ half_ch = total_ch // 2
+ pes_per_half = half_ch // channels_per_pe # 4 PEs per half
+ port_bar_w = hbm_w - 20 # slightly narrower than HBM zone
+ port_w = port_bar_w / half_ch
+ port_h = 8
+ pe_colors = ["#3b82f6", "#60a5fa", "#8b5cf6", "#a78bfa",
+ "#f59e0b", "#fbbf24", "#ef4444", "#f87171"]
+
+ for half_idx, (edge_y, pe_start) in enumerate([
+ (hbm_y + 4, 0), # top edge, PE0-PE3
+ (hbm_y + hbm_h - port_h - 4, pes_per_half), # bottom edge, PE4-PE7
+ ]):
+ bar_x = hbm_x + 10
+ for i in range(half_ch):
+ pe_owner = pe_start + i // channels_per_pe
+ c = pe_colors[pe_owner % len(pe_colors)]
+ px = bar_x + i * port_w
+ parts.append(
+ f' '
+ )
+ # Per-PE group labels
+ for p in range(pes_per_half):
+ gx = bar_x + (p * channels_per_pe + channels_per_pe / 2) * port_w
+ label_y = edge_y - 3 if half_idx == 0 else edge_y + port_h + 8
+ parts.append(
+ f' '
+ f'PE{pe_start + p}×{channels_per_pe}ch'
+ )
+
+ # Store port group centers for PE→HBM connection lines (used later)
+ _pe_hbm_targets: dict[int, tuple[float, float]] = {}
+ for half_idx, (edge_y, pe_start) in enumerate([
+ (hbm_y + 4, 0),
+ (hbm_y + hbm_h - port_h - 4, pes_per_half),
+ ]):
+ bar_x = hbm_x + 10
+ for p in range(pes_per_half):
+ pe_id = pe_start + p
+ gx = bar_x + (p * channels_per_pe + channels_per_pe / 2) * port_w
+ gy = edge_y if half_idx == 0 else edge_y + port_h
+ _pe_hbm_targets[pe_id] = (gx, gy)
# ── Router mesh links ──
for r in range(n_rows):
@@ -589,20 +612,27 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str:
offset_x = (bi - (len(blocks) - 1) / 2) * (blk_w + 4)
if kind == "ucie":
- # UCIe: place at cube edge direction
+ # UCIe: place flush against cube edge at router position
direction = label.split("-")[1].split(".")[0] if "-" in label else ""
+ ucie_w, ucie_h = 22, 10 # smaller blocks for UCIe ports
if direction == "N":
- bx, by = px + offset_x - blk_w / 2, pad - blk_h - 4
+ bx = px - ucie_w / 2
+ by = pad - ucie_h # flush against top edge
+ blk_w, blk_h = ucie_w, ucie_h
elif direction == "S":
- by_base = pad + cube_h * scale
- bx, by = px + offset_x - blk_w / 2, by_base + 4
+ bx = px - ucie_w / 2
+ by = pad + cube_h * scale # flush against bottom edge
+ blk_w, blk_h = ucie_w, ucie_h
elif direction == "W":
- bx, by = pad - blk_w - 4, py + offset_x - blk_h / 2
+ bx = pad - ucie_w # flush against left edge
+ by = py - ucie_h / 2
+ blk_w, blk_h = ucie_w, ucie_h
elif direction == "E":
- bx_base = pad + cube_w * scale
- bx, by = bx_base + 4, py + offset_x - blk_h / 2
+ bx = pad + cube_w * scale # flush against right edge
+ by = py - ucie_h / 2
+ blk_w, blk_h = ucie_w, ucie_h
else:
- bx, by = px + offset_x - blk_w / 2, py - r_size - blk_h - 4
+ bx, by = px - blk_w / 2, py - r_size - blk_h - 4
elif kind in ("mcpu", "sram"):
# M_CPU/SRAM: place to the left of router (avoid mesh overlap)
bx = px - r_size - blk_w - 6
@@ -672,40 +702,38 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str:
f'stroke="{style["stroke"]}" stroke-width="1" opacity="0.6"/>'
)
- # ── PE router → HBM BW annotation ──
- if pe_items:
- pe_hbm_edge = hbm_y if py < hbm_y else hbm_y + hbm_h
- pe_r_edge = py + r_size if py < hbm_y else py - r_size
- bw_x = px + 14
- bw_y = (pe_r_edge + pe_hbm_edge) / 2
- parts.append(
- f' '
- f'{agg_bw:.0f}GB/s'
- )
+ # (PE→HBM BW annotation is drawn in the "PE Router → HBM pseudo channel port group lines" section below)
- # ── Router → HBM_CTRL lines (drawn last, on top of everything) ──
- # Lines go from router to the HBM zone edge, angled toward HBM center
- # to visually distinguish from vertical mesh links
+ # ── PE Router → HBM pseudo channel port group lines ──
+ # Each PE router connects to its port group center on the HBM edge
for rkey, rval in routers.items():
if rval is None:
continue
+ attach = rval.get("attach", [])
+ pe_dma_items = [a for a in attach if a.endswith(".dma")]
+ if not pe_dma_items:
+ continue
+ pe_id = int(pe_dma_items[0].split(".")[0].replace("pe", ""))
+ if pe_id not in _pe_hbm_targets:
+ continue
rx, ry = rval["pos_mm"]
- px, py = mm2px(rx, ry)
- hbm_edge_y = hbm_y if py < hbm_y else hbm_y + hbm_h
- r_edge_y = py + r_size if py < hbm_y else py - r_size
- if abs(r_edge_y - hbm_edge_y) > 10:
- has_pe = any(a.endswith(".dma") for a in rval.get("attach", []))
- sw = "1.5" if has_pe else "0.7"
- op = "0.6" if has_pe else "0.15"
- # Angle toward HBM center x (hcx) — slight offset, not fully straight
- dx = (hcx - px) * 0.3 # 30% pull toward center
- parts.append(
- f' '
- )
+ rpx, rpy = mm2px(rx, ry)
+ tgx, tgy = _pe_hbm_targets[pe_id]
+ r_edge_y = rpy + r_size if rpy < hbm_y else rpy - r_size
+ parts.append(
+ f' '
+ )
+ # BW annotation at midpoint
+ mx = (rpx + tgx) / 2 + 10
+ my = (r_edge_y + tgy) / 2
+ parts.append(
+ f' '
+ f'{agg_bw:.0f}GB/s'
+ )
# ── Legend ──
ly = h_px - 35