diff --git a/docs/diagrams/cube_view.svg b/docs/diagrams/cube_view.svg index 1900de0..a51b065 100644 --- a/docs/diagrams/cube_view.svg +++ b/docs/diagrams/cube_view.svg @@ -7,71 +7,78 @@ HBM_CTRL | 64 pseudo channels Total BW: 2048 GB/s - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 64 ports | 8 per PE (color-coded) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PE0×8ch + PE1×8ch + PE2×8ch + PE3×8ch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PE4×8ch + PE5×8ch + PE6×8ch + PE7×8ch @@ -129,44 +136,42 @@ PE0 - - UCIe-W.c0 - - - UCIe-N.c0 - - 256GB/s + + UCIe-W.c0 + + + UCIe-N.c0 + r0c1 - - PE1 - - - UCIe-N.c1 - - 256GB/s + + PE1 + + + UCIe-N.c1 + r0c2 r0c3 r0c4 - - UCIe-N.c2 - + + UCIe-N.c2 + r0c5 - - UCIe-E.c0 - - - UCIe-N.c3 - + + UCIe-E.c0 + + + UCIe-N.c3 + r1c0 - - UCIe-W.c1 - + + UCIe-W.c1 + r1c1 @@ -175,23 +180,21 @@ r1c3 r1c4 - - PE2 + + PE2 - 256GB/s r1c5 - - PE3 - - - UCIe-E.c1 - - 256GB/s + + PE3 + + + UCIe-E.c1 + r2c0 - - M_CPU + + M_CPU r2c1 @@ -201,8 +204,8 @@ r2c5 r3c0 - - SRAM + + SRAM r3c1 @@ -212,19 +215,17 @@ r3c5 r4c0 - - PE4 - - - UCIe-W.c2 - - 256GB/s + + PE4 + + + UCIe-W.c2 + r4c1 - - PE5 + + PE5 - 256GB/s r4c2 @@ -233,79 +234,61 @@ r4c4 r4c5 - - UCIe-E.c2 - + + UCIe-E.c2 + r5c0 - - UCIe-W.c3 - - - UCIe-S.c0 - + + UCIe-W.c3 + + + UCIe-S.c0 + r5c1 - - UCIe-S.c1 - + + UCIe-S.c1 + r5c2 r5c3 r5c4 - - PE6 - - - UCIe-S.c2 - - 256GB/s + + PE6 + + + UCIe-S.c2 + r5c5 - - PE7 - - - UCIe-E.c3 - - - UCIe-S.c3 - - 256GB/s - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + PE7 + + + UCIe-E.c3 + + + UCIe-S.c3 + + + 256GB/s + + 256GB/s + + 256GB/s + + 256GB/s + + 256GB/s + + 256GB/s + + 256GB/s + + 256GB/s PE Router diff --git a/src/kernbench/topology/visualizer.py b/src/kernbench/topology/visualizer.py index e61a4fe..e1df01d 100644 --- a/src/kernbench/topology/visualizer.py +++ b/src/kernbench/topology/visualizer.py @@ -468,29 +468,52 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str: f'Total BW: {total_ch * channel_bw:.0f} GB/s' ) - # ── Pseudo channel port indicators (horizontal bar inside HBM zone) ── - port_bar_y = hcy + 15 - port_bar_w = 8.0 * scale # slightly narrower than HBM zone - port_bar_x = hcx - port_bar_w / 2 - port_w = port_bar_w / total_ch - for i in range(total_ch): - pe_owner = i // channels_per_pe - # Color by PE owner - colors = ["#3b82f6", "#60a5fa", "#8b5cf6", "#a78bfa", - "#f59e0b", "#fbbf24", "#ef4444", "#f87171"] - c = colors[pe_owner % len(colors)] - px = port_bar_x + i * port_w - parts.append( - f' ' - ) - # Port bar label - parts.append( - f' ' - f'{total_ch} ports | {channels_per_pe} per PE (color-coded)' - ) + # ── Pseudo channel ports on HBM top/bottom edges ── + # Top edge: 32 ports (PE0..PE3, 8 each), Bottom edge: 32 ports (PE4..PE7) + half_ch = total_ch // 2 + pes_per_half = half_ch // channels_per_pe # 4 PEs per half + port_bar_w = hbm_w - 20 # slightly narrower than HBM zone + port_w = port_bar_w / half_ch + port_h = 8 + pe_colors = ["#3b82f6", "#60a5fa", "#8b5cf6", "#a78bfa", + "#f59e0b", "#fbbf24", "#ef4444", "#f87171"] + + for half_idx, (edge_y, pe_start) in enumerate([ + (hbm_y + 4, 0), # top edge, PE0-PE3 + (hbm_y + hbm_h - port_h - 4, pes_per_half), # bottom edge, PE4-PE7 + ]): + bar_x = hbm_x + 10 + for i in range(half_ch): + pe_owner = pe_start + i // channels_per_pe + c = pe_colors[pe_owner % len(pe_colors)] + px = bar_x + i * port_w + parts.append( + f' ' + ) + # Per-PE group labels + for p in range(pes_per_half): + gx = bar_x + (p * channels_per_pe + channels_per_pe / 2) * port_w + label_y = edge_y - 3 if half_idx == 0 else edge_y + port_h + 8 + parts.append( + f' ' + f'PE{pe_start + p}×{channels_per_pe}ch' + ) + + # Store port group centers for PE→HBM connection lines (used later) + _pe_hbm_targets: dict[int, tuple[float, float]] = {} + for half_idx, (edge_y, pe_start) in enumerate([ + (hbm_y + 4, 0), + (hbm_y + hbm_h - port_h - 4, pes_per_half), + ]): + bar_x = hbm_x + 10 + for p in range(pes_per_half): + pe_id = pe_start + p + gx = bar_x + (p * channels_per_pe + channels_per_pe / 2) * port_w + gy = edge_y if half_idx == 0 else edge_y + port_h + _pe_hbm_targets[pe_id] = (gx, gy) # ── Router mesh links ── for r in range(n_rows): @@ -589,20 +612,27 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str: offset_x = (bi - (len(blocks) - 1) / 2) * (blk_w + 4) if kind == "ucie": - # UCIe: place at cube edge direction + # UCIe: place flush against cube edge at router position direction = label.split("-")[1].split(".")[0] if "-" in label else "" + ucie_w, ucie_h = 22, 10 # smaller blocks for UCIe ports if direction == "N": - bx, by = px + offset_x - blk_w / 2, pad - blk_h - 4 + bx = px - ucie_w / 2 + by = pad - ucie_h # flush against top edge + blk_w, blk_h = ucie_w, ucie_h elif direction == "S": - by_base = pad + cube_h * scale - bx, by = px + offset_x - blk_w / 2, by_base + 4 + bx = px - ucie_w / 2 + by = pad + cube_h * scale # flush against bottom edge + blk_w, blk_h = ucie_w, ucie_h elif direction == "W": - bx, by = pad - blk_w - 4, py + offset_x - blk_h / 2 + bx = pad - ucie_w # flush against left edge + by = py - ucie_h / 2 + blk_w, blk_h = ucie_w, ucie_h elif direction == "E": - bx_base = pad + cube_w * scale - bx, by = bx_base + 4, py + offset_x - blk_h / 2 + bx = pad + cube_w * scale # flush against right edge + by = py - ucie_h / 2 + blk_w, blk_h = ucie_w, ucie_h else: - bx, by = px + offset_x - blk_w / 2, py - r_size - blk_h - 4 + bx, by = px - blk_w / 2, py - r_size - blk_h - 4 elif kind in ("mcpu", "sram"): # M_CPU/SRAM: place to the left of router (avoid mesh overlap) bx = px - r_size - blk_w - 6 @@ -672,40 +702,38 @@ def _render_cube_view_svg(view: ViewGraph, spec: dict) -> str: f'stroke="{style["stroke"]}" stroke-width="1" opacity="0.6"/>' ) - # ── PE router → HBM BW annotation ── - if pe_items: - pe_hbm_edge = hbm_y if py < hbm_y else hbm_y + hbm_h - pe_r_edge = py + r_size if py < hbm_y else py - r_size - bw_x = px + 14 - bw_y = (pe_r_edge + pe_hbm_edge) / 2 - parts.append( - f' ' - f'{agg_bw:.0f}GB/s' - ) + # (PE→HBM BW annotation drawn in the PE→HBM port group section above) - # ── Router → HBM_CTRL lines (drawn last, on top of everything) ── - # Lines go from router to the HBM zone edge, angled toward HBM center - # to visually distinguish from vertical mesh links + # ── PE Router → HBM pseudo channel port group lines ── + # Each PE router connects to its port group center on the HBM edge for rkey, rval in routers.items(): if rval is None: continue + attach = rval.get("attach", []) + pe_dma_items = [a for a in attach if a.endswith(".dma")] + if not pe_dma_items: + continue + pe_id = int(pe_dma_items[0].split(".")[0].replace("pe", "")) + if pe_id not in _pe_hbm_targets: + continue rx, ry = rval["pos_mm"] - px, py = mm2px(rx, ry) - hbm_edge_y = hbm_y if py < hbm_y else hbm_y + hbm_h - r_edge_y = py + r_size if py < hbm_y else py - r_size - if abs(r_edge_y - hbm_edge_y) > 10: - has_pe = any(a.endswith(".dma") for a in rval.get("attach", [])) - sw = "1.5" if has_pe else "0.7" - op = "0.6" if has_pe else "0.15" - # Angle toward HBM center x (hcx) — slight offset, not fully straight - dx = (hcx - px) * 0.3 # 30% pull toward center - parts.append( - f' ' - ) + rpx, rpy = mm2px(rx, ry) + tgx, tgy = _pe_hbm_targets[pe_id] + r_edge_y = rpy + r_size if rpy < hbm_y else rpy - r_size + parts.append( + f' ' + ) + # BW annotation at midpoint + mx = (rpx + tgx) / 2 + 10 + my = (r_edge_y + tgy) / 2 + parts.append( + f' ' + f'{agg_bw:.0f}GB/s' + ) # ── Legend ── ly = h_px - 35