Files
kernbench2/scripts/build_overview_slides.py
T
mukesh 5accd98171 Add deck builder + overview-with-ref diagram scripts
scripts/build_overview_slides.py renders a 5-slide PPTX
(kernbench2_overview.pptx) summarizing architecture, model
correctness, IPCQ, allreduce, and buffer-kind tier comparison.

scripts/emit_overview_with_external_ref.py renders log-y and
broken-y variants of the allreduce overview (overview_log.png,
overview_broken.png) including a 366 µs ext-sim reference marker
at 96 KB / PE.

Also includes cube_mesh_view.png rendered from the SVG.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 18:20:54 -07:00

172 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Generate a 5-slide PPTX summarizing the kernbench2 model.

Slides (in order):
  1. Overall architecture — how PEs are connected (cube_mesh_view)
  2. Model correctness — DMA vs P2P latency (pe2pe overview)
  3. PE-to-PE IPCQ communication (ipcq_two_pe_dma)
  4. 6-device allreduce — model vs theoretical vs ext-sim (overview_broken)
  5. IPCQ buffer-kind sweep — TCM vs SRAM vs HBM (buffer_kind_sweep)

This is a derived-artifact generator — no production code is touched.
"""
from __future__ import annotations
from pathlib import Path
from PIL import Image
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.util import Emu, Inches, Pt
# Directory two levels above this script; every other path hangs off it.
ROOT = Path(__file__).resolve().parent.parent
# Folder holding the rendered diagrams; the deck is written here as well.
DIAG = ROOT.joinpath("docs", "diagrams")
# Destination file of the generated presentation.
OUT = DIAG.joinpath("kernbench2_overview.pptx")

# Slide dimensions in inches (16:9 widescreen, 13.333 wide by 7.5 tall).
SLIDE_W_IN = 13.333
SLIDE_H_IN = 7.5
# Per-slide content, in presentation order. Each entry provides:
#   "title"   - heading text shown at the top of the slide
#   "image"   - path of the PNG placed on the slide (all live under DIAG)
#   "bullets" - list of text lines rendered next to the image
SLIDES = [
    {
        "title": "1. CUBE Architecture: NOC Router Mesh + PE Connectivity",
        "image": DIAG / "cube_mesh_view.png",
        "bullets": [
            "Each CUBE holds an 8-PE NOC mesh wired through routers (R0..R7)",
            "Every PE has IO_CPU, M_CPU, PE_CPU + IPCQ engine + DMA engine",
            "Inter-cube traffic exits via UCIe/UAL ports; SIPs stitch into ring/torus/mesh",
            "Foundation for every latency, IPCQ, and allreduce experiment that follows",
        ],
    },
    {
        "title": "2. Model Correctness: DMA vs P2P Latency Sweep",
        "image": DIAG / "pe2pe_latency_plots" / "overview.png",
        "bullets": [
            "Sweeps payload size across PE-to-PE paths and compares to DMA",
            "Confirms the simulator reproduces the expected DMA/P2P crossover",
            "Acts as the per-hop ground truth that feeds collective-level models",
        ],
    },
    {
        "title": "3. IPCQ: How Two PEs Communicate (DMA + Slot Memory)",
        "image": DIAG / "ipcq_diagram_plots" / "ipcq_two_pe_dma.png",
        "bullets": [
            "Sender pushes payload through PE_DMA → fabric → receiver IPCQ slot",
            "Slot memory (TCM/SRAM/HBM) charges a write on arrival, a read on consume",
            "Credit return rides the fabric path back (16 B packet, no slot-IO)",
            "This is the building block the multi-device allreduce composes",
        ],
    },
    {
        "title": "4. 6-Device Allreduce: Model vs Theoretical vs External Simulator",
        "image": DIAG / "allreduce_latency_plots" / "overview_broken.png",
        "bullets": [
            "Three SIP topologies (ring / torus / mesh) swept 16 B → 96 KB per PE",
            "Dashed red curve: hand-derived theoretical model for torus_2d (6 SIPs)",
            "Top panel (broken y-axis): single-device reduce on ext-sim ≈ 366 µs",
            # NOTE(review): the bullet below states ~1722 µs yet claims to be
            # "~17× faster" than the 366 µs ext-sim figure quoted just above;
            # 1722 > 366, so the two numbers contradict each other. Possibly a
            # range such as "~17–22 µs" lost its dash — confirm against the
            # plot data before presenting.
            "Our 6-device collective lands at ~1722 µs — ~17× faster than ext-sim baseline",
        ],
    },
    {
        "title": "5. IPCQ Slot Memory: TCM vs SRAM vs HBM",
        "image": DIAG / "allreduce_latency_plots" / "buffer_kind_sweep.png",
        "bullets": [
            "Same allreduce with slot memory swapped: TCM (per-PE local) / SRAM / HBM (cube-shared, behind router link)",
            "Cost = NoC drain + slot-IO + PE↔bank hop; only TCM skips the bank hop",
            "Topology link BWs set the order: SRAM bank link 128 GB/s is the narrowest in the system, HBM 256 GB/s",
            "At 64 KB / PE: TCM 12.0 µs < HBM 21.4 µs < SRAM 24.3 µs — SRAM is slowest because of its narrow bank link",
        ],
    },
]
def _add_title(slide, text):
    """Put the slide heading in a text box along the top edge.

    The box sits 0.4 in from the left and 0.25 in from the top, spans the
    slide width minus 0.8 in, and is 0.7 in tall. All four internal margins
    are set to zero. The heading is a single bold 26 pt run coloured
    RGB (0x10, 0x2A, 0x55).

    Args:
        slide: Slide whose ``shapes`` collection receives the text box.
        text: Heading text to display.

    Returns:
        The text box shape that was added.
    """
    title_box = slide.shapes.add_textbox(
        Inches(0.4),
        Inches(0.25),
        Inches(SLIDE_W_IN - 0.8),
        Inches(0.7),
    )

    frame = title_box.text_frame
    # Remove the frame's inner padding on every side.
    for side in ("left", "right", "top", "bottom"):
        setattr(frame, f"margin_{side}", Emu(0))

    heading = frame.paragraphs[0].add_run()
    heading.text = text

    font = heading.font
    font.size = Pt(26)
    font.bold = True
    font.color.rgb = RGBColor(0x10, 0x2A, 0x55)

    return title_box
def _add_image_centered(slide, img_path, *, left_in, top_in, max_w_in, max_h_in):
    """Insert a picture scaled to fit a rectangle and centred inside it.

    The image's pixel size is read with Pillow. One scale factor (the
    smaller of the width and height ratios) is applied to both dimensions,
    so the aspect ratio is kept and the picture never exceeds the rectangle.
    Leftover space is split evenly on both sides of each axis.

    Args:
        slide: Slide whose ``shapes`` collection receives the picture.
        img_path: Location of the image file; converted with ``str()``
            before being handed to ``add_picture``.
        left_in: Left edge of the bounding rectangle, in inches.
        top_in: Top edge of the bounding rectangle, in inches.
        max_w_in: Width of the bounding rectangle, in inches.
        max_h_in: Height of the bounding rectangle, in inches.
    """
    # Only the pixel dimensions are needed; the file is closed right after.
    with Image.open(img_path) as picture:
        px_w, px_h = picture.size

    box_w = Inches(max_w_in)
    box_h = Inches(max_h_in)

    # Slide-length units per pixel: the tighter of the two axes wins.
    fit = min(box_w / px_w, box_h / px_h)
    pic_w = int(px_w * fit)
    pic_h = int(px_h * fit)

    # Offset by half of the unused space to centre the picture in the box.
    x = Inches(left_in) + (box_w - pic_w) // 2
    y = Inches(top_in) + (box_h - pic_h) // 2

    slide.shapes.add_picture(str(img_path), x, y, width=pic_w, height=pic_h)
def _add_bullets(slide, bullets, *, left_in, top_in, width_in, height_in):
    """Render a list of text lines as stacked paragraphs in a text box.

    Word wrap is enabled on the box. Each line becomes its own level-0
    paragraph holding one 15 pt run coloured RGB (0x22, 0x22, 0x22), with
    6 pt of spacing after it.

    Args:
        slide: Slide whose ``shapes`` collection receives the text box.
        bullets: Iterable of strings, one per paragraph.
        left_in: Left edge of the text box, in inches.
        top_in: Top edge of the text box, in inches.
        width_in: Width of the text box, in inches.
        height_in: Height of the text box, in inches.
    """
    text_box = slide.shapes.add_textbox(
        Inches(left_in),
        Inches(top_in),
        Inches(width_in),
        Inches(height_in),
    )
    frame = text_box.text_frame
    frame.word_wrap = True

    for position, entry in enumerate(bullets):
        # The first entry reuses the frame's existing paragraph (index 0);
        # later entries append new paragraphs.
        para = frame.add_paragraph() if position else frame.paragraphs[0]
        para.level = 0

        piece = para.add_run()
        # NOTE(review): the empty-string prefix is a no-op as written;
        # possibly a bullet glyph was intended here — confirm.
        piece.text = "" + entry

        font = piece.font
        font.size = Pt(15)
        font.color.rgb = RGBColor(0x22, 0x22, 0x22)

        para.space_after = Pt(6)
def _add_footer(slide, idx, total):
    """Write an "idx / total" page counter near the bottom-right corner.

    The 1.0 in by 0.3 in text box is placed 1.2 in from the right edge and
    0.45 in from the bottom edge of the slide. The counter is a 10 pt run
    coloured RGB (0x88, 0x88, 0x88).

    Args:
        slide: Slide whose ``shapes`` collection receives the text box.
        idx: Number shown before the slash.
        total: Number shown after the slash.
    """
    corner_x = Inches(SLIDE_W_IN - 1.2)
    corner_y = Inches(SLIDE_H_IN - 0.45)
    counter_box = slide.shapes.add_textbox(
        corner_x, corner_y, Inches(1.0), Inches(0.3),
    )

    counter = counter_box.text_frame.paragraphs[0].add_run()
    counter.text = f"{idx} / {total}"

    font = counter.font
    font.size = Pt(10)
    font.color.rgb = RGBColor(0x88, 0x88, 0x88)
def build():
    """Assemble the deck described by ``SLIDES`` and save it to ``OUT``.

    A new presentation is sized to ``SLIDE_W_IN`` by ``SLIDE_H_IN`` inches.
    For every entry in ``SLIDES`` a slide is created from layout index 6 and
    filled with a title, the entry's image, its bullet lines, and an
    "n / total" footer. The output directory is created if missing, the
    file is saved, and the destination path is printed.
    """
    deck = Presentation()
    deck.slide_width = Inches(SLIDE_W_IN)
    deck.slide_height = Inches(SLIDE_H_IN)
    blank_layout = deck.slide_layouts[6]

    # Slide geometry: image area on the left (8.3 in wide), bullet column on
    # the right (4.3 in wide).
    image_area = {"left_in": 0.3, "top_in": 1.05, "max_w_in": 8.3, "max_h_in": 5.9}
    bullet_area = {"left_in": 8.8, "top_in": 1.2, "width_in": 4.3, "height_in": 5.7}

    slide_count = len(SLIDES)
    for number, spec in enumerate(SLIDES, start=1):
        page = deck.slides.add_slide(blank_layout)
        _add_title(page, spec["title"])
        _add_image_centered(page, spec["image"], **image_area)
        _add_bullets(page, spec["bullets"], **bullet_area)
        _add_footer(page, number, slide_count)

    # Ensure the destination folder exists before writing the file.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    deck.save(OUT)
    print(f"wrote {OUT}")
# Script entry point: build the deck only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    build()