# Recovered from a git format-patch whose line structure had been collapsed.
#
#   Commit:  54fcb7e4bccc8a8b1fc3d7a3620ba0c08a9fb5ad
#   From:    Mukesh Garg
#   Date:    Mon, 27 Apr 2026 21:42:44 -0700
#   Subject: [PATCH] Add tests/test_emit_ipcq_diagram.py (missed from
#            earlier commit)
#
#   This is the diagram generator that emits ipcq_send_recv.png and
#   ipcq_two_pe_dma.png (referenced by commit 1e39214 but accidentally
#   left untracked).
#
#   Co-Authored-By: Claude Opus 4.7 (1M context)
#
# NOTE(review): the recovered text appears to have had runs of whitespace
# collapsed to single spaces.  String literals below are reproduced exactly
# as they were visible; any multi-space column alignment inside the
# monospace labels may need to be restored from the original commit.
"""High-level IPCQ + SFR connection diagram (presentation only).

Renders ``docs/diagrams/ipcq_diagram_plots/ipcq_send_recv.png`` showing one
concrete example: SIP 0 / cube 0 / pe 0 sending to pe 1 in the
``intra_E`` direction. Boxes and arrows are grounded in the actual
code paths:

  - PE_IPCQ SFR fields:   src/kernbench/components/builtin/pe_ipcq.py
  - SFR install:          src/kernbench/ccl/install.py
                          + src/kernbench/ccl/sfr_config.py
  - PE_DMA outbound /
    inbound atomic write: src/kernbench/components/builtin/pe_dma.py

A second entry point renders ``ipcq_two_pe_dma.png`` into the same
directory.

This is a pure-plotting test (no simulation). It exists so the diagram
can be regenerated reproducibly alongside the rest of the suite.
"""
from __future__ import annotations

from pathlib import Path


_OUT_DIR = (Path(__file__).parent.parent / "docs" / "diagrams"
            / "ipcq_diagram_plots")

# Color palette (matches the topology diagram for visual continuity).
_BG = "#fafbfd"
_FRAME = "#3a3f4a"
_TEXT = "#1f2530"
_BLUE = "#2c6fb6"
_GREEN = "#2e8a4e"
_ORANGE = "#d3722a"
_PURPLE = "#7a4cb6"
_BOX_FILL = "#eaf2fb"
_BOX_EDGE = "#2c4a78"
_HW_FILL = "#f3ecda"
_HW_EDGE = "#a07a2a"
_MEM_FILL = "#e8f3e8"
_MEM_EDGE = "#2e8a4e"

# Face colour used for the highlighted ("active") slot in both diagrams.
_ACTIVE_SLOT_FILL = "#ffd9b8"


def _render_to_png(draw, figsize, filename) -> str:
    """Create a figure, let *draw* populate its axes, and save it as a PNG.

    Args:
        draw: callable taking the single ``Axes`` of the new figure.
        figsize: ``(width, height)`` passed to ``plt.subplots``.
        filename: file name created inside ``_OUT_DIR``.

    Returns:
        The output path as a string.
    """
    import matplotlib.pyplot as plt

    _OUT_DIR.mkdir(parents=True, exist_ok=True)
    out_path = _OUT_DIR / filename
    fig, ax = plt.subplots(figsize=figsize, facecolor="white")
    try:
        draw(ax)
        fig.savefig(out_path, dpi=130, bbox_inches="tight",
                    facecolor=fig.get_facecolor())
    finally:
        # pyplot keeps every figure registered until it is closed, so close
        # it even when drawing or saving raises.
        plt.close(fig)
    return str(out_path)


def _box(ax, x, y, w, h, title, lines, *, fill=_BOX_FILL, edge=_BOX_EDGE,
         title_color=None, font=9):
    """Draw a rounded box with a bold centred title and monospace lines.

    ``(x, y)`` is the lower-left corner.  ``lines`` are left-aligned and
    stacked downwards below the title, 0.45 data units apart.  The title
    uses ``title_color`` when given, otherwise the edge colour.
    """
    from matplotlib.patches import FancyBboxPatch
    box = FancyBboxPatch(
        (x, y), w, h,
        boxstyle="round,pad=0.04,rounding_size=0.18",
        linewidth=1.6, edgecolor=edge, facecolor=fill, zorder=2,
    )
    ax.add_patch(box)
    ax.text(x + w / 2, y + h - 0.45, title,
            ha="center", va="top", fontsize=font + 1.5,
            fontweight="bold",
            color=title_color or edge, zorder=3)
    for i, line in enumerate(lines):
        ax.text(
            x + 0.25, y + h - 1.1 - i * 0.45, line,
            ha="left", va="top", fontsize=font - 0.5, color=_TEXT,
            family="monospace", zorder=3,
        )


def _arrow(ax, xy_from, xy_to, *, color=_BLUE, lw=1.8, curve=0.0,
           style="-|>", alpha=1.0, zorder=4):
    """Add an arrow from ``xy_from`` to ``xy_to``.

    ``curve`` is used as the ``rad`` of an ``arc3`` connection style, so
    ``0.0`` gives a straight arrow.
    """
    from matplotlib.patches import FancyArrowPatch
    arrow = FancyArrowPatch(
        xy_from, xy_to,
        arrowstyle=style, mutation_scale=14,
        color=color, lw=lw, alpha=alpha,
        connectionstyle=f"arc3,rad={curve}",
        zorder=zorder,
    )
    ax.add_patch(arrow)


def _step_label(ax, x, y, n, text, color=_BLUE):
    """Draw a filled circle containing step number ``n`` with ``text``
    to its right."""
    from matplotlib.patches import Circle
    ax.add_patch(Circle((x, y), 0.28, facecolor=color, edgecolor="white",
                        linewidth=1.4, zorder=5))
    ax.text(x, y, str(n), ha="center", va="center", fontsize=9,
            fontweight="bold", color="white", zorder=6)
    ax.text(x + 0.45, y, text, ha="left", va="center", fontsize=9,
            color=_TEXT, zorder=6)


def _draw_send_recv(ax):
    """Populate ``ax`` with the pe0 → pe1 send/recv + SFR diagram."""
    from matplotlib.patches import FancyBboxPatch, Rectangle

    ax.set_xlim(0, 22)
    ax.set_ylim(0, 14)
    ax.set_aspect("equal")
    ax.axis("off")
    ax.set_facecolor(_BG)

    # Outer panel border.
    border = FancyBboxPatch(
        (0.15, 0.15), 21.7, 13.7,
        boxstyle="round,pad=0.02,rounding_size=0.20",
        linewidth=1.4, edgecolor=_FRAME, facecolor=_BG, zorder=0,
    )
    ax.add_patch(border)

    ax.set_title(
        "IPCQ — SFR state and send/recv path between pe0 and pe1 "
        "(intra_E direction, SIP 0 / cube 0)",
        fontsize=14, fontweight="bold", color=_TEXT, pad=12,
    )

    # ── pe0 side (left half) ────────────────────────────────────────
    _box(
        ax, x=0.8, y=8.4, w=8.4, h=5.0,
        title="pe0.pe_ipcq (SFR — direction: intra_E)",
        lines=[
            "neighbor_table[intra_E]:",
            " peer = sip0.cube0.pe1",
            " peer.rx_base_pa → pe1's intra_W slot ring",
            " my_rx_base_pa → pe0's intra_E slot ring",
            " n_slots = 8 slot_size = 512 B",
            "",
            "head/tail counters (per direction):",
            " my_head # ++ on tl.send",
            " my_tail # ++ on tl.recv",
            " peer_head_cache # updated on IpcqMetaArrival",
            " peer_tail_cache # updated on IpcqCreditMetadata",
            "",
            "send blocks while (my_head − peer_tail_cache) ≥ n_slots",
        ],
        edge=_BOX_EDGE, fill=_BOX_FILL,
    )

    _box(
        ax, x=0.8, y=4.5, w=8.4, h=2.7,
        title="pe0.pe_dma (outbound IPCQ driver)",
        lines=[
            "_handle_ipcq_outbound():",
            " • snapshot src bytes from MemoryStore",
            " • find fabric path → pe1.pe_dma",
            " • send Transaction; do NOT wait (fire-and-forget)",
        ],
        edge=_HW_EDGE, fill=_HW_FILL,
    )

    # ── pe1 side (right half) ───────────────────────────────────────
    _box(
        ax, x=12.8, y=8.4, w=8.4, h=5.0,
        title="pe1.pe_ipcq (SFR — direction: intra_W)",
        lines=[
            "neighbor_table[intra_W]:",
            " peer = sip0.cube0.pe0",
            " peer.rx_base_pa → pe0's intra_E slot ring",
            " my_rx_base_pa → pe1's intra_W slot ring",
            " n_slots = 8 slot_size = 512 B",
            "",
            "head/tail counters (per direction):",
            " my_head # ++ on tl.send (other direction)",
            " my_tail # ++ on tl.recv (this direction)",
            " peer_head_cache # updated on IpcqMetaArrival",
            " peer_tail_cache # updated on IpcqCreditMetadata",
            "",
            "recv blocks while peer_head_cache ≤ my_tail",
        ],
        edge=_BOX_EDGE, fill=_BOX_FILL,
    )

    _box(
        ax, x=12.8, y=4.5, w=8.4, h=2.7,
        title="pe1.pe_dma (inbound IPCQ driver)",
        lines=[
            "_handle_ipcq_inbound():",
            " • pay terminal drain over fabric BW",
            " • atomic: write data into pe1's intra_W slot",
            " • forward IpcqMetaArrival → pe1.pe_ipcq",
        ],
        edge=_HW_EDGE, fill=_HW_FILL,
    )

    # ── Slot ring buffer (under pe1.pe_dma) ─────────────────────────
    ring_x0, ring_y0 = 12.8, 1.1
    ring_w, ring_h = 8.4, 2.6
    box = FancyBboxPatch(
        (ring_x0, ring_y0), ring_w, ring_h,
        boxstyle="round,pad=0.04,rounding_size=0.16",
        linewidth=1.6, edgecolor=_MEM_EDGE, facecolor=_MEM_FILL, zorder=2,
    )
    ax.add_patch(box)
    ax.text(
        ring_x0 + ring_w / 2, ring_y0 + ring_h - 0.42,
        "MemoryStore[buffer_kind] pe1's intra_W slot ring "
        "(n_slots = 8, slot_size = 512 B)",
        ha="center", va="top", fontsize=10, fontweight="bold",
        color=_MEM_EDGE, zorder=3,
    )
    # Slots laid out horizontally inside the ring panel.  One example slot
    # is highlighted; the caption below is centred under that same slot.
    n_slots = 8
    active_slot = 3
    pad = 0.35
    slot_w = (ring_w - 2 * pad) / n_slots
    slot_h = 0.85
    slot_y = ring_y0 + 0.3
    for i in range(n_slots):
        sx = ring_x0 + pad + i * slot_w
        is_active = (i == active_slot)
        face = _ACTIVE_SLOT_FILL if is_active else "white"
        edge = _ORANGE if is_active else _MEM_EDGE
        rect = Rectangle(
            (sx + 0.05, slot_y), slot_w - 0.10, slot_h,
            linewidth=1.2, facecolor=face, edgecolor=edge, zorder=3,
        )
        ax.add_patch(rect)
        ax.text(
            sx + slot_w / 2, slot_y + slot_h / 2,
            f"s{i}", ha="center", va="center", fontsize=9,
            color=_ORANGE if is_active else _TEXT,
            fontweight="bold" if is_active else "normal", zorder=4,
        )
    ax.text(
        ring_x0 + pad + active_slot * slot_w + slot_w / 2, slot_y - 0.30,
        "slot_idx = my_head % n_slots",
        ha="center", va="top", fontsize=8, style="italic",
        color=_ORANGE,
    )

    # ── Fabric label (between pe0.pe_dma and pe1.pe_dma) ────────────
    fab = FancyBboxPatch(
        (9.6, 5.0), 2.6, 1.7,
        boxstyle="round,pad=0.04,rounding_size=0.20",
        linewidth=1.4, edgecolor=_PURPLE, facecolor="white", zorder=2,
    )
    ax.add_patch(fab)
    ax.text(10.9, 6.4, "Fabric", ha="center", va="center",
            fontsize=11, fontweight="bold", color=_PURPLE)
    ax.text(10.9, 5.7, "(NoC routers,\npe_dma → pe_dma)",
            ha="center", va="center", fontsize=8, color=_TEXT)

    # ── Arrows + step labels ────────────────────────────────────────
    # 1. tl.send ↘ pe0.pe_ipcq
    _arrow(ax, (9.2, 12.9), (9.7, 12.9),
           color=_BLUE)  # placeholder so number lands
    _step_label(ax, 0.5, 13.6,
                1, "kernel calls tl.send(dir='intra_E', src_addr=X)",
                color=_BLUE)
    # 2. pe0.pe_ipcq → pe0.pe_dma (IpcqDmaToken)
    _arrow(ax, (5.0, 8.4), (5.0, 7.2), color=_BLUE, lw=2.0)
    ax.text(5.2, 7.85, "IpcqDmaToken\n"
            "dst = peer.rx_base_pa + slot_idx*512",
            ha="left", va="center", fontsize=8, color=_BLUE,
            family="monospace")
    # 3. pe0.pe_dma → fabric → pe1.pe_dma (data, fire-and-forget)
    _arrow(ax, (9.2, 5.85), (9.6, 5.85), color=_BLUE, lw=2.0)
    _arrow(ax, (12.2, 5.85), (12.8, 5.85), color=_BLUE, lw=2.0)
    ax.text(10.9, 4.7, "data (fire-and-forget)",
            ha="center", va="center", fontsize=8, style="italic",
            color=_BLUE)
    # 4. pe1.pe_dma → MemoryStore slot (atomic)
    _arrow(ax, (17.0, 4.5), (17.0, 3.7), color=_GREEN, lw=2.0)
    ax.text(17.2, 4.10, "atomic write",
            ha="left", va="center", fontsize=8, color=_GREEN,
            family="monospace")
    # 5. pe1.pe_dma → pe1.pe_ipcq (IpcqMetaArrival)
    _arrow(ax, (15.0, 7.2), (15.0, 8.4), color=_GREEN, lw=2.0)
    ax.text(13.0, 7.85, "IpcqMetaArrival\n"
            "→ peer_head_cache update",
            ha="left", va="center", fontsize=8, color=_GREEN,
            family="monospace")
    # 6. tl.recv unblocks (annotation only)
    _step_label(ax, 12.85, 13.6,
                6, "tl.recv(dir='intra_W') unblocks; consume slot; my_tail++",
                color=_GREEN)
    # 7. pe1.pe_ipcq → pe0.pe_ipcq (IpcqCreditMetadata, fast-path SimPy
    #    Store)
    _arrow(ax, (12.8, 11.0), (9.2, 11.0),
           color=_ORANGE, lw=2.0, curve=0.18)
    ax.text(11.0, 11.55,
            "IpcqCreditMetadata (consumer_seq, dst_rx_base_pa)\n"
            "→ pe0's credit_inbox (SimPy Store, no fabric)",
            ha="center", va="center", fontsize=8, color=_ORANGE,
            family="monospace")
    # 8. pe0.peer_tail_cache update unblocks tl.send
    ax.text(0.5, 0.55,
            "Steps 1–3 = data path (fabric, fire-and-forget); "
            "4–6 = receiver wake-up; 7 = credit return (fast path); "
            "8 = sender unblocks when peer_tail_cache catches up.",
            ha="left", va="center", fontsize=9, color=_TEXT,
            style="italic")

    # In-figure step legend (top, between pe0/pe1 panels).
    legend_x = 9.4
    legend_y = 13.5
    _step_label(ax, legend_x, legend_y, 2,
                "PE_IPCQ → PE_DMA (token)", color=_BLUE)
    _step_label(ax, legend_x, legend_y - 0.45, 3,
                "PE_DMA → fabric → PE_DMA (data)", color=_BLUE)
    _step_label(ax, legend_x, legend_y - 0.90, 4,
                "atomic slot write", color=_GREEN)
    _step_label(ax, legend_x, legend_y - 1.35, 5,
                "IpcqMetaArrival", color=_GREEN)
    _step_label(ax, legend_x, legend_y - 1.80, 7,
                "IpcqCreditMetadata", color=_ORANGE)


def emit_ipcq_diagram() -> str:
    """Render ``ipcq_send_recv.png`` into ``_OUT_DIR`` and return its path.

    The output directory is created if missing.  The figure is always
    closed, including when drawing or saving fails.
    """
    return _render_to_png(_draw_send_recv, (18, 11), "ipcq_send_recv.png")


def test_emit_ipcq_diagram():
    """The send/recv diagram is written to disk."""
    out = emit_ipcq_diagram()
    assert Path(out).exists()


# ── 2nd diagram: two-PE data + DMA + IPCQ-memory layout ──────────────


def _pe_panel(ax, x0, y0, w, h, label, *, edge=_FRAME, fill="white"):
    """Outer container for one PE: title bar + body."""
    from matplotlib.patches import FancyBboxPatch
    box = FancyBboxPatch(
        (x0, y0), w, h,
        boxstyle="round,pad=0.04,rounding_size=0.20",
        linewidth=1.8, edgecolor=edge, facecolor=fill, zorder=1,
    )
    ax.add_patch(box)
    # Title band: a filled strip in the edge colour with white text on it.
    title_h = 0.55
    band = FancyBboxPatch(
        (x0 + 0.12, y0 + h - title_h - 0.10), w - 0.24, title_h,
        boxstyle="round,pad=0.02,rounding_size=0.10",
        linewidth=0, edgecolor="none", facecolor=edge, zorder=2,
    )
    ax.add_patch(band)
    ax.text(
        x0 + w / 2, y0 + h - title_h / 2 - 0.10, label,
        ha="center", va="center", fontsize=12, fontweight="bold",
        color="white", zorder=3,
    )


def _sub_block(ax, cx, cy, w, h, title, body_lines, *,
               fill, edge, font=9):
    """Draw a rounded block centred on ``(cx, cy)`` with a bold title and
    centred monospace ``body_lines`` stacked 0.34 data units apart."""
    from matplotlib.patches import FancyBboxPatch
    rect = FancyBboxPatch(
        (cx - w / 2, cy - h / 2), w, h,
        boxstyle="round,pad=0.02,rounding_size=0.10",
        linewidth=1.4, edgecolor=edge, facecolor=fill, zorder=3,
    )
    ax.add_patch(rect)
    ax.text(cx, cy + h / 2 - 0.30, title, ha="center", va="top",
            fontsize=font + 1, fontweight="bold", color=edge, zorder=4)
    for i, line in enumerate(body_lines):
        ax.text(
            cx, cy + h / 2 - 0.75 - i * 0.34, line,
            ha="center", va="top", fontsize=font - 0.5, color=_TEXT,
            family="monospace", zorder=4,
        )


def _tcm_with_slots(ax, cx, cy, w, h, *, n_slots=8, active_slot=3,
                    title="PE_TCM (local memory)"):
    """Draw a TCM box that contains a source buffer + IPCQ slot ring.

    The box is centred on ``(cx, cy)``.  The left ~30 % holds a "source
    buffer" rectangle; the rest holds the slot ring, drawn as a grid four
    slots wide with as many rows as ``n_slots`` requires.  The slot whose
    index equals ``active_slot`` is highlighted; pass an index outside
    ``range(n_slots)`` (e.g. ``-1``) to highlight none.
    """
    from matplotlib.patches import FancyBboxPatch, Rectangle
    rect = FancyBboxPatch(
        (cx - w / 2, cy - h / 2), w, h,
        boxstyle="round,pad=0.02,rounding_size=0.10",
        linewidth=1.4, edgecolor=_MEM_EDGE, facecolor=_MEM_FILL, zorder=3,
    )
    ax.add_patch(rect)
    ax.text(
        cx, cy + h / 2 - 0.28, title, ha="center", va="top",
        fontsize=9.5, fontweight="bold", color=_MEM_EDGE, zorder=4,
    )

    # Source buffer region (left part).
    src_w = (w - 0.6) * 0.30
    src_h = h - 1.20
    sx = cx - w / 2 + 0.20
    sy = cy - h / 2 + 0.20
    src_rect = Rectangle(
        (sx, sy), src_w, src_h,
        linewidth=1.0, facecolor="white", edgecolor=_BLUE, zorder=4,
    )
    ax.add_patch(src_rect)
    ax.text(sx + src_w / 2, sy + src_h / 2 + 0.18, "source",
            ha="center", va="center", fontsize=8.5, color=_BLUE,
            fontweight="bold", zorder=5)
    ax.text(sx + src_w / 2, sy + src_h / 2 - 0.18, "buffer",
            ha="center", va="center", fontsize=8.5, color=_BLUE,
            fontweight="bold", zorder=5)

    # Slot ring region (right part).
    ring_x0 = sx + src_w + 0.30
    ring_w = (cx + w / 2 - 0.20) - ring_x0
    ring_y0 = sy
    ring_h = src_h
    ring_rect = Rectangle(
        (ring_x0, ring_y0), ring_w, ring_h,
        linewidth=1.0, facecolor="white", edgecolor=_ORANGE, zorder=4,
    )
    ax.add_patch(ring_rect)
    ax.text(
        ring_x0 + ring_w / 2, ring_y0 + ring_h - 0.18,
        "IPCQ slot ring (intra_W)",
        ha="center", va="top", fontsize=8.5, color=_ORANGE,
        fontweight="bold", zorder=5,
    )
    # Slots fill a grid 4 columns wide, top row first.  The row count is
    # derived from n_slots (2 rows for the default 8) so that a larger
    # ring still fits inside the ring rectangle instead of spilling below.
    cols = 4
    rows = max(1, (n_slots + cols - 1) // cols)
    slot_inner_pad = 0.12
    sw = (ring_w - (cols + 1) * slot_inner_pad) / cols
    # 0.65 is the vertical space reserved for the ring header text.
    sh = (ring_h - 0.65 - (rows + 1) * slot_inner_pad) / rows
    for i in range(n_slots):
        r = i // cols
        c = i % cols
        sx_i = ring_x0 + slot_inner_pad + c * (sw + slot_inner_pad)
        sy_i = (ring_y0 + slot_inner_pad
                + (rows - 1 - r) * (sh + slot_inner_pad))
        is_active = (i == active_slot)
        face = _ACTIVE_SLOT_FILL if is_active else "white"
        edge = _ORANGE if is_active else "#c9c9c9"
        ax.add_patch(Rectangle(
            (sx_i, sy_i), sw, sh,
            linewidth=1.0, facecolor=face, edgecolor=edge, zorder=5,
        ))
        ax.text(
            sx_i + sw / 2, sy_i + sh / 2, f"s{i}",
            ha="center", va="center", fontsize=8,
            fontweight="bold" if is_active else "normal",
            color=_ORANGE if is_active else "#666",
            zorder=6,
        )


def _draw_two_pe_dma(ax):
    """Populate ``ax`` with the two-PE DMA / slot-ring layout diagram."""
    from matplotlib.patches import FancyBboxPatch

    XMAX, YMAX = 28.0, 14.0
    ax.set_xlim(0, XMAX)
    ax.set_ylim(0, YMAX)
    ax.set_aspect("equal")
    ax.axis("off")
    ax.set_facecolor(_BG)

    # Outer page border.
    ax.add_patch(FancyBboxPatch(
        (0.20, 0.20), XMAX - 0.40, YMAX - 0.40,
        boxstyle="round,pad=0.02,rounding_size=0.20",
        linewidth=1.4, edgecolor=_FRAME, facecolor=_BG, zorder=0,
    ))

    ax.set_title(
        "Two PEs over IPCQ — outbound DMA lands DIRECTLY in receiver "
        "memory (slot ring in PE_TCM)",
        fontsize=14, fontweight="bold", color=_TEXT, pad=12,
    )

    # ── PE panels ───────────────────────────────────────────────────
    PE0_X, PE0_W = 0.8, 11.6
    PE1_X, PE1_W = 15.6, 11.6
    PE_Y, PE_H = 1.6, 10.4

    _pe_panel(ax, x0=PE0_X, y0=PE_Y, w=PE0_W, h=PE_H,
              label="PE 0 (sender — sip0.cube0.pe0)",
              edge=_BLUE, fill="white")
    _pe_panel(ax, x0=PE1_X, y0=PE_Y, w=PE1_W, h=PE_H,
              label="PE 1 (receiver — sip0.cube0.pe1)",
              edge=_GREEN, fill="white")

    # ── PE 0 sub-blocks ─────────────────────────────────────────────
    # Top row: PE_CPU and PE_IPCQ
    _sub_block(
        ax, cx=PE0_X + 2.5, cy=10.3, w=3.4, h=1.6,
        title="PE_CPU",
        body_lines=["kernel:",
                    " tl.send(dir='intra_E',",
                    " src=ptr)"],
        fill=_BOX_FILL, edge=_BOX_EDGE,
    )
    _sub_block(
        ax, cx=PE0_X + 8.4, cy=10.3, w=4.0, h=1.6,
        title="PE_IPCQ (control / SFR)",
        body_lines=["per-direction state:",
                    " head/tail, peer.rx_base_pa,",
                    " peer_tail_cache"],
        fill=_BOX_FILL, edge=_BOX_EDGE,
    )
    # Mid: PE_TCM (left, with src + slot ring) and PE_DMA outbound (right)
    # NOTE(review): _tcm_with_slots always captions the ring "intra_W",
    # while the send/recv diagram describes pe0's ring for this link as
    # intra_E — confirm which caption is intended on the PE 0 side.
    _tcm_with_slots(
        ax, cx=PE0_X + 3.0, cy=5.4, w=5.6, h=3.6,
        n_slots=8, active_slot=-1,
        title="PE_TCM (local memory · buffer_kind = tcm)",
    )
    _sub_block(
        ax, cx=PE0_X + 8.6, cy=5.4, w=3.6, h=3.6,
        title="PE_DMA (outbound)",
        body_lines=["snapshot src bytes",
                    " from PE_TCM",
                    "build Transaction",
                    " (dst = peer's slot PA)",
                    "fire onto fabric;",
                    " do not wait for ack"],
        fill=_HW_FILL, edge=_HW_EDGE,
    )
    # Arrows on PE 0 side
    _arrow(ax, (PE0_X + 4.20, 10.3), (PE0_X + 6.40, 10.3),
           color=_BLUE, lw=1.7)
    ax.text(PE0_X + 5.30, 10.65, "tl.send",
            ha="center", va="center", fontsize=8.5, color=_BLUE,
            fontweight="bold")
    # PE_IPCQ → PE_DMA control (kept; label removed per request)
    _arrow(ax, (PE0_X + 8.4, 9.50), (PE0_X + 8.6, 7.20),
           color=_ORANGE, lw=1.6)
    # PE_TCM(src) → PE_DMA (read source data)
    _arrow(ax, (PE0_X + 5.80, 5.40), (PE0_X + 6.80, 5.40),
           color=_BLUE, lw=2.0)
    ax.text(PE0_X + 6.30, 6.05, "read source\n(snapshot)",
            ha="center", va="bottom", fontsize=7.5, color=_BLUE,
            family="monospace")

    # ── Fabric in the middle ────────────────────────────────────────
    FAB_X0, FAB_W = 12.6, 2.8
    FAB_Y0, FAB_H = 4.6, 2.2
    ax.add_patch(FancyBboxPatch(
        (FAB_X0, FAB_Y0), FAB_W, FAB_H,
        boxstyle="round,pad=0.04,rounding_size=0.20",
        linewidth=1.6, edgecolor=_PURPLE, facecolor="white", zorder=2,
    ))
    ax.text(FAB_X0 + FAB_W / 2, FAB_Y0 + FAB_H - 0.45,
            "NoC Fabric", ha="center", va="center",
            fontsize=12, fontweight="bold", color=_PURPLE)
    ax.text(FAB_X0 + FAB_W / 2, FAB_Y0 + 0.55,
            "(routers, links;\nfabric BW + drain time)",
            ha="center", va="center", fontsize=8.5, color=_TEXT)

    # ── PE 1 sub-blocks ─────────────────────────────────────────────
    # Top row: PE_IPCQ and PE_CPU
    _sub_block(
        ax, cx=PE1_X + 3.2, cy=10.3, w=4.0, h=1.6,
        title="PE_IPCQ (control / SFR)",
        body_lines=["per-direction state:",
                    " head/tail, peer_head_cache,",
                    " my_rx_base_pa"],
        fill=_BOX_FILL, edge=_BOX_EDGE,
    )
    _sub_block(
        ax, cx=PE1_X + 9.1, cy=10.3, w=3.4, h=1.6,
        title="PE_CPU",
        body_lines=["kernel:",
                    " ptr = tl.recv(",
                    " dir='intra_W')"],
        fill=_BOX_FILL, edge=_BOX_EDGE,
    )
    # Wide PE_TCM occupying the centre-bottom of PE 1 — the DMA payload
    # terminates HERE (not in any DMA component).
    _tcm_with_slots(
        ax, cx=PE1_X + 5.0, cy=5.4, w=8.4, h=3.6,
        n_slots=8, active_slot=3,
        title="PE_TCM (local memory · buffer_kind = tcm)",
    )

    # ── DATA arrows: outbound DMA ──► RECEIVER MEMORY (the slot) ───
    # The inbound PE_DMA is NOT on the data path — it's a sim-side
    # bookkeeper that pays terminal drain + emits MetaArrival.  The
    # actual DMA payload jumps fabric → slot directly.
    # 1) pe0.PE_DMA → fabric
    _arrow(ax, (PE0_X + 10.40, 5.40), (FAB_X0, 5.40),
           color=_BLUE, lw=2.8)
    # 2) fabric → PE_TCM slot s3 (DMA payload terminates IN MEMORY)
    # NOTE(review): with the PE 1 PE_TCM geometry above (cx=PE1_X+5.0,
    # w=8.4) the source-buffer rectangle spans x = PE1_X+1.00 … PE1_X+3.34
    # and the slot ring only begins at PE1_X+3.64, so this x-coordinate
    # lands inside "source buffer", not on slot s3 — confirm the intended
    # arrow target before relying on the picture.
    SLOT_X = PE1_X + 2.95
    _arrow(ax, (FAB_X0 + FAB_W, 5.40), (SLOT_X, 5.40),
           color=_BLUE, lw=2.8)

    # PE_IPCQ → PE_CPU: tl.recv unblocks
    _arrow(ax, (PE1_X + 5.20, 10.30), (PE1_X + 7.40, 10.30),
           color=_GREEN, lw=1.7)
    ax.text(PE1_X + 6.30, 10.65, "unblock tl.recv",
            ha="center", va="center", fontsize=8.5, color=_GREEN,
            fontweight="bold")
    # PE_CPU → PE_TCM: kernel reads consumed slot via returned ptr
    _arrow(ax, (PE1_X + 9.10, 9.50), (PE1_X + 8.10, 7.20),
           color=_GREEN, lw=1.4, curve=0.10)
    ax.text(PE1_X + 9.30, 8.30, "kernel reads\nslot data",
            ha="left", va="center", fontsize=7.5, color=_GREEN)

    # (Credit-return arrow + label removed per request — see code
    # for the actual mechanism: pe1.pe_ipcq → pe0.credit_inbox via
    # SimPy Store after env.timeout(fabric_path_latency_ns).)

    # ── Footer legend ──────────────────────────────────────────────
    ax.text(0.6, 0.85,
            "DATA (blue) : pe0 PE_TCM[src] → pe0 PE_DMA → "
            "NoC fabric → pe1 PE_TCM[slot s3] ← DMA write "
            "terminates IN MEMORY",
            ha="left", va="center", fontsize=9, color=_TEXT,
            style="italic")
    ax.text(0.6, 0.45,
            "CTRL (orange) : PE_IPCQ issues IpcqDmaToken on send; "
            "pe1's inbound port emits MetaArrival; credit return "
            "uses the fabric path (timing) but bypasses the per-hop "
            "component graph (D9 fast path).",
            ha="left", va="center", fontsize=9, color=_TEXT,
            style="italic")


def emit_ipcq_dma_diagram() -> str:
    """Two-PE diagram emphasising: outbound DMA writes DIRECTLY into the
    receiver's local memory (slot ring in PE_TCM).  pe1.pe_dma is the
    inbound memory port that pays drain + emits the MetaArrival notice;
    the actual DMA payload terminates in the slot, not in another DMA.

    Renders ``ipcq_two_pe_dma.png`` into ``_OUT_DIR`` (created if missing)
    and returns its path.  The figure is always closed, including when
    drawing or saving fails.
    """
    return _render_to_png(_draw_two_pe_dma, (22, 12), "ipcq_two_pe_dma.png")


def test_emit_ipcq_dma_diagram():
    """The two-PE DMA diagram is written to disk."""
    out = emit_ipcq_dma_diagram()
    assert Path(out).exists()