Add tl.recv_no_consume diagnostic API for apples-to-apples pe2pe plot
The pe2pe overview compared IPCQ (tl.send + tl.recv) against raw DMA (tl.load + tl.store), but DMA is one-sided — DST never reads — while tl.recv pays a slot-read on DST. The comparison was unfair: IPCQ looked slower partly because it does more work.

Adds tl.recv_no_consume() — a separate, diagnostic-only entry point that blocks for slot arrival but skips the slot-read (and bank-hop) charge on DST. Production tl.recv is unchanged (no `consume` kwarg on the public API), so the diagnostic flag can never accidentally leak into real workloads.

Updates test_pe_to_pe_latency to call tl.recv_no_consume so the overview.png shows IPCQ no-consume vs raw DMA on equal footing.

Also fixes PLOT_DIR back to docs/diagrams/pe2pe_latency_plots/ (was lost in a merge).

Adds scripts/replot_pe2pe.py for label-only re-renders without re-measuring.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
"""Re-render pe2pe latency PNGs from the existing summary.csv with the
|
||||
current (no-consume) labels. Used after a label-only test edit to avoid
|
||||
re-measuring (~5 min) when the data on disk is already correct.
|
||||
|
||||
Reads docs/diagrams/pe2pe_latency_plots/summary.csv. Plots 2 curves:
|
||||
"IPCQ no-consume" (from the ipcq_no_consume rows if present, else from
|
||||
the ipcq rows) and "Raw DMA" (raw rows).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Repository root: two directory levels above this file.
ROOT = Path(__file__).resolve().parent.parent
# Directory that holds summary.csv and receives the re-rendered PNGs.
PLOT_DIR = ROOT.joinpath("docs", "diagrams", "pe2pe_latency_plots")
# Latency measurements this script reads.
CSV_PATH = PLOT_DIR.joinpath("summary.csv")
|
||||
|
||||
|
||||
def _load_records():
|
||||
rows = []
|
||||
with open(CSV_PATH, newline="") as f:
|
||||
for r in csv.DictReader(f):
|
||||
rows.append({
|
||||
"hop": r["hop"],
|
||||
"label": r["label"],
|
||||
"size_bytes": int(r["size_bytes"]),
|
||||
"path": r["path"],
|
||||
"total_ns": float(r["total_ns"]),
|
||||
})
|
||||
return rows
|
||||
|
||||
|
||||
def _ipcq_rows(records, hop):
|
||||
# Prefer ipcq_no_consume if present (older 3-path CSV); fall back to ipcq
|
||||
# (current single-path CSV where ipcq IS no-consume).
|
||||
nc = [r for r in records
|
||||
if r["hop"] == hop and r["path"] == "ipcq_no_consume"]
|
||||
if nc:
|
||||
return sorted(nc, key=lambda r: r["size_bytes"])
|
||||
return sorted(
|
||||
[r for r in records if r["hop"] == hop and r["path"] == "ipcq"],
|
||||
key=lambda r: r["size_bytes"],
|
||||
)
|
||||
|
||||
|
||||
def _raw_rows(records, hop):
|
||||
return sorted(
|
||||
[r for r in records if r["hop"] == hop and r["path"] == "raw"],
|
||||
key=lambda r: r["size_bytes"],
|
||||
)
|
||||
|
||||
|
||||
def _hops(records):
|
||||
seen = []
|
||||
for r in records:
|
||||
if r["hop"] not in {h["id"] for h in seen}:
|
||||
seen.append({"id": r["hop"], "label": r["label"]})
|
||||
return seen
|
||||
|
||||
|
||||
def _plot_per_hop(records, hop, path):
    """Render the latency-vs-size figure of a single hop and save it.

    Draws up to two curves on one axes: the IPCQ rows (blue circles) and
    the raw-DMA rows (orange squares). A curve is skipped when the hop
    has no rows of that kind.

    Args:
        records: All row dicts loaded from the summary CSV.
        hop: Dict with the hop ``id`` (used to select rows) and ``label``
            (used as the axes title).
        path: Destination passed to ``fig.savefig``.
    """
    # (rows, marker, colour, legend label) for each curve, in draw order.
    series = (
        (
            _ipcq_rows(records, hop["id"]),
            "o",
            "tab:blue",
            "IPCQ no-consume (send/recv, no slot read)",
        ),
        (
            _raw_rows(records, hop["id"]),
            "s",
            "tab:orange",
            "Raw DMA (load+store)",
        ),
    )

    fig, ax = plt.subplots(figsize=(8, 5))
    for rows, marker, color, label in series:
        if not rows:
            continue
        sizes = [row["size_bytes"] for row in rows]
        latencies = [row["total_ns"] for row in rows]
        ax.plot(sizes, latencies, marker=marker, color=color, label=label)

    ax.set_xlabel("Data size (bytes)")
    ax.set_ylabel("Latency (ns)")
    ax.set_title(hop["label"])
    ax.grid(True, alpha=0.3)
    ax.legend()

    fig.tight_layout()
    fig.savefig(path, dpi=120)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
|
||||
|
||||
|
||||
def _plot_overview(records, hops, path):
    """Render one figure with a subplot per hop and save it.

    Each subplot shows up to two curves for its hop: the IPCQ rows (blue
    circles) and the raw-DMA rows (orange squares). Subplots are laid out
    two per row. Up to four hops use the fixed 2x2 grid; with more hops
    the grid grows by whole rows so every hop gets its own axes instead
    of indexing past the end of a 2x2 grid. Unused grid cells are hidden.

    Args:
        records: All row dicts loaded from the summary CSV.
        hops: Sequence of dicts with the hop ``id`` (used to select rows)
            and ``label`` (used as the subplot title).
        path: Destination passed to ``fig.savefig``.
    """
    columns = 2
    # Never fewer than two rows, so the layout for <= 4 hops stays 2x2
    # with the same 13x9 figure size; beyond that add 4.5 inches per row.
    grid_rows = max(2, (len(hops) + columns - 1) // columns)
    fig, axes = plt.subplots(
        grid_rows, columns, figsize=(13, 4.5 * grid_rows)
    )
    axes = axes.flatten()

    for ax, hop in zip(axes, hops):
        ipcq = _ipcq_rows(records, hop["id"])
        raw = _raw_rows(records, hop["id"])
        if ipcq:
            ax.plot(
                [r["size_bytes"] for r in ipcq],
                [r["total_ns"] for r in ipcq],
                marker="o", color="tab:blue",
                label="IPCQ no-consume",
            )
        if raw:
            ax.plot(
                [r["size_bytes"] for r in raw],
                [r["total_ns"] for r in raw],
                marker="s", color="tab:orange",
                label="Raw DMA",
            )
        ax.set_title(hop["label"], fontsize=10)
        ax.set_xlabel("bytes")
        ax.set_ylabel("ns")
        ax.grid(True, alpha=0.3)
        ax.legend(fontsize=8)

    # Hide the grid cells that have no hop assigned to them.
    for spare in axes[len(hops):]:
        spare.axis("off")

    fig.suptitle(
        "PE-to-PE latency: IPCQ no-consume vs raw DMA",
        fontsize=14,
    )
    fig.tight_layout()
    fig.savefig(path, dpi=120)
    # Release the figure once it has been written.
    plt.close(fig)
|
||||
|
||||
|
||||
def main():
    """Re-render every per-hop PNG and the overview PNG from the CSV.

    Loads the summary rows, writes ``<hop id>.png`` for each distinct hop
    into ``PLOT_DIR``, then writes ``overview.png`` there as well, and
    prints the path of each file it wrote.
    """
    records = _load_records()
    hops = _hops(records)

    # One figure per hop, named after the hop id.
    for hop in hops:
        target = PLOT_DIR / f"{hop['id']}.png"
        _plot_per_hop(records, hop, target)
        print(f"wrote {target}")

    # Combined figure covering all hops.
    target = PLOT_DIR / "overview.png"
    _plot_overview(records, hops, target)
    print(f"wrote {target}")


# Run the re-render only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user