Allreduce sweep: parametrized + xdist parallelism + topology diagram
Refactor the latency sweep from one giant test into 36 parametrized cases that run in parallel under xdist (~6-8x faster: 1:49 instead of ~10 min). Each case writes a JSON row to a staging dir; conftest sessionfinish hook aggregates rows on the controller node into summary.csv and the per-topology + overview plots. Aggregator gains a CSV fallback so plot-only tweaks no longer require re-running the sweep. Overview plot updates: - 96 KB explicit x-axis marker with vertical dotted line - horizontal theoretical 2D-torus reference (10600 ns) - annotation showing both theoretical and simulated values at 96 KB - drop overlapping 128 KB tick New topology.png: 2x2 panel diagram showing device-level topology (ring, torus 2x3, mesh 2x3) and the cube-level reduction inside SIP 0. Wrap arrows anchor on box edges and arc outside rows/columns so they do not overlap any SIP. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,11 +7,45 @@ stateful/SimPy-event-consuming and MUST NOT be shared).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from kernbench.topology.builder import resolve_topology
|
||||
|
||||
|
||||
def pytest_sessionfinish(session, exitstatus):
|
||||
"""Aggregate parametrized sweep rows into combined CSV + PNG plots.
|
||||
|
||||
Runs on the controller node only (xdist worker processes set
|
||||
``PYTEST_XDIST_WORKER``; we skip those). Idempotent — does nothing
|
||||
if no sweep rows are present (e.g., when the sweep was filtered out).
|
||||
"""
|
||||
if os.environ.get("PYTEST_XDIST_WORKER"):
|
||||
return
|
||||
import importlib.util
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
mod_path = Path(__file__).parent / "test_allreduce_multidevice.py"
|
||||
if not mod_path.exists():
|
||||
return
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"_test_allreduce_multidevice_for_aggregate", mod_path,
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
return
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
sys.modules[spec.name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
agg = getattr(mod, "_aggregate_sweep_plots", None)
|
||||
if agg is not None:
|
||||
agg()
|
||||
except Exception as e:
|
||||
print(f"[conftest] sweep aggregation failed: {e}")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def topology():
|
||||
"""Session-scoped parsed topology (immutable graph + spec).
|
||||
|
||||
Reference in New Issue
Block a user