Files
kernbench2/tests/conftest.py
T
mukesh 04c912f53e Allreduce sweep: parametrized + xdist parallelism + topology diagram
Refactor the latency sweep from one giant test into 36 parametrized
cases that run in parallel under xdist (~6-8x faster: 1:49 instead of
~10 min). Each case writes a JSON row to a staging dir; conftest
sessionfinish hook aggregates rows on the controller node into
summary.csv and the per-topology + overview plots.

Aggregator gains a CSV fallback so plot-only tweaks no longer require
re-running the sweep.

Overview plot updates:
- 96 KB explicit x-axis marker with vertical dotted line
- horizontal theoretical 2D-torus reference (10600 ns)
- annotation showing both theoretical and simulated values at 96 KB
- drop overlapping 128 KB tick

New topology.png: 2x2 panel diagram showing device-level topology
(ring, torus 2x3, mesh 2x3) and the cube-level reduction inside SIP 0.
Wrap arrows anchor on box edges and arc outside rows/columns so they
do not overlap any SIP.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 16:43:19 -07:00

71 lines
2.1 KiB
Python

"""Shared pytest fixtures for the kernbench test suite.
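Session-scoped topology caching: ``resolve_topology("topology.yaml")`` is
pure (no side effects), so we cache the result across all tests in a
worker process. Each test still builds its own ``GraphEngine`` (which is
stateful/SimPy-event-consuming and MUST NOT be shared).
This module also defines the ``pytest_sessionfinish`` hook, which
aggregates the parametrized all-reduce sweep results on the xdist
controller node once the session ends.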
"""
from __future__ import annotations
import os
import pytest
from kernbench.topology.builder import resolve_topology
def pytest_sessionfinish(session, exitstatus):
    """Aggregate parametrized sweep rows into combined CSV + PNG plots.

    Runs on the controller node only: xdist worker processes set
    ``PYTEST_XDIST_WORKER`` and return immediately. The sibling file
    ``test_allreduce_multidevice.py`` is loaded from disk under a private
    module alias, and its ``_aggregate_sweep_plots`` callable (if it defines
    one) is invoked with no arguments.

    Aggregation is best-effort. Any ``Exception`` raised while executing the
    module or while aggregating is printed and swallowed so that a plotting
    problem never changes the outcome of the test session. In that case the
    private alias is removed from ``sys.modules`` again, so no partially
    initialised module is left behind.

    NOTE(review): the aggregator is expected to do nothing when no sweep
    rows are present (e.g. when the sweep was filtered out); that behaviour
    lives in ``_aggregate_sweep_plots`` and cannot be confirmed from here.

    Args:
        session: The pytest session object (unused).
        exitstatus: The exit status of the session (unused).

    Returns:
        None.
    """
    if os.environ.get("PYTEST_XDIST_WORKER"):
        return

    import importlib.util
    import sys
    from pathlib import Path

    mod_path = Path(__file__).parent / "test_allreduce_multidevice.py"
    if not mod_path.exists():
        return

    spec = importlib.util.spec_from_file_location(
        "_test_allreduce_multidevice_for_aggregate", mod_path,
    )
    if spec is None or spec.loader is None:
        return

    mod = importlib.util.module_from_spec(spec)
    # Register the module before executing it, following the importlib
    # "importing a source file directly" recipe, so that lookups through
    # sys.modules made while the module body runs can succeed.
    sys.modules[spec.name] = mod
    try:
        spec.loader.exec_module(mod)
        agg = getattr(mod, "_aggregate_sweep_plots", None)
        if agg is not None:
            agg()
    except Exception as e:
        # Drop the alias so a failed load does not leave a half-initialised
        # module registered; the failure itself stays non-fatal by design.
        sys.modules.pop(spec.name, None)
        print(f"[conftest] sweep aggregation failed: {e}")
@pytest.fixture(scope="session")
def topology():
    """Provide the parsed topology handle, resolved once per test session.

    The handle is whatever ``resolve_topology("topology.yaml")`` returns
    (described by the original author as an immutable graph + spec). It is
    shared by every test that requests this fixture, so tests should treat
    it as read-only and build their own engine on top of it.

    Example::

        def test_foo(topology):
            engine = GraphEngine(topology.topology_obj, enable_data=True)
    """
    handle = resolve_topology("topology.yaml")
    return handle
@pytest.fixture(scope="session")
def topology_obj(topology):
    """Shortcut fixture: the TopologyGraph held by the ``topology`` handle.

    Equivalent to reading ``topology.topology_obj`` in the test itself.
    """
    graph = topology.topology_obj
    return graph
@pytest.fixture(scope="session")
def spec(topology):
    """Shortcut fixture: the topology spec dict.

    Equivalent to reading ``topology.topology_obj.spec`` in the test itself.
    """
    graph = topology.topology_obj
    return graph.spec