Files
kernbench2/tests/conftest.py
T
mukesh 84a1325e5c ADR-0023 D9.7: IPCQ slot-memory latency model (TCM/SRAM/HBM)
Charge per-tier bandwidth + setup overhead at IPCQ slot WRITE
(receiver inbound DMA, in pe_dma._handle_ipcq_inbound) and slot
READ (recv consume, in pe_ipcq._handle_recv). Tier table
(common/ipcq_types.py):
  tcm  : 512 GB/s, 0 ns
  sram : 128 GB/s, 2 ns
  hbm  :  32 GB/s, 6 ns

Before this change, slot read/write was free regardless of
buffer_kind, making memory-tier choice invisible in simulated
latency. After the change, swapping buffer_kind in ccl.yaml
produces measurable per-tier separation in allreduce latency.

Tests:
  test_ipcq_buffer_kind_latency.py — three micro-tests asserting
    tcm < sram < hbm ordering, payload-scaling, and that
    buffer_kind sensitivity grows with payload (credit-only path
    stays fabric-bound).
  test_allreduce_buffer_kind_sweep.py — 12-config parametrized
    sweep emitting buffer_kind_sweep.png (3 lines, torus_2d).

conftest sessionfinish hook generalised to dispatch multiple
sweep aggregators (allreduce + buffer-kind).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 21:28:34 -07:00

75 lines
2.3 KiB
Python

"""Shared pytest fixtures for the kernbench test suite.
Session-scoped topology caching: ``resolve_topology("topology.yaml")`` is
pure (no side effects), so we cache the result across all tests in a
worker process. Each test still builds its own ``GraphEngine`` (which is
stateful/SimPy-event-consuming and MUST NOT be shared).
"""
from __future__ import annotations
import os
import pytest
from kernbench.topology.builder import resolve_topology
def pytest_sessionfinish(session, exitstatus):
    """Run the sweep aggregators once the whole test session has finished.

    Each aggregator lives in a sibling test module next to this file. The
    module is loaded from its path under a private alias and the named
    zero-argument callable is invoked. The aggregators are expected to
    combine parametrized sweep rows into CSV + PNG outputs and to be no-ops
    when no rows exist (e.g. the sweep was filtered out) — that behaviour
    lives in those modules, not here.

    Runs on the controller only: when ``PYTEST_XDIST_WORKER`` is set in the
    environment the hook returns immediately.

    Aggregation is best-effort. A missing module or missing attribute is
    skipped silently; an exception raised while importing the module or
    while running the aggregator is printed and swallowed so it cannot
    change the outcome of the session.

    Args:
        session: The pytest session object (unused).
        exitstatus: The session exit status (unused).
    """
    if os.environ.get("PYTEST_XDIST_WORKER"):
        return

    import importlib.util
    import sys
    from pathlib import Path

    tests_dir = Path(__file__).parent

    def _report(name: str, attr: str, exc: Exception) -> None:
        # Include the exception type: str(exc) alone is empty for a bare
        # ``AssertionError`` and ambiguous for e.g. ``KeyError``.
        print(
            f"[conftest] aggregator {attr}() in {name} failed: "
            f"{type(exc).__name__}: {exc}"
        )

    def _exec(name: str, attr: str) -> None:
        """Load sibling module *name* and call its *attr* callable, if any."""
        mod_path = tests_dir / name
        if not mod_path.exists():
            return
        spec = importlib.util.spec_from_file_location(
            f"_{name.removesuffix('.py')}_for_aggregate", mod_path,
        )
        if spec is None or spec.loader is None:
            return
        mod = importlib.util.module_from_spec(spec)
        # Register before executing so the module can be found by name
        # while its own top-level code runs.
        sys.modules[spec.name] = mod
        try:
            spec.loader.exec_module(mod)
        except Exception as exc:
            # Do not leave a half-initialised module behind.
            sys.modules.pop(spec.name, None)
            _report(name, attr, exc)
            return
        fn = getattr(mod, attr, None)
        if fn is None:
            return
        try:
            fn()
        except Exception as exc:
            _report(name, attr, exc)

    _exec("test_allreduce_multidevice.py", "_aggregate_sweep_plots")
    _exec("test_allreduce_buffer_kind_sweep.py", "aggregate_buffer_kind_plot")
@pytest.fixture(scope="session")
def topology():
    """Resolve ``topology.yaml`` once and share the handle for the session.

    Tests build their own engine from the shared handle, e.g.::

        def test_foo(topology):
            engine = GraphEngine(topology.topology_obj, enable_data=True)
    """
    handle = resolve_topology("topology.yaml")
    return handle
@pytest.fixture(scope="session")
def topology_obj(topology):
    """Shortcut fixture: the ``topology_obj`` held by the session topology."""
    graph = topology.topology_obj
    return graph
@pytest.fixture(scope="session")
def spec(topology):
    """Shortcut fixture: the ``spec`` of the session topology's ``topology_obj``."""
    graph = topology.topology_obj
    return graph.spec