# kernbench2/tests/sccl/test_allreduce_ring_torus_mesh.py

"""Correctness of intercube allreduce across SIP topologies (distributed path).

Routes through init_process_group → mp.spawn → dist.all_reduce for ring_1d,
torus_2d (2×3), and mesh_2d_no_wrap (2×3). Per-rank correctness is asserted
inside the worker; spawn raises on failure.
"""
from __future__ import annotations

import pytest

from tests.sccl._allreduce_helpers import (
    CONFIGS,
    DEFAULT_N_ELEM,
    _crit_ns,
    _run_distributed,
    _write_temp_configs,
)


@pytest.mark.parametrize("algorithm,sip_topology,n_sips,sip_w,sip_h", CONFIGS)
def test_allreduce(
    tmp_path,
    monkeypatch,
    algorithm,
    sip_topology,
    n_sips,
    sip_w,
    sip_h,
):
    """Run one allreduce configuration through the distributed path.

    Each entry of ``CONFIGS`` supplies an
    ``(algorithm, sip_topology, n_sips, sip_w, sip_h)`` combination. The
    test writes temporary config files for that combination, hands the
    resulting topology path to ``_run_distributed`` together with
    ``DEFAULT_N_ELEM``, and then checks the critical-path value reported
    for the returned engine.

    Per the module docstring, per-rank numerical correctness is asserted
    inside the spawned worker, so this function only needs to confirm that
    the run produced a non-trivial critical path.
    """
    # Label passed to the distributed runner; identifies this configuration.
    run_label = f"test_{algorithm}_{sip_topology}"

    # Only the first of the two returned items (the topology path) is needed.
    topology_file, _unused_second = _write_temp_configs(
        tmp_path,
        sip_topology,
        n_sips,
        algorithm,
        sip_w=sip_w,
        sip_h=sip_h,
    )

    # The cube count returned alongside the engine is not checked here.
    engine, _n_cubes = _run_distributed(
        tmp_path,
        monkeypatch,
        topology_file,
        run_label,
        DEFAULT_N_ELEM,
    )

    # A strictly positive critical path confirms the kernel actually ran.
    critical_path = _crit_ns(engine)
    assert critical_path > 0.0