Files
kernbench2/tests/test_intercube_sfr_config.py
T
mukesh ff7d727ddd CCL allreduce: rename to lrab_hierarchical_allreduce + descriptive plots
Rename the intercube all-reduce identity to lrab_hierarchical_allreduce
(module, config key, distributed test) so the name reflects both levels
it implements: LRAB intra-SIP (local reduce to center root + broadcast)
and the hierarchical inter-SIP topology exchange (ring/torus/mesh).
ADR-0032 slug kept as the stable decision id; pure rename, no logic change.

Also in this batch:
- ADR-0032 (EN+KO): document the shipped center-root bidirectional reduce
  (doc was stale corner-root); annotate ccl.yaml root_cube as a placeholder.
- Rename allreduce + pe2pe latency plots to descriptive, title-matching
  filenames and retitle the in-plot headings; drop overview/overview_log.
- Point the PPTX image refs at the new plot names.

Doc + derived-artifact + rename only; no simulation behavior changed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 20:50:48 -07:00

140 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for configure_sfr_intercube_multisip neighbor table wiring.
Verifies full IPCQ hardware wiring (independent of DPPolicy):
- intra-cube (2×4 PE grid) → intra_N/S/E/W
- intercube same-lane → N/S/E/W
- inter-SIP same-(cube, pe) → global_N/S/E/W
"""
from __future__ import annotations
from pathlib import Path
from kernbench.ccl.install import load_ccl_config, resolve_algorithm_config
from kernbench.ccl.sfr_config import configure_sfr_intercube_multisip
from kernbench.sim_engine.engine import GraphEngine
from kernbench.topology.builder import resolve_topology
# Topology description loaded by every test: ``topology.yaml`` located one
# directory above the directory that contains this test module.
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
# Number of cubes per SIP the assertions below iterate over / multiply by.
N_CUBES = 16
# Number of PEs per cube (the module docstring describes them as a 2×4 grid).
PES_PER_CUBE = 8
def _engine_and_spec():
    """Return ``(engine, spec)`` for the topology stored at ``TOPOLOGY_PATH``.

    The topology is resolved from the YAML path, a ``GraphEngine`` is built
    on its topology object with ``enable_data=True``, and that same topology
    object's ``spec`` is handed back alongside the engine.
    """
    topology = resolve_topology(str(TOPOLOGY_PATH)).topology_obj
    return GraphEngine(topology, enable_data=True), topology.spec
def _merged_cfg():
    """Return the CCL config resolved for ``lrab_hierarchical_allreduce``.

    Loads the CCL configuration via ``load_ccl_config`` and passes it through
    ``resolve_algorithm_config`` for that algorithm name.
    """
    return resolve_algorithm_config(
        load_ccl_config(), name="lrab_hierarchical_allreduce"
    )
class TestConfigureSfrNeighborTables:
    """Neighbor-table wiring produced by ``configure_sfr_intercube_multisip``.

    Each test builds a fresh engine, applies the SFR configuration, and then
    inspects the ``queue_pairs`` mapping of individual ``pe_ipcq`` components.
    The expectations below imply row-major numbering with four columns for
    both the PE grid inside a cube and the cube mesh inside a SIP.
    """

    @staticmethod
    def _configure():
        """Build a fresh engine and apply the SFR configuration to it.

        Returns:
            ``(engine, spec, plan)`` where ``plan`` is the value returned by
            ``configure_sfr_intercube_multisip``.
        """
        engine, spec = _engine_and_spec()
        cfg = _merged_cfg()
        plan = configure_sfr_intercube_multisip(engine, spec, cfg)
        return engine, spec, plan

    @staticmethod
    def _queue_pairs(engine, cube, pe, sip=0):
        """Return the ``queue_pairs`` mapping of one PE's ``pe_ipcq`` component."""
        return engine._components[f"sip{sip}.cube{cube}.pe{pe}.pe_ipcq"].queue_pairs

    def test_world_size_and_rank_to_pe(self):
        """The plan's world size and rank table cover SIPs × cubes × PEs."""
        _, spec, plan = self._configure()
        n_sips = int(spec["system"]["sips"]["count"])
        expected = n_sips * N_CUBES * PES_PER_CUBE
        assert plan["world_size"] == expected
        assert len(plan["rank_to_pe"]) == expected

    # ── Intra-cube (intra_N/S/E/W) ────────────────────────────────

    def test_pe0_intra_cube_has_intra_E_and_intra_S(self):
        """pe0 is NW of the 2×4 PE grid: intra_E=pe1, intra_S=pe4."""
        engine, _, _ = self._configure()
        qp = self._queue_pairs(engine, cube=0, pe=0)
        assert "intra_E" in qp
        assert qp["intra_E"]["peer"].pe == 1
        assert "intra_S" in qp
        assert qp["intra_S"]["peer"].pe == 4
        # A corner PE has nothing to its west or north inside the cube.
        assert "intra_W" not in qp
        assert "intra_N" not in qp

    def test_pe5_intra_cube_has_all_four(self):
        """pe5 (row=1, col=1 in 2×4 grid) has three intra neighbors.

        Expected wiring: intra_N=pe1, intra_E=pe6, intra_W=pe4. intra_S must
        be absent because row=1 is the bottom row of the grid. (The method
        name predates this and is kept so the test id stays stable.)
        """
        engine, _, _ = self._configure()
        qp = self._queue_pairs(engine, cube=0, pe=5)
        for direction, peer_pe in (("intra_N", 1), ("intra_E", 6), ("intra_W", 4)):
            # Check presence first so a missing entry fails as an assertion
            # with context instead of an opaque KeyError.
            assert direction in qp, f"sip0.cube0.pe5 missing {direction}"
            assert qp[direction]["peer"].pe == peer_pe
        assert "intra_S" not in qp  # bottom row

    # ── Intercube same-lane (N/S/E/W) ─────────────────────────────

    def test_corner_cube0_pe0_has_intercube_E_and_S(self):
        """Cube 0 (NW mesh corner): intercube E→cube1, S→cube4."""
        engine, _, _ = self._configure()
        qp = self._queue_pairs(engine, cube=0, pe=0)
        assert "E" in qp, "cube 0 missing east neighbor"
        assert qp["E"]["peer"].cube == 1
        assert qp["E"]["peer"].pe == 0  # same-lane
        assert "S" in qp, "cube 0 missing south neighbor"
        assert qp["S"]["peer"].cube == 4
        assert qp["S"]["peer"].pe == 0
        assert "W" not in qp, "cube 0 has no west neighbor"
        assert "N" not in qp, "cube 0 has no north neighbor"

    def test_interior_cube5_pe3_has_all_four_intercube_same_lane(self):
        """Cube 5 interior, pe3: intercube N/S/E/W all present, same-lane."""
        engine, _, _ = self._configure()
        qp = self._queue_pairs(engine, cube=5, pe=3)
        for d, expected_cube in [("N", 1), ("S", 9), ("E", 6), ("W", 4)]:
            assert d in qp, f"sip0.cube5.pe3 missing intercube {d}"
            assert qp[d]["peer"].cube == expected_cube
            assert qp[d]["peer"].pe == 3  # same-lane

    def test_all_pes_have_intercube_wiring(self):
        """Every PE of interior cube 5 has N/S/E/W same-lane intercube wiring.

        Only cube 5 is checked here; it stands in for the interior cubes.
        """
        engine, _, _ = self._configure()
        for pe in range(PES_PER_CUBE):
            qp = self._queue_pairs(engine, cube=5, pe=pe)
            for d in ("N", "S", "E", "W"):
                assert d in qp, f"sip0.cube5.pe{pe} missing intercube {d}"
                assert qp[d]["peer"].pe == pe, (
                    f"sip0.cube5.pe{pe} {d} not same-lane"
                )

    # ── Inter-SIP (global_*) ──────────────────────────────────────

    def test_every_pe_on_every_cube_has_inter_sip(self):
        """All PEs on all cubes of sip0 are wired for inter-SIP via global_*."""
        engine, _, _ = self._configure()
        for cube_id in range(N_CUBES):
            for pe in range(PES_PER_CUBE):
                qp = self._queue_pairs(engine, cube=cube_id, pe=pe)
                assert "global_E" in qp, (
                    f"sip0.cube{cube_id}.pe{pe} missing global_E"
                )
                assert "global_W" in qp, (
                    f"sip0.cube{cube_id}.pe{pe} missing global_W"
                )
                # Peer must be same (cube, pe) on another SIP; from sip0 the
                # east neighbor is expected to be sip 1.
                peer = qp["global_E"]["peer"]
                assert peer.sip == 1
                assert peer.cube == cube_id
                assert peer.pe == pe