"""Tests for CCL backend install (ADR-0023 D10/D11).""" from __future__ import annotations from kernbench.ccl.install import ( install_ipcq, linear_rank_to_pe, load_ccl_config, resolve_algorithm_config, ) from kernbench.sim_engine.engine import GraphEngine from kernbench.topology.builder import resolve_topology def _engine(): topo = resolve_topology("topology.yaml").topology_obj return GraphEngine(topo, enable_data=True), topo def test_load_ccl_config(): cfg = load_ccl_config() assert "defaults" in cfg assert "algorithms" in cfg def test_resolve_algorithm_config_default(): cfg = load_ccl_config() merged = resolve_algorithm_config(cfg) assert merged["algorithm"] == cfg["defaults"]["algorithm"] # ccl.yaml no longer carries defaults.world_size — backend derives # it from topology.yaml at install time. Just check the field is # absent here (verified per-test where install_ipcq is called). assert "world_size" not in merged or merged["world_size"] >= 1 def test_resolve_algorithm_config_override(): cfg = load_ccl_config() merged = resolve_algorithm_config(cfg, name="ring_allreduce_hbm") assert merged["algorithm"] == "ring_allreduce_hbm" assert merged["buffer_kind"] == "hbm" # algo override # defaults still apply assert merged["n_slots"] == cfg["defaults"]["n_slots"] def test_linear_rank_to_pe(): engine, topo = _engine() spec = topo.spec # Cube 0 of SIP 0 assert linear_rank_to_pe(0, spec) == (0, 0, 0) assert linear_rank_to_pe(7, spec) == (0, 0, 7) # Should not exceed total PE count pes_per_sip = ( spec["sip"]["cube_mesh"]["w"] * spec["sip"]["cube_mesh"]["h"] * spec["cube"]["pe_layout"]["pe_per_corner"] * len(spec["cube"]["pe_layout"]["corners"]) ) sips = spec["system"]["sips"]["count"] total = sips * pes_per_sip assert total >= 8 def test_install_ipcq_neighbors_correct(): engine, topo = _engine() cfg = load_ccl_config() merged = resolve_algorithm_config(cfg, name="ring_allreduce_tcm") # Force a single-cube 8-rank install for the assertions below. merged["world_size"] = 8 plan = install_ipcq(engine, topo.spec, merged) assert plan["world_size"] == 8 assert plan["buffer_kind"] == "tcm" # Each rank should have E and W entries for r, nbrs in plan["neighbor_table"].items(): assert "E" in nbrs assert "W" in nbrs # Inspect installed PE_IPCQ for rank 0 ipcq = engine._components["sip0.cube0.pe0.pe_ipcq"] qp_e = ipcq.queue_pairs["E"] qp_w = ipcq.queue_pairs["W"] assert qp_e["peer"].pe == 1 # rank 0's E neighbor is rank 1 assert qp_w["peer"].pe == 7 # rank 0's W neighbor is rank 7 # rx_base addresses should be unique assert qp_e["my_rx_base_pa"] != qp_w["my_rx_base_pa"] def test_install_ipcq_credit_stores_wired(): engine, topo = _engine() cfg = load_ccl_config() merged = resolve_algorithm_config(cfg, name="ring_allreduce_tcm") merged["world_size"] = 8 install_ipcq(engine, topo.spec, merged) # rank 0 (pe0) sending E goes to rank 1 (pe1) # rank 0's peer_credit_store on E direction should equal rank 1's credit_inbox pe0 = engine._components["sip0.cube0.pe0.pe_ipcq"] pe1 = engine._components["sip0.cube0.pe1.pe_ipcq"] qp_e = pe0.queue_pairs["E"] assert qp_e["peer_credit_store"] is pe1.credit_inbox # ── ADR-0025 D1: reverse_direction opposite-preference ─────────────── def test_reverse_direction_opposite_preference_2rank_ring(): """ADR-0025 D1: In a 2-rank bidirectional ring both E and W point to the same peer; reverse_direction must pick the OPPOSITE direction (W for E, E for W) so rx_base targets the semantically-correct slot. Concretely: rank 0 sending via E to rank 1 must target rank 1's W-rx buffer (not rank 1's E-rx), because rank 1's kernel recv(W) reads from its W-rx. """ engine, topo = _engine() cfg = load_ccl_config() merged = resolve_algorithm_config(cfg, name="ring_allreduce_tcm") merged["world_size"] = 2 install_ipcq(engine, topo.spec, merged) ipcq0 = engine._components["sip0.cube0.pe0.pe_ipcq"] ipcq1 = engine._components["sip0.cube0.pe1.pe_ipcq"] rank1_e_rx = ipcq1.queue_pairs["E"]["my_rx_base_pa"] rank1_w_rx = ipcq1.queue_pairs["W"]["my_rx_base_pa"] qp0_e = ipcq0.queue_pairs["E"] qp0_w = ipcq0.queue_pairs["W"] # rank 0's E entry should target rank 1's W-rx (opposite), NOT rank 1's E-rx. assert qp0_e["peer"].rx_base_pa == rank1_w_rx, ( f"expected rank 0's E peer.rx_base_pa == rank 1's W-rx ({rank1_w_rx:#x}), " f"got {qp0_e['peer'].rx_base_pa:#x} (matches E-rx: {rank1_e_rx:#x}) — " f"reverse_direction picked same-label instead of opposite" ) # rank 0's W entry should target rank 1's E-rx (opposite). assert qp0_w["peer"].rx_base_pa == rank1_e_rx def test_reverse_direction_opposite_preference_4rank_ring_sanity(): """ADR-0025 D1 sanity: ws>=3 ring. E and W have distinct peers, so opposite-preference produces same result as old dict-order first-match. This test should PASS both under current and post-fix code. """ engine, topo = _engine() cfg = load_ccl_config() merged = resolve_algorithm_config(cfg, name="ring_allreduce_tcm") merged["world_size"] = 4 install_ipcq(engine, topo.spec, merged) ipcq0 = engine._components["sip0.cube0.pe0.pe_ipcq"] ipcq1 = engine._components["sip0.cube0.pe1.pe_ipcq"] ipcq3 = engine._components["sip0.cube0.pe3.pe_ipcq"] # rank 0 E → rank 1 → rank 1's W-rx qp0_e = ipcq0.queue_pairs["E"] assert qp0_e["peer"].rx_base_pa == ipcq1.queue_pairs["W"]["my_rx_base_pa"] # rank 0 W → rank 3 (last in ring) → rank 3's E-rx qp0_w = ipcq0.queue_pairs["W"] assert qp0_w["peer"].rx_base_pa == ipcq3.queue_pairs["E"]["my_rx_base_pa"]