ADR-0032 + intra_* opposite directions in IPCQ install
Add intra_N/S/E/W to install.py _OPPOSITE_DIR table so the intra-cube PE-to-PE namespace is symmetrical with intercube N/S/E/W. ADR-0032 documents the intercube allreduce algorithm (supersedes ADR-0029). Refresh ADR-0024/0025/0029 cross-refs and update test_intercube_sfr_config.py to cover the new intra_* mappings. Drop the obsolete test_ccl_round_robin_recv.py (replaced by intercube tests). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,48 +0,0 @@
|
||||
"""Test that tl.recv() (no direction) works under the mock runtime
|
||||
and the SimPy PE_IPCQ component (ADR-0023 D4 weak fairness)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from kernbench.ccl.testing import run_kernel_in_mock
|
||||
|
||||
|
||||
def kernel_round_robin(t_ptr, n_elem, tl):
    """Accumulate the tiles received over ``world_size - 1`` send/recv steps.

    On every step the current tile is sent in direction ``"E"`` and one
    tile is received with a direction-less ``tl.recv`` (round-robin). The
    received tile is added to the accumulator and becomes the tile that is
    sent on the following step. The accumulator is finally stored back to
    this rank's slice.

    Uses TensorHandle math (PE_MATH) so Phase 2 produces correct HBM
    contents under SimPy + op_log replay.

    Args:
        t_ptr: Base address; this rank's slice starts at
            ``t_ptr + rank * n_elem * 2``.
        n_elem: Number of f16 elements in one slice.
        tl: Kernel API object providing ``program_id``, ``num_programs``,
            ``load``, ``send``, ``recv`` and ``store``.
    """
    f16_nbytes = 2  # slices are loaded and received with dtype="f16"

    rank = tl.program_id(axis=0)
    world_size = tl.num_programs(axis=0)

    pe_addr = t_ptr + rank * (n_elem * f16_nbytes)
    acc = tl.load(pe_addr, shape=(n_elem,), dtype="f16")
    current = acc

    for _step in range(world_size - 1):
        tl.send(dir="E", src=current)
        # No direction → round-robin
        recv = tl.recv(shape=(n_elem,), dtype="f16")
        acc = acc + recv
        current = recv  # forward W's tile to E next round

    tl.store(pe_addr, acc)
|
||||
|
||||
|
||||
def test_round_robin_recv_mock_runtime():
    """Run ``kernel_round_robin`` on a 4-rank ``ring_1d`` mock topology.

    Rank ``r`` starts with a tile filled with ``r + 1``. After the kernel,
    every rank's output is expected to match the element-wise sum of all
    input tiles.
    """
    n_elem = 8
    world_size = 4  # single source of truth for the rank count
    inputs = [
        np.full((n_elem,), float(r + 1), dtype=np.float16)
        for r in range(world_size)
    ]
    expected = sum(inputs)  # [10,...]

    outputs = run_kernel_in_mock(
        kernel_fn=kernel_round_robin,
        world_size=world_size,
        topology="ring_1d",
        inputs=inputs,
        kernel_args=(n_elem,),
    )
    for r in range(world_size):
        assert np.allclose(outputs[r], expected), (
            f"rank {r}: got {outputs[r]}, expected {expected}"
        )
|
||||
@@ -1,8 +1,9 @@
|
||||
"""Tests for configure_sfr_intercube_multisip neighbor table wiring.
|
||||
|
||||
Verifies that IPCQ neighbor tables are correctly installed for
|
||||
intercube (pe0, 4×4 mesh N/S/E/W) + inter-SIP (pe0, all cubes,
|
||||
global_E/global_W) communication.
|
||||
Verifies full IPCQ hardware wiring (independent of DPPolicy):
|
||||
- intra-cube (2×4 PE grid) → intra_N/S/E/W
|
||||
- intercube same-lane → N/S/E/W
|
||||
- inter-SIP same-(cube, pe) → global_N/S/E/W
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -16,6 +17,7 @@ from kernbench.topology.builder import resolve_topology
|
||||
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
|
||||
|
||||
N_CUBES = 16
|
||||
PES_PER_CUBE = 8
|
||||
|
||||
|
||||
def _engine_and_spec():
|
||||
@@ -36,78 +38,102 @@ class TestConfigureSfrNeighborTables:
|
||||
plan = configure_sfr_intercube_multisip(engine, spec, cfg)
|
||||
|
||||
n_sips = int(spec["system"]["sips"]["count"])
|
||||
assert plan["world_size"] == n_sips * N_CUBES
|
||||
assert len(plan["rank_to_pe"]) == n_sips * N_CUBES
|
||||
for pe_idx, (sip, cube, pe) in enumerate(plan["rank_to_pe"]):
|
||||
assert pe == 0, f"pe_idx {pe_idx}: pe must be 0, got {pe}"
|
||||
expected = n_sips * N_CUBES * PES_PER_CUBE
|
||||
assert plan["world_size"] == expected
|
||||
assert len(plan["rank_to_pe"]) == expected
|
||||
|
||||
def test_corner_cube0_has_E_and_S_only(self):
|
||||
"""Cube 0 (row=0, col=0) is NW corner: only E and S neighbors."""
|
||||
# ── Intra-cube (intra_N/S/E/W) ────────────────────────────────
|
||||
|
||||
def test_pe0_intra_cube_has_intra_E_and_intra_S(self):
|
||||
"""pe0 is NW of the 2×4 PE grid: intra_E=pe1, intra_S=pe4."""
|
||||
engine, spec = _engine_and_spec()
|
||||
cfg = _merged_cfg()
|
||||
configure_sfr_intercube_multisip(engine, spec, cfg)
|
||||
|
||||
ipcq = engine._components["sip0.cube0.pe0.pe_ipcq"]
|
||||
qp = ipcq.queue_pairs
|
||||
assert "E" in qp, "cube 0 must have E neighbor"
|
||||
assert "S" in qp, "cube 0 must have S neighbor"
|
||||
assert "W" not in qp, "cube 0 (col=0) must NOT have W neighbor"
|
||||
assert "N" not in qp, "cube 0 (row=0) must NOT have N neighbor"
|
||||
qp = engine._components["sip0.cube0.pe0.pe_ipcq"].queue_pairs
|
||||
assert "intra_E" in qp
|
||||
assert qp["intra_E"]["peer"].pe == 1
|
||||
assert "intra_S" in qp
|
||||
assert qp["intra_S"]["peer"].pe == 4
|
||||
assert "intra_W" not in qp
|
||||
assert "intra_N" not in qp
|
||||
|
||||
def test_pe5_intra_cube_has_all_four(self):
    """pe5 (row=1, col=1 in 2×4 grid) has intra_N/E/W but no intra_S.

    Intra neighbors: intra_N=pe1, intra_E=pe6, intra_W=pe4,
    intra_S not present (row=1 is bottom row).

    NOTE(review): the test name says "all four", but the assertions
    cover three present directions plus the absence of intra_S; the
    name is kept so existing test selectors keep working.
    """
    engine, spec = _engine_and_spec()
    cfg = _merged_cfg()
    configure_sfr_intercube_multisip(engine, spec, cfg)

    qp = engine._components["sip0.cube0.pe5.pe_ipcq"].queue_pairs
    expected_peers = (("intra_N", 1), ("intra_E", 6), ("intra_W", 4))
    for direction, expected_pe in expected_peers:
        # Check presence first so a missing entry fails with a readable
        # message instead of a bare KeyError.
        assert direction in qp, f"sip0.cube0.pe5 missing {direction}"
        assert qp[direction]["peer"].pe == expected_pe, (
            f"sip0.cube0.pe5 {direction} must point to pe{expected_pe}"
        )
    assert "intra_S" not in qp, "pe5 is in the bottom row: no intra_S"
|
||||
|
||||
# ── Intercube same-lane (N/S/E/W) ─────────────────────────────
|
||||
|
||||
def test_corner_cube0_pe0_has_intercube_E_and_S(self):
    """Cube 0 pe0 is wired E to cube 1 and S to cube 4, both to pe 0.

    Also checks that no W or N intercube entry is installed for cube 0.
    """
    engine, spec = _engine_and_spec()
    cfg = _merged_cfg()
    configure_sfr_intercube_multisip(engine, spec, cfg)

    pairs = engine._components["sip0.cube0.pe0.pe_ipcq"].queue_pairs

    # Present directions: (direction, cube the peer must live on).
    for direction, neighbour_cube in (("E", 1), ("S", 4)):
        assert pairs[direction]["peer"].cube == neighbour_cube
        assert pairs[direction]["peer"].pe == 0  # same-lane

    # Directions with no installed entry for cube 0.
    assert "W" not in pairs, "cube 0 has no west neighbor"
    assert "N" not in pairs, "cube 0 has no north neighbor"
|
||||
|
||||
def test_interior_cube5_has_all_four(self):
|
||||
"""Cube 5 (row=1, col=1) is interior: N/S/E/W all present."""
|
||||
def test_interior_cube5_pe3_has_all_four_intercube_same_lane(self):
|
||||
"""Cube 5 interior, pe3: intercube N/S/E/W all present, same-lane."""
|
||||
engine, spec = _engine_and_spec()
|
||||
cfg = _merged_cfg()
|
||||
configure_sfr_intercube_multisip(engine, spec, cfg)
|
||||
|
||||
ipcq = engine._components["sip0.cube5.pe0.pe_ipcq"]
|
||||
qp = ipcq.queue_pairs
|
||||
assert qp["N"]["peer"].cube == 1
|
||||
assert qp["S"]["peer"].cube == 9
|
||||
assert qp["E"]["peer"].cube == 6
|
||||
assert qp["W"]["peer"].cube == 4
|
||||
qp = engine._components["sip0.cube5.pe3.pe_ipcq"].queue_pairs
|
||||
for d, expected_cube in [("N", 1), ("S", 9), ("E", 6), ("W", 4)]:
|
||||
assert qp[d]["peer"].cube == expected_cube
|
||||
assert qp[d]["peer"].pe == 3 # same-lane
|
||||
|
||||
def test_root_cube15_has_inter_sip(self):
|
||||
"""Cube 15 (root, SE corner) has N, W + global_E/global_W."""
|
||||
def test_all_pes_have_intercube_wiring(self):
|
||||
"""Every PE on every interior cube has intercube same-lane wiring."""
|
||||
engine, spec = _engine_and_spec()
|
||||
cfg = _merged_cfg()
|
||||
configure_sfr_intercube_multisip(engine, spec, cfg)
|
||||
|
||||
ipcq0 = engine._components["sip0.cube15.pe0.pe_ipcq"]
|
||||
qp0 = ipcq0.queue_pairs
|
||||
assert "N" in qp0
|
||||
assert "W" in qp0
|
||||
assert "E" not in qp0, "cube 15 (col=3) must NOT have E"
|
||||
assert "S" not in qp0, "cube 15 (row=3) must NOT have S"
|
||||
assert "global_E" in qp0, "root cube must have global_E"
|
||||
assert "global_W" in qp0, "root cube must have global_W"
|
||||
assert qp0["global_E"]["peer"].sip == 1
|
||||
assert qp0["global_E"]["peer"].cube == 15
|
||||
|
||||
ipcq1 = engine._components["sip1.cube15.pe0.pe_ipcq"]
|
||||
qp1 = ipcq1.queue_pairs
|
||||
assert qp1["global_E"]["peer"].sip == 0
|
||||
assert qp1["global_E"]["peer"].cube == 15
|
||||
|
||||
def test_all_cubes_have_inter_sip(self):
|
||||
"""ALL cubes (not just root) are wired for inter-SIP."""
|
||||
engine, spec = _engine_and_spec()
|
||||
cfg = _merged_cfg()
|
||||
configure_sfr_intercube_multisip(engine, spec, cfg)
|
||||
|
||||
root_cube = int(cfg.get("root_cube", N_CUBES - 1))
|
||||
for cube_id in range(N_CUBES):
|
||||
ipcq = engine._components[f"sip0.cube{cube_id}.pe0.pe_ipcq"]
|
||||
qp = ipcq.queue_pairs
|
||||
assert "global_E" in qp, (
|
||||
f"sip0.cube{cube_id}.pe0 missing global_E"
|
||||
)
|
||||
assert "global_W" in qp, (
|
||||
f"sip0.cube{cube_id}.pe0 missing global_W"
|
||||
)
|
||||
if cube_id == root_cube:
|
||||
assert qp["global_E"]["peer"].sip != 0, (
|
||||
f"root cube {root_cube} global_E must point to another SIP"
|
||||
# Interior cube 5: every PE should have N/S/E/W same-lane.
|
||||
for pe in range(PES_PER_CUBE):
|
||||
qp = engine._components[f"sip0.cube5.pe{pe}.pe_ipcq"].queue_pairs
|
||||
for d in ("N", "S", "E", "W"):
|
||||
assert d in qp, f"sip0.cube5.pe{pe} missing intercube {d}"
|
||||
assert qp[d]["peer"].pe == pe, (
|
||||
f"sip0.cube5.pe{pe} {d} not same-lane"
|
||||
)
|
||||
|
||||
# ── Inter-SIP (global_*) ──────────────────────────────────────
|
||||
|
||||
def test_every_pe_on_every_cube_has_inter_sip(self):
    """Every sip0 PE on every cube has global_E and global_W entries.

    For each (cube, pe) the global_E peer is additionally expected to be
    on sip 1 with the same cube and pe indices.
    """
    engine, spec = _engine_and_spec()
    cfg = _merged_cfg()
    configure_sfr_intercube_multisip(engine, spec, cfg)

    for cube_id in range(N_CUBES):
        for pe in range(PES_PER_CUBE):
            ipcq = engine._components[
                f"sip0.cube{cube_id}.pe{pe}.pe_ipcq"
            ]
            qp = ipcq.queue_pairs

            assert "global_E" in qp, (
                f"sip0.cube{cube_id}.pe{pe} missing global_E"
            )
            assert "global_W" in qp

            # Peer must be same (cube, pe) on another SIP.
            assert qp["global_E"]["peer"].sip == 1
            assert qp["global_E"]["peer"].cube == cube_id
            assert qp["global_E"]["peer"].pe == pe
|
||||
|
||||
Reference in New Issue
Block a user