Rectangular SIP topology + 6-device allreduce sweep
mesh_2d, torus_2d, and mesh_2d_no_wrap accept optional w,h kwargs; sqrt fall-back preserved for square layouts (back-compat tests confirm 4-SIP and 9-SIP square configs still work). sfr_config reads system.sips.w/h from spec and threads dims through to the topology fn. test_allreduce_multidevice CONFIGS switched from 4 SIPs (square) to 6 SIPs: ring_1d_6sip, torus_2d_6sip_2x3, mesh_2d_no_wrap_6sip_2x3. _write_temp_configs writes system.sips.w/h when supplied; _sip_topo_dims reads them back. Latency sweep loop also moved to 6-SIP layouts. Linear-scale plot variants dropped -- only log-scale *.png + summary.csv emitted. Plots in tests/allreduce_latency_plots regenerated. New tests/test_sip_topology_rectangular.py asserts neighbor correctness for 2x3 layouts and back-compat for square fallback. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,22 +1,24 @@
|
||||
"""SFR configuration for intercube + inter-SIP IPCQ wiring.
|
||||
"""SFR configuration for the full IPCQ hardware wiring.
|
||||
|
||||
Provides ``configure_sfr_intercube_multisip`` which programs PE_IPCQ
|
||||
neighbor tables for:
|
||||
Installs PE_IPCQ neighbor tables modeling the physical hardware.
|
||||
Wiring is independent of DPPolicy / kernel choice — the kernel decides
|
||||
at runtime which links to use.
|
||||
|
||||
1. Intercube within each SIP — pe0 of every cube connects to pe0 of
|
||||
its N/S/E/W mesh neighbors (no wrap-around).
|
||||
2. Inter-SIP on ALL cubes — pe0 of cube_c on sip_A connects to pe0 of
|
||||
cube_c on each peer SIP, using ``global_E``/``global_W`` (ring) or
|
||||
``global_N``/``global_S``/``global_E``/``global_W`` (mesh/torus)
|
||||
direction labels. Wiring all cubes allows the kernel to
|
||||
dynamically elect the root cube at runtime.
|
||||
Direction label namespaces (disjoint):
|
||||
|
||||
SIP-level topology is read from ``topology.yaml`` →
|
||||
``system.sips.topology`` (e.g. ``ring_1d``, ``mesh_2d``).
|
||||
Intercube mesh dimensions come from ``sip.cube_mesh.w/h``.
|
||||
- Intra-cube PE-to-PE: ``intra_N / intra_S / intra_E / intra_W``
|
||||
Logical 2×4 PE grid within a cube (no wrap):
|
||||
|
||||
Internally delegates to ``install_ipcq`` with a computed ``rank_to_pe``
|
||||
(pe0-only) and a closure-captured ``neighbors()`` function.
|
||||
Row 0: pe0 pe1 pe2 pe3
|
||||
Row 1: pe4 pe5 pe6 pe7
|
||||
|
||||
- Intercube same-lane: ``N / S / E / W``
|
||||
``pe_i of cube_A ↔ pe_i of cube_B`` across the 4×4 cube mesh
|
||||
(no wrap). Every PE i ∈ [0..7] wired independently.
|
||||
|
||||
- Inter-SIP same-(cube, pe): ``global_N / global_S / global_E / global_W``
|
||||
``pe_i of cube_c on sip_A ↔ pe_i of cube_c on sip_B`` per
|
||||
``topology.yaml → system.sips.topology``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -27,12 +29,46 @@ from kernbench.ccl.install import install_ipcq
|
||||
from kernbench.ccl.topologies import _BUILTIN as _TOPO_BUILTINS
|
||||
|
||||
|
||||
# ── Intra-cube 2×4 PE grid ───────────────────────────────────────────
|
||||
|
||||
_PE_GRID_COLS = 4
_PE_GRID_ROWS = 2
_PES_PER_CUBE = _PE_GRID_COLS * _PE_GRID_ROWS  # 8


def _intra_cube_neighbors(pe: int) -> dict[str, int]:
    """Return the in-cube grid neighbors of a PE (no wrap-around).

    PEs are numbered row-major on a ``_PE_GRID_ROWS`` x ``_PE_GRID_COLS``
    grid (2x4), so edge and corner PEs get fewer entries than interior
    ones.

    Args:
        pe: PE index local to its cube, in ``[0, _PES_PER_CUBE)``.

    Returns:
        Mapping from a direction label in the ``intra_*`` namespace
        (``intra_E`` / ``intra_W`` / ``intra_S`` / ``intra_N``) to the
        cube-local index of the neighboring PE.

    Raises:
        ValueError: if ``pe`` does not lie on the grid. Without this check
            an out-of-range index would silently produce neighbor indices
            that do not exist in the cube.
    """
    if not 0 <= pe < _PES_PER_CUBE:
        raise ValueError(
            f"pe index {pe} out of range [0, {_PES_PER_CUBE})"
        )

    row, col = divmod(pe, _PE_GRID_COLS)
    nbrs: dict[str, int] = {}
    # Insertion order (E, W, S, N) is kept stable for callers that iterate.
    if col < _PE_GRID_COLS - 1:
        nbrs["intra_E"] = row * _PE_GRID_COLS + (col + 1)
    if col > 0:
        nbrs["intra_W"] = row * _PE_GRID_COLS + (col - 1)
    if row < _PE_GRID_ROWS - 1:
        nbrs["intra_S"] = (row + 1) * _PE_GRID_COLS + col
    if row > 0:
        nbrs["intra_N"] = (row - 1) * _PE_GRID_COLS + col
    return nbrs
|
||||
|
||||
|
||||
# ── Public entry point ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def configure_sfr_intercube_multisip(
|
||||
engine: Any,
|
||||
spec: dict,
|
||||
cfg: dict,
|
||||
) -> dict[str, Any]:
|
||||
"""Wire IPCQ for intercube (pe0, mesh) + inter-SIP (pe0, all cubes).
|
||||
"""Wire the full IPCQ hardware model.
|
||||
|
||||
Every PE on every cube on every SIP gets neighbor table entries for:
|
||||
|
||||
- intra-cube (2×4 grid) in the ``intra_*`` namespace
|
||||
- intercube same-lane (4×4 cube mesh, no wrap) in ``N/S/E/W``
|
||||
- inter-SIP same-(cube, pe) in ``global_*``
|
||||
|
||||
Args:
|
||||
engine: GraphEngine with ``_components``.
|
||||
@@ -46,48 +82,71 @@ def configure_sfr_intercube_multisip(
|
||||
mesh_w = int(cm["w"])
|
||||
mesh_h = int(cm["h"])
|
||||
n_cubes = mesh_w * mesh_h
|
||||
n_sips = int(spec.get("system", {}).get("sips", {}).get("count", 1))
|
||||
sip_topology = str(
|
||||
spec.get("system", {}).get("sips", {}).get("topology", "ring_1d")
|
||||
)
|
||||
sips_cfg = spec.get("system", {}).get("sips", {})
|
||||
n_sips = int(sips_cfg.get("count", 1))
|
||||
sip_topology = str(sips_cfg.get("topology", "ring_1d"))
|
||||
sip_w = sips_cfg.get("w")
|
||||
sip_h = sips_cfg.get("h")
|
||||
sip_w = int(sip_w) if sip_w is not None else None
|
||||
sip_h = int(sip_h) if sip_h is not None else None
|
||||
|
||||
if sip_topology not in _TOPO_BUILTINS:
|
||||
raise ValueError(
|
||||
f"Unknown sip topology '{sip_topology}'. "
|
||||
f"Available: {list(_TOPO_BUILTINS)}"
|
||||
)
|
||||
sip_topo_fn = _TOPO_BUILTINS[sip_topology]
|
||||
_sip_topo_fn_raw = _TOPO_BUILTINS[sip_topology]
|
||||
|
||||
world_size = n_sips * n_cubes
|
||||
import inspect

# Decide once, from the topology function's own signature, whether it can
# take rectangular dims.  Probing with try/except TypeError on every call
# would also swallow genuine TypeErrors raised *inside* a dims-aware
# topology function and silently retry with the square fallback.
try:
    _sip_topo_params = list(
        inspect.signature(_sip_topo_fn_raw).parameters.values()
    )
except (TypeError, ValueError):
    # Signature is not introspectable: use the plain (rank, ws) call.
    _sip_topo_params = []
_sip_topo_takes_dims = (
    {"w", "h"} <= {p.name for p in _sip_topo_params}
    or any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in _sip_topo_params
    )
)

def sip_topo_fn(rank: int, ws: int) -> dict:
    """Return the SIP-level neighbor map for ``rank`` out of ``ws`` SIPs.

    Forwards ``sip_w`` / ``sip_h`` (read from ``system.sips.w/h``) as
    ``w=`` / ``h=`` when both are set and the selected topology function
    accepts them; otherwise calls it with ``(rank, ws)`` only.  Dims are
    ignored when only one of the two is configured or when the topology
    does not take them.
    """
    if _sip_topo_takes_dims and sip_w is not None and sip_h is not None:
        return _sip_topo_fn_raw(rank, ws, w=sip_w, h=sip_h)
    return _sip_topo_fn_raw(rank, ws)
|
||||
|
||||
pes_per_cube = _PES_PER_CUBE
|
||||
world_size = n_sips * n_cubes * pes_per_cube
|
||||
pe_idx_to_pe: list[tuple[int, int, int]] = [
|
||||
(sip, cube, 0)
|
||||
(sip, cube, pe)
|
||||
for sip in range(n_sips)
|
||||
for cube in range(n_cubes)
|
||||
for pe in range(pes_per_cube)
|
||||
]
|
||||
|
||||
def _pe_idx(sip: int, cube: int, pe: int) -> int:
|
||||
return (sip * n_cubes + cube) * pes_per_cube + pe
|
||||
|
||||
def _neighbors(pe_idx: int, ws: int, _base: dict) -> dict[str, int]:
|
||||
sip = pe_idx // n_cubes
|
||||
cube = pe_idx % n_cubes
|
||||
tmp = pe_idx
|
||||
pe = tmp % pes_per_cube
|
||||
tmp //= pes_per_cube
|
||||
cube = tmp % n_cubes
|
||||
sip = tmp // n_cubes
|
||||
row = cube // mesh_w
|
||||
col = cube % mesh_w
|
||||
|
||||
nbrs: dict[str, int] = {}
|
||||
|
||||
# Intercube within SIP (mesh, no wrap-around)
|
||||
if col < mesh_w - 1:
|
||||
nbrs["E"] = sip * n_cubes + (row * mesh_w + col + 1)
|
||||
if col > 0:
|
||||
nbrs["W"] = sip * n_cubes + (row * mesh_w + col - 1)
|
||||
if row < mesh_h - 1:
|
||||
nbrs["S"] = sip * n_cubes + ((row + 1) * mesh_w + col)
|
||||
if row > 0:
|
||||
nbrs["N"] = sip * n_cubes + ((row - 1) * mesh_w + col)
|
||||
# ── Intra-cube (intra_N/S/E/W) ──
|
||||
for d, peer_pe in _intra_cube_neighbors(pe).items():
|
||||
nbrs[d] = _pe_idx(sip, cube, peer_pe)
|
||||
|
||||
# Inter-SIP on ALL cubes
|
||||
# ── Intercube same-lane (N/S/E/W, 4×4 no wrap) ──
|
||||
if col < mesh_w - 1:
|
||||
nbrs["E"] = _pe_idx(sip, row * mesh_w + (col + 1), pe)
|
||||
if col > 0:
|
||||
nbrs["W"] = _pe_idx(sip, row * mesh_w + (col - 1), pe)
|
||||
if row < mesh_h - 1:
|
||||
nbrs["S"] = _pe_idx(sip, (row + 1) * mesh_w + col, pe)
|
||||
if row > 0:
|
||||
nbrs["N"] = _pe_idx(sip, (row - 1) * mesh_w + col, pe)
|
||||
|
||||
# ── Inter-SIP same-(cube, pe) (global_*) ──
|
||||
if n_sips > 1:
|
||||
sip_nbrs = sip_topo_fn(sip, n_sips)
|
||||
for d, peer_sip in sip_nbrs.items():
|
||||
nbrs[f"global_{d}"] = peer_sip * n_cubes + cube
|
||||
nbrs[f"global_{d}"] = _pe_idx(peer_sip, cube, pe)
|
||||
|
||||
return nbrs
|
||||
|
||||
|
||||
@@ -33,23 +33,41 @@ def ring_1d_unidir(rank: int, world_size: int) -> NeighborMap:
|
||||
return {"E": (rank + 1) % world_size}
|
||||
|
||||
|
||||
def mesh_2d(rank: int, world_size: int) -> NeighborMap:
|
||||
"""Square 2D mesh (N/S/E/W).
|
||||
|
||||
Layout: rank = row * side + col, with side = sqrt(world_size).
|
||||
Wrap-around (torus) on all four edges.
|
||||
"""
|
||||
def _resolve_2d_dims(
    world_size: int, w: int | None, h: int | None, name: str,
) -> tuple[int, int]:
    """Resolve the ``(w, h)`` grid dimensions of a 2D topology.

    Resolution rules:

    - both ``w`` and ``h`` given: they must be positive and multiply to
      ``world_size``;
    - exactly one given: the other is derived as ``world_size // given``,
      which must divide evenly (an explicit dimension is never silently
      discarded in favour of the square fallback);
    - neither given: square fallback, ``w == h == sqrt(world_size)``.

    Args:
        world_size: total number of ranks in the grid.
        w: number of columns, or ``None``.
        h: number of rows, or ``None``.
        name: topology name, used only as a prefix in error messages.

    Returns:
        The resolved ``(w, h)`` pair.

    Raises:
        ValueError: if a supplied dimension is not positive, if the
            dimensions do not match ``world_size``, or if no dimension is
            given and ``world_size`` is not a perfect square.
    """
    if w is None and h is None:
        side = int(round(world_size ** 0.5))
        if side * side != world_size:
            raise ValueError(
                f"{name} requires square world_size or explicit w,h, "
                f"got {world_size}"
            )
        return side, side

    # Reject non-positive dims up front: e.g. w=-2, h=-3 would otherwise
    # satisfy the product check for world_size=6.
    for label, dim in (("w", w), ("h", h)):
        if dim is not None and dim <= 0:
            raise ValueError(f"{name}: {label} must be positive, got {dim}")

    if w is None or h is None:
        label, given = ("h", h) if w is None else ("w", w)
        derived, remainder = divmod(world_size, given)
        if remainder or derived <= 0:
            raise ValueError(
                f"{name}: world_size ({world_size}) is not a positive "
                f"multiple of {label} ({given})"
            )
        return (derived, given) if w is None else (given, derived)

    if w * h != world_size:
        raise ValueError(
            f"{name}: w*h ({w}*{h}) != world_size ({world_size})"
        )
    return w, h
|
||||
|
||||
|
||||
def mesh_2d(
    rank: int, world_size: int,
    w: int | None = None, h: int | None = None,
) -> NeighborMap:
    """Return the N/S/E/W neighbors of ``rank`` on a wrapping 2D grid.

    Ranks are laid out row-major (``rank = row * w + col``) and every
    edge wraps around to the opposite side.  Explicit ``w`` and ``h``
    allow rectangular layouts such as 2x3; when they are omitted the
    grid is square with side ``sqrt(world_size)``.
    """
    cols, rows = _resolve_2d_dims(world_size, w, h, "mesh_2d")
    row, col = divmod(rank, cols)

    # Wrap each coordinate independently, then flatten back to a rank.
    north_row = (row - 1) % rows
    south_row = (row + 1) % rows
    west_col = (col - 1) % cols
    east_col = (col + 1) % cols

    return {
        "N": north_row * cols + col,
        "S": south_row * cols + col,
        "W": row * cols + west_col,
        "E": row * cols + east_col,
    }
|
||||
|
||||
|
||||
@@ -73,36 +91,30 @@ def tree_binary(rank: int, world_size: int) -> NeighborMap:
|
||||
return n
|
||||
|
||||
|
||||
def torus_2d(rank: int, world_size: int) -> NeighborMap:
|
||||
"""Square 2D torus (N/S/E/W) with wrap-around on all edges.
|
||||
|
||||
Alias for mesh_2d (which already wraps). Explicit name for clarity
|
||||
when used as a SIP-level topology.
|
||||
"""
|
||||
return mesh_2d(rank, world_size)
|
||||
def torus_2d(
    rank: int, world_size: int,
    w: int | None = None, h: int | None = None,
) -> NeighborMap:
    """Return 2D torus neighbors (N/S/E/W, wrapping on every edge).

    Thin alias: delegates to ``mesh_2d`` with the same arguments.
    """
    neighbors = mesh_2d(rank, world_size, w=w, h=h)
    return neighbors
|
||||
|
||||
|
||||
def mesh_2d_no_wrap(rank: int, world_size: int) -> NeighborMap:
|
||||
"""Square 2D mesh (N/S/E/W) WITHOUT wrap-around.
|
||||
|
||||
Edge nodes have fewer neighbors (no wrapping). Used for SIP-level
|
||||
topologies where physical links don't wrap.
|
||||
"""
|
||||
side = int(round(world_size ** 0.5))
|
||||
if side * side != world_size:
|
||||
raise ValueError(
|
||||
f"mesh_2d_no_wrap requires square world_size, got {world_size}"
|
||||
)
|
||||
r, c = divmod(rank, side)
|
||||
def mesh_2d_no_wrap(
    rank: int, world_size: int,
    w: int | None = None, h: int | None = None,
) -> NeighborMap:
    """Return the N/S/E/W neighbors of ``rank`` on a non-wrapping 2D grid.

    Ranks are laid out row-major (``rank = row * w + col``).  A direction
    is present only when a neighbor exists on that side, so edge and
    corner ranks get fewer entries.  Rectangular layouts are supported
    through explicit ``w`` and ``h``.
    """
    cols, rows = _resolve_2d_dims(world_size, w, h, "mesh_2d_no_wrap")
    row, col = divmod(rank, cols)

    # (label, neighbor exists?, neighbor rank) in the emitted key order.
    links = (
        ("N", row > 0, (row - 1) * cols + col),
        ("S", row < rows - 1, (row + 1) * cols + col),
        ("W", col > 0, row * cols + (col - 1)),
        ("E", col < cols - 1, row * cols + (col + 1)),
    )
    return {label: peer for label, present, peer in links if present}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user