CCL allreduce: rename to lrab_hierarchical_allreduce + descriptive plots
Rename the intercube all-reduce identity to lrab_hierarchical_allreduce (module, config key, distributed test) so the name reflects both levels it implements: LRAB intra-SIP (local reduce to center root + broadcast) and the hierarchical inter-SIP topology exchange (ring/torus/mesh). The ADR-0032 slug is kept as the stable decision id; this is a pure rename with no logic change.

Also in this batch:
- ADR-0032 (EN+KO): document the shipped center-root bidirectional reduce (the doc still described the stale corner-root design); annotate ccl.yaml root_cube as a placeholder.
- Rename the allreduce + pe2pe latency plots to descriptive, title-matching filenames and retitle the in-plot headings; drop overview/overview_log.
- Point the PPTX image refs at the new plot names.

Doc + derived-artifact + rename only; no simulation behavior changed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,8 @@
|
||||
Parametrized over (buffer_kind, n_elem). Each case runs the standard
|
||||
config-driven allreduce app and writes a JSON row to a shared staging
|
||||
dir; the conftest sessionfinish hook (added in Phase 1) aggregates
|
||||
rows into ``docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.png``.
|
||||
rows into ``docs/diagrams/allreduce_latency_plots/
|
||||
AllReduce_LRAB_2Dtorus_6SiP_2x3_with_TCM_SRAM_HBM.png``.
|
||||
|
||||
Pre-Phase-2: the three buffer-kind lines overlap exactly because slot
|
||||
access is latency-free today. Post-Phase-2 they spread out (tcm
|
||||
@@ -36,6 +37,8 @@ _ELEM_BYTES_F16 = 2
|
||||
_OUT_DIR = (Path(__file__).parent.parent / "docs" / "diagrams"
|
||||
/ "allreduce_latency_plots")
|
||||
_ROWS_DIR = _OUT_DIR / "_buffer_kind_rows"
|
||||
# Descriptive output stem (shared by the .png and .csv).
|
||||
_OUT_STEM = "AllReduce_LRAB_2Dtorus_6SiP_2x3_with_TCM_SRAM_HBM"
|
||||
|
||||
|
||||
def _bk_params():
|
||||
@@ -55,7 +58,7 @@ def test_buffer_kind_allreduce_one(tmp_path, buffer_kind, n_elem):
|
||||
sub,
|
||||
sip_topology="torus_2d",
|
||||
n_sips=6,
|
||||
algorithm="intercube_allreduce",
|
||||
algorithm="lrab_hierarchical_allreduce",
|
||||
sip_w=3, sip_h=2,
|
||||
n_elem_override=n_elem,
|
||||
)
|
||||
@@ -64,7 +67,7 @@ def test_buffer_kind_allreduce_one(tmp_path, buffer_kind, n_elem):
|
||||
ccl_cfg = yaml.safe_load(f)
|
||||
ccl_cfg.setdefault("defaults", {})["buffer_kind"] = buffer_kind
|
||||
ccl_cfg.setdefault("algorithms", {}).setdefault(
|
||||
"intercube_allreduce", {},
|
||||
"lrab_hierarchical_allreduce", {},
|
||||
)["buffer_kind"] = buffer_kind
|
||||
with open(ccl_path, "w") as f:
|
||||
yaml.dump(ccl_cfg, f, default_flow_style=False)
|
||||
@@ -81,7 +84,7 @@ def test_buffer_kind_allreduce_one(tmp_path, buffer_kind, n_elem):
|
||||
) as ctx:
|
||||
result = run_allreduce(
|
||||
ctx, engine, spec,
|
||||
algorithm="intercube_allreduce", ccl_yaml=ccl_path,
|
||||
algorithm="lrab_hierarchical_allreduce", ccl_yaml=ccl_path,
|
||||
)
|
||||
assert result["ok_cubes"] > 0
|
||||
|
||||
@@ -108,7 +111,7 @@ def test_buffer_kind_allreduce_one(tmp_path, buffer_kind, n_elem):
|
||||
|
||||
|
||||
def aggregate_buffer_kind_plot() -> bool:
|
||||
"""Read per-config rows and emit buffer_kind_sweep.png + CSV.
|
||||
"""Read per-config rows and emit the descriptive .png + .csv (_OUT_STEM).
|
||||
|
||||
Called from conftest.pytest_sessionfinish (controller-only).
|
||||
Returns True if rows were aggregated.
|
||||
@@ -141,7 +144,7 @@ def aggregate_buffer_kind_plot() -> bool:
|
||||
_bytes_fmt = FuncFormatter(_fmt_bytes)
|
||||
|
||||
_OUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
with open(_OUT_DIR / "buffer_kind_sweep.csv", "w",
|
||||
with open(_OUT_DIR / f"{_OUT_STEM}.csv", "w",
|
||||
newline="", encoding="utf-8") as f:
|
||||
w = csv.DictWriter(f, fieldnames=[
|
||||
"buffer_kind", "sip_topology", "n_sips", "n_elem",
|
||||
@@ -172,13 +175,13 @@ def aggregate_buffer_kind_plot() -> bool:
|
||||
ax.set_xlabel("Bytes per PE (log scale)")
|
||||
ax.set_ylabel("Time (ns)")
|
||||
ax.set_title(
|
||||
"Allreduce torus_2d (6 SIPs, 3×2) — IPCQ slot memory tier"
|
||||
"AllReduce_LRAB_2Dtorus_6SiP(2x3) — IPCQ memory (SRAM, TCM, HBM)"
|
||||
)
|
||||
ax.grid(True, alpha=0.3)
|
||||
ax.legend()
|
||||
ax.xaxis.set_major_formatter(_bytes_fmt)
|
||||
fig.tight_layout()
|
||||
fig.savefig(_OUT_DIR / "buffer_kind_sweep.png", dpi=130)
|
||||
fig.savefig(_OUT_DIR / f"{_OUT_STEM}.png", dpi=130)
|
||||
plt.close(fig)
|
||||
|
||||
for p in row_files:
|
||||
@@ -191,6 +194,6 @@ def aggregate_buffer_kind_plot() -> bool:
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
print(f"\nWrote {_OUT_DIR / 'buffer_kind_sweep.png'} "
|
||||
print(f"\nWrote {_OUT_DIR / f'{_OUT_STEM}.png'} "
|
||||
f"from {len(records)} rows")
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user