b3ca532023
Add 5 of the 6 figure renderers ADR-0057 D3 sub-cycle 4c specifies:
- gqa_op_log_{panel}.png × 4 — per-panel bar chart of the 5 op_log
counts (gemm, ipcq_send, ipcq_recv, dma_read, dma_write).
- gqa_comparison.png — cross-panel grouped bars over the same 5 series.
Sixth figure (gqa_scaling.png) depends on sub-cycle 4b's Q/cube ∈
{1, 2, 4} sweep on multi_user_* panels and is deferred until that
data exists; emit_all_gqa_plots returns just the 5 in-scope paths.
Add MILESTONE_FAST=1 mode to run(): skip the panel sweep, reuse the
committed sweep.json, render figures only. Validation mode unchanged.
The runtime errors clearly when neither env var is set, listing the
two supported modes.
Renderers live in the bench module (the milestone-1h-gemm pattern);
tests/gqa/_gqa_plot_helpers.py re-exports them for figure tests.
Tests: tests/gqa/test_plot_gqa_figures.py — 7 tests, all green:
- 4 parametrized per-panel emit assertions
- 1 comparison emit assertion
- 1 emit_all returns exactly 5 PNG paths
- 1 default out_dir matches the bench _OUTPUT_DIR
Commits the 5 PNG baselines under the bench output dir alongside
sweep.json, mirroring milestone-1h-gemm's committed-figures pattern.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
110 lines
3.6 KiB
Python
110 lines
3.6 KiB
Python
"""Phase 1 spec test for GQA figure renderers (sub-cycle 4c).
|
|
|
|
ADR-0057 D3 sub-cycle 4c adds 6 figure renderers; this test pins the
5 of 6 that don't depend on sub-cycle 4b's Q/cube sweep:

- 4 per-panel op_log_summary PNGs (one per panel of v1's sweep.json)
- 1 cross-panel ``gqa_comparison.png`` (4-panel grouped bars over the
  5 op_log_summary counts: gemm, ipcq_send, ipcq_recv, dma_read, dma_write)

The 6th, ``gqa_scaling.png``, needs the Q/cube ∈ {1, 2, 4} sweep from
sub-cycle 4b and is deferred.

Each test depends on the committed
``benches/1H_milestone_output/gqa/sweep.json`` (landed in commit
``e748a62``); they assert the renderer writes a non-empty PNG at the
expected path.

Phase 1 expectation: tests fail at import (renderer functions don't
exist yet on the bench module). Phase 2 lands them and the tests
turn green.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from tests.gqa._gqa_plot_helpers import (
|
|
GQA_PLOTS_DIR,
|
|
GQA_SWEEP_JSON,
|
|
emit_all_gqa_plots,
|
|
emit_gqa_comparison,
|
|
emit_panel_op_log_summary,
|
|
)
|
|
|
|
|
|
# Panel names exercised by this module: they parametrize the per-panel
# renderer test and are the substrings looked for in the filenames returned
# by ``emit_all_gqa_plots``.
_PANELS = (
    "single_user_prefill",
    "multi_user_prefill",
    "single_user_decode",
    "multi_user_decode",
)
|
|
|
|
|
|
@pytest.mark.skipif(
    not GQA_SWEEP_JSON.exists(),
    reason="gqa sweep.json absent; run milestone-gqa-llama70b first",
)
@pytest.mark.parametrize("panel", _PANELS)
def test_emit_panel_op_log_summary_writes_png_for_each_panel(panel):
    """Check the per-panel renderer produces a usable PNG for ``panel``.

    The value returned by ``emit_panel_op_log_summary(panel)`` must be
    non-None and, interpreted as a path, must point at an existing,
    non-empty ``.png`` file whose stem contains the panel name.

    Skipped when ``GQA_SWEEP_JSON`` does not exist on disk.
    """
    rendered = emit_panel_op_log_summary(panel)
    assert rendered is not None, f"{panel}: renderer returned None"

    png = Path(rendered)
    assert png.exists(), f"{panel}: expected PNG at {png}"
    assert png.suffix == ".png", f"{panel}: not a PNG: {png}"

    size_bytes = png.stat().st_size
    assert size_bytes > 0, f"{panel}: empty PNG: {png}"

    # The filename has to identify the panel it was rendered for.
    stem = png.stem
    assert panel in stem, f"{panel}: panel name not in filename {png.name}"
|
|
|
|
|
|
@pytest.mark.skipif(
    not GQA_SWEEP_JSON.exists(),
    reason="gqa sweep.json absent; run milestone-gqa-llama70b first",
)
def test_emit_gqa_comparison_writes_png():
    """Check the cross-panel renderer writes ``gqa_comparison.png``.

    ``emit_gqa_comparison()`` must return a non-None value that, as a
    path, exists, is named exactly ``gqa_comparison.png`` and is
    non-empty.

    Skipped when ``GQA_SWEEP_JSON`` does not exist on disk.
    """
    rendered = emit_gqa_comparison()
    assert rendered is not None

    png = Path(rendered)
    assert png.exists()
    assert png.name == "gqa_comparison.png"

    size_bytes = png.stat().st_size
    assert size_bytes > 0
|
|
|
|
|
|
@pytest.mark.skipif(
    not GQA_SWEEP_JSON.exists(),
    reason="gqa sweep.json absent; run milestone-gqa-llama70b first",
)
def test_emit_all_gqa_plots_writes_five_figures():
    """Check ``emit_all_gqa_plots()`` returns a list of 5 written PNGs.

    The five are the four per-panel figures plus the comparison figure;
    the sixth (``gqa_scaling.png``) is deferred until sub-cycle 4b lands
    the Q/cube sweep.

    Skipped when ``GQA_SWEEP_JSON`` does not exist on disk.
    """
    written = emit_all_gqa_plots()
    assert isinstance(written, list)

    # 4 per-panel + 1 comparison.
    count = len(written)
    assert count == 5, f"expected 5 PNGs, got {count}: {written}"

    # Every returned entry must be a real, non-empty file.
    for entry in written:
        png = Path(entry)
        assert png.exists() and png.stat().st_size > 0

    names = {Path(entry).name for entry in written}
    assert "gqa_comparison.png" in names

    # Each panel must be represented by at least one filename.
    for panel in _PANELS:
        has_panel_png = any(panel in filename for filename in names)
        assert has_panel_png, f"no per-panel PNG for {panel} in {names}"
|
|
|
|
|
|
def test_emit_all_gqa_plots_output_dir_matches_bench_output_dir():
    """Check ``emit_all_gqa_plots`` defaults ``out_dir`` to ``GQA_PLOTS_DIR``.

    The renderers must write under the bench's own _OUTPUT_DIR so
    MILESTONE_FAST=1 reuse (and committed baselines) all point at the
    same on-disk location.

    Only the function signature is inspected; nothing is rendered, so
    this test does not need the sweep JSON to be present.
    """
    import inspect

    sig = inspect.signature(emit_all_gqa_plots)
    assert "out_dir" in sig.parameters

    default = sig.parameters["out_dir"].default
    # Without this guard a missing (or ``None``) default would surface as
    # an opaque ``TypeError`` from ``Path(...)`` rather than as a readable
    # assertion failure naming the broken contract.
    assert default is not inspect.Parameter.empty and default is not None, (
        f"out_dir has no usable default (got {default!r}); "
        f"expected bench _OUTPUT_DIR {GQA_PLOTS_DIR}"
    )
    assert Path(default) == GQA_PLOTS_DIR, (
        f"default out_dir {default} != bench _OUTPUT_DIR {GQA_PLOTS_DIR}"
    )
|