# kernbench2/tests/gqa/test_plot_gqa_figures.py

"""Phase 1 spec test for GQA figure renderers (sub-cycle 4c).

ADR-0057 D3 sub-cycle 4c adds 6 figure renderers; this test pins the
5 of 6 that don't depend on sub-cycle 4b's Q/cube sweep:

  - 4 per-panel op_log_summary PNGs (one per panel of v1's sweep.json)
  - 1 cross-panel ``gqa_comparison.png`` (4-panel grouped bars over the
    5 op_log_summary counts: gemm, ipcq_send, ipcq_recv, dma_read, dma_write)

The 6th, ``gqa_scaling.png``, needs the Q/cube ∈ {1, 2, 4} sweep from
sub-cycle 4b and is deferred.

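The three rendering tests depend on the committed
``benches/1H_milestone_output/gqa/sweep.json`` (landed in commit
``e748a62``) and are skipped when the sweep JSON is absent; they assert
the renderer writes a non-empty PNG at the expected path. The fourth
test only inspects the default ``out_dir`` of ``emit_all_gqa_plots``.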

Phase 1 expectation: tests fail at import (renderer functions don't
exist yet on the bench module). Phase 2 lands them and the tests
turn green.
"""
from __future__ import annotations

from pathlib import Path

import pytest

from tests.gqa._gqa_plot_helpers import (
    GQA_PLOTS_DIR,
    GQA_SWEEP_JSON,
    emit_all_gqa_plots,
    emit_gqa_comparison,
    emit_panel_op_log_summary,
)


# Panel names the per-panel renderer is exercised with. The tuple feeds
# ``pytest.mark.parametrize`` below and is also used to check that
# ``emit_all_gqa_plots`` produced one PNG per panel.
_PANELS: tuple[str, ...] = (
    "single_user_prefill",
    "multi_user_prefill",
    "single_user_decode",
    "multi_user_decode",
)


@pytest.mark.skipif(
    not GQA_SWEEP_JSON.exists(),
    reason="gqa sweep.json absent; run milestone-gqa-llama70b first",
)
@pytest.mark.parametrize("panel", _PANELS)
def test_emit_panel_op_log_summary_writes_png_for_each_panel(panel):
    """Render one panel and verify a non-empty, panel-named PNG results.

    Parametrized over every entry in ``_PANELS``; skipped when
    ``GQA_SWEEP_JSON`` does not exist on disk.
    """
    rendered = emit_panel_op_log_summary(panel)
    assert rendered is not None, f"{panel}: renderer returned None"

    png = Path(rendered)
    assert png.exists(), f"{panel}: expected PNG at {png}"
    assert png.suffix == ".png", f"{panel}: not a PNG: {png}"

    # Only stat the file once its existence has been asserted above.
    size_bytes = png.stat().st_size
    assert size_bytes > 0, f"{panel}: empty PNG: {png}"

    stem = png.stem
    assert panel in stem, f"{panel}: panel name not in filename {png.name}"


@pytest.mark.skipif(
    not GQA_SWEEP_JSON.exists(),
    reason="gqa sweep.json absent; run milestone-gqa-llama70b first",
)
def test_emit_gqa_comparison_writes_png():
    """Verify the cross-panel renderer writes a non-empty ``gqa_comparison.png``.

    Skipped when ``GQA_SWEEP_JSON`` does not exist on disk. Each
    assertion carries a message so a failure identifies the offending
    path without re-running under a debugger.
    """
    out = emit_gqa_comparison()
    assert out is not None, "gqa_comparison: renderer returned None"
    path = Path(out)
    assert path.exists(), f"gqa_comparison: expected PNG at {path}"
    assert path.name == "gqa_comparison.png", (
        f"gqa_comparison: unexpected filename {path.name}"
    )
    assert path.stat().st_size > 0, f"gqa_comparison: empty PNG: {path}"


@pytest.mark.skipif(
    not GQA_SWEEP_JSON.exists(),
    reason="gqa sweep.json absent; run milestone-gqa-llama70b first",
)
def test_emit_all_gqa_plots_writes_five_figures():
    """emit_all returns a list of 5 written PNG paths (deferring the
    6th gqa_scaling.png to after sub-cycle 4b lands the Q/cube sweep).

    Skipped when ``GQA_SWEEP_JSON`` does not exist on disk.
    """
    paths = emit_all_gqa_plots()
    assert isinstance(paths, list), (
        f"expected a list of paths, got {type(paths).__name__}"
    )
    # 4 per-panel + 1 comparison.
    assert len(paths) == 5, f"expected 5 PNGs, got {len(paths)}: {paths}"
    for p in paths:
        png = Path(p)
        # Separate assertions so the failure says whether the file is
        # missing or merely empty, and which one it is.
        assert png.exists(), f"listed PNG missing on disk: {png}"
        assert png.stat().st_size > 0, f"empty PNG: {png}"
    names = {Path(p).name for p in paths}
    assert "gqa_comparison.png" in names, (
        f"gqa_comparison.png not among written figures: {names}"
    )
    for panel in _PANELS:
        assert any(panel in n for n in names), (
            f"no per-panel PNG for {panel} in {names}"
        )


def test_emit_all_gqa_plots_output_dir_matches_bench_output_dir():
    """The renderers must write under the bench's own _OUTPUT_DIR so
    MILESTONE_FAST=1 reuse (and committed baselines) all point at the
    same on-disk location.

    Only the signature of ``emit_all_gqa_plots`` is inspected; nothing
    is rendered, so this test does not need the sweep JSON.
    """
    # Fails until emit_all_gqa_plots exists with a default ``out_dir``
    # argument identical to GQA_PLOTS_DIR.
    import inspect
    import os

    sig = inspect.signature(emit_all_gqa_plots)
    assert "out_dir" in sig.parameters, (
        f"emit_all_gqa_plots has no out_dir parameter: {sig}"
    )
    default = sig.parameters["out_dir"].default
    # Path() raises TypeError for inspect.Parameter.empty (no default)
    # or None; check for a path-like value first so the failure is a
    # readable assertion rather than a traceback from pathlib.
    assert isinstance(default, (str, os.PathLike)), (
        f"out_dir must default to a path, got {default!r}"
    )
    assert Path(default) == GQA_PLOTS_DIR, (
        f"default out_dir {default} != bench _OUTPUT_DIR {GQA_PLOTS_DIR}"
    )