# kernbench2/tests/test_tl_ipcq_api.py

"""Tests for tl.send / tl.recv API (ADR-0023 D4 + D9.5)."""
from __future__ import annotations

from typing import Any

import simpy
from greenlet import greenlet

from kernbench.common.ipcq_types import (
    IpcqRecvCmd,
    IpcqRequest,
    IpcqSendCmd,
)
from kernbench.triton_emu.tl_context import TLContext


# ── Command-list mode (no runner) ────────────────────────────────────


def test_tl_send_command_list_mode():
    """Without a runner, tl.send records exactly one IpcqSendCmd.

    The recorded command must carry the direction, source address and byte
    count that were passed to ``tl.send``.
    """
    ctx = TLContext(pe_id=0, num_programs=4, dispatch_cycles=0)
    ctx.send(dir="E", src_addr=0x500, nbytes=64, shape=(8,), dtype="f16")

    # Only look at send commands; ignore anything else on the command list.
    send_cmds = list(
        filter(lambda cmd: isinstance(cmd, IpcqSendCmd), ctx.commands)
    )
    assert len(send_cmds) == 1

    recorded = send_cmds[0]
    assert recorded.direction == "E"
    assert recorded.src_addr == 0x500
    assert recorded.nbytes == 64


def test_tl_recv_command_list_mode():
    """Without a runner, tl.recv records one IpcqRecvCmd and returns a handle.

    The returned handle must echo the requested shape and dtype.
    """
    ctx = TLContext(pe_id=0, num_programs=4, dispatch_cycles=0)
    placeholder = ctx.recv(dir="W", shape=(8,), dtype="f16")

    recv_cmds = list(
        filter(lambda cmd: isinstance(cmd, IpcqRecvCmd), ctx.commands)
    )
    assert len(recv_cmds) == 1
    assert recv_cmds[0].direction == "W"

    # With no runner attached, tl.recv hands back a placeholder TensorHandle;
    # no actual data movement happens until SimPy is involved.
    assert placeholder.shape == (8,)
    assert placeholder.dtype == "f16"


def test_tl_recv_round_robin_no_dir():
    """Calling tl.recv without ``dir`` records a command with direction None.

    The test name indicates that a ``None`` direction stands for round-robin
    receive; this test only checks the recorded value, not the selection
    behaviour itself.
    """
    tl = TLContext(pe_id=0, num_programs=4, dispatch_cycles=0)
    tl.recv(shape=(8,), dtype="f16")
    recvs = [c for c in tl.commands if isinstance(c, IpcqRecvCmd)]
    # Guard the index below so a missing command fails as a readable
    # assertion (matching the sibling command-list tests) rather than as an
    # IndexError.
    assert len(recvs) == 1
    assert recvs[0].direction is None


# ── Runner mode (greenlet) ──────────────────────────────────────────


class _StubRunner:
    """Test double for a runner: logs every command and answers it at once.

    An ``IpcqRecvCmd`` is answered with a canned slot dict; every other
    command (``IpcqSendCmd`` included) is answered with ``None``.
    """

    def __init__(self) -> None:
        # Commands passed to switch_to_simpy, in call order.
        self.received: list[Any] = []

    def switch_to_simpy(self, cmd: Any) -> Any:
        """Record *cmd* and return the stubbed reply for it."""
        self.received.append(cmd)

        # Sends are checked first and win over receives; anything that is
        # not a receive has no payload to hand back either.
        if isinstance(cmd, IpcqSendCmd) or not isinstance(cmd, IpcqRecvCmd):
            return None

        # Fake slot: fixed address/size, remaining fields echoed from the
        # command. An unset direction falls back to "E".
        slot_direction = cmd.direction or "E"
        fake_slot = {
            "data": None,
            "src_space": "tcm",
            "src_addr": 0xABCD,
            "direction": slot_direction,
            "dtype": cmd.dtype,
            "shape": cmd.shape,
            "nbytes": 16,
        }
        return fake_slot


def test_tl_send_runner_mode():
    """With a runner attached, tl.send passes one IpcqSendCmd to the runner."""
    stub = _StubRunner()
    ctx = TLContext(pe_id=0, num_programs=4, dispatch_cycles=0, runner=stub)
    ctx.send(dir="E", src_addr=0x500, nbytes=64, shape=(8,), dtype="f16")

    seen = stub.received
    assert len(seen) == 1
    assert isinstance(seen[0], IpcqSendCmd)


def test_tl_recv_runner_mode_returns_handle_with_slot_addr():
    """With a runner attached, tl.recv builds its handle from the runner's slot.

    The stub runner replies with ``src_addr`` 0xABCD, so the returned handle
    must expose that address along with the requested shape and dtype.
    """
    stub = _StubRunner()
    ctx = TLContext(pe_id=0, num_programs=4, dispatch_cycles=0, runner=stub)
    handle = ctx.recv(dir="W", shape=(8,), dtype="f16")

    first_cmd = stub.received[0]
    assert isinstance(first_cmd, IpcqRecvCmd)

    # The handle's address comes from the slot the runner returned.
    assert handle.addr == 0xABCD
    assert handle.shape == (8,)
    assert handle.dtype == "f16"