"""Tests for PE_DMA IPCQ handling (ADR-0023 D8 + D9 atomic). PE_DMA gains two new behaviors: 1. Outbound: when it receives an IpcqDmaToken from local PE_IPCQ, it forwards it through the fabric (next-hop port) toward the peer PE_DMA. 2. Inbound: when it receives a Transaction wrapping an IpcqDmaToken, it performs MemoryStore.write at dst_endpoint.buffer_kind/dst_addr and forwards IpcqMetaArrival(token) to local PE_IPCQ — both in the SAME SimPy step (I6 MUST). """ from __future__ import annotations from dataclasses import dataclass, field from typing import Any import numpy as np import simpy from kernbench.common.ipcq_types import ( IpcqDmaToken, IpcqEndpoint, IpcqMetaArrival, ) from kernbench.components.builtin.pe_dma import PeDmaComponent from kernbench.sim_engine.memory_store import MemoryStore from kernbench.sim_engine.transaction import Transaction from kernbench.topology.types import Node # ── Mock context ───────────────────────────────────────────────────── @dataclass class _MockResolver: pass @dataclass class _MockRouter: """Returns a fixed two-hop path for any (src, dst).""" def find_path(self, src: str, dst: str) -> list[str]: return [src, "fake_router", dst] @dataclass class _MockCtx: router: Any = field(default_factory=_MockRouter) resolver: Any = field(default_factory=_MockResolver) memory_store: Any = None edge_map: dict = field(default_factory=dict) spec: dict = field(default_factory=dict) op_logger: Any = None def compute_drain_ns(self, path: list[str], nbytes: int) -> float: return 0.0 def get_shared_resource(self, env, key, capacity=1): return simpy.Resource(env, capacity=capacity) def _make_pe_dma( env: simpy.Environment, pe_prefix: str, store: MemoryStore | None = None, ) -> PeDmaComponent: node = Node( id=f"{pe_prefix}.pe_dma", kind="pe_dma", impl="builtin.pe_dma", attrs={}, pos_mm=None, ) ctx = _MockCtx(memory_store=store) comp = PeDmaComponent(node, ctx=ctx) comp.in_ports["host"] = simpy.Store(env) comp.out_ports["fake_router"] = simpy.Store(env) comp.out_ports[f"{pe_prefix}.pe_ipcq"] = simpy.Store(env) comp.start(env) return comp def _make_endpoint(sip=0, cube=0, pe=1, buffer_kind="tcm") -> IpcqEndpoint: return IpcqEndpoint( sip=sip, cube=cube, pe=pe, buffer_kind=buffer_kind, rx_base_pa=0x10_000, rx_base_va=0, n_slots=4, slot_size=4096, ) # ── Outbound: PE_IPCQ → PE_DMA → fabric ────────────────────────────── def test_outbound_forwards_token_through_fabric(): env = simpy.Environment() store = MemoryStore() src_arr = np.arange(16, dtype=np.float16) store.write("tcm", 0x500, src_arr) src = _make_pe_dma(env, "sip0.cube0.pe0", store=store) peer = _make_endpoint(pe=1) token = IpcqDmaToken( src_addr=0x500, src_space="tcm", dst_addr=0x10_000, dst_endpoint=peer, nbytes=32, handle_id="t1", shape=(16,), dtype="f16", sender_seq=0, src_sip=0, src_cube=0, src_pe=0, src_direction="E", ) src.in_ports["host"].put(token) env.run(until=10) # The token should be wrapped in a Transaction and forwarded to "fake_router" fab = src.out_ports["fake_router"] assert len(fab.items) == 1 txn = fab.items[0] assert isinstance(txn, Transaction) assert isinstance(txn.request, IpcqDmaToken) assert txn.request.dst_addr == 0x10_000 # ── Inbound: PE_DMA → MemoryStore.write + IpcqMetaArrival forward ─── def test_inbound_writes_memory_and_forwards_metadata_atomically(): env = simpy.Environment() store = MemoryStore() # Sender wrote source data to MemoryStore src_arr = np.arange(16, dtype=np.float16) + 100 store.write("tcm", 0x500, src_arr) dst = _make_pe_dma(env, "sip0.cube0.pe1", store=store) peer = _make_endpoint(sip=0, cube=0, pe=1, buffer_kind="tcm") token = IpcqDmaToken( src_addr=0x500, src_space="tcm", dst_addr=0x10_000, dst_endpoint=peer, nbytes=32, handle_id="t1", shape=(16,), dtype="f16", sender_seq=0, src_sip=0, src_cube=0, src_pe=0, src_direction="E", ) # Wrap in a Transaction with this PE_DMA as the terminal done = env.event() txn = Transaction( request=token, path=["fake_router", "sip0.cube0.pe1.pe_dma"], step=1, nbytes=32, done=done, ) dst.in_ports["host"].put(txn) env.run(until=done) # 1. MemoryStore should have the data at dst_addr arrived = store.read("tcm", 0x10_000, shape=(16,), dtype="f16") assert np.array_equal(arrived, src_arr) # 2. IpcqMetaArrival should be in PE_IPCQ port ipcq_port = dst.out_ports["sip0.cube0.pe1.pe_ipcq"] assert len(ipcq_port.items) == 1 arrival = ipcq_port.items[0] assert isinstance(arrival, IpcqMetaArrival) assert arrival.token.sender_seq == 0 assert arrival.token.src_pe == 0 def test_inbound_no_yield_between_write_and_metadata_forward(): """Soft check: when multiple inbound IPCQ tokens arrive, the order of MemoryStore writes and IpcqMetaArrival forwards is preserved (no interleaving from extraneous yields). """ env = simpy.Environment() store = MemoryStore() for i in range(3): store.write("tcm", 0x500 + i * 0x100, np.arange(8, dtype=np.float16) + i * 10) dst = _make_pe_dma(env, "sip0.cube0.pe1", store=store) peer = _make_endpoint(sip=0, cube=0, pe=1) for i in range(3): token = IpcqDmaToken( src_addr=0x500 + i * 0x100, src_space="tcm", dst_addr=0x10_000 + i * 0x100, dst_endpoint=peer, nbytes=16, handle_id=f"t{i}", shape=(8,), dtype="f16", sender_seq=i, src_sip=0, src_cube=0, src_pe=0, src_direction="E", ) done = env.event() txn = Transaction( request=token, path=["fake_router", "sip0.cube0.pe1.pe_dma"], step=1, nbytes=16, done=done, ) dst.in_ports["host"].put(txn) env.run(until=done) # Check ordering of arrivals ipcq_port = dst.out_ports["sip0.cube0.pe1.pe_ipcq"] arrivals = list(ipcq_port.items) assert [a.token.sender_seq for a in arrivals] == [0, 1, 2] # Memory must be in order for i in range(3): arr = store.read("tcm", 0x10_000 + i * 0x100, shape=(8,), dtype="f16") assert arr[0] == i * 10