Composite GEMM: K-loop accumulator residency, pinned operands, sweep + deck

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 15:00:41 -07:00
parent 5accd98171
commit 83ea97b05f
11 changed files with 4219 additions and 51 deletions
+7 -4
View File
@@ -123,13 +123,14 @@ class TLContext:
# Build and return a TensorHandle describing a tensor at `addr`.
#   - id comes from self._next_handle_id()
#   - nbytes is computed by self._nbytes(shape, dtype)
#   - addr, shape, dtype, space and pinned are forwarded unchanged
# `space` defaults to "tcm" and `pinned` defaults to False.
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers and
# indentation were stripped. The first `space: str = "tcm",` line below is
# presumably the removed (pre-change) signature line and the second one, which
# adds `pinned`, its replacement -- confirm against the repository before
# treating this text as runnable code.
def _make_handle(
self, addr: int, shape: tuple[int, ...], dtype: str,
space: str = "tcm",
space: str = "tcm", pinned: bool = False,
) -> TensorHandle:
return TensorHandle(
id=self._next_handle_id(),
addr=addr, shape=shape, dtype=dtype,
# Byte size is derived from shape and dtype rather than passed in by the caller.
nbytes=self._nbytes(shape, dtype),
space=space,
pinned=pinned,
)
def _make_compute_out(
@@ -184,15 +185,17 @@ class TLContext:
actually lives in Phase 2 storage.
"""
self._emit_dispatch_overhead()
handle = self._make_handle(addr=ptr, shape=shape, dtype=dtype, space="hbm")
handle = self._make_handle(
addr=ptr, shape=shape, dtype=dtype, space="hbm", pinned=True,
)
cmd = DmaReadCmd(handle=handle, src_addr=ptr, nbytes=handle.nbytes)
data = self._emit(cmd)
if data is not None:
# Greenlet mode: attach real data to handle (preserve space)
# Greenlet mode: attach real data to handle (preserve space + pinned)
return TensorHandle(
id=handle.id, addr=handle.addr, shape=handle.shape,
dtype=handle.dtype, nbytes=handle.nbytes, data=data,
space=handle.space,
space=handle.space, pinned=handle.pinned,
)
return handle