Composite GEMM: K-loop accumulator residency, pinned operands, sweep + deck
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -123,13 +123,14 @@ class TLContext:
|
||||
|
||||
def _make_handle(
    self, addr: int, shape: tuple[int, ...], dtype: str,
    space: str = "tcm", pinned: bool = False,
) -> TensorHandle:
    """Build a new ``TensorHandle`` describing a tensor at ``addr``.

    The handle receives a fresh id from ``self._next_handle_id()`` and a
    byte size computed by ``self._nbytes(shape, dtype)``; every other
    field is passed through from the arguments unchanged.

    Args:
        addr: Address recorded on the handle.
        shape: Tensor shape, also used to compute ``nbytes``.
        dtype: Element type name, also used to compute ``nbytes``.
        space: Memory-space tag stored on the handle (defaults to
            ``"tcm"``).
        pinned: Flag stored on the handle as-is (defaults to ``False``).
            NOTE(review): presumably marks operands that must stay
            resident -- confirm against the ``TensorHandle`` definition.

    Returns:
        The newly constructed ``TensorHandle``.
    """
    return TensorHandle(
        id=self._next_handle_id(),
        addr=addr,
        shape=shape,
        dtype=dtype,
        nbytes=self._nbytes(shape, dtype),
        space=space,
        pinned=pinned,
    )
|
||||
|
||||
def _make_compute_out(
|
||||
@@ -184,15 +185,17 @@ class TLContext:
|
||||
actually lives in Phase 2 storage.
|
||||
"""
|
||||
self._emit_dispatch_overhead()
|
||||
handle = self._make_handle(addr=ptr, shape=shape, dtype=dtype, space="hbm")
|
||||
handle = self._make_handle(
|
||||
addr=ptr, shape=shape, dtype=dtype, space="hbm", pinned=True,
|
||||
)
|
||||
cmd = DmaReadCmd(handle=handle, src_addr=ptr, nbytes=handle.nbytes)
|
||||
data = self._emit(cmd)
|
||||
if data is not None:
|
||||
# Greenlet mode: attach real data to handle (preserve space)
|
||||
# Greenlet mode: attach real data to handle (preserve space + pinned)
|
||||
return TensorHandle(
|
||||
id=handle.id, addr=handle.addr, shape=handle.shape,
|
||||
dtype=handle.dtype, nbytes=handle.nbytes, data=data,
|
||||
space=handle.space,
|
||||
space=handle.space, pinned=handle.pinned,
|
||||
)
|
||||
return handle
|
||||
|
||||
|
||||
Reference in New Issue
Block a user