Add virtual memory support: PE_MMU, VA allocator, fabric MmuMapMsg
Implement VA/MMU layer (ADR-0011 Phase 1) enabling Triton kernels to use contiguous virtual addresses on sharded tensors.

Key changes:
- PE_MMU component: hybrid inbox (MmuMapMsg) + sync translate() for PE_DMA
- VirtualAllocator + PEMemAllocator: free-list with coalescing
- MmuMapMsg/MmuUnmapMsg fabric path with SIP-level routing
- DPPolicy-based mapping: replicate=local, sharded=broadcast
- Tensor lifecycle: del + weakref cleanup, context manager
- Rename: TensorHandle.pa→addr, DmaReadCmd.src_pa→src_addr, ctx→torch

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -34,7 +34,7 @@ def test_tl_load_generates_dma_read():
|
||||
cmds = tl.commands
|
||||
assert len(cmds) == 1
|
||||
assert isinstance(cmds[0], DmaReadCmd)
|
||||
assert cmds[0].src_pa == 0x1000
|
||||
assert cmds[0].src_addr == 0x1000
|
||||
assert cmds[0].nbytes == 32 * 64 * 2
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ def test_tl_store_generates_dma_write():
|
||||
tl.store(0x2000, h)
|
||||
cmds = [c for c in tl.commands if isinstance(c, DmaWriteCmd)]
|
||||
assert len(cmds) == 1
|
||||
assert cmds[0].dst_pa == 0x2000
|
||||
assert cmds[0].dst_addr == 0x2000
|
||||
assert cmds[0].nbytes == 16 * 16 * 4
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ def test_tl_composite_nonblocking():
|
||||
comp_cmds = [c for c in tl.commands if isinstance(c, CompositeCmd)]
|
||||
assert len(comp_cmds) == 1
|
||||
assert comp_cmds[0].op == "gemm"
|
||||
assert comp_cmds[0].out_pa == 0x3000
|
||||
assert comp_cmds[0].out_addr == 0x3000
|
||||
assert comp_cmds[0].out_nbytes == 32 * 32 * 2 # M×N×dtype_bytes
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user