Add virtual memory support: PE_MMU, VA allocator, fabric MmuMapMsg
Implement the VA/MMU layer (ADR-0011 Phase 1), enabling Triton kernels to use contiguous virtual addresses on sharded tensors.

Key changes:
- PE_MMU component: hybrid inbox (MmuMapMsg) + sync translate() for PE_DMA
- VirtualAllocator + PEMemAllocator: free-list with coalescing
- MmuMapMsg/MmuUnmapMsg fabric path with SIP-level routing
- DPPolicy-based mapping: replicate=local, sharded=broadcast
- Tensor lifecycle: del + weakref cleanup, context manager
- Rename: TensorHandle.pa→addr, DmaReadCmd.src_pa→src_addr, ctx→torch

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,31 +17,32 @@ def test_full_graph_node_count():
|
||||
g = _graph()
|
||||
# 1 switch
|
||||
# + 2 SIPs × (1 IO × (3 comps + 4 io_ucie + 16 io_conn)
|
||||
# + 16 cubes × (cube_comps + 8 PEs × 6 pe_comps))
|
||||
# + 16 cubes × (cube_comps + 8 PEs × 7 pe_comps))
|
||||
# IO: pcie_ep + io_cpu + io_noc + 4 io_ucie + 4*4 io_conn = 23
|
||||
# cube_comps: 9 (noc, m_cpu, sram, 2 bridge, 4 ucie)
|
||||
# + 16 ucie_conn (4 ports × 4 connections)
|
||||
# + 2 xbar_top/bot
|
||||
# + 8 hbm_slices = 35
|
||||
# = 1 + 2*(23 + 16*(35+48)) = 1 + 2*(23+1328) = 1 + 2702 = 2703
|
||||
assert len(g.nodes) == 2703
|
||||
# pe_comps: 7 (pe_cpu, pe_scheduler, pe_dma, pe_gemm, pe_math, pe_mmu, pe_tcm)
|
||||
# = 1 + 2*(23 + 16*(35+56)) = 1 + 2*(23+1456) = 1 + 2958 = 2959
|
||||
assert len(g.nodes) == 2959
|
||||
|
||||
|
||||
def test_full_graph_edge_count():
|
||||
g = _graph()
|
||||
# Per cube: 184
|
||||
# Per cube: 192
|
||||
# PE-internal: 56
|
||||
# PE_DMA→noc: 8, noc→pe_dma: 8, noc→pe_cpu: 8, pe_cpu→noc: 8
|
||||
# PE_DMA→noc: 8, noc→pe_dma: 8, noc→pe_cpu: 8, pe_cpu→noc: 8, noc→pe_mmu: 8
|
||||
# xbar_top→hbm{0..3}: 4+4=8, xbar_bot→hbm{4..7}: 4+4=8
|
||||
# noc↔xbar_top: 2, noc↔xbar_bot: 2
|
||||
# xbar_top↔bridge.left: 2, bridge.left↔xbar_bot: 2
|
||||
# xbar_top↔bridge.right: 2, bridge.right↔xbar_bot: 2
|
||||
# ucie: 64, m_cpu↔noc: 2, noc↔sram: 2
|
||||
# Total: 56+8+8+8+8+8+8+2+2+2+2+2+2+64+2+2 = 184
|
||||
# Total: 56+8+8+8+8+8+8+8+2+2+2+2+2+2+64+2+2 = 192
|
||||
# IO edges per SIP: 77
|
||||
# Per SIP: 16*184 + 48 inter-cube + 77 IO = 3069
|
||||
# Total: 2 * 3069 = 6138
|
||||
assert len(g.edges) == 6138
|
||||
# Per SIP: 16*192 + 48 inter-cube + 77 IO = 3197
|
||||
# Total: 2 * 3197 = 6394
|
||||
assert len(g.edges) == 6394
|
||||
|
||||
|
||||
# ── Full graph: specific nodes exist ─────────────────────────────────
|
||||
@@ -267,7 +268,7 @@ def test_cube_view_pe_to_noc():
|
||||
def test_pe_view_has_all_components():
|
||||
v = _graph().pe_view
|
||||
assert set(v.nodes.keys()) == {
|
||||
"pe_cpu", "pe_scheduler", "pe_dma", "pe_gemm", "pe_math", "pe_tcm"
|
||||
"pe_cpu", "pe_scheduler", "pe_dma", "pe_gemm", "pe_math", "pe_mmu", "pe_tcm"
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user