Add SIP-level tensor parallelism, component registry YAML, VA offset verification

- DPPolicy: 3-level (sip/cube/pe), unified naming (column_wise/row_wise)
- PE_CPU: auto num_programs from cube shard count
- context.launch(): per-SIP KernelLaunchMsg with local va_base + auto local shape
- deploy_tensor: removed mmus param, MMU mapping is context-only responsibility
- ComponentRegistry: YAML-based lazy loading (components.yaml), impls→builtin rename
- VA offset bench + tests: 2D/1D, standard Triton kernel pattern

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 01:13:17 -07:00
parent 08812eda58
commit 63669f82cb
35 changed files with 813 additions and 219 deletions
@@ -73,7 +73,7 @@ def test_mmu_unmap_msg_fields():
 def test_pe_mmu_registry():
    """pe_mmu_v1 impl resolves in ComponentRegistry."""
    from kernbench.components.base import ComponentRegistry
-    from kernbench.components.impls.pe_mmu import PeMmuComponent
+    from kernbench.components.builtin.pe_mmu import PeMmuComponent
    from kernbench.topology.types import Node

    node = Node(
@@ -93,7 +93,7 @@ def test_pe_mmu_registry():
 def test_pe_mmu_processes_map_msg():
    """PE_MMU component receives MmuMapMsg → translate works."""
    import simpy
-    from kernbench.components.impls.pe_mmu import PeMmuComponent
+    from kernbench.components.builtin.pe_mmu import PeMmuComponent
    from kernbench.sim_engine.transaction import Transaction
    from kernbench.topology.types import Node

@@ -152,7 +152,7 @@ def test_pe_dma_translates_va():
    # This test validates the interface contract. Full integration test
    # requires the engine wiring which is validated in test_engine.
    # Here we check that PE_DMA has an mmu attribute it can call.
-    from kernbench.components.impls.pe_dma import PeDmaComponent
+    from kernbench.components.builtin.pe_dma import PeDmaComponent
    from kernbench.topology.types import Node

    node = Node(