From 81ce55571d84fdd61f17881315989c28837c9f88 Mon Sep 17 00:00:00 2001 From: Yangwook Kang Date: Thu, 9 Apr 2026 00:16:24 -0700 Subject: [PATCH] Rename impl names: add builtin. prefix for clear provenance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - components.yaml: all builtin impls use builtin.xxx naming - topology.yaml: all impl references updated to builtin.xxx - builder.py: hardcoded ucie impl → builtin.ucie - Tests: all impl string references updated Convention: builtin. for built-in, custom. for user-defined. 382 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- components.yaml | 45 +++++++++++++++++-------------- src/kernbench/topology/builder.py | 12 ++++----- tests/test_component_registry.py | 14 +++++----- tests/test_mmu_component.py | 8 +++--- tests/test_pe_components.py | 26 +++++++++--------- tests/test_phase_a_components.py | 32 +++++++++++----------- topology.yaml | 32 +++++++++++----------- 7 files changed, 87 insertions(+), 82 deletions(-) diff --git a/components.yaml b/components.yaml index ad39d6d..f3ab639 100644 --- a/components.yaml +++ b/components.yaml @@ -2,6 +2,10 @@ # Maps impl names (used in topology.yaml) to Python class paths. # Format: impl_name: module.path:ClassName # +# Naming convention: +# builtin. — built-in implementations +# custom. — user-defined implementations +# # ── Adding custom components ────────────────────────────────────────── # # 1. Create your implementation in: @@ -10,41 +14,42 @@ # Your class must inherit from ComponentBase (or PeEngineBase for PE engines). # # 2. Register it below under "Custom" with a unique impl name: -# my_pe_cpu_v2: kernbench.components.custom.my_pe_cpu:MyPeCpuComponent +# custom.my_pe_cpu: kernbench.components.custom.my_pe_cpu:MyPeCpuComponent # # 3. Reference it in topology.yaml: -# pe_cpu: { kind: pe_cpu, impl: my_pe_cpu_v2, attrs: { ... } } +# pe_cpu: { kind: pe_cpu, impl: custom.my_pe_cpu, attrs: { ... } } # # 4. 
Add unit tests in: # tests/custom/test_.py # # External packages also work — use the full module path: -# fast_gemm_v1: my_team.accel.fast_gemm:FastGemmComponent +# custom.fast_gemm: my_team.accel.fast_gemm:FastGemmComponent # ────────────────────────────────────────────────────────────────────── components: # Infrastructure - forwarding_v1: kernbench.components.builtin.forwarding:TransitComponent - switch_v1: kernbench.components.builtin.forwarding:TransitComponent - noc_v1: kernbench.components.builtin.forwarding:TransitComponent - ucie_v1: kernbench.components.builtin.forwarding:TransitComponent + builtin.forwarding: kernbench.components.builtin.forwarding:TransitComponent + builtin.switch: kernbench.components.builtin.forwarding:TransitComponent + builtin.noc: kernbench.components.builtin.forwarding:TransitComponent + builtin.ucie: kernbench.components.builtin.forwarding:TransitComponent + # IO / Host interface - pcie_ep_v1: kernbench.components.builtin.pcie_ep:PcieEpComponent - io_cpu_v1: kernbench.components.builtin.io_cpu:IoCpuComponent + builtin.pcie_ep: kernbench.components.builtin.pcie_ep:PcieEpComponent + builtin.io_cpu: kernbench.components.builtin.io_cpu:IoCpuComponent # Cube-level - m_cpu_v1: kernbench.components.builtin.m_cpu:MCpuComponent - hbm_ctrl_v1: kernbench.components.builtin.hbm_ctrl:HbmCtrlComponent - sram_v1: kernbench.components.builtin.sram:SramComponent + builtin.m_cpu: kernbench.components.builtin.m_cpu:MCpuComponent + builtin.hbm_ctrl: kernbench.components.builtin.hbm_ctrl:HbmCtrlComponent + builtin.sram: kernbench.components.builtin.sram:SramComponent # PE-level - pe_cpu_v1: kernbench.components.builtin.pe_cpu:PeCpuComponent - pe_scheduler_v1: kernbench.components.builtin.pe_scheduler:PeSchedulerComponent - pe_dma_v1: kernbench.components.builtin.pe_dma:PeDmaComponent - pe_gemm_v1: kernbench.components.builtin.pe_gemm:PeGemmComponent - pe_math_v1: kernbench.components.builtin.pe_math:PeMathComponent - pe_fetch_store_v1: 
kernbench.components.builtin.pe_fetch_store:PeFetchStoreComponent - pe_mmu_v1: kernbench.components.builtin.pe_mmu:PeMmuComponent - pe_tcm_v1: kernbench.components.builtin.pe_tcm:PeTcmComponent + builtin.pe_cpu: kernbench.components.builtin.pe_cpu:PeCpuComponent + builtin.pe_scheduler: kernbench.components.builtin.pe_scheduler:PeSchedulerComponent + builtin.pe_dma: kernbench.components.builtin.pe_dma:PeDmaComponent + builtin.pe_gemm: kernbench.components.builtin.pe_gemm:PeGemmComponent + builtin.pe_math: kernbench.components.builtin.pe_math:PeMathComponent + builtin.pe_fetch_store: kernbench.components.builtin.pe_fetch_store:PeFetchStoreComponent + builtin.pe_mmu: kernbench.components.builtin.pe_mmu:PeMmuComponent + builtin.pe_tcm: kernbench.components.builtin.pe_tcm:PeTcmComponent # Custom — add your implementations here diff --git a/src/kernbench/topology/builder.py b/src/kernbench/topology/builder.py index c525f4f..ae18f3c 100644 --- a/src/kernbench/topology/builder.py +++ b/src/kernbench/topology/builder.py @@ -277,7 +277,7 @@ def _instantiate_io_chiplets( for phy in inst["ucie"]["phys"]: phy_id = f"{prefix}.ucie-{phy}" nodes[phy_id] = Node( - id=phy_id, kind="io_ucie", impl="ucie_v1", + id=phy_id, kind="io_ucie", impl="builtin.ucie", attrs={"overhead_ns": io_ucie_ns}, pos_mm=(cx, noc_y), label=f"IO UCIe-{phy}", ) @@ -285,7 +285,7 @@ def _instantiate_io_chiplets( for ci in range(io_n_conn): conn_id = f"{phy_id}.conn{ci}" nodes[conn_id] = Node( - id=conn_id, kind="io_ucie_conn", impl="ucie_v1", + id=conn_id, kind="io_ucie_conn", impl="builtin.ucie", attrs={"overhead_ns": 0.0}, pos_mm=(cx, noc_y), label=f"IO UCIe-{phy} C{ci}", ) @@ -379,14 +379,14 @@ def _instantiate_cube( pid = f"{cp}.ucie-{port}" lx, ly = local_pos[f"ucie-{port}"] nodes[pid] = Node( - id=pid, kind="ucie_port", impl="ucie_v1", + id=pid, kind="ucie_port", impl="builtin.ucie", attrs={"overhead_ns": ucie_ns}, pos_mm=(ox + lx, oy + ly), label=f"UCIe-{port}", ) for ci in range(ucie_n_conn): conn_id = 
f"{cp}.ucie-{port}.conn{ci}" nodes[conn_id] = Node( - id=conn_id, kind="ucie_conn", impl="ucie_v1", + id=conn_id, kind="ucie_conn", impl="builtin.ucie", attrs={"overhead_ns": 0.0}, pos_mm=(ox + lx, oy + ly), label=f"UCIe-{port} C{ci}", @@ -937,13 +937,13 @@ def _build_cube_view(spec: dict) -> ViewGraph: pid = f"ucie-{port}" lx, ly = local_pos[pid] nodes[pid] = Node( - id=pid, kind="ucie_port", impl="ucie_v1", + id=pid, kind="ucie_port", impl="builtin.ucie", attrs={}, pos_mm=(lx, ly), label=f"UCIe-{port}", ) for ci in range(ucie_n_conn): conn_id = f"ucie-{port}.conn{ci}" nodes[conn_id] = Node( - id=conn_id, kind="ucie_conn", impl="ucie_v1", + id=conn_id, kind="ucie_conn", impl="builtin.ucie", attrs={"overhead_ns": 0.0}, pos_mm=(lx, ly), label=f"UCIe-{port} C{ci}", ) diff --git a/tests/test_component_registry.py b/tests/test_component_registry.py index 4e344a5..055bb6d 100644 --- a/tests/test_component_registry.py +++ b/tests/test_component_registry.py @@ -55,7 +55,7 @@ def test_registry_unknown_impl_raises_error(): def test_transit_component_yields_overhead_ns(): """TransitComponent.run() yields exactly node.attrs['overhead_ns'] ns.""" - node = _node("forwarding_v1", overhead_ns=3.0) + node = _node("builtin.forwarding", overhead_ns=3.0) comp = TransitComponent(node) env = simpy.Environment() @@ -69,7 +69,7 @@ def test_transit_component_yields_overhead_ns(): def test_transit_component_zero_overhead_ns(): """TransitComponent with overhead_ns=0 still yields (no infinite loop).""" - node = _node("noc_v1", overhead_ns=0.0) + node = _node("builtin.noc", overhead_ns=0.0) comp = TransitComponent(node) env = simpy.Environment() @@ -100,7 +100,7 @@ def test_engine_component_override_is_called(): SpyXbar.calls = 0 graph = _graph() - engine = GraphEngine(graph, component_overrides={"forwarding_v1": SpyXbar}) + engine = GraphEngine(graph, component_overrides={"builtin.forwarding": SpyXbar}) msg = MemoryReadMsg( correlation_id="c", request_id="r", src_sip=0, src_cube=0, src_pe=0, 
@@ -108,7 +108,7 @@ def test_engine_component_override_is_called(): ) h = engine.submit(msg) engine.wait(h) - # Path passes through router nodes (impl=forwarding_v1) + # Path passes through router nodes (impl=builtin.forwarding) assert SpyXbar.calls > 0 @@ -140,7 +140,7 @@ def test_engine_component_model_latency(): def test_engine_override_is_scoped_to_impl(): - """forwarding_v1 override (ZeroRouter, no overhead) reduces total_ns. + """builtin.forwarding override (ZeroRouter, no overhead) reduces total_ns. Router nodes have overhead_ns=2.0. Replacing with zero-latency impl removes router overhead from the path. @@ -152,7 +152,7 @@ def test_engine_override_is_scoped_to_impl(): graph = _graph() engine_default = GraphEngine(graph) - engine_override = GraphEngine(graph, component_overrides={"forwarding_v1": ZeroRouter}) + engine_override = GraphEngine(graph, component_overrides={"builtin.forwarding": ZeroRouter}) msg = MemoryReadMsg( correlation_id="c", request_id="r", @@ -168,5 +168,5 @@ def test_engine_override_is_scoped_to_impl(): engine_override.wait(h_o) _, t_override = engine_override.get_completion(h_o) - # ZeroRouter removes overhead from all forwarding_v1 nodes in path. + # ZeroRouter removes overhead from all builtin.forwarding nodes in path. 
assert t_override["total_ns"] < t_default["total_ns"] diff --git a/tests/test_mmu_component.py b/tests/test_mmu_component.py index b4ec8ed..8ab98d5 100644 --- a/tests/test_mmu_component.py +++ b/tests/test_mmu_component.py @@ -71,7 +71,7 @@ def test_mmu_unmap_msg_fields(): def test_pe_mmu_registry(): - """pe_mmu_v1 impl resolves in ComponentRegistry.""" + """builtin.pe_mmu impl resolves in ComponentRegistry.""" from kernbench.components.base import ComponentRegistry from kernbench.components.builtin.pe_mmu import PeMmuComponent from kernbench.topology.types import Node @@ -79,7 +79,7 @@ def test_pe_mmu_registry(): node = Node( id="sip0.cube0.pe0.pe_mmu", kind="pe_mmu", - impl="pe_mmu_v1", + impl="builtin.pe_mmu", pos_mm=None, attrs={"tlb_overhead_ns": 0.5}, ) @@ -101,7 +101,7 @@ def test_pe_mmu_processes_map_msg(): node = Node( id="sip0.cube0.pe0.pe_mmu", kind="pe_mmu", - impl="pe_mmu_v1", + impl="builtin.pe_mmu", pos_mm=None, attrs={"tlb_overhead_ns": 0.5, "page_size": 4096}, ) @@ -158,7 +158,7 @@ def test_pe_dma_translates_va(): node = Node( id="sip0.cube0.pe0.pe_dma", kind="pe_dma", - impl="pe_dma_v1", + impl="builtin.pe_dma", pos_mm=None, attrs={"rd_engines": 1, "wr_engines": 1}, ) diff --git a/tests/test_pe_components.py b/tests/test_pe_components.py index f6d7685..eceb27b 100644 --- a/tests/test_pe_components.py +++ b/tests/test_pe_components.py @@ -65,15 +65,15 @@ def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int: def test_pe_registry_resolves_all(): """All 6 PE component impl strings must resolve to their specific classes.""" expected = { - "pe_cpu_v1": PeCpuComponent, - "pe_scheduler_v1": PeSchedulerComponent, - "pe_dma_v1": PeDmaComponent, - "pe_gemm_v1": PeGemmComponent, - "pe_math_v1": PeMathComponent, - "pe_tcm_v1": PeTcmComponent, + "builtin.pe_cpu": PeCpuComponent, + "builtin.pe_scheduler": PeSchedulerComponent, + "builtin.pe_dma": PeDmaComponent, + "builtin.pe_gemm": PeGemmComponent, + "builtin.pe_math": PeMathComponent, + "builtin.pe_tcm": 
PeTcmComponent, } for impl, cls in expected.items(): - node = Node(id=f"test.{impl}", kind=impl.replace("_v1", ""), + node = Node(id=f"test.{impl}", kind=impl.replace("builtin.", ""), impl=impl, pos_mm=None, attrs={}) comp = ComponentRegistry.create(node) assert isinstance(comp, cls), f"{impl} resolved to {type(comp)}, expected {cls}" @@ -90,7 +90,7 @@ def test_pe_dma_dual_channel_concurrent(): """ env = simpy.Environment() node = Node(id="sip0.cube0.pe0.pe_dma", kind="pe_dma", - impl="pe_dma_v1", pos_mm=None, + impl="builtin.pe_dma", pos_mm=None, attrs={"rd_engines": 1, "wr_engines": 1}) comp = PeDmaComponent(node) @@ -151,7 +151,7 @@ def test_pe_dma_same_channel_serializes(): """Two READ operations on the same PE_DMA must serialize (capacity=1).""" env = simpy.Environment() node = Node(id="sip0.cube0.pe0.pe_dma", kind="pe_dma", - impl="pe_dma_v1", pos_mm=None, + impl="builtin.pe_dma", pos_mm=None, attrs={"rd_engines": 1, "wr_engines": 1}) comp = PeDmaComponent(node) @@ -219,11 +219,11 @@ def test_pe_accel_shared_slot(): pe_prefix = "sip0.cube0.pe0" gemm_node = Node( - id=f"{pe_prefix}.pe_gemm", kind="pe_gemm", impl="pe_gemm_v1", + id=f"{pe_prefix}.pe_gemm", kind="pe_gemm", impl="builtin.pe_gemm", pos_mm=None, attrs={"overhead_ns": 10.0, "shared_resource": "accel_slot"}, ) math_node = Node( - id=f"{pe_prefix}.pe_math", kind="pe_math", impl="pe_math_v1", + id=f"{pe_prefix}.pe_math", kind="pe_math", impl="builtin.pe_math", pos_mm=None, attrs={"overhead_ns": 10.0, "shared_resource": "accel_slot"}, ) gemm = PeGemmComponent(gemm_node, ctx) @@ -302,7 +302,7 @@ def test_pe_gemm_handles_pe_internal_txn(): pe_prefix = "sip0.cube0.pe0" gemm_node = Node( - id=f"{pe_prefix}.pe_gemm", kind="pe_gemm", impl="pe_gemm_v1", + id=f"{pe_prefix}.pe_gemm", kind="pe_gemm", impl="builtin.pe_gemm", pos_mm=None, attrs={"overhead_ns": 5.0, "shared_resource": "accel_slot"}, ) gemm = PeGemmComponent(gemm_node, ctx) @@ -343,7 +343,7 @@ def test_pe_math_handles_pe_internal_txn(): pe_prefix = 
"sip0.cube0.pe0" math_node = Node( - id=f"{pe_prefix}.pe_math", kind="pe_math", impl="pe_math_v1", + id=f"{pe_prefix}.pe_math", kind="pe_math", impl="builtin.pe_math", pos_mm=None, attrs={"overhead_ns": 3.0, "shared_resource": "accel_slot"}, ) math_comp = PeMathComponent(math_node, ctx) diff --git a/tests/test_phase_a_components.py b/tests/test_phase_a_components.py index 466cb73..0330943 100644 --- a/tests/test_phase_a_components.py +++ b/tests/test_phase_a_components.py @@ -57,7 +57,7 @@ def _inject(store: simpy.Store, txn: Transaction): def test_transit_component_run_overhead_ns(): """TransitComponent.run() yields exactly overhead_ns.""" - node = _node("forwarding_v1", {"overhead_ns": 7.5}) + node = _node("builtin.forwarding", {"overhead_ns": 7.5}) comp = TransitComponent(node) env = simpy.Environment() @@ -71,7 +71,7 @@ def test_transit_component_run_overhead_ns(): def test_transit_component_run_zero_overhead_ns(): """TransitComponent.run() with overhead_ns=0 completes immediately.""" - node = _node("noc_v1", {"overhead_ns": 0.0}) + node = _node("builtin.noc", {"overhead_ns": 0.0}) comp = TransitComponent(node) env = simpy.Environment() done = [] @@ -91,7 +91,7 @@ def test_transit_component_run_zero_overhead_ns(): def test_io_cpu_component_run_overhead_ns(): """IoCpuComponent.run() yields exactly overhead_ns.""" - node = _node("io_cpu_v1", {"overhead_ns": 10.0}) + node = _node("builtin.io_cpu", {"overhead_ns": 10.0}) comp = IoCpuComponent(node) env = simpy.Environment() @@ -108,7 +108,7 @@ def test_io_cpu_component_run_overhead_ns(): def test_hbm_ctrl_terminal_succeeds_done(): """HbmCtrlComponent is a terminal node: succeeds txn.done after run().""" - node = _node("hbm_ctrl_v1", {"overhead_ns": 0.0, "capacity": 1}) + node = _node("builtin.hbm_ctrl", {"overhead_ns": 0.0, "capacity": 1}) comp = HbmCtrlComponent(node) env = simpy.Environment() done_event = env.event() @@ -121,7 +121,7 @@ def test_hbm_ctrl_terminal_succeeds_done(): def 
test_hbm_ctrl_resource_serializes_requests(): """HbmCtrlComponent with capacity=1 serializes concurrent requests.""" - node = _node("hbm_ctrl_v1", {"overhead_ns": 5.0, "capacity": 1}) + node = _node("builtin.hbm_ctrl", {"overhead_ns": 5.0, "capacity": 1}) comp = HbmCtrlComponent(node) env = simpy.Environment() in_store: simpy.Store = simpy.Store(env) @@ -151,7 +151,7 @@ def test_hbm_ctrl_resource_serializes_requests(): def test_sram_terminal_succeeds_done(): """SramComponent is a terminal node: succeeds txn.done after run().""" - node = _node("sram_v1", {"overhead_ns": 2.0}) + node = _node("builtin.sram", {"overhead_ns": 2.0}) comp = SramComponent(node) env = simpy.Environment() done_event = env.event() @@ -168,7 +168,7 @@ def test_sram_terminal_succeeds_done(): def test_m_cpu_forwards_when_not_terminal(): """MCpuComponent forwards Transaction to next hop when not terminal.""" - node = _node("m_cpu_v1", {"overhead_ns": 5.0}) + node = _node("builtin.m_cpu", {"overhead_ns": 5.0}) comp = MCpuComponent(node) env = simpy.Environment() @@ -213,7 +213,7 @@ def test_m_cpu_forwards_when_not_terminal(): def test_m_cpu_terminal_no_ctx_completes(): """MCpuComponent without ctx completes txn.done when it is the terminal hop.""" - node = _node("m_cpu_v1", {"overhead_ns": 0.0}) + node = _node("builtin.m_cpu", {"overhead_ns": 0.0}) comp = MCpuComponent(node, ctx=None) env = simpy.Environment() done_event = env.event() @@ -228,14 +228,14 @@ def test_m_cpu_terminal_no_ctx_completes(): @pytest.mark.parametrize("impl,expected_cls", [ - ("forwarding_v1", TransitComponent), - ("noc_v1", TransitComponent), - ("ucie_v1", TransitComponent), - ("pcie_ep_v1", PcieEpComponent), - ("io_cpu_v1", IoCpuComponent), - ("m_cpu_v1", MCpuComponent), - ("hbm_ctrl_v1", HbmCtrlComponent), - ("sram_v1", SramComponent), + ("builtin.forwarding", TransitComponent), + ("builtin.noc", TransitComponent), + ("builtin.ucie", TransitComponent), + ("builtin.pcie_ep", PcieEpComponent), + ("builtin.io_cpu", 
IoCpuComponent), + ("builtin.m_cpu", MCpuComponent), + ("builtin.hbm_ctrl", HbmCtrlComponent), + ("builtin.sram", SramComponent), ]) def test_registry_resolves_impl(impl, expected_cls): """ComponentRegistry.create() returns the correct concrete class for each impl.""" diff --git a/topology.yaml b/topology.yaml index fedde91..ad936cc 100644 --- a/topology.yaml +++ b/topology.yaml @@ -6,7 +6,7 @@ system: count: 2 components: - switch: { kind: switch, impl: switch_v1, attrs: { overhead_ns: 5.0 } } + switch: { kind: switch, impl: builtin.switch, attrs: { overhead_ns: 5.0 } } links: io_ep_to_switch: @@ -19,9 +19,9 @@ sip: iochiplet: components: - pcie_ep: { kind: pcie_ep, impl: pcie_ep_v1, attrs: { overhead_ns: 5.0 } } - io_cpu: { kind: io_cpu, impl: io_cpu_v1, attrs: { overhead_ns: 10.0 } } - io_noc: { kind: io_noc, impl: forwarding_v1, attrs: { overhead_ns: 0.0 } } + pcie_ep: { kind: pcie_ep, impl: builtin.pcie_ep, attrs: { overhead_ns: 5.0 } } + io_cpu: { kind: io_cpu, impl: builtin.io_cpu, attrs: { overhead_ns: 10.0 } } + io_noc: { kind: io_noc, impl: builtin.forwarding, attrs: { overhead_ns: 0.0 } } links: pcie_ep_to_noc_bw_gbs: 256.0 pcie_ep_to_noc_mm: 1.0 @@ -60,14 +60,14 @@ cube: pe_template: components: - pe_cpu: { kind: pe_cpu, impl: pe_cpu_v1, attrs: { overhead_ns: 2.0 } } - pe_scheduler: { kind: pe_scheduler, impl: pe_scheduler_v1, attrs: { overhead_ns: 1.0 } } - pe_dma: { kind: pe_dma, impl: pe_dma_v1, attrs: { rd_engines: 1, wr_engines: 1 } } - pe_gemm: { kind: pe_gemm, impl: pe_gemm_v1, attrs: { overhead_ns: 0.0, shared_resource: accel_slot, peak_tflops_f16: 8.0 } } - pe_math: { kind: pe_math, impl: pe_math_v1, attrs: { overhead_ns: 0.0, shared_resource: accel_slot } } - pe_fetch_store: { kind: pe_fetch_store, impl: pe_fetch_store_v1, attrs: { overhead_ns: 0.0 } } - pe_mmu: { kind: pe_mmu, impl: pe_mmu_v1, attrs: { tlb_overhead_ns: 0.5, page_size: 4096 } } - pe_tcm: { kind: pe_tcm, impl: pe_tcm_v1, attrs: { size_mb: 16, read_bw_gbs: 512.0, write_bw_gbs: 
512.0 } } + pe_cpu: { kind: pe_cpu, impl: builtin.pe_cpu, attrs: { overhead_ns: 2.0 } } + pe_scheduler: { kind: pe_scheduler, impl: builtin.pe_scheduler, attrs: { overhead_ns: 1.0 } } + pe_dma: { kind: pe_dma, impl: builtin.pe_dma, attrs: { rd_engines: 1, wr_engines: 1 } } + pe_gemm: { kind: pe_gemm, impl: builtin.pe_gemm, attrs: { overhead_ns: 0.0, shared_resource: accel_slot, peak_tflops_f16: 8.0 } } + pe_math: { kind: pe_math, impl: builtin.pe_math, attrs: { overhead_ns: 0.0, shared_resource: accel_slot } } + pe_fetch_store: { kind: pe_fetch_store, impl: builtin.pe_fetch_store, attrs: { overhead_ns: 0.0 } } + pe_mmu: { kind: pe_mmu, impl: builtin.pe_mmu, attrs: { tlb_overhead_ns: 0.5, page_size: 4096 } } + pe_tcm: { kind: pe_tcm, impl: builtin.pe_tcm, attrs: { size_mb: 16, read_bw_gbs: 512.0, write_bw_gbs: 512.0 } } links: pe_cpu_to_scheduler_mm: 0.5 scheduler_to_dma_mm: 0.5 @@ -99,10 +99,10 @@ cube: hbm_channel_bw_gbs: 32.0 # per-channel bandwidth (GB/s) components: - noc_router: { kind: noc_router, impl: forwarding_v1, attrs: { overhead_ns: 2.0 } } - m_cpu: { kind: m_cpu, impl: m_cpu_v1, attrs: { overhead_ns: 5.0 } } - hbm_ctrl: { kind: hbm_ctrl, impl: hbm_ctrl_v1, attrs: { capacity: 1, efficiency: 1.0 } } - sram: { kind: sram, impl: sram_v1, attrs: { size_mb: 32, overhead_ns: 2.0 } } + noc_router: { kind: noc_router, impl: builtin.forwarding, attrs: { overhead_ns: 2.0 } } + m_cpu: { kind: m_cpu, impl: builtin.m_cpu, attrs: { overhead_ns: 5.0 } } + hbm_ctrl: { kind: hbm_ctrl, impl: builtin.hbm_ctrl, attrs: { capacity: 1, efficiency: 1.0 } } + sram: { kind: sram, impl: builtin.sram, attrs: { size_mb: 32, overhead_ns: 2.0 } } # Physical placement of non-PE components (mm coordinates) placement: