Restructure legacy backups, remove pe_accel, fix DMA self-routing

- Move builtin_legacy/ → legacy/builtin/ (cleaner structure)
- Move pe_accel_legacy/ → legacy/pe_accel/
- Remove custom/pe_accel/ (replaced by new builtin)
- Remove pe_scheduler_v2 from components.yaml
- Switch topology.yaml to pe_scheduler_v1 (new builtin)
- Fix PE_DMA self-routing: handle consecutive DMA_READ stages
  (same component consecutive stages processed in-place, not via port)

382 tests passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-09 00:02:26 -07:00
parent 95d583ef9f
commit 1d95df4bee
44 changed files with 23 additions and 1660 deletions
+20 -15
View File
@@ -121,17 +121,31 @@ class PeDmaComponent(PeEngineBase):
def _pipeline_process(self, env: simpy.Environment, token: Any) -> Generator:
"""Pipeline mode: DMA read/write via fabric, then self-route."""
from kernbench.common.pe_commands import DmaReadCmd, DmaWriteCmd, TensorHandle
self._on_process_start(env, token)
yield from self._do_pipeline_dma(env, token)
self._on_process_end(env, token)
# Self-routing (handle same-component consecutive stages)
next_stage = token.advance()
while next_stage is not None and next_stage.component == self.node.id:
self._on_process_start(env, token)
yield from self._do_pipeline_dma(env, token)
self._on_process_end(env, token)
next_stage = token.advance()
if next_stage is not None:
yield self.out_ports[next_stage.component].put(token)
else:
token.pipeline_ctx.complete_tile()
def _do_pipeline_dma(self, env, token):
"""Core DMA logic for pipeline mode."""
from kernbench.policy.address.phyaddr import PhysAddr
from kernbench.runtime_api.kernel import PeDmaMsg
self._on_process_start(env, token)
params = token.params
stage_type = token.current_stage.stage_type
from kernbench.components.builtin.pe_types import StageType
is_write = stage_type == StageType.DMA_WRITE
is_write = token.current_stage.stage_type == StageType.DMA_WRITE
addr = params.get("dst_addr" if is_write else "src_addr", 0)
nbytes = params.get("nbytes", 0)
@@ -163,15 +177,6 @@ class PeDmaComponent(PeEngineBase):
yield sub_done
self._on_process_end(env, token)
# Self-routing
next_stage = token.advance()
if next_stage is not None:
yield self.out_ports[next_stage.component].put(token)
else:
token.pipeline_ctx.complete_tile()
def _forward_txn(self, env: simpy.Environment, txn: Any) -> Generator:
"""Handle external Transaction (PeDmaMsg probe, M_CPU DMA) with channel acquisition."""
# Response transactions bypass DMA channel (no outbound resource needed)