Implement ADR-0021: PE pipeline refactor with token self-routing
Step 1-2: Backup existing code
- builtin/ → builtin_legacy/ (unchanged backup)
- custom/pe_accel/ → custom/pe_accel_legacy/ (unchanged backup)

Step 3-4: New pipeline types and tiling
- pe_types.py: StageType, Stage, TilePlan, PipelinePlan, PipelineContext, TileToken
- tiling.py: generate_gemm_plan, generate_math_plan (ported from pe_accel)

Step 5: Component implementations (ADR-0021 D4-D6)
- PE_SCHEDULER: _feed_loop (singleton FIFO feeder) + plan generation
- PE_FETCH_STORE: new component — TCM ↔ Register File
- PE_GEMM: TileToken pipeline + legacy PeInternalTxn dual-mode
- PE_MATH: TileToken pipeline + legacy dual-mode
- PE_DMA: TileToken pipeline + legacy + fabric Transaction triple-mode
- PE_TCM: TcmRequest handler with dual-channel BW serialization

Step 6: Infrastructure
- topology.yaml: pe_fetch_store component + chaining edges
- components.yaml: pe_fetch_store_v1 registration
- builder.py: PE_COMP_OFFSETS, _add_pe_internal_edges, PE view positions
- Tests: node/edge counts, PE component sets updated

All components handle both TileToken (pipeline) and PeInternalTxn (legacy).
Token self-routing: components read next stage from token.plan, chain via out_port.

366 tests passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -105,6 +105,73 @@ class PeDmaComponent(PeEngineBase):
|
||||
yield sub_done
|
||||
pe_txn.done.succeed()
|
||||
|
||||
def _worker(self, env: simpy.Environment) -> Generator:
    """Dispatch inbox messages to a per-message handler process.

    Runs forever: takes one message at a time from ``self._inbox`` and
    spawns a new simulation process for it, chosen by message type:

    * ``TileToken``      -> ``_pipeline_process`` (pipeline mode)
    * ``PeInternalTxn``  -> ``_handle_with_hooks`` (legacy mode)
    * anything else      -> ``_forward_txn`` (fabric Transaction)

    The worker does not wait for the spawned process; it immediately goes
    back to waiting on the inbox.
    """
    from kernbench.common.pe_commands import PeInternalTxn
    from kernbench.components.builtin.pe_types import TileToken

    while True:
        incoming: Any = yield self._inbox.get()

        # Pick the handler first, then spawn it in a single place.
        if isinstance(incoming, TileToken):
            handler = self._pipeline_process
        elif isinstance(incoming, PeInternalTxn):
            handler = self._handle_with_hooks
        else:
            handler = self._forward_txn

        env.process(handler(env, incoming))
|
||||
|
||||
def _pipeline_process(self, env: simpy.Environment, token: Any) -> Generator:
    """Pipeline mode: DMA read/write via fabric, then self-route.

    Steps:

    1. Call the ``_on_process_start`` hook.
    2. Read the transfer address and size from ``token.params``. The
       address key is ``"dst_addr"`` for a ``StageType.DMA_WRITE`` stage
       and ``"src_addr"`` otherwise; both address and ``"nbytes"``
       default to 0 when absent.
    3. If ``nbytes > 0`` and ``self.ctx`` is set, acquire the matching DMA
       resource (``_dma_write`` / ``_dma_read``), send a sub-transaction
       to the next hop of the routed path and wait for its ``done`` event.
    4. Call the ``_on_process_end`` hook.
    5. Advance the token: forward it to the next stage's component via
       ``out_ports``, or call ``token.pipeline_ctx.complete_tile()`` when
       ``token.advance()`` returns ``None``.

    Args:
        env: Simulation environment used to create the completion event.
        token: Tile token being processed; this method reads ``params``,
            ``current_stage``, ``tile_id`` and ``pipeline_ctx`` and calls
            ``advance()`` on it.
    """
    from kernbench.components.builtin.pe_types import StageType
    from kernbench.policy.address.phyaddr import PhysAddr
    from kernbench.runtime_api.kernel import PeDmaMsg

    self._on_process_start(env, token)

    params = token.params
    is_write = token.current_stage.stage_type == StageType.DMA_WRITE
    addr = params.get("dst_addr" if is_write else "src_addr", 0)
    nbytes = params.get("nbytes", 0)

    if nbytes > 0 and self.ctx:
        dma_res = self._dma_write if is_write else self._dma_read
        assert dma_res is not None

        pa = PhysAddr.decode(addr)
        dst_node = self.ctx.resolver.resolve(pa)
        path = self.ctx.router.find_path(self._pe_prefix, dst_node)
        drain_ns = self.ctx.compute_drain_ns(path, nbytes)

        with dma_res.request() as req:
            yield req

            # Only wait for completion when the sub-transaction was actually
            # sent. With no next hop, nothing would ever trigger the done
            # event, and waiting on it would block this process forever
            # while still holding the DMA channel.
            # NOTE(review): a path with no next hop is treated as "nothing
            # to transfer over the fabric" — confirm this is the intended
            # handling.
            if len(path) > 1:
                sub_done = env.event()
                sub_request = PeDmaMsg(
                    correlation_id="pipeline",
                    request_id=f"tile_{token.tile_id}",
                    src_sip=0, src_cube=0, src_pe=0,
                    dst_pa=addr, nbytes=nbytes,
                    is_write=is_write,
                )
                sub_txn = Transaction(
                    request=sub_request, path=path, step=0,
                    nbytes=nbytes, done=sub_done, drain_ns=drain_ns,
                )
                yield self.out_ports[path[1]].put(sub_txn.advance())
                yield sub_done

    self._on_process_end(env, token)

    # Self-routing: the token itself says which component comes next.
    next_stage = token.advance()
    if next_stage is not None:
        yield self.out_ports[next_stage.component].put(token)
    else:
        token.pipeline_ctx.complete_tile()
|
||||
|
||||
def _forward_txn(self, env: simpy.Environment, txn: Any) -> Generator:
|
||||
"""Handle external Transaction (PeDmaMsg probe, M_CPU DMA) with channel acquisition."""
|
||||
# Response transactions bypass DMA channel (no outbound resource needed)
|
||||
|
||||
Reference in New Issue
Block a user