Implement ADR-0021: PE pipeline refactor with token self-routing
Step 1-2: Backup existing code
- builtin/ → builtin_legacy/ (unchanged backup)
- custom/pe_accel/ → custom/pe_accel_legacy/ (unchanged backup)

Step 3-4: New pipeline types and tiling
- pe_types.py: StageType, Stage, TilePlan, PipelinePlan, PipelineContext, TileToken
- tiling.py: generate_gemm_plan, generate_math_plan (ported from pe_accel)

Step 5: Component implementations (ADR-0021 D4-D6)
- PE_SCHEDULER: _feed_loop (singleton FIFO feeder) + plan generation
- PE_FETCH_STORE: new component — TCM ↔ Register File
- PE_GEMM: TileToken pipeline + legacy PeInternalTxn dual-mode
- PE_MATH: TileToken pipeline + legacy dual-mode
- PE_DMA: TileToken pipeline + legacy + fabric Transaction triple-mode
- PE_TCM: TcmRequest handler with dual-channel BW serialization

Step 6: Infrastructure
- topology.yaml: pe_fetch_store component + chaining edges
- components.yaml: pe_fetch_store_v1 registration
- builder.py: PE_COMP_OFFSETS, _add_pe_internal_edges, PE view positions
- Tests: node/edge counts, PE component sets updated

All components handle both TileToken (pipeline) and PeInternalTxn (legacy).
Token self-routing: components read next stage from token.plan, chain via out_port.

366 tests passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,7 @@ _PE_COMP_OFFSETS = {
|
||||
"pe_cpu": (-0.3, 0.0),
|
||||
"pe_scheduler": (-0.15, 0.0),
|
||||
"pe_dma": (0.0, -0.15),
|
||||
"pe_fetch_store": (0.15, 0.0),
|
||||
"pe_gemm": (0.0, 0.0),
|
||||
"pe_math": (0.0, 0.15),
|
||||
"pe_mmu": (0.15, -0.15),
|
||||
@@ -637,12 +638,13 @@ def _instantiate_cube(
|
||||
|
||||
|
||||
def _add_pe_internal_edges(edges: list[Edge], pp: str, pe_links: dict) -> None:
|
||||
"""Add PE-internal edges for a single PE instance."""
|
||||
"""Add PE-internal edges for a single PE instance (ADR-0021)."""
|
||||
edges.append(Edge(
|
||||
src=f"{pp}.pe_cpu", dst=f"{pp}.pe_scheduler",
|
||||
distance_mm=pe_links["pe_cpu_to_scheduler_mm"],
|
||||
kind="pe_internal",
|
||||
))
|
||||
# Scheduler → engines (initial dispatch)
|
||||
for eng, key in [("pe_dma", "scheduler_to_dma_mm"),
|
||||
("pe_gemm", "scheduler_to_gemm_mm"),
|
||||
("pe_math", "scheduler_to_math_mm")]:
|
||||
@@ -651,6 +653,15 @@ def _add_pe_internal_edges(edges: list[Edge], pp: str, pe_links: dict) -> None:
|
||||
distance_mm=pe_links[key],
|
||||
kind="pe_internal",
|
||||
))
|
||||
# Scheduler → fetch_store (initial dispatch)
|
||||
if "scheduler_to_fetch_store_mm" in pe_links:
|
||||
edges.append(Edge(
|
||||
src=f"{pp}.pe_scheduler", dst=f"{pp}.pe_fetch_store",
|
||||
distance_mm=pe_links["scheduler_to_fetch_store_mm"],
|
||||
kind="pe_internal",
|
||||
))
|
||||
|
||||
# Engine → TCM (legacy BW edges)
|
||||
for eng, mm_key, bw_key in [("pe_dma", "dma_to_tcm_mm", "dma_to_tcm_bw_gbs"),
|
||||
("pe_gemm", "gemm_to_tcm_mm", "gemm_to_tcm_bw_gbs"),
|
||||
("pe_math", "math_to_tcm_mm", "math_to_tcm_bw_gbs")]:
|
||||
@@ -661,6 +672,32 @@ def _add_pe_internal_edges(edges: list[Edge], pp: str, pe_links: dict) -> None:
|
||||
kind="pe_internal",
|
||||
))
|
||||
|
||||
# Fetch/Store → TCM (ADR-0021 D5)
|
||||
if "fetch_store_to_tcm_mm" in pe_links:
|
||||
edges.append(Edge(
|
||||
src=f"{pp}.pe_fetch_store", dst=f"{pp}.pe_tcm",
|
||||
distance_mm=pe_links["fetch_store_to_tcm_mm"],
|
||||
bw_gbs=pe_links.get("fetch_store_to_tcm_bw_gbs", 512.0),
|
||||
kind="pe_internal",
|
||||
))
|
||||
|
||||
# Chaining edges (ADR-0021 D4 — token self-routing)
|
||||
chaining = [
|
||||
("pe_dma", "pe_fetch_store", "dma_to_fetch_store_mm"),
|
||||
("pe_fetch_store", "pe_gemm", "fetch_store_to_gemm_mm"),
|
||||
("pe_fetch_store", "pe_math", "fetch_store_to_math_mm"),
|
||||
("pe_gemm", "pe_fetch_store", "gemm_to_fetch_store_mm"),
|
||||
("pe_math", "pe_fetch_store", "math_to_fetch_store_mm"),
|
||||
("pe_fetch_store", "pe_dma", "fetch_store_to_dma_mm"),
|
||||
]
|
||||
for src_eng, dst_eng, mm_key in chaining:
|
||||
if mm_key in pe_links:
|
||||
edges.append(Edge(
|
||||
src=f"{pp}.{src_eng}", dst=f"{pp}.{dst_eng}",
|
||||
distance_mm=pe_links[mm_key],
|
||||
kind="pe_internal",
|
||||
))
|
||||
|
||||
|
||||
# ── Inter-cube / IO / system edges ──────────────────────────────────
|
||||
|
||||
@@ -1071,6 +1108,7 @@ def _build_pe_view(spec: dict) -> ViewGraph:
|
||||
"pe_cpu": (1.5, 4.0),
|
||||
"pe_scheduler": (4.0, 4.0),
|
||||
"pe_dma": (7.0, 1.5),
|
||||
"pe_fetch_store": (8.5, 4.0),
|
||||
"pe_gemm": (7.0, 4.0),
|
||||
"pe_math": (7.0, 6.5),
|
||||
"pe_mmu": (4.0, 1.5),
|
||||
@@ -1101,6 +1139,12 @@ def _build_pe_view(spec: dict) -> ViewGraph:
|
||||
distance_mm=pe_links[key],
|
||||
kind="pe_internal",
|
||||
))
|
||||
if "scheduler_to_fetch_store_mm" in pe_links:
|
||||
view_edges.append(Edge(
|
||||
src="pe_scheduler", dst="pe_fetch_store",
|
||||
distance_mm=pe_links["scheduler_to_fetch_store_mm"],
|
||||
kind="pe_internal",
|
||||
))
|
||||
for eng, mm_key, bw_key in [("pe_dma", "dma_to_tcm_mm", "dma_to_tcm_bw_gbs"),
|
||||
("pe_gemm", "gemm_to_tcm_mm", "gemm_to_tcm_bw_gbs"),
|
||||
("pe_math", "math_to_tcm_mm", "math_to_tcm_bw_gbs")]:
|
||||
@@ -1110,6 +1154,13 @@ def _build_pe_view(spec: dict) -> ViewGraph:
|
||||
bw_gbs=pe_links[bw_key],
|
||||
kind="pe_internal",
|
||||
))
|
||||
if "fetch_store_to_tcm_mm" in pe_links:
|
||||
view_edges.append(Edge(
|
||||
src="pe_fetch_store", dst="pe_tcm",
|
||||
distance_mm=pe_links["fetch_store_to_tcm_mm"],
|
||||
bw_gbs=pe_links.get("fetch_store_to_tcm_bw_gbs", 512.0),
|
||||
kind="pe_internal",
|
||||
))
|
||||
|
||||
return ViewGraph(
|
||||
name="pe", nodes=nodes, edges=view_edges,
|
||||
|
||||
Reference in New Issue
Block a user