commit - release 1
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from kernbench.policy.address.phyaddr import PhysAddr
|
||||
|
||||
|
||||
class AllocationError(Exception):
    """Raised when a memory allocation request cannot be satisfied."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AddressConfig:
    """Immutable sizing parameters from which per-PE memory budgets are derived.

    Only ``hbm_bytes_per_cube``, ``hbm_slices_per_cube``, ``tcm_bytes_per_pe``
    and ``tcm_scheduler_reserved_bytes`` are read by the properties below; the
    remaining fields are carried for other consumers.
    """

    sip_count: int
    cubes_per_sip: int
    pes_per_cube: int
    hbm_bytes_per_cube: int
    hbm_slices_per_cube: int
    tcm_bytes_per_pe: int
    tcm_scheduler_reserved_bytes: int
    sram_bytes_per_cube: int

    @property
    def hbm_slice_bytes(self) -> int:
        """Size of one HBM slice: the cube's HBM divided (floor) by its slice count."""
        total_bytes = self.hbm_bytes_per_cube
        slice_count = self.hbm_slices_per_cube
        return total_bytes // slice_count

    @property
    def tcm_allocatable_bytes(self) -> int:
        """TCM bytes per PE that remain after subtracting the scheduler reservation."""
        reserved = self.tcm_scheduler_reserved_bytes
        return self.tcm_bytes_per_pe - reserved
|
||||
|
||||
|
||||
class PEMemAllocator:
    """Forward-only (bump) allocator for one PE's HBM slice and TCM.

    Each region has its own cursor that starts at 0 and only ever advances by
    the size of each successful allocation; nothing is freed.  Capacities come
    from ``cfg.hbm_slice_bytes`` and ``cfg.tcm_allocatable_bytes``.
    """

    def __init__(
        self, rack_id: int, sip_id: int, cube_id: int, pe_id: int, cfg: AddressConfig,
    ) -> None:
        """Remember the PE's coordinates and config; both cursors start at 0."""
        self._rack_id = rack_id
        self._sip_id = sip_id
        self._cube_id = cube_id
        self._pe_id = pe_id
        self._cfg = cfg
        self._hbm_cursor = 0
        self._tcm_cursor = 0

    @staticmethod
    def _check_request(region: str, nbytes: int, used: int, capacity: int) -> None:
        """Raise AllocationError unless ``nbytes`` is non-negative and fits in the region."""
        # A negative size would pass the overflow test and move the cursor
        # backwards, making later allocations overlap earlier ones.
        if nbytes < 0:
            raise AllocationError(
                f"{region} allocation size must be non-negative: {nbytes}"
            )
        if used + nbytes > capacity:
            raise AllocationError(
                f"{region} overflow: need {nbytes}, "
                f"available {capacity - used}"
            )

    def alloc_hbm(self, nbytes: int) -> PhysAddr:
        """Reserve ``nbytes`` in this PE's HBM slice and return the start address.

        Raises:
            AllocationError: if ``nbytes`` is negative or exceeds the space left.
        """
        slice_bytes = self._cfg.hbm_slice_bytes
        self._check_request("HBM", nbytes, self._hbm_cursor, slice_bytes)
        pa = PhysAddr.pe_hbm_addr(
            rack_id=self._rack_id, sip_id=self._sip_id, cube_id=self._cube_id,
            pe_id=self._pe_id, pe_local_hbm_offset=self._hbm_cursor,
            slice_size_bytes=slice_bytes,
        )
        # Advance only after the address was built, so a failure leaves the
        # cursor untouched.
        self._hbm_cursor += nbytes
        return pa

    def alloc_tcm(self, nbytes: int) -> PhysAddr:
        """Reserve ``nbytes`` in this PE's TCM and return the start address.

        Raises:
            AllocationError: if ``nbytes`` is negative or exceeds the space left.
        """
        self._check_request(
            "TCM", nbytes, self._tcm_cursor, self._cfg.tcm_allocatable_bytes,
        )
        pa = PhysAddr.pe_tcm_addr(
            rack_id=self._rack_id, sip_id=self._sip_id, cube_id=self._cube_id,
            pe_id=self._pe_id, tcm_offset=self._tcm_cursor,
        )
        self._tcm_cursor += nbytes
        return pa

    @property
    def hbm_used(self) -> int:
        """Bytes of the HBM slice handed out so far."""
        return self._hbm_cursor

    @property
    def hbm_total(self) -> int:
        """Capacity of this PE's HBM slice in bytes."""
        return self._cfg.hbm_slice_bytes

    @property
    def tcm_used(self) -> int:
        """Bytes of TCM handed out so far."""
        return self._tcm_cursor

    @property
    def tcm_total(self) -> int:
        """Allocatable TCM capacity in bytes."""
        return self._cfg.tcm_allocatable_bytes
|
||||
@@ -0,0 +1,184 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import IntEnum
|
||||
from typing import Literal
|
||||
|
||||
# Largest value representable in 51 bits (all 51 bits set).
MAX_51 = (1 << 51) - 1
|
||||
|
||||
|
||||
class PhysAddrError(Exception):
    """Raised when a physical-address field or value is invalid."""
|
||||
|
||||
|
||||
def _chk_range(name: str, v: int, bits: int) -> None:
|
||||
if not (0 <= v < (1 << bits)):
|
||||
raise PhysAddrError(f"{name} out of range for {bits} bits: {v}")
|
||||
|
||||
|
||||
def _chk_max(name: str, v: int, maxv: int) -> None:
|
||||
if not (0 <= v <= maxv):
|
||||
raise PhysAddrError(f"{name} out of range (0..{maxv}): {v}")
|
||||
|
||||
|
||||
class UnitType(IntEnum):
    """Integer codes identifying the kind of unit an address targets."""

    PE = 0
    MCPU = 1
    SRAM = 2
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PhysAddr:
    """
    51-bit physical address value object.

    Layout:
      [50:47] rack_id      (4)
      [46:43] sip_id       (4)
      [42:38] sip_seg      (5)   # cube_id
      [37:0]  local_offset (38)  => each segment is 256GB

    local_offset:
      [37] selector: 1 = HBM window (128GB reserved), 0 = PE resource window

    PE resource window (selector = 0), as read by ``decode``:
      [36:34] unit_type  (3)
      [33:30] pe_id      (4)
      [29]    ext        (1)
      [28:0]  sub_offset (29)

    ``encode`` uses only rack_id, sip_id, sip_seg and local_offset.  The
    remaining fields are decoded views filled in by ``decode`` and by the
    factory methods.
    """

    rack_id: int
    sip_id: int
    sip_seg: int
    local_offset: int

    kind: Literal["hbm", "pe_resource", "raw"] = "raw"
    cube_id: int = 0
    unit_type: UnitType = UnitType.PE
    pe_id: int = 0
    ext: int = 0
    sub_offset: int = 0
    hbm_offset: int = 0

    HBM_WINDOW_BYTES = 1 << 37  # 128GB

    def encode(self) -> int:
        """Pack rack_id, sip_id, sip_seg and local_offset into one 51-bit integer.

        Raises:
            PhysAddrError: if any of the four fields does not fit its bit width.
        """
        _chk_range("rack_id", self.rack_id, 4)
        _chk_range("sip_id", self.sip_id, 4)
        _chk_range("sip_seg", self.sip_seg, 5)
        _chk_range("local_offset", self.local_offset, 38)
        addr = (self.rack_id << 47) | (self.sip_id << 43) | (self.sip_seg << 38) | self.local_offset
        # Defensive: already implied by the per-field checks above.
        if not (0 <= addr <= MAX_51):
            raise PhysAddrError("address exceeds 51-bit space")
        return addr

    @staticmethod
    def decode(addr: int) -> PhysAddr:
        """Split a 51-bit integer into a PhysAddr with its decoded view fields set.

        Raises:
            PhysAddrError: if ``addr`` is outside 0..MAX_51, or if a PE-resource
                address carries a unit_type code that is not a ``UnitType``.
        """
        if not (0 <= addr <= MAX_51):
            raise PhysAddrError("addr must be a 51-bit value")
        rack = (addr >> 47) & 0xF
        sip_id = (addr >> 43) & 0xF
        sip_seg = (addr >> 38) & 0x1F
        off = addr & ((1 << 38) - 1)
        cube_id = sip_seg
        sel = (off >> 37) & 0x1
        if sel == 1:
            # HBM window: the low 37 bits are the offset inside the window.
            hbm_offset = int(off & ((1 << 37) - 1))
            return PhysAddr(
                rack_id=rack,
                sip_id=sip_id,
                sip_seg=sip_seg,
                local_offset=off,
                kind="hbm",
                cube_id=cube_id,
                hbm_offset=hbm_offset,
            )
        # PE resource decode
        raw_ut = int((off >> 34) & 0x7)
        try:
            unit_type = UnitType(raw_ut)
        except ValueError:
            raise PhysAddrError(f"unknown unit_type: {raw_ut}") from None
        pe_id = int((off >> 30) & 0xF)
        ext = int((off >> 29) & 0x1)
        sub_offset = int(off & ((1 << 29) - 1))
        return PhysAddr(
            rack_id=rack,
            sip_id=sip_id,
            sip_seg=sip_seg,
            local_offset=off,
            kind="pe_resource",
            cube_id=cube_id,
            unit_type=unit_type,
            pe_id=pe_id,
            ext=ext,
            sub_offset=sub_offset,
            hbm_offset=0,
        )

    @staticmethod
    def hbm_addr(*, rack_id: int, sip_id: int, cube_id: int, hbm_offset: int) -> PhysAddr:
        """Build an HBM-window address for ``cube_id`` at ``hbm_offset``.

        rack_id and sip_id are not range-checked here; ``encode`` checks them.

        Raises:
            PhysAddrError: if cube_id is outside 0..31 or hbm_offset needs more
                than 37 bits.
        """
        _chk_max("cube_id", cube_id, 31)
        _chk_range("hbm_offset", hbm_offset, 37)
        sip_seg = cube_id
        local_offset = (1 << 37) | int(hbm_offset)  # bit 37 selects the HBM window
        return PhysAddr(
            rack_id=rack_id,
            sip_id=sip_id,
            sip_seg=sip_seg,
            local_offset=local_offset,
            kind="hbm",
            cube_id=cube_id,
            hbm_offset=int(hbm_offset),
        )

    @staticmethod
    def pe_hbm_addr(
        *,
        rack_id: int,
        sip_id: int,
        cube_id: int,
        pe_id: int,
        pe_local_hbm_offset: int,
        slice_size_bytes: int,
    ) -> PhysAddr:
        """Build an HBM address from a PE-local offset inside that PE's slice.

        The cube-level HBM offset is ``pe_id * slice_size_bytes +
        pe_local_hbm_offset``.

        Raises:
            PhysAddrError: if cube_id or pe_id is out of range, slice_size_bytes
                is not positive, the local offset is outside the slice, or the
                resulting offset leaves the 128GB HBM window.
        """
        _chk_max("cube_id", cube_id, 31)
        _chk_range("pe_id", pe_id, 4)
        # A non-positive slice size can never contain an offset; say so
        # explicitly instead of blaming the offset.
        if slice_size_bytes <= 0:
            raise PhysAddrError(f"slice_size_bytes must be positive: {slice_size_bytes}")
        if not (0 <= pe_local_hbm_offset < slice_size_bytes):
            raise PhysAddrError("pe_local_hbm_offset out of PE local slice range")
        hbm_offset = int(pe_id) * int(slice_size_bytes) + int(pe_local_hbm_offset)
        if not (0 <= hbm_offset < PhysAddr.HBM_WINDOW_BYTES):
            raise PhysAddrError("HBM offset exceeds reserved 128GB window")
        return PhysAddr.hbm_addr(
            rack_id=rack_id, sip_id=sip_id, cube_id=cube_id, hbm_offset=hbm_offset
        )

    @staticmethod
    def hbm_pe_id(hbm_offset: int, slice_size_bytes: int) -> int:
        """Return the index of the slice that contains ``hbm_offset``.

        Raises:
            PhysAddrError: if ``slice_size_bytes`` is not positive (previously a
                bare ZeroDivisionError or a meaningless negative index).
        """
        if slice_size_bytes <= 0:
            raise PhysAddrError(f"slice_size_bytes must be positive: {slice_size_bytes}")
        return hbm_offset // slice_size_bytes

    @staticmethod
    def cube_sram_addr(
        *, rack_id: int, sip_id: int, cube_id: int, sram_offset: int,
    ) -> PhysAddr:
        """Build a PE-resource address with unit_type SRAM at ``sram_offset``.

        Raises:
            PhysAddrError: if cube_id is outside 0..31 or sram_offset needs more
                than 29 bits.
        """
        _chk_max("cube_id", cube_id, 31)
        _chk_range("sram_offset", sram_offset, 29)
        sip_seg = cube_id
        local_offset = (UnitType.SRAM << 34) | sram_offset
        return PhysAddr(
            rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
            local_offset=local_offset,
            kind="pe_resource", cube_id=cube_id,
            unit_type=UnitType.SRAM, sub_offset=sram_offset,
        )

    @staticmethod
    def pe_tcm_addr(
        *, rack_id: int, sip_id: int, cube_id: int, pe_id: int, tcm_offset: int,
    ) -> PhysAddr:
        """Build a PE-resource address with unit_type PE for ``pe_id`` at ``tcm_offset``.

        Raises:
            PhysAddrError: if cube_id is outside 0..31, pe_id needs more than
                4 bits, or tcm_offset needs more than 29 bits.
        """
        _chk_max("cube_id", cube_id, 31)
        _chk_range("pe_id", pe_id, 4)
        _chk_range("tcm_offset", tcm_offset, 29)
        sip_seg = cube_id
        local_offset = (UnitType.PE << 34) | (pe_id << 30) | tcm_offset
        return PhysAddr(
            rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
            local_offset=local_offset,
            kind="pe_resource", cube_id=cube_id,
            unit_type=UnitType.PE, pe_id=pe_id, sub_offset=tcm_offset,
        )
|
||||
@@ -0,0 +1,174 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from math import ceil
|
||||
from typing import Literal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DPPolicy:
    """Two-level data-parallel policy: one choice per cube, one per PE within a cube."""

    # How the tensor is distributed across cubes.
    cube: Literal["replicate", "shard_m", "shard_k"] = "replicate"
    # How each cube's portion is distributed across its PEs.
    pe: Literal["replicate", "column_wise", "row_wise"] = "replicate"
|
||||
|
||||
|
||||
def resolve_dp_policy(
    policy: DPPolicy,
    *,
    shape: tuple[int, int],
    itemsize: int,
    num_pe: int,
    num_cubes: int = 1,
) -> list[ShardSpec]:
    """Expand a DPPolicy into the flat list of per-PE shards.

    The cube-level policy picks each cube's sub-shape and byte offset; the
    pe-level policy then splits that sub-shape across the cube's PEs.  The
    returned ``pe_index`` is flat: ``cube_id * num_pe + pe_id``.

    When ``num_cubes <= 1`` only the pe-level policy is applied; the cube-level
    policy is neither used nor validated.

    Raises:
        ValueError: for an unknown pe-level policy, or (when ``num_cubes > 1``)
            an unknown cube-level policy.
    """
    pe_resolvers = {
        "replicate": replicate,
        "column_wise": column_wise,
        "row_wise": row_wise,
    }
    split_within_cube = pe_resolvers.get(policy.pe)
    if split_within_cube is None:
        raise ValueError(f"Unknown pe-level policy: {policy.pe}")

    if num_cubes <= 1:
        return split_within_cube(shape=shape, itemsize=itemsize, num_pe=num_pe)

    # Two-level resolution.  The per-cube shape and the byte stride between
    # consecutive cubes do not depend on the cube index, so compute them once.
    rows, cols = shape
    cube_policy = policy.cube
    if cube_policy == "replicate":
        per_cube_shape = (rows, cols)
        cube_stride = 0
    elif cube_policy == "shard_m":
        # NOTE(review): floor division drops trailing rows when M is not a
        # multiple of num_cubes — confirm callers guarantee divisibility.
        rows_per_cube = rows // num_cubes
        per_cube_shape = (rows_per_cube, cols)
        cube_stride = rows_per_cube * cols * itemsize
    elif cube_policy == "shard_k":
        # NOTE(review): same remainder caveat for the K axis.
        cols_per_cube = cols // num_cubes
        per_cube_shape = (rows, cols_per_cube)
        cube_stride = rows * cols_per_cube * itemsize
    else:
        raise ValueError(f"Unknown cube-level policy: {policy.cube}")

    # Re-base every cube-local shard onto the flat PE index and global offset.
    return [
        ShardSpec(
            pe_index=cube_id * num_pe + local.pe_index,
            offset_bytes=cube_id * cube_stride + local.offset_bytes,
            nbytes=local.nbytes,
        )
        for cube_id in range(num_cubes)
        for local in split_within_cube(
            shape=per_cube_shape, itemsize=itemsize, num_pe=num_pe,
        )
    ]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ShardSpec:
    """Immutable record pairing a PE index with a byte offset and a byte count."""

    pe_index: int
    offset_bytes: int
    nbytes: int
|
||||
|
||||
|
||||
def column_wise(
    *, shape: tuple[int, int], itemsize: int, num_pe: int,
) -> list[ShardSpec]:
    """Split the K axis into ``num_pe`` equal parts; PE ``i`` gets shard ``i``.

    Every shard is ``M * (K // num_pe) * itemsize`` bytes and shard ``i`` starts
    at ``i`` times that size.
    """
    rows, cols = shape
    # NOTE(review): floor division drops trailing columns when K is not a
    # multiple of num_pe — confirm callers guarantee divisibility.
    shard_nbytes = rows * (cols // num_pe) * itemsize
    return [
        ShardSpec(pe_index=pe, offset_bytes=pe * shard_nbytes, nbytes=shard_nbytes)
        for pe in range(num_pe)
    ]
|
||||
|
||||
|
||||
def row_wise(
    *, shape: tuple[int, int], itemsize: int, num_pe: int,
) -> list[ShardSpec]:
    """Split the M axis into ``num_pe`` equal parts; PE ``i`` gets shard ``i``.

    Every shard is ``(M // num_pe) * K * itemsize`` bytes and shard ``i`` starts
    at ``i`` times that size.
    """
    rows, cols = shape
    # NOTE(review): floor division drops trailing rows when M is not a
    # multiple of num_pe — confirm callers guarantee divisibility.
    shard_nbytes = (rows // num_pe) * cols * itemsize
    return [
        ShardSpec(pe_index=pe, offset_bytes=pe * shard_nbytes, nbytes=shard_nbytes)
        for pe in range(num_pe)
    ]
|
||||
|
||||
|
||||
def replicate(
    *, shape: tuple[int, int], itemsize: int, num_pe: int,
) -> list[ShardSpec]:
    """Give every PE the whole (M, K) tensor: offset 0, ``M * K * itemsize`` bytes."""
    rows, cols = shape
    tensor_nbytes = rows * cols * itemsize
    copies = []
    for pe in range(num_pe):
        copies.append(ShardSpec(pe_index=pe, offset_bytes=0, nbytes=tensor_nbytes))
    return copies
|
||||
|
||||
|
||||
def tiled_column_major(
    *, shape: tuple[int, int], itemsize: int, num_pe: int,
    tile_m: int, tile_k: int,
) -> list[ShardSpec]:
    """2D tiling with the K-tile index varying fastest, round-robin across PEs.

    Tiles are numbered in visit order and tile ``n`` goes to PE ``n % num_pe``.
    Each shard's offset is the byte position of the tile's first element in a
    row-major (M, K) layout.
    """
    rows, cols = shape
    tiles_m = ceil(rows / tile_m)
    tiles_k = ceil(cols / tile_k)
    # NOTE(review): every tile reports a full tile_m x tile_k byte count, even
    # edge tiles that extend past the shape — confirm padding is intended.
    tile_bytes = tile_m * tile_k * itemsize
    row_bytes = cols * itemsize
    visit_order = ((mi, ki) for mi in range(tiles_m) for ki in range(tiles_k))
    return [
        ShardSpec(
            pe_index=seq % num_pe,
            offset_bytes=(mi * tile_m * row_bytes) + (ki * tile_k * itemsize),
            nbytes=tile_bytes,
        )
        for seq, (mi, ki) in enumerate(visit_order)
    ]
|
||||
|
||||
|
||||
def tiled_row_major(
    *, shape: tuple[int, int], itemsize: int, num_pe: int,
    tile_m: int, tile_k: int,
) -> list[ShardSpec]:
    """2D tiling with the M-tile index varying fastest, round-robin across PEs.

    Tiles are numbered in visit order and tile ``n`` goes to PE ``n % num_pe``.
    Each shard's offset is the byte position of the tile's first element in a
    row-major (M, K) layout.
    """
    rows, cols = shape
    tiles_m = ceil(rows / tile_m)
    tiles_k = ceil(cols / tile_k)
    # NOTE(review): every tile reports a full tile_m x tile_k byte count, even
    # edge tiles that extend past the shape — confirm padding is intended.
    tile_bytes = tile_m * tile_k * itemsize
    row_bytes = cols * itemsize
    visit_order = ((mi, ki) for ki in range(tiles_k) for mi in range(tiles_m))
    return [
        ShardSpec(
            pe_index=seq % num_pe,
            offset_bytes=(mi * tile_m * row_bytes) + (ki * tile_k * itemsize),
            nbytes=tile_bytes,
        )
        for seq, (mi, ki) in enumerate(visit_order)
    ]
|
||||
@@ -0,0 +1,184 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import heapq
|
||||
from collections import defaultdict
|
||||
|
||||
from kernbench.policy.address.phyaddr import PhysAddr, UnitType
|
||||
from kernbench.topology.types import TopologyGraph
|
||||
|
||||
|
||||
class RoutingError(Exception):
    """Raised when an address or node cannot be resolved, or no path exists."""
|
||||
|
||||
|
||||
class AddressResolver:
    """Map a PhysAddr to the id of its destination node in the compiled graph.

    Also offers named node lookups (find_m_cpu, find_pcie_ep, …) so component
    implementations never build node_id strings themselves.  Keeping the
    naming convention in one place means a single change propagates
    everywhere (ADR-0015 D4).
    """

    def __init__(self, graph: TopologyGraph) -> None:
        """Snapshot the graph's node ids and derive the HBM slice size in bytes."""
        self._node_ids = set(graph.nodes)
        memory_map = graph.spec["cube"]["memory_map"]
        # GB -> bytes, then split evenly (floor) across the cube's slices.
        hbm_bytes_per_cube = memory_map["hbm_total_gb_per_cube"] * (1 << 30)
        self._slice_size_bytes = hbm_bytes_per_cube // memory_map["hbm_slices_per_cube"]

    # ── Physical-address resolution ──────────────────────────────────

    def resolve(self, addr: PhysAddr) -> str:
        """Return the node id that ``addr`` targets.

        Raises:
            RoutingError: if the address kind or unit type is unsupported, or
                the resulting node id is not present in the topology.
        """
        cube_prefix = f"sip{addr.sip_id}.cube{addr.cube_id}"
        if addr.kind == "hbm":
            slice_idx = PhysAddr.hbm_pe_id(addr.hbm_offset, self._slice_size_bytes)
            node_id = f"{cube_prefix}.hbm_ctrl.slice{slice_idx}"
        elif addr.kind == "pe_resource":
            node_id = self._pe_resource_node(cube_prefix, addr)
        else:
            raise RoutingError(f"unsupported address kind: {addr.kind}")
        if node_id in self._node_ids:
            return node_id
        raise RoutingError(f"node {node_id} not found in topology")

    def _pe_resource_node(self, cube_prefix: str, addr: PhysAddr) -> str:
        """Build the node id for a PE-resource address from its unit type."""
        unit = addr.unit_type
        if unit == UnitType.PE:
            return f"{cube_prefix}.pe{addr.pe_id}.pe_tcm"
        if unit == UnitType.SRAM:
            return f"{cube_prefix}.sram"
        if unit == UnitType.MCPU:
            return f"{cube_prefix}.m_cpu"
        raise RoutingError(f"unsupported unit_type: {addr.unit_type}")

    # ── Named node lookups ───────────────────────────────────────────

    def find_m_cpu(self, sip: int, cube: int) -> str:
        """Return the M_CPU node id of the given cube, or raise RoutingError."""
        node_id = f"sip{sip}.cube{cube}.m_cpu"
        if node_id in self._node_ids:
            return node_id
        raise RoutingError(f"M_CPU not found: {node_id}")

    def find_pcie_ep(self, sip: int, io_id: str = "io0") -> str:
        """Return the PCIE_EP node id of the given SIP / IO block, or raise RoutingError."""
        node_id = f"sip{sip}.{io_id}.pcie_ep"
        if node_id in self._node_ids:
            return node_id
        raise RoutingError(f"PCIE_EP not found: {node_id}")

    def find_io_cpu(self, sip: int, io_id: str = "io0") -> str:
        """Return the IO_CPU node id of the given SIP / IO block, or raise RoutingError."""
        node_id = f"sip{sip}.{io_id}.io_cpu"
        if node_id in self._node_ids:
            return node_id
        raise RoutingError(f"IO_CPU not found: {node_id}")

    def find_all_pcie_eps(self) -> list[str]:
        """Return every node id ending in ``.pcie_ep``, sorted."""
        endpoints = [nid for nid in self._node_ids if nid.endswith(".pcie_ep")]
        endpoints.sort()
        return endpoints
|
||||
|
||||
|
||||
class PathRouter:
    """Find data-path from a source PE (or arbitrary node) to a destination node.

    Three adjacency maps are built from the graph's edges:
      _adj          — excludes command edges (PE DMA routing: find_path,
                      find_path_with_distance)
      _adj_all      — includes all edges (component-to-component routing,
                      find_node_path; required because M_CPU↔NOC links are
                      "command")
      _adj_mcpu_dma — excludes the kinds in _MCPU_DMA_EXCLUDE (cross-cube
                      branch of find_mcpu_dma_path)

    An edge's weight is ``routing_weight_mm`` when that is not None, otherwise
    ``distance_mm``.
    """

    # Edge kinds excluded from M_CPU DMA adjacency: prevents routing through
    # PE-internal pipeline nodes when computing DMA paths.
    _MCPU_DMA_EXCLUDE = {"pe_internal", "pe_to_xbar"}

    def __init__(self, graph: TopologyGraph) -> None:
        """Build the three adjacency maps from ``graph.edges``."""
        self._adj: dict[str, list[tuple[str, float]]] = defaultdict(list)
        self._adj_all: dict[str, list[tuple[str, float]]] = defaultdict(list)
        self._adj_mcpu_dma: dict[str, list[tuple[str, float]]] = defaultdict(list)
        for e in graph.edges:
            w = e.routing_weight_mm if e.routing_weight_mm is not None else e.distance_mm
            self._adj_all[e.src].append((e.dst, w))
            if e.kind != "command":
                self._adj[e.src].append((e.dst, w))
            if e.kind not in self._MCPU_DMA_EXCLUDE:
                self._adj_mcpu_dma[e.src].append((e.dst, w))

    def find_path(self, src_pe: str, dst_node: str) -> list[str]:
        """PE DMA routing: prepends .pe_dma, excludes command edges."""
        start = f"{src_pe}.pe_dma"
        return self._run_dijkstra(self._adj, start, dst_node)

    def find_path_with_distance(self, src_pe: str, dst_node: str) -> tuple[list[str], float]:
        """Like find_path, but also return the total weight of the path."""
        start = f"{src_pe}.pe_dma"
        return self._run_dijkstra_with_dist(self._adj, start, dst_node)

    def find_mcpu_dma_path(self, m_cpu_id: str, dst_hbm_slice_id: str) -> list[str]:
        """M_CPU DMA path: never routes through PE-internal nodes (ADR-0015 D5).

        Same-cube: deterministic [m_cpu, noc, xbar.pe_i, hbm_ctrl.slice_i].
        Cross-cube: Dijkstra via _adj_mcpu_dma (pe_internal/pe_to_xbar excluded)
                    → routes through NOC → UCIe → target cube NOC → xbar → HBM.
        """
        # The cube prefix is the first two dot-separated components of a node id.
        m_cube = ".".join(m_cpu_id.split(".")[:2])
        d_cube = ".".join(dst_hbm_slice_id.split(".")[:2])
        if m_cube == d_cube:
            # The same-cube path is built from the naming convention alone; it
            # is not checked against the graph.
            slice_idx = int(dst_hbm_slice_id.rsplit("slice", 1)[1])
            return [
                m_cpu_id,
                f"{m_cube}.noc",
                f"{m_cube}.xbar.pe{slice_idx}",
                dst_hbm_slice_id,
            ]
        return self._run_dijkstra(self._adj_mcpu_dma, m_cpu_id, dst_hbm_slice_id)

    def find_node_path(self, src: str, dst: str) -> list[str]:
        """General routing between arbitrary nodes, including command edges.

        Used by components (IoCpuComponent, MCpuComponent) that route through
        M_CPU↔NOC command-kind links.
        """
        return self._run_dijkstra(self._adj_all, src, dst)

    def _run_dijkstra(
        self,
        adj: dict[str, list[tuple[str, float]]],
        start: str,
        goal: str,
    ) -> list[str]:
        """Return only the node list of the shortest path from start to goal."""
        path, _ = self._run_dijkstra_with_dist(adj, start, goal)
        return path

    def _run_dijkstra_with_dist(
        self,
        adj: dict[str, list[tuple[str, float]]],
        start: str,
        goal: str,
    ) -> tuple[list[str], float]:
        """Dijkstra over ``adj``; return (path, total weight).

        Raises:
            RoutingError: if ``goal`` cannot be reached from ``start``.
        """
        if start == goal:
            return [start], 0.0
        best: dict[str, float] = {start: 0.0}
        prev: dict[str, str] = {}
        heap: list[tuple[float, str]] = [(0.0, start)]
        while heap:
            d, node = heapq.heappop(heap)
            if node == goal:
                # Walk the predecessor chain back to start, then reverse it.
                path: list[str] = []
                cur = goal
                while cur != start:
                    path.append(cur)
                    cur = prev[cur]
                path.append(start)
                path.reverse()
                return path, d
            # Stale heap entry: a shorter distance to this node was found later.
            if d > best.get(node, float("inf")):
                continue
            # .get() rather than adj[node]: the adjacency maps are defaultdicts,
            # and indexing would insert an empty list for every visited node
            # that has no outgoing edges, mutating the router during a search.
            for neighbor, edge_dist in adj.get(node, ()):
                new_d = d + edge_dist
                if new_d < best.get(neighbor, float("inf")):
                    best[neighbor] = new_d
                    prev[neighbor] = node
                    heapq.heappush(heap, (new_d, neighbor))
        raise RoutingError(f"no path from {start} to {goal}")

    # ── backward-compat shims (used by existing tests) ───────────────

    def _dijkstra(self, start: str, goal: str) -> list[str]:
        """Shim: shortest path over the command-free adjacency map."""
        return self._run_dijkstra(self._adj, start, goal)

    def _dijkstra_with_dist(self, start: str, goal: str) -> tuple[list[str], float]:
        """Shim: shortest path and its weight over the command-free adjacency map."""
        return self._run_dijkstra_with_dist(self._adj, start, goal)
|
||||
Reference in New Issue
Block a user