from __future__ import annotations

import bisect
from dataclasses import dataclass

from kernbench.policy.address.phyaddr import PhysAddr


class AllocationError(Exception):
    """Raised when an allocator cannot satisfy an allocation request."""


@dataclass(frozen=True)
class AddressConfig:
    """Immutable sizing parameters from which allocator capacities are derived."""

    sip_count: int
    cubes_per_sip: int
    pes_per_cube: int
    hbm_bytes_per_cube: int
    hbm_slices_per_cube: int
    tcm_bytes_per_pe: int
    tcm_scheduler_reserved_bytes: int
    sram_bytes_per_cube: int

    @property
    def hbm_slice_bytes(self) -> int:
        """Size of one HBM slice: cube HBM bytes floor-divided by slice count."""
        return self.hbm_bytes_per_cube // self.hbm_slices_per_cube

    @property
    def tcm_allocatable_bytes(self) -> int:
        """TCM bytes per PE left after subtracting the scheduler reservation."""
        return self.tcm_bytes_per_pe - self.tcm_scheduler_reserved_bytes


class _FreeList:
    """Offset-based first-fit free-list allocator with coalescing.

    Free space is tracked as ``(offset, size)`` tuples kept sorted by offset.
    Adjacent free blocks are merged when a range is returned.
    """

    def __init__(self, capacity: int) -> None:
        self._capacity = capacity
        self._used = 0
        self._free: list[tuple[int, int]] = [(0, capacity)]  # (offset, size)

    @property
    def used(self) -> int:
        """Number of bytes currently allocated."""
        return self._used

    @property
    def total(self) -> int:
        """Total capacity in bytes."""
        return self._capacity

    def alloc(self, nbytes: int) -> int:
        """Allocate ``nbytes`` from the first free block that fits.

        Returns:
            The offset of the allocated range.

        Raises:
            ValueError: if ``nbytes`` is negative.
            AllocationError: if no single free block is large enough.
        """
        # A negative size would pass the fit test below and *grow* the free
        # block while shrinking ``_used``; reject it before touching state.
        if nbytes < 0:
            raise ValueError(f"allocation size must be non-negative, got {nbytes}")

        for i, (start, size) in enumerate(self._free):
            if size < nbytes:
                continue
            if size == nbytes:
                # Exact fit: the block disappears entirely.
                self._free.pop(i)
            else:
                # Carve the request off the front of the block.
                self._free[i] = (start + nbytes, size - nbytes)
            self._used += nbytes
            return start

        largest = max((s for _, s in self._free), default=0)
        raise AllocationError(
            f"overflow: need {nbytes}, "
            f"largest free block {largest}"
        )

    def free(self, offset: int, nbytes: int) -> None:
        """Return ``[offset, offset + nbytes)`` to the free-list with coalescing.

        A zero-byte free is a no-op.

        Raises:
            ValueError: if ``nbytes`` is negative, the range lies outside the
                allocator's capacity, or the range overlaps space that is
                already free (e.g. a double free). No state is modified when
                this is raised.
        """
        if nbytes < 0:
            raise ValueError(f"free size must be non-negative, got {nbytes}")
        if nbytes == 0:
            # Nothing to return; avoids inserting an empty block.
            return

        new_start = offset
        new_end = offset + nbytes
        if new_start < 0 or new_end > self._capacity:
            raise ValueError(
                f"free range [{new_start}, {new_end}) is outside "
                f"[0, {self._capacity})"
            )

        # ``(offset,)`` sorts before any ``(offset, size)``, so ``idx`` is the
        # first free block starting at or after ``offset``.
        idx = bisect.bisect_left(self._free, (offset,))

        merge_prev = False
        if idx > 0:
            prev_start, prev_size = self._free[idx - 1]
            prev_end = prev_start + prev_size
            if prev_end > new_start:
                raise ValueError(
                    f"free range [{new_start}, {new_end}) overlaps free block "
                    f"[{prev_start}, {prev_end})"
                )
            merge_prev = prev_end == new_start

        merge_next = False
        if idx < len(self._free):
            next_start, next_size = self._free[idx]
            if next_start < new_end:
                raise ValueError(
                    f"free range [{new_start}, {new_end}) overlaps free block "
                    f"[{next_start}, {next_start + next_size})"
                )
            merge_next = next_start == new_end

        # Validation passed; only now mutate state.
        if merge_next:
            # Absorb the following block.
            next_start, next_size = self._free.pop(idx)
            new_end = next_start + next_size
        if merge_prev:
            # Absorb the preceding block; the merged block takes its slot.
            idx -= 1
            new_start, _ = self._free.pop(idx)

        self._free.insert(idx, (new_start, new_end - new_start))
        self._used -= nbytes


class PEMemAllocator:
    """Per-PE allocator handing out HBM and TCM ranges as ``PhysAddr`` values.

    HBM capacity is ``cfg.hbm_slice_bytes``; TCM capacity is
    ``cfg.tcm_allocatable_bytes``. Each space has its own free-list.
    """

    def __init__(
        self,
        rack_id: int,
        sip_id: int,
        cube_id: int,
        pe_id: int,
        cfg: AddressConfig,
    ) -> None:
        self._rack_id = rack_id
        self._sip_id = sip_id
        self._cube_id = cube_id
        self._pe_id = pe_id
        self._cfg = cfg
        self._hbm = _FreeList(cfg.hbm_slice_bytes)
        self._tcm = _FreeList(cfg.tcm_allocatable_bytes)

    def alloc_hbm(self, nbytes: int) -> PhysAddr:
        """Allocate ``nbytes`` of this PE's HBM slice and return its address.

        Raises:
            AllocationError: if no free HBM block is large enough. The
                underlying free-list error is attached as ``__cause__``.
        """
        try:
            offset = self._hbm.alloc(nbytes)
        except AllocationError as err:
            # "available" is the total free byte count; the chained cause
            # carries the largest contiguous block, which may be smaller.
            raise AllocationError(
                f"HBM overflow: need {nbytes}, "
                f"available {self._cfg.hbm_slice_bytes - self._hbm.used}"
            ) from err
        return PhysAddr.pe_hbm_addr(
            rack_id=self._rack_id,
            sip_id=self._sip_id,
            cube_id=self._cube_id,
            pe_id=self._pe_id,
            pe_local_hbm_offset=offset,
            slice_size_bytes=self._cfg.hbm_slice_bytes,
        )

    def free_hbm(self, pa: PhysAddr, nbytes: int) -> None:
        """Return ``nbytes`` of HBM starting at ``pa`` to the free-list."""
        # Extract PE-local offset from the PA's hbm_offset.
        # NOTE(review): assumes pa.hbm_offset includes this PE's slice base
        # (pe_id * hbm_slice_bytes) -- confirm against PhysAddr.pe_hbm_addr.
        pe_slice_start = self._pe_id * self._cfg.hbm_slice_bytes
        offset = pa.hbm_offset - pe_slice_start
        self._hbm.free(offset, nbytes)

    def alloc_tcm(self, nbytes: int) -> PhysAddr:
        """Allocate ``nbytes`` of this PE's allocatable TCM and return its address.

        Raises:
            AllocationError: if no free TCM block is large enough. The
                underlying free-list error is attached as ``__cause__``.
        """
        try:
            offset = self._tcm.alloc(nbytes)
        except AllocationError as err:
            raise AllocationError(
                f"TCM overflow: need {nbytes}, "
                f"available {self._cfg.tcm_allocatable_bytes - self._tcm.used}"
            ) from err
        return PhysAddr.pe_tcm_addr(
            rack_id=self._rack_id,
            sip_id=self._sip_id,
            cube_id=self._cube_id,
            pe_id=self._pe_id,
            tcm_offset=offset,
        )

    def free_tcm(self, pa: PhysAddr, nbytes: int) -> None:
        """Return ``nbytes`` of TCM starting at ``pa`` to the free-list."""
        # NOTE(review): presumably pa.sub_offset is the tcm_offset passed to
        # PhysAddr.pe_tcm_addr -- confirm against PhysAddr.
        self._tcm.free(pa.sub_offset, nbytes)

    @property
    def hbm_used(self) -> int:
        """HBM bytes currently allocated."""
        return self._hbm.used

    @property
    def hbm_total(self) -> int:
        """HBM capacity in bytes (one slice)."""
        return self._cfg.hbm_slice_bytes

    @property
    def tcm_used(self) -> int:
        """TCM bytes currently allocated."""
        return self._tcm.used

    @property
    def tcm_total(self) -> int:
        """Allocatable TCM capacity in bytes."""
        return self._cfg.tcm_allocatable_bytes