from __future__ import annotations

from dataclasses import dataclass
from enum import IntEnum
from typing import Literal

# Largest value representable in a 51-bit physical address.
MAX_51 = (1 << 51) - 1

# ── Layout constants (ADR-0001 Rev 2) ────────────────────────────────
#   [50:47] sip_id        (4)
#   [46:42] die_id        (5)
#   [41: 0] local_offset  (42)
_SIP_SHIFT = 47
_DIE_SHIFT = 42
_LOCAL_BITS = 42
_LOCAL_MASK = (1 << _LOCAL_BITS) - 1

# AHBM die: [41:38] MBZ, [37] addr_space, [36:0] sub-address
_AHBM_SEL_BIT = 37
_AHBM_LOCAL_USED = 38  # bits actually meaningful for AHBM

# Resource window: [36:34] resource_kind, [33:0] kind_local
_RES_KIND_SHIFT = 34
_RES_KIND_MASK = 0x7

# PE_LOCAL: [32:29] pe_id, [28:25] pe_sub_unit, [24:0] sub_offset
_PE_ID_SHIFT = 29
_PE_SUB_SHIFT = 25
_PE_SUB_OFFSET_BITS = 25

# MCPU_LOCAL: [29:25] mcpu_sub_unit, [24:0] sub_offset
_MCPU_SUB_SHIFT = 25

# CUBE_SRAM: [24:0] sram_offset
_SRAM_OFFSET_BITS = 25

# IOCHIPLET: [41:40] MBZ, [39:0] chiplet_offset
_CHIPLET_LOCAL_BITS = 40
_IOCPU_BOUNDARY = 1 << 31  # 2 GB

# IOCPU: [30:27] iocpu_sub_unit, [26:0] sub_offset
_IOCPU_SUB_SHIFT = 27
_IOCPU_SUB_OFFSET_BITS = 27

# die_id ranges
_AHBM_DIE_MAX = 15
_CHIPLET_DIE_MIN = 16
_CHIPLET_DIE_MAX = 20


class PhysAddrError(Exception):
    """Raised when an address or one of its fields violates the layout."""


def _chk_range(name: str, v: int, bits: int) -> None:
    """Raise PhysAddrError unless ``0 <= v < 2**bits``."""
    if not (0 <= v < (1 << bits)):
        raise PhysAddrError(f"{name} out of range for {bits} bits: {v}")


def _chk_max(name: str, v: int, maxv: int) -> None:
    """Raise PhysAddrError unless ``0 <= v <= maxv`` (inclusive bound)."""
    if not (0 <= v <= maxv):
        raise PhysAddrError(f"{name} out of range (0..{maxv}): {v}")


class UnitType(IntEnum):
    """resource_kind values for AHBM resource window."""

    PE = 0  # PE_LOCAL
    MCPU = 1  # MCPU_LOCAL
    SRAM = 2  # CUBE_SRAM


class PESubUnit(IntEnum):
    """Named values for the ``pe_sub_unit`` field of a PE_LOCAL address."""

    PE_CPU_DTCM = 0
    MATH_ENGINE_DTCM = 1
    IPCQ = 2
    PE_CPU_SFR = 3
    MATH_ENGINE_SFR = 4
    DMA_ENGINE_SFR = 5
    PE_TCM = 6


class MCPUSubUnit(IntEnum):
    """Named sub-unit values; presumably for ``mcpu_sub_unit`` (not used here)."""

    MCPU_ITCM = 0
    MCPU_DTCM = 1
    IPCQ = 2
    MCPU_SFR = 3
    MCPU_DMA_SFR = 4
    MCPU_SRAM = 5


class IOCPUSubUnit(IntEnum):
    """Named sub-unit values; presumably for ``iocpu_sub_unit`` (not used here)."""

    IOCPU_ITCM = 0
    IOCPU_DTCM = 1
    IPCQ = 2
    IOCPU_SFR = 3
    IO_DMA_SFR = 4
    IO_SRAM = 5


@dataclass(frozen=True)
class PhysAddr:
    """51-bit physical address value object (ADR-0001 Rev 2).

    Layout:
      [50:47] sip_id        (4)  -- 16 SIPs
      [46:42] die_id        (5)  -- 0..15 AHBM, 16..20 IOCHIPLET
      [41: 0] local_offset  (42) -- 4 TB per die

    Only ``sip_id``, ``die_id`` and ``local_offset`` take part in
    :meth:`encode`. The remaining fields are a decoded view filled in by
    :meth:`decode` and the factory methods; they default to zero / ``"raw"``
    when the instance is built directly.
    """

    sip_id: int
    die_id: int
    local_offset: int

    # Which region local_offset falls in, as classified by decode()/factories.
    kind: Literal["hbm", "pe_resource", "iocpu", "ual", "raw"] = "raw"
    # resource_kind of an AHBM resource-window address (kind == "pe_resource").
    unit_type: UnitType = UnitType.PE
    pe_id: int = 0
    pe_sub_unit: int = 0
    # Offset inside the selected sub-unit (PE / MCPU / SRAM / IOCPU).
    sub_offset: int = 0
    # Offset inside the HBM window (kind == "hbm"), i.e. local_offset[36:0].
    hbm_offset: int = 0
    iocpu_sub_unit: int = 0
    # local_offset[39:0] of an IOCHIPLET address (kind in {"iocpu", "ual"}).
    chiplet_offset: int = 0
    mcpu_sub_unit: int = 0

    # Un-annotated on purpose: a plain class attribute, not a dataclass field.
    HBM_WINDOW_BYTES = 1 << 37  # 128 GB

    # ── encode / decode ──────────────────────────────────────────────

    def encode(self) -> int:
        """Pack ``sip_id``, ``die_id`` and ``local_offset`` into a 51-bit int.

        Raises:
            PhysAddrError: if a field does not fit its bit width, or if the
                must-be-zero bits of ``local_offset`` are set for the die
                type (AHBM: [41:38], IOCHIPLET: [41:40]).
        """
        _chk_range("sip_id", self.sip_id, 4)
        _chk_range("die_id", self.die_id, 5)
        _chk_range("local_offset", self.local_offset, _LOCAL_BITS)

        # MBZ enforcement
        # NOTE(review): die_id 21..31 is accepted here but rejected by
        # decode() as reserved -- confirm whether encode should reject too.
        if self.die_id <= _AHBM_DIE_MAX:
            mbz_top = (self.local_offset >> _AHBM_LOCAL_USED) & 0xF
            if mbz_top != 0:
                raise PhysAddrError("AHBM local_offset bits [41:38] must be zero")
        elif _CHIPLET_DIE_MIN <= self.die_id <= _CHIPLET_DIE_MAX:
            mbz_top = (self.local_offset >> _CHIPLET_LOCAL_BITS) & 0x3
            if mbz_top != 0:
                raise PhysAddrError(
                    "IOCHIPLET local_offset bits [41:40] must be zero"
                )

        addr = (
            (self.sip_id << _SIP_SHIFT)
            | (self.die_id << _DIE_SHIFT)
            | self.local_offset
        )
        return addr

    @staticmethod
    def decode(addr: int) -> PhysAddr:
        """Split a 51-bit address into fields and classify its region.

        Raises:
            PhysAddrError: if ``addr`` is outside ``0..MAX_51``, the die_id
                is reserved (21..31), or an AHBM resource window carries an
                unknown resource_kind.
        """
        if not (0 <= addr <= MAX_51):
            raise PhysAddrError("addr must be a 51-bit value")

        sip_id = (addr >> _SIP_SHIFT) & 0xF
        die_id = (addr >> _DIE_SHIFT) & 0x1F
        local_offset = addr & _LOCAL_MASK

        if die_id <= _AHBM_DIE_MAX:
            return PhysAddr._decode_ahbm(sip_id, die_id, local_offset)
        elif _CHIPLET_DIE_MIN <= die_id <= _CHIPLET_DIE_MAX:
            return PhysAddr._decode_chiplet(sip_id, die_id, local_offset)
        else:
            raise PhysAddrError(f"die_id {die_id} is reserved (21..31)")

    @staticmethod
    def _decode_ahbm(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
        """Decode the local offset of an AHBM die (die_id 0..15).

        Bit 37 selects the HBM window (1) or the resource window (0).

        NOTE(review): the MBZ bits [41:38] are not checked here, so an
        address with them set decodes but cannot be re-encoded.
        """
        sel = (local_offset >> _AHBM_SEL_BIT) & 0x1
        if sel == 1:
            hbm_offset = int(local_offset & ((1 << _AHBM_SEL_BIT) - 1))
            return PhysAddr(
                sip_id=sip_id,
                die_id=die_id,
                local_offset=local_offset,
                kind="hbm",
                hbm_offset=hbm_offset,
            )

        # Resource window
        res_kind = int((local_offset >> _RES_KIND_SHIFT) & _RES_KIND_MASK)
        try:
            unit_type = UnitType(res_kind)
        except ValueError:
            # Re-raise as the module's own error type; the ValueError adds
            # nothing beyond the offending value already in the message.
            raise PhysAddrError(f"unknown resource_kind: {res_kind}") from None

        if unit_type == UnitType.PE:
            pe_id = int((local_offset >> _PE_ID_SHIFT) & 0xF)
            pe_sub = int((local_offset >> _PE_SUB_SHIFT) & 0xF)
            sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id,
                die_id=die_id,
                local_offset=local_offset,
                kind="pe_resource",
                unit_type=unit_type,
                pe_id=pe_id,
                pe_sub_unit=pe_sub,
                sub_offset=sub_off,
            )
        elif unit_type == UnitType.MCPU:
            mcpu_sub = int((local_offset >> _MCPU_SUB_SHIFT) & 0x1F)
            # MCPU sub_offset shares the 25-bit width of the PE sub_offset.
            sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id,
                die_id=die_id,
                local_offset=local_offset,
                kind="pe_resource",
                unit_type=unit_type,
                mcpu_sub_unit=mcpu_sub,
                sub_offset=sub_off,
            )
        else:  # SRAM
            sub_off = int(local_offset & ((1 << _SRAM_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id,
                die_id=die_id,
                local_offset=local_offset,
                kind="pe_resource",
                unit_type=unit_type,
                sub_offset=sub_off,
            )

    @staticmethod
    def _decode_chiplet(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
        """Decode the local offset of an IOCHIPLET die (die_id 16..20).

        Chiplet offsets below 2 GB are IOCPU resources; the rest is UAL.

        NOTE(review): the MBZ bits [41:40] are masked off rather than
        rejected, while ``local_offset`` keeps them -- confirm intent.
        """
        chip_off = local_offset & ((1 << _CHIPLET_LOCAL_BITS) - 1)

        if chip_off < _IOCPU_BOUNDARY:
            iocpu_sub = int((chip_off >> _IOCPU_SUB_SHIFT) & 0xF)
            sub_off = int(chip_off & ((1 << _IOCPU_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id,
                die_id=die_id,
                local_offset=local_offset,
                kind="iocpu",
                chiplet_offset=chip_off,
                iocpu_sub_unit=iocpu_sub,
                sub_offset=sub_off,
            )
        else:
            return PhysAddr(
                sip_id=sip_id,
                die_id=die_id,
                local_offset=local_offset,
                kind="ual",
                chiplet_offset=chip_off,
            )

    # ── AHBM factory methods ────────────────────────────────────────
    # The factories validate die_id and the region-specific fields;
    # sip_id is only range-checked later, by encode().

    @staticmethod
    def hbm_addr(*, sip_id: int, die_id: int, hbm_offset: int) -> PhysAddr:
        """Build an HBM-window address (bit 37 set) on an AHBM die.

        Raises:
            PhysAddrError: if ``die_id`` is not 0..15 or ``hbm_offset``
                does not fit in 37 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("hbm_offset", hbm_offset, _AHBM_SEL_BIT)
        local_offset = (1 << _AHBM_SEL_BIT) | int(hbm_offset)
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="hbm",
            hbm_offset=int(hbm_offset),
        )

    @staticmethod
    def pe_hbm_addr(
        *,
        sip_id: int,
        die_id: int,
        pe_id: int,
        pe_local_hbm_offset: int,
        slice_size_bytes: int,
    ) -> PhysAddr:
        """Build an HBM address inside the slice owned by ``pe_id``.

        The HBM offset is ``pe_id * slice_size_bytes + pe_local_hbm_offset``.

        Raises:
            PhysAddrError: if ``die_id``/``pe_id`` are out of range, the
                local offset is outside ``0..slice_size_bytes - 1`` (which
                also rejects a non-positive slice size), or the resulting
                offset leaves the 128 GB HBM window.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("pe_id", pe_id, 4)
        if not (0 <= pe_local_hbm_offset < slice_size_bytes):
            raise PhysAddrError("pe_local_hbm_offset out of PE local slice range")
        hbm_offset = int(pe_id) * int(slice_size_bytes) + int(pe_local_hbm_offset)
        if not (0 <= hbm_offset < PhysAddr.HBM_WINDOW_BYTES):
            raise PhysAddrError("HBM offset exceeds reserved 128GB window")
        return PhysAddr.hbm_addr(sip_id=sip_id, die_id=die_id, hbm_offset=hbm_offset)

    @staticmethod
    def hbm_pe_id(hbm_offset: int, slice_size_bytes: int) -> int:
        """Return the index of the slice containing ``hbm_offset``.

        Raises:
            PhysAddrError: if ``slice_size_bytes`` is not positive. This
                replaces a bare ZeroDivisionError for 0 and a silently
                negative result for negative sizes.
        """
        if slice_size_bytes <= 0:
            raise PhysAddrError(
                f"slice_size_bytes must be positive: {slice_size_bytes}"
            )
        return hbm_offset // slice_size_bytes

    @staticmethod
    def pe_tcm_addr(
        *,
        sip_id: int,
        die_id: int,
        pe_id: int,
        tcm_offset: int,
    ) -> PhysAddr:
        """Build a PE_LOCAL address targeting the ``PE_TCM`` sub-unit."""
        return PhysAddr.pe_resource_addr(
            sip_id=sip_id,
            die_id=die_id,
            pe_id=pe_id,
            pe_sub_unit=PESubUnit.PE_TCM,
            sub_offset=tcm_offset,
        )

    @staticmethod
    def pe_resource_addr(
        *,
        sip_id: int,
        die_id: int,
        pe_id: int,
        pe_sub_unit: int,
        sub_offset: int,
    ) -> PhysAddr:
        """Build a PE_LOCAL resource-window address on an AHBM die.

        Raises:
            PhysAddrError: if ``die_id`` is not 0..15, ``pe_id`` or
                ``pe_sub_unit`` exceed 4 bits, or ``sub_offset`` exceeds
                25 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("pe_id", pe_id, 4)
        _chk_range("pe_sub_unit", pe_sub_unit, 4)
        _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
        local_offset = (
            (UnitType.PE << _RES_KIND_SHIFT)
            | (pe_id << _PE_ID_SHIFT)
            | (pe_sub_unit << _PE_SUB_SHIFT)
            | sub_offset
        )
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="pe_resource",
            unit_type=UnitType.PE,
            pe_id=pe_id,
            pe_sub_unit=pe_sub_unit,
            sub_offset=sub_offset,
        )

    @staticmethod
    def cube_sram_addr(
        *,
        sip_id: int,
        die_id: int,
        sram_offset: int,
    ) -> PhysAddr:
        """Build a CUBE_SRAM resource-window address on an AHBM die.

        Raises:
            PhysAddrError: if ``die_id`` is not 0..15 or ``sram_offset``
                exceeds 25 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("sram_offset", sram_offset, _SRAM_OFFSET_BITS)
        local_offset = (UnitType.SRAM << _RES_KIND_SHIFT) | sram_offset
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="pe_resource",
            unit_type=UnitType.SRAM,
            sub_offset=sram_offset,
        )

    @staticmethod
    def mcpu_resource_addr(
        *,
        sip_id: int,
        die_id: int,
        mcpu_sub_unit: int,
        sub_offset: int,
    ) -> PhysAddr:
        """Build an MCPU_LOCAL resource-window address on an AHBM die.

        Raises:
            PhysAddrError: if ``die_id`` is not 0..15, ``mcpu_sub_unit``
                exceeds 5 bits, or ``sub_offset`` exceeds 25 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("mcpu_sub_unit", mcpu_sub_unit, 5)
        _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
        local_offset = (
            (UnitType.MCPU << _RES_KIND_SHIFT)
            | (mcpu_sub_unit << _MCPU_SUB_SHIFT)
            | sub_offset
        )
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="pe_resource",
            unit_type=UnitType.MCPU,
            mcpu_sub_unit=mcpu_sub_unit,
            sub_offset=sub_offset,
        )

    # ── IOCHIPLET factory methods ────────────────────────────────────

    @staticmethod
    def iocpu_resource_addr(
        *,
        sip_id: int,
        die_id: int,
        iocpu_sub_unit: int,
        sub_offset: int,
    ) -> PhysAddr:
        """Build an IOCPU resource address (chiplet offset below 2 GB).

        Raises:
            PhysAddrError: if ``die_id`` is not 16..20, ``iocpu_sub_unit``
                exceeds 4 bits, or ``sub_offset`` exceeds 27 bits.
        """
        _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
        if die_id < _CHIPLET_DIE_MIN:
            raise PhysAddrError(
                f"die_id {die_id} is not an IOCHIPLET "
                f"(must be {_CHIPLET_DIE_MIN}..{_CHIPLET_DIE_MAX})"
            )
        _chk_range("iocpu_sub_unit", iocpu_sub_unit, 4)
        _chk_range("sub_offset", sub_offset, _IOCPU_SUB_OFFSET_BITS)
        chiplet_offset = (iocpu_sub_unit << _IOCPU_SUB_SHIFT) | sub_offset
        # Defensive: with the 4-bit + 27-bit checks above this cannot exceed
        # 2**31 - 1, but the guard keeps the invariant explicit if the field
        # widths ever change.
        if chiplet_offset >= _IOCPU_BOUNDARY:
            raise PhysAddrError("IOCPU region overflow (must be < 2 GB)")
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=chiplet_offset,
            kind="iocpu",
            chiplet_offset=chiplet_offset,
            iocpu_sub_unit=iocpu_sub_unit,
            sub_offset=sub_offset,
        )

    @staticmethod
    def ual_addr(*, sip_id: int, die_id: int, ual_offset: int) -> PhysAddr:
        """Build a UAL address: chiplet offset ``2 GB + ual_offset``.

        Raises:
            PhysAddrError: if ``die_id`` is not 16..20, ``ual_offset`` is
                negative, or the resulting chiplet offset exceeds 40 bits.
        """
        _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
        if die_id < _CHIPLET_DIE_MIN:
            raise PhysAddrError(f"die_id {die_id} is not an IOCHIPLET")
        # A negative offset would land below the 2 GB boundary, i.e. inside
        # the IOCPU region, while still being labelled "ual"; the range check
        # on the sum alone cannot catch that.
        if ual_offset < 0:
            raise PhysAddrError(
                f"ual_offset out of range (must be >= 0): {ual_offset}"
            )
        chiplet_offset = _IOCPU_BOUNDARY + ual_offset
        _chk_range("chiplet_offset", chiplet_offset, _CHIPLET_LOCAL_BITS)
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=chiplet_offset,
            kind="ual",
            chiplet_offset=chiplet_offset,
        )