# 81cc32c46b
# Remove rack_id (4 bits), rename sip_seg→die_id, shift fields to enable
# 42-bit local_offset (4 TB per die). Define PE_LOCAL/MCPU_LOCAL/CUBE_SRAM
# sub-unit tables for AHBM dies and IOCPU sub-unit table for IOCHIPLET dies
# (1 TB window). Supersedes ADR-0031. Also fixes latent VA/PA confusion in
# pe_dma pipeline DMA path where virtual addresses were decoded as physical
# addresses without MMU translation — previously masked by coincidental
# bit-position alignment. 529 passed (+6 recovered), 10 pre-existing failures
# unchanged.
# Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
# 341 lines, 12 KiB, Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from enum import IntEnum
|
|
from typing import Literal
|
|
|
|
# Largest value representable in the 51-bit physical address space.
MAX_51 = (1 << 51) - 1

# ── Layout constants (ADR-0001 Rev 2) ────────────────────────────────
# [50:47] sip_id        (4)
# [46:42] die_id        (5)
# [41: 0] local_offset  (42)
_SIP_SHIFT = 47
_DIE_SHIFT = 42
_LOCAL_BITS = 42
_LOCAL_MASK = (1 << _LOCAL_BITS) - 1

# AHBM die: [41:38] MBZ, [37] addr_space, [36:0] sub-address
_AHBM_SEL_BIT = 37
_AHBM_LOCAL_USED = 38  # bits actually meaningful for AHBM

# Resource window: [36:34] resource_kind, [33:0] kind_local
_RES_KIND_SHIFT = 34
_RES_KIND_MASK = 0x7

# PE_LOCAL: [32:29] pe_id, [28:25] pe_sub_unit, [24:0] sub_offset
_PE_ID_SHIFT = 29
_PE_SUB_SHIFT = 25
_PE_SUB_OFFSET_BITS = 25

# MCPU_LOCAL: [29:25] mcpu_sub_unit, [24:0] sub_offset
_MCPU_SUB_SHIFT = 25

# CUBE_SRAM: [24:0] sram_offset
_SRAM_OFFSET_BITS = 25

# IOCHIPLET: [41:40] MBZ, [39:0] chiplet_offset
_CHIPLET_LOCAL_BITS = 40  # 2**40 bytes = 1 TB window
_IOCPU_BOUNDARY = 1 << 31  # 2 GB; chiplet offsets below this are IOCPU, the rest UAL

# IOCPU: [30:27] iocpu_sub_unit, [26:0] sub_offset
_IOCPU_SUB_SHIFT = 27
_IOCPU_SUB_OFFSET_BITS = 27

# die_id ranges: 0..15 are AHBM dies, 16..20 are IOCHIPLET dies,
# 21..31 are reserved (rejected by PhysAddr.decode).
_AHBM_DIE_MAX = 15
_CHIPLET_DIE_MIN = 16
_CHIPLET_DIE_MAX = 20
|
|
|
|
|
|
class PhysAddrError(Exception):
    """Raised when a physical-address field or encoded value is invalid.

    Used for out-of-range fields, non-zero must-be-zero bits, reserved
    ``die_id`` values and unknown resource kinds.
    """
|
|
|
|
|
|
def _chk_range(name: str, v: int, bits: int) -> None:
    """Validate that *v* fits in an unsigned field of *bits* bits.

    Args:
        name: Field name used in the error message.
        v: Value to check.
        bits: Field width; the accepted range is ``0 <= v < 2**bits``.

    Raises:
        PhysAddrError: If *v* is negative or does not fit in *bits* bits.
    """
    if not (0 <= v < (1 << bits)):
        raise PhysAddrError(f"{name} out of range for {bits} bits: {v}")
|
|
|
|
|
|
def _chk_max(name: str, v: int, maxv: int) -> None:
    """Validate that *v* lies in the inclusive range ``0..maxv``.

    Args:
        name: Field name used in the error message.
        v: Value to check.
        maxv: Largest accepted value (inclusive).

    Raises:
        PhysAddrError: If *v* is negative or greater than *maxv*.
    """
    if not (0 <= v <= maxv):
        raise PhysAddrError(f"{name} out of range (0..{maxv}): {v}")
|
|
|
|
|
|
class UnitType(IntEnum):
    """resource_kind values for AHBM resource window."""

    PE = 0  # PE_LOCAL
    MCPU = 1  # MCPU_LOCAL
    SRAM = 2  # CUBE_SRAM
|
|
|
|
|
|
class PESubUnit(IntEnum):
    """Named values for the ``pe_sub_unit`` field of a PE_LOCAL address."""

    PE_CPU_DTCM = 0
    MATH_ENGINE_DTCM = 1
    IPCQ = 2
    PE_CPU_SFR = 3
    MATH_ENGINE_SFR = 4
    DMA_ENGINE_SFR = 5
    PE_TCM = 6
|
|
|
|
|
|
class MCPUSubUnit(IntEnum):
    """Named values for the ``mcpu_sub_unit`` field of an MCPU_LOCAL address."""

    MCPU_ITCM = 0
    MCPU_DTCM = 1
    IPCQ = 2
    MCPU_SFR = 3
    MCPU_DMA_SFR = 4
    MCPU_SRAM = 5
|
|
|
|
|
|
class IOCPUSubUnit(IntEnum):
    """Named values for the ``iocpu_sub_unit`` field of an IOCPU address."""

    IOCPU_ITCM = 0
    IOCPU_DTCM = 1
    IPCQ = 2
    IOCPU_SFR = 3
    IO_DMA_SFR = 4
    IO_SRAM = 5
|
|
|
|
|
|
@dataclass(frozen=True)
class PhysAddr:
    """51-bit physical address value object (ADR-0001 Rev 2).

    Layout:
      [50:47] sip_id       (4)  -- 16 SIPs
      [46:42] die_id       (5)  -- 0..15 AHBM, 16..20 IOCHIPLET
      [41: 0] local_offset (42) -- 4 TB per die

    Only ``sip_id``, ``die_id`` and ``local_offset`` are packed by
    :meth:`encode`.  The remaining fields are a decoded view of
    ``local_offset`` that :meth:`decode` and the factory methods fill in;
    ``encode`` never reads them.
    """

    sip_id: int
    die_id: int
    local_offset: int

    # Decoded view of local_offset.  Fields that do not apply to ``kind``
    # keep their defaults.
    kind: Literal["hbm", "pe_resource", "iocpu", "ual", "raw"] = "raw"
    unit_type: UnitType = UnitType.PE
    pe_id: int = 0
    pe_sub_unit: int = 0
    sub_offset: int = 0
    hbm_offset: int = 0
    iocpu_sub_unit: int = 0
    chiplet_offset: int = 0
    mcpu_sub_unit: int = 0

    # Unannotated on purpose: a plain class attribute, not a dataclass field.
    HBM_WINDOW_BYTES = 1 << 37  # 128 GB

    # ── encode / decode ──────────────────────────────────────────────

    def encode(self) -> int:
        """Pack ``sip_id``, ``die_id`` and ``local_offset`` into an integer.

        Returns:
            The 51-bit encoded address.

        Raises:
            PhysAddrError: If a field does not fit its bit width, or if the
                must-be-zero bits of ``local_offset`` are set for an AHBM
                (``[41:38]``) or IOCHIPLET (``[41:40]``) die.
        """
        _chk_range("sip_id", self.sip_id, 4)
        _chk_range("die_id", self.die_id, 5)
        _chk_range("local_offset", self.local_offset, _LOCAL_BITS)
        # MBZ enforcement.
        # NOTE(review): die_id values above _CHIPLET_DIE_MAX get no MBZ check
        # and are not rejected here, although decode() rejects them.
        if self.die_id <= _AHBM_DIE_MAX:
            mbz_top = (self.local_offset >> _AHBM_LOCAL_USED) & 0xF
            if mbz_top != 0:
                raise PhysAddrError("AHBM local_offset bits [41:38] must be zero")
        elif _CHIPLET_DIE_MIN <= self.die_id <= _CHIPLET_DIE_MAX:
            mbz_top = (self.local_offset >> _CHIPLET_LOCAL_BITS) & 0x3
            if mbz_top != 0:
                raise PhysAddrError("IOCHIPLET local_offset bits [41:40] must be zero")
        addr = (
            (self.sip_id << _SIP_SHIFT)
            | (self.die_id << _DIE_SHIFT)
            | self.local_offset
        )
        return addr

    @staticmethod
    def decode(addr: int) -> PhysAddr:
        """Split a 51-bit address into its fields.

        Args:
            addr: Encoded address, ``0 <= addr <= MAX_51``.

        Returns:
            A ``PhysAddr`` whose ``kind`` and sub-fields reflect the die type
            selected by ``die_id``.

        Raises:
            PhysAddrError: If *addr* is outside 51 bits, ``die_id`` is in the
                reserved range 21..31, or an AHBM resource address carries an
                unknown ``resource_kind``.
        """
        if not (0 <= addr <= MAX_51):
            raise PhysAddrError("addr must be a 51-bit value")
        sip_id = (addr >> _SIP_SHIFT) & 0xF
        die_id = (addr >> _DIE_SHIFT) & 0x1F
        local_offset = addr & _LOCAL_MASK

        if die_id <= _AHBM_DIE_MAX:
            return PhysAddr._decode_ahbm(sip_id, die_id, local_offset)
        elif _CHIPLET_DIE_MIN <= die_id <= _CHIPLET_DIE_MAX:
            return PhysAddr._decode_chiplet(sip_id, die_id, local_offset)
        else:
            raise PhysAddrError(f"die_id {die_id} is reserved (21..31)")

    @staticmethod
    def _decode_ahbm(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
        """Decode the ``local_offset`` of an AHBM die (``die_id`` 0..15).

        Bit 37 selects the HBM window (1) or the resource window (0).  The
        MBZ bits ``[41:38]`` are not inspected here.

        Raises:
            PhysAddrError: If the resource window's ``resource_kind`` is not
                a ``UnitType`` member.
        """
        sel = (local_offset >> _AHBM_SEL_BIT) & 0x1
        if sel == 1:
            hbm_offset = int(local_offset & ((1 << _AHBM_SEL_BIT) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="hbm", hbm_offset=hbm_offset,
            )
        # Resource window
        res_kind = int((local_offset >> _RES_KIND_SHIFT) & _RES_KIND_MASK)
        try:
            unit_type = UnitType(res_kind)
        except ValueError:
            raise PhysAddrError(f"unknown resource_kind: {res_kind}") from None

        if unit_type == UnitType.PE:
            pe_id = int((local_offset >> _PE_ID_SHIFT) & 0xF)
            pe_sub = int((local_offset >> _PE_SUB_SHIFT) & 0xF)
            sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="pe_resource", unit_type=unit_type,
                pe_id=pe_id, pe_sub_unit=pe_sub, sub_offset=sub_off,
            )
        elif unit_type == UnitType.MCPU:
            mcpu_sub = int((local_offset >> _MCPU_SUB_SHIFT) & 0x1F)
            # MCPU sub_offset is [24:0], the same width as the PE one.
            sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="pe_resource", unit_type=unit_type,
                mcpu_sub_unit=mcpu_sub, sub_offset=sub_off,
            )
        else:  # SRAM
            sub_off = int(local_offset & ((1 << _SRAM_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="pe_resource", unit_type=unit_type,
                sub_offset=sub_off,
            )

    @staticmethod
    def _decode_chiplet(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
        """Decode the ``local_offset`` of an IOCHIPLET die (``die_id`` 16..20).

        Chiplet offsets below ``_IOCPU_BOUNDARY`` are IOCPU addresses; all
        others are UAL addresses.  The MBZ bits ``[41:40]`` are masked out of
        ``chiplet_offset`` but are not rejected here.
        """
        chip_off = local_offset & ((1 << _CHIPLET_LOCAL_BITS) - 1)
        if chip_off < _IOCPU_BOUNDARY:
            iocpu_sub = int((chip_off >> _IOCPU_SUB_SHIFT) & 0xF)
            sub_off = int(chip_off & ((1 << _IOCPU_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="iocpu", chiplet_offset=chip_off,
                iocpu_sub_unit=iocpu_sub, sub_offset=sub_off,
            )
        else:
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="ual", chiplet_offset=chip_off,
            )

    # ── AHBM factory methods ────────────────────────────────────────

    @staticmethod
    def hbm_addr(*, sip_id: int, die_id: int, hbm_offset: int) -> PhysAddr:
        """Build an HBM-window address on an AHBM die.

        Raises:
            PhysAddrError: If ``die_id`` is not 0..15 or ``hbm_offset`` does
                not fit in 37 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("hbm_offset", hbm_offset, _AHBM_SEL_BIT)
        # Bit 37 set selects the HBM window.
        local_offset = (1 << _AHBM_SEL_BIT) | int(hbm_offset)
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="hbm", hbm_offset=int(hbm_offset),
        )

    @staticmethod
    def pe_hbm_addr(
        *, sip_id: int, die_id: int,
        pe_id: int, pe_local_hbm_offset: int, slice_size_bytes: int,
    ) -> PhysAddr:
        """Build an HBM address inside one PE's slice of the HBM window.

        The HBM offset is ``pe_id * slice_size_bytes + pe_local_hbm_offset``.

        Raises:
            PhysAddrError: If ``die_id`` or ``pe_id`` is out of range, the
                local offset is outside ``0..slice_size_bytes - 1``, or the
                resulting offset exceeds ``HBM_WINDOW_BYTES``.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("pe_id", pe_id, 4)
        if not (0 <= pe_local_hbm_offset < slice_size_bytes):
            raise PhysAddrError("pe_local_hbm_offset out of PE local slice range")
        hbm_offset = int(pe_id) * int(slice_size_bytes) + int(pe_local_hbm_offset)
        if not (0 <= hbm_offset < PhysAddr.HBM_WINDOW_BYTES):
            raise PhysAddrError("HBM offset exceeds reserved 128GB window")
        return PhysAddr.hbm_addr(sip_id=sip_id, die_id=die_id, hbm_offset=hbm_offset)

    @staticmethod
    def hbm_pe_id(hbm_offset: int, slice_size_bytes: int) -> int:
        """Return the index of the PE slice that contains *hbm_offset*.

        Raises:
            PhysAddrError: If ``slice_size_bytes`` is not positive.
        """
        # Report a domain error instead of a bare ZeroDivisionError.
        if slice_size_bytes <= 0:
            raise PhysAddrError(
                f"slice_size_bytes must be positive: {slice_size_bytes}"
            )
        return hbm_offset // slice_size_bytes

    @staticmethod
    def pe_tcm_addr(
        *, sip_id: int, die_id: int, pe_id: int, tcm_offset: int,
    ) -> PhysAddr:
        """Build a PE_LOCAL address in the ``PESubUnit.PE_TCM`` sub-unit."""
        return PhysAddr.pe_resource_addr(
            sip_id=sip_id, die_id=die_id, pe_id=pe_id,
            pe_sub_unit=PESubUnit.PE_TCM, sub_offset=tcm_offset,
        )

    @staticmethod
    def pe_resource_addr(
        *, sip_id: int, die_id: int, pe_id: int,
        pe_sub_unit: int, sub_offset: int,
    ) -> PhysAddr:
        """Build a PE_LOCAL resource address on an AHBM die.

        Raises:
            PhysAddrError: If ``die_id`` is not 0..15, ``pe_id`` or
                ``pe_sub_unit`` does not fit in 4 bits, or ``sub_offset``
                does not fit in 25 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("pe_id", pe_id, 4)
        _chk_range("pe_sub_unit", pe_sub_unit, 4)
        _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
        local_offset = (
            (UnitType.PE << _RES_KIND_SHIFT)
            | (pe_id << _PE_ID_SHIFT)
            | (pe_sub_unit << _PE_SUB_SHIFT)
            | sub_offset
        )
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=UnitType.PE,
            pe_id=pe_id, pe_sub_unit=pe_sub_unit, sub_offset=sub_offset,
        )

    @staticmethod
    def cube_sram_addr(
        *, sip_id: int, die_id: int, sram_offset: int,
    ) -> PhysAddr:
        """Build a CUBE_SRAM resource address on an AHBM die.

        Raises:
            PhysAddrError: If ``die_id`` is not 0..15 or ``sram_offset`` does
                not fit in 25 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("sram_offset", sram_offset, _SRAM_OFFSET_BITS)
        local_offset = (UnitType.SRAM << _RES_KIND_SHIFT) | sram_offset
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=UnitType.SRAM, sub_offset=sram_offset,
        )

    @staticmethod
    def mcpu_resource_addr(
        *, sip_id: int, die_id: int, mcpu_sub_unit: int, sub_offset: int,
    ) -> PhysAddr:
        """Build an MCPU_LOCAL resource address on an AHBM die.

        Raises:
            PhysAddrError: If ``die_id`` is not 0..15, ``mcpu_sub_unit`` does
                not fit in 5 bits, or ``sub_offset`` does not fit in 25 bits.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("mcpu_sub_unit", mcpu_sub_unit, 5)
        # MCPU sub_offset is [24:0], the same width as the PE one.
        _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
        local_offset = (
            (UnitType.MCPU << _RES_KIND_SHIFT)
            | (mcpu_sub_unit << _MCPU_SUB_SHIFT)
            | sub_offset
        )
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=UnitType.MCPU,
            mcpu_sub_unit=mcpu_sub_unit, sub_offset=sub_offset,
        )

    # ── IOCHIPLET factory methods ────────────────────────────────────

    @staticmethod
    def iocpu_resource_addr(
        *, sip_id: int, die_id: int, iocpu_sub_unit: int, sub_offset: int,
    ) -> PhysAddr:
        """Build an IOCPU resource address on an IOCHIPLET die.

        Raises:
            PhysAddrError: If ``die_id`` is not 16..20, ``iocpu_sub_unit``
                does not fit in 4 bits, or ``sub_offset`` does not fit in
                27 bits.
        """
        _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
        if die_id < _CHIPLET_DIE_MIN:
            raise PhysAddrError(
                f"die_id {die_id} is not an IOCHIPLET "
                f"(must be {_CHIPLET_DIE_MIN}..{_CHIPLET_DIE_MAX})"
            )
        _chk_range("iocpu_sub_unit", iocpu_sub_unit, 4)
        _chk_range("sub_offset", sub_offset, _IOCPU_SUB_OFFSET_BITS)
        chiplet_offset = (iocpu_sub_unit << _IOCPU_SUB_SHIFT) | sub_offset
        # Defensive: with the field widths checked above this cannot trigger,
        # but it guards the boundary if the layout constants change.
        if chiplet_offset >= _IOCPU_BOUNDARY:
            raise PhysAddrError("IOCPU region overflow (must be < 2 GB)")
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
            kind="iocpu", chiplet_offset=chiplet_offset,
            iocpu_sub_unit=iocpu_sub_unit, sub_offset=sub_offset,
        )

    @staticmethod
    def ual_addr(*, sip_id: int, die_id: int, ual_offset: int) -> PhysAddr:
        """Build a UAL address on an IOCHIPLET die.

        The chiplet offset is ``_IOCPU_BOUNDARY + ual_offset``.

        Raises:
            PhysAddrError: If ``die_id`` is not 16..20, ``ual_offset`` is
                negative, or the resulting chiplet offset does not fit in
                40 bits.
        """
        _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
        if die_id < _CHIPLET_DIE_MIN:
            raise PhysAddrError(f"die_id {die_id} is not an IOCHIPLET")
        # A negative offset would land below _IOCPU_BOUNDARY, i.e. inside the
        # IOCPU region, while still being labelled kind="ual".
        if ual_offset < 0:
            raise PhysAddrError(f"ual_offset must be non-negative: {ual_offset}")
        chiplet_offset = _IOCPU_BOUNDARY + ual_offset
        _chk_range("chiplet_offset", chiplet_offset, _CHIPLET_LOCAL_BITS)
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
            kind="ual", chiplet_offset=chiplet_offset,
        )
|