ADR-0001 Rev 2: 51-bit PhysAddr layout with concrete sub-unit tables
Remove rack_id (4 bits), rename sip_seg → die_id, and shift the remaining fields to enable a 42-bit local_offset (4 TB per die). Define PE_LOCAL/MCPU_LOCAL/CUBE_SRAM sub-unit tables for AHBM dies and an IOCPU sub-unit table for IOCHIPLET dies (1 TB window). Supersedes ADR-0031.

Also fixes a latent VA/PA confusion in the pe_dma pipeline DMA path, where virtual addresses were decoded as physical addresses without MMU translation; the bug was previously masked by coincidental bit-position alignment.

Test results: 529 passed (+6 recovered), 10 pre-existing failures unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,47 @@ from typing import Literal
|
||||
|
||||
MAX_51 = (1 << 51) - 1  # largest value that fits in 51 bits

# ── Layout constants (ADR-0001 Rev 2) ────────────────────────────────
#   [50:47] sip_id        (4)
#   [46:42] die_id        (5)
#   [41: 0] local_offset  (42)
_SIP_SHIFT = 47
_DIE_SHIFT = 42
_LOCAL_BITS = 42
_LOCAL_MASK = (1 << _LOCAL_BITS) - 1

# AHBM die: [41:38] MBZ, [37] addr_space, [36:0] sub-address
_AHBM_SEL_BIT = 37
_AHBM_LOCAL_USED = 38  # bits actually meaningful for AHBM

# Resource window: [36:34] resource_kind, [33:0] kind_local
_RES_KIND_SHIFT = 34
_RES_KIND_MASK = 0x7

# PE_LOCAL: [32:29] pe_id, [28:25] pe_sub_unit, [24:0] sub_offset
_PE_ID_SHIFT = 29
_PE_SUB_SHIFT = 25
_PE_SUB_OFFSET_BITS = 25

# MCPU_LOCAL: [29:25] mcpu_sub_unit, [24:0] sub_offset
_MCPU_SUB_SHIFT = 25

# CUBE_SRAM: [24:0] sram_offset
_SRAM_OFFSET_BITS = 25

# IOCHIPLET: [41:40] MBZ, [39:0] chiplet_offset
_CHIPLET_LOCAL_BITS = 40
_IOCPU_BOUNDARY = 1 << 31  # 2 GB

# IOCPU: [30:27] iocpu_sub_unit, [26:0] sub_offset
_IOCPU_SUB_SHIFT = 27
_IOCPU_SUB_OFFSET_BITS = 27

# die_id ranges: 0..15 are AHBM dies, 16..20 are IOCHIPLET dies
_AHBM_DIE_MAX = 15
_CHIPLET_DIE_MIN = 16
_CHIPLET_DIE_MAX = 20
|
||||
|
||||
|
||||
class PhysAddrError(Exception):
    """Raised when a physical-address field or encoded address is invalid."""
|
||||
@@ -22,163 +63,278 @@ def _chk_max(name: str, v: int, maxv: int) -> None:
|
||||
|
||||
|
||||
class UnitType(IntEnum):
|
||||
PE = 0
|
||||
MCPU = 1
|
||||
SRAM = 2
|
||||
"""resource_kind values for AHBM resource window."""
|
||||
PE = 0 # PE_LOCAL
|
||||
MCPU = 1 # MCPU_LOCAL
|
||||
SRAM = 2 # CUBE_SRAM
|
||||
|
||||
|
||||
class PESubUnit(IntEnum):
    """Named values for the ``pe_sub_unit`` field of a PE_LOCAL address."""

    PE_CPU_DTCM = 0
    MATH_ENGINE_DTCM = 1
    IPCQ = 2
    PE_CPU_SFR = 3
    MATH_ENGINE_SFR = 4
    DMA_ENGINE_SFR = 5
    PE_TCM = 6
|
||||
|
||||
|
||||
class MCPUSubUnit(IntEnum):
    """Named values for the ``mcpu_sub_unit`` field of an MCPU_LOCAL address."""

    MCPU_ITCM = 0
    MCPU_DTCM = 1
    IPCQ = 2
    MCPU_SFR = 3
    MCPU_DMA_SFR = 4
    MCPU_SRAM = 5
|
||||
|
||||
|
||||
class IOCPUSubUnit(IntEnum):
    """Named values for the ``iocpu_sub_unit`` field of an IOCPU address."""

    IOCPU_ITCM = 0
    IOCPU_DTCM = 1
    IPCQ = 2
    IOCPU_SFR = 3
    IO_DMA_SFR = 4
    IO_SRAM = 5
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PhysAddr:
|
||||
"""
|
||||
51-bit physical address value object.
|
||||
"""51-bit physical address value object (ADR-0001 Rev 2).
|
||||
|
||||
Layout:
|
||||
[50:47] rack_id (4)
|
||||
[46:43] sip_id (4)
|
||||
[42:38] sip_seg (5) # cube_id
|
||||
[37:0] local_offset (38) => each segment is 256GB
|
||||
|
||||
local_offset:
|
||||
[37] selector: 1 = HBM window (128GB reserved), 0 = PE resource window
|
||||
[50:47] sip_id (4) -- 16 SIPs
|
||||
[46:42] die_id (5) -- 0..15 AHBM, 16..20 IOCHIPLET
|
||||
[41: 0] local_offset (42) -- 4 TB per die
|
||||
"""
|
||||
|
||||
rack_id: int
|
||||
sip_id: int
|
||||
sip_seg: int
|
||||
die_id: int
|
||||
local_offset: int
|
||||
|
||||
kind: Literal["hbm", "pe_resource", "raw"] = "raw"
|
||||
cube_id: int = 0
|
||||
kind: Literal["hbm", "pe_resource", "iocpu", "ual", "raw"] = "raw"
|
||||
unit_type: UnitType = UnitType.PE
|
||||
pe_id: int = 0
|
||||
ext: int = 0
|
||||
pe_sub_unit: int = 0
|
||||
sub_offset: int = 0
|
||||
hbm_offset: int = 0
|
||||
iocpu_sub_unit: int = 0
|
||||
chiplet_offset: int = 0
|
||||
mcpu_sub_unit: int = 0
|
||||
|
||||
HBM_WINDOW_BYTES = 1 << 37 # 128GB
|
||||
HBM_WINDOW_BYTES = 1 << 37 # 128 GB
|
||||
|
||||
# ── encode / decode ──────────────────────────────────────────────
|
||||
|
||||
def encode(self) -> int:
|
||||
_chk_range("rack_id", self.rack_id, 4)
|
||||
_chk_range("sip_id", self.sip_id, 4)
|
||||
_chk_range("sip_seg", self.sip_seg, 5)
|
||||
_chk_range("local_offset", self.local_offset, 38)
|
||||
addr = (self.rack_id << 47) | (self.sip_id << 43) | (self.sip_seg << 38) | self.local_offset
|
||||
if not (0 <= addr <= MAX_51):
|
||||
raise PhysAddrError("address exceeds 51-bit space")
|
||||
_chk_range("die_id", self.die_id, 5)
|
||||
_chk_range("local_offset", self.local_offset, _LOCAL_BITS)
|
||||
# MBZ enforcement
|
||||
if self.die_id <= _AHBM_DIE_MAX:
|
||||
mbz_top = (self.local_offset >> _AHBM_LOCAL_USED) & 0xF
|
||||
if mbz_top != 0:
|
||||
raise PhysAddrError("AHBM local_offset bits [41:38] must be zero")
|
||||
elif _CHIPLET_DIE_MIN <= self.die_id <= _CHIPLET_DIE_MAX:
|
||||
mbz_top = (self.local_offset >> _CHIPLET_LOCAL_BITS) & 0x3
|
||||
if mbz_top != 0:
|
||||
raise PhysAddrError("IOCHIPLET local_offset bits [41:40] must be zero")
|
||||
addr = (self.sip_id << _SIP_SHIFT) | (self.die_id << _DIE_SHIFT) | self.local_offset
|
||||
return addr
|
||||
|
||||
@staticmethod
|
||||
def decode(addr: int) -> PhysAddr:
|
||||
if not (0 <= addr <= MAX_51):
|
||||
raise PhysAddrError("addr must be a 51-bit value")
|
||||
rack = (addr >> 47) & 0xF
|
||||
sip_id = (addr >> 43) & 0xF
|
||||
sip_seg = (addr >> 38) & 0x1F
|
||||
off = addr & ((1 << 38) - 1)
|
||||
cube_id = sip_seg
|
||||
sel = (off >> 37) & 0x1
|
||||
if sel == 1:
|
||||
hbm_offset = int(off & ((1 << 37) - 1))
|
||||
return PhysAddr(
|
||||
rack_id=rack,
|
||||
sip_id=sip_id,
|
||||
sip_seg=sip_seg,
|
||||
local_offset=off,
|
||||
kind="hbm",
|
||||
cube_id=cube_id,
|
||||
hbm_offset=hbm_offset,
|
||||
)
|
||||
# PE resource decode
|
||||
raw_ut = int((off >> 34) & 0x7)
|
||||
try:
|
||||
unit_type = UnitType(raw_ut)
|
||||
except ValueError:
|
||||
raise PhysAddrError(f"unknown unit_type: {raw_ut}") from None
|
||||
pe_id = int((off >> 30) & 0xF)
|
||||
ext = int((off >> 29) & 0x1)
|
||||
sub_offset = int(off & ((1 << 29) - 1))
|
||||
return PhysAddr(
|
||||
rack_id=rack,
|
||||
sip_id=sip_id,
|
||||
sip_seg=sip_seg,
|
||||
local_offset=off,
|
||||
kind="pe_resource",
|
||||
cube_id=cube_id,
|
||||
unit_type=unit_type,
|
||||
pe_id=pe_id,
|
||||
ext=ext,
|
||||
sub_offset=sub_offset,
|
||||
hbm_offset=0,
|
||||
)
|
||||
sip_id = (addr >> _SIP_SHIFT) & 0xF
|
||||
die_id = (addr >> _DIE_SHIFT) & 0x1F
|
||||
local_offset = addr & _LOCAL_MASK
|
||||
|
||||
if die_id <= _AHBM_DIE_MAX:
|
||||
return PhysAddr._decode_ahbm(sip_id, die_id, local_offset)
|
||||
elif _CHIPLET_DIE_MIN <= die_id <= _CHIPLET_DIE_MAX:
|
||||
return PhysAddr._decode_chiplet(sip_id, die_id, local_offset)
|
||||
else:
|
||||
raise PhysAddrError(f"die_id {die_id} is reserved (21..31)")
|
||||
|
||||
@staticmethod
|
||||
def hbm_addr(*, rack_id: int, sip_id: int, cube_id: int, hbm_offset: int) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_range("hbm_offset", hbm_offset, 37)
|
||||
sip_seg = cube_id
|
||||
local_offset = (1 << 37) | int(hbm_offset)
|
||||
def _decode_ahbm(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
    """Decode the ``local_offset`` of an AHBM die into a ``PhysAddr``.

    Bit ``_AHBM_SEL_BIT`` selects the address space: when set, the bits
    below it are returned as ``hbm_offset`` with ``kind="hbm"``; otherwise
    the resource_kind field chooses between the PE, MCPU and SRAM layouts
    and the result has ``kind="pe_resource"``.

    Bits above the selector are not inspected here; ``local_offset`` is
    stored on the result unchanged.

    Raises:
        PhysAddrError: if the resource_kind field is not a ``UnitType`` value.
    """
    if (local_offset >> _AHBM_SEL_BIT) & 0x1:
        # HBM window: everything below the selector bit is the HBM offset.
        hbm_offset = int(local_offset & ((1 << _AHBM_SEL_BIT) - 1))
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="hbm", hbm_offset=hbm_offset,
        )

    # Resource window
    res_kind = int((local_offset >> _RES_KIND_SHIFT) & _RES_KIND_MASK)
    try:
        unit_type = UnitType(res_kind)
    except ValueError:
        raise PhysAddrError(f"unknown resource_kind: {res_kind}") from None

    if unit_type == UnitType.PE:
        pe_id = int((local_offset >> _PE_ID_SHIFT) & 0xF)
        pe_sub = int((local_offset >> _PE_SUB_SHIFT) & 0xF)
        sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=unit_type,
            pe_id=pe_id, pe_sub_unit=pe_sub, sub_offset=sub_off,
        )

    if unit_type == UnitType.MCPU:
        mcpu_sub = int((local_offset >> _MCPU_SUB_SHIFT) & 0x1F)
        # The MCPU sub_offset reuses the PE sub-offset width constant.
        sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=unit_type,
            mcpu_sub_unit=mcpu_sub, sub_offset=sub_off,
        )

    # Any other UnitType value is handled as SRAM.
    sub_off = int(local_offset & ((1 << _SRAM_OFFSET_BITS) - 1))
    return PhysAddr(
        sip_id=sip_id, die_id=die_id, local_offset=local_offset,
        kind="pe_resource", unit_type=unit_type,
        sub_offset=sub_off,
    )
|
||||
|
||||
@staticmethod
def _decode_chiplet(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
    """Decode the ``local_offset`` of an IOCHIPLET die into a ``PhysAddr``.

    The low ``_CHIPLET_LOCAL_BITS`` bits form ``chiplet_offset``. Offsets
    below ``_IOCPU_BOUNDARY`` are split into ``iocpu_sub_unit`` and
    ``sub_offset`` and returned with ``kind="iocpu"``; all other offsets
    are returned with ``kind="ual"`` and no further decoding.

    Bits above ``_CHIPLET_LOCAL_BITS`` are masked off for decoding but
    ``local_offset`` is stored on the result unchanged.
    """
    chip_off = local_offset & ((1 << _CHIPLET_LOCAL_BITS) - 1)

    if chip_off >= _IOCPU_BOUNDARY:
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="ual", chiplet_offset=chip_off,
        )

    iocpu_sub = int((chip_off >> _IOCPU_SUB_SHIFT) & 0xF)
    sub_off = int(chip_off & ((1 << _IOCPU_SUB_OFFSET_BITS) - 1))
    return PhysAddr(
        sip_id=sip_id, die_id=die_id, local_offset=local_offset,
        kind="iocpu", chiplet_offset=chip_off,
        iocpu_sub_unit=iocpu_sub, sub_offset=sub_off,
    )
|
||||
|
||||
# ── AHBM factory methods ────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def hbm_addr(*, sip_id: int, die_id: int, hbm_offset: int) -> PhysAddr:
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("hbm_offset", hbm_offset, _AHBM_SEL_BIT)
|
||||
local_offset = (1 << _AHBM_SEL_BIT) | int(hbm_offset)
|
||||
return PhysAddr(
|
||||
rack_id=rack_id,
|
||||
sip_id=sip_id,
|
||||
sip_seg=sip_seg,
|
||||
local_offset=local_offset,
|
||||
kind="hbm",
|
||||
cube_id=cube_id,
|
||||
hbm_offset=int(hbm_offset),
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="hbm", hbm_offset=int(hbm_offset),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def pe_hbm_addr(
|
||||
*,
|
||||
rack_id: int,
|
||||
sip_id: int,
|
||||
cube_id: int,
|
||||
pe_id: int,
|
||||
pe_local_hbm_offset: int,
|
||||
slice_size_bytes: int,
|
||||
*, sip_id: int, die_id: int,
|
||||
pe_id: int, pe_local_hbm_offset: int, slice_size_bytes: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("pe_id", pe_id, 4)
|
||||
if not (0 <= pe_local_hbm_offset < slice_size_bytes):
|
||||
raise PhysAddrError("pe_local_hbm_offset out of PE local slice range")
|
||||
hbm_offset = int(pe_id) * int(slice_size_bytes) + int(pe_local_hbm_offset)
|
||||
if not (0 <= hbm_offset < PhysAddr.HBM_WINDOW_BYTES):
|
||||
raise PhysAddrError("HBM offset exceeds reserved 128GB window")
|
||||
return PhysAddr.hbm_addr(
|
||||
rack_id=rack_id, sip_id=sip_id, cube_id=cube_id, hbm_offset=hbm_offset
|
||||
)
|
||||
return PhysAddr.hbm_addr(sip_id=sip_id, die_id=die_id, hbm_offset=hbm_offset)
|
||||
|
||||
@staticmethod
|
||||
def hbm_pe_id(hbm_offset: int, slice_size_bytes: int) -> int:
|
||||
return hbm_offset // slice_size_bytes
|
||||
|
||||
@staticmethod
|
||||
def cube_sram_addr(
|
||||
*, rack_id: int, sip_id: int, cube_id: int, sram_offset: int,
|
||||
def pe_tcm_addr(
|
||||
*, sip_id: int, die_id: int, pe_id: int, tcm_offset: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_range("sram_offset", sram_offset, 29)
|
||||
sip_seg = cube_id
|
||||
local_offset = (UnitType.SRAM << 34) | sram_offset
|
||||
return PhysAddr(
|
||||
rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
|
||||
local_offset=local_offset,
|
||||
kind="pe_resource", cube_id=cube_id,
|
||||
unit_type=UnitType.SRAM, sub_offset=sram_offset,
|
||||
return PhysAddr.pe_resource_addr(
|
||||
sip_id=sip_id, die_id=die_id, pe_id=pe_id,
|
||||
pe_sub_unit=PESubUnit.PE_TCM, sub_offset=tcm_offset,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def pe_tcm_addr(
|
||||
*, rack_id: int, sip_id: int, cube_id: int, pe_id: int, tcm_offset: int,
|
||||
def pe_resource_addr(
|
||||
*, sip_id: int, die_id: int, pe_id: int,
|
||||
pe_sub_unit: int, sub_offset: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("pe_id", pe_id, 4)
|
||||
_chk_range("tcm_offset", tcm_offset, 29)
|
||||
sip_seg = cube_id
|
||||
local_offset = (UnitType.PE << 34) | (pe_id << 30) | tcm_offset
|
||||
return PhysAddr(
|
||||
rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
|
||||
local_offset=local_offset,
|
||||
kind="pe_resource", cube_id=cube_id,
|
||||
unit_type=UnitType.PE, pe_id=pe_id, sub_offset=tcm_offset,
|
||||
_chk_range("pe_sub_unit", pe_sub_unit, 4)
|
||||
_chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
|
||||
local_offset = (
|
||||
(UnitType.PE << _RES_KIND_SHIFT)
|
||||
| (pe_id << _PE_ID_SHIFT)
|
||||
| (pe_sub_unit << _PE_SUB_SHIFT)
|
||||
| sub_offset
|
||||
)
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="pe_resource", unit_type=UnitType.PE,
|
||||
pe_id=pe_id, pe_sub_unit=pe_sub_unit, sub_offset=sub_offset,
|
||||
)
|
||||
|
||||
@staticmethod
def cube_sram_addr(
    *, sip_id: int, die_id: int, sram_offset: int,
) -> PhysAddr:
    """Build a CUBE_SRAM resource address on an AHBM die.

    ``die_id`` is checked against ``_AHBM_DIE_MAX`` and ``sram_offset``
    against ``_SRAM_OFFSET_BITS``. ``sip_id`` is not validated here.
    """
    _chk_max("die_id", die_id, _AHBM_DIE_MAX)
    _chk_range("sram_offset", sram_offset, _SRAM_OFFSET_BITS)

    # resource_kind = SRAM in the kind field, offset in the low bits.
    local_offset = (UnitType.SRAM << _RES_KIND_SHIFT) | sram_offset
    return PhysAddr(
        sip_id=sip_id, die_id=die_id, local_offset=local_offset,
        kind="pe_resource", unit_type=UnitType.SRAM, sub_offset=sram_offset,
    )
|
||||
|
||||
@staticmethod
def mcpu_resource_addr(
    *, sip_id: int, die_id: int, mcpu_sub_unit: int, sub_offset: int,
) -> PhysAddr:
    """Build an MCPU_LOCAL resource address on an AHBM die.

    ``die_id`` is checked against ``_AHBM_DIE_MAX``, ``mcpu_sub_unit``
    against a 5-bit width and ``sub_offset`` against
    ``_PE_SUB_OFFSET_BITS`` (the MCPU layout reuses that width constant).
    ``sip_id`` is not validated here.
    """
    _chk_max("die_id", die_id, _AHBM_DIE_MAX)
    _chk_range("mcpu_sub_unit", mcpu_sub_unit, 5)
    _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)

    kind_bits = UnitType.MCPU << _RES_KIND_SHIFT
    sub_unit_bits = mcpu_sub_unit << _MCPU_SUB_SHIFT
    local_offset = kind_bits | sub_unit_bits | sub_offset
    return PhysAddr(
        sip_id=sip_id, die_id=die_id, local_offset=local_offset,
        kind="pe_resource", unit_type=UnitType.MCPU,
        mcpu_sub_unit=mcpu_sub_unit, sub_offset=sub_offset,
    )
|
||||
|
||||
# ── IOCHIPLET factory methods ────────────────────────────────────
|
||||
|
||||
@staticmethod
def iocpu_resource_addr(
    *, sip_id: int, die_id: int, iocpu_sub_unit: int, sub_offset: int,
) -> PhysAddr:
    """Build an IOCPU resource address on an IOCHIPLET die.

    ``die_id`` must lie in ``_CHIPLET_DIE_MIN.._CHIPLET_DIE_MAX``;
    ``iocpu_sub_unit`` is checked against a 4-bit width and ``sub_offset``
    against ``_IOCPU_SUB_OFFSET_BITS``. ``sip_id`` is not validated here.

    Raises:
        PhysAddrError: if ``die_id`` is below the IOCHIPLET range, or the
            combined offset reaches ``_IOCPU_BOUNDARY``.
    """
    _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
    if die_id < _CHIPLET_DIE_MIN:
        raise PhysAddrError(
            f"die_id {die_id} is not an IOCHIPLET "
            f"(must be {_CHIPLET_DIE_MIN}..{_CHIPLET_DIE_MAX})"
        )
    _chk_range("iocpu_sub_unit", iocpu_sub_unit, 4)
    _chk_range("sub_offset", sub_offset, _IOCPU_SUB_OFFSET_BITS)

    chiplet_offset = (iocpu_sub_unit << _IOCPU_SUB_SHIFT) | sub_offset
    # NOTE(review): presumably unreachable once the two width checks above
    # pass (4 + 27 bits stay below 1 << 31); kept as a guard.
    if chiplet_offset >= _IOCPU_BOUNDARY:
        raise PhysAddrError("IOCPU region overflow (must be < 2 GB)")

    return PhysAddr(
        sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
        kind="iocpu", chiplet_offset=chiplet_offset,
        iocpu_sub_unit=iocpu_sub_unit, sub_offset=sub_offset,
    )
|
||||
|
||||
@staticmethod
def ual_addr(*, sip_id: int, die_id: int, ual_offset: int) -> PhysAddr:
    """Build a ``kind="ual"`` address on an IOCHIPLET die.

    ``ual_offset`` is relative to ``_IOCPU_BOUNDARY``: the resulting
    ``chiplet_offset`` is ``_IOCPU_BOUNDARY + ual_offset`` and must fit in
    ``_CHIPLET_LOCAL_BITS`` bits. ``sip_id`` is not validated here.

    Raises:
        PhysAddrError: if ``die_id`` is below the IOCHIPLET range or
            ``ual_offset`` is negative.
    """
    _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
    if die_id < _CHIPLET_DIE_MIN:
        raise PhysAddrError(f"die_id {die_id} is not an IOCHIPLET")

    # A negative offset would land below the boundary, i.e. inside the
    # IOCPU region, while still being labelled "ual"; reject it up front.
    if ual_offset < 0:
        raise PhysAddrError("ual_offset must be non-negative")

    chiplet_offset = _IOCPU_BOUNDARY + ual_offset
    _chk_range("chiplet_offset", chiplet_offset, _CHIPLET_LOCAL_BITS)
    return PhysAddr(
        sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
        kind="ual", chiplet_offset=chiplet_offset,
    )
|
||||
|
||||
Reference in New Issue
Block a user