ADR-0001 Rev 2: 51-bit PhysAddr layout with concrete sub-unit tables

Remove rack_id (4 bits), rename sip_seg→die_id, shift fields to enable
42-bit local_offset (4 TB per die). Define PE_LOCAL/MCPU_LOCAL/CUBE_SRAM
sub-unit tables for AHBM dies and IOCPU sub-unit table for IOCHIPLET
dies (1 TB window). Supersedes ADR-0031.

Also fixes latent VA/PA confusion in pe_dma pipeline DMA path where
virtual addresses were decoded as physical addresses without MMU
translation — previously masked by coincidental bit-position alignment.

529 passed (+6 recovered), 10 pre-existing failures unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-27 15:52:29 -07:00
parent e9cc40f74d
commit 81cc32c46b
27 changed files with 814 additions and 272 deletions
+264 -108
View File
@@ -6,6 +6,47 @@ from typing import Literal
MAX_51 = (1 << 51) - 1  # largest value that fits in a 51-bit address

# ── Layout constants (ADR-0001 Rev 2) ────────────────────────────────
# Top-level split of the 51-bit physical address:
#   [50:47] sip_id        (4)
#   [46:42] die_id        (5)
#   [41: 0] local_offset  (42)
_LOCAL_BITS = 42
_LOCAL_MASK = (1 << _LOCAL_BITS) - 1
_DIE_SHIFT = _LOCAL_BITS        # die_id sits directly above local_offset
_SIP_SHIFT = _DIE_SHIFT + 5     # sip_id sits above the 5-bit die_id

# AHBM die local_offset: [41:38] MBZ, [37] addr_space, [36:0] sub-address
_AHBM_SEL_BIT = 37
_AHBM_LOCAL_USED = _AHBM_SEL_BIT + 1  # number of low bits meaningful for AHBM

# AHBM resource window: [36:34] resource_kind, [33:0] kind_local
_RES_KIND_SHIFT = 34
_RES_KIND_MASK = 0b111

# PE_LOCAL: [32:29] pe_id, [28:25] pe_sub_unit, [24:0] sub_offset
_PE_SUB_OFFSET_BITS = 25
_PE_SUB_SHIFT = _PE_SUB_OFFSET_BITS   # pe_sub_unit starts right above sub_offset
_PE_ID_SHIFT = _PE_SUB_SHIFT + 4      # pe_id starts above the 4-bit pe_sub_unit

# MCPU_LOCAL: [29:25] mcpu_sub_unit, [24:0] sub_offset
_MCPU_SUB_SHIFT = 25

# CUBE_SRAM: [24:0] sram_offset
_SRAM_OFFSET_BITS = 25

# IOCHIPLET die local_offset: [41:40] MBZ, [39:0] chiplet_offset
_CHIPLET_LOCAL_BITS = 40

# IOCPU region: [30:27] iocpu_sub_unit, [26:0] sub_offset
_IOCPU_SUB_OFFSET_BITS = 27
_IOCPU_SUB_SHIFT = _IOCPU_SUB_OFFSET_BITS
# First chiplet_offset past the IOCPU region: 4-bit sub-unit + 27-bit offset = 2 GB
_IOCPU_BOUNDARY = 1 << (_IOCPU_SUB_SHIFT + 4)

# die_id ranges (inclusive)
_AHBM_DIE_MAX = 15
_CHIPLET_DIE_MIN = _AHBM_DIE_MAX + 1
_CHIPLET_DIE_MAX = 20
class PhysAddrError(Exception):
    """Raised when a physical-address field or encoding is invalid."""
@@ -22,163 +63,278 @@ def _chk_max(name: str, v: int, maxv: int) -> None:
class UnitType(IntEnum):
    """resource_kind values for AHBM resource window."""

    # Each member must be defined exactly once: Enum rejects a reused name
    # with TypeError, so the pre-Rev-2 duplicates are dropped.
    PE = 0    # PE_LOCAL
    MCPU = 1  # MCPU_LOCAL
    SRAM = 2  # CUBE_SRAM
class PESubUnit(IntEnum):
    """pe_sub_unit values (bits [28:25] of a PE_LOCAL address)."""

    PE_CPU_DTCM = 0
    MATH_ENGINE_DTCM = 1
    IPCQ = 2
    PE_CPU_SFR = 3
    MATH_ENGINE_SFR = 4
    DMA_ENGINE_SFR = 5
    PE_TCM = 6
class MCPUSubUnit(IntEnum):
    """mcpu_sub_unit values (bits [29:25] of an MCPU_LOCAL address)."""

    MCPU_ITCM = 0
    MCPU_DTCM = 1
    IPCQ = 2
    MCPU_SFR = 3
    MCPU_DMA_SFR = 4
    MCPU_SRAM = 5
class IOCPUSubUnit(IntEnum):
    """iocpu_sub_unit values (bits [30:27] of an IOCPU chiplet offset)."""

    IOCPU_ITCM = 0
    IOCPU_DTCM = 1
    IPCQ = 2
    IOCPU_SFR = 3
    IO_DMA_SFR = 4
    IO_SRAM = 5
@dataclass(frozen=True)
class PhysAddr:
    """51-bit physical address value object (ADR-0001 Rev 2).

    Layout:
        [50:47] sip_id        (4)  -- 16 SIPs
        [46:42] die_id        (5)  -- 0..15 AHBM, 16..20 IOCHIPLET
        [41: 0] local_offset  (42) -- 4 TB per die

    Only ``sip_id``, ``die_id`` and ``local_offset`` take part in
    :meth:`encode`. The remaining fields are a decoded view of
    ``local_offset`` that :meth:`decode` and the factory methods fill in;
    fields that do not apply to a given ``kind`` keep their defaults.
    """

    sip_id: int
    die_id: int
    local_offset: int
    # "hbm" / "pe_resource" for AHBM dies, "iocpu" / "ual" for IOCHIPLET dies,
    # "raw" when the instance was built directly without a decoded view.
    kind: Literal["hbm", "pe_resource", "iocpu", "ual", "raw"] = "raw"
    unit_type: UnitType = UnitType.PE   # resource_kind (kind == "pe_resource")
    pe_id: int = 0                      # PE_LOCAL only
    pe_sub_unit: int = 0                # PE_LOCAL only
    sub_offset: int = 0                 # offset inside the selected sub-unit
    hbm_offset: int = 0                 # kind == "hbm" only
    iocpu_sub_unit: int = 0             # kind == "iocpu" only
    chiplet_offset: int = 0             # kind in ("iocpu", "ual")
    mcpu_sub_unit: int = 0              # MCPU_LOCAL only

    HBM_WINDOW_BYTES = 1 << 37  # 128 GB

    # ── encode / decode ──────────────────────────────────────────────

    def encode(self) -> int:
        """Pack ``sip_id``, ``die_id`` and ``local_offset`` into one integer.

        Returns:
            ``sip_id`` at bit 47, ``die_id`` at bit 42 and ``local_offset``
            in the low 42 bits.

        Raises:
            PhysAddrError: if must-be-zero bits of ``local_offset`` are set
                for an AHBM or IOCHIPLET die. Field-width checks are
                delegated to ``_chk_range`` (defined elsewhere in this
                module; presumably it raises ``PhysAddrError`` as well).
        """
        _chk_range("sip_id", self.sip_id, 4)
        _chk_range("die_id", self.die_id, 5)
        _chk_range("local_offset", self.local_offset, _LOCAL_BITS)

        # MBZ enforcement.
        # NOTE(review): reserved die_ids (21..31) get no MBZ check and encode
        # successfully, although decode() rejects them -- confirm intended.
        if self.die_id <= _AHBM_DIE_MAX:
            mbz_top = (self.local_offset >> _AHBM_LOCAL_USED) & 0xF
            if mbz_top != 0:
                raise PhysAddrError("AHBM local_offset bits [41:38] must be zero")
        elif _CHIPLET_DIE_MIN <= self.die_id <= _CHIPLET_DIE_MAX:
            mbz_top = (self.local_offset >> _CHIPLET_LOCAL_BITS) & 0x3
            if mbz_top != 0:
                raise PhysAddrError("IOCHIPLET local_offset bits [41:40] must be zero")

        addr = (self.sip_id << _SIP_SHIFT) | (self.die_id << _DIE_SHIFT) | self.local_offset
        return addr

    @staticmethod
    def decode(addr: int) -> PhysAddr:
        """Split a 51-bit integer into a :class:`PhysAddr` with a decoded view.

        Args:
            addr: value in ``0..MAX_51``.

        Raises:
            PhysAddrError: if ``addr`` is out of range, ``die_id`` is in the
                reserved range 21..31, or an AHBM resource_kind is unknown.
        """
        if not (0 <= addr <= MAX_51):
            raise PhysAddrError("addr must be a 51-bit value")

        sip_id = (addr >> _SIP_SHIFT) & 0xF
        die_id = (addr >> _DIE_SHIFT) & 0x1F
        local_offset = addr & _LOCAL_MASK

        if die_id <= _AHBM_DIE_MAX:
            return PhysAddr._decode_ahbm(sip_id, die_id, local_offset)
        elif _CHIPLET_DIE_MIN <= die_id <= _CHIPLET_DIE_MAX:
            return PhysAddr._decode_chiplet(sip_id, die_id, local_offset)
        else:
            raise PhysAddrError(f"die_id {die_id} is reserved (21..31)")

    @staticmethod
    def _decode_ahbm(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
        """Decode the ``local_offset`` of an AHBM die (die_id 0..15).

        Bit 37 selects the HBM window (1) or the resource window (0).

        NOTE(review): MBZ bits [41:38] are not examined here, so an address
        with those bits set decodes fine but the result fails encode().
        """
        sel = (local_offset >> _AHBM_SEL_BIT) & 0x1
        if sel == 1:
            hbm_offset = int(local_offset & ((1 << _AHBM_SEL_BIT) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="hbm", hbm_offset=hbm_offset,
            )

        # Resource window
        res_kind = int((local_offset >> _RES_KIND_SHIFT) & _RES_KIND_MASK)
        try:
            unit_type = UnitType(res_kind)
        except ValueError:
            raise PhysAddrError(f"unknown resource_kind: {res_kind}") from None

        if unit_type == UnitType.PE:
            pe_id = int((local_offset >> _PE_ID_SHIFT) & 0xF)
            pe_sub = int((local_offset >> _PE_SUB_SHIFT) & 0xF)
            sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="pe_resource", unit_type=unit_type,
                pe_id=pe_id, pe_sub_unit=pe_sub, sub_offset=sub_off,
            )
        elif unit_type == UnitType.MCPU:
            mcpu_sub = int((local_offset >> _MCPU_SUB_SHIFT) & 0x1F)
            # MCPU_LOCAL uses the same 25-bit sub_offset width as PE_LOCAL.
            sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="pe_resource", unit_type=unit_type,
                mcpu_sub_unit=mcpu_sub, sub_offset=sub_off,
            )
        else:  # SRAM
            sub_off = int(local_offset & ((1 << _SRAM_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="pe_resource", unit_type=unit_type,
                sub_offset=sub_off,
            )

    @staticmethod
    def _decode_chiplet(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
        """Decode the ``local_offset`` of an IOCHIPLET die (die_id 16..20).

        Chiplet offsets below the 2 GB boundary are IOCPU sub-unit
        addresses; everything at or above it is tagged ``"ual"``.

        NOTE(review): MBZ bits [41:40] are masked off rather than rejected,
        while ``local_offset`` is stored unmasked.
        """
        chip_off = local_offset & ((1 << _CHIPLET_LOCAL_BITS) - 1)
        if chip_off < _IOCPU_BOUNDARY:
            iocpu_sub = int((chip_off >> _IOCPU_SUB_SHIFT) & 0xF)
            sub_off = int(chip_off & ((1 << _IOCPU_SUB_OFFSET_BITS) - 1))
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="iocpu", chiplet_offset=chip_off,
                iocpu_sub_unit=iocpu_sub, sub_offset=sub_off,
            )
        else:
            return PhysAddr(
                sip_id=sip_id, die_id=die_id, local_offset=local_offset,
                kind="ual", chiplet_offset=chip_off,
            )

    # ── AHBM factory methods ────────────────────────────────────────

    @staticmethod
    def hbm_addr(*, sip_id: int, die_id: int, hbm_offset: int) -> PhysAddr:
        """Build an HBM-window address (selector bit 37 set) on an AHBM die.

        ``die_id`` is checked against the AHBM maximum and ``hbm_offset``
        against 37 bits; ``sip_id`` is only validated later by encode().
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("hbm_offset", hbm_offset, _AHBM_SEL_BIT)
        local_offset = (1 << _AHBM_SEL_BIT) | int(hbm_offset)
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="hbm", hbm_offset=int(hbm_offset),
        )

    @staticmethod
    def pe_hbm_addr(
        *, sip_id: int, die_id: int,
        pe_id: int, pe_local_hbm_offset: int, slice_size_bytes: int,
    ) -> PhysAddr:
        """Build an HBM address inside the slice owned by ``pe_id``.

        The HBM offset is ``pe_id * slice_size_bytes + pe_local_hbm_offset``.

        Raises:
            PhysAddrError: if ``pe_local_hbm_offset`` is outside
                ``[0, slice_size_bytes)`` or the resulting offset does not
                fit in the 128 GB HBM window.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("pe_id", pe_id, 4)
        if not (0 <= pe_local_hbm_offset < slice_size_bytes):
            raise PhysAddrError("pe_local_hbm_offset out of PE local slice range")
        hbm_offset = int(pe_id) * int(slice_size_bytes) + int(pe_local_hbm_offset)
        if not (0 <= hbm_offset < PhysAddr.HBM_WINDOW_BYTES):
            raise PhysAddrError("HBM offset exceeds reserved 128GB window")
        return PhysAddr.hbm_addr(sip_id=sip_id, die_id=die_id, hbm_offset=hbm_offset)

    @staticmethod
    def hbm_pe_id(hbm_offset: int, slice_size_bytes: int) -> int:
        """Return the index of the PE slice that contains ``hbm_offset``.

        Raises:
            PhysAddrError: if ``slice_size_bytes`` is not positive (a zero
                size would otherwise surface as a bare ZeroDivisionError and
                a negative one would yield a meaningless index).
        """
        if slice_size_bytes <= 0:
            raise PhysAddrError("slice_size_bytes must be positive")
        return hbm_offset // slice_size_bytes

    @staticmethod
    def pe_tcm_addr(
        *, sip_id: int, die_id: int, pe_id: int, tcm_offset: int,
    ) -> PhysAddr:
        """Build a PE_LOCAL address in the ``PESubUnit.PE_TCM`` sub-unit."""
        return PhysAddr.pe_resource_addr(
            sip_id=sip_id, die_id=die_id, pe_id=pe_id,
            pe_sub_unit=PESubUnit.PE_TCM, sub_offset=tcm_offset,
        )

    @staticmethod
    def pe_resource_addr(
        *, sip_id: int, die_id: int, pe_id: int,
        pe_sub_unit: int, sub_offset: int,
    ) -> PhysAddr:
        """Build a PE_LOCAL resource-window address on an AHBM die.

        Packs resource_kind=PE, ``pe_id`` (4 bits), ``pe_sub_unit`` (4 bits)
        and ``sub_offset`` (25 bits) into ``local_offset``.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("pe_id", pe_id, 4)
        _chk_range("pe_sub_unit", pe_sub_unit, 4)
        _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
        local_offset = (
            (UnitType.PE << _RES_KIND_SHIFT)
            | (pe_id << _PE_ID_SHIFT)
            | (pe_sub_unit << _PE_SUB_SHIFT)
            | sub_offset
        )
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=UnitType.PE,
            pe_id=pe_id, pe_sub_unit=pe_sub_unit, sub_offset=sub_offset,
        )

    @staticmethod
    def cube_sram_addr(
        *, sip_id: int, die_id: int, sram_offset: int,
    ) -> PhysAddr:
        """Build a CUBE_SRAM resource-window address (25-bit offset)."""
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("sram_offset", sram_offset, _SRAM_OFFSET_BITS)
        local_offset = (UnitType.SRAM << _RES_KIND_SHIFT) | sram_offset
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=UnitType.SRAM, sub_offset=sram_offset,
        )

    @staticmethod
    def mcpu_resource_addr(
        *, sip_id: int, die_id: int, mcpu_sub_unit: int, sub_offset: int,
    ) -> PhysAddr:
        """Build an MCPU_LOCAL resource-window address on an AHBM die.

        Packs resource_kind=MCPU, ``mcpu_sub_unit`` (5 bits) and
        ``sub_offset`` (25 bits) into ``local_offset``.
        """
        _chk_max("die_id", die_id, _AHBM_DIE_MAX)
        _chk_range("mcpu_sub_unit", mcpu_sub_unit, 5)
        _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
        local_offset = (
            (UnitType.MCPU << _RES_KIND_SHIFT)
            | (mcpu_sub_unit << _MCPU_SUB_SHIFT)
            | sub_offset
        )
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=local_offset,
            kind="pe_resource", unit_type=UnitType.MCPU,
            mcpu_sub_unit=mcpu_sub_unit, sub_offset=sub_offset,
        )

    # ── IOCHIPLET factory methods ────────────────────────────────────

    @staticmethod
    def iocpu_resource_addr(
        *, sip_id: int, die_id: int, iocpu_sub_unit: int, sub_offset: int,
    ) -> PhysAddr:
        """Build an IOCPU sub-unit address on an IOCHIPLET die.

        Raises:
            PhysAddrError: if ``die_id`` is below the IOCHIPLET range or the
                packed offset reaches the 2 GB IOCPU boundary.
        """
        _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
        if die_id < _CHIPLET_DIE_MIN:
            raise PhysAddrError(
                f"die_id {die_id} is not an IOCHIPLET "
                f"(must be {_CHIPLET_DIE_MIN}..{_CHIPLET_DIE_MAX})"
            )
        _chk_range("iocpu_sub_unit", iocpu_sub_unit, 4)
        _chk_range("sub_offset", sub_offset, _IOCPU_SUB_OFFSET_BITS)
        chiplet_offset = (iocpu_sub_unit << _IOCPU_SUB_SHIFT) | sub_offset
        # With a 4-bit sub-unit and 27-bit offset this cannot trigger today;
        # kept as a guard in case the field widths change.
        if chiplet_offset >= _IOCPU_BOUNDARY:
            raise PhysAddrError("IOCPU region overflow (must be < 2 GB)")
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
            kind="iocpu", chiplet_offset=chiplet_offset,
            iocpu_sub_unit=iocpu_sub_unit, sub_offset=sub_offset,
        )

    @staticmethod
    def ual_addr(*, sip_id: int, die_id: int, ual_offset: int) -> PhysAddr:
        """Build a UAL address: ``ual_offset`` bytes past the 2 GB IOCPU boundary.

        Raises:
            PhysAddrError: if ``die_id`` is below the IOCHIPLET range or
                ``ual_offset`` is negative. The resulting chiplet offset is
                additionally range-checked against 40 bits.
        """
        _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
        if die_id < _CHIPLET_DIE_MIN:
            raise PhysAddrError(f"die_id {die_id} is not an IOCHIPLET")
        # A negative offset would land below the boundary, i.e. inside the
        # IOCPU region, yet still pass the 40-bit range check below.
        if ual_offset < 0:
            raise PhysAddrError("ual_offset must be non-negative")
        chiplet_offset = _IOCPU_BOUNDARY + ual_offset
        _chk_range("chiplet_offset", chiplet_offset, _CHIPLET_LOCAL_BITS)
        return PhysAddr(
            sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
            kind="ual", chiplet_offset=chiplet_offset,
        )