ADR-0001 Rev 2: 51-bit PhysAddr layout with concrete sub-unit tables
Remove rack_id (4 bits), rename sip_seg→die_id, and shift the remaining fields to enable a 42-bit local_offset (4 TB per die). Define the PE_LOCAL/MCPU_LOCAL/CUBE_SRAM sub-unit tables for AHBM dies and the IOCPU sub-unit table for IOCHIPLET dies (1 TB window). Supersedes ADR-0031. Also fixes a latent VA/PA confusion in the pe_dma pipeline DMA path, where virtual addresses were decoded as physical addresses without MMU translation; the bug was previously masked by coincidental bit-position alignment. Test results: 529 passed (+6 recovered); 10 pre-existing failures unchanged. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -23,7 +23,7 @@ def _hbm_pa(sip: int, cube: int, pe_id: int, spec: dict) -> int:
|
||||
mm = spec["cube"]["memory_map"]
|
||||
slice_bytes = mm["hbm_total_gb_per_cube"] * (1 << 30) // mm["hbm_slices_per_cube"]
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
@@ -220,10 +220,10 @@ class IoCpuComponent(ComponentBase):
|
||||
return []
|
||||
|
||||
def _cube_from_pa(self, pa_val: int, fallback: int) -> int:
|
||||
"""Extract cube_id from a physical address, with fallback."""
|
||||
"""Extract die_id from a physical address, with fallback."""
|
||||
from kernbench.policy.address.phyaddr import PhysAddr
|
||||
try:
|
||||
return PhysAddr.decode(pa_val).cube_id
|
||||
return PhysAddr.decode(pa_val).die_id
|
||||
except Exception:
|
||||
return fallback
|
||||
|
||||
|
||||
@@ -302,7 +302,16 @@ class PeDmaComponent(PeEngineBase):
|
||||
dma_res = self._dma_write if is_write else self._dma_read
|
||||
assert dma_res is not None
|
||||
|
||||
pa = PhysAddr.decode(addr)
|
||||
# Translate VA → PA via MMU (same logic as non-pipeline path)
|
||||
target_pa = addr
|
||||
if self._mmu is not None:
|
||||
from kernbench.policy.address.pe_mmu import PageFault
|
||||
try:
|
||||
target_pa = self._mmu.translate(addr)
|
||||
except PageFault:
|
||||
target_pa = addr # fallback: treat as PA directly
|
||||
|
||||
pa = PhysAddr.decode(target_pa)
|
||||
dst_node = self.ctx.resolver.resolve(pa)
|
||||
path = self.ctx.router.find_path(self._pe_prefix, dst_node)
|
||||
drain_ns = self.ctx.compute_drain_ns(path, nbytes)
|
||||
@@ -314,7 +323,7 @@ class PeDmaComponent(PeEngineBase):
|
||||
correlation_id="pipeline",
|
||||
request_id=f"tile_{token.tile_id}",
|
||||
src_sip=0, src_cube=0, src_pe=0,
|
||||
dst_pa=addr, nbytes=nbytes,
|
||||
dst_pa=target_pa, nbytes=nbytes,
|
||||
is_write=is_write,
|
||||
)
|
||||
sub_txn = Transaction(
|
||||
|
||||
@@ -207,10 +207,10 @@ class IoCpuComponent(ComponentBase):
|
||||
return []
|
||||
|
||||
def _cube_from_pa(self, pa_val: int, fallback: int) -> int:
|
||||
"""Extract cube_id from a physical address, with fallback."""
|
||||
"""Extract die_id from a physical address, with fallback."""
|
||||
from kernbench.policy.address.phyaddr import PhysAddr
|
||||
try:
|
||||
return PhysAddr.decode(pa_val).cube_id
|
||||
return PhysAddr.decode(pa_val).die_id
|
||||
except Exception:
|
||||
return fallback
|
||||
|
||||
|
||||
@@ -89,11 +89,10 @@ class _FreeList:
|
||||
|
||||
class PEMemAllocator:
|
||||
def __init__(
|
||||
self, rack_id: int, sip_id: int, cube_id: int, pe_id: int, cfg: AddressConfig,
|
||||
self, sip_id: int, die_id: int, pe_id: int, cfg: AddressConfig,
|
||||
) -> None:
|
||||
self._rack_id = rack_id
|
||||
self._sip_id = sip_id
|
||||
self._cube_id = cube_id
|
||||
self._die_id = die_id
|
||||
self._pe_id = pe_id
|
||||
self._cfg = cfg
|
||||
self._hbm = _FreeList(cfg.hbm_slice_bytes)
|
||||
@@ -108,7 +107,7 @@ class PEMemAllocator:
|
||||
f"available {self._cfg.hbm_slice_bytes - self._hbm.used}"
|
||||
)
|
||||
return PhysAddr.pe_hbm_addr(
|
||||
rack_id=self._rack_id, sip_id=self._sip_id, cube_id=self._cube_id,
|
||||
sip_id=self._sip_id, die_id=self._die_id,
|
||||
pe_id=self._pe_id, pe_local_hbm_offset=offset,
|
||||
slice_size_bytes=self._cfg.hbm_slice_bytes,
|
||||
)
|
||||
@@ -128,7 +127,7 @@ class PEMemAllocator:
|
||||
f"available {self._cfg.tcm_allocatable_bytes - self._tcm.used}"
|
||||
)
|
||||
return PhysAddr.pe_tcm_addr(
|
||||
rack_id=self._rack_id, sip_id=self._sip_id, cube_id=self._cube_id,
|
||||
sip_id=self._sip_id, die_id=self._die_id,
|
||||
pe_id=self._pe_id, tcm_offset=offset,
|
||||
)
|
||||
|
||||
|
||||
@@ -6,6 +6,47 @@ from typing import Literal
|
||||
|
||||
MAX_51 = (1 << 51) - 1
|
||||
|
||||
# ── Layout constants (ADR-0001 Rev 2) ────────────────────────────────
|
||||
# [50:47] sip_id (4)
|
||||
# [46:42] die_id (5)
|
||||
# [41: 0] local_offset (42)
|
||||
_SIP_SHIFT = 47
|
||||
_DIE_SHIFT = 42
|
||||
_LOCAL_BITS = 42
|
||||
_LOCAL_MASK = (1 << _LOCAL_BITS) - 1
|
||||
|
||||
# AHBM die: [41:38] MBZ, [37] addr_space, [36:0] sub-address
|
||||
_AHBM_SEL_BIT = 37
|
||||
_AHBM_LOCAL_USED = 38 # bits actually meaningful for AHBM
|
||||
|
||||
# Resource window: [36:34] resource_kind, [33:0] kind_local
|
||||
_RES_KIND_SHIFT = 34
|
||||
_RES_KIND_MASK = 0x7
|
||||
|
||||
# PE_LOCAL: [32:29] pe_id, [28:25] pe_sub_unit, [24:0] sub_offset
|
||||
_PE_ID_SHIFT = 29
|
||||
_PE_SUB_SHIFT = 25
|
||||
_PE_SUB_OFFSET_BITS = 25
|
||||
|
||||
# MCPU_LOCAL: [29:25] mcpu_sub_unit, [24:0] sub_offset
|
||||
_MCPU_SUB_SHIFT = 25
|
||||
|
||||
# CUBE_SRAM: [24:0] sram_offset
|
||||
_SRAM_OFFSET_BITS = 25
|
||||
|
||||
# IOCHIPLET: [41:40] MBZ, [39:0] chiplet_offset
|
||||
_CHIPLET_LOCAL_BITS = 40
|
||||
_IOCPU_BOUNDARY = 1 << 31 # 2 GB
|
||||
|
||||
# IOCPU: [30:27] iocpu_sub_unit, [26:0] sub_offset
|
||||
_IOCPU_SUB_SHIFT = 27
|
||||
_IOCPU_SUB_OFFSET_BITS = 27
|
||||
|
||||
# die_id ranges
|
||||
_AHBM_DIE_MAX = 15
|
||||
_CHIPLET_DIE_MIN = 16
|
||||
_CHIPLET_DIE_MAX = 20
|
||||
|
||||
|
||||
class PhysAddrError(Exception):
|
||||
pass
|
||||
@@ -22,163 +63,278 @@ def _chk_max(name: str, v: int, maxv: int) -> None:
|
||||
|
||||
|
||||
class UnitType(IntEnum):
|
||||
PE = 0
|
||||
MCPU = 1
|
||||
SRAM = 2
|
||||
"""resource_kind values for AHBM resource window."""
|
||||
PE = 0 # PE_LOCAL
|
||||
MCPU = 1 # MCPU_LOCAL
|
||||
SRAM = 2 # CUBE_SRAM
|
||||
|
||||
|
||||
class PESubUnit(IntEnum):
|
||||
PE_CPU_DTCM = 0
|
||||
MATH_ENGINE_DTCM = 1
|
||||
IPCQ = 2
|
||||
PE_CPU_SFR = 3
|
||||
MATH_ENGINE_SFR = 4
|
||||
DMA_ENGINE_SFR = 5
|
||||
PE_TCM = 6
|
||||
|
||||
|
||||
class MCPUSubUnit(IntEnum):
|
||||
MCPU_ITCM = 0
|
||||
MCPU_DTCM = 1
|
||||
IPCQ = 2
|
||||
MCPU_SFR = 3
|
||||
MCPU_DMA_SFR = 4
|
||||
MCPU_SRAM = 5
|
||||
|
||||
|
||||
class IOCPUSubUnit(IntEnum):
|
||||
IOCPU_ITCM = 0
|
||||
IOCPU_DTCM = 1
|
||||
IPCQ = 2
|
||||
IOCPU_SFR = 3
|
||||
IO_DMA_SFR = 4
|
||||
IO_SRAM = 5
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PhysAddr:
|
||||
"""
|
||||
51-bit physical address value object.
|
||||
"""51-bit physical address value object (ADR-0001 Rev 2).
|
||||
|
||||
Layout:
|
||||
[50:47] rack_id (4)
|
||||
[46:43] sip_id (4)
|
||||
[42:38] sip_seg (5) # cube_id
|
||||
[37:0] local_offset (38) => each segment is 256GB
|
||||
|
||||
local_offset:
|
||||
[37] selector: 1 = HBM window (128GB reserved), 0 = PE resource window
|
||||
[50:47] sip_id (4) -- 16 SIPs
|
||||
[46:42] die_id (5) -- 0..15 AHBM, 16..20 IOCHIPLET
|
||||
[41: 0] local_offset (42) -- 4 TB per die
|
||||
"""
|
||||
|
||||
rack_id: int
|
||||
sip_id: int
|
||||
sip_seg: int
|
||||
die_id: int
|
||||
local_offset: int
|
||||
|
||||
kind: Literal["hbm", "pe_resource", "raw"] = "raw"
|
||||
cube_id: int = 0
|
||||
kind: Literal["hbm", "pe_resource", "iocpu", "ual", "raw"] = "raw"
|
||||
unit_type: UnitType = UnitType.PE
|
||||
pe_id: int = 0
|
||||
ext: int = 0
|
||||
pe_sub_unit: int = 0
|
||||
sub_offset: int = 0
|
||||
hbm_offset: int = 0
|
||||
iocpu_sub_unit: int = 0
|
||||
chiplet_offset: int = 0
|
||||
mcpu_sub_unit: int = 0
|
||||
|
||||
HBM_WINDOW_BYTES = 1 << 37 # 128GB
|
||||
HBM_WINDOW_BYTES = 1 << 37 # 128 GB
|
||||
|
||||
# ── encode / decode ──────────────────────────────────────────────
|
||||
|
||||
def encode(self) -> int:
|
||||
_chk_range("rack_id", self.rack_id, 4)
|
||||
_chk_range("sip_id", self.sip_id, 4)
|
||||
_chk_range("sip_seg", self.sip_seg, 5)
|
||||
_chk_range("local_offset", self.local_offset, 38)
|
||||
addr = (self.rack_id << 47) | (self.sip_id << 43) | (self.sip_seg << 38) | self.local_offset
|
||||
if not (0 <= addr <= MAX_51):
|
||||
raise PhysAddrError("address exceeds 51-bit space")
|
||||
_chk_range("die_id", self.die_id, 5)
|
||||
_chk_range("local_offset", self.local_offset, _LOCAL_BITS)
|
||||
# MBZ enforcement
|
||||
if self.die_id <= _AHBM_DIE_MAX:
|
||||
mbz_top = (self.local_offset >> _AHBM_LOCAL_USED) & 0xF
|
||||
if mbz_top != 0:
|
||||
raise PhysAddrError("AHBM local_offset bits [41:38] must be zero")
|
||||
elif _CHIPLET_DIE_MIN <= self.die_id <= _CHIPLET_DIE_MAX:
|
||||
mbz_top = (self.local_offset >> _CHIPLET_LOCAL_BITS) & 0x3
|
||||
if mbz_top != 0:
|
||||
raise PhysAddrError("IOCHIPLET local_offset bits [41:40] must be zero")
|
||||
addr = (self.sip_id << _SIP_SHIFT) | (self.die_id << _DIE_SHIFT) | self.local_offset
|
||||
return addr
|
||||
|
||||
@staticmethod
|
||||
def decode(addr: int) -> PhysAddr:
|
||||
if not (0 <= addr <= MAX_51):
|
||||
raise PhysAddrError("addr must be a 51-bit value")
|
||||
rack = (addr >> 47) & 0xF
|
||||
sip_id = (addr >> 43) & 0xF
|
||||
sip_seg = (addr >> 38) & 0x1F
|
||||
off = addr & ((1 << 38) - 1)
|
||||
cube_id = sip_seg
|
||||
sel = (off >> 37) & 0x1
|
||||
if sel == 1:
|
||||
hbm_offset = int(off & ((1 << 37) - 1))
|
||||
return PhysAddr(
|
||||
rack_id=rack,
|
||||
sip_id=sip_id,
|
||||
sip_seg=sip_seg,
|
||||
local_offset=off,
|
||||
kind="hbm",
|
||||
cube_id=cube_id,
|
||||
hbm_offset=hbm_offset,
|
||||
)
|
||||
# PE resource decode
|
||||
raw_ut = int((off >> 34) & 0x7)
|
||||
try:
|
||||
unit_type = UnitType(raw_ut)
|
||||
except ValueError:
|
||||
raise PhysAddrError(f"unknown unit_type: {raw_ut}") from None
|
||||
pe_id = int((off >> 30) & 0xF)
|
||||
ext = int((off >> 29) & 0x1)
|
||||
sub_offset = int(off & ((1 << 29) - 1))
|
||||
return PhysAddr(
|
||||
rack_id=rack,
|
||||
sip_id=sip_id,
|
||||
sip_seg=sip_seg,
|
||||
local_offset=off,
|
||||
kind="pe_resource",
|
||||
cube_id=cube_id,
|
||||
unit_type=unit_type,
|
||||
pe_id=pe_id,
|
||||
ext=ext,
|
||||
sub_offset=sub_offset,
|
||||
hbm_offset=0,
|
||||
)
|
||||
sip_id = (addr >> _SIP_SHIFT) & 0xF
|
||||
die_id = (addr >> _DIE_SHIFT) & 0x1F
|
||||
local_offset = addr & _LOCAL_MASK
|
||||
|
||||
if die_id <= _AHBM_DIE_MAX:
|
||||
return PhysAddr._decode_ahbm(sip_id, die_id, local_offset)
|
||||
elif _CHIPLET_DIE_MIN <= die_id <= _CHIPLET_DIE_MAX:
|
||||
return PhysAddr._decode_chiplet(sip_id, die_id, local_offset)
|
||||
else:
|
||||
raise PhysAddrError(f"die_id {die_id} is reserved (21..31)")
|
||||
|
||||
@staticmethod
|
||||
def hbm_addr(*, rack_id: int, sip_id: int, cube_id: int, hbm_offset: int) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_range("hbm_offset", hbm_offset, 37)
|
||||
sip_seg = cube_id
|
||||
local_offset = (1 << 37) | int(hbm_offset)
|
||||
def _decode_ahbm(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
|
||||
sel = (local_offset >> _AHBM_SEL_BIT) & 0x1
|
||||
if sel == 1:
|
||||
hbm_offset = int(local_offset & ((1 << _AHBM_SEL_BIT) - 1))
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="hbm", hbm_offset=hbm_offset,
|
||||
)
|
||||
# Resource window
|
||||
res_kind = int((local_offset >> _RES_KIND_SHIFT) & _RES_KIND_MASK)
|
||||
try:
|
||||
unit_type = UnitType(res_kind)
|
||||
except ValueError:
|
||||
raise PhysAddrError(f"unknown resource_kind: {res_kind}") from None
|
||||
|
||||
if unit_type == UnitType.PE:
|
||||
pe_id = int((local_offset >> _PE_ID_SHIFT) & 0xF)
|
||||
pe_sub = int((local_offset >> _PE_SUB_SHIFT) & 0xF)
|
||||
sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="pe_resource", unit_type=unit_type,
|
||||
pe_id=pe_id, pe_sub_unit=pe_sub, sub_offset=sub_off,
|
||||
)
|
||||
elif unit_type == UnitType.MCPU:
|
||||
mcpu_sub = int((local_offset >> _MCPU_SUB_SHIFT) & 0x1F)
|
||||
sub_off = int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1))
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="pe_resource", unit_type=unit_type,
|
||||
mcpu_sub_unit=mcpu_sub, sub_offset=sub_off,
|
||||
)
|
||||
else: # SRAM
|
||||
sub_off = int(local_offset & ((1 << _SRAM_OFFSET_BITS) - 1))
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="pe_resource", unit_type=unit_type,
|
||||
sub_offset=sub_off,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _decode_chiplet(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
|
||||
chip_off = local_offset & ((1 << _CHIPLET_LOCAL_BITS) - 1)
|
||||
if chip_off < _IOCPU_BOUNDARY:
|
||||
iocpu_sub = int((chip_off >> _IOCPU_SUB_SHIFT) & 0xF)
|
||||
sub_off = int(chip_off & ((1 << _IOCPU_SUB_OFFSET_BITS) - 1))
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="iocpu", chiplet_offset=chip_off,
|
||||
iocpu_sub_unit=iocpu_sub, sub_offset=sub_off,
|
||||
)
|
||||
else:
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="ual", chiplet_offset=chip_off,
|
||||
)
|
||||
|
||||
# ── AHBM factory methods ────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def hbm_addr(*, sip_id: int, die_id: int, hbm_offset: int) -> PhysAddr:
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("hbm_offset", hbm_offset, _AHBM_SEL_BIT)
|
||||
local_offset = (1 << _AHBM_SEL_BIT) | int(hbm_offset)
|
||||
return PhysAddr(
|
||||
rack_id=rack_id,
|
||||
sip_id=sip_id,
|
||||
sip_seg=sip_seg,
|
||||
local_offset=local_offset,
|
||||
kind="hbm",
|
||||
cube_id=cube_id,
|
||||
hbm_offset=int(hbm_offset),
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="hbm", hbm_offset=int(hbm_offset),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def pe_hbm_addr(
|
||||
*,
|
||||
rack_id: int,
|
||||
sip_id: int,
|
||||
cube_id: int,
|
||||
pe_id: int,
|
||||
pe_local_hbm_offset: int,
|
||||
slice_size_bytes: int,
|
||||
*, sip_id: int, die_id: int,
|
||||
pe_id: int, pe_local_hbm_offset: int, slice_size_bytes: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("pe_id", pe_id, 4)
|
||||
if not (0 <= pe_local_hbm_offset < slice_size_bytes):
|
||||
raise PhysAddrError("pe_local_hbm_offset out of PE local slice range")
|
||||
hbm_offset = int(pe_id) * int(slice_size_bytes) + int(pe_local_hbm_offset)
|
||||
if not (0 <= hbm_offset < PhysAddr.HBM_WINDOW_BYTES):
|
||||
raise PhysAddrError("HBM offset exceeds reserved 128GB window")
|
||||
return PhysAddr.hbm_addr(
|
||||
rack_id=rack_id, sip_id=sip_id, cube_id=cube_id, hbm_offset=hbm_offset
|
||||
)
|
||||
return PhysAddr.hbm_addr(sip_id=sip_id, die_id=die_id, hbm_offset=hbm_offset)
|
||||
|
||||
@staticmethod
|
||||
def hbm_pe_id(hbm_offset: int, slice_size_bytes: int) -> int:
|
||||
return hbm_offset // slice_size_bytes
|
||||
|
||||
@staticmethod
|
||||
def cube_sram_addr(
|
||||
*, rack_id: int, sip_id: int, cube_id: int, sram_offset: int,
|
||||
def pe_tcm_addr(
|
||||
*, sip_id: int, die_id: int, pe_id: int, tcm_offset: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_range("sram_offset", sram_offset, 29)
|
||||
sip_seg = cube_id
|
||||
local_offset = (UnitType.SRAM << 34) | sram_offset
|
||||
return PhysAddr(
|
||||
rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
|
||||
local_offset=local_offset,
|
||||
kind="pe_resource", cube_id=cube_id,
|
||||
unit_type=UnitType.SRAM, sub_offset=sram_offset,
|
||||
return PhysAddr.pe_resource_addr(
|
||||
sip_id=sip_id, die_id=die_id, pe_id=pe_id,
|
||||
pe_sub_unit=PESubUnit.PE_TCM, sub_offset=tcm_offset,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def pe_tcm_addr(
|
||||
*, rack_id: int, sip_id: int, cube_id: int, pe_id: int, tcm_offset: int,
|
||||
def pe_resource_addr(
|
||||
*, sip_id: int, die_id: int, pe_id: int,
|
||||
pe_sub_unit: int, sub_offset: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("cube_id", cube_id, 31)
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("pe_id", pe_id, 4)
|
||||
_chk_range("tcm_offset", tcm_offset, 29)
|
||||
sip_seg = cube_id
|
||||
local_offset = (UnitType.PE << 34) | (pe_id << 30) | tcm_offset
|
||||
return PhysAddr(
|
||||
rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
|
||||
local_offset=local_offset,
|
||||
kind="pe_resource", cube_id=cube_id,
|
||||
unit_type=UnitType.PE, pe_id=pe_id, sub_offset=tcm_offset,
|
||||
_chk_range("pe_sub_unit", pe_sub_unit, 4)
|
||||
_chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
|
||||
local_offset = (
|
||||
(UnitType.PE << _RES_KIND_SHIFT)
|
||||
| (pe_id << _PE_ID_SHIFT)
|
||||
| (pe_sub_unit << _PE_SUB_SHIFT)
|
||||
| sub_offset
|
||||
)
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="pe_resource", unit_type=UnitType.PE,
|
||||
pe_id=pe_id, pe_sub_unit=pe_sub_unit, sub_offset=sub_offset,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def cube_sram_addr(
|
||||
*, sip_id: int, die_id: int, sram_offset: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("sram_offset", sram_offset, _SRAM_OFFSET_BITS)
|
||||
local_offset = (UnitType.SRAM << _RES_KIND_SHIFT) | sram_offset
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="pe_resource", unit_type=UnitType.SRAM, sub_offset=sram_offset,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def mcpu_resource_addr(
|
||||
*, sip_id: int, die_id: int, mcpu_sub_unit: int, sub_offset: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
|
||||
_chk_range("mcpu_sub_unit", mcpu_sub_unit, 5)
|
||||
_chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
|
||||
local_offset = (
|
||||
(UnitType.MCPU << _RES_KIND_SHIFT)
|
||||
| (mcpu_sub_unit << _MCPU_SUB_SHIFT)
|
||||
| sub_offset
|
||||
)
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
|
||||
kind="pe_resource", unit_type=UnitType.MCPU,
|
||||
mcpu_sub_unit=mcpu_sub_unit, sub_offset=sub_offset,
|
||||
)
|
||||
|
||||
# ── IOCHIPLET factory methods ────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def iocpu_resource_addr(
|
||||
*, sip_id: int, die_id: int, iocpu_sub_unit: int, sub_offset: int,
|
||||
) -> PhysAddr:
|
||||
_chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
|
||||
if die_id < _CHIPLET_DIE_MIN:
|
||||
raise PhysAddrError(
|
||||
f"die_id {die_id} is not an IOCHIPLET "
|
||||
f"(must be {_CHIPLET_DIE_MIN}..{_CHIPLET_DIE_MAX})"
|
||||
)
|
||||
_chk_range("iocpu_sub_unit", iocpu_sub_unit, 4)
|
||||
_chk_range("sub_offset", sub_offset, _IOCPU_SUB_OFFSET_BITS)
|
||||
chiplet_offset = (iocpu_sub_unit << _IOCPU_SUB_SHIFT) | sub_offset
|
||||
if chiplet_offset >= _IOCPU_BOUNDARY:
|
||||
raise PhysAddrError("IOCPU region overflow (must be < 2 GB)")
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
|
||||
kind="iocpu", chiplet_offset=chiplet_offset,
|
||||
iocpu_sub_unit=iocpu_sub_unit, sub_offset=sub_offset,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def ual_addr(*, sip_id: int, die_id: int, ual_offset: int) -> PhysAddr:
|
||||
_chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
|
||||
if die_id < _CHIPLET_DIE_MIN:
|
||||
raise PhysAddrError(f"die_id {die_id} is not an IOCHIPLET")
|
||||
chiplet_offset = _IOCPU_BOUNDARY + ual_offset
|
||||
_chk_range("chiplet_offset", chiplet_offset, _CHIPLET_LOCAL_BITS)
|
||||
return PhysAddr(
|
||||
sip_id=sip_id, die_id=die_id, local_offset=chiplet_offset,
|
||||
kind="ual", chiplet_offset=chiplet_offset,
|
||||
)
|
||||
|
||||
@@ -27,16 +27,16 @@ class AddressResolver:
|
||||
|
||||
def resolve(self, addr: PhysAddr) -> str:
|
||||
s = addr.sip_id
|
||||
c = addr.cube_id
|
||||
d = addr.die_id
|
||||
if addr.kind == "hbm":
|
||||
node_id = f"sip{s}.cube{c}.hbm_ctrl"
|
||||
node_id = f"sip{s}.cube{d}.hbm_ctrl"
|
||||
elif addr.kind == "pe_resource":
|
||||
if addr.unit_type == UnitType.PE:
|
||||
node_id = f"sip{s}.cube{c}.pe{addr.pe_id}.pe_tcm"
|
||||
node_id = f"sip{s}.cube{d}.pe{addr.pe_id}.pe_tcm"
|
||||
elif addr.unit_type == UnitType.SRAM:
|
||||
node_id = f"sip{s}.cube{c}.sram"
|
||||
node_id = f"sip{s}.cube{d}.sram"
|
||||
elif addr.unit_type == UnitType.MCPU:
|
||||
node_id = f"sip{s}.cube{c}.m_cpu"
|
||||
node_id = f"sip{s}.cube{d}.m_cpu"
|
||||
else:
|
||||
raise RoutingError(f"unsupported unit_type: {addr.unit_type}")
|
||||
else:
|
||||
|
||||
@@ -385,7 +385,7 @@ class RuntimeContext:
|
||||
for cube_id in range(cubes_per_sip):
|
||||
for pe_id in range(pes_per_cube):
|
||||
self._allocators[(sip_id, cube_id, pe_id)] = PEMemAllocator(
|
||||
rack_id=0, sip_id=sip_id, cube_id=cube_id, pe_id=pe_id, cfg=cfg,
|
||||
sip_id=sip_id, die_id=cube_id, pe_id=pe_id, cfg=cfg,
|
||||
)
|
||||
|
||||
# Initialize VA allocator (MMU mappings are installed via fabric MmuMapMsg)
|
||||
|
||||
@@ -212,7 +212,7 @@ def _generate_probe_h2d(graph, edge_map) -> list[dict]:
|
||||
t_offset = 0.0
|
||||
for rid, (name, cube, hops) in enumerate(cases):
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=cube, pe_id=0,
|
||||
sip_id=0, die_id=cube, pe_id=0,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
dst_node = resolver.resolve(pa)
|
||||
@@ -256,7 +256,7 @@ def _generate_probe_d2h(graph, edge_map) -> list[dict]:
|
||||
t_offset = 0.0
|
||||
for rid, (name, cube, hops) in enumerate(cases):
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=cube, pe_id=0,
|
||||
sip_id=0, die_id=cube, pe_id=0,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
dst_node = resolver.resolve(pa)
|
||||
@@ -310,7 +310,7 @@ def _generate_probe_pe_dma(graph, edge_map) -> list[dict]:
|
||||
t_offset = 0.0
|
||||
for rid, (name, sip, src_cube, src_pe, dst_cube, dst_pe) in enumerate(cases):
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=dst_cube, pe_id=dst_pe,
|
||||
sip_id=sip, die_id=dst_cube, pe_id=dst_pe,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
dst_node = resolver.resolve(pa)
|
||||
|
||||
Reference in New Issue
Block a user