ADR-0001 Rev 2: 51-bit PhysAddr layout with concrete sub-unit tables

Remove rack_id (4 bits), rename sip_seg→die_id, shift fields to enable
42-bit local_offset (4 TB per die). Define PE_LOCAL/MCPU_LOCAL/CUBE_SRAM
sub-unit tables for AHBM dies and IOCPU sub-unit table for IOCHIPLET
dies (1 TB window). Supersedes ADR-0031.

Also fixes a latent VA/PA confusion in the pe_dma pipeline path, where
virtual addresses were decoded as physical addresses without MMU
translation. The bug was previously masked by a coincidental bit-position alignment.

529 passed (+6 recovered), 10 pre-existing failures unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-27 15:52:29 -07:00
parent e9cc40f74d
commit 81cc32c46b
27 changed files with 814 additions and 272 deletions
+1 -1
View File
@@ -23,7 +23,7 @@ def _hbm_pa(sip: int, cube: int, pe_id: int, spec: dict) -> int:
mm = spec["cube"]["memory_map"]
slice_bytes = mm["hbm_total_gb_per_cube"] * (1 << 30) // mm["hbm_slices_per_cube"]
pa = PhysAddr.pe_hbm_addr(
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
sip_id=sip, die_id=cube, pe_id=pe_id,
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
)
return pa.encode()
+2 -2
View File
@@ -220,10 +220,10 @@ class IoCpuComponent(ComponentBase):
return []
def _cube_from_pa(self, pa_val: int, fallback: int) -> int:
"""Extract cube_id from a physical address, with fallback."""
"""Extract die_id from a physical address, with fallback."""
from kernbench.policy.address.phyaddr import PhysAddr
try:
return PhysAddr.decode(pa_val).cube_id
return PhysAddr.decode(pa_val).die_id
except Exception:
return fallback
+11 -2
View File
@@ -302,7 +302,16 @@ class PeDmaComponent(PeEngineBase):
dma_res = self._dma_write if is_write else self._dma_read
assert dma_res is not None
pa = PhysAddr.decode(addr)
# Translate VA → PA via MMU (same logic as non-pipeline path)
target_pa = addr
if self._mmu is not None:
from kernbench.policy.address.pe_mmu import PageFault
try:
target_pa = self._mmu.translate(addr)
except PageFault:
target_pa = addr # fallback: treat as PA directly
pa = PhysAddr.decode(target_pa)
dst_node = self.ctx.resolver.resolve(pa)
path = self.ctx.router.find_path(self._pe_prefix, dst_node)
drain_ns = self.ctx.compute_drain_ns(path, nbytes)
@@ -314,7 +323,7 @@ class PeDmaComponent(PeEngineBase):
correlation_id="pipeline",
request_id=f"tile_{token.tile_id}",
src_sip=0, src_cube=0, src_pe=0,
dst_pa=addr, nbytes=nbytes,
dst_pa=target_pa, nbytes=nbytes,
is_write=is_write,
)
sub_txn = Transaction(
@@ -207,10 +207,10 @@ class IoCpuComponent(ComponentBase):
return []
def _cube_from_pa(self, pa_val: int, fallback: int) -> int:
"""Extract cube_id from a physical address, with fallback."""
"""Extract die_id from a physical address, with fallback."""
from kernbench.policy.address.phyaddr import PhysAddr
try:
return PhysAddr.decode(pa_val).cube_id
return PhysAddr.decode(pa_val).die_id
except Exception:
return fallback
+4 -5
View File
@@ -89,11 +89,10 @@ class _FreeList:
class PEMemAllocator:
def __init__(
self, rack_id: int, sip_id: int, cube_id: int, pe_id: int, cfg: AddressConfig,
self, sip_id: int, die_id: int, pe_id: int, cfg: AddressConfig,
) -> None:
self._rack_id = rack_id
self._sip_id = sip_id
self._cube_id = cube_id
self._die_id = die_id
self._pe_id = pe_id
self._cfg = cfg
self._hbm = _FreeList(cfg.hbm_slice_bytes)
@@ -108,7 +107,7 @@ class PEMemAllocator:
f"available {self._cfg.hbm_slice_bytes - self._hbm.used}"
)
return PhysAddr.pe_hbm_addr(
rack_id=self._rack_id, sip_id=self._sip_id, cube_id=self._cube_id,
sip_id=self._sip_id, die_id=self._die_id,
pe_id=self._pe_id, pe_local_hbm_offset=offset,
slice_size_bytes=self._cfg.hbm_slice_bytes,
)
@@ -128,7 +127,7 @@ class PEMemAllocator:
f"available {self._cfg.tcm_allocatable_bytes - self._tcm.used}"
)
return PhysAddr.pe_tcm_addr(
rack_id=self._rack_id, sip_id=self._sip_id, cube_id=self._cube_id,
sip_id=self._sip_id, die_id=self._die_id,
pe_id=self._pe_id, tcm_offset=offset,
)
+264 -108
View File
@@ -6,6 +6,47 @@ from typing import Literal
MAX_51 = (1 << 51) - 1
# ── Layout constants (ADR-0001 Rev 2) ────────────────────────────────
# [50:47] sip_id (4)
# [46:42] die_id (5)
# [41: 0] local_offset (42)
_SIP_SHIFT = 47
_DIE_SHIFT = 42
_LOCAL_BITS = 42
_LOCAL_MASK = (1 << _LOCAL_BITS) - 1
# AHBM die: [41:38] MBZ, [37] addr_space, [36:0] sub-address
_AHBM_SEL_BIT = 37
_AHBM_LOCAL_USED = 38 # bits actually meaningful for AHBM
# Resource window: [36:34] resource_kind, [33:0] kind_local
_RES_KIND_SHIFT = 34
_RES_KIND_MASK = 0x7
# PE_LOCAL: [32:29] pe_id, [28:25] pe_sub_unit, [24:0] sub_offset
_PE_ID_SHIFT = 29
_PE_SUB_SHIFT = 25
_PE_SUB_OFFSET_BITS = 25
# MCPU_LOCAL: [29:25] mcpu_sub_unit, [24:0] sub_offset
_MCPU_SUB_SHIFT = 25
# CUBE_SRAM: [24:0] sram_offset
_SRAM_OFFSET_BITS = 25
# IOCHIPLET: [41:40] MBZ, [39:0] chiplet_offset
_CHIPLET_LOCAL_BITS = 40
_IOCPU_BOUNDARY = 1 << 31 # 2 GB
# IOCPU: [30:27] iocpu_sub_unit, [26:0] sub_offset
_IOCPU_SUB_SHIFT = 27
_IOCPU_SUB_OFFSET_BITS = 27
# die_id ranges
_AHBM_DIE_MAX = 15
_CHIPLET_DIE_MIN = 16
_CHIPLET_DIE_MAX = 20
class PhysAddrError(Exception):
pass
@@ -22,163 +63,278 @@ def _chk_max(name: str, v: int, maxv: int) -> None:
class UnitType(IntEnum):
PE = 0
MCPU = 1
SRAM = 2
"""resource_kind values for AHBM resource window."""
PE = 0 # PE_LOCAL
MCPU = 1 # MCPU_LOCAL
SRAM = 2 # CUBE_SRAM
class PESubUnit(IntEnum):
    """Named values for the 4-bit ``pe_sub_unit`` field of a PE_LOCAL address.

    The field occupies bits [28:25] of the die-local offset (see the layout
    constants in this module).
    """

    PE_CPU_DTCM = 0
    MATH_ENGINE_DTCM = 1
    IPCQ = 2
    PE_CPU_SFR = 3
    MATH_ENGINE_SFR = 4
    DMA_ENGINE_SFR = 5
    PE_TCM = 6
class MCPUSubUnit(IntEnum):
    """Named values for the 5-bit ``mcpu_sub_unit`` field of an MCPU_LOCAL address.

    The field occupies bits [29:25] of the die-local offset (see the layout
    constants in this module).
    """

    MCPU_ITCM = 0
    MCPU_DTCM = 1
    IPCQ = 2
    MCPU_SFR = 3
    MCPU_DMA_SFR = 4
    MCPU_SRAM = 5
class IOCPUSubUnit(IntEnum):
    """Named values for the 4-bit ``iocpu_sub_unit`` field of an IOCPU address.

    The field occupies bits [30:27] of the chiplet offset (see the layout
    constants in this module).
    """

    IOCPU_ITCM = 0
    IOCPU_DTCM = 1
    IPCQ = 2
    IOCPU_SFR = 3
    IO_DMA_SFR = 4
    IO_SRAM = 5
@dataclass(frozen=True)
class PhysAddr:
"""
51-bit physical address value object.
"""51-bit physical address value object (ADR-0001 Rev 2).
Layout:
[50:47] rack_id (4)
[46:43] sip_id (4)
[42:38] sip_seg (5) # cube_id
[37:0] local_offset (38) => each segment is 256GB
local_offset:
[37] selector: 1 = HBM window (128GB reserved), 0 = PE resource window
[50:47] sip_id (4) -- 16 SIPs
[46:42] die_id (5) -- 0..15 AHBM, 16..20 IOCHIPLET
[41: 0] local_offset (42) -- 4 TB per die
"""
rack_id: int
sip_id: int
sip_seg: int
die_id: int
local_offset: int
kind: Literal["hbm", "pe_resource", "raw"] = "raw"
cube_id: int = 0
kind: Literal["hbm", "pe_resource", "iocpu", "ual", "raw"] = "raw"
unit_type: UnitType = UnitType.PE
pe_id: int = 0
ext: int = 0
pe_sub_unit: int = 0
sub_offset: int = 0
hbm_offset: int = 0
iocpu_sub_unit: int = 0
chiplet_offset: int = 0
mcpu_sub_unit: int = 0
HBM_WINDOW_BYTES = 1 << 37 # 128GB
HBM_WINDOW_BYTES = 1 << 37 # 128 GB
# ── encode / decode ──────────────────────────────────────────────
def encode(self) -> int:
_chk_range("rack_id", self.rack_id, 4)
_chk_range("sip_id", self.sip_id, 4)
_chk_range("sip_seg", self.sip_seg, 5)
_chk_range("local_offset", self.local_offset, 38)
addr = (self.rack_id << 47) | (self.sip_id << 43) | (self.sip_seg << 38) | self.local_offset
if not (0 <= addr <= MAX_51):
raise PhysAddrError("address exceeds 51-bit space")
_chk_range("die_id", self.die_id, 5)
_chk_range("local_offset", self.local_offset, _LOCAL_BITS)
# MBZ enforcement
if self.die_id <= _AHBM_DIE_MAX:
mbz_top = (self.local_offset >> _AHBM_LOCAL_USED) & 0xF
if mbz_top != 0:
raise PhysAddrError("AHBM local_offset bits [41:38] must be zero")
elif _CHIPLET_DIE_MIN <= self.die_id <= _CHIPLET_DIE_MAX:
mbz_top = (self.local_offset >> _CHIPLET_LOCAL_BITS) & 0x3
if mbz_top != 0:
raise PhysAddrError("IOCHIPLET local_offset bits [41:40] must be zero")
addr = (self.sip_id << _SIP_SHIFT) | (self.die_id << _DIE_SHIFT) | self.local_offset
return addr
@staticmethod
def decode(addr: int) -> PhysAddr:
if not (0 <= addr <= MAX_51):
raise PhysAddrError("addr must be a 51-bit value")
rack = (addr >> 47) & 0xF
sip_id = (addr >> 43) & 0xF
sip_seg = (addr >> 38) & 0x1F
off = addr & ((1 << 38) - 1)
cube_id = sip_seg
sel = (off >> 37) & 0x1
if sel == 1:
hbm_offset = int(off & ((1 << 37) - 1))
return PhysAddr(
rack_id=rack,
sip_id=sip_id,
sip_seg=sip_seg,
local_offset=off,
kind="hbm",
cube_id=cube_id,
hbm_offset=hbm_offset,
)
# PE resource decode
raw_ut = int((off >> 34) & 0x7)
try:
unit_type = UnitType(raw_ut)
except ValueError:
raise PhysAddrError(f"unknown unit_type: {raw_ut}") from None
pe_id = int((off >> 30) & 0xF)
ext = int((off >> 29) & 0x1)
sub_offset = int(off & ((1 << 29) - 1))
return PhysAddr(
rack_id=rack,
sip_id=sip_id,
sip_seg=sip_seg,
local_offset=off,
kind="pe_resource",
cube_id=cube_id,
unit_type=unit_type,
pe_id=pe_id,
ext=ext,
sub_offset=sub_offset,
hbm_offset=0,
)
sip_id = (addr >> _SIP_SHIFT) & 0xF
die_id = (addr >> _DIE_SHIFT) & 0x1F
local_offset = addr & _LOCAL_MASK
if die_id <= _AHBM_DIE_MAX:
return PhysAddr._decode_ahbm(sip_id, die_id, local_offset)
elif _CHIPLET_DIE_MIN <= die_id <= _CHIPLET_DIE_MAX:
return PhysAddr._decode_chiplet(sip_id, die_id, local_offset)
else:
raise PhysAddrError(f"die_id {die_id} is reserved (21..31)")
@staticmethod
def hbm_addr(*, rack_id: int, sip_id: int, cube_id: int, hbm_offset: int) -> PhysAddr:
_chk_max("cube_id", cube_id, 31)
_chk_range("hbm_offset", hbm_offset, 37)
sip_seg = cube_id
local_offset = (1 << 37) | int(hbm_offset)
def _decode_ahbm(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
    """Decode the die-local offset of an AHBM die into a ``PhysAddr``.

    Bit ``_AHBM_SEL_BIT`` selects the window: 1 means HBM (the bits below
    the selector are the HBM offset), 0 means the resource window, whose
    kind is read from the ``_RES_KIND_SHIFT`` field.

    Args:
        sip_id: SIP field already extracted from the full address.
        die_id: die field already extracted from the full address.
        local_offset: the die-local offset bits of the address.

    Returns:
        A ``PhysAddr`` with ``kind="hbm"`` or ``kind="pe_resource"`` and
        the sub-fields relevant to that kind filled in.

    Raises:
        PhysAddrError: if the resource kind is not a ``UnitType`` value.
    """
    # NOTE(review): bits above _AHBM_LOCAL_USED are not inspected here,
    # whereas encode() rejects them when non-zero — confirm whether decode
    # should be equally strict.
    if (local_offset >> _AHBM_SEL_BIT) & 0x1:
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="hbm",
            hbm_offset=int(local_offset & ((1 << _AHBM_SEL_BIT) - 1)),
        )

    # Resource window: identify which kind of unit is addressed.
    res_kind = int((local_offset >> _RES_KIND_SHIFT) & _RES_KIND_MASK)
    try:
        unit_type = UnitType(res_kind)
    except ValueError:
        raise PhysAddrError(f"unknown resource_kind: {res_kind}") from None

    if unit_type == UnitType.PE:
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="pe_resource",
            unit_type=unit_type,
            pe_id=int((local_offset >> _PE_ID_SHIFT) & 0xF),
            pe_sub_unit=int((local_offset >> _PE_SUB_SHIFT) & 0xF),
            sub_offset=int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1)),
        )

    if unit_type == UnitType.MCPU:
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="pe_resource",
            unit_type=unit_type,
            mcpu_sub_unit=int((local_offset >> _MCPU_SUB_SHIFT) & 0x1F),
            # The MCPU sub-offset is masked with the PE sub-offset width.
            sub_offset=int(local_offset & ((1 << _PE_SUB_OFFSET_BITS) - 1)),
        )

    # Remaining UnitType value: SRAM.
    return PhysAddr(
        sip_id=sip_id,
        die_id=die_id,
        local_offset=local_offset,
        kind="pe_resource",
        unit_type=unit_type,
        sub_offset=int(local_offset & ((1 << _SRAM_OFFSET_BITS) - 1)),
    )
@staticmethod
def _decode_chiplet(sip_id: int, die_id: int, local_offset: int) -> PhysAddr:
    """Decode the die-local offset of an IOCHIPLET die into a ``PhysAddr``.

    The low ``_CHIPLET_LOCAL_BITS`` bits form the chiplet offset. Offsets
    below ``_IOCPU_BOUNDARY`` are decoded as IOCPU sub-unit addresses;
    everything at or above it is returned as ``kind="ual"``.

    Args:
        sip_id: SIP field already extracted from the full address.
        die_id: die field already extracted from the full address.
        local_offset: the die-local offset bits of the address.

    Returns:
        A ``PhysAddr`` with ``kind="iocpu"`` or ``kind="ual"``. The stored
        ``local_offset`` is the unmasked input value.
    """
    # NOTE(review): bits above _CHIPLET_LOCAL_BITS are masked off here,
    # whereas encode() rejects them when non-zero — confirm intent.
    chip_off = local_offset & ((1 << _CHIPLET_LOCAL_BITS) - 1)

    if chip_off >= _IOCPU_BOUNDARY:
        return PhysAddr(
            sip_id=sip_id,
            die_id=die_id,
            local_offset=local_offset,
            kind="ual",
            chiplet_offset=chip_off,
        )

    return PhysAddr(
        sip_id=sip_id,
        die_id=die_id,
        local_offset=local_offset,
        kind="iocpu",
        chiplet_offset=chip_off,
        iocpu_sub_unit=int((chip_off >> _IOCPU_SUB_SHIFT) & 0xF),
        sub_offset=int(chip_off & ((1 << _IOCPU_SUB_OFFSET_BITS) - 1)),
    )
# ── AHBM factory methods ────────────────────────────────────────
@staticmethod
def hbm_addr(*, sip_id: int, die_id: int, hbm_offset: int) -> PhysAddr:
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
_chk_range("hbm_offset", hbm_offset, _AHBM_SEL_BIT)
local_offset = (1 << _AHBM_SEL_BIT) | int(hbm_offset)
return PhysAddr(
rack_id=rack_id,
sip_id=sip_id,
sip_seg=sip_seg,
local_offset=local_offset,
kind="hbm",
cube_id=cube_id,
hbm_offset=int(hbm_offset),
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
kind="hbm", hbm_offset=int(hbm_offset),
)
@staticmethod
def pe_hbm_addr(
*,
rack_id: int,
sip_id: int,
cube_id: int,
pe_id: int,
pe_local_hbm_offset: int,
slice_size_bytes: int,
*, sip_id: int, die_id: int,
pe_id: int, pe_local_hbm_offset: int, slice_size_bytes: int,
) -> PhysAddr:
_chk_max("cube_id", cube_id, 31)
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
_chk_range("pe_id", pe_id, 4)
if not (0 <= pe_local_hbm_offset < slice_size_bytes):
raise PhysAddrError("pe_local_hbm_offset out of PE local slice range")
hbm_offset = int(pe_id) * int(slice_size_bytes) + int(pe_local_hbm_offset)
if not (0 <= hbm_offset < PhysAddr.HBM_WINDOW_BYTES):
raise PhysAddrError("HBM offset exceeds reserved 128GB window")
return PhysAddr.hbm_addr(
rack_id=rack_id, sip_id=sip_id, cube_id=cube_id, hbm_offset=hbm_offset
)
return PhysAddr.hbm_addr(sip_id=sip_id, die_id=die_id, hbm_offset=hbm_offset)
@staticmethod
def hbm_pe_id(hbm_offset: int, slice_size_bytes: int) -> int:
return hbm_offset // slice_size_bytes
@staticmethod
def cube_sram_addr(
*, rack_id: int, sip_id: int, cube_id: int, sram_offset: int,
def pe_tcm_addr(
*, sip_id: int, die_id: int, pe_id: int, tcm_offset: int,
) -> PhysAddr:
_chk_max("cube_id", cube_id, 31)
_chk_range("sram_offset", sram_offset, 29)
sip_seg = cube_id
local_offset = (UnitType.SRAM << 34) | sram_offset
return PhysAddr(
rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
local_offset=local_offset,
kind="pe_resource", cube_id=cube_id,
unit_type=UnitType.SRAM, sub_offset=sram_offset,
return PhysAddr.pe_resource_addr(
sip_id=sip_id, die_id=die_id, pe_id=pe_id,
pe_sub_unit=PESubUnit.PE_TCM, sub_offset=tcm_offset,
)
@staticmethod
def pe_tcm_addr(
*, rack_id: int, sip_id: int, cube_id: int, pe_id: int, tcm_offset: int,
def pe_resource_addr(
*, sip_id: int, die_id: int, pe_id: int,
pe_sub_unit: int, sub_offset: int,
) -> PhysAddr:
_chk_max("cube_id", cube_id, 31)
_chk_max("die_id", die_id, _AHBM_DIE_MAX)
_chk_range("pe_id", pe_id, 4)
_chk_range("tcm_offset", tcm_offset, 29)
sip_seg = cube_id
local_offset = (UnitType.PE << 34) | (pe_id << 30) | tcm_offset
return PhysAddr(
rack_id=rack_id, sip_id=sip_id, sip_seg=sip_seg,
local_offset=local_offset,
kind="pe_resource", cube_id=cube_id,
unit_type=UnitType.PE, pe_id=pe_id, sub_offset=tcm_offset,
_chk_range("pe_sub_unit", pe_sub_unit, 4)
_chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)
local_offset = (
(UnitType.PE << _RES_KIND_SHIFT)
| (pe_id << _PE_ID_SHIFT)
| (pe_sub_unit << _PE_SUB_SHIFT)
| sub_offset
)
return PhysAddr(
sip_id=sip_id, die_id=die_id, local_offset=local_offset,
kind="pe_resource", unit_type=UnitType.PE,
pe_id=pe_id, pe_sub_unit=pe_sub_unit, sub_offset=sub_offset,
)
@staticmethod
def cube_sram_addr(
    *, sip_id: int, die_id: int, sram_offset: int,
) -> PhysAddr:
    """Build a CUBE_SRAM resource-window address on an AHBM die.

    Args:
        sip_id: target SIP.
        die_id: target die; checked against ``_AHBM_DIE_MAX``.
        sram_offset: offset inside the SRAM region; checked against
            ``_SRAM_OFFSET_BITS`` bits.

    Returns:
        A ``PhysAddr`` with ``kind="pe_resource"`` and
        ``unit_type=UnitType.SRAM``.

    Raises:
        Whatever ``_chk_max`` / ``_chk_range`` raise on an out-of-range
        argument (presumably ``PhysAddrError`` — helpers are defined
        elsewhere in this module; confirm).
    """
    _chk_max("die_id", die_id, _AHBM_DIE_MAX)
    _chk_range("sram_offset", sram_offset, _SRAM_OFFSET_BITS)
    return PhysAddr(
        sip_id=sip_id,
        die_id=die_id,
        local_offset=(UnitType.SRAM << _RES_KIND_SHIFT) | sram_offset,
        kind="pe_resource",
        unit_type=UnitType.SRAM,
        sub_offset=sram_offset,
    )
@staticmethod
def mcpu_resource_addr(
    *, sip_id: int, die_id: int, mcpu_sub_unit: int, sub_offset: int,
) -> PhysAddr:
    """Build an MCPU_LOCAL resource-window address on an AHBM die.

    Args:
        sip_id: target SIP.
        die_id: target die; checked against ``_AHBM_DIE_MAX``.
        mcpu_sub_unit: sub-unit selector; checked against 5 bits.
        sub_offset: offset inside the sub-unit; checked against
            ``_PE_SUB_OFFSET_BITS`` bits (the MCPU window reuses the PE
            sub-offset width).

    Returns:
        A ``PhysAddr`` with ``kind="pe_resource"`` and
        ``unit_type=UnitType.MCPU``.

    Raises:
        Whatever ``_chk_max`` / ``_chk_range`` raise on an out-of-range
        argument (presumably ``PhysAddrError`` — helpers are defined
        elsewhere in this module; confirm).
    """
    _chk_max("die_id", die_id, _AHBM_DIE_MAX)
    _chk_range("mcpu_sub_unit", mcpu_sub_unit, 5)
    _chk_range("sub_offset", sub_offset, _PE_SUB_OFFSET_BITS)

    kind_bits = UnitType.MCPU << _RES_KIND_SHIFT
    sub_unit_bits = mcpu_sub_unit << _MCPU_SUB_SHIFT
    return PhysAddr(
        sip_id=sip_id,
        die_id=die_id,
        local_offset=kind_bits | sub_unit_bits | sub_offset,
        kind="pe_resource",
        unit_type=UnitType.MCPU,
        mcpu_sub_unit=mcpu_sub_unit,
        sub_offset=sub_offset,
    )
# ── IOCHIPLET factory methods ────────────────────────────────────
@staticmethod
def iocpu_resource_addr(
    *, sip_id: int, die_id: int, iocpu_sub_unit: int, sub_offset: int,
) -> PhysAddr:
    """Build an IOCPU sub-unit address on an IOCHIPLET die.

    Args:
        sip_id: target SIP.
        die_id: target die; must lie in
            ``_CHIPLET_DIE_MIN.._CHIPLET_DIE_MAX``.
        iocpu_sub_unit: sub-unit selector; checked against 4 bits.
        sub_offset: offset inside the sub-unit; checked against
            ``_IOCPU_SUB_OFFSET_BITS`` bits.

    Returns:
        A ``PhysAddr`` with ``kind="iocpu"`` whose ``local_offset`` and
        ``chiplet_offset`` both hold the composed chiplet offset.

    Raises:
        PhysAddrError: if ``die_id`` is below the IOCHIPLET range or the
            composed offset reaches ``_IOCPU_BOUNDARY``. The ``_chk_*``
            helpers (defined elsewhere in this module) reject the other
            out-of-range arguments.
    """
    _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
    if die_id < _CHIPLET_DIE_MIN:
        raise PhysAddrError(
            f"die_id {die_id} is not an IOCHIPLET "
            f"(must be {_CHIPLET_DIE_MIN}..{_CHIPLET_DIE_MAX})"
        )
    _chk_range("iocpu_sub_unit", iocpu_sub_unit, 4)
    _chk_range("sub_offset", sub_offset, _IOCPU_SUB_OFFSET_BITS)

    chiplet_offset = (iocpu_sub_unit << _IOCPU_SUB_SHIFT) | sub_offset
    # Defensive guard: with the field widths checked above the composed
    # value should already stay below the boundary.
    if chiplet_offset >= _IOCPU_BOUNDARY:
        raise PhysAddrError("IOCPU region overflow (must be < 2 GB)")

    return PhysAddr(
        sip_id=sip_id,
        die_id=die_id,
        local_offset=chiplet_offset,
        kind="iocpu",
        chiplet_offset=chiplet_offset,
        iocpu_sub_unit=iocpu_sub_unit,
        sub_offset=sub_offset,
    )
@staticmethod
def ual_addr(*, sip_id: int, die_id: int, ual_offset: int) -> PhysAddr:
    """Build a UAL-region address on an IOCHIPLET die.

    The UAL region starts at ``_IOCPU_BOUNDARY`` within the chiplet
    offset space; ``ual_offset`` is relative to that boundary.

    Args:
        sip_id: target SIP.
        die_id: target die; must lie in
            ``_CHIPLET_DIE_MIN.._CHIPLET_DIE_MAX``.
        ual_offset: non-negative offset from the start of the UAL region.

    Returns:
        A ``PhysAddr`` with ``kind="ual"`` whose ``local_offset`` and
        ``chiplet_offset`` both hold ``_IOCPU_BOUNDARY + ual_offset``.

    Raises:
        PhysAddrError: if ``die_id`` is below the IOCHIPLET range or
            ``ual_offset`` is negative. The ``_chk_*`` helpers (defined
            elsewhere in this module) reject the other out-of-range
            arguments.
    """
    _chk_max("die_id", die_id, _CHIPLET_DIE_MAX)
    if die_id < _CHIPLET_DIE_MIN:
        raise PhysAddrError(f"die_id {die_id} is not an IOCHIPLET")
    # A negative offset would land below the boundary, i.e. inside the
    # IOCPU region, while still being tagged kind="ual"; the range check
    # on the sum alone cannot catch that.
    if ual_offset < 0:
        raise PhysAddrError("ual_offset must be non-negative")

    chiplet_offset = _IOCPU_BOUNDARY + ual_offset
    _chk_range("chiplet_offset", chiplet_offset, _CHIPLET_LOCAL_BITS)
    return PhysAddr(
        sip_id=sip_id,
        die_id=die_id,
        local_offset=chiplet_offset,
        kind="ual",
        chiplet_offset=chiplet_offset,
    )
+5 -5
View File
@@ -27,16 +27,16 @@ class AddressResolver:
def resolve(self, addr: PhysAddr) -> str:
s = addr.sip_id
c = addr.cube_id
d = addr.die_id
if addr.kind == "hbm":
node_id = f"sip{s}.cube{c}.hbm_ctrl"
node_id = f"sip{s}.cube{d}.hbm_ctrl"
elif addr.kind == "pe_resource":
if addr.unit_type == UnitType.PE:
node_id = f"sip{s}.cube{c}.pe{addr.pe_id}.pe_tcm"
node_id = f"sip{s}.cube{d}.pe{addr.pe_id}.pe_tcm"
elif addr.unit_type == UnitType.SRAM:
node_id = f"sip{s}.cube{c}.sram"
node_id = f"sip{s}.cube{d}.sram"
elif addr.unit_type == UnitType.MCPU:
node_id = f"sip{s}.cube{c}.m_cpu"
node_id = f"sip{s}.cube{d}.m_cpu"
else:
raise RoutingError(f"unsupported unit_type: {addr.unit_type}")
else:
+1 -1
View File
@@ -385,7 +385,7 @@ class RuntimeContext:
for cube_id in range(cubes_per_sip):
for pe_id in range(pes_per_cube):
self._allocators[(sip_id, cube_id, pe_id)] = PEMemAllocator(
rack_id=0, sip_id=sip_id, cube_id=cube_id, pe_id=pe_id, cfg=cfg,
sip_id=sip_id, die_id=cube_id, pe_id=pe_id, cfg=cfg,
)
# Initialize VA allocator (MMU mappings are installed via fabric MmuMapMsg)
+3 -3
View File
@@ -212,7 +212,7 @@ def _generate_probe_h2d(graph, edge_map) -> list[dict]:
t_offset = 0.0
for rid, (name, cube, hops) in enumerate(cases):
pa = PhysAddr.pe_hbm_addr(
rack_id=0, sip_id=0, cube_id=cube, pe_id=0,
sip_id=0, die_id=cube, pe_id=0,
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
)
dst_node = resolver.resolve(pa)
@@ -256,7 +256,7 @@ def _generate_probe_d2h(graph, edge_map) -> list[dict]:
t_offset = 0.0
for rid, (name, cube, hops) in enumerate(cases):
pa = PhysAddr.pe_hbm_addr(
rack_id=0, sip_id=0, cube_id=cube, pe_id=0,
sip_id=0, die_id=cube, pe_id=0,
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
)
dst_node = resolver.resolve(pa)
@@ -310,7 +310,7 @@ def _generate_probe_pe_dma(graph, edge_map) -> list[dict]:
t_offset = 0.0
for rid, (name, sip, src_cube, src_pe, dst_cube, dst_pe) in enumerate(cases):
pa = PhysAddr.pe_hbm_addr(
rack_id=0, sip_id=sip, cube_id=dst_cube, pe_id=dst_pe,
sip_id=sip, die_id=dst_cube, pe_id=dst_pe,
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
)
dst_node = resolver.resolve(pa)