ADR-0001 Rev 2: 51-bit PhysAddr layout with concrete sub-unit tables
Remove rack_id (4 bits), rename sip_seg → die_id, and shift the remaining fields to make room for a 42-bit local_offset (4 TB per die).
Define the PE_LOCAL / MCPU_LOCAL / CUBE_SRAM sub-unit tables for AHBM dies and the IOCPU sub-unit table for IOCHIPLET dies (1 TB window). Supersedes ADR-0031.
Also fixes a latent VA/PA confusion in the pe_dma pipeline DMA path, where virtual addresses were decoded as physical addresses without MMU translation; the bug was previously masked by coincidental bit-position alignment.
Test results: 529 passed (+6 recovered); 10 pre-existing failures unchanged.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -149,7 +149,7 @@ def _make_tuple_allocators(
|
||||
) -> dict[tuple[int, int, int], PEMemAllocator]:
|
||||
return {
|
||||
(s, c, p): PEMemAllocator(
|
||||
rack_id=0, sip_id=s, cube_id=c, pe_id=p, cfg=_CFG,
|
||||
sip_id=s, die_id=c, pe_id=p, cfg=_CFG,
|
||||
)
|
||||
for s in range(num_sips)
|
||||
for c in range(num_cubes)
|
||||
|
||||
@@ -23,7 +23,7 @@ def _engine():
|
||||
def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int:
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
@@ -30,7 +30,7 @@ def _graph():
|
||||
def _hbm_pa(pe_id: int = 0) -> int:
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=0, pe_id=pe_id,
|
||||
sip_id=0, die_id=0, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
@@ -50,7 +50,7 @@ def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int:
|
||||
from kernbench.policy.address.phyaddr import PhysAddr
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
@@ -31,7 +31,7 @@ def _hbm_pa(sip=0, cube=0, pe_id=0):
|
||||
from kernbench.policy.address.phyaddr import PhysAddr
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
@@ -29,7 +29,7 @@ def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int:
|
||||
# 48 GB / 8 slices = 6 GB per slice
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
@@ -37,7 +37,7 @@ def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int:
|
||||
|
||||
def _sram_pa(sip: int = 0, cube: int = 0) -> int:
|
||||
"""Create an SRAM physical address."""
|
||||
pa = PhysAddr.cube_sram_addr(rack_id=0, sip_id=sip, cube_id=cube, sram_offset=0x800)
|
||||
pa = PhysAddr.cube_sram_addr(sip_id=sip, die_id=cube, sram_offset=0x800)
|
||||
return pa.encode()
|
||||
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ def _engine():
|
||||
def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int:
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
@@ -38,7 +38,7 @@ def _engine():
|
||||
def _hbm_pa(sip=0, cube=0, pe_id=0):
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
@@ -53,7 +53,7 @@ def _engine():
|
||||
def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int:
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
+182
-62
@@ -1,7 +1,10 @@
|
||||
import pytest
|
||||
|
||||
from kernbench.policy.address.allocator import AddressConfig, AllocationError, PEMemAllocator
|
||||
from kernbench.policy.address.phyaddr import PhysAddr, PhysAddrError, UnitType
|
||||
from kernbench.policy.address.phyaddr import (
|
||||
PhysAddr, PhysAddrError, UnitType,
|
||||
PESubUnit, MCPUSubUnit, IOCPUSubUnit,
|
||||
)
|
||||
|
||||
_MB = 1 << 20
|
||||
_GB = 1 << 30
|
||||
@@ -23,13 +26,11 @@ _CFG = AddressConfig(
|
||||
|
||||
|
||||
def test_physaddr_immutable():
|
||||
pa = PhysAddr.hbm_addr(rack_id=0, sip_id=0, cube_id=0, hbm_offset=0)
|
||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=0, hbm_offset=0)
|
||||
with pytest.raises(AttributeError):
|
||||
pa.rack_id = 1 # type: ignore[misc]
|
||||
# hashable
|
||||
{pa}
|
||||
# comparable
|
||||
pa2 = PhysAddr.hbm_addr(rack_id=0, sip_id=0, cube_id=0, hbm_offset=0)
|
||||
pa.sip_id = 1 # type: ignore[misc]
|
||||
{pa} # hashable
|
||||
pa2 = PhysAddr.hbm_addr(sip_id=0, die_id=0, hbm_offset=0)
|
||||
assert pa == pa2
|
||||
|
||||
|
||||
@@ -37,120 +38,133 @@ def test_physaddr_immutable():
|
||||
|
||||
|
||||
def test_hbm_encode_decode_roundtrip():
|
||||
pa = PhysAddr.hbm_addr(rack_id=2, sip_id=3, cube_id=5, hbm_offset=0x1000)
|
||||
pa = PhysAddr.hbm_addr(sip_id=3, die_id=5, hbm_offset=0x1000)
|
||||
raw = pa.encode()
|
||||
dec = PhysAddr.decode(raw)
|
||||
assert dec.rack_id == 2
|
||||
assert dec.sip_id == 3
|
||||
assert dec.cube_id == 5
|
||||
assert dec.die_id == 5
|
||||
assert dec.kind == "hbm"
|
||||
assert dec.hbm_offset == 0x1000
|
||||
|
||||
|
||||
# ── PE resource encode/decode roundtrip ─────────────────────────────
|
||||
# ── PE resource encode/decode roundtrip (new layout) ───────────────
|
||||
|
||||
|
||||
def test_pe_resource_encode_decode_roundtrip():
|
||||
pa = PhysAddr(
|
||||
rack_id=1, sip_id=2, sip_seg=7, local_offset=0,
|
||||
kind="pe_resource", cube_id=7,
|
||||
unit_type=UnitType.PE, pe_id=3, ext=1, sub_offset=0xFF,
|
||||
pa = PhysAddr.pe_resource_addr(
|
||||
sip_id=2, die_id=7, pe_id=3,
|
||||
pe_sub_unit=PESubUnit.PE_TCM, sub_offset=0xFF,
|
||||
)
|
||||
# manually build local_offset matching bit layout
|
||||
local_offset = (UnitType.PE << 34) | (3 << 30) | (1 << 29) | 0xFF
|
||||
pa2 = PhysAddr(
|
||||
rack_id=1, sip_id=2, sip_seg=7, local_offset=local_offset,
|
||||
kind="pe_resource", cube_id=7,
|
||||
unit_type=UnitType.PE, pe_id=3, ext=1, sub_offset=0xFF,
|
||||
)
|
||||
raw = pa2.encode()
|
||||
raw = pa.encode()
|
||||
dec = PhysAddr.decode(raw)
|
||||
assert dec.kind == "pe_resource"
|
||||
assert dec.unit_type == UnitType.PE
|
||||
assert dec.pe_id == 3
|
||||
assert dec.ext == 1
|
||||
assert dec.pe_sub_unit == PESubUnit.PE_TCM
|
||||
assert dec.sub_offset == 0xFF
|
||||
assert dec.die_id == 7
|
||||
assert dec.sip_id == 2
|
||||
|
||||
|
||||
def test_pe_resource_all_sub_units():
|
||||
"""Each PE sub-unit roundtrips correctly."""
|
||||
for su in PESubUnit:
|
||||
pa = PhysAddr.pe_resource_addr(
|
||||
sip_id=0, die_id=0, pe_id=0,
|
||||
pe_sub_unit=su, sub_offset=42,
|
||||
)
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.pe_sub_unit == su
|
||||
assert dec.sub_offset == 42
|
||||
|
||||
|
||||
# ── pe_hbm_addr factory ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_pe_hbm_addr_factory():
|
||||
SLICE = 6 * (1 << 30) # 6 GB per PE slice
|
||||
SLICE = 6 * _GB
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=0,
|
||||
sip_id=0, die_id=0,
|
||||
pe_id=2, pe_local_hbm_offset=1024, slice_size_bytes=SLICE,
|
||||
)
|
||||
assert pa.kind == "hbm"
|
||||
assert pa.cube_id == 0
|
||||
assert pa.die_id == 0
|
||||
assert pa.hbm_offset == 2 * SLICE + 1024
|
||||
|
||||
|
||||
def test_pe_hbm_addr_overflow():
|
||||
SLICE = 6 * (1 << 30)
|
||||
SLICE = 6 * _GB
|
||||
with pytest.raises(PhysAddrError, match="pe_local_hbm_offset"):
|
||||
PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=0,
|
||||
sip_id=0, die_id=0,
|
||||
pe_id=0, pe_local_hbm_offset=SLICE, slice_size_bytes=SLICE,
|
||||
)
|
||||
|
||||
|
||||
# ── Invalid unit_type decode (fix #1) ──────────────────────────────
|
||||
# ── Invalid resource_kind decode ──────────────────────────────────
|
||||
|
||||
|
||||
def test_invalid_unit_type_raises():
|
||||
# Craft a PE-resource address with unit_type=7 (invalid)
|
||||
local_offset = (7 << 34) | (0 << 30) | 0
|
||||
pa_raw = PhysAddr(
|
||||
rack_id=0, sip_id=0, sip_seg=0, local_offset=local_offset,
|
||||
)
|
||||
def test_invalid_resource_kind_raises():
|
||||
# resource_kind=7 (invalid), addr_space=0
|
||||
local_offset = (7 << 34) | 0
|
||||
pa_raw = PhysAddr(sip_id=0, die_id=0, local_offset=local_offset)
|
||||
raw = pa_raw.encode()
|
||||
with pytest.raises(PhysAddrError, match="unit_type"):
|
||||
with pytest.raises(PhysAddrError, match="resource_kind"):
|
||||
PhysAddr.decode(raw)
|
||||
|
||||
|
||||
# ── hbm_pe_id utility (fix #3) ─────────────────────────────────────
|
||||
# ── hbm_pe_id utility ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_hbm_pe_id_utility():
|
||||
SLICE = 6 * (1 << 30) # 6 GB
|
||||
SLICE = 6 * _GB
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=0,
|
||||
sip_id=0, die_id=0,
|
||||
pe_id=5, pe_local_hbm_offset=256, slice_size_bytes=SLICE,
|
||||
)
|
||||
assert PhysAddr.hbm_pe_id(pa.hbm_offset, SLICE) == 5
|
||||
|
||||
|
||||
# ── UnitType.SRAM exists (fix #5) ──────────────────────────────────
|
||||
# ── UnitType / sub-unit enums ──────────────────────────────────────
|
||||
|
||||
|
||||
def test_sram_unit_type_exists():
|
||||
assert UnitType.SRAM == 2
|
||||
|
||||
|
||||
def test_pe_sub_unit_enum():
|
||||
assert PESubUnit.PE_TCM == 6
|
||||
assert PESubUnit.IPCQ == 2
|
||||
|
||||
|
||||
def test_mcpu_sub_unit_enum():
|
||||
assert MCPUSubUnit.MCPU_SRAM == 5
|
||||
|
||||
|
||||
def test_iocpu_sub_unit_enum():
|
||||
assert IOCPUSubUnit.IO_SRAM == 5
|
||||
|
||||
|
||||
# ── cube_sram_addr factory + roundtrip ──────────────────────────────
|
||||
|
||||
|
||||
def test_cube_sram_addr_roundtrip():
|
||||
pa = PhysAddr.cube_sram_addr(
|
||||
rack_id=0, sip_id=1, cube_id=3, sram_offset=0x800,
|
||||
)
|
||||
pa = PhysAddr.cube_sram_addr(sip_id=1, die_id=3, sram_offset=0x800)
|
||||
assert pa.kind == "pe_resource"
|
||||
assert pa.unit_type == UnitType.SRAM
|
||||
assert pa.cube_id == 3
|
||||
assert pa.die_id == 3
|
||||
assert pa.sub_offset == 0x800
|
||||
# encode → decode roundtrip
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.unit_type == UnitType.SRAM
|
||||
assert dec.cube_id == 3
|
||||
assert dec.die_id == 3
|
||||
assert dec.sub_offset == 0x800
|
||||
|
||||
|
||||
def test_cube_sram_addr_range_check():
|
||||
with pytest.raises(PhysAddrError):
|
||||
PhysAddr.cube_sram_addr(
|
||||
rack_id=0, sip_id=0, cube_id=0,
|
||||
sram_offset=(1 << 29), # exceeds 29-bit sub_offset
|
||||
sip_id=0, die_id=0,
|
||||
sram_offset=(1 << 25), # exceeds 25-bit sub_offset
|
||||
)
|
||||
|
||||
|
||||
@@ -158,29 +172,137 @@ def test_cube_sram_addr_range_check():
|
||||
|
||||
|
||||
def test_pe_tcm_addr_roundtrip():
|
||||
pa = PhysAddr.pe_tcm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=2, pe_id=7, tcm_offset=0x400,
|
||||
)
|
||||
pa = PhysAddr.pe_tcm_addr(sip_id=0, die_id=2, pe_id=7, tcm_offset=0x400)
|
||||
assert pa.kind == "pe_resource"
|
||||
assert pa.unit_type == UnitType.PE
|
||||
assert pa.pe_id == 7
|
||||
assert pa.cube_id == 2
|
||||
assert pa.die_id == 2
|
||||
assert pa.pe_sub_unit == PESubUnit.PE_TCM
|
||||
assert pa.sub_offset == 0x400
|
||||
# encode → decode roundtrip
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.unit_type == UnitType.PE
|
||||
assert dec.pe_id == 7
|
||||
assert dec.pe_sub_unit == PESubUnit.PE_TCM
|
||||
assert dec.sub_offset == 0x400
|
||||
|
||||
|
||||
def test_pe_tcm_addr_range_check():
|
||||
with pytest.raises(PhysAddrError):
|
||||
PhysAddr.pe_tcm_addr(
|
||||
rack_id=0, sip_id=0, cube_id=0, pe_id=0,
|
||||
tcm_offset=(1 << 29), # exceeds 29-bit sub_offset
|
||||
sip_id=0, die_id=0, pe_id=0,
|
||||
tcm_offset=(1 << 25), # exceeds 25-bit sub_offset
|
||||
)
|
||||
|
||||
|
||||
# ── MCPU resource factory + roundtrip ──────────────────────────────
|
||||
|
||||
|
||||
def test_mcpu_resource_roundtrip():
|
||||
pa = PhysAddr.mcpu_resource_addr(
|
||||
sip_id=0, die_id=1,
|
||||
mcpu_sub_unit=MCPUSubUnit.MCPU_SRAM, sub_offset=0x100,
|
||||
)
|
||||
assert pa.kind == "pe_resource"
|
||||
assert pa.unit_type == UnitType.MCPU
|
||||
assert pa.mcpu_sub_unit == MCPUSubUnit.MCPU_SRAM
|
||||
assert pa.sub_offset == 0x100
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.unit_type == UnitType.MCPU
|
||||
assert dec.mcpu_sub_unit == MCPUSubUnit.MCPU_SRAM
|
||||
assert dec.sub_offset == 0x100
|
||||
|
||||
|
||||
# ── IOCHIPLET: IOCPU factory + roundtrip ────────────────────────────
|
||||
|
||||
|
||||
def test_iocpu_resource_roundtrip():
|
||||
pa = PhysAddr.iocpu_resource_addr(
|
||||
sip_id=1, die_id=17,
|
||||
iocpu_sub_unit=IOCPUSubUnit.IPCQ, sub_offset=0x20000,
|
||||
)
|
||||
assert pa.kind == "iocpu"
|
||||
assert pa.iocpu_sub_unit == IOCPUSubUnit.IPCQ
|
||||
assert pa.sub_offset == 0x20000
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.kind == "iocpu"
|
||||
assert dec.iocpu_sub_unit == IOCPUSubUnit.IPCQ
|
||||
assert dec.sub_offset == 0x20000
|
||||
assert dec.die_id == 17
|
||||
|
||||
|
||||
def test_iocpu_die_range_check():
|
||||
with pytest.raises(PhysAddrError, match="IOCHIPLET"):
|
||||
PhysAddr.iocpu_resource_addr(
|
||||
sip_id=0, die_id=5, # not a chiplet die
|
||||
iocpu_sub_unit=0, sub_offset=0,
|
||||
)
|
||||
|
||||
|
||||
# ── IOCHIPLET: UAL factory + roundtrip ──────────────────────────────
|
||||
|
||||
|
||||
def test_ual_addr_roundtrip():
|
||||
pa = PhysAddr.ual_addr(sip_id=0, die_id=16, ual_offset=0x1000)
|
||||
assert pa.kind == "ual"
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.kind == "ual"
|
||||
assert dec.die_id == 16
|
||||
assert dec.chiplet_offset >= (1 << 31) # >= 2 GB boundary
|
||||
|
||||
|
||||
# ── die_id dispatch ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_die_id_ahbm_range():
|
||||
for die in [0, 15]:
|
||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=die, hbm_offset=0)
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.kind == "hbm"
|
||||
assert dec.die_id == die
|
||||
|
||||
|
||||
def test_die_id_chiplet_range():
|
||||
for die in [16, 20]:
|
||||
pa = PhysAddr.iocpu_resource_addr(
|
||||
sip_id=0, die_id=die,
|
||||
iocpu_sub_unit=0, sub_offset=0,
|
||||
)
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.kind == "iocpu"
|
||||
assert dec.die_id == die
|
||||
|
||||
|
||||
def test_die_id_reserved_raises():
|
||||
raw = (0 << 47) | (21 << 42) | 0 # die_id=21 (reserved)
|
||||
with pytest.raises(PhysAddrError, match="reserved"):
|
||||
PhysAddr.decode(raw)
|
||||
|
||||
|
||||
# ── Boundary values ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_sip_boundary():
|
||||
pa = PhysAddr.hbm_addr(sip_id=15, die_id=0, hbm_offset=0)
|
||||
dec = PhysAddr.decode(pa.encode())
|
||||
assert dec.sip_id == 15
|
||||
|
||||
|
||||
def test_mbz_enforcement_ahbm():
|
||||
"""AHBM local_offset bits [41:38] must be zero."""
|
||||
local_offset = (1 << 38) | (1 << 37) # MBZ bit set + HBM
|
||||
pa = PhysAddr(sip_id=0, die_id=0, local_offset=local_offset)
|
||||
with pytest.raises(PhysAddrError, match="bits \\[41:38\\]"):
|
||||
pa.encode()
|
||||
|
||||
|
||||
def test_mbz_enforcement_chiplet():
|
||||
"""IOCHIPLET local_offset bits [41:40] must be zero."""
|
||||
local_offset = (1 << 40) | 0 # MBZ bit set
|
||||
pa = PhysAddr(sip_id=0, die_id=16, local_offset=local_offset)
|
||||
with pytest.raises(PhysAddrError, match="bits \\[41:40\\]"):
|
||||
pa.encode()
|
||||
|
||||
|
||||
# ── AddressConfig ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -193,7 +315,7 @@ def test_address_config_derived_sizes():
|
||||
|
||||
|
||||
def _make_alloc(pe_id: int = 0) -> PEMemAllocator:
|
||||
return PEMemAllocator(rack_id=0, sip_id=0, cube_id=0, pe_id=pe_id, cfg=_CFG)
|
||||
return PEMemAllocator(sip_id=0, die_id=0, pe_id=pe_id, cfg=_CFG)
|
||||
|
||||
|
||||
def test_allocator_hbm_basic():
|
||||
@@ -201,8 +323,7 @@ def test_allocator_hbm_basic():
|
||||
pa = a.alloc_hbm(4096)
|
||||
assert pa.kind == "hbm"
|
||||
assert pa.sip_id == 0
|
||||
assert pa.cube_id == 0
|
||||
# hbm_offset should be pe3's slice start
|
||||
assert pa.die_id == 0
|
||||
assert pa.hbm_offset == 3 * 6 * _GB
|
||||
|
||||
|
||||
@@ -210,8 +331,8 @@ def test_allocator_hbm_sequential():
|
||||
a = _make_alloc()
|
||||
pa1 = a.alloc_hbm(1024)
|
||||
pa2 = a.alloc_hbm(2048)
|
||||
assert pa1.hbm_offset == 0 # pe0 slice start + 0
|
||||
assert pa2.hbm_offset == 1024 # pe0 slice start + 1024
|
||||
assert pa1.hbm_offset == 0
|
||||
assert pa2.hbm_offset == 1024
|
||||
|
||||
|
||||
def test_allocator_hbm_overflow():
|
||||
@@ -235,7 +356,6 @@ def test_allocator_tcm_basic():
|
||||
|
||||
def test_allocator_tcm_respects_reserved():
|
||||
a = _make_alloc()
|
||||
# allocatable = 12 MB, should succeed
|
||||
a.alloc_tcm(12 * _MB)
|
||||
assert a.tcm_used == 12 * _MB
|
||||
assert a.tcm_total == 12 * _MB
|
||||
|
||||
+1
-1
@@ -21,7 +21,7 @@ def _engine():
|
||||
def _hbm_pa(sip: int = 0, cube: int = 0, pe_id: int = 0) -> int:
|
||||
slice_bytes = 48 * (1 << 30) // 8
|
||||
pa = PhysAddr.pe_hbm_addr(
|
||||
rack_id=0, sip_id=sip, cube_id=cube, pe_id=pe_id,
|
||||
sip_id=sip, die_id=cube, pe_id=pe_id,
|
||||
pe_local_hbm_offset=0x1000, slice_size_bytes=slice_bytes,
|
||||
)
|
||||
return pa.encode()
|
||||
|
||||
+15
-15
@@ -20,7 +20,7 @@ def test_resolve_hbm_addr():
|
||||
"""HBM address -> sip{S}.cube{C}.hbm_ctrl (single controller per cube)."""
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
pa = PhysAddr.hbm_addr(rack_id=0, sip_id=0, cube_id=3, hbm_offset=0x1000)
|
||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=3, hbm_offset=0x1000)
|
||||
assert resolver.resolve(pa) == "sip0.cube3.hbm_ctrl"
|
||||
|
||||
|
||||
@@ -28,33 +28,33 @@ def test_resolve_hbm_addr_high_offset():
|
||||
"""HBM address with large offset still resolves to same hbm_ctrl."""
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
pa = PhysAddr.hbm_addr(rack_id=0, sip_id=0, cube_id=0, hbm_offset=0x600000000)
|
||||
pa = PhysAddr.hbm_addr(sip_id=0, die_id=0, hbm_offset=0x600000000)
|
||||
assert resolver.resolve(pa) == "sip0.cube0.hbm_ctrl"
|
||||
|
||||
|
||||
def test_resolve_pe_tcm_addr():
|
||||
"""PE TCM address → sip{S}.cube{C}.pe{P}.pe_tcm"""
|
||||
"""PE TCM address -> sip{S}.cube{C}.pe{P}.pe_tcm"""
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
pa = PhysAddr.pe_tcm_addr(rack_id=0, sip_id=1, cube_id=5, pe_id=7, tcm_offset=0x400)
|
||||
pa = PhysAddr.pe_tcm_addr(sip_id=1, die_id=5, pe_id=7, tcm_offset=0x400)
|
||||
assert resolver.resolve(pa) == "sip1.cube5.pe7.pe_tcm"
|
||||
|
||||
|
||||
def test_resolve_sram_addr():
|
||||
"""SRAM address → sip{S}.cube{C}.sram"""
|
||||
"""SRAM address -> sip{S}.cube{C}.sram"""
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
pa = PhysAddr.cube_sram_addr(rack_id=0, sip_id=0, cube_id=10, sram_offset=0x800)
|
||||
pa = PhysAddr.cube_sram_addr(sip_id=0, die_id=10, sram_offset=0x800)
|
||||
assert resolver.resolve(pa) == "sip0.cube10.sram"
|
||||
|
||||
|
||||
def test_resolve_mcpu_addr():
|
||||
"""MCPU pe_resource address → sip{S}.cube{C}.m_cpu"""
|
||||
"""MCPU pe_resource address -> sip{S}.cube{C}.m_cpu"""
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
pa = PhysAddr(
|
||||
rack_id=0, sip_id=0, sip_seg=2, local_offset=(UnitType.MCPU << 34),
|
||||
kind="pe_resource", cube_id=2, unit_type=UnitType.MCPU,
|
||||
pa = PhysAddr.mcpu_resource_addr(
|
||||
sip_id=0, die_id=2,
|
||||
mcpu_sub_unit=0, sub_offset=0,
|
||||
)
|
||||
assert resolver.resolve(pa) == "sip0.cube2.m_cpu"
|
||||
|
||||
@@ -64,7 +64,7 @@ def test_resolve_nonexistent_node():
|
||||
g = _graph()
|
||||
resolver = AddressResolver(g)
|
||||
# sip_id=15 doesn't exist in the 2-SIP topology
|
||||
pa = PhysAddr.hbm_addr(rack_id=0, sip_id=15, cube_id=0, hbm_offset=0)
|
||||
pa = PhysAddr.hbm_addr(sip_id=15, die_id=0, hbm_offset=0)
|
||||
with pytest.raises(RoutingError):
|
||||
resolver.resolve(pa)
|
||||
|
||||
@@ -73,7 +73,7 @@ def test_resolve_nonexistent_node():
|
||||
|
||||
|
||||
def test_path_local_hbm():
|
||||
"""PE0 -> hbm_ctrl: pe_dma → router → hbm_ctrl (through router mesh)."""
|
||||
"""PE0 -> hbm_ctrl: pe_dma -> router -> hbm_ctrl (through router mesh)."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.hbm_ctrl")
|
||||
@@ -107,7 +107,7 @@ def test_all_pe_hbm_equidistant():
|
||||
"""All PEs in a cube have equal routing distance to hbm_ctrl.
|
||||
|
||||
With n_to_one mapping and high routing weight on HBM edges,
|
||||
all PE→hbm_ctrl paths have the same accumulated distance.
|
||||
all PE->hbm_ctrl paths have the same accumulated distance.
|
||||
"""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
@@ -151,7 +151,7 @@ def test_path_remote_cube_hbm():
|
||||
|
||||
|
||||
def test_path_sram_via_router_mesh():
|
||||
"""PE → SRAM must go through router mesh nodes."""
|
||||
"""PE -> SRAM must go through router mesh nodes."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.sram")
|
||||
@@ -168,7 +168,7 @@ def test_path_sram_via_router_mesh():
|
||||
|
||||
|
||||
def test_path_local_tcm():
|
||||
"""PE0 → own TCM is PE-internal, not via router mesh."""
|
||||
"""PE0 -> own TCM is PE-internal, not via router mesh."""
|
||||
g = _graph()
|
||||
router = PathRouter(g)
|
||||
path = router.find_path("sip0.cube0.pe0", "sip0.cube0.pe0.pe_tcm")
|
||||
|
||||
@@ -44,7 +44,7 @@ _CFG = AddressConfig(
|
||||
|
||||
def _make_allocators(num_pe: int = 8) -> dict[tuple[int, int, int], PEMemAllocator]:
|
||||
return {
|
||||
(0, 0, i): PEMemAllocator(rack_id=0, sip_id=0, cube_id=0, pe_id=i, cfg=_CFG)
|
||||
(0, 0, i): PEMemAllocator(sip_id=0, die_id=0, pe_id=i, cfg=_CFG)
|
||||
for i in range(num_pe)
|
||||
}
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ def _make_ctx():
|
||||
|
||||
def test_allocator_free_hbm_reclaims_space():
|
||||
"""free_hbm returns HBM space; subsequent alloc can reuse it."""
|
||||
a = PEMemAllocator(rack_id=0, sip_id=0, cube_id=0, pe_id=0, cfg=_CFG)
|
||||
a = PEMemAllocator(sip_id=0, die_id=0, pe_id=0, cfg=_CFG)
|
||||
pa1 = a.alloc_hbm(4096)
|
||||
used_after_alloc = a.hbm_used
|
||||
a.free_hbm(pa1, 4096)
|
||||
@@ -66,7 +66,7 @@ def test_allocator_free_hbm_reclaims_space():
|
||||
|
||||
def test_allocator_free_tcm_reclaims_space():
|
||||
"""free_tcm returns TCM space."""
|
||||
a = PEMemAllocator(rack_id=0, sip_id=0, cube_id=0, pe_id=0, cfg=_CFG)
|
||||
a = PEMemAllocator(sip_id=0, die_id=0, pe_id=0, cfg=_CFG)
|
||||
pa1 = a.alloc_tcm(256)
|
||||
used_after_alloc = a.tcm_used
|
||||
a.free_tcm(pa1, 256)
|
||||
|
||||
@@ -39,7 +39,7 @@ _CFG = AddressConfig(
|
||||
|
||||
def _make_allocators(num_pe: int = 8) -> dict[tuple[int, int, int], PEMemAllocator]:
|
||||
return {
|
||||
(0, 0, i): PEMemAllocator(rack_id=0, sip_id=0, cube_id=0, pe_id=i, cfg=_CFG)
|
||||
(0, 0, i): PEMemAllocator(sip_id=0, die_id=0, pe_id=i, cfg=_CFG)
|
||||
for i in range(num_pe)
|
||||
}
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ def _make_standalone(shape, num_pe=NUM_PE):
|
||||
sram_bytes_per_cube=32 * _MB,
|
||||
)
|
||||
allocators = {
|
||||
(0, 0, i): PEMemAllocator(rack_id=0, sip_id=0, cube_id=0, pe_id=i, cfg=cfg)
|
||||
(0, 0, i): PEMemAllocator(sip_id=0, die_id=0, pe_id=i, cfg=cfg)
|
||||
for i in range(num_pe)
|
||||
}
|
||||
va_alloc = VirtualAllocator(va_base=0x1_0000_0000, va_size=64 * _GB, page_size=4096)
|
||||
|
||||
Reference in New Issue
Block a user