Files
kernbench2/tests/test_routing.py
T
ywkang 81cc32c46b ADR-0001 Rev 2: 51-bit PhysAddr layout with concrete sub-unit tables
Remove rack_id (4 bits), rename sip_seg→die_id, shift fields to enable
42-bit local_offset (4 TB per die). Define PE_LOCAL/MCPU_LOCAL/CUBE_SRAM
sub-unit tables for AHBM dies and IOCPU sub-unit table for IOCHIPLET
dies (1 TB window). Supersedes ADR-0031.

Also fixes latent VA/PA confusion in pe_dma pipeline DMA path where
virtual addresses were decoded as physical addresses without MMU
translation — previously masked by coincidental bit-position alignment.

529 passed (+6 recovered), 10 pre-existing failures unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 15:52:29 -07:00

210 lines
7.3 KiB
Python

import pytest
from pathlib import Path
from kernbench.policy.address.phyaddr import PhysAddr, UnitType
from kernbench.policy.routing.router import AddressResolver, PathRouter, RoutingError
from kernbench.topology.builder import load_topology
# Location of the topology description loaded by _graph(): the file named
# "topology.yaml" in the parent of the directory that contains this module.
TOPOLOGY_PATH = Path(__file__).parent.parent / "topology.yaml"
def _graph():
    """Load the topology at ``TOPOLOGY_PATH`` and return the result.

    Each call invokes ``load_topology`` again; nothing is cached here.
    """
    topology = load_topology(TOPOLOGY_PATH)
    return topology
# ── AddressResolver ──────────────────────────────────────────────────
def test_resolve_hbm_addr():
    """An HBM address resolves to ``sip{S}.cube{C}.hbm_ctrl``.

    There is a single HBM controller node per cube.
    """
    resolver = AddressResolver(_graph())
    address = PhysAddr.hbm_addr(sip_id=0, die_id=3, hbm_offset=0x1000)
    resolved = resolver.resolve(address)
    assert resolved == "sip0.cube3.hbm_ctrl"
def test_resolve_hbm_addr_high_offset():
    """A large HBM offset does not change which ``hbm_ctrl`` is resolved."""
    resolver = AddressResolver(_graph())
    address = PhysAddr.hbm_addr(sip_id=0, die_id=0, hbm_offset=0x600000000)
    resolved = resolver.resolve(address)
    assert resolved == "sip0.cube0.hbm_ctrl"
def test_resolve_pe_tcm_addr():
    """A PE TCM address resolves to ``sip{S}.cube{C}.pe{P}.pe_tcm``."""
    resolver = AddressResolver(_graph())
    address = PhysAddr.pe_tcm_addr(
        sip_id=1,
        die_id=5,
        pe_id=7,
        tcm_offset=0x400,
    )
    resolved = resolver.resolve(address)
    assert resolved == "sip1.cube5.pe7.pe_tcm"
def test_resolve_sram_addr():
    """A cube SRAM address resolves to ``sip{S}.cube{C}.sram``."""
    resolver = AddressResolver(_graph())
    address = PhysAddr.cube_sram_addr(sip_id=0, die_id=10, sram_offset=0x800)
    resolved = resolver.resolve(address)
    assert resolved == "sip0.cube10.sram"
def test_resolve_mcpu_addr():
    """An MCPU resource address resolves to ``sip{S}.cube{C}.m_cpu``."""
    resolver = AddressResolver(_graph())
    address = PhysAddr.mcpu_resource_addr(
        sip_id=0,
        die_id=2,
        mcpu_sub_unit=0,
        sub_offset=0,
    )
    resolved = resolver.resolve(address)
    assert resolved == "sip0.cube2.m_cpu"
def test_resolve_nonexistent_node():
    """Resolving an address outside the compiled topology raises RoutingError."""
    resolver = AddressResolver(_graph())
    # The topology has only 2 SIPs, so sip_id=15 names a node that is absent.
    missing = PhysAddr.hbm_addr(sip_id=15, die_id=0, hbm_offset=0)
    with pytest.raises(RoutingError):
        resolver.resolve(missing)
# ── PathRouter: local HBM via router mesh ────────────────────────────
def test_path_local_hbm():
    """PE0 reaches its cube's ``hbm_ctrl`` through the router mesh.

    The path starts at the PE's ``pe_dma``, ends at ``hbm_ctrl``, contains
    at least one ``sip0.cube0.r*`` node, and no xbar/bridge nodes.
    """
    source = "sip0.cube0.pe0"
    target = "sip0.cube0.hbm_ctrl"
    hops = PathRouter(_graph()).find_path(source, target)

    assert hops[0] == "sip0.cube0.pe0.pe_dma"
    assert hops[-1] == "sip0.cube0.hbm_ctrl"

    # At least one hop has to be a router node of cube0.
    router_prefix = "sip0.cube0.r"
    assert any(hop.startswith(router_prefix) for hop in hops), \
        "HBM path must traverse router mesh"

    # The new topology has no xbar or bridge nodes.
    assert all("xbar" not in hop and "bridge" not in hop for hop in hops)
# ── PathRouter: remote PE HBM (different corner, same cube) ──────────
def test_path_remote_pe_hbm():
    """PE4 (bottom half) reaches ``hbm_ctrl`` through the router mesh."""
    source = "sip0.cube0.pe4"
    target = "sip0.cube0.hbm_ctrl"
    hops = PathRouter(_graph()).find_path(source, target)

    assert hops[0] == "sip0.cube0.pe4.pe_dma"
    assert hops[-1] == "sip0.cube0.hbm_ctrl"
    # A cube0 router node must appear; xbar/bridge nodes must not.
    assert any(hop.startswith("sip0.cube0.r") for hop in hops)
    assert all("xbar" not in hop and "bridge" not in hop for hop in hops)
# ── PathRouter: all PEs equidistant to HBM (n_to_one routing weight) ─
def test_all_pe_hbm_equidistant():
    """Every PE of cube0 is the same routing distance from ``hbm_ctrl``.

    With n_to_one mapping and high routing weight on HBM edges, all
    PE->hbm_ctrl paths are expected to accumulate the same distance.
    """
    router = PathRouter(_graph())
    target = "sip0.cube0.hbm_ctrl"

    # Query pe0..pe7 in order, keeping only the distance of each result.
    results = (
        router.find_path_with_distance(f"sip0.cube0.pe{pe}", target)
        for pe in range(8)
    )
    distances = [dist for _, dist in results]

    # Compare every distance against the first one.
    baseline = distances[0]
    assert all(dist == baseline for dist in distances), (
        f"expected equal distances, got: {distances}"
    )
def test_remote_pe_distance_not_less_than_local():
    """PE4's distance to ``hbm_ctrl`` is at least PE0's (mesh topology)."""
    router = PathRouter(_graph())
    target = "sip0.cube0.hbm_ctrl"

    _, local_dist = router.find_path_with_distance("sip0.cube0.pe0", target)
    _, remote_dist = router.find_path_with_distance("sip0.cube0.pe4", target)

    assert remote_dist >= local_dist
def test_path_remote_cube_hbm():
    """PE0 in cube0 reaches cube1's HBM over UCIe (ADR-0004 D4)."""
    source = "sip0.cube0.pe0"
    target = "sip0.cube1.hbm_ctrl"
    hops = PathRouter(_graph()).find_path(source, target)

    assert hops[0] == "sip0.cube0.pe0.pe_dma"
    assert hops[-1] == "sip0.cube1.hbm_ctrl"

    # Crossing between cubes requires a UCIe link somewhere on the path.
    assert any("ucie" in hop.lower() for hop in hops), \
        "remote cube path must traverse UCIe"

    # Router + ucie + remote router + hbm means the path cannot be short.
    minimum_hops = 5
    assert len(hops) >= minimum_hops
# ── PathRouter: SRAM via router mesh ─────────────────────────────────
def test_path_sram_via_router_mesh():
    """The PE0 -> cube SRAM path goes through router mesh nodes."""
    source = "sip0.cube0.pe0"
    target = "sip0.cube0.sram"
    hops = PathRouter(_graph()).find_path(source, target)

    assert hops[0] == "sip0.cube0.pe0.pe_dma"
    assert hops[-1] == "sip0.cube0.sram"

    # At least one hop has to be a router node of cube0.
    assert any(hop.startswith("sip0.cube0.r") for hop in hops), \
        "SRAM path must traverse router mesh"

    # xbar nodes must not appear anywhere on the path.
    assert all("xbar" not in hop for hop in hops)
# ── PathRouter: PE TCM (local) ──────────────────────────────────────
def test_path_local_tcm():
    """PE0 reaches its own TCM inside the PE, not via the router mesh."""
    source = "sip0.cube0.pe0"
    target = "sip0.cube0.pe0.pe_tcm"
    hops = PathRouter(_graph()).find_path(source, target)

    assert hops[0] == "sip0.cube0.pe0.pe_dma"
    assert hops[-1] == "sip0.cube0.pe0.pe_tcm"

    # PE-internal path: neither xbar nodes nor cube0 router nodes allowed.
    assert all(
        "xbar" not in hop and not hop.startswith("sip0.cube0.r")
        for hop in hops
    )
# ── PathRouter: positive distance, determinism, remote reachability ──
def test_path_distance_positive():
    """A routed path has accumulated distance greater than zero (ADR-0002 D4)."""
    router = PathRouter(_graph())
    source = "sip0.cube0.pe0"
    target = "sip0.cube0.hbm_ctrl"
    _, distance = router.find_path_with_distance(source, target)
    assert distance > 0
def test_path_deterministic():
    """Two routers built on the same graph agree on the (src, dst) path."""
    graph = _graph()
    source = "sip0.cube0.pe3"
    target = "sip0.cube0.hbm_ctrl"

    # Build both routers first, then query each one in turn.
    routers = [PathRouter(graph), PathRouter(graph)]
    paths = [router.find_path(source, target) for router in routers]

    assert paths[0] == paths[1]
def test_remote_cube_path_no_routing_error():
    """Routing to a remote cube's HBM does not raise RoutingError (ADR-0004 D4)."""
    router = PathRouter(_graph())
    # cube0.PE0 -> cube1.hbm_ctrl: the adjacent cube in the E direction.
    source = "sip0.cube0.pe0"
    target = "sip0.cube1.hbm_ctrl"
    hops = router.find_path(source, target)
    # Getting here means no exception was raised; the path must be non-empty.
    assert len(hops) >= 1