Add probe CLI improvements, D2H read, UCIe/HBM tuning, BW sweep
- Probe CLI: restructured output (tables first, routes below), per-hop timestamps, split cross-cube into best/worst cases, D2H read section
- UCIe overhead: 1ns -> 8ns per port (16ns per crossing) to fix the latency inversion where cross-cube-best < cross-half
- HBM efficiency: added efficiency=0.8 factor to hbm_ctrl, reducing effective BW from 256 to 204.8 GB/s
- Multi-size BW sweep: saturation tables (4KB-1MB) for all probe cases
- Probe default data size: 4KB -> 32KB for more realistic measurements
- IOChiplet NOC + D2H topology and tests
- NOC mesh, xbar, BW occupancy components and tests
- Cube mesh visualization diagram

278 tests pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -110,7 +110,7 @@ class PathRouter:
|
||||
def find_mcpu_dma_path(self, m_cpu_id: str, dst_hbm_slice_id: str) -> list[str]:
|
||||
"""M_CPU DMA path: never routes through PE-internal nodes (ADR-0015 D5).
|
||||
|
||||
Same-cube: deterministic [m_cpu, noc, xbar.pe_i, hbm_ctrl.slice_i].
|
||||
Same-cube: deterministic [m_cpu, noc, xbar_top/bot, hbm_ctrl.slice_i].
|
||||
Cross-cube: Dijkstra via _adj_mcpu_dma (pe_internal/pe_to_xbar excluded)
|
||||
→ routes through NOC → UCIe → target cube NOC → xbar → HBM.
|
||||
"""
|
||||
@@ -118,14 +118,23 @@ class PathRouter:
|
||||
d_cube = ".".join(dst_hbm_slice_id.split(".")[:2])
|
||||
if m_cube == d_cube:
|
||||
slice_idx = int(dst_hbm_slice_id.rsplit("slice", 1)[1])
|
||||
xbar = "xbar_top" if slice_idx < 4 else "xbar_bot"
|
||||
return [
|
||||
m_cpu_id,
|
||||
f"{m_cube}.noc",
|
||||
f"{m_cube}.xbar.pe{slice_idx}",
|
||||
f"{m_cube}.{xbar}",
|
||||
dst_hbm_slice_id,
|
||||
]
|
||||
return self._run_dijkstra(self._adj_mcpu_dma, m_cpu_id, dst_hbm_slice_id)
|
||||
|
||||
def find_memory_path(self, src: str, dst: str) -> list[str]:
|
||||
"""Direct memory path: pcie_ep → io_noc → cube → xbar → hbm_ctrl.
|
||||
|
||||
Uses _adj_mcpu_dma which excludes pe_internal and pe_to_xbar edges,
|
||||
preventing routing through PE pipeline nodes.
|
||||
"""
|
||||
return self._run_dijkstra(self._adj_mcpu_dma, src, dst)
|
||||
|
||||
def find_node_path(self, src: str, dst: str) -> list[str]:
|
||||
"""General routing between arbitrary nodes, including command edges.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user