Remove xbar/noc remnants; make cube-view connectors rule-based

- Delete xbar.py and noc.py (TwoDMeshNocComponent) — unused since the switch to the router mesh
- Remove xbar_v1/noc_2d_mesh_v1 from components.yaml
- Fix pe_to_xbar → pe_to_router in routing exclusion set
- Fix xbar_to_hbm_bw_gbs → hbm_to_router_bw_gbs in report.py
- Update all docstrings/comments that referenced xbar/bridge to refer to the router mesh
- Cube-view connectors: rule-based _connector_points helper
  - PE↔router: single diagonal line (not chevron)
  - UCIe N/S: 45°→horizontal→45°
  - UCIe E/W: 45°→vertical→45°
  - HBM ports: 45°→horizontal→45°

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-06 23:59:12 -07:00
parent 7640635f90
commit eb792e6212
17 changed files with 163 additions and 571 deletions
+4 -6
View File
@@ -37,7 +37,7 @@ def _hbm_pa(pe_id: int = 0) -> int:
def _node(impl: str, overhead_ns: float = 0.0) -> Node:
return Node(id="test", kind="xbar", impl=impl, attrs={"overhead_ns": overhead_ns}, pos_mm=None)
return Node(id="test", kind="noc_router", impl=impl, attrs={"overhead_ns": overhead_ns}, pos_mm=None)
# ── 1. unknown impl → error ──────────────────────────────────────────
@@ -55,7 +55,7 @@ def test_registry_unknown_impl_raises_error():
def test_transit_component_yields_overhead_ns():
"""TransitComponent.run() yields exactly node.attrs['overhead_ns'] ns."""
node = _node("xbar_v1", overhead_ns=3.0)
node = _node("forwarding_v1", overhead_ns=3.0)
comp = TransitComponent(node)
env = simpy.Environment()
@@ -119,10 +119,9 @@ def test_engine_component_model_latency():
"""MemoryRead D2H latency for local cube0 (4096B).
Bypass path (m_cpu bypass): pcie_ep → io_noc → conn → io_ucie → cube_ucie
→ conn → noc → xbar_top → hbm_ctrl.slice0
→ conn → router mesh → hbm_ctrl
Path goes through xbar_top (overhead_ns=2.0) instead of per-PE xbar.
Latency must be positive and reasonable.
Path goes through router mesh. Latency must be positive and reasonable.
"""
graph = _graph()
engine = GraphEngine(graph)
@@ -134,7 +133,6 @@ def test_engine_component_model_latency():
h = engine.submit(msg)
engine.wait(h)
_, trace = engine.get_completion(h)
# Verify positive latency; exact value depends on path through xbar_top
assert trace["total_ns"] > 0
+8 -11
View File
@@ -1,18 +1,15 @@
"""Tests for #5+#6 CUBE NOC Router Mesh + Position-Aware XBAR.
Phase 1 verification: all tests FAIL until Phase 2 implements production code.
"""Tests for CUBE NOC Explicit Router Mesh (ADR-0019).
Key changes verified:
- Single NOC node per cube with internal router mesh simulation
- Auto-layout generates cube_mesh.yaml (6x6 grid for n_connections=4)
- Position-aware XBAR (top/bottom) replaces per-PE xbar chaining
- Explicit router nodes per cube from cube_mesh.yaml (6×6 grid)
- Auto-layout generates cube_mesh.yaml with PE/UCIe/M_CPU/SRAM attachments
- Mesh file caching with source_hash change detection
- Path routing: PE_DMA → NOC → XBAR_top/bot → HBM_CTRL
- Path routing: PE_DMA → router mesh → HBM_CTRL
Latency invariant after refactor:
Local HBM: PE_DMA → Router(overhead) → XBAR → HBM_CTRL
Cross-row: PE_DMA → Router → mesh traverse → Router → XBAR → bridge → XBAR → HBM_CTRL
Cross-cube: PE_DMA → Router → mesh → UCIe → ... → mesh → XBAR → HBM_CTRL
Latency invariant:
Local HBM: PE_DMA → Router(overhead) → HBM_CTRL
Cross-row: PE_DMA → Router → mesh hops → Router → HBM_CTRL
Cross-cube: PE_DMA → Router → mesh → UCIe → ... → mesh → HBM_CTRL
"""
import pytest
-2
View File
@@ -24,7 +24,6 @@ from kernbench.components.builtin import (
IoCpuComponent,
MCpuComponent,
PcieEpComponent,
PositionAwareXbarComponent,
SramComponent,
TransitComponent,
)
@@ -232,7 +231,6 @@ def test_m_cpu_terminal_no_ctx_completes():
("forwarding_v1", TransitComponent),
("noc_v1", TransitComponent),
("ucie_v1", TransitComponent),
("xbar_v1", PositionAwareXbarComponent),
("pcie_ep_v1", PcieEpComponent),
("io_cpu_v1", IoCpuComponent),
("m_cpu_v1", MCpuComponent),
+3 -3
View File
@@ -1,7 +1,7 @@
"""Tests for H2D writes and PE DMA probe latency invariants.
H2D tests use MemoryWriteMsg (pcie_ep → io_cpu → m_cpu → hbm_ctrl → response).
PE DMA tests use PeDmaMsg (direct pe_dma → xbar → hbm_ctrl injection).
PE DMA tests use PeDmaMsg (direct pe_dma → router mesh → hbm_ctrl injection).
"""
from pathlib import Path
@@ -118,7 +118,7 @@ def test_h2d_local_cube_cut_through():
"""H2D to local cube with cut-through should be < 50ns for 4096B.
Full command path: pcie_ep → io_cpu → ucie → noc → m_cpu
DMA: m_cpu → noc → xbar → hbm_ctrl (drain once at terminal)
DMA: m_cpu → router mesh → hbm_ctrl (drain once at terminal)
Plus response path back.
With store-and-forward each hop would serialize; cut-through keeps it low.
"""
@@ -205,7 +205,7 @@ def test_pe_dma_local_bottleneck_hbm():
def test_pe_dma_same_half_bottleneck_hbm():
"""PE DMA pe0→slice1 (same half via xbar_top): bottleneck = HBM effective BW."""
"""PE DMA pe0→pe1 HBM (same row via router mesh): bottleneck = HBM effective BW."""
bn = _pe_dma_bottleneck(src_cube=0, src_pe=0, dst_pe=1)
expected = _hbm_effective_bw()
assert bn == expected, f"Same-half PE DMA bottleneck {bn}, expected {expected}"
+6 -6
View File
@@ -158,9 +158,9 @@ def test_pe_dma_to_router():
def test_command_path_m_cpu_router_pe_cpu():
es = _edge_set(_graph())
cp = "sip0.cube0"
# m_cpu <-> r0c2 (bidirectional command)
assert (f"{cp}.m_cpu", f"{cp}.r0c2") in es
assert (f"{cp}.r0c2", f"{cp}.m_cpu") in es
# m_cpu <-> r1c2 (bidirectional command)
assert (f"{cp}.m_cpu", f"{cp}.r1c2") in es
assert (f"{cp}.r1c2", f"{cp}.m_cpu") in es
# router -> pe_cpu for each PE (command kind)
assert (f"{cp}.r0c0", f"{cp}.pe0.pe_cpu") in es
assert (f"{cp}.r5c5", f"{cp}.pe7.pe_cpu") in es
@@ -416,8 +416,8 @@ def test_cube_view_hbm_router():
def test_cube_view_m_cpu_router():
"""Cube view: m_cpu connects to its router r0c2."""
"""Cube view: m_cpu connects to its router r1c2."""
v = _graph().cube_view
ves = {(e.src, e.dst) for e in v.edges}
assert ("m_cpu", "r0c2") in ves
assert ("r0c2", "m_cpu") in ves
assert ("m_cpu", "r1c2") in ves
assert ("r1c2", "m_cpu") in ves