Remove xbar/noc remnants, rule-based cube-view connectors

- Delete xbar.py and noc.py (TwoDMeshNocComponent) — unused since router mesh
- Remove xbar_v1/noc_2d_mesh_v1 from components.yaml
- Fix pe_to_xbar → pe_to_router in routing exclusion set
- Fix xbar_to_hbm_bw_gbs → hbm_to_router_bw_gbs in report.py
- Update all docstrings/comments referencing xbar/bridge → router mesh
- Cube-view connectors: rule-based _connector_points helper
- PE↔router: single diagonal line (not chevron)
- UCIe N/S: 45°→horizontal→45°
- UCIe E/W: 45°→vertical→45°
- HBM ports: 45°→horizontal→45°

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 23:59:12 -07:00
parent 7640635f90
commit eb792e6212
17 changed files with 163 additions and 571 deletions
@@ -37,7 +37,7 @@ def _hbm_pa(pe_id: int = 0) -> int:


 def _node(impl: str, overhead_ns: float = 0.0) -> Node:
-    return Node(id="test", kind="xbar", impl=impl, attrs={"overhead_ns": overhead_ns}, pos_mm=None)
+    return Node(id="test", kind="noc_router", impl=impl, attrs={"overhead_ns": overhead_ns}, pos_mm=None)


 # ── 1. unknown impl → error ──────────────────────────────────────────
@@ -55,7 +55,7 @@ def test_registry_unknown_impl_raises_error():

 def test_transit_component_yields_overhead_ns():
    """TransitComponent.run() yields exactly node.attrs['overhead_ns'] ns."""
-    node = _node("xbar_v1", overhead_ns=3.0)
+    node = _node("forwarding_v1", overhead_ns=3.0)
    comp = TransitComponent(node)
    env = simpy.Environment()

@@ -119,10 +119,9 @@ def test_engine_component_model_latency():
    """MemoryRead D2H latency for local cube0 (4096B).

    Bypass path (m_cpu bypass): pcie_ep → io_noc → conn → io_ucie → cube_ucie
-    → conn → noc → xbar_top → hbm_ctrl.slice0
+    → conn → router mesh → hbm_ctrl

-    Path goes through xbar_top (overhead_ns=2.0) instead of per-PE xbar.
-    Latency must be positive and reasonable.
+    Path goes through router mesh. Latency must be positive and reasonable.
    """
    graph = _graph()
    engine = GraphEngine(graph)
@@ -134,7 +133,6 @@ def test_engine_component_model_latency():
    h = engine.submit(msg)
    engine.wait(h)
    _, trace = engine.get_completion(h)
-    # Verify positive latency; exact value depends on path through xbar_top
    assert trace["total_ns"] > 0


@@ -1,18 +1,15 @@
-"""Tests for #5+#6 CUBE NOC Router Mesh + Position-Aware XBAR.
-
-Phase 1 verification: all tests FAIL until Phase 2 implements production code.
+"""Tests for CUBE NOC Explicit Router Mesh (ADR-0019).

 Key changes verified:
-  - Single NOC node per cube with internal router mesh simulation
-  - Auto-layout generates cube_mesh.yaml (6x6 grid for n_connections=4)
-  - Position-aware XBAR (top/bottom) replaces per-PE xbar chaining
+  - Explicit router nodes per cube from cube_mesh.yaml (6×6 grid)
+  - Auto-layout generates cube_mesh.yaml with PE/UCIe/M_CPU/SRAM attachments
  - Mesh file caching with source_hash change detection
-  - Path routing: PE_DMA → NOC → XBAR_top/bot → HBM_CTRL
+  - Path routing: PE_DMA → router mesh → HBM_CTRL

-Latency invariant after refactor:
-  Local HBM: PE_DMA → Router(overhead) → XBAR → HBM_CTRL
-  Cross-row:  PE_DMA → Router → mesh traverse → Router → XBAR → bridge → XBAR → HBM_CTRL
-  Cross-cube: PE_DMA → Router → mesh → UCIe → ... → mesh → XBAR → HBM_CTRL
+Latency invariant:
+  Local HBM:  PE_DMA → Router(overhead) → HBM_CTRL
+  Cross-row:  PE_DMA → Router → mesh hops → Router → HBM_CTRL
+  Cross-cube: PE_DMA → Router → mesh → UCIe → ... → mesh → HBM_CTRL
 """

 import pytest
@@ -24,7 +24,6 @@ from kernbench.components.builtin import (
    IoCpuComponent,
    MCpuComponent,
    PcieEpComponent,
-    PositionAwareXbarComponent,
    SramComponent,
    TransitComponent,
 )
@@ -232,7 +231,6 @@ def test_m_cpu_terminal_no_ctx_completes():
    ("forwarding_v1", TransitComponent),
    ("noc_v1",        TransitComponent),
    ("ucie_v1",       TransitComponent),
-    ("xbar_v1",       PositionAwareXbarComponent),
    ("pcie_ep_v1",    PcieEpComponent),
    ("io_cpu_v1",     IoCpuComponent),
    ("m_cpu_v1",      MCpuComponent),
@@ -1,7 +1,7 @@
 """Tests for H2D writes and PE DMA probe latency invariants.

 H2D tests use MemoryWriteMsg (pcie_ep → io_cpu → m_cpu → hbm_ctrl → response).
-PE DMA tests use PeDmaMsg (direct pe_dma → xbar → hbm_ctrl injection).
+PE DMA tests use PeDmaMsg (direct pe_dma → router mesh → hbm_ctrl injection).
 """
 from pathlib import Path

@@ -118,7 +118,7 @@ def test_h2d_local_cube_cut_through():
    """H2D to local cube with cut-through should be < 50ns for 4096B.

    Full command path: pcie_ep → io_cpu → ucie → noc → m_cpu
-    DMA: m_cpu → noc → xbar → hbm_ctrl (drain once at terminal)
+    DMA: m_cpu → router mesh → hbm_ctrl (drain once at terminal)
    Plus response path back.
    With store-and-forward each hop would serialize; cut-through keeps it low.
    """
@@ -205,7 +205,7 @@ def test_pe_dma_local_bottleneck_hbm():


 def test_pe_dma_same_half_bottleneck_hbm():
-    """PE DMA pe0→slice1 (same half via xbar_top): bottleneck = HBM effective BW."""
+    """PE DMA pe0→pe1 HBM (same row via router mesh): bottleneck = HBM effective BW."""
    bn = _pe_dma_bottleneck(src_cube=0, src_pe=0, dst_pe=1)
    expected = _hbm_effective_bw()
    assert bn == expected, f"Same-half PE DMA bottleneck {bn}, expected {expected}"
@@ -158,9 +158,9 @@ def test_pe_dma_to_router():
 def test_command_path_m_cpu_router_pe_cpu():
    es = _edge_set(_graph())
    cp = "sip0.cube0"
-    # m_cpu <-> r0c2 (bidirectional command)
-    assert (f"{cp}.m_cpu", f"{cp}.r0c2") in es
-    assert (f"{cp}.r0c2", f"{cp}.m_cpu") in es
+    # m_cpu <-> r1c2 (bidirectional command)
+    assert (f"{cp}.m_cpu", f"{cp}.r1c2") in es
+    assert (f"{cp}.r1c2", f"{cp}.m_cpu") in es
    # router -> pe_cpu for each PE (command kind)
    assert (f"{cp}.r0c0", f"{cp}.pe0.pe_cpu") in es
    assert (f"{cp}.r5c5", f"{cp}.pe7.pe_cpu") in es
@@ -416,8 +416,8 @@ def test_cube_view_hbm_router():


 def test_cube_view_m_cpu_router():
-    """Cube view: m_cpu connects to its router r0c2."""
+    """Cube view: m_cpu connects to its router r1c2."""
    v = _graph().cube_view
    ves = {(e.src, e.dst) for e in v.edges}
-    assert ("m_cpu", "r0c2") in ves
-    assert ("r0c2", "m_cpu") in ves
+    assert ("m_cpu", "r1c2") in ves
+    assert ("r1c2", "m_cpu") in ves