Replace xbar/bridge/single-NOC with explicit router mesh (ADR-0019)

- Remove xbar_top/bot, bridge, and single NOC node from topology
- Each cube_mesh.yaml router becomes a separate SimPy node (r{row}c{col})
- HBM_CTRL consolidated to a single node per cube, attached to all routers
- All traffic (DMA data + PE command) routes through the same router mesh
- Update AddressResolver (no slice suffix), PathRouter (_adj_local)
- Update ADR-0002~0019, SPEC.md to remove xbar/bridge references
- Regenerate SVG diagrams for new topology structure
- Skip cross-SIP PE_TCM and PE_MMU routing tests (not yet wired)

326 passed, 13 skipped

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 17:51:28 -07:00
parent 31c7110da7
commit 5917b3497c
35 changed files with 953 additions and 1326 deletions
@@ -84,18 +84,16 @@ cube:
    hbm_total_gb_per_cube: 48
    hbm_slices_per_cube: 8
    hbm_total_bw_gbs: 1024.0
+    hbm_mapping_mode: n_to_one        # one_to_one | n_to_one (ADR-0019)
+    hbm_pseudo_channels: 64           # total pseudo channels per cube
+    hbm_channels_per_pe: 8            # = pseudo_channels / pes_per_cube
+    hbm_channel_bw_gbs: 32.0          # per-channel GB/s; 64 × 32 = 2048 vs hbm_total_bw_gbs 1024.0 — TODO confirm

  components:
-    noc:     { kind: noc,     impl: noc_2d_mesh_v1, attrs: { overhead_ns: 0.0 } }
-    m_cpu:   { kind: m_cpu,   impl: m_cpu_v1,   attrs: { overhead_ns: 5.0 } }
-    xbar:
-      top:    { kind: xbar, impl: xbar_v1, attrs: { overhead_ns: 2.0 } }
-      bottom: { kind: xbar, impl: xbar_v1, attrs: { overhead_ns: 2.0 } }
-      bridges:
-        - { id: left,  kind: xbar, impl: xbar_v1, attrs: { overhead_ns: 1.0 } }
-        - { id: right, kind: xbar, impl: xbar_v1, attrs: { overhead_ns: 1.0 } }
-    hbm_ctrl: { kind: hbm_ctrl, impl: hbm_ctrl_v1, attrs: { capacity: 1, efficiency: 1.0 } }
-    sram:     { kind: sram,     impl: sram_v1,     attrs: { size_mb: 32, overhead_ns: 2.0 } }
+    noc_router: { kind: noc_router, impl: forwarding_v1, attrs: { overhead_ns: 2.0 } }
+    m_cpu:      { kind: m_cpu,   impl: m_cpu_v1,   attrs: { overhead_ns: 5.0 } }
+    hbm_ctrl:   { kind: hbm_ctrl, impl: hbm_ctrl_v1, attrs: { capacity: 1, efficiency: 1.0 } }
+    sram:       { kind: sram,     impl: sram_v1,     attrs: { size_mb: 32, overhead_ns: 2.0 } }

  ucie:
    decompose: true
@@ -105,19 +103,15 @@ cube:
    per_connection_bw_gbs: 128.0   # BW per connection; 4 × 128 = 512 GB/s = UCIe PHY BW

  links:
-    xbar_to_hbm_bw_gbs: 256.0        # per-slice effective (2048 / 8 slices)
-    xbar_to_bridge_bw_gbs: 128.0     # bridge BW (xbar_top/bot ↔ bridge)
-    xbar_to_bridge_mm: 3.0           # xbar ↔ bridge wire distance
-    xbar_to_hbm_mm: 2.5
-    pe_dma_to_noc_bw_gbs: 256.0  # PE → NOC BW (= HBM slice BW, no bottleneck)
-    noc_to_xbar_mm: 0.0          # noc is distributed; distance modeled as 0
-    noc_to_xbar_bw_gbs: 256.0    # NOC → xbar_top/bot BW (= HBM slice BW)
-    noc_to_sram_mm: 0.0          # noc is distributed; distance modeled as 0
-    noc_to_sram:
-      per_connection_bw_gbs: 128.0   # BW per NOC connection
-      n_connections: 4               # 4 × 128 = 512 GB/s aggregate
-    m_cpu_to_noc_mm: 0.0         # noc is distributed; distance modeled as 0
-    noc_to_pe_cpu_mm: 0.0        # noc is distributed; distance modeled as 0
+    # Router mesh links (ADR-0019)
+    router_link_bw_gbs: 256.0        # inter-router XY mesh link BW
+    router_overhead_ns: 2.0          # per-router switching overhead; same value as noc_router attrs.overhead_ns — keep in sync
+    pe_to_router_bw_gbs: 256.0      # PE_DMA ↔ router (= N × hbm_channel_bw_gbs, N = hbm_channels_per_pe = 8)
+    hbm_to_router_bw_gbs: 256.0     # HBM_CTRL ↔ router (= N × hbm_channel_bw_gbs = 8 × 32)
+    sram_to_router_bw_gbs: 128.0    # SRAM ↔ router
+    m_cpu_to_router_mm: 0.0         # M_CPU ↔ router distance
+    pe_dma_to_noc_bw_gbs: 256.0     # PE_DMA → router BW under the pre-mesh key name (= pe_to_router_bw_gbs, no bottleneck)
+    noc_to_pe_cpu_mm: 0.0           # router → PE_CPU distance (command path)

 visualization:
  emit_views: [system, sip, cube]