commit - release 1
This commit is contained in:
+126
@@ -0,0 +1,126 @@
|
||||
|
||||
# System-wide settings: wire delay per mm, SiP count, the switch component,
# and the PCIe endpoint-to-switch link.
# NOTE(review): indentation was flattened in this view; the grouping of the
# keys below under `system:` is presumed from key order — confirm.
system:
|
||||
ns_per_mm: 0.01 # wire propagation delay: 10 ps/mm (on-chip silicon)
|
||||
|
||||
sips:
|
||||
count: 2
|
||||
|
||||
components:
|
||||
switch: { kind: switch, impl: switch_v1, attrs: { overhead_ns: 5.0 } }
|
||||
|
||||
links:
|
||||
io_ep_to_switch:
|
||||
kind: pcie
|
||||
bw_gbs_per_ep: 768.0 # NOTE(review): pcie_ep_to_io_cpu_bw_gbs (256.0) is annotated as matching this value, but the two differ — confirm
|
||||
distance_mm: 20.0
|
||||
|
||||
# Per-SiP description: cube mesh size, the I/O chiplet (components, internal
# links, placed instances), and the inter-cube link / routing settings.
sip:
|
||||
# w/h presumably give the mesh width and height in cubes (4 x 4 = 16) — confirm
cube_mesh: { w: 4, h: 4 }
|
||||
|
||||
iochiplet:
|
||||
components:
|
||||
pcie_ep: { kind: pcie_ep, impl: pcie_ep_v1, attrs: { overhead_ns: 5.0 } }
|
||||
io_cpu: { kind: io_cpu, impl: io_cpu_v1, attrs: { overhead_ns: 10.0 } }
|
||||
links:
|
||||
pcie_ep_to_io_cpu_bw_gbs: 256.0 # meant to match system.links.io_ep_to_switch.bw_gbs_per_ep; NOTE(review): that key is 768.0 in this file — confirm which value is intended
|
||||
pcie_ep_to_io_cpu_mm: 1.0
|
||||
io_cpu_to_ucie_bw_gbs: 512.0 # matches ucie.phy_bw_gbs per PHY
|
||||
io_cpu_to_ucie_mm: 1.5
|
||||
instances:
|
||||
# Single I/O chiplet instance with four UCIe PHYs (P0-P3)
|
||||
- id: io0
|
||||
|
||||
place: { side: N, offset_norm: 0.5 }
|
||||
ucie: { phy_bw_gbs: 512.0, phys: [P0, P1, P2, P3] }
|
||||
# Each entry ties one PHY to the N side of a cube at xy [0..3, 0]
cube_ports:
|
||||
- { cube: {xy: [0,0]}, cube_side: N, phy: P0, distance_mm: 2.0 }
|
||||
- { cube: {xy: [1,0]}, cube_side: N, phy: P1, distance_mm: 2.0 }
|
||||
- { cube: {xy: [2,0]}, cube_side: N, phy: P2, distance_mm: 2.0 }
|
||||
- { cube: {xy: [3,0]}, cube_side: N, phy: P3, distance_mm: 2.0 }
|
||||
|
||||
links:
|
||||
inter_cube_mesh:
|
||||
bw_gbs_per_ucie_phy: 512.0
|
||||
distance_mm_across_seam: 1.0
|
||||
# presumably dimension-ordered XY routing — confirm against the consumer
routing: { algo: xy }
|
||||
|
||||
# Per-cube description. This part covers physical geometry and PE placement.
cube:
|
||||
# Sizes carry an _mm suffix (presumably millimetres — confirm)
geometry:
|
||||
cube_mm: { w: 17.0, h: 14.0 }
|
||||
hbm_mm: { w: 9.0, h: 5.0 }
|
||||
ucie_mm: { size: 2.0 }
|
||||
|
||||
# PEs are grouped at the four cube corners
pe_layout:
|
||||
corners: [NW, NE, SW, SE] # N corners → xbar top row; S corners → xbar bottom row
|
||||
pe_per_corner: 2 # total PEs per cube: 4 * 2 = 8
|
||||
|
||||
# Template for a single PE: its components and the links between them.
pe_template:
|
||||
components:
|
||||
pe_cpu: { kind: pe_cpu, impl: pe_cpu_v1, attrs: { overhead_ns: 2.0 } }
|
||||
pe_scheduler: { kind: pe_scheduler, impl: pe_scheduler_v1, attrs: { overhead_ns: 1.0 } }
|
||||
pe_dma: { kind: pe_dma, impl: pe_dma_v1, attrs: { rd_engines: 1, wr_engines: 1 } }
|
||||
# pe_gemm and pe_math name the same shared_resource (accel_slot);
# presumably they contend for it — confirm with the consumer of this file
pe_gemm: { kind: pe_gemm, impl: pe_gemm_v1, attrs: { overhead_ns: 0.0, shared_resource: accel_slot, peak_tflops_f16: 8.0 } }
|
||||
pe_math: { kind: pe_math, impl: pe_math_v1, attrs: { overhead_ns: 0.0, shared_resource: accel_slot } }
|
||||
# The pe_tcm flow mapping continues on the next content line
pe_tcm: { kind: pe_tcm, impl: pe_tcm_v1, attrs:
|
||||
{ size_mb: 16 } }
|
||||
# Intra-PE links: *_mm keys are wire distances, *_bw_gbs keys are bandwidths
# (units presumed from the key suffixes — confirm)
links:
|
||||
pe_cpu_to_scheduler_mm: 0.5
|
||||
scheduler_to_dma_mm: 0.5
|
||||
scheduler_to_gemm_mm: 0.5
|
||||
scheduler_to_math_mm: 0.5
|
||||
dma_to_tcm_bw_gbs: 512.0
|
||||
dma_to_tcm_mm: 0.5
|
||||
gemm_to_tcm_bw_gbs: 512.0 # GEMM reads inputs from TCM (ADR-0014 D5)
|
||||
gemm_to_tcm_mm: 0.5
|
||||
math_to_tcm_bw_gbs: 512.0
|
||||
math_to_tcm_mm: 0.5
|
||||
|
||||
# HBM figures: total capacity (GB) and slice count per cube, plus total bandwidth.
memory_map:
|
||||
hbm_total_gb_per_cube: 48
|
||||
hbm_slices_per_cube: 8
|
||||
# NOTE(review): the key does not say whether this total is per cube — confirm
hbm_total_bw_gbs: 1024.0
|
||||
|
||||
# Cube-level components: NoC, management CPU, crossbars, HBM controller, SRAM,
# followed by the UCIe settings.
components:
|
||||
noc: { kind: noc, impl: noc_2d_mesh_v1, attrs: { overhead_ns: 0.0 } }
|
||||
m_cpu: { kind: m_cpu, impl: m_cpu_v1, attrs: { overhead_ns: 5.0 } }
|
||||
# The PE crossbar and both bridges use the same impl (xbar_v1) with different overheads
xbar:
|
||||
pe: { kind: xbar, impl: xbar_v1, attrs: { overhead_ns: 2.0 } }
|
||||
bridges:
|
||||
- { id: left, kind: xbar, impl: xbar_v1, attrs: { overhead_ns: 1.0 } }
|
||||
- { id: right, kind: xbar, impl: xbar_v1, attrs: { overhead_ns: 1.0 } }
|
||||
hbm_ctrl: { kind: hbm_ctrl, impl: hbm_ctrl_v1, attrs: { capacity: 1 } }
|
||||
sram: { kind: sram, impl: sram_v1, attrs: { size_mb: 32, overhead_ns: 2.0 } }
|
||||
|
||||
# NOTE(review): with indentation lost it is unclear whether `ucie` nests under
# `components` or sits beside it — confirm against the original file
ucie:
|
||||
decompose: true
|
||||
ports: [N, S, E, W]
|
||||
overhead_ns: 1.0
|
||||
|
||||
# Cube-level links: bandwidths (*_bw_gbs) and wire distances (*_mm) between the
# PEs, crossbars, HBM, NoC, SRAM and UCIe.
links:
|
||||
pe_to_xbar_bw_gbs: 256.0 # per-PE effective (2048 / 8 PEs); NOTE(review): hbm_total_bw_gbs is 1024.0 in this file, so the origin of 2048 is unclear — confirm
|
||||
xbar_to_hbm_bw_gbs: 256.0 # per-PE effective (2048 / 8 PEs); NOTE(review): same 2048 figure as above — confirm
|
||||
xbar_to_bridge_bw_gbs: 128.0 # bridge BW (same as xbar chain BW)
|
||||
xbar_x_bw_gbs: 128.0 # X-direction BW for xbar chain traversal
|
||||
xbar_chain_intra_corner_mm: 2.0 # xbar wire distance within same corner PE pair
|
||||
xbar_chain_inter_corner_mm: 10.0 # xbar wire distance between corner pairs (NW↔NE, SW↔SE)
|
||||
xbar_row_n_to_bridge_mm: 3.0
|
||||
xbar_row_s_to_bridge_mm: 3.0
|
||||
xbar_to_hbm_mm: 2.5
|
||||
pe_to_xbar_row_n_mm: 6.0
|
||||
pe_to_xbar_row_s_mm: 6.0
|
||||
pe_dma_to_noc_mm: 0.0 # noc is distributed; distance modeled as 0
|
||||
pe_dma_to_noc_bw_gbs: 512.0 # PE non-HBM data path BW
|
||||
# NOTE(review): no n_connections is given here, unlike noc_to_sram and
# noc_to_ucie — confirm whether a default applies
noc_to_xbar:
|
||||
per_connection_bw_gbs: 128.0 # BW per NOC connection
|
||||
noc_to_sram_mm: 0.0 # noc is distributed; distance modeled as 0
|
||||
noc_to_sram:
|
||||
per_connection_bw_gbs: 128.0 # BW per NOC connection
|
||||
n_connections: 4 # 4 × 128 = 512 GB/s aggregate
|
||||
noc_to_ucie:
|
||||
per_connection_bw_gbs: 128.0 # BW per NOC connection
|
||||
n_connections: 4 # 4 × 128 = 512 GB/s = UCIe PHY BW
|
||||
m_cpu_to_noc_mm: 0.0 # noc is distributed; distance modeled as 0
|
||||
noc_to_pe_cpu_mm: 0.0 # noc is distributed; distance modeled as 0
|
||||
|
||||
# Selects which views to emit and which SiP / cube ids they cover.
visualization:
|
||||
emit_views: [system, sip, cube]
|
||||
sip_ids: [0]
|
||||
# presumably indices into the 4 x 4 cube mesh (valid range 0-15) — confirm
cubes: [0, 9, 15]
|
||||
Reference in New Issue
Block a user