IPCQ-DMA co-design HW design doc + fix IPCQ slot BW model
Add a hardware design document (docs/ipcq-dma-codesign-hw.md) covering the PE_IPCQ high-level architecture, simulator verification, the proposed HW implementation, and an analysis of alternatives. Include D2 block diagrams for the baseline and proposed PE architectures. Fix the IPCQ slot-memory bandwidth parameters to match topology.yaml: SRAM 128→512 GB/s (intrinsic BW; still NoC-bottlenecked at 128 GB/s), HBM 32→256 GB/s (was the per-channel figure, now the per-PE aggregate). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,157 @@
|
||||
direction: right
|
||||
|
||||
pe: PE {
|
||||
style.fill: "#f8f9fa"
|
||||
style.stroke: "#495057"
|
||||
style.border-radius: 8
|
||||
|
||||
cpu: PE_CPU (control) {
|
||||
style.fill: "#bbdefb"
|
||||
style.stroke: "#1565c0"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
sched: PE_SCHED (dispatch) {
|
||||
style.fill: "#bbdefb"
|
||||
style.stroke: "#1565c0"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
ipcq_added: IPCQ (added) {
|
||||
style.fill: "#e1f5fe"
|
||||
style.stroke: "#0277bd"
|
||||
style.stroke-dash: 5
|
||||
style.stroke-width: 2
|
||||
style.border-radius: 6
|
||||
|
||||
ipcq: PE_IPCQ (control plane) {
|
||||
style.fill: "#bbdefb"
|
||||
style.stroke: "#1565c0"
|
||||
style.border-radius: 4
|
||||
}
|
||||
}
|
||||
|
||||
dma: PE_DMA (single FIFO inbox) {
|
||||
style.fill: "#fff3e0"
|
||||
style.stroke: "#e65100"
|
||||
style.border-radius: 6
|
||||
}
|
||||
|
||||
fs: PE_FETCH_STORE {
|
||||
style.fill: "#c8e6c9"
|
||||
style.stroke: "#2e7d32"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
tcm: TCM (16MB SRAM) {
|
||||
style.fill: "#fce4ec"
|
||||
style.stroke: "#c62828"
|
||||
style.border-radius: 6
|
||||
|
||||
ipcq_slot: IPCQ Slot Region {
|
||||
style.stroke-dash: 5
|
||||
style.fill: "#ffcdd2"
|
||||
style.stroke: "#c62828"
|
||||
style.border-radius: 3
|
||||
}
|
||||
}
|
||||
|
||||
gemm: GEMM engine {
|
||||
style.fill: "#c8e6c9"
|
||||
style.stroke: "#2e7d32"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
math: MATH engine {
|
||||
style.fill: "#c8e6c9"
|
||||
style.stroke: "#2e7d32"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
fport: Fabric Port {
|
||||
style.fill: "#ffe0b2"
|
||||
style.stroke: "#e65100"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
# Control → dispatch
|
||||
cpu -> sched: cmd dispatch
|
||||
cpu -> ipcq_added.ipcq: IpcqRequest
|
||||
|
||||
# Compute pipeline
|
||||
sched -> dma: TileToken\n(compute port)
|
||||
dma -> fs: TileToken
|
||||
dma <-> tcm: DMA_READ/WRITE\n(HBM ↔ TCM)
|
||||
fs <-> tcm: fetch/store\n(TCM ↔ reg)
|
||||
fs -> gemm: TileToken
|
||||
fs -> math: TileToken
|
||||
gemm -> fs: TileToken
|
||||
math -> fs: TileToken
|
||||
|
||||
# IPCQ data path — outbound
|
||||
ipcq_added.ipcq -> dma: IpcqDmaToken\n(IPCQ port) {style.stroke: "#1565c0"}
|
||||
|
||||
# IPCQ data path — inbound (MetaArrival: DMA → IPCQ)
|
||||
dma -> ipcq_added.ipcq: IpcqMetaArrival {style.stroke: "#1565c0"}
|
||||
|
||||
# Credit return: PE_IPCQ → PE_DMA (drawn dashed)
|
||||
ipcq_added.ipcq -> dma: IpcqCreditMetadata\n(NoC latency charged) {
|
||||
style.stroke: "#7b1fa2"
|
||||
style.stroke-dash: 5
|
||||
}
|
||||
|
||||
# DMA ↔ Fabric
|
||||
dma <-> fport
|
||||
}
|
||||
|
||||
# ── NoC Router + attached resources ──
|
||||
|
||||
noc: NoC Router {
|
||||
style.fill: "#f3e5f5"
|
||||
style.stroke: "#6a1b9a"
|
||||
style.border-radius: 6
|
||||
}
|
||||
|
||||
hbm: Local HBM {
|
||||
style.fill: "#e8eaf6"
|
||||
style.stroke: "#283593"
|
||||
style.border-radius: 6
|
||||
|
||||
ipcq_slot_hbm: IPCQ Slot Region {
|
||||
style.stroke-dash: 5
|
||||
style.fill: "#c5cae9"
|
||||
style.stroke: "#283593"
|
||||
style.border-radius: 3
|
||||
}
|
||||
}
|
||||
|
||||
sram: Cube SRAM {
|
||||
style.fill: "#e0f7fa"
|
||||
style.stroke: "#00695c"
|
||||
style.border-radius: 6
|
||||
|
||||
ipcq_slot_sram: IPCQ Slot Region {
|
||||
style.stroke-dash: 5
|
||||
style.fill: "#b2dfdb"
|
||||
style.stroke: "#00695c"
|
||||
style.border-radius: 3
|
||||
}
|
||||
}
|
||||
|
||||
other_pe: Other PEs {
|
||||
style.fill: "#ede7f6"
|
||||
style.stroke: "#6a1b9a"
|
||||
style.border-radius: 6
|
||||
}
|
||||
|
||||
other_cube: Other Cubes / SIPs {
|
||||
style.fill: "#ede7f6"
|
||||
style.stroke: "#6a1b9a"
|
||||
style.border-radius: 6
|
||||
}
|
||||
|
||||
pe.fport <-> noc
|
||||
noc <-> hbm
|
||||
noc <-> sram
|
||||
noc <-> other_pe
|
||||
noc <-> other_cube
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 1014 KiB |
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 44 KiB |
@@ -0,0 +1,166 @@
|
||||
direction: right
|
||||
|
||||
pe: PE {
|
||||
style.fill: "#f8f9fa"
|
||||
style.stroke: "#495057"
|
||||
style.border-radius: 8
|
||||
|
||||
cpu: PE_CPU (control) {
|
||||
style.fill: "#bbdefb"
|
||||
style.stroke: "#1565c0"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
sched: PE_SCHED (dispatch) {
|
||||
style.fill: "#bbdefb"
|
||||
style.stroke: "#1565c0"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
ipcq: IPCQ Controller (NEW) {
|
||||
style.fill: "#e1f5fe"
|
||||
style.stroke: "#0277bd"
|
||||
style.border-radius: 6
|
||||
style.stroke-width: 2
|
||||
|
||||
ptrmgmt: Pointer Mgmt {
|
||||
style.fill: "#b3e5fc"
|
||||
style.stroke: "#0277bd"
|
||||
style.border-radius: 4
|
||||
|
||||
qprf: QPair Reg File
|
||||
bp: Backpressure
|
||||
sag: Slot Addr Gen
|
||||
}
|
||||
|
||||
sideband: Sideband {
|
||||
style.fill: "#b3e5fc"
|
||||
style.stroke: "#0277bd"
|
||||
style.border-radius: 4
|
||||
|
||||
metax: Meta Extractor
|
||||
crinj: Credit Injector
|
||||
crrcv: Credit Receiver
|
||||
}
|
||||
}
|
||||
|
||||
dma: PE_DMA (MOD) {
|
||||
style.fill: "#fff3e0"
|
||||
style.stroke: "#e65100"
|
||||
style.border-radius: 6
|
||||
|
||||
compute_port: compute port {
|
||||
style.fill: "#ffe0b2"
|
||||
style.stroke: "#e65100"
|
||||
style.border-radius: 4
|
||||
}
|
||||
ipcq_port: IPCQ port {
|
||||
style.fill: "#ffe0b2"
|
||||
style.stroke: "#e65100"
|
||||
style.border-radius: 4
|
||||
}
|
||||
wrr: WRR Arbiter (NEW) {
|
||||
style.fill: "#ffcc80"
|
||||
style.stroke: "#e65100"
|
||||
style.border-radius: 4
|
||||
style.stroke-width: 2
|
||||
}
|
||||
|
||||
compute_port -> wrr
|
||||
ipcq_port -> wrr
|
||||
}
|
||||
|
||||
fs: PE_FETCH_STORE {
|
||||
style.fill: "#c8e6c9"
|
||||
style.stroke: "#2e7d32"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
tcm: TCM (16MB SRAM) {
|
||||
style.fill: "#fce4ec"
|
||||
style.stroke: "#c62828"
|
||||
style.border-radius: 6
|
||||
|
||||
work: Kernel Working Memory {
|
||||
style.fill: "#f8bbd0"
|
||||
style.stroke: "#c62828"
|
||||
style.border-radius: 4
|
||||
}
|
||||
slot: IPCQ Slot Region (rsv) {
|
||||
style.fill: "#f48fb1"
|
||||
style.stroke: "#c62828"
|
||||
style.border-radius: 4
|
||||
style.stroke-width: 2
|
||||
}
|
||||
}
|
||||
|
||||
gemm: GEMM engine {
|
||||
style.fill: "#c8e6c9"
|
||||
style.stroke: "#2e7d32"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
math: MATH engine {
|
||||
style.fill: "#c8e6c9"
|
||||
style.stroke: "#2e7d32"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
fport: Fabric Port {
|
||||
style.fill: "#ffe0b2"
|
||||
style.stroke: "#e65100"
|
||||
style.border-radius: 4
|
||||
}
|
||||
|
||||
# Control
|
||||
cpu -> sched: cmd dispatch
|
||||
cpu -> ipcq: MMIO
|
||||
|
||||
# Compute pipeline
|
||||
sched -> dma.compute_port: TileToken
|
||||
dma -> fs: TileToken
|
||||
dma <-> tcm.work: DMA_READ/WRITE\n(HBM ↔ TCM)
|
||||
fs <-> tcm.work: fetch/store\n(TCM ↔ reg)
|
||||
fs -> gemm: TileToken
|
||||
fs -> math: TileToken
|
||||
gemm -> fs: TileToken
|
||||
math -> fs: TileToken
|
||||
|
||||
# IPCQ data path
|
||||
ipcq -> dma.ipcq_port: IpcqDmaToken {style.stroke: "#0277bd"}
|
||||
dma -> ipcq.sideband.metax: IpcqMetaArrival {style.stroke: "#0277bd"}
|
||||
|
||||
# IPCQ slot read/write: PE_DMA ↔ reserved IPCQ slot region in TCM
|
||||
dma <-> tcm.slot: slot read/write {
|
||||
style.stroke: "#0277bd"
|
||||
style.stroke-dash: 3
|
||||
}
|
||||
|
||||
# Credit in/out: sideband credit injector/receiver connect directly to the Fabric Port (drawn dashed)
|
||||
ipcq.sideband.crinj -> fport: credit out (16B) {
|
||||
style.stroke: "#7b1fa2"
|
||||
style.stroke-dash: 5
|
||||
}
|
||||
fport -> ipcq.sideband.crrcv: credit in (16B) {
|
||||
style.stroke: "#7b1fa2"
|
||||
style.stroke-dash: 5
|
||||
}
|
||||
|
||||
# DMA ↔ Fabric
|
||||
dma.wrr <-> fport
|
||||
}
|
||||
|
||||
noc: NoC Router {
|
||||
style.fill: "#f3e5f5"
|
||||
style.stroke: "#6a1b9a"
|
||||
style.border-radius: 6
|
||||
}
|
||||
|
||||
ext: Other PEs / Cubes / SIPs {
|
||||
style.fill: "#ede7f6"
|
||||
style.stroke: "#6a1b9a"
|
||||
style.border-radius: 6
|
||||
}
|
||||
|
||||
pe.fport <-> noc
|
||||
noc <-> ext
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 836 KiB |
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 48 KiB |
Reference in New Issue
Block a user