diff --git a/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.csv b/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.csv index 2496d4e..94cf35a 100644 --- a/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.csv +++ b/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.csv @@ -1,13 +1,13 @@ -buffer_kind,sip_topology,n_sips,n_elem,bytes_per_pe,latency_ns -hbm,torus_2d,6,128,256,1858.0399999999827 -hbm,torus_2d,6,1024,2048,2389.0399999999827 -hbm,torus_2d,6,8192,16384,6673.039999999986 -hbm,torus_2d,6,32768,65536,21361.03999999992 -sram,torus_2d,6,128,256,1774.0399999999827 -sram,torus_2d,6,1024,2048,2389.0399999999827 -sram,torus_2d,6,8192,16384,7345.039999999986 -sram,torus_2d,6,32768,65536,24337.039999999935 -tcm,torus_2d,6,128,256,1678.0399999999827 -tcm,torus_2d,6,1024,2048,1957.0399999999827 -tcm,torus_2d,6,8192,16384,4225.039999999986 -tcm,torus_2d,6,32768,65536,12001.03999999992 +buffer_kind,sip_topology,n_sips,n_elem,bytes_per_pe,latency_ns +hbm,torus_2d,6,128,256,2144.0399999999754 +hbm,torus_2d,6,1024,2048,2908.74499999995 +hbm,torus_2d,6,8192,16384,8851.185000000081 +hbm,torus_2d,6,32768,65536,29225.265000008752 +sram,torus_2d,6,128,256,2060.0399999999754 +sram,torus_2d,6,1024,2048,2908.74499999995 +sram,torus_2d,6,8192,16384,9523.185000000081 +sram,torus_2d,6,32768,65536,32201.265000008752 +tcm,torus_2d,6,128,256,1964.0399999999754 +tcm,torus_2d,6,1024,2048,2476.74499999995 +tcm,torus_2d,6,8192,16384,6403.185000000081 +tcm,torus_2d,6,32768,65536,19865.265000008738 diff --git a/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.png b/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.png index 3eb62b8..c839bda 100644 Binary files a/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.png and b/docs/diagrams/allreduce_latency_plots/buffer_kind_sweep.png differ diff --git a/docs/diagrams/allreduce_latency_plots/mesh_2d_no_wrap.png b/docs/diagrams/allreduce_latency_plots/mesh_2d_no_wrap.png index 33ae4ea..cbdebcb 100644 Binary files a/docs/diagrams/allreduce_latency_plots/mesh_2d_no_wrap.png and b/docs/diagrams/allreduce_latency_plots/mesh_2d_no_wrap.png differ diff --git a/docs/diagrams/allreduce_latency_plots/overview.png b/docs/diagrams/allreduce_latency_plots/overview.png index bad2afa..a985edd 100644 Binary files a/docs/diagrams/allreduce_latency_plots/overview.png and b/docs/diagrams/allreduce_latency_plots/overview.png differ diff --git a/docs/diagrams/allreduce_latency_plots/ring_1d.png b/docs/diagrams/allreduce_latency_plots/ring_1d.png index 6fcc8a5..8ab2cdb 100644 Binary files a/docs/diagrams/allreduce_latency_plots/ring_1d.png and b/docs/diagrams/allreduce_latency_plots/ring_1d.png differ diff --git a/docs/diagrams/allreduce_latency_plots/summary.csv b/docs/diagrams/allreduce_latency_plots/summary.csv index 2aa2778..7ea2508 100644 --- a/docs/diagrams/allreduce_latency_plots/summary.csv +++ b/docs/diagrams/allreduce_latency_plots/summary.csv @@ -1,37 +1,37 @@ -algorithm,sip_topology,n_sips,n_elem,bytes_per_pe,bytes_per_sip,latency_ns -intercube_allreduce,mesh_2d_no_wrap,6,8,16,256,2626.302499999998 -intercube_allreduce,mesh_2d_no_wrap,6,32,64,1024,2634.7399999999952 -intercube_allreduce,mesh_2d_no_wrap,6,64,128,2048,2645.9899999999925 -intercube_allreduce,mesh_2d_no_wrap,6,128,256,4096,2668.489999999987 -intercube_allreduce,mesh_2d_no_wrap,6,512,1024,16384,2812.489999999987 -intercube_allreduce,mesh_2d_no_wrap,6,1024,2048,32768,3010.489999999987 -intercube_allreduce,mesh_2d_no_wrap,6,2048,4096,65536,3406.489999999987 -intercube_allreduce,mesh_2d_no_wrap,6,4096,8192,131072,4198.489999999965 -intercube_allreduce,mesh_2d_no_wrap,6,8192,16384,262144,5782.489999999969 -intercube_allreduce,mesh_2d_no_wrap,6,16384,32768,524288,8950.489999999925 -intercube_allreduce,mesh_2d_no_wrap,6,32768,65536,1048576,15286.48999999986 -intercube_allreduce,mesh_2d_no_wrap,6,49152,98304,1572864,21622.489999999932 -intercube_allreduce,ring_1d,6,8,16,256,2302.9849999999933 -intercube_allreduce,ring_1d,6,32,64,1024,2310.8599999999906 -intercube_allreduce,ring_1d,6,64,128,2048,2321.359999999988 -intercube_allreduce,ring_1d,6,128,256,4096,2342.3599999999824 -intercube_allreduce,ring_1d,6,512,1024,16384,2479.3599999999824 -intercube_allreduce,ring_1d,6,1024,2048,32768,2669.3599999999824 -intercube_allreduce,ring_1d,6,2048,4096,65536,3049.3599999999824 -intercube_allreduce,ring_1d,6,4096,8192,131072,3809.3599999999715 -intercube_allreduce,ring_1d,6,8192,16384,262144,5329.359999999979 -intercube_allreduce,ring_1d,6,16384,32768,524288,8369.35999999992 -intercube_allreduce,ring_1d,6,32768,65536,1048576,14449.359999999899 -intercube_allreduce,ring_1d,6,49152,98304,1572864,20529.35999999997 -intercube_allreduce,torus_2d,6,8,16,256,1644.2899999999936 -intercube_allreduce,torus_2d,6,32,64,1024,1651.0399999999909 -intercube_allreduce,torus_2d,6,64,128,2048,1660.0399999999881 -intercube_allreduce,torus_2d,6,128,256,4096,1678.0399999999827 -intercube_allreduce,torus_2d,6,512,1024,16384,1795.0399999999827 -intercube_allreduce,torus_2d,6,1024,2048,32768,1957.0399999999827 -intercube_allreduce,torus_2d,6,2048,4096,65536,2281.0399999999827 -intercube_allreduce,torus_2d,6,4096,8192,131072,2929.039999999979 -intercube_allreduce,torus_2d,6,8192,16384,262144,4225.039999999986 -intercube_allreduce,torus_2d,6,16384,32768,524288,6817.039999999943 -intercube_allreduce,torus_2d,6,32768,65536,1048576,12001.03999999992 -intercube_allreduce,torus_2d,6,49152,98304,1572864,17185.039999999994 +algorithm,sip_topology,n_sips,n_elem,bytes_per_pe,bytes_per_sip,latency_ns +intercube_allreduce,mesh_2d_no_wrap,6,8,16,256,2666.5524999999725 +intercube_allreduce,mesh_2d_no_wrap,6,32,64,1024,2747.7399999999725 +intercube_allreduce,mesh_2d_no_wrap,6,64,128,2048,2855.98999999998 +intercube_allreduce,mesh_2d_no_wrap,6,128,256,4096,3072.4899999999725 +intercube_allreduce,mesh_2d_no_wrap,6,512,1024,16384,3336.579999999951 +intercube_allreduce,mesh_2d_no_wrap,6,1024,2048,32768,3707.49999999992 +intercube_allreduce,mesh_2d_no_wrap,6,2048,4096,65536,4449.339999999875 +intercube_allreduce,mesh_2d_no_wrap,6,4096,8192,131072,5933.020000000055 +intercube_allreduce,mesh_2d_no_wrap,6,8192,16384,262144,8900.380000000157 +intercube_allreduce,mesh_2d_no_wrap,6,16384,32768,524288,14835.099999997583 +intercube_allreduce,mesh_2d_no_wrap,6,32768,65536,1048576,26704.540000017492 +intercube_allreduce,mesh_2d_no_wrap,6,49152,98304,1572864,38573.980000026335 +intercube_allreduce,ring_1d,6,8,16,256,2365.2558333333036 +intercube_allreduce,ring_1d,6,32,64,1024,2436.9433333333036 +intercube_allreduce,ring_1d,6,64,128,2048,2532.526666666643 +intercube_allreduce,ring_1d,6,128,256,4096,2723.6933333333036 +intercube_allreduce,ring_1d,6,512,1024,16384,3042.0349999999544 +intercube_allreduce,ring_1d,6,1024,2048,32768,3390.201666666597 +intercube_allreduce,ring_1d,6,2048,4096,65536,4079.7349999998714 +intercube_allreduce,ring_1d,6,4096,8192,131072,5458.801666666721 +intercube_allreduce,ring_1d,6,8192,16384,262144,8216.93500000014 +intercube_allreduce,ring_1d,6,16384,32768,524288,13733.201666664638 +intercube_allreduce,ring_1d,6,32768,65536,1048576,24765.735000014545 +intercube_allreduce,ring_1d,6,49152,98304,1572864,35798.268333355256 +intercube_allreduce,torus_2d,6,8,16,256,1700.6024999999754 +intercube_allreduce,torus_2d,6,32,64,1024,1753.2899999999754 +intercube_allreduce,torus_2d,6,64,128,2048,1823.539999999979 +intercube_allreduce,torus_2d,6,128,256,4096,1964.0399999999754 +intercube_allreduce,torus_2d,6,512,1024,16384,2196.2849999999653 +intercube_allreduce,torus_2d,6,1024,2048,32768,2476.74499999995 +intercube_allreduce,torus_2d,6,2048,4096,65536,3037.664999999919 +intercube_allreduce,torus_2d,6,4096,8192,131072,4159.50500000003 +intercube_allreduce,torus_2d,6,8192,16384,262144,6403.185000000081 +intercube_allreduce,torus_2d,6,16384,32768,524288,10890.544999998769 +intercube_allreduce,torus_2d,6,32768,65536,1048576,19865.265000008738 +intercube_allreduce,torus_2d,6,49152,98304,1572864,28839.985000013185 diff --git a/docs/diagrams/allreduce_latency_plots/torus_2d.png b/docs/diagrams/allreduce_latency_plots/torus_2d.png index bccab92..5986608 100644 Binary files a/docs/diagrams/allreduce_latency_plots/torus_2d.png and b/docs/diagrams/allreduce_latency_plots/torus_2d.png differ diff --git a/docs/diagrams/ipcq_diagram_plots/ipcq_send_recv.png b/docs/diagrams/ipcq_diagram_plots/ipcq_send_recv.png index 99dbf11..3151932 100644 Binary files a/docs/diagrams/ipcq_diagram_plots/ipcq_send_recv.png and b/docs/diagrams/ipcq_diagram_plots/ipcq_send_recv.png differ diff --git a/docs/diagrams/ipcq_diagram_plots/ipcq_two_pe_dma.png b/docs/diagrams/ipcq_diagram_plots/ipcq_two_pe_dma.png index 78f2f42..c071a9e 100644 Binary files a/docs/diagrams/ipcq_diagram_plots/ipcq_two_pe_dma.png and b/docs/diagrams/ipcq_diagram_plots/ipcq_two_pe_dma.png differ diff --git a/docs/diagrams/pe2pe_latency_plots/h1_intra_horizontal.png b/docs/diagrams/pe2pe_latency_plots/h1_intra_horizontal.png index 848db8e..91efea1 100644 Binary files a/docs/diagrams/pe2pe_latency_plots/h1_intra_horizontal.png and b/docs/diagrams/pe2pe_latency_plots/h1_intra_horizontal.png differ diff --git a/docs/diagrams/pe2pe_latency_plots/h2_intra_vertical.png b/docs/diagrams/pe2pe_latency_plots/h2_intra_vertical.png index 396bb0d..af0c83e 100644 Binary files a/docs/diagrams/pe2pe_latency_plots/h2_intra_vertical.png and b/docs/diagrams/pe2pe_latency_plots/h2_intra_vertical.png differ diff --git a/docs/diagrams/pe2pe_latency_plots/h3_inter_cube_horizontal.png b/docs/diagrams/pe2pe_latency_plots/h3_inter_cube_horizontal.png index ab29c2f..66afdfb 100644 Binary files a/docs/diagrams/pe2pe_latency_plots/h3_inter_cube_horizontal.png and b/docs/diagrams/pe2pe_latency_plots/h3_inter_cube_horizontal.png differ diff --git a/docs/diagrams/pe2pe_latency_plots/h4_inter_cube_vertical.png b/docs/diagrams/pe2pe_latency_plots/h4_inter_cube_vertical.png index 2a8212d..618f72b 100644 Binary files a/docs/diagrams/pe2pe_latency_plots/h4_inter_cube_vertical.png and b/docs/diagrams/pe2pe_latency_plots/h4_inter_cube_vertical.png differ diff --git a/docs/diagrams/pe2pe_latency_plots/overview.png b/docs/diagrams/pe2pe_latency_plots/overview.png index 6193f96..5dc105e 100644 Binary files a/docs/diagrams/pe2pe_latency_plots/overview.png and b/docs/diagrams/pe2pe_latency_plots/overview.png differ diff --git a/docs/diagrams/pe2pe_latency_plots/summary.csv b/docs/diagrams/pe2pe_latency_plots/summary.csv index ee95166..3db9947 100644 --- a/docs/diagrams/pe2pe_latency_plots/summary.csv +++ b/docs/diagrams/pe2pe_latency_plots/summary.csv @@ -1,81 +1,81 @@ -hop,label,size_bytes,path,total_ns -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,ipcq,31.3899999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,raw,12.019999999996799 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,ipcq,33.1399999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,raw,13.019999999996799 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,ipcq,34.8899999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,raw,14.019999999996799 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,ipcq,36.6399999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,raw,15.019999999996799 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,ipcq,40.1399999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,raw,17.0199999999968 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,ipcq,43.6399999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,raw,19.0199999999968 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,ipcq,57.6399999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,raw,27.0199999999968 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,ipcq,85.6399999999976 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,raw,43.0199999999968 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,ipcq,141.64000000000306 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,raw,75.02000000000407 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,ipcq,169.64000000000306 -h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,raw,91.02000000000407 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,ipcq,31.3899999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,raw,12.019999999996799 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,ipcq,33.1399999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,raw,13.019999999996799 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,ipcq,34.8899999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,raw,14.019999999996799 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,ipcq,36.6399999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,raw,15.019999999996799 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,ipcq,40.1399999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,raw,17.0199999999968 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,ipcq,43.6399999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,raw,19.0199999999968 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,ipcq,57.6399999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,raw,27.0199999999968 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,ipcq,85.6399999999976 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,raw,43.0199999999968 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,ipcq,141.64000000000306 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,raw,75.02000000000407 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,ipcq,169.64000000000306 -h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,raw,91.02000000000407 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,ipcq,67.40999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,raw,68.53999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),256,ipcq,69.15999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),256,raw,70.03999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),384,ipcq,70.90999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),384,raw,71.53999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),512,ipcq,72.65999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),512,raw,73.03999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),768,ipcq,76.15999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),768,raw,76.03999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),1024,ipcq,79.65999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),1024,raw,79.03999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,ipcq,93.65999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,raw,91.03999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,ipcq,121.65999999999804 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,raw,115.03999999999724 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,ipcq,177.65999999999985 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,raw,163.04000000000087 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),10240,ipcq,205.65999999999985 -h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),10240,raw,187.04000000000087 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),128,ipcq,87.40999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),128,raw,88.53999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),256,ipcq,89.15999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),256,raw,90.03999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),384,ipcq,90.90999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),384,raw,91.53999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),512,ipcq,92.65999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),512,raw,93.03999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),768,ipcq,96.15999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),768,raw,96.03999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),1024,ipcq,99.65999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),1024,raw,99.03999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,ipcq,113.65999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,raw,111.03999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,ipcq,141.65999999999804 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,raw,135.03999999999724 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,ipcq,197.65999999999985 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,raw,183.04000000000087 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),10240,ipcq,225.65999999999985 -h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),10240,raw,207.04000000000087 +hop,label,size_bytes,path,total_ns +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,ipcq,42.8899999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),128,raw,29.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,ipcq,48.1399999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),256,raw,31.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,ipcq,50.3899999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),384,raw,32.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,ipcq,52.6399999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),512,raw,33.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,ipcq,57.1399999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),768,raw,35.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,ipcq,62.6399999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),1024,raw,37.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,ipcq,84.6399999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),2048,raw,45.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,ipcq,128.6399999999976 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),4096,raw,61.0199999999968 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,ipcq,216.64000000000306 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),8192,raw,93.02000000000407 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,ipcq,260.64000000000306 +h1_intra_horizontal,Intra-cube horizontal (pe0 to pe1),10240,raw,109.02000000000407 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,ipcq,42.8899999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),128,raw,29.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,ipcq,48.1399999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),256,raw,31.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,ipcq,50.3899999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),384,raw,32.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,ipcq,52.6399999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),512,raw,33.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,ipcq,57.1399999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),768,raw,35.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,ipcq,62.6399999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),1024,raw,37.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,ipcq,84.6399999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),2048,raw,45.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,ipcq,128.6399999999976 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),4096,raw,61.0199999999968 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,ipcq,216.64000000000306 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),8192,raw,93.02000000000407 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,ipcq,260.64000000000306 +h2_intra_vertical,Intra-cube vertical (pe0 to pe4),10240,raw,109.02000000000407 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,ipcq,81.15999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),128,raw,89.28999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),256,ipcq,88.65999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),256,raw,95.53999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),384,ipcq,90.90999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),384,raw,96.53999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),512,ipcq,93.15999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),512,raw,97.53999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),768,ipcq,97.65999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),768,raw,99.53999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),1024,ipcq,103.15999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),1024,raw,102.53999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,ipcq,125.15999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),2048,raw,114.53999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,ipcq,169.15999999999804 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),4096,raw,138.53999999999724 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,ipcq,257.15999999999985 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),8192,raw,186.54000000000087 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),10240,ipcq,301.15999999999985 +h3_inter_cube_horizontal,Inter-cube horizontal (cube0 to cube1),10240,raw,210.54000000000087 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),128,ipcq,103.15999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),128,raw,111.28999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),256,ipcq,112.65999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),256,raw,119.53999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),384,ipcq,114.90999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),384,raw,120.53999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),512,ipcq,117.15999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),512,raw,121.53999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),768,ipcq,121.65999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),768,raw,123.53999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),1024,ipcq,127.15999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),1024,raw,126.53999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,ipcq,149.15999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),2048,raw,138.53999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,ipcq,193.15999999999804 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),4096,raw,162.53999999999724 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,ipcq,281.15999999999985 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),8192,raw,210.54000000000087 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),10240,ipcq,325.15999999999985 +h4_inter_cube_vertical,Inter-cube vertical (cube0 to cube4),10240,raw,234.54000000000087 diff --git a/docs/diagrams/pe_baseline copy.png b/docs/diagrams/pe_baseline copy.png new file mode 100644 index 0000000..977d655 Binary files /dev/null and b/docs/diagrams/pe_baseline copy.png differ diff --git a/docs/diagrams/pe_view.svg b/docs/diagrams/pe_view.svg index ea5ffa0..2641b47 100644 --- a/docs/diagrams/pe_view.svg +++ b/docs/diagrams/pe_view.svg @@ -1,33 +1,101 @@ - + pe - - PE VIEW - - 0.5mm - - 0.5mm - - 0.5mm - - 0.5mm - - 0.5mm 512GB/s - - 0.5mm 512GB/s - - 0.5mm 512GB/s - - PE CPU - - PE SCHEDULER - - PE DMA - - PE GEMM - - PE MATH - - PE MMU - - PE TCM - \ No newline at end of file + + PE VIEW + + + + + + PE CPU + + + + PE SCHEDULER + + + + PE IPCQ + (control plane) + + + + PE MMU + + + + PE DMA + + + + PE GEMM + + + + PE MATH + + + + PE TCM + + + IPCQ Slot Region + + + + + + cmd + + + + + IpcqRequest + + + + TileToken + + + + + + + + + + DMA R/W + + + + 512GB/s + + + + 512GB/s + + + + + IpcqDmaToken + + + + + IpcqMetaArrival + + + + IpcqCreditMeta (dashed) + + + + + IPCQ data path + + IPCQ credit return + + Compute data path + + IPCQ (new) +