Allreduce sweep: parametrized + xdist parallelism + topology diagram
Refactor the latency sweep from one giant test into 36 parametrized cases that run in parallel under xdist (~6-8x faster: 1:49 instead of ~10 min). Each case writes a JSON row to a staging dir; the conftest sessionfinish hook aggregates rows on the controller node into summary.csv and the per-topology + overview plots. The aggregator gains a CSV fallback so plot-only tweaks no longer require re-running the sweep.

Overview plot updates:
- 96 KB explicit x-axis marker with vertical dotted line
- horizontal theoretical 2D-torus reference (10600 ns)
- annotation showing both theoretical and simulated values at 96 KB
- drop overlapping 128 KB tick

New topology.png: 2x2 panel diagram showing device-level topology (ring, torus 2x3, mesh 2x3) and the cube-level reduction inside SIP 0. Wrap arrows anchor on box edges and arc outside rows/columns so they do not overlap any SIP.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,26 +1,4 @@
|
||||
algorithm,sip_topology,n_sips,n_elem,bytes_per_pe,bytes_per_sip,latency_ns
|
||||
intercube_allreduce,ring_1d,6,8,16,256,3073.1299999999937
|
||||
intercube_allreduce,ring_1d,6,32,64,1024,3079.8799999999947
|
||||
intercube_allreduce,ring_1d,6,64,128,2048,3088.879999999992
|
||||
intercube_allreduce,ring_1d,6,128,256,4096,3106.8799999999865
|
||||
intercube_allreduce,ring_1d,6,512,1024,16384,3225.8799999999865
|
||||
intercube_allreduce,ring_1d,6,1024,2048,32768,3391.8799999999865
|
||||
intercube_allreduce,ring_1d,6,2048,4096,65536,3723.8799999999865
|
||||
intercube_allreduce,ring_1d,6,4096,8192,131072,4387.879999999965
|
||||
intercube_allreduce,ring_1d,6,8192,16384,262144,5715.879999999957
|
||||
intercube_allreduce,ring_1d,6,16384,32768,524288,8371.879999999932
|
||||
intercube_allreduce,ring_1d,6,32768,65536,1048576,13683.879999999903
|
||||
intercube_allreduce,torus_2d,6,8,16,256,2190.4799999999923
|
||||
intercube_allreduce,torus_2d,6,32,64,1024,2196.479999999993
|
||||
intercube_allreduce,torus_2d,6,64,128,2048,2204.4799999999905
|
||||
intercube_allreduce,torus_2d,6,128,256,4096,2220.479999999985
|
||||
intercube_allreduce,torus_2d,6,512,1024,16384,2325.479999999985
|
||||
intercube_allreduce,torus_2d,6,1024,2048,32768,2471.479999999985
|
||||
intercube_allreduce,torus_2d,6,2048,4096,65536,2763.479999999985
|
||||
intercube_allreduce,torus_2d,6,4096,8192,131072,3347.4799999999777
|
||||
intercube_allreduce,torus_2d,6,8192,16384,262144,4515.4799999999705
|
||||
intercube_allreduce,torus_2d,6,16384,32768,524288,6851.479999999952
|
||||
intercube_allreduce,torus_2d,6,32768,65536,1048576,11523.479999999923
|
||||
intercube_allreduce,mesh_2d_no_wrap,6,8,16,256,3508.4249999999993
|
||||
intercube_allreduce,mesh_2d_no_wrap,6,32,64,1024,3515.55
|
||||
intercube_allreduce,mesh_2d_no_wrap,6,64,128,2048,3525.0499999999975
|
||||
@@ -32,3 +10,28 @@ intercube_allreduce,mesh_2d_no_wrap,6,4096,8192,131072,4857.049999999959
|
||||
intercube_allreduce,mesh_2d_no_wrap,6,8192,16384,262144,6217.049999999945
|
||||
intercube_allreduce,mesh_2d_no_wrap,6,16384,32768,524288,8937.049999999937
|
||||
intercube_allreduce,mesh_2d_no_wrap,6,32768,65536,1048576,14377.049999999872
|
||||
intercube_allreduce,mesh_2d_no_wrap,6,49152,98304,1572864,19817.049999999872
|
||||
intercube_allreduce,ring_1d,6,8,16,256,3073.1299999999937
|
||||
intercube_allreduce,ring_1d,6,32,64,1024,3079.8799999999947
|
||||
intercube_allreduce,ring_1d,6,64,128,2048,3088.879999999992
|
||||
intercube_allreduce,ring_1d,6,128,256,4096,3106.8799999999865
|
||||
intercube_allreduce,ring_1d,6,512,1024,16384,3225.8799999999865
|
||||
intercube_allreduce,ring_1d,6,1024,2048,32768,3391.8799999999865
|
||||
intercube_allreduce,ring_1d,6,2048,4096,65536,3723.8799999999865
|
||||
intercube_allreduce,ring_1d,6,4096,8192,131072,4387.879999999965
|
||||
intercube_allreduce,ring_1d,6,8192,16384,262144,5715.879999999957
|
||||
intercube_allreduce,ring_1d,6,16384,32768,524288,8371.879999999932
|
||||
intercube_allreduce,ring_1d,6,32768,65536,1048576,13683.879999999903
|
||||
intercube_allreduce,ring_1d,6,49152,98304,1572864,18995.879999999917
|
||||
intercube_allreduce,torus_2d,6,8,16,256,2190.4799999999923
|
||||
intercube_allreduce,torus_2d,6,32,64,1024,2196.479999999993
|
||||
intercube_allreduce,torus_2d,6,64,128,2048,2204.4799999999905
|
||||
intercube_allreduce,torus_2d,6,128,256,4096,2220.479999999985
|
||||
intercube_allreduce,torus_2d,6,512,1024,16384,2325.479999999985
|
||||
intercube_allreduce,torus_2d,6,1024,2048,32768,2471.479999999985
|
||||
intercube_allreduce,torus_2d,6,2048,4096,65536,2763.479999999985
|
||||
intercube_allreduce,torus_2d,6,4096,8192,131072,3347.4799999999777
|
||||
intercube_allreduce,torus_2d,6,8192,16384,262144,4515.4799999999705
|
||||
intercube_allreduce,torus_2d,6,16384,32768,524288,6851.479999999952
|
||||
intercube_allreduce,torus_2d,6,32768,65536,1048576,11523.479999999923
|
||||
intercube_allreduce,torus_2d,6,49152,98304,1572864,16195.479999999952
|
||||
|
||||
|
Reference in New Issue
Block a user