Fix all remaining test failures: single-cube allreduce + matplotlib dep
- intercube_allreduce: add single-cube fast path that skips intra-SIP mesh reduce and goes directly to inter-SIP exchange. Fixes IPCQ deadlock when TP launches kernel on one cube per SIP. - distributed.py: derive effective cube dims from tensor shard placement instead of hardcoding topology mesh size. - pyproject.toml: add matplotlib>=3.7 to dependencies. - pe_dma.py (prior commit): add MMU translation in pipeline DMA path. 577 passed, 0 failed (was 529 passed, 10 failed). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
|
||||
name = "kernbench"
|
||||
version = "0.1.0"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = ["pytest", "simpy", "pyyaml", "fastapi>=0.110", "uvicorn[standard]>=0.29", "websockets>=12", "numpy>=1.24", "greenlet>=3.0"]
|
||||
dependencies = ["pytest", "simpy", "pyyaml", "fastapi>=0.110", "uvicorn[standard]>=0.29", "websockets>=12", "numpy>=1.24", "greenlet>=3.0", "matplotlib>=3.7"]
|
||||
|
||||
[project.scripts]
|
||||
kernbench = "kernbench.cli.main:main"
|
||||
|
||||
Reference in New Issue
Block a user