commit - release 1

This commit is contained in:
2026-03-18 11:47:48 -07:00
commit 6f43807900
109 changed files with 14909 additions and 0 deletions
+166
View File
@@ -0,0 +1,166 @@
from __future__ import annotations
import math
from dataclasses import dataclass
from typing import Literal
from kernbench.policy.address.allocator import PEMemAllocator
from kernbench.policy.placement.dp import DPPolicy, ShardSpec
from kernbench.runtime_api.kernel import TensorArg, TensorArgShard
@dataclass(frozen=True)
class TensorShard:
    """Immutable description of one allocated piece of a deployed tensor."""

    # Location identifiers of the allocator that owns this shard.
    sip: int
    cube: int
    pe: int
    # Encoded address of the allocation, stored as a plain integer.
    pa: int
    # Number of bytes covered by this shard.
    nbytes: int
    # Byte offset associated with this shard
    # (presumably relative to the start of the full tensor; confirm).
    offset_bytes: int
@dataclass(frozen=True)
class TensorHandle:
    """Immutable record of a deployed tensor and the shards backing it."""

    # Caller-supplied tensor name.
    name: str
    # Logical dimensions of the whole tensor.
    shape: tuple[int, ...]
    # Dtype string and the matching size of one element in bytes.
    dtype: str
    itemsize: int
    # One entry per allocated shard.
    shards: tuple[TensorShard, ...]

    @property
    def nbytes(self) -> int:
        """Return the byte size of the full tensor (element count × itemsize)."""
        element_count = math.prod(self.shape)
        return element_count * self.itemsize
# Element size in bytes for every accepted dtype spelling.
_DTYPE_ITEMSIZE = {
    # 16-bit floating point
    "fp16": 2,
    "float16": 2,
    "f16": 2,
    # 32-bit floating point
    "fp32": 4,
    "float32": 4,
    "f32": 4,
    # bfloat16
    "bf16": 2,
    # signed integers
    "int8": 1,
    "i8": 1,
    "int16": 2,
    "i16": 2,
    "int32": 4,
    "i32": 4,
}


def dtype_itemsize(dtype: str) -> int:
    """Return the size in bytes of one element of *dtype*.

    Raises:
        ValueError: If *dtype* is not a key of the dtype table.
    """
    size = _DTYPE_ITEMSIZE.get(dtype)
    if size is None:
        raise ValueError(f"unsupported dtype: {dtype}")
    return size
def deploy_tensor(
    *,
    name: str,
    shape: tuple[int, ...],
    dtype: str,
    placement: list[ShardSpec],
    allocators: dict[int, PEMemAllocator],
    mem_kind: Literal["hbm", "tcm"] = "hbm",
) -> TensorHandle:
    """Allocate memory for every shard in *placement* and build a handle.

    For each placement entry, the allocator registered under the entry's
    ``pe_index`` reserves ``nbytes`` bytes in the requested memory kind. The
    resulting address and the allocator's location identifiers are recorded
    in a ``TensorShard``.

    Args:
        name: Name stored on the returned handle.
        shape: Logical shape stored on the returned handle.
        dtype: Dtype string; must be accepted by ``dtype_itemsize``.
        placement: Shard specs, processed in order (one allocation each).
        allocators: Allocators keyed by the ``pe_index`` used in *placement*.
        mem_kind: ``"hbm"`` (default) or ``"tcm"``; selects which allocator
            method performs the allocation.

    Returns:
        A ``TensorHandle`` whose shards are in the same order as *placement*.

    Raises:
        ValueError: If *mem_kind* is neither ``"hbm"`` nor ``"tcm"``, or if
            *dtype* is rejected by ``dtype_itemsize``.
        KeyError: If a spec's ``pe_index`` has no entry in *allocators*.
    """
    # Validate before touching any allocator: previously every value other
    # than "hbm" (including typos) silently allocated from TCM.
    if mem_kind not in ("hbm", "tcm"):
        raise ValueError(f"unsupported mem_kind: {mem_kind!r}")

    # Resolved before the loop so an unsupported dtype fails before any
    # memory has been allocated.
    isize = dtype_itemsize(dtype)

    shards: list[TensorShard] = []
    for spec in placement:
        alloc = allocators[spec.pe_index]
        if mem_kind == "hbm":
            pa = alloc.alloc_hbm(spec.nbytes)
        else:
            pa = alloc.alloc_tcm(spec.nbytes)
        shards.append(TensorShard(
            # NOTE(review): reads the allocator's underscore-prefixed
            # attributes; a public accessor would be preferable if one exists.
            sip=alloc._sip_id,
            cube=alloc._cube_id,
            pe=alloc._pe_id,
            # The allocator returns an address object; only its encoded
            # form is stored on the shard.
            pa=pa.encode(),
            nbytes=spec.nbytes,
            offset_bytes=spec.offset_bytes,
        ))

    return TensorHandle(
        name=name,
        shape=shape,
        dtype=dtype,
        itemsize=isize,
        shards=tuple(shards),
    )
# ── PyTorch-like Tensor API ──────────────────────────────────────────
@dataclass(frozen=True)
class DPMetadata:
    """Placement settings for data-parallel deployment.

    Instances are stored on ``Tensor._dp_metadata``.
    """

    # Shard layout to use when deploying the tensor.
    placement: list[ShardSpec]
    # Optional policy object associated with the placement.
    dp_policy: DPPolicy | None = None
    # Target sip / cube identifiers (both default to 0).
    sip: int = 0
    cube: int = 0
    # An int selects a single PE; the string "all" selects all PEs.
    target_pe: int | str = 0
class Tensor:
    """Tensor object with a PyTorch-flavoured interface for benchmark code.

    Example::

        a = ctx.zeros((M, K), dtype="f16")
        a = ctx.zeros((M, K), dtype="f16", placement=dp.replicate(num_pe=8))
        ctx.launch("kernel_name", kernel_fn, a, b, out, M=M, K=K)
    """

    def __init__(
        self,
        shape: tuple[int, ...],
        dtype: str = "f16",
        name: str = "",
    ) -> None:
        """Record shape, dtype and name; the tensor starts out undeployed."""
        self.name = name
        self.dtype = dtype
        self.shape = shape
        # Both stay None until placement is set / the tensor is deployed.
        self._handle: TensorHandle | None = None
        self._dp_metadata: DPMetadata | None = None

    @property
    def itemsize(self) -> int:
        """Size of one element in bytes, looked up from the dtype string."""
        return dtype_itemsize(self.dtype)

    @property
    def nbytes(self) -> int:
        """Total byte size of the tensor (element count × itemsize)."""
        element_count = math.prod(self.shape)
        return element_count * self.itemsize

    @property
    def pa(self) -> int:
        """Address of the first shard, used as the kernel pointer argument.

        Raises:
            RuntimeError: If there is no handle yet or it holds no shards.
        """
        handle = self._handle
        deployed_shards = handle.shards if handle is not None else ()
        if not deployed_shards:
            raise RuntimeError(f"Tensor '{self.name}' is not deployed yet")
        return deployed_shards[0].pa

    def to(
        self,
        placement: list[ShardSpec] | None = None,
        *,
        dp_policy: DPPolicy | None = None,
        sip: int = 0,
        cube: int = 0,
        target_pe: int | str = 0,
    ) -> Tensor:
        """Attach DP placement metadata to this tensor and return ``self``.

        When *placement* is omitted, a single shard covering the whole
        tensor on PE index 0 is used.
        """
        if placement is None:
            whole_tensor = ShardSpec(
                pe_index=0,
                offset_bytes=0,
                nbytes=self.nbytes,
            )
            placement = [whole_tensor]
        metadata = DPMetadata(
            placement=placement,
            dp_policy=dp_policy,
            sip=sip,
            cube=cube,
            target_pe=target_pe,
        )
        self._dp_metadata = metadata
        return self

    def to_tensor_arg(self) -> TensorArg:
        """Build the ``TensorArg`` that mirrors this tensor's deployed shards.

        Raises:
            RuntimeError: If the tensor has no handle yet.
        """
        handle = self._handle
        if handle is None:
            raise RuntimeError(f"Tensor '{self.name}' is not deployed yet")
        arg_shards = tuple(
            TensorArgShard(
                sip=shard.sip,
                cube=shard.cube,
                pe=shard.pe,
                pa=shard.pa,
                nbytes=shard.nbytes,
                offset_bytes=shard.offset_bytes,
            )
            for shard in handle.shards
        )
        return TensorArg(shards=arg_shards)