commit - release 1

2026-03-18 11:47:48 -07:00
commit 6f43807900
109 changed files with 14909 additions and 0 deletions
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal, TypeAlias
+
+
+@dataclass(frozen=True)
+class MemoryWriteMsg:  # Immutable request to write nbytes to device memory at (dst_sip, dst_cube, dst_pe, dst_pa).
+    correlation_id: str  # presumably matched against ResponseMsg.correlation_id -- TODO confirm
+    request_id: str  # presumably matched against ResponseMsg.request_id -- TODO confirm
+    dst_sip: int  # destination coordinates: sip / cube / pe indices
+    dst_cube: int
+    dst_pe: int
+    dst_pa: int  # destination address; "pa" presumably means physical address -- TODO confirm
+    nbytes: int  # byte count of the write
+    src_kind: Literal["pattern", "host_buffer_ref"] = "pattern"  # selects the data source; defaults to "pattern"
+    pattern: str | None = None  # presumably only meaningful when src_kind == "pattern"; format not defined here
+    target_cubes: tuple[int, ...] | Literal["all"] = "all"  # explicit tuple of cube indices, or "all" (default)
+    target_pe: int | Literal["all"] = "all"  # a single PE index, or "all" (default)
+    msg_type: Literal["memory_write"] = "memory_write"  # fixed tag distinguishing this message class
+
+
+@dataclass(frozen=True)
+class MemoryReadMsg:  # Immutable request to read nbytes starting at (src_sip, src_cube, src_pe, src_pa).
+    correlation_id: str  # presumably matched against ResponseMsg.correlation_id -- TODO confirm
+    request_id: str  # presumably matched against ResponseMsg.request_id -- TODO confirm
+    src_sip: int  # source coordinates: sip / cube / pe indices
+    src_cube: int
+    src_pe: int
+    src_pa: int  # source address; "pa" presumably means physical address -- TODO confirm
+    nbytes: int  # byte count of the read
+    target_cubes: tuple[int, ...] | Literal["all"] = "all"  # explicit tuple of cube indices, or "all" (default)
+    target_pe: int | Literal["all"] = "all"  # a single PE index, or "all" (default)
+    msg_type: Literal["memory_read"] = "memory_read"  # fixed tag distinguishing this message class
+
+
+@dataclass(frozen=True)
+class KernelRef:
+    """Reference to a kernel binary or builtin timing model.
+
+    Kernel binaries must be pre-deployed to device memory via MemoryWriteMsg.
+    KernelLaunchMsg references the deployed location by PA — source code or IR
+    MUST NOT be embedded in launch messages.
+
+    - "deployed": kernel binary pre-deployed to HBM/SRAM at deploy_pa.
+    - "builtin":  simulator built-in timing model, identified by name.
+    """
+
+    name: str  # kernel name; for kind == "builtin" this identifies the timing model
+    kind: Literal["deployed", "builtin"]  # required; selects which of the two modes above applies
+    deploy_pa: int | None = None  # address of the deployed binary; defaults to None (nothing here enforces it for "deployed")
+    deploy_sip: int = 0  # sip / cube / pe coordinates of the deployed binary; each defaults to 0
+    deploy_cube: int = 0
+    deploy_pe: int = 0
+    nbytes_code: int = 0  # presumably the deployed binary's size in bytes -- TODO confirm
+
+
+@dataclass(frozen=True)
+class TensorArgShard:  # One element of TensorArg.shards: nbytes at address pa on (sip, cube, pe).
+    sip: int  # coordinates of the shard: sip / cube / pe indices
+    cube: int
+    pe: int
+    pa: int  # shard address; "pa" presumably means physical address -- TODO confirm
+    nbytes: int  # byte count of this shard
+    offset_bytes: int  # presumably this shard's byte offset within the whole tensor -- TODO confirm
+
+
+@dataclass(frozen=True)
+class TensorArg:  # Kernel argument made up of one or more TensorArgShard entries.
+    shards: tuple[TensorArgShard, ...]  # tuple rather than list, so the frozen instance holds an immutable sequence
+    arg_kind: Literal["tensor"] = "tensor"  # fixed tag distinguishing TensorArg from ScalarArg
+
+
+@dataclass(frozen=True)
+class ScalarArg:  # Kernel argument carrying a single numeric value.
+    dtype: str  # type name for value; the accepted spellings are not defined in this file
+    value: float | int  # the scalar itself
+    arg_kind: Literal["scalar"] = "scalar"  # fixed tag distinguishing ScalarArg from TensorArg
+
+
+KernelArg: TypeAlias = TensorArg | ScalarArg  # union of the two argument classes; each carries its own arg_kind tag
+
+
+@dataclass(frozen=True)
+class KernelLaunchMsg:  # Immutable request to launch the kernel named by kernel_ref with the given args.
+    correlation_id: str  # presumably matched against ResponseMsg.correlation_id -- TODO confirm
+    request_id: str  # presumably matched against ResponseMsg.request_id -- TODO confirm
+    kernel_ref: KernelRef  # which kernel to run: a deployed binary or a builtin timing model
+    args: tuple[KernelArg, ...]  # tensor and/or scalar arguments; ordering semantics are not defined in this file
+    target_cubes: tuple[int, ...] | Literal["all"] = "all"  # explicit tuple of cube indices, or "all" (default)
+    target_pe: int | Literal["all"] = "all"  # a single PE index, or "all" (default)
+    msg_type: Literal["kernel_launch"] = "kernel_launch"  # fixed tag distinguishing this message class
+
+
+@dataclass(frozen=True)
+class ResponseMsg:
+    """Device→Host response carrying PE execution result."""
+
+    correlation_id: str  # presumably echoes the correlation_id of the originating request -- TODO confirm
+    request_id: str  # presumably echoes the request_id of the originating request -- TODO confirm
+    src_cube: int  # cube index of the responder (this message has no sip field)
+    src_pe: int  # PE index of the responder
+    success: bool  # outcome flag; no error detail is carried in this message
+    msg_type: Literal["response"] = "response"  # fixed tag distinguishing this message class
+
+
+@dataclass(frozen=True)
+class PeDmaMsg:
+    """Direct PE DMA request: host injects a transfer at PE_DMA level.
+
+    Used by the probe utility to measure PE→HBM latency without requiring
+    the full PE_CPU → scheduler → DMA pipeline.
+    """
+
+    correlation_id: str  # presumably matched against ResponseMsg.correlation_id -- TODO confirm
+    request_id: str  # presumably matched against ResponseMsg.request_id -- TODO confirm
+    src_sip: int  # coordinates of the PE where the transfer is injected: sip / cube / pe indices
+    src_cube: int
+    src_pe: int
+    dst_pa: int  # target address of the transfer; "pa" presumably means physical address -- TODO confirm
+    nbytes: int  # byte count of the transfer
+    is_write: bool = False  # defaults to False; presumably False means a read and True a write -- TODO confirm
+    msg_type: Literal["pe_dma"] = "pe_dma"  # fixed tag distinguishing this message class