Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion build2cmake/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub struct General {
pub python_depends: Option<Vec<String>>,

pub cuda: Option<CudaGeneral>,
pub neuron: Option<NeuronGeneral>,
pub xpu: Option<XpuGeneral>,
}

Expand Down Expand Up @@ -106,6 +107,10 @@ pub struct XpuGeneral {
pub python_depends: Option<Vec<String>>,
}

/// Neuron-specific settings nested under the general build configuration
/// (stored in the `neuron` field of `General`).
pub struct NeuronGeneral {
    /// Optional list of Python dependency names for the Neuron backend.
    pub python_depends: Option<Vec<String>>,
}

pub struct Hub {
pub repo_id: Option<String>,
pub branch: Option<String>,
Expand Down Expand Up @@ -237,16 +242,18 @@ pub enum Backend {
Cpu,
Cuda,
Metal,
Neuron,
Rocm,
Xpu,
}

impl Backend {
pub const fn all() -> [Backend; 5] {
pub const fn all() -> [Backend; 6] {
[
Backend::Cpu,
Backend::Cuda,
Backend::Metal,
Backend::Neuron,
Backend::Rocm,
Backend::Xpu,
]
Expand All @@ -259,6 +266,7 @@ impl Display for Backend {
Backend::Cpu => write!(f, "cpu"),
Backend::Cuda => write!(f, "cuda"),
Backend::Metal => write!(f, "metal"),
Backend::Neuron => write!(f, "neuron"),
Backend::Rocm => write!(f, "rocm"),
Backend::Xpu => write!(f, "xpu"),
}
Expand All @@ -273,6 +281,7 @@ impl FromStr for Backend {
"cpu" => Ok(Backend::Cpu),
"cuda" => Ok(Backend::Cuda),
"metal" => Ok(Backend::Metal),
"neuron" => Ok(Backend::Neuron),
"rocm" => Ok(Backend::Rocm),
"xpu" => Ok(Backend::Xpu),
_ => Err(format!("Unknown backend: {s}")),
Expand Down
2 changes: 2 additions & 0 deletions build2cmake/src/config/v1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ impl TryFrom<Build> for super::Build {
Backend::Cpu,
Backend::Cuda,
Backend::Metal,
Backend::Neuron,
Backend::Rocm,
Backend::Xpu,
]
Expand All @@ -102,6 +103,7 @@ impl TryFrom<Build> for super::Build {
license: None,
backends,
hub: None,
neuron: None,
python_depends: None,
cuda: None,
xpu: None,
Expand Down
2 changes: 2 additions & 0 deletions build2cmake/src/config/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ impl TryFrom<Build> for super::Build {
Backend::Cpu,
Backend::Cuda,
Backend::Metal,
Backend::Neuron,
Backend::Rocm,
Backend::Xpu,
]
Expand Down Expand Up @@ -168,6 +169,7 @@ impl General {
backends,
cuda,
hub: general.hub.map(Into::into),
neuron: None,
python_depends: None,
xpu: None,
}
Expand Down
29 changes: 29 additions & 0 deletions build2cmake/src/config/v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ pub struct General {

pub hub: Option<Hub>,

pub neuron: Option<NeuronGeneral>,

pub python_depends: Option<Vec<String>>,

pub xpu: Option<XpuGeneral>,
Expand All @@ -44,6 +46,12 @@ pub struct CudaGeneral {
pub python_depends: Option<Vec<String>>,
}

/// Neuron-specific options of the v3 `general` section.
///
/// Per the serde attributes below, field names are (de)serialized in
/// kebab-case and unknown keys are rejected.
#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct NeuronGeneral {
    /// Optional list of Python dependency names; appears as `python-depends`
    /// in the serialized form because of the kebab-case renaming.
    pub python_depends: Option<Vec<String>>,
}

#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct XpuGeneral {
Expand Down Expand Up @@ -121,6 +129,7 @@ pub enum Backend {
Cpu,
Cuda,
Metal,
Neuron,
Rocm,
Xpu,
}
Expand Down Expand Up @@ -150,6 +159,7 @@ impl From<General> for super::General {
backends: general.backends.into_iter().map(Into::into).collect(),
cuda: general.cuda.map(Into::into),
hub: general.hub.map(Into::into),
neuron: general.neuron.map(Into::into),
python_depends: general.python_depends,
xpu: general.xpu.map(Into::into),
}
Expand All @@ -166,6 +176,14 @@ impl From<CudaGeneral> for super::CudaGeneral {
}
}

impl From<NeuronGeneral> for super::NeuronGeneral {
fn from(neuron: NeuronGeneral) -> Self {
Self {
python_depends: neuron.python_depends,
}
}
}

impl From<XpuGeneral> for super::XpuGeneral {
fn from(xpu: XpuGeneral) -> Self {
Self {
Expand Down Expand Up @@ -201,6 +219,7 @@ impl From<Backend> for super::Backend {
Backend::Cpu => super::Backend::Cpu,
Backend::Cuda => super::Backend::Cuda,
Backend::Metal => super::Backend::Metal,
Backend::Neuron => super::Backend::Neuron,
Backend::Rocm => super::Backend::Rocm,
Backend::Xpu => super::Backend::Xpu,
}
Expand Down Expand Up @@ -304,6 +323,7 @@ impl From<super::General> for General {
backends: general.backends.into_iter().map(Into::into).collect(),
cuda: general.cuda.map(Into::into),
hub: general.hub.map(Into::into),
neuron: general.neuron.map(Into::into),
python_depends: general.python_depends,
xpu: general.xpu.map(Into::into),
}
Expand All @@ -320,6 +340,14 @@ impl From<super::CudaGeneral> for CudaGeneral {
}
}

/// Converts the parent module's `NeuronGeneral` into this module's
/// `NeuronGeneral` by moving its single field across unchanged.
impl From<super::NeuronGeneral> for NeuronGeneral {
    fn from(neuron: super::NeuronGeneral) -> Self {
        // Exhaustive destructuring: adding a field to the source struct
        // becomes a compile error here instead of being silently dropped.
        let super::NeuronGeneral { python_depends } = neuron;
        Self { python_depends }
    }
}

impl From<super::XpuGeneral> for XpuGeneral {
fn from(xpu: super::XpuGeneral) -> Self {
Self {
Expand Down Expand Up @@ -355,6 +383,7 @@ impl From<super::Backend> for Backend {
super::Backend::Cpu => Backend::Cpu,
super::Backend::Cuda => Backend::Cuda,
super::Backend::Metal => Backend::Metal,
super::Backend::Neuron => Backend::Neuron,
super::Backend::Rocm => Backend::Rocm,
super::Backend::Xpu => Backend::Xpu,
}
Expand Down
6 changes: 6 additions & 0 deletions build2cmake/src/python_dependencies.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
}
},
"metal": {},
"neuron": {
"nki": {
"nix": [],
"python": ["nki"]
}
},
"rocm": {},
"xpu": {
"onednn": {
Expand Down
10 changes: 7 additions & 3 deletions build2cmake/src/templates/noarch/setup.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python

import shutil
from pathlib import Path
from typing import Any
from pathlib import Path
import shutil
import sys

from setuptools import setup
from setuptools.command.build import build
Expand Down Expand Up @@ -30,7 +31,10 @@ def run(self) -> None:
"""Execute the build command."""
project_root = Path(__file__).parent

import tomllib
if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib

with open(project_root / "build.toml", "rb") as f:
build_toml: dict[str, Any] = tomllib.load(f)
Expand Down
9 changes: 9 additions & 0 deletions builder/examples/relu-nki/build.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[general]
name = "relu-nki"
version = 1
backends = [
"neuron",
]

[general.neuron]
python-depends = ["nki"]
27 changes: 27 additions & 0 deletions builder/examples/relu-nki/torch-ext/relu_nki/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import nki
import nki.language as nl
import nki.isa as nisa

from ._ops import ops


# ReLU written as an NKI kernel: the input is copied into a tile, combined
# with the scalar 0 via `nl.maximum`, and the result is copied into the
# buffer that is returned.
# NOTE(review): `sbuf` and `hbm` are not imported in this module — presumably
# they are names resolved by the NKI compiler inside `@nki.jit` kernels;
# confirm against the NKI version this example targets.
@nki.jit(platform_target="trn2")
def relu(x):
    # Check the first dimension's size to ensure it does not exceed on-chip
    # memory tile size, since this simple kernel does not tile inputs.
    assert x.shape[0] <= nl.tile_size.pmax
    # Stage the input: obtain a view with x's dtype and shape, then copy x into it.
    x_tile = sbuf.view(dtype=x.dtype, shape=x.shape)
    nisa.dma_copy(dst=x_tile, src=x)
    # A view of the same dtype and shape that receives the result.
    out_tile = sbuf.view(dtype=x.dtype, shape=x.shape)
    # Presumably computes maximum(x_tile, 0) per element, i.e. ReLU — confirm
    # against the `nisa.tensor_scalar` documentation.
    nisa.tensor_scalar(dst=out_tile, data=x_tile, operand0=0, op0=nl.maximum)
    # Copy the result into a view obtained from `hbm`; that view is the return value.
    c_output = hbm.view(dtype=x.dtype, shape=x.shape)
    nisa.dma_copy(dst=c_output, src=out_tile)
    return c_output


from . import layers

__all__ = [
"layers",
"relu",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import torch
import torch.nn as nn

from .. import relu


class ReLU(nn.Module):
    """Module wrapper whose forward pass delegates to the package-level ``relu``."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``relu(x)`` for the given tensor."""
        activated = relu(x)
        return activated
8 changes: 7 additions & 1 deletion kernels/src/kernels/layer/kernelize.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

def _validate_device_type(device_type: str) -> None:
"""Validate that the device type is supported."""
supported_devices = {"cpu", "cuda", "mps", "npu", "rocm", "xpu"}
supported_devices = {"cpu", "cuda", "mps", "neuron", "npu", "rocm", "xpu"}
if device_type not in supported_devices:
raise ValueError(
f"Unsupported device type '{device_type}'. Supported device types are: {', '.join(sorted(supported_devices))}"
Expand Down Expand Up @@ -310,3 +310,9 @@ def _is_rocm_platform():
import torch

return torch.version.hip is not None


def _has_neuron_ops():
import torch

return hasattr(torch, "neuron")
22 changes: 22 additions & 0 deletions kernels/src/kernels/layer/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def create_repo(device: Device) -> "DeviceRepos":
return _XPURepos()
elif device.type == "npu":
return _NPURepos()
elif device.type == "neuron":
return _NeuronRepos()
else:
raise ValueError(f"Unknown device type: {device.type}")

Expand Down Expand Up @@ -93,6 +95,26 @@ def insert(self, device: Device, repos: dict[Mode, RepositoryProtocol]):
self._repos = repos


class _NeuronRepos(DeviceRepos):
    """Holds the per-mode repositories registered for the ``neuron`` device type."""

    _repos: dict[Mode, RepositoryProtocol]

    def __init__(self):
        super().__init__()
        # Start with no repositories registered.
        self._repos = {}

    @property
    def repos(self) -> dict[Mode, RepositoryProtocol] | None:
        """Return the currently stored mode-to-repository mapping."""
        return self._repos

    def insert(self, device: Device, repos: dict[Mode, RepositoryProtocol]):
        """Replace the stored mapping with ``repos``.

        Raises:
            ValueError: if ``device.type`` is not ``"neuron"``.
        """
        if device.type == "neuron":
            self._repos = repos
            return

        raise ValueError(f"Device type must be 'neuron', got {device.type}")


class _NPURepos(DeviceRepos):
_repos: dict[Mode, RepositoryProtocol]

Expand Down
6 changes: 6 additions & 0 deletions kernels/src/kernels/python_depends.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
}
},
"metal": {},
"neuron": {
"nki": {
"nix": [],
"python": ["nki"]
}
},
"rocm": {},
"xpu": {
"onednn": {
Expand Down
Loading
Loading