huggingface · danieldk · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026
diff --git a/.github/workflows/build_kernel_xpu.yaml b/.github/workflows/build_kernel_xpu.yaml
@@ -34,13 +34,13 @@ jobs:
       # For now we only test that there are no regressions in building XPU
       # kernels. Also run tests once we have a XPU runner.
       - name: Build relu kernel
-        run: ( cd examples/kernels/relu && nix build .\#redistributable.torch29-cxx11-xpu20252-x86_64-linux -L )
+        run: ( cd examples/kernels/relu && nix build .\#redistributable.torch210-cxx11-xpu20253-x86_64-linux -L )
 
       - name: Build relu tvm-ffi kernel
-        run: ( cd examples/kernels/relu-tvm-ffi && nix build .\#redistributable.tvm-ffi01-xpu20252-x86_64-linux -L )
+        run: ( cd examples/kernels/relu-tvm-ffi && nix build .\#redistributable.tvm-ffi01-xpu20253-x86_64-linux -L )
 
       - name: Build relu kernel (compiler flags)
-        run: ( cd examples/kernels/relu-compiler-flags && nix build .\#redistributable.torch29-cxx11-xpu20252-x86_64-linux )
+        run: ( cd examples/kernels/relu-compiler-flags && nix build .\#redistributable.torch210-cxx11-xpu20253-x86_64-linux )
 
       - name: Build cutlass-gemm kernel
-        run: ( cd examples/kernels/cutlass-gemm && nix build .\#redistributable.torch29-cxx11-xpu20252-x86_64-linux -L )
+        run: ( cd examples/kernels/cutlass-gemm && nix build .\#redistributable.torch210-cxx11-xpu20253-x86_64-linux -L )
diff --git a/docs/source/builder/build-variants.md b/docs/source/builder/build-variants.md
@@ -55,7 +55,7 @@ available. This list will be updated as new PyTorch versions are released.
 ## XPU x86_64-linux
 
 - `torch210-cxx11-xpu20253-x86_64-linux`
-- `torch29-cxx11-xpu20252-x86_64-linux`
+- `torch211-cxx11-xpu20253-x86_64-linux`
 
 ## Python-only kernels
 

diff --git a/kernel-builder/src/pyproject/templates/xpu/dep-sycl-tla.cmake b/kernel-builder/src/pyproject/templates/xpu/dep-sycl-tla.cmake
@@ -4,12 +4,6 @@ find_package(SyclTla)
 
 if(DPCPP_VERSION STREQUAL "2025.3")
   set(SYCL_TLA_REVISION "14055e78510b8776ba739755eb57e592fdceefdb" CACHE STRING "CUTLASS revision to use")
-elseif(DPCPP_VERSION STREQUAL "2025.2")
-  set(SYCL_TLA_REVISION "14055e78510b8776ba739755eb57e592fdceefdb" CACHE STRING "CUTLASS revision to use")
-elseif(DPCPP_VERSION STREQUAL "2025.1")
-  set(SYCL_TLA_REVISION "v3.9-0.3" CACHE STRING "CUTLASS revision to use")
-elseif(DPCPP_VERSION STREQUAL "2025.0")
-  set(SYCL_TLA_REVISION "v3.9-0.2" CACHE STRING "CUTLASS revision to use")
 else()
   message(FATAL_ERROR "Unknown DPCPP_VERSION: ${DPCPP_VERSION}")
 endif()

diff --git a/nix-builder/build-variants.json b/nix-builder/build-variants.json
@@ -46,7 +46,7 @@
     ],
     "xpu": [
       "torch210-cxx11-xpu20253-x86_64-linux",
-      "torch29-cxx11-xpu20252-x86_64-linux"
+      "torch211-cxx11-xpu20253-x86_64-linux"
     ]
   }
 }
diff --git a/nix-builder/lib/mk-build-set.nix b/nix-builder/lib/mk-build-set.nix
@@ -34,7 +34,7 @@ let
   };
 
   overlayForXpuVersion = xpuVersion: self: super: {
-    xpuPackages = super."xpuPackages_${flattenVersion xpuVersion}";
+    xpuPackages = super."xpuPackages_${lib.replaceStrings [ "." ] [ "_" ] xpuVersion}";
   };
 
   backendConfig = {

diff --git a/nix-builder/overlay.nix b/nix-builder/overlay.nix
@@ -63,7 +63,12 @@ in
   # Python packages
   pythonPackagesExtensions = prev.pythonPackagesExtensions ++ [
     (
-      python-self: python-super: with python-self; {
+      python-self: python-super:
+      with python-self;
+      let
+        triton-xpu = callPackage ./pkgs/python-modules/triton-xpu { };
+      in
+      {
         cuda-bindings = python-self.callPackage ./pkgs/python-modules/cuda-bindings { };
 
         cuda-pathfinder = python-self.callPackage ./pkgs/python-modules/cuda-pathfinder { };
@@ -143,19 +148,24 @@ in
           }
         );
 
+        # Remove once sglang moves to a newer Torch version.
         torch-bin_2_9 = mkTorch {
           version = "2.9";
-          xpuPackages = final.xpuPackages_2025_2;
+          triton-xpu = null;
+          # Not supported anymore.
+          xpuPackages = null;
         };
 
         torch-bin_2_10 = mkTorch {
           version = "2.10";
-          xpuPackages = final.xpuPackages_2025_3;
+          triton-xpu = triton-xpu_3_6_0;
+          xpuPackages = final.xpuPackages_2025_3_1;
         };
 
         torch-bin_2_11 = mkTorch {
           version = "2.11";
-          xpuPackages = final.xpuPackages_2025_3;
+          triton-xpu = triton-xpu_3_7_0;
+          xpuPackages = final.xpuPackages_2025_3_2;
         };
 
         transformers = python-super.transformers.overridePythonAttrs (prevAttrs: rec {
@@ -171,10 +181,9 @@ in
           ];
         });
 
-        triton-xpu_2_9 = callPackage ./pkgs/python-modules/triton-xpu {
-          torchVersion = "2.9";
-          xpuPackages = final.xpuPackages_2025_2;
-        };
+        triton-xpu_3_6_0 = triton-xpu.triton-xpu_3_6_0;
+
+        triton-xpu_3_7_0 = triton-xpu.triton-xpu_3_7_0;
 
         tvm-ffi = callPackage ./pkgs/python-modules/tvm-ffi {
         };
@@ -183,7 +192,7 @@ in
     (import ./pkgs/python-modules/hooks)
   ];
 
-  xpuPackages = final.xpuPackages_2025_1;
+  xpuPackages = final.xpuPackages_2025_3_1;
 }
 // (import ./pkgs/cutlass { pkgs = final; })
 // (
@@ -211,15 +220,14 @@ in
     flattenVersion = prev.lib.strings.replaceStrings [ "." ] [ "_" ];
     readPackageMetadata = path: (builtins.fromJSON (builtins.readFile path));
     xpuVersions = [
-      "2025.1.3"
-      "2025.2.1"
       "2025.3.1"
+      "2025.3.2"
     ];
     newXpuPackages = final.callPackage ./pkgs/xpu-packages { };
   in
   builtins.listToAttrs (
     map (version: {
-      name = "xpuPackages_${flattenVersion (prev.lib.versions.majorMinor version)}";
+      name = "xpuPackages_${flattenVersion version}";
       value = newXpuPackages {
         packageMetadata = readPackageMetadata ./pkgs/xpu-packages/intel-deep-learning-${version}.json;
       };

diff --git a/nix-builder/pkgs/python-modules/hooks/python-relax-wheel-deps-hook.sh b/nix-builder/pkgs/python-modules/hooks/python-relax-wheel-deps-hook.sh
@@ -50,8 +50,12 @@ _pythonRelaxWheelDeps() {
     else
         # shellcheck disable=SC2048
         for dep in ${pythonRelaxWheelDeps[*]}; do
+            # NOTE: the version specifier must start with a character that is
+            #       not valid in a PEP 508 package name; otherwise e.g. `-xpu`
+            #       in `triton-xpu` would be consumed as the specifier when dep
+            #       is `triton`.
             sed -i "$metadata_file" -r \
-                -e "s/(Requires-Dist: $dep\s*(\[[^]]+\])?)[^;]*(;.*)?/\1\3/i"
+                -e "s/(Requires-Dist: $dep)(\s*(\[[^]]+\])?)([^a-zA-Z0-9._-][^;]*)?(;.*)?$/\1\2\5/i"
         done
     fi
 }
@@ -67,8 +71,8 @@ _pythonRemoveWheelDeps() {
     else
         # shellcheck disable=SC2048
         for dep in ${pythonRemoveWheelDeps[*]-}; do
-            sed -i "$metadata_file" \
-                -e "/Requires-Dist: $dep/d"
+            sed -i "$metadata_file" -r \
+                -e "/Requires-Dist: $dep([^a-zA-Z0-9._-]|$)/d"
         done
     fi
 

diff --git a/nix-builder/pkgs/python-modules/torch/binary/default.nix b/nix-builder/pkgs/python-modules/torch/binary/default.nix
@@ -13,6 +13,7 @@
 }:
 
 {
+  triton-xpu,
   xpuPackages,
   version,
 }:
@@ -38,6 +39,6 @@ let
       or (throw "Unsupported framework: ${framework} for torch version: ${version} on system: ${system}");
 in
 callPackage ./generic.nix {
-  inherit xpuPackages;
+  inherit triton-xpu xpuPackages;
   inherit (urlHash) url hash version;
 }
diff --git a/nix-builder/pkgs/python-modules/torch/binary/generic.nix b/nix-builder/pkgs/python-modules/torch/binary/generic.nix
@@ -39,6 +39,7 @@
   sympy,
   triton,
   triton-cuda,
+  triton-xpu,
   typing-extensions,
 
   url,
@@ -52,7 +53,7 @@ let
     if cudaSupport then
       triton-cuda
     else if xpuSupport then
-      python.pkgs.triton-xpu_2_9
+      triton-xpu
     else
       triton;
 
@@ -228,26 +229,43 @@ buildPythonPackage {
   # dependencies, but we don't need them or provide them because we burn
   # the Nix store paths of the framework into the Torch libraries..
   pythonRemoveWheelDeps =
-    lib.optionals cudaSupport [
-      "cuda-toolkit"
-      "nvidia-cuda-runtime"
-      "nvidia-cuda-nvrtc"
-      "nvidia-cuda-cupti"
-      "nvidia-cudnn"
-      "nvidia-cublas"
-      "nvidia-cufft"
-      "nvidia-curand"
-      "nvidia-cusolver"
-      "nvidia-cusparse"
-      "nvidia-cusparselt"
-      "nvidia-nccl"
-      "nvidia-nvshmem"
-      "nvidia-nvtx"
-      "nvidia-nvjitlink"
-      "nvidia-cufile"
-    ]
+    # Some CUDA dependencies have a version suffix (e.g. `-cu12`) and some
+    # don't, so remove every package/suffix combination below. Let's be
+    # greedy, autoPatchelfHook will catch missing library dependencies
+    # for us.
+    lib.optionals cudaSupport (
+      builtins.map ({ pkg, suffix }: "${pkg}${suffix}") (
+        lib.cartesianProduct {
+          pkg = [
+            "cuda-toolkit"
+            "nvidia-cuda-runtime"
+            "nvidia-cuda-nvrtc"
+            "nvidia-cuda-cupti"
+            "nvidia-cudnn"
+            "nvidia-cublas"
+            "nvidia-cufft"
+            "nvidia-curand"
+            "nvidia-cusolver"
+            "nvidia-cusparse"
+            "nvidia-cusparselt"
+            "nvidia-nccl"
+            "nvidia-nvshmem"
+            "nvidia-nvtx"
+            "nvidia-nvjitlink"
+            "nvidia-cufile"
+          ];
+          suffix = [
+            ""
+            "-cu12"
+            "-cu13"
+          ];
+        }
+      )
+    )
     ++ lib.optionals rocmSupport [
-      "pytorch-triton-rocm"
+      # Ours is called 'triton'. Remove this once we build ROCm Triton from
+      # a binary wheel.
+      "triton-rocm"
     ]
     ++ lib.optionals xpuSupport [
       "intel-cmplr-lib-rt"
@@ -271,7 +289,6 @@ buildPythonPackage {
       "tcmlib"
       "umf"
       "intel-pti"
-      "pytorch-triton-xpu"
     ];
 
   propagatedCxxBuildInputs = lib.optionals rocmSupport [ rocmtoolkit_joined ];

diff --git a/nix-builder/pkgs/python-modules/torch/binary/torch-versions-hash.json b/nix-builder/pkgs/python-modules/torch/binary/torch-versions-hash.json
@@ -5,11 +5,6 @@
         "url": "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl",
         "hash": "sha256-a6V77xFJM5fBUddVM0CSKQQSkE5GyF7YYnegO7JP0To=",
         "version": "2.9.0"
-      },
-      "xpu": {
-        "url": "https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp313-cp313-linux_x86_64.whl",
-        "hash": "sha256-mNagbdf7GFh0NnsYvWCfBfFv3OQUKlmAypRGGUmWXNI=",
-        "version": "2.9.0"
       }
     },
     "aarch64-linux": {
@@ -119,6 +114,11 @@
         "url": "https://download.pytorch.org/whl/rocm7.2/torch-2.11.0%2Brocm7.2-cp313-cp313-manylinux_2_28_x86_64.whl",
         "hash": "sha256-CupGzRf2SN6z/1kOFipiQ6SCV2W9AHJ0OWDvqdSlvB0=",
         "version": "2.11.0"
+      },
+      "xpu": {
+        "url": "https://download.pytorch.org/whl/xpu/torch-2.11.0%2Bxpu-cp313-cp313-linux_x86_64.whl",
+        "hash": "sha256-x8KZcJJCc3w3ZGctcQTWwmq36Qbvbr1NhAvUVjl5EvQ=",
+        "version": "2.11.0"
       }
     },
     "aarch64-linux": {

diff --git a/nix-builder/pkgs/python-modules/torch/binary/torch-versions.json b/nix-builder/pkgs/python-modules/torch/binary/torch-versions.json
@@ -4,11 +4,6 @@
     "cudaVersion": "12.9",
     "systems": ["x86_64-linux", "aarch64-linux"]
   },
-  {
-    "torchVersion": "2.9.0",
-    "xpuVersion": "2025.2.1",
-    "systems": ["x86_64-linux"]
-  },
 
   {
     "torchVersion": "2.10.0",
@@ -85,5 +80,10 @@
     "torchVersion": "2.11.0",
     "rocmVersion": "7.2",
     "systems": ["x86_64-linux"]
+  },
+  {
+    "torchVersion": "2.11.0",
+    "xpuVersion": "2025.3.2",
+    "systems": ["x86_64-linux"]
   }
 ]