Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions xla/stream_executor/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,11 @@ alias(
actual = "//xla/stream_executor/rocm:all_runtime",
)

# Forwarding alias: lets dependents refer to the SYCL platform from the
# top-level stream_executor package, mirroring the rocm alias above.
alias(
name = "sycl_platform",
actual = "//xla/stream_executor/sycl:all_runtime",
)

# TODO(ezhulenev): This should be removed.
cc_library(
name = "stream_executor_bundle",
Expand Down
6 changes: 6 additions & 0 deletions xla/stream_executor/gpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ load(
"if_rocm",
"if_rocm_is_configured",
)
load(
"@local_config_sycl//sycl:build_defs.bzl",
"if_sycl_is_configured",
)
load(
"@tsl//tsl/platform:build_config_root.bzl",
"if_static",
Expand Down Expand Up @@ -406,6 +410,8 @@ gpu_only_cc_library(
"@local_config_cuda//cuda:cuda_headers",
]) + if_rocm_is_configured([
"@local_config_rocm//rocm:rocm_headers",
]) + if_sycl_is_configured([
"@local_config_sycl//sycl:sycl_headers",
]),
)

Expand Down
35 changes: 31 additions & 4 deletions xla/stream_executor/gpu/gpu_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// GPU (ROCm / CUDA) specific type handle resolution
// GPU (SYCL / ROCm / CUDA) specific type handle resolution

#ifndef XLA_STREAM_EXECUTOR_GPU_GPU_TYPES_H_
#define XLA_STREAM_EXECUTOR_GPU_GPU_TYPES_H_

#if TENSORFLOW_USE_ROCM
#if TENSORFLOW_USE_SYCL

#include "sycl/sycl.hpp"

#elif TENSORFLOW_USE_ROCM

#define __HIP_DISABLE_CPP_FUNCTIONS__

Expand All @@ -37,10 +41,33 @@ namespace stream_executor {
namespace gpu {

// An empty struct to be used as a handle for all unsupported features in
// current CUDA/HIP version.
// current CUDA/HIP/SYCL version.
struct UnsupportedGpuFeature {};

#if TENSORFLOW_USE_ROCM
#if TENSORFLOW_USE_SYCL

// SYCL backend type mappings. SYCL runtime objects are referenced through
// raw pointers here; features with no SYCL equivalent map to
// UnsupportedGpuFeature so code touching them fails to compile for SYCL.
using GpuContextHandle = ::sycl::context*;
using GpuStreamHandle = ::sycl::queue*;
using GpuEventHandle = ::sycl::event*;
using GpuFunctionHandle = ::sycl::kernel*;
using GpuFunctionAttribute = UnsupportedGpuFeature;
using GpuDeviceHandle = ::sycl::device*;
using GpuDevicePtr = void*;
using GpuDeviceAttribute = UnsupportedGpuFeature;
using GpuDeviceProperty = UnsupportedGpuFeature;
// NOTE(review): ze_module_handle_t is a Level Zero type; no
// <level_zero/ze_api.h> include is visible in this header — presumably it
// arrives transitively via "sycl/sycl.hpp" in the oneAPI toolchain. Confirm,
// or include it explicitly.
using GpuModuleHandle = ze_module_handle_t;
using GpuStatus = UnsupportedGpuFeature;
using GpuFuncCachePreference = UnsupportedGpuFeature;
using GpuSharedMemConfig = UnsupportedGpuFeature;
// NOTE(review): std::complex is used with no visible #include <complex> in
// this header fragment — verify it is reachable on this include path.
using GpuComplexType = std::complex<float>;
using GpuDoubleComplexType = std::complex<double>;
using GpuRngHandle = UnsupportedGpuFeature;
using GpuGraphHandle = UnsupportedGpuFeature;
using GpuGraphExecHandle = UnsupportedGpuFeature;
using GpuGraphNodeHandle = UnsupportedGpuFeature;
using GpuGraphConditionalHandle = UnsupportedGpuFeature;

#elif TENSORFLOW_USE_ROCM

using GpuStreamHandle = hipStream_t;
using GpuEventHandle = hipEvent_t;
Expand Down
88 changes: 88 additions & 0 deletions xla/stream_executor/sycl/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Description:
# SYCL-platform specific StreamExecutor support code.

load(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I said that the line is duplicated, I only meant the # buildifier: disable=out-of-order-load line. (There were two of them.) Please add this back:

# Description:
#   SYCL-platform specific StreamExecutor support code.

(We can remove the buildifier tag if it isn't needed.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. Done.

"//xla/stream_executor:build_defs.bzl",
"stream_executor_friends",
)
load(
"@local_config_sycl//sycl:build_defs.bzl",
"if_sycl_is_configured",
)
load(
"//xla:xla.bzl",
"xla_cc_test",
)
load("//xla/tsl:tsl.bzl", "internal_visibility", "tsl_copts")
load("@tsl//tsl/platform:build_config_root.bzl", "if_static")
load("@tsl//tsl/platform:rules_cc.bzl", "cc_library")

package(
# copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
default_visibility = internal_visibility([":friends"]),
licenses = ["notice"],
)

package_group(
name = "friends",
packages = stream_executor_friends(),
)

# The SYCL platform id constant alone, so callers can reference the id
# without depending on (or linking) the full platform implementation.
cc_library(
name = "sycl_platform_id",
srcs = ["sycl_platform_id.cc"],
hdrs = ["sycl_platform_id.h"],
deps = ["//xla/stream_executor:platform"],
)

# StreamExecutor Platform implementation for SYCL devices. Sources and deps
# are gated on if_sycl_is_configured so the target is an empty no-op when the
# toolchain has no SYCL support.
cc_library(
name = "sycl_platform",
srcs = if_sycl_is_configured(["sycl_platform.cc"]),
hdrs = if_sycl_is_configured(["sycl_platform.h"]),
visibility = ["//visibility:public"],
deps = if_sycl_is_configured([
":sycl_platform_id",
"@com_google_absl//absl/base",
"@com_google_absl//absl/memory",
"//xla/stream_executor", # buildcleaner: keep
"//xla/stream_executor:executor_cache",
"//xla/stream_executor/platform",
"//xla/stream_executor/gpu:gpu_types_header",
"//xla/stream_executor/gpu:gpu_driver_header",
"//xla/stream_executor/gpu:gpu_executor_header",
"//xla/stream_executor/gpu:gpu_collectives_header",
]),
alwayslink = True, # Registers itself with the PlatformManager.
)

# Unit test for the SYCL platform; compiles to an empty test binary when
# SYCL is not configured.
xla_cc_test(
name = "sycl_platform_test",
srcs = if_sycl_is_configured(["sycl_platform_test.cc"]),
deps = if_sycl_is_configured([
":sycl_platform",
"@tsl//tsl/platform:statusor",
"@tsl//tsl/platform:test",
"@tsl//tsl/platform:test_main",
]),
)

# Umbrella target mirroring cuda/rocm "all_runtime": everything needed at
# runtime for the SYCL platform. alwayslink keeps the platform's module
# initializer (self-registration) from being dropped by the linker.
cc_library(
name = "all_runtime",
copts = tsl_copts(),
visibility = ["//visibility:public"],
deps = if_sycl_is_configured([":sycl_platform"]),
alwayslink = 1,
)

# Adds an rpath entry so binaries locate the SYCL runtime shared libraries
# from the local_config_sycl repository at run time.
cc_library(
name = "sycl_rpath",
linkopts = ["-Wl,-rpath,../local_config_sycl/sycl/sycl/lib"],
)

# Convenience bundle: StreamExecutor core plus the SYCL runtime (statically
# linked builds only) and the rpath needed to find SYCL shared libraries.
cc_library(
name = "stream_executor_sycl",
deps = [
":sycl_rpath",
"//xla/stream_executor:stream_executor_bundle",
] + if_static([":all_runtime"]),
)
159 changes: 159 additions & 0 deletions xla/stream_executor/sycl/sycl_platform.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/* Copyright 2024 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "xla/stream_executor/sycl/sycl_platform.h"

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <utility>

#include "absl/base/call_once.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "xla/stream_executor/sycl/sycl_platform_id.h"
#include "xla/stream_executor/device_description.h"
#include "xla/stream_executor/gpu/gpu_driver.h"
#include "xla/stream_executor/gpu/gpu_executor.h"
#include "xla/stream_executor/platform.h"
#include "xla/stream_executor/platform/initialize.h"
#include "xla/stream_executor/platform_manager.h"
#include "tsl/platform/status.h"

namespace stream_executor {
namespace gpu {

// Constructs the SYCL platform. The NUMA bookkeeping members start at zero
// and are populated lazily by InspectNumaNodes() on first use.
SyclPlatform::SyclPlatform()
    : name_("SYCL"), min_numa_node_(0), limit_numa_node_(0) {}

// No teardown work is needed; say so explicitly with `= default` instead of
// an empty user-provided body.
SyclPlatform::~SyclPlatform() = default;

// Due to legacy issues in user code, we can't currently call InspectNumaNodes
// at module initialization time, because non-GPU programs still include this
// plugin via various methods, so instead, it has to be init-on-reference.
void SyclPlatform::InspectNumaNodes() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

InspectNumaNodes, BusCount, DeviceToBus, FirstExecutorForBus, DescriptionForDevice, ExecutorForDevice, GetExecutor, and GetUncachedExecutor seem pretty similar across CUDA/ROCm/SYCL platforms. Just a heads up that I will refactor them to a base class later.

// To get NUMA node information, we need to create all executors, so we can
// examine their device descriptions to see their bus assignments.
static absl::once_flag once;
absl::call_once(once, [&] {
for (int i = 0; i < VisibleDeviceCount(); i++) {
StreamExecutor* exec = *ExecutorForDevice(i);
if (i == 0) {
// NUMA nodes may not start at 0, so set the minimum node based on the
// first executor we see.
min_numa_node_ = exec->GetDeviceDescription().numa_node();
limit_numa_node_ = min_numa_node_ + 1;
} else {
min_numa_node_ =
std::min(min_numa_node_, exec->GetDeviceDescription().numa_node());
limit_numa_node_ = std::max(
limit_numa_node_, exec->GetDeviceDescription().numa_node() + 1);
}
}
});
}

// Number of distinct buses (NUMA nodes) spanned by the visible devices.
int SyclPlatform::BusCount() {
  // Ensure the NUMA range has been computed before deriving the count.
  InspectNumaNodes();
  const int bus_count = limit_numa_node_ - min_numa_node_;
  return bus_count;
}

int SyclPlatform::DeviceToBus(int device_ordinal) {
StreamExecutor* exec = *ExecutorForDevice(device_ordinal);
return exec->GetDeviceDescription().numa_node() - min_numa_node_;
}

// Returns the executor of the lowest-ordinal device attached to bus_ordinal,
// or NotFoundError if no visible device maps to that bus.
absl::StatusOr<StreamExecutor*> SyclPlatform::FirstExecutorForBus(
    int bus_ordinal) {
  InspectNumaNodes();
  CHECK_LT(bus_ordinal, BusCount()) << "bus ordinal out of available range";
  const int device_count = VisibleDeviceCount();
  for (int ordinal = 0; ordinal < device_count; ++ordinal) {
    if (DeviceToBus(ordinal) != bus_ordinal) continue;
    return *ExecutorForDevice(ordinal);
  }

  return absl::NotFoundError(
      absl::StrFormat("Executor for bus %d not found.", bus_ordinal));
}

// The opaque platform id that PlatformManager uses to identify SYCL.
Platform::Id SyclPlatform::id() const { return sycl::kSyclPlatformId; }

// Number of SYCL devices visible to this process; -1 if the driver failed to
// initialize. The value is computed once and cached for the process lifetime.
int SyclPlatform::VisibleDeviceCount() const {
  // Function-local static: the lambda runs exactly once, thread-safely, on
  // the first call.
  static const int num_devices = [] {
    return GpuDriver::Init().ok() ? GpuDriver::GetDeviceCount() : -1;
  }();
  return num_devices;
}

// Human-readable platform name ("SYCL"), fixed at construction.
const std::string& SyclPlatform::Name() const { return name_; }

// Builds a device description for the given ordinal by delegating to the
// generic GPU executor support code.
absl::StatusOr<std::unique_ptr<DeviceDescription>>
SyclPlatform::DescriptionForDevice(int ordinal) const {
return GpuExecutor::CreateDeviceDescription(ordinal);
}

// Convenience wrapper: fetches (or creates) the cached executor for a device
// ordinal via GetExecutor with a default-initialized config.
absl::StatusOr<StreamExecutor*> SyclPlatform::ExecutorForDevice(int ordinal) {
StreamExecutorConfig config;
config.ordinal = ordinal;
return GetExecutor(config);
}

// Returns the executor matching `config`, creating and caching it on demand.
absl::StatusOr<StreamExecutor*> SyclPlatform::GetExecutor(
    const StreamExecutorConfig& config) {
  // A caller-supplied GPU stream pins us to an already-created executor:
  // it is not possible to get-or-create one around a foreign stream pointer,
  // so only previously allocated executors are considered.
  if (config.gpu_stream) {
    return executor_cache_.Get(config);
  }
  auto factory = [&]() { return GetUncachedExecutor(config); };
  return executor_cache_.GetOrCreate(config, factory);
}

// Creates and initializes a fresh executor for `config`, bypassing the cache.
// Returns InternalError if executor initialization fails.
absl::StatusOr<std::unique_ptr<StreamExecutor>>
SyclPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) {
  auto executor = std::make_unique<GpuExecutor>(this, config.ordinal);
  if (auto init_status = executor->Init(); !init_status.ok()) {
    return absl::InternalError(absl::StrFormat(
        "failed initializing StreamExecutor for SYCL device ordinal %d: %s",
        config.ordinal, init_status.ToString()));
  }

  // std::move is required: the return converts unique_ptr<GpuExecutor> to
  // unique_ptr<StreamExecutor>, so NRVO does not apply.
  return std::move(executor);
}

} // namespace gpu

static void InitializeSyclPlatform() {
// Disabling leak checking, PlatformManager does not destroy its
// registered platforms.

std::unique_ptr<gpu::SyclPlatform> platform(new gpu::SyclPlatform);
TF_CHECK_OK(PlatformManager::RegisterPlatform(std::move(platform)));
}

} // namespace stream_executor

STREAM_EXECUTOR_REGISTER_MODULE_INITIALIZER(
sycl_platform, stream_executor::InitializeSyclPlatform());
Loading