From 9738646490fb56193c8a03e86d32f6d20090f587 Mon Sep 17 00:00:00 2001 From: "Ma, Liangliang" Date: Fri, 13 Sep 2024 02:06:46 -0700 Subject: [PATCH 1/3] fix xpu zero-infinity --- csrc/xpu/aio/deepspeed_cpu_op.cpp | 60 +++++++++++++++++++++++++++++++ op_builder/xpu/async_io.py | 5 +-- 2 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 csrc/xpu/aio/deepspeed_cpu_op.cpp diff --git a/csrc/xpu/aio/deepspeed_cpu_op.cpp b/csrc/xpu/aio/deepspeed_cpu_op.cpp new file mode 100644 index 000000000000..af4a0f47b5c8 --- /dev/null +++ b/csrc/xpu/aio/deepspeed_cpu_op.cpp @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 + +// DeepSpeed Team + +#include "deepspeed_cpu_op.h" + +using namespace std; + +cpu_op_desc_t::cpu_op_desc_t(const bool read_op, + const torch::Tensor& buffer, + const int fd, + const char* filename, + const long long int file_num_bytes, + const int num_threads, + const bool validate) + : io_op_desc_t(read_op, buffer, fd, filename, file_num_bytes, num_threads, validate), + _cpu_buffer(buffer) +{ + // XPU don't handle buffer here. See XPU Accelerator pin_memory. + _contiguous_buffer = _cpu_buffer.contiguous(); +} + +char* cpu_op_desc_t::data_ptr() const { return (char*)_contiguous_buffer.data_ptr(); } + +void cpu_op_desc_t::finish() +{ + if (_read_op) { + if (_buffer.is_cuda()) { _buffer.copy_(_cpu_buffer.to(torch::kCUDA)); } + if (_buffer.is_xpu()) { _buffer.copy_(_cpu_buffer.to(torch::kXPU)); } +#if defined(__ENABLE_CANN__) + if (torch_npu::utils::is_npu(_buffer)) { + auto device = at::Device("npu:0"); + _buffer.copy_(_cpu_buffer.to(device)); + } +#endif + } +} + +void cpu_op_desc_t::validate() +{ + validate_aio_operation(_read_op, _filename.c_str(), data_ptr(), _file_num_bytes); +} + +void cpu_op_desc_t::run(const int tid, + std::unique_ptr& aio_ctxt, + deepspeed_aio_config_t* aio_config) +{ + assert(tid < _num_threads); + const auto base_offset = _num_bytes_per_thread * tid; + + std::unique_ptr xfer_ctxt( + new io_xfer_ctxt(_fd, base_offset, _num_bytes_per_thread, data_ptr())); + + if (aio_config->_overlap_events) { + do_aio_operation_overlap(_read_op, aio_ctxt, xfer_ctxt, aio_config, nullptr); + } else { + do_aio_operation_sequential(_read_op, aio_ctxt, xfer_ctxt, aio_config, nullptr); + } +} \ No newline at end of file diff --git a/op_builder/xpu/async_io.py b/op_builder/xpu/async_io.py index 7ed527e016fa..3872bf3cc8e0 100644 --- a/op_builder/xpu/async_io.py +++ b/op_builder/xpu/async_io.py @@ -25,7 +25,8 @@ def sources(self): 'csrc/aio/py_lib/deepspeed_py_aio.cpp', 'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp', 'csrc/aio/py_lib/deepspeed_aio_thread.cpp', 'csrc/aio/common/deepspeed_aio_utils.cpp', 'csrc/aio/common/deepspeed_aio_common.cpp', 'csrc/aio/common/deepspeed_aio_types.cpp', - 'csrc/aio/py_lib/deepspeed_pin_tensor.cpp' + 'csrc/aio/py_lib/deepspeed_pin_tensor.cpp', 'csrc/aio/py_lib/deepspeed_py_io_handle.cpp', + 'csrc/xpu/aio/deepspeed_cpu_op.cpp', 'csrc/aio/py_lib/deepspeed_aio_op_desc.cpp', ] def include_paths(self): @@ -96,4 +97,4 @@ def is_compatible(self, verbose=False): self.warning( "If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found." ) - return super().is_compatible(verbose) and aio_compatible + return super().is_compatible(verbose) and aio_compatible \ No newline at end of file From 756f7c2f3b42ec25010b6c99b2bc874d731e5d7a Mon Sep 17 00:00:00 2001 From: "Ma, Liangliang" Date: Fri, 13 Sep 2024 02:09:44 -0700 Subject: [PATCH 2/3] fix format --- csrc/xpu/aio/deepspeed_cpu_op.cpp | 2 +- op_builder/xpu/async_io.py | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/csrc/xpu/aio/deepspeed_cpu_op.cpp b/csrc/xpu/aio/deepspeed_cpu_op.cpp index af4a0f47b5c8..e3010421f4ff 100644 --- a/csrc/xpu/aio/deepspeed_cpu_op.cpp +++ b/csrc/xpu/aio/deepspeed_cpu_op.cpp @@ -57,4 +57,4 @@ void cpu_op_desc_t::run(const int tid, } else { do_aio_operation_sequential(_read_op, aio_ctxt, xfer_ctxt, aio_config, nullptr); } -} \ No newline at end of file +} diff --git a/op_builder/xpu/async_io.py b/op_builder/xpu/async_io.py index 3872bf3cc8e0..6a6798eaeb9c 100644 --- a/op_builder/xpu/async_io.py +++ b/op_builder/xpu/async_io.py @@ -21,12 +21,18 @@ def absolute_name(self): def sources(self): return [ - 'csrc/aio/py_lib/deepspeed_py_copy.cpp', 'csrc/aio/py_lib/py_ds_aio.cpp', - 'csrc/aio/py_lib/deepspeed_py_aio.cpp', 'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp', - 'csrc/aio/py_lib/deepspeed_aio_thread.cpp', 'csrc/aio/common/deepspeed_aio_utils.cpp', - 'csrc/aio/common/deepspeed_aio_common.cpp', 'csrc/aio/common/deepspeed_aio_types.cpp', - 'csrc/aio/py_lib/deepspeed_pin_tensor.cpp', 'csrc/aio/py_lib/deepspeed_py_io_handle.cpp', - 'csrc/xpu/aio/deepspeed_cpu_op.cpp', 'csrc/aio/py_lib/deepspeed_aio_op_desc.cpp', + 'csrc/aio/py_lib/deepspeed_py_copy.cpp', + 'csrc/aio/py_lib/py_ds_aio.cpp', + 'csrc/aio/py_lib/deepspeed_py_aio.cpp', + 'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp', + 'csrc/aio/py_lib/deepspeed_aio_thread.cpp', + 'csrc/aio/common/deepspeed_aio_utils.cpp', + 'csrc/aio/common/deepspeed_aio_common.cpp', + 'csrc/aio/common/deepspeed_aio_types.cpp', + 'csrc/aio/py_lib/deepspeed_pin_tensor.cpp', + 'csrc/aio/py_lib/deepspeed_py_io_handle.cpp', + 'csrc/xpu/aio/deepspeed_cpu_op.cpp', + 'csrc/aio/py_lib/deepspeed_aio_op_desc.cpp', ] def include_paths(self): @@ -97,4 +103,4 @@ def is_compatible(self, verbose=False): self.warning( "If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found." ) - return super().is_compatible(verbose) and aio_compatible \ No newline at end of file + return super().is_compatible(verbose) and aio_compatible From cabbbd06d111fff53238441c2da23543fe80d75f Mon Sep 17 00:00:00 2001 From: "Ma, Liangliang" Date: Fri, 13 Sep 2024 19:15:46 -0700 Subject: [PATCH 3/3] remove other accelerator code --- csrc/xpu/aio/deepspeed_cpu_op.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/csrc/xpu/aio/deepspeed_cpu_op.cpp b/csrc/xpu/aio/deepspeed_cpu_op.cpp index e3010421f4ff..ee98c2d5cac2 100644 --- a/csrc/xpu/aio/deepspeed_cpu_op.cpp +++ b/csrc/xpu/aio/deepspeed_cpu_op.cpp @@ -25,16 +25,7 @@ char* cpu_op_desc_t::data_ptr() const { return (char*)_contiguous_buffer.data_pt void cpu_op_desc_t::finish() { - if (_read_op) { - if (_buffer.is_cuda()) { _buffer.copy_(_cpu_buffer.to(torch::kCUDA)); } - if (_buffer.is_xpu()) { _buffer.copy_(_cpu_buffer.to(torch::kXPU)); } -#if defined(__ENABLE_CANN__) - if (torch_npu::utils::is_npu(_buffer)) { - auto device = at::Device("npu:0"); - _buffer.copy_(_cpu_buffer.to(device)); - } -#endif - } + if (_read_op && _buffer.is_xpu()) { _buffer.copy_(_cpu_buffer.to(torch::kXPU)); } } void cpu_op_desc_t::validate()