Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ option(onnxruntime_ENABLE_PYTHON "Enable python buildings" OFF)
option(onnxruntime_ENABLE_MEMLEAK_CHECKER "Experimental: Enable memory leak checker in Windows debug build" OFF)
option(onnxruntime_USE_CUDA "Build with CUDA support" OFF)
option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF)
option(onnxruntime_USE_NSYNC "Build with NSYNC support. This option only takes effect on Linux" OFF)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is nsync going to be ON by default? If yes, do we still need #ifdef USE_NSYNC?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NO. We can remove the ifdef

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR sent: #3052

option(onnxruntime_USE_EIGEN_FOR_BLAS "Use eign for blas" ON)
option(onnxruntime_USE_NNAPI "Build with DNNLibrary for Android NNAPI support" OFF)
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
Expand Down Expand Up @@ -299,10 +298,9 @@ if(onnxruntime_BUILD_BENCHMARKS)
endif()
endif()

if(onnxruntime_USE_NSYNC)
if(NOT WIN32)
add_subdirectory(${PROJECT_SOURCE_DIR}/external/nsync EXCLUDE_FROM_ALL)
endif()

# External dependencies
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/external)

Expand Down Expand Up @@ -817,9 +815,7 @@ if(WIN32)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES Shlwapi)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES debug Dbghelp)
else()
if(onnxruntime_USE_NSYNC)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES nsync_cpp)
endif()
list(APPEND onnxruntime_EXTERNAL_LIBRARIES nsync_cpp)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${CMAKE_DL_LIBS} Threads::Threads)
endif()

Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ endif()
onnxruntime_add_include_to_target(onnxruntime_common date_interface)
target_include_directories(onnxruntime_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}
PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public")
if(onnxruntime_USE_NSYNC)
if(NOT WIN32)
target_compile_definitions(onnxruntime_common PUBLIC USE_NSYNC NSYNC_ATOMIC_CPP11)
endif()

Expand Down
6 changes: 3 additions & 3 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ endif()
if (onnxruntime_BUILD_SHARED_LIB)
set(onnxruntime_perf_test_libs onnxruntime_test_utils onnx_test_runner_common onnxruntime_common re2::re2
onnx_test_data_proto onnx_proto ${PROTOBUF_LIB} ${GETOPT_LIB_WIDE} onnxruntime ${SYS_PATH_LIB} ${CMAKE_DL_LIBS})
if(onnxruntime_USE_NSYNC)
if(NOT WIN32)
list(APPEND onnxruntime_perf_test_libs nsync_cpp)
endif()
target_link_libraries(onnxruntime_perf_test PRIVATE ${onnxruntime_perf_test_libs} Threads::Threads)
Expand Down Expand Up @@ -641,7 +641,7 @@ if (onnxruntime_BUILD_SHARED_LIB)
# test inference using shared lib
set(onnxruntime_shared_lib_test_LIBS onnxruntime_mocked_allocator onnxruntime_test_utils onnxruntime_common onnx_proto)

if(onnxruntime_USE_NSYNC)
if(NOT WIN32)
list(APPEND onnxruntime_shared_lib_test_LIBS nsync_cpp)
endif()
AddTest(DYN
Expand Down Expand Up @@ -669,7 +669,7 @@ if(MSVC)
endif()
target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT})
set(onnxruntime_mlas_test_libs onnxruntime_mlas onnxruntime_common)
if(onnxruntime_USE_NSYNC)
if(NOT WIN32)
list(APPEND onnxruntime_mlas_test_libs nsync_cpp)
endif()
list(APPEND onnxruntime_mlas_test_libs Threads::Threads)
Expand Down
3 changes: 1 addition & 2 deletions include/onnxruntime/core/graph/schema_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
#include "core/graph/constants.h"
#include "core/common/common.h"
#include "core/common/status.h"
#include "core/platform/ort_mutex.h"

#include "core/graph/onnx_protobuf.h"
#include "core/platform/ort_mutex.h"
#include <mutex>
#include <deque>
#include "sstream"
Expand Down
118 changes: 104 additions & 14 deletions include/onnxruntime/core/platform/ort_mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,103 @@

#pragma once
#ifdef _WIN32
#include <Windows.h>
#include <mutex>
#include <condition_variable>
namespace onnxruntime {
using OrtMutex = std::mutex;
using OrtCondVar = std::condition_variable;
// Q: Why is OrtMutex better than std::mutex?
// A: OrtMutex supports static initialization but std::mutex doesn't. Static initialization helps us avoid the
// "static initialization order problem".

// Q: Why can't std::mutex do the same?
// A: The VC runtime has to support Windows XP at the ABI level, but we have no such requirement.

// Q: Is OrtMutex faster than std::mutex?
// A: Sure

/**
 * Mutex for Windows builds, implemented on top of a slim reader/writer (SRW) lock that is only ever acquired in
 * exclusive mode.
 *
 * The type exposes lock()/try_lock()/unlock(), so it can be used with std::lock_guard and std::unique_lock.
 * The only data member is initialized from SRWLOCK_INIT and the default constructor is constexpr, which is what
 * allows objects of this type to be statically initialized.
 *
 * The class is neither copyable nor assignable.
 */
class OrtMutex {
 private:
  // The underlying SRW lock. SRWLOCK_INIT puts it in the unlocked state without any runtime call.
  SRWLOCK data_ = SRWLOCK_INIT;

 public:
  constexpr OrtMutex() = default;
  // SRW locks do not need to be explicitly destroyed.
  ~OrtMutex() = default;
  OrtMutex(const OrtMutex&) = delete;
  OrtMutex& operator=(const OrtMutex&) = delete;

  /** Blocks until the calling thread owns the lock exclusively. */
  void lock() { AcquireSRWLockExclusive(native_handle()); }

  /**
   * Attempts to take the lock without blocking.
   * @return true if the lock was acquired, false otherwise.
   */
  bool try_lock() noexcept {
    // The Win32 API only promises a nonzero value on success, so test against zero rather than against TRUE.
    return TryAcquireSRWLockExclusive(native_handle()) != 0;
  }

  /** Releases a lock previously taken with lock() or a successful try_lock(). */
  void unlock() noexcept { ReleaseSRWLockExclusive(native_handle()); }

  using native_handle_type = SRWLOCK*;

  /** Returns a pointer to the underlying SRWLOCK, e.g. for use with SleepConditionVariableSRW. */
  __forceinline native_handle_type native_handle() { return &data_; }
};

/**
 * Condition variable for Windows builds, implemented on top of a Win32 CONDITION_VARIABLE and designed to be
 * used together with std::unique_lock<OrtMutex>.
 *
 * The only data member is initialized from CONDITION_VARIABLE_INIT and the default constructor is constexpr.
 * The class is neither copyable nor assignable.
 */
class OrtCondVar {
  // The underlying Win32 condition variable object.
  CONDITION_VARIABLE native_cv_object = CONDITION_VARIABLE_INIT;

 public:
  constexpr OrtCondVar() noexcept = default;
  ~OrtCondVar() = default;

  OrtCondVar(const OrtCondVar&) = delete;
  OrtCondVar& operator=(const OrtCondVar&) = delete;

  /** Wakes a single thread waiting on this condition variable. */
  void notify_one() noexcept { WakeConditionVariable(&native_cv_object); }
  /** Wakes every thread waiting on this condition variable. */
  void notify_all() noexcept { WakeAllConditionVariable(&native_cv_object); }

  /**
   * Waits on this condition variable with no timeout, using the SRW lock owned by lk.
   * Calls std::terminate() if the underlying Win32 wait reports failure.
   * @param lk A unique_lock<OrtMutex> object; the caller is expected to hold the lock when calling this.
   */
  void wait(std::unique_lock<OrtMutex>& lk) {
    // SleepConditionVariableSRW returns zero on failure and an unspecified nonzero value on success, so the
    // failure test must compare against zero rather than against TRUE.
    // Flags == 0 means the SRW lock is held in exclusive mode, which is the only mode OrtMutex uses.
    if (SleepConditionVariableSRW(&native_cv_object, lk.mutex()->native_handle(), INFINITE, 0) == 0) {
      std::terminate();
    }
  }

  /**
   * Waits until the given predicate returns true.
   * @param __lk A unique_lock<OrtMutex> object.
   * @param __pred A callable, invoked with no arguments, whose result is tested as a boolean.
   */
  template <class _Predicate>
  void wait(std::unique_lock<OrtMutex>& __lk, _Predicate __pred);

  /**
   * Waits on this condition variable for at most rel_time.
   * @param cond_mutex A unique_lock<OrtMutex> object.
   * @param rel_time A chrono::duration object that specifies the amount of time before the thread wakes up.
   * @return cv_status::timeout if the wait terminates when rel_time has elapsed. Otherwise, the method returns
   * cv_status::no_timeout.
   */
  template <class Rep, class Period>
  std::cv_status wait_for(std::unique_lock<OrtMutex>& cond_mutex, const std::chrono::duration<Rep, Period>& rel_time);

  using native_handle_type = CONDITION_VARIABLE*;

  /** Returns a pointer to the underlying CONDITION_VARIABLE. */
  native_handle_type native_handle() { return &native_cv_object; }

 private:
  // Waits until the given system_clock time point; declared here, defined outside this class.
  void timed_wait_impl(std::unique_lock<OrtMutex>& __lk,
                       std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>);
};

// Predicate form of wait(): the predicate is evaluated before the first wait and again after every return from
// wait(__lk); the function returns as soon as the predicate yields true.
template <class _Predicate>
void OrtCondVar::wait(std::unique_lock<OrtMutex>& __lk, _Predicate __pred) {
  for (;;) {
    if (__pred()) {
      return;
    }
    wait(__lk);
  }
}

// Timed wait. Returns cv_status::timeout immediately when rel_time is zero or negative. Otherwise it converts
// rel_time into an absolute system_clock deadline (clamped to the largest representable time point), performs a
// single timed_wait_impl() call, and reports timeout/no_timeout by comparing the elapsed steady_clock time
// against rel_time.
template <class Rep, class Period>
std::cv_status OrtCondVar::wait_for(std::unique_lock<OrtMutex>& cond_mutex,
                                    const std::chrono::duration<Rep, Period>& rel_time) {
  // TODO: is it possible to use nsync_from_time_point_ ?
  namespace chr = std::chrono;
  using SystemTimePoint = chr::time_point<chr::system_clock, chr::nanoseconds>;
  using SystemTimePointFloat = chr::time_point<chr::system_clock, chr::duration<long double, std::nano>>;

  if (rel_time <= chr::duration<Rep, Period>::zero()) {
    return std::cv_status::timeout;
  }

  // Floating-point copy of the largest deadline, so the overflow check below cannot itself overflow.
  const SystemTimePointFloat latest_representable = SystemTimePoint::max();
  // The steady clock measures elapsed time; the system clock provides the base for the absolute deadline.
  const chr::steady_clock::time_point started_at = chr::steady_clock::now();
  const chr::system_clock::time_point wall_now = chr::system_clock::now();

  // Default to "wait as long as representable"; narrow it only when now + rel_time fits.
  SystemTimePoint deadline = SystemTimePoint::max();
  if (latest_representable - rel_time > wall_now) {
    chr::nanoseconds budget = chr::duration_cast<chr::nanoseconds>(rel_time);
    // duration_cast truncates; round up so the wait is never shorter than requested.
    if (budget < rel_time) {
      ++budget;
    }
    deadline = wall_now + budget;
  }
  timed_wait_impl(cond_mutex, deadline);

  if (chr::steady_clock::now() - started_at < rel_time) {
    return std::cv_status::no_timeout;
  }
  return std::cv_status::timeout;
}
} // namespace onnxruntime
#else
#ifdef USE_NSYNC
Expand Down Expand Up @@ -79,15 +171,15 @@ class OrtCondVar {
void wait(std::unique_lock<OrtMutex>& __lk, _Predicate __pred);

/**
* returns cv_status::timeout if the wait terminates when Rel_time has elapsed. Otherwise, the method returns cv_status::no_timeout.
* returns cv_status::timeout if the wait terminates when Rel_time has elapsed. Otherwise, the method returns
* cv_status::no_timeout.
* @param cond_mutex A unique_lock<OrtMutex> object.
* @param rel_time A chrono::duration object that specifies the amount of time before the thread wakes up.
* @return returns cv_status::timeout if the wait terminates when Rel_time has elapsed. Otherwise, the method returns cv_status::no_timeout
* @return returns cv_status::timeout if the wait terminates when Rel_time has elapsed. Otherwise, the method returns
* cv_status::no_timeout
*/
template <class Rep, class Period>
std::cv_status
wait_for(std::unique_lock<OrtMutex>& cond_mutex,
const std::chrono::duration<Rep, Period>& rel_time);
std::cv_status wait_for(std::unique_lock<OrtMutex>& cond_mutex, const std::chrono::duration<Rep, Period>& rel_time);
#ifdef USE_NSYNC
using native_handle_type = nsync::nsync_cv*;
#else
Expand All @@ -103,15 +195,13 @@ class OrtCondVar {

template <class _Predicate>
void OrtCondVar::wait(std::unique_lock<OrtMutex>& __lk, _Predicate __pred) {
while (!__pred())
wait(__lk);
while (!__pred()) wait(__lk);
}

template <class Rep, class Period>
std::cv_status
OrtCondVar::wait_for(std::unique_lock<OrtMutex>& cond_mutex,
const std::chrono::duration<Rep, Period>& rel_time) {
//TODO: is it possible to use nsync_from_time_point_ ?
std::cv_status OrtCondVar::wait_for(std::unique_lock<OrtMutex>& cond_mutex,
const std::chrono::duration<Rep, Period>& rel_time) {
// TODO: is it possible to use nsync_from_time_point_ ?
using namespace std::chrono;
if (rel_time <= duration<Rep, Period>::zero())
return std::cv_status::timeout;
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/kernel_registry_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
#include <list>
#include <unordered_map>
#include "core/common/status.h"
#include "core/platform/ort_mutex.h"
#include "core/graph/graph_viewer.h"
#include "core/framework/customregistry.h"
#include "core/platform/ort_mutex.h"

namespace onnxruntime {
struct KernelCreateInfo;
Expand Down
3 changes: 1 addition & 2 deletions onnxruntime/core/framework/parallel_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,15 @@
#pragma once

#include <vector>
#include <condition_variable>
#include "core/common/common.h"
#include "core/common/status.h"
#include "core/common/logging/logging.h"
#include "core/platform/ort_mutex.h"
#include "core/framework/iexecutor.h"
#include "core/framework/framework_common.h"
#include "core/framework/ml_value.h"
#include "core/framework/session_state.h"
#include "core/graph/graph_viewer.h"
#include "core/platform/ort_mutex.h"

namespace onnxruntime {

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/framework/session_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
#include <unordered_map>
#include <vector>
#include "gsl/gsl"

#include "core/platform/ort_mutex.h"
#include "core/graph/onnx_protobuf.h"
#include "core/common/common.h"
#include "core/common/logging/logging.h"
#include "core/common/profiler.h"
Expand All @@ -26,6 +25,7 @@
#include "core/graph/graph_viewer.h"
#include "core/framework/fuse_nodes_funcs.h"
#include "core/platform/threadpool.h"
#include "core/platform/ort_mutex.h"

namespace onnxruntime {

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/framework/utils.cc
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/graph/onnx_protobuf.h"
#include "core/framework/utils.h"

#include <iomanip>


#include "core/graph/graph_viewer.h"
#include "core/framework/data_transfer_manager.h"
#include "core/framework/execution_frame.h"
Expand All @@ -18,7 +19,6 @@
#include "core/framework/sequential_executor.h"
#include "core/framework/tensorprotoutils.h"
#include "core/mlas/inc/mlas.h"
#include "core/graph/onnx_protobuf.h"

namespace ONNX_NAMESPACE {
std::ostream& operator<<(std::ostream& out, const TensorShapeProto& shape_proto) {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "core/providers/cuda/cuda_provider_factory.h"
#include <atomic>
#include "core/graph/onnx_protobuf.h"
#include "cuda_execution_provider.h"
#include "core/session/abi_session_options_impl.h"

Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/dnnl/dnnl_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
#include <list>
#include <memory.h>

#include "core/platform/ort_mutex.h"
#include "core/graph/constants.h"
#include "core/framework/allocatormgr.h"
#include "core/framework/execution_provider.h"
#include "core/providers/dnnl/subgraph/subgraph.h"
#include "core/platform/ort_mutex.h"

namespace dnnl {
struct memory;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/graph/onnx_protobuf.h"

#include "tensorrt_execution_provider.h"
#include "core/providers/cuda/cuda_allocator.h"
#include "core/providers/cuda/math/unary_elementwise_ops_impl.h"
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/session/abi_session_options.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/graph/onnx_protobuf.h"
#include "core/session/onnxruntime_c_api.h"
#include "core/session/ort_apis.h"
#include "core/framework/error_code_helper.h"
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/session/custom_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#ifdef _WIN32
#pragma warning(disable : 4267)
#endif

#include "core/graph/onnx_protobuf.h"
#include "core/session/inference_session.h"
#include "core/session/ort_apis.h"
#include "core/framework/customregistry.h"
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/graph/onnx_protobuf.h"
#include "core/session/inference_session.h"

#include <memory>
Expand All @@ -12,7 +13,6 @@

#include "core/common/logging/logging.h"
#include "core/platform/notification.h"
#include "core/platform/ort_mutex.h"
#include "core/platform/threadpool.h"
#include "core/graph/graph_viewer.h"
#include "core/graph/graph_utils.h"
Expand Down Expand Up @@ -53,6 +53,7 @@
#include "core/optimizer/graph_transformer_utils.h"
#include "core/util/thread_utils.h"
#include "core/session/inference_session_utils.h"
#include "core/platform/ort_mutex.h"

using namespace ONNX_NAMESPACE;

Expand Down
8 changes: 4 additions & 4 deletions onnxruntime/core/session/inference_session_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the MIT License.

#pragma once

#include "core/graph/onnx_protobuf.h"
#include "core/session/inference_session.h"
#include "core/framework/session_options.h"
#include "core/common/common.h"
Expand All @@ -14,9 +14,9 @@ namespace onnxruntime {

namespace inference_session_utils {

static const std::string kOrtConfigKey = "ort_config";
static const std::string kSessionOptionsKey = "session_options";
static const std::string kOrtLoadConfigFromModelEnvVar = "ORT_LOAD_CONFIG_FROM_MODEL";
static constexpr const char* kOrtConfigKey = "ort_config";
static constexpr const char* kSessionOptionsKey = "session_options";
static constexpr const char* kOrtLoadConfigFromModelEnvVar = "ORT_LOAD_CONFIG_FROM_MODEL";

} // namespace inference_session_utils

Expand Down
1 change: 1 addition & 0 deletions onnxruntime/test/framework/cuda/fence_cuda_test.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/graph/onnx_protobuf.h"

#include "core/session/inference_session.h"

Expand Down
Loading