Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -490,10 +490,8 @@ cdef bint __cuPythonInit = False
ctypedef CUresult (*__cuGetProcAddress_v2_T)(const char*, void**, int, cuuint64_t, CUdriverProcAddressQueryResult*) except?CUDA_ERROR_NOT_FOUND nogil
cdef __cuGetProcAddress_v2_T _F_cuGetProcAddress_v2 = NULL

cdef int cuPythonInit() except -1 nogil:
cdef int _cuPythonInit() except -1 nogil:
global __cuPythonInit
if __cuPythonInit:
return 0

cdef bint usePTDS
cdef char libPath[260]
Expand Down Expand Up @@ -8883,6 +8881,14 @@ cdef int cuPythonInit() except -1 nogil:
__cuPythonInit = True
return 0

# Fast-path wrapper around _cuPythonInit(). It is deliberately kept very small
# so the C compiler can inline the "already initialized?" check at call sites.
# Returns 0 immediately when the module-level __cuPythonInit flag is set;
# otherwise it returns whatever _cuPythonInit() returns.
cdef inline int cuPythonInit() except -1 nogil:
if __cuPythonInit:
return 0

return _cuPythonInit()

{{if 'cuGetErrorString' in found_functions}}

cdef CUresult _cuGetErrorString(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil:
Expand Down
12 changes: 9 additions & 3 deletions cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,8 @@ cdef bint __cuPythonInit = False
{{if 'nvrtcGetPCHHeapSizeRequired' in found_functions}}cdef void *__nvrtcGetPCHHeapSizeRequired = NULL{{endif}}
{{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}}

cdef int cuPythonInit() except -1 nogil:
cdef int _cuPythonInit() except -1 nogil:
global __cuPythonInit
if __cuPythonInit:
return 0

with gil, __symbol_lock:
{{if 'Windows' == platform.system()}}
Expand Down Expand Up @@ -324,6 +322,14 @@ cdef int cuPythonInit() except -1 nogil:
__cuPythonInit = True
return 0

# Fast-path wrapper around _cuPythonInit(). It is deliberately kept very small
# so the C compiler can inline the "already initialized?" check at call sites.
# Returns 0 immediately when the module-level __cuPythonInit flag is set;
# otherwise it returns whatever _cuPythonInit() returns.
cdef inline int cuPythonInit() except -1 nogil:
if __cuPythonInit:
return 0

return _cuPythonInit()

{{if 'nvrtcGetErrorString' in found_functions}}

cdef const char* _nvrtcGetErrorString(nvrtcResult result) except ?NULL nogil:
Expand Down
12 changes: 9 additions & 3 deletions cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,22 @@ cimport cython

cdef bint __cudaPythonInit = False
cdef bint __usePTDS = False
cdef int cudaPythonInit() except -1 nogil:
cdef int _cudaPythonInit() except -1 nogil:
global __cudaPythonInit
global __usePTDS
if __cudaPythonInit:
return __usePTDS
with gil:
__usePTDS = os.getenv('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', default=False)
__cudaPythonInit = True
return __usePTDS

# Fast-path wrapper around _cudaPythonInit(). It is deliberately kept very
# small so the C compiler can inline the "already initialized?" check at call
# sites.
# Returns the cached module-level __usePTDS value when the __cudaPythonInit
# flag is set; otherwise it returns whatever _cudaPythonInit() returns.
cdef inline int cudaPythonInit() except -1 nogil:
if __cudaPythonInit:
return __usePTDS

return _cudaPythonInit()

{{if 'cudaDeviceReset' in found_functions}}

cdef cudaError_t _cudaDeviceReset() except ?cudaErrorCallRequiresNewerDriver nogil:
Expand Down
2 changes: 2 additions & 0 deletions cuda_bindings/docs/source/release/13.X.Y-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Highlights

* Automatic CUDA library path detection based on ``CUDA_HOME``, eliminating the need to manually set ``LIBRARY_PATH`` environment variables for installation.

* The Python overhead of calling functions in the CUDA bindings modules ``driver``, ``runtime`` and ``nvrtc`` has been reduced by approximately 30%.


Known issues
------------
Expand Down
Loading