From 81692444ecbca6455adf6443adb136533f99f782 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 8 Apr 2026 17:38:51 +0000 Subject: [PATCH 1/6] Strip debug symbols from cuda-core Linux wheels Add -Wl,--strip-all to extra_link_args for wheel builds on Linux, matching the existing behavior in cuda_bindings/build_hooks.py. Without stripping, the 0.7.0 Linux wheel is ~30 MB (103 MB extracted) because every .so ships with debug_info. After stripping, extracted size drops from 103 MB to ~11 MB, bringing the wheel in line with the ~4-5 MB Windows wheels. Closes #1881 Co-Authored-By: Claude Opus 4.6 (1M context) --- cuda_core/build_hooks.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cuda_core/build_hooks.py b/cuda_core/build_hooks.py index b368b02759..48e58a178e 100644 --- a/cuda_core/build_hooks.py +++ b/cuda_core/build_hooks.py @@ -91,7 +91,7 @@ def _determine_cuda_major_version() -> str: _extensions = None -def _build_cuda_core(): +def _build_cuda_core(strip=False): # Customizing the build hooks is needed because we must defer cythonization until cuda-bindings, # now a required build-time dependency that's dynamically installed via the other hook below, # is installed. Otherwise, cimport any cuda.bindings modules would fail! @@ -136,6 +136,9 @@ def get_sources(mod_name): all_include_dirs = [os.path.join(_get_cuda_path(), "include")] extra_compile_args = [] + extra_link_args = [] + if strip and sys.platform == "linux": + extra_link_args += ["-Wl,--strip-all"] if COMPILE_FOR_COVERAGE: # CYTHON_TRACE_NOGIL indicates to trace nogil functions. It is not # related to free-threading builds. 
@@ -152,6 +155,7 @@ def get_sources(mod_name): + all_include_dirs, language="c++", extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, ) for mod in module_names() ) @@ -254,7 +258,7 @@ def _add_cython_include_paths_to_pth(wheel_path: str) -> None: def build_editable(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_core() + _build_cuda_core(strip=False) wheel_name = _build_meta.build_editable(wheel_directory, config_settings, metadata_directory) # Patch the .pth file to add Cython include paths @@ -265,7 +269,7 @@ def build_editable(wheel_directory, config_settings=None, metadata_directory=Non def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_core() + _build_cuda_core(strip=True) return _build_meta.build_wheel(wheel_directory, config_settings, metadata_directory) From cce061559db75b8cd3684b70f97ecbae4172b513 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 10 Apr 2026 09:01:39 -0400 Subject: [PATCH 2/6] Fix debug builds of cuda_bindings and cuda_core --- cuda_bindings/README.md | 9 +++++++++ cuda_bindings/build_hooks.py | 12 ++++++------ cuda_core/README.md | 9 +++++++++ cuda_core/build_hooks.py | 19 ++++++++++++++----- 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/cuda_bindings/README.md b/cuda_bindings/README.md index b79b0febff..0c0282afdd 100644 --- a/cuda_bindings/README.md +++ b/cuda_bindings/README.md @@ -10,6 +10,15 @@ Please refer to the [Installation page](https://nvidia.github.io/cuda-python/cud This subpackage adheres to the developing practices described in the parent metapackage [CONTRIBUTING.md](https://github.com/NVIDIA/cuda-python/blob/main/CONTRIBUTING.md). +## Debugging + +Editable installs have debuggable binaries by default. 
To build a non-editable +debug build, pass the `debug=True` configuration option to `pip` or `uv`: + +``` +pip install -v ./cuda_bindings -C="debug=True" +``` + ## Testing Testing dependencies can be installed using the `[test]` optional dependency identifier. For example, `pip install -v -e .[test]`. diff --git a/cuda_bindings/build_hooks.py b/cuda_bindings/build_hooks.py index a48aa0f0e9..29a1546e96 100644 --- a/cuda_bindings/build_hooks.py +++ b/cuda_bindings/build_hooks.py @@ -283,7 +283,7 @@ def _prep_extensions(sources, libraries, include_dirs, library_dirs, extra_compi # Main build function -def _build_cuda_bindings(strip=False): +def _build_cuda_bindings(debug=False): """Build all cuda-bindings extensions. All CUDA-dependent logic (header parsing, code generation, cythonization) @@ -362,14 +362,13 @@ def _build_cuda_bindings(strip=False): "-Wno-deprecated-declarations", "-fno-var-tracking-assignments", ] - if "--debug" in sys.argv: + if debug: extra_cythonize_kwargs["gdb_debug"] = True extra_compile_args += ["-g", "-O0"] extra_compile_args += ["-D _GLIBCXX_ASSERTIONS"] else: extra_compile_args += ["-O3"] - if strip and sys.platform == "linux": - extra_link_args += ["-Wl,--strip-all"] + extra_link_args += ["-Wl,--strip-all"] if compile_for_coverage: # CYTHON_TRACE_NOGIL indicates to trace nogil functions. It is not # related to free-threading builds. 
@@ -429,10 +428,11 @@ def _cleanup_dst_files(): def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_bindings(strip=True) + debug = config_settings.get("debug", False) if config_settings else False + _build_cuda_bindings(debug=debug) return _build_meta.build_wheel(wheel_directory, config_settings, metadata_directory) def build_editable(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_bindings(strip=False) + _build_cuda_bindings(debug=True) return _build_meta.build_editable(wheel_directory, config_settings, metadata_directory) diff --git a/cuda_core/README.md b/cuda_core/README.md index 7ea4196601..a0eb7d3c25 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -10,6 +10,15 @@ Please refer to the [Installation page](https://nvidia.github.io/cuda-python/cud This subpackage adheres to the developing practices described in the parent metapackage [CONTRIBUTING.md](https://github.com/NVIDIA/cuda-python/blob/main/CONTRIBUTING.md). +## Debugging + +Editable installs have debuggable binaries by default. To build a non-editable +build, pass the `debug=True` configuration option to `pip` or `uv`: + +``` +pip install -v ./cuda_bindings -C="debug=True" +``` + ## Testing To run these tests: diff --git a/cuda_core/build_hooks.py b/cuda_core/build_hooks.py index 48e58a178e..02a7654e1b 100644 --- a/cuda_core/build_hooks.py +++ b/cuda_core/build_hooks.py @@ -91,7 +91,7 @@ def _determine_cuda_major_version() -> str: _extensions = None -def _build_cuda_core(strip=False): +def _build_cuda_core(debug=False): # Customizing the build hooks is needed because we must defer cythonization until cuda-bindings, # now a required build-time dependency that's dynamically installed via the other hook below, # is installed. Otherwise, cimport any cuda.bindings modules would fail! 
@@ -137,8 +137,15 @@ def get_sources(mod_name): all_include_dirs = [os.path.join(_get_cuda_path(), "include")] extra_compile_args = [] extra_link_args = [] - if strip and sys.platform == "linux": - extra_link_args += ["-Wl,--strip-all"] + extra_cythonize_kwargs = {} + if sys.platform != "win32": + if debug: + extra_cythonize_kwargs["gdb_debug"] = True + extra_compile_args += ["-g", "-O0"] + extra_compile_args += ["-D _GLIBCXX_ASSERTIONS"] + else: + extra_compile_args += ["-O3"] + extra_link_args += ["-Wl,--strip-all"] if COMPILE_FOR_COVERAGE: # CYTHON_TRACE_NOGIL indicates to trace nogil functions. It is not # related to free-threading builds. @@ -173,6 +180,7 @@ def get_sources(mod_name): nthreads=nthreads, compiler_directives=compiler_directives, compile_time_env=compile_time_env, + **extra_cythonize_kwargs, ) return @@ -258,7 +266,7 @@ def _add_cython_include_paths_to_pth(wheel_path: str) -> None: def build_editable(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_core(strip=False) + _build_cuda_core(debug=True) wheel_name = _build_meta.build_editable(wheel_directory, config_settings, metadata_directory) # Patch the .pth file to add Cython include paths @@ -269,7 +277,8 @@ def build_editable(wheel_directory, config_settings=None, metadata_directory=Non def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_core(strip=True) + debug = config_settings.get("debug", False) if config_settings else False + _build_cuda_core(debug=debug) return _build_meta.build_wheel(wheel_directory, config_settings, metadata_directory) From f24e66e194e120fdb586f3df91b172bd97754b58 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 10 Apr 2026 09:08:08 -0400 Subject: [PATCH 3/6] Add note about Windows --- cuda_bindings/README.md | 2 ++ cuda_core/README.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cuda_bindings/README.md b/cuda_bindings/README.md index 0c0282afdd..744e64e299 100644 --- 
a/cuda_bindings/README.md +++ b/cuda_bindings/README.md @@ -19,6 +19,8 @@ debug build, pass the `debug=True` configuration option to `pip` or `uv`: pip install -v ./cuda_bindings -C="debug=True" ``` +Debuggable builds are not supported on Windows. + ## Testing Testing dependencies can be installed using the `[test]` optional dependency identifier. For example, `pip install -v -e .[test]`. diff --git a/cuda_core/README.md b/cuda_core/README.md index a0eb7d3c25..2300947899 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -19,6 +19,8 @@ build, pass the `debug=True` configuration option to `pip` or `uv`: pip install -v ./cuda_bindings -C="debug=True" ``` +Debuggable builds are not supported on Windows. + ## Testing To run these tests: From 16089731a03138fb052cb5b0bc3ca1423cf265c8 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 10 Apr 2026 12:46:31 -0400 Subject: [PATCH 4/6] Address comments in PR --- cuda_bindings/README.md | 8 ++------ cuda_bindings/build_hooks.py | 8 ++++++-- cuda_core/README.md | 8 ++------ cuda_core/build_hooks.py | 9 +++++++-- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/cuda_bindings/README.md b/cuda_bindings/README.md index 744e64e299..0f657507cb 100644 --- a/cuda_bindings/README.md +++ b/cuda_bindings/README.md @@ -12,12 +12,8 @@ This subpackage adheres to the developing practices described in the parent meta ## Debugging -Editable installs have debuggable binaries by default. To build a non-editable -debug build, pass the `debug=True` configuration option to `pip` or `uv`: - -``` -pip install -v ./cuda_bindings -C="debug=True" -``` +Pass the `pip` / `uv` configuration option `-C="debug=True"` explicitly to build debuggable binaries. +Debuggable binaries are built by default for editable builds. Debuggable builds are not supported on Windows. 
diff --git a/cuda_bindings/build_hooks.py b/cuda_bindings/build_hooks.py index 29a1546e96..bd4de613f1 100644 --- a/cuda_bindings/build_hooks.py +++ b/cuda_bindings/build_hooks.py @@ -355,7 +355,10 @@ def _build_cuda_bindings(debug=False): extra_compile_args = [] extra_link_args = [] extra_cythonize_kwargs = {} - if sys.platform != "win32": + if sys.platform == "win32": + if debug: + raise RuntimeError("Debuggable builds are not supported on Windows.") + else: extra_compile_args += [ "-std=c++14", "-fpermissive", @@ -428,7 +431,8 @@ def _cleanup_dst_files(): def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - debug = config_settings.get("debug", False) if config_settings else False + debug_default = sys.platform != "win32" # Debug builds not supported on Windows + debug = config_settings.get("debug", debug_default) if config_settings else debug_default _build_cuda_bindings(debug=debug) return _build_meta.build_wheel(wheel_directory, config_settings, metadata_directory) diff --git a/cuda_core/README.md b/cuda_core/README.md index 2300947899..c1f0f29c7d 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -12,12 +12,8 @@ This subpackage adheres to the developing practices described in the parent meta ## Debugging -Editable installs have debuggable binaries by default. To build a non-editable -build, pass the `debug=True` configuration option to `pip` or `uv`: - -``` -pip install -v ./cuda_bindings -C="debug=True" -``` +Pass the `pip` / `uv` configuration option `-C="debug=True"` explicitly to build debuggable binaries. +Debuggable binaries are built by default for editable builds. Debuggable builds are not supported on Windows. 
diff --git a/cuda_core/build_hooks.py b/cuda_core/build_hooks.py index 02a7654e1b..52d7350a0d 100644 --- a/cuda_core/build_hooks.py +++ b/cuda_core/build_hooks.py @@ -138,7 +138,10 @@ def get_sources(mod_name): extra_compile_args = [] extra_link_args = [] extra_cythonize_kwargs = {} - if sys.platform != "win32": + if sys.platform == "win32": + if debug: + raise RuntimeError("Debuggable builds are not supported on Windows.") + else: if debug: extra_cythonize_kwargs["gdb_debug"] = True extra_compile_args += ["-g", "-O0"] @@ -266,7 +269,9 @@ def _add_cython_include_paths_to_pth(wheel_path: str) -> None: def build_editable(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_core(debug=True) + debug_default = sys.platform != "win32" # Debug builds not supported on Windows + debug = config_settings.get("debug", debug_default) if config_settings else debug_default + _build_cuda_core(debug=debug) wheel_name = _build_meta.build_editable(wheel_directory, config_settings, metadata_directory) # Patch the .pth file to add Cython include paths From 398b28dadb107480444c999e6ab50060c88ddb89 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 10 Apr 2026 12:49:11 -0400 Subject: [PATCH 5/6] Address comments in PR --- cuda_bindings/build_hooks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cuda_bindings/build_hooks.py b/cuda_bindings/build_hooks.py index 6b951bbd45..ce4745f7a0 100644 --- a/cuda_bindings/build_hooks.py +++ b/cuda_bindings/build_hooks.py @@ -459,12 +459,13 @@ def _cleanup_dst_files(): def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - debug_default = sys.platform != "win32" # Debug builds not supported on Windows - debug = config_settings.get("debug", debug_default) if config_settings else debug_default + debug = config_settings.get("debug", False) if config_settings else False _build_cuda_bindings(debug=debug) return _build_meta.build_wheel(wheel_directory, 
config_settings, metadata_directory) def build_editable(wheel_directory, config_settings=None, metadata_directory=None): - _build_cuda_bindings(debug=True) + debug_default = sys.platform != "win32" # Debug builds not supported on Windows + debug = config_settings.get("debug", debug_default) if config_settings else debug_default + _build_cuda_bindings(debug=debug) return _build_meta.build_editable(wheel_directory, config_settings, metadata_directory) From 3e29fd59ffd850a372b5bfe2c13b14613b6cc50b Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 10 Apr 2026 13:19:27 -0400 Subject: [PATCH 6/6] Show both forms of config in README --- cuda_bindings/README.md | 3 ++- cuda_core/README.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/README.md b/cuda_bindings/README.md index 0f657507cb..2a18f5a2df 100644 --- a/cuda_bindings/README.md +++ b/cuda_bindings/README.md @@ -12,7 +12,8 @@ This subpackage adheres to the developing practices described in the parent meta ## Debugging -Pass the `pip` / `uv` configuration option `-C="debug=True"` explicitly to build debuggable binaries. +Pass the `pip` / `uv` configuration option `-C="debug=True"` or +`--config-settings="debug=True"` to explicitly build debuggable binaries. Debuggable binaries are built by default for editable builds. Debuggable builds are not supported on Windows. diff --git a/cuda_core/README.md b/cuda_core/README.md index c1f0f29c7d..7959dfb00b 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -12,7 +12,8 @@ This subpackage adheres to the developing practices described in the parent meta ## Debugging -Pass the `pip` / `uv` configuration option `-C="debug=True"` explicitly to build debuggable binaries. +Pass the `pip` / `uv` configuration option `-C="debug=True"` or +`--config-settings="debug=True"` to explicitly build debuggable binaries. Debuggable binaries are built by default for editable builds. Debuggable builds are not supported on Windows.