From 78cc728eb5be74abb7bfaf979b056f14b9f008e2 Mon Sep 17 00:00:00 2001 From: Sourab Gupta Date: Wed, 23 Jul 2025 22:08:40 +0000 Subject: [PATCH 1/4] Add logic to skip WSL tests and add cufile.json for async tests --- cuda_bindings/tests/cufile.json | 18 +++++++++++ cuda_bindings/tests/test_cufile.py | 51 +++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 11 deletions(-) create mode 100644 cuda_bindings/tests/cufile.json diff --git a/cuda_bindings/tests/cufile.json b/cuda_bindings/tests/cufile.json new file mode 100644 index 0000000000..36b3b9bd72 --- /dev/null +++ b/cuda_bindings/tests/cufile.json @@ -0,0 +1,18 @@ +{ + // NOTE : Application can override custom configuration via export CUFILE_ENV_PATH_JSON= + // e.g : export CUFILE_ENV_PATH_JSON="/home//cufile.json" + + + "execution" : { + // max number of workitems in the queue; + "max_io_queue_depth": 128, + // max number of host threads per gpu to spawn for parallel IO + "max_io_threads" : 4, + // enable support for parallel IO + "parallel_io" : true, + // minimum IO threshold before splitting the IO + "min_io_threshold_size_kb" : 8192, + // maximum parallelism for a single request + "max_request_parallelism" : 4 + } +} diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 4962a7feda..c4f6d2cbab 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -1,10 +1,11 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import ctypes import errno import os +import pathlib +import platform import tempfile from contextlib import suppress @@ -18,9 +19,40 @@ cufile = None +def platform_is_wsl(): + """Check if running on Windows Subsystem for Linux (WSL).""" + return platform.system() == "Linux" and "microsoft" in pathlib.Path("/proc/version").read_text().lower() + + if cufile is None: pytest.skip("skipping tests on Windows", allow_module_level=True) +if platform_is_wsl(): + pytest.skip("skipping cuFile tests on WSL", allow_module_level=True) + + +@pytest.fixture +def cufile_env_json(): + """Set CUFILE_ENV_PATH_JSON environment variable for async tests.""" + original_value = os.environ.get('CUFILE_ENV_PATH_JSON') + + # Use /etc/cufile.json if it exists, otherwise fallback to cufile.json in tests directory + if os.path.exists('/etc/cufile.json'): + config_path = '/etc/cufile.json' + else: + # Get absolute path to cufile.json in the same directory as this test file + test_dir = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(test_dir, 'cufile.json') + + print(f"Using cuFile config: {config_path}") + os.environ['CUFILE_ENV_PATH_JSON'] = config_path + yield + # Restore original value or remove if it wasn't set + if original_value is not None: + os.environ['CUFILE_ENV_PATH_JSON'] = original_value + else: + os.environ.pop('CUFILE_ENV_PATH_JSON', None) + def cufileLibraryAvailable(): """Check if cuFile library is available on the system.""" @@ -730,7 +762,7 @@ def test_cufile_read_write_large(): @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") -def test_cufile_write_async(): +def test_cufile_write_async(cufile_env_json): """Test cuFile asynchronous write operations.""" # Initialize CUDA (err,) = cuda.cuInit(0) @@ -823,7 +855,7 @@ def test_cufile_write_async(): @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") -def test_cufile_read_async(): +def test_cufile_read_async(cufile_env_json): """Test cuFile asynchronous read operations.""" # Initialize CUDA (err,) = cuda.cuInit(0) @@ -929,7 +961,7 @@ def test_cufile_read_async(): @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") -def test_cufile_async_read_write(): +def test_cufile_async_read_write(cufile_env_json): """Test cuFile asynchronous read and write operations in sequence.""" # Initialize CUDA (err,) = cuda.cuInit(0) @@ -1275,7 +1307,6 @@ def test_batch_io_basic(): cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device) - @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") def test_batch_io_cancel(): """Test batch IO cancellation.""" @@ -1615,9 +1646,7 @@ def test_set_get_parameter_size_t(): # Test max device pinned memory size (in KB) max_pinned_kb = 2048 # 2MB max pinned memory cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb) - retrieved_value = cufile.get_parameter_size_t( - cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB - ) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB) assert retrieved_value == max_pinned_kb, ( f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}" ) @@ -1787,7 +1816,7 @@ def test_set_get_parameter_string(): ) retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOGGING_LEVEL, 256) # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) + retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) print(f"Logging level test: set {logging_level}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == logging_level, ( @@ -1809,7 +1838,7 @@ def test_set_get_parameter_string(): ) retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.ENV_LOGFILE_PATH, 256) # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) + retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) print(f"Log file path test: set {logfile_path}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == logfile_path, f"Log file path mismatch: set {logfile_path}, got {retrieved_value}" @@ -1827,7 +1856,7 @@ def test_set_get_parameter_string(): cufile.set_parameter_string(cufile.StringConfigParameter.LOG_DIR, int(ctypes.addressof(log_dir_buffer))) retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOG_DIR, 256) # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) + retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) print(f"Log directory test: set {log_dir}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == log_dir, f"Log directory mismatch: set {log_dir}, got {retrieved_value}" From f142b077c5f0ce1c1525b80c2a60e2a7cda5ccdc Mon Sep 17 00:00:00 2001 From: Sourab Gupta Date: Thu, 24 Jul 2025 01:08:49 +0000 Subject: [PATCH 2/4] Review Comments --- cuda_bindings/tests/test_cufile.py | 38 ++++++++++++++++++------------ 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index c4f6d2cbab..fba20dea57 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -3,6 +3,7 @@ import ctypes import errno +import logging import os import pathlib import platform @@ -13,6 +14,13 @@ import cuda.bindings.driver as cuda +# Configure logging to show INFO level and above +logging.basicConfig( + level=logging.INFO, + format='%(levelname)s: %(message)s', + force=True # Override any existing logging configuration +) + try: from cuda.bindings import cufile except ImportError: @@ -44,7 +52,7 @@ def cufile_env_json(): test_dir = os.path.dirname(os.path.abspath(__file__)) config_path = os.path.join(test_dir, 'cufile.json') - print(f"Using cuFile config: {config_path}") + logging.info(f"Using cuFile config: {config_path}") os.environ['CUFILE_ENV_PATH_JSON'] = config_path yield # Restore original value or remove if it wasn't set @@ -59,10 +67,10 @@ def cufileLibraryAvailable(): try: # Try to get cuFile library version - this will fail if library is not available version = cufile.get_version() - print(f"cuFile library available, version: {version}") + logging.info(f"cuFile library available, version: {version}") return True except Exception as e: - print(f"cuFile library not available: {e}") + logging.warning(f"cuFile library not available: {e}") return False @@ -71,14 +79,14 @@ def cufileVersionLessThan(target): try: # Get cuFile library version version = cufile.get_version() - print(f"cuFile library version: {version}") + logging.info(f"cuFile library version: {version}") # Check if version is less than target if version < target: - print(f"cuFile library version {version} is less than required {target}") + logging.warning(f"cuFile library version {version} is less than required {target}") return True return False except Exception as e: - print(f"Error checking cuFile version: {e}") + logging.error(f"Error checking cuFile version: {e}") return True # Assume old version if any error occurs @@ -97,14 +105,14 @@ def isSupportedFilesystem(): current_dir = os.path.abspath(".") if current_dir.startswith(mount_point): fs_type_lower = fs_type.lower() - print(f"Current filesystem type: {fs_type_lower}") + logging.info(f"Current filesystem type: {fs_type_lower}") return fs_type_lower in ["ext4", "xfs"] # If we get here, we couldn't determine the filesystem type - print("Could not determine filesystem type from /proc/mounts") + logging.warning("Could not determine filesystem type from /proc/mounts") return False except Exception as e: - print(f"Error checking filesystem type: {e}") + logging.error(f"Error checking filesystem type: {e}") return False @@ -1817,13 +1825,13 @@ def test_set_get_parameter_string(): retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOGGING_LEVEL, 256) # Use safe_decode_string to handle null terminators and padding retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) - print(f"Logging level test: set {logging_level}, got {retrieved_value}") + logging.info(f"Logging level test: set {logging_level}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == logging_level, ( f"Logging level mismatch: set {logging_level}, got {retrieved_value}" ) except Exception as e: - print(f"Logging level test failed: {e}") + logging.error(f"Logging level test failed: {e}") # Re-raise the exception to make the test fail raise @@ -1839,11 +1847,11 @@ def test_set_get_parameter_string(): retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.ENV_LOGFILE_PATH, 256) # Use safe_decode_string to handle null terminators and padding retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) - print(f"Log file path test: set {logfile_path}, got {retrieved_value}") + logging.info(f"Log file path test: set {logfile_path}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == logfile_path, f"Log file path mismatch: set {logfile_path}, got {retrieved_value}" except Exception as e: - print(f"Log file path test failed: {e}") + logging.error(f"Log file path test failed: {e}") # Re-raise the exception to make the test fail raise @@ -1857,11 +1865,11 @@ def test_set_get_parameter_string(): retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOG_DIR, 256) # Use safe_decode_string to handle null terminators and padding retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) - print(f"Log directory test: set {log_dir}, got {retrieved_value}") + logging.info(f"Log directory test: set {log_dir}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == log_dir, f"Log directory mismatch: set {log_dir}, got {retrieved_value}" except Exception as e: - print(f"Log directory test failed: {e}") + logging.error(f"Log directory test failed: {e}") # Re-raise the exception to make the test fail raise From d1424726b27937f4f933228af4581f1d67735ee6 Mon Sep 17 00:00:00 2001 From: Sourab Gupta Date: Thu, 24 Jul 2025 01:10:29 +0000 Subject: [PATCH 3/4] Pre-commit --- cuda_bindings/tests/test_cufile.py | 35 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index fba20dea57..6f1a0b5a22 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -16,9 +16,9 @@ # Configure logging to show INFO level and above logging.basicConfig( - level=logging.INFO, - format='%(levelname)s: %(message)s', - force=True # Override any existing logging configuration + level=logging.INFO, + format="%(levelname)s: %(message)s", + force=True, # Override any existing logging configuration ) try: @@ -42,24 +42,24 @@ def platform_is_wsl(): @pytest.fixture def cufile_env_json(): """Set CUFILE_ENV_PATH_JSON environment variable for async tests.""" - original_value = os.environ.get('CUFILE_ENV_PATH_JSON') - + original_value = os.environ.get("CUFILE_ENV_PATH_JSON") + # Use /etc/cufile.json if it exists, otherwise fallback to cufile.json in tests directory - if os.path.exists('/etc/cufile.json'): - config_path = '/etc/cufile.json' + if os.path.exists("/etc/cufile.json"): + config_path = "/etc/cufile.json" else: # Get absolute path to cufile.json in the same directory as this test file test_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(test_dir, 'cufile.json') - + config_path = os.path.join(test_dir, "cufile.json") + logging.info(f"Using cuFile config: {config_path}") - os.environ['CUFILE_ENV_PATH_JSON'] = config_path + os.environ["CUFILE_ENV_PATH_JSON"] = config_path yield # Restore original value or remove if it wasn't set if original_value is not None: - os.environ['CUFILE_ENV_PATH_JSON'] = original_value + os.environ["CUFILE_ENV_PATH_JSON"] = original_value else: - os.environ.pop('CUFILE_ENV_PATH_JSON', None) + os.environ.pop("CUFILE_ENV_PATH_JSON", None) def cufileLibraryAvailable(): @@ -1315,6 +1315,7 @@ def test_batch_io_basic(): cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device) + @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") def test_batch_io_cancel(): """Test batch IO cancellation.""" @@ -1654,7 +1655,9 @@ def test_set_get_parameter_size_t(): # Test max device pinned memory size (in KB) max_pinned_kb = 2048 # 2MB max pinned memory cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB) + retrieved_value = cufile.get_parameter_size_t( + cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB + ) assert retrieved_value == max_pinned_kb, ( f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}" ) @@ -1824,7 +1827,7 @@ def test_set_get_parameter_string(): ) retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOGGING_LEVEL, 256) # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) + retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) logging.info(f"Logging level test: set {logging_level}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == logging_level, ( @@ -1846,7 +1849,7 @@ def test_set_get_parameter_string(): ) retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.ENV_LOGFILE_PATH, 256) # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) + retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) logging.info(f"Log file path test: set {logfile_path}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == logfile_path, f"Log file path mismatch: set {logfile_path}, got {retrieved_value}" @@ -1864,7 +1867,7 @@ def test_set_get_parameter_string(): cufile.set_parameter_string(cufile.StringConfigParameter.LOG_DIR, int(ctypes.addressof(log_dir_buffer))) retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOG_DIR, 256) # Use safe_decode_string to handle null terminators and padding - retrieved_value = safe_decode_string(retrieved_value_raw.encode('utf-8')) + retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8")) logging.info(f"Log directory test: set {log_dir}, got {retrieved_value}") # The retrieved value should be a string, so we can compare directly assert retrieved_value == log_dir, f"Log directory mismatch: set {log_dir}, got {retrieved_value}" From 8916e8ad0b4bf09168b1a267157dd79a7ffb3a5a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 23 Jul 2025 23:32:54 -0700 Subject: [PATCH 4/4] test: improve cuFile test ergonomics and reduce log noise - Add @cache to isSupportedFilesystem(), cufileLibraryAvailable(), and cufileVersionLessThan() to avoid redundant checks and repeated INFO log messages - Set fixture scope for cufile_env_json to "module" to reduce env var churn --- cuda_bindings/tests/test_cufile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 6f1a0b5a22..1a51348680 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -9,6 +9,7 @@ import platform import tempfile from contextlib import suppress +from functools import cache import pytest @@ -39,7 +40,7 @@ def platform_is_wsl(): pytest.skip("skipping cuFile tests on WSL", allow_module_level=True) -@pytest.fixture +@pytest.fixture(scope="module") def cufile_env_json(): """Set CUFILE_ENV_PATH_JSON environment variable for async tests.""" original_value = os.environ.get("CUFILE_ENV_PATH_JSON") @@ -62,6 +63,7 @@ def cufile_env_json(): os.environ.pop("CUFILE_ENV_PATH_JSON", None) +@cache def cufileLibraryAvailable(): """Check if cuFile library is available on the system.""" try: @@ -74,6 +76,7 @@ def cufileLibraryAvailable(): return False +@cache def cufileVersionLessThan(target): """Check if cuFile library version is less than target version.""" try: @@ -90,6 +93,7 @@ def cufileVersionLessThan(target): return True # Assume old version if any error occurs +@cache def isSupportedFilesystem(): """Check if the current filesystem is supported (ext4 or xfs).""" try: