From 126f257cc1b93377d08d06b6cf75335b22cb2356 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Wed, 15 Apr 2026 14:40:35 -0400 Subject: [PATCH 1/2] cuda.core.system: Add ProcessInfo APIs --- cuda_core/cuda/core/system/_device.pyx | 29 +++++++++++++ cuda_core/cuda/core/system/_process.pxi | 43 ++++++++++++++++++++ cuda_core/tests/system/test_system_device.py | 13 ++++++ 3 files changed, 85 insertions(+) create mode 100644 cuda_core/cuda/core/system/_process.pxi diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index f661c4e685..0c5723e7b8 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -34,6 +34,7 @@ include "_inforom.pxi" include "_memory.pxi" include "_pci_info.pxi" include "_performance.pxi" +include "_process.pxi" include "_repair_status.pxi" include "_temperature.pxi" @@ -716,6 +717,33 @@ cdef class Device: """ return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)] + ########################################################################## + # PROCESS + # See external class definitions in _process.pxi + + @property + def compute_running_processes(self) -> list[ProcessInfo]: + """ + Get information about processes with a compute context on a device + + For Fermi™ or newer fully supported devices. + + This function returns information only about compute running processes + (e.g. CUDA applications which have an active context). Any graphics + applications (e.g. using OpenGL, DirectX) won't be listed by this + function. + + Keep in mind that information returned by this call is dynamic and the + number of elements might change over time. + + In MIG mode, if device handle is provided, the API returns aggregate + information, only if the caller has appropriate privileges. Per-instance + information can be queried by using specific MIG device handles. 
+ Querying per-instance information using MIG device handles is not + supported if the device is in vGPU Host virtualization mode. + """ + return [ProcessInfo(proc) for proc in nvml.device_get_compute_running_processes_v3(self._handle)] + ########################################################################## # REPAIR STATUS # See external class definitions in _repair_status.pxi @@ -855,6 +883,7 @@ __all__ = [ "MemoryInfo", "PcieUtilCounter", "PciInfo", + "ProcessInfo", "Pstates", "RepairStatus", "Temperature", diff --git a/cuda_core/cuda/core/system/_process.pxi b/cuda_core/cuda/core/system/_process.pxi new file mode 100644 index 0000000000..4ceac0fe44 --- /dev/null +++ b/cuda_core/cuda/core/system/_process.pxi @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + + +class ProcessInfo: + """ + Information about a process using the GPU. + """ + def __init__(self, process_info: nvml.ProcessInfo): + self._process_info = process_info + + @property + def pid(self) -> int: + """ + The PID of the process. + """ + return self._process_info.pid + + @property + def used_gpu_memory(self) -> int: + """ + The amount of GPU memory (in bytes) used by the process. + """ + return self._process_info.used_gpu_memory + + @property + def gpu_instance_id(self) -> int: + """ + The GPU instance ID for MIG devices. + + Only valid for processes running on MIG devices. + """ + return self._process_info.gpu_instance_id + + @property + def compute_instance_id(self) -> int: + """ + The Compute instance ID for MIG devices. + + Only valid for processes running on MIG devices. 
+ """ + return self._process_info.compute_instance_id diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py index 2a094d8211..014fb4d396 100644 --- a/cuda_core/tests/system/test_system_device.py +++ b/cuda_core/tests/system/test_system_device.py @@ -729,3 +729,16 @@ def test_pstates(): assert isinstance(utilization.percentage, int) assert isinstance(utilization.inc_threshold, int) assert isinstance(utilization.dec_threshold, int) + + +def test_compute_running_processes(): + for device in system.Device.get_all_devices(): + with unsupported_before(device, "FERMI"): + processes = device.compute_running_processes + assert isinstance(processes, list) + for proc in processes: + assert isinstance(proc, system.ProcessInfo) + assert isinstance(proc.pid, int) + assert isinstance(proc.used_gpu_memory, int) + assert isinstance(proc.gpu_instance_id, int) + assert isinstance(proc.compute_instance_id, int) From 352ea3b9f364408a5ae7e14153ad259e6730d58b Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Wed, 15 Apr 2026 16:45:18 -0400 Subject: [PATCH 2/2] Add API docs --- cuda_core/docs/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index 005866ddb2..658c746fc0 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -257,6 +257,7 @@ Types system.InforomInfo system.MemoryInfo system.PciInfo + system.ProcessInfo system.RepairStatus system.Temperature system.ThermalSensor