NVIDIA · mdboom · Dec 18, 2025 · Dec 18, 2025 · Dec 18, 2025 · Dec 19, 2025
diff --git a/cuda_bindings/cuda/bindings/_nvml.pyx b/cuda_bindings/cuda/bindings/_nvml.pyx
diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py
@@ -28,7 +28,7 @@
 finally:
     del bindings, importlib, subdir, cuda_major, cuda_minor
 
-from cuda.core import utils  # noqa: E402
+from cuda.core import system, utils  # noqa: E402
 from cuda.core._device import Device  # noqa: E402
 from cuda.core._event import Event, EventOptions  # noqa: E402
 from cuda.core._graph import (  # noqa: E402
@@ -62,8 +62,3 @@
 from cuda.core._module import Kernel, ObjectCode  # noqa: E402
 from cuda.core._program import Program, ProgramOptions  # noqa: E402
 from cuda.core._stream import Stream, StreamOptions  # noqa: E402
-from cuda.core._system import System  # noqa: E402
-
-system = System()
-__import__("sys").modules[__spec__.name + ".system"] = system
-del System
diff --git a/cuda_core/cuda/core/_system.py b/cuda_core/cuda/core/_system.py
diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py
@@ -38,7 +38,7 @@ def _warn_deprecated():
 _warn_deprecated()
 
 
-from cuda.core import utils  # noqa: E402
+from cuda.core import system, utils  # noqa: E402
 
 # Make utils accessible as a submodule for backward compatibility
 __import__("sys").modules[__spec__.name + ".utils"] = utils
@@ -73,8 +73,3 @@ def _warn_deprecated():
 from cuda.core._module import Kernel, ObjectCode  # noqa: E402
 from cuda.core._program import Program, ProgramOptions  # noqa: E402
 from cuda.core._stream import Stream, StreamOptions  # noqa: E402
-from cuda.core._system import System  # noqa: E402
-
-system = System()
-__import__("sys").modules[__spec__.name + ".system"] = system
-del System
diff --git a/cuda_core/cuda/core/system/__init__.py b/cuda_core/cuda/core/system/__init__.py
@@ -0,0 +1,63 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# ruff: noqa: F403, F405
+
+
+__all__ = [  # names exported unconditionally; extended below when HAS_WORKING_NVML is truthy
+    "get_driver_version",
+    "get_driver_version_full",
+    "get_gpu_driver_version",
+    "get_num_devices",
+    "get_process_name",
+    "HAS_WORKING_NVML",
+]
+
+
+from .system import *  # the only import before HAS_WORKING_NVML is read below
+
+if HAS_WORKING_NVML:  # NVML-backed names are only imported and exported inside this branch
+    from ._nvml_context import initialize
+    from .device import Device, DeviceArchitecture
+    from .exceptions import *  # presumably provides the *Error names exported below; verify
+
+    initialize()  # runs as a side effect of importing this package
+
+    __all__.extend(
+        [
+            "Device",
+            "DeviceArchitecture",
+            "UninitializedError",
+            "InvalidArgumentError",
+            "NotSupportedError",
+            "NoPermissionError",
+            "AlreadyInitializedError",
+            "NotFoundError",
+            "InsufficientSizeError",
+            "InsufficientPowerError",
+            "DriverNotLoadedError",
+            "TimeoutError",  # NOTE(review): shares its name with a Python builtin
+            "IrqIssueError",
+            "LibraryNotFoundError",
+            "FunctionNotFoundError",
+            "CorruptedInforomError",
+            "GpuIsLostError",
+            "ResetRequiredError",
+            "OperatingSystemError",
+            "LibRmVersionMismatchError",
+            "InUseError",
+            "MemoryError",  # NOTE(review): shares its name with a Python builtin
+            "NoDataError",
+            "VgpuEccNotSupportedError",
+            "InsufficientResourcesError",
+            "FreqNotSupportedError",
+            "ArgumentVersionMismatchError",
+            "DeprecatedError",
+            "NotReadyError",
+            "GpuNotFoundError",
+            "InvalidStateError",
+            "ResetTypeNotSupportedError",
+            "UnknownError",
+        ]
+    )
diff --git a/cuda_core/cuda/core/system/_nvml_context.pyx b/cuda_core/cuda/core/system/_nvml_context.pyx
@@ -0,0 +1,93 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import threading
+
+from cuda.bindings import _nvml as nvml
+
+from . import exceptions
+
+
+ctypedef enum _NVMLState:
+    UNINITIALIZED = 0  # starting value of _NVML_STATE
+    INITIALIZED = 1  # set by initialize() after nvml.init_v2() returns without raising
+    DISABLED_LIBRARY_NOT_FOUND = 2  # set by initialize() when nvml.init_v2() raises a handled error
+
+
+# Initialisation must occur per-process, so an initialised state is a
+# (state, pid) pair
+_NVML_STATE = _NVMLState.UNINITIALIZED
+# Current initialization state (one of the _NVMLState values)
+
+_NVML_OWNER_PID = 0
+# PID of the process that successfully called nvml.init_v2()
+
+
+_lock = threading.Lock()
+
+
+def initialize() -> None:
+    """Idempotent (per-process) initialization of NVML.
+
+    Notes
+    -----
+    Modifies globals _NVML_STATE and _NVML_OWNER_PID under _lock. A LibraryNotFoundError,
+    DriverNotLoadedError or UnknownError from init_v2 disables NVML instead of raising."""
+    global _NVML_STATE, _NVML_OWNER_PID
+
+    with _lock:
+        if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND or (
+            _NVML_STATE == _NVMLState.INITIALIZED and os.getpid() == _NVML_OWNER_PID
+        ):
+            return  # already disabled, or already initialized by this process
+        elif (
+            _NVML_STATE == _NVMLState.INITIALIZED and os.getpid() != _NVML_OWNER_PID  # owner is another PID
+        ) or _NVML_STATE == _NVMLState.UNINITIALIZED:
+            try:
+                nvml.init_v2()
+            except (
+                exceptions.LibraryNotFoundError,
+                exceptions.DriverNotLoadedError,
+                exceptions.UnknownError,
+            ):
+                _NVML_STATE = _NVMLState.DISABLED_LIBRARY_NOT_FOUND  # later calls return early
+                return
+
+            # init_v2() returned without raising: record the state and the owning PID
+            _NVML_STATE = _NVMLState.INITIALIZED
+            _NVML_OWNER_PID = os.getpid()
+        else:  # defensive: reached only if _NVML_STATE holds a value not handled above
+            raise RuntimeError(f"Unhandled initialisation state ({_NVML_STATE=}, {_NVML_OWNER_PID=})")
+
+
+def is_initialized() -> bool:
+    """
+    Check whether NVML was initialized by the current process.
+
+    Returns
+    -------
+    result : bool
+        True if the state is INITIALIZED and the recorded owner PID equals ``os.getpid()``.
+    """
+    return _NVML_STATE == _NVMLState.INITIALIZED and os.getpid() == _NVML_OWNER_PID
+
+
+def validate() -> None:
+    """
+    Validate NVML state.
+
+    Check that NVML has not been marked as disabled and that it reports at least one GPU.
+
+    Raises
+    ------
+    exceptions.LibraryNotFoundError
+        If the module state is DISABLED_LIBRARY_NOT_FOUND.
+    exceptions.GpuNotFoundError
+        If ``nvml.device_get_count_v2()`` returns 0.
+    """
+    if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
+        raise exceptions.LibraryNotFoundError("The underlying NVML library was not found")
+    elif nvml.device_get_count_v2() == 0:
+        raise exceptions.GpuNotFoundError("No GPUs available")