diff --git a/cuda_bindings/cuda/bindings/_nvml.pyx b/cuda_bindings/cuda/bindings/_nvml.pyx index 8248d5acfb..d9bddcc4bc 100644 --- a/cuda_bindings/cuda/bindings/_nvml.pyx +++ b/cuda_bindings/cuda/bindings/_nvml.pyx @@ -1187,6 +1187,395 @@ class RUSD(_IntEnum): POLL_PCI = 0x20 # Enable RUSD polling on pci group POLL_FAN = 0x40 # Enable RUSD polling on fan group POLL_PROC_UTIL = 0x80 # Enable RUSD polling on process utilization group + POLL_ALL = 0xFFFFFFFFFFFFFFFF # Enable RUSD polling on all groups + + +class PowerMizerMode(_IntEnum): + POWER_MIZER_MODE_ADAPTIVE = 0 # Adjust GPU clocks based on GPU utilization + POWER_MIZER_MODE_PREFER_MAXIMUM_PERFORMANCE = 1 # Raise GPU clocks to favor maximum performance, to the extent that thermal and other constraints allow + POWER_MIZER_MODE_AUTO = 2 # PowerMizer mode is driver controlled + POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE = 3 # lock to GPU base clocks + + +class DeviceArch(_IntEnum): + DEVICE_ARCH_KEPLER = 2 + DEVICE_ARCH_MAXWELL = 3 + DEVICE_ARCH_PASCAL = 4 + DEVICE_ARCH_VOLTA = 5 + DEVICE_ARCH_TURING = 6 + DEVICE_ARCH_AMPERE = 7 + DEVICE_ARCH_ADA = 8 + DEVICE_ARCH_HOPPER = 9 + DEVICE_ARCH_BLACKWELL = 10 + DEVICE_ARCH_UNKNOWN = 0xFFFFFFFF + + +class BusType(_IntEnum): + BUS_TYPE_UNKNOWN = 0 + BUS_TYPE_PCI = 1 + BUS_TYPE_PCIE = 2 + BUS_TYPE_FPCI = 3 + BUS_TYPE_AGP = 4 + + +class FanControlPolicy(_IntEnum): + FAN_CONTROL_POLICY_TEMPERATURE_CONTINUOUS_SW = 0 # Temperature-controlled fan policy + FAN_CONTROL_POLICY_MANUAL = 1 # Manual fan control policy + + +class PowerSource(_IntEnum): + POWER_SOURCE_AC = 0x00000000 + POWER_SOURCE_BATTERY = 0x00000001 + POWER_SOURCE_UNDERSIZED = 0x00000002 + + +class PcieLinkMaxSpeed(_IntEnum): + PCIE_LINK_MAX_SPEED_INVALID = 0x00000000 + PCIE_LINK_MAX_SPEED_2500MBPS = 0x00000001 + PCIE_LINK_MAX_SPEED_5000MBPS = 0x00000002 + PCIE_LINK_MAX_SPEED_8000MBPS = 0x00000003 + PCIE_LINK_MAX_SPEED_16000MBPS = 0x00000004 + PCIE_LINK_MAX_SPEED_32000MBPS = 0x00000005 + 
PCIE_LINK_MAX_SPEED_64000MBPS = 0x00000006 + + +class AdaptiveClockingInfoStatus(_IntEnum): + ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED = 0x00000000 + ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED = 0x00000001 + + +MAX_GPU_UTILIZATIONS = 8 + + +class PcieAtomicsCap(_IntEnum): + PCIE_ATOMICS_CAP_FETCHADD32 = 0x01 + PCIE_ATOMICS_CAP_FETCHADD64 = 0x02 + PCIE_ATOMICS_CAP_SWAP32 = 0x04 + PCIE_ATOMICS_CAP_SWAP64 = 0x08 + PCIE_ATOMICS_CAP_CAS32 = 0x10 + PCIE_ATOMICS_CAP_CAS64 = 0x20 + PCIE_ATOMICS_CAP_CAS128 = 0x40 + PCIE_ATOMICS_OPS_MAX = 7 + + +class PowerScope(_IntEnum): + POWER_SCOPE_GPU = 0 + POWER_SCOPE_MODULE = 1 + POWER_SCOPE_MEMORY = 2 + + +# Need "Enum" suffix to disambiguate from nvmlGridLicenseExpiry_t +class GridLicenseExpiryEnum(_IntEnum): + GRID_LICENSE_EXPIRY_NOT_AVAILABLE = 0 + GRID_LICENSE_EXPIRY_INVALID = 1 + GRID_LICENSE_EXPIRY_VALID = 2 + GRID_LICENSE_EXPIRY_NOT_APPLICABLE = 3 + GRID_LICENSE_EXPIRY_PERMANENT = 4 + + +GRID_LICENSE_FEATURE_MAX_COUNT = 3 + + +class VgpuVirtualizationCapMigration(_IntEnum): + VGPU_VIRTUALIZATION_CAP_MIGRATION_NO = 0x0 + VGPU_VIRTUALIZATION_CAP_MIGRATION_YES = 0x1 + + +class VgpuPgpuVirtualizationCapMigration(_IntEnum): + VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_NO = 0x0 + VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_YES = 0x1 + + +class VgpuSchedulerPolicy(_IntEnum): + VGPU_SCHEDULER_POLICY_UNKNOWN = 0 + VGPU_SCHEDULER_POLICY_BEST_EFFORT = 1 + VGPU_SCHEDULER_POLICY_EQUAL_SHARE = 2 + VGPU_SCHEDULER_POLICY_FIXED_SHARE = 3 + SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT = 3 + + +class VgpuSchedulerArr(_IntEnum): + VGPU_SCHEDULER_ARR_DEFAULT = 0 + VGPU_SCHEDULER_ARR_DISABLE = 1 + VGPU_SCHEDULER_ARR_ENABLE = 2 + + +class VgpuSchedulerEngineType(_IntEnum): + VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS = 1 + VGPU_SCHEDULER_ENGINE_TYPE_NVENC1 = 2 + + +class GridLicenseState(_IntEnum): + GRID_LICENSE_STATE_UNKNOWN = 0 + GRID_LICENSE_STATE_UNINITIALIZED = 1 + GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED = 2 + GRID_LICENSE_STATE_UNLICENSED_RESTRICTED = 3 + 
GRID_LICENSE_STATE_UNLICENSED = 4 + GRID_LICENSE_STATE_LICENSED = 5 + + +class NvlinkLowPowerThresholdUnit(_IntEnum): + NVLINK_LOW_POWER_THRESHOLD_UNIT_100US = 0x0 + NVLINK_LOW_POWER_THRESHOLD_UNIT_50US = 0x1 + + +class NvlinkPowerState(_IntEnum): + NVLINK_POWER_STATE_HIGH_SPEED = 0x0 + NVLINK_POWER_STATE_LOW_SPEED = 0x1 + + +NVLINK_LOW_POWER_THRESHOLD_MIN = 0x1 + + +class NvlinkLowPowerThreshold(_IntEnum): + NVLINK_LOW_POWER_THRESHOLD_MAX = 0x1FFF + NVLINK_LOW_POWER_THRESHOLD_RESET = 0xFFFFFFFF + NVLINK_LOW_POWER_THRESHOLD_DEFAULT = 0xFFFFFFFF + + +class C2CPowerState(_IntEnum): + C2C_POWER_STATE_FULL_POWER = 0 + C2C_POWER_STATE_LOW_POWER = 1 + + +class EventType(_IntEnum): + EVENT_TYPE_NONE = 0x0000000000000000 + EVENT_TYPE_SINGLE_BIT_ECC_ERROR = 0x0000000000000001 + EVENT_TYPE_DOUBLE_BIT_ECC_ERROR = 0x0000000000000002 + EVENT_TYPE_PSTATE = 0x0000000000000004 + EVENT_TYPE_XID_CRITICAL_ERROR = 0x0000000000000008 + EVENT_TYPE_CLOCK = 0x0000000000000010 + EVENT_TYPE_POWER_SOURCE_CHANGE = 0x0000000000000080 + EVENT_MIG_CONFIG_CHANGE = 0x0000000000000100 + EVENT_TYPE_SINGLE_BIT_ECC_ERROR_STORM = 0x0000000000000200 + EVENT_TYPE_DRAM_RETIREMENT_EVENT = 0x0000000000000400 + EVENT_TYPE_DRAM_RETIREMENT_FAILURE = 0x0000000000000800 + EVENT_TYPE_NON_FATAL_POISON_ERROR = 0x0000000000001000 + EVENT_TYPE_FATAL_POISON_ERROR = 0x0000000000002000 + EVENT_TYPE_GPU_UNAVAILABLE_ERROR = 0x0000000000004000 + EVENT_TYPE_GPU_RECOVERY_ACTION = 0x0000000000008000 + + +class SystemEventType(_IntEnum): + SYSTEM_EVENT_TYPE_GPU_DRIVER_UNBIND = 0x0000000000000001 + SYSTEM_EVENT_TYPE_GPU_DRIVER_BIND = 0x0000000000000002 + + +class ClocksEvent(_IntEnum): + CLOCKS_EVENT_REASON_GPU_IDLE = 0x0000000000000001 + CLOCKS_EVENT_REASON_APPLICATIONS_CLOCKS_SETTING = 0x0000000000000002 + CLOCKS_THROTTLE_REASON_USER_DEFINED_CLOCKS = 0x0000000000000002 + CLOCKS_EVENT_REASON_SW_POWER_CAP = 0x0000000000000004 + CLOCKS_THROTTLE_REASON_HW_SLOWDOWN = 0x0000000000000008 + CLOCKS_EVENT_REASON_SYNC_BOOST = 
0x0000000000000010 + CLOCKS_EVENT_REASON_SW_THERMAL_SLOWDOWN = 0x0000000000000020 + CLOCKS_THROTTLE_REASON_HW_THERMAL_SLOWDOWN = 0x0000000000000040 + CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE_SLOWDOWN = 0x0000000000000080 + CLOCKS_EVENT_REASON_DISPLAY_CLOCK_SETTING = 0x0000000000000100 + CLOCKS_EVENT_REASON_NONE = 0x0000000000000000 + CLOCKS_THROTTLE_REASON_GPU_IDLE = 0x0000000000000001 + CLOCKS_THROTTLE_REASON_APPLICATIONS_CLOCKS_SETTING = 0x0000000000002 + CLOCKS_THROTTLE_REASON_SYNC_BOOST = 0x00000000000010 + CLOCKS_THROTTLE_REASON_SW_POWER_CAP = 0x00000000000004 + CLOCKS_THROTTLE_REASON_SW_THERMAL_SLOWDOWN = 0x00000000000020 + CLOCKS_THROTTLE_REASON_DISPLAY_CLOCK_SETTING = 0x00000000000100 + CLOCKS_THROTTLE_REASON_NONE = 0x0000000000000000 + + +class EncoderQuery(_IntEnum): + ENCODER_QUERY_H264 = 0x00 + ENCODER_QUERY_HEVC = 0x01 + ENCODER_QUERY_AV1 = 0x02 + ENCODER_QUERY_UNKNOWN = 0xFF + + +class NvFBCSessionFlag(_IntEnum): + NVFBC_SESSION_FLAG_DIFFMAP_ENABLED = 0x00000001 + NVFBC_SESSION_FLAG_CLASSIFICATIONMAP_ENABLED = 0x00000002 + NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_NO_WAIT = 0x00000004 + NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_INFINITE = 0x00000008 + NVFBC_SESSION_FLAG_CAPTURE_WITH_WAIT_TIMEOUT = 0x00000010 + + +class CCSystemCpuCaps(_IntEnum): + CC_SYSTEM_CPU_CAPS_NONE = 0 + CC_SYSTEM_CPU_CAPS_AMD_SEV = 1 + CC_SYSTEM_CPU_CAPS_INTEL_TDX = 2 + CC_SYSTEM_CPU_CAPS_AMD_SEV_SNP = 3 + CC_SYSTEM_CPU_CAPS_AMD_SNP_VTOM = 4 + + +class CCSystemGpus(_IntEnum): + CC_SYSTEM_GPUS_CC_NOT_CAPABLE = 0 + CC_SYSTEM_GPUS_CC_CAPABLE = 1 + + +class CCSystemDevtoolsMode(_IntEnum): + CC_SYSTEM_DEVTOOLS_MODE_OFF = 0 + CC_SYSTEM_DEVTOOLS_MODE_ON = 1 + + +class CCSystemEnvironment(_IntEnum): + CC_SYSTEM_ENVIRONMENT_UNAVAILABLE = 0 + CC_SYSTEM_ENVIRONMENT_SIM = 1 + CC_SYSTEM_ENVIRONMENT_PROD = 2 + + +class CCSystemFeature(_IntEnum): + CC_SYSTEM_FEATURE_DISABLED = 0 + CC_SYSTEM_FEATURE_ENABLED = 1 + + +class CCSystemMultiGpu(_IntEnum): + CC_SYSTEM_MULTIGPU_NONE = 0 + 
CC_SYSTEM_MULTIGPU_PROTECTED_PCIE = 1 + CC_SYSTEM_MULTIGPU_NVLE = 2 + + +class CCAcceptingClientRequests(_IntEnum): + CC_ACCEPTING_CLIENT_REQUESTS_FALSE = 0 + CC_ACCEPTING_CLIENT_REQUESTS_TRUE = 1 + + +class GpuFabricState(_IntEnum): + GPU_FABRIC_STATE_NOT_SUPPORTED = 0 + GPU_FABRIC_STATE_NOT_STARTED = 1 + GPU_FABRIC_STATE_IN_PROGRESS = 2 + GPU_FABRIC_STATE_COMPLETED = 3 + + +class GpuFabricHealthMaskDegradedBw(_IntEnum): + GPU_FABRIC_HEALTH_MASK_DEGRADED_BW_NOT_SUPPORTED = 0 + GPU_FABRIC_HEALTH_MASK_DEGRADED_BW_TRUE = 1 + GPU_FABRIC_HEALTH_MASK_DEGRADED_BW_FALSE = 2 + GPU_FABRIC_HEALTH_MASK_SHIFT_DEGRADED_BW = 0 + GPU_FABRIC_HEALTH_MASK_WIDTH_DEGRADED_BW = 0x3 + + +class GpuFabricHealthMaskRouteRecovery(_IntEnum): + GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_NOT_SUPPORTED = 0 + GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_TRUE = 1 + GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_FALSE = 2 + GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_RECOVERY = 2 + GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_RECOVERY = 0x3 + + +class GpuFabricHealthMaskRouteUnhealthy(_IntEnum): + GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_NOT_SUPPORTED = 0 + GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_TRUE = 1 + GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_FALSE = 2 + GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_UNHEALTHY = 4 + GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_UNHEALTHY = 0x3 + + +class GpuFabricHealthMaskAccessTimeout(_IntEnum): + GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_NOT_SUPPORTED = 0 + GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_TRUE = 1 + GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_FALSE = 2 + GPU_FABRIC_HEALTH_MASK_SHIFT_ACCESS_TIMEOUT = 6 + GPU_FABRIC_HEALTH_MASK_WIDTH_ACCESS_TIMEOUT = 0x3 + + +class GpuFabricHealthMaskIncorrectConfiguration(_IntEnum): + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NOT_SUPPORTED = 0 + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NONE = 1 + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCORRECT_SYSGUID = 2 + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCORRECT_CHASSIS_SN = 3 + 
GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGRUATION_NO_PARTITION = 4 + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INSUFFICIENT_NVLINKS = 5 + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCOMPATIBLE_GPU_FW = 6 + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INVALID_LOCATION = 7 + GPU_FABRIC_HEALTH_MASK_SHIFT_INCORRECT_CONFIGURATION = 8 + GPU_FABRIC_HEALTH_MASK_WIDTH_INCORRECT_CONFIGURATION = 0xf + + +class GpuFabricHealthSummary(_IntEnum): + GPU_FABRIC_HEALTH_SUMMARY_NOT_SUPPORTED = 0 + GPU_FABRIC_HEALTH_SUMMARY_HEALTHY = 1 + GPU_FABRIC_HEALTH_SUMMARY_UNHEALTHY = 2 + GPU_FABRIC_HEALTH_SUMMARY_LIMITED_CAPACITY = 3 + + +class InitFlag(_IntEnum): + INIT_FLAG_NO_GPUS = 1 + INIT_FLAG_NO_ATTACH = 2 + + +class NvlinkState(_IntEnum): + NVLINK_STATE_INACTIVE = 0x0 + NVLINK_STATE_ACTIVE = 0x1 + NVLINK_STATE_SLEEP = 0x2 + + +class NvlinkFirmwareUcodeType(_IntEnum): + NVLINK_FIRMWARE_UCODE_TYPE_MSE = 0x1 + NVLINK_FIRMWARE_UCODE_TYPE_NETIR = 0x2 + NVLINK_FIRMWARE_UCODE_TYPE_NETIR_UPHY = 0x3 + NVLINK_FIRMWARE_UCODE_TYPE_NETIR_CLN = 0x4 + NVLINK_FIRMWARE_UCODE_TYPE_NETIR_DLN = 0x5 + + +class DeviceMig(_IntEnum): + DEVICE_MIG_DISABLE = 0 + DEVICE_MIG_ENABLE = 1 + + +class GpuInstanceProfile(_IntEnum): + GPU_INSTANCE_PROFILE_1_SLICE = 0x0 + GPU_INSTANCE_PROFILE_2_SLICE = 0x1 + GPU_INSTANCE_PROFILE_3_SLICE = 0x2 + GPU_INSTANCE_PROFILE_4_SLICE = 0x3 + GPU_INSTANCE_PROFILE_7_SLICE = 0x4 + GPU_INSTANCE_PROFILE_8_SLICE = 0x5 + GPU_INSTANCE_PROFILE_6_SLICE = 0x6 + GPU_INSTANCE_PROFILE_1_SLICE_REV1 = 0x7 + GPU_INSTANCE_PROFILE_2_SLICE_REV1 = 0x8 + GPU_INSTANCE_PROFILE_1_SLICE_REV2 = 0x9 + GPU_INSTANCE_PROFILE_1_SLICE_GFX = 0x0A + GPU_INSTANCE_PROFILE_2_SLICE_GFX = 0x0B + GPU_INSTANCE_PROFILE_4_SLICE_GFX = 0x0C + GPU_INSTANCE_PROFILE_1_SLICE_NO_ME = 0x0D + GPU_INSTANCE_PROFILE_2_SLICE_NO_ME = 0x0E + GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME = 0x0F + GPU_INSTANCE_PROFILE_2_SLICE_ALL_ME = 0x10 + GPU_INSTANCE_PROFILE_COUNT = 0x11 + + +class GpuInstanceProfileCaps(_IntEnum): + 
GPU_INSTANCE_PROFILE_CAPS_P2P = 0x1 + GPU_INSTANCE_PROFILE_CAPS_GFX = 0x2 + + +class ComputeInstanceProfileCaps(_IntEnum): + COMPUTE_INSTANCE_PROFILE_CAPS_GFX = 0x1 + + +class ComputeInstanceProfile(_IntEnum): + COMPUTE_INSTANCE_PROFILE_1_SLICE = 0x0 + COMPUTE_INSTANCE_PROFILE_2_SLICE = 0x1 + COMPUTE_INSTANCE_PROFILE_3_SLICE = 0x2 + COMPUTE_INSTANCE_PROFILE_4_SLICE = 0x3 + COMPUTE_INSTANCE_PROFILE_7_SLICE = 0x4 + COMPUTE_INSTANCE_PROFILE_8_SLICE = 0x5 + COMPUTE_INSTANCE_PROFILE_6_SLICE = 0x6 + COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 = 0x7 + COMPUTE_INSTANCE_PROFILE_COUNT = 0x8 + + +class ComputeInstanceEngineProfile(_IntEnum): + COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED = 0x0 + COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT = 0x1 + + +class PowerSmoothingProfileParam(_IntEnum): + POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR = 0 + POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE = 1 + POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE = 2 + POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS = 3 + POWER_SMOOTHING_PROFILE_PARAM_SECONDARY_POWER_FLOOR = 4 + POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_WIN_MULT = 5 + POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_TAR_WIN_MULT = 6 + POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_OFFSET = 7 ############################################################################### @@ -2180,6 +2569,17 @@ cdef class Memory_v2: raise ValueError("This Memory_v2 instance is read-only") self._ptr[0].total = val + @property + def reserved(self): + """int: """ + return self._ptr[0].reserved + + @reserved.setter + def reserved(self, val): + if self._readonly: + raise ValueError("This Memory_v2 instance is read-only") + self._ptr[0].reserved = val + @property def free(self): """int: """ @@ -2241,7 +2641,7 @@ cdef class Memory_v2: cdef _get_ba_r1memory_dtype_offsets(): cdef nvmlBAR1Memory_t pod = nvmlBAR1Memory_t() return _numpy.dtype({ - 'names': ['bar1total', 'bar1free', 'bar1_used'], + 'names': ['bar1_total', 'bar1_free', 'bar1_used'], 'formats': [_numpy.uint64, 
_numpy.uint64, _numpy.uint64], 'offsets': [ (&(pod.bar1Total)) - (&pod), @@ -2314,23 +2714,23 @@ cdef class BAR1Memory: setattr(self, key, val) @property - def bar1total(self): + def bar1_total(self): """int: """ return self._ptr[0].bar1Total - @bar1total.setter - def bar1total(self, val): + @bar1_total.setter + def bar1_total(self, val): if self._readonly: raise ValueError("This BAR1Memory instance is read-only") self._ptr[0].bar1Total = val @property - def bar1free(self): + def bar1_free(self): """int: """ return self._ptr[0].bar1Free - @bar1free.setter - def bar1free(self, val): + @bar1_free.setter + def bar1_free(self, val): if self._readonly: raise ValueError("This BAR1Memory instance is read-only") self._ptr[0].bar1Free = val diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py index a10812606e..67a815d1de 100644 --- a/cuda_core/cuda/core/__init__.py +++ b/cuda_core/cuda/core/__init__.py @@ -28,7 +28,7 @@ finally: del bindings, importlib, subdir, cuda_major, cuda_minor -from cuda.core import utils # noqa: E402 +from cuda.core import system, utils # noqa: E402 from cuda.core._device import Device # noqa: E402 from cuda.core._event import Event, EventOptions # noqa: E402 from cuda.core._graph import ( # noqa: E402 @@ -62,8 +62,3 @@ from cuda.core._module import Kernel, ObjectCode # noqa: E402 from cuda.core._program import Program, ProgramOptions # noqa: E402 from cuda.core._stream import Stream, StreamOptions # noqa: E402 -from cuda.core._system import System # noqa: E402 - -system = System() -__import__("sys").modules[__spec__.name + ".system"] = system -del System diff --git a/cuda_core/cuda/core/_system.py b/cuda_core/cuda/core/_system.py deleted file mode 100644 index 6f06587b46..0000000000 --- a/cuda_core/cuda/core/_system.py +++ /dev/null @@ -1,114 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# -# SPDX-License-Identifier: Apache-2.0 - -import warnings - -from cuda.core._device import Device -from cuda.core._utils.cuda_utils import driver, handle_return, runtime - - -class System: - """Provide information about the cuda system. - This class is a singleton and should not be instantiated directly. - """ - - _instance = None - - def __new__(cls): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __init__(self): - if hasattr(self, "_initialized") and self._initialized: - return - self._initialized = True - - def get_driver_version(self) -> tuple[int, int]: - """ - Query the CUDA driver version. - - Returns - ------- - tuple of int - A 2-tuple of (major, minor) version numbers. - """ - version = handle_return(driver.cuDriverGetVersion()) - major = version // 1000 - minor = (version % 1000) // 10 - return (major, minor) - - @property - def driver_version(self) -> tuple[int, int]: - """ - Query the CUDA driver version. - - Returns - ------- - tuple of int - A 2-tuple of (major, minor) version numbers. - - .. deprecated:: 0.5.0 - `cuda.core.system.driver_version` will be removed in 0.6.0. - Use `cuda.core.system.get_driver_version()` instead. - """ - warnings.warn( - "cuda.core.system.driver_version is deprecated. Use cuda.core.system.get_driver_version() instead.", - DeprecationWarning, - stacklevel=1, - ) - return self.get_driver_version() - - def get_num_devices(self) -> int: - """ - Query the number of available GPUs. - - Returns - ------- - int - The number of available GPU devices. - """ - return handle_return(runtime.cudaGetDeviceCount()) - - @property - def num_devices(self) -> int: - """ - Query the number of available GPUs. - - Returns - ------- - int - The number of available GPU devices. - - .. deprecated:: 0.5.0 - `cuda.core.system.num_devices` will be removed in 0.6.0. - Use `cuda.core.system.get_num_devices()` instead. - """ - warnings.warn( - "cuda.core.system.num_devices is deprecated. 
Use cuda.core.system.get_num_devices() instead.", - DeprecationWarning, - stacklevel=1, - ) - return self.get_num_devices() - - @property - def devices(self) -> tuple: - """ - Query the available device instances. - - Returns - ------- - tuple of Device - A tuple containing instances of available devices. - - .. deprecated:: 0.5.0 - `cuda.core.system.devices` will be removed in 0.6.0. - Use `cuda.core.Device.get_all_devices()` instead. - """ - warnings.warn( - "cuda.core.system.devices is deprecated. Use cuda.core.Device.get_all_devices() instead.", - DeprecationWarning, - stacklevel=1, - ) - return Device.get_all_devices() diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 3dbf3b7440..7f5c5caf21 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -38,7 +38,7 @@ def _warn_deprecated(): _warn_deprecated() -from cuda.core import utils # noqa: E402 +from cuda.core import system, utils # noqa: E402 # Make utils accessible as a submodule for backward compatibility __import__("sys").modules[__spec__.name + ".utils"] = utils @@ -73,8 +73,3 @@ def _warn_deprecated(): from cuda.core._module import Kernel, ObjectCode # noqa: E402 from cuda.core._program import Program, ProgramOptions # noqa: E402 from cuda.core._stream import Stream, StreamOptions # noqa: E402 -from cuda.core._system import System # noqa: E402 - -system = System() -__import__("sys").modules[__spec__.name + ".system"] = system -del System diff --git a/cuda_core/cuda/core/system/__init__.py b/cuda_core/cuda/core/system/__init__.py new file mode 100644 index 0000000000..8162f5b257 --- /dev/null +++ b/cuda_core/cuda/core/system/__init__.py @@ -0,0 +1,63 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +# ruff: noqa: F403, F405 + + +__all__ = [ + "get_driver_version", + "get_driver_version_full", + "get_gpu_driver_version", + "get_num_devices", + "get_process_name", + "HAS_WORKING_NVML", +] + + +from .system import * + +if HAS_WORKING_NVML: + from ._nvml_context import initialize + from .device import Device, DeviceArchitecture + from .exceptions import * + + initialize() + + __all__.extend( + [ + "Device", + "DeviceArchitecture", + "UninitializedError", + "InvalidArgumentError", + "NotSupportedError", + "NoPermissionError", + "AlreadyInitializedError", + "NotFoundError", + "InsufficientSizeError", + "InsufficientPowerError", + "DriverNotLoadedError", + "TimeoutError", + "IrqIssueError", + "LibraryNotFoundError", + "FunctionNotFoundError", + "CorruptedInforomError", + "GpuIsLostError", + "ResetRequiredError", + "OperatingSystemError", + "LibRmVersionMismatchError", + "InUseError", + "MemoryError", + "NoDataError", + "VgpuEccNotSupportedError", + "InsufficientResourcesError", + "FreqNotSupportedError", + "ArgumentVersionMismatchError", + "DeprecatedError", + "NotReadyError", + "GpuNotFoundError", + "InvalidStateError", + "ResetTypeNotSupportedError", + "UnknownError", + ] + ) diff --git a/cuda_core/cuda/core/system/_nvml_context.pyx b/cuda_core/cuda/core/system/_nvml_context.pyx new file mode 100644 index 0000000000..eccce36a90 --- /dev/null +++ b/cuda_core/cuda/core/system/_nvml_context.pyx @@ -0,0 +1,93 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +import os +import threading + +from cuda.bindings import _nvml as nvml + +from . 
import exceptions + + +ctypedef enum _NVMLState: + UNINITIALIZED = 0 + INITIALIZED = 1 + DISABLED_LIBRARY_NOT_FOUND = 2 + + +# Initialisation must occur per-process, so an initialised state is a +# (state, pid) pair +_NVML_STATE = _NVMLState.UNINITIALIZED +# """Current initialization state""" + +_NVML_OWNER_PID = 0 +# """PID of process that successfully called pynvml.nvmlInit""" + + +_lock = threading.Lock() + + +def initialize() -> None: + """Idempotent (per-process) initialization of NVUtil's NVML + + Notes + ----- + + Modifies global variables _NVML_STATE and _NVML_OWNER_PID""" + global _NVML_STATE, _NVML_OWNER_PID + + with _lock: + if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND or ( + _NVML_STATE == _NVMLState.INITIALIZED and os.getpid() == _NVML_OWNER_PID + ): + return + elif ( + _NVML_STATE == _NVMLState.INITIALIZED and os.getpid() != _NVML_OWNER_PID + ) or _NVML_STATE == _NVMLState.UNINITIALIZED: + try: + nvml.init_v2() + except ( + exceptions.LibraryNotFoundError, + exceptions.DriverNotLoadedError, + exceptions.UnknownError, + ): + _NVML_STATE = _NVMLState.DISABLED_LIBRARY_NOT_FOUND + return + + # initialization was successful + _NVML_STATE = _NVMLState.INITIALIZED + _NVML_OWNER_PID = os.getpid() + else: + raise RuntimeError(f"Unhandled initialisation state ({_NVML_STATE=}, {_NVML_OWNER_PID=})") + + +def is_initialized() -> bool: + """ + Check whether the NVML context is initialized on this process. + + Returns + ------- + result: bool + Whether the NVML context is initialized on this process. + """ + return _NVML_STATE == _NVMLState.INITIALIZED and os.getpid() == _NVML_OWNER_PID + + +def validate() -> None: + """ + Validate NVML state. + + Validate that NVML is functional and that the system has at least one GPU available. + + Raises + ------ + nvml.LibraryNotFoundError + If the NVML library could not be found. + nvml.GpuNotFoundError + If no GPUs are available. 
+ """ + if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND: + raise exceptions.LibraryNotFoundError("The underlying NVML library was not found") + elif nvml.device_get_count_v2() == 0: + raise exceptions.GpuNotFoundError("No GPUs available") diff --git a/cuda_core/cuda/core/system/device.pyx b/cuda_core/cuda/core/system/device.pyx new file mode 100644 index 0000000000..3d40dd8305 --- /dev/null +++ b/cuda_core/cuda/core/system/device.pyx @@ -0,0 +1,306 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from libc.stdint cimport intptr_t +from libc.math cimport ceil + +from multiprocessing import cpu_count +from typing import Iterable + +from cuda.bindings import _nvml as nvml + +from .utils import unpack_bitmask + + +class DeviceArchitecture: + """ + Device architecture enumeration. + """ + + def __init__(self, architecture: int): + try: + self._architecture = nvml.DeviceArch(architecture) + except ValueError: + self._architecture = None + + @property + def id(self) -> int: + """ + The numeric id of the device architecture. + + Returns -1 if the device is unknown. + """ + if self._architecture is None: + return -1 + return int(self._architecture) + + @property + def name(self) -> str: + """ + The name of the device architecture. + + Returns "Unlisted" if the device is unknown. + """ + if self._architecture is None: + return "Unlisted" + name = self._architecture.name + return name[name.rfind("_") + 1 :].title() + + +cdef class MemoryInfo: + """ + Memory allocation information for a device. 
+ """ + cdef object _memory_info + + def __init__(self, memory_info: nvml.Memory_v2): + self._memory_info = memory_info + + @property + def free(self) -> int: + """ + Unallocated device memory (in bytes) + """ + return self._memory_info.free + + @property + def total(self) -> int: + """ + Total physical device memory (in bytes) + """ + return self._memory_info.total + + @property + def used(self) -> int: + """ + Allocated device memory (in bytes) + """ + return self._memory_info.used + + @property + def reserved(self) -> int: + """ + Device memory (in bytes) reserved for system use (driver or firmware) + """ + return self._memory_info.reserved + + +cdef class BAR1MemoryInfo(MemoryInfo): + """ + BAR1 Memory allocation information for a device. + """ + cdef object _memory_info + + def __init__(self, memory_info: nvml.BAR1Memory): + self._memory_info = memory_info + + @property + def free(self) -> int: + """ + Unallocated BAR1 memory (in bytes) + """ + return self._memory_info.bar1_free + + @property + def total(self) -> int: + """ + Total BAR1 memory (in bytes) + """ + return self._memory_info.bar1_total + + @property + def used(self) -> int: + """ + Allocated used memory (in bytes) + """ + return self._memory_info.bar1_used + + +cdef class PciInfo: + """ + PCI information about a GPU device. 
+ """ + cdef object _pci_info + + def __init__(self, pci_info: nvml.PciInfo): + self._pci_info = pci_info + + @property + def bus(self) -> int: + """ + The bus on which the device resides, 0 to 255 + """ + return self._pci_info.bus + + @property + def bus_id(self) -> str: + """ + The tuple domain:bus:device.function PCI identifier string + """ + return self._pci_info.bus_id + + @property + def device(self) -> int: + """ + The device's id on the bus, 0 to 31 + """ + return self._pci_info.device_ + + @property + def domain(self) -> int: + """ + The PCI domain on which the device's bus resides, 0 to 0xffffffff + """ + return self._pci_info.domain + + @property + def vendor_id(self) -> int: + """ + The PCI vendor id of the device + """ + return self._pci_info.pci_device_id & 0xFFFF + + @property + def device_id(self) -> int: + """ + The PCI device id of the device + """ + return self._pci_info.pci_device_id >> 16 + + +cdef class Device: + """ + Representation of a CUDA device. + + Parameters + ---------- + index: int, optional + Integer representing the CUDA device index to get a handle to. + uuid: bytes or str, optional + UUID of a CUDA device to get a handle to. + + Raises + ------ + ValueError + If neither `index` nor `uuid` are specified or if both are specified. 
+ """ + + cdef intptr_t _handle + + def __init__(self, index: int | None = None, uuid: bytes | str | None = None): + if index is not None and uuid is not None: + raise ValueError("Handle requires only one of either device `index` or `uuid`.") + if index is None and uuid is None: + raise ValueError("Handle requires either a device `index` or `uuid`.") + + if index is not None: + self._handle = nvml.device_get_handle_by_index_v2(index) + else: + if isinstance(uuid, bytes): + uuid = uuid.decode("ascii") + self._handle = nvml.device_get_handle_by_uuid(uuid) + + @property + def handle(self) -> int: + return self._handle + + @classmethod + def get_all_devices(cls) -> Iterable[Device]: + """ + Query the available device instances. + + Returns + ------- + Iterator of Device + An iterator over available devices. + """ + total = nvml.device_get_count_v2() + for device_id in range(total): + yield cls(device_id) + + @property + def architecture(self) -> DeviceArchitecture: + """ + Device architecture. For example, a Tesla V100 will report + `DeviceArchitecture.name == "Volta"`, and RTX A6000 will report + `DeviceArchitecture.name == "Ampere"`. If the device returns an + architecture that is unknown to NVML then `DeviceArchitecture.name == + "Unknown"` is reported, whereas an architecture that is unknown to + cuda.core.system is reported as `DeviceArchitecture.name == "Unlisted"`. + """ + return DeviceArchitecture(nvml.device_get_architecture(self._handle)) + + @property + def bar1_memory_info(self) -> BAR1MemoryInfo: + """ + Get information about BAR1 memory. + + BAR1 is used to map the FB (device memory) so that it can be directly + accessed by the CPU or by 3rd party devices (peer-to-peer on the PCIE + bus). + """ + return BAR1MemoryInfo(nvml.device_get_bar1_memory_info(self._handle)) + + @property + def cpu_affinity(self) -> list[int]: + """ + Get a list containing the CPU indices to which the GPU is directly connected. 
+ + Examples + -------- + >>> Device(index=0).cpu_affinity + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59] + """ + return unpack_bitmask(nvml.device_get_cpu_affinity( + self._handle, + ceil(cpu_count() / 64), + )) + + @property + def cuda_compute_capability(self) -> tuple[int, int]: + """ + CUDA compute capability of the device, e.g.: `(7, 0)` for a Tesla V100. + + Returns a tuple `(major, minor)`. + """ + return nvml.device_get_cuda_compute_capability(self._handle) + + @property + def memory_info(self) -> MemoryInfo: + """ + Object with memory information. + """ + return MemoryInfo(nvml.device_get_memory_info_v2(self._handle)) + + @property + def name(self) -> str: + """ + Name of the device, e.g.: `"Tesla V100-SXM2-32GB"` + """ + return nvml.device_get_name(self._handle) + + @property + def pci_info(self) -> PciInfo: + """ + The PCI attributes of this device. + """ + return PciInfo(nvml.device_get_pci_info_v3(self._handle)) + + @property + def serial(self) -> str: + """ + Retrieves the globally unique board serial number associated with this + device's board. + """ + return nvml.device_get_serial(self._handle) + + @property + def uuid(self) -> str: + """ + Retrieves the globally unique immutable UUID associated with this + device, as a 5 part hexadecimal string, that augments the immutable, + board serial identifier. + """ + return nvml.device_get_uuid(self._handle) diff --git a/cuda_core/cuda/core/system/exceptions.py b/cuda_core/cuda/core/system/exceptions.py new file mode 100644 index 0000000000..5c6cfef889 --- /dev/null +++ b/cuda_core/cuda/core/system/exceptions.py @@ -0,0 +1,73 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0

# =====================================================================
# cuda_core/cuda/core/system/exceptions.py
# =====================================================================
# Re-export the NVML exception types from the private binding module so
# users of ``cuda.core.system`` can catch these errors without importing
# ``cuda.bindings._nvml`` directly.
#
# NOTE: ``TimeoutError`` and ``MemoryError`` deliberately shadow the
# builtins of the same name inside this module's namespace; here they are
# the NVML-specific error classes.

from cuda.bindings import _nvml as nvml

UninitializedError = nvml.UninitializedError
InvalidArgumentError = nvml.InvalidArgumentError
NotSupportedError = nvml.NotSupportedError
NoPermissionError = nvml.NoPermissionError
AlreadyInitializedError = nvml.AlreadyInitializedError
NotFoundError = nvml.NotFoundError
InsufficientSizeError = nvml.InsufficientSizeError
InsufficientPowerError = nvml.InsufficientPowerError
DriverNotLoadedError = nvml.DriverNotLoadedError
TimeoutError = nvml.TimeoutError
IrqIssueError = nvml.IrqIssueError
LibraryNotFoundError = nvml.LibraryNotFoundError
FunctionNotFoundError = nvml.FunctionNotFoundError
CorruptedInforomError = nvml.CorruptedInforomError
GpuIsLostError = nvml.GpuIsLostError
ResetRequiredError = nvml.ResetRequiredError
OperatingSystemError = nvml.OperatingSystemError
LibRmVersionMismatchError = nvml.LibRmVersionMismatchError
InUseError = nvml.InUseError
MemoryError = nvml.MemoryError
NoDataError = nvml.NoDataError
VgpuEccNotSupportedError = nvml.VgpuEccNotSupportedError
InsufficientResourcesError = nvml.InsufficientResourcesError
FreqNotSupportedError = nvml.FreqNotSupportedError
ArgumentVersionMismatchError = nvml.ArgumentVersionMismatchError
DeprecatedError = nvml.DeprecatedError
NotReadyError = nvml.NotReadyError
GpuNotFoundError = nvml.GpuNotFoundError
InvalidStateError = nvml.InvalidStateError
ResetTypeNotSupportedError = nvml.ResetTypeNotSupportedError
UnknownError = nvml.UnknownError


# Keep this list in the same order as the assignments above.
__all__ = [
    "UninitializedError",
    "InvalidArgumentError",
    "NotSupportedError",
    "NoPermissionError",
    "AlreadyInitializedError",
    "NotFoundError",
    "InsufficientSizeError",
    "InsufficientPowerError",
    "DriverNotLoadedError",
    "TimeoutError",
    "IrqIssueError",
    "LibraryNotFoundError",
    "FunctionNotFoundError",
    "CorruptedInforomError",
    "GpuIsLostError",
    "ResetRequiredError",
    "OperatingSystemError",
    "LibRmVersionMismatchError",
    "InUseError",
    "MemoryError",
    "NoDataError",
    "VgpuEccNotSupportedError",
    "InsufficientResourcesError",
    "FreqNotSupportedError",
    "ArgumentVersionMismatchError",
    "DeprecatedError",
    "NotReadyError",
    "GpuNotFoundError",
    "InvalidStateError",
    "ResetTypeNotSupportedError",
    "UnknownError",
]

# =====================================================================
# cuda_core/cuda/core/system/system.pyx
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0


# This file needs to either use NVML exclusively, or when `cuda.bindings._nvml`
# isn't available, fall back to non-NVML-based methods for backward
# compatibility.


import cuda.bindings

# TODO: Update after #1411 is merged
_BINDINGS_VERSION = tuple(int(x) for x in cuda.bindings.__version__.split("."))

# Working NVML bindings ship in cuda.bindings 12.9.6+ (12.x line) and 13.1.2+.
# BUGFIX: the original expression ended in ``or True``, which forced this
# flag on unconditionally (debug leftover); the version check now applies.
HAS_WORKING_NVML = _BINDINGS_VERSION >= (13, 1, 2) or (
    _BINDINGS_VERSION[0] == 12 and _BINDINGS_VERSION[1:3] >= (9, 6)
)


if HAS_WORKING_NVML:
    from cuda.bindings import _nvml as nvml
else:
    from cuda.core._utils.cuda_utils import driver, handle_return, runtime


def get_driver_version() -> tuple[int, int]:
    """
    The CUDA driver version.

    Tuple in the format `(CUDA_MAJOR, CUDA_MINOR)`.
    """
    return get_driver_version_full()[:2]


def get_driver_version_full() -> tuple[int, int, int]:
    """
    The CUDA driver version.

    Tuple in the format `(CUDA_MAJOR, CUDA_MINOR, CUDA_PATCH)`.
    """
    cdef int v
    if HAS_WORKING_NVML:
        v = nvml.system_get_cuda_driver_version()
    else:
        v = handle_return(driver.cuDriverGetVersion())
    # Both APIs encode the version as 1000*major + 10*minor (+ patch).
    return (v // 1000, (v // 10) % 100, v % 10)


def get_gpu_driver_version() -> tuple[int, ...]:
    """
    The driver version.

    Raises
    ------
    RuntimeError
        If the NVML library is not available.
    """
    if not HAS_WORKING_NVML:
        raise RuntimeError("NVML library is not available")
    return tuple(int(v) for v in nvml.system_get_driver_version().split("."))


def get_nvml_version() -> tuple[int, ...]:
    """
    The version of the NVML library.

    Raises
    ------
    RuntimeError
        If the NVML library is not available.
    """
    if not HAS_WORKING_NVML:
        raise RuntimeError("NVML library is not available")
    return tuple(int(v) for v in nvml.system_get_nvml_version().split("."))


def get_num_devices() -> int:
    """
    Return the number of devices in the system.
    """
    if HAS_WORKING_NVML:
        return nvml.device_get_count_v2()
    else:
        return handle_return(runtime.cudaGetDeviceCount())


def get_process_name(pid: int) -> str:
    """
    The name of process with given PID.

    Parameters
    ----------
    pid: int
        The PID of the process for which to get the name.

    Returns
    -------
    name: str
        The process name.

    Raises
    ------
    RuntimeError
        If the NVML library is not available.
    """
    # BUGFIX: unlike its NVML-only siblings, this function used ``nvml``
    # without checking HAS_WORKING_NVML first; guard it the same way.
    if not HAS_WORKING_NVML:
        raise RuntimeError("NVML library is not available")
    return nvml.system_get_process_name(pid)


__all__ = [
    "get_driver_version",
    "get_driver_version_full",
    "get_gpu_driver_version",
    "get_nvml_version",
    "get_num_devices",
    "get_process_name",
    "HAS_WORKING_NVML",
]

# =====================================================================
# cuda_core/cuda/core/system/utils.pyx
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from cpython cimport array
from libc.stdint cimport uint64_t


cpdef str format_bytes(uint64_t x):
    """Return formatted string in B, KiB, MiB, GiB or TiB"""
    if x < 1024:
        return f"{x} B"
    elif x < 1024**2:
        return f"{x / 1024:.2f} KiB"
    elif x < 1024**3:
        return f"{x / 1024**2:.2f} MiB"
    elif x < 1024**4:
        return f"{x / 1024**3:.2f} GiB"
    else:
        return f"{x / 1024**4:.2f} TiB"


cpdef list[int] unpack_bitmask(x: list[int] | array.array):
    """
    Unpack a list of integers containing bitmasks.

    Each element contributes 64 bits; the returned list holds the global
    indices of all set bits.

    Parameters
    ----------
    x: list of int
        A list of integers

    Examples
    --------
    >>> from cuda.core.system.utils import unpack_bitmask
    >>> unpack_bitmask([1 + 2 + 8])
    [0, 1, 3]
    >>> unpack_bitmask([1 + 2 + 16])
    [0, 1, 4]
    >>> unpack_bitmask([1 + 2 + 16, 2 + 4])
    [0, 1, 4, 65, 66]
    """
    cdef uint64_t[:] arr
    cdef uint64_t i, j, idx
    cdef int mask_bits = 64

    if isinstance(x, list):
        arr = array.array("Q", x)
    else:
        # Assumes an array of unsigned 64-bit values ("Q" typecode);
        # anything else raises TypeError on the memoryview assignment.
        arr = x

    res = []

    for i in range(len(x)):
        cpu_offset = i * mask_bits
        idx = 1
        for j in range(mask_bits):
            if arr[i] & idx:
                res.append(cpu_offset + j)
            idx <<= 1
    return res

# =====================================================================
# cuda_core/tests/system/__init__.py
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# =====================================================================
# cuda_core/tests/system/conftest.py
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0


import pytest
from cuda.core import system

skip_if_nvml_unsupported = pytest.mark.skipif(
    not system.HAS_WORKING_NVML, reason="NVML support requires cuda.bindings version 12.9.6+ or 13.1.2+"
)

# =====================================================================
# cuda_core/tests/system/test_nvml_context.py
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# ruff: noqa: E402

from .conftest import skip_if_nvml_unsupported

pytestmark = skip_if_nvml_unsupported

import multiprocessing as mp
from platform import uname

import pytest

# State constants under test.
# NOTE(review): assumed to mirror the internal state values of
# cuda.core.system._nvml_context — confirm against that module.
UNINITIALIZED = 0
INITIALIZED = 1
DISABLED_LIBRARY_NOT_FOUND = 2


def _run_process(target):
    """Run *target* in a freshly spawned process and assert a clean exit.

    NVML initialization is per-process state, so each scenario that mutates
    it gets its own interpreter.
    """
    proc = mp.get_context("spawn").Process(target=target)
    proc.start()
    proc.join()
    assert not proc.exitcode


def _test_initialized():
    # Importing the context module should leave NVML initialized.
    from cuda.core.system import _nvml_context

    assert _nvml_context._NVML_STATE == INITIALIZED


def test_initialized():
    _run_process(_test_initialized)


def _test_is_initialized():
    # is_initialized() must agree with the raw state flag.
    from cuda.core.system import _nvml_context

    assert _nvml_context._NVML_STATE == INITIALIZED
    assert _nvml_context.is_initialized() is True


def test_is_initialized():
    _run_process(_test_is_initialized)


def _test_uninitialized():
    # Forcing the state back to UNINITIALIZED flips is_initialized().
    from cuda.core.system import _nvml_context

    _nvml_context._NVML_STATE = UNINITIALIZED
    assert _nvml_context.is_initialized() is False


def test_uninitialized():
    _run_process(_test_uninitialized)


def _test_wrong_owner():
    # A PID mismatch (e.g. after fork) must report "not initialized".
    from cuda.core.system import _nvml_context

    _nvml_context._NVML_OWNER_PID = 0
    assert _nvml_context.is_initialized() is False


def test_wrong_owner():
    _run_process(_test_wrong_owner)


@pytest.mark.skipif("microsoft-standard" in uname().release, reason="Probably a WSL system")
def test_no_wsl():
    assert "microsoft-standard" not in uname().release


@pytest.mark.skipif("microsoft-standard" not in uname().release, reason="Probably a non-WSL system")
def test_wsl():
    assert "microsoft-standard" in uname().release


def _test_validate():
    # validate() returns None when the context is healthy.
    from cuda.core.system import _nvml_context

    assert _nvml_context.validate() is None


def test_validate():
    _run_process(_test_validate)

# =====================================================================
# cuda_core/tests/system/test_system_device.py
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# ruff: noqa: E402

from .conftest import skip_if_nvml_unsupported

pytestmark = skip_if_nvml_unsupported

import os
import re
import sys

import pytest
from cuda.core import system
from cuda.core.system import device as system_device

if system.HAS_WORKING_NVML:
    from cuda.bindings import _nvml as nvml

    if system.get_num_devices() == 0:
        pytest.skip("No GPUs available to run device tests", allow_module_level=True)


def test_device_index_handle():
    for device in system.Device.get_all_devices():
        assert isinstance(device.handle, int)


def test_device_architecture():
    for device in system.Device.get_all_devices():
        device_arch = device.architecture

        assert isinstance(device_arch, system_device.DeviceArchitecture)
        # Enum value containment (`in IntEnum`) only works from Python 3.12.
        if sys.version_info < (3, 12):
            assert device_arch.id in nvml.DeviceArch.__members__.values()
        else:
            assert device_arch.id in nvml.DeviceArch


def test_device_bar1_memory():
    for device in system.Device.get_all_devices():
        bar1_memory_info = device.bar1_memory_info
        free, total, used = (
            bar1_memory_info.free,
            bar1_memory_info.total,
            bar1_memory_info.used,
        )

        assert isinstance(bar1_memory_info, system_device.BAR1MemoryInfo)
        assert isinstance(free, int)
        assert isinstance(total, int)
        assert isinstance(used, int)

        assert free >= 0
        assert total >= 0
        assert used >= 0
        assert free + used == total


def test_device_cpu_affinity():
    skip_reasons = set()
    for device in system.Device.get_all_devices():
        try:
            affinity = device.cpu_affinity
        except system.NotSupportedError:
            skip_reasons.add(f"CPU affinity not supported on {device}")
        else:
            assert isinstance(affinity, list)
            # FIX: save and restore the current CPU mask so this test does
            # not leak a modified affinity into the rest of the session.
            original_affinity = os.sched_getaffinity(0)
            try:
                os.sched_setaffinity(0, affinity)
                assert os.sched_getaffinity(0) == set(affinity)
            finally:
                os.sched_setaffinity(0, original_affinity)
    if skip_reasons:
        pytest.skip(" ; ".join(skip_reasons))


def test_device_cuda_compute_capability():
    for device in system.Device.get_all_devices():
        cuda_compute_capability = device.cuda_compute_capability
        assert isinstance(cuda_compute_capability, tuple)
        assert len(cuda_compute_capability) == 2
        assert all([isinstance(i, int) for i in cuda_compute_capability])
        assert 3 <= cuda_compute_capability[0] <= 99
        assert 0 <= cuda_compute_capability[1] <= 9


def test_device_memory():
    for device in system.Device.get_all_devices():
        memory_info = device.memory_info
        free, total, used, reserved = memory_info.free, memory_info.total, memory_info.used, memory_info.reserved

        assert isinstance(memory_info, system_device.MemoryInfo)
        assert isinstance(free, int)
        assert isinstance(total, int)
        assert isinstance(used, int)
        assert isinstance(reserved, int)

        assert free >= 0
        assert total >= 0
        assert used >= 0
        assert reserved >= 0
        assert free + used + reserved == total


def test_device_name():
    for device in system.Device.get_all_devices():
        name = device.name
        assert isinstance(name, str)
        assert len(name) > 0


def test_device_pci_info():
    for device in system.Device.get_all_devices():
        pci_info = device.pci_info
        assert isinstance(pci_info, system_device.PciInfo)

        # The bus id string ("dddddddd:bb:dd.f") must agree with the
        # individual numeric fields below.
        assert isinstance(pci_info.bus_id, str)
        assert re.match("[a-f0-9]{8}:[a-f0-9]{2}:[a-f0-9]{2}.[a-f0-9]", pci_info.bus_id.lower())
        bus_id_domain = int(pci_info.bus_id.split(":")[0], 16)
        bus_id_bus = int(pci_info.bus_id.split(":")[1], 16)
        bus_id_device = int(pci_info.bus_id.split(":")[2][:2], 16)

        assert isinstance(pci_info.domain, int)
        assert 0x00 <= pci_info.domain <= 0xFFFFFFFF
        assert pci_info.domain == bus_id_domain

        assert isinstance(pci_info.bus, int)
        assert 0x00 <= pci_info.bus <= 0xFF
        assert pci_info.bus == bus_id_bus

        assert isinstance(pci_info.device, int)
        assert 0x00 <= pci_info.device <= 0xFF
        assert pci_info.device == bus_id_device

        assert isinstance(pci_info.vendor_id, int)
        assert 0x0000 <= pci_info.vendor_id <= 0xFFFF

        assert isinstance(pci_info.device_id, int)
        assert 0x0000 <= pci_info.device_id <= 0xFFFF


def test_device_serial():
    skip_reasons = set()
    for device in system.Device.get_all_devices():
        try:
            serial = device.serial
        except system.NotSupportedError:
            skip_reasons.add(f"Device serial not supported by device {device}")
        else:
            assert isinstance(serial, str)
            assert len(serial) > 0

    if skip_reasons:
        pytest.skip(" ; ".join(skip_reasons))


def test_device_uuid():
    for device in system.Device.get_all_devices():
        uuid = device.uuid
        assert isinstance(uuid, str)

        # Expands to GPU-8hex-4hex-4hex-4hex-12hex, where 8hex means 8 consecutive
        # hex characters, e.g.: "GPU-abcdef12-abcd-0123-4567-1234567890ab"
        # FIX: the original test described this format but never checked it.
        assert re.fullmatch(
            r"gpu-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
            uuid.lower(),
        )

# =====================================================================
# cuda_core/tests/system/test_system_system.py
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# ruff: noqa: E402

import os

import pytest
from cuda.core import system

from .conftest import skip_if_nvml_unsupported


def test_cuda_driver_version():
    """The CUDA driver version is a 3-tuple within plausible bounds."""
    version = system.get_driver_version_full()
    assert isinstance(version, tuple)
    assert len(version) == 3

    major, minor, patch = version
    assert major >= 10
    assert 0 <= minor <= 99
    assert 0 <= patch <= 9


@skip_if_nvml_unsupported
def test_gpu_driver_version():
    """The GPU driver version has 2 or 3 components in plausible ranges."""
    version = system.get_gpu_driver_version()
    assert isinstance(version, tuple)
    assert len(version) in (2, 3)

    major, minor = version[0], version[1]
    rest = version[2:]
    assert 400 <= major < 1000
    assert minor >= 0
    if rest:
        assert 0 <= rest[0] <= 99


@skip_if_nvml_unsupported
def test_nvml_version():
    """The NVML version prepends the CUDA major to a driver-style version."""
    version = system.get_nvml_version()
    assert isinstance(version, tuple)
    assert len(version) in (3, 4)

    cuda_major, driver_major, driver_minor = version[0], version[1], version[2]
    rest = version[3:]
    assert cuda_major >= 10
    assert 400 <= driver_major < 1000
    assert driver_minor >= 0
    if rest:
        assert 0 <= rest[0] <= 99


@skip_if_nvml_unsupported
def test_get_process_name():
    """Looking up our own PID yields the running interpreter's name."""
    try:
        process_name = system.get_process_name(os.getpid())
    except system.NotFoundError:
        pytest.skip("Process not found")

    assert isinstance(process_name, str)
    assert "python" in process_name


def test_device_count():
    """The device count is a non-negative integer."""
    count = system.get_num_devices()
    assert isinstance(count, int)
    assert count >= 0

# =====================================================================
# cuda_core/tests/system/test_system_utils.py
# =====================================================================
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

import pytest
from cuda.core.system.utils import format_bytes, unpack_bitmask


def test_format_bytes():
    """Units switch at each power-of-1024 boundary and cap at TiB."""
    expectations = {
        0: "0 B",
        1: "1 B",
        1023: "1023 B",
        1024: "1.00 KiB",
        1024**2: "1.00 MiB",
        1024**3: "1.00 GiB",
        1024**4: "1.00 TiB",
        1024**5: "1024.00 TiB",
        1024**6: "1048576.00 TiB",
    }
    for value, expected in expectations.items():
        assert format_bytes(value) == expected


@pytest.mark.parametrize(
    ("masks", "expected"),
    [
        ([1152920405096267775, 0], list(range(20)) + list(range(40, 60))),
        ([17293823668613283840, 65535], list(range(20, 40)) + list(range(60, 80))),
        ([18446744073709551615, 0], list(range(64))),
        ([0, 18446744073709551615], list(range(64, 128))),
    ],
)
def test_unpack_bitmask(masks, expected):
    """Each 64-bit word contributes its set-bit indices at a 64-bit offset."""
    assert unpack_bitmask(masks) == expected


def test_unpack_bitmask_single_value():
    """A bare integer (not a list/array) is rejected."""
    with pytest.raises(TypeError):
        unpack_bitmask(1)