Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
20d33da
Centralize context management code
Andy-Jost Dec 5, 2025
758f9f9
Integrate resource handles into Context class
Andy-Jost Dec 6, 2025
31880d4
Refactor context helpers to use ContextHandle and TLS cache
Andy-Jost Dec 8, 2025
c173e3d
Add helper functions to extract raw resources from ContextHandle
Andy-Jost Dec 8, 2025
4357f58
Refactor context acquisition to C++ handle helpers
Andy-Jost Dec 9, 2025
625a86f
Fix link error by loading _resource_handles with RTLD_GLOBAL
Andy-Jost Dec 9, 2025
c0cbacd
Move helper functions to C++ for overloading support
Andy-Jost Dec 9, 2025
4046023
Extend resource handle paradigm to Stream
Andy-Jost Dec 9, 2025
39fbefc
Simplify Stream by moving more logic to C++
Andy-Jost Dec 9, 2025
f90e625
Refactor Stream to use ContextHandle and simplify initialization
Andy-Jost Dec 9, 2025
d7a999d
Extend ContextHandle to Event and standardize naming
Andy-Jost Dec 10, 2025
1e13984
Store owning context handle in Device
Andy-Jost Dec 11, 2025
6268b6e
Convert Event to use resource handles
Andy-Jost Dec 11, 2025
1082f5a
Clean up Stream.wait() to use EventHandle for temporary events
Andy-Jost Dec 11, 2025
cd81f48
Add create_event_handle overload for temporary events
Andy-Jost Dec 11, 2025
2b798f2
Convert DeviceMemoryResource to use MemoryPoolHandle
Andy-Jost Dec 11, 2025
63d263d
Add DevicePtrHandle for RAII device pointer management
Andy-Jost Dec 11, 2025
ea9a293
Use intptr_t for all handle integer conversions
Andy-Jost Dec 11, 2025
92fa76b
Add thread-local error handling for resource handle functions
Andy-Jost Dec 11, 2025
f05d45a
Add IPC pointer cache to fix duplicate import issue (nvbug 5570902)
Andy-Jost Dec 11, 2025
937428b
Fix lint issues: remove unused imports and variables
Andy-Jost Dec 12, 2025
b629ec6
Add deviceptr_create_with_owner for handle-based owner tracking
Andy-Jost Dec 12, 2025
cce5e9f
Add resource handles _CXX_API capsule and lazy driver loading
Andy-Jost Dec 16, 2025
ab16456
Resolve CUDA driver entrypoints via cuda-bindings cuGetProcAddress
Andy-Jost Dec 16, 2025
3fafe92
Centralize resource handles capsule dispatch in _resource_handles.pxd
Andy-Jost Dec 16, 2025
ba139f3
Drop RTLD_GLOBAL import for _resource_handles
Andy-Jost Dec 16, 2025
496ee37
Merge remote-tracking branch 'origin/main' into resource-handles
Andy-Jost Dec 17, 2025
bac302b
Fix Python 3.13 finalization check
Andy-Jost Dec 17, 2025
5576353
Merge origin/main into resource-handles
Andy-Jost Dec 17, 2025
9d5a010
Fix finalization check across Python versions
Andy-Jost Dec 17, 2025
3b45f7c
Fix circular import for _resource_handles
Andy-Jost Dec 17, 2025
c539641
Merge remote-tracking branch 'origin/main' into resource-handles
Andy-Jost Dec 17, 2025
d4f0a82
Merge branch 'main' into resource-handles
Andy-Jost Dec 17, 2025
dd07ea8
Fix circular import in _resource_handles module
Andy-Jost Dec 17, 2025
280665f
Fix circular import by using importlib.import_module
Andy-Jost Dec 17, 2025
a821eeb
Merge origin/main: migrate cuda.core.experimental to cuda.core
Andy-Jost Dec 17, 2025
0f89baa
Fix wheel merge script to keep _resource_handles module
Andy-Jost Dec 18, 2025
5e437b2
Fix IPC pointer cache to use export data as key
Andy-Jost Dec 18, 2025
9bde6a2
Improve IPC pointer cache comments and fix race condition
Andy-Jost Dec 18, 2025
90ab0a5
Refactor load_driver_api to use RAII GIL guard
Andy-Jost Dec 18, 2025
48bfd2a
Merge remote-tracking branch 'origin/main' into resource-handles
Andy-Jost Dec 18, 2025
675ca24
Add DESIGN.md and optimize GIL usage in resource handle wrappers and …
Andy-Jost Dec 18, 2025
cfab280
Merge branch 'main' into resource-handles
Andy-Jost Dec 18, 2025
ccf9a3b
linter fix
Andy-Jost Dec 18, 2025
6c82cb6
Consolidate GIL helper classes at top of resource_handles.cpp
Andy-Jost Dec 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ cuda/_version.py export-subst
# we do not own any headers checked in, don't touch them
*.h binary
*.hpp binary
# Exception: headers we own (cuda_core C++ implementation)
cuda_core/cuda/core/_cpp/*.h -binary text diff
cuda_core/cuda/core/_cpp/*.hpp -binary text diff
# git should not convert line endings in PNG files
*.png binary
*.svg binary
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ __pycache__/
.pytest_cache/
.benchmarks/
*.cpp
!*_impl.cpp
!cuda_bindings/cuda/bindings/_lib/param_packer.cpp
!cuda_bindings/cuda/bindings/_bindings/loader.cpp
cache_driver
Expand Down
6 changes: 6 additions & 0 deletions ci/tools/merge_cuda_core_wheels.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,15 +150,21 @@ def merge_wheels(wheels: List[Path], output_dir: Path, show_wheel_contents: bool
"__init__.py",
"_version.py",
"_include",
"_cpp", # Headers for Cython development
"cu12",
"cu13",
)
# _resource_handles is shared (not CUDA-version-specific) and must stay
# at top level. It's imported early in __init__.py before versioned code.
items_to_keep_prefix = ("_resource_handles",)
all_items = os.scandir(base_wheel / base_dir)
removed_count = 0
for f in all_items:
f_abspath = f.path
if f.name in items_to_keep:
continue
if any(f.name.startswith(prefix) for prefix in items_to_keep_prefix):
continue
if f.is_dir():
print(f" Removing directory: {f.name}", file=sys.stderr)
shutil.rmtree(f_abspath)
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
#
# SPDX-License-Identifier: Apache-2.0

recursive-include cuda/core *.pyx *.pxd
recursive-include cuda/core *.pyx *.pxd *.cpp *.hpp
26 changes: 22 additions & 4 deletions cuda_core/build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,21 @@ def get_cuda_paths():
print("CUDA paths:", CUDA_PATH)
return CUDA_PATH

all_include_dirs = list(os.path.join(root, "include") for root in get_cuda_paths())
def get_sources(mod_name):
    """Return the source files used to build the extension module *mod_name*.

    The list always starts with the module's Cython source,
    ``cuda/core/<mod_name>.pyx``. If a companion C++ file named after the
    module with its leading underscores removed exists under
    ``cuda/core/_cpp/``, it is appended so that it is compiled into the same
    extension.

    Paths are relative to the current working directory.
    """
    sources = [f"cuda/core/{mod_name}.pyx"]

    # Only a regular file is a usable source; a directory that happens to be
    # named like the companion file must not be handed to the compiler.
    cpp_file = f"cuda/core/_cpp/{mod_name.lstrip('_')}.cpp"
    if os.path.isfile(cpp_file):
        sources.append(cpp_file)

    return sources

def get_extension_kwargs(mod_name):
    """Build the keyword arguments passed to ``Extension`` for *mod_name*.

    Every module currently receives the same mapping, so *mod_name* is not
    consulted; it is the hook for per-module settings (libraries, etc.).
    """
    kwargs = {}
    kwargs["extra_compile_args"] = extra_compile_args
    return kwargs

extra_compile_args = []
if COMPILE_FOR_COVERAGE:
# CYTHON_TRACE_NOGIL indicates to trace nogil functions. It is not
Expand All @@ -96,10 +110,14 @@ def get_cuda_paths():
ext_modules = tuple(
Extension(
f"cuda.core.{mod.replace(os.path.sep, '.')}",
sources=[f"cuda/core/{mod}.pyx"],
include_dirs=all_include_dirs,
sources=get_sources(mod),
include_dirs=[
"cuda/core/_include",
"cuda/core/_cpp",
]
+ list(os.path.join(root, "include") for root in get_cuda_paths()),
language="c++",
extra_compile_args=extra_compile_args,
**get_extension_kwargs(mod),
)
for mod in module_names
)
Expand Down
9 changes: 9 additions & 0 deletions cuda_core/cuda/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@

import importlib

# The _resource_handles module exports a PyCapsule dispatch table that other
# extension modules access via PyCapsule_Import. We import it here to ensure
# it's loaded before other modules try to use it.
#
# We use importlib.import_module with the full path to avoid triggering
# circular import issues that can occur with relative imports during
# package initialization.
_resource_handles = importlib.import_module("cuda.core._resource_handles")

subdir = f"cu{cuda_major}"
try:
versioned_mod = importlib.import_module(f".{subdir}", __package__)
Expand Down
19 changes: 19 additions & 0 deletions cuda_core/cuda/core/_context.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from cuda.core._resource_handles cimport ContextHandle

cdef class Context:
    """Cython declaration for Context class.

    This class provides access to CUDA contexts. Context objects cannot be
    instantiated directly - use factory methods or Device/Stream APIs.
    """

    cdef:
        # Handle object for the context this instance represents.
        ContextHandle _h_context
        # Integer device id stored together with the handle.
        int _device_id

    # cdef-only factory that builds a Context from an existing ContextHandle;
    # the implementation lives in _context.pyx.
    @staticmethod
    cdef Context _from_handle(type cls, ContextHandle h_context, int device_id)
46 changes: 33 additions & 13 deletions cuda_core/cuda/core/_context.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,55 @@

from dataclasses import dataclass

from cuda.core._utils.cuda_utils import driver
from cuda.core._resource_handles cimport (
ContextHandle,
intptr,
py,
)


@dataclass
class ContextOptions:
pass # TODO
__all__ = ['Context', 'ContextOptions']


cdef class Context:
"""CUDA context wrapper.

cdef:
readonly object _handle
int _device_id
Context objects represent CUDA contexts and cannot be instantiated directly.
Use Device or Stream APIs to obtain context objects.
"""

def __init__(self, *args, **kwargs):
raise RuntimeError("Context objects cannot be instantiated directly. Please use Device or Stream APIs.")

@classmethod
def _from_ctx(cls, handle: driver.CUcontext, int device_id):
cdef Context ctx = Context.__new__(Context)
ctx._handle = handle
@staticmethod
cdef Context _from_handle(type cls, ContextHandle h_context, int device_id):
"""Create Context from existing ContextHandle (cdef-only factory)."""
cdef Context ctx = cls.__new__(cls)
ctx._h_context = h_context
ctx._device_id = device_id
return ctx

@property
def handle(self):
"""Return the underlying CUcontext handle."""
if self._h_context.get() == NULL:
return None
return py(self._h_context)

def __eq__(self, other):
if not isinstance(other, Context):
return NotImplemented
cdef Context _other = <Context>other
return int(self._handle) == int(_other._handle)
return intptr(self._h_context) == intptr(_other._h_context)

def __hash__(self) -> int:
return hash(int(self._handle))
return hash((type(self), intptr(self._h_context)))


@dataclass
class ContextOptions:
    """Placeholder for context-creation options.

    No fields are defined yet; the class is reserved for future use.
    """
    # TODO: define option fields.
Loading