From a0f15077c881f67f6b580a2803be1a6eb32a9b4c Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Tue, 30 Jun 2026 01:20:23 +0000 Subject: [PATCH 1/3] initial version of testing ObjectCode.from_library --- .github/workflows/build-wheel.yml | 5 +- .gitignore | 2 + .../test_binaries/build_test_binaries.sh | 8 +- cuda_core/tests/test_module.py | 87 +++++++++++++++++++ 4 files changed, 100 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 27ee752b9d8..286869235ca 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -469,7 +469,10 @@ jobs: uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries - path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o + path: | + ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o + ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.a + ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.lib if-no-files-found: error - name: Download cuda.bindings build artifacts from the prior branch diff --git a/.gitignore b/.gitignore index d2a5bf6e52e..a66da355940 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,8 @@ cache_nvrtc # cuda.core test object fixtures built locally / downloaded as CI artifacts cuda_core/tests/test_binaries/*.o +cuda_core/tests/test_binaries/*.a +cuda_core/tests/test_binaries/*.lib # CUDA Python specific (auto-generated) cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 8d2231bd90a..dfdea440f00 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -17,4 +17,10 @@ fi nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" -arch=all-major \ -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" -ls -lah "${SCRIPTPATH}/saxpy.o" +if [[ "${OS:-}" == "Windows_NT" ]]; then + nvcc -lib -o "${SCRIPTPATH}/saxpy.lib" "${SCRIPTPATH}/saxpy.o" + ls -lah "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.lib" +else + nvcc -lib -o "${SCRIPTPATH}/saxpy.a" "${SCRIPTPATH}/saxpy.o" + ls -lah "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.a" +fi diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 524dd471345..530b2dbc67c 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -203,6 +203,33 @@ def get_saxpy_object(): return obj_path.read_bytes() +@pytest.fixture(scope="module") +def get_saxpy_library(): + """Read the pre-built saxpy.a (Linux) or saxpy.lib (Windows). + + In CI: produced by build stage alongside saxpy.o. + In local dev: auto-built on demand if nvcc is available; if you edit + saxpy.cu, remove stale saxpy.o / saxpy.a to force a rebuild. + """ + binaries_dir = Path(__file__).parent / "test_binaries" + lib_name = "saxpy.lib" if os.name == "nt" else "saxpy.a" + lib_path = binaries_dir / lib_name + + if not lib_path.is_file(): + if find_nvidia_binary_utility("nvcc") is None: + pytest.skip( + f"{lib_name} not found at {lib_path} and nvcc is unavailable. " + "In CI this is downloaded from the build stage." + ) + subprocess.run( # noqa: S603 + ["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607 + check=True, + env=os.environ, + ) + + return lib_path.read_bytes() + + def test_get_kernel(init_cuda): kernel = """extern "C" __global__ void ABC() { }""" @@ -420,6 +447,66 @@ def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): assert result[0] == 10.0 +def test_object_code_load_library(get_saxpy_library): + lib = get_saxpy_library + assert isinstance(lib, bytes) + mod_obj = ObjectCode.from_library(lib) + assert mod_obj.code == lib + assert mod_obj.code_type == "library" + with pytest.raises(RuntimeError, match=r'Unsupported code type "library"'): + mod_obj.get_kernel("saxpy") + + +def test_object_code_load_library_from_file(get_saxpy_library, tmp_path): + lib_ext = ".lib" if os.name == "nt" else ".a" + lib_file = tmp_path / f"test{lib_ext}" + lib_file.write_bytes(get_saxpy_library) + arg = str(lib_file) + mod_obj = ObjectCode.from_library(arg) + assert mod_obj.code == arg + assert mod_obj.code_type == "library" + + +def test_object_code_load_library_with_linker(get_saxpy_library, init_cuda): + arch = f"sm_{init_cuda.arch}" + kernel_code = Program( + r""" + extern __device__ float saxpy_step(float a, float x, float y); + extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) { + if (threadIdx.x == 0 && blockIdx.x == 0) *out = saxpy_step(a, x, y); + } + """, + "c++", + ProgramOptions(relocatable_device_code=True, arch=arch), + ).compile("cubin") + linked = Linker( + kernel_code, + ObjectCode.from_library(get_saxpy_library), + options=LinkerOptions(arch=arch), + ).link("cubin") + kernel = linked.get_kernel("linked_kernel") + + stream = init_cuda.create_stream() + host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4) + result = np.from_dlpack(host_buf).view(np.float32) + result[:] = 0.0 + dev_buf = init_cuda.memory_resource.allocate(4, stream=init_cuda.default_stream) + + cuda.core.launch( + stream, + cuda.core.LaunchConfig(grid=1, block=1), + kernel, + np.float32(2.0), + np.float32(3.0), + np.float32(4.0), + dev_buf, + ) + dev_buf.copy_to(host_buf, stream=stream) + stream.sync() + + assert result[0] == 10.0 + + def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel_cubin From f81d97e31e88a5a34ae4fd297e6ddf8b981597ee Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Tue, 30 Jun 2026 22:20:00 +0000 Subject: [PATCH 2/3] use pytest parametrize and merge boilerplate code --- cuda_core/tests/test_module.py | 161 ++++++++++----------------------- 1 file changed, 49 insertions(+), 112 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 530b2dbc67c..3adf6e5e895 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -177,48 +177,25 @@ def get_saxpy_fatbin(init_cuda): return bytes(fatbin), sym_map -@pytest.fixture(scope="module") -def get_saxpy_object(): - """Read the pre-built saxpy.o. - - In CI: produced by build stage into a test wheel file. - In local dev: auto-built on demand if nvcc is available; if you edit - saxpy.cu, remove the stale saxpy.o to force a rebuild. - """ - binaries_dir = Path(__file__).parent / "test_binaries" - obj_path = binaries_dir / "saxpy.o" - - if not obj_path.is_file(): - if find_nvidia_binary_utility("nvcc") is None: - pytest.skip( - f"saxpy.o not found at {obj_path} and nvcc is unavailable. " - "In CI this is downloaded from the build stage." - ) - subprocess.run( # noqa: S603 - ["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607 - check=True, - env=os.environ, - ) +def _read_saxpy_rdc(kind: str) -> bytes: + """Read a pre-built saxpy RDC object or library. - return obj_path.read_bytes() - - -@pytest.fixture(scope="module") -def get_saxpy_library(): - """Read the pre-built saxpy.a (Linux) or saxpy.lib (Windows). - - In CI: produced by build stage alongside saxpy.o. + In CI: produced by the build stage. In local dev: auto-built on demand if nvcc is available; if you edit - saxpy.cu, remove stale saxpy.o / saxpy.a to force a rebuild. + saxpy.cu, remove stale RDC files to force a rebuild. """ binaries_dir = Path(__file__).parent / "test_binaries" - lib_name = "saxpy.lib" if os.name == "nt" else "saxpy.a" - lib_path = binaries_dir / lib_name + if kind == "object": + rdc_path = binaries_dir / "saxpy.o" + elif kind == "library": + rdc_path = binaries_dir / ("saxpy.lib" if os.name == "nt" else "saxpy.a") + else: + raise ValueError(f"unknown saxpy RDC kind: {kind!r}") - if not lib_path.is_file(): + if not rdc_path.is_file(): if find_nvidia_binary_utility("nvcc") is None: pytest.skip( - f"{lib_name} not found at {lib_path} and nvcc is unavailable. " + f"{rdc_path.name} not found at {rdc_path} and nvcc is unavailable. " "In CI this is downloaded from the build stage." ) subprocess.run( # noqa: S603 @@ -226,8 +203,7 @@ def get_saxpy_library(): check=True, env=os.environ, ) - - return lib_path.read_bytes() + return rdc_path.read_bytes() def test_get_kernel(init_cuda): @@ -388,86 +364,47 @@ def test_object_code_load_fatbin_from_file(get_saxpy_fatbin, tmp_path, convert_p mod_obj.get_kernel("saxpy") # force loading -def test_object_code_load_object(get_saxpy_object): - obj = get_saxpy_object - assert isinstance(obj, bytes) - mod_obj = ObjectCode.from_object(obj) - assert mod_obj.code == obj - assert mod_obj.code_type == "object" - with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): - mod_obj.get_kernel("saxpy") - - -def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): - obj_file = tmp_path / "test.o" - obj_file.write_bytes(get_saxpy_object) - arg = str(obj_file) - mod_obj = ObjectCode.from_object(arg) - assert mod_obj.code == arg - assert mod_obj.code_type == "object" - - -def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): - arch = f"sm_{init_cuda.arch}" - kernel_code = Program( - r""" - extern __device__ float saxpy_step(float a, float x, float y); - extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) { - if (threadIdx.x == 0 && blockIdx.x == 0) *out = saxpy_step(a, x, y); - } - """, - "c++", - ProgramOptions(relocatable_device_code=True, arch=arch), - ).compile("cubin") - linked = Linker( - kernel_code, - ObjectCode.from_object(get_saxpy_object), - options=LinkerOptions(arch=arch), - ).link("cubin") - kernel = linked.get_kernel("linked_kernel") - - stream = init_cuda.create_stream() - host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4) - result = np.from_dlpack(host_buf).view(np.float32) - result[:] = 0.0 - dev_buf = init_cuda.memory_resource.allocate(4, stream=init_cuda.default_stream) - - cuda.core.launch( - stream, - cuda.core.LaunchConfig(grid=1, block=1), - kernel, - np.float32(2.0), - np.float32(3.0), - np.float32(4.0), - dev_buf, - ) - dev_buf.copy_to(host_buf, stream=stream) - stream.sync() - - assert result[0] == 10.0 - - -def test_object_code_load_library(get_saxpy_library): - lib = get_saxpy_library - assert isinstance(lib, bytes) - mod_obj = ObjectCode.from_library(lib) - assert mod_obj.code == lib - assert mod_obj.code_type == "library" - with pytest.raises(RuntimeError, match=r'Unsupported code type "library"'): +@pytest.mark.parametrize( + ("kind", "from_fn"), + [ + ("object", ObjectCode.from_object), + ("library", ObjectCode.from_library), + ], +) +def test_object_code_load_rdc(kind, from_fn): + data = _read_saxpy_rdc(kind) + assert isinstance(data, bytes) + mod_obj = from_fn(data) + assert mod_obj.code == data + assert mod_obj.code_type == kind + with pytest.raises(RuntimeError, match=rf'Unsupported code type "{kind}"'): mod_obj.get_kernel("saxpy") -def test_object_code_load_library_from_file(get_saxpy_library, tmp_path): - lib_ext = ".lib" if os.name == "nt" else ".a" - lib_file = tmp_path / f"test{lib_ext}" - lib_file.write_bytes(get_saxpy_library) - arg = str(lib_file) - mod_obj = ObjectCode.from_library(arg) +@pytest.mark.parametrize( + ("kind", "from_fn", "suffix"), + [ + ("object", ObjectCode.from_object, ".o"), + ("library", ObjectCode.from_library, ".lib" if os.name == "nt" else ".a"), + ], +) +def test_object_code_load_rdc_from_file(kind, from_fn, suffix, tmp_path): + rdc_file = tmp_path / f"test{suffix}" + rdc_file.write_bytes(_read_saxpy_rdc(kind)) + arg = str(rdc_file) + mod_obj = from_fn(arg) assert mod_obj.code == arg - assert mod_obj.code_type == "library" + assert mod_obj.code_type == kind -def test_object_code_load_library_with_linker(get_saxpy_library, init_cuda): +@pytest.mark.parametrize( + ("kind", "from_fn"), + [ + ("object", ObjectCode.from_object), + ("library", ObjectCode.from_library), + ], +) +def test_object_code_load_rdc_with_linker(kind, from_fn, init_cuda): arch = f"sm_{init_cuda.arch}" kernel_code = Program( r""" @@ -481,7 +418,7 @@ def test_object_code_load_library_with_linker(get_saxpy_library, init_cuda): ).compile("cubin") linked = Linker( kernel_code, - ObjectCode.from_library(get_saxpy_library), + from_fn(_read_saxpy_rdc(kind)), options=LinkerOptions(arch=arch), ).link("cubin") kernel = linked.get_kernel("linked_kernel") From 51569843b4c93e6465b34692d6cb35ac860fcd2b Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Tue, 30 Jun 2026 22:33:13 +0000 Subject: [PATCH 3/3] rephrase to be accurate --- cuda_core/tests/test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 3adf6e5e895..ce917a76ca8 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -182,7 +182,7 @@ def _read_saxpy_rdc(kind: str) -> bytes: In CI: produced by the build stage. In local dev: auto-built on demand if nvcc is available; if you edit - saxpy.cu, remove stale RDC files to force a rebuild. + saxpy.cu, remove stale RDC files (i.e. saxpy.o, saxpy.a, or saxpy.lib) to force a rebuild. """ binaries_dir = Path(__file__).parent / "test_binaries" if kind == "object":