From 2256b19d0ef51378b315bc60dfb3f836c1e21abd Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 31 Mar 2026 16:38:12 +0000 Subject: [PATCH 1/7] Simplify futher more cuda path --- backends/cuda/headers.patch | 61 +++---------------------------------- 1 file changed, 5 insertions(+), 56 deletions(-) diff --git a/backends/cuda/headers.patch b/backends/cuda/headers.patch index 66a2002a..8a587acd 100644 --- a/backends/cuda/headers.patch +++ b/backends/cuda/headers.patch @@ -1,19 +1,7 @@ diff -u4 -r --new-file include/cuda.h modified_include/cuda.h --- include/cuda.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/cuda.h 2026-03-30 22:10:41.000000000 +0000 -@@ -290,9 +290,11 @@ - - /** - * CUDA IPC handle size - */ -+#ifndef CU_IPC_HANDLE_SIZE - #define CU_IPC_HANDLE_SIZE 64 -+#endif - - /** - * CUDA IPC event handle - */ -@@ -499,9 +501,9 @@ ++++ modified_include/cuda.h 2026-03-31 16:32:26.000000000 +0000 +@@ -499,9 +499,9 @@ * Per-operation parameters for ::cuStreamBatchMemOp */ typedef union CUstreamBatchMemOpParams_union { @@ -24,7 +12,7 @@ diff -u4 -r --new-file include/cuda.h modified_include/cuda.h CUdeviceptr address; union { cuuint32_t value; -@@ -509,9 +511,9 @@ +@@ -509,9 +509,9 @@ }; unsigned int flags; CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */ @@ -35,7 +23,7 @@ diff -u4 -r --new-file include/cuda.h modified_include/cuda.h CUdeviceptr address; union { cuuint32_t value; -@@ -519,13 +521,13 @@ +@@ -519,13 +519,13 @@ }; unsigned int flags; CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */ @@ -51,33 +39,9 @@ diff -u4 -r --new-file include/cuda.h modified_include/cuda.h unsigned int flags; } memoryBarrier; cuuint64_t pad[6]; -@@ -1338,9 +1340,11 @@ - - /* - * Indicates that compute device class supports accelerated features. 
- */ -+#ifndef CU_COMPUTE_ACCELERATED_TARGET_BASE - #define CU_COMPUTE_ACCELERATED_TARGET_BASE 0x10000 -+#endif - - /** - * Online compilation targets - */ -@@ -2851,9 +2855,11 @@ - - /** - * Size of tensor map descriptor - */ -+#ifndef CU_TENSOR_MAP_NUM_QWORDS - #define CU_TENSOR_MAP_NUM_QWORDS 16 -+#endif - - /** - * Tensor map descriptor. Requires compiler support for aligning to 64 bytes. - */ diff -u4 -r --new-file include/cuda_vdpau_interop.h modified_include/cuda_vdpau_interop.h --- include/cuda_vdpau_interop.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/cuda_vdpau_interop.h 2026-03-30 22:10:41.000000000 +0000 ++++ modified_include/cuda_vdpau_interop.h 2026-03-31 16:32:26.000000000 +0000 @@ -49,11 +49,13 @@ #if !defined(__CUDA_VDPAU_INTEROP_H__) @@ -92,18 +56,3 @@ diff -u4 -r --new-file include/cuda_vdpau_interop.h modified_include/cuda_vdpau_ #if defined(__cplusplus) extern "C" { #endif /* __cplusplus */ -diff -u4 -r --new-file include/driver_types.h modified_include/driver_types.h ---- include/driver_types.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/driver_types.h 2026-03-30 22:10:41.000000000 +0000 -@@ -2292,9 +2292,11 @@ - - /** - * CUDA IPC Handle Size - */ -+#ifndef CUDA_IPC_HANDLE_SIZE - #define CUDA_IPC_HANDLE_SIZE 64 -+#endif - - /** - * CUDA IPC event handle - */ From 712b7e6cab9e40d86506fb04fd66b4dab9d7cef5 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 31 Mar 2026 20:04:38 +0000 Subject: [PATCH 2/7] Support anonymous union and enum --- backends/cuda/headers.patch | 43 +------------------------------------ utils/gen_library_base.rb | 8 ++++--- 2 files changed, 6 insertions(+), 45 deletions(-) diff --git a/backends/cuda/headers.patch b/backends/cuda/headers.patch index 8a587acd..8924f401 100644 --- a/backends/cuda/headers.patch +++ b/backends/cuda/headers.patch @@ -1,47 +1,6 @@ -diff -u4 -r --new-file include/cuda.h modified_include/cuda.h ---- include/cuda.h 2026-03-27 20:57:07.000000000 +0000 -+++ 
modified_include/cuda.h 2026-03-31 16:32:26.000000000 +0000 -@@ -499,9 +499,9 @@ - * Per-operation parameters for ::cuStreamBatchMemOp - */ - typedef union CUstreamBatchMemOpParams_union { - CUstreamBatchMemOpType operation; -- struct CUstreamMemOpWaitValueParams_st { -+ struct { - CUstreamBatchMemOpType operation; - CUdeviceptr address; - union { - cuuint32_t value; -@@ -509,9 +509,9 @@ - }; - unsigned int flags; - CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */ - } waitValue; -- struct CUstreamMemOpWriteValueParams_st { -+ struct { - CUstreamBatchMemOpType operation; - CUdeviceptr address; - union { - cuuint32_t value; -@@ -519,13 +519,13 @@ - }; - unsigned int flags; - CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */ - } writeValue; -- struct CUstreamMemOpFlushRemoteWritesParams_st { -+ struct { - CUstreamBatchMemOpType operation; - unsigned int flags; - } flushRemoteWrites; -- struct CUstreamMemOpMemoryBarrierParams_st { /**< Only supported in the _v2 API */ -+ struct { /**< Only supported in the _v2 API */ - CUstreamBatchMemOpType operation; - unsigned int flags; - } memoryBarrier; - cuuint64_t pad[6]; diff -u4 -r --new-file include/cuda_vdpau_interop.h modified_include/cuda_vdpau_interop.h --- include/cuda_vdpau_interop.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/cuda_vdpau_interop.h 2026-03-31 16:32:26.000000000 +0000 ++++ modified_include/cuda_vdpau_interop.h 2026-03-31 16:56:42.000000000 +0000 @@ -49,11 +49,13 @@ #if !defined(__CUDA_VDPAU_INTEROP_H__) diff --git a/utils/gen_library_base.rb b/utils/gen_library_base.rb index 693bf711..e7b20973 100644 --- a/utils/gen_library_base.rb +++ b/utils/gen_library_base.rb @@ -341,12 +341,14 @@ def to_ffi m.type.to_ffi elsif m.type.is_a?(Pointer) ':pointer' - elsif m.type.name + elsif m.type.name && !m.type.is_a?(Struct) && !m.type.is_a?(Union) to_ffi_name(m.type.name) elsif m.type.is_a?(Struct) - "(Class::new(#{FFI_STRUCT}) { layout 
#{gen_layout(m.type.to_ffi)} }.by_value)" + s = m.type.name ? $all_structs.find { |st| st.name == m.type.name } : m.type + "(Class::new(#{FFI_STRUCT}) { layout #{gen_layout(s.to_ffi)} }.by_value)" elsif m.type.is_a?(Union) - "(Class::new(#{FFI_UNION}) { layout #{gen_layout(m.type.to_ffi)} }.by_value)" + u = m.type.name ? $all_unions&.find { |un| un.name == m.type.name } : m.type + "(Class::new(#{FFI_UNION}) { layout #{gen_layout(u.to_ffi)} }.by_value)" else raise "unknown type: #{m.type}" end From 5db825fa346ba80b8143a377fc4d59ed56376427 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 31 Mar 2026 20:28:03 +0000 Subject: [PATCH 3/7] No more cuda header! --- backends/cuda/Makefile.am | 5 ++--- backends/cuda/extract/cuda_api.h | 9 +-------- backends/cuda/extract/cudart_api.h | 9 --------- backends/cuda/gen_cuda_exports_extract.rb | 1 - backends/cuda/headers.patch | 17 ----------------- 5 files changed, 3 insertions(+), 38 deletions(-) delete mode 100644 backends/cuda/headers.patch diff --git a/backends/cuda/Makefile.am b/backends/cuda/Makefile.am index 2fd2d163..342b869e 100644 --- a/backends/cuda/Makefile.am +++ b/backends/cuda/Makefile.am @@ -66,16 +66,15 @@ CLEANFILES = \ $(BTX_CUDA_GENERATED) &: $(top_srcdir)/xprof/btx_interval_model.yaml btx_cudamatching_model.yaml btx_cuda_model.yaml $(METABABEL) -u btx_cuda_model.yaml -d $(top_srcdir)/xprof/btx_interval_model.yaml -t FILTER -o btx_filter_cuda -p cudainterval -c interval --matching $(srcdir)/btx_cudamatching_model.yaml -i cuda.h.include -$(MODIFIED_CUDA_HDR) &: $(CUDA_HDR) $(srcdir)/headers.patch +$(MODIFIED_CUDA_HDR) &: $(CUDA_HDR) $(RM) -r modified_include/ cp -r $(srcdir)/include/ modified_include/ chmod -R u+w modified_include/ - cat $(srcdir)/headers.patch | patch -i - -d modified_include/ -s -p1 clean-local: $(RM) -r modified_include -EXTRA_DIST += $(srcdir)/include headers.patch +EXTRA_DIST += $(srcdir)/include CUDA_EXTRACT_H = $(srcdir)/extract/cuda_api.h $(srcdir)/extract/cudart_api.h EXTRA_DIST 
+= $(CUDA_EXTRACT_H) diff --git a/backends/cuda/extract/cuda_api.h b/backends/cuda/extract/cuda_api.h index 303231d0..25cf1f82 100644 --- a/backends/cuda/extract/cuda_api.h +++ b/backends/cuda/extract/cuda_api.h @@ -1,17 +1,10 @@ #define __CUDA_API_VERSION_INTERNAL = 1 -#define THAPI_NO_INCLUDE #include #include -typedef int32_t VdpStatus; -typedef uint32_t VdpFuncId; -typedef uint32_t VdpDevice; -typedef VdpStatus -VdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer); -typedef uint32_t VdpVideoSurface; -typedef uint32_t VdpOutputSurface; +#include #include diff --git a/backends/cuda/extract/cudart_api.h b/backends/cuda/extract/cudart_api.h index d489c7eb..d987d097 100644 --- a/backends/cuda/extract/cudart_api.h +++ b/backends/cuda/extract/cudart_api.h @@ -1,5 +1,4 @@ #define __CUDA_API_VERSION_INTERNAL = 1 -#define THAPI_NO_INCLUDE #include @@ -7,14 +6,6 @@ #include <__cudart.h> -typedef int32_t VdpStatus; -typedef uint32_t VdpFuncId; -typedef uint32_t VdpDevice; -typedef VdpStatus -VdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer); -typedef uint32_t VdpVideoSurface; -typedef uint32_t VdpOutputSurface; - #include #include diff --git a/backends/cuda/gen_cuda_exports_extract.rb b/backends/cuda/gen_cuda_exports_extract.rb index 69643b0f..490484eb 100644 --- a/backends/cuda/gen_cuda_exports_extract.rb +++ b/backends/cuda/gen_cuda_exports_extract.rb @@ -5,7 +5,6 @@ puts <<~EOF #define __CUDA_API_VERSION_INTERNAL=1 - #define THAPI_NO_INCLUDE #include EOF diff --git a/backends/cuda/headers.patch b/backends/cuda/headers.patch deleted file mode 100644 index 8924f401..00000000 --- a/backends/cuda/headers.patch +++ /dev/null @@ -1,17 +0,0 @@ -diff -u4 -r --new-file include/cuda_vdpau_interop.h modified_include/cuda_vdpau_interop.h ---- include/cuda_vdpau_interop.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/cuda_vdpau_interop.h 2026-03-31 16:56:42.000000000 +0000 -@@ -49,11 +49,13 @@ - - #if 
!defined(__CUDA_VDPAU_INTEROP_H__) - #define __CUDA_VDPAU_INTEROP_H__ - -+#ifndef THAPI_NO_INCLUDE - #include "cuda_runtime_api.h" - - #include -+#endif - - #if defined(__cplusplus) - extern "C" { - #endif /* __cplusplus */ From 3820018989f7d81fc2f7fb9b1f1ec94216bb57ba Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 31 Mar 2026 20:30:08 +0000 Subject: [PATCH 4/7] Add vdpau.h stub --- backends/cuda/include/vdpau/vdpau.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 backends/cuda/include/vdpau/vdpau.h diff --git a/backends/cuda/include/vdpau/vdpau.h b/backends/cuda/include/vdpau/vdpau.h new file mode 100644 index 00000000..bf49ccc0 --- /dev/null +++ b/backends/cuda/include/vdpau/vdpau.h @@ -0,0 +1,15 @@ +/* Stub for VDPAU types used by CUDA interop headers. */ +#ifndef THAPI_VDPAU_STUB_H +#define THAPI_VDPAU_STUB_H + +#include + +typedef int32_t VdpStatus; +typedef uint32_t VdpFuncId; +typedef uint32_t VdpDevice; +typedef VdpStatus VdpGetProcAddress(VdpDevice device, VdpFuncId function_id, + void **function_pointer); +typedef uint32_t VdpVideoSurface; +typedef uint32_t VdpOutputSurface; + +#endif From a6735b91f34824e3e9aff61e9e50e50fd8c2d6c5 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 31 Mar 2026 20:36:43 +0000 Subject: [PATCH 5/7] Add README.md --- backends/cuda/README.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 backends/cuda/README.md diff --git a/backends/cuda/README.md b/backends/cuda/README.md new file mode 100644 index 00000000..e9e74ada --- /dev/null +++ b/backends/cuda/README.md @@ -0,0 +1,7 @@ +# CUDA Backend + +## VDPAU Stub + +The CUDA SDK includes interop headers (`cudaVDPAU.h`, `cuda_vdpau_interop.h`) that reference VDPAU types (`VdpDevice`, `VdpGetProcAddress`, etc.) from `<vdpau/vdpau.h>`. This system header is not always available. + +A stub is provided in `include/vdpau/vdpau.h` that defines the necessary VDPAU types so that the CUDA tracer can be compiled. 
From e4e27dd84b537542b40d0b8c6734825f38def497 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 31 Mar 2026 21:16:43 +0000 Subject: [PATCH 6/7] Fix anonymous enum who have a typedef --- utils/gen_library_base.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/utils/gen_library_base.rb b/utils/gen_library_base.rb index e7b20973..c469d681 100644 --- a/utils/gen_library_base.rb +++ b/utils/gen_library_base.rb @@ -1,5 +1,9 @@ require_relative 'yaml_ast' +def has_typedef?(name) + $all_types.any? { |t| t.type.respond_to?(:name) && t.type.name == name } +end + def to_ffi_name(name, default = true) case name when nil @@ -341,7 +345,7 @@ def to_ffi m.type.to_ffi elsif m.type.is_a?(Pointer) ':pointer' - elsif m.type.name && !m.type.is_a?(Struct) && !m.type.is_a?(Union) + elsif m.type.name && has_typedef?(m.type.name) to_ffi_name(m.type.name) elsif m.type.is_a?(Struct) s = m.type.name ? $all_structs.find { |st| st.name == m.type.name } : m.type @@ -349,6 +353,8 @@ def to_ffi elsif m.type.is_a?(Union) u = m.type.name ? 
$all_unions&.find { |un| un.name == m.type.name } : m.type "(Class::new(#{FFI_UNION}) { layout #{gen_layout(u.to_ffi)} }.by_value)" + elsif m.type.name + to_ffi_name(m.type.name) else raise "unknown type: #{m.type}" end From f2b1c9fc23d07b9ab6fd303f808362c22392ae7b Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 31 Mar 2026 21:26:17 +0000 Subject: [PATCH 7/7] Refactoring arguably cleaner --- utils/gen_library_base.rb | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/utils/gen_library_base.rb b/utils/gen_library_base.rb index c469d681..5b96413b 100644 --- a/utils/gen_library_base.rb +++ b/utils/gen_library_base.rb @@ -341,22 +341,27 @@ module Composite def to_ffi unamed_count = 0 members.map do |m| - mt = if m.type.is_a?(Array) + mt = case m.type + when Array m.type.to_ffi - elsif m.type.is_a?(Pointer) + when Pointer ':pointer' - elsif m.type.name && has_typedef?(m.type.name) - to_ffi_name(m.type.name) - elsif m.type.is_a?(Struct) - s = m.type.name ? $all_structs.find { |st| st.name == m.type.name } : m.type - "(Class::new(#{FFI_STRUCT}) { layout #{gen_layout(s.to_ffi)} }.by_value)" - elsif m.type.is_a?(Union) - u = m.type.name ? $all_unions&.find { |un| un.name == m.type.name } : m.type - "(Class::new(#{FFI_UNION}) { layout #{gen_layout(u.to_ffi)} }.by_value)" - elsif m.type.name - to_ffi_name(m.type.name) + when Struct + if m.type.name && has_typedef?(m.type.name) + to_ffi_name(m.type.name) + else + s = m.type.name ? $all_structs.find { |st| st.name == m.type.name } : m.type + "(Class::new(#{FFI_STRUCT}) { layout #{gen_layout(s.to_ffi)} }.by_value)" + end + when Union + if m.type.name && has_typedef?(m.type.name) + to_ffi_name(m.type.name) + else + u = m.type.name ? $all_unions&.find { |un| un.name == m.type.name } : m.type + "(Class::new(#{FFI_UNION}) { layout #{gen_layout(u.to_ffi)} }.by_value)" + end else - raise "unknown type: #{m.type}" + m.type.name ? 
to_ffi_name(m.type.name) : raise("unknown type: #{m.type}") end [m.name ? m.name.to_sym.inspect : ":_unamed_#{unamed_count += 1}", mt] end