diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index d9cbfc9..7793ebe 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -168,7 +168,7 @@ if(NOT DAWN_BUILD_FOUND) # Ensure source present on required commit (idempotent remote setup) if(NOT DEFINED DAWN_COMMIT OR DAWN_COMMIT STREQUAL "") - set(DAWN_COMMIT "e1d6e12337080cf9f6d8726209e86df449bc6e9a" CACHE STRING "Dawn commit to checkout" FORCE) + set(DAWN_COMMIT "3f79f3aefe0b0a498002564fcfb13eb21ab6c047" CACHE STRING "Dawn commit to checkout" FORCE) endif() file(MAKE_DIRECTORY ${DAWN_DIR}) execute_process(COMMAND git init WORKING_DIRECTORY "${DAWN_DIR}") diff --git a/examples/matmul/run.cpp b/examples/matmul/run.cpp index 47edc05..2c2b36d 100644 --- a/examples/matmul/run.cpp +++ b/examples/matmul/run.cpp @@ -613,6 +613,93 @@ inline KernelCode createMatmulWithTranspose(const char *shaderTemplate, const si return {unrolledCode, workgroupSize, precision}; } +inline KernelCode createMatmul12(const char *shaderTemplate, const size_t M, + const size_t K, const size_t N, + const size_t TM, const size_t TN, + const size_t LID, + const Shape &workgroupSize = {256, 1, 1}, + NumType precision = kf32) { + std::string codeString(shaderTemplate); + replaceAll(codeString, {{"{{precision}}", toString(precision)}, + {"{{M}}", toString(M)}, + {"{{K}}", toString(K)}, + {"{{N}}", toString(N)}, + {"{{TM}}", toString(TM)}, + {"{{TN}}", toString(TN)}, + {"{{LID}}", toString(LID)} + }); + return {loopUnrolling(codeString), workgroupSize, precision}; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Optimised WGSL matrix‑multiply kernel using subgroupMatrixLoad/Store +// and subgroupMatrixMultiplyAccumulate +// ───────────────────────────────────────────────────────────────────────────── +const char* kShaderSubgroupMatrixMultiply = R"( +enable chromium_experimental_subgroup_matrix; +diagnostic (off, chromium.subgroup_matrix_uniformity); + +@group(0) @binding(0) var<storage, read_write> A: array<{{precision}}>; 
+@group(0) @binding(1) var<storage, read_write> B: array<{{precision}}>; +@group(0) @binding(2) var<storage, read_write> C: array<{{precision}}>; + +@compute @workgroup_size({{workgroupSize}}) +fn main(@builtin(workgroup_id) wg: vec3<u32>, + @builtin(local_invocation_id) localID : vec3<u32>) { + + let rowStart: u32 = wg.x * 8u * {{TM}}; + let colStart: u32 = (wg.y * {{LID}} + localID.y) * 8u * {{TN}}; + + let baseA: u32 = rowStart * {{K}}; + let baseB: u32 = colStart; + let cBase: u32 = rowStart * {{N}} + colStart; + + var Ax: array<subgroup_matrix_left<{{precision}}, 8, 8>, {{TM}}>; + var Bx: array<subgroup_matrix_right<{{precision}}, 8, 8>, {{TN}}>; + + // 4x4 accumulators (8x8 each) + var accxx: array<subgroup_matrix_result<{{precision}}, 8, 8>, {{TM}} * {{TN}}>; + + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + Ax[idx_i] = subgroup_matrix_left<{{precision}}, 8, 8>(0); + } + + for (var idx_i: u32 = 0; idx_i < {{TN}}; idx_i++) { + Bx[idx_i] = subgroup_matrix_right<{{precision}}, 8, 8>(0); + } + + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + for (var idx_j: u32 = 0; idx_j < {{TN}}; idx_j++) { + accxx[idx_i+idx_j*{{TM}}] = subgroup_matrix_result<{{precision}}, 8, 8>(0); + } + } + + for (var k: u32 = 0u; k < {{K}}; k = k + 8u) { + workgroupBarrier(); + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + Ax[idx_i] = subgroupMatrixLoad<subgroup_matrix_left<{{precision}}, 8, 8>>(&A, baseA + k + 8u * {{K}} * idx_i, false, {{K}}); + } + + for (var idx_i: u32 = 0; idx_i < {{TN}}; idx_i++) { + Bx[idx_i] = subgroupMatrixLoad<subgroup_matrix_right<{{precision}}, 8, 8>>(&B, baseB + k * {{N}} + 8u * idx_i, false, {{N}}); + } + + for (var idx_j: u32 = 0; idx_j < {{TN}}; idx_j++) { + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + accxx[idx_j*{{TM}} + idx_i] = subgroupMatrixMultiplyAccumulate(Ax[idx_i], Bx[idx_j], accxx[idx_j*{{TM}} + idx_i]); + } + } + } + + workgroupBarrier(); + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + for (var idx_j: u32 = 0; idx_j < {{TN}}; idx_j++) { + subgroupMatrixStore(&C, cBase + idx_i * 8u * {{N}} + 8u * idx_j, accxx[idx_j*{{TM}} + idx_i], false, {{N}}); + } + } +} +)"; + /** * @brief No-Op shader with matmul bindings for performance testing */ @@ -683,26 +770,30 @@ Kernel 
selectMatmul(Context &ctx, int version, const Bindings &bindings, size_t M, size_t K, size_t N, NumType numtype) { Kernel kernel; + CompilationInfo info; if (version == 1) { Shape wgSize = {256, 1, 1}; Shape nWorkgroups = cdiv({M, N, 1}, {16, 16, 1}); KernelCode matmul = createNoOp(kShaderNoOp, /*wgsize*/ wgSize); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 2) { Shape wgSize = {16, 16, 1}; LOG(kDefLog, kInfo, "wgSize: %s", toString(wgSize).c_str()); KernelCode matmul = createMatmul1(kShaderMatmul1, M, K, N, /*wgsize*/ wgSize, numtype); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ cdiv({M, N, 1}, wgSize)); + /*nWorkgroups*/ cdiv({M, N, 1}, wgSize), + NoParam{}, &info); } else if (version == 3) { static constexpr size_t tileSize = 16; KernelCode matmul = createMatmul2(kShaderMatmul2, M, K, N, /*wgSize*/ {tileSize * tileSize, 1, 1}, numtype); kernel = createKernel(ctx, matmul, bindings, - /* nWorkgroups*/ cdiv({M, N, 1}, {tileSize, tileSize, 1})); + /* nWorkgroups*/ cdiv({M, N, 1}, {tileSize, tileSize, 1}), + NoParam{}, &info); } else if (version == 4 || version == 6) { static constexpr size_t BM = 64; static constexpr size_t BK = 4; @@ -721,7 +812,8 @@ Kernel selectMatmul(Context &ctx, int version, numtype, /*Loop unrolling*/ version == 6 ? true: false); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 5 || version == 7) { static constexpr size_t BM = 64; static constexpr size_t BK = 8; @@ -739,7 +831,8 @@ Kernel selectMatmul(Context &ctx, int version, numtype, /*Loop unrolling*/ version == 7 ? 
true: false); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 8 || version == 10) { static constexpr size_t BM = 64; static constexpr size_t BK = 8; @@ -757,7 +850,8 @@ Kernel selectMatmul(Context &ctx, int version, numtype, /*Loop unrolling*/ true); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 9 || version == 11) { static constexpr size_t BM = 64; static constexpr size_t BK = 8; @@ -774,8 +868,38 @@ Kernel selectMatmul(Context &ctx, int version, /*wgSize*/ wgSize, numtype); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); + } else if (version == 12 || version == 13) { + // f16: Subgroup matrix multiply + static constexpr size_t TM = 4; + static constexpr size_t TN = 8; + static constexpr size_t LID = 2; + Shape wgSize = {32, LID, 1}; // One subgroup per workgroup + Shape nWorkgroups = {cdiv(M, 8 * TM), cdiv(N, 8 * TN * LID), 1}; + LOG(kDefLog, kInfo, "M: %zu, K: %zu, N: %zu", M, K, N); + LOG(kDefLog, kInfo, "wgSize: ( %s )", toString(wgSize).c_str()); + LOG(kDefLog, kInfo, "nWorkgroups: ( %s )", toString(nWorkgroups).c_str()); + KernelCode matmul = createMatmul12(kShaderSubgroupMatrixMultiply, M, K, N, TM, TN, LID, wgSize, numtype); + kernel = createKernel(ctx, matmul, bindings, nWorkgroups, + NoParam{}, &info); + } + + if (info.status != WGPUCompilationInfoRequestStatus_Success) { + LOG(kDefLog, kError, "Failed to compile shader"); + for (size_t i = 0; i < info.messages.size(); i++) { + LOG(kDefLog, kError, "Line %llu, Pos %llu: %s", info.lineNums[i], + info.linePos[i], info.messages[i].c_str()); + } + exit(1); + } else { + LOG(kDefLog, kInfo, "Shader compiled successfully"); + for (size_t i = 0; i < info.messages.size(); i++) { + LOG(kDefLog, kInfo, "Line %llu, Pos %llu: %s", 
info.lineNums[i], + info.linePos[i], info.messages[i].c_str()); + } } + return kernel; } @@ -791,41 +915,51 @@ void runTest(int version, size_t M, size_t K, size_t N, assert(numtype == kf16); } - // Allocate GPU buffers and copy data - WGPUDeviceDescriptor devDescriptor = {}; - devDescriptor.requiredFeatureCount = 1; - devDescriptor.requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data(); - - Context ctx; - if (numtype == kf16) { - ctx = createContext( - {}, {}, - /*device descriptor, enabling f16 in WGSL*/ - { - .requiredFeatureCount = 1, - .requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data() - }); - if (ctx.adapterStatus != WGPURequestAdapterStatus_Success) { - LOG(kDefLog, kError, "Failed to create adapter with f16 support, try running an f32 test instead (`export MATMUL_VERSION=9)."); - exit(1); + static WGPUDawnTogglesDescriptor toggles = {}; + toggles.chain.sType = WGPUSType_DawnTogglesDescriptor; + const char* enableList[] = {"allow_unsafe_apis"}; + toggles.enabledToggles = enableList; + toggles.enabledToggleCount = 1; + + static WGPUDeviceDescriptor devDesc = {}; + devDesc.nextInChain = &toggles.chain; + devDesc.requiredFeatureCount = 3, + devDesc.requiredFeatures = std::array{ + WGPUFeatureName_ShaderF16, + WGPUFeatureName_Subgroups, + WGPUFeatureName_ChromiumExperimentalSubgroupMatrix + }.data(); + devDesc.uncapturedErrorCallbackInfo = WGPUUncapturedErrorCallbackInfo { + .callback = [](WGPUDevice const * device, WGPUErrorType type, WGPUStringView msg, void*, void*) { + LOG(kDefLog, kError, "[Uncaptured %d] %.*s\n", (int)type, (int)msg.length, msg.data); } - if (ctx.deviceStatus != WGPURequestDeviceStatus_Success) { - LOG(kDefLog, kError, "Failed to create device with f16 support, try running an f32 test instead. 
(`export MATMUL_VERSION=9)"); - exit(1); + }; + devDesc.deviceLostCallbackInfo = WGPUDeviceLostCallbackInfo { + .mode = WGPUCallbackMode_AllowSpontaneous, + .callback = [](WGPUDevice const * device, WGPUDeviceLostReason reason, WGPUStringView msg, void*, void*) { + LOG(kDefLog, kError, "[DeviceLost %d] %.*s\n", (int)reason, (int)msg.length, msg.data); } - } + }; - if (numtype == kf32) { - ctx = createContext({}, {}, {}); - if (ctx.adapterStatus != WGPURequestAdapterStatus_Success || - ctx.deviceStatus != WGPURequestDeviceStatus_Success) { - LOG(kDefLog, kError, "Failed to create adapter or device"); - // stop execution - exit(1); - } else { - LOG(kDefLog, kInfo, "Successfully created adapter and device"); + static WGPULimits requiredLimits = WGPU_LIMITS_INIT; + devDesc.requiredLimits = &requiredLimits; + Context ctx = createContext({}, {}, devDesc); + + WGPULoggingCallbackInfo logCb{ + .callback = [](WGPULoggingType type, WGPUStringView msg, void*, void*) { + LOG(kDefLog, kError, "[WGPU %d] %.*s\n", (int)type, (int)msg.length, msg.data); } - } + }; + wgpuDeviceSetLoggingCallback(ctx.device, logCb); + + if (ctx.adapterStatus != WGPURequestAdapterStatus_Success || + ctx.deviceStatus != WGPURequestDeviceStatus_Success) { + LOG(kDefLog, kError, "Failed to create adapter or device"); + // stop execution + exit(1); + } else { + LOG(kDefLog, kInfo, "Successfully created adapter and device"); + } Tensor input = createTensor(ctx, Shape{M, K}, numtype, inputPtr.get()); Tensor weights = createTensor(ctx, Shape{N, K}, numtype, weightsPtr.get()); // column-major @@ -859,7 +993,7 @@ void runTest(int version, size_t M, size_t K, size_t N, // Use microsecond for more accurate time measurement auto duration = std::chrono::duration_cast(end - start); - float gflops = 2 * M * N * + float gflops = 2.0f * M * N * K / // factor of 2 for multiplication & accumulation (static_cast(duration.count()) / 1000000.0) / 1000000000.0 * static_cast(nIter); @@ -870,7 +1004,7 @@ void runTest(int 
version, size_t M, size_t K, size_t N, show(outputPtr.get(), M, N, "Output[0]").c_str()); LOG(kDefLog, kInfo, "\n\n====================================================================" - "============\nExecution Time: (M = %d, K = %d, N = %d) x %d iterations " + "============\nExecution Time: (M = %zu, K = %zu, N = %zu) x %zu iterations " ":\n%.1f " "milliseconds / dispatch ~ %.2f " "GFLOPS\n================================================================" @@ -913,13 +1047,16 @@ const std::string versionToStr(int version){ case 9: return "f32: 2D blocktiling with loop unrolling, vectorization and transpose"; case 10: return "f16: 2D blocktiling with loop unrolling and vectorization"; case 11: return "f16: 2D blocktiling with loop unrolling, vectorization and transpose"; + case 12: return "f16: Subgroup matrix multiply with transpose (default)"; + case 13: return "f32: Subgroup matrix multiply with transpose"; default: return "Not specified"; } } int main() { + std::cout << "Starting matmul test..." << std::endl; char* version_str = getenv("MATMUL_VERSION"); - int version = version_str == NULL ? 10 : atoi(version_str); + int version = version_str == NULL ? 12 : atoi(version_str); // 1 == f32: No-Op // 2 == f32: naive matmul // 3 == f32: tiling @@ -931,8 +1068,10 @@ int main() { // 9 == f32: 2D blocktiling with loop unrolling, vectorization and transpose // 10 == f16: 2D blocktiling with loop unrolling and vectorization (default) // 11 == f16: 2D blocktiling with loop unrolling, vectorization and transpose - bool enableF16 = version == 10 || version ==11; - bool transposedInput = version == 9 || version == 11; + // 12 == f16: Subgroup matrix multiply with transpose (default) + // 13 == f32: Subgroup matrix multiply with transpose + bool enableF16 = version == 10 || version ==11 || version == 12; + bool transposedInput = version == 9 || version == 11 || version == 12 || version == 13; NumType numtype = enableF16 ? 
kf16 : kf32; size_t M, K, N; // Matrix dimensions diff --git a/gpu.hpp b/gpu.hpp index d1758b1..fa373b2 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -1580,8 +1580,7 @@ inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message, * and a promise to signal completion. * @param userdata2 Unused. */ -inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status, - WGPUStringView message, +inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status, WGPUStringView message, void *userdata1, void * /*userdata2*/) { const CallbackData *cbData = static_cast<const CallbackData *>(userdata1); // Ensure the queue work finished successfully. @@ -2824,8 +2823,7 @@ Kernel createKernel(Context &ctx, const KernelCode &code, * when the work is done. * @param userdata2 Unused. */ -inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, - WGPUStringView message, +inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, WGPUStringView message, void *userdata1, void * /*userdata2*/) { // Cast the userdata pointer back to our heap‑allocated promise. 
auto *p = reinterpret_cast *>(userdata1); diff --git a/third_party/headers/webgpu/webgpu.h b/third_party/headers/webgpu/webgpu.h index 988997a..5e182c0 100644 --- a/third_party/headers/webgpu/webgpu.h +++ b/third_party/headers/webgpu/webgpu.h @@ -149,6 +149,7 @@ typedef struct WGPUTextureViewImpl* WGPUTextureView WGPU_OBJECT_ATTRIBUTE; // Structure forward declarations struct WGPUAdapterPropertiesD3D; struct WGPUAdapterPropertiesVk; +struct WGPUAdapterPropertiesWGPU; struct WGPUBindGroupDynamicBindingArray; struct WGPUBlendComponent; struct WGPUBufferBindingLayout; @@ -257,6 +258,8 @@ struct WGPUSurfaceSourceWindowsHWND; struct WGPUSurfaceSourceXCBWindow; struct WGPUSurfaceSourceXlibWindow; struct WGPUSurfaceTexture; +struct WGPUTexelBufferBindingEntry; +struct WGPUTexelBufferBindingLayout; struct WGPUTexelBufferViewDescriptor; struct WGPUTexelCopyBufferLayout; struct WGPUTextureBindingLayout; @@ -294,7 +297,7 @@ struct WGPUSharedFenceExportInfo; struct WGPUSharedTextureMemoryAHardwareBufferProperties; struct WGPUSharedTextureMemoryBeginAccessDescriptor; struct WGPUSharedTextureMemoryDmaBufDescriptor; -struct WGPUSharedTextureMemoryEndAccessState; +struct WGPUSharedTextureMemoryMetalEndAccessState; struct WGPUSurfaceDescriptor; struct WGPUTexelCopyBufferInfo; struct WGPUTexelCopyTextureInfo; @@ -312,6 +315,7 @@ struct WGPUDeviceDescriptor; struct WGPUPipelineLayoutDescriptor; struct WGPURenderPassPixelLocalStorage; struct WGPUSharedTextureMemoryDescriptor; +struct WGPUSharedTextureMemoryEndAccessState; struct WGPUSharedTextureMemoryProperties; struct WGPUTextureViewDescriptor; struct WGPUVertexState; @@ -610,8 +614,8 @@ typedef enum WGPUFeatureName { WGPUFeatureName_SharedFenceEGLSync = 0x00050038, WGPUFeatureName_DawnDeviceAllocatorControl = 0x00050039, WGPUFeatureName_TextureComponentSwizzle = 0x0005003A, - WGPUFeatureName_ChromiumExperimentalPrimitiveId = 0x0005003B, WGPUFeatureName_ChromiumExperimentalBindless = 0x0005003C, + 
WGPUFeatureName_AdapterPropertiesWGPU = 0x0005003D, WGPUFeatureName_Force32 = 0x7FFFFFFF } WGPUFeatureName WGPU_ENUM_ATTRIBUTE; @@ -893,6 +897,10 @@ typedef enum WGPUSType { WGPUSType_BindGroupLayoutDynamicBindingArray = 0x0005004A, WGPUSType_DynamicBindingArrayLimits = 0x0005004B, WGPUSType_BindGroupDynamicBindingArray = 0x0005004C, + WGPUSType_TexelBufferBindingEntry = 0x0005004D, + WGPUSType_TexelBufferBindingLayout = 0x0005004E, + WGPUSType_SharedTextureMemoryMetalEndAccessState = 0x0005004F, + WGPUSType_AdapterPropertiesWGPU = 0x00050050, WGPUSType_Force32 = 0x7FFFFFFF } WGPUSType WGPU_ENUM_ATTRIBUTE; @@ -916,6 +924,13 @@ typedef enum WGPUSurfaceGetCurrentTextureStatus { WGPUSurfaceGetCurrentTextureStatus_Force32 = 0x7FFFFFFF } WGPUSurfaceGetCurrentTextureStatus WGPU_ENUM_ATTRIBUTE; +typedef enum WGPUTexelBufferAccess { + WGPUTexelBufferAccess_Undefined = 0x00000000, + WGPUTexelBufferAccess_ReadOnly = 0x00000001, + WGPUTexelBufferAccess_ReadWrite = 0x00000002, + WGPUTexelBufferAccess_Force32 = 0x7FFFFFFF +} WGPUTexelBufferAccess WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUTextureAspect { WGPUTextureAspect_Undefined = 0x00000000, WGPUTextureAspect_All = 0x00000001, @@ -1436,6 +1451,20 @@ typedef struct WGPUAdapterPropertiesVk { /*.driverVersion=*/0 _wgpu_COMMA \ }) +// Can be chained in WGPUAdapterInfo +typedef struct WGPUAdapterPropertiesWGPU { + WGPUChainedStruct chain; + WGPUBackendType backendType; +} WGPUAdapterPropertiesWGPU WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_ADAPTER_PROPERTIES_WGPU_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesWGPU, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesWGPU _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.backendType=*/WGPUBackendType_Undefined _wgpu_COMMA \ +}) + // Can be chained in WGPUBindGroupDescriptor typedef struct WGPUBindGroupDynamicBindingArray { WGPUChainedStruct chain; @@ -3007,6 +3036,36 @@ typedef struct WGPUSurfaceTexture { 
/*.status=*/_wgpu_ENUM_ZERO_INIT(WGPUSurfaceGetCurrentTextureStatus) _wgpu_COMMA \ }) +// Can be chained in WGPUBindGroupEntry +typedef struct WGPUTexelBufferBindingEntry { + WGPUChainedStruct chain; + WGPUTexelBufferView texelBufferView; +} WGPUTexelBufferBindingEntry WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_TEXEL_BUFFER_BINDING_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelBufferBindingEntry, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_TexelBufferBindingEntry _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.texelBufferView=*/NULL _wgpu_COMMA \ +}) + +// Can be chained in WGPUBindGroupLayoutEntry +typedef struct WGPUTexelBufferBindingLayout { + WGPUChainedStruct chain; + WGPUTexelBufferAccess access; + WGPUTextureFormat format; +} WGPUTexelBufferBindingLayout WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_TEXEL_BUFFER_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelBufferBindingLayout, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_TexelBufferBindingLayout _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.access=*/WGPUTexelBufferAccess_Undefined _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ +}) + typedef struct WGPUTexelBufferViewDescriptor { WGPUChainedStruct * nextInChain; WGPUStringView label; @@ -3684,20 +3743,18 @@ typedef struct WGPUSharedTextureMemoryDmaBufDescriptor { /*.planes=*/NULL _wgpu_COMMA \ }) -typedef struct WGPUSharedTextureMemoryEndAccessState { - WGPUChainedStruct * nextInChain; - WGPUBool initialized; - size_t fenceCount; - WGPUSharedFence const * fences; - uint64_t const * signaledValues; -} WGPUSharedTextureMemoryEndAccessState WGPU_STRUCTURE_ATTRIBUTE; +// Can be chained in WGPUSharedTextureMemoryEndAccessState +typedef struct WGPUSharedTextureMemoryMetalEndAccessState { + WGPUChainedStruct chain; + WGPUFuture commandsScheduledFuture; +} WGPUSharedTextureMemoryMetalEndAccessState 
WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEndAccessState, { \ - /*.nextInChain=*/NULL _wgpu_COMMA \ - /*.initialized=*/WGPU_FALSE _wgpu_COMMA \ - /*.fenceCount=*/0 _wgpu_COMMA \ - /*.fences=*/NULL _wgpu_COMMA \ - /*.signaledValues=*/NULL _wgpu_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_METAL_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryMetalEndAccessState, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryMetalEndAccessState _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.commandsScheduledFuture=*/WGPU_FUTURE_INIT _wgpu_COMMA \ }) typedef struct WGPUSurfaceDescriptor { @@ -3962,6 +4019,22 @@ typedef struct WGPUSharedTextureMemoryDescriptor { /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) +typedef struct WGPUSharedTextureMemoryEndAccessState { + WGPUChainedStruct * nextInChain; + WGPUBool initialized; + size_t fenceCount; + WGPUSharedFence const * fences; + uint64_t const * signaledValues; +} WGPUSharedTextureMemoryEndAccessState WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_TEXTURE_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEndAccessState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.initialized=*/WGPU_FALSE _wgpu_COMMA \ + /*.fenceCount=*/0 _wgpu_COMMA \ + /*.fences=*/NULL _wgpu_COMMA \ + /*.signaledValues=*/NULL _wgpu_COMMA \ +}) + typedef struct WGPUSharedTextureMemoryProperties { WGPUChainedStruct * nextInChain; WGPUTextureUsage usage; @@ -4158,6 +4231,7 @@ typedef void (*WGPUProcAdapterPropertiesMemoryHeapsFreeMembers)(WGPUAdapterPrope typedef void (*WGPUProcAdapterPropertiesSubgroupMatrixConfigsFreeMembers)(WGPUAdapterPropertiesSubgroupMatrixConfigs adapterPropertiesSubgroupMatrixConfigs) WGPU_FUNCTION_ATTRIBUTE; // Procs of BindGroup +typedef void (*WGPUProcBindGroupDestroy)(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; typedef 
void (*WGPUProcBindGroupSetLabel)(WGPUBindGroup bindGroup, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBindGroupAddRef)(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBindGroupRelease)(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; @@ -4168,6 +4242,7 @@ typedef void (*WGPUProcBindGroupLayoutAddRef)(WGPUBindGroupLayout bindGroupLayou typedef void (*WGPUProcBindGroupLayoutRelease)(WGPUBindGroupLayout bindGroupLayout) WGPU_FUNCTION_ATTRIBUTE; // Procs of Buffer +typedef WGPUTexelBufferView (*WGPUProcBufferCreateTexelView)(WGPUBuffer buffer, WGPUTexelBufferViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBufferDestroy)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; typedef void const * (*WGPUProcBufferGetConstMappedRange)(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void * (*WGPUProcBufferGetMappedRange)(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; @@ -4455,7 +4530,9 @@ typedef uint32_t (*WGPUProcTextureGetMipLevelCount)(WGPUTexture texture) WGPU_FU typedef uint32_t (*WGPUProcTextureGetSampleCount)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUTextureUsage (*WGPUProcTextureGetUsage)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef uint32_t (*WGPUProcTextureGetWidth)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcTexturePin)(WGPUTexture texture, WGPUTextureUsage usage) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcTextureSetLabel)(WGPUTexture texture, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcTextureUnpin)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcTextureAddRef)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcTextureRelease)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; @@ -4495,6 +4572,7 @@ WGPU_EXPORT void wgpuAdapterPropertiesMemoryHeapsFreeMembers(WGPUAdapterProperti WGPU_EXPORT void 
wgpuAdapterPropertiesSubgroupMatrixConfigsFreeMembers(WGPUAdapterPropertiesSubgroupMatrixConfigs adapterPropertiesSubgroupMatrixConfigs) WGPU_FUNCTION_ATTRIBUTE; // Methods of BindGroup +WGPU_EXPORT void wgpuBindGroupDestroy(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBindGroupSetLabel(WGPUBindGroup bindGroup, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBindGroupAddRef(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBindGroupRelease(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; @@ -4505,6 +4583,7 @@ WGPU_EXPORT void wgpuBindGroupLayoutAddRef(WGPUBindGroupLayout bindGroupLayout) WGPU_EXPORT void wgpuBindGroupLayoutRelease(WGPUBindGroupLayout bindGroupLayout) WGPU_FUNCTION_ATTRIBUTE; // Methods of Buffer +WGPU_EXPORT WGPUTexelBufferView wgpuBufferCreateTexelView(WGPUBuffer buffer, WGPUTexelBufferViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBufferDestroy(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void const * wgpuBufferGetConstMappedRange(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void * wgpuBufferGetMappedRange(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; @@ -4792,7 +4871,9 @@ WGPU_EXPORT uint32_t wgpuTextureGetMipLevelCount(WGPUTexture texture) WGPU_FUNCT WGPU_EXPORT uint32_t wgpuTextureGetSampleCount(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUTextureUsage wgpuTextureGetUsage(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT uint32_t wgpuTextureGetWidth(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuTexturePin(WGPUTexture texture, WGPUTextureUsage usage) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuTextureSetLabel(WGPUTexture texture, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuTextureUnpin(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuTextureAddRef(WGPUTexture texture) 
WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuTextureRelease(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE;