diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index d9cbfc9..7793ebe 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -168,7 +168,7 @@ if(NOT DAWN_BUILD_FOUND) # Ensure source present on required commit (idempotent remote setup) if(NOT DEFINED DAWN_COMMIT OR DAWN_COMMIT STREQUAL "") - set(DAWN_COMMIT "e1d6e12337080cf9f6d8726209e86df449bc6e9a" CACHE STRING "Dawn commit to checkout" FORCE) + set(DAWN_COMMIT "3f79f3aefe0b0a498002564fcfb13eb21ab6c047" CACHE STRING "Dawn commit to checkout" FORCE) endif() file(MAKE_DIRECTORY ${DAWN_DIR}) execute_process(COMMAND git init WORKING_DIRECTORY "${DAWN_DIR}") diff --git a/examples/matmul/run.cpp b/examples/matmul/run.cpp index 47edc05..2c2b36d 100644 --- a/examples/matmul/run.cpp +++ b/examples/matmul/run.cpp @@ -613,6 +613,93 @@ inline KernelCode createMatmulWithTranspose(const char *shaderTemplate, const si return {unrolledCode, workgroupSize, precision}; } +inline KernelCode createMatmul12(const char *shaderTemplate, const size_t M, + const size_t K, const size_t N, + const size_t TM, const size_t TN, + const size_t LID, + const Shape &workgroupSize = {256, 1, 1}, + NumType precision = kf32) { + std::string codeString(shaderTemplate); + replaceAll(codeString, {{"{{precision}}", toString(precision)}, + {"{{M}}", toString(M)}, + {"{{K}}", toString(K)}, + {"{{N}}", toString(N)}, + {"{{TM}}", toString(TM)}, + {"{{TN}}", toString(TN)}, + {"{{LID}}", toString(LID)} + }); + return {loopUnrolling(codeString), workgroupSize, precision}; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Optimised WGSL matrix‑multiply kernel using subgroupMatrixLoad/Store +// and subgroupMatrixMultiplyAccumulate +// ───────────────────────────────────────────────────────────────────────────── +const char* kShaderSubgroupMatrixMultiply = R"( +enable chromium_experimental_subgroup_matrix; +diagnostic (off, chromium.subgroup_matrix_uniformity); + +@group(0) @binding(0) var<storage, read_write> A: array<{{precision}}>; 
+@group(0) @binding(1) var<storage, read_write> B: array<{{precision}}>; +@group(0) @binding(2) var<storage, read_write> C: array<{{precision}}>; + +@compute @workgroup_size({{workgroupSize}}) +fn main(@builtin(workgroup_id) wg: vec3<u32>, + @builtin(local_invocation_id) localID : vec3<u32>) { + + let rowStart: u32 = wg.x * 8u * {{TM}}; + let colStart: u32 = (wg.y * {{LID}} + localID.y) * 8u * {{TN}}; + + let baseA: u32 = rowStart * {{K}}; + let baseB: u32 = colStart; + let cBase: u32 = rowStart * {{N}} + colStart; + + var Ax: array<subgroup_matrix_left<{{precision}}, 8, 8>, {{TM}}>; + var Bx: array<subgroup_matrix_right<{{precision}}, 8, 8>, {{TN}}>; + + // 4x4 accumulators (8x8 each) + var accxx: array<subgroup_matrix_result<{{precision}}, 8, 8>, {{TM}} * {{TN}}>; + + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + Ax[idx_i] = subgroup_matrix_left<{{precision}}, 8, 8>(0); + } + + for (var idx_i: u32 = 0; idx_i < {{TN}}; idx_i++) { + Bx[idx_i] = subgroup_matrix_right<{{precision}}, 8, 8>(0); + } + + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + for (var idx_j: u32 = 0; idx_j < {{TN}}; idx_j++) { + accxx[idx_i+idx_j*{{TM}}] = subgroup_matrix_result<{{precision}}, 8, 8>(0); + } + } + + for (var k: u32 = 0u; k < {{K}}; k = k + 8u) { + workgroupBarrier(); + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + Ax[idx_i] = subgroupMatrixLoad<subgroup_matrix_left<{{precision}}, 8, 8>>(&A, baseA + k + 8u * {{K}} * idx_i, false, {{K}}); + } + + for (var idx_i: u32 = 0; idx_i < {{TN}}; idx_i++) { + Bx[idx_i] = subgroupMatrixLoad<subgroup_matrix_right<{{precision}}, 8, 8>>(&B, baseB + k * {{N}} + 8u * idx_i, false, {{N}}); + } + + for (var idx_j: u32 = 0; idx_j < {{TN}}; idx_j++) { + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + accxx[idx_j*{{TM}} + idx_i] = subgroupMatrixMultiplyAccumulate(Ax[idx_i], Bx[idx_j], accxx[idx_j*{{TM}} + idx_i]); + } + } + } + + workgroupBarrier(); + for (var idx_i: u32 = 0; idx_i < {{TM}}; idx_i++) { + for (var idx_j: u32 = 0; idx_j < {{TN}}; idx_j++) { + subgroupMatrixStore(&C, cBase + idx_i * 8u * {{N}} + 8u * idx_j, accxx[idx_j*{{TM}} + idx_i], false, {{N}}); + } + } +} +)"; + /** * @brief No-Op shader with matmul bindings for performance testing */ @@ -683,26 +770,30 @@ Kernel 
selectMatmul(Context &ctx, int version, const Bindings &bindings, size_t M, size_t K, size_t N, NumType numtype) { Kernel kernel; + CompilationInfo info; if (version == 1) { Shape wgSize = {256, 1, 1}; Shape nWorkgroups = cdiv({M, N, 1}, {16, 16, 1}); KernelCode matmul = createNoOp(kShaderNoOp, /*wgsize*/ wgSize); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 2) { Shape wgSize = {16, 16, 1}; LOG(kDefLog, kInfo, "wgSize: %s", toString(wgSize).c_str()); KernelCode matmul = createMatmul1(kShaderMatmul1, M, K, N, /*wgsize*/ wgSize, numtype); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ cdiv({M, N, 1}, wgSize)); + /*nWorkgroups*/ cdiv({M, N, 1}, wgSize), + NoParam{}, &info); } else if (version == 3) { static constexpr size_t tileSize = 16; KernelCode matmul = createMatmul2(kShaderMatmul2, M, K, N, /*wgSize*/ {tileSize * tileSize, 1, 1}, numtype); kernel = createKernel(ctx, matmul, bindings, - /* nWorkgroups*/ cdiv({M, N, 1}, {tileSize, tileSize, 1})); + /* nWorkgroups*/ cdiv({M, N, 1}, {tileSize, tileSize, 1}), + NoParam{}, &info); } else if (version == 4 || version == 6) { static constexpr size_t BM = 64; static constexpr size_t BK = 4; @@ -721,7 +812,8 @@ Kernel selectMatmul(Context &ctx, int version, numtype, /*Loop unrolling*/ version == 6 ? true: false); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 5 || version == 7) { static constexpr size_t BM = 64; static constexpr size_t BK = 8; @@ -739,7 +831,8 @@ Kernel selectMatmul(Context &ctx, int version, numtype, /*Loop unrolling*/ version == 7 ? 
true: false); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 8 || version == 10) { static constexpr size_t BM = 64; static constexpr size_t BK = 8; @@ -757,7 +850,8 @@ Kernel selectMatmul(Context &ctx, int version, numtype, /*Loop unrolling*/ true); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); } else if (version == 9 || version == 11) { static constexpr size_t BM = 64; static constexpr size_t BK = 8; @@ -774,8 +868,38 @@ Kernel selectMatmul(Context &ctx, int version, /*wgSize*/ wgSize, numtype); kernel = createKernel(ctx, matmul, bindings, - /*nWorkgroups*/ nWorkgroups); + /*nWorkgroups*/ nWorkgroups, + NoParam{}, &info); + } else if (version == 12 || version == 13) { + // f16: Subgroup matrix multiply + static constexpr size_t TM = 4; + static constexpr size_t TN = 8; + static constexpr size_t LID = 2; + Shape wgSize = {32, LID, 1}; // One subgroup per workgroup + Shape nWorkgroups = {cdiv(M, 8 * TM), cdiv(N, 8 * TN * LID), 1}; + LOG(kDefLog, kInfo, "M: %zu, K: %zu, N: %zu", M, K, N); + LOG(kDefLog, kInfo, "wgSize: ( %s )", toString(wgSize).c_str()); + LOG(kDefLog, kInfo, "nWorkgroups: ( %s )", toString(nWorkgroups).c_str()); + KernelCode matmul = createMatmul12(kShaderSubgroupMatrixMultiply, M, K, N, TM, TN, LID, wgSize, numtype); + kernel = createKernel(ctx, matmul, bindings, nWorkgroups, + NoParam{}, &info); + } + + if (info.status != WGPUCompilationInfoRequestStatus_Success) { + LOG(kDefLog, kError, "Failed to compile shader"); + for (size_t i = 0; i < info.messages.size(); i++) { + LOG(kDefLog, kError, "Line %llu, Pos %llu: %s", info.lineNums[i], + info.linePos[i], info.messages[i].c_str()); + } + exit(1); + } else { + LOG(kDefLog, kInfo, "Shader compiled successfully"); + for (size_t i = 0; i < info.messages.size(); i++) { + LOG(kDefLog, kInfo, "Line %llu, Pos %llu: %s", 
info.lineNums[i], + info.linePos[i], info.messages[i].c_str()); + } } + return kernel; } @@ -791,41 +915,51 @@ void runTest(int version, size_t M, size_t K, size_t N, assert(numtype == kf16); } - // Allocate GPU buffers and copy data - WGPUDeviceDescriptor devDescriptor = {}; - devDescriptor.requiredFeatureCount = 1; - devDescriptor.requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data(); - - Context ctx; - if (numtype == kf16) { - ctx = createContext( - {}, {}, - /*device descriptor, enabling f16 in WGSL*/ - { - .requiredFeatureCount = 1, - .requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data() - }); - if (ctx.adapterStatus != WGPURequestAdapterStatus_Success) { - LOG(kDefLog, kError, "Failed to create adapter with f16 support, try running an f32 test instead (`export MATMUL_VERSION=9)."); - exit(1); + static WGPUDawnTogglesDescriptor toggles = {}; + toggles.chain.sType = WGPUSType_DawnTogglesDescriptor; + const char* enableList[] = {"allow_unsafe_apis"}; + toggles.enabledToggles = enableList; + toggles.enabledToggleCount = 1; + + static WGPUDeviceDescriptor devDesc = {}; + devDesc.nextInChain = &toggles.chain; + devDesc.requiredFeatureCount = 3, + devDesc.requiredFeatures = std::array{ + WGPUFeatureName_ShaderF16, + WGPUFeatureName_Subgroups, + WGPUFeatureName_ChromiumExperimentalSubgroupMatrix + }.data(); + devDesc.uncapturedErrorCallbackInfo = WGPUUncapturedErrorCallbackInfo { + .callback = [](WGPUDevice const * device, WGPUErrorType type, WGPUStringView msg, void*, void*) { + LOG(kDefLog, kError, "[Uncaptured %d] %.*s\n", (int)type, (int)msg.length, msg.data); } - if (ctx.deviceStatus != WGPURequestDeviceStatus_Success) { - LOG(kDefLog, kError, "Failed to create device with f16 support, try running an f32 test instead. 
(`export MATMUL_VERSION=9)"); - exit(1); + }; + devDesc.deviceLostCallbackInfo = WGPUDeviceLostCallbackInfo { + .mode = WGPUCallbackMode_AllowSpontaneous, + .callback = [](WGPUDevice const * device, WGPUDeviceLostReason reason, WGPUStringView msg, void*, void*) { + LOG(kDefLog, kError, "[DeviceLost %d] %.*s\n", (int)reason, (int)msg.length, msg.data); } - } + }; - if (numtype == kf32) { - ctx = createContext({}, {}, {}); - if (ctx.adapterStatus != WGPURequestAdapterStatus_Success || - ctx.deviceStatus != WGPURequestDeviceStatus_Success) { - LOG(kDefLog, kError, "Failed to create adapter or device"); - // stop execution - exit(1); - } else { - LOG(kDefLog, kInfo, "Successfully created adapter and device"); + static WGPULimits requiredLimits = WGPU_LIMITS_INIT; + devDesc.requiredLimits = &requiredLimits; + Context ctx = createContext({}, {}, devDesc); + + WGPULoggingCallbackInfo logCb{ + .callback = [](WGPULoggingType type, WGPUStringView msg, void*, void*) { + LOG(kDefLog, kError, "[WGPU %d] %.*s\n", (int)type, (int)msg.length, msg.data); } - } + }; + wgpuDeviceSetLoggingCallback(ctx.device, logCb); + + if (ctx.adapterStatus != WGPURequestAdapterStatus_Success || + ctx.deviceStatus != WGPURequestDeviceStatus_Success) { + LOG(kDefLog, kError, "Failed to create adapter or device"); + // stop execution + exit(1); + } else { + LOG(kDefLog, kInfo, "Successfully created adapter and device"); + } Tensor input = createTensor(ctx, Shape{M, K}, numtype, inputPtr.get()); Tensor weights = createTensor(ctx, Shape{N, K}, numtype, weightsPtr.get()); // column-major @@ -859,7 +993,7 @@ void runTest(int version, size_t M, size_t K, size_t N, // Use microsecond for more accurate time measurement auto duration = std::chrono::duration_cast(end - start); - float gflops = 2 * M * N * + float gflops = 2.0f * M * N * K / // factor of 2 for multiplication & accumulation (static_cast(duration.count()) / 1000000.0) / 1000000000.0 * static_cast(nIter); @@ -870,7 +1004,7 @@ void runTest(int 
version, size_t M, size_t K, size_t N, show(outputPtr.get(), M, N, "Output[0]").c_str()); LOG(kDefLog, kInfo, "\n\n====================================================================" - "============\nExecution Time: (M = %d, K = %d, N = %d) x %d iterations " + "============\nExecution Time: (M = %zu, K = %zu, N = %zu) x %zu iterations " ":\n%.1f " "milliseconds / dispatch ~ %.2f " "GFLOPS\n================================================================" @@ -913,13 +1047,16 @@ const std::string versionToStr(int version){ case 9: return "f32: 2D blocktiling with loop unrolling, vectorization and transpose"; case 10: return "f16: 2D blocktiling with loop unrolling and vectorization"; case 11: return "f16: 2D blocktiling with loop unrolling, vectorization and transpose"; + case 12: return "f16: Subgroup matrix multiply with transpose (default)"; + case 13: return "f32: Subgroup matrix multiply with transpose"; default: return "Not specified"; } } int main() { + std::cout << "Starting matmul test..." << std::endl; char* version_str = getenv("MATMUL_VERSION"); - int version = version_str == NULL ? 10 : atoi(version_str); + int version = version_str == NULL ? 12 : atoi(version_str); // 1 == f32: No-Op // 2 == f32: naive matmul // 3 == f32: tiling @@ -931,8 +1068,10 @@ int main() { // 9 == f32: 2D blocktiling with loop unrolling, vectorization and transpose // 10 == f16: 2D blocktiling with loop unrolling and vectorization (default) // 11 == f16: 2D blocktiling with loop unrolling, vectorization and transpose - bool enableF16 = version == 10 || version ==11; - bool transposedInput = version == 9 || version == 11; + // 12 == f16: Subgroup matrix multiply with transpose (default) + // 13 == f32: Subgroup matrix multiply with transpose + bool enableF16 = version == 10 || version ==11 || version == 12; + bool transposedInput = version == 9 || version == 11 || version == 12 || version == 13; NumType numtype = enableF16 ? 
kf16 : kf32; size_t M, K, N; // Matrix dimensions diff --git a/gpu.hpp b/gpu.hpp index d1758b1..fa373b2 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -1580,8 +1580,7 @@ inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message, * and a promise to signal completion. * @param userdata2 Unused. */ -inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status, - WGPUStringView message, +inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status, WGPUStringView message, void *userdata1, void * /*userdata2*/) { const CallbackData *cbData = static_cast<const CallbackData *>(userdata1); // Ensure the queue work finished successfully. @@ -2824,8 +2823,7 @@ Kernel createKernel(Context &ctx, const KernelCode &code, * when the work is done. * @param userdata2 Unused. */ -inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, - WGPUStringView message, +inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, WGPUStringView message, void *userdata1, void * /*userdata2*/) { // Cast the userdata pointer back to our heap‑allocated promise. 
auto *p = reinterpret_cast *>(userdata1); diff --git a/third_party/headers/webgpu/webgpu.h b/third_party/headers/webgpu/webgpu.h index 988997a..5e182c0 100644 --- a/third_party/headers/webgpu/webgpu.h +++ b/third_party/headers/webgpu/webgpu.h @@ -149,6 +149,7 @@ typedef struct WGPUTextureViewImpl* WGPUTextureView WGPU_OBJECT_ATTRIBUTE; // Structure forward declarations struct WGPUAdapterPropertiesD3D; struct WGPUAdapterPropertiesVk; +struct WGPUAdapterPropertiesWGPU; struct WGPUBindGroupDynamicBindingArray; struct WGPUBlendComponent; struct WGPUBufferBindingLayout; @@ -257,6 +258,8 @@ struct WGPUSurfaceSourceWindowsHWND; struct WGPUSurfaceSourceXCBWindow; struct WGPUSurfaceSourceXlibWindow; struct WGPUSurfaceTexture; +struct WGPUTexelBufferBindingEntry; +struct WGPUTexelBufferBindingLayout; struct WGPUTexelBufferViewDescriptor; struct WGPUTexelCopyBufferLayout; struct WGPUTextureBindingLayout; @@ -294,7 +297,7 @@ struct WGPUSharedFenceExportInfo; struct WGPUSharedTextureMemoryAHardwareBufferProperties; struct WGPUSharedTextureMemoryBeginAccessDescriptor; struct WGPUSharedTextureMemoryDmaBufDescriptor; -struct WGPUSharedTextureMemoryEndAccessState; +struct WGPUSharedTextureMemoryMetalEndAccessState; struct WGPUSurfaceDescriptor; struct WGPUTexelCopyBufferInfo; struct WGPUTexelCopyTextureInfo; @@ -312,6 +315,7 @@ struct WGPUDeviceDescriptor; struct WGPUPipelineLayoutDescriptor; struct WGPURenderPassPixelLocalStorage; struct WGPUSharedTextureMemoryDescriptor; +struct WGPUSharedTextureMemoryEndAccessState; struct WGPUSharedTextureMemoryProperties; struct WGPUTextureViewDescriptor; struct WGPUVertexState; @@ -610,8 +614,8 @@ typedef enum WGPUFeatureName { WGPUFeatureName_SharedFenceEGLSync = 0x00050038, WGPUFeatureName_DawnDeviceAllocatorControl = 0x00050039, WGPUFeatureName_TextureComponentSwizzle = 0x0005003A, - WGPUFeatureName_ChromiumExperimentalPrimitiveId = 0x0005003B, WGPUFeatureName_ChromiumExperimentalBindless = 0x0005003C, + 
WGPUFeatureName_AdapterPropertiesWGPU = 0x0005003D, WGPUFeatureName_Force32 = 0x7FFFFFFF } WGPUFeatureName WGPU_ENUM_ATTRIBUTE; @@ -893,6 +897,10 @@ typedef enum WGPUSType { WGPUSType_BindGroupLayoutDynamicBindingArray = 0x0005004A, WGPUSType_DynamicBindingArrayLimits = 0x0005004B, WGPUSType_BindGroupDynamicBindingArray = 0x0005004C, + WGPUSType_TexelBufferBindingEntry = 0x0005004D, + WGPUSType_TexelBufferBindingLayout = 0x0005004E, + WGPUSType_SharedTextureMemoryMetalEndAccessState = 0x0005004F, + WGPUSType_AdapterPropertiesWGPU = 0x00050050, WGPUSType_Force32 = 0x7FFFFFFF } WGPUSType WGPU_ENUM_ATTRIBUTE; @@ -916,6 +924,13 @@ typedef enum WGPUSurfaceGetCurrentTextureStatus { WGPUSurfaceGetCurrentTextureStatus_Force32 = 0x7FFFFFFF } WGPUSurfaceGetCurrentTextureStatus WGPU_ENUM_ATTRIBUTE; +typedef enum WGPUTexelBufferAccess { + WGPUTexelBufferAccess_Undefined = 0x00000000, + WGPUTexelBufferAccess_ReadOnly = 0x00000001, + WGPUTexelBufferAccess_ReadWrite = 0x00000002, + WGPUTexelBufferAccess_Force32 = 0x7FFFFFFF +} WGPUTexelBufferAccess WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUTextureAspect { WGPUTextureAspect_Undefined = 0x00000000, WGPUTextureAspect_All = 0x00000001, @@ -1436,6 +1451,20 @@ typedef struct WGPUAdapterPropertiesVk { /*.driverVersion=*/0 _wgpu_COMMA \ }) +// Can be chained in WGPUAdapterInfo +typedef struct WGPUAdapterPropertiesWGPU { + WGPUChainedStruct chain; + WGPUBackendType backendType; +} WGPUAdapterPropertiesWGPU WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_ADAPTER_PROPERTIES_WGPU_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesWGPU, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesWGPU _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.backendType=*/WGPUBackendType_Undefined _wgpu_COMMA \ +}) + // Can be chained in WGPUBindGroupDescriptor typedef struct WGPUBindGroupDynamicBindingArray { WGPUChainedStruct chain; @@ -3007,6 +3036,36 @@ typedef struct WGPUSurfaceTexture { 
/*.status=*/_wgpu_ENUM_ZERO_INIT(WGPUSurfaceGetCurrentTextureStatus) _wgpu_COMMA \ }) +// Can be chained in WGPUBindGroupEntry +typedef struct WGPUTexelBufferBindingEntry { + WGPUChainedStruct chain; + WGPUTexelBufferView texelBufferView; +} WGPUTexelBufferBindingEntry WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_TEXEL_BUFFER_BINDING_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelBufferBindingEntry, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_TexelBufferBindingEntry _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.texelBufferView=*/NULL _wgpu_COMMA \ +}) + +// Can be chained in WGPUBindGroupLayoutEntry +typedef struct WGPUTexelBufferBindingLayout { + WGPUChainedStruct chain; + WGPUTexelBufferAccess access; + WGPUTextureFormat format; +} WGPUTexelBufferBindingLayout WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_TEXEL_BUFFER_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelBufferBindingLayout, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_TexelBufferBindingLayout _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.access=*/WGPUTexelBufferAccess_Undefined _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ +}) + typedef struct WGPUTexelBufferViewDescriptor { WGPUChainedStruct * nextInChain; WGPUStringView label; @@ -3684,20 +3743,18 @@ typedef struct WGPUSharedTextureMemoryDmaBufDescriptor { /*.planes=*/NULL _wgpu_COMMA \ }) -typedef struct WGPUSharedTextureMemoryEndAccessState { - WGPUChainedStruct * nextInChain; - WGPUBool initialized; - size_t fenceCount; - WGPUSharedFence const * fences; - uint64_t const * signaledValues; -} WGPUSharedTextureMemoryEndAccessState WGPU_STRUCTURE_ATTRIBUTE; +// Can be chained in WGPUSharedTextureMemoryEndAccessState +typedef struct WGPUSharedTextureMemoryMetalEndAccessState { + WGPUChainedStruct chain; + WGPUFuture commandsScheduledFuture; +} WGPUSharedTextureMemoryMetalEndAccessState 
WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEndAccessState, { \ - /*.nextInChain=*/NULL _wgpu_COMMA \ - /*.initialized=*/WGPU_FALSE _wgpu_COMMA \ - /*.fenceCount=*/0 _wgpu_COMMA \ - /*.fences=*/NULL _wgpu_COMMA \ - /*.signaledValues=*/NULL _wgpu_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_METAL_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryMetalEndAccessState, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryMetalEndAccessState _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.commandsScheduledFuture=*/WGPU_FUTURE_INIT _wgpu_COMMA \ }) typedef struct WGPUSurfaceDescriptor { @@ -3962,6 +4019,22 @@ typedef struct WGPUSharedTextureMemoryDescriptor { /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) +typedef struct WGPUSharedTextureMemoryEndAccessState { + WGPUChainedStruct * nextInChain; + WGPUBool initialized; + size_t fenceCount; + WGPUSharedFence const * fences; + uint64_t const * signaledValues; +} WGPUSharedTextureMemoryEndAccessState WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_TEXTURE_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEndAccessState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.initialized=*/WGPU_FALSE _wgpu_COMMA \ + /*.fenceCount=*/0 _wgpu_COMMA \ + /*.fences=*/NULL _wgpu_COMMA \ + /*.signaledValues=*/NULL _wgpu_COMMA \ +}) + typedef struct WGPUSharedTextureMemoryProperties { WGPUChainedStruct * nextInChain; WGPUTextureUsage usage; @@ -4158,6 +4231,7 @@ typedef void (*WGPUProcAdapterPropertiesMemoryHeapsFreeMembers)(WGPUAdapterPrope typedef void (*WGPUProcAdapterPropertiesSubgroupMatrixConfigsFreeMembers)(WGPUAdapterPropertiesSubgroupMatrixConfigs adapterPropertiesSubgroupMatrixConfigs) WGPU_FUNCTION_ATTRIBUTE; // Procs of BindGroup +typedef void (*WGPUProcBindGroupDestroy)(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; typedef 
void (*WGPUProcBindGroupSetLabel)(WGPUBindGroup bindGroup, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBindGroupAddRef)(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBindGroupRelease)(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; @@ -4168,6 +4242,7 @@ typedef void (*WGPUProcBindGroupLayoutAddRef)(WGPUBindGroupLayout bindGroupLayou typedef void (*WGPUProcBindGroupLayoutRelease)(WGPUBindGroupLayout bindGroupLayout) WGPU_FUNCTION_ATTRIBUTE; // Procs of Buffer +typedef WGPUTexelBufferView (*WGPUProcBufferCreateTexelView)(WGPUBuffer buffer, WGPUTexelBufferViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBufferDestroy)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; typedef void const * (*WGPUProcBufferGetConstMappedRange)(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void * (*WGPUProcBufferGetMappedRange)(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; @@ -4455,7 +4530,9 @@ typedef uint32_t (*WGPUProcTextureGetMipLevelCount)(WGPUTexture texture) WGPU_FU typedef uint32_t (*WGPUProcTextureGetSampleCount)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUTextureUsage (*WGPUProcTextureGetUsage)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef uint32_t (*WGPUProcTextureGetWidth)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcTexturePin)(WGPUTexture texture, WGPUTextureUsage usage) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcTextureSetLabel)(WGPUTexture texture, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcTextureUnpin)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcTextureAddRef)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcTextureRelease)(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; @@ -4495,6 +4572,7 @@ WGPU_EXPORT void wgpuAdapterPropertiesMemoryHeapsFreeMembers(WGPUAdapterProperti WGPU_EXPORT void 
wgpuAdapterPropertiesSubgroupMatrixConfigsFreeMembers(WGPUAdapterPropertiesSubgroupMatrixConfigs adapterPropertiesSubgroupMatrixConfigs) WGPU_FUNCTION_ATTRIBUTE; // Methods of BindGroup +WGPU_EXPORT void wgpuBindGroupDestroy(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBindGroupSetLabel(WGPUBindGroup bindGroup, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBindGroupAddRef(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBindGroupRelease(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE; @@ -4505,6 +4583,7 @@ WGPU_EXPORT void wgpuBindGroupLayoutAddRef(WGPUBindGroupLayout bindGroupLayout) WGPU_EXPORT void wgpuBindGroupLayoutRelease(WGPUBindGroupLayout bindGroupLayout) WGPU_FUNCTION_ATTRIBUTE; // Methods of Buffer +WGPU_EXPORT WGPUTexelBufferView wgpuBufferCreateTexelView(WGPUBuffer buffer, WGPUTexelBufferViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBufferDestroy(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void const * wgpuBufferGetConstMappedRange(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void * wgpuBufferGetMappedRange(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE; @@ -4792,7 +4871,9 @@ WGPU_EXPORT uint32_t wgpuTextureGetMipLevelCount(WGPUTexture texture) WGPU_FUNCT WGPU_EXPORT uint32_t wgpuTextureGetSampleCount(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUTextureUsage wgpuTextureGetUsage(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT uint32_t wgpuTextureGetWidth(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuTexturePin(WGPUTexture texture, WGPUTextureUsage usage) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuTextureSetLabel(WGPUTexture texture, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuTextureUnpin(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuTextureAddRef(WGPUTexture texture) 
WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuTextureRelease(WGPUTexture texture) WGPU_FUNCTION_ATTRIBUTE;