Skip to content

Commit 7ca5991

Browse files
reeselevine and ngxson authored
ggml webgpu: add support for emscripten builds (ggml-org#17184)
* Faster tensors (#8) Add fast matrix and matrix/vector multiplication. * Use map for shader replacements instead of pair of strings * Wasm (#9) * webgpu : fix build on emscripten * more debugging stuff * test-backend-ops: force single thread on wasm * fix single-thread case for init_tensor_uniform * use jspi * add pthread * test: remember to set n_thread for cpu backend * Add buffer label and enable dawn-specific toggles to turn off some checks * Intermediate state * Fast working f16/f32 vec4 * Working float fast mul mat * Clean up naming of mul_mat to match logical model, start work on q mul_mat * Setup for subgroup matrix mat mul * Basic working subgroup matrix * Working subgroup matrix tiling * Handle weirder sg matrix sizes (but still % sg matrix size) * Working start to gemv * working f16 accumulation with shared memory staging * Print out available subgroup matrix configurations * Vectorize dst stores for sg matrix shader * Gemv working scalar * Minor set_rows optimization (#4) * updated optimization, fixed errors * non vectorized version now dispatches one thread per element * Simplify * Change logic for set_rows pipelines --------- Co-authored-by: Neha Abbas <[email protected]> Co-authored-by: Neha Abbas <[email protected]> Co-authored-by: Reese Levine <[email protected]> * Comment on dawn toggles * Working subgroup matrix code for (semi)generic sizes * Remove some comments * Cleanup code * Update dawn version and move to portable subgroup size * Try to fix new dawn release * Update subgroup size comment * Only check for subgroup matrix configs if they are supported * Add toggles for subgroup matrix/f16 support on nvidia+vulkan * Make row/col naming consistent * Refactor shared memory loading * Move sg matrix stores to correct file * Working q4_0 * Formatting * Work with emscripten builds * Fix test-backend-ops emscripten for f16/quantized types * Use emscripten memory64 to support get_memory * Add build flags and try ci --------- Co-authored-by: Xuan Son 
Nguyen <[email protected]> * Remove extra whitespace * Move wasm single-thread logic out of test-backend-ops for cpu backend * Disable multiple threads for emscripten single-thread builds in ggml_graph_plan * Fix .gitignore * Add memory64 option and remove unneeded macros for setting threads to 1 --------- Co-authored-by: Xuan Son Nguyen <[email protected]>
1 parent b3e3060 commit 7ca5991

File tree

12 files changed

+355
-139
lines changed

12 files changed

+355
-139
lines changed

.github/workflows/build.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,46 @@ jobs:
547547
# This is using llvmpipe and runs slower than other backends
548548
ctest -L main --verbose --timeout 3600
549549
550+
ubuntu-24-wasm-webgpu:
551+
runs-on: ubuntu-24.04
552+
553+
steps:
554+
- name: Clone
555+
id: checkout
556+
uses: actions/checkout@v4
557+
558+
- name: ccache
559+
uses: ggml-org/ccache-action@v1.2.16
560+
with:
561+
key: ubuntu-latest-wasm-webgpu
562+
evict-old-files: 1d
563+
564+
- name: Install Emscripten
565+
run: |
566+
git clone https://github.com/emscripten-core/emsdk.git
567+
cd emsdk
568+
./emsdk install latest
569+
./emsdk activate latest
570+
571+
- name: Fetch emdawnwebgpu
572+
run: |
573+
DAWN_TAG="v20251027.212519"
574+
EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
575+
echo "Downloading ${EMDAWN_PKG}"
576+
curl -L -o emdawn.zip \
577+
"https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
578+
unzip emdawn.zip
579+
580+
- name: Build WASM WebGPU
581+
run: |
582+
source emsdk/emsdk_env.sh
583+
emcmake cmake -B build-wasm \
584+
-DGGML_WEBGPU=ON \
585+
-DLLAMA_CURL=OFF \
586+
-DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
587+
588+
cmake --build build-wasm --target test-backend-ops -j $(nproc)
589+
550590
ubuntu-22-cmake-hip:
551591
runs-on: ubuntu-22.04
552592
container: rocm/dev-ubuntu-22.04:6.1.2

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,5 @@ poetry.toml
134134
# IDE
135135
/*.code-workspace
136136
/.windsurf/
137+
# emscripten
138+
a.out.*

CMakeLists.txt

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,24 @@ endif()
3333

3434
option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
3535

36+
option(LLAMA_WASM_MEM64 "llama: use 64-bit memory in WASM builds" ON)
37+
3638
if (EMSCRIPTEN)
3739
set(BUILD_SHARED_LIBS_DEFAULT OFF)
3840

39-
option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
41+
# Use 64-bit memory to support backend_get_memory queries
42+
# TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
43+
# Emscripten flags applied to every target in the build.
# -sMEMORY64=1 is passed to both the compiler and the linker when
# LLAMA_WASM_MEM64 is enabled; -sALLOW_MEMORY_GROWTH=1 is always passed
# to the linker.
if (LLAMA_WASM_MEM64)
    set(LLAMA_WASM_MEM64_FLAG "-sMEMORY64=1")
    add_compile_options("${LLAMA_WASM_MEM64_FLAG}")
    add_link_options("${LLAMA_WASM_MEM64_FLAG}" "-sALLOW_MEMORY_GROWTH=1")
else()
    add_link_options("-sALLOW_MEMORY_GROWTH=1")
endif()
48+
49+
option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
50+
option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
51+
if (LLAMA_BUILD_HTML)
52+
set(CMAKE_EXECUTABLE_SUFFIX ".html")
53+
endif()
4054
else()
4155
if (MINGW)
4256
set(BUILD_SHARED_LIBS_DEFAULT OFF)

common/arg.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <thread> // for hardware_concurrency
3131
#include <vector>
3232

33+
#ifndef __EMSCRIPTEN__
3334
#ifdef __linux__
3435
#include <linux/limits.h>
3536
#elif defined(_WIN32)
@@ -41,6 +42,8 @@
4142
#else
4243
#include <sys/syslimits.h>
4344
#endif
45+
#endif
46+
4447
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
4548

4649
using json = nlohmann::ordered_json;

common/common.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,8 @@ std::string fs_get_cache_directory() {
902902
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
903903
#elif defined(_WIN32)
904904
cache_directory = std::getenv("LOCALAPPDATA");
905+
#elif defined(__EMSCRIPTEN__)
906+
GGML_ABORT("not implemented on this platform");
905907
#else
906908
# error Unknown architecture
907909
#endif

common/download.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "http.h"
2525
#endif
2626

27+
#ifndef __EMSCRIPTEN__
2728
#ifdef __linux__
2829
#include <linux/limits.h>
2930
#elif defined(_WIN32)
@@ -35,6 +36,8 @@
3536
#else
3637
#include <sys/syslimits.h>
3738
#endif
39+
#endif
40+
3841
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
3942

4043
// isatty

ggml/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ option(GGML_WEBGPU "ggml: use WebGPU"
226226
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
227227
option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
228228
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
229-
229+
option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
230230
option(GGML_ZDNN "ggml: use zDNN" OFF)
231231
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
232232
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2698,6 +2698,11 @@ struct ggml_cplan ggml_graph_plan(
26982698
n_threads = threadpool ? threadpool->n_threads_max : GGML_DEFAULT_N_THREADS;
26992699
}
27002700

2701+
#if defined(__EMSCRIPTEN__) && !defined(__EMSCRIPTEN_PTHREADS__)
2702+
// Emscripten without pthreads support can only use a single thread
2703+
n_threads = 1;
2704+
#endif
2705+
27012706
size_t work_size = 0;
27022707

27032708
struct ggml_cplan cplan;

ggml/src/ggml-webgpu/CMakeLists.txt

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,33 @@ add_dependencies(ggml-webgpu generate_shaders)
3939
if(EMSCRIPTEN)
4040
set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")
4141

42-
target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
43-
target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
42+
# Choose which emdawnwebgpu port to hand to Emscripten via --use-port:
#   - EMDAWNWEBGPU_DIR set   -> the port script inside that package directory
#   - EMDAWNWEBGPU_DIR empty -> the default built-in "emdawnwebgpu" port
if(EMDAWNWEBGPU_DIR)
    set(GGML_WEBGPU_PORT_FLAG "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
else()
    set(GGML_WEBGPU_PORT_FLAG "--use-port=emdawnwebgpu")
endif()

# The compile option is PRIVATE to ggml-webgpu; the link option is INTERFACE,
# i.e. it is a usage requirement applied to targets that link ggml-webgpu.
target_compile_options(ggml-webgpu PRIVATE "${GGML_WEBGPU_PORT_FLAG}")
target_link_options(ggml-webgpu INTERFACE "${GGML_WEBGPU_PORT_FLAG}")
51+
52+
# Select between JSPI and Asyncify (GGML_WEBGPU_JSPI option) together with the
# exception flag used alongside each. Compile options are PRIVATE to
# ggml-webgpu; link options are INTERFACE so they are applied to the targets
# that link against it.
if (GGML_WEBGPU_JSPI)
    target_compile_options(ggml-webgpu PRIVATE "-fwasm-exceptions")
    target_link_options(ggml-webgpu INTERFACE "-sJSPI" "-fwasm-exceptions")
else()
    target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
    # The link-time flag must be spelled the same as the compile-time flag:
    # "-fexceptions". The previous "-exceptions" spelling is not the
    # exception-enabling option and left compile and link settings mismatched.
    target_link_options(ggml-webgpu INTERFACE "-sASYNCIFY" "-fexceptions")
endif()
4459
else()
4560
find_package(Dawn REQUIRED)
4661
set(DawnWebGPU_TARGET dawn::webgpu_dawn)
4762
endif()
4863

4964
# Debug output: define GGML_WEBGPU_DEBUG=1 for the backend sources whenever the
# option is on.
if (GGML_WEBGPU_DEBUG)
    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
endif()

# On Emscripten builds, debug mode additionally adds -sASSERTIONS=2 as an
# INTERFACE link option (applied to targets that link ggml-webgpu).
if (GGML_WEBGPU_DEBUG AND EMSCRIPTEN)
    target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2")
endif()
5270

5371
if (GGML_WEBGPU_CPU_PROFILE)

0 commit comments

Comments
 (0)