ggml-cpu: extend RVV quantization vec dot to higher VLENs #204
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: CI | |
| on: | |
| workflow_dispatch: # allows manual triggering | |
| push: | |
| branches: | |
| - master | |
| paths: [ | |
| '.github/workflows/build.yml', | |
| '.github/workflows/build-cmake-pkg.yml', | |
| '**/CMakeLists.txt', | |
| '**/.cmake', | |
| '**/*.h', | |
| '**/*.hpp', | |
| '**/*.c', | |
| '**/*.cpp', | |
| '**/*.cu', | |
| '**/*.cuh', | |
| '**/*.swift', | |
| '**/*.m', | |
| '**/*.metal', | |
| '**/*.comp', | |
| '**/*.glsl', | |
| '**/*.wgsl' | |
| ] | |
| pull_request: | |
| types: [opened, synchronize, reopened] | |
| paths: [ | |
| '.github/workflows/build.yml', | |
| '.github/workflows/build-cmake-pkg.yml', | |
| '**/CMakeLists.txt', | |
| '**/.cmake', | |
| '**/*.h', | |
| '**/*.hpp', | |
| '**/*.c', | |
| '**/*.cpp', | |
| '**/*.cu', | |
| '**/*.cuh', | |
| '**/*.swift', | |
| '**/*.m', | |
| '**/*.metal', | |
| '**/*.comp', | |
| '**/*.glsl', | |
| '**/*.wgsl' | |
| ] | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} | |
| cancel-in-progress: true | |
| env: | |
| GGML_NLOOP: 3 | |
| GGML_N_THREADS: 1 | |
| LLAMA_LOG_COLORS: 1 | |
| LLAMA_LOG_PREFIX: 1 | |
| LLAMA_LOG_TIMESTAMPS: 1 | |
| jobs: | |
| build-cmake-pkg: | |
| uses: ./.github/workflows/build-cmake-pkg.yml | |
| ubuntu-cpu: | |
| strategy: | |
| matrix: | |
| include: | |
| - build: 'x64' | |
| os: ubuntu-22.04 | |
| - build: 'arm64' | |
| os: ubuntu-24.04-arm | |
| runs-on: ${{ matrix.os }} | |
| steps: | |
| - name: Clone | |
| id: checkout | |
| uses: actions/checkout@v6 | |
| - name: ccache | |
| uses: ggml-org/ccache-action@v1.2.21 | |
| with: | |
| key: ubuntu-cpu-${{ matrix.build }} | |
| evict-old-files: 1d | |
| save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| - name: Build Dependencies | |
| id: build_depends | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y --no-install-recommends \ | |
| python3 python3-pip python3-dev python3-wheel \ | |
| libjpeg-dev build-essential libssl-dev \ | |
| git-lfs | |
| - name: Toolchain workaround (GCC 14) | |
| if: ${{ contains(matrix.os, 'ubuntu-24.04') }} | |
| run: | | |
| sudo apt-get install -y gcc-14 g++-14 | |
| echo "CC=gcc-14" >> "$GITHUB_ENV" | |
| echo "CXX=g++-14" >> "$GITHUB_ENV" | |
| - name: Python Dependencies | |
| id: python_depends | |
| run: | | |
| export PIP_BREAK_SYSTEM_PACKAGES="1" | |
| python3 -m pip install --upgrade pip setuptools | |
| pip3 install ./gguf-py | |
| - name: Build | |
| id: cmake_build | |
| run: | | |
| cmake -B build \ | |
| -DLLAMA_FATAL_WARNINGS=ON \ | |
| -DGGML_RPC=ON | |
| time cmake --build build --config Release -j $(nproc) | |
| - name: Test | |
| id: cmake_test | |
| run: | | |
| cd build | |
| ctest -L main --verbose --timeout 900 | |
| - name: Test llama2c conversion | |
| id: llama2c_test | |
| run: | | |
| cd build | |
| echo "Fetch tokenizer" | |
| wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin | |
| echo "Fetch llama2c model" | |
| wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin | |
| ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf | |
| ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 | |
| ubuntu-24-vulkan: | |
| strategy: | |
| matrix: | |
| include: | |
| - build: 'x64' | |
| os: ubuntu-24.04 | |
| - build: 'arm64' | |
| os: ubuntu-24.04-arm | |
| runs-on: ${{ matrix.os }} | |
| steps: | |
| - name: Clone | |
| id: checkout | |
| uses: actions/checkout@v6 | |
| - name: Dependencies | |
| id: depends | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build | |
| echo "CC=gcc-14" >> "$GITHUB_ENV" | |
| echo "CXX=g++-14" >> "$GITHUB_ENV" | |
| - name: Configure | |
| id: cmake_configure | |
| run: | | |
| cmake -B build \ | |
| -G "Ninja" \ | |
| -DCMAKE_BUILD_TYPE=RelWithDebInfo \ | |
| -DGGML_BACKEND_DL=ON \ | |
| -DGGML_CPU_ALL_VARIANTS=ON \ | |
| -DGGML_VULKAN=ON | |
| - name: Build | |
| id: cmake_build | |
| run: | | |
| time cmake --build build -j $(nproc) | |
| windows-latest: | |
| runs-on: windows-2025 | |
| env: | |
| OPENBLAS_VERSION: 0.3.23 | |
| SDE_VERSION: 9.33.0-2024-01-07 | |
| VULKAN_VERSION: 1.4.313.2 | |
| strategy: | |
| matrix: | |
| include: | |
| - build: 'cpu-x64 (static)' | |
| arch: 'x64' | |
| defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF' | |
| - build: 'openblas-x64' | |
| arch: 'x64' | |
| defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' | |
| - build: 'vulkan-x64' | |
| arch: 'x64' | |
| defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON' | |
| - build: 'llvm-arm64' | |
| arch: 'arm64' | |
| defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' | |
| steps: | |
| - name: Clone | |
| id: checkout | |
| uses: actions/checkout@v6 | |
| - name: ccache | |
| uses: ggml-org/ccache-action@v1.2.21 | |
| with: | |
| key: windows-latest-${{ matrix.build }} | |
| variant: ccache | |
| evict-old-files: 1d | |
| save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| - name: Download OpenBLAS | |
| id: get_openblas | |
| if: ${{ matrix.build == 'openblas-x64' }} | |
| run: | | |
| curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" | |
| curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" | |
| mkdir $env:RUNNER_TEMP/openblas | |
| tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas | |
| $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) | |
| $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) | |
| $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe') | |
| & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll | |
| - name: Install Vulkan SDK | |
| id: get_vulkan | |
| if: ${{ matrix.build == 'vulkan-x64' }} | |
| run: | | |
| curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" | |
| & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install | |
| Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" | |
| Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" | |
| - name: Install Ninja | |
| id: install_ninja | |
| run: | | |
| choco install ninja | |
| - name: Build | |
| id: cmake_build | |
| run: | | |
| cmake -S . -B build ${{ matrix.defines }} ` | |
| -DLLAMA_BUILD_BORINGSSL=ON | |
| cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} | |
| - name: Add libopenblas.dll | |
| id: add_libopenblas_dll | |
| if: ${{ matrix.build == 'openblas-x64' }} | |
| run: | | |
| cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll | |
| cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt | |
| - name: Test | |
| id: cmake_test | |
| if: ${{ matrix.arch == 'x64' }} | |
| run: | | |
| cd build | |
| ctest -L main -C Release --verbose --timeout 900 | |
| # TODO: disabled for now, consider adding tests for all CPU variants instead | |
| # - name: Test (Intel SDE) | |
| # id: cmake_test_sde | |
| # if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation | |
| # run: | | |
| # curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" | |
| # # for some weird reason windows tar doesn't like sde tar.xz | |
| # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz | |
| # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar | |
| # $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) | |
| # cd build | |
| # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 | |
| # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 | |
| ubuntu-latest-cuda: | |
| runs-on: ubuntu-latest | |
| container: nvidia/cuda:12.6.2-devel-ubuntu24.04 | |
| steps: | |
| - name: Clone | |
| id: checkout | |
| uses: actions/checkout@v6 | |
| - name: Install dependencies | |
| env: | |
| DEBIAN_FRONTEND: noninteractive | |
| run: | | |
| apt update | |
| apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev | |
| - name: ccache | |
| uses: ggml-org/ccache-action@v1.2.21 | |
| with: | |
| key: ubuntu-latest-cuda | |
| evict-old-files: 1d | |
| save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| - name: Build with CMake | |
| # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project | |
| run: | | |
| cmake -S . -B build -G Ninja \ | |
| -DLLAMA_FATAL_WARNINGS=ON \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DCMAKE_CUDA_ARCHITECTURES=89-real \ | |
| -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ | |
| -DGGML_NATIVE=OFF \ | |
| -DGGML_CUDA=ON \ | |
| -DGGML_CUDA_CUB_3DOT2=ON | |
| cmake --build build | |
| windows-2022-cuda: | |
| runs-on: windows-2022 | |
| strategy: | |
| matrix: | |
| cuda: ['12.4'] | |
| steps: | |
| - name: Clone | |
| id: checkout | |
| uses: actions/checkout@v6 | |
| - name: Install ccache | |
| uses: ggml-org/ccache-action@v1.2.21 | |
| with: | |
| key: windows-cuda-${{ matrix.cuda }} | |
| variant: ccache | |
| evict-old-files: 1d | |
| save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| - name: Install Cuda Toolkit | |
| uses: ./.github/actions/windows-setup-cuda | |
| with: | |
| cuda_version: ${{ matrix.cuda }} | |
| - name: Install Ninja | |
| id: install_ninja | |
| run: | | |
| choco install ninja | |
| - name: Build | |
| id: cmake_build | |
| shell: cmd | |
| # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project | |
| run: | | |
| call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 | |
| cmake -S . -B build -G "Ninja Multi-Config" ^ | |
| -DLLAMA_BUILD_SERVER=ON ^ | |
| -DLLAMA_BUILD_BORINGSSL=ON ^ | |
| -DGGML_NATIVE=OFF ^ | |
| -DGGML_BACKEND_DL=ON ^ | |
| -DGGML_CPU_ALL_VARIANTS=ON ^ | |
| -DGGML_CUDA=ON ^ | |
| -DGGML_RPC=ON ^ | |
| -DGGML_CUDA_CUB_3DOT2=ON | |
| set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 | |
| cmake --build build --config Release -j %NINJA_JOBS% -t ggml | |
| cmake --build build --config Release | |
| # TODO: simplify the following workflows using a matrix | |
| # TODO: run lighter CI on PRs and the full CI only on master (if needed) | |
| # note: moved to build-self-hosted.yml - can remove from here when everything is stable | |
| # ggml-ci-x64-cpu-low-perf: | |
| # runs-on: ubuntu-22.04 | |
| # | |
| # steps: | |
| # - name: Clone | |
| # id: checkout | |
| # uses: actions/checkout@v6 | |
| # | |
| # - name: ccache | |
| # uses: ggml-org/ccache-action@v1.2.21 | |
| # with: | |
| # key: ggml-ci-x64-cpu-low-perf | |
| # evict-old-files: 1d | |
| # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| # | |
| # - name: Dependencies | |
| # id: depends | |
| # run: | | |
| # sudo apt-get update | |
| # sudo apt-get install build-essential | |
| # | |
| # - name: Test | |
| # id: ggml-ci | |
| # run: | | |
| # LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt | |
| # note: moved to build-self-hosted.yml - can remove from here when everything is stable | |
| # ggml-ci-arm64-cpu-low-perf: | |
| # runs-on: ubuntu-22.04-arm | |
| # | |
| # steps: | |
| # - name: Clone | |
| # id: checkout | |
| # uses: actions/checkout@v6 | |
| # | |
| # - name: ccache | |
| # uses: ggml-org/ccache-action@v1.2.21 | |
| # with: | |
| # key: ggml-ci-arm64-cpu-low-perf | |
| # evict-old-files: 1d | |
| # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| # | |
| # - name: Dependencies | |
| # id: depends | |
| # run: | | |
| # sudo apt-get update | |
| # sudo apt-get install build-essential | |
| # | |
| # - name: Test | |
| # id: ggml-ci | |
| # run: | | |
| # LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt | |
| # note: moved to build-self-hosted.yml - can remove from here when everything is stable | |
| # ggml-ci-x64-cpu-high-perf: | |
| # runs-on: ubuntu-22.04 | |
| # | |
| # steps: | |
| # - name: Clone | |
| # id: checkout | |
| # uses: actions/checkout@v6 | |
| # | |
| # - name: ccache | |
| # uses: ggml-org/ccache-action@v1.2.21 | |
| # with: | |
| # key: ggml-ci-x64-cpu-high-perf | |
| # evict-old-files: 1d | |
| # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| # | |
| # - name: Dependencies | |
| # id: depends | |
| # run: | | |
| # sudo apt-get update | |
| # sudo apt-get install build-essential | |
| # | |
| # - name: Test | |
| # id: ggml-ci | |
| # run: | | |
| # LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt | |
| # note: moved to build-self-hosted.yml - can remove from here when everything is stable | |
| # ggml-ci-arm64-cpu-high-perf: | |
| # runs-on: ubuntu-22.04-arm | |
| # | |
| # steps: | |
| # - name: Clone | |
| # id: checkout | |
| # uses: actions/checkout@v6 | |
| # | |
| # - name: ccache | |
| # uses: ggml-org/ccache-action@v1.2.21 | |
| # with: | |
| # key: ggml-ci-arm64-cpu-high-perf | |
| # evict-old-files: 1d | |
| # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| # | |
| # - name: Dependencies | |
| # id: depends | |
| # run: | | |
| # sudo apt-get update | |
| # sudo apt-get install build-essential | |
| # | |
| # - name: Test | |
| # id: ggml-ci | |
| # run: | | |
| # LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt | |
| ggml-ci-arm64-cpu-high-perf-sve: | |
| runs-on: ubuntu-22.04-arm | |
| steps: | |
| - name: Clone | |
| id: checkout | |
| uses: actions/checkout@v6 | |
| - name: ccache | |
| uses: ggml-org/ccache-action@v1.2.21 | |
| with: | |
| key: ggml-ci-arm64-cpu-high-perf-sve | |
| evict-old-files: 1d | |
| save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| - name: Dependencies | |
| id: depends | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install build-essential | |
| - name: Test | |
| id: ggml-ci | |
| run: | | |
| LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt | |
| ggml-ci-arm64-cpu-kleidiai: | |
| runs-on: ubuntu-22.04-arm | |
| steps: | |
| - name: Clone | |
| id: checkout | |
| uses: actions/checkout@v6 | |
| - name: ccache | |
| uses: ggml-org/ccache-action@v1.2.21 | |
| with: | |
| key: ggml-ci-arm64-cpu-kleidiai | |
| evict-old-files: 1d | |
| save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} | |
| - name: Dependencies | |
| id: depends | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential | |
| - name: Test | |
| id: ggml-ci | |
| run: | | |
| GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt |