diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ed0c7ac1..92714e86 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -78,7 +78,21 @@ jobs: pip install nanobind -i https://mirrors.huaweicloud.com/repository/pypi/simple bash compile_shared.sh apply_patch=true - - name: Run tests on ascend + - name: Build and install tilelang-dlc + run: | + set -ex + source /home/dlc_ci/.bashrc + conda activate dlcompiler + source /usr/local/Ascend/ascend-toolkit/set_env.sh + cd ${{ env.CI_PATH }} + export TILELANG_DLC_PATH=${{ vars.CI_BASE_PATH }}/data/tilelang-dlc + export DLCOMPILER_SOURCE=${{ env.CI_PATH }} + export TILELANG_USE_DLCOMPILER=1 + echo "whoami? $(whoami)" + echo "which python? $(which python)" + bash scripts/install_tilelang-dlc.sh + + - name: Run triton tests on ascend run: | set -ex source /home/dlc_ci/.bashrc diff --git a/backend/commonir/adapter.py b/backend/commonir/adapter.py index c142ad6a..858cf682 100644 --- a/backend/commonir/adapter.py +++ b/backend/commonir/adapter.py @@ -141,7 +141,7 @@ def lower( func = func_or_mod params = extrac_params(func) if not runtime_only else None mod = tvm.IRModule({func.attrs["global_symbol"]: func}) - target = "commonir" + target = "dlcompiler" target_host = canon_target_host(target, target_host) target_host = tvm.target.Target.canon_target(target_host) target = tvm.target.Target(target, target_host) diff --git a/commonir/src/target/rt_mod_commonir.cc b/commonir/src/target/rt_mod_commonir.cc index e6cc67cd..cf81e516 100644 --- a/commonir/src/target/rt_mod_commonir.cc +++ b/commonir/src/target/rt_mod_commonir.cc @@ -35,7 +35,7 @@ TVM_FFI_STATIC_INIT_BLOCK() { BuildTileLangCommonIR); } -TVM_REGISTER_TARGET_KIND("commonir", kDLExtDev); +TVM_REGISTER_TARGET_KIND("dlcompiler", kDLExtDev); } // namespace codegen } // namespace tvm \ No newline at end of file diff --git a/scripts/install_tilelang-dlc.sh b/scripts/install_tilelang-dlc.sh new file mode 100755 index 00000000..06d344b1 --- /dev/null +++ b/scripts/install_tilelang-dlc.sh @@ -0,0 +1,146 @@ +#!/bin/bash + +# Add command line option parsing +USE_LLVM=false +DEBUG_MODE=false +REINSTALL=false +while [[ $# -gt 0 ]]; do + case $1 in + --debug|-d) + DEBUG_MODE=true + shift + ;; + --reinstall|-r) + REINSTALL=true + shift + ;; + --enable-llvm) + USE_LLVM=true + shift + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 [--enable-llvm]" + exit 1 + ;; + esac +done + +echo "Starting installation script..." + +if [ -z "${TILELANG_DLC_PATH}" ]; then + echo "Error: 环境变量 TILELANG_DLC_PATH 未设置" >&2 + exit 1 +fi + +cd "${TILELANG_DLC_PATH}" || { + echo "Error: Can not cd to dir: '${TILELANG_DLC_PATH}'" >&2 + exit 1 +} + +echo "pwd: $(pwd)" + +# Step 1: Install Python requirements +echo "Installing Python requirements from requirements.txt..." +pip install -r requirements-dev.txt -i https://mirrors.huaweicloud.com/repository/pypi/simple +pip install -r requirements.txt -i https://mirrors.huaweicloud.com/repository/pypi/simple +if [ $? -ne 0 ]; then + echo "Error: Failed to install Python requirements." + exit 1 +else + echo "Python requirements installed successfully." +fi + + +# # Step 9: Clone and build TVM +# echo "Cloning TVM repository and initializing submodules..." +# # clone and build tvm +# git submodule update --init --recursive + +if [ "$REINSTALL" != "true" ]; then + if [ -d build ]; then + rm -rf build + fi +fi + +mkdir -p build +# cp 3rdparty/tvm/cmake/config.cmake build +cd build + +# echo "set(USE_COMMONIR ON)" >> config.cmake + +# Define common CMake parameters as array +LLVM_CMAKE_PARAMS=( + "-DLLVM_BUILD_EXAMPLES=ON" + "-DLLVM_TARGETS_TO_BUILD=X86;NVPTX;AMDGPU" + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ASSERTIONS=ON" + "-DLLVM_INSTALL_UTILS=ON" +) + +echo "Running CMake for TileLang..." +declare -a cmake_params +if $DEBUG_MODE; then + # Debug mode with additional flags + cmake_params+=("-DCMAKE_BUILD_TYPE=Debug") + cmake_params+=("-DCMAKE_CXX_FLAGS=-g3 -fno-omit-frame-pointer") + cmake_params+=("-DCMAKE_C_FLAGS=-g3 -fno-omit-frame-pointer -fno-optimize-sibling-calls") +else + # Release mode + : +fi + +# Run CMake with proper array expansion +cmake .. "${cmake_params[@]}" + +if [ $? -ne 0 ]; then + echo "Error: CMake configuration failed." + exit 1 +fi + +if [ $? -ne 0 ]; then + echo "Error: CMake configuration failed." + exit 1 +fi + +echo "Building TileLang with make..." + +# Calculate 75% of available CPU cores +# Other wise, make will use all available cores +# and it may cause the system to be unresponsive +CORES=$(nproc) +MAKE_JOBS=$(( CORES * 75 / 100 )) +make -j${MAKE_JOBS} + +if [ $? -ne 0 ]; then + echo "Error: TileLang build failed." + exit 1 +else + echo "TileLang build completed successfully." +fi + +cd .. + +# Step 11: Set environment variables +TILELANG_PATH="$(pwd)" +if [ "$REINSTALL" != "true" ]; then + if ! grep -q "# TileLang PYTHONPATH" ~/.bashrc; then + echo "Configuring environment variables for TVM..." + echo "export PYTHONPATH=${TILELANG_PATH}:\$PYTHONPATH # TileLang PYTHONPATH" >> ~/.bashrc + echo "TileLang environment variables added to ~/.bashrc" + else + echo "TileLang environment variables already configured in ~/.bashrc" + fi +fi + +# Step 12: Source .bashrc to apply changes +echo "Applying environment changes by sourcing .bashrc..." +source ~/.bashrc +if [ $? -ne 0 ]; then + echo "Error: Failed to source .bashrc." + exit 1 +else + echo "Environment configured successfully." +fi + +echo "Installation script completed successfully." diff --git a/test/commonir/add_vector.py b/test/commonir/add_vector.py deleted file mode 100644 index 03c50074..00000000 --- a/test/commonir/add_vector.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Tile-AI Corporation. -# Licensed under the MIT License. -import os - -import tilelang -import tilelang.language as T - -import torch - -# tilelang.cache.clear_cache() - -dtype = "float32" -seq_len = 1024 - - -def vec_add(N, block_N, dtype="float32"): - n_num = N // block_N - - @T.prim_func - def main( - A: T.Tensor((N), dtype), - B: T.Tensor((N), dtype), - C: T.Tensor((N), dtype), - ): - with T.Kernel(n_num, 1) as (by, bx): - start_y1 = by * block_N - start_y = start_y1 + bx - for local_y in T.Parallel(block_N): - y = start_y + local_y - C[y] = A[y] + B[y] - - return main - - -def test_vec_add(): - func = vec_add(seq_len, seq_len // 4) - compiled_kernel = tilelang.compile(func, target="commonir") - - v1 = torch.randn(size=[seq_len], dtype=eval("torch." + dtype)).npu() - v2 = torch.randn(size=[seq_len], dtype=eval("torch." + dtype)).npu() - v3 = torch.zeros(size=[seq_len], dtype=eval("torch." + dtype)).npu() - - y_ref = v1 + v2 - compiled_kernel(v1, v2, v3) - - # print(y_ref) - # print(v3) - - print( - f"The maximum difference between torch and Tilellang is " - f"{torch.max(torch.abs(y_ref - v3))}" - ) - - if torch.allclose(v3, y_ref, atol=1e-2, rtol=0): - print("✅ Tilellang and Torch match") - else: - print("❌ Tilellang and Torch differ") - diff = torch.abs(v3 - y_ref) - print(f"Max diff: {diff.max().item()}") - print(f"Mean diff: {diff.mean().item()}") - - -if __name__ == "__main__": - test_vec_add() diff --git a/test/commonir/gemm.py b/test/commonir/ascend/test_gemm.py similarity index 59% rename from test/commonir/gemm.py rename to test/commonir/ascend/test_gemm.py index e6b9d19d..b1ffabfc 100644 --- a/test/commonir/gemm.py +++ b/test/commonir/ascend/test_gemm.py @@ -9,7 +9,6 @@ device = torch.npu.current_device() dtype = torch.float16 -# tilelang.cache.clear_cache() def matmul(M, N, K, block_M, block_N, block_K, dtype="float16", accum_dtype="float"): @@ -39,9 +38,9 @@ def gemm( return gemm -def main(): +def test_gemm(): func = matmul(1024, 1024, 1024, 128, 128, 32) - kernel = tilelang.compile(func, target="commonir") + kernel = tilelang.compile(func) SIZEALL = 1024 torch.manual_seed(0) @@ -53,28 +52,11 @@ def main(): golden = a @ b mask = golden.abs() < 1.0 tmpatol = tmprtol = 2**-6 - # try: - # torch.testing.assert_close(result[mask], golden[mask], atol=tmpatol, rtol=0) - # torch.testing.assert_close(result[~mask], golden[~mask], atol=0, rtol=tmprtol) - # print("run matmul success") - # except: - # print(f"[ERROR] 存在精度问题") - # # max diff - # max_diff = torch.max(torch.abs(result - golden)) - # print(f"[ERROR] max diff: {max_diff}") - # # max diff index - # max_diff_index = torch.argmax(torch.abs(result - golden)) - # print(f"[ERROR] max diff index: {max_diff_index}") - # print(f"[ERROR] result: {result}") - print(f"result is {result}, golden is {golden}") - if torch.allclose(result, golden, atol=1e-2, rtol=1e-2): - print("✅ Tilellang and Torch match") - else: - print("❌ Tilellang and Torch differ") - diff = torch.abs(result - golden) - print(f"Max diff: {diff.max().item()}") - print(f"Mean diff: {diff.mean().item()}") + + torch.testing.assert_close(result[mask], golden[mask], atol=tmpatol, rtol=0) + torch.testing.assert_close(result[~mask], golden[~mask], atol=0, rtol=tmprtol) + print("run matmul success") if __name__ == "__main__": - main() + test_gemm() diff --git a/test/commonir/ascend/vector_add.py b/test/commonir/ascend/test_vector_add.py similarity index 97% rename from test/commonir/ascend/vector_add.py rename to test/commonir/ascend/test_vector_add.py index 5157b618..58a7a6e0 100644 --- a/test/commonir/ascend/vector_add.py +++ b/test/commonir/ascend/test_vector_add.py @@ -1,6 +1,5 @@ import os import time - import tilelang import tilelang.language as T @@ -82,7 +81,7 @@ def test_tilelang_add(): # 编译 TileLang kernel # func = vec_add(seq_len, seq_len // 32) # 使用更合适的块大小 1M, 32K func = vec_add(seq_len, seq_len // block) - compiled_kernel = tilelang.compile(func, target="commonir") + compiled_kernel = tilelang.compile(func) # 执行 TileLang kernel compiled_kernel(v1, v2, v3) @@ -94,8 +93,6 @@ def test_tilelang_add(): torch.testing.assert_close(v3, y_ref, atol=1e-2, rtol=0) print("TileLang test passed!\n") - return v1, v2, v3, y_ref - def test_triton_add(): """测试 Triton 实现""" @@ -119,8 +116,6 @@ def test_triton_add(): torch.testing.assert_close(v3, y_ref, atol=1e-2, rtol=0) print("Triton test passed!\n") - return v1, v2, v3, y_ref - def benchmark_function(func, *args, num_runs=100, warmup_runs=10): """性能测试函数""" @@ -233,8 +228,8 @@ def main(): # 运行功能测试 print("FUNCTIONALITY TESTS") print("-" * 20) - tilelang_data = test_tilelang_add() - triton_data = test_triton_add() + test_tilelang_add() + test_triton_add() # 运行性能测试 run_performance_tests() diff --git a/test/commonir/ascend/vector_sub.py b/test/commonir/ascend/test_vector_sub.py similarity index 97% rename from test/commonir/ascend/vector_sub.py rename to test/commonir/ascend/test_vector_sub.py index 742f9a0e..af3e8af5 100644 --- a/test/commonir/ascend/vector_sub.py +++ b/test/commonir/ascend/test_vector_sub.py @@ -81,7 +81,7 @@ def test_tilelang_sub(): # 编译 TileLang kernel func = vec_sub(seq_len, seq_len // block) - compiled_kernel = tilelang.compile(func, target="commonir") + compiled_kernel = tilelang.compile(func) # 执行 TileLang kernel compiled_kernel(v1, v2, v3) @@ -93,8 +93,6 @@ def test_tilelang_sub(): torch.testing.assert_close(v3, y_ref, atol=1e-2, rtol=0) print("TileLang test passed!\n") - return v1, v2, v3, y_ref - def test_triton_sub(): """测试 Triton 实现""" @@ -118,8 +116,6 @@ def test_triton_sub(): torch.testing.assert_close(v3, y_ref, atol=1e-2, rtol=0) print("Triton test passed!\n") - return v1, v2, v3, y_ref - def benchmark_function(func, *args, num_runs=100, warmup_runs=10): """性能测试函数""" @@ -232,8 +228,8 @@ def main(): # 运行功能测试 print("FUNCTIONALITY TESTS") print("-" * 20) - tilelang_data = test_tilelang_sub() - triton_data = test_triton_sub() + test_tilelang_sub() + test_triton_sub() # 运行性能测试 run_performance_tests() diff --git a/test/commonir/run_tests.sh b/test/commonir/run_tests.sh new file mode 100644 index 00000000..5222a9f1 --- /dev/null +++ b/test/commonir/run_tests.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -ex +script=$(readlink -f "$0") +script_dir=$(dirname "$script") + +function run_pytestcases() { + if [ -d ${HOME}/.triton/dump ]; then + rm -rf ${HOME}/.triton/dump + fi + if [ -d ${HOME}/.triton/cache ]; then + rm -rf ${HOME}/.triton/cache + fi + + cd ${script_dir} + TARGET_DIR="$1" + cd ${TARGET_DIR} + pytest -n 16 --dist=load . || { exit 1 ; } + +} + +pytestcase_dir=("ascend") +for test_dir in "${pytestcase_dir[@]}"; do + echo "run pytestcase in ${test_dir}" + run_pytestcases ${test_dir} +done + + +