diff --git a/packages/adsim/install_adsim.sh b/packages/adsim/install_adsim.sh
index 5d54d196c..352c18e83 100755
--- a/packages/adsim/install_adsim.sh
+++ b/packages/adsim/install_adsim.sh
@@ -103,7 +103,7 @@ post_build() {
     # Create library directory and copy all shared libraries
     mkdir -p "${BENCHMARKS_DIR}/lib/"
     cp ${BUILD_DIR}/staging/lib/*.so* "${BENCHMARKS_DIR}/lib/"
-    cp ${BUILD_DIR}/staging/lib64/*.so* "${BENCHMARKS_DIR}/lib/"
+    cp ${BUILD_DIR}/staging/lib64/*.so* "${BENCHMARKS_DIR}/lib/" || true
 
     # Copy runtime configurations, Python scripts, and QPS search tool
     cp -R "${BENCHPRESS_ROOT}/packages/adsim/configs" "${BENCHMARKS_DIR}"
diff --git a/packages/adsim/install_fbgemm.sh b/packages/adsim/install_fbgemm.sh
index 9fbab440b..3f354bab2 100755
--- a/packages/adsim/install_fbgemm.sh
+++ b/packages/adsim/install_fbgemm.sh
@@ -16,10 +16,10 @@ MINICONDA_PREFIX=${FBGEMM_STAGING_DIR}/miniconda
 
 
 # Version of FBGEMM to install
-FBGEMM_VERSION=v1.2.0
+FBGEMM_VERSION=v1.4.0
 
 # Version of PyTorch to install
-PYTORCH_VERSION=2.7.0
+PYTORCH_VERSION=2.8.0
 
 MINICONDA_VERSION="5.1-0"
 
diff --git a/packages/adsim/patches/treadmill.patch b/packages/adsim/patches/treadmill.patch
index 0d9f50961..fafb0e1f7 100644
--- a/packages/adsim/patches/treadmill.patch
+++ b/packages/adsim/patches/treadmill.patch
@@ -1,7 +1,7 @@
 diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclude=gen-cpp2' '--exclude=build' '--exclude=third_party' treadmill-src/build.sh treadmill/build.sh
 --- treadmill-src/build.sh	1969-12-31 16:00:00.000000000 -0800
 +++ treadmill/build.sh	2025-08-04 15:44:21.731476550 -0700
-@@ -0,0 +1,42 @@
+@@ -0,0 +1,47 @@
 +#!/bin/bash
 +
 +# Exit on error
@@ -15,7 +15,12 @@ diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclud
 +INSTALL=false
 +
 +
-+sudo dnf install -y numactl-devel
++if command -v dnf >/dev/null 2>&1; then
++    sudo dnf install -y numactl-devel
++elif command -v apt-get >/dev/null 2>&1; then
++    sudo apt-get update
++    sudo apt-get install -y libnuma-dev numactl
++fi
 +# Create build directory
 +mkdir -p build
 +cd build
@@ -6972,7 +6977,7 @@ diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclud
 diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclude=gen-cpp2' '--exclude=build' '--exclude=third_party' treadmill-src/src/Scheduler.cpp treadmill/src/Scheduler.cpp
 --- treadmill-src/src/Scheduler.cpp	1969-12-31 16:00:00.000000000 -0800
 +++ treadmill/src/Scheduler.cpp	2025-08-04 15:44:21.785297704 -0700
-@@ -0,0 +1,107 @@
+@@ -0,0 +1,113 @@
 +/*
 + * Copyright (c) 2014, Facebook, Inc.
 + * All rights reserved.
@@ -7034,8 +7039,14 @@ diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclud
 +     to avoid memory order violation, which greatly improves its performance.
 +     http://siyobik.info.gf/main/reference/instruction/PAUSE */
 +  for (auto start = nowNs(); nowNs() - start < ns;) {
-+    asm volatile("pause");
-+  }
++#if defined(__x86_64__) || defined(__i386__)
++    asm volatile("pause");
++#elif defined(__aarch64__) || defined(__arm64__)
++    asm volatile("yield" ::: "memory");
++#else
++    asm volatile("" ::: "memory");
++#endif
++  }
 +}
 +
 +/**
diff --git a/packages/adsim/src/cpp2/server/CMakeLists.txt b/packages/adsim/src/cpp2/server/CMakeLists.txt
index 621aecc0a..c9fa8f675 100644
--- a/packages/adsim/src/cpp2/server/CMakeLists.txt
+++ b/packages/adsim/src/cpp2/server/CMakeLists.txt
@@ -9,8 +9,14 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
 endif()
 
 target_compile_definitions(folly_memcpy_obj PRIVATE FOLLY_MEMCPY_IS_MEMCPY)
-target_compile_options(folly_memcpy_obj PRIVATE
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
+  target_compile_options(folly_memcpy_obj PRIVATE
   -mavx2 -march=haswell)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+  target_compile_options(folly_memcpy_obj PRIVATE
+    -march=armv8.5-a+sve2
+    -mcpu=native)
+endif()
 
 # Data objects library
 add_library(data_objects DataObjects.cpp)
diff --git a/packages/adsim/src/cpp2/server/dwarfs/CMakeLists.txt b/packages/adsim/src/cpp2/server/dwarfs/CMakeLists.txt
index 5a7c722f4..ec6be90ca 100644
--- a/packages/adsim/src/cpp2/server/dwarfs/CMakeLists.txt
+++ b/packages/adsim/src/cpp2/server/dwarfs/CMakeLists.txt
@@ -20,12 +20,19 @@ add_library(gemm GEMM.cc GEMM.h
   ${FBGEMM_SRC_DIR}/test/QuantizationHelpers.cc
 )
 
-target_compile_options(gemm PRIVATE
-  ${COROUTINES_FLAG}
-  -m64
-  -mavx2
-  -mfma
-  -masm=intel)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
+  target_compile_options(gemm PRIVATE
+    ${COROUTINES_FLAG}
+    -m64
+    -mavx2
+    -mfma
+    -masm=intel)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+  target_compile_options(gemm PRIVATE
+    ${COROUTINES_FLAG}
+    -march=armv8.5-a+sve2
+    -mcpu=native)
+endif()
 
 target_link_directories(gemm PUBLIC
   ${ADSIM_STAGING_DIR}/include
@@ -40,12 +47,19 @@ add_dependencies(gemm fbgemm)
 
 add_library(embedding Embedding.cc Embedding.h)
 
-target_compile_options(embedding PRIVATE
-  ${COROUTINES_FLAG}
-  -m64
-  -mavx2
-  -mfma
-  -masm=intel)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
+  target_compile_options(embedding PRIVATE
+    ${COROUTINES_FLAG}
+    -m64
+    -mavx2
+    -mfma
+    -masm=intel)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+  target_compile_options(embedding PRIVATE
+    ${COROUTINES_FLAG}
+    -march=armv8.5-a+sve2
+    -mcpu=native)
+endif()
 
 target_link_directories(embedding PUBLIC
   ${ADSIM_STAGING_DIR}/include