diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml
index 891aab51ccb..b8aa8549a24 100644
--- a/.github/workflows/cpp_extra.yml
+++ b/.github/workflows/cpp_extra.yml
@@ -81,6 +81,9 @@ jobs:
     outputs:
       ci-extra: ${{ steps.check.outputs.ci-extra }}
     steps:
+      - name: Checkout Arrow
+        if: github.event_name == 'pull_request'
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
       - name: Check
         id: check
         env:
@@ -99,7 +102,15 @@ jobs:
               if [ "${n_ci_extra_labels}" -eq 1 ]; then
                 ci_extra=true
               else
-                ci_extra=false
+                git fetch origin "${GITHUB_BASE_REF}"
+                if git diff --name-only "origin/${GITHUB_BASE_REF}.." | \
+                     grep \
+                       --fixed-strings --line-regexp ".github/workflows/cpp_extra.yml" \
+                       --quiet; then
+                  ci_extra=true
+                else
+                  ci_extra=false
+                fi
               fi
               ;;
           esac
@@ -174,3 +185,88 @@ jobs:
           ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
         continue-on-error: true
         run: archery docker push ${{ matrix.image }}
+
+  jni-macos:
+    needs: check-labels
+    name: JNI macOS
+    runs-on: macos-14
+    if: needs.check-labels.outputs.ci-extra == 'true'
+    timeout-minutes: 45
+    env:
+      MACOSX_DEPLOYMENT_TARGET: "14.0"
+    steps:
+      - name: Checkout Arrow
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          fetch-depth: 0
+          submodules: recursive
+      - name: Install dependencies
+        run: |
+          brew bundle --file=cpp/Brewfile
+          # We want to link aws-sdk-cpp statically but Homebrew's
+          # aws-sdk-cpp provides only a shared library. If we have
+          # Homebrew's aws-sdk-cpp, our build mixes Homebrew's
+          # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's
+          # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
+          brew uninstall aws-sdk-cpp
+          # We want to use bundled RE2 for static linking. If
+          # Homebrew's RE2 is installed, its header file may be used.
+          # We uninstall Homebrew's RE2 to ensure using bundled RE2.
+          brew uninstall grpc || : # gRPC depends on RE2
+          brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too
+          brew uninstall re2
+          # We want to use bundled Protobuf for static linking. If
+          # Homebrew's Protobuf is installed, its library file may be
+          # used on test. We uninstall Homebrew's Protobuf to ensure using
+          # bundled Protobuf.
+          brew uninstall protobuf
+      - name: Prepare ccache
+        run: |
+          echo "CCACHE_DIR=${PWD}/ccache" >> "${GITHUB_ENV}"
+      - name: Cache ccache
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: ccache
+          key: jni-macos-${{ hashFiles('cpp/**') }}
+          restore-keys: jni-macos-
+      - name: CMake
+        run: |
+          cmake \
+            -S cpp \
+            -B cpp.build \
+            --preset=ninja-release-jni-macos \
+            -DARROW_BUILD_TESTS=ON \
+            -DCMAKE_INSTALL_PREFIX="${PWD}/cpp.install"
+      - name: Build
+        run: |
+          cmake --build cpp.build
+      - name: Install
+        run: |
+          cmake --install cpp.build
+      - name: Test
+        env:
+          ARROW_TEST_DATA: ${{ github.workspace }}/testing/data
+          PARQUET_TEST_DATA: ${{ github.workspace }}/cpp/submodules/parquet-testing/data
+        run: |
+          # MinIO is required
+          exclude_tests="arrow-s3fs-test"
+          # unstable
+          exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test"
+          exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test"
+          ctest \
+            --exclude-regex "${exclude_tests}" \
+            --label-regex unittest \
+            --output-on-failure \
+            --parallel "$(sysctl -n hw.ncpu)" \
+            --test-dir "cpp.build" \
+            --timeout 300
+      - name: Build example
+        run: |
+          cmake \
+            -S cpp/examples/minimal_build/ \
+            -B cpp/examples/minimal_build.build \
+            -GNinja \
+            -DCMAKE_INSTALL_PREFIX="${PWD}/cpp.install"
+          cmake --build cpp/examples/minimal_build.build
+          cd cpp/examples/minimal_build
+          ../minimal_build.build/arrow-example
diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json
index 66550f128fe..8bbaffc92e6 100644
--- a/cpp/CMakePresets.json
+++ b/cpp/CMakePresets.json
@@ -582,6 +582,34 @@
       ],
       "displayName": "Benchmarking build with everything 
enabled",
       "cacheVariables": {}
+    },
+    {
+      "name": "ninja-release-jni-macos",
+      "inherits": [
+        "base-release"
+      ],
+      "displayName": "Build for JNI on macOS",
+      "cacheVariables": {
+        "ARROW_ACERO": "ON",
+        "ARROW_BUILD_SHARED": "OFF",
+        "ARROW_BUILD_STATIC": "ON",
+        "ARROW_CSV": "ON",
+        "ARROW_DATASET": "ON",
+        "ARROW_DEPENDENCY_USE_SHARED": "OFF",
+        "ARROW_GANDIVA": "ON",
+        "ARROW_GANDIVA_STATIC_LIBSTDCPP": "ON",
+        "ARROW_JSON": "ON",
+        "ARROW_ORC": "ON",
+        "ARROW_PARQUET": "ON",
+        "ARROW_S3": "ON",
+        "ARROW_SUBSTRAIT": "ON",
+        "AWSSDK_SOURCE": "BUNDLED",
+        "GTest_SOURCE": "BUNDLED",
+        "PARQUET_BUILD_EXAMPLES": "OFF",
+        "PARQUET_BUILD_EXECUTABLES": "OFF",
+        "PARQUET_REQUIRE_ENCRYPTION": "OFF",
+        "re2_SOURCE": "BUNDLED"
+      }
     }
   ]
 }
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 89b6160ce1b..21bf5f98935 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -5055,6 +5055,8 @@ endif()
 # AWS SDK for C++
 
 function(build_awssdk)
+  list(APPEND CMAKE_MESSAGE_INDENT "AWS SDK for C++: ")
+
   message(STATUS "Building AWS SDK for C++ from source")
 
   # aws-c-common must be the first product because others depend on
@@ -5159,9 +5161,9 @@ function(build_awssdk)
 
   # For aws-sdk-cpp
   #
-  # We need to use CACHE variables because aws-sdk-cpp < 12.0.0 uses
+  # We need to use CACHE variables because aws-sdk-cpp < 1.12.0 uses
   # CMP0077 OLD policy. We can use normal variables when we use
-  # aws-sdk-cpp >= 12.0.0.
+  # aws-sdk-cpp >= 1.12.0.
   set(AWS_SDK_WARNINGS_ARE_ERRORS
       OFF
       CACHE BOOL "" FORCE)
@@ -5186,12 +5188,15 @@ function(build_awssdk)
       OFF
       CACHE BOOL "" FORCE)
   if(NOT WIN32)
-    set(ZLIB_INCLUDE_DIR
-        "$<TARGET_PROPERTY:ZLIB::ZLIB,INTERFACE_INCLUDE_DIRECTORIES>"
-        CACHE STRING "" FORCE)
-    set(ZLIB_LIBRARY
-        "$<TARGET_FILE:ZLIB::ZLIB>"
-        CACHE STRING "" FORCE)
+    if(ZLIB_VENDORED)
+      # Use vendored zlib.
+      set(ZLIB_INCLUDE_DIR
+          "$<TARGET_PROPERTY:ZLIB::ZLIB,INTERFACE_INCLUDE_DIRECTORIES>"
+          CACHE STRING "" FORCE)
+      set(ZLIB_LIBRARY
+          "$<TARGET_FILE:ZLIB::ZLIB>"
+          CACHE STRING "" FORCE)
+    endif()
   endif()
   if(MINGW AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9")
     # This is for RTools 40. We can remove this after we dropped
@@ -5258,9 +5263,15 @@ function(build_awssdk)
   set(AWSSDK_LINK_LIBRARIES
       ${AWSSDK_LINK_LIBRARIES}
       PARENT_SCOPE)
+
+  list(POP_BACK CMAKE_MESSAGE_INDENT)
 endfunction()
 
 if(ARROW_S3)
+  if(NOT WIN32)
+    # This is for adding system curl dependency.
+    find_curl()
+  endif()
   # Keep this in sync with s3fs.cc
   resolve_dependency(AWSSDK
                      HAVE_ALT
diff --git a/cpp/examples/minimal_build/CMakeLists.txt b/cpp/examples/minimal_build/CMakeLists.txt
index 689dba437e0..626b987b093 100644
--- a/cpp/examples/minimal_build/CMakeLists.txt
+++ b/cpp/examples/minimal_build/CMakeLists.txt
@@ -19,10 +19,15 @@ cmake_minimum_required(VERSION 3.25)
 
 project(ArrowMinimalExample)
 
-option(ARROW_LINK_SHARED "Link to the Arrow shared library" ON)
-
 find_package(Arrow REQUIRED)
 
+include(CMakeDependentOption)
+cmake_dependent_option(ARROW_LINK_SHARED
+                       "Link to the Arrow shared library if possible"
+                       ON
+                       ARROW_BUILD_SHARED
+                       OFF)
+
 if(NOT DEFINED CMAKE_CXX_STANDARD)
   set(CMAKE_CXX_STANDARD 17)
 endif()