diff --git a/.bumpversion.toml b/.bumpversion.toml index 8a8ac17855..ec13b7baef 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -6,6 +6,11 @@ filename = "INSTALLING.rst" search = "hoomd={current_version}" replace = "hoomd={new_version}" +[[tool.bumpversion.files]] +filename = "BUILDING.rst" +search = "tree/v{current_version}" +replace = "tree/v{new_version}" + [[tool.bumpversion.files]] filename = "CMakeLists.txt" search = 'HOOMD_VERSION_RAW "{current_version}"' diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index e5935bbb25..843cec3b74 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -87,12 +87,18 @@ jobs: echo 'test_docker_options=--gpus=all' >> "$GITHUB_OUTPUT" case "${{ inputs.compiler_version }}" in + 130) + echo "image=nvidia/cuda:13.0.2-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; + 129) + echo "image=nvidia/cuda:12.9.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; + 128) + echo "image=nvidia/cuda:12.8.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; + 126) + echo "image=nvidia/cuda:12.6.3-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; 125) - echo "image=nvidia/cuda:12.5.0-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; + echo "image=nvidia/cuda:12.5.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; 124) echo "image=nvidia/cuda:12.4.1-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; - 122) - echo "image=nvidia/cuda:12.2.2-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; *) echo "Unknown compiler" && exit 1;; esac @@ -134,6 +140,56 @@ jobs: activate-environment: true manifest-path: code/pixi.toml + - name: Clone hip + if: ${{ inputs.compiler_family == 'cuda' }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + repository: ROCm/rocm-systems + ref: 75ad45d5f131738a4d888499026edfeaa3a11fe3 # hip-version_7.2.53220 + path: rocm-systems + + - name: Clone hipCUB + if: ${{ inputs.compiler_family == 'cuda' }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 
+ with: + repository: ROCm/rocm-libraries + ref: 1a01b92fd0971c98258c0ea7811aea2e55ec8698 # rocm-7.1.0 + sparse-checkout: projects/hipcub + path: rocm-libraries + + - name: Patch hip + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + git apply ${GITHUB_WORKSPACE}/code/.github/workflows/fix-cuda-12-rocm-systems.patch + working-directory: rocm-systems + + - name: Patch hipcub + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + git apply ${GITHUB_WORKSPACE}/code/.github/workflows/fix-cuda-12-rocm-libraries.patch + working-directory: rocm-libraries + + - name: Install hip + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + export CLR_DIR="$(readlink -f rocm-systems/projects/clr)" + export HIP_DIR="$(readlink -f rocm-systems/projects/hip)" + export HIP_OTHER="$(readlink -f rocm-systems/projects/hipother)" + cd "$CLR_DIR" + mkdir build + cd build + cmake -DHIP_COMMON_DIR=$HIP_DIR -DHIP_PLATFORM=nvidia -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/local -DCLR_BUILD_HIP=ON -DCLR_BUILD_OCL=OFF -DHIPNV_DIR=$HIP_OTHER/hipnv .. 
+ make install + + - name: Install hipcub + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + mkdir build + cd build + cmake ../ -Dhip_ROOT=${GITHUB_WORKSPACE}/local -DHIP_PLATFORM=nvidia -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/local + make install + working-directory: rocm-libraries/projects/hipcub + - name: Set compiler if: ${{ inputs.compiler_family != 'cuda' }} run: | @@ -157,8 +213,9 @@ jobs: -DBUILD_MPCD=${BUILD_MD:-"ON"} \ -DBUILD_METAL=${BUILD_MD:-"ON"} \ -DBUILD_HPMC=${BUILD_HPMC:-"ON"} \ - -DCUDA_ARCH_LIST="60;70" \ + -DCUDA_ARCH_LIST="80" \ -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/install \ + -Dhip_ROOT=${GITHUB_WORKSPACE}/local \ -DPLUGINS="" env: ENABLE_GPU: ${{ contains(inputs.config, 'cuda') }} diff --git a/.github/workflows/fix-cuda-12-rocm-libraries.patch b/.github/workflows/fix-cuda-12-rocm-libraries.patch new file mode 100644 index 0000000000..c18d74b6ce --- /dev/null +++ b/.github/workflows/fix-cuda-12-rocm-libraries.patch @@ -0,0 +1,129 @@ +diff --git a/projects/hipcub/CMakeLists.txt b/projects/hipcub/CMakeLists.txt +index fbcecf0fc4..0fbae70749 100644 +--- a/projects/hipcub/CMakeLists.txt ++++ b/projects/hipcub/CMakeLists.txt +@@ -174,7 +174,7 @@ math(EXPR hipcub_VERSION_NUMBER "${hipcub_VERSION_MAJOR} * 100000 + ${hipcub_VER + include(VerifyCompiler) + + # Get dependencies (except rocm-cmake, included earlier) +-include(Dependencies) ++# include(Dependencies) + + if(BUILD_ADDRESS_SANITIZER) + add_compile_options(-fsanitize=address -shared-libasan) +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp +index 0f22c40517..31fab88dd6 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp +@@ -30,6 +30,7 @@ + #define HIPCUB_CUB_DEVICE_DEVICE_FOR_HPP_ + + #include "../../../config.hpp" ++#if CUDA_VERSION >= 12060 + + #include 
// IWYU pragma: export + +@@ -194,4 +195,5 @@ HIPCUB_RUNTIME_FUNCTION + + END_HIPCUB_NAMESPACE + ++#endif + #endif // HIPCUB_CUB_DEVICE_DEVICE_FOR_HPP_ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp +index f314f5a128..e22f08fba9 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp +@@ -30,6 +30,7 @@ + #define HIPCUB_CUB_DEVICE_DEVICE_MERGE_HPP_ + + #include "../../../config.hpp" ++#if CUDA_VERSION >= 12080 + + #include // IWYU pragma: export + +@@ -104,4 +105,5 @@ struct DeviceMerge + + END_HIPCUB_NAMESPACE + ++#endif + #endif // HIPCUB_CUB_DEVICE_DEVICE_MERGE_HPP_ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp +index 72ad11f7bc..038fddccdd 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp +@@ -182,6 +182,7 @@ public: + stream); + } + ++#if CUDA_VERSION >= 12080 + template + HIPCUB_RUNTIME_FUNCTION +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp +index 6812c5cfeb..33bce4aa23 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp +@@ -223,6 +223,7 @@ public: + stream); + } + ++#if CUDA_VERSION >= 12060 + template + HIPCUB_RUNTIME_FUNCTION +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp +index 681a0bbf98..d071c7d859 100644 +--- 
a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp +@@ -30,6 +30,7 @@ + #define HIBCUB_ROCPRIM_DEVICE_DEVICE_TRANSFORM_HPP_ + + #include "../../../config.hpp" ++#if CUDA_VERSION >= 12090 + + #include + +@@ -203,4 +204,5 @@ struct DeviceTransform + + END_HIPCUB_NAMESPACE + ++#endif + #endif // HIBCUB_ROCPRIM_DEVICE_DEVICE_TRANSFORM_HPP_ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp +index fc67d645b1..63540b717f 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp +@@ -50,10 +50,17 @@ HIPCUB_HOST_DEVICE HIPCUB_FORCEINLINE hipError_t + void* (&allocations)[ALLOCATIONS], + const size_t (&allocation_sizes)[ALLOCATIONS]) + { ++ #if CUDA_VERSION >= 12090 + cudaError_t error = ::cub::detail::AliasTemporaries(d_temp_storage, + temp_storage_bytes, + allocations, + allocation_sizes); ++ #else ++ cudaError_t error = ::cub::AliasTemporaries(d_temp_storage, ++ temp_storage_bytes, ++ allocations, ++ allocation_sizes); ++ #endif + + if(cudaSuccess == error) + { diff --git a/.github/workflows/fix-cuda-12-rocm-systems.patch b/.github/workflows/fix-cuda-12-rocm-systems.patch new file mode 100644 index 0000000000..3160dd2760 --- /dev/null +++ b/.github/workflows/fix-cuda-12-rocm-systems.patch @@ -0,0 +1,41 @@ +diff --git a/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h b/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h +index 8f6c295aab..05036a57fe 100644 +--- a/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h ++++ b/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h +@@ -869,7 +869,7 @@ static inline void 
hipMemcpy2DTocudaMemcpy2D(CUDA_MEMCPY2D* a, const hip_Memcpy2 + a->Height = (size_t)p->Height; + } + +-#if CUDA_VERSION >= CUDA_12020 ++#if CUDA_VERSION >= 12080 + typedef enum cudaMemcpyFlags hipMemcpyFlags; + #define hipMemcpyFlagDefault cudaMemcpyFlagDefault + #define hipMemcpyFlagPreferOverlapWithCompute cudaMemcpyFlagPreferOverlapWithCompute +@@ -2430,6 +2430,7 @@ inline static hipError_t hipMemcpy2DToArrayAsync(hipArray_t dst, size_t wOffset, + cudaMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch, width, height, kind, stream)); + } + ++#if CUDA_VERSION >= 12080 + inline static hipError_t hipMemcpyBatchAsync(void** dsts, void** srcs, size_t* sizes, size_t count, + hipMemcpyAttributes* attrs, size_t* attrsIdxs, + size_t numAttrs, size_t* failIdx, hipStream_t stream) { +@@ -2467,6 +2468,7 @@ inline static hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms* p) { + inline static hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms* p, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpy3DPeerAsync(p, stream)); + } ++#endif + + __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray_t dst, size_t wOffset, + size_t hOffset, const void* src, +@@ -3756,9 +3758,11 @@ inline static hipError_t hipLibraryEnumerateKernels(hipKernel_t* kernels, unsign + return hipCUResultTohipError(cuLibraryEnumerateKernels(kernels, numKernels, library)); + } + ++#if CUDA_VERSION >= 12060 + inline static hipError_t hipKernelGetLibrary(hipLibrary_t* library, hipKernel_t kernel) { + return hipCUResultTohipError(cuKernelGetLibrary(library, kernel)); + } ++#endif + + inline static hipError_t hipKernelGetName(const char** name, hipKernel_t kernel) { + return hipCUResultTohipError(cuKernelGetName(name, kernel)); diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 0a5bcb46bb..0d55f64ae0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -53,13 +53,13 @@ jobs: - config: [gcc, 14, -py, 313, -nomd, -nohpmc] - config: 
[gcc, 10, -py, 310, -mpi] - - config: [cuda, 124, -py, 313, -mpi] + - config: [cuda, 125, -py, 313, -mpi] validate: true - - config: [cuda, 124, -py, 313] + - config: [cuda, 125, -py, 313] validate: true - - config: [cuda, 124, -py, 313, -mpi, -debug] + - config: [cuda, 125, -py, 313, -mpi, -debug] release: @@ -95,6 +95,9 @@ jobs: - config: [gcc, 13, -py, 312, -mpi] - config: [gcc, 12, -py, 311, -mpi] - config: [gcc, 11, -py, 310, -mpi] + - config: [cuda, 126, -py, 313, -mpi] + - config: [cuda, 128, -py, 313, -mpi] + - config: [cuda, 129, -py, 313, -mpi] tests_complete: name: Unit test diff --git a/.gitmodules b/.gitmodules index 22dff3fe03..9ba5dad040 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,13 +7,6 @@ [submodule "hoomd/extern/quickhull"] path = hoomd/extern/quickhull url = https://github.com/glotzerlab/quickhull -[submodule "hoomd/extern/HIP"] - path = hoomd/extern/HIP - url = https://github.com/glotzerlab/HIP.git -[submodule "hoomd/extern/hipCUB"] - path = hoomd/extern/hipCUB - url = https://github.com/glotzerlab/hipCUB - branch = header_only [submodule "hoomd/extern/neighbor"] path = hoomd/extern/neighbor url = https://github.com/mphowardlab/neighbor.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0abc2d07d..932b02cb70 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: end-of-file-fixer exclude_types: [svg] - id: trailing-whitespace - exclude_types: [svg] + exclude_types: [svg, diff] - id: check-json - id: check-yaml exclude: "\\.clang-format" diff --git a/BUILDING.rst b/BUILDING.rst index eaad8681bf..193d9cbc51 100644 --- a/BUILDING.rst +++ b/BUILDING.rst @@ -124,22 +124,30 @@ Install additional packages needed to build the documentation: **For GPU execution** (required when ``ENABLE_GPU=on``): -- **NVIDIA CUDA Toolkit** +.. 
tab:: NVIDIA (CUDA) - *OR* + - NVIDIA CUDA Toolkit + - hip (`hip installation instructions`_) + - hipcub (`hipcub installation instructions`_) -- AMD ROCm -- HIP [with ``hipcc`` and ``hcc`` as backend] -- rocFFT -- rocPRIM -- rocThrust -- hipCUB -- roctracer-dev + .. note:: -.. note:: + hip ``hip-version_7.2.53220`` and hipcub ``rocm-7.1.0`` work with CUDA 12.9. + Apply the patches in `.github/workflows`_ to add support for CUDA 12.5–12.8. + +.. tab:: AMD (HIP) + + - AMD ROCm + - HIP [with ``hipcc`` and ``hcc`` as backend] + - rocFFT + - rocPRIM + - rocThrust + - hipCUB + - roctracer-dev - When ``ENABLE_GPU=on``, HOOMD-blue will default to CUDA. Set ``HOOMD_GPU_PLATFORM=HIP`` to - choose HIP. +.. _`hip installation instructions`: https://rocm.docs.amd.com/projects/HIP/en/latest/install/build.html +.. _`hipcub installation instructions`: https://rocm.docs.amd.com/projects/hipCUB/en/latest/install/hipCUB-install-overview.html +.. _`.github/workflows`: https://github.com/glotzerlab/hoomd-blue/tree/v6.0.0/.github/workflows **To build the documentation:** @@ -147,6 +155,7 @@ Install additional packages needed to build the documentation: - **furo** - **nbsphinx** - **ipython** +- **sphinx-inline-tabs** .. 
_Obtain the source: diff --git a/CMake/hoomd/FindCUDALibs.cmake b/CMake/hoomd/FindCUDALibs.cmake index 37e275254f..792c444913 100644 --- a/CMake/hoomd/FindCUDALibs.cmake +++ b/CMake/hoomd/FindCUDALibs.cmake @@ -1,7 +1,7 @@ # Find CUDA libraries and binaries used by HOOMD set(REQUIRED_CUDA_LIB_VARS "") -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") # find CUDA library path get_filename_component(CUDA_BIN_PATH ${CMAKE_CUDA_COMPILER} DIRECTORY) get_filename_component(CUDA_LIB_PATH "${CUDA_BIN_PATH}/../lib64/" ABSOLUTE) @@ -23,7 +23,7 @@ else() add_library(CUDA::cudart UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cudadevrt_LIBRARY cudadevrt HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cudadevrt_LIBRARY) if(CUDA_cudadevrt_LIBRARY AND NOT TARGET CUDA::cudadevrt) @@ -38,7 +38,7 @@ else() add_library(CUDA::cudadevrt UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cufft_LIBRARY cufft HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cufft_LIBRARY) if(CUDA_cufft_LIBRARY AND NOT TARGET CUDA::cufft) @@ -55,7 +55,7 @@ else() add_library(CUDA::cufft UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cusolver_LIBRARY cusolver HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cusolver_LIBRARY) if(CUDA_cusolver_LIBRARY AND NOT TARGET CUDA::cusolver) @@ -72,7 +72,7 @@ else() add_library(CUDA::cusolver UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cusparse_LIBRARY cusparse HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cusparse_LIBRARY) if(CUDA_cusparse_LIBRARY AND NOT TARGET CUDA::cusparse) @@ -96,7 +96,7 @@ if (HIP_PLATFORM STREQUAL "amd") message("Found hipfft includes: ${hipfft_INCLUDE_DIR}") endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") # find 
compute-sanitizer / cuda-memcheck find_program(CUDA_MEMCHECK_EXECUTABLE NAMES compute-sanitizer @@ -115,7 +115,7 @@ if (HIP_PLATFORM STREQUAL "nvcc") mark_as_advanced(CUDA_MEMCHECK_EXECUTABLE) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") include(FindPackageHandleStandardArgs) find_package_handle_standard_args(CUDALibs REQUIRED_VARS diff --git a/CMake/hoomd/HOOMDCUDASetup.cmake b/CMake/hoomd/HOOMDCUDASetup.cmake index 4da0263536..63958e07ad 100644 --- a/CMake/hoomd/HOOMDCUDASetup.cmake +++ b/CMake/hoomd/HOOMDCUDASetup.cmake @@ -1,6 +1,6 @@ # setup CUDA compile options if (ENABLE_HIP) - if (HIP_PLATFORM STREQUAL "nvcc") + if (HIP_PLATFORM STREQUAL "nvidia") # setup nvcc to build for all CUDA architectures. Allow user to modify the list if desired if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) set(CUDA_ARCH_LIST 80 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.") @@ -9,7 +9,7 @@ if (ENABLE_HIP) endif() # ignore warnings about unused results - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-unused-result -diag-suppress 2810") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-unused-result -Wno-deprecated-declarations -diag-suppress 2810") if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUSPARSE_NEW_API") @@ -55,7 +55,7 @@ if (ENABLE_HIP) endif (ENABLE_HIP) # set CUSOLVER_AVAILABLE depending on CUDA Toolkit version -if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvcc") +if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvidia") # CUDA 8.0 requires that libgomp be linked in - see if we can link it try_compile(_can_link_gomp ${CMAKE_CURRENT_BINARY_DIR}/tmp diff --git a/CMake/hoomd/HOOMDHIPSetup.cmake b/CMake/hoomd/HOOMDHIPSetup.cmake index 8a15aa72d1..502ee7278a 100644 --- a/CMake/hoomd/HOOMDHIPSetup.cmake +++ b/CMake/hoomd/HOOMDHIPSetup.cmake @@ -1,7 +1,7 @@ if(ENABLE_HIP) if (HOOMD_GPU_PLATFORM STREQUAL "HIP") - find_package(HIP 
REQUIRED) + find_package(hip REQUIRED) CMAKE_MINIMUM_REQUIRED(VERSION 3.21 FATAL_ERROR) ENABLE_LANGUAGE(HIP) SET(HOOMD_DEVICE_LANGUAGE HIP) @@ -14,26 +14,21 @@ if(ENABLE_HIP) ENABLE_LANGUAGE(CUDA) SET(HOOMD_DEVICE_LANGUAGE CUDA) - set(HIP_INCLUDE_DIR "$,${CMAKE_CURRENT_SOURCE_DIR},${HOOMD_INSTALL_PREFIX}/${PYTHON_SITE_INSTALL_DIR}/include>/hoomd/extern/HIP/include/") - # use CUDA runtime version string(REGEX MATCH "([0-9]*).([0-9]*).([0-9]*).*" _hip_version_match "${CMAKE_CUDA_COMPILER_VERSION}") set(HIP_VERSION_MAJOR "${CMAKE_MATCH_1}") set(HIP_VERSION_MINOR "${CMAKE_MATCH_2}") set(HIP_VERSION_PATCH "${CMAKE_MATCH_3}") - set(HIP_PLATFORM "nvcc") - - # hipCUB - # Use system provided CUB for CUDA 11 and newer - set(HIPCUB_INCLUDE_DIR "$,${CMAKE_CURRENT_SOURCE_DIR},${HOOMD_INSTALL_PREFIX}/${PYTHON_SITE_INSTALL_DIR}/include>/hoomd/extern/hipCUB/hipcub/include/") + set(HIP_PLATFORM "nvidia") + find_package(hip REQUIRED) else() message(FATAL_ERROR "HOOMD_GPU_PLATFORM must be either CUDA or HIP") endif() - if(NOT TARGET hip::host) - add_library(hip::host INTERFACE IMPORTED) + if(${HIP_PLATFORM} STREQUAL "nvidia") + # HIP does not configure hip::host properly for the nvidia platform set_target_properties(hip::host PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${HIP_INCLUDE_DIR};${HIPCUB_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${HIP_INCLUDE_DIR}") # set HIP_VERSION_* on non-CUDA targets (the version is already defined on AMD targets through hipcc) set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS @@ -46,10 +41,10 @@ if(ENABLE_HIP) endif() # branch upon HCC or NVCC target - if(${HIP_PLATFORM} STREQUAL "nvcc") - set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_NVCC__) + if(${HIP_PLATFORM} STREQUAL "nvidia") + set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_NVIDIA__ HIPPER_CUDA) elseif(${HIP_PLATFORM} STREQUAL "amd") - set_property(TARGET hip::host APPEND 
PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_AMD__ __HIP_PLATFORM_HCC__) + set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_AMD__ __HIP_PLATFORM_HCC__ HIPPER_HIP) endif() find_package(CUDALibs REQUIRED) diff --git a/CMake/hoomd/HOOMDMPISetup.cmake b/CMake/hoomd/HOOMDMPISetup.cmake index b4194420b5..6f432503ee 100644 --- a/CMake/hoomd/HOOMDMPISetup.cmake +++ b/CMake/hoomd/HOOMDMPISetup.cmake @@ -36,12 +36,12 @@ if (ENABLE_MPI) mark_as_advanced(OMPI_INFO) if (ENABLE_HIP) - string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" + string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" _MPI_C_COMPILE_OPTIONS "${MPI_C_COMPILE_OPTIONS}") set_property(TARGET MPI::MPI_C PROPERTY INTERFACE_COMPILE_OPTIONS "${_MPI_C_COMPILE_OPTIONS}") unset(_MPI_C_COMPILE_OPTIONS) - string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" + string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" _MPI_CXX_COMPILE_OPTIONS "${MPI_CXX_COMPILE_OPTIONS}") set_property(TARGET MPI::MPI_CXX PROPERTY INTERFACE_COMPILE_OPTIONS "${_MPI_CXX_COMPILE_OPTIONS}") unset(_MPI_CXX_COMPILE_OPTIONS) diff --git a/CMakeLists.txt b/CMakeLists.txt index d47352292a..523d9974b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-conversion -Wno-unknown-pragmas -Wno-deprecated-declarations -Wno-unused-result") # suppress warnings regarding HIP's overly complex vector structs - if (CMAKE_COMPILER_IS_GNUCXXH AND OOMD_GPU_PLATFORM STREQUAL "HIP") + if (CMAKE_COMPILER_IS_GNUCXX AND HOOMD_GPU_PLATFORM STREQUAL "HIP") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") @@ -144,7 +144,6 @@ endif() if ( NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/nano-signal-slot/nano_signal_slot.hpp OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/upp11/upp11.h OR - NOT EXISTS 
${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/HIP/include/hip/hip_runtime.h OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/quickhull/ConvexHull.hpp OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/OpenRAND/include/openrand/philox.h OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/neighbor/include/neighbor/neighbor.h OR diff --git a/INSTALLING.rst b/INSTALLING.rst index 32532d62af..7db4aa524d 100644 --- a/INSTALLING.rst +++ b/INSTALLING.rst @@ -20,32 +20,81 @@ Serial CPU and single GPU builds *linux-64*, *osx-64*, and *osx-arm64* platforms. Install the ``hoomd`` package from the conda-forge_ channel: -.. code-block:: bash +.. tab:: Pixi - micromamba install hoomd=6.0.0 + .. code-block:: bash + + pixi add hoomd=6.0.0 + +.. tab:: Micromamba + + .. code-block:: bash + + micromamba install hoomd=6.0.0 + +.. tab:: Mamba + + .. code-block:: bash + + mamba install hoomd=6.0.0 .. _conda-forge: https://conda-forge.org/docs/user/introduction.html By default, micromamba auto-detects whether your system has a GPU and attempts to install the appropriate package. Override this and force the GPU enabled package installation with: -.. code-block:: bash +.. tab:: Pixi + + First add the following to your ``pixi.toml`` file: + + .. code-block:: toml + + [system-requirements] + cuda = "12.9" + + Then, add hoomd with: + + .. code-block:: bash + + pixi add "hoomd=6.0.0=*gpu*" - export CONDA_OVERRIDE_CUDA="12.6" - micromamba install "hoomd=6.0.0=*gpu*" "cuda-version=12.6" + +.. tab:: Micromamba + + .. code-block:: bash + + export CONDA_OVERRIDE_CUDA="12.9" + micromamba install "hoomd=6.0.0=*gpu*" "cuda-version=12.9" + +.. tab:: Mamba + + .. code-block:: bash + + export CONDA_OVERRIDE_CUDA="12.9" + mamba install "hoomd=6.0.0=*gpu*" "cuda-version=12.9" .. note:: - conda-forge_ may update to a new version of CUDA. If the above command results in an error, - replace ``12.6`` with the version noted in micromamba's error message. 
+ conda-forge_ may update to a new version of CUDA after these instructions are published. + If the above command results in an error, replace ``12.9`` with the version noted in + micromamba's error message. Similarly, you can force CPU-only package installation with: -.. code-block:: bash +.. tab:: Pixi - micromamba install "hoomd=6.0.0=*cpu*" + .. code-block:: bash -.. note:: + pixi add "hoomd=6.0.0=*cpu*" + +.. tab:: Micromamba + + .. code-block:: bash + + micromamba install "hoomd=6.0.0=*cpu*" + +.. tab:: Mamba + + .. code-block:: bash - CUDA 11.8 compatible packages are also available. Replace "12.0" with "11.8" above when - installing HOOMD-blue on systems with CUDA 11 compatible drivers. + mamba install "hoomd=6.0.0=*cpu*" diff --git a/hoomd/CMakeLists.txt b/hoomd/CMakeLists.txt index 4abb33a2d0..713e6f639a 100644 --- a/hoomd/CMakeLists.txt +++ b/hoomd/CMakeLists.txt @@ -223,7 +223,7 @@ add_library(HOOMD::_hoomd ALIAS _hoomd) # Work around support for the delete operator with pybind11 and older versions of clang # https://github.com/pybind/pybind11/issues/1604 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - target_compile_options(_hoomd PUBLIC $<$,$>:-Xcompiler=>;-fsized-deallocation) + target_compile_options(_hoomd PUBLIC $<$,$>:-Xcompiler=>;-fsized-deallocation) endif() # add quick hull as its own library so that it's symbols can be public @@ -267,7 +267,7 @@ target_compile_definitions(_hoomd PUBLIC HOOMD_LONGREAL_SIZE=${HOOMD_LONGREAL_SI if (ENABLE_HIP) if (HIP_PLATFORM STREQUAL "amd") target_link_libraries(_hoomd PUBLIC hip::hipfft) - elseif(HIP_PLATFORM STREQUAL "nvcc") + elseif(HIP_PLATFORM STREQUAL "nvidia") target_link_libraries(_hoomd PUBLIC CUDA::cudart CUDA::cufft) endif() target_compile_definitions(_hoomd PUBLIC ENABLE_HIP CUDA_ARCH=${_cuda_min_arch}) diff --git a/hoomd/ExecutionConfiguration.cc b/hoomd/ExecutionConfiguration.cc index dd173dfc20..0829682a1b 100644 --- a/hoomd/ExecutionConfiguration.cc +++ b/hoomd/ExecutionConfiguration.cc @@ 
-7,7 +7,7 @@ #ifdef ENABLE_HIP #include -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) #include #endif #endif @@ -212,7 +212,7 @@ void ExecutionConfiguration::handleHIPError(hipError_t err, file += strlen(HOOMD_SOURCE_DIR); std::ostringstream s; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaError_t cuda_error = cudaPeekAtLastError(); s << "CUDA Error: " << string(cudaGetErrorString(cuda_error)); #else @@ -263,7 +263,7 @@ void ExecutionConfiguration::initializeGPU(int gpu_id) if (gpu_id != -1) { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaSetValidDevices(&s_capable_gpu_ids[gpu_id], 1); #endif hipSetDeviceFlags(hipDeviceMapHost); @@ -272,7 +272,7 @@ void ExecutionConfiguration::initializeGPU(int gpu_id) else { // initialize the default CUDA context from one of the capable GPUs -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaSetValidDevices(&s_capable_gpu_ids[0], (int)s_capable_gpu_ids.size()); #endif hipSetDeviceFlags(hipDeviceMapHost); @@ -327,7 +327,7 @@ void ExecutionConfiguration::scanGPUs() if (error != hipSuccess) { std::string message = "Failed to get GPU device count: "; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaError_t cuda_error = cudaPeekAtLastError(); message += string(cudaGetErrorString(cuda_error)); #else @@ -352,7 +352,7 @@ void ExecutionConfiguration::scanGPUs() if (error != hipSuccess) { std::string message = "Failed to get device properties: "; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaError_t cuda_error = cudaPeekAtLastError(); message += string(cudaGetErrorString(cuda_error)); #else @@ -362,7 +362,7 @@ void ExecutionConfiguration::scanGPUs() continue; } -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ // exclude a GPU if it's compute version is not high enough int compoundComputeVer = prop.minor + prop.major * 10; @@ -386,7 +386,7 @@ void ExecutionConfiguration::scanGPUs() } // exclude a GPU when it 
doesn't support mapped memory -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ int supports_managed_memory = 0; cudaError_t cuda_error = cudaDeviceGetAttribute(&supports_managed_memory, cudaDevAttrConcurrentManagedAccess, diff --git a/hoomd/ExecutionConfiguration.h b/hoomd/ExecutionConfiguration.h index f24f20afd1..1a23f3ddf5 100644 --- a/hoomd/ExecutionConfiguration.h +++ b/hoomd/ExecutionConfiguration.h @@ -152,7 +152,7 @@ class PYBIND11_EXPORT ExecutionConfiguration hipSetDevice(m_gpu_id); hipDeviceSynchronize(); -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ hipProfilerStart(); #elif defined(__HIP_PLATFORM_HCC__) #ifdef ENABLE_ROCTRACER @@ -167,7 +167,7 @@ class PYBIND11_EXPORT ExecutionConfiguration { hipSetDevice(m_gpu_id); hipDeviceSynchronize(); -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ hipProfilerStop(); #elif defined(__HIP_PLATFORM_HCC__) #ifdef ENABLE_ROCTRACER diff --git a/hoomd/HOOMDMath.h b/hoomd/HOOMDMath.h index dbae39dbcc..365fe0ad2b 100644 --- a/hoomd/HOOMDMath.h +++ b/hoomd/HOOMDMath.h @@ -233,7 +233,7 @@ namespace fast inline HOSTDEVICE float rsqrt(float x) { #ifdef __HIP_DEVICE_COMPILE__ -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ return ::rsqrtf(x); #elif defined(__HIP_PLATFORM_HCC__) return ::__frsqrt_rn(x); @@ -248,7 +248,7 @@ inline HOSTDEVICE float rsqrt(float x) //! Compute the reciprocal square root of x inline HOSTDEVICE double rsqrt(double x) { -#if defined(__HIP_DEVICE_COMPILE__) && defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_DEVICE_COMPILE__) && defined(__HIP_PLATFORM_NVIDIA__) return ::rsqrt(x); #else return 1.0 / ::sqrt(x); @@ -720,7 +720,7 @@ HOSTDEVICE inline hoomd::Scalar3 operator+(const hoomd::Scalar3& a, const hoomd: return hoomd::make_scalar3(a.x + b.x, a.y + b.y, a.z + b.z); } -#if !defined(ENABLE_HIP) || defined(__HIP_PLATFORM_NVCC__) +#if !defined(ENABLE_HIP) || defined(__HIP_PLATFORM_NVIDIA__) //! 
Vector addition HOSTDEVICE inline hoomd::Scalar3& operator+=(hoomd::Scalar3& a, const hoomd::Scalar3& b) { diff --git a/hoomd/HOOMDVersion.cc b/hoomd/HOOMDVersion.cc index 55b252838e..a748b6dc93 100644 --- a/hoomd/HOOMDVersion.cc +++ b/hoomd/HOOMDVersion.cc @@ -21,7 +21,7 @@ std::string BuildInfo::getCompileFlags() int hip_minor = HIP_VERSION_MINOR; o << "GPU ["; -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) o << "CUDA"; #elif defined(__HIP_PLATFORM_HCC__) o << "ROCm"; @@ -105,7 +105,7 @@ std::string BuildInfo::getGPUAPIVersion() std::string BuildInfo::getGPUPlatform() { -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) return std::string("CUDA"); #elif defined(__HIP_PLATFORM_HCC__) return std::string("ROCm"); diff --git a/hoomd/ManagedArray.h b/hoomd/ManagedArray.h index 2ca58b0eea..efd026da78 100644 --- a/hoomd/ManagedArray.h +++ b/hoomd/ManagedArray.h @@ -189,8 +189,8 @@ template class ManagedArray { if (managed && ptr) { -#if defined(__HIP_PLATFORM_NVCC__) && (CUDART_VERSION >= 8000) - cudaMemAdvise(ptr, sizeof(T) * N, cudaMemAdviseSetReadMostly, 0); +#if defined(__HIP_PLATFORM_NVIDIA__) && (CUDART_VERSION >= 8000) + hipMemAdvise(ptr, sizeof(T) * N, hipMemAdviseSetReadMostly, 0); #endif } } diff --git a/hoomd/WarpTools.cuh b/hoomd/WarpTools.cuh index 410775d93c..2c10a05717 100644 --- a/hoomd/WarpTools.cuh +++ b/hoomd/WarpTools.cuh @@ -65,7 +65,7 @@ class WarpReduce public: DEVICE WarpReduce() { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ static_assert(PTX_ARCH >= 300, "PTX architecture must be >= 300"); static_assert(LOGICAL_WARP_THREADS <= CUB_PTX_WARP_THREADS, "Logical warp size cannot exceed hardware warp size"); @@ -196,7 +196,7 @@ class WarpScan public: DEVICE WarpScan() { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ static_assert(PTX_ARCH >= 300, "PTX architecture must be >= 300"); static_assert(LOGICAL_WARP_THREADS <= CUB_PTX_WARP_THREADS, "Logical warp size cannot 
exceed hardware warp size"); diff --git a/hoomd/extern/ECL.cuh b/hoomd/extern/ECL.cuh index 38b48dcdea..2ab5276eb8 100644 --- a/hoomd/extern/ECL.cuh +++ b/hoomd/extern/ECL.cuh @@ -57,7 +57,7 @@ inline void ecl_connected_components(const int nodes, static const int Device = 0; static const int ThreadsPerBlock = 256; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ static const int warpsize = 32; #else static const int warpsize = 64; @@ -166,7 +166,7 @@ void compute2(const int nodes, const int* const __restrict__ nidx, const int* co int idx; if (lane == 0) idx = atomicAdd(&posL, 1); - #ifdef __HIP_PLATFORM_NVCC__ + #ifdef __HIP_PLATFORM_NVIDIA__ idx = __shfl_sync(0xffffffff,idx, 0); #else idx = __shfl(idx,0); @@ -200,7 +200,7 @@ void compute2(const int nodes, const int* const __restrict__ nidx, const int* co } if (lane == 0) idx = atomicAdd(&posL, 1); - #ifdef __HIP_PLATFORM_NVCC__ + #ifdef __HIP_PLATFORM_NVIDIA__ idx = __shfl_sync(0xffffffff,idx, 0); #else idx = __shfl(idx,0); diff --git a/hoomd/extern/HIP b/hoomd/extern/HIP deleted file mode 160000 index db753e4ea7..0000000000 --- a/hoomd/extern/HIP +++ /dev/null @@ -1 +0,0 @@ -Subproject commit db753e4ea7a715afec405117250cecef9e882b33 diff --git a/hoomd/extern/hipCUB b/hoomd/extern/hipCUB deleted file mode 160000 index 64d8adf32b..0000000000 --- a/hoomd/extern/hipCUB +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 64d8adf32bd48d8723cc7df9e5c970169e2845b5 diff --git a/hoomd/hpmc/CMakeLists.txt b/hoomd/hpmc/CMakeLists.txt index cc9a35abc0..383138b6d0 100644 --- a/hoomd/hpmc/CMakeLists.txt +++ b/hoomd/hpmc/CMakeLists.txt @@ -168,7 +168,7 @@ endif() # link the library to its dependencies target_link_libraries(_hpmc PUBLIC _hoomd) -if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvcc") +if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvidia") target_link_libraries(_hpmc PUBLIC CUDA::cusparse ) endif() diff --git a/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh b/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh index 3f1f808583..cb20300a00 
100644 --- a/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh +++ b/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh @@ -29,7 +29,7 @@ namespace hpmc { namespace gpu { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #define MAX_BLOCK_SIZE 1024 #define MIN_BLOCK_SIZE 32 #else @@ -42,7 +42,7 @@ namespace kernel { //! Check narrow-phase overlaps template -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ __launch_bounds__(max_threads) #endif __global__ void hpmc_narrow_phase(const Scalar4* d_postype, diff --git a/hoomd/hpmc/IntegratorHPMCMonoGPU.h b/hoomd/hpmc/IntegratorHPMCMonoGPU.h index 8525f31353..d6b93d9562 100644 --- a/hoomd/hpmc/IntegratorHPMCMonoGPU.h +++ b/hoomd/hpmc/IntegratorHPMCMonoGPU.h @@ -840,12 +840,12 @@ template void IntegratorHPMCMonoGPU::updateCellWidth() // update the cell list this->m_cl->setNominalWidth(this->m_nominal_width); -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ // set memory hints - cudaMemAdvise(this->m_params.data(), - this->m_params.size() * sizeof(typename Shape::param_type), - cudaMemAdviseSetReadMostly, - 0); + hipMemAdvise(this->m_params.data(), + this->m_params.size() * sizeof(typename Shape::param_type), + hipMemAdviseSetReadMostly, + 0); CHECK_CUDA_ERROR(); #endif diff --git a/hoomd/hpmc/UpdaterGCAGPU.cu b/hoomd/hpmc/UpdaterGCAGPU.cu index 42dd69d976..8dbf341109 100644 --- a/hoomd/hpmc/UpdaterGCAGPU.cu +++ b/hoomd/hpmc/UpdaterGCAGPU.cu @@ -23,7 +23,7 @@ #include #pragma GCC diagnostic pop -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #include #endif @@ -39,7 +39,7 @@ namespace hpmc { namespace gpu { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #define check_cusparse(a) \ { \ cusparseStatus_t status = (a); \ @@ -277,7 +277,7 @@ void connected_components(uint2* d_adj, const hipDeviceProp_t& dev_prop, CachedAllocator& alloc) { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ thrust::device_ptr adj(d_adj); // sort the list of pairs diff --git 
a/hoomd/hpmc/UpdaterGCAGPU.cuh b/hoomd/hpmc/UpdaterGCAGPU.cuh index 58ecb063d2..3efb10e5e0 100644 --- a/hoomd/hpmc/UpdaterGCAGPU.cuh +++ b/hoomd/hpmc/UpdaterGCAGPU.cuh @@ -21,7 +21,7 @@ #include "IntegratorHPMCMonoGPUTypes.cuh" -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #define MAX_BLOCK_SIZE 1024 #define MIN_BLOCK_SIZE 256 // a reasonable minimum to limit the number of template instantiations #else @@ -194,7 +194,7 @@ namespace kernel { //! Check narrow-phase overlaps template -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ __launch_bounds__(max_threads) #endif __global__ void hpmc_cluster_overlaps(const Scalar4* d_postype, diff --git a/hoomd/md/AnisoPotentialPairGPU.cuh b/hoomd/md/AnisoPotentialPairGPU.cuh index 82015e2c39..f56cb06dc7 100644 --- a/hoomd/md/AnisoPotentialPairGPU.cuh +++ b/hoomd/md/AnisoPotentialPairGPU.cuh @@ -24,7 +24,7 @@ //! Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_aniso_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_aniso_pair_force_max_tpp = 64; diff --git a/hoomd/md/CommunicatorGrid.cc b/hoomd/md/CommunicatorGrid.cc index 10251c915b..9f8cf2fcf4 100644 --- a/hoomd/md/CommunicatorGrid.cc +++ b/hoomd/md/CommunicatorGrid.cc @@ -12,7 +12,7 @@ #if defined(ENABLE_HIP) #if __HIP_PLATFORM_HCC__ #include -#elif __HIP_PLATFORM_NVCC__ +#elif __HIP_PLATFORM_NVIDIA__ #include typedef cufftComplex hipfftComplex; #endif diff --git a/hoomd/md/CommunicatorGridGPU.cc b/hoomd/md/CommunicatorGridGPU.cc index 3fd9320328..83e6334e6c 100644 --- a/hoomd/md/CommunicatorGridGPU.cc +++ b/hoomd/md/CommunicatorGridGPU.cc @@ -10,7 +10,7 @@ #if __HIP_PLATFORM_HCC__ #include -#elif __HIP_PLATFORM_NVCC__ +#elif __HIP_PLATFORM_NVIDIA__ #include typedef cufftComplex hipfftComplex; #endif diff --git a/hoomd/md/FrictionPairGPU.cuh 
b/hoomd/md/FrictionPairGPU.cuh index 3932542ae0..1b665f3580 100644 --- a/hoomd/md/FrictionPairGPU.cuh +++ b/hoomd/md/FrictionPairGPU.cuh @@ -22,7 +22,7 @@ //! Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_friction_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_friction_pair_force_max_tpp = 64; diff --git a/hoomd/md/NeighborListGPUBinned.cuh b/hoomd/md/NeighborListGPUBinned.cuh index c05971d2e3..b7cf38d41b 100644 --- a/hoomd/md/NeighborListGPUBinned.cuh +++ b/hoomd/md/NeighborListGPUBinned.cuh @@ -15,7 +15,7 @@ */ // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) #define WARP_SIZE 32 #elif defined(__HIP_PLATFORM_HCC__) #define WARP_SIZE 64 diff --git a/hoomd/md/NeighborListGPUStencil.cuh b/hoomd/md/NeighborListGPUStencil.cuh index b303b52fa0..1db5c5f9da 100644 --- a/hoomd/md/NeighborListGPUStencil.cuh +++ b/hoomd/md/NeighborListGPUStencil.cuh @@ -14,7 +14,7 @@ \brief Declares GPU kernel code for neighbor list generation on the GPU */ -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) #define WARP_SIZE 32 #elif defined(__HIP_PLATFORM_HCC__) #define WARP_SIZE 64 diff --git a/hoomd/md/PPPMForceComputeGPU.h b/hoomd/md/PPPMForceComputeGPU.h index d74df567e9..da2ec75dd0 100644 --- a/hoomd/md/PPPMForceComputeGPU.h +++ b/hoomd/md/PPPMForceComputeGPU.h @@ -10,7 +10,7 @@ #if __HIP_PLATFORM_HCC__ #include -#elif __HIP_PLATFORM_NVCC__ +#elif __HIP_PLATFORM_NVIDIA__ #include typedef cufftComplex hipfftComplex; typedef cufftHandle hipfftHandle; diff --git a/hoomd/md/PotentialPair.h b/hoomd/md/PotentialPair.h index b5948bdb91..dec76d2784 100644 --- a/hoomd/md/PotentialPair.h +++ b/hoomd/md/PotentialPair.h @@ -368,14 +368,14 @@ PotentialPair::PotentialPair(std::shared_ptr sysdef 
m_r_cut_nlist = std::make_shared>(m_typpair_idx.getNumElements(), m_exec_conf); nlist->addRCutMatrix(m_r_cut_nlist); -#if defined(ENABLE_HIP) && defined(__HIP_PLATFORM_NVCC__) +#if defined(ENABLE_HIP) && defined(__HIP_PLATFORM_NVIDIA__) if (m_pdata->getExecConf()->isCUDAEnabled()) { // m_params is _always_ in unified memory, so memadvise and prefetch - cudaMemAdvise(m_params.data(), - m_params.size() * sizeof(param_type), - cudaMemAdviseSetReadMostly, - 0); + hipMemAdvise(m_params.data(), + m_params.size() * sizeof(param_type), + hipMemAdviseSetReadMostly, + 0); cudaMemPrefetchAsync(m_params.data(), sizeof(param_type) * m_params.size(), m_exec_conf->getGPUId()); diff --git a/hoomd/md/PotentialPairDPDThermoGPU.cuh b/hoomd/md/PotentialPairDPDThermoGPU.cuh index 8fcee9a0b6..59a2121e94 100644 --- a/hoomd/md/PotentialPairDPDThermoGPU.cuh +++ b/hoomd/md/PotentialPairDPDThermoGPU.cuh @@ -27,7 +27,7 @@ namespace md namespace kernel { // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_dpd_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_dpd_pair_force_max_tpp = 64; diff --git a/hoomd/md/PotentialPairGPU.cuh b/hoomd/md/PotentialPairGPU.cuh index 45b2f17a41..24c6021687 100644 --- a/hoomd/md/PotentialPairGPU.cuh +++ b/hoomd/md/PotentialPairGPU.cuh @@ -32,7 +32,7 @@ namespace kernel { //! Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_pair_force_max_tpp = 64; diff --git a/hoomd/md/PotentialTersoffGPU.cuh b/hoomd/md/PotentialTersoffGPU.cuh index f3e2b859e2..b02ce1b098 100644 --- a/hoomd/md/PotentialTersoffGPU.cuh +++ b/hoomd/md/PotentialTersoffGPU.cuh @@ -29,7 +29,7 @@ namespace kernel { //! 
Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_tersoff_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_tersoff_max_tpp = 64; diff --git a/hoomd/mpcd/ParticleData.cu b/hoomd/mpcd/ParticleData.cu index 5fc9bd950a..3464cff9ee 100644 --- a/hoomd/mpcd/ParticleData.cu +++ b/hoomd/mpcd/ParticleData.cu @@ -13,7 +13,6 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #include -#include #include #pragma GCC diagnostic pop diff --git a/sphinx-doc/conf.py b/sphinx-doc/conf.py index 9d6183d456..c085ce84c6 100644 --- a/sphinx-doc/conf.py +++ b/sphinx-doc/conf.py @@ -30,6 +30,7 @@ "sphinx.ext.napoleon", "sphinx.ext.intersphinx", "sphinx.ext.todo", + "sphinx_inline_tabs", ] if find_spec("sphinxcontrib.katex") is not None: diff --git a/sphinx-doc/requirements.in b/sphinx-doc/requirements.in index 3f87c4fa84..181a8c1c3f 100644 --- a/sphinx-doc/requirements.in +++ b/sphinx-doc/requirements.in @@ -9,3 +9,4 @@ sphinxcontrib-googleanalytics sphinxcontrib-katex sphinx-copybutton sphinx-notfound-page +sphinx-inline-tabs diff --git a/sphinx-doc/requirements.txt b/sphinx-doc/requirements.txt index 48a07614af..50fab1737b 100644 --- a/sphinx-doc/requirements.txt +++ b/sphinx-doc/requirements.txt @@ -146,6 +146,7 @@ sphinx==8.1.3 # nbsphinx # sphinx-basic-ng # sphinx-copybutton + # sphinx-inline-tabs # sphinx-notfound-page # sphinxcontrib-googleanalytics # sphinxcontrib-katex @@ -153,6 +154,8 @@ sphinx-basic-ng==1.0.0b2 # via furo sphinx-copybutton==0.5.2 # via -r sphinx-doc/requirements.in +sphinx-inline-tabs==2023.4.21 + # via -r sphinx-doc/requirements.in sphinx-notfound-page==1.1.0 # via -r sphinx-doc/requirements.in sphinxcontrib-applehelp==2.0.0