diff --git a/plugins/decl_hdf5/AUTHORS b/plugins/decl_hdf5/AUTHORS index a68fcb0f..0244c908 100644 --- a/plugins/decl_hdf5/AUTHORS +++ b/plugins/decl_hdf5/AUTHORS @@ -58,3 +58,4 @@ Kacper Sinkiewicz - PSNC (ksinkiewicz@man.poznan.pl) Yushan Wang - CEA (yushan.wang@cea.fr) * Maintainer (Sept. 2023 - ...) +* enable HDF5 subfiling diff --git a/plugins/decl_hdf5/CHANGELOG.md b/plugins/decl_hdf5/CHANGELOG.md index 546d2f8e..7f27d59c 100644 --- a/plugins/decl_hdf5/CHANGELOG.md +++ b/plugins/decl_hdf5/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] ### Added +* Add subfiling support [#602](https://github.com/pdidev/pdi/issues/602) ### Changed diff --git a/plugins/decl_hdf5/README.md b/plugins/decl_hdf5/README.md index e3982159..f2652fc3 100644 --- a/plugins/decl_hdf5/README.md +++ b/plugins/decl_hdf5/README.md @@ -60,6 +60,7 @@ The possible values for the keys are as follow: See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetFletcher32 for more information. +* `subfiling`: an integer value interpreted as the total number of subfiles. This key is available only when the underlying HDF5 has `H5_HAVE_SUBFILING_VFD` enabled. By default `subfiling: 0` is used indicating no HDF5 subfiling. `subfiling: 2` means 2 subfiles will be generated. ### DATA_SECTION diff --git a/plugins/decl_hdf5/file_op.cxx b/plugins/decl_hdf5/file_op.cxx index 2ab0b146..8384a91e 100644 --- a/plugins/decl_hdf5/file_op.cxx +++ b/plugins/decl_hdf5/file_op.cxx @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA) + * Copyright (C) 2015-2026 Commissariat a l'energie atomique et aux energies alternatives (CEA) * Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC) * All rights reserved. * @@ -26,7 +26,11 @@ #include #ifdef H5_HAVE_PARALLEL #include +#ifdef H5_HAVE_SUBFILING_VFD +#include #endif +#endif + #include #include @@ -107,6 +111,12 @@ vector File_op::parse(Context& ctx, PC_tree_t tree) deflate = value; } else if (key == "fletcher") { fletcher = value; + } else if (key == "subfiling") { +#ifdef H5_HAVE_SUBFILING_VFD + template_op.m_subfiling = to_string(value); +#else + ctx.logger().warn("Used HDF5 does not support subfiling. Subfiling setup ignored"); +#endif } else if (key == "write") { // will read in pass 2 } else if (key == "read") { @@ -235,6 +245,9 @@ File_op::File_op(const File_op& other) , #ifdef H5_HAVE_PARALLEL m_communicator{other.m_communicator} +#ifdef H5_HAVE_SUBFILING_VFD + , m_subfiling{other.m_subfiling} +#endif , #endif m_dset_ops{other.m_dset_ops} @@ -302,6 +315,16 @@ void File_op::execute(Context& ctx) if (0 > H5Pset_fapl_mpio(file_lst, comm, MPI_INFO_NULL)) handle_hdf5_err(); use_mpio = true; ctx.logger().debug("Opening `{}' file in parallel mode", filename); +#ifdef H5_HAVE_SUBFILING_VFD + if (subfiling().to_long(ctx)) { + ctx.logger().info("HDF5 subfiling enabled for file {}", filename); + + H5FD_subfiling_config_t subf_config; + H5Pget_fapl_subfiling(file_lst, &subf_config); + subf_config.shared_cfg.stripe_count = subfiling().to_long(ctx); + H5Pset_fapl_subfiling(file_lst, &subf_config); + } +#endif } #endif diff --git a/plugins/decl_hdf5/file_op.h b/plugins/decl_hdf5/file_op.h index b215d9cf..92e18b98 100644 --- a/plugins/decl_hdf5/file_op.h +++ b/plugins/decl_hdf5/file_op.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (C) 2015-2025 Commissariat a l'energie atomique et aux energies alternatives (CEA) + * Copyright (C) 2015-2026 Commissariat a l'energie atomique et aux energies alternatives (CEA) * Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC) * All rights reserved. * @@ -66,6 +66,11 @@ class File_op #ifdef H5_HAVE_PARALLEL /// a communicator for parallel HDF5 (null if no comm is specified) PDI::Expression m_communicator; + +#ifdef H5_HAVE_SUBFILING_VFD + /// HDF5 subfiling + PDI::Expression m_subfiling = 0L; +#endif #endif /// type information for the datasets for which an explicit type is specified @@ -120,6 +125,10 @@ class File_op #ifdef H5_HAVE_PARALLEL PDI::Expression communicator() const { return m_communicator; } + +#ifdef H5_HAVE_SUBFILING_VFD + PDI::Expression subfiling() const { return m_subfiling; } +#endif #endif /** Executes the requested operation. diff --git a/plugins/decl_hdf5/tests/CMakeLists.txt b/plugins/decl_hdf5/tests/CMakeLists.txt index de0d8868..e0ad181e 100644 --- a/plugins/decl_hdf5/tests/CMakeLists.txt +++ b/plugins/decl_hdf5/tests/CMakeLists.txt @@ -138,6 +138,18 @@ target_link_libraries(decl_hdf5_IO_options_C PDI::PDI_C ${HDF5_DEPS}) add_test(NAME decl_hdf5_IO_options_C COMMAND "${RUNTEST_DIR}" "$") set_property(TEST decl_hdf5_IO_options_C PROPERTY TIMEOUT 15) +if("${BUILD_HDF5_PARALLEL}") + add_executable(decl_hdf5_mpi_subfiling decl_hdf5_mpi_subfiling.c) + target_link_libraries(decl_hdf5_mpi_subfiling PDI::PDI_C MPI::MPI_C) + add_test(NAME decl_hdf5_mpi_subfiling COMMAND "${RUNTEST_DIR}" "${MPIEXEC}" "${MPIEXEC_NUMPROC_FLAG}" 4 ${MPIEXEC_PREFLAGS} "$" ${MPIEXEC_POSTFLAGS}) + set_property(TEST decl_hdf5_mpi_subfiling PROPERTY TIMEOUT 15) + set_property(TEST decl_hdf5_mpi_subfiling PROPERTY PROCESSORS 4) + + add_test(NAME check_subfile_count + COMMAND sh -c "${RUNTEST_DIR}" "count=$(ls -1 subfiling.h5.subfile_* | wc -l) && if [ $count -eq 3 ]; then exit 0; else echo 'Found $count files instead of 3 (including config)'; exit 1; fi") + set_tests_properties(check_subfile_count PROPERTIES DEPENDS decl_hdf5_mpi_subfiling) +endif() + if("${BUILD_FORTRAN}") add_subdirectory(fortran/) endif("${BUILD_FORTRAN}") diff --git a/plugins/decl_hdf5/tests/decl_hdf5_mpi_subfiling.c b/plugins/decl_hdf5/tests/decl_hdf5_mpi_subfiling.c new file mode 100644 index 00000000..5b5090ce --- /dev/null +++ b/plugins/decl_hdf5/tests/decl_hdf5_mpi_subfiling.c @@ -0,0 +1,168 @@ +/******************************************************************************* + * Copyright (C) 2026 Commissariat a l'energie atomique et aux energies alternatives (CEA) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * forcumentation and/or other materials provided with the distribution. + * * Neither the name of CEA nor the names of its contributors may be used to + * enforrse or promote products derived from this software without specific + * prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +#include +#include +#include +#include + +#define IMX 50 +#define JMX 40 +#define NI_GHOST 1 +#define NJ_GHOST 2 +#define DIM 2 + +const char* CONFIG_YAML + = "logging: trace \n" + "metadata: \n" + " input: int \n" + " ni: int \n" + " nj: int \n" + " nig: int \n" + " njg: int \n" + " nit: int \n" + " njt: int \n" + " istart: int \n" + " jstart: int \n" + "data: \n" + " reals: \n" + " type: array \n" + " subtype: double \n" + " size: [$nj + 2*$njg, $ni + 2*$nig] \n" + " subsize: [$nj, $ni] \n" + " start: [$njg, $nig] \n" + " values: \n" + " type: array \n" + " subtype: int \n" + " size: [$nj + 2*$njg, $ni + 2*$nig] \n" + " subsize: [$nj, $ni] \n" + " start: [$njg, $nig] \n" + "plugins: \n" + " mpi: \n" + " decl_hdf5: \n" + " file: subfiling.h5 \n" + " communicator: $MPI_COMM_WORLD \n" + " subfiling: 2 \n" + " datasets: \n" + " reals: {type: array, subtype: double, size: [$njt, $nit]} \n" + " values: {type: array, subtype: int, size: [$njt, $nit]} \n" + " write: \n" + " reals: \n" + " dataset_selection: {start: [$jstart, $istart]} \n" + " values: \n" + " dataset_selection: {start: [$jstart, $istart]} \n"; + +int main(int argc, char* argv[]) +{ + const int icst = -1; /// constants values in the ghost nodes + const double rcst = -1.01; + + int nig = NI_GHOST, njg = NJ_GHOST; + int ni = IMX, nj = JMX; + int values[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}}, cp_values[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}}; + double reals[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}}, cp_reals[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}}; + int i, j, input; + int nit, njt; + + /// MPI and parallel data or info + int dims[DIM], coord[DIM], periodic[DIM]; + int istart, jstart; + MPI_Comm comm2D; + periodic[0] = 0; + periodic[1] = 0; + dims[0] = 2; + dims[1] = 2; + + + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); + + if (provided != MPI_THREAD_MULTIPLE) { + printf("provided level = %d, required level = %d\n", provided, MPI_THREAD_MULTIPLE); + return -1; + } + PC_tree_t conf = PC_parse_string(CONFIG_YAML); + MPI_Comm world = MPI_COMM_WORLD; + PDI_init(conf); + int rank; + MPI_Comm_rank(world, &rank); + + int size; + MPI_Comm_size(world, &size); + if (size != 4) { + printf("Run on 4 procs only."); + MPI_Abort(MPI_COMM_WORLD, -1); + } + PDI_expose("nproc", &size, PDI_OUT); + + + MPI_Cart_create(world, DIM, dims, periodic, 0, &comm2D); + MPI_Cart_coords(comm2D, rank, DIM, coord); + + istart = coord[1] * ni; + jstart = coord[0] * nj; + + nit = 2 * ni; + njt = 2 * nj; + + PDI_expose("nig", &nig, PDI_OUT); /// Ghost cells + PDI_expose("njg", &njg, PDI_OUT); + + PDI_expose("ni", &ni, PDI_OUT); /// Size of the portion of the array for a given MPI task + PDI_expose("nj", &nj, PDI_OUT); + + PDI_expose("nit", &nit, PDI_OUT); /// size of the distributed array + PDI_expose("njt", &njt, PDI_OUT); + + PDI_expose("istart", &istart, PDI_OUT); /// offset + PDI_expose("jstart", &jstart, PDI_OUT); + + // Fill arrays + for (j = 0; j < nj + 2 * njg; ++j) { + for (i = 0; i < ni + 2 * nig; ++i) { + cp_values[j][i] = icst; + cp_reals[j][i] = rcst; /// array initialized with const values + } + } + /// Values and reals == 0 in the ghost. + double cst = -rcst; + for (j = njg; j < nj + njg; ++j) { + for (i = nig; i < ni + nig; ++i) { + values[j][i] = (i + coord[1] * ni - nig) + (j + coord[0] * nj - njg) * 10; + reals[j][i] = (i + coord[1] * ni - nig) * cst + (j + coord[0] * nj - njg) * 10 * cst; + } + } + + PDI_expose("rank", &rank, PDI_OUT); + PDI_expose("input", &input, PDI_OUT); + + /// Test that export/exchange works + PDI_expose("input", &input, PDI_OUT); + PDI_expose("reals", &reals, PDI_OUT); // output real + PDI_expose("values", &values, PDI_INOUT); // output integers + + PDI_finalize(); + PC_tree_destroy(&conf); + MPI_Finalize(); +}