diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b290e090..45b4c94a 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -4,6 +4,7 @@ "remoteUser": "gitpod", "runArgs": ["--privileged"], + "postCreateCommand": "bash -c 'conda install -c conda-forge dvc dvc-gdrive && dvc --version'", // Configure tool-specific properties. "customizations": { // Configure properties specific to VS Code. diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 00000000..4445cea1 --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,4 @@ +/config.local +/tmp +/cache +nf-pediatric-test-data-* diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 00000000..1b6039da --- /dev/null +++ b/.dvc/config @@ -0,0 +1,5 @@ +[core] + remote = storage +['remote "storage"'] + url = gdrive://1f5Lw8-HRvX_QzNyygYvhM8S9aBoR-aS- + gdrive_use_service_account = true diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 00000000..51973055 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.editorconfig b/.editorconfig index 6d9b74cc..a5eaf1c0 100644 --- a/.editorconfig +++ b/.editorconfig @@ -28,8 +28,8 @@ indent_style = unset [/assets/email*] indent_size = unset -# ignore python and markdown -[*.{py,md}] +# ignore python, markdown, and dvc files +[*.{py,md,dvc}] indent_style = unset # ignore ro-crate metadata files diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 72aadfde..5d9a3251 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,12 +36,13 @@ jobs: - "docker" - "singularity" test_name: - - "chained.nf.test" - - "connectomics.nf.test" - - "tracking.nf.test" - - "freesurfer.nf.test" - - "infantseg.nf.test" - - "multisubjects.nf.test" + - "run_connectomics.nf.test" + #- "chained.nf.test" + #- "connectomics.nf.test" + #- "tracking.nf.test" + #- "freesurfer.nf.test" + #- "infantseg.nf.test" + #- "multisubjects.nf.test" isMaster: - ${{ github.base_ref == 'master' }} # Exclude conda and singularity on dev @@ -82,6 +83,17 @@ jobs: mkdir -p $NXF_SINGULARITY_CACHEDIR mkdir -p $NXF_SINGULARITY_LIBRARYDIR + - name: Set up DVC + uses: iterative/setup-dvc@v1 + + - name: Pull data with DVC + env: + GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_SERVICE_ACCOUNT_JSON_FILE_PATH }} + run: | + printf '%s' "$GDRIVE_CREDENTIALS_DATA" > gdrive-credentials.json + dvc remote modify --local storage gdrive_service_account_json_file_path $(realpath gdrive-credentials.json) + dvc pull + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" run: | nf-test test ${GITHUB_WORKSPACE}/tests/${{ matrix.test_name }} --ci --profile ${{ matrix.profile }} diff --git a/.prettierignore b/.prettierignore index edd29f01..5eee3045 100644 --- a/.prettierignore +++ b/.prettierignore @@ -11,3 +11,4 @@ testing* *.pyc bin/ ro-crate-metadata.json +*.dvc diff --git a/conf/base.config b/conf/base.config index 4ee07a14..10192df8 100644 --- a/conf/base.config +++ b/conf/base.config @@ -32,7 +32,7 @@ process { } withLabel:process_medium { cpus = { 4 * task.attempt * (executor.name == 'slurm' ? 2 : 1) } - memory = { 8.GB * task.attempt * (executor.name == 'slurm' ? 2 : 1) } + memory = { 10.GB * task.attempt * (executor.name == 'slurm' ? 2 : 1) } time = { 8.h * task.attempt * (executor.name == 'slurm' ? 2 : 1) } } withLabel:process_high { diff --git a/nextflow.config b/nextflow.config index f9825475..56da57a4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -268,6 +268,7 @@ params { // Boilerplate options outdir = null publish_dir_mode = 'copy' + cleanup = false email = null email_on_fail = null plaintext_email = false @@ -548,6 +549,11 @@ manifest { // Nextflow plugins plugins { id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-boost@0.4.0' // Cleaning up intermediate file while the pipeline is running. +} + +boost { + cleanup = params.cleanup } validation { diff --git a/nextflow_schema.json b/nextflow_schema.json index e1399bf4..cd52dcd8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1467,6 +1467,14 @@ "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, + "cleanup": { + "type": "boolean", + "default": false, + "description": "Remove intermediate files after pipeline completion.", + "help_text": "If set to true, the pipeline will remove all intermediate files after the pipeline has completed. This can save disk space but may make debugging more difficult and option -resume won't be available. Use with caution", + "fa_icon": "fas fa-copy", + "hidden": true + }, "lean_output": { "type": "boolean", "description": "Do not copy intermediate files to output directory.", diff --git a/nf-test.config b/nf-test.config index 0c8518a1..0e40da9f 100644 --- a/nf-test.config +++ b/nf-test.config @@ -7,7 +7,7 @@ config { withTrace true autoSort false ignore "modules/**", "subworkflows/**" - options "-dump-channels -stub-run" + options "-dump-channels" plugins { load "nft-utils@0.0.3" } diff --git a/tests/data/.gitignore b/tests/data/.gitignore new file mode 100644 index 00000000..af6746bf --- /dev/null +++ b/tests/data/.gitignore @@ -0,0 +1 @@ +/derivatives-infant diff --git a/tests/data/derivatives-infant.dvc b/tests/data/derivatives-infant.dvc new file mode 100644 index 00000000..af64e4ef --- /dev/null +++ b/tests/data/derivatives-infant.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 69c8723df1438101d77d1b87e58c8280.dir + size: 348426446 + nfiles: 13 + hash: md5 + path: derivatives-infant diff --git a/tests/nextflow.config b/tests/nextflow.config index 207cbf1c..e08a163e 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -3,6 +3,10 @@ Nextflow config file for running tests ======================================================================================== */ +params { + outdir = "output/" + publish_dir_mode = "copy" +} process { withName: '.*' { diff --git a/tests/run_connectomics.nf.test b/tests/run_connectomics.nf.test new file mode 100644 index 00000000..675bc6c6 --- /dev/null +++ b/tests/run_connectomics.nf.test @@ -0,0 +1,44 @@ +nextflow_pipeline { + + name "Test nf-pediatric -profile connectomics,infant" + script "../main.nf" + + test("nf-pediatric -profile connectomics,infant") { + + when { + params { + + params.input_deriv = "$projectDir/tests/data/derivatives-infant/" + params.outdir = "$outputDir" + + params.connectomics = true + + params.infant = true + + params.commit_para_diff = "1.2E-3" + params.commit_iso_diff = "2.0E-3" + params.decompose_min_len = 10 + params.decompose_outlier_threshold = 0.4 + + params.cleanup = true + } + } + + then { + // stable name: All files + folders in ${params.outdir}/ with a stable name. + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + assertAll( + { assert workflow.success }, + { assert snapshot( + // Number of successfully completed tasks + workflow.trace.succeeded().size(), + // Remove the nextflow version from the versions.yml because we test it using different nextflow versions. + removeNextflowVersion("$outputDir/pipeline_info/nf-pediatric_software_mqc_versions.yml"), + // All stable name. + stable_name + ).md5().match() + } + ) + } + } +} diff --git a/tests/run_connectomics.nf.test.snap b/tests/run_connectomics.nf.test.snap new file mode 100644 index 00000000..35ac84b8 --- /dev/null +++ b/tests/run_connectomics.nf.test.snap @@ -0,0 +1,10 @@ +{ + "nf-pediatric -profile connectomics,infant": { + "content": "4e3209e3c6e93e9901a525e77ce460fb", + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-17T20:34:19.897718" + } +} \ No newline at end of file