diff --git a/modules/nf-core/whatshap/phase/environment.yml b/modules/nf-core/whatshap/phase/environment.yml
new file mode 100644
index 00000000000..389d6871ef4
--- /dev/null
+++ b/modules/nf-core/whatshap/phase/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::whatshap=2.8"
diff --git a/modules/nf-core/whatshap/phase/main.nf b/modules/nf-core/whatshap/phase/main.nf
new file mode 100644
index 00000000000..b114b8e6f52
--- /dev/null
+++ b/modules/nf-core/whatshap/phase/main.nf
@@ -0,0 +1,61 @@
+process WHATSHAP_PHASE {
+    tag "${meta.id}"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'oras://community.wave.seqera.io/library/whatshap:2.8--c3862a4b2ad0f978'
+        : 'community.wave.seqera.io/library/whatshap:2.8--7fe530bc624a3e5a'}"
+
+    input:
+    tuple val(meta), path(vcf), path(tbi)
+    tuple val(meta2), path(bam), path(bai)
+    tuple val(meta3), path(fasta), path(fai)
+
+    output:
+    tuple val(meta), path("*.phased.vcf.gz"), emit: vcf
+    tuple val(meta), path("*.phased.vcf.gz.tbi"), emit: tbi
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // whatshap is told to write <prefix>.phased.vcf, which bgzip then turns into
+    // <prefix>.phased.vcf.gz before tabix indexes it. Refuse to run when the staged
+    // input VCF already uses one of those names, so input and output cannot collide.
+    if ("${vcf}" == "${prefix}.phased.vcf" || "${vcf}" == "${prefix}.phased.vcf.gz") {
+        error("Input and output names are the same, set prefix in module configuration to disambiguate!")
+    }
+    """
+    whatshap \\
+        phase \\
+        --output ${prefix}.phased.vcf \\
+        --reference ${fasta} \\
+        ${args} \\
+        ${vcf} \\
+        ${bam}
+
+    bgzip ${prefix}.phased.vcf
+    tabix -p vcf ${prefix}.phased.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        whatshap: \$(whatshap --version 2>&1 | sed 's/whatshap //g')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > ${prefix}.phased.vcf.gz
+    touch ${prefix}.phased.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        whatshap: \$(whatshap --version 2>&1 | sed 's/whatshap //g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/whatshap/phase/meta.yml b/modules/nf-core/whatshap/phase/meta.yml
new file mode 100644
index 00000000000..05dbbf5fb38
--- /dev/null
+++ b/modules/nf-core/whatshap/phase/meta.yml
@@ -0,0 +1,99 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "whatshap_phase"
+description: Phase variants in a VCF file using long-read sequencing data
+keywords:
+  - phasing
+  - haplotypes
+  - vcf
+  - long-reads
+  - nanopore
+  - pacbio
+tools:
+  - whatshap:
+      description: |
+        WhatsHap is a software for phasing genomic variants using DNA sequencing
+        reads, also called read-based phasing or haplotype assembly.
+      homepage: https://whatshap.readthedocs.io/
+      documentation: https://whatshap.readthedocs.io/
+      tool_dev_url: https://github.com/whatshap/whatshap
+      doi: "10.1101/085050"
+      licence: ["MIT"]
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'test', single_end:false ]`
+    - vcf:
+        type: file
+        description: VCF file with unphased variants (can be gzipped)
+        pattern: "*.{vcf,vcf.gz}"
+    - tbi:
+        type: file
+        description: VCF index file (optional but recommended)
+        pattern: "*.{tbi,csi}"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing BAM information
+          e.g. `[ id:'test', single_end:false ]`
+    - bam:
+        type: file
+        description: BAM file with aligned reads
+        pattern: "*.bam"
+    - bai:
+        type: file
+        description: BAM index file (WhatsHap expects sorted and indexed alignments)
+        pattern: "*.bai"
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. `[ id:'genome' ]`
+    - fasta:
+        type: file
+        description: Reference genome in FASTA format
+        pattern: "*.{fa,fasta}"
+    - fai:
+        type: file
+        description: Reference genome index
+        pattern: "*.fai"
+
+output:
+  vcf:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.phased.vcf.gz":
+          type: file
+          description: Bgzipped phased VCF file
+          pattern: "*.phased.vcf.gz"
+          ontologies:
+            - edam: http://edamontology.org/format_3989 # GZIP format
+  tbi:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.phased.vcf.gz.tbi":
+          type: file
+          description: Phased VCF index file
+          pattern: "*.phased.vcf.gz.tbi"
+          ontologies:
+            - edam: http://edamontology.org/format_3616 # TBI format
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+
+authors:
+  - "@haidyi"
+maintainers:
+  - "@haidyi"
diff --git a/modules/nf-core/whatshap/phase/tests/main.nf.test b/modules/nf-core/whatshap/phase/tests/main.nf.test
new file mode 100644
index 00000000000..5d6ec2b8a9e
--- /dev/null
+++ b/modules/nf-core/whatshap/phase/tests/main.nf.test
@@ -0,0 +1,104 @@
+nextflow_process {
+
+    name "Test Process WHATSHAP_PHASE"
+    script "../main.nf"
+    process "WHATSHAP_PHASE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "whatshap"
+    tag "whatshap/phase"
+    tag "samtools/faidx"
+
+    setup {
+        run("SAMTOOLS_FAIDX") {
+            script "../../../samtools/faidx/main.nf"
+            process {
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                             file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome3.fasta', checkIfExists: true) ]
+                input[1] = [[],[]]
+                input[2] = false
+                """
+            }
+        }
+    }
+
+    test("whatshap - phase - vcf") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz.csi', checkIfExists: true),
+
+                ]
+
+                input[1] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam.bai', checkIfExists: true),
+                ]
+
+                input[2] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome3.fasta', checkIfExists: true)
+                ]).join(SAMTOOLS_FAIDX.out.fai)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.vcf.get(0).get(1)).vcf.summary,
+                    path(process.out.vcf.get(0).get(1)).vcf.variantsMD5,
+                    process.out.versions,
+                    path(process.out.versions[0]).yaml,
+                ).match() }
+            )
+        }
+
+    }
+
+    test("whatshap - phase - vcf - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz.csi', checkIfExists: true),
+
+                ]
+
+                input[1] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam.bai', checkIfExists: true),
+                ]
+
+                input[2] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome3.fasta', checkIfExists: true)
+                ]).join(SAMTOOLS_FAIDX.out.fai)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/whatshap/phase/tests/main.nf.test.snap b/modules/nf-core/whatshap/phase/tests/main.nf.test.snap
new file mode 100644
index 00000000000..2d0b696d0b7
--- /dev/null
+++ b/modules/nf-core/whatshap/phase/tests/main.nf.test.snap
@@ -0,0 +1,70 @@
+{
+    "whatshap - phase - vcf - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.phased.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.phased.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994"
+                ],
+                "tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.phased.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.phased.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
+        },
+        "timestamp": "2025-12-04T13:11:14.419732725"
+    },
+    "whatshap - phase - vcf": {
+        "content": [
+            "VcfFile [chromosomes=[chr19:45760000-45770300], sampleCount=1, variantCount=1, phased=false, phasedAutodetect=false]",
+            "e75d1ebbe87d6e55739cacb4e81dcd08",
+            [
+                "versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994"
+            ],
+            {
+                "WHATSHAP_PHASE": {
+                    "whatshap": 2.8
+                }
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
+        },
+        "timestamp": "2025-12-04T13:11:09.397108709"
+    }
+}
\ No newline at end of file