Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions modules/nf-core/minimac4/impute/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ process MINIMAC4_IMPUTE {
'biocontainers/minimac4:4.1.6--hcb620b3_1' }"

input:
tuple val(meta), path(target_vcf), path(target_index), path(ref_msav), path(sites_vcf), path(sites_index), path(map)
tuple val(meta), path(target_vcf), path(target_index), path(ref_msav), path(sites_vcf), path(sites_index), path(map), val(region)

output:
tuple val(meta), path("*.{bcf,sav,vcf.gz,vcf,ubcf,usav}"), emit: vcf
Expand All @@ -18,24 +18,26 @@ process MINIMAC4_IMPUTE {
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def extension = args.contains("--output-format bcf") || args.contains("-O bcf") ? "bcf" :
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def extension = args.contains("--output-format bcf") || args.contains("-O bcf") ? "bcf" :
args.contains("--output-format sav") || args.contains("-O sav") ? "sav" :
args.contains("--output-format vcf.gz") || args.contains("-O vcf.gz") ? "vcf.gz" :
args.contains("--output-format vcf") || args.contains("-O vcf") ? "vcf" :
args.contains("--output-format ubcf") || args.contains("-O ubcf") ? "ubcf" :
args.contains("--output-format usav") || args.contains("-O usav") ? "usav" :
"vcf.gz"
def sites_cmd = sites_vcf ? "--sites $sites_vcf" : ""
def map_cmd = map ? "--map $map" : ""
def sites_cmd = sites_vcf ? "--sites $sites_vcf" : ""
def map_cmd = map ? "--map $map" : ""
def region_cmd = region ? "--region $region" : ""
"""
minimac4 \\
$ref_msav \\
$target_vcf \\
$args \\
$sites_cmd \\
$map_cmd \\
$region_cmd \\
--threads $task.cpus \\
-o ${prefix}.${extension}

Expand Down
4 changes: 4 additions & 0 deletions modules/nf-core/minimac4/impute/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ input:
pattern: "*.map"
ontologies:
- edam: "http://edamontology.org/data_1278"
- region:
type: string
description: Region to perform imputation
pattern: "(chr)?\\w+(:\\d+-\\d+)?"

output:
vcf:
Expand Down
8 changes: 4 additions & 4 deletions modules/nf-core/minimac4/impute/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ nextflow_process {
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.GRCh38.chr22.map.gz", checkIfExists:true)
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.GRCh38.glimpse.chr22.map.gz", checkIfExists:true)
])
.combine(MINIMAC4_COMPRESSREF.out.msav)
.map{
meta, target_vcf, target_index, sites_vcf, sites_index, map, metaRef, ref -> [
meta, target_vcf, target_index, ref, sites_vcf, sites_index, map
meta, target_vcf, target_index, ref, sites_vcf, sites_index, map, "chr22"
]
}
"""
Expand Down Expand Up @@ -79,7 +79,7 @@ nextflow_process {
.combine(MINIMAC4_COMPRESSREF.out.msav)
.map{
meta, target_vcf, target_index, metaRef, ref -> [
meta, target_vcf, target_index, ref, [], [], []
meta, target_vcf, target_index, ref, [], [], [], []
]
}
"""
Expand Down Expand Up @@ -112,7 +112,7 @@ nextflow_process {
[id: "NA12878", chr: "chr22"],
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz",checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true),
[], [], [], []
[], [], [], [], []
])
"""
}
Expand Down
100 changes: 100 additions & 0 deletions subworkflows/nf-core/vcf_impute_minimac4/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
include { MINIMAC4_COMPRESSREF } from '../../../modules/nf-core/minimac4/compressref'
include { MINIMAC4_IMPUTE } from '../../../modules/nf-core/minimac4/impute'
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate'
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index'
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index'

//
// Impute target VCF files against a reference panel with MINIMAC4.
//
// Steps:
//   1. Split the panel channel by file type; VCF/BCF panels are converted to
//      MSAV with MINIMAC4_COMPRESSREF, MSAV panels are passed through as-is.
//   2. Attach the sites file, chunk and genetic map that share the panel meta.
//   3. Run MINIMAC4_IMPUTE once per (input, panel, chunk) combination.
//   4. Index each imputed chunk, ligate the chunks back together with
//      GLIMPSE2_LIGATE and index the ligated file.
//
workflow VCF_IMPUTE_MINIMAC4 {

    take:
    ch_input   // channel (mandatory): [ [id, chr], vcf, tbi ]
    ch_panel   // channel (mandatory): [ [panel, chr], vcf|bcf|msav, index ]
    ch_posfile // channel (optional) : [ [panel, chr], sites_vcf, sites_index ]
    ch_chunks  // channel (optional) : [ [panel, chr], chr, start, end ]
    ch_map     // channel (optional) : [ [panel, chr], map]

    main:

    ch_versions = channel.empty()

    // Route every panel entry by file extension. The closure parameter is
    // deliberately not called `file` so it does not shadow Nextflow's file().
    ch_panel_branched = ch_panel.branch { _meta, panel_file, _index ->
        def name = panel_file.toString()
        vcf  : name.matches(/.*\.(vcf|bcf)(\.gz)?$/)
        msav : name.endsWith('.msav')
        other: true
    }

    // Anything that is neither VCF/BCF nor MSAV aborts the run
    ch_panel_branched.other.map{
        error "ERROR: ch_panel files must be either VCF/BCF or MSAV."
    }

    // Compress reference panel to MSAV format
    // (no `.view()` here: it would dump every panel tuple to stdout on each run)
    MINIMAC4_COMPRESSREF(ch_panel_branched.vcf)
    ch_versions = ch_versions.mix(MINIMAC4_COMPRESSREF.out.versions.first())

    // Freshly compressed panels plus the ones supplied directly as MSAV
    // (the index is dropped for the latter so both have the shape [meta, msav])
    ch_panel_msav = MINIMAC4_COMPRESSREF.out.msav
        .mix(
            ch_panel_branched.msav.map{ meta, msav, _index -> [meta, msav] }
        )

    // Pair each panel with the sites, chunks and map that carry the same meta
    // (first tuple element). A panel without a matching entry in any of the
    // three channels is dropped by `combine(by:0)`.
    ch_panel_impute = ch_panel_msav
        .combine(ch_posfile, by:0)
        .combine(ch_chunks, by:0)
        .combine(ch_map, by:0)

    ch_panel_impute.ifEmpty{
        error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_posfile, ch_chunks and ch_map channel as input."
    }

    // Prepare input channels for MINIMAC4
    // NOTE(review): this is an unkeyed combine, so every input is paired with
    // every panel/chunk regardless of the `chr` held in each meta map —
    // confirm callers only supply matching chromosomes.
    ch_minimac4_input = ch_input
        .combine(ch_panel_impute)
        .map { metaI, target_vcf, target_tbi, metaPC, ref_msav, sites_vcf, sites_index, chr, start, end, map ->
            // Whole chromosome unless both chunk boundaries are supplied
            def regionout = "${chr}"
            if (start != [] && end != []) {
                regionout = "${chr}:${start}-${end}"
            }
            [
                // Input meta wins on key clashes; `regionout` keeps the
                // per-chunk tuples distinguishable until ligation.
                metaPC + metaI + ["regionout": regionout],
                target_vcf, target_tbi,
                ref_msav, sites_vcf, sites_index,
                map, regionout
            ]
        }

    // Perform imputation
    MINIMAC4_IMPUTE( ch_minimac4_input )
    ch_versions = ch_versions.mix(MINIMAC4_IMPUTE.out.versions.first())

    // Index the output VCF file
    BCFTOOLS_INDEX_1( MINIMAC4_IMPUTE.out.vcf )
    ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions.first())

    // Ligate all imputed chunks in one file and index it.
    // `regionout` is removed from the meta so that all chunks of the same
    // input/panel share one grouping key for groupTuple().
    ligate_input = MINIMAC4_IMPUTE.out.vcf
        .join(
            BCFTOOLS_INDEX_1.out.tbi
                .mix(BCFTOOLS_INDEX_1.out.csi)
        )
        .map{ meta, vcf, index ->
            def keysToKeep = meta.keySet() - ['regionout']
            [ meta.subMap(keysToKeep), vcf, index ]
        }
        .groupTuple()

    GLIMPSE2_LIGATE( ligate_input )
    ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() )

    BCFTOOLS_INDEX_2( GLIMPSE2_LIGATE.out.merged_variants )
    ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions.first() )

    // Join imputed and index files
    ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants
        .join(
            BCFTOOLS_INDEX_2.out.tbi
                .mix(BCFTOOLS_INDEX_2.out.csi)
        )

    emit:
    vcf_index = ch_vcf_index // channel: [ [panel meta + input meta], vcf, index ]
    versions  = ch_versions  // channel: [ versions.yml ]
}
118 changes: 118 additions & 0 deletions subworkflows/nf-core/vcf_impute_minimac4/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "VCF_IMPUTE_MINIMAC4"
description: |
Subworkflow to impute VCF files using MINIMAC4 software. The subworkflow
takes VCF files, phased reference panel, and genetic maps to perform imputation
and outputs phased and imputed VCF files.
keywords:
- VCF
- imputation
- minimac4
- phasing
- MSAV
components:
- minimac4/compressref
- minimac4/impute
- bcftools/index
- glimpse2/ligate
input:
- ch_input:
description: Channel with input data
structure:
- meta:
type: map
description: |
Metadata map containing sample information
- vcf:
type: file
description: Input VCF files
pattern: "*.{vcf,bcf,vcf.gz}"
- index:
type: file
description: Input index file
pattern: "*.{tbi,csi}"
- ch_panel:
description: Channel with phased reference panel data
structure:
- meta:
type: map
description: |
Metadata map that will be combined with the input data map
- vcf:
type: file
description: Reference panel files by chromosome (VCF/BCF, or already compressed MSAV)
pattern: "*.{vcf,bcf,vcf.gz,msav}"
- index:
type: file
description: Reference panel VCF index files
pattern: "*.{tbi,csi}"
- ch_posfile:
description: Channel with the variant positions to impute
structure:
- meta:
type: map
description: |
Metadata map containing chromosome information
- sites_vcf:
type: file
description: |
VCF/BCF file containing the positions to impute
- sites_index:
type: file
description: |
CSI|TBI index file of the sites to impute
- ch_chunks:
description: Channel containing the region to impute
structure:
- meta:
type: map
description: |
Metadata map containing chromosome information
- chr:
type: string
description: Chromosome name
- start:
type: integer
description: Start position of the chunk
- end:
type: integer
description: End position of the chunk
- ch_map:
description: Channel with genetic map data
structure:
- meta:
type: map
description: |
Metadata map containing chromosome information
- map:
type: file
description: Minimac format genetic map files
pattern: "*.map"
output:
- vcf_index:
description: Channel with imputed and phased VCF files
structure:
- meta:
type: map
description: |
Metadata map of the target input file combined with the reference panel map.
- vcf:
type: file
description: VCF imputed and phased file by sample
pattern: "*.{vcf,bcf,vcf.gz}"
- index:
type: file
description: VCF index file
pattern: "*.{tbi,csi}"
- versions:
description: Channel containing software versions file
structure:
- versions.yml:
type: file
description: File containing versions of the software used
authors:
- "@LouisLeNezet"
- "@gichas"
maintainers:
- "@LouisLeNezet"
- "@gichas"
Loading
Loading