-
Notifications
You must be signed in to change notification settings - Fork 940
New module - TD2 (added modules for td2.longorfs & td2.predict) #9475
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - "bioconda::td2=1.0.6" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| // Runs TD2.LongOrfs on a transcript FASTA and emits the longest_orfs.{cds,gff3,pep} files written under the ${prefix}/ output directory. | ||
| process TD2_LONGORFS { | ||
| tag "$meta.id" | ||
| label 'process_medium' | ||
|
|
||
| conda "${moduleDir}/environment.yml" | ||
| container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
| 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/41/4155bf3b720e1e32d0615a947696fb0287ee4e8cdbeb4ec784dd4da7bb5b2e86/data': | ||
| "community.wave.seqera.io/library/td2:1.0.6--ea3e5ac09443b677"}" | ||
|
|
||
| input: | ||
| tuple val(meta), path(fasta) | ||
|
|
||
| output: | ||
| tuple val(meta), path("${prefix}/longest_orfs.{cds,gff3,pep}"), emit: orfs | ||
| path("versions.yml") , emit: versions | ||
|
|
||
| when: | ||
| task.ext.when == null || task.ext.when | ||
|
|
||
| script: | ||
| def args = task.ext.args ?: '' | ||
| // prefix is assigned without def; the output block above interpolates ${prefix}. | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| // WARN: pinned to the td2 version in the container/conda spec above. The previous `\$(td2 v1.0.6)` call yielded an empty value (the nf-test snapshot recorded td2: null). | ||
| // NOTE(review): if TD2 exposes a CLI version query, use that instead - TODO confirm. Update this string when bumping the container. | ||
| def VERSION = '1.0.6' | ||
|
|
||
| """ | ||
| TD2.LongOrfs \\ | ||
| -t ${fasta} \\ | ||
| -O ${prefix} \\ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we just make the files directly in this folder, rather than a subfolder? |
||
| --threads ${task.cpus} \\ | ||
| ${args} | ||
|
|
||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| td2: ${VERSION} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be getting the version from the tool. |
||
| END_VERSIONS | ||
| """ | ||
|
|
||
| stub: | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| // Same pinned version as the script block so stub and real runs write identical versions.yml content. | ||
| def VERSION = '1.0.6' | ||
|
|
||
| """ | ||
| mkdir -p ${prefix}/ | ||
| touch ${prefix}/longest_orfs.cds | ||
| touch ${prefix}/longest_orfs.gff3 | ||
| touch ${prefix}/longest_orfs.pep | ||
|
|
||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| td2: ${VERSION} | ||
| END_VERSIONS | ||
| """ | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| # nf-core module metadata for td2_longorfs: one input tuple (meta, fasta) and two outputs (orfs, versions). | ||
| name: "td2_longorfs" | ||
| description: TD2 identifies candidate coding regions within transcript | ||
| sequences, such as those generated by de novo RNA-Seq transcript assembly | ||
| keywords: | ||
| - td2 | ||
| - orfs | ||
| - longorfs | ||
| - transcripts | ||
| tools: | ||
| - td2: | ||
| description: "TD2 identifies candidate coding regions within transcript sequences, | ||
| such as those generated by de novo RNA-Seq transcript assembly" | ||
| homepage: "https://github.com/Markusjsommer/TD2" | ||
| documentation: "https://github.com/Markusjsommer/TD2" | ||
| tool_dev_url: "https://github.com/Markusjsommer/TD2" | ||
| licence: ["MIT"] | ||
| identifier: "" | ||
|
|
||
| # Single input channel: a tuple of the sample meta map and the transcript FASTA. | ||
| input: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. `[ id:'sample1' ]` | ||
| - fasta: | ||
| type: file | ||
| description: Fasta file containing the target transcript sequences | ||
| pattern: "*.{fasta}" | ||
| ontologies: [] | ||
| output: | ||
| # orfs: the three longest_orfs files, emitted together in one tuple with the meta map. | ||
| orfs: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. `[ id:'sample1' ]` | ||
| - ${prefix}/longest_orfs.{cds,gff3,pep}: | ||
| type: file | ||
| description: Files containing the longest ORFs predicted from the input | ||
| transcript sequences | ||
| pattern: "${prefix}/longest_orfs.{cds,gff3,pep}" | ||
| ontologies: [] | ||
| versions: | ||
| - versions.yml: | ||
| type: file | ||
| description: File containing software versions | ||
| pattern: "versions.yml" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3750" # YAML | ||
|
|
||
| authors: | ||
| - "@khersameesh24" | ||
| maintainers: | ||
| - "@khersameesh24" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| // nf-test spec for TD2_LONGORFS: one real run and one -stub run on the same transcriptome FASTA, each snapshotting all outputs plus the parsed versions.yml. | ||
| nextflow_process { | ||
|
|
||
| name "Test Process TD2_LONGORFS" | ||
| script "../main.nf" | ||
| process "TD2_LONGORFS" | ||
|
|
||
| tag "modules" | ||
| tag "modules_nfcore" | ||
| tag "td2" | ||
| tag "td2/longorfs" | ||
|
|
||
| test("td2.longorfs - transcriptome.fasta") { | ||
|
|
||
| when { | ||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id:'test_td2_longorfs' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/transcriptome.fasta', checkIfExists: true), | ||
| ] | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| // Success check lives inside assertAll so a failed process and a snapshot mismatch are reported together. | ||
| assertAll( | ||
| { assert process.success }, | ||
| { assert snapshot( | ||
| process.out, | ||
| path(process.out.versions[0]).yaml | ||
| ).match() } | ||
| ) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| test("td2.longorfs - transcriptome.fasta -stub") { | ||
|
|
||
| options "-stub" | ||
|
|
||
| when { | ||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id:'test_td2_longorfs' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/transcriptome.fasta', checkIfExists: true), | ||
| ] | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| // Success check lives inside assertAll so a failed process and a snapshot mismatch are reported together. | ||
| assertAll( | ||
| { assert process.success }, | ||
| { assert snapshot( | ||
| process.out, | ||
| path(process.out.versions[0]).yaml | ||
| ).match() } | ||
| ) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| { | ||
| "td2.longorfs - transcriptome.fasta": { | ||
| "content": [ | ||
| { | ||
| "0": [ | ||
| [ | ||
| { | ||
| "id": "test_td2_longorfs" | ||
| }, | ||
| [ | ||
| "longest_orfs.cds:md5,4226fe196a4527f938e25061ffe7eaf6", | ||
| "longest_orfs.gff3:md5,864ae9147b65d07eb9d79c0e42968e12", | ||
| "longest_orfs.pep:md5,0151dac6998be924139a4f5a5a0c590b" | ||
| ] | ||
| ] | ||
| ], | ||
| "1": [ | ||
| "versions.yml:md5,0856a2e8d28b454f9c3a768cd3451f1a" | ||
| ], | ||
| "orfs": [ | ||
| [ | ||
| { | ||
| "id": "test_td2_longorfs" | ||
| }, | ||
| [ | ||
| "longest_orfs.cds:md5,4226fe196a4527f938e25061ffe7eaf6", | ||
| "longest_orfs.gff3:md5,864ae9147b65d07eb9d79c0e42968e12", | ||
| "longest_orfs.pep:md5,0151dac6998be924139a4f5a5a0c590b" | ||
| ] | ||
| ] | ||
| ], | ||
| "versions": [ | ||
| "versions.yml:md5,0856a2e8d28b454f9c3a768cd3451f1a" | ||
| ] | ||
| }, | ||
| { | ||
| "TD2_LONGORFS": { | ||
| "td2": null | ||
| } | ||
| } | ||
| ], | ||
| "meta": { | ||
| "nf-test": "0.9.3", | ||
| "nextflow": "25.10.0" | ||
| }, | ||
| "timestamp": "2025-11-27T09:33:10.428948889" | ||
| }, | ||
| "td2.longorfs - transcriptome.fasta -stub": { | ||
| "content": [ | ||
| { | ||
| "0": [ | ||
| [ | ||
| { | ||
| "id": "test_td2_longorfs" | ||
| }, | ||
| [ | ||
| "longest_orfs.cds:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "longest_orfs.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "longest_orfs.pep:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ] | ||
| ], | ||
| "1": [ | ||
| "versions.yml:md5,0856a2e8d28b454f9c3a768cd3451f1a" | ||
| ], | ||
| "orfs": [ | ||
| [ | ||
| { | ||
| "id": "test_td2_longorfs" | ||
| }, | ||
| [ | ||
| "longest_orfs.cds:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "longest_orfs.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "longest_orfs.pep:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ] | ||
| ], | ||
| "versions": [ | ||
| "versions.yml:md5,0856a2e8d28b454f9c3a768cd3451f1a" | ||
| ] | ||
| }, | ||
| { | ||
| "TD2_LONGORFS": { | ||
| "td2": null | ||
| } | ||
| } | ||
| ], | ||
| "meta": { | ||
| "nf-test": "0.9.3", | ||
| "nextflow": "25.10.0" | ||
| }, | ||
| "timestamp": "2025-11-27T09:35:15.301277063" | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - "bioconda::td2=1.0.6" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| // Runs TD2.Predict on a transcript FASTA using the directory staged as 'orfs', then moves the *.TD2.{bed,cds,gff3,pep} results into ${prefix}/. | ||
| process TD2_PREDICT { | ||
| tag "$meta.id" | ||
| label 'process_medium' | ||
|
|
||
| conda "${moduleDir}/environment.yml" | ||
| container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
| 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/41/4155bf3b720e1e32d0615a947696fb0287ee4e8cdbeb4ec784dd4da7bb5b2e86/data': | ||
| "community.wave.seqera.io/library/td2:1.0.6--ea3e5ac09443b677"}" | ||
|
|
||
| input: | ||
| tuple val(meta), path(fasta), path(orfs_dir, stageAs: 'orfs') | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, so this tool requires all three files in one directory? Then I can see a reason to emit them as one channel above. |
||
|
|
||
| output: | ||
| tuple val(meta), path("${prefix}/*.TD2.{bed,cds,gff3,pep}"), emit: predictions | ||
| path("versions.yml") , emit: versions | ||
|
|
||
| when: | ||
| task.ext.when == null || task.ext.when | ||
|
|
||
| script: | ||
| def args = task.ext.args ?: '' | ||
| // prefix is assigned without def; the output block above interpolates ${prefix}. | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| // WARN: pinned to the td2 version in the container/conda spec above. The previous `\$(td2 v1.0.6)` call ran a `td2` command with the version string as its argument rather than querying a version. | ||
| // NOTE(review): if TD2 exposes a CLI version query, use that instead - TODO confirm. Update this string when bumping the container. | ||
| def VERSION = '1.0.6' | ||
|
|
||
| """ | ||
| mkdir -p ${prefix}/ | ||
|
|
||
| TD2.Predict \\ | ||
| -t ${fasta} \\ | ||
| -O ${orfs_dir} \\ | ||
| ${args} | ||
|
|
||
| mv *.TD2.{bed,cds,gff3,pep} ${prefix}/ | ||
|
|
||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| td2: ${VERSION} | ||
| END_VERSIONS | ||
| """ | ||
|
|
||
| stub: | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| // Same pinned version as the script block so stub and real runs write identical versions.yml content. | ||
| def VERSION = '1.0.6' | ||
|
|
||
| """ | ||
| mkdir -p ${prefix}/ | ||
| touch ${prefix}/fakefile.TD2.bed | ||
| touch ${prefix}/fakefile.TD2.cds | ||
| touch ${prefix}/fakefile.TD2.gff3 | ||
| touch ${prefix}/fakefile.TD2.pep | ||
|
|
||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| td2: ${VERSION} | ||
| END_VERSIONS | ||
| """ | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| # nf-core module metadata for td2_predict: one input tuple (meta, fasta, orfs_dir) and two outputs (predictions, versions). | ||
| name: "td2_predict" | ||
| description: TD2 identifies candidate coding regions within transcript | ||
| sequences, such as those generated by de novo RNA-Seq transcript assembly | ||
| keywords: | ||
| - predict | ||
| - orfs | ||
| - coding regions | ||
| - td2.predict | ||
| tools: | ||
| - "td2": | ||
| description: "TD2 identifies candidate coding regions within transcript sequences, | ||
| such as those generated by de novo RNA-Seq transcript assembly" | ||
| homepage: "https://github.com/Markusjsommer/TD2" | ||
| documentation: "https://github.com/Markusjsommer/TD2" | ||
| tool_dev_url: "https://github.com/Markusjsommer/TD2" | ||
| licence: ["MIT"] | ||
| identifier: "" | ||
|
|
||
| input: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. `[ id:'sample1' ]` | ||
| - fasta: | ||
| type: file | ||
| description: Transcripts fasta file | ||
| pattern: "*.{fasta}" | ||
| ontologies: [] | ||
| # orfs_dir is a directory, not a single file; the process input declaration stages it under the name 'orfs'. | ||
| - orfs_dir: | ||
| type: directory | ||
| description: Directory containing the ORF prediction files generated by the `td2_longorfs` module | ||
| pattern: "orfs_dir/" | ||
| ontologies: [] | ||
| output: | ||
| # predictions: all *.TD2.{bed,cds,gff3,pep} files, emitted together in one tuple with the meta map. | ||
| predictions: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. `[ id:'sample1' ]` | ||
| - ${prefix}/*.TD2.{bed,cds,gff3,pep}: | ||
| type: file | ||
| description: Files containing the TD2 ORF predictions for the input transcript sequences | ||
| pattern: "${prefix}/*.TD2.{bed,cds,gff3,pep}" | ||
| ontologies: [] | ||
| versions: | ||
| - versions.yml: | ||
| type: file | ||
| description: File containing software versions | ||
| pattern: "versions.yml" | ||
| ontologies: | ||
| - edam: "http://edamontology.org/format_3750" # YAML | ||
|
|
||
| authors: | ||
| - "@khersameesh24" | ||
| maintainers: | ||
| - "@khersameesh24" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We generally make one output channel per file, rather than multiple in one, unless there is a good reason otherwise.