Skip to content
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
f843ae2
add metabuli/build module
pawelciurkaardigen Mar 26, 2025
a044140
add test for metabuli/build
pawelciurkaardigen Mar 26, 2025
791b118
tests
pawelciurkaardigen Mar 27, 2025
689d72f
update tests: pass accession2taxid from `prokaryotypes` directory, up…
pawelciurkaardigen Apr 10, 2025
1246ee7
tackling some linting errors
pawelciurkaardigen Apr 10, 2025
316caeb
filling in meta.yml
pawelciurkaardigen Apr 17, 2025
dcb6589
Merge branch 'master' into metabuli_build
pawelciurkaardigen Apr 17, 2025
78f920d
don't validate split file for md5
pawelciurkaardigen Apr 17, 2025
a9846b1
Merge remote-tracking branch 'origin/metabuli_build' into metabuli_build
pawelciurkaardigen Apr 17, 2025
df482de
remove whitespaces
pawelciurkaardigen Apr 17, 2025
aab47d9
address few code review comments
pawelciurkaardigen Apr 25, 2025
c4b6fb1
Merge branch 'master' into metabuli_build
pawelciurkaardigen Apr 25, 2025
a0bc3e1
Merge branch 'master' into metabuli_build
jfy133 Apr 28, 2025
8d382cc
replace realpath -s with echo as -s is not available in busybox image
pawelciurkaardigen May 8, 2025
e56e767
populate stub section with output databse files
pawelciurkaardigen May 8, 2025
eae4a0a
populate stub section with output databse files
pawelciurkaardigen May 8, 2025
c1e6b80
Merge branch 'metabuli_build' of github.com:nf-core/modules into meta…
pawelciurkaardigen May 8, 2025
3575fc1
Added a test with two input assemblies, updated fasta input description
pawelciurkaardigen Feb 27, 2026
4a5792b
add --cds-info input handling
pawelciurkaardigen Mar 6, 2026
bb22691
add --cds-info input handling
pawelciurkaardigen Mar 6, 2026
b60d0ad
Apply suggestion from @jfy133
pawelciurkaardigen Mar 6, 2026
3764d52
Update modules/nf-core/metabuli/build/meta.yml
sofstam Mar 12, 2026
5b716aa
Merge branch 'master' into metabuli_build
sofstam Mar 12, 2026
6149ca1
Add topics
Mar 12, 2026
9331317
Update tests
Mar 12, 2026
77d5450
Merge branch 'master' into metabuli_build
sofstam Mar 12, 2026
bcc41f7
Fix linting
Mar 12, 2026
9520585
Fix stub test
Mar 12, 2026
0579aa5
Merge branch 'master' into metabuli_build
sofstam Mar 12, 2026
4656534
Merge branch 'master' into metabuli_build
sofstam Mar 12, 2026
84a4ba1
Update snapshots
Mar 13, 2026
2829908
Update modules/nf-core/metabuli/build/meta.yml
sofstam Mar 13, 2026
0dc43ef
Merge branch 'master' into metabuli_build
sofstam Mar 13, 2026
d794913
Merge branch 'master' into metabuli_build
sofstam Mar 13, 2026
35bb943
Merge branch 'master' into metabuli_build
sofstam Mar 13, 2026
7ce7f54
Fix linting
Mar 13, 2026
70ec4e1
Fix linting
Mar 13, 2026
32344a6
Merge branch 'master' into metabuli_build
sofstam Mar 13, 2026
09a007e
Merge branch 'master' into metabuli_build
sofstam Mar 13, 2026
923df22
Merge branch 'master' into metabuli_build
sofstam Mar 13, 2026
44a2b82
Merge branch 'master' into metabuli_build
sofstam Mar 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/metabuli/build/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the metabuli/build module (referenced from main.nf via
# "${moduleDir}/environment.yml").
channels:
- conda-forge
- bioconda
dependencies:
# Pinned to 1.1.1 — the same version that appears in the container tags in main.nf.
- "bioconda::metabuli=1.1.1"
58 changes: 58 additions & 0 deletions modules/nf-core/metabuli/build/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// METABULI_BUILD
//
// Builds a Metabuli database with `metabuli build`.
//
// Inputs:
//   meta, fasta      - sample map and one or more FASTA files; their staged names are
//                      written one per line to fasta.txt, which is what metabuli reads
//   taxonomy_names   - staged as taxonomy/names.dmp
//   taxonomy_nodes   - staged as taxonomy/nodes.dmp
//   taxonomy_merged  - optional; staged as taxonomy/merged.dmp. When not supplied an
//                      empty taxonomy/merged.dmp is created instead
//   accession2taxid  - staged inside taxonomy/ and passed positionally to metabuli
//   cds_info         - optional; when supplied, the staged names are written one per
//                      line to cds_info.txt and passed via --cds-info
//
// Outputs:
//   db                - the database directory, named after `prefix`
//   versions_metabuli - (process, tool, version) tuple published on the `versions` topic
process METABULI_BUILD {
    tag "$meta.id"
    label 'process_medium'
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/metabuli:1.1.1--pl5321h0bb26bb_0':
        'biocontainers/metabuli:1.1.1--pl5321h0bb26bb_0' }"

    input:
    tuple val(meta), path(fasta)
    path taxonomy_names, stageAs: 'taxonomy/names.dmp'
    path taxonomy_nodes, stageAs: 'taxonomy/nodes.dmp'
    path taxonomy_merged, stageAs: 'taxonomy/merged.dmp'
    path accession2taxid, stageAs: 'taxonomy/*'
    path cds_info

    output:
    tuple val(meta), path("$prefix"), emit: db
    tuple val("${task.process}"), val('metabuli'), eval('metabuli 2>&1 | awk \'/metabuli Version:/ {print $3}\''), emit: versions_metabuli, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately assigned without `def`: the output block refers to it.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Every other helper is declared with `def` so it stays local to this task.
    // No merged.dmp supplied -> create an empty one so taxonomy/ always contains the file.
    def make_merged = taxonomy_merged ? "" : "touch taxonomy/merged.dmp"
    // Only write the CDS list (and pass --cds-info) when CDS files were actually given.
    def make_cds_list = cds_info ? "echo ${cds_info} | tr ' ' '\\n' > cds_info.txt" : ""
    def cds_info_arg = cds_info ? "--cds-info cds_info.txt" : ""
    // task.memory is null when no memory directive is configured; omit --max-ram in
    // that case instead of failing on null.toGiga().
    def max_ram_arg = task.memory ? "--max-ram ${task.memory.toGiga()}" : ""
    """
    ${make_merged}
    echo ${fasta} | tr ' ' '\\n' > fasta.txt
    ${make_cds_list}

    metabuli build \\
        "${prefix}" \\
        fasta.txt \\
        ${accession2taxid} \\
        --taxonomy-path taxonomy \\
        ${max_ram_arg} \\
        --threads ${task.cpus} \\
        ${cds_info_arg} \\
        ${args}
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    // Creates empty placeholders for the database files that the module tests snapshot.
    """
    mkdir -p "$prefix"

    touch "$prefix/acc2taxid.map"
    touch "$prefix/diffIdx"
    touch "$prefix/info"
    touch "$prefix/split"
    touch "$prefix/taxID_list"
    touch "$prefix/db.parameters"
    """
}
88 changes: 88 additions & 0 deletions modules/nf-core/metabuli/build/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Module metadata for metabuli/build: describes the inputs, outputs and topics
# declared by the METABULI_BUILD process in main.nf.
name: "metabuli_build"
description: Builds a database for classification with metabuli from FASTA files
  and a taxonomy
keywords:
  - database
  - taxonomic classification
  - classification
  - metagenomics
tools:
  - "metabuli":
      description: "Metabuli: specific and sensitive metagenomic classification via
        joint analysis of DNA and amino acid"
      homepage: "https://github.com/steineggerlab/Metabuli"
      documentation: "https://github.com/steineggerlab/Metabuli"
      tool_dev_url: "https://github.com/steineggerlab/Metabuli"
      doi: "10.1101/2023.05.31.543018"
      licence:
        - "GPL v3"
      identifier: biotools:metabuli
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
    - fasta:
        type: file
        description: List of fasta files with input assemblies
        ontologies: []
  - taxonomy_names:
      type: file
      description: File describing individual members of a taxonomic tree in NCBI
        names.dmp format
      ontologies: []
  - taxonomy_nodes:
      type: file
      description: File describing parent-child relationships of a taxonomic tree
        in NCBI nodes.dmp format
      ontologies: []
  - taxonomy_merged:
      type: file
      description: Optional input to map old/deprecated TaxID to new ones
      ontologies: []
  # Declared as a single `path` in main.nf and described as a TSV file, hence `file`.
  - accession2taxid:
      type: file
      description: TSV file (with no header) of first column with mapping
        accession (from first part of each fasta entry) and second column the
        corresponding TaxID
      ontologies: []
  - cds_info:
      type: file
      description: Optional list of CDS files; their paths are written to a list
        file that is passed to `metabuli build` via `--cds-info`
      ontologies: []
output:
  db:
    - - meta:
          type: map
          description: Groovy Map containing sample information
      - $prefix:
          type: directory
          description: metabuli database directory for classification
  versions_metabuli:
    - - ${task.process}:
          type: string
          description: The name of the process
      - metabuli:
          type: string
          description: The name of the tool
      - metabuli 2>&1 | awk '/metabuli Version:/ {print $3}':
          type: eval
          description: The expression to obtain the version of the tool
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
      - metabuli:
          type: string
          description: The name of the tool
      - metabuli 2>&1 | awk '/metabuli Version:/ {print $3}':
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@pawelciurkaardigen"
  - "@MichalStachowiakArdigen"
  - "@sofstam"
maintainers:
  - "@pawelciurkaardigen"
  - "@MichalStachowiakArdigen"
  - "@sofstam"
171 changes: 171 additions & 0 deletions modules/nf-core/metabuli/build/tests/main.nf.test
Comment thread
sofstam marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// nf-test suite for the METABULI_BUILD process defined in ../main.nf.
//
// Every test feeds the six process inputs in declaration order:
//   input[0] = [ meta, [fasta, ...] ]   input[1] = names.dmp      input[2] = nodes.dmp
//   input[3] = merged.dmp (always [] here, i.e. not supplied)
//   input[4] = accession2taxid          input[5] = cds_info ([] when not supplied)
//
// Each test snapshots the same six database files. `split` and `db.parameters` are
// recorded by file name only (`file(...).name`); the other four go through `path(...)`.
nextflow_process {

name "Test Process METABULI_BUILD"
script "../main.nf"
process "METABULI_BUILD"

tag "modules"
tag "modules_nfcore"
tag "metabuli"
tag "metabuli/build"

// Single FASTA input, no merged.dmp, no cds_info.
test("sarscov2 - sarscov2 DNA - no cds info") {

when {
process {
"""
input[0] = [
[ id:'test' ],
[
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true),
]
]
input[1] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
input[2] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
input[3] = []
input[4] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
input[5] = []
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
path("${process.out.db[0][1]}/acc2taxid.map"),
path("${process.out.db[0][1]}/diffIdx"),
path("${process.out.db[0][1]}/info"),
file("${process.out.db[0][1]}/split").name,
path("${process.out.db[0][1]}/taxID_list"),
file("${process.out.db[0][1]}/db.parameters").name,
Comment thread
sofstam marked this conversation as resolved.
process.out.findAll { key, val -> key.startsWith("versions")}
).match()
}
)
}
}

// Two FASTA inputs (one plain, one gzipped) in the same tuple; no cds_info.
test("sarscov2 - sarscov2 DNA - two input fasta") {

when {
process {
"""
input[0] = [
[ id:'test' ],
[
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz", checkIfExists: true),
]
]
input[1] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
input[2] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
input[3] = []
input[4] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
input[5] = []
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
path("${process.out.db[0][1]}/acc2taxid.map"),
path("${process.out.db[0][1]}/diffIdx"),
path("${process.out.db[0][1]}/info"),
file("${process.out.db[0][1]}/split").name,
path("${process.out.db[0][1]}/taxID_list"),
file("${process.out.db[0][1]}/db.parameters").name,
process.out.findAll { key, val -> key.startsWith("versions")}
).match()
}
)
}

}

// Single FASTA input with cds_info supplied (the bacteroides_fragilis genome.fna.gz
// test file is used as the cds_info entry).
test("sarscov2 - sarscov2 DNA - with cds info") {

when {
process {
"""
input[0] = [
[ id:'test' ],
[
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true),
]
]
input[1] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
input[2] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
input[3] = []
input[4] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
input[5] = [
file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true)
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
path("${process.out.db[0][1]}/acc2taxid.map"),
path("${process.out.db[0][1]}/diffIdx"),
path("${process.out.db[0][1]}/info"),
file("${process.out.db[0][1]}/split").name,
path("${process.out.db[0][1]}/taxID_list"),
file("${process.out.db[0][1]}/db.parameters").name,
process.out.findAll { key, val -> key.startsWith("versions")}
).match()
}
)
}

}

// Same inputs as the "with cds info" test, but run with -stub so only the stub
// section of the process executes. Unlike the other tests, the version output is
// snapshotted through process.out.versions_metabuli rather than the findAll filter.
test("sarscov2 - sarscov2 DNA - with cds info - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test' ],
[
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true),
]
]
input[1] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
input[2] = file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
input[3] = []
input[4] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
input[5] = [
file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true)
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
path("${process.out.db[0][1]}/acc2taxid.map"),
path("${process.out.db[0][1]}/diffIdx"),
path("${process.out.db[0][1]}/info"),
file("${process.out.db[0][1]}/split").name,
path("${process.out.db[0][1]}/taxID_list"),
file("${process.out.db[0][1]}/db.parameters").name,
process.out.versions_metabuli
).match()
}
)
}

}


}
Comment thread
sofstam marked this conversation as resolved.
Loading
Loading