6 changes: 4 additions & 2 deletions .github/workflows/ci.yml
@@ -65,8 +65,10 @@ jobs:
if [[ "${{ matrix.tags }}" == "test_motus" ]]; then
wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
python downloadDB.py --no-download-progress
echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
echo "motus,db_mOTU,,db_mOTU" >> 'database_motus.csv'
echo 'tool,db_name,db_params,db_type,db_path' > 'database_motus.csv'
echo "motus,db1_mOTU,,short,db_mOTU" >> 'database_motus.csv'
echo "motus,db2_mOTU,,long,db_mOTU" >> 'database_motus.csv'
echo "motus,db3_mOTU,,short;long,db_mOTU" >> 'database_motus.csv'
nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }};
else
nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }};
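For reference, the `echo` commands above assemble the following `database_motus.csv`, registering the same mOTUs database three times so CI exercises every `db_type` value:

```csv
tool,db_name,db_params,db_type,db_path
motus,db1_mOTU,,short,db_mOTU
motus,db2_mOTU,,long,db_mOTU
motus,db3_mOTU,,short;long,db_mOTU
```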
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`

- [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome estimation with Nonpareil (added by @jfy133)
- [#466](https://github.com/nf-core/taxprofiler/pull/466) - Input database sheets now require a `db_type` column to distinguish between short- and long-read databases

## v1.1.8dev - Augmented Akita Patch []

6 changes: 6 additions & 0 deletions assets/schema_database.json
@@ -39,6 +39,12 @@
"errorMessage": "Invalid database db_params entry. No quotes allowed.",
"meta": ["db_params"]
},
"db_type": {
"type": "string",
"enum": ["short", "long", "short;long"],
"default": "short;long",
"meta": ["db_type"]
},
"db_path": {
"type": "string",
"exists": true,
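With the new `db_type` property, a database sheet row may declare `short`, `long`, or `short;long`; a row that omits the column falls back to the schema default `short;long`. A sketch of a valid sheet (tool names, database names, and paths are illustrative only):

```csv
tool,db_name,db_params,db_type,db_path
kraken2,k2_standard,,short,/path/to/kraken2_db
kraken2,k2_standard_ont,,long,/path/to/kraken2_db
metaphlan,mpa_db,,short;long,/path/to/metaphlan_db
```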
59 changes: 40 additions & 19 deletions subworkflows/local/profiling.nf
@@ -60,26 +60,47 @@ workflow PROFILING {
COMBINE READS WITH POSSIBLE DATABASES
*/

// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':true], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
// Separate default 'short;long' (when necessary) databases when short/long specified in database sheet
ch_dbs = databases
.map{
meta_db, db ->
[ [meta_db.db_type.split(";")].flatten(), meta_db, db]
}
.transpose(by: 0)
.map{
type, meta_db, db ->
[[type: type], meta_db.subMap(meta_db.keySet() - 'db_type') + [type: type], db]
}

// Join short and long reads with their corresponding short/long database
// Note that a database without an explicit `db_type` defaults to `short;long` and is
// emitted for both read types. E.g. if there are no 'long' reads, the 'long' database
// channel element generated above will have nothing to join to and will be discarded
// Final output: [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]

ch_input_for_profiling = reads
.map {
meta, reads ->
[meta + [id: "${meta.id}${meta.single_end ? '_se' : '_pe'}"], reads]
}
.combine(databases)
.branch {
centrifuge: it[2]['tool'] == 'centrifuge'
diamond: it[2]['tool'] == 'diamond'
kaiju: it[2]['tool'] == 'kaiju'
kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken
krakenuniq: it[2]['tool'] == 'krakenuniq'
malt: it[2]['tool'] == 'malt'
metaphlan: it[2]['tool'] == 'metaphlan'
motus: it[2]['tool'] == 'motus'
kmcp: it[2]['tool'] == 'kmcp'
ganon: it[2]['tool'] == 'ganon'
unknown: true
}
.map{
meta, reads ->
[[type: meta.type], meta, reads]
}
.combine(ch_dbs, by: 0)
.map{
db_type, meta, reads, db_meta, db ->
[ meta, reads, db_meta, db ]
}
.branch { meta, reads, db_meta, db ->
centrifuge: db_meta.tool == 'centrifuge'
diamond: db_meta.tool == 'diamond'
kaiju: db_meta.tool == 'kaiju'
kraken2: db_meta.tool == 'kraken2' || db_meta.tool == 'bracken' // to reuse the kraken module to produce the input data for bracken
krakenuniq: db_meta.tool == 'krakenuniq'
malt: db_meta.tool == 'malt'
metaphlan: db_meta.tool == 'metaphlan'
motus: db_meta.tool == 'motus'
kmcp: db_meta.tool == 'kmcp'
ganon: db_meta.tool == 'ganon'
unknown: true
}

/*
PREPARE PROFILER INPUT CHANNELS & RUN PROFILING
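To see what the `map`/`transpose`/`map` chain does to a `short;long` entry, here is a minimal standalone sketch (assumptions: run as its own DSL2 script with made-up meta map contents; this is not pipeline code). The `[type: ...]` map placed in the first tuple position is the key that `.combine(ch_dbs, by: 0)` later joins on:

```nextflow
// Standalone illustration of the db_type splitting above (values are illustrative).
workflow {
    databases = Channel.of(
        [[tool: 'motus', db_name: 'db3_mOTU', db_params: '', db_type: 'short;long'], file('db_mOTU')]
    )

    databases
        .map { meta_db, db ->
            // wrap the split types in a list so transpose can iterate over it
            [[meta_db.db_type.split(';')].flatten(), meta_db, db]
        }
        .transpose(by: 0) // one output tuple per read type in the list
        .map { type, meta_db, db ->
            // drop db_type from the meta map and record the single resolved type
            [[type: type], meta_db.subMap(meta_db.keySet() - 'db_type') + [type: type], db]
        }
        .view()
    // prints one element per type:
    // [[type:short], [tool:motus, db_name:db3_mOTU, db_params:, type:short], .../db_mOTU]
    // [[type:long], [tool:motus, db_name:db3_mOTU, db_params:, type:long], .../db_mOTU]
}
```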
9 changes: 6 additions & 3 deletions workflows/taxprofiler.nf
@@ -135,13 +135,13 @@ workflow TAXPROFILER {
}
.branch { meta, run_accession, instrument_platform, fastq_1, fastq_2, fasta ->
fastq: meta.single_end || fastq_2
return [ meta, fastq_2 ? [ fastq_1, fastq_2 ] : [ fastq_1 ] ]
return [ meta + [ type: "short" ], fastq_2 ? [ fastq_1, fastq_2 ] : [ fastq_1 ] ]
nanopore: instrument_platform == 'OXFORD_NANOPORE'
meta.single_end = true
return [ meta, [ fastq_1 ] ]
return [ meta + [ type: "long" ], [ fastq_1 ] ]
fasta: meta.is_fasta
meta.single_end = true
return [ meta, [ fasta ] ]
return [ meta + [ type: "short" ], [ fasta ] ]
}

// Merge ch_input.fastq and ch_input.nanopore into a single channel
@@ -150,6 +150,9 @@
// Validate and decompress databases
ch_dbs_for_untar = databases
.branch { db_meta, db_path ->
if ( !db_meta.db_type ) {
db_meta = db_meta + [ db_type: "short;long" ]
}
untar: db_path.name.endsWith( ".tar.gz" )
skip: true
}
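The `type` tag added to each read's meta map here is the other half of the join performed in `profiling.nf`, while databases that arrive without a `db_type` are given the default `short;long` just before branching. A minimal plain-Groovy sketch of the tagging rule (the `tagReadType` helper is hypothetical, written only to illustrate the branch logic above):

```groovy
// Hypothetical helper mirroring the branch above: Oxford Nanopore runs are
// tagged 'long'; Illumina fastq and fasta inputs are tagged 'short'.
def tagReadType = { Map meta, String instrumentPlatform ->
    def type = (instrumentPlatform == 'OXFORD_NANOPORE') ? 'long' : 'short'
    meta + [type: type]
}

assert tagReadType([id: 'sample1'], 'ILLUMINA').type        == 'short'
assert tagReadType([id: 'sample2'], 'OXFORD_NANOPORE').type == 'long'
```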