-
Notifications
You must be signed in to change notification settings - Fork 153
BIgMAG compatibility #861
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BIgMAG compatibility #861
Changes from all commits
105a51b
5e430e3
56a200d
0168b46
5abd21b
926d01c
f5b4502
83ae6a0
7ef95af
99e5bf8
38e48b2
22517cd
9ca66ae
568abc5
d34d2ad
ed196db
0f3476d
bb864fa
07a0f3e
f6e0fa5
658f63f
2ce8180
a670f5e
be41a8a
bb198c3
a65a4ef
af7cbc3
2e5011c
7906c2b
fa84fcf
8e58f35
5c3535a
e6b272b
c148f2d
547222d
819a5a6
c942602
a54d008
dab159e
a6429c5
4a05fa9
97840fd
bfb0e37
6201615
a20cf8e
cb2863a
9d82e0f
0bc2b1d
4806731
d959f3c
d31346d
7d1a105
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| #!/usr/bin/env python | ||
|
|
||
| ## Originally written by Jeferyd Yepes and released under the MIT license. | ||
| ## See git repository (https://github.com/nf-core/mag) for full license text. | ||
|
|
||
| import pandas as pd | ||
| import re | ||
| import argparse | ||
| import sys | ||
| import warnings | ||
|
|
||
def parse_args(args=None):
    """Parse the command-line options of the BIgMAG summary script.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            the process command line instead.

    Returns:
        argparse.Namespace with ``summary`` and ``gunc_summary`` (the given
        paths, or ``None`` when the option is omitted) and ``out`` (a file
        object opened for writing by ``argparse.FileType``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--summary", metavar="FILE", help="Pipeline summary file.")
    parser.add_argument("-g", "--gunc_summary", metavar="FILE", help="GUNC summary file.")
    parser.add_argument(
        "-o",
        "--out",
        required=True,
        metavar="FILE",
        type=argparse.FileType("w"),
        help="Output file containing final bigmag summary.",
    )
    return parser.parse_args(args)
|
|
||
|
|
||
# Patterns used to normalise bin names and to derive the sample name from them.
_FASTA_SUFFIX = r"\.(fa|fasta)(\..*)?$"
_TOOL_SUFFIX = r"(_busco|_checkm2|_checkm|_gtdbtk|_gunc|_quast)$"
_SAMPLE_PART = re.compile(r"^.*?-.*?-(.*)$")
_UNBINNED_SUFFIX = re.compile(r"\.(unbinned|noclass)(\..*)?$")
_BIN_NUMBER_SUFFIX = re.compile(r"\.\d+(\.[^.]+)?$")


def _sample_from_bin(bin_name):
    """Return the sample part of a bin name, or None if the name has no two '-' separators."""
    match = _SAMPLE_PART.search(bin_name)
    if match is None:
        return None
    sample = _UNBINNED_SUFFIX.sub("", match.group(1))
    return _BIN_NUMBER_SUFFIX.sub("", sample)


def main(args=None):
    """Build the BIgMAG summary table from the pipeline and GUNC summaries.

    Reads both tab-separated inputs, strips tool suffixes from the pipeline
    summary's column names and FASTA extensions from its bin names, left-joins
    the GUNC rows on bin name, drops redundant identifier columns, adds a
    ``sample`` column derived from each bin name and writes the table as TSV
    (including the index column) to ``--out``.

    Args:
        args: Optional list of command-line tokens forwarded to ``parse_args``.

    Raises:
        SystemExit: If the pipeline summary or the GUNC summary is not given.
    """
    args = parse_args(args)

    # Both tables are read unconditionally below, so either one missing is fatal.
    if not args.summary or not args.gunc_summary:
        sys.exit(
            "No summary specified! "
            "Please specify the pipeline summary and the GUNC summary."
        )

    df_summary = pd.read_csv(args.summary, sep="\t")
    df_summary.columns = df_summary.columns.str.replace(_TOOL_SUFFIX, "", regex=True)
    # Cast first so that non-string bin identifiers cannot break the regex step.
    df_summary["bin"] = (
        df_summary["bin"].astype(str).str.replace(_FASTA_SUFFIX, "", regex=True)
    )
    df_summary = df_summary.sort_values(by="bin")

    df_gunc = pd.read_csv(args.gunc_summary, sep="\t")
    df_gunc["genome"] = df_gunc["genome"].astype(str)
    df_gunc = df_gunc.sort_values(by="genome")

    # Left join: every bin of the pipeline summary is kept, with or without a GUNC row.
    df_summary = pd.merge(df_summary, df_gunc, left_on="bin", right_on="genome", how="left")

    df_summary = df_summary.rename(columns={"bin": "Bin"})
    columns_to_remove = ["Name", "genome", "Input_file", "Assembly", "Bin Id"]
    df_summary = df_summary.drop(columns=columns_to_remove, errors="ignore")

    # Bins whose name does not match the expected pattern get an empty sample.
    df_summary["sample"] = df_summary["Bin"].map(_sample_from_bin)

    df_summary.to_csv(args.out, sep="\t", index=True)
|
|
||
# Script entry point: run main() and hand its return value to sys.exit().
if __name__ == "__main__":
    sys.exit(main())
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment listing the Python and pandas versions pinned for this module.
channels:
  - conda-forge
  - bioconda
dependencies:
  - conda-forge::python=3.10.6
  - conda-forge::pandas=1.4.3
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
// Runs bigmag_summary.py on the pipeline summary and the GUNC summary and
// emits bigmag_summary.tsv together with the tool versions.
process BIGMAG {

    conda "conda-forge::pandas=1.4.3"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/pandas:1.4.3'
        : 'biocontainers/pandas:1.4.3'}"

    input:
    path summary
    path gunc_sum

    output:
    path "bigmag_summary.tsv", emit: bigmag_summary
    path "versions.yml"      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Use distinct local names: re-declaring the `summary` input with `def`
    // would shadow the process input inside this block.
    def summary_arg = summary.sort().size() > 0 ? "--summary ${summary}" : ""
    // NOTE(review): a maintainer asked why GUNC results are not already part
    // of an upstream summary (the remark is truncated in this view); if they
    // were, this step would apparently only need to rename columns. Confirm.
    def gunc_arg = gunc_sum.sort().size() > 0 ? "--gunc_summary ${gunc_sum}" : ""
    """
    bigmag_summary.py \
        ${args} \
        ${summary_arg} \
        ${gunc_arg} \
        --out bigmag_summary.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version 2>&1 | sed 's/Python //g')
        pandas: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('pandas').version)")
    END_VERSIONS
    """
}
Uh oh!
There was an error while loading. Please reload this page.