diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 7c2dc0d..9411ebc 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -13,5 +13,4 @@ jobs: id: release with: release-type: simple - release-as: v.1.2.0 - last-release-sha: a8ad46421b7b64c54f7401fa5e95ad9324eee890 \ No newline at end of file + release-as: 1.2.1 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 126eb87..934c0f0 100644 --- a/.gitignore +++ b/.gitignore @@ -25,12 +25,15 @@ !.gitattributes !Dockerfile !README.md +!MANIFEST.IN +!setup.py workflow/*.err workflow/*.out workflow/report.html workflow/kGWASflow-report.html workflow/*.sh -!kGWASflow.py +!kgwasflow +!kgwasflow/* !environment.yaml !.test !.test/config_ecoli diff --git a/CHANGELOG.md b/CHANGELOG.md index 0165d31..879a1b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## [1.2.1](https://github.com/akcorut/kGWASflow/compare/v1.2.0...v1.2.1) (2023-04-24) + + +### Features + +* add kGWASflow cli ([da57735](https://github.com/akcorut/kGWASflow/commit/da57735cf38cd5623edc69fd940f7642dc301d3f)) +* add python cli support ([56ca35c](https://github.com/akcorut/kGWASflow/commit/56ca35c7c6f6b91497d7f320f58708fc0ef83680)) +* add setup and manifest ([d15224a](https://github.com/akcorut/kGWASflow/commit/d15224a6cc5e54f6a16bab48a009c373a5fb0579)) + + +### Bug Fixes + +* fixed a typo ([bcc6b70](https://github.com/akcorut/kGWASflow/commit/bcc6b70265b9ff049a332dd09cf92b5c7b8f2085)) + ## [1.2.0](https://github.com/akcorut/kGWASflow/compare/v1.0.0...v1.2.0) (2023-04-17) diff --git a/MANIFEST.IN b/MANIFEST.IN new file mode 100644 index 0000000..7364ab7 --- /dev/null +++ b/MANIFEST.IN @@ -0,0 +1,4 @@ +include MANIFEST.in +include LICENSE +recursive-include workflow * +include workflow/config_test.yaml \ No newline at end of file diff --git a/README.md b/README.md index e83300e..1fe55f2 100644 --- a/README.md +++ b/README.md @@ -3,7 
+3,7 @@ A modular, flexible and reproducible Snakemake workflow to perform k-mers-based GWAS. -[![Snakemake](https://img.shields.io/badge/snakemake-≥7.18-blue.svg)](https://snakemake.github.io) +[![Snakemake](https://img.shields.io/badge/snakemake-≥7.25-blue.svg)](https://snakemake.github.io) [![GitHub actions status](https://github.com/akcorut/kGWASflow/workflows/Tests/badge.svg?branch=main)](https://github.com/akcorut/kGWASflow/actions) ![License](https://img.shields.io/badge/license-MIT-blue.svg) [![DOI](https://zenodo.org/badge/421139649.svg)](https://zenodo.org/badge/latestdoi/421139649) @@ -126,13 +126,11 @@ ___________ ## Citation -If you use kGWASflow in your research, please cite using the DOI: [10.5281/zenodo.7290926](https://doi.org/10.5281/zenodo.7290926 ) and the original method paper by [Voichek et al. (2020)](https://www.nature.com/articles/s41588-020-0612-7): +If you use kGWASflow in your research, please cite using the DOI: [10.5281/zenodo.7834410](https://doi.org/10.5281/zenodo.7834410) and the original method paper by [Voichek et al. (2020)](https://www.nature.com/articles/s41588-020-0612-7): -> Kivanc Corut. akcorut/kGWASflow: v1.0.0. (2022). -> https://doi.org/10.5281/zenodo.7290926 +* Kivanc Corut. akcorut/kGWASflow: v1.2.0. (2023). https://doi.org/10.5281/zenodo.7834410 -> Voichek, Y., Weigel, D. Identifying genetic variants underlying phenotypic variation in plants without complete genomes. -> Nat Genet 52, 534–540 (2020). https://doi.org/10.1038/s41588-020-0612-7 +* Voichek, Y., Weigel, D. Identifying genetic variants underlying phenotypic variation in plants without complete genomes. Nat Genet 52, 534–540 (2020). https://doi.org/10.1038/s41588-020-0612-7 ## License kGWASflow is licensed under the [MIT](LICENSE.md) license. diff --git a/config/config.yaml b/config/config.yaml index dc63761..05d3d2c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -2,8 +2,8 @@ # !!! Required !!! 
# Paths to the samples.tsv and phenos.tsv # For more information: https://github.com/akcorut/kGWASflow/tree/main/config -samples: ../config/samples.tsv -phenotypes: ../config/phenos.tsv +samples: config/samples.tsv +phenotypes: config/phenos.tsv # ================================================================================================= # Reference Genome diff --git a/kGWASflow.py b/kGWASflow.py deleted file mode 100755 index 0189a69..0000000 --- a/kGWASflow.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import os - - -# Parse command line arguments -parser = argparse.ArgumentParser(description='Run kGWASflow') -parser.add_argument('-c', '--config-file', default='config/config.yaml', help='Path to the config.yaml file (default: config.yaml)') -# parser.add_argument('--output', type=str, help='Path to output directory') -parser.add_argument('-t', '--threads', type=int, default=1, help='Number of threads') -parser.add_argument('--conda-frontend', type=str, default='conda', help='Conda frontend to use') -parser.add_argument('-n', '--dryrun', action='store_true', help='Dry run', required=False) -parser.add_argument('--samples', type=str, help='Path to samples.tsv file', required=False) -parser.add_argument('--phenotypes', type=str, help='Path to phenos.tsv file', required=False) -parser.add_argument('-r', '--generate-report', action='store_true', help='create an HTML report', required=False) -parser.add_argument('-v', '--verbose', action='store_true', help='Increase output verbosity', required=False) -args = parser.parse_args() - -# if config file is not specified, use the default config file -if args.config_file is None: - args.config_file = 'config/config.yaml' - -# Define the command to run snakemake -cmd = f'snakemake --use-conda --conda-frontend {args.conda_frontend} --cores {args.threads} --rerun-triggers mtime --rerun-incomplete --configfile {args.config_file}' - -if args.dryrun: - cmd += ' --dryrun' - -# Add the --report 
flag if specified -if args.generate_report: - if args.dryrun: - cmd += ' --report kGWASflow-report.html' - if not args.dryrun: - cmd += ' --dryrun --report kGWASflow-report.html' - -if args.verbose: - cmd += ' --verbose' - -if __name__ == "__main__": - # Run the command - os.system(cmd) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..9e2e148 --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +import os +from setuptools import setup, find_packages + +# Get the long description from the README file +setup_dir = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(setup_dir, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name="kgwasflow", + version="1.2.0", + python_requires=">3.10", + description="kGWASflow is a Snakemake workflow for performing k-mers-based GWAS.", + long_description=long_description, + long_description_content_type='text/markdown', + url="https://github.com/akcorut/kGWASflow", + author="Adnan Kivanc Corut", + keywords="k-mers GWAS genomics snakemake", + license="MIT", + packages=find_packages(), + package_data={'workflow': [ + "config/*", + "test/config_ecoli/*", + "test/data/ecoli_ref/*", + "test/data/ecoli_phenos/*"]}, + include_package_data= True, + entry_points={ + "console_scripts": [ + "kgwasflow = workflow.kgwasflow:main", + ], + }, + install_requires=[ + "snakemake==7.25.0", + "click" + ], +) \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index 3b35a17..48ca848 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -10,7 +10,7 @@ min_version("7.25.0") # ========================================================================================================= # Load config file -configfile: "../config/config.yaml" +configfile: os.path.join(workflow.basedir, "config", "config.yaml") # validate(config, schema="../schemas/config.schema.yaml") diff --git a/workflow/__init__.py b/workflow/__init__.py new file mode 100644 index 0000000..c68196d --- 
/dev/null +++ b/workflow/__init__.py @@ -0,0 +1 @@ +__version__ = "1.2.0" diff --git a/workflow/cli_utils.py b/workflow/cli_utils.py new file mode 100644 index 0000000..6718ee7 --- /dev/null +++ b/workflow/cli_utils.py @@ -0,0 +1,116 @@ +import sys +import os +import subprocess +import click +import logging + +# Get the directory path of this file +base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +workflow_dir = os.path.join(base_dir, "workflow") + +logging.basicConfig(level=logging.INFO) + +def get_snakefile(file="Snakefile"): + snake_file = os.path.join(workflow_dir, file) + if not os.path.exists(snake_file): + sys.exit("Unable to locate the Snakefile; tried %s" % snake_file) + return snake_file + +def get_configfile(file="config.yaml"): + config_file = os.path.join(workflow_dir, "config", file) + if not os.path.exists(config_file): + sys.exit("Unable to locate the config.yaml file; tried %s" % config_file) + return config_file + +def show_help_message(): + message = ( + "\nUsage examples:\n" + "\n kgwasflow run [OPTIONS] Run the kGWASflow workflow\n" + "\n kgwasflow test [OPTIONS] Run the kGWASflow test\n" + "\n kgwasflow --help" + "\n\nRun examples:" + "\n\n1. Run kGWASflow with the default config file (../config/config.yaml), default snakemake arguments and 16 threads:\n" + "\n kgwasflow run -t 16 --snake-default" + "\n\n2. Run kGWASflow with a custom config file (path/to/custom_config.yaml) and default settings:\n" + "\n kgwasflow run -t 16 -c path/to/custom_config.yaml" + "\n\n3. Run kGWASflow with user defined output directory:\n" + "\n kgwasflow run -t 16 --output path/to/output_dir" + "\n\n4. Run kGWASflow in dryrun mode to see what tasks would be executed without actually running them:\n" + "\n kgwasflow run -t 16 -n" + "\n\n5. Run kGWASflow using mamba as the conda frontend:\n" + "\n kgwasflow run -t 16 --conda-frontend mamba" + "\n\n6. 
Run kGWASflow and generate an HTML report:\n" + "\n kgwasflow run -t 16 --generate-report" + "\n\nTest examples:" + "\n\n1. Run the kGWASflow test in dryrun mode to see what tasks would be executed:\n" + "\n kgwasflow test -t 16 -n" + "\n\n2. Run the kGWASflow test using the test config file with 16 threads:\n" + "\n kgwasflow test -t 16" + "\n\n3. Run the kGWASflow test and define the test output folder:\n" + "\n kgwasflow test -t 16 --output path/to/test_output_dir" + ) + return message + + +def show_ascii_art(): + click.echo(""" + \b + _ _______ __ _____ __ _ + | | / ____\ \ / /\ / ____|/ _| | + | | _| | __ \ \ /\ / / \ | (___ | |_| | _____ __ + | |/ / | |_ | \ \/ \/ / /\ \ \___ \| _| |/ _ \ \ /\ / / + | <| |__| | \ /\ / ____ \ ____) | | | | (_) \ V V / + |_|\_\\_____| \/ \/_/ \_\_____/|_| |_|\___/ \_/\_/ + \b + kGWASflow: A Snakemake Workflow for k-mers Based GWAS + """) + +def run_snake(snakefile, config_file, threads, output, conda_frontend, dryrun, generate_report, snake_default, rerun_triggers, verbose, unlock, snakemake_args): + # Define the command to run snakemake + cmd = ['snakemake', '--use-conda', '--conda-frontend', conda_frontend, '--cores', str(threads)] + + if snakefile: + cmd += ['--snakefile', snakefile] + + # if config file is provided, use it + if config_file: + cmd += ['--configfile', config_file] + + # if output directory is provided, use it + if output: + if not os.path.exists(output): + os.makedirs(output) + cmd += ['--directory', output] + + if dryrun: + cmd.append('--dryrun') + + if generate_report: + if dryrun: + cmd.append('--report') + cmd.append('kGWASflow-report.html') + if not dryrun: + cmd.append('--dryrun') + cmd.append('--report') + cmd.append('kGWASflow-report.html') + + if snakemake_args: + cmd += snakemake_args + + if rerun_triggers: + cmd += ['--rerun-triggers'] + list(rerun_triggers) + + if snake_default: + default_snakemake_args = ["--rerun-incomplete", "--printshellcmds", "--nolock", "--show-failed-logs"] + cmd += 
default_snakemake_args + + if verbose: + cmd.append('--verbose') + + if unlock: + cmd.append('--unlock') + + try: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError as e: + sys.exit("Error running Snakemake: {}".format(e)) # print the failure to stderr and exit non-zero so shells/CI can detect it \ No newline at end of file diff --git a/workflow/config b/workflow/config new file mode 120000 index 0000000..3ca249e --- /dev/null +++ b/workflow/config @@ -0,0 +1 @@ +../config \ No newline at end of file diff --git a/workflow/kgwasflow.py b/workflow/kgwasflow.py new file mode 100755 index 0000000..b6a4b10 --- /dev/null +++ b/workflow/kgwasflow.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +import click +import os + +from workflow import __version__ +from .cli_utils import run_snake, get_snakefile, get_configfile, show_ascii_art, show_help_message, workflow_dir, base_dir + +# kgwasflow_dir = os.path.join(os.getcwd()) + +@click.group(context_settings=dict(help_option_names=["-h", "--help"])) +@click.version_option(__version__) +def cli(): + """kGWASflow is a Snakemake workflow for performing k-mers-based GWAS. + \b + For more options, run: + kgwasflow --help + kgwasflow run --help + or, + kgwasflow test --help""" + pass + +def common_options(func): + """Decorator for common options.""" + options = [ + click.option('-s', '--snakefile', type=click.Path(exists=True, dir_okay=False, readable=True, resolve_path=True), help='Path to the Snakefile.'), # input file: must exist and be readable; it is never written to + click.option('-c', '--config-file', type=click.Path(exists=True, dir_okay=False, readable=True, resolve_path=True), help='Path to the config.yaml file'), # input file: must exist and be readable; it is never written to + click.option('-t', '--threads', default=8, type=int, help='Number of threads (default: 8).', show_default=True), + click.option('-o', '--output', help="Output directory.", type=click.Path(dir_okay=True, writable=True, readable=True)), + click.option('--conda-frontend', default='conda', type=str, help='Conda frontend to use.'), + click.option('-n', '--dryrun', is_flag=True, default=False, show_default=True, help='Dry run. 
Do not execute the workflow, but show which jobs would be executed.'), + click.option('-r', '--generate-report', is_flag=True, default=False, help='Create a kGWASflow HTML report.', show_default=True), + click.option('--snake-default', is_flag=True, default=False, help='Useful default snakemake arguments.', show_default=True), + click.option('--rerun-triggers', multiple=True, default= ["mtime", "params", "input", "software-env", "code"], help='Rerun all jobs that have at least one of the specified trigger files changed.', show_default=True), + click.option('--unlock', is_flag=True, help='Unlock the workflow if it is locked.'), + click.option('-v', '--verbose', is_flag=True, default=False, help='Increase output verbosity.'), + click.argument("snakemake_args", nargs=-1, type=click.UNPROCESSED), + ] + for option in reversed(options): + func = option(func) + return func + +@click.command(epilog=show_help_message(), context_settings=dict(help_option_names=["-h", "--help"], ignore_unknown_options=True)) +@common_options +def run(snakefile, config_file, **kwargs): + """Run kGWASflow workflow.""" + if not snakefile: + snakefile = get_snakefile() + if not config_file: + config_file = get_configfile() + run_snake(snakefile, config_file, **kwargs) + +@click.command(epilog=show_help_message(), context_settings=dict(help_option_names=["-h", "--help"], ignore_unknown_options=True)) +@common_options # must sit below @click.command (as on `run`) so the options attach to the function in declared order +def test(snakefile, config_file, **kwargs): + """Test kGWASflow workflow.""" + if not snakefile: + snakefile = get_snakefile() + + test_config_file = os.path.join(workflow_dir, "test", "config_ecoli", "config.yaml") + if not config_file: + config_file = test_config_file + run_snake(snakefile, config_file, **kwargs) + +cli.add_command(run) +cli.add_command(test) + +def main(): + show_ascii_art() + cli() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/workflow/report/workflow.rst b/workflow/report/workflow.rst index f200f71..ed28694 100644 --- 
a/workflow/report/workflow.rst +++ b/workflow/report/workflow.rst @@ -16,6 +16,6 @@ Read trimming were performed using `Cutadapt `_. If you use **kGWASflow**, please **cite**: -* Kivanc Corut. akcorut/kGWASflow: v1.1.0. (2023). https://doi.org/10.5281/zenodo.7574506 +* Kivanc Corut. akcorut/kGWASflow: v1.2.0. (2023). https://doi.org/10.5281/zenodo.7834410 * Voichek, Y., Weigel, D. Identifying genetic variants underlying phenotypic variation in plants without complete genomes. Nat Genet 52, 534–540 (2020). https://doi.org/10.1038/s41588-020-0612-7 \ No newline at end of file diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 6f652ad..470f6d7 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -80,7 +80,7 @@ wildcard_constraints: # Pipeline User Output # ================================================================================================= -kgwasflow_version = "v1.1.0" +kgwasflow_version = "v1.2.1" kgwasflow_author = "Adnan Kivanc Corut" date_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") snake_version = snakemake.__version__ @@ -121,7 +121,7 @@ logger.info(" phenotypic variation in plants without complete genomes. logger.info(" Nat Genet 52, 534–540 (2020). https://doi.org/10.1038/s41588-020-0612-7 ") logger.info("") logger.info(" * Kivanc Corut. akcorut/kGWASflow: v1.2.0. (2023). ") -logger.info(" https://doi.org/10.5281/zenodo.7574506 ") +logger.info(" https://doi.org/10.5281/zenodo.7834410 ") logger.info("") logger.info("# ================================================================================== #") logger.info("") diff --git a/workflow/test b/workflow/test new file mode 120000 index 0000000..6413384 --- /dev/null +++ b/workflow/test @@ -0,0 +1 @@ +../.test \ No newline at end of file