diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000..4869dd0
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,50 @@
+cff-version: 1.2.0
+title: 'Workflomics: Bioinformatics Workflow Generation and Benchmarking'
+message: >-
+  If you use this software, please cite it using the
+  metadata from this file.
+type: software
+authors:
+  - given-names: Vedran
+    family-names: Kasalica
+    email: v.kasalica@esciencecenter.nl
+    affiliation: Netherlands eScience Center, Netherlands
+    orcid: 'https://orcid.org/0000-0002-0097-1056'
+  - family-names: Ahmed
+    given-names: Nauman
+    email: n.ahmed@esciencecenter.nl
+    affiliation: Netherlands eScience Center, Netherlands
+    orcid: 'https://orcid.org/0000-0003-3559-9941'
+  - family-names: Kok
+    given-names: Peter
+    email: p.kok@esciencecenter.nl
+    affiliation: Netherlands eScience Center, Netherlands
+    orcid: 'https://orcid.org/0000-0002-6630-7326'
+  - family-names: Lamprecht
+    given-names: Anna-Lena
+    email: anna-lena.lamprecht@uni-potsdam.de
+    affiliation: University of Potsdam, Germany
+    orcid: 'https://orcid.org/0000-0003-1953-5606'
+  - family-names: Palmblad
+    given-names: Magnus
+    email: n.m.palmblad@lumc.nl
+    affiliation: Leiden University Medical Center, Netherlands
+    orcid: 'https://orcid.org/0000-0002-5865-8994'
+identifiers:
+  - type: doi
+    value: 10.5281/zenodo.10047136
+repository-code: 'https://github.com/workflomics/workflomics-frontend'
+url: 'https://github.com/workflomics/workflomics-frontend'
+abstract: >-
+  The Workflomics platform aims to address the challenge faced by life science researchers who work with increasingly large and complex datasets and struggle to create optimal workflows for their data analysis problems.
+
+  The platform facilitates a "Great Bake Off" of computational workflows in bioinformatics by integrating bioinformatics tools and metadata with technologies for automated workflow exploration and benchmarking. This enables a systematic and rigorous approach to the development of cutting-edge workflows, specifically in the field of proteomics, to increase scientific quality, robustness, reproducibility, FAIRness, and maintainability.
+
+  The platform currently focuses on the proteomics domain. We aim to extend the platform to additional domains, e.g., metabolomics, genomics.
+keywords:
+  - bioinformatics
+  - workflow benchmarking
+license: Apache-2.0
+commit: bb02154d29104b2667006e12d37239a3d382b75d
+version: 0.1.1
+date-released: '2023-11-12'
\ No newline at end of file
diff --git a/README.md b/README.md
index c155dcf..ceeb63e 100644
--- a/README.md
+++ b/README.md
@@ -1,57 +1,72 @@
 # Workflomics Benchmarker
 
-Library used to execute workflows (in CWL) and benchmark them as part of the Workflomics ecosystem.
+**Workflomics Benchmarker** is a versatile library designed for executing and benchmarking workflows encapsulated in Common Workflow Language (CWL) within the Workflomics ecosystem.
 
-## Credits
+Detailed user documentation is available on [readthedocs](https://workflomics.readthedocs.io/en/latest/workflomics-benchmarker/benchmarker.html).
+
+## Badges
+
+| Description | Badge |
+|:------------|:------|
+| **Packages and Releases** | ![Latest release](https://img.shields.io/github/release/workflomics/workflomics-benchmarker.svg) [![PyPI](https://img.shields.io/pypi/v/workflomics-benchmarker.svg)](https://pypi.python.org/pypi/workflomics-benchmarker/) |
+| **License** | [![GitHub license](https://img.shields.io/github/license/workflomics/workflomics-benchmarker)](https://github.com/workflomics/workflomics-benchmarker/blob/main/LICENSE) |
+
 
-The `workflomics benchmarker` script was developed by [Nauman Ahmed](@nahmedraja) as part of the [containers](https://github.com/Workflomics/containers) repository, but was since migrated to its own repository (see [PR #49](https://github.com/Workflomics/containers/pull/49)) to be published as a stand-alone package.
 
 ## Requirements
 
-- Python 3.9+
-- Poetry
-- Docker or Singularity running
+- Python 3.9 or higher
+- Docker or Singularity
+- Poetry (if you want to build the package from source)
 
 ## Installation
 
+Install `workflomics-benchmarker` from PyPI using pip:
+
 ```bash
-poetry install
+pip install workflomics-benchmarker
 ```
 
+Alternatively, you can clone the repository and install it using Poetry by running:
+
+```bash
+git clone https://github.com/workflomics/workflomics-benchmarker.git
+cd workflomics-benchmarker
+poetry install
+```
 
 ## Usage
 
-The command is used with Docker or Singularity service running. It will execute the workflow and benchmark it.
+Ensure Docker or Singularity is running before executing workflows. Here are the commands for both services:
 
 ### Docker
 
 ```bash
-workflomics benchmark tests/data/
+workflomics benchmark tests/data/
 ```
 
-which is equivalent to
+Or directly with Python:
 
 ```bash
-python src/benchmarker/workflomics.py benchmark tests/data/
+python src/benchmarker/workflomics.py benchmark tests/data/
 ```
 
-The results will be stored in the `./tests/data` folder.
-
+The results will be saved in the `./tests/data` directory.
 
 ### Singularity
 
-Finally, you can run the test with Singularity. This will require you to have Singularity installed and running, and to use the `--singularity` flag.
+To use Singularity, ensure it's installed and append the `--singularity` flag:
 
 ```bash
-python src/benchmarker/workflomics.py benchmark tests/data/ --singularity
+python src/benchmarker/workflomics.py benchmark tests/data/ --singularity
 ```
 
 ## Testing
 
-To run the tests, you can use the following command:
+Run the following command to execute tests:
 
 ```bash
-poetry run pytest -s
+poetry run pytest -s
 ```
 
-The tests will execute a workflow and benchmark it (require Docker running). The results will be stored in the `./tests/data` folder.
\ No newline at end of file
+This command runs a workflow and benchmarks it, assuming Docker is operational. Results are stored in the `./tests/data` directory.
diff --git a/pyproject.toml b/pyproject.toml
index 9ae850c..fca3a34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,9 +2,9 @@
 name = "workflomics-benchmarker"
 version = "0.1.0"
 description = "Library used to execute workflows (in CWL) and benchmark them as part of the Workflomics ecosystem."
-authors = ["Nauman Ahmed <n.ahmed@esciencecenter.nl>",
-           "Peter Kok <p.kok@esciencecenter.nl>",
-           "Vedran Kasalica <v.kasalica@esciencecenter.nl>"]
+authors = ["Vedran Kasalica <v.kasalica@esciencecenter.nl>",
+           "Nauman Ahmed <n.ahmed@esciencecenter.nl>",
+           "Peter Kok <p.kok@esciencecenter.nl>"]
 license = "Apache-2.0"
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
diff --git a/src/workflomics_benchmarker/cwltool_runtime_benchmark.py b/src/workflomics_benchmarker/cwltool_runtime_benchmark.py
index 37afe05..2a101e8 100644
--- a/src/workflomics_benchmarker/cwltool_runtime_benchmark.py
+++ b/src/workflomics_benchmarker/cwltool_runtime_benchmark.py
@@ -34,8 +34,8 @@ class CWLToolRuntimeBenchmark(CWLToolWrapper):
         "1001+": 0,
     }
     WARNINGS_DESIRABILITY_BINS = {
-        "0-1": 0,
-        "2-3": -0.25,
+        "0-0": 0,
+        "1-3": -0.25,
         "4-5": -0.5,
         "6-7": -0.75,
         "8+": -1,
@@ -150,8 +150,6 @@ def run_workflow(self, workflow) -> None:
             entry["memory"] = "N/A"
             entry["warnings"] = "N/A"
             entry["errors"] = "N/A"
-            print("success_steps")
-            print(success_steps)
             for (
                 step
             ) in (
@@ -229,13 +227,13 @@ def aggregate_workflow_benchmark_value(self, benchmark_name) -> int | Literal["
             The value of the benchmark.
         """
         value: int = 0
-        for entry in self.workflow_benchmark_result["steps"]:
+        for index, entry in enumerate(self.workflow_benchmark_result["steps"], start=1):
             match benchmark_name:
                 case "status":
                     if entry[benchmark_name] != "✗" and entry[benchmark_name] != "-":
                         value = "✓"
                     else:
-                        return "✗"
+                        return f"({index-1}/{len(self.workflow_benchmark_result['steps'])}) ✗"
                 case "time":
                     if entry[benchmark_name] != "N/A":
                         value = value + entry["time"]
@@ -272,10 +270,10 @@ def calc_desirability(self, benchmark_name, value):
            case "status":
                 if value == "✓":
                     return 1
-                elif value == "✗":
-                    return -1
-                else:
+                elif value == "-":
                     return 0
+                else:
+                    return -1
             case "errors":
                 if isinstance(value, list):
                     value = len(value)
diff --git a/src/workflomics_benchmarker/cwltool_wrapper.py b/src/workflomics_benchmarker/cwltool_wrapper.py
index 464ce65..928637e 100644
--- a/src/workflomics_benchmarker/cwltool_wrapper.py
+++ b/src/workflomics_benchmarker/cwltool_wrapper.py
@@ -5,6 +5,7 @@
 import sys
 
 from workflomics_benchmarker.loggingwrapper import LoggingWrapper
+from workflomics_benchmarker.utils import natural_keys
 
 class CWLToolWrapper():
     """ The class contains the common methods for the benchmarking and running CWL workflows."""
@@ -31,12 +32,11 @@ def __init__(self, args):
         self.verbose = args.verbose if hasattr(args, 'verbose') else False
 
-        self.workflows = [str(file) for file in Path(args.workflows).glob('*.cwl')]
+        self.workflows = sorted([str(file) for file in Path(args.workflows).glob('*.cwl')], key=natural_keys)
         self.version = self.check_cwltool()
         self.input = self.update_input_yaml(self.input_yaml_path)
 
-
     def check_cwltool(self):
         """Check if cwltool is installed and return the version"""
         try:
diff --git a/src/workflomics_benchmarker/utils.py b/src/workflomics_benchmarker/utils.py
new file mode 100644
index 0000000..dbeabe9
--- /dev/null
+++ b/src/workflomics_benchmarker/utils.py
@@ -0,0 +1,9 @@
+
+import re
+
+
+def natural_keys(text):
+    '''
+    Split the text into lowercase strings and integers so it can be used as a natural-sort key.
+    '''
+    return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', text)]
\ No newline at end of file
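
For reference, below is a minimal, self-contained sketch of the natural-sort behaviour that `natural_keys` provides when the wrapper collects `*.cwl` files. The candidate workflow filenames are hypothetical, chosen only for illustration; the `natural_keys` implementation itself is the one introduced in the patch above.

```python
# Sketch of natural sorting with natural_keys; the filenames below are made up.
import re


def natural_keys(text):
    """Split text into lowercase strings and integers for natural sorting."""
    return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', text)]


workflows = ["candidate_10.cwl", "candidate_2.cwl", "candidate_1.cwl"]

print(sorted(workflows))                    # plain sort: candidate_1, candidate_10, candidate_2
print(sorted(workflows, key=natural_keys))  # natural sort: candidate_1, candidate_2, candidate_10
```

Without the key, `candidate_10.cwl` sorts before `candidate_2.cwl`, so benchmark rows would appear out of order for any workflow set with more than nine candidates; sorting with `natural_keys` keeps the report aligned with the workflow numbering.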