Skip to content

Commit

Permalink
Merge pull request #93 from nf-core/reports
Browse files Browse the repository at this point in the history
Reports
  • Loading branch information
JoseEspinosa authored Dec 20, 2023
2 parents 45daa85 + 134c3f5 commit 5092582
Show file tree
Hide file tree
Showing 29 changed files with 610 additions and 143 deletions.
25 changes: 22 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,27 @@ jobs:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
parameters:
name: Test workflow parameters
if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/multiplesequencealign') }}"
runs-on: ubuntu-latest
strategy:
matrix:
NXF_VER:
- "23.04.0"
- "latest-everything"
steps:
- name: Check out pipeline code
uses: actions/checkout@v4

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Test workflow parameters
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_align_only,docker --outdir ./results
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ null/
outdir/
.ipynb_checkpoints/
test.ipynb
*reports.tsv
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Initial release of nf-core/multiplesequencealign, created with the [nf-core](htt
[#35](https://github.com/nf-core/multiplesequencealign/issues/35) - Add module MUSCLE5_SUPER5
[#59](https://github.com/nf-core/multiplesequencealign/issues/59) - Add support for passing structure template in samplesheet.
[#77](https://github.com/nf-core/multiplesequencealign/issues/77) - Add module zip
[#77](https://github.com/nf-core/multiplesequencealign/issues/77) - Add module zip
[#93](https://github.com/nf-core/multiplesequencealign/pull/93) - Add multiqc basic support. Add custom params validation. Add basic shiny app.

### `Fixed`

Expand Down
133 changes: 133 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,137 @@ report_section_order:
"nf-core-multiplesequencealign-summary":
order: -1002

report_header_info:
- Application Type: "Multiple Sequence Alignment deployment and benchmarking."

export_plots: true

run_modules:
- custom_content

custom_data:
summary_stats:
id: "summary_stats"
section_name: "Summary Stats"
plot_type: "table"
anchor: "summary_stats"
namespace: "summary_stats"
pconfig:
id: "summary_stats"
title: "Summary statistics"

custom_table_header_config:
summary_stats:
perc_sim:
description: "Average percentage similarity between all sequences in the input fasta file."
group: "Fasta"
max: 100
format: "{:,.2f}"
suffix: "%"
fasta:
description: "Input fasta file."
hidden: False
group: "Fasta"
n_sequences:
description: "Number of sequences in the input fasta file."
hidden: False
group: "Fasta"
scale: "Set2"
seqlength_mean:
description: "Average sequence length in the input fasta file."
hidden: False
group: "Fasta"
format: "{:,.1f}"
seqlength_max:
description: "Maximum sequence length in the input fasta file."
hidden: True
group: "Fasta"
format: "{:,.1f}"
seqlength_median:
description: "Median sequence length in the input fasta file."
hidden: True
group: "Fasta"
format: "{:,.1f}"
sp:
description: "Sum of Pairs metric."
hidden: False
group: "Alignment"
format: "{:,.1f}"
max: 100
tc:
description: "Total Column metric."
hidden: False
group: "Alignment"
format: "{:,.1f}"
max: 100
EVALUATED:
description: "EVALUATED metric."
hidden: True
group: "Alignment"
format: "{:,.1f}"
max: 100
APDB:
description: "APDB metric."
hidden: True
group: "Alignment"
format: "{:,.1f}"
iRMSD:
description: "iRMSD metric."
hidden: True
group: "Alignment"
format: "{:,.1f}"
NiRMSD:
description: "NiRMSD metric."
hidden: False
group: "Alignment"
format: "{:,.1f}"
min: 0
tree:
description: "Tree used in the alignment."
hidden: False
group: "Alignment"
scale: "Paired"
args_tree:
description: "Arguments used to build the tree."
hidden: True
group: "Alignment"
args_tree_clean:
description: "Arguments used to build the tree."
hidden: True
group: "Alignment"
aligner:
description: "Aligner used."
hidden: False
group: "Alignment"
scale: "Paired"
args_aligner:
description: "Arguments used to run the aligner."
hidden: True
group: "Alignment"
args_aligner_clean:
description: "Arguments used to run the aligner."
hidden: True
group: "Alignment"

table_columns_placement:
summary_stats:
fasta: 90
tree: 150
args_tree: 170
aligner: 200
args_aligner: 220
n_sequences: 250
seqlength_mean: 280
seqlength_median: 310
seqlength_max: 340
perc_sim: 370
sp: 400
tc: 430
EVALUATED: 470
APDB: 500
iRMSD: 530
NiRMSD: 560

sp:
summary_stats:
fn: "summary_stats_eval_multiqc_table.csv"
1 change: 1 addition & 0 deletions assets/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
id,fasta,reference,structures,template
seatoxin-ref,test-dataset/setoxin-ref.fa,test-dataset/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz,
toxin-ref,test-dataset/toxin-ref.fa,test-dataset/toxin.ref,,
3 changes: 0 additions & 3 deletions assets/samplesheet_copy.csv

This file was deleted.

12 changes: 9 additions & 3 deletions assets/toolsheet.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
tree,args_tree,aligner,args_aligner
FAMSA,"-gt upgma -parttree",FAMSA,""
"","",MAFFT,"--anysymbol --quiet --dpparttree"

FAMSA,-gt upgma -parttree,FAMSA,
,,MAFFT,--anysymbol --quiet --dpparttree
,,MUSCLE5,
,,LEARNMSA,
,,KALIGN,
CLUSTALO,,CLUSTALO,
,,TCOFFEE,
,,3DCOFFEE,-method TMalign_pair
,,REGRESSIVE,-reg_nseq 3
10 changes: 0 additions & 10 deletions assets/tooslheet_example.csv

This file was deleted.

14 changes: 7 additions & 7 deletions shiny_app/app.py → bin/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@

# Load file
# ----------------------------------------------------------------------------
summary_report = "./outdir/summary_report/evaluation_summary_report.csv"
stats_report = "./outdir/stats/stats_summary_report.csv"
summary_report = "./shiny_data.csv"

summary_df = pd.read_csv(summary_report)
stats_df = pd.read_csv(stats_report)
try:
inputfile = pd.read_csv(summary_report)
except:
print("ERROR: file not found: ", summary_report)
sys.exit(1)

cols_to_merge = ["id"]

inputfile = summary_df.merge(stats_df, on=cols_to_merge, how="left")
# ----------------------------------------------------------------------------

options = {item: item for item in list(inputfile.columns)}
options_color = {"align": "assembly", "tree": "tree"}
options_color = {"aligner": "assembly", "tree": "tree"}
options_eval = {
"sp": "sum of pairs (SP)",
"n_sequences": "# sequences",
Expand Down
10 changes: 9 additions & 1 deletion bin/calc_seqstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
fasta_file = sys.argv[2]
outfile = sys.argv[3]
outfile_summary = sys.argv[4]
outfile_mqc = sys.argv[5]


def get_seq_lengths(fasta_file):
Expand All @@ -16,7 +17,10 @@ def get_seq_lengths(fasta_file):
name, sequence = fasta.id, str(fasta.seq)
l = len(sequence)
name = name.replace("/", "_")
entry = pd.DataFrame([{"id": fam_name, "sequence": name, "sequence length": l}])
# entry = pd.DataFrame([{"id": fam_name, "sequence": name, "sequence length": l}])
entry = pd.DataFrame([{"id": fam_name, "sequence length": l}])
# count number of sequences per sequence length
# entry = entry.groupby(by=["id", "sequence length"]).size().reset_index(name="count")
summary = pd.concat([summary, entry], ignore_index=True)
return summary

Expand All @@ -29,7 +33,11 @@ def get_seq_lengths(fasta_file):
)
stats_df["n_sequences"] = len(summary_lengths)
stats_df.rename(columns={"mean": "seqlength_mean", "max": "seqlength_max", "median": "seqlength_median"}, inplace=True)
nseq_mqc = stats_df[["id", "n_sequences"]].drop_duplicates()


summary_lengths.to_csv(outfile, sep=",", index=False)
stats_df.to_csv(outfile_summary, sep=",", index=False)
# save mqc file with no column names
# append to file
nseq_mqc.to_csv(outfile_mqc, sep="\t", index=False, header=False)
46 changes: 46 additions & 0 deletions bin/prep_multiqc_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env python

import csv
import argparse
import sys
import pandas as pd


def prep_table(input, output):
    """Reshape a comma-separated summary table and write it back out as CSV.

    The ``id`` column of the input is renamed to ``fasta``, and a new integer
    ``id`` column holding the frame index plus one is placed first.

    Args:
        input: Location of the comma-separated table, handed to ``pandas.read_csv``.
        output: Destination handed to ``DataFrame.to_csv``; the index is not written.
    """
    table = pd.read_csv(input, sep=",").rename(columns={"id": "fasta"})

    # Row identifier derived from the frame index, shifted by one and stored as int.
    table["id"] = table.index + 1
    table["id"] = table["id"].astype(int)

    # The new column was appended last; rotate it to the front.
    columns = list(table.columns)
    reordered = columns[-1:] + columns[:-1]
    table[reordered].to_csv(output, index=False)


def parse_args(argv=None):
    """Define and immediately parse command line arguments.

    Args:
        argv: Argument strings to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Holds ``i`` (input table path) and ``o``
        (output table path).

    Raises:
        SystemExit: Raised by argparse when ``-i`` or ``-o`` is missing or an
            unknown option is given.
    """
    parser = argparse.ArgumentParser(
        description="Rename the 'id' column to 'fasta' and prepend a numeric id column to a CSV table.",
    )
    # Both paths are mandatory: without them the conversion would only fail
    # later with an unrelated error, so reject the call up front.
    parser.add_argument(
        "-i",
        metavar="INPUT",
        required=True,
        help="Comma-separated table to read.",
    )
    parser.add_argument(
        "-o",
        metavar="OUTPUT",
        required=True,
        help="Path of the CSV file to write.",
    )
    return parser.parse_args(argv)


def main(argv=None):
    """Parse the command line and run the table conversion.

    Args:
        argv: Optional argument list forwarded unchanged to ``parse_args``.
    """
    options = parse_args(argv)
    prep_table(options.i, options.o)


# Script entry point: run the conversion and hand main()'s return value to sys.exit.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Loading

0 comments on commit 5092582

Please sign in to comment.