Skip to content

Commit

Permalink
Merge branch 'bump_v0.2.9' into 'dev'
Browse files Browse the repository at this point in the history
Version bump

See merge request epi2melabs/workflows/wf-single-cell!124
  • Loading branch information
nrhorner committed Oct 18, 2023
2 parents ba12bef + e6d1e69 commit e828589
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 10 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [v0.2.9]
- Make `prepare_report_data` process more memory-efficient

## [v0.2.8]
Expand Down
58 changes: 58 additions & 0 deletions bin/workflow_glue/check_bam_headers_in_dir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Check (u)BAM files for `@SQ` lines whether they are the same in all headers."""

from pathlib import Path
import sys

import pysam

from .util import get_named_logger, wf_parser # noqa: ABS101


def get_sq_lines(xam_file):
    """Extract the `@SQ` lines from the header of a XAM file.

    :param xam_file: file to read the header from; passed straight to
        `pysam.AlignmentFile`.
    :returns: the value stored under the `SQ` key of the file's header.

    The file is opened with `check_sq=False` (presumably so that files without
    `@SQ` lines, e.g. unaligned BAM, can still be opened) and is closed again
    before the function returns.
    """
    # Use a context manager so that the file handle is released right away instead
    # of staying open until garbage collection; this function is called once per
    # input file and would otherwise leave one open handle per file behind.
    with pysam.AlignmentFile(xam_file, check_sq=False) as xam:
        return xam.header["SQ"]


def main(args):
    """Compare the `@SQ` header lines of all files in the input directory.

    The result is written to STDOUT (without trailing newline) in the form
    `IS_UNALIGNED=<0|1>;MIXED_HEADERS=<0|1>`.

    :param args: parsed arguments; `args.input_path` is the directory to check.
    :raises ValueError: if `args.input_path` is not a directory or if it is empty.
    """
    logger = get_named_logger("checkBamHdr")

    if not args.input_path.is_dir():
        raise ValueError(f"Input path '{args.input_path}' must be a directory.")

    xam_files = list(args.input_path.glob("*"))
    if not xam_files:
        raise ValueError(f"No files found in input directory '{args.input_path}'.")

    # The `@SQ` lines of the first file serve as the reference that all remaining
    # files are compared against. Headers count as "mixed" as soon as one file
    # deviates from that reference (either because only some files have `@SQ` lines
    # or because the `@SQ` lines differ between files).
    reference_sq_lines = get_sq_lines(xam_files[0])
    sq_lines = reference_sq_lines
    mixed_headers = False
    for xam_file in xam_files[1:]:
        sq_lines = get_sq_lines(xam_file)
        if sq_lines != reference_sq_lines:
            # no need to look at the remaining files once a mismatch was found
            mixed_headers = True
            break

    # Without mixed headers all files share the same `@SQ` lines; if the file we
    # looked at last has none, we can be sure that none of the files had any.
    is_unaligned = not (mixed_headers or sq_lines)

    # write `is_unaligned` and `mixed_headers` out so that they can be set as env.
    # variables
    result = f"IS_UNALIGNED={int(is_unaligned)};MIXED_HEADERS={int(mixed_headers)}"
    sys.stdout.write(result)
    logger.info(f"Checked (u)BAM headers in '{args.input_path}'.")


def argparser():
    """Create the argument parser of the `check_bam_headers` entry point.

    :returns: the parser obtained from `wf_parser` with a single positional
        argument, `input_path`, which is converted to a `pathlib.Path`.
    """
    cli = wf_parser("check_bam_headers")
    cli.add_argument("input_path", type=Path, help="Path to target directory")
    return cli
17 changes: 9 additions & 8 deletions lib/ingress.nf
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def xam_ingress(Map arguments)


process checkBamHeaders {
label "fastq_ingress"
label "ingress"
label "wf_common"
cpus 1
input: tuple val(meta), path("input_dir/reads*.bam")
Expand All @@ -254,7 +254,7 @@ process checkBamHeaders {


process mergeBams {
label "fastq_ingress"
label "ingress"
label "wf_common"
cpus 3
input: tuple val(meta), path("input_bams/reads*.bam")
Expand All @@ -268,7 +268,7 @@ process mergeBams {


process catSortBams {
label "fastq_ingress"
label "ingress"
label "wf_common"
cpus 4
input: tuple val(meta), path("input_bams/reads*.bam")
Expand All @@ -282,7 +282,7 @@ process catSortBams {


process sortBam {
label "fastq_ingress"
label "ingress"
label "wf_common"
cpus 3
input: tuple val(meta), path("reads.bam")
Expand All @@ -295,7 +295,8 @@ process sortBam {


process bamstats {
label "fastq_ingress"
label "ingress"
label "wf_common"
cpus 3
input:
tuple val(meta), path("reads.bam")
Expand Down Expand Up @@ -410,7 +411,7 @@ def watch_path(Path input, Map margs, ArrayList extensions) {


process move_or_compress_fq_file {
label "fastq_ingress"
label "ingress"
label "wf_common"
cpus 1
input:
Expand All @@ -435,7 +436,7 @@ process move_or_compress_fq_file {


process fastcat {
label "fastq_ingress"
label "ingress"
label "wf_common"
cpus 3
input:
Expand Down Expand Up @@ -734,7 +735,7 @@ def get_sample_sheet(Path sample_sheet, ArrayList required_sample_types) {
*/
process validate_sample_sheet {
cpus 1
label "fastq_ingress"
label "ingress"
label "wf_common"
input:
path "sample_sheet.csv"
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ manifest {
description = 'Identification of cell- and UMI barcodes from single-cell sequencing.'
mainScript = 'main.nf'
nextflowVersion = '>=23.04.2'
version = '0.2.8'
version = '0.2.9'
}

epi2melabs {
Expand Down

0 comments on commit e828589

Please sign in to comment.