From 910fbc054f1c316eb3a76ad3743b6f7732abb73d Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 6 Apr 2024 21:17:15 -0700 Subject: [PATCH] Addressing flake8 issues: Fixing long lines, running black for format, adding noqa and a comment when it seems appropriate --- ribotricer/cli.py | 23 +++++++++++--------- ribotricer/common.py | 11 +++++++--- ribotricer/const.py | 2 +- ribotricer/count_orfs.py | 44 +++++++++++++++++++++++++++++++------- ribotricer/detect_orfs.py | 43 ++++++++++++++++++++++++++++--------- ribotricer/metagene.py | 2 +- ribotricer/orf.py | 3 ++- ribotricer/orf_seq.py | 10 ++++----- ribotricer/plotting.py | 5 +++-- ribotricer/prepare_orfs.py | 2 +- ribotricer/utils.py | 2 +- 11 files changed, 104 insertions(+), 43 deletions(-) diff --git a/ribotricer/cli.py b/ribotricer/cli.py index 2700bf2..23cb6ee 100644 --- a/ribotricer/cli.py +++ b/ribotricer/cli.py @@ -49,7 +49,7 @@ def cli(): pass -###################### prepare-orfs function ######################################### +# prepare-orfs function ######################################### @cli.command( "prepare-orfs", context_settings=CONTEXT_SETTINGS, @@ -110,7 +110,7 @@ def prepare_orfs_cmd( prepare_orfs(gtf, fasta, prefix, min_orf_length, start_codons, stop_codons, longest) -###################### detect-orfs function ######################################### +# detect-orfs function ######################################### @cli.command( "detect-orfs", context_settings=CONTEXT_SETTINGS, @@ -200,7 +200,8 @@ def prepare_orfs_cmd( @click.option( "--meta-min-reads", type=int, - default=META_MIN_READS, + # ADS: META_MIN_READS was detected as undefined by flake8 + default=META_MIN_READS, # noqa: F821 show_default=True, help="Minimum number of reads for a read length to be considered", ) @@ -228,7 +229,7 @@ def detect_orfs_cmd( if read_lengths is not None: try: read_lengths = [int(x.strip()) for x in read_lengths.strip().split(",")] - except: + except Exception: sys.exit("Error: cannot convert 
read_lengths into integers") if not all([x > 0 for x in read_lengths]): sys.exit("Error: read length must be positive") @@ -237,8 +238,10 @@ def detect_orfs_cmd( sys.exit("Error: psite_offsets only allowed when read_lengths is provided") if read_lengths is not None and psite_offsets is not None: try: - psite_offsets = [int(x.strip()) for x in psite_offsets.strip().split(",")] - except: + psite_offsets = [ + int(x.strip()) for x in psite_offsets.strip().split(",") + ] + except Exception: sys.exit("Error: cannot convert psite_offsets into integers") if len(read_lengths) != len(psite_offsets): sys.exit("Error: psite_offsets must match read_lengths") @@ -265,7 +268,7 @@ def detect_orfs_cmd( ) -###################### count-orfs function ######################################### +# count-orfs function ######################################### @cli.command( "count-orfs", context_settings=CONTEXT_SETTINGS, @@ -307,7 +310,7 @@ def count_orfs_cmd(ribotricer_index, detected_orfs, features, out, report_all): count_orfs(ribotricer_index, detected_orfs, features, out, report_all) -###################### count-orfs-codon function ######################################### +# count-orfs-codon function ######################################### @cli.command( "count-orfs-codon", context_settings=CONTEXT_SETTINGS, @@ -367,7 +370,7 @@ def count_orfs_codon_cmd( ) -###################### orfs-seq function ######################################### +# orfs-seq function ######################################### @cli.command( "orfs-seq", context_settings=CONTEXT_SETTINGS, @@ -396,7 +399,7 @@ def orf_seq_cmd(ribotricer_index, fasta, saveto, protein): orf_seq(ribotricer_index, fasta, saveto, protein) -###################### learn-cutoff function ######################################### +# learn-cutoff function ######################################### @cli.command( "learn-cutoff", context_settings=CONTEXT_SETTINGS, diff --git a/ribotricer/common.py b/ribotricer/common.py index be0a93d..feaf274 
100644 --- a/ribotricer/common.py +++ b/ribotricer/common.py @@ -50,7 +50,9 @@ def is_read_uniq_mapping(read): return False else: sys.stdout.write( - "WARNING: ribotricer was unable to detect any tags for determining multimapping status. All the reads will be treated as uniquely mapping\n" + "WARNING: ribotricer was unable to detect any tags for " + "determining multimapping status. All the reads will be " + "treated as uniquely mapping\n" ) @@ -76,7 +78,9 @@ def merge_intervals(intervals): intervals[i].end, intervals[i].strand, ) - while i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end: + while ( + i + 1 < len(intervals) and intervals[i + 1].start <= to_merge.end + ): to_merge.end = max(to_merge.end, intervals[i + 1].end) i += 1 merged_intervals.append(to_merge) @@ -124,6 +128,7 @@ def collapse_coverage_to_codon(coverage): Coverage collapsed to codon level """ codon_coverage = [ - sum(coverage[current : current + 3]) for current in range(0, len(coverage), 3) + sum(coverage[current: current + 3]) + for current in range(0, len(coverage), 3) ] return codon_coverage diff --git a/ribotricer/const.py b/ribotricer/const.py index 5a49f72..8620b85 100644 --- a/ribotricer/const.py +++ b/ribotricer/const.py @@ -30,4 +30,4 @@ # defined as the number of reads per unit length of the ORF MINIMUM_DENSITY_OVER_ORF = 0.0 # Minimum number of reads for a read length to be considered -META_MIN_READS=100000 +META_MIN_READS = 100000 diff --git a/ribotricer/count_orfs.py b/ribotricer/count_orfs.py index 9a908cd..9ca46f5 100644 --- a/ribotricer/count_orfs.py +++ b/ribotricer/count_orfs.py @@ -21,7 +21,9 @@ import pandas as pd -def count_orfs(ribotricer_index, detected_orfs, features, outfile, report_all=False): +def count_orfs( + ribotricer_index, detected_orfs, features, outfile, report_all=False +): """ Parameters ---------- @@ -57,7 +59,11 @@ def count_orfs(ribotricer_index, detected_orfs, features, outfile, report_all=Fa # do not output 'nontranslating' events unless 
report_all is set if status != "nontranslating" or report_all: intervals = orf_index[oid].intervals - coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)] + coor = [ + x + for iv in intervals + for x in range(iv.start, iv.end + 1) + ] if strand == "-": coor = coor[::-1] profile_stripped = profile.strip()[1:-1].split(", ") @@ -105,7 +111,9 @@ def count_orfs_codon( if True, all coverages will be exported """ orf_index = {} - fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index("ORF_ID") + fasta_df = pd.read_csv(ribotricer_index_fasta, sep="\t").set_index( + "ORF_ID" + ) read_counts = defaultdict(dict) with open(ribotricer_index, "r") as fin: # Skip header @@ -126,9 +134,15 @@ def count_orfs_codon( # do not output 'nontranslating' events unless report_all is set if status != "nontranslating" or report_all: intervals = orf_index[oid].intervals - coor = [x for iv in intervals for x in range(iv.start, iv.end + 1)] + coor = [ + x + for iv in intervals + for x in range(iv.start, iv.end + 1) + ] codon_coor = [ - x for iv in intervals for x in range(iv.start, iv.end + 1, 3) + x + for iv in intervals + for x in range(iv.start, iv.end + 1, 3) ] if strand == "-": coor = coor[::-1] @@ -158,7 +172,17 @@ def count_orfs_codon( # Output count table with open("{}_genewise.tsv".format(prefix), "w") as fout: fout.write( - "gene_id\tcodon\tvalues\tmean_codon_coverage\tmedian_codon_coverage\tvar_codon_coverage\tcodon_occurences\ttotal_codon_coverage\n" + "\t".join([ + "gene_id", + "codon", + "values", + "mean_codon_coverage", + "median_codon_coverage", + "var_codon_coverage", + "codon_occurences", + "total_codon_coverage", + ]) + + "\n" ) for gene_id, codon_seq in sorted(read_counts): values = list(read_counts[gene_id, codon_seq].values()) @@ -183,12 +207,16 @@ def count_orfs_codon( fout_df["per_codon_enrichment(total/n_occur)"] = ( fout_df["total_codon_coverage"] / fout_df["codon_occurences"] ) - 
fout_df["-log10_relative_enrichment(per_codon/total_gene_coverage)"] = -np.log10( + fout_df[ + "-log10_relative_enrichment(per_codon/total_gene_coverage)" + ] = -np.log10( fout_df["per_codon_enrichment(total/n_occur)"] / fout_df.groupby("gene_id")["total_codon_coverage"].transform("sum") ) # Overwrite - fout_df.to_csv("{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True) + fout_df.to_csv( + "{}_genewise.tsv".format(prefix), sep="\t", index=False, header=True + ) # Remove infs fout_df = fout_df.replace([np.inf, -np.inf], np.nan) fout_df = fout_df.dropna() diff --git a/ribotricer/detect_orfs.py b/ribotricer/detect_orfs.py index d5dd940..14af3bb 100644 --- a/ribotricer/detect_orfs.py +++ b/ribotricer/detect_orfs.py @@ -174,7 +174,10 @@ def orf_coverage(orf, alignments, offset_5p=0, offset_3p=0): except KeyError: coverage.append(0) else: - if strand in alignments and (chrom, pos) in alignments[strand]: + if ( + strand in alignments + and (chrom, pos) in alignments[strand] + ): coverage.append(alignments[strand][(chrom, pos)]) else: coverage.append(0) @@ -266,7 +269,9 @@ def export_orf_coverages( valid_codons_ratio = valid_codons / n_codons # total reads in the ORF divided by the length orf_density = np.sum(codon_coverage) / n_codons - codon_coverage_exceeds_min = codon_coverage >= min_reads_per_codon + codon_coverage_exceeds_min = ( + codon_coverage >= min_reads_per_codon + ) status = ( "translating" if ( @@ -322,7 +327,9 @@ def export_wig(merged_alignments, prefix): if chrom != cur_chrom: cur_chrom = chrom to_write += "variableStep chrom={}\n".format(chrom) - to_write += "{}\t{}\n".format(pos, merged_alignments[strand][(chrom, pos)]) + to_write += "{}\t{}\n".format( + pos, merged_alignments[strand][(chrom, pos)] + ) if strand == "+": fname = "{}_pos.wig".format(prefix) else: @@ -380,7 +387,11 @@ def detect_orfs( # parse the index file now = datetime.datetime.now() - print(now.strftime("%b %d %H:%M:%S ... 
started parsing ribotricer index file")) + print( + now.strftime( + "%b %d %H:%M:%S ... started parsing ribotricer index file" + ) + ) annotated, refseq = parse_ribotricer_index(ribotricer_index) # create directory @@ -391,7 +402,8 @@ def detect_orfs( now = datetime.datetime.now() print( "{} ... {}".format( - now.strftime("%b %d %H:%M:%S"), "started inferring experimental design" + now.strftime("%b %d %H:%M:%S"), + "started inferring experimental design", ) ) protocol = infer_protocol(bam, refseq, prefix) @@ -400,13 +412,16 @@ def detect_orfs( # split bam file into strand and read length now = datetime.datetime.now() print(now.strftime("%b %d %H:%M:%S ... started reading bam file")) - alignments, read_length_counts = split_bam(bam, protocol, prefix, read_lengths) + alignments, read_length_counts = split_bam( + bam, protocol, prefix, read_lengths + ) # plot read length distribution now = datetime.datetime.now() print( "{} ... {}".format( - now.strftime("%b %d %H:%M:%S"), "started plotting read length distribution" + now.strftime("%b %d %H:%M:%S"), + "started plotting read length distribution", ) ) plot_read_lengths(read_length_counts, prefix) @@ -419,13 +434,20 @@ def detect_orfs( "started calculating metagene profiles. This may take a long time...", ) ) - metagenes = metagene_coverage(annotated, alignments, read_length_counts, prefix, meta_min_reads = meta_min_reads) + metagenes = metagene_coverage( + annotated, + alignments, + read_length_counts, + prefix, + meta_min_reads=meta_min_reads, + ) # plot metagene profiles now = datetime.datetime.now() print( "\n{} ... {}".format( - now.strftime("%b %d %H:%M:%S"), "started plotting metagene profiles" + now.strftime("%b %d %H:%M:%S"), + "started plotting metagene profiles", ) ) plot_metagene(metagenes, read_length_counts, prefix) @@ -435,7 +457,8 @@ def detect_orfs( now = datetime.datetime.now() print( "{} ... 
{}".format( - now.strftime("%b %d %H:%M:%S"), "started inferring P-site offsets" + now.strftime("%b %d %H:%M:%S"), + "started inferring P-site offsets", ) ) psite_offsets = align_metagenes( diff --git a/ribotricer/metagene.py b/ribotricer/metagene.py index 10b76c2..a3e77d0 100644 --- a/ribotricer/metagene.py +++ b/ribotricer/metagene.py @@ -281,7 +281,7 @@ def align_metagenes( xcorr = np.correlate(reference, cov, "full") origin = len(xcorr) // 2 bound = min(base, length) - xcorr = xcorr[origin - bound : origin + bound] + xcorr = xcorr[(origin - bound):(origin + bound)] lag = np.argmax(xcorr) - len(xcorr) // 2 psite_offsets[length] = lag + TYPICAL_OFFSET to_write += "\tlag of {}: {}\n".format(length, lag) diff --git a/ribotricer/orf.py b/ribotricer/orf.py index 505567a..bd5848b 100644 --- a/ribotricer/orf.py +++ b/ribotricer/orf.py @@ -89,7 +89,8 @@ def from_string(cls, line): ) ) return None - oid = fields[0] + # ADS: oid below is not used + oid = fields[0] # noqa F841 category = fields[1] tid = fields[2] ttype = fields[3] diff --git a/ribotricer/orf_seq.py b/ribotricer/orf_seq.py index 95798a2..0f61e3c 100644 --- a/ribotricer/orf_seq.py +++ b/ribotricer/orf_seq.py @@ -92,7 +92,7 @@ def translate_nt_to_aa(seq): protein = "" if len(seq) % 3 == 0: for i in range(0, len(seq), 3): - codon = seq[i : i + 3] + codon = seq[i: i + 3] if "N" in codon: protein += "X" elif codon not in codon_table: @@ -142,10 +142,10 @@ def orf_seq(ribotricer_index, genome_fasta, saveto, translate=False): if translate: if len(seq) % 3 != 0: sys.stderr.write( - "WARNING: Sequence length with ORF ID '{}' is not a multiple of three. Output sequence might be truncated.\n".format( - orf_id - ) + "WARNING: Sequence length with ORF ID '{orf_id}' is not " + "a multiple of three. 
Output sequence might be " + "truncated.\n".format(orf_id=orf_id) ) - seq = seq[0 : (len(seq) // 3) * 3] + seq = seq[0: (len(seq) // 3) * 3] seq = translate_nt_to_aa(seq) fh.write("{}\t{}\n".format(orf_id, seq)) diff --git a/ribotricer/plotting.py b/ribotricer/plotting.py index 2f09471..6674afb 100644 --- a/ribotricer/plotting.py +++ b/ribotricer/plotting.py @@ -17,8 +17,9 @@ import matplotlib matplotlib.use("Agg") -import matplotlib.pyplot as plt -from matplotlib.backends.backend_pdf import PdfPages +# ADS: verify that matplotlib.use("Agg") must precede imports below +import matplotlib.pyplot as plt # noqa: E402 +from matplotlib.backends.backend_pdf import PdfPages # noqa: E402 matplotlib.rcParams["pdf.fonttype"] = 42 matplotlib.rcParams["ps.fonttype"] = 42 diff --git a/ribotricer/prepare_orfs.py b/ribotricer/prepare_orfs.py index 7c9a878..f9f07f8 100644 --- a/ribotricer/prepare_orfs.py +++ b/ribotricer/prepare_orfs.py @@ -205,7 +205,7 @@ def search_orfs(fasta, intervals, min_orf_length, start_codons, stop_codons, lon ) seq = merged_seq[start:idx] leader = merged_seq[:start] - trailer = merged_seq[idx + 3 :] + trailer = merged_seq[idx + 3:] if ivs: orfs.append((ivs, seq, leader, trailer)) if longest: diff --git a/ribotricer/utils.py b/ribotricer/utils.py index eb8793f..1de07b2 100644 --- a/ribotricer/utils.py +++ b/ribotricer/utils.py @@ -400,7 +400,7 @@ def translate(seq): protein = "" if len(seq) % 3 == 0: for i in range(0, len(seq), 3): - codon = seq[i : i + 3] + codon = seq[i: i + 3] protein += CODON_TO_AA[codon] return protein