Skip to content

Commit

Permalink
Refactor output handling to use 'info' instead of 'print' and remove unused code
Browse files Browse the repository at this point in the history
  • Loading branch information
kclem committed Nov 20, 2024
1 parent 94db5fc commit 91f4840
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 125 deletions.
4 changes: 2 additions & 2 deletions CRISPResso2/CRISPRessoAggregateCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def main():
|/--\ \_| \_| | \ |_ \_| /--\ | |_ |
|__________________________________|
'''
print(CRISPRessoShared.get_crispresso_header(description, aggregate_string))
info(CRISPRessoShared.get_crispresso_header(description, aggregate_string))

parser = argparse.ArgumentParser(description="Aggregate CRISPResso2 Runs")
parser.add_argument("-p", "--prefix", action='append', help="Prefix for CRISPResso folders to aggregate (may be specified multiple times)", default=[])
Expand Down Expand Up @@ -903,7 +903,7 @@ def main():
process_pool.shutdown()

info('Analysis Complete!', {'percent_complete': 100})
print(CRISPRessoShared.get_crispresso_footer())
info(CRISPRessoShared.get_crispresso_footer())
sys.exit(0)

except Exception as e:
Expand Down
6 changes: 3 additions & 3 deletions CRISPResso2/CRISPRessoBatchCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,11 @@ def main():
||__)/--\| \__| ||
|_________________|
'''
print(CRISPRessoShared.get_crispresso_header(description, batch_string))
info(CRISPRessoShared.get_crispresso_header(description, batch_string))

# if no args are given, print a simplified help message
if len(sys.argv) == 1:
print(CRISPRessoShared.format_cl_text('usage: CRISPRessoBatch [-bs BATCH_SETTINGS] [-n NAME]\n' + \
raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPRessoBatch [-bs BATCH_SETTINGS] [-n NAME]\n' + \
'commonly-used arguments:\n' + \
'-h, --help show the full list of arguments\n' + \
'-v, --version show program\'s version number and exit\n' + \
Expand Down Expand Up @@ -984,7 +984,7 @@ def main():
CRISPRessoShared.zip_results(path_value[1])
else:
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
print(CRISPRessoShared.get_crispresso_footer())
info(CRISPRessoShared.get_crispresso_footer())
sys.exit(0)

except Exception as e:
Expand Down
37 changes: 12 additions & 25 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,24 @@
(c) 2020 The General Hospital Corporation. All Rights Reserved.
'''

import sys
running_python3 = False
if sys.version_info > (3, 0):
running_python3 = True

import argparse
from collections import Counter
from copy import deepcopy
from concurrent.futures import ProcessPoolExecutor, wait
from functools import partial
import errno
import gzip
import json
import zipfile
import logging
import os
import re
import sys
import subprocess as sb
import traceback
from multiprocessing import Process
import zipfile

from collections import Counter
from copy import deepcopy
from concurrent.futures import ProcessPoolExecutor, wait
from datetime import datetime
from functools import partial
from multiprocessing import Process

from CRISPResso2 import CRISPRessoCOREResources
from CRISPResso2.CRISPRessoReports import CRISPRessoReport
Expand All @@ -41,15 +39,6 @@
from CRISPResso2 import CRISPResso2Align
from CRISPResso2 import CRISPRessoMultiProcessing

from datetime import datetime
present = datetime.now()
#d1 = datetime.strptime('21/07/2019','%d/%m/%Y')
#if present > d1:
# print('\nYour version of CRISPResso2 is out of date. Please download a new version.\n')
# sys.exit(1)

import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(CRISPRessoShared.LogStreamHandler())
Expand Down Expand Up @@ -433,7 +422,6 @@ def get_new_variant_object(args, fastq_seq, refs, ref_names, aln_matrix, pe_scaf
if args.prime_editing_pegRNA_scaffold_seq and 'Prime-edited' in best_match_names: #any scaffold extensions must be closer to the prime-edited sequence
pe_read_possible_scaffold_loc = new_variant['variant_Prime-edited']['ref_positions'].index(pe_scaffold_dna_info[0]-1) + 1
if new_variant['variant_Prime-edited']['aln_seq'][pe_read_possible_scaffold_loc:(pe_read_possible_scaffold_loc+len(pe_scaffold_dna_info[1]))] == pe_scaffold_dna_info[1]:
# print('comparingHERE ' + new_variant['variant_Prime-edited']['aln_seq'][pe_read_possible_scaffold_loc:(pe_read_possible_scaffold_loc+len(pe_scaffold_dna_info[1])+5)] + ' from ' + new_variant['variant_Prime-edited']['aln_seq'] + ' and ' + new_variant['variant_Prime-edited']['aln_ref'])
new_variant['aln_ref_names'] = ["Scaffold-incorporated"]
new_variant['class_name'] = "Scaffold-incorporated"
old_payload = deepcopy(new_variant['variant_Prime-edited']) #keep prime-edited allele and alignment
Expand Down Expand Up @@ -1382,11 +1370,11 @@ def print_stacktrace_if_debug():
start_time_string = start_time.strftime('%Y-%m-%d %H:%M:%S')
description = ['~~~CRISPResso 2~~~', '-Analysis of genome editing outcomes from deep sequencing data-']
header = CRISPRessoShared.get_crispresso_header(description=description, header_str=None)
print(header)
info(header)

# if no args are given, print a simplified help message
if len(sys.argv) == 1:
print(CRISPRessoShared.format_cl_text('usage: CRISPResso [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-a AMPLICON_SEQ] [-g GUIDE_SEQ] [-n NAME]\n' + \
raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPResso [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-a AMPLICON_SEQ] [-g GUIDE_SEQ] [-n NAME]\n' + \
'commonly-used arguments:\n' + \
'-h, --help show the full list of arguments\n' + \
'-v, --version show program\'s version number and exit\n' + \
Expand All @@ -1396,7 +1384,6 @@ def print_stacktrace_if_debug():
'-g GUIDE_SEQ Guide sequence (default: None)\n' + \
'-n NAME, --name NAME Name for the analysis (default: name based on input file name)'
))
sys.exit()


arg_parser = CRISPRessoShared.getCRISPRessoArgParser("Core")
Expand Down Expand Up @@ -5135,7 +5122,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)

info('Analysis Complete!', {'percent_complete': 100})
print(CRISPRessoShared.get_crispresso_footer())
info(CRISPRessoShared.get_crispresso_footer())

sys.exit(0)

Expand Down
4 changes: 2 additions & 2 deletions CRISPResso2/CRISPRessoCompareCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def main():
|___________________________|
'''
compare_header = CRISPRessoShared.get_crispresso_header(description, compare_header)
print(compare_header)
info(compare_header)

parser = CRISPRessoShared.getCRISPRessoArgParser("Compare", parser_title = 'CRISPRessoCompare Parameters')

Expand Down Expand Up @@ -455,7 +455,7 @@ def get_plot_title_with_ref_name(plotTitle, refName):
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)

info('Analysis Complete!', {'percent_complete': 100})
print(CRISPRessoShared.get_crispresso_footer())
info(CRISPRessoShared.get_crispresso_footer())
sys.exit(0)

except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions CRISPResso2/CRISPRessoMetaCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def main():
| |_| |_| |_| |_|____ |_| |_| |_| |
|________________________________________|
'''
print(CRISPRessoShared.get_crispresso_header(description, meta_string))
info(CRISPRessoShared.get_crispresso_header(description, meta_string))

parser = CRISPRessoShared.getCRISPRessoArgParser("Meta", parser_title = 'CRISPRessoMeta Parameters')

Expand Down Expand Up @@ -366,7 +366,7 @@ def main():
crispresso2Meta_info_file, crispresso2_info,
)
info('Analysis Complete!', {'percent_complete': 100})
print(CRISPRessoShared.get_crispresso_footer())
info(CRISPRessoShared.get_crispresso_footer())
sys.exit(0)

except Exception as e:
Expand Down
83 changes: 27 additions & 56 deletions CRISPResso2/CRISPRessoPooledCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,28 +67,36 @@ def is_exe(fpath):


def check_samtools():
"""
Assert that samtools is installed
"""

cmd_path=which('samtools')
if cmd_path:
return True
else:
sys.stdout.write('\nCRISPRessoPooled requires samtools')
sys.stdout.write('\n\nPlease install samtools and add it to your path following the instructions at: http://www.htslib.org/download/')
return False
raise CRISPRessoShared.InstallationException('CRISPRessoPooled requires samtools\nPlease install samtools and add it to your path following the instructions at: http://www.htslib.org/download/')

def check_bowtie2():
"""
Assert that bowtie2 is installed
"""

cmd_path1=which('bowtie2')
cmd_path2=which('bowtie2-inspect')

if cmd_path1 and cmd_path2:
return True
else:
sys.stdout.write('\nCRISPRessoPooled requires Bowtie2!')
sys.stdout.write('\n\nPlease install Bowtie2 and add it to your path following the instructions at: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#obtaining-bowtie-2')
return False
raise CRISPRessoShared.InstallationException('\nCRISPRessoPooled requires Bowtie2!\nPlease install Bowtie2 and add it to your path following the instructions at: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#obtaining-bowtie-2')

def print_full_pandas_df(x):
"""
Print the full pandas dataframe (no clipping of rows or columns)
def print_full(x):
Args:
x (pd.DataFrame): The dataframe to print
"""
pd.set_option('display.max_rows', len(x))
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
Expand Down Expand Up @@ -230,41 +238,6 @@ def normalize_name(name, fastq_r1, fastq_r2, aligned_pooled_bam):
pd=check_library('pandas')
np=check_library('numpy')

###EXCEPTIONS############################
class FlashException(Exception):
pass

class TrimmomaticException(Exception):
pass

class Bowtie2Exception(Exception):
pass

class AmpliconsNotUniqueException(Exception):
pass

class AmpliconsNamesNotUniqueException(Exception):
pass

class NoReadsAlignedException(Exception):
pass

class DonorSequenceException(Exception):
pass

class AmpliconEqualDonorException(Exception):
pass

class SgRNASequenceException(Exception):
pass

class NTException(Exception):
pass

class ExonSequenceException(Exception):
pass


def main():
try:
start_time = datetime.now()
Expand All @@ -278,11 +251,11 @@ def main():
|| \__/\__/|__|__|__/ |
|_______________________|
'''
print(CRISPRessoShared.get_crispresso_header(description, pooled_string))
info(CRISPRessoShared.get_crispresso_header(description, pooled_string))

# if no args are given, print a simplified help message
if len(sys.argv) == 1:
print(CRISPRessoShared.format_cl_text('usage: CRISPRessoPooled [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-f AMPLICONS_FILE] [-x GENOME_ROOT] [-n NAME]\n' + \
raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPRessoPooled [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-f AMPLICONS_FILE] [-x GENOME_ROOT] [-n NAME]\n' + \
'commonly-used arguments:\n' + \
'-h, --help show the full list of arguments\n' + \
'-v, --version show program\'s version number and exit\n' + \
Expand All @@ -292,7 +265,6 @@ def main():
'-x GENOME_ROOT Folder that contains the bowtie2-indexed genome for optional unbiased alignment of reads (default: None, reads are only aligned to provided amplicon sequences)\n' + \
'-n NAME, --name NAME Name for the analysis (default: name based on input file name)\n'
))
sys.exit()

parser = CRISPRessoShared.getCRISPRessoArgParser("Pooled", parser_title = 'CRISPRessoPooled Parameters')

Expand Down Expand Up @@ -723,19 +695,19 @@ def main():

if not len(df_template.amplicon_seq.unique())==df_template.shape[0]:
duplicated_entries = df_template.amplicon_seq[df_template.amplicon_seq.duplicated()]
raise Exception('The amplicon sequences must be distinct! (Duplicated entries: ' + str(duplicated_entries.values) + ')')
raise CRISPRessoShared.BadParameterException('The amplicon sequences must be distinct! (Duplicated entries: ' + str(duplicated_entries.values) + ')')

if not len(df_template.amplicon_name.unique())==df_template.shape[0]:
duplicated_entries = df_template.amplicon_name[df_template.amplicon_name.duplicated()]
raise Exception('The amplicon names must be distinct! (Duplicated names: ' + str(duplicated_entries.values) + ')')
raise CRISPRessoShared.BadParameterException('The amplicon names must be distinct! (Duplicated names: ' + str(duplicated_entries.values) + ')')

df_template=df_template.set_index('amplicon_name')
df_template.index=df_template.index.to_series().str.replace(' ', '_')

for idx, row in df_template.iterrows():
wrong_nt=CRISPRessoShared.find_wrong_nt(row.amplicon_seq)
if wrong_nt:
raise NTException('The amplicon sequence %s contains wrong characters:%s' % (idx, ' '.join(wrong_nt)))
raise CRISPRessoShared.NTException('The amplicon sequence %s contains wrong characters:%s' % (idx, ' '.join(wrong_nt)))

if 'guide_seq' in df_template.columns and not pd.isnull(row.guide_seq):
cut_points = []
Expand All @@ -745,7 +717,7 @@ def main():

wrong_nt = CRISPRessoShared.find_wrong_nt(current_guide_seq)
if wrong_nt:
raise NTException('The sgRNA sequence %s contains wrong characters:%s' % (current_guide_seq, ' '.join(wrong_nt)))
raise CRISPRessoShared.NTException('The sgRNA sequence %s contains wrong characters:%s' % (current_guide_seq, ' '.join(wrong_nt)))

offset_fw=guide_qw_centers[idx]+len(current_guide_seq)-1
offset_rc=(-guide_qw_centers[idx])-1
Expand Down Expand Up @@ -929,7 +901,7 @@ def main():
filename_amplicon_seqs_fasta, filename_aligned_amplicons_sam_log, filename_aligned_amplicons_sam)
bowtie_status=sb.call(aligner_command, shell=True)
if bowtie_status:
raise Bowtie2Exception('Bowtie2 failed to align amplicons to the genome, please check the output file.')
raise CRISPRessoShared.AlignmentException('Bowtie2 failed to align amplicons to the genome, please check the output file.')

additional_columns = []
with open (filename_aligned_amplicons_sam) as aln:
Expand Down Expand Up @@ -1045,7 +1017,7 @@ def rreplace(s, old, new):
if can_finish_incomplete_run and 'genome_demultiplexing' in crispresso2_info['running_info']['finished_steps'] and os.path.isfile(REPORT_ALL_DEPTH):
info('Using previously-computed demultiplexing of genomic reads')
df_all_demux = pd.read_csv(REPORT_ALL_DEPTH, sep='\t')
df_all_demux['loc'] = df_all_demux['chr_id']+' ' + df_all_demux['start'].apply(str) + ' '+df_all_demux['end'].apply(str)
df_all_demux['loc'] = df_all_demux['chr_id'].apply(str) + ' ' + df_all_demux['start'].apply(str) + ' '+df_all_demux['end'].apply(str)
df_all_demux.set_index(['loc'], inplace=True)
else:
#REDISCOVER LOCATIONS and DEMULTIPLEX READS
Expand Down Expand Up @@ -1218,7 +1190,7 @@ def rreplace(s, old, new):
df_all_demux.set_index(['loc'], inplace=True)

if sum_aligned_reads == 0:
raise NoReadsAlignedException("No reads aligned to the specified genome")
raise CRISPRessoShared.NoReadsAlignedException("No reads aligned to the specified genome")

crispresso2_info['running_info']['finished_steps']['genome_demultiplexing'] = True
CRISPRessoShared.write_crispresso_info(
Expand Down Expand Up @@ -1389,11 +1361,10 @@ def rreplace(s, old, new):
info('Running CRISPResso on the regions discovered...')
crispresso_cmds = []
for idx, row in df_regions.iterrows():

if row.n_reads > args.min_reads_to_use_region:
info('\nRunning CRISPResso on: %s-%d-%d...'%(row.chr_id, row.bpstart, row.bpend))
if pd.isna(row.sequence):
raise Exception('Cannot extract sequence from input reference ' + uncompressed_reference)
raise CRISPRessoShared.BadParameterException('Cannot extract sequence ' + str(row.sequence) + ' from input reference ' + uncompressed_reference)
crispresso_cmd = args.crispresso_command + ' -r1 %s -a %s -o %s' %(row.fastq_file, row.sequence, OUTPUT_DIRECTORY)
crispresso_cmd = CRISPRessoShared.propagate_crispresso_options(crispresso_cmd, crispresso_options_for_pooled, args)
crispresso_cmds.append(crispresso_cmd)
Expand Down Expand Up @@ -1631,7 +1602,7 @@ def default_sigpipe():
try:
run_data = CRISPRessoShared.load_crispresso_info(sub_folder)
except Exception as e:
raise Exception('CRISPResso run %s is not complete. Cannot read CRISPResso2_info.json file.'% sub_folder)
raise CRISPRessoShared.OutputFolderIncompleteException('CRISPResso run %s is not complete. Cannot read CRISPResso2_info.json file.'% sub_folder)
ref_sequences = [run_data['results']['refs'][ref_name]['sequence'] for ref_name in run_data['results']['ref_names']]
allele_frequency_table_zip_filename = os.path.join(sub_folder, run_data['running_info']['allele_frequency_table_zip_filename'])
if not os.path.exists(allele_frequency_table_zip_filename):
Expand Down Expand Up @@ -1690,7 +1661,7 @@ def default_sigpipe():
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)

info('All Done!', {'percent_complete': 100})
print(CRISPRessoShared.get_crispresso_footer())
info(CRISPRessoShared.get_crispresso_footer())
sys.exit(0)

except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def main():
compare_header = CRISPRessoShared.get_crispresso_header(
description, compare_header,
)
print(compare_header)
info(compare_header)

parser = argparse.ArgumentParser(
description='CRISPRessoPooledWGSCompare Parameters',
Expand Down Expand Up @@ -380,7 +380,7 @@ def main():
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)

info('All Done!', {'percent_complete': 100})
print(CRISPRessoShared.get_crispresso_footer())
info(CRISPRessoShared.get_crispresso_footer())
sys.exit(0)

except Exception as e:
Expand Down
2 changes: 0 additions & 2 deletions CRISPResso2/CRISPRessoShared.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import errno
import gzip
import json
import sys
import textwrap
import importlib.util
from pathlib import Path
Expand All @@ -31,7 +30,6 @@

__version__ = "2.3.2"


###EXCEPTIONS############################
class FastpException(Exception):
pass
Expand Down
Loading

0 comments on commit 91f4840

Please sign in to comment.