Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Qutils reformatting and optimization #187

Open
wants to merge 3 commits into
base: new-master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 12 additions & 29 deletions quast_libs/qutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,8 +890,18 @@ def compile_tool(name, dirpath, requirements, just_notice=False, logger=logger,


def check_dirpath(path, message="", exit_code=3):
    """
    Check that a path consists of ASCII characters only and contains no spaces.

    Each violated rule is reported through the module-level ``logger.error``
    together with ``exit_code``.
    NOTE(review): ``logger.error(..., exit_with_code=...)`` presumably terminates
    the process (the accompanying unit tests expect ``SystemExit``) -- confirm
    against the logger implementation.

    :param path: path string to validate
    :param message: extra text appended to the error message when the path is rejected
    :param exit_code: value forwarded to ``logger.error`` as ``exit_with_code``
    :return: True when execution reaches the end of the function
    """
    try:
        path.encode('ascii')
    except (UnicodeDecodeError, UnicodeEncodeError):
        # Python 3 raises UnicodeEncodeError for non-ASCII text. Python 2 raises
        # UnicodeDecodeError when `path` is a byte string holding non-ASCII bytes
        # (it is implicitly decoded before encoding), so both must be caught.
        logger.error('QUAST does not support non-ASCII characters in path.\n' + message, to_stderr=True,
                     exit_with_code=exit_code)
    if ' ' in path:
        logger.error('QUAST does not support spaces in paths.\n' + message, to_stderr=True, exit_with_code=exit_code)
    return True
Expand Down Expand Up @@ -1081,18 +1091,6 @@ def run_parallel(_fn, fn_args, n_jobs=None, filter_results=False):
return results


# based on http://stackoverflow.com/questions/196345/how-to-check-if-a-string-in-python-is-in-ascii
def is_ascii_string(line):
    """
    Tell whether a string can be encoded as ASCII.

    :param line: string to examine
    :return: True if ``line.encode('ascii')`` succeeds, False if it raises
             UnicodeDecodeError or UnicodeEncodeError
    """
    try:
        line.encode('ascii')
    except (UnicodeDecodeError, UnicodeEncodeError):
        # UnicodeDecodeError is the Python 2 failure mode, UnicodeEncodeError the Python 3 one
        return False
    return True


def md5(fpath):
hash_md5 = hashlib.md5()
with open(fpath, 'rb') as f:
Expand Down Expand Up @@ -1120,18 +1118,3 @@ def verify_md5(fpath, md5_fpath=None):
logger.warning('Failed to check md5 for %s! Either this file or its md5 file (%s) is missing or empty.' %
(fpath, md5_fpath))
return False


def percentile(values, percent):
    """
    Return the element of ``values`` located at the given percentile rank.

    The rank is ``ceil(len(values) * percent / 100)`` (1-based); ranks below 1
    are clamped to the first element.
    NOTE(review): the function does not sort, so it presumably expects
    ``values`` to be sorted in ascending order already -- confirm at call sites.

    :param values: indexable sequence of values
    :param percent: percentile to look up, on a 0-100 scale
    :return: the element of ``values`` at the computed rank
    :raises IndexError: if ``values`` is empty
    """
    import math
    # Divide by 100.0 so the quotient is a float on Python 2 as well; with integer
    # operands Python 2 would floor the quotient before ceil() is applied.
    rank = int(math.ceil(len(values) * percent / 100.0))
    return values[max(0, rank - 1)]


def calc_median(values):
    """
    Return the median element of ``values``.

    For an odd number of elements the central element is returned; for an even
    number, the floor-divided average of the two central elements.
    NOTE(review): no sorting happens here, so ``values`` is presumably already
    sorted -- confirm at call sites.

    :param values: indexable sequence of values
    :return: the central element, or the ``// 2`` average of the two central ones
    """
    size = len(values)
    upper_mid = size // 2
    if size % 2:
        # odd length: a single central element
        return values[upper_mid]
    # even length: floor-divided average of the two central elements
    return (values[upper_mid] + values[upper_mid - 1]) // 2

27 changes: 27 additions & 0 deletions quast_libs/qutils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import sys
import os
import unittest

sys.path.append(os.path.normpath(os.path.join(os.path.dirname(__file__), os.path.pardir)))

import quast_libs.qutils as qq


class TestCheckDirPath(unittest.TestCase):
    """Unit tests for the path validation done by ``qutils.check_dirpath``."""

    def test_check_wrong_format(self):
        """A path made of non-ASCII characters is expected to raise SystemExit."""
        non_ascii_path = "♥O◘♦♥O◘♦"
        self.assertRaises(SystemExit, qq.check_dirpath, non_ascii_path)

    def test_check_spaces(self):
        """A path containing a space is expected to raise SystemExit."""
        path_with_space = " misha@misha:~$"
        self.assertRaises(SystemExit, qq.check_dirpath, path_with_space)

    def test_check_right_format(self):
        """An ASCII path without spaces is expected to be accepted (truthy result)."""
        valid_path = "misha@misha:~$"
        accepted = qq.check_dirpath(valid_path)
        self.assertTrue(accepted)


if __name__ == '__main__':
unittest.main()
9 changes: 5 additions & 4 deletions quast_libs/reads_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import re
import shutil
import shlex
import numpy as np
from collections import defaultdict
from math import sqrt
from os.path import isfile, join, basename, abspath, isdir, dirname, exists
Expand All @@ -20,7 +21,7 @@
from quast_libs.fastaparser import create_fai_file
from quast_libs.ra_utils.misc import *
from quast_libs.qutils import is_non_empty_file, add_suffix, get_chr_len_fpath, run_parallel, \
get_path_to_program, check_java_version, percentile, calc_median
get_path_to_program, check_java_version

from quast_libs.log import get_logger
from quast_libs.reporting import save_reads
Expand Down Expand Up @@ -849,8 +850,8 @@ def proceed_cov_file(raw_cov_fpath, cov_fpath, correct_chr_names):


def _lower_percentile(values, percent):
    """Return the 'lower'-interpolated percentile of ``values`` across NumPy versions."""
    try:
        # NumPy >= 1.22 spells the option `method`
        return np.percentile(values, percent, method='lower')
    except TypeError:
        # older NumPy only knows the original keyword name `interpolation`
        return np.percentile(values, percent, interpolation='lower')


def get_max_min_is(insert_sizes):
    """
    Return the first and ninth deciles of the given insert sizes.

    Both values are computed with ``np.percentile`` using the 'lower' rule, so
    each result is an actual element of ``insert_sizes`` rather than an
    interpolated value.

    :param insert_sizes: non-empty sequence of numeric insert sizes
    :return: tuple ``(decile_1, decile_9)`` -- the 10th and 90th percentiles
    """
    decile_1 = _lower_percentile(insert_sizes, 10)
    decile_9 = _lower_percentile(insert_sizes, 90)
    return decile_1, decile_9


Expand Down Expand Up @@ -883,7 +884,7 @@ def calculate_insert_size(sam_fpath, output_dir, ref_name, reads_suffix=''):

if insert_sizes:
insert_sizes.sort()
median_is = calc_median(insert_sizes)
median_is = np.percentile(insert_sizes, 50, interpolation='lower')
if median_is <= 0:
return None, None, None
min_insert_size, max_insert_size = get_max_min_is(insert_sizes)
Expand Down