Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Qutils reformatting and optimization #187

Open
wants to merge 3 commits into
base: new-master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion metaquast.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from quast_libs.options_parser import parse_options, remove_from_quast_py_args, prepare_regular_quast_args

from quast_libs import contigs_analyzer, search_references_meta, plotter_data, qutils, run_busco
from quast_libs.qutils import cleanup, check_dirpath, is_python2, run_parallel
from quast_libs.qutils import cleanup, check_dirpath, run_parallel

from quast_libs.log import get_logger
logger = get_logger(qconfig.LOGGER_META_NAME)
Expand Down
2 changes: 1 addition & 1 deletion quast_libs/contigs_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from quast_libs.fastaparser import get_genome_stats

from quast_libs.log import get_logger
from quast_libs.qutils import is_python2, run_parallel
from quast_libs.qutils import run_parallel

logger = get_logger(qconfig.LOGGER_DEFAULT_NAME)

Expand Down
63 changes: 16 additions & 47 deletions quast_libs/qutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,22 +206,15 @@ def correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting):
return corrected_contigs_fpaths, old_contigs_fpaths


def convert_to_unicode(value):
    """
    Convert value to the interpreter's text type.

    Uses ``unicode`` when is_python2() reports Python 2 and ``str`` otherwise.
    """
    # The conditional expression is lazy, so the Python-2-only name `unicode`
    # is never looked up on Python 3.
    text_type = unicode if is_python2() else str  # noqa: F821
    return text_type(value)


def slugify(value):
    """
    Prepare string to use in file names: normalizes string to ASCII,
    replaces every character that is not a word character, whitespace or
    hyphen with a hyphen, and collapses runs of hyphens/whitespace into a
    single hyphen.

    :param value: object to convert; it is passed through str() first
    :return: the resulting ASCII-only string
    """
    import unicodedata
    # NFKD decomposes accented characters into base character + combining mark;
    # encoding to ASCII with 'ignore' then drops everything non-ASCII.
    value = unicodedata.normalize('NFKD', str(value)).encode('ascii', 'ignore').decode('ascii')
    # Raw strings keep '\w' and '\s' from being parsed as (invalid) string escapes.
    value = re.sub(r'[^\w\s-]', '-', value).strip()
    return re.sub(r'[-\s]+', '-', value)


Expand Down Expand Up @@ -831,10 +824,6 @@ def safe_create(fpath, logger, is_required=False):
logger.notice(msg)


def is_python2():
    """Return True when the running interpreter's major version is below 3."""
    # Tuple comparison: any (3, ...) version_info is not smaller than (3,).
    return sys.version_info < (3,)


def fix_configure_timestamps(dirpath):
try:
os.utime(join(dirpath, 'aclocal.m4'), None)
Expand Down Expand Up @@ -890,8 +879,18 @@ def compile_tool(name, dirpath, requirements, just_notice=False, logger=logger,


def check_dirpath(path, message="", exit_code=3):
    """
    Check that a path contains only ASCII characters and no spaces.

    Each failed check is reported through logger.error together with the
    given exit code.

    :param path: path string to check
    :param message: extra text appended to the error message if the path is rejected
    :param exit_code: value passed to logger.error as exit_with_code
    :return: True if execution reaches the end of the function
    """
    try:
        path.encode('ascii')
    except UnicodeError:
        # UnicodeError covers UnicodeEncodeError (text strings) as well as the
        # UnicodeDecodeError that Python 2 byte strings raise on encode().
        logger.error('QUAST does not support non-ASCII characters in path.\n' + message, to_stderr=True,
                     exit_with_code=exit_code)
    if ' ' in path:
        logger.error('QUAST does not support spaces in paths.\n' + message, to_stderr=True, exit_with_code=exit_code)
    return True
Expand Down Expand Up @@ -1063,10 +1062,7 @@ def run_parallel(_fn, fn_args, n_jobs=None, filter_results=False):
except TypeError:
pass
except ImportError:
if is_python2():
from joblib2 import Parallel, delayed
else:
from joblib3 import Parallel, delayed
from joblib3 import Parallel, delayed
results_tuples = Parallel(**parallel_args)(delayed(_fn)(*args) for args in fn_args)
results = []
if results_tuples:
Expand All @@ -1081,18 +1077,6 @@ def run_parallel(_fn, fn_args, n_jobs=None, filter_results=False):
return results


# based on http://stackoverflow.com/questions/196345/how-to-check-if-a-string-in-python-is-in-ascii
def is_ascii_string(line):
    """
    Tell whether a string consists solely of ASCII characters.

    :param line: text string or byte string to check
    :return: True if line can be represented in ASCII, False otherwise
    """
    try:
        if isinstance(line, bytes):
            # Byte strings have no encode() on Python 3; decoding performs the same check.
            line.decode('ascii')
        else:
            line.encode('ascii')
    except UnicodeError:
        # Parent class of both UnicodeEncodeError and UnicodeDecodeError.
        return False
    return True


def md5(fpath):
hash_md5 = hashlib.md5()
with open(fpath, 'rb') as f:
Expand Down Expand Up @@ -1120,18 +1104,3 @@ def verify_md5(fpath, md5_fpath=None):
logger.warning('Failed to check md5 for %s! Either this file or its md5 file (%s) is missing or empty.' %
(fpath, md5_fpath))
return False


def percentile(values, percent):
    """
    Return the element of values at the given percentile rank.

    The index is ceil(len(values) * percent / 100) - 1, never lower than 0.
    The function indexes values directly and does not sort them.
    NOTE(review): callers presumably pass an already sorted sequence - confirm.

    :param values: indexable sequence of values
    :param percent: percentile to look up, in percent
    :return: the selected element of values
    """
    import math
    # Divide by a float so that Python 2 does not floor the quotient before ceil().
    rank = int(math.ceil(len(values) * percent / 100.0))
    return values[max(0, rank - 1)]


def calc_median(values):
    """
    Return the median of values, indexing the sequence as given (no sorting).

    For an odd number of elements the middle element is returned; for an even
    number, the floor-divided average of the two central elements.
    """
    middle, is_odd = divmod(len(values), 2)
    if is_odd:
        return values[middle]
    # even number of values - floor-average the two central ones
    return (values[middle] + values[middle - 1]) // 2

27 changes: 27 additions & 0 deletions quast_libs/qutils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import sys
import os
import unittest

sys.path.append(os.path.normpath(os.path.join(os.path.dirname(__file__), os.path.pardir)))

import quast_libs.qutils as qq


class TestCheckDirPath(unittest.TestCase):
    """Checks of qutils.check_dirpath for rejected and accepted path strings."""

    def test_check_wrong_format(self):
        # A path made of non-ASCII characters is expected to end in SystemExit.
        non_ascii_path = "♥O◘♦♥O◘♦"
        self.assertRaises(SystemExit, qq.check_dirpath, non_ascii_path)

    def test_check_spaces(self):
        # A path containing a space is expected to end in SystemExit.
        path_with_space = " misha@misha:~$"
        self.assertRaises(SystemExit, qq.check_dirpath, path_with_space)

    def test_check_right_format(self):
        # An ASCII path without spaces passes the check.
        valid_path = "misha@misha:~$"
        self.assertTrue(qq.check_dirpath(valid_path))


if __name__ == '__main__':
unittest.main()
9 changes: 5 additions & 4 deletions quast_libs/reads_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import re
import shutil
import shlex
import numpy as np
from collections import defaultdict
from math import sqrt
from os.path import isfile, join, basename, abspath, isdir, dirname, exists
Expand All @@ -20,7 +21,7 @@
from quast_libs.fastaparser import create_fai_file
from quast_libs.ra_utils.misc import *
from quast_libs.qutils import is_non_empty_file, add_suffix, get_chr_len_fpath, run_parallel, \
get_path_to_program, check_java_version, percentile, calc_median
get_path_to_program, check_java_version

from quast_libs.log import get_logger
from quast_libs.reporting import save_reads
Expand Down Expand Up @@ -849,8 +850,8 @@ def proceed_cov_file(raw_cov_fpath, cov_fpath, correct_chr_names):


def _lower_percentile(values, q):
    """Return the q-th percentile of values using NumPy's 'lower' selection method."""
    try:
        return np.percentile(values, q, method='lower')
    except TypeError:
        # Older NumPy releases only accept the keyword under its former name.
        return np.percentile(values, q, interpolation='lower')


def get_max_min_is(insert_sizes):
    """
    Return the 10th and 90th percentiles of the given insert sizes.

    :param insert_sizes: sequence of insert sizes
    :return: tuple (decile_1, decile_9) - the lower bound comes first
    """
    decile_1 = _lower_percentile(insert_sizes, 10)
    decile_9 = _lower_percentile(insert_sizes, 90)
    return decile_1, decile_9


Expand Down Expand Up @@ -883,7 +884,7 @@ def calculate_insert_size(sam_fpath, output_dir, ref_name, reads_suffix=''):

if insert_sizes:
insert_sizes.sort()
median_is = calc_median(insert_sizes)
median_is = np.percentile(insert_sizes, 50, interpolation='lower')
if median_is <= 0:
return None, None, None
min_insert_size, max_insert_size = get_max_min_is(insert_sizes)
Expand Down