From 33b9b9154ebfd8d52856525bfe8762671d16e0fb Mon Sep 17 00:00:00 2001 From: Moritz Berg Date: Thu, 29 Jun 2023 14:35:07 +0200 Subject: [PATCH] Added an option to enable/disable N-filtering. --- pyvapor/vaporfunc.py | 6 +++--- vapor.py | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pyvapor/vaporfunc.py b/pyvapor/vaporfunc.py index 451cd47..1551dbe 100644 --- a/pyvapor/vaporfunc.py +++ b/pyvapor/vaporfunc.py @@ -68,7 +68,7 @@ def parse_and_prefilter(fqs, dbkmers, threshold, k): f.close() return reads, nraw -def parse_fasta_uniq(fasta, filter_Ns=True): +def parse_fasta_uniq(fasta, filter_N): """ Gets unique sequences from a fasta, with filtering of Ns""" tmph = "" tmps = "" @@ -84,7 +84,7 @@ def parse_fasta_uniq(fasta, filter_Ns=True): continue elif l[0] == ">": if tmps not in sseen and li > 0: - if ((filter_Ns == True) and "N" not in tmps) or filter_Ns == False: + if ((filter_N == True) and "N" not in tmps) or filter_N == False: hs.append(tmph) ss.append(tmps) sseen.add(tmps) @@ -94,7 +94,7 @@ def parse_fasta_uniq(fasta, filter_Ns=True): tmps += l hs.append(tmph) ss.append(tmps) - return hs, ss + return hs, ss def subsample(reads, n): """ Takes a sample of n from reads """ diff --git a/vapor.py b/vapor.py index 973f032..4e5cadb 100755 --- a/vapor.py +++ b/vapor.py @@ -49,9 +49,11 @@ """ +from ctypes.wintypes import BOOLEAN import sys import argparse import os +from xmlrpc.client import boolean import pyvapor as vp def blockErr(): @@ -67,8 +69,10 @@ def main(args): sys.stderr.write("WARNING: kmer sizes of less than 21 can result in contaminating sequence carryover, which may affect results. Only do this if you know your sample is pure, or have increased the filtering threshold -t sufficiently. Refer to the docs for details. 
\n") sys.stderr.write("Loading database sequences\n") - seqsh, seqs = vp.parse_fasta_uniq(args.fa) + seqsh, seqs = vp.parse_fasta_uniq(args.fa, args.filter_N) sys.stderr.write("Got %d unique sequences\n" % len(seqs)) + if args.filter_N == False: + sys.stderr.write("WARNING: I WONT TELL YOU WHAT IS AMISS\n") # Get database kmers for filtering sys.stderr.write("Getting database kmers\n") @@ -149,7 +153,6 @@ def main(args): group = parser.add_mutually_exclusive_group() group.add_argument("--return_seqs", action="store_true") group.add_argument("-o", "--output_prefix", type=str, help="Prefix to write full output to, stout by default", nargs='?', default=None) - parser.add_argument("-q", "--quiet", action="store_true", default=False) parser.add_argument("--return_best_n", type=int, default=1) parser.add_argument("-m", "--min_kmer_prop", type=float, help="Minimum proportion of matched kmers allowed for queries [default=0.1]", nargs='?', default=0.1) @@ -164,6 +167,7 @@ def main(args): parser.add_argument("--nocache", action="store_true", default=False) parser.add_argument("-v", "--version", action="store_true", default=False) parser.add_argument("--low_mem", action="store_true", default=False) + parser.add_argument("--filter_N", "-n", action="store_true") if len(sys.argv)==1: parser.print_help(sys.stderr)