forked from Hua-CM/HuaSmallTools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
trim_align.py
36 lines (31 loc) · 1.36 KB
/
trim_align.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: utf-8 -*-
# @Time : 2020/3/13 22:02
# @Author : Zhongyi Hua
# @FileName: trim_align.py
# @Usage:
# @Note:
# @E-mail: [email protected]
from Bio import AlignIO
def trim_align(alignment, mismatch):
    """Trim gap-rich columns from both ends of a multiple sequence alignment.

    Columns are scanned inward from the left and from the right edge; the
    first column on each side whose gap fraction (number of "-" characters
    divided by the number of sequences) does not exceed *mismatch* marks the
    boundary of the region that is kept. Interior columns are never removed.

    Args:
        alignment: An alignment object supporting ``len()``,
            ``get_alignment_length()`` and 2-D slicing (``alignment[:, i]``
            yielding a column string, ``alignment[:, a:b]`` yielding a
            sub-alignment). Presumably a Biopython ``MultipleSeqAlignment``,
            as produced by ``AlignIO.read`` in this script.
        mismatch: Maximum tolerated gap fraction (0-1) for a boundary column.
            ``0`` keeps only the span delimited by gap-free columns.

    Returns:
        The column slice ``alignment[:, start:end + 1]`` covering the first
        through the last acceptable column, both inclusive. If the alignment
        has no sequences, or no column meets the threshold, a zero-column
        slice is returned instead of raising.
    """
    n_seqs = len(alignment)
    n_cols = alignment.get_alignment_length()

    # Nothing to scan; also guards the division below against zero sequences.
    if n_seqs == 0 or n_cols == 0:
        return alignment[:, 0:0]

    def _is_acceptable(col):
        # "<=" (not "<") so that a threshold of 0 can match gap-free columns;
        # a gap fraction is never negative, so "<" could never succeed at 0.
        return alignment[:, col].count("-") / n_seqs <= mismatch

    start_position = next(
        (col for col in range(n_cols) if _is_acceptable(col)), None
    )
    if start_position is None:
        # Every column is too gappy: return an empty (zero-column) alignment.
        return alignment[:, 0:0]

    # A qualifying column exists, so the backward scan is guaranteed to stop.
    end_position = next(
        col for col in range(n_cols - 1, -1, -1) if _is_acceptable(col)
    )

    # Slice ends are exclusive: +1 keeps the last acceptable column itself.
    return alignment[:, start_position:end_position + 1]
if __name__ == '__main__':
    # Command-line entry point: read a FASTA alignment, trim it with
    # trim_align, and write the result back out as FASTA.
    import argparse

    # (flags, add_argument keyword options), registered in this order.
    option_table = (
        (('-i', '--input_file'),
         {'required': True,
          'help': '<file_path> The MSA file in fasta format'}),
        (('-m', '--miss_match'),
         {'default': 0,
          'type': float,
          'help': '<float> The miss match value used for trimming'}),
        (('-o', '--output_file'),
         {'required': True,
          'help': '<file_path> The result MSA file in format'}),
    )

    cli = argparse.ArgumentParser(
        description="This is the script for simple trim MSA file"
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    options = cli.parse_args()

    source_alignment = AlignIO.read(options.input_file, "fasta")
    trimmed_alignment = trim_align(source_alignment, options.miss_match)
    AlignIO.write(trimmed_alignment, options.output_file, "fasta")