-
Notifications
You must be signed in to change notification settings - Fork 0
/
MAG_extract_GTDB_results.py
49 lines (35 loc) · 1.32 KB
/
MAG_extract_GTDB_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from jakomics import colors
import sys
import argparse
import pandas as pd
import jak_utils
# NOTE(review): project-local helper; presumably prints a run banner -- confirm
# against jak_utils.
jak_utils.header()

# OPTIONS #####################################################################

parser = argparse.ArgumentParser(
    description='Extract selected columns from GTDB-Tk summary tables and '
                'write them to a single tab-separated file')

parser.add_argument('-b', '--bac',
                    help="Path to gtdbtk.bac120.summary.tsv",
                    required=True)

parser.add_argument('-a', '--arc',
                    help="Path to gtdbtk.ar122.summary.tsv",
                    default=None,
                    required=False)

parser.add_argument('-o',
                    '--out',
                    help="Path to write output to",
                    required=True)

args = parser.parse_args()

# Columns copied to the output. Any of these that a summary table does not
# contain are silently skipped rather than raising a KeyError.
cols = ['user_genome', 'classification', 'fastani_reference', 'fastani_ani',
        'closest_placement_reference', 'closest_placement_ani',
        'classification_method', 'note', 'warnings']


def _read_summary(path, keep):
    """Load a tab-separated summary table and keep only the wanted columns.

    Args:
        path: Path to the tab-separated summary file.
        keep: Column names to retain; names absent from the file are ignored.

    Returns:
        A DataFrame holding the columns of the file that are also in *keep*,
        in the order they appear in the file.
    """
    table = pd.read_csv(path, sep="\t", index_col=None)
    # Index.intersection keeps only the names present in both, so a missing
    # column never raises.
    return table[table.columns.intersection(keep)]


df_bac = _read_summary(args.bac, cols)

# The archaeal table is optional; when supplied, its rows are appended after
# the bacterial rows.
if args.arc is not None:
    df_arc = _read_summary(args.arc, cols)
    df = pd.concat([df_bac, df_arc])
else:
    df = df_bac

# The row index is not written to the output file.
df.to_csv(args.out, sep="\t", index=False)