-
Notifications
You must be signed in to change notification settings - Fork 0
/
gff_id_editing.py
77 lines (67 loc) · 3.14 KB
/
gff_id_editing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/python
# Script: gff_id_editing.py
# Author: Daniel Desiro'
"""
Description:
Adds an ID tag to every GFF entry that does not already have one.
Usage:
gff_id_editing.py -o <input_gff_file> -n <output_gff_file>
Source:
https://github.com/desiro/gffDB/blob/master/gff_id_editing.py
"""
import os
import argparse
import re
import sys
def main(oldGff, newGff):
    """Copy a GFF file, adding an ID attribute to every entry that has none.

    Each data line is split into its 9 tab-separated columns. If the
    attribute column already holds an ``ID=`` tag the line is written
    unchanged; otherwise an ID of the form ``<parent>:<feature>_<n>`` is
    prepended to the attributes, where ``<parent>`` is the value of the
    ``Parent``/``parent`` tag (commas replaced by ``-``) or ``noParNoID``
    if there is no parent, and ``<n>`` counts how often that base has
    been used so far.

    Lines starting with ``#`` and blank lines are not copied to the output.

    Args:
        oldGff: path of the GFF file to read.
        newGff: path of the GFF file to write.

    Raises:
        SystemExit: with a message on stderr and a non-zero exit status if
            a data line does not have exactly 9 tab-separated columns.
    """
    # how often each ID value (existing or generated base) has been seen
    counter = {}
    with open(oldGff, 'r') as o_file, open(newGff, 'w') as n_file:
        for lineNum, line in enumerate(o_file, 1):
            # comment and directive lines are dropped from the output
            if line.startswith('#'):
                continue
            line = line.strip()
            # blank lines carry no entry; skip them instead of aborting
            if not line:
                continue
            fields = line.split('\t')
            # check if the gff file is consistent
            if len(fields) < 9:
                sys.exit("Error: GFF file line " + str(lineNum) + " has less than 9 entries")
            if len(fields) > 9:
                sys.exit("Error: GFF file line " + str(lineNum) + " has more than 9 entries")
            feature = fields[2]
            attribute = fields[8]
            # check if we have an ID in the entry
            hasID = False
            parent = 'noParNoID'
            for att in attribute.split(';'):
                # split on the first '=' only, values may contain '=' themselves
                tag, sep, value = att.partition('=')
                if not sep:
                    continue
                if tag == 'ID':
                    # update ID counter list
                    counter[value] = counter.get(value, 0) + 1
                    hasID = True
                elif tag in ('parent', 'Parent'):
                    # use '-' to concatenate multiple parent features
                    parent = value.replace(',', '-')
            # add ID if we have none
            if not hasID:
                # create ID base and count how often it has been used
                base = parent + ':' + feature
                count = counter.get(base, 0) + 1
                counter[base] = count
                newID = 'ID=' + base + '_' + str(count)
                if attribute == '.':
                    # '.' marks an empty attribute column; replace it
                    # rather than emitting the malformed 'ID=...;.'
                    fields[8] = newID
                else:
                    fields[8] = newID + ';' + attribute
            # create new entry
            n_file.write('\t'.join(fields) + '\n')
if __name__ == '__main__':
    # Command-line entry point: read the input and output GFF paths and run
    # main() on them. 'prog' is the name shown in --help and --version.
    parser = argparse.ArgumentParser(
        prog='gff_id_editing.py',
        description='Adds ID tags to every gff entry.',
        prefix_chars='-+',
        epilog="",
    )
    parser.add_argument('--version', action='version', version='%(prog)s 0.1')
    parser.add_argument('--old_gff', '-o', dest='oldGff', required=True, help='input GFF file')
    parser.add_argument('--new_gff', '-n', dest='newGff', required=True, help='output GFF file')
    options = parser.parse_args()
    main(options.oldGff, options.newGff)