Skip to content

Commit

Permalink
Improve grabbing the name from GFF
Browse files Browse the repository at this point in the history
  • Loading branch information
brwnj committed Jul 27, 2021
1 parent 614537d commit 710ccb7
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions covviz/gff.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,7 +20,12 @@ def parse_gff(path, traces, exclude, ftype="gene", regex="Name="):
trace_name = os.path.basename(path).partition(".gff")[0].partition(".gtf")[0]
with gzopen(path) as fh:
cleaned = filterfalse(lambda i: i[0] == "#", fh)
name_re = re.compile(r"%s([^;]*)" % regex)

if regex != "Name=":
name_re = re.compile(r"(?:%s([^;]*))|(?:gene_name=([^;]*))|(?:Name=([^;]*))|(?:name=([^;]*))" % regex)
else:
name_re = re.compile(r"(?:gene_name=([^;]*))|(?:Name=([^;]*))|(?:name=([^;]*))")

for chrom, entries in groupby(
cleaned, key=lambda i: i.partition("\t")[0].lstrip("chr")
):
Expand Down Expand Up @@ -48,10 +53,11 @@ def parse_gff(path, traces, exclude, ftype="gene", regex="Name="):
start = int(toks[3])
end = int(toks[4])
try:
name = name_re.findall(toks[8])[0]
name = [i for i in name_re.findall(toks[8])[0] if i][0]
name = name.strip('"').strip("'")
except IndexError:
name = ""
# no name pattern matched: fall back to the first item in the semicolon-delimited list
name = re.findall(r"([^;]*)", toks[8])[0]
genes.append([start, end, name])
traces[chrom]["annotations"]["gff"].append([trace_name, genes])
return traces

0 comments on commit 710ccb7

Please sign in to comment.