-
Notifications
You must be signed in to change notification settings - Fork 0
/
Reformat_MPRAnalyze.py
76 lines (67 loc) · 1.76 KB
/
Reformat_MPRAnalyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#######################
### Author: Xun Chen, Ph.D.
### Email: [email protected] or [email protected]
### ORCID: https://orcid.org/0000-0003-0327-1888
### Date: 2021/3/16
###
#######################
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import gzip
import re
import sys
import getopt
import numpy as np
### define variables
USAGE = (
    "Usage: Reformat_MPRAnalyze.py -i <insertFile> -b <bcFile> -c <cellFile> "
    "-m <maxbc> -t <typeCol>"
)

# Every accepted spelling of a value-taking option -> the setting it fills.
_OPTION_TO_SETTING = {
    '-i': 'insertFile', '--insert': 'insertFile',
    '-b': 'bcFile', '--bcFile': 'bcFile',
    '-m': 'maxbc', '--maxbc': 'maxbc',
    '-c': 'cellFile', '--cell': 'cellFile',
    '-t': 'typeCol', '--typeCol': 'typeCol',
}
_INTEGER_SETTINGS = ('maxbc', 'typeCol')
_REQUIRED_SETTINGS = ('insertFile', 'bcFile', 'cellFile', 'maxbc', 'typeCol')


def _parse_args(argv):
    """Parse the command line into a dict of settings.

    argv: list of arguments without the program name.

    Returns a dict with keys insertFile, bcFile, cellFile (paths as given)
    and maxbc, typeCol (converted to int).

    Exits with the usage text on -h/--help (status 0), and with an error
    message plus the usage text (non-zero status) on an unknown option, a
    non-integer maxbc/typeCol, or a missing required option.
    """
    try:
        # 'h' takes no value; every other short option requires one.
        opts, _positional = getopt.getopt(
            argv,
            'hi:b:c:t:m:',
            ['help', 'insert=', 'bcFile=', 'maxbc=', 'cell=', 'typeCol='],
        )
    except getopt.GetoptError as err:
        sys.exit("%s\n%s" % (err, USAGE))

    settings = {}
    for opt_name, opt_value in opts:
        if opt_name in ('-h', '--help'):
            print(USAGE)
            sys.exit()
        setting = _OPTION_TO_SETTING[opt_name]
        if setting in _INTEGER_SETTINGS:
            try:
                opt_value = int(opt_value)
            except ValueError:
                sys.exit("%s expects an integer, got %r\n%s"
                         % (opt_name, opt_value, USAGE))
        settings[setting] = opt_value

    # Fail here with a readable message instead of a NameError/KeyError later.
    missing = [name for name in _REQUIRED_SETTINGS if name not in settings]
    if missing:
        sys.exit("missing required option(s): %s\n%s"
                 % (", ".join(missing), USAGE))
    return settings


def _data_rows(path, skip_header):
    """Yield the whitespace-separated columns of each non-blank line of path.

    When skip_header is true, lines whose first column is the literal
    "Insert" are dropped. Blank lines are always dropped so that a trailing
    empty line cannot create an empty ID or an IndexError downstream.
    """
    with open(path, 'rt') as handle:
        for line in handle:
            columns = line.split()
            if not columns:
                continue
            if skip_header and columns[0] == "Insert":
                continue
            yield columns


#### insert list
def _load_inserts(path, maxbc):
    """Return {insert ID: row}; a row is [ID, 0, 0, ...] with maxbc zero slots.

    The insert ID is the first column of each line; remaining columns are
    ignored. Slot 0 of a row holds the ID, slots 1..maxbc hold the values.
    """
    table = {}
    for columns in _data_rows(path, skip_header=False):
        insert_id = columns[0]
        table[insert_id] = [insert_id] + [0] * maxbc
    return table


#### bc list
def _load_barcode_slots(path):
    """Return {"<column 0>|<column 1>": column 2} for every data line of path.

    Column 2 is kept as text; it is converted to an int slot number only
    when a cell-file line actually refers to that key.
    """
    slots = {}
    for columns in _data_rows(path, skip_header=True):
        slots[columns[0] + "|" + columns[1]] = columns[2]
    return slots


#### cell list
def _fill_table(table, slots, path, type_col, maxbc):
    """Copy column type_col of each cell-file line into its row and slot.

    The row is selected by column 0, and the slot by looking up
    "<column 0>|<column 2>" in slots. The value is stored as text, unchanged.

    Raises KeyError when a line names an insert or insert/barcode pair that
    was not loaded, and IndexError when the slot is outside 1..maxbc (slot 0
    would overwrite the row label and a negative slot would wrap around).
    """
    # NOTE(review): an old debug line in this script suggests the cell file
    # columns are Insert, TotalBCperInsertOriginal, BC, DNAcount, RNAcount;
    # confirm against the producer of that file.
    for columns in _data_rows(path, skip_header=True):
        insert_id = columns[0]
        slot = int(slots[insert_id + "|" + columns[2]])
        if not 1 <= slot <= maxbc:
            raise IndexError(
                "barcode slot %d for %s|%s is outside 1..%d"
                % (slot, insert_id, columns[2], maxbc))
        table[insert_id][slot] = columns[type_col]


def _print_table(table, maxbc):
    """Print a tab-separated header (insert, 1..maxbc) and then one line per row."""
    print("insert" + "\t" + "\t".join(map(str, range(1, maxbc + 1))))
    for row in table.values():
        print("\t".join(map(str, row)))


def main(argv=None):
    """Build and print the insert-by-barcode-slot table.

    argv: argument list without the program name; defaults to sys.argv[1:].
    """
    settings = _parse_args(sys.argv[1:] if argv is None else argv)
    maxbc = settings['maxbc']
    table = _load_inserts(settings['insertFile'], maxbc)
    slots = _load_barcode_slots(settings['bcFile'])
    _fill_table(table, slots, settings['cellFile'], settings['typeCol'], maxbc)
    _print_table(table, maxbc)


if __name__ == "__main__":
    main()