-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_dots.py
100 lines (75 loc) · 3 KB
/
generate_dots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import collections
import glob, os
import multiprocessing as mp
import time
import numpy as np
import numpy.ma as ma
import pandas as pd
np.seterr(invalid='ignore')
import matplotlib
import matplotlib.pyplot as plt
import cooler
import cooltools
import cooltools.lib.plotting
import bioframe
import mitosis_manager
# resolution = 5_000
# flank_bp = 20*resolution
resolution = 1_000
flank_bp = 50*resolution
galGal7b_arms = mitosis_manager.get_chromarms()
MIN_ARM_SIZE = int(3e7)
view_df = galGal7b_arms.query(f'end - start >= {MIN_ARM_SIZE}').reset_index(drop=True)
dot_calls = pd.read_csv('dot_list.txt', sep='\t')
mask = dot_calls['region1'].apply(lambda x: x in view_df['name'].unique())
dot_calls = dot_calls.loc[mask].reset_index(drop=True)
### Constructing Dot Pileups
db = mitosis_manager.Dataset()
table = db.get_tables()
# df = mitosis_manager.filter_data(table, {'condition':['WT','SMC2'], 'preferred':True})
df = mitosis_manager.filter_data(table, {'condition':['WT','SMC2','CAPH','CAPH2'], 'preferred':True})
df = mitosis_manager.get_coolers(df, resolution=resolution)
df = mitosis_manager.get_contact_scalings(df, resolution=resolution)
df = mitosis_manager.get_dots(df, resolution=resolution)
dot_scores = collections.defaultdict(list)
for _, row in df.iterrows():
lib_name = row['lib_name']
print(f'{lib_name}:\t', end='\t', flush=True)
pileup = row[f'dots_{resolution}']
clr = cooler.Cooler(row[f'cooler_{resolution}'])
expected = row[f'Ps_{resolution}']
if isinstance(pileup, np.ndarray):
print('Already computed', end='\n', flush=True)
elif expected is None:
print('Expected file not found...', end='\t', flush=True)
time.sleep(0.5)
print('Skipping', end='\n', flush=True)
continue
else:
print(f'Computing {resolution//1000} kb pileup...', end='\t', flush=True)
mask = expected['region1'].apply(lambda s: s in view_df['name'].unique())
expected = expected.loc[mask]
try:
stack = cooltools.pileup(clr,
dot_calls,
view_df=view_df,
expected_df=expected,
flank=flank_bp,
nproc=10
)
except Exception as e:
print(f'Error!\n{e}', end='\n', flush=True)
continue
pileup = np.nanmean(stack, axis=2)
mitosis_manager.save_dots(pileup, row['lib_name'], resolution)
print('DONE!', end='\n', flush=True)
size = pileup.shape[0]
mid = size//2
half_width = 1
slicing = slice(mid - half_width, mid + half_width + 1)
score = np.mean(pileup[slicing, slicing])
dot_scores['lib_name'].append(lib_name)
dot_scores[f'dot_score_{resolution}'].append(score)
dot_scores = pd.DataFrame(dot_scores)
dot_scores = table[['lib_name']].merge(dot_scores, how='outer')
db.modify_table('dot_score', dot_scores)