-
Notifications
You must be signed in to change notification settings - Fork 4
/
myvolcano.py
210 lines (171 loc) · 8.9 KB
/
myvolcano.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import pandas as pd
import numpy as np
from os.path import join as opj
import sys
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, AutoMinorLocator
import altair as alt
def altair_scatter(x, y, hue, data, shape=None, tooltip=[], yscale='linear', xscale='linear', palette=None, size=60, stroke=2, fontsize=14, title='', reversex=False, reversey=False):
    """Interactive Altair point chart of `y` vs. `x`, optionally colored by `hue`.

    Parameters
    ----------
    x, y : str
        Fields in data encoded on the x and y channels.
    hue : str or None
        Field used for a nominal color encoding. If None, no color
        encoding is applied.
    data : pd.DataFrame
        Data passed to alt.Chart.
    shape : str or None
        Field/shorthand encoded on the shape channel. If None, no shape
        encoding is applied.
    tooltip : list
        Passed through as the tooltip encoding. The default list is never
        mutated here.
    yscale, xscale : str
        Scale type for each axis (e.g., 'linear', 'log').
    palette : str or None
        Name of the color scheme handed to alt.Scale(scheme=...).
        Defaults to 'tableau10' when hue is given.
    size, stroke : numeric
        Point size and stroke width of the point mark.
    fontsize : numeric
        Currently unused; kept for backward compatibility.
    title : str
        Chart title.
    reversex, reversey : bool
        Reverse the direction of the respective axis.

    Returns
    -------
    ch : alt.Chart
    """
    # The color channel is driven by `hue`: without a field there is nothing
    # to color by, even if a palette name was supplied.
    if hue is None:
        color_param = alt.Undefined
    else:
        scheme = 'tableau10' if palette is None else palette
        color_param = alt.Color(field=hue,
                                type='nominal',
                                scale=alt.Scale(scheme=scheme),
                                legend=alt.Legend(orient='right'))

    base = alt.Chart(data).mark_point(size=size, strokeWidth=stroke).interactive()
    ch = base.encode(x=alt.X(x, scale=alt.Scale(type=xscale, reverse=reversex)),
                     y=alt.Y(y, scale=alt.Scale(type=yscale, reverse=reversey)),
                     tooltip=tooltip,
                     color=color_param)
    if shape is not None:
        # encode() returns a new chart; the result must be kept or the
        # shape encoding is lost.
        ch = ch.encode(shape=shape)
    return ch.properties(title=title)
def altair_scatter_select(selector, data, scatter_ch):
    """Pair an existing scatter chart with a clickable selector panel.

    A rect-mark chart of `data` with `selector` on the y channel is built;
    its marks are colored black when selected and lightgray otherwise. The
    same multi-selection (on the `selector` field) is applied as a filter to
    `scatter_ch`, and both charts are concatenated horizontally with
    independent color scales.

    Parameters
    ----------
    selector : str
        Field in data that drives the selection.
    data : pd.DataFrame
        Data for the selector panel.
    scatter_ch : alt.Chart
        Chart to be filtered by the selection.

    Returns
    -------
    Horizontally concatenated Altair chart (selector panel, filtered scatter).
    """
    picked = alt.selection_multi(fields=[selector])
    fill = alt.condition(picked, alt.value('black'), alt.value('lightgray'))

    panel = alt.Chart(data).mark_rect().encode(y=selector, color=fill)
    panel = panel.add_selection(picked)

    filtered_scatter = scatter_ch.transform_filter(picked)
    combined = alt.hconcat(panel, filtered_scatter)
    return combined.resolve_scale(color='independent')
"""
def altair_scatter_select(x, y, hue, data, selector, tooltip=[], yscale='linear', xscale='linear', palette=None, size=60, stroke=2, fontsize=14, title='', reversex=False, reversey=False):
# brush = alt.selection(type='single', resolve='global')
# palette = {'Neither':'black', 'Y only':'gold', 'X only':'blue', 'Both':'red'}
if palette is None and not hue is None:
# tmp = sns.color_palette('Set2', n_colors=data[hue].unique().shape[0])
# tmp = ['dodgerblue', 'tomato', 'black', 'green', 'eggplant']
tmp = list(mpl.cm.tab10.colors)
palette = {h:c for h,c in zip(data[hue].unique(), tmp)}
col_dom = [c for c in palette]
col_rng = [palette[c] for c in palette]
# brush = alt.selection_single(resolve='global')
# base = alt.Chart(data).add_selection(brush).mark_point(size=size, strokeWidth=stroke).interactive()
base = alt.Chart(data).mark_point(size=size, strokeWidth=stroke).interactive()
if not palette is None:
#color_param = alt.Color(field=hue, type='nominal', scale=alt.Scale(domain=col_dom, range=col_rng))
color_param = alt.Color(field=hue, type='nominal', scale=alt.Scale(scheme='dark2'), legend=alt.Legend(orient='right'))
# color_param = alt.condition(brush, tmp_color, alt.ColorValue('gray')),
else:
color_param = alt.Undefined
selection = alt.selection_multi(fields=[selector])
#color = alt.condition(selection, alt.Color(field=selector, type='nominal', scale=alt.Scale(scheme='dark2'), legend=None), alt.value('lightgray'))
color = alt.condition(selection, alt.value('black'), alt.value('lightgray'))
selector_ch = alt.Chart(data).mark_rect().encode(y=selector, color=color).add_selection(selection)
#color_param = alt.Color(field=hue, type='nominal', legend=alt.Legend(orient='right'), scale=alt.Scale(scheme='tab10'))
ch = base.encode(x=alt.X(x, scale=alt.Scale(type=xscale, reverse=reversex)),
y=alt.Y(y, scale=alt.Scale(type=yscale, reverse=reversey)),
tooltip=tooltip,
color=color_param).properties(title=title).transform_filter(selection)
# ch = ch.configure_title(fontSize=fontsize).configure_axis(labelFontSize=fontsize-2, titleFontSize=fontsize)
return alt.hconcat(selector_ch, ch).resolve_scale(color='independent')"""
def plot_volcano_altair(df, pvalue_col, or_col, hue_col, ann_cols=[], censor_or=None):
    """Altair volcano plot: effect size (x) vs. p-value (y), both on log scales.

    Parameters
    ----------
    df : pd.DataFrame
    pvalue_col : str
        Column in df plotted on the y-axis.
    or_col : str
        Column in df plotted on the x-axis.
    hue_col : str
        Column in df used for point color.
    ann_cols : list
        Columns shown in the tooltip; or_col and pvalue_col are appended
        when not already present.
    censor_or : float or None
        If given, or_col values are limited to [1/censor_or, censor_or]
        on a copy of df before plotting.

    Returns
    -------
    ch : chart returned by altair_scatter
    """
    if censor_or is not None:
        # Work on a copy so the caller's frame is not modified.
        df = df.copy()
        too_low = df[or_col] < 1/censor_or
        df.loc[too_low, or_col] = 1/censor_or
        too_high = df[or_col] > censor_or
        df.loc[too_high, or_col] = censor_or

    extra_cols = [col for col in (or_col, pvalue_col) if col not in ann_cols]
    tooltip_cols = ann_cols + extra_cols
    return altair_scatter(x=or_col,
                          y=pvalue_col,
                          hue=hue_col,
                          data=df,
                          tooltip=tooltip_cols,
                          xscale='log',
                          yscale='log')
def plot_volcano(df, pvalue_col, or_col, sig_col, ann_col=None, annotate=None, censor_or=None, figsize=(7,5), fontsize=7):
    """Volcano scatter plot of effect size vs. p-value on log-scales.

    Rows flagged by sig_col are drawn in red on top of the remaining rows,
    which are drawn in translucent black.

    Parameters
    ----------
    df : pd.DataFrame
    pvalue_col : str
        Column in df containing p-values
    or_col : str
        Column in df containing odds-ratio or effect size (e.g., fold-change, not log-FC)
    sig_col : str
        Column in df containing bool indicator of significance
    ann_col : str
        Column in df containing strings for annotation
        (needed whenever annotate is not None)
    annotate : None or int
        Number of points to annotate, ranked by OR (top N and bottom N) and
        by p-value (top N); only significant rows are considered.
        0 annotates nothing.
    censor_or : float
        Limit for effect size (OR) column; will censor values and then plot.
    figsize : tuple (float, float)
        Provide figsize to matplotlib
    fontsize : float
        Font size for annotation

    Returns
    -------
    figh : handle
        Matplotlib figure handle
    """
    if censor_or is not None:
        # Censor on a copy so the caller's frame is left untouched.
        df = df.copy()
        df.loc[df[or_col] < 1/censor_or, or_col] = 1/censor_or
        df.loc[df[or_col] > censor_or, or_col] = censor_or

    # Fixed fold-change ticks; values below 1 are labeled as negative
    # fold-changes (e.g., 1/5 is shown as "-5").
    fc_ticks = [5, 4, 3, 2.5, 2, 1.5]
    xticks = [1/x for x in fc_ticks] + [1] + [x for x in fc_ticks[::-1]]
    xtick_labs = [f'-{x}' for x in fc_ticks] + [1] + [f'{x}' for x in fc_ticks[::-1]]

    sig_ind = df[sig_col]

    figh = plt.figure(figsize=figsize)
    axh = figh.add_axes([0.15, 0.15, 0.7, 0.7], xscale='log', yscale='log')
    axh.set_axisbelow(True)
    plt.grid(True, linewidth=1)
    axh.yaxis.set_minor_locator(AutoMinorLocator())

    plt.scatter(df.loc[~sig_ind, or_col],
                df.loc[~sig_ind, pvalue_col],
                color='black', alpha=0.3, s=5, zorder=2)
    plt.scatter(df.loc[sig_ind, or_col],
                df.loc[sig_ind, pvalue_col],
                color='r', alpha=0.9, s=5, zorder=3)
    # Remember the auto-scaled lower y-limit before anything else is drawn.
    yl = plt.ylim()

    if annotate is not None:
        _annotate_volcano_points(df.loc[sig_ind], ann_col, or_col, pvalue_col, annotate, fontsize)

    if censor_or is None:
        # Symmetric (in log-space) limits around 1 with a 10% margin.
        mx_fc = np.exp(np.max(np.abs(np.log(df[or_col]))))
        plt.xlim([1/(1.1*mx_fc), mx_fc*1.1])
    else:
        plt.xlim((1/censor_or, censor_or))
    # Bottom of the axis is 1, top is the previously auto-scaled lower limit.
    plt.ylim((1, yl[0]))
    plt.ylabel(pvalue_col)
    plt.xlabel(or_col)
    # NOTE(review): setting fixed ticks after xlim may widen the x-limits to
    # include every tick -- confirm this is the intended final view.
    plt.xticks(xticks, xtick_labs)
    return figh


def _annotate_volcano_points(sig_df, ann_col, or_col, pvalue_col, n, fontsize):
    """Label the n largest/smallest effect sizes and n smallest p-values on the current axes."""
    n = max(int(n), 0)
    if n == 0 or sig_df.empty:
        # A zero count must not fall through to a "[-0:]" slice, which
        # would select every row.
        return

    # Positional index so rows are identified unambiguously even when
    # annotation labels (or the original index) contain duplicates.
    sig_df = sig_df.reset_index(drop=True)
    by_or = sig_df[or_col].sort_values(ascending=False).index
    by_p = sig_df[pvalue_col].sort_values().index
    chosen = set(by_or[:n]) | set(by_or[-n:]) | set(by_p[:n])

    for pos in sorted(chosen):
        x_val = sig_df.at[pos, or_col]
        y_val = sig_df.at[pos, pvalue_col]
        # Push labels away from the center line at OR = 1.
        if x_val > 1:
            placement = dict(ha='left', va='bottom', xytext=(5, 5))
        else:
            placement = dict(ha='right', va='bottom', xytext=(-5, 5))
        plt.annotate(str(sig_df.at[pos, ann_col]),
                     xy=(x_val, y_val),
                     textcoords='offset points',
                     size=fontsize,
                     **placement)
def _test_data():
    """Return a small random DataFrame for trying out the plotting helpers.

    The frame has 20 rows and three columns: `x` (uniform draws from
    np.random.rand), `y` (draws from np.random.normal) and `category`
    (random choice among 'A', 'B', 'C').
    """
    n_rows = 20
    # Draw order (x, y, category) matters for reproducibility under a seed.
    x_vals = np.random.rand(n_rows)
    y_vals = np.random.normal(size=n_rows)
    labels = np.random.choice(['A', 'B', 'C'], size=n_rows)
    return pd.DataFrame({'x': x_vals, 'y': y_vals, 'category': labels})