-
Notifications
You must be signed in to change notification settings - Fork 0
/
dentate_umaps_reg_plots.py
116 lines (96 loc) · 5.32 KB
/
dentate_umaps_reg_plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
import scanpy as sc
import glob, os
import matplotlib
import pandas as pd
import beta_vae
import shutil
import re
from reg_plot import *
def generate_simulated_pca(path, actual_data, clust_typ, source_cell, sim_data, first_cell):
    """Draw a PCA scatter of source, target and predicted cells.

    Cells of ``actual_data`` whose ``obs["clusters"]`` equals ``clust_typ``
    (target) or ``source_cell`` (source) are combined with every cell of
    ``sim_data``.  Each group receives a single ``cell_type`` label:
    ``"Target_<clust_typ>"``, ``"Source_<source_cell>"`` or ``"Predicted"``.

    Args:
        path: Accepted but not used by this function.
        actual_data: Annotated data object with an ``obs["clusters"]`` column.
        clust_typ: Cluster label selecting the target cells.
        source_cell: Cluster label selecting the source cells.
        sim_data: Simulated cells; relabelled as ``"Predicted"``.
        first_cell: Name fragment placed at the start of the figure suffix.

    Returns:
        None.  The figure is produced by ``sc.pl.pca`` with a ``save`` suffix
        of the form ``_<first_cell>_to_<clust_typ>_celltypes.pdf``.
    """

    def _with_label(cells, label):
        # Rebuild a bare AnnData: same matrix and gene names, one label column.
        return sc.AnnData(
            cells.X,
            obs={"cell_type": [label] * len(cells)},
            var={"var_names": cells.var_names},
        )

    target = _with_label(
        actual_data[actual_data.obs["clusters"] == clust_typ],
        "Target_" + clust_typ,
    )
    source = _with_label(
        actual_data[actual_data.obs["clusters"] == source_cell],
        "Source_" + source_cell,
    )
    predicted = _with_label(sim_data, "Predicted")

    # Stack in the order source -> target -> predicted.
    combined_data = source.concatenate(target).concatenate(predicted)

    sc.pp.neighbors(combined_data)
    sc.tl.pca(combined_data, svd_solver='arpack')

    figure_suffix = "_" + first_cell + "_to_" + clust_typ + "_celltypes.pdf"
    sc.pl.pca(
        combined_data,
        color=["cell_type"],
        legend_fontsize=12,
        palette=['r', 'k', 'y'],
        frameon=True,
        s=35,
        save=figure_suffix,
    )
def generate_simulated_umaps(path, actual_data, clust_typ, source_cell, sim_data, first_cell):
    """Draw two UMAP figures of source, target and predicted cells.

    The combined data set is assembled from the ``clust_typ`` (target) and
    ``source_cell`` (source) clusters of ``actual_data`` plus every cell of
    ``sim_data`` (labelled ``"Predicted"``).  The first figure is coloured by
    the ``cell_type`` label; the second by the first three gene names listed
    for ``clust_typ`` in ``actual_data.uns["rank_genes_groups"]["names"]``.

    Args:
        path: Accepted but not used by this function.
        actual_data: Annotated data object with ``obs["clusters"]`` and a
            populated ``uns["rank_genes_groups"]`` entry.
        clust_typ: Cluster label selecting the target cells.
        source_cell: Cluster label selecting the source cells.
        sim_data: Simulated cells; relabelled as ``"Predicted"``.
        first_cell: Name fragment placed at the start of both figure suffixes.

    Returns:
        None.  Both figures are produced by ``sc.pl.umap`` with ``show=True``
        and a ``save`` suffix.
    """

    def _with_label(cells, label):
        # Rebuild a bare AnnData: same matrix and gene names, one label column.
        return sc.AnnData(
            cells.X,
            obs={"cell_type": [label] * len(cells)},
            var={"var_names": cells.var_names},
        )

    target = _with_label(
        actual_data[actual_data.obs["clusters"] == clust_typ],
        "Target_" + clust_typ,
    )
    ranked_names = actual_data.uns["rank_genes_groups"]['names']
    top_genes = list(ranked_names[clust_typ])
    source = _with_label(
        actual_data[actual_data.obs["clusters"] == source_cell],
        "Source_" + source_cell,
    )
    predicted = _with_label(sim_data, "Predicted")

    # Stack in the order source -> target -> predicted.
    combined_data = source.concatenate(target).concatenate(predicted)

    sc.pp.neighbors(combined_data)
    sc.tl.umap(combined_data)

    # Options shared by both figures.
    shared_opts = dict(legend_fontsize=12, show=True, frameon=True, s=35)
    suffix_stem = "_" + first_cell + "_to_" + clust_typ

    sc.pl.umap(
        combined_data,
        color=["cell_type"],
        save=suffix_stem + "_celltypes.png",
        **shared_opts,
    )
    sc.pl.umap(
        combined_data,
        color=top_genes[:3],
        save=suffix_stem + "_top_genes.png",
        **shared_opts,
    )
def generate_simulated_reg_plots(path, actual_data, clust_typ, cells):
    """Compare every prediction file in ``path`` against the real target cells.

    For each ``*.h5ad`` file found in ``path`` the function:

    1. loads the predicted cells,
    2. picks the source cluster as the first entry of ``cells`` whose text
       occurs in the file name,
    3. ranks genes between the target and source clusters with a t-test,
    4. calls ``reg_mean_plot`` on actual-vs-predicted target cells, and
    5. when the second value returned by ``reg_mean_plot`` is at least 0.9,
       also draws the UMAP and PCA figures for that prediction.

    The process working directory is switched to ``path`` while the files are
    handled and is always switched back, even when an error occurs.

    Args:
        path: Directory containing the ``*.h5ad`` prediction files; figures
            saved with relative paths end up relative to this directory.
        actual_data: Annotated data object with an ``obs["clusters"]`` column.
        clust_typ: Cluster label of the target cells.
        cells: Iterable of cluster names used to recognise the source cluster
            from each file name (substring match, first hit wins).

    Returns:
        A list with one ``[first_cell, reg_val[0], reg_val[1]]`` entry per
        file, where ``first_cell`` is the file name up to its first ``.`` and
        ``reg_val`` is the result of ``reg_mean_plot``.

    Raises:
        IndexError: If no entry of ``cells`` occurs in a file name.
    """
    old_path = os.getcwd()
    os.chdir(path)
    try:
        target_cells = actual_data[actual_data.obs["clusters"] == clust_typ]

        # Both target wrappers depend only on the arguments, so build them
        # once instead of on every loop iteration.
        actual_data_temp = sc.AnnData(
            target_cells.X,
            obs={"comparison_typ": ["actual"] * len(target_cells)},
            var={"var_names": target_cells.var_names},
        )
        target_data = sc.AnnData(
            target_cells.X,
            obs={"clusters": [clust_typ] * len(target_cells)},
            var={"var_names": target_cells.var_names},
        )

        reg_mean_vals = []
        for file in glob.glob("*.h5ad"):
            print(file)
            adata = sc.read(file)
            print(adata)

            pred_data = sc.AnnData(
                adata.X,
                obs={"comparison_typ": ["pred"] * len(adata)},
                var={"var_names": adata.var_names},
            )
            first_cell = file[0:file.find('.')]

            # First cluster name that appears in the file name; raises
            # IndexError when none does.
            source_cell = [name for name in cells if name in file][0]

            plot_data = actual_data_temp.concatenate(pred_data)

            source_subset = actual_data[actual_data.obs["clusters"] == source_cell]
            source_data = sc.AnnData(
                source_subset.X,
                obs={"clusters": [source_cell] * len(source_subset)},
                var={"var_names": source_subset.var_names},
            )

            # Rank genes target-vs-source to choose which genes get annotated.
            gene_data = target_data.concatenate(source_data)
            sc.tl.rank_genes_groups(gene_data, 'clusters', method="t-test")
            top_100_gene_list = list(
                gene_data.uns["rank_genes_groups"]['names'][clust_typ]
            )

            reg_val = reg_mean_plot(
                plot_data,
                condition_key="comparison_typ",
                axis_keys={"x": "actual", "y": "pred"},
                path_to_save="./reg_mean_" + file + "_TO_" + clust_typ + ".png",
                legend=False,
                labels={"x": "actual", "y": "pred"},
                show=False,
                gene_list=top_100_gene_list[:5],
                top_100_genes=top_100_gene_list,
                fontsize=14,
                textsize=14,
            )
            reg_mean_vals.append([first_cell, reg_val[0], reg_val[1]])

            # Only well-matching predictions get the extra embedding figures.
            if reg_val[1] >= 0.9:
                generate_simulated_umaps(
                    path, actual_data, clust_typ, source_cell, adata, first_cell
                )
                # The simulated cells are the loaded `adata`; the previous
                # code passed an undefined name here and raised NameError.
                generate_simulated_pca(
                    path, actual_data, clust_typ, source_cell, adata, first_cell
                )
    finally:
        # Restore the caller's working directory even if a file fails.
        os.chdir(old_path)
    return reg_mean_vals