-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_random_fs.R
96 lines (72 loc) · 2.75 KB
/
test_random_fs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Select random features: for each feature count and each seed, this script
# calls random_feature_selection() and saves its result as an .rds file.
# NOTE(review): random_feature_selection() is presumably defined in the file
# sourced here. The script also uses %<>% / %>%, dplyr verbs and str_subset()
# without any visible library() call -- presumably the sourced file attaches
# magrittr, dplyr and stringr; confirm.
source("random_feature_selection.R")
# Number of random draws (one seed each) per feature count.
# NOTE(review): the original line read `N <- 1#0`, i.e. 10 with the trailing
# zero commented out; the effective value 1 is kept -- confirm which is wanted.
N <- 1
# Fix the RNG state so the same seeds are generated on every run.
set.seed(seed = 42)
# One distinct seed per draw, sampled without replacement from 1..10000.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
seeds <- sample(1:10000, N, replace = FALSE)
# Feature counts to draw: 100, 200, ..., 1500.
n.feats <- seq(from = 100, to = 1500, by = 100)
#data.filename <- "Pf_Gustafsdottir.rds"
#data.filename <- "../../input/repurposing/repurposing_normalized.rds"
#pf <- readRDS(file.path("..", "..", "input", "BBBC022_2013", "old", data.filename))
#Pf <- readRDS(data.filename)
# select dose at 10um
#Pf %<>% filter(Metadata_mmoles_per_liter == 10)
# there are two compounds that are removed because no MOA are associated and also DMSO
#Pf %<>% filter(!is.na(Metadata_moa))
# remove the toxic compound (BRD-K60230970-001-10-0, as in the active filter further down; the "<id>@<dose>" literal used here was garbled)
#Pf %<>% filter(Metadata_broad_sample != "[email protected]")
# remove features with NA
#Pf <- Pf[ , colSums(is.na(Pf)) == 0] # from 1784 to 1615
#metadata <-
# colnames(Pf) %>% str_subset("^Metadata_") # loosing 1 metadata.
#variables <-
# colnames(Pf) %>% str_subset("^Cells_|^Cytoplasm_|^Nuclei_")
# Load the normalized repurposing profiles.
pf <- readRDS("../../input/repurposing/repurposing_normalized.rds")

# The fixed 10 um dose filter is disabled; doses are ranked per compound below.
#pf %<>% filter(Metadata_mmoles_per_liter == 10)

# Row filters followed by sample-ID bookkeeping:
#  - drop rows without an MOA annotation (original note: this removes two
#    compounds and also DMSO)
#  - drop compound "BRD-K60230970-001-10-0" (original note: it is toxic)
#  - keep the bare sample ID in Metadata_broad_sample_id, then rewrite
#    Metadata_broad_sample as "<sample>@<dose>"
pf <- pf %>%
  filter(
    !is.na(Metadata_moa),
    Metadata_broad_sample != "BRD-K60230970-001-10-0"
  ) %>%
  mutate(
    Metadata_broad_sample_id = Metadata_broad_sample,
    Metadata_broad_sample = paste(
      Metadata_broad_sample,
      Metadata_mmoles_per_liter,
      sep = "@"
    )
  )

# Keep only the columns that contain no NA
# (original note: from 1784 to 1615 columns).
pf <- pf[, colSums(is.na(pf)) == 0]

# Compounds were not all profiled at the same doses, so number each compound's
# doses in ascending order: take one row per "<sample>@<dose>", sort by dose,
# and count rows within each bare sample ID.
rank <- pf[!duplicated(pf$Metadata_broad_sample), ] %>%
  arrange(Metadata_mmoles_per_liter) %>%
  group_by(Metadata_broad_sample_id) %>%
  mutate(Metadata_rank = row_number()) %>%
  ungroup() %>%
  select(one_of("Metadata_rank", "Metadata_broad_sample"))

# Attach the dose rank to every profile row.
pf <- left_join(pf, rank, by = "Metadata_broad_sample")
# Keep a single dose level per compound: rows whose Metadata_rank equals `dose`.
dose <- 6
Pf <- filter(pf, Metadata_rank == dose)

# Column-name groups of the selected profiles: metadata columns and
# Cells_/Cytoplasm_/Nuclei_ measurement columns.
# NOTE(review): the original comment here said "loosing 1 metadata" -- unclear
# which column is meant; confirm. Neither vector is referenced again in this
# script; check whether random_feature_selection() reads them as globals.
metadata <- str_subset(colnames(Pf), "^Metadata_")
variables <- str_subset(colnames(Pf), "^Cells_|^Cytoplasm_|^Nuclei_")
# For every feature count in `n.feats` and every seed in `seeds`, call
# random_feature_selection() on the dose-filtered profiles `Pf` and store the
# returned object as
#   <out.dir>/random_<n>_dose<dose>_seed_<s>.rds

# Output directory, hoisted out of the loops because it never changes.
out.dir <- "../../input/repurposing/profile/random/feat_names"
# saveRDS() does not create missing directories, so make sure the target exists
# (no-op, without a warning, when it is already there).
dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)

for (n in n.feats) {
  # progress output: which feature count is being processed
  print("number of features")
  print(n)
  for (s in seeds) {
    pf.random <- random_feature_selection(Pf, nb.feat = n, seed = s)
    # same path string as before: out.dir and the file name joined by "/"
    filename.save <- file.path(
      out.dir,
      paste0("random_", n, "_dose", dose, "_seed_", s, ".rds")
    )
    saveRDS(pf.random, filename.save)
  }
}