-
Notifications
You must be signed in to change notification settings - Fork 0
/
music.R
55 lines (48 loc) · 1.67 KB
/
music.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
library(Biobase)
library(MuSiC)
library(dplyr)
library(rjson)
# Locate the experiment's dataset folder from the global config and make it
# the working directory; the file names discovered here are relative to it.
config <- fromJSON(file = "configs/glob_config.json")
workdir <- paste0(config[["experiment_folder"]], "/results/datasets")
setwd(workdir)
files <- list.files()

# Return the last element of `candidates` whose name contains `pattern` as a
# literal substring, or `default` when nothing matches. Taking the last hit
# mirrors a sequential scan in which later files overwrite earlier ones.
find_last_match <- function(candidates, pattern, default = NULL) {
  hits <- candidates[grepl(pattern, candidates, fixed = TRUE)]
  if (length(hits) == 0L) {
    return(default)
  }
  hits[[length(hits)]]
}

test_path <- find_last_match(files, "test")
ref_counts_path <- find_last_match(files, "counts")
ref_celltypes_path <- find_last_match(files, "celltypes")
# The subject file is optional; FALSE is the "no subject file" sentinel.
subject_path <- find_last_match(files, "subject", default = FALSE)

# Fail early with a descriptive message (e.g. for an empty folder) instead of
# an "object not found" error further down the script.
required_paths <- list(
  test = test_path,
  counts = ref_counts_path,
  celltypes = ref_celltypes_path
)
is_missing <- vapply(required_paths, is.null, logical(1))
if (any(is_missing)) {
  stop(
    "No file in '", workdir, "' has a name containing: ",
    paste(names(required_paths)[is_missing], collapse = ", "),
    call. = FALSE
  )
}
# Bulk ----
# Read the bulk table (the first CSV column supplies the row names), convert
# it to a matrix and wrap it in a Biobase ExpressionSet.
df_bulk <- read.csv(file = test_path, row.names = 1)
bulk_matrix <- as.matrix(df_bulk)
eset_bulk <- ExpressionSet(assayData = bulk_matrix)
# single cell ----
# Reference counts and per-cell annotations; in both files the first CSV
# column supplies the row names.
df <- read.csv(ref_counts_path, row.names = 1)
df_celltypes <- read.csv(ref_celltypes_path, row.names = 1)

# Every cell is assigned to one placeholder donor unless a subject file was
# discovered. `subject_path` is either the FALSE sentinel or a file name, and
# `if ("some_file.csv")` is an error in R ("argument is not interpretable as
# logical"), so test the type instead of using the value as the condition.
df_celltypes$subject <- rep("donor1", nrow(df_celltypes))
if (is.character(subject_path)) {
  df_subjects <- read.csv(subject_path, row.names = 1)
  df_celltypes$subject <- df_subjects[, 1]
}

# NOTE(review): the two-name rename assumes the cell-type CSV contributes
# exactly one data column (plus the subject column added above) — confirm.
colnames(df_celltypes) <- c("cellType", "SubjectName")
df_celltypes$cellTypeID <- df_celltypes$cellType
df_celltypes$subjectID <- df_celltypes$SubjectName

# Name the annotation rows after the expression columns so phenoData lines up
# with the assay matrix.
# NOTE(review): presumes both files list the cells in the same order — verify.
rownames(df_celltypes) <- colnames(df)
pheno_data <- AnnotatedDataFrame(data = df_celltypes)
eset <- ExpressionSet(as.matrix(df), phenoData = pheno_data)
# Run MuSiC ----
# Estimate cell-type proportions for the bulk samples from the single-cell
# reference, selecting every cell type present in the annotation table.
# NOTE(review): no `samples` argument is passed even though a "SubjectName"
# column is prepared on the reference annotations — confirm against the
# installed MuSiC version whether `samples = "SubjectName"` is required here.
est.prop <- music_prop(
  bulk.eset = eset_bulk,
  sc.eset = eset,
  clusters = "cellType",
  select.ct = unique(df_celltypes$cellType),
  verbose = FALSE # spelled out: `F` is an ordinary, reassignable variable
)

# Keep the weighted estimates and write them to the current working directory.
preds <- est.prop$Est.prop.weighted
write.csv(preds, "MuSiC_fractions.csv")