forked from MBrooks313/R_codes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GOANA_Summary.R
100 lines (73 loc) · 3.38 KB
/
GOANA_Summary.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
GOANA_Summary <- function(output = "Output", name.pre=enrich.names, name.suf="_GOANA.csv", ontology = "BP", FDR=0.01) {
##This program was written to reduce the inherent redundancy associated with Gene Ontology and Kegg enrichment analysis.
##It examines the parent-child relationship of significantly enriched terms and reports back the leaf (end-of-branch) term
##as the representative enriched ontology for each branch enriched.
##This is the 2nd of 3 programs for GO/KEGG enrichment.
##Written by Matthew Brooks, last modified March 23rd, 2017
####USAGE####
# output = prefix of the output file name
# name.pre = names from the list (or list-of-lists) from gene.list of the GOANA.R function, prefix for import file
# name.suf = suffix for import file
# ontology = 'BP', 'CC', 'MF', or 'KG' for ontology to summarize
# FDR = FDR cutoff to subset prior to examining the parent-child relationship
#
# Examples:
# Output from GOANA.R to be used as input: list1_GOANA.csv
#
# GOANA_Summary(output="GOANA-BP", name.pre = names(cl.list), name.suf = "_GOANA.csv", ontology = "BP", FDR=0.01)
# GOANA_Summary(output="GOANA-CC", name.pre = names(cl.list), name.suf = "_GOANA.csv", ontology = "CC", FDR=0.01)
# GOANA_Summary(output="GOANA-MF", name.pre = names(cl.list), name.suf = "_GOANA.csv", ontology = "MF", FDR=0.01)
# GOANA_Summary(output="KG", name.pre = names(cl.list), name.suf = "_KGANA.csv", ontology = "KG", FDR=0.01)
#############
require(GO.db)
#Load ontology
if (ontology == "BP") GOCHILDREN <- GOBPCHILDREN
if (ontology == "CC") GOCHILDREN <- GOCCCHILDREN
if (ontology == "MF") GOCHILDREN <- GOMFCHILDREN
global.list <- lapply(1:length(name.pre), function(x){
# Get input, filter for ontology, and calculate FDR
input <- read.csv(paste(name.pre[x], name.suf, sep="")) #Import of the output
if (ontology == "KG") {
input <- input[input$DE > 0,]
} else {
input <- input[input$Ont == ontology & input$DE > 0,]
}
input <- input[input$FDR <= FDR,]
kg <- data.frame()
if (ontology == "KG") {
kg <- rbind(kg, input)
return(kg)
} else {
## Get Top End-of-Branch GO Term in Each Cluster
# Loop for each cluster
top <- data.frame()
tmp.GO <- as.character(input[,1]) #List of GO terms
#Loop for each cluster with a significant GO term
if (length(tmp.GO) > 0){
tmp.eob <- c()
#Parse for single GO term
if (length(tmp.GO) == 1){
tmp.cluster <- input[input$X == tmp.GO,] #If only a single sig GO term
#Parse for more than one sig GO term
} else{
for (k in 1:length(tmp.GO)){
if (!is.null(GOTERM[[tmp.GO[k]]])) { tmp.offspring <- unique(unlist(mget(tmp.GO[k], GOCHILDREN)))
#Keep only End-of-Branch GO Terms
if (sum(tmp.offspring %in% tmp.GO) == 0) {tmp.eob <- c(tmp.eob, tmp.GO[k])}
}
}
tmp.cluster <- input[input$X %in% tmp.eob,] #Get results back
}
top <- rbind(top, tmp.cluster)
}
return(top)
}
})
names(global.list) <- name.pre
#Write output file
if (ontology == "KG"){
write.csv(do.call("rbind", global.list), file=paste(output ,"_KG_Summary.csv", sep=""))
} else {
write.csv(do.call("rbind", global.list), file=paste(output ,"_GO_Summary.csv", sep=""))
}
}