This repository has been archived by the owner on Nov 19, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
tar-biomart.R
96 lines (86 loc) · 2.66 KB
/
tar-biomart.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#' Retrieve gene or transcript annotation from Ensembl BioMart
#'
#' Connects to the Ensembl BioMart for the requested dataset/release and
#' returns one row per queried identifier with its genomic location and gene
#' name. When the mart exposes them, Entrez gene identifiers and
#' UniProt/Swiss-Prot accessions are added as extra columns.
#'
#' Multiple values returned for the same identifier are de-duplicated and
#' collapsed into a single `";"`-separated string; identifiers without any
#' value for a column get `NA`.
#'
#' @param ensembl_id Vector of Ensembl identifiers. A trailing `.<suffix>`
#'   (e.g. the version in `ENSG00000141510.17`) is removed before querying.
#' @param rna_level Name of the BioMart attribute/filter the identifiers
#'   belong to: `"ensembl_gene_id"` or `"ensembl_transcript_id"`.
#' @param organism Ensembl dataset to query.
#' @param version Ensembl release, forwarded to `biomaRt::useEnsembl()` and
#'   used in the `ensembl_version` label.
#' @param build Genome build. `37` is forwarded as `GRCh = 37`; any other
#'   value uses the default (`GRCh = NULL`). Also used in the
#'   `ensembl_version` label, so it must be a whole number.
#'
#' @return A `data.table` keyed by nothing in particular, with the column
#'   named by `rna_level`, then `chromosome_name`, `start_position`,
#'   `end_position`, `external_gene_name` (all collapsed to character),
#'   `ensembl_version` (`"GRCh<build>-<version>"`), and, when available,
#'   `entrezgene_id` and/or `uniprotswissprot`.
#'
#' @import data.table
#' @import biomaRt
#' @import httr
get_biomart_information <- function(
  ensembl_id,
  rna_level = c("ensembl_gene_id", "ensembl_transcript_id"),
  organism = c(
    "hsapiens_gene_ensembl",
    "mmusculus_gene_ensembl",
    "rnorvegicus_gene_ensembl"
  ),
  version,
  build = 38
) {
  rna_level <- match.arg(rna_level)
  organism <- match.arg(organism)

  # NOTE(review): this disables TLS peer verification for every subsequent
  # httr request in the R session (it is not restored on exit). Kept as-is
  # because later code may rely on it; consider scoping it or removing it
  # once the Ensembl certificate chain validates in your environment.
  httr::set_config(httr::config(ssl_verifypeer = FALSE))

  connect_to_mart <- function() {
    biomaRt::useEnsembl(
      biomart = "ensembl",
      dataset = organism,
      version = version,
      GRCh = if (build == 37) build else NULL
    )
  }
  # Retry exactly once; a second failure propagates to the caller.
  mart <- tryCatch(connect_to_mart(), error = function(e) connect_to_mart())

  ensembl_build_version <- sprintf("GRCh%d-%s", build, version)

  # Strip the trailing ".<suffix>" first, then de-duplicate, so that
  # differently-versioned copies of one identifier are queried only once.
  query_ids <- list(unique(sub("\\.[^.]*$", "", ensembl_id)))

  # Collapse all distinct, non-missing, non-empty values into one string.
  collapse_values <- function(x) {
    x <- unique(x)
    x <- x[!is.na(x) & x != ""]
    if (length(x) == 0L) NA_character_ else paste(x, collapse = ";")
  }

  # Fetch `attributes` for the queried identifiers, one row per identifier.
  query_biomart <- function(attributes) {
    result <- data.table::setDT(biomaRt::getBM(
      attributes = c(rna_level, attributes),
      filters = rna_level,
      values = query_ids,
      mart = mart
    ))
    result[j = lapply(.SD, collapse_values), by = rna_level]
  }

  # searchAttributes() yields NULL when no attribute matches the pattern.
  has_attribute <- function(name) {
    !is.null(biomaRt::searchAttributes(mart, pattern = name))
  }

  ensembl_dt <- query_biomart(c(
    "chromosome_name",
    "start_position",
    "end_position",
    "external_gene_name"
  ))
  ensembl_dt[j = ensembl_version := ensembl_build_version]

  # Optional annotations are fetched with one query each and gathered in a
  # list. (Probing for local variables with `sapply(names, exists)` does not
  # work: `exists()` is then evaluated from inside `lapply()` and cannot see
  # this function's locals.)
  optional_attributes <- c("entrezgene_id", "uniprotswissprot")
  is_available <- vapply(optional_attributes, has_attribute, logical(1))
  available <- optional_attributes[is_available]
  if (length(available) == 0L) {
    return(ensembl_dt)
  }

  extra_dt <- Reduce(
    function(x, y) merge(x = x, y = y, by = rna_level, all = TRUE),
    lapply(available, query_biomart)
  )

  merge(x = ensembl_dt, y = extra_dt, by = rna_level, all.x = TRUE)
}