Skip to content

Commit

Permalink
Find BioSample ID regardless of GCA or GCF
Browse files Browse the repository at this point in the history
  • Loading branch information
stitam committed Jun 28, 2024
1 parent 52a035b commit d5d8c19
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 27 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ Suggests:
knitr,
rmarkdown,
testthat
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export(ncbi_download_genome)
export(ncbi_get_meta)
export(ncbi_get_summary)
export(ncbi_get_uid)
export(ncbi_link)
export(ncbi_link_uid)
export(ncbi_parse)
export(ncbi_parse_assembly_xml)
Expand Down
43 changes: 19 additions & 24 deletions R/ncbi_link.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
#' ncbi_link("GCF_000002435.2", from = "assembly", to = "biosample")
#' ncbi_link("SAMN02714232", from = "biosample", to = "assembly")
#' }
#'
#'
#' @export
ncbi_link <- function(
query,
from,
Expand Down Expand Up @@ -61,34 +60,30 @@ ncbi_link_assembly_biosample <- function(
batch_size = batch_size,
verbose = verbose
)
res <- list()
for (i in 1:nrow(from_uid$web_history)) {
WH <- list(
"WebEnv" = from_uid$web_history$WebEnv[i],
"QueryKey" = from_uid$web_history$QueryKey[i]
)
class(WH) <- c("web_history", "list")
hit <- wrap(
"entrez_summary",
package = "rentrez",
verbose = verbose,
db = "assembly",
web_history = WH
)
if ("esummary" %in% class(hit)) {
hit <- list(hit)
}
res[[i]] <- hit
}
res <- unlist(res, recursive = FALSE)
res <- ncbi_get_summary(query = from_uid, verbose = verbose)
ids <- data.frame(
assembly = unname(sapply(res, function(x) x$assemblyaccession)),
assembly_gbk = unname(sapply(res, function(x) x$synonym$genbank)),
assembly_rsq = unname(sapply(res, function(x) x$synonym$refseq)),
biosample = unname(sapply(res, function(x) x$biosampleaccn))
)
ids$assembly_rsq <- ifelse(ids$assembly_rsq == "", NA, ids$assembly_rsq)
index_gbk <- which(ids$assembly_gbk %in% assembly[which(!is.na(assembly))])
index_rsq <- which(ids$assembly_rsq %in% assembly[which(!is.na(assembly))])
out <- data.frame(
assembly = assembly
)
out <- dplyr::left_join(out, ids, by = "assembly")
out_gbk <- dplyr::right_join(
out,
ids[index_gbk, c("assembly_gbk", "biosample")],
by = c("assembly" = "assembly_gbk")
)
out_rsq <- dplyr::right_join(
out,
ids[index_rsq, c("assembly_rsq", "biosample")],
by = c("assembly" = "assembly_rsq")
)
out_both <- dplyr::bind_rows(out_gbk, out_rsq)
out <- dplyr::left_join(out, out_both, by = "assembly")
out <- tibble::as_tibble(out)
return(out)
}
Expand Down
2 changes: 0 additions & 2 deletions man/ncbi_link.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions tests/testthat/test-ncbi_link.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Load the `examples` dataset; the tests below read `examples$assembly` and
# `examples$biosample` from it.
data(examples)

test_that("ncbi_link() assembly to biosample from GCA", {
  # Query with a single GCA-prefixed assembly accession.
  accession <- "GCA_001698945.1"
  linked <- ncbi_link(accession, from = "assembly", to = "biosample")

  # The result must be exactly a tibble with one row and two columns.
  expect_s3_class(linked, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
  expect_equal(nrow(linked), 1)
  expect_equal(ncol(linked), 2)

  # The queried accession is echoed back next to its BioSample accession.
  expect_equal(linked$assembly, accession)
  expect_equal(linked$biosample, "SAMN03175161")
})

test_that("ncbi_link() assembly to biosample from GCF", {
  # Query with a single GCF-prefixed assembly accession.
  accession <- "GCF_001698945.1"
  linked <- ncbi_link(accession, from = "assembly", to = "biosample")

  # The result must be exactly a tibble with one row and two columns.
  expect_s3_class(linked, c("tbl_df", "tbl", "data.frame"), exact = TRUE)
  expect_equal(nrow(linked), 1)
  expect_equal(ncol(linked), 2)

  # The queried accession is echoed back next to its BioSample accession.
  expect_equal(linked$assembly, accession)
  expect_equal(linked$biosample, "SAMN03175161")
})

test_that("ncbi_link() assembly to biosample invalid queries", {
  # Append an unknown name and a missing value to the example accessions.
  query <- c(examples$assembly, "noname", NA)
  linked <- ncbi_link(query, from = "assembly", to = "biosample")

  # Positions checked for the two appended entries.
  # NOTE(review): assumes `examples$assembly` has three elements - confirm.
  invalid <- 4:5

  # Every query element is returned, in order; invalid ones get no BioSample.
  expect_equal(linked$assembly, query)
  expect_equal(linked$assembly[invalid], c("noname", NA_character_))
  expect_equal(linked$biosample[invalid], rep(NA_character_, 2))
})

test_that("ncbi_link() biosample to assembly", {
  query <- c(examples$biosample)
  linked <- ncbi_link(query, from = "biosample", to = "assembly")

  # Look up the assembly value(s) reported for one BioSample accession.
  assembly_for <- function(id) {
    linked$assembly[which(linked$biosample == id)]
  }

  # Every queried BioSample is returned, in order.
  expect_equal(linked$biosample, query)

  # One BioSample resolves to an assembly; the other is expected to be NA.
  expect_equal(assembly_for("SAMN02714232"), "GCF_000695855.3")
  expect_equal(assembly_for("SAMN36356470"), NA_character_)
})

0 comments on commit d5d8c19

Please sign in to comment.