Skip to content

Commit

Permalink
Issue #54: UniProt unit test for P01133
Browse files Browse the repository at this point in the history
  • Loading branch information
cycle20 committed Mar 26, 2023
1 parent c1ca510 commit 80f77c8
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 105 deletions.
105 changes: 0 additions & 105 deletions R/dataPatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ main <- function() {
## NOTE: this filter drops some poorly curated compounds
filter(!is.na(source))

checkDataCoverage(clueTable)

clueTable <- targetList %>%
left_join(clueTable) %>%
rowwise() %>%
Expand All @@ -92,8 +90,6 @@ main <- function() {
print(result)
saveRDS(result, file = CLUE.PATCHED.OUTPUT)

# TODO: this needs different logic: checkDataCoverage(result)

warnings()
return(result)
}
Expand Down Expand Up @@ -130,107 +126,6 @@ patch <- function(clueTable) {
}


#' Check gaps in our data set
#'
#' Prints a diagnostic report made of several independent checks. Each check
#' selects a handful of columns, de-duplicates the rows and keeps only the
#' rows matching a "suspicious" condition; the number of matching rows (and
#' the rows themselves, when there are any) is printed. Only the last check
#' ("Preclinical status_source") aborts with an error when it finds rows.
#'
#' @param clueTable Data frame providing the columns read by the checks:
#'   `pert_iname`, `final_status`, `status_source`, `orange_book`,
#'   `pubchem_cid`, `chembl_id`, `inchi_key`, `pert_id`, `ttd_id`,
#'   `drugbank_id` and `source`.
#'
#' @return Invisible NULL
checkDataCoverage <- function(clueTable) {
  separator <- paste(rep("#", 75), collapse = "")
  print(glue::glue("\n\n{separator}"))
  print(glue::glue("{separator}", " !!! START OF DATA INTEGRITY TEST !!!"))
  print(glue::glue("{separator}\n\n"))

  ## internal helper function: report how many offending rows `table` holds
  ## and print them; with `fatal = TRUE` any offending row raises an error.
  checkTable <- function(table, msg, fatal = FALSE) {
    n <- nrow(table)
    prefix <- glue::glue("\n\n>>>>>>>> {msg}: {n} ...")
    if (n > 0) {
      print(glue::glue("{prefix} IS NOT OK!"))
      print(table)
      if (fatal) stop("Unexpected data state")
    } else {
      print(glue::glue("{prefix} IS OK!"))
    }
  }

  ## NOTE: every filter below uses the element-wise `&` on whole columns.
  ## The scalar `&&` is only valid for length-one operands (it is an error
  ## for longer vectors since R 4.3), so it must not be applied to columns
  ## outside of a rowwise() context. filter() drops rows whose condition is
  ## NA, so the row selection matches a per-row `&&` evaluation.

  ## FDA Orange issue
  distinctTable <- clueTable %>%
    select(
      pert_iname,
      # moa,
      final_status,
      status_source,
      orange_book
    ) %>%
    distinct() %>%
    filter(grepl("FDA Orange", status_source))
  checkTable(distinctTable, "FDA Orange")

  ## FDA Orange issue V2
  distinctTable <- clueTable %>%
    select(
      pert_iname,
      final_status,
      status_source,
      orange_book
    ) %>%
    distinct() %>%
    filter(!is.na(orange_book) & is.na(status_source))
  checkTable(distinctTable, "FDA.V2 orange_book has value")

  ## FDA Launched check
  distinctTable <- clueTable %>%
    select(
      pert_iname,
      final_status,
      status_source,
      orange_book
    ) %>%
    distinct() %>%
    filter(
      final_status == "Launched" &
        is.na(orange_book) &
        is.na(status_source)
    )
  checkTable(distinctTable, "FDA Launched check")

  ## PubChem/ChEMBL check
  distinctTable <- clueTable %>%
    select(
      pert_iname,
      final_status,
      pubchem_cid,
      chembl_id,
      inchi_key,
      pert_id,
      ttd_id,
      drugbank_id,
      source,
      status_source
    ) %>%
    distinct() %>%
    filter(is.na(pubchem_cid) & is.na(chembl_id))
  checkTable(distinctTable, "PubChem/ChEMBL check")

  ## Preclinical entries carrying a status source; this is the only fatal
  ## check
  distinctTable <- clueTable %>%
    select(final_status, status_source) %>%
    distinct() %>%
    filter(final_status == "Preclinical" & !is.na(status_source))
  checkTable(distinctTable, "Preclinical status_source", fatal = TRUE)

  ## print("Forced quit")
  ## quit(save = "no")


  print(glue::glue("{separator}"))
  print(glue::glue("{separator}", " !!! END OF DATA INTEGRITY TEST !!!"))
  print(glue::glue("{separator}"))

  invisible(NULL)
}


#' Cross-checking columns
#'
#' @param clueTable Input dataframe.
Expand Down
33 changes: 33 additions & 0 deletions inst/tinytest/test_uniprot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
##
## test_uniprot.R
##
## tinytest script: checks the UniProt data attached by xmlUniProt() for
## accession P01133, one GO term per annotation category.
##

# NOTE(review): the relative paths assume the script is started from its own
# directory (inst/tinytest), so "../.." is the repository root where
# R/dataPatch.R lives -- confirm this holds for every test runner used.
setwd("../..")
source("R/dataPatch.R")

## arrange: single-row input table with the accession under test
clueTable <- tibble::as_tibble(list(UNIPROT_KB_ID = "P01133"))

## act
clueTable <- xmlUniProt(clueTable)

## assert
p01133 <- clueTable$UniProtData[["P01133"]]

# molecular function, GO:0005509
expect_equal(
  current = p01133[["molecularFunction"]][["GO:0005509"]],
  target = "calcium ion binding"
)

# subcellular location, GO:0070062
expect_equal(
  current = p01133[["subCellularLocation"]][["GO:0070062"]],
  target = "extracellular exosome"
)

# biological process, GO:0043410
expect_equal(
  current = p01133[["biologicalProcess"]][["GO:0043410"]],
  target = "positive regulation of MAPK cascade"
)

0 comments on commit 80f77c8

Please sign in to comment.