diff --git a/R/dataPatch.R b/R/dataPatch.R index c25f15a..31b4c7b 100644 --- a/R/dataPatch.R +++ b/R/dataPatch.R @@ -78,8 +78,6 @@ main <- function() { ## NOTE: this filter drops some not well-curated compounds filter(!is.na(source)) - checkDataCoverage(clueTable) - clueTable <- targetList %>% left_join(clueTable) %>% rowwise() %>% @@ -92,8 +90,6 @@ main <- function() { print(result) saveRDS(result, file = CLUE.PATCHED.OUTPUT) - # TODO: should be another logic: checkDataCoverage(result) - warnings() return(result) } @@ -130,107 +126,6 @@ patch <- function(clueTable) { } -#' Check gaps in our data set -#' -#' @param clueTable -#' -#' @return Invisible NULL -checkDataCoverage <- function(clueTable) { - separator <- paste(rep("#", 75), collapse = "") - print(glue::glue("\n\n{separator}")) - print(glue::glue("{separator}", " !!! START OF DATA INTEGRITY TEST !!!")) - print(glue::glue("{separator}\n\n")) - - ## internal helper function - checkTable <- function(table, msg, stop = FALSE) { - n <- nrow(table) - prefix <- glue::glue("\n\n>>>>>>>> {msg}: {n} ...") - if (nrow(table) > 0) { - print(glue::glue("{prefix} IS NOT OK!")) - print(table) - if (stop) stop("Unexpected data state") - } else - print(glue::glue("{prefix} IS OK!")) - } - - ## FDA Orange issue - distinctTable <- clueTable %>% - select( - pert_iname, - # moa, - final_status, - status_source, - orange_book - ) %>% - distinct() %>% - rowwise() %>% - filter(grepl("FDA Orange", status_source)) - checkTable(distinctTable, "FDA Orange") - - ## FDA Orange issue V2 - distinctTable <- clueTable %>% - select( - pert_iname, - final_status, - status_source, - orange_book - ) %>% - distinct() %>% - rowwise() %>% - filter(!is.na(orange_book) && is.na(status_source)) - checkTable(distinctTable, "FDA.V2 orange_book has value") - - ## FDA Launched check - distinctTable <- clueTable %>% - select( - pert_iname, - final_status, - status_source, - orange_book - ) %>% - distinct() %>% - rowwise() %>% - filter(final_status == "Launched" && - is.na(orange_book) && is.na(status_source)) - checkTable(distinctTable, "FDA Launched check") - - ## PubChem/ChEMBL check - distinctTable <- clueTable %>% - select( - pert_iname, - final_status, - pubchem_cid, - chembl_id, - inchi_key, - pert_id, - ttd_id, - drugbank_id, - source, - status_source - ) %>% - distinct() %>% - rowwise() %>% - filter(is.na(pubchem_cid) && is.na(chembl_id)) - checkTable(distinctTable, "PubChem/ChEMBL check") - - distinctTable <- clueTable %>% - select(final_status, status_source) %>% - distinct() %>% - filter(final_status == "Preclinical" && !is.na(status_source)) - checkTable(distinctTable, "Preclinical status_source", stop = TRUE) - -## print("Foreced quit") -## quit(save = "no") - - - print(glue::glue("{separator}")) - print(glue::glue("{separator}", " !!! END OF DATA INTEGRITY TEST !!!")) - print(glue::glue("{separator}")) - - return(invisible(NULL)) -} - - #' Cross-checking columns #' #' @param clueTable Input dataframe. diff --git a/inst/tinytest/test_uniprot.R b/inst/tinytest/test_uniprot.R new file mode 100644 index 0000000..cad1775 --- /dev/null +++ b/inst/tinytest/test_uniprot.R @@ -0,0 +1,33 @@ +## +## test_uniprot.R +## + +setwd('../..') +source("R/dataPatch.R") + +## arrange +clueTable <- tibble::as_tibble(list( + UNIPROT_KB_ID = c( + 'P01133' + ) +)) + +## act +clueTable <- xmlUniProt(clueTable) + +## assert + +# P01133 +expect_equal( + target = "calcium ion binding", + current = clueTable$UniProtData[["P01133"]][["molecularFunction"]][["GO:0005509"]] +) +expect_equal( + target = "extracellular exosome", + current = clueTable$UniProtData[["P01133"]][["subCellularLocation"]][["GO:0070062"]] +) +expect_equal( + target = "positive regulation of MAPK cascade", + current = clueTable$UniProtData[["P01133"]][["biologicalProcess"]][["GO:0043410"]] +) +# GO:0070062