Commit
update vignettes, fix app bugs
JamesHWade committed Jan 22, 2024
1 parent 15a971b commit 0304daf
Showing 49 changed files with 676 additions and 276 deletions.
1 change: 0 additions & 1 deletion .Rbuildignore
@@ -9,7 +9,6 @@
^cran-comments\.md$
^docs$
^gpttools\.Rproj$
^images$
^pkgdown$
^revdep$
^.lintr$
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
rev: v4.5.0
hooks:
- id: check-added-large-files
args: ['--maxkb=2000']
args: ['--maxkb=25000']
- repo: https://github.com/pre-commit-ci/pre-commit-ci-config
rev: v1.6.1
hooks:
5 changes: 4 additions & 1 deletion NAMESPACE
@@ -1,9 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(add_roxygen_addin)
export(addin_run_retriever)
export(addin_run_scrape_pkgs)
export(addin_run_select_pkgs)
export(chat_with_context)
export(chat_with_retrieval)
export(collect_dataframes)
export(crawl)
export(create_index_from_audio)
@@ -13,6 +14,7 @@ export(delete_history)
export(delete_index)
export(document_data)
export(get_selection)
export(get_transformer_model)
export(gpt_sitrep)
export(ingest_pdf)
export(insert_text)
@@ -25,6 +27,7 @@ export(remove_lines_and_spaces)
export(run_document_data)
export(run_select_pkgs_app)
export(save_user_config)
export(scrape_pkg_sites)
export(scrape_url)
export(script_to_function_addin)
export(set_user_config)
8 changes: 4 additions & 4 deletions R/addin-run-retriever.R
@@ -1,6 +1,6 @@
#' Run Chat GPT with Retrieval
#' Run Chat with Retrieval
#'
#' Run the ChatGPT shiny app with semantic search and document retrieval
#' Run the Chat with Retrieval shiny app
#'
#' @export
#'
@@ -9,9 +9,9 @@
#' @examples
#' # Call the function as an RStudio addin
#' \dontrun{
#' addin_run_retriever()
#' chat_with_retrieval()
#' }
addin_run_retriever <- function() {
chat_with_retrieval <- function() {
indices <- list_index()
if (length(indices) == 0) {
cli::cli_abort(
46 changes: 45 additions & 1 deletion R/addin_select_pkgs.R → R/addin_scrape_pkgs.R
@@ -10,6 +10,50 @@ addin_run_select_pkgs <- function() {
run_select_pkgs_app()
}

#' Addin to scrape installed packages
#'
#' Invokes RStudio addin functionality to scrape select installed packages and
#' create indices for use in the "Chat with Retrieval" application.
#'
#' @export
#' @return No return value, called for side effects only.
#'
#' @examplesIf rlang::is_interactive()
#' # This function is typically run within RStudio as an Addin.
#' # It would not normally be called directly in code.
#' addin_run_scrape_pkgs()
#'
#' @note This addin requires RStudio to be available and will stop with an
#' error message if RStudio API is not accessible.
#'
addin_run_scrape_pkgs <- function() {
# Check if RStudio API is available
if (!rstudioapi::isAvailable()) {
cli::cli_abort("The rstudioapi is not available.")
}
# Get user feedback with rstudioapi
proceed <-
rstudioapi::showQuestion(
title = "Scrape Packages",
message = "This will scrape installed packages and create indices to use
with the \"Chat with Retrieval\" app. Would you like to proceed?"
)

# Proceed with scraping if the user agrees
if (proceed) {
cli::cli_alert_info("Scraping packages as a background job.")
# Run the scrape packages script as a background job
rstudioapi::jobRunScript(
path = system.file("scripts/scrape_pkgs.R",
package = "gpttools"
),
name = "Scraping Pacakges"
)
} else {
cli::cli_alert_info("Scraping cancelled.")
}
}

#' Run a Shiny App to Select and Save Installed Packages
#'
#' This function launches a Shiny application that allows users to select from a
@@ -65,7 +109,7 @@ run_select_pkgs_app <- function() {
{
installed_packages |> dplyr::filter(Package %in% input$selected_pkg)
},
options = list(pageLength = 10)
# options = list(pageLength = 5)
)
}

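Outside RStudio there is no addin or background job, but the same indexing can be started directly with the newly exported scrape_pkg_sites() (changed in R/site-index.R below). A minimal sketch, assuming the defaults shown in that file:

    library(gpttools)
    # Uses the defaults from scrape_pkg_sites(): locally installed packages,
    # local embeddings, index creation on, overwrite on.
    scrape_pkg_sites(service = "local", index_create = TRUE, overwrite = TRUE)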
2 changes: 1 addition & 1 deletion R/config.R
@@ -18,7 +18,7 @@
save_user_config <- function(service = "openai",
model = "gpt-4-1106-preview",
task = "Permissive Chat",
embeddings = "local",
embeddings = TRUE,
k_context = 4,
k_history = 4,
save_history = TRUE,
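The one visible change here is the embeddings default, which moves from "local" to TRUE. A sketch of a full call using the defaults shown above; any arguments hidden by the truncated diff are left out:

    save_user_config(
      service      = "openai",
      model        = "gpt-4-1106-preview",
      task         = "Permissive Chat",
      embeddings   = TRUE,   # previously "local"
      k_context    = 4,
      k_history    = 4,
      save_history = TRUE
    )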
29 changes: 28 additions & 1 deletion R/embedding-py.R
@@ -10,7 +10,34 @@
# }
# nolint end

# uses transformers instead of sentence transformers
#' Get Transformer Model
#'
#' This function is designed to download and load a pre-trained transformer
#' model using the transformers Python library via the reticulate package.
#' It checks for the availability of the required Python package and then
#' downloads the specified transformer model.
#'
#' @param model_name The name of the transformer model to download. This should
#' be in the format "username/modelname" as recognized by the transformers
#' library. Default is "jinaai/jina-embeddings-v2-base-en".
#'
#' @return An object of the downloaded transformer model.
#'
#' @export
#'
#' @note Users of this function need to ensure that the Python environment
#' is set up with the 'transformers' package installed. The function uses
#' the 'reticulate' R package to interface with Python and the user may need
#' to configure it accordingly.
#'
#' @examples
#' \dontrun{
#' # To get the default transformer model:
#' get_transformer_model()
#'
#' # To get a custom transformer model by specifying the model name:
#' get_transformer_model("bert-base-uncased")
#' }
get_transformer_model <-
function(model_name = "jinaai/jina-embeddings-v2-base-en") {
py_pkg_is_available("transformers")
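The body of get_transformer_model() is cut off by the diff, but the roxygen block describes loading a Hugging Face model through reticulate. A hedged sketch of that pattern (an assumption about the approach, not the package's actual code; trust_remote_code is needed because the Jina embedding models ship custom modelling code):

    library(reticulate)
    transformers <- reticulate::import("transformers")
    model <- transformers$AutoModel$from_pretrained(
      "jinaai/jina-embeddings-v2-base-en",
      trust_remote_code = TRUE
    )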
72 changes: 46 additions & 26 deletions R/history.R
@@ -174,20 +174,16 @@ check_context <- function(context) {
#' @param overwrite Whether to overwrite the history file or not. Default is
#' FALSE.
#' @param local Whether to use the local model or not. Default is FALSE.
#' @param embedding_model A model object to use for embedding. Only needed if
#' local is TRUE. Default is NULL.
#'
#' @return A list containing the prompt, context, and answer.
#' @export
#'
#' @examples
#' \dontrun{
#' # Define a query and context
#' @examplesIf rlang::is_interactive()
#' rlang::is_interactive()
#' query <- "What is the capital of France?"
#' context <- "France is a country in Western Europe. Its capital is a famous
#' city known for its culture, art, and history."
#'
#' # Call the chat_with_context function
#' result <- chat_with_context(query = query, context = context)
#' }
chat_with_context <- function(query,
service = "openai",
model = "gpt-4",
@@ -202,14 +198,15 @@ chat_with_context <- function(query,
k_history = 4,
save_history = TRUE,
overwrite = FALSE,
local = FALSE) {
local = FALSE,
embedding_model = NULL) {
arg_match(task, c("Context Only", "Permissive Chat"))

if (local) {
embedding_model <- get_transformer_model()
} else {
embedding_model <- NULL
}
need_context <- is_context_needed(
user_prompt = query,
service = service,
model = model
)

if (rlang::is_true(add_context) || rlang::is_true(add_history)) {
query_embedding <- get_query_embedding(query,
@@ -218,7 +215,7 @@
)
}

if (rlang::is_true(add_context)) {
if (rlang::is_true(add_context) && rlang::is_true(need_context)) {
full_context <-
get_query_context(
query_embedding,
Expand All @@ -229,10 +226,11 @@ chat_with_context <- function(query,
dplyr::pull("chunks") |>
paste(collapse = "\n\n")
} else {
full_context <- "No context provided."
context <- "No additional context provided."
}

if (add_history) {
if (rlang::is_true(add_history) & rlang::is_true(need_context)) {
cli::cli_inform("Attempting to add chat history to query.")
cli::cli_inform("Chat history: {class(chat_history)}")
if (rlang::is_null(chat_history)) {
@@ -261,11 +259,11 @@ chat_with_context <- function(query,
role = "system",
content =
glue(
"You are a helpful chat bot that answers questions based on ",
"the context provided by the user. If the user does not ",
"provide related context, say \"I am not able to answer that ",
"question. Maybe try rephrasing your question in a different ",
"way.\""
"You are a helpful chat bot that answers questions based on
the context provided by the user. If the user does not
provide related context and you need context to respond
accurately, say \"I am not able to answer that question.
Maybe try rephrasing your question in a different way.\""
)
)
),
@@ -275,11 +273,12 @@ chat_with_context <- function(query,
role = "system",
content =
glue(
"You are a helpful chat bot that answers questions based on ",
"on the context provided by the user. If the user does not ",
"provide context, answer the quest but first say \"I am not ",
"able to answer that question with the context you gave me, ",
"but here is my best answer.",
"You are a helpful chat bot that answers questions based on
on the context provided by the user. If the user does not
provide context and you need context to respond correctly,
answer the quest but first say \"I am not able to answer
that question with the context you gave me, but here is my
best but here is my best answer."
)
)
)
@@ -370,3 +369,24 @@ chat_with_context <- function(query,

list(prompt_without_context, full_context, answer$response)
}


is_context_needed <- function(user_prompt,
service = getOption("gpttools.service"),
model = getOption("gpttools.model")) {
prompt <-
glue::glue("Would additional context or history be helpful to respond to
this prompt from the user. If yes, answer TRUE. If no, answer
FALSE. ONLY answer TRUE or FALSE. It is crucial that you only
answer TRUE or FALSE.\n\n{user_prompt}")

gptstudio:::gptstudio_create_skeleton(
service = service,
model = model,
prompt = prompt,
stream = FALSE
) |>
gptstudio:::gptstudio_request_perform() |>
purrr::pluck("response") |>
as.logical()
}
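Two behavioural changes land in this file: chat_with_context() now takes a pre-built embedding_model instead of constructing one when local = TRUE, and the new is_context_needed() helper asks the chat model itself whether retrieval is worth doing before any embedding work happens. A sketch of the new calling pattern (hedged; the full signature is truncated in this diff, and a configured Python environment with transformers is assumed):

    embedding_model <- get_transformer_model()  # jinaai/jina-embeddings-v2-base-en by default
    result <- chat_with_context(
      query           = "What is the capital of France?",
      service         = "openai",
      model           = "gpt-4",
      local           = TRUE,
      embedding_model = embedding_model
    )
    # result is a list: the prompt without context, the retrieved context, and the answer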
33 changes: 31 additions & 2 deletions R/site-index.R
@@ -67,6 +67,25 @@ get_pkgs_to_scrape <- function(local = TRUE,
dplyr::rename(version = installed_version)
}

#' Scrape packaging sites
#'
#' @details This function scrapes the websites for the packages specified in the
#' `sites` dataframe. If `sites` is empty, it alerts the user that there are no
#' packages to scrape and returns `NULL` invisibly. If the user confirms, it
#' scrapes each package site using the supplied details.
#'
#' @param sites A data frame containing the package sites to be scraped. If not
#' provided, it defaults to `get_pkgs_to_scrape(local = TRUE)`.
#' @param service The service to be used for scraping, defaults to "local".
#' @param index_create Logical indicating whether to create an index, defaults
#' to `TRUE`.
#' @param overwrite Logical indicating whether to overwrite existing content,
#' defaults to `TRUE`.
#' @return Invisible `NULL`. The function is called for its side effects.
#' @examplesIf rlang::is_interactive()
#' scrape_pkg_sites()
#' @export
scrape_pkg_sites <- function(sites = get_pkgs_to_scrape(local = TRUE),
service = "local",
index_create = TRUE,
@@ -76,12 +95,22 @@ scrape_pkg_sites <- function(sites = get_pkgs_to_scrape(local = TRUE),
return(invisible())
}

cli::cli_text("You are about to scrape {nrow(sites)} package site page{?s}")
usethis::ui_yeah("Do you want to continue?")
if (rlang::is_interactive()) {
cli::cli_text("You are about to scrape {nrow(sites)} package site page{?s}")
continue <- usethis::ui_yeah("Do you want to continue?")
} else {
continue <- TRUE
}

if (!continue) {
cli_alert_info("Scraping aborted.")
return(invisible())
}

sites |>
dplyr::select(url, version, name) |>
purrr::pmap(.f = \(url, version, name) {
# crawl() scrapes a single site and optionally builds its index
crawl(
url = url,
index_create = index_create,
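Because the confirmation prompt is now skipped in non-interactive sessions, scrape_pkg_sites() can also be driven from a script with a hand-built sites data frame. The column names below (name, url, version) are inferred from the pmap() call above, so treat this as a sketch rather than a documented interface:

    my_sites <- data.frame(
      name    = "dplyr",
      url     = "https://dplyr.tidyverse.org",
      version = "1.1.4"
    )
    scrape_pkg_sites(sites = my_sites, service = "local", index_create = TRUE)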