From 74cbff7250b1e8a722d2910e9bff88c18e59c989 Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 1 Feb 2024 11:59:22 -0500 Subject: [PATCH 1/4] feat: add run_code to package options --- R/config.R | 15 +++++++++------ R/zzz.R | 7 ++++--- man/save_user_config.Rd | 9 ++++++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/R/config.R b/R/config.R index 872786f..188cd69 100644 --- a/R/config.R +++ b/R/config.R @@ -5,7 +5,7 @@ #' @param service The name of the service to use, default is "openai". #' @param model The model to use, default is "gpt-4-1106-preview". #' @param task The task to perform, default is "Permissive Chat". -#' @param embeddings The location of embeddings, default is "local". +#' @param local_embed Whether to use local embedding model. Default is "yes". #' @param openai_embed_model The OpenAI embeddings model to use, default is #' "text-embedding-3-small". #' @param local_embed_model The local embeddings model to use, default is @@ -15,6 +15,7 @@ #' @param save_history Logical indicating whether history should be saved, #' default is TRUE. #' @param sources The sources to use, default is "All". +#' @param run_code Whether to execute generated code with `reprex::reprex()` #' @param persist Logical indicating whether to persist the settings, default #' is TRUE. #' @return Invisible NULL. @@ -22,17 +23,18 @@ save_user_config <- function(service = "openai", model = "gpt-4-turbo-preview", task = "Permissive Chat", - embeddings = TRUE, + local_embed = "Yes", openai_embed_model = "text-embedding-3-small", local_embed_model = "BAAI/bge-small-en-v1.5", k_context = 4, k_history = 4, - save_history = TRUE, + save_history = "Yes", sources = "All", + run_code = "No", persist = TRUE) { ops <- tibble::tibble( service, model, task, embeddings, openai_embed_model, local_embed_model, - k_context, k_history, sources, save_history + k_context, k_history, sources, run_code, save_history ) if (persist == TRUE) { @@ -69,13 +71,14 @@ set_user_config <- function(path = NULL) { gpttools.service = ops$service, gpttools.model = ops$model, gpttools.task = ops$task, - gpttools.local_embed = ops$embeddings, + gpttools.local_embed = ops$local_embed, gpttools.k_context = ops$k_context, gpttools.k_history = ops$k_history, gpttools.save_history = ops$save_history, gpttools.sources = ops$sources, gpttools.openai_embed_model = ops$openai_embed_model, - gpttools.local_embed_model = ops$local_embed_model + gpttools.local_embed_model = ops$local_embed_model, + gpttools.run_code = ops$run_code ) invisible(TRUE) } else { diff --git a/R/zzz.R b/R/zzz.R index a726850..94ea95a 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -6,14 +6,15 @@ op_gpttools <- list( gpttools.service = "openai", gpttools.model = "gpt-4-turbo-preview", - gpttools.local_embed = TRUE, + gpttools.local_embed = "No", gpttools.local_embed_model = "BAAI/bge-large-en-v1.5", gpttools.task = "Permissive Chat", gpttools.k_context = 4, gpttools.k_history = 4, - gpttools.save_history = FALSE, + gpttools.save_history = "No", gpttools.sources = "All", - gpttools.openai_embed_model = "text-embedding-3-small" + gpttools.openai_embed_model = "text-embedding-3-small", + gpttools.run_code = "No" ) toset <- !(names(op_gpttools) %in% names(op)) diff --git a/man/save_user_config.Rd b/man/save_user_config.Rd index 3c6b621..565d4d1 100644 --- a/man/save_user_config.Rd +++ b/man/save_user_config.Rd @@ -8,13 +8,14 @@ save_user_config( service = "openai", model = "gpt-4-turbo-preview", task = "Permissive Chat", - embeddings = TRUE, + local_embed = "Yes", openai_embed_model = "text-embedding-3-small", local_embed_model = "BAAI/bge-small-en-v1.5", k_context = 4, k_history = 4, - save_history = TRUE, + save_history = "Yes", sources = "All", + run_code = "No", persist = TRUE ) } @@ -25,7 +26,7 @@ save_user_config( \item{task}{The task to perform, default is "Permissive Chat".} -\item{embeddings}{The location of embeddings, default is "local".} +\item{local_embed}{Whether to use local embedding model. Default is "yes".} \item{openai_embed_model}{The OpenAI embeddings model to use, default is "text-embedding-3-small".} @@ -42,6 +43,8 @@ default is TRUE.} \item{sources}{The sources to use, default is "All".} +\item{run_code}{Whether to execute generated code with \code{reprex::reprex()}} + \item{persist}{Logical indicating whether to persist the settings, default is TRUE.} } From f060a4fe2a82e56724a78023ad8887321bcb53b5 Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 1 Feb 2024 12:59:36 -0500 Subject: [PATCH 2/4] fix: load index correctly --- R/config.R | 13 +++++++------ R/embedding.R | 8 +++++++- R/zzz.R | 6 +++--- inst/retriever/app.R | 10 ++++++---- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/R/config.R b/R/config.R index 188cd69..9e9fd51 100644 --- a/R/config.R +++ b/R/config.R @@ -5,7 +5,7 @@ #' @param service The name of the service to use, default is "openai". #' @param model The model to use, default is "gpt-4-1106-preview". #' @param task The task to perform, default is "Permissive Chat". -#' @param local_embed Whether to use local embedding model. Default is "yes". +#' @param local_embed Whether to use local embedding model. Default is FALSE. #' @param openai_embed_model The OpenAI embeddings model to use, default is #' "text-embedding-3-small". #' @param local_embed_model The local embeddings model to use, default is @@ -15,7 +15,8 @@ #' @param save_history Logical indicating whether history should be saved, #' default is TRUE. #' @param sources The sources to use, default is "All". -#' @param run_code Whether to execute generated code with `reprex::reprex()` +#' @param run_code Whether to execute generated code with `reprex::reprex()`, +#' default is FALSE. #' @param persist Logical indicating whether to persist the settings, default #' is TRUE. #' @return Invisible NULL. @@ -23,17 +24,17 @@ save_user_config <- function(service = "openai", model = "gpt-4-turbo-preview", task = "Permissive Chat", - local_embed = "Yes", + local_embed = FALSE, openai_embed_model = "text-embedding-3-small", local_embed_model = "BAAI/bge-small-en-v1.5", k_context = 4, k_history = 4, - save_history = "Yes", + save_history = TRUE, sources = "All", - run_code = "No", + run_code = FALSE, persist = TRUE) { ops <- tibble::tibble( - service, model, task, embeddings, openai_embed_model, local_embed_model, + service, model, task, local_embed, openai_embed_model, local_embed_model, k_context, k_history, sources, run_code, save_history ) diff --git a/R/embedding.R b/R/embedding.R index a4d1f4e..24d4be8 100644 --- a/R/embedding.R +++ b/R/embedding.R @@ -278,8 +278,11 @@ gpttools_index_all_scraped_data <- function(overwrite = FALSE, get_top_matches <- function(index, query_embedding, k = 5) { k <- min(k, nrow(index)) index |> + dplyr::glimpse() |> dplyr::mutate( similarity = purrr::map_dbl(embedding, \(x) { + cli_alert_info("query embedding: {length(query_embedding)}") + cli_alert_info("text embedding: {length(unlist(x))}") lsa::cosine(query_embedding, unlist(x)) }) ) |> @@ -331,7 +334,10 @@ load_index <- function(domain, local_embeddings = FALSE) { } if (domain == "All") { - arrow::open_dataset(data_dir) |> tibble::as_tibble() + arrow::open_dataset( + data_dir, + factory_options = list(selector_ignore_prefixes = "local")) |> + tibble::as_tibble() } else { arrow::read_parquet(glue("{data_dir}/{domain}.parquet")) } diff --git a/R/zzz.R b/R/zzz.R index 94ea95a..1c9ff14 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -6,15 +6,15 @@ op_gpttools <- list( gpttools.service = "openai", gpttools.model = "gpt-4-turbo-preview", - gpttools.local_embed = "No", + gpttools.local_embed = FALSE, gpttools.local_embed_model = "BAAI/bge-large-en-v1.5", gpttools.task = "Permissive Chat", gpttools.k_context = 4, gpttools.k_history = 4, - gpttools.save_history = "No", + gpttools.save_history = FALSE, gpttools.sources = "All", gpttools.openai_embed_model = "text-embedding-3-small", - gpttools.run_code = "No" + gpttools.run_code = FALSE ) toset <- !(names(op_gpttools) %in% names(op)) diff --git a/inst/retriever/app.R b/inst/retriever/app.R index 99ca16c..216be3f 100644 --- a/inst/retriever/app.R +++ b/inst/retriever/app.R @@ -140,7 +140,7 @@ ui <- page_fillable( "local", "Local Embeddings", choiceNames = c("Yes", "No"), choiceValues = c(TRUE, FALSE), - selected = getOption("gpttools.local_embed"), + selected = getOption("gpttools.local_embed", FALSE), inline = TRUE, ), selectInput( @@ -242,7 +242,7 @@ server <- function(input, output, session) { dplyr::bind_rows() } } else if (input$source == "All") { - load_index(domain = "All", local_embeddings = TRUE) + load_index(domain = "All", local_embeddings = FALSE) } else { purrr::map(input$source, \(x) { load_index(x, local_embeddings = FALSE) |> @@ -253,6 +253,7 @@ server <- function(input, output, session) { }) indices <- reactive({ + req(input$local) if (input$local == TRUE) { list_index(dir = "index/local") |> tools::file_path_sans_ext() } else { @@ -269,7 +270,7 @@ server <- function(input, output, session) { ) observe(updateSelectInput(session, "source", choices = c("All", indices()), - selected = getOption("gpttools.sources") + selected = getOption("gpttools.sources", "All") )) observe({ toggle_popover("settings", show = FALSE) @@ -277,13 +278,14 @@ server <- function(input, output, session) { service = input$service, model = input$model, task = input$task, - embeddings = input$local, + local_embed = input$local, openai_embed_model = input$openai_embed_model, local_embed_model = input$local_embed_model, k_context = input$n_docs, k_history = input$n_history, save_history = input$save_history, sources = input$source, + run_code = input$test_code, persist = TRUE ) }) |> bindEvent(input$save_settings) From 9e092b482630882e945311f1a514a1c823cffdd4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 18:01:18 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- R/embedding.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/embedding.R b/R/embedding.R index 24d4be8..f866106 100644 --- a/R/embedding.R +++ b/R/embedding.R @@ -336,7 +336,8 @@ load_index <- function(domain, local_embeddings = FALSE) { if (domain == "All") { arrow::open_dataset( data_dir, - factory_options = list(selector_ignore_prefixes = "local")) |> + factory_options = list(selector_ignore_prefixes = "local") + ) |> tibble::as_tibble() } else { arrow::read_parquet(glue("{data_dir}/{domain}.parquet")) From 6180c8f3d9ef9adbe45ca9aea8cd5e806cb11fba Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 1 Feb 2024 13:03:37 -0500 Subject: [PATCH 4/4] docs: update man page --- man/save_user_config.Rd | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/man/save_user_config.Rd b/man/save_user_config.Rd index 565d4d1..8e59b6a 100644 --- a/man/save_user_config.Rd +++ b/man/save_user_config.Rd @@ -8,14 +8,14 @@ save_user_config( service = "openai", model = "gpt-4-turbo-preview", task = "Permissive Chat", - local_embed = "Yes", + local_embed = FALSE, openai_embed_model = "text-embedding-3-small", local_embed_model = "BAAI/bge-small-en-v1.5", k_context = 4, k_history = 4, - save_history = "Yes", + save_history = TRUE, sources = "All", - run_code = "No", + run_code = FALSE, persist = TRUE ) } @@ -26,7 +26,7 @@ save_user_config( \item{task}{The task to perform, default is "Permissive Chat".} -\item{local_embed}{Whether to use local embedding model. Default is "yes".} +\item{local_embed}{Whether to use local embedding model. Default is FALSE.} \item{openai_embed_model}{The OpenAI embeddings model to use, default is "text-embedding-3-small".} @@ -43,7 +43,8 @@ default is TRUE.} \item{sources}{The sources to use, default is "All".} -\item{run_code}{Whether to execute generated code with \code{reprex::reprex()}} +\item{run_code}{Whether to execute generated code with \code{reprex::reprex()}, +default is FALSE.} \item{persist}{Logical indicating whether to persist the settings, default is TRUE.}