diff --git a/R/config.R b/R/config.R index 872786f..9e9fd51 100644 --- a/R/config.R +++ b/R/config.R @@ -5,7 +5,7 @@ #' @param service The name of the service to use, default is "openai". #' @param model The model to use, default is "gpt-4-1106-preview". #' @param task The task to perform, default is "Permissive Chat". -#' @param embeddings The location of embeddings, default is "local". +#' @param local_embed Whether to use local embedding model. Default is FALSE. #' @param openai_embed_model The OpenAI embeddings model to use, default is #' "text-embedding-3-small". #' @param local_embed_model The local embeddings model to use, default is @@ -15,6 +15,8 @@ #' @param save_history Logical indicating whether history should be saved, #' default is TRUE. #' @param sources The sources to use, default is "All". +#' @param run_code Whether to execute generated code with `reprex::reprex()`, +#' default is FALSE. #' @param persist Logical indicating whether to persist the settings, default #' is TRUE. #' @return Invisible NULL. @@ -22,17 +24,18 @@ save_user_config <- function(service = "openai", model = "gpt-4-turbo-preview", task = "Permissive Chat", - embeddings = TRUE, + local_embed = FALSE, openai_embed_model = "text-embedding-3-small", local_embed_model = "BAAI/bge-small-en-v1.5", k_context = 4, k_history = 4, save_history = TRUE, sources = "All", + run_code = FALSE, persist = TRUE) { ops <- tibble::tibble( - service, model, task, embeddings, openai_embed_model, local_embed_model, - k_context, k_history, sources, save_history + service, model, task, local_embed, openai_embed_model, local_embed_model, + k_context, k_history, sources, run_code, save_history ) if (persist == TRUE) { @@ -69,13 +72,14 @@ set_user_config <- function(path = NULL) { gpttools.service = ops$service, gpttools.model = ops$model, gpttools.task = ops$task, - gpttools.local_embed = ops$embeddings, + gpttools.local_embed = ops$local_embed, gpttools.k_context = ops$k_context, gpttools.k_history = ops$k_history, gpttools.save_history = ops$save_history, gpttools.sources = ops$sources, gpttools.openai_embed_model = ops$openai_embed_model, - gpttools.local_embed_model = ops$local_embed_model + gpttools.local_embed_model = ops$local_embed_model, + gpttools.run_code = ops$run_code ) invisible(TRUE) } else { diff --git a/R/embedding.R b/R/embedding.R index a4d1f4e..f866106 100644 --- a/R/embedding.R +++ b/R/embedding.R @@ -278,8 +278,11 @@ gpttools_index_all_scraped_data <- function(overwrite = FALSE, get_top_matches <- function(index, query_embedding, k = 5) { k <- min(k, nrow(index)) index |> + dplyr::glimpse() |> dplyr::mutate( similarity = purrr::map_dbl(embedding, \(x) { + cli_alert_info("query embedding: {length(query_embedding)}") + cli_alert_info("text embedding: {length(unlist(x))}") lsa::cosine(query_embedding, unlist(x)) }) ) |> @@ -331,7 +334,11 @@ load_index <- function(domain, local_embeddings = FALSE) { } if (domain == "All") { - arrow::open_dataset(data_dir) |> tibble::as_tibble() + arrow::open_dataset( + data_dir, + factory_options = list(selector_ignore_prefixes = "local") + ) |> + tibble::as_tibble() } else { arrow::read_parquet(glue("{data_dir}/{domain}.parquet")) } diff --git a/R/zzz.R b/R/zzz.R index a726850..1c9ff14 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -6,14 +6,15 @@ op_gpttools <- list( gpttools.service = "openai", gpttools.model = "gpt-4-turbo-preview", - gpttools.local_embed = TRUE, + gpttools.local_embed = FALSE, gpttools.local_embed_model = "BAAI/bge-large-en-v1.5", gpttools.task = "Permissive Chat", gpttools.k_context = 4, gpttools.k_history = 4, gpttools.save_history = FALSE, gpttools.sources = "All", - gpttools.openai_embed_model = "text-embedding-3-small" + gpttools.openai_embed_model = "text-embedding-3-small", + gpttools.run_code = FALSE ) toset <- !(names(op_gpttools) %in% names(op)) diff --git a/inst/retriever/app.R b/inst/retriever/app.R index 99ca16c..216be3f 100644 --- a/inst/retriever/app.R +++ b/inst/retriever/app.R @@ -140,7 +140,7 @@ ui <- page_fillable( "local", "Local Embeddings", choiceNames = c("Yes", "No"), choiceValues = c(TRUE, FALSE), - selected = getOption("gpttools.local_embed"), + selected = getOption("gpttools.local_embed", FALSE), inline = TRUE, ), selectInput( @@ -242,7 +242,7 @@ server <- function(input, output, session) { dplyr::bind_rows() } } else if (input$source == "All") { - load_index(domain = "All", local_embeddings = TRUE) + load_index(domain = "All", local_embeddings = FALSE) } else { purrr::map(input$source, \(x) { load_index(x, local_embeddings = FALSE) |> @@ -253,6 +253,7 @@ server <- function(input, output, session) { }) indices <- reactive({ + req(input$local) if (input$local == TRUE) { list_index(dir = "index/local") |> tools::file_path_sans_ext() } else { @@ -269,7 +270,7 @@ server <- function(input, output, session) { ) observe(updateSelectInput(session, "source", choices = c("All", indices()), - selected = getOption("gpttools.sources") + selected = getOption("gpttools.sources", "All") )) observe({ toggle_popover("settings", show = FALSE) @@ -277,13 +278,14 @@ server <- function(input, output, session) { service = input$service, model = input$model, task = input$task, - embeddings = input$local, + local_embed = input$local, openai_embed_model = input$openai_embed_model, local_embed_model = input$local_embed_model, k_context = input$n_docs, k_history = input$n_history, save_history = input$save_history, sources = input$source, + run_code = input$test_code, persist = TRUE ) }) |> bindEvent(input$save_settings) diff --git a/man/save_user_config.Rd b/man/save_user_config.Rd index 3c6b621..8e59b6a 100644 --- a/man/save_user_config.Rd +++ b/man/save_user_config.Rd @@ -8,13 +8,14 @@ save_user_config( service = "openai", model = "gpt-4-turbo-preview", task = "Permissive Chat", - embeddings = TRUE, + local_embed = FALSE, openai_embed_model = "text-embedding-3-small", local_embed_model = "BAAI/bge-small-en-v1.5", k_context = 4, k_history = 4, save_history = TRUE, sources = "All", + run_code = FALSE, persist = TRUE ) } @@ -25,7 +26,7 @@ save_user_config( \item{task}{The task to perform, default is "Permissive Chat".} -\item{embeddings}{The location of embeddings, default is "local".} +\item{local_embed}{Whether to use local embedding model. Default is FALSE.} \item{openai_embed_model}{The OpenAI embeddings model to use, default is "text-embedding-3-small".} @@ -42,6 +43,9 @@ default is TRUE.} \item{sources}{The sources to use, default is "All".} +\item{run_code}{Whether to execute generated code with \code{reprex::reprex()}, +default is FALSE.} + \item{persist}{Logical indicating whether to persist the settings, default is TRUE.} }