Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/better index loading #74

Merged
merged 4 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions R/config.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' @param service The name of the service to use, default is "openai".
#' @param model The model to use, default is "gpt-4-turbo-preview".
#' @param task The task to perform, default is "Permissive Chat".
#' @param embeddings The location of embeddings, default is "local".
#' @param local_embed Whether to use local embedding model. Default is FALSE.
#' @param openai_embed_model The OpenAI embeddings model to use, default is
#' "text-embedding-3-small".
#' @param local_embed_model The local embeddings model to use, default is
Expand All @@ -15,24 +15,27 @@
#' @param save_history Logical indicating whether history should be saved,
#' default is TRUE.
#' @param sources The sources to use, default is "All".
#' @param run_code Whether to execute generated code with `reprex::reprex()`,
#' default is FALSE.
#' @param persist Logical indicating whether to persist the settings, default
#' is TRUE.
#' @return Invisible NULL.
#' @export
save_user_config <- function(service = "openai",
model = "gpt-4-turbo-preview",
task = "Permissive Chat",
embeddings = TRUE,
local_embed = FALSE,
openai_embed_model = "text-embedding-3-small",
local_embed_model = "BAAI/bge-small-en-v1.5",
k_context = 4,
k_history = 4,
save_history = TRUE,
sources = "All",
run_code = FALSE,
persist = TRUE) {
ops <- tibble::tibble(
service, model, task, embeddings, openai_embed_model, local_embed_model,
k_context, k_history, sources, save_history
service, model, task, local_embed, openai_embed_model, local_embed_model,
k_context, k_history, sources, run_code, save_history
)

if (persist == TRUE) {
Expand Down Expand Up @@ -69,13 +72,14 @@ set_user_config <- function(path = NULL) {
gpttools.service = ops$service,
gpttools.model = ops$model,
gpttools.task = ops$task,
gpttools.local_embed = ops$embeddings,
gpttools.local_embed = ops$local_embed,
gpttools.k_context = ops$k_context,
gpttools.k_history = ops$k_history,
gpttools.save_history = ops$save_history,
gpttools.sources = ops$sources,
gpttools.openai_embed_model = ops$openai_embed_model,
gpttools.local_embed_model = ops$local_embed_model
gpttools.local_embed_model = ops$local_embed_model,
gpttools.run_code = ops$run_code
)
invisible(TRUE)
} else {
Expand Down
9 changes: 8 additions & 1 deletion R/embedding.R
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,11 @@ gpttools_index_all_scraped_data <- function(overwrite = FALSE,
get_top_matches <- function(index, query_embedding, k = 5) {
k <- min(k, nrow(index))
index |>
dplyr::glimpse() |>
dplyr::mutate(
similarity = purrr::map_dbl(embedding, \(x) {
cli_alert_info("query embedding: {length(query_embedding)}")
cli_alert_info("text embedding: {length(unlist(x))}")
lsa::cosine(query_embedding, unlist(x))
})
) |>
Expand Down Expand Up @@ -331,7 +334,11 @@ load_index <- function(domain, local_embeddings = FALSE) {
}

if (domain == "All") {
arrow::open_dataset(data_dir) |> tibble::as_tibble()
arrow::open_dataset(
data_dir,
factory_options = list(selector_ignore_prefixes = "local")
) |>
tibble::as_tibble()
} else {
arrow::read_parquet(glue("{data_dir}/{domain}.parquet"))
}
Expand Down
5 changes: 3 additions & 2 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
op_gpttools <- list(
gpttools.service = "openai",
gpttools.model = "gpt-4-turbo-preview",
gpttools.local_embed = TRUE,
gpttools.local_embed = FALSE,
gpttools.local_embed_model = "BAAI/bge-large-en-v1.5",
gpttools.task = "Permissive Chat",
gpttools.k_context = 4,
gpttools.k_history = 4,
gpttools.save_history = FALSE,
gpttools.sources = "All",
gpttools.openai_embed_model = "text-embedding-3-small"
gpttools.openai_embed_model = "text-embedding-3-small",
gpttools.run_code = FALSE
)

toset <- !(names(op_gpttools) %in% names(op))
Expand Down
10 changes: 6 additions & 4 deletions inst/retriever/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ ui <- page_fillable(
"local", "Local Embeddings",
choiceNames = c("Yes", "No"),
choiceValues = c(TRUE, FALSE),
selected = getOption("gpttools.local_embed"),
selected = getOption("gpttools.local_embed", FALSE),
inline = TRUE,
),
selectInput(
Expand Down Expand Up @@ -242,7 +242,7 @@ server <- function(input, output, session) {
dplyr::bind_rows()
}
} else if (input$source == "All") {
load_index(domain = "All", local_embeddings = TRUE)
load_index(domain = "All", local_embeddings = FALSE)
} else {
purrr::map(input$source, \(x) {
load_index(x, local_embeddings = FALSE) |>
Expand All @@ -253,6 +253,7 @@ server <- function(input, output, session) {
})

indices <- reactive({
req(input$local)
if (input$local == TRUE) {
list_index(dir = "index/local") |> tools::file_path_sans_ext()
} else {
Expand All @@ -269,21 +270,22 @@ server <- function(input, output, session) {
)
observe(updateSelectInput(session, "source",
choices = c("All", indices()),
selected = getOption("gpttools.sources")
selected = getOption("gpttools.sources", "All")
))
observe({
toggle_popover("settings", show = FALSE)
save_user_config(
service = input$service,
model = input$model,
task = input$task,
embeddings = input$local,
local_embed = input$local,
openai_embed_model = input$openai_embed_model,
local_embed_model = input$local_embed_model,
k_context = input$n_docs,
k_history = input$n_history,
save_history = input$save_history,
sources = input$source,
run_code = input$test_code,
persist = TRUE
)
}) |> bindEvent(input$save_settings)
Expand Down
8 changes: 6 additions & 2 deletions man/save_user_config.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading