From 0c655d2e15c74edec099db4c2b940a862c74a9f7 Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 1 Feb 2024 08:09:37 -0500 Subject: [PATCH 1/5] fix: improve is_context_needed prompt --- R/history.R | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/R/history.R b/R/history.R index 863b5a2..52a457f 100644 --- a/R/history.R +++ b/R/history.R @@ -340,8 +340,6 @@ chat_with_context <- function(query, purrr::map_chr(.f = "content") |> paste(collapse = "\n\n") - cat(simple_prompt, "\n\n") - cli_inform("Service: {service}") cli_inform("Model: {model}") @@ -384,20 +382,17 @@ is_context_needed <- function(user_prompt, service = getOption("gpttools.service"), model = getOption("gpttools.model")) { prompt <- - glue::glue("Consider if additional context or history is necessary to - ccurately respond to this user prompt. Useful context may include - recent information, package documentation, textbook excerpts, or + glue::glue("Consider if additional context or history would be useful to + accurately respond to this user prompt. Useful context may include + information like package documentation, textbook excerpts, or other relevant details. Respond with TRUE if such context is likely to enhance the - response, especially for queries involving recent developments, - technical subjects, or complex topics. Respond with FALSE if the - query seems straightforward or well within the AI's existing + response. Respond with FALSE only if the + query seems straightforward and well within your existing knowledge base. - Remember, the AI's training includes data only up to a few - months ago. If the query might relate to developments after this - period, lean towards TRUE. + Most queries benefit from additional context. Respond ONLY with TRUE or FALSE. \n\n{user_prompt}") From 66a17f6b5cd146c970538183dd34f0de45e76393 Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 1 Feb 2024 08:13:34 -0500 Subject: [PATCH 2/5] fix: cleanup cli calls; ask model to be concise --- R/embedding.R | 3 +-- R/history.R | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/R/embedding.R b/R/embedding.R index b401e6c..a4d1f4e 100644 --- a/R/embedding.R +++ b/R/embedding.R @@ -3,9 +3,8 @@ prepare_scraped_files <- function(domain) { scraped <- arrow::read_parquet(glue("{scraped_dir}/text/{domain}.parquet")) - if (max(scraped$n_words) > 2e5) { + if (max(scraped$n_words) > 1e6) { max_index <- scraped[which.max(scraped$n_words), ] - print(max_index |> dplyr::select(-text)) cli_alert_warning( c( "!" = "Entry {max_index$link} of {domain} has at least 200,000 words.", diff --git a/R/history.R b/R/history.R index 52a457f..2ea9f2d 100644 --- a/R/history.R +++ b/R/history.R @@ -63,7 +63,7 @@ delete_history <- function(local = FALSE) { if (delete_file) { file.remove(x) } else { - cli_inform("{x} was **not** deleted.") + cli_alert_info("{x} was **not** deleted.") } }) invisible() @@ -137,11 +137,11 @@ get_query_context <- function(query_embedding, full_context, k) { check_context <- function(context) { if (rlang::is_null(context)) { - cli_warn( + cli_alert_warning( "You specified that context should be added but none was provided." ) } else if (!is.data.frame(context)) { - cli_warn( + cli_alert_warning( "You passed a {class(context)} to but a data.frame was expected." ) } @@ -209,7 +209,7 @@ chat_with_context <- function(query, ) if (rlang::is_true(add_context) || rlang::is_true(add_history)) { - cli_inform("Creating embedding from query.") + cli_alert_info("Creating embedding from query.") query_embedding <- get_query_embedding(query, local = local, model = embedding_model @@ -217,7 +217,7 @@ chat_with_context <- function(query, } if (rlang::is_true(add_context) && rlang::is_true(need_context)) { - cli_inform("Attempting to add context to query.") + cli_alert_info("Attempting to add context to query.") full_context <- get_query_context( query_embedding, @@ -242,8 +242,8 @@ chat_with_context <- function(query, } if (rlang::is_true(add_history) && rlang::is_true(need_context)) { - cli_inform("Attempting to add chat history to query.") - cli_inform("Chat history: {class(chat_history)}") + cli_alert_info("Attempting to add chat history to query.") + cli_alert_info("Chat history: {class(chat_history)}") if (rlang::is_null(chat_history)) { related_history <- "No related history found." } else { @@ -258,7 +258,7 @@ chat_with_context <- function(query, paste(collapse = "\n\n") } } else { - cli_inform("Not attempting to add chat history to query.") + cli_alert_info("Not attempting to add chat history to query.") related_history <- "No related history found." } @@ -296,6 +296,10 @@ chat_with_context <- function(query, ) prompt_context <- list( + list( + role = "system", + content = "You provide succinct, concise, and accurate responses." + ), list( role = "user", content = glue("---\nContext:\n{context}\n---") @@ -340,8 +344,8 @@ chat_with_context <- function(query, purrr::map_chr(.f = "content") |> paste(collapse = "\n\n") - cli_inform("Service: {service}") - cli_inform("Model: {model}") + cli_alert_info("Service: {service}") + cli_alert_info("Model: {model}") answer <- gptstudio::chat( prompt = simple_prompt, From 5b480285534a5e71bc76b3d112b64e4050fbb25a Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 1 Feb 2024 08:28:23 -0500 Subject: [PATCH 3/5] ci: update precommit config --- .pre-commit-config.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e77eff..8245543 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,9 +28,10 @@ repos: - id: parsable-R - id: no-browser-statement - id: no-debug-statement - # - id: deps-in-desc + - id: no-print-statement + - id: deps-in-desc # args: [--warn_only] - # - id: pkgdown + - id: pkgdown # args: [--warn_only] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 From 1d21a2e04b0ae3bb6784c62b2f7f6bd4416de212 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 13:29:59 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- inst/retriever/app.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/retriever/app.R b/inst/retriever/app.R index 1d3f4c9..8549af0 100644 --- a/inst/retriever/app.R +++ b/inst/retriever/app.R @@ -116,7 +116,7 @@ ui <- page_fillable( selected = getOption("gpttools.service", "openai") ), selectInput("model", "Model", - choices = NULL + choices = NULL ), selectInput( "embed_model", "OpenAI Embedding Model", @@ -269,8 +269,8 @@ server <- function(input, output, session) { ) ) observe(updateSelectInput(session, "source", - choices = c("All", indices()), - selected = getOption("gpttools.sources") + choices = c("All", indices()), + selected = getOption("gpttools.sources") )) observe({ toggle_popover("settings", show = FALSE) From 500420d8b0ad5987ecf847981d810bdbe9224133 Mon Sep 17 00:00:00 2001 From: James Wade Date: Thu, 1 Feb 2024 08:47:40 -0500 Subject: [PATCH 5/5] chore: cleanup for precommit --- .pre-commit-config.yaml | 4 ---- R/document_data.R | 2 +- R/embedding-py.R | 1 - R/transcribe.R | 2 -- inst/retriever/app.R | 1 - 5 files changed, 1 insertion(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8245543..0c66adc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,10 +29,6 @@ repos: - id: no-browser-statement - id: no-debug-statement - id: no-print-statement - - id: deps-in-desc - # args: [--warn_only] - - id: pkgdown - # args: [--warn_only] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: diff --git a/R/document_data.R b/R/document_data.R index 14d3599..e1fd8ba 100644 --- a/R/document_data.R +++ b/R/document_data.R @@ -88,5 +88,5 @@ summarize_data <- function(data, prep_data_prompt <- function(data, method, prompt) { summarized_data <- summarize_data(data = data, method = method) - paste(testthat::capture_output(print(summarized_data)), prompt, sep = "\n") + paste(testthat::capture_output(cat_print(summarized_data)), prompt, sep = "\n") } diff --git a/R/embedding-py.R b/R/embedding-py.R index 1cc96a3..b49c619 100644 --- a/R/embedding-py.R +++ b/R/embedding-py.R @@ -99,7 +99,6 @@ colbert_rerank <- function(documents, model_name = "colbert-ir/colbertv2.0") { ) } - print(paste0("Took ", time$time() - start, " seconds to re-rank documents with ColBERT.")) sorted_data <- scores[order(sapply(scores, function(x) x$score), decreasing = TRUE)] } diff --git a/R/transcribe.R b/R/transcribe.R index 879d1c6..89bbf9e 100644 --- a/R/transcribe.R +++ b/R/transcribe.R @@ -54,8 +54,6 @@ transcribe_audio_chunk <- result <- httr::content(response, "parsed", "application/json") - print(result) - file.remove(tmp_file) return(result) diff --git a/inst/retriever/app.R b/inst/retriever/app.R index 8549af0..99ca16c 100644 --- a/inst/retriever/app.R +++ b/inst/retriever/app.R @@ -53,7 +53,6 @@ make_chat_history <- function(chats) { ) }) |> purrr::list_flatten() - print(history) history }