From 0c655d2e15c74edec099db4c2b940a862c74a9f7 Mon Sep 17 00:00:00 2001
From: James Wade <jhwade@dow.com>
Date: Thu, 1 Feb 2024 08:09:37 -0500
Subject: [PATCH 1/5] fix: improve is_context_needed prompt

---
 R/history.R | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/R/history.R b/R/history.R
index 863b5a2..52a457f 100644
--- a/R/history.R
+++ b/R/history.R
@@ -340,8 +340,6 @@ chat_with_context <- function(query,
     purrr::map_chr(.f = "content") |>
     paste(collapse = "\n\n")
 
-  cat(simple_prompt, "\n\n")
-
   cli_inform("Service: {service}")
   cli_inform("Model: {model}")
 
@@ -384,20 +382,17 @@ is_context_needed <- function(user_prompt,
                               service = getOption("gpttools.service"),
                               model = getOption("gpttools.model")) {
   prompt <-
-    glue::glue("Consider if additional context or history is necessary to
-               ccurately respond to this user prompt. Useful context may include
-               recent information, package documentation, textbook excerpts, or
+    glue::glue("Consider if additional context or history would be useful to
+               accurately respond to this user prompt. Useful context may include
+               information like package documentation, textbook excerpts, or
                other relevant details.
 
                Respond with TRUE if such context is likely to enhance the
-               response, especially for queries involving recent developments,
-               technical subjects, or complex topics. Respond with FALSE if the
-               query seems straightforward or well within the AI's existing
+               response. Respond with FALSE only if the
+               query seems straightforward and well within your existing
                knowledge base.
 
-               Remember, the AI's training includes data only up to a few
-               months ago. If the query might relate to developments after this
-               period, lean towards TRUE.
+               Most queries benefit from additional context.
 
                Respond ONLY with TRUE or FALSE.
                \n\n{user_prompt}")

From 66a17f6b5cd146c970538183dd34f0de45e76393 Mon Sep 17 00:00:00 2001
From: James Wade <jhwade@dow.com>
Date: Thu, 1 Feb 2024 08:13:34 -0500
Subject: [PATCH 2/5] fix: clean up cli calls; ask model to be concise

---
 R/embedding.R |  3 +--
 R/history.R   | 24 ++++++++++++++----------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/R/embedding.R b/R/embedding.R
index b401e6c..a4d1f4e 100644
--- a/R/embedding.R
+++ b/R/embedding.R
@@ -3,9 +3,8 @@ prepare_scraped_files <- function(domain) {
   scraped <-
     arrow::read_parquet(glue("{scraped_dir}/text/{domain}.parquet"))
 
-  if (max(scraped$n_words) > 2e5) {
+  if (max(scraped$n_words) > 1e6) {
     max_index <- scraped[which.max(scraped$n_words), ]
-    print(max_index |> dplyr::select(-text))
     cli_alert_warning(
       c(
         "!" = "Entry {max_index$link} of {domain} has at least 200,000 words.",
diff --git a/R/history.R b/R/history.R
index 52a457f..2ea9f2d 100644
--- a/R/history.R
+++ b/R/history.R
@@ -63,7 +63,7 @@ delete_history <- function(local = FALSE) {
     if (delete_file) {
       file.remove(x)
     } else {
-      cli_inform("{x} was **not** deleted.")
+      cli_alert_info("{x} was **not** deleted.")
     }
   })
   invisible()
@@ -137,11 +137,11 @@ get_query_context <- function(query_embedding, full_context, k) {
 
 check_context <- function(context) {
   if (rlang::is_null(context)) {
-    cli_warn(
+    cli_alert_warning(
       "You specified that context should be added but none was provided."
     )
   } else if (!is.data.frame(context)) {
-    cli_warn(
+    cli_alert_warning(
       "You passed a {class(context)} to but a data.frame was expected."
     )
   }
@@ -209,7 +209,7 @@ chat_with_context <- function(query,
   )
 
   if (rlang::is_true(add_context) || rlang::is_true(add_history)) {
-    cli_inform("Creating embedding from query.")
+    cli_alert_info("Creating embedding from query.")
     query_embedding <- get_query_embedding(query,
       local = local,
       model = embedding_model
@@ -217,7 +217,7 @@ chat_with_context <- function(query,
   }
 
   if (rlang::is_true(add_context) && rlang::is_true(need_context)) {
-    cli_inform("Attempting to add context to query.")
+    cli_alert_info("Attempting to add context to query.")
     full_context <-
       get_query_context(
         query_embedding,
@@ -242,8 +242,8 @@ chat_with_context <- function(query,
   }
 
   if (rlang::is_true(add_history) && rlang::is_true(need_context)) {
-    cli_inform("Attempting to add chat history to query.")
-    cli_inform("Chat history: {class(chat_history)}")
+    cli_alert_info("Attempting to add chat history to query.")
+    cli_alert_info("Chat history: {class(chat_history)}")
     if (rlang::is_null(chat_history)) {
       related_history <- "No related history found."
     } else {
@@ -258,7 +258,7 @@ chat_with_context <- function(query,
         paste(collapse = "\n\n")
     }
   } else {
-    cli_inform("Not attempting to add chat history to query.")
+    cli_alert_info("Not attempting to add chat history to query.")
     related_history <- "No related history found."
   }
 
@@ -296,6 +296,10 @@ chat_with_context <- function(query,
     )
 
   prompt_context <- list(
+    list(
+      role = "system",
+      content = "You provide succinct, concise, and accurate responses."
+    ),
     list(
       role = "user",
       content = glue("---\nContext:\n{context}\n---")
@@ -340,8 +344,8 @@ chat_with_context <- function(query,
     purrr::map_chr(.f = "content") |>
     paste(collapse = "\n\n")
 
-  cli_inform("Service: {service}")
-  cli_inform("Model: {model}")
+  cli_alert_info("Service: {service}")
+  cli_alert_info("Model: {model}")
 
   answer <- gptstudio::chat(
     prompt = simple_prompt,

From 5b480285534a5e71bc76b3d112b64e4050fbb25a Mon Sep 17 00:00:00 2001
From: James Wade <jhwade@dow.com>
Date: Thu, 1 Feb 2024 08:28:23 -0500
Subject: [PATCH 3/5] ci: update precommit config

---
 .pre-commit-config.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8e77eff..8245543 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -28,9 +28,10 @@ repos:
     -   id: parsable-R
     -   id: no-browser-statement
     -   id: no-debug-statement
-    # -   id: deps-in-desc
+    -   id: no-print-statement
+    -   id: deps-in-desc
     #     args: [--warn_only]
-    # -   id: pkgdown
+    -   id: pkgdown
     #     args: [--warn_only]
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.5.0

From 1d21a2e04b0ae3bb6784c62b2f7f6bd4416de212 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 1 Feb 2024 13:29:59 +0000
Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 inst/retriever/app.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/inst/retriever/app.R b/inst/retriever/app.R
index 1d3f4c9..8549af0 100644
--- a/inst/retriever/app.R
+++ b/inst/retriever/app.R
@@ -116,7 +116,7 @@ ui <- page_fillable(
             selected = getOption("gpttools.service", "openai")
           ),
           selectInput("model", "Model",
-                      choices = NULL
+            choices = NULL
           ),
           selectInput(
             "embed_model", "OpenAI Embedding Model",
@@ -269,8 +269,8 @@ server <- function(input, output, session) {
     )
   )
   observe(updateSelectInput(session, "source",
-                            choices = c("All", indices()),
-                            selected = getOption("gpttools.sources")
+    choices = c("All", indices()),
+    selected = getOption("gpttools.sources")
   ))
   observe({
     toggle_popover("settings", show = FALSE)

From 500420d8b0ad5987ecf847981d810bdbe9224133 Mon Sep 17 00:00:00 2001
From: James Wade <jhwade@dow.com>
Date: Thu, 1 Feb 2024 08:47:40 -0500
Subject: [PATCH 5/5] chore: cleanup for precommit

---
 .pre-commit-config.yaml | 4 ----
 R/document_data.R       | 2 +-
 R/embedding-py.R        | 1 -
 R/transcribe.R          | 2 --
 inst/retriever/app.R    | 1 -
 5 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8245543..0c66adc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,10 +29,6 @@ repos:
     -   id: no-browser-statement
     -   id: no-debug-statement
     -   id: no-print-statement
-    -   id: deps-in-desc
-    #     args: [--warn_only]
-    -   id: pkgdown
-    #     args: [--warn_only]
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.5.0
     hooks:
diff --git a/R/document_data.R b/R/document_data.R
index 14d3599..e1fd8ba 100644
--- a/R/document_data.R
+++ b/R/document_data.R
@@ -88,5 +88,5 @@ summarize_data <- function(data,
 prep_data_prompt <- function(data, method, prompt) {
   summarized_data <- summarize_data(data = data, method = method)
 
-  paste(testthat::capture_output(print(summarized_data)), prompt, sep = "\n")
+  paste(testthat::capture_output(cat_print(summarized_data)), prompt, sep = "\n")
 }
diff --git a/R/embedding-py.R b/R/embedding-py.R
index 1cc96a3..b49c619 100644
--- a/R/embedding-py.R
+++ b/R/embedding-py.R
@@ -99,7 +99,6 @@ colbert_rerank <- function(documents, model_name = "colbert-ir/colbertv2.0") {
     )
   }
 
-  print(paste0("Took ", time$time() - start, " seconds to re-rank documents with ColBERT."))
   sorted_data <- scores[order(sapply(scores, function(x) x$score), decreasing = TRUE)]
 }
 
diff --git a/R/transcribe.R b/R/transcribe.R
index 879d1c6..89bbf9e 100644
--- a/R/transcribe.R
+++ b/R/transcribe.R
@@ -54,8 +54,6 @@ transcribe_audio_chunk <-
 
     result <- httr::content(response, "parsed", "application/json")
 
-    print(result)
-
     file.remove(tmp_file)
 
     return(result)
diff --git a/inst/retriever/app.R b/inst/retriever/app.R
index 8549af0..99ca16c 100644
--- a/inst/retriever/app.R
+++ b/inst/retriever/app.R
@@ -53,7 +53,6 @@ make_chat_history <- function(chats) {
       )
     }) |>
     purrr::list_flatten()
-  print(history)
   history
 }