diff --git a/.Rbuildignore b/.Rbuildignore
index 003bb82..73e3223 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -9,7 +9,6 @@
^cran-comments\.md$
^docs$
^gpttools\.Rproj$
-^images$
^pkgdown$
^revdep$
^.lintr$
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7cdfd7f..4c97da8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
rev: v4.5.0
hooks:
- id: check-added-large-files
- args: ['--maxkb=2000']
+ args: ['--maxkb=25000']
- repo: https://github.com/pre-commit-ci/pre-commit-ci-config
rev: v1.6.1
hooks:
diff --git a/NAMESPACE b/NAMESPACE
index 6da8cdc..8297dba 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,9 +1,10 @@
# Generated by roxygen2: do not edit by hand
export(add_roxygen_addin)
-export(addin_run_retriever)
+export(addin_run_scrape_pkgs)
export(addin_run_select_pkgs)
export(chat_with_context)
+export(chat_with_retrieval)
export(collect_dataframes)
export(crawl)
export(create_index_from_audio)
@@ -13,6 +14,7 @@ export(delete_history)
export(delete_index)
export(document_data)
export(get_selection)
+export(get_transformer_model)
export(gpt_sitrep)
export(ingest_pdf)
export(insert_text)
@@ -25,6 +27,7 @@ export(remove_lines_and_spaces)
export(run_document_data)
export(run_select_pkgs_app)
export(save_user_config)
+export(scrape_pkg_sites)
export(scrape_url)
export(script_to_function_addin)
export(set_user_config)
diff --git a/R/addin-run-retriever.R b/R/addin-run-retriever.R
index 91b2dcb..9630ace 100644
--- a/R/addin-run-retriever.R
+++ b/R/addin-run-retriever.R
@@ -1,6 +1,6 @@
-#' Run Chat GPT with Retrieval
+#' Run Chat with Retrieval
#'
-#' Run the ChatGPT shiny app with semantic search and document retrieval
+#' Run the Chat with Retrieval shiny app
#'
#' @export
#'
@@ -9,9 +9,9 @@
#' @examples
#' # Call the function as an RStudio addin
#' \dontrun{
-#' addin_run_retriever()
+#' chat_with_retrieval()
#' }
-addin_run_retriever <- function() {
+chat_with_retrieval <- function() {
indices <- list_index()
if (length(indices) == 0) {
cli::cli_abort(
diff --git a/R/addin_select_pkgs.R b/R/addin_scrape_pkgs.R
similarity index 58%
rename from R/addin_select_pkgs.R
rename to R/addin_scrape_pkgs.R
index f2726e0..430efd2 100644
--- a/R/addin_select_pkgs.R
+++ b/R/addin_scrape_pkgs.R
@@ -10,6 +10,50 @@ addin_run_select_pkgs <- function() {
run_select_pkgs_app()
}
+#' Addin to scrape installed packages
+#'
+#' Invokes RStudio addin functionality to scrape select installed packages and
+#' create indices for use in the "Chat with Retrieval" application.
+#'
+#' @export
+#' @return No return value, called for side effects only.
+#'
+#' @examplesIf rlang::is_interactive()
+#' # This function is typically run within RStudio as an Addin.
+#' # It would not be called directly in the code.
+#' addin_run_scrape_pkgs()
+#'
+#' @note This addin requires RStudio to be available and will stop with an
+#' error message if the RStudio API is not accessible.
+#'
+addin_run_scrape_pkgs <- function() {
+ # Check if RStudio API is available
+ if (!rstudioapi::isAvailable()) {
+ cli::cli_abort("The rstudioapi is not available.")
+ }
+ # Get user feedback with rstudioapi
+ proceed <-
+ rstudioapi::showQuestion(
+ title = "Scrape Packages",
+ message = "This will scrape installed packages and create indices to use
+ with the \"Chat with Retrieval\" app. Would you like to proceed?"
+ )
+
+ # Proceed with scraping if the user agrees
+ if (proceed) {
+ cli::cli_alert_info("Scraping packages as a background job.")
+ # Run the scrape packages script as a background job
+ rstudioapi::jobRunScript(
+ path = system.file("scripts/scrape_pkgs.R",
+ package = "gpttools"
+ ),
+ name = "Scraping Pacakges"
+ )
+ } else {
+ cli::cli_alert_info("Scraping cancelled.")
+ }
+}
+
#' Run a Shiny App to Select and Save Installed Packages
#'
#' This function launches a Shiny application that allows users to select from a
@@ -65,7 +109,7 @@ run_select_pkgs_app <- function() {
{
installed_packages |> dplyr::filter(Package %in% input$selected_pkg)
},
- options = list(pageLength = 10)
+ # options = list(pageLength = 5)
)
}
diff --git a/R/config.R b/R/config.R
index 7a33f2c..e19a3e0 100644
--- a/R/config.R
+++ b/R/config.R
@@ -18,7 +18,7 @@
save_user_config <- function(service = "openai",
model = "gpt-4-1106-preview",
task = "Permissive Chat",
- embeddings = "local",
+ embeddings = TRUE,
k_context = 4,
k_history = 4,
save_history = TRUE,
diff --git a/R/embedding-py.R b/R/embedding-py.R
index 8d5ee66..c83548c 100644
--- a/R/embedding-py.R
+++ b/R/embedding-py.R
@@ -10,7 +10,34 @@
# }
# nolint end
-# uses transformers instead of sentence transformers
+#' Get Transformer Model
+#'
+#' This function is designed to download and load a pre-trained transformer
+#' model using the transformers Python library via the reticulate package.
+#' It checks for the availability of the required Python package and then
+#' downloads the specified transformer model.
+#'
+#' @param model_name The name of the transformer model to download. This should
+#' be in the format "username/modelname" as recognized by the transformers
+#' library. Default is "jinaai/jina-embeddings-v2-base-en".
+#'
+#' @return An object of the downloaded transformer model.
+#'
+#' @export
+#'
+#' @note Users of this function need to ensure that the Python environment
+#' is set up with the 'transformers' package installed. The function uses
+#' the 'reticulate' R package to interface with Python and the user may need
+#' to configure it accordingly.
+#'
+#' @examples
+#' \dontrun{
+#' # To get the default transformer model:
+#' get_transformer_model()
+#'
+#' # To get a custom transformer model by specifying the model name:
+#' get_transformer_model("bert-base-uncased")
+#' }
get_transformer_model <-
function(model_name = "jinaai/jina-embeddings-v2-base-en") {
py_pkg_is_available("transformers")
diff --git a/R/history.R b/R/history.R
index f8866df..83be47d 100644
--- a/R/history.R
+++ b/R/history.R
@@ -174,20 +174,16 @@ check_context <- function(context) {
#' @param overwrite Whether to overwrite the history file or not. Default is
#' FALSE.
#' @param local Whether to use the local model or not. Default is FALSE.
+#' @param embedding_model A model object to use for embedding. Only needed if
+#' local is TRUE. Default is NULL.
#'
#' @return A list containing the prompt, context, and answer.
#' @export
#'
-#' @examples
-#' \dontrun{
-#' # Define a query and context
+#' @examplesIf rlang::is_interactive()
+#' context <- "France is a country in Western Europe."
#' query <- "What is the capital of France?"
-#' context <- "France is a country in Western Europe. Its capital is a famous
-#' city known for its culture, art, and history."
-#'
-#' # Call the chat_with_context function
#' result <- chat_with_context(query = query, context = context)
-#' }
chat_with_context <- function(query,
service = "openai",
model = "gpt-4",
@@ -202,14 +198,15 @@ chat_with_context <- function(query,
k_history = 4,
save_history = TRUE,
overwrite = FALSE,
- local = FALSE) {
+ local = FALSE,
+ embedding_model = NULL) {
arg_match(task, c("Context Only", "Permissive Chat"))
- if (local) {
- embedding_model <- get_transformer_model()
- } else {
- embedding_model <- NULL
- }
+ need_context <- is_context_needed(
+ user_prompt = query,
+ service = service,
+ model = model
+ )
if (rlang::is_true(add_context) || rlang::is_true(add_history)) {
query_embedding <- get_query_embedding(query,
@@ -218,7 +215,7 @@ chat_with_context <- function(query,
)
}
- if (rlang::is_true(add_context)) {
+ if (rlang::is_true(add_context) && rlang::is_true(need_context)) {
full_context <-
get_query_context(
query_embedding,
@@ -229,10 +226,11 @@ chat_with_context <- function(query,
dplyr::pull("chunks") |>
paste(collapse = "\n\n")
} else {
+ full_context <- "No context provided."
context <- "No additional context provided."
}
- if (add_history) {
+ if (rlang::is_true(add_history) && rlang::is_true(need_context)) {
cli::cli_inform("Attempting to add chat history to query.")
cli::cli_inform("Chat history: {class(chat_history)}")
if (rlang::is_null(chat_history)) {
@@ -261,11 +259,11 @@ chat_with_context <- function(query,
role = "system",
content =
glue(
- "You are a helpful chat bot that answers questions based on ",
- "the context provided by the user. If the user does not ",
- "provide related context, say \"I am not able to answer that ",
- "question. Maybe try rephrasing your question in a different ",
- "way.\""
+ "You are a helpful chat bot that answers questions based on
+ the context provided by the user. If the user does not
+ provide related context and you need context to respond
+ accurately, say \"I am not able to answer that question.
+ Maybe try rephrasing your question in a different way.\""
)
)
),
@@ -275,11 +273,12 @@ chat_with_context <- function(query,
role = "system",
content =
glue(
- "You are a helpful chat bot that answers questions based on ",
- "on the context provided by the user. If the user does not ",
- "provide context, answer the quest but first say \"I am not ",
- "able to answer that question with the context you gave me, ",
- "but here is my best answer.",
+ "You are a helpful chat bot that answers questions based on
+ on the context provided by the user. If the user does not
+ provide context and you need context to respond correctly,
+ answer the quest but first say \"I am not able to answer
+ that question with the context you gave me, but here is my
+ best but here is my best answer."
)
)
)
@@ -370,3 +369,24 @@ chat_with_context <- function(query,
list(prompt_without_context, full_context, answer$response)
}
+
+
+is_context_needed <- function(user_prompt,
+ service = getOption("gpttools.service"),
+ model = getOption("gpttools.model")) {
+ prompt <-
+ glue::glue("Would additional context or history be helpful to respond to
+ this prompt from the user? If yes, answer TRUE. If no, answer
+ FALSE. ONLY answer TRUE or FALSE. It is crucial that you only
+ answer TRUE or FALSE.\n\n{user_prompt}")
+
+ gptstudio:::gptstudio_create_skeleton(
+ service = service,
+ model = model,
+ prompt = prompt,
+ stream = FALSE
+ ) |>
+ gptstudio:::gptstudio_request_perform() |>
+ purrr::pluck("response") |>
+ as.logical()
+}
diff --git a/R/site-index.R b/R/site-index.R
index 5d3ea91..771b2df 100644
--- a/R/site-index.R
+++ b/R/site-index.R
@@ -67,6 +67,25 @@ get_pkgs_to_scrape <- function(local = TRUE,
dplyr::rename(version = installed_version)
}
+#' Scrape package sites
+#'
+#' @details This function scrapes the websites for the packages specified in
+#' the `sites` dataframe. If `sites` is empty, it alerts the user that there
+#' are no packages to scrape and returns `NULL` invisibly. If the user
+#' confirms, it scrapes each package site using the supplied details.
+#'
+#'
+#' @param sites A data frame containing the package sites to be scraped. If not
+#' provided, it defaults to `get_pkgs_to_scrape(local = TRUE)`.
+#' @param service The service to be used for scraping, defaults to "local".
+#' @param index_create Logical indicating whether to create an index, defaults
+#' to `TRUE`.
+#' @param overwrite Logical indicating whether to overwrite existing content,
+#' defaults to `TRUE`.
+#' @return Invisible `NULL`. The function is called for its side effects.
+#' @examplesIf rlang::is_interactive()
+#' scrape_pkg_sites()
+#' @export
scrape_pkg_sites <- function(sites = get_pkgs_to_scrape(local = TRUE),
service = "local",
index_create = TRUE,
@@ -76,12 +95,22 @@ scrape_pkg_sites <- function(sites = get_pkgs_to_scrape(local = TRUE),
return(invisible())
}
- cli::cli_text("You are about to scrape {nrow(sites)} package site page{?s}")
- usethis::ui_yeah("Do you want to continue?")
+ if (rlang::is_interactive()) {
+ cli::cli_text("You are about to scrape {nrow(sites)} package site page{?s}")
+ continue <- usethis::ui_yeah("Do you want to continue?")
+ } else {
+ continue <- TRUE
+ }
+
+ if (!continue) {
+ cli_alert_info("Scraping aborted.")
+ return(invisible())
+ }
sites |>
dplyr::select(url, version, name) |>
purrr::pmap(.f = \(url, version, name) {
+ # crawl() scrapes each site and creates an index when requested
crawl(
url = url,
index_create = index_create,
diff --git a/README.Rmd b/README.Rmd
index 5e0f347..ecf5b0e 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -133,61 +133,6 @@ The R package is designed to share only the text or code that you specifically h
**IMPORTANT: To maintain the privacy of your data, do not highlight, include in a prompt, or otherwise upload any sensitive data, code, or text that should remain confidential.**
-
-## Usage
-
-The package has four addins:
-
-- Comment code: uses code-davinci-edit-001 model from OpenAI to add
-comments to your code with the prompt: "add comments to each line of
-code, explaining what the code does"
-
-- Add roxygen: uses text-davinci-003 model from OpenAI to add and fill
-out a roxygen skeleton to your highlight code (should be a function)
-with the prompt: "insert roxygen skeleton to document this function"
-
-- Convert script to function: uses code-davinci-edit-001 model from
-OpenAI to convert a highlighted script into a function with the
-prompt: "convert this R code into an R function"
-
-- Write a unit test for a function with testthat: uses
-text-davinci-003 model from OpenAI to suggest a unit test for a
-selected function with the prompt: "Suggest a unit text for this
-function using the testthat package"
-
-- A freeform addins that let's you specify the prompt using the "edit"
-functionality of ChatGPT
-
-You can access these addins through the addin toolbar or use the command
-pallet: `CMD/CTRL+SHIFT+P`. Examples of each of the addins in action is
-below.
-
-![](man/figures/image-1429395462.png)
-
-### Comment Code
-
-
-
-### Add Roxygen
-
-
-
-### Convert a Script into Functions
-
-
-
-### Suggest a Unit Test for a Function
-
-
-
## Code of Conduct
Please note that the gpttools project is released with a [Contributor
diff --git a/README.md b/README.md
index 2f6c17f..febf69d 100644
--- a/README.md
+++ b/README.md
@@ -172,56 +172,6 @@ service provider.
include in a prompt, or otherwise upload any sensitive data, code, or
text that should remain confidential.**
-## Usage
-
-The package has four addins:
-
-- Comment code: uses code-davinci-edit-001 model from OpenAI to add
- comments to your code with the prompt: “add comments to each line of
- code, explaining what the code does”
-
-- Add roxygen: uses text-davinci-003 model from OpenAI to add and fill
- out a roxygen skeleton to your highlight code (should be a function)
- with the prompt: “insert roxygen skeleton to document this function”
-
-- Convert script to function: uses code-davinci-edit-001 model from
- OpenAI to convert a highlighted script into a function with the
- prompt: “convert this R code into an R function”
-
-- Write a unit test for a function with testthat: uses text-davinci-003
- model from OpenAI to suggest a unit test for a selected function with
- the prompt: “Suggest a unit text for this function using the testthat
- package”
-
-- A freeform addins that let’s you specify the prompt using the “edit”
- functionality of ChatGPT
-
-You can access these addins through the addin toolbar or use the command
-pallet: `CMD/CTRL+SHIFT+P`. Examples of each of the addins in action is
-below.
-
-![](man/figures/image-1429395462.png)
-
-### Comment Code
-
-
-
-### Add Roxygen
-
-
-
-### Convert a Script into Functions
-
-
-
-### Suggest a Unit Test for a Function
-
-
-
## Code of Conduct
Please note that the gpttools project is released with a [Contributor
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 84ea960..311b924 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -10,13 +10,15 @@ navbar:
left: [addins, services, reference, news]
right: [search, github]
components:
- # addins:
- # text: Addins
- # menu:
- # - text: Chat
- # href: articles/chat.html
- # - text: Chat in Source
- # href: articles/chat-in-source.html
+ addins:
+ text: Addins
+ menu:
+ - text: Chat with Retrieval
+ href: articles/chat-with-retrieval.html
+ - text: Scrape Packages
+ href: articles/scrape-pkgs.html
+ - text: Other Addins
+ href: articles/other-addins.html
services:
text: Services
menu:
diff --git a/inst/retriever/app.R b/inst/retriever/app.R
index 9d4d6b3..f12ae3e 100644
--- a/inst/retriever/app.R
+++ b/inst/retriever/app.R
@@ -60,15 +60,12 @@ api_services <-
) |>
purrr::discard(~ .x == "gptstudio_request_perform.default")
-ui <- page_fluid(
- waiter::use_waiter(),
+ui <- page_fillable(
+ waiter::useWaiter(),
+ waiter::waiterOnBusy(html = waiter::spin_3circles(), color = waiter::transparent(0.5)),
window_height_ui("height"),
- theme = bs_theme(bootswatch = "morph", version = 5),
- tags$style("
- .card, .accordion {
- box-shadow: none !important;
- }
- "),
+ theme = bs_theme(bootswatch = "litera", version = 5) |>
+ bs_add_rules(".scrollable-popover .popover-body { max-height: 300px; overflow-y: auto; }"),
tags$head(tags$script(HTML("
$(document).on('keydown', '#chat_input', function(e) {
if ((e.keyCode == 10 || e.keyCode == 13) && (!e.shiftKey)) {
@@ -79,95 +76,96 @@ ui <- page_fluid(
}
});"))),
title = "Retreiver from gpttools",
- br(),
- layout_column_wrap(
- width = 1,
- height = "100%",
- heights_equal = "row",
- card(
- card_header("Write Prompt",
- class = "bg-primary d-flex align-items-center",
- popover(
- id = "settings",
- bs_icon("gear", class = "ms-auto"),
- accordion_panel(
- "Data & Task",
- icon = bs_icon("robot", class = "ms-auto"),
- selectInput(
- "source", "Data Source",
- choices = NULL,
- multiple = TRUE
- ),
- selectInput(
- "task", "Task",
- choices = c("Context Only", "Permissive Chat"),
- selected = getOption("gpttools.task", "Permissive Chat")
- )
+ card(
+ card_header("Chat with Retrieva",
+ class = "bg-primary d-flex align-items-center",
+ popover(
+ id = "settings",
+ options = list(customClass = "scrollable-popover"),
+ bs_icon("gear", class = "ms-auto"),
+ accordion_panel(
+ "Data & Task",
+ icon = bs_icon("robot", class = "ms-auto"),
+ selectInput(
+ "source", "Data Source",
+ choices = NULL,
+ multiple = TRUE
),
- br(),
- accordion_panel(
- "Preferences",
- icon = bs_icon("sliders", class = "ms-auto"),
- selectInput(
- "service", "AI Service",
- choices = api_services,
- selected = getOption("gpttools.service", "openai")
- ),
- selectInput("model", "Model",
- choices = NULL
- ),
- radioButtons(
- "save_history", "Save & Use History",
- choiceNames = c("Yes", "No"),
- choiceValues = c(TRUE, FALSE),
- selected = getOption("gpttools.save_history", FALSE),
- inline = TRUE,
- ),
- radioButtons(
- "local", "Local Embeddings",
- choiceNames = c("Yes", "No"),
- choiceValues = c(TRUE, FALSE),
- selected = getOption("gpttools.local_embed"),
- inline = TRUE,
- ),
- sliderInput(
- "n_docs", "Docs to Include (#)",
- min = 0,
- max = 20,
- value = getOption("gpttools.k_context", 4)
- ),
- sliderInput(
- "n_history", "Chat History to Include (#)",
- min = 0, max = 20,
- value = getOption("gpttools.k_history", 4)
- )
+ selectInput(
+ "task", "Task",
+ choices = c("Context Only", "Permissive Chat"),
+ selected = getOption("gpttools.task", "Permissive Chat")
+ )
+ ),
+ br(),
+ accordion_panel(
+ "Preferences",
+ icon = bs_icon("sliders", class = "ms-auto"),
+ selectInput(
+ "service", "AI Service",
+ choices = api_services,
+ selected = getOption("gpttools.service", "openai")
),
- br(),
- actionButton(
- "save_settings", "Save Settings",
- icon = icon("save", class = "ms-auto"),
- class = "btn-primary"
+ selectInput("model", "Model",
+ choices = NULL
),
- title = "Plot settings"
- )
+ radioButtons(
+ "save_history", "Save & Use History",
+ choiceNames = c("Yes", "No"),
+ choiceValues = c(TRUE, FALSE),
+ selected = getOption("gpttools.save_history", FALSE),
+ inline = TRUE,
+ ),
+ radioButtons(
+ "local", "Local Embeddings",
+ choiceNames = c("Yes", "No"),
+ choiceValues = c(TRUE, FALSE),
+ selected = getOption("gpttools.local_embed"),
+ inline = TRUE,
+ ),
+ sliderInput(
+ "n_docs", "Docs to Include (#)",
+ min = 0,
+ max = 20,
+ value = getOption("gpttools.k_context", 4)
+ ),
+ sliderInput(
+ "n_history", "Chat History to Include (#)",
+ min = 0, max = 20,
+ value = getOption("gpttools.k_history", 4)
+ )
+ ),
+ br(),
+ actionButton(
+ "save_settings", "Save Settings",
+ icon = icon("save", class = "ms-auto"),
+ class = "btn-primary"
+ ),
+ title = "App Settings"
+ )
+ ),
+ uiOutput("all_chats_box"),
+ div(
+ class = "mt-auto",
+ style = htmltools::css(
+ "margin-left" = "20px",
+ "margin-right" = "20px"
),
- uiOutput("all_chats_box"),
- layout_column_wrap(
- width = NULL, fill = FALSE,
- style = htmltools::css(grid_template_columns = "3fr 1fr"),
- card(
+ fluidRow(
+ column(
+ 10,
textAreaInput(
- inputId = "chat_input", label = NULL,
- value = "", resize = "vertical", rows = 1,
- width = "100%"
+ inputId = "chat_input", label = NULL, rows = 1,
+ value = "", width = "100%", resize = "vertical"
)
),
- card(
- class = "btn-primary",
+ column(
+ 2,
actionButton(
- inputId = "chat", label = "Chat",
- icon = icon("robot"),
- width = "100%", class = "btn-sucess"
+ inputId = "chat",
+ label = icon("fas fa-paper-plane"),
+ class = "btn-primary m-1",
+ width = "100%"
)
)
)
@@ -181,17 +179,35 @@ server <- function(input, output, session) {
r$all_chats_formatted <- NULL
r$all_chats <- NULL
height <- window_height_server("height")
+ transformer_model <- reactive({
+ if (input$local) {
+ get_transformer_model()
+ } else {
+ NULL
+ }
+ })
index <- reactive({
if (input$local == TRUE) {
if (input$source == "All") {
load_index(domain = "All", local_embeddings = TRUE)
} else {
- load_index(glue::glue("local/{input$source}"), local_embeddings = TRUE)
+ purrr::map(input$source, \(x) {
+ load_index(glue::glue("local/{input$source}"),
+ local_embeddings = TRUE
+ ) |>
+ tibble::as_tibble()
+ }) |> dplyr::bind_rows()
}
} else {
- load_index(input$source)
+ purrr::map(input$source, \(x) {
+ load_index(x,
+ local_embeddings = TRUE
+ ) |>
+ tibble::as_tibble()
+ }) |> dplyr::bind_rows()
}
})
+
indices <- reactive({
if (input$local == TRUE) {
list_index(dir = "index/local") |> tools::file_path_sans_ext()
@@ -226,13 +242,6 @@ server <- function(input, output, session) {
)
}) |> bindEvent(input$save_settings)
observe({
- waiter_show(
- html = tagList(
- waiter::spin_facebook(),
- h3("Asking ChatGPT...")
- ),
- color = waiter::transparent(0.5)
- )
interim <- chat_with_context(
query = input$chat_input,
service = input$service,
@@ -247,10 +256,24 @@ server <- function(input, output, session) {
k_history = input$n_history,
save_history = input$save_history,
overwrite = FALSE,
- local = input$local
+ local = input$local,
+ embedding_model = transformer_model()
)
new_response <- interim[[3]]
- r$context_links <- c(r$context_links, interim[[2]]$link)
+
+ if (is.character(interim[[2]])) {
+ if (length(r$context_links) == 0) {
+ r$context_links <- "No context used so far."
+ }
+ } else {
+ new_links <- interim[[2]]$link
+ r$context_links <- c(r$context_links, new_links)
+ }
+ if (length(r$context_links) > 1) {
+ r$context_links <-
+ r$context_links[r$context_links != "No context used so far."]
+ }
+
r$all_chats <-
c(
interim[[1]],
@@ -262,7 +285,6 @@ server <- function(input, output, session) {
)
)
r$all_chats_formatted <- make_chat_history(r$all_chats)
- waiter::waiter_hide()
updateTextAreaInput(session, "chat_input", value = "")
}) |>
bindEvent(input$chat)
@@ -270,7 +292,7 @@ server <- function(input, output, session) {
output$all_chats_box <- renderUI({
req(length(r$context_links) > 0)
card(
- height = height() - 300,
+ height = height() - 200,
card_body(
r$all_chats_formatted,
markdown("**Sources**"),
diff --git a/inst/rstudio/addins.dcf b/inst/rstudio/addins.dcf
index 9d16919..37945fc 100644
--- a/inst/rstudio/addins.dcf
+++ b/inst/rstudio/addins.dcf
@@ -1,10 +1,10 @@
Name: Chat with Retrieval
-Description: Semantic search and document retrieval for use with ChatGPT
-Binding: addin_run_retriever
+Description: Chat app with Retrieval for use with various AI services
+Binding: chat_with_retrieval
Interactive: true
Name: Convert script to function
-Description: Uses OpenAI's GPT-3.5 to generate comments for your code
+Description: Generate comments for your code
Binding: script_to_function_addin
Interactive: false
@@ -19,12 +19,12 @@ Binding: suggest_unit_test_addin
Interactive: false
Name: Document Data
-Description: Uses AI service to document data for a package with help from the user
+Description: Document data for a package with help from the user
Binding: document_data
Interactive: true
Name: Suggest Improvements
-Description: Uses AI service to make suggestions to improve user selected code
+Description: Make suggestions to improve user selected code
Binding: suggest_code_improvements
Interactive: false
@@ -32,3 +32,8 @@ Name: Select Packages to Scrape
Description: Select packages to scrape for use in Chat with Retrieval app
Binding: addin_run_select_pkgs
Interactive: true
+
+Name: Scrape Packages
+Description: Scrape packages for use in Chat with Retrieval app
+Binding: addin_run_scrape_pkgs
+Interactive: true
diff --git a/inst/scripts/scrape_other_docs.R b/inst/scripts/scrape_other_docs.R
new file mode 100644
index 0000000..e6d08be
--- /dev/null
+++ b/inst/scripts/scrape_other_docs.R
@@ -0,0 +1,65 @@
+library(gpttools)
+
+resources <-
+ tibble::tribble(
+ ~url, ~name,
+ "https://docs.posit.co/", "posit docs",
+ "https://www.tmwr.org/", "tmwr",
+ "https://r4ds.hadley.nz/", "r4ds",
+ "https://adv-r.hadley.nz/", "adv-r",
+ "https://r-pkgs.org/", "r-pkgs",
+ "https://mastering-shiny.org/", "mastering-shiny",
+ "https://quarto.org/", "quarto",
+ "https://ggplot2-book.org/", "ggplot2-book",
+ "https://smltar.com/", "smltar",
+ "https://unleash-shiny.rinterface.com/", "unleash-shiny",
+ "https://openintro-ims.netlify.app/", "openintro-ims",
+ "https://www.bayesrulesbook.com/", "bayesrulesbook",
+ "https://engineering-shiny.org/", "engineering-shiny",
+ "https://design.tidyverse.org/", "design-tidyverse",
+ "https://www.tidyverse.org/", "tidyverse-site",
+ "https://r-graphics.org/", "r-graphics",
+ "https://socviz.co/", "socviz",
+ )
+
+scrape_resources <- function(resources) {
+ indices <-
+ gpttools:::read_indexed_pkgs(local = TRUE) |>
+ dplyr::select(name) |>
+ dplyr::distinct() |>
+ dplyr::as_tibble()
+
+ sites <- resources |>
+ dplyr::anti_join(indices, by = "name")
+
+ if (nrow(sites) == 0) {
+ cli_alert_info("No packages to scrape.")
+ return(invisible())
+ }
+
+ if (rlang::is_interactive()) {
+ cli::cli_text("You are about to scrape {nrow(sites)} package site page{?s}")
+ continue <- usethis::ui_yeah("Do you want to continue?")
+ } else {
+ continue <- TRUE
+ }
+
+ if (!continue) {
+ cli_alert_info("Scraping aborted.")
+ return(invisible())
+ }
+
+ sites |>
+ dplyr::select(url, name) |>
+ purrr::pmap(.f = \(url, name) {
+ crawl(
+ url = url,
+ index_create = TRUE,
+ overwrite = TRUE,
+ pkg_name = name,
+ service = "local"
+ )
+ })
+}
+
+scrape_resources(resources)
diff --git a/inst/scripts/scrape_pkgs.R b/inst/scripts/scrape_pkgs.R
new file mode 100644
index 0000000..893ec21
--- /dev/null
+++ b/inst/scripts/scrape_pkgs.R
@@ -0,0 +1,5 @@
+library(gpttools)
+cli::cli_inform("Scraping package sites and creating an index for each.")
+options(repos = c(CRAN = "https://packagemanager.posit.co/all/latest"))
+options("repos")
+scrape_pkg_sites()
diff --git a/man/addin_run_scrape_pkgs.Rd b/man/addin_run_scrape_pkgs.Rd
new file mode 100644
index 0000000..9edf8ab
--- /dev/null
+++ b/man/addin_run_scrape_pkgs.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/addin_scrape_pkgs.R
+\name{addin_run_scrape_pkgs}
+\alias{addin_run_scrape_pkgs}
+\title{Addin to scrape installed packages}
+\usage{
+addin_run_scrape_pkgs()
+}
+\value{
+No return value, called for side effects only.
+}
+\description{
+Invokes RStudio addin functionality to scrape select installed packages and
+create indices for use in the "Chat with Retrieval" application.
+}
+\note{
+This addin requires RStudio to be available and will stop with an
+error message if the RStudio API is not accessible.
+}
+\examples{
+\dontshow{if (rlang::is_interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# This function is typically run within RStudio as an Addin.
+# It would not be called directly in the code.
+addin_run_scrape_pkgs()
+\dontshow{\}) # examplesIf}
+}
diff --git a/man/addin_run_select_pkgs.Rd b/man/addin_run_select_pkgs.Rd
index 570eec4..15d3fdd 100644
--- a/man/addin_run_select_pkgs.Rd
+++ b/man/addin_run_select_pkgs.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/addin_select_pkgs.R
+% Please edit documentation in R/addin_scrape_pkgs.R
\name{addin_run_select_pkgs}
\alias{addin_run_select_pkgs}
\title{Run Package Selector App}
diff --git a/man/chat_with_context.Rd b/man/chat_with_context.Rd
index f5d90f8..c8e9ac2 100644
--- a/man/chat_with_context.Rd
+++ b/man/chat_with_context.Rd
@@ -19,7 +19,8 @@ chat_with_context(
k_history = 4,
save_history = TRUE,
overwrite = FALSE,
- local = FALSE
+ local = FALSE,
+ embedding_model = NULL
)
}
\arguments{
@@ -57,6 +58,9 @@ is TRUE.}
FALSE.}
\item{local}{Whether to use the local model or not. Default is FALSE.}
+
+\item{embedding_model}{A model object to use for embedding. Only needed if
+local is TRUE. Default is NULL.}
}
\value{
A list containing the prompt, context, and answer.
@@ -67,13 +71,9 @@ on the provided context and chat history. It uses GPT-4 architecture to
generate responses.
}
\examples{
-\dontrun{
-# Define a query and context
+\dontshow{if (rlang::is_interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+context <- "France is a country in Western Europe."
query <- "What is the capital of France?"
-context <- "France is a country in Western Europe. Its capital is a famous
-city known for its culture, art, and history."
-
-# Call the chat_with_context function
result <- chat_with_context(query = query, context = context)
-}
+\dontshow{\}) # examplesIf}
}
diff --git a/man/addin_run_retriever.Rd b/man/chat_with_retrieval.Rd
similarity index 53%
rename from man/addin_run_retriever.Rd
rename to man/chat_with_retrieval.Rd
index c518024..5960df0 100644
--- a/man/addin_run_retriever.Rd
+++ b/man/chat_with_retrieval.Rd
@@ -1,20 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/addin-run-retriever.R
-\name{addin_run_retriever}
-\alias{addin_run_retriever}
-\title{Run Chat GPT with Retrieval}
+\name{chat_with_retrieval}
+\alias{chat_with_retrieval}
+\title{Run Chat with Retrieval}
\usage{
-addin_run_retriever()
+chat_with_retrieval()
}
\value{
This function has no return value.
}
\description{
-Run the ChatGPT shiny app with semantic search and document retrieval
+Run the Chat with Retrieval shiny app
}
\examples{
# Call the function as an RStudio addin
\dontrun{
-addin_run_retriever()
+chat_with_retrieval()
}
}
diff --git a/man/crawl.Rd b/man/crawl.Rd
index ba78fa2..59c009e 100644
--- a/man/crawl.Rd
+++ b/man/crawl.Rd
@@ -32,7 +32,7 @@ pages and index if they already exist. Default is FALSE.
\item{pkg_name}{Package name}
\item{service}{The service to use for scraping. Default is "openai". Options
-are "openai", "local", and "azure".}
+are "openai" and "local".}
}
\value{
NULL. The resulting tibble is saved into a parquet file.
diff --git a/man/figures/hex-gpttools.png b/man/figures/hex-gpttools.png
deleted file mode 100644
index 368cf39..0000000
Binary files a/man/figures/hex-gpttools.png and /dev/null differ
diff --git a/man/figures/logo.png b/man/figures/logo.png
index e56c3c7..d29c12c 100644
Binary files a/man/figures/logo.png and b/man/figures/logo.png differ
diff --git a/man/get_transformer_model.Rd b/man/get_transformer_model.Rd
new file mode 100644
index 0000000..218c758
--- /dev/null
+++ b/man/get_transformer_model.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/embedding-py.R
+\name{get_transformer_model}
+\alias{get_transformer_model}
+\title{Get Transformer Model}
+\usage{
+get_transformer_model(model_name = "jinaai/jina-embeddings-v2-base-en")
+}
+\arguments{
+\item{model_name}{The name of the transformer model to download. This should
+be in the format "username/modelname" as recognized by the transformers
+library. Default is "jinaai/jina-embeddings-v2-base-en".}
+}
+\value{
+An object of the downloaded transformer model.
+}
+\description{
+This function is designed to download and load a pre-trained transformer
+model using the transformers Python library via the reticulate package.
+It checks for the availability of the required Python package and then
+downloads the specified transformer model.
+}
+\note{
+Users of this function need to ensure that the Python environment
+is set up with the 'transformers' package installed. The function uses
+the 'reticulate' R package to interface with Python and the user may need
+to configure it accordingly.
+}
+\examples{
+\dontrun{
+# To get the default transformer model:
+get_transformer_model()
+
+# To get a custom transformer model by specifying the model name:
+get_transformer_model("bert-base-uncased")
+}
+}
diff --git a/man/run_select_pkgs_app.Rd b/man/run_select_pkgs_app.Rd
index 363b4c4..a6c6afd 100644
--- a/man/run_select_pkgs_app.Rd
+++ b/man/run_select_pkgs_app.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/addin_select_pkgs.R
+% Please edit documentation in R/addin_scrape_pkgs.R
\name{run_select_pkgs_app}
\alias{run_select_pkgs_app}
\title{Run a Shiny App to Select and Save Installed Packages}
diff --git a/man/save_user_config.Rd b/man/save_user_config.Rd
index eaa3798..b0ee5b3 100644
--- a/man/save_user_config.Rd
+++ b/man/save_user_config.Rd
@@ -8,7 +8,7 @@ save_user_config(
service = "openai",
model = "gpt-4-1106-preview",
task = "Permissive Chat",
- embeddings = "local",
+ embeddings = TRUE,
k_context = 4,
k_history = 4,
save_history = TRUE,
diff --git a/man/scrape_pkg_sites.Rd b/man/scrape_pkg_sites.Rd
new file mode 100644
index 0000000..fc077c1
--- /dev/null
+++ b/man/scrape_pkg_sites.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/site-index.R
+\name{scrape_pkg_sites}
+\alias{scrape_pkg_sites}
+\title{Scrape package sites}
+\usage{
+scrape_pkg_sites(
+ sites = get_pkgs_to_scrape(local = TRUE),
+ service = "local",
+ index_create = TRUE,
+ overwrite = TRUE
+)
+}
+\arguments{
+\item{sites}{A data frame containing the package sites to be scraped. If not
+provided, it defaults to \code{get_pkgs_to_scrape(local = TRUE)}.}
+
+\item{service}{The service to be used for scraping, defaults to "local".}
+
+\item{index_create}{Logical indicating whether to create an index, defaults
+to \code{TRUE}.}
+
+\item{overwrite}{Logical indicating whether to overwrite existing content,
+defaults to \code{TRUE}.}
+}
+\value{
+Invisible \code{NULL}. The function is called for its side effects.
+}
+\description{
+Scrape package sites
+}
+\details{
+This function scrapes the websites for the packages specified in the
+\code{sites} dataframe. If \code{sites} is empty, it alerts the user that there are
+no packages to scrape and returns \code{NULL} invisibly. If the user confirms, it
+scrapes each package site using the supplied details.
+}
+\examples{
+\dontshow{if (rlang::is_interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+scrape_pkg_sites()
+\dontshow{\}) # examplesIf}
+}
diff --git a/media/gpttools-hexsticker.png b/media/gpttools-hexsticker.png
new file mode 100644
index 0000000..47db89c
Binary files /dev/null and b/media/gpttools-hexsticker.png differ
diff --git a/media/gpttools-hexsticker.psd b/media/gpttools-hexsticker.psd
new file mode 100644
index 0000000..0b1e184
Binary files /dev/null and b/media/gpttools-hexsticker.psd differ
diff --git a/media/logo.png b/media/logo.png
index 36706ba..d29c12c 100644
Binary files a/media/logo.png and b/media/logo.png differ
diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png
index 3de1a83..b7b6be3 100644
Binary files a/pkgdown/favicon/apple-touch-icon-120x120.png and b/pkgdown/favicon/apple-touch-icon-120x120.png differ
diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png
index fdc196a..cc60e97 100644
Binary files a/pkgdown/favicon/apple-touch-icon-152x152.png and b/pkgdown/favicon/apple-touch-icon-152x152.png differ
diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png
index 3f6205a..e4660ff 100644
Binary files a/pkgdown/favicon/apple-touch-icon-180x180.png and b/pkgdown/favicon/apple-touch-icon-180x180.png differ
diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png
index 7493ace..cce911e 100644
Binary files a/pkgdown/favicon/apple-touch-icon-60x60.png and b/pkgdown/favicon/apple-touch-icon-60x60.png differ
diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png
index 6763e2a..cc35f57 100644
Binary files a/pkgdown/favicon/apple-touch-icon-76x76.png and b/pkgdown/favicon/apple-touch-icon-76x76.png differ
diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png
index 3f6205a..0b8cabb 100644
Binary files a/pkgdown/favicon/apple-touch-icon.png and b/pkgdown/favicon/apple-touch-icon.png differ
diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png
index dce9040..64a5548 100644
Binary files a/pkgdown/favicon/favicon-16x16.png and b/pkgdown/favicon/favicon-16x16.png differ
diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png
index 31bb0c6..cd8e361 100644
Binary files a/pkgdown/favicon/favicon-32x32.png and b/pkgdown/favicon/favicon-32x32.png differ
diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico
index f42df58..101f29e 100644
Binary files a/pkgdown/favicon/favicon.ico and b/pkgdown/favicon/favicon.ico differ
diff --git a/vignettes/images/chat-with-retrieval-app.png b/vignettes/images/chat-with-retrieval-app.png
new file mode 100644
index 0000000..962871d
Binary files /dev/null and b/vignettes/images/chat-with-retrieval-app.png differ
diff --git a/vignettes/images/chat-with-retrieval-example.gif b/vignettes/images/chat-with-retrieval-example.gif
new file mode 100644
index 0000000..6304287
Binary files /dev/null and b/vignettes/images/chat-with-retrieval-example.gif differ
diff --git a/vignettes/images/chat-with-retrieval-settings.gif b/vignettes/images/chat-with-retrieval-settings.gif
new file mode 100644
index 0000000..01a3b5f
Binary files /dev/null and b/vignettes/images/chat-with-retrieval-settings.gif differ
diff --git a/man/figures/image-1429395462.png b/vignettes/images/image-1429395462.png
similarity index 100%
rename from man/figures/image-1429395462.png
rename to vignettes/images/image-1429395462.png
diff --git a/vignettes/images/logo.png b/vignettes/images/logo.png
new file mode 100644
index 0000000..d29c12c
Binary files /dev/null and b/vignettes/images/logo.png differ
diff --git a/vignettes/images/pkg-selector-app.png b/vignettes/images/pkg-selector-app.png
new file mode 100644
index 0000000..7542822
Binary files /dev/null and b/vignettes/images/pkg-selector-app.png differ
diff --git a/vignettes/no-build/chat-with-retrieval.Rmd b/vignettes/no-build/chat-with-retrieval.Rmd
new file mode 100644
index 0000000..9e6476b
--- /dev/null
+++ b/vignettes/no-build/chat-with-retrieval.Rmd
@@ -0,0 +1,61 @@
+---
+title: "Chat with Retrieval"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Chat with Retrieval}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>",
+ fig.align = "center"
+)
+```
+
+## Introduction
+
+The most powerful feature of `gpttools` is the `Chat with Retrieval` app. This app allows you to use the AI service of your choosing and augment chat with retrieval-based responses. This vignette walks you through using the app and assumes that you have already installed the package and set up your API key. With those in place, you are ready to create an index to use with the app. The typical pattern is to use the `crawl()` function to create an index.
+
+Let's use an example of creating an index from the extremely popular [R for Data Science](https://r4ds.hadley.nz/) book. We'll use the `crawl()` function, which relies on `{rvest}` under the hood, to scrape the text from the online book. The text is then split into chunks, each with an associated embedding vector, and the embedding vectors are used to find similar chunks of text. The chunks are then stored in a parquet file for later use. For more on embeddings, see this fantastic resource from [Vicky Boykis](https://vickiboykis.com): [What are embeddings?](https://vickiboykis.com/what_are_embeddings/).
+
+```{r}
+#| eval: false
+library(gpttools)
+crawl("https://r4ds.hadley.nz/")
+```
+
+The code to scrape the data is relatively simple but is unlikely to work on all sites. From some internal testing, it works quite well on `{pkgdown}` and similar documentation sites.
+
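+A fuller call is sketched below, using the arguments that the package's own scraping scripts pass to `crawl()` (`index_create`, `overwrite`, `pkg_name`, and `service`). The site and package name here are only illustrative; the defaults already cover the common case.
+
+```{r}
+#| eval: false
+# Illustrative only: index a pkgdown site and embed it with the local model
+crawl(
+  url = "https://ggplot2.tidyverse.org/",
+  index_create = TRUE,
+  overwrite = FALSE,
+  pkg_name = "ggplot2",
+  service = "local"
+)
+```
+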
+## Using the app
+
+![Chat with Retrieval app](images/chat-with-retrieval-app.png){width=75%}
+
+Now that you have created an index, you are ready to use the app. The app is available via the `chat_with_retrieval()` function or the RStudio addin `Chat with Retrieval`.
+
+![Simple Example of Chat with Retrieval](images/chat-with-retrieval-example.gif){width=75%}
+
+## Custom Settings
+
+You can customize the app using the built-in settings popover. Settings include:
+
+**Data & Task**
+
+- **Data Source**: The index or indices to use with the app; you can select more than one. Default is "All", which uses all available indices.
+- **Task**: The default is "Permissive Chat", which tells you when it's missing context but still gives you an answer. The other option is "Context Only", which only answers when relevant context is available.
+
+**Preferences**
+
+- **AI Service**: The AI service to use with the app. The default is OpenAI, but you can also use HuggingFace, Google AI Studio, Anthropic, and more. **Note**: you must set up each service separately with your own account information and API key.
+- **Model**: The model to use with the AI service. The default is "gpt-4-1106-preview", which is the most powerful model available from OpenAI.
+- **Save & Use History**: The default is FALSE. If TRUE, the app will save your chat history and allow you to use it as context for future chats.
+- **Local Embeddings**: The default is TRUE, which downloads a model and generates embeddings locally. If FALSE, OpenAI's API is used to generate embeddings.
+- **Docs to Include (#)**: The default is 4, which includes the 4 most similar documents. You can increase or decrease this number as needed.
+- **Chat History to Include (#)**: The default is 4, which includes the 4 most similar chat history items. You can increase or decrease this number as needed.
+
+Click `Save Settings` to save your settings across sessions.
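+
+These settings can also be saved from the console with `save_user_config()`. A minimal sketch that mirrors the function's defaults (adjust the values to match your setup):
+
+```{r}
+#| eval: false
+save_user_config(
+  service = "openai",
+  model = "gpt-4-1106-preview",
+  task = "Permissive Chat",
+  embeddings = TRUE,
+  k_context = 4,
+  k_history = 4,
+  save_history = TRUE
+)
+```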
+
+
+![Customize Settings for Chat with Retrieval](images/chat-with-retrieval-settings.gif){width=75%}
diff --git a/vignettes/no-build/other-addins.Rmd b/vignettes/no-build/other-addins.Rmd
new file mode 100644
index 0000000..7e8feab
--- /dev/null
+++ b/vignettes/no-build/other-addins.Rmd
@@ -0,0 +1,66 @@
+---
+title: "Other Addins"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Other Addins}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>"
+)
+```
+
+## Other Addins for Interactive Use
+
+The package has four addins for use in R, R Markdown, or Quarto documents:
+
+- Comment code: uses code-davinci-edit-001 model from OpenAI to add
+comments to your code with the prompt: "add comments to each line of
+code, explaining what the code does"
+
+- Add roxygen: uses text-davinci-003 model from OpenAI to add and fill
+out a roxygen skeleton to your highlighted code (should be a function)
+with the prompt: "insert roxygen skeleton to document this function"
+
+- Convert script to function: uses code-davinci-edit-001 model from
+OpenAI to convert a highlighted script into a function with the
+prompt: "convert this R code into an R function"
+
+- Write a unit test for a function with testthat: uses
+text-davinci-003 model from OpenAI to suggest a unit test for a
+selected function with the prompt: "Suggest a unit text for this
+function using the testthat package"
+
+You can access these addins through the addin toolbar or use the command
+palette: `CMD/CTRL+SHIFT+P`. Examples of each of the addins in action are
+below.
+
+![](images/image-1429395462.png)
+
+### Comment Code
+
+
+
+### Add Roxygen
+
+
+
+### Convert a Script into Functions
+
+
+
+### Suggest a Unit Test for a Function
+
+
diff --git a/vignettes/no-build/scrape-pkgs.Rmd b/vignettes/no-build/scrape-pkgs.Rmd
new file mode 100644
index 0000000..d4e0d0f
--- /dev/null
+++ b/vignettes/no-build/scrape-pkgs.Rmd
@@ -0,0 +1,52 @@
+---
+title: "Scrape Package Sites"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Scrape Package Sites}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>",
+ fig.align = "center"
+)
+```
+
+A common use for RAG (Retrieval Augmented Generation) is package documentation. There are two addins to help you create package indices to use with the "Chat with Retrieval" app. The "Select Packages" addin launches a Shiny app that allows you to select packages and save your selections. The "Scrape Packages" addin scrapes the selected installed packages and creates indices to use with the "Chat with Retrieval" app.
+
+## Select Packages
+
+![Select Packages App](images/pkg-selector-app.png){width=75%}
+
+The "Select Packages" addin launches a Shiny app that allows you to select packages and save your selections. The app is launched by executing the "Select Packages" addin. The addin is available in the RStudio Addins menu. A set of default packages are pre-selected. You can add or remove packages from the list. When you are done, click the "Save Selected Packages" button to save your selections. Only installed packages are available for selection.
+
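+The same selector can also be launched directly from the console with the exported `run_select_pkgs_app()` function:
+
+```{r}
+#| eval: false
+library(gpttools)
+run_select_pkgs_app()
+```
+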
+## Scrape Packages
+
+Run the "Scrape Packages" addin from the RStudio Addins menu. The addin scrapes the selected installed packages and creates indices to use with the "Chat with Retrieval" app, prompting you for confirmation before proceeding. The scraping runs as a background job, so you can continue working in RStudio while it runs and check on progress in the `Background Jobs` pane.
+
+The addin _only_ creates embeddings with local models. If you want to use OpenAI embeddings, you will need to run the `scrape_pkg_sites()` function directly; see the [Scraping Script](#scraping-script) section below for details. The reason is twofold: 1) initial testing shows that the time to create embeddings is comparable between local models and the OpenAI API, and 2) OpenAI embeddings incur a cost for each request. The cost is small, but it is not free.
+
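+For example, a direct call that opts into OpenAI embeddings might look like the sketch below; `service` is the only argument changed from its default.
+
+```{r}
+#| eval: false
+library(gpttools)
+scrape_pkg_sites(service = "openai")
+```
+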
+### Scraping Script
+
+Here is the script that runs when you execute the "Scrape Packages" addin. `options(repos = c(CRAN = "https://packagemanager.posit.co/all/latest"))` sets the 'repos' option to a CRAN-like repository provided by Posit Package Manager, which offers consistent and fast access to package downloads. `options("repos")` simply retrieves the current setting of the 'repos' option; it doesn't change anything, it just prints the option to the console. `scrape_pkg_sites()` then initiates the scraping of the selected package sites.
+
+```{r}
+#| eval: false
+library(gpttools)
+cli::cli_inform("Scraping package sites and creating an index for each.")
+options(repos = c(CRAN = "https://packagemanager.posit.co/all/latest"))
+options("repos")
+scrape_pkg_sites()
+```
+
+The `scrape_pkg_sites()` function is a scraping orchestrator: it checks for packages to scrape, prompts for user confirmation, and then carries out the scraping. The packages to scrape are determined by `get_pkgs_to_scrape()`, which checks for packages that are installed but not yet scraped. If there are no packages to scrape, the function alerts the user and returns. If there are, it prompts the user for confirmation and, once confirmed, scrapes each package site with `crawl()`. You can also pass it your own set of sites, as shown below.
+
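+A hypothetical sketch of passing a custom `sites` data frame, assuming the columns the function uses internally (`url`, `version`, and `name`); the URL and version shown are illustrative only.
+
+```{r}
+#| eval: false
+# Hypothetical example; supply your own sites with url, version, and name columns
+my_sites <- tibble::tribble(
+  ~url,                             ~version, ~name,
+  "https://ggplot2.tidyverse.org/", "3.4.4",  "ggplot2"
+)
+scrape_pkg_sites(sites = my_sites)
+```
+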
+Here are the default packages that will be scraped.
+
+```{r}
+#| echo: false
+gpttools:::use_default_pkgs()
+```
diff --git a/vignettes/openai.Rmd b/vignettes/openai.Rmd
index cf72e16..85bd879 100644
--- a/vignettes/openai.Rmd
+++ b/vignettes/openai.Rmd
@@ -2,7 +2,7 @@
title: "OpenAI API Service"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{OpenAI API Services}
+ %\VignetteIndexEntry{OpenAI API Service}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---