Commit
update vignettes, fix app bugs
JamesHWade committed Jan 22, 2024
1 parent 15a971b commit 0304daf
Showing 49 changed files with 676 additions and 276 deletions.
1 change: 0 additions & 1 deletion .Rbuildignore
@@ -9,7 +9,6 @@
^cran-comments\.md$
^docs$
^gpttools\.Rproj$
^images$
^pkgdown$
^revdep$
^.lintr$
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
rev: v4.5.0
hooks:
- id: check-added-large-files
args: ['--maxkb=2000']
args: ['--maxkb=25000']
- repo: https://github.com/pre-commit-ci/pre-commit-ci-config
rev: v1.6.1
hooks:
5 changes: 4 additions & 1 deletion NAMESPACE
@@ -1,9 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(add_roxygen_addin)
export(addin_run_retriever)
export(addin_run_scrape_pkgs)
export(addin_run_select_pkgs)
export(chat_with_context)
export(chat_with_retrieval)
export(collect_dataframes)
export(crawl)
export(create_index_from_audio)
@@ -13,6 +14,7 @@ export(delete_history)
export(delete_index)
export(document_data)
export(get_selection)
export(get_transformer_model)
export(gpt_sitrep)
export(ingest_pdf)
export(insert_text)
@@ -25,6 +27,7 @@ export(remove_lines_and_spaces)
export(run_document_data)
export(run_select_pkgs_app)
export(save_user_config)
export(scrape_pkg_sites)
export(scrape_url)
export(script_to_function_addin)
export(set_user_config)
8 changes: 4 additions & 4 deletions R/addin-run-retriever.R
@@ -1,6 +1,6 @@
#' Run Chat GPT with Retrieval
#' Run Chat with Retrieval
#'
#' Run the ChatGPT shiny app with semantic search and document retrieval
#' Run the Chat with Retrieval shiny app
#'
#' @export
#'
@@ -9,9 +9,9 @@
#' @examples
#' # Call the function as an RStudio addin
#' \dontrun{
#' addin_run_retriever()
#' chat_with_retrieval()
#' }
addin_run_retriever <- function() {
chat_with_retrieval <- function() {
indices <- list_index()
if (length(indices) == 0) {
cli::cli_abort(
46 changes: 45 additions & 1 deletion R/addin_select_pkgs.R → R/addin_scrape_pkgs.R
@@ -10,6 +10,50 @@ addin_run_select_pkgs <- function() {
run_select_pkgs_app()
}

#' Addin to scrape installed packages
#'
#' Invokes RStudio addin functionality to scrape select installed packages and
#' create indices for use in the "Chat with Retrieval" application.
#'
#' @export
#' @return No return value, called for side effects only.
#'
#' @examplesIf rlang::is_interactive()
#' # This function is typically run within RStudio as an Addin.
#' # It would not normally be called directly in code.
#' addin_run_scrape_pkgs()
#'
#' @note This addin requires RStudio to be available and will stop with an
#' error message if RStudio API is not accessible.
#'
addin_run_scrape_pkgs <- function() {
# Check if RStudio API is available
if (!rstudioapi::isAvailable()) {
cli::cli_abort("The rstudioapi is not available.")
}
# Get user feedback with rstudioapi
proceed <-
rstudioapi::showQuestion(
title = "Scrape Packages",
message = "This will scrape installed packages and create indices to use
with the \"Chat with Retrieval\" app. Would you like to proceed?"
)

# Proceed with scraping if the user agrees
if (proceed) {
cli::cli_alert_info("Scraping packages as a background job.")
# Run the scrape packages script as a background job
rstudioapi::jobRunScript(
path = system.file("scripts/scrape_pkgs.R",
package = "gpttools"
),
name = "Scraping Pacakges"
)
} else {
cli::cli_alert_info("Scraping cancelled.")
}
}

#' Run a Shiny App to Select and Save Installed Packages
#'
#' This function launches a Shiny application that allows users to select from a
@@ -65,7 +109,7 @@ run_select_pkgs_app <- function() {
{
installed_packages |> dplyr::filter(Package %in% input$selected_pkg)
},
options = list(pageLength = 10)
# options = list(pageLength = 5)
)
}

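Outside RStudio there is no addin or background job, but the same indexing can be started directly with the newly exported scrape_pkg_sites() (changed in R/site-index.R below). A minimal sketch, assuming the defaults shown in that file:

    library(gpttools)
    # Uses the defaults from scrape_pkg_sites(): locally installed packages,
    # local embeddings, index creation on, overwrite on.
    scrape_pkg_sites(service = "local", index_create = TRUE, overwrite = TRUE)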
2 changes: 1 addition & 1 deletion R/config.R
@@ -18,7 +18,7 @@
save_user_config <- function(service = "openai",
model = "gpt-4-1106-preview",
task = "Permissive Chat",
embeddings = "local",
embeddings = TRUE,
k_context = 4,
k_history = 4,
save_history = TRUE,
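The one visible change here is the embeddings default, which moves from "local" to TRUE. A sketch of a full call using the defaults shown above; any arguments hidden by the truncated diff are left out:

    save_user_config(
      service      = "openai",
      model        = "gpt-4-1106-preview",
      task         = "Permissive Chat",
      embeddings   = TRUE,   # previously "local"
      k_context    = 4,
      k_history    = 4,
      save_history = TRUE
    )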
29 changes: 28 additions & 1 deletion R/embedding-py.R
@@ -10,7 +10,34 @@
# }
# nolint end

# uses transformers instead of sentence transformers
#' Get Transformer Model
#'
#' This function is designed to download and load a pre-trained transformer
#' model using the transformers Python library via the reticulate package.
#' It checks for the availability of the required Python package and then
#' downloads the specified transformer model.
#'
#' @param model_name The name of the transformer model to download. This should
#' be in the format "username/modelname" as recognized by the transformers
#' library. Default is "jinaai/jina-embeddings-v2-base-en".
#'
#' @return An object of the downloaded transformer model.
#'
#' @export
#'
#' @note Users of this function need to ensure that the Python environment
#' is set up with the 'transformers' package installed. The function uses
#' the 'reticulate' R package to interface with Python and the user may need
#' to configure it accordingly.
#'
#' @examples
#' \dontrun{
#' # To get the default transformer model:
#' get_transformer_model()
#'
#' # To get a custom transformer model by specifying the model name:
#' get_transformer_model("bert-base-uncased")
#' }
get_transformer_model <-
function(model_name = "jinaai/jina-embeddings-v2-base-en") {
py_pkg_is_available("transformers")
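The body of get_transformer_model() is cut off by the diff, but the roxygen block describes loading a Hugging Face model through reticulate. A hedged sketch of that pattern (an assumption about the approach, not the package's actual code; trust_remote_code is needed because the Jina embedding models ship custom modelling code):

    library(reticulate)
    transformers <- reticulate::import("transformers")
    model <- transformers$AutoModel$from_pretrained(
      "jinaai/jina-embeddings-v2-base-en",
      trust_remote_code = TRUE
    )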
72 changes: 46 additions & 26 deletions R/history.R
@@ -174,20 +174,16 @@ check_context <- function(context) {
#' @param overwrite Whether to overwrite the history file or not. Default is
#' FALSE.
#' @param local Whether to use the local model or not. Default is FALSE.
#' @param embedding_model A model object to use for embedding. Only needed if
#' local is TRUE. Default is NULL.
#'
#' @return A list containing the prompt, context, and answer.
#' @export
#'
#' @examples
#' \dontrun{
#' # Define a query and context
#' @examplesIf rlang::is_interactive()
#' rlang::is_interactive()
#' query <- "What is the capital of France?"
#' context <- "France is a country in Western Europe. Its capital is a famous
#' city known for its culture, art, and history."
#'
#' # Call the chat_with_context function
#' result <- chat_with_context(query = query, context = context)
#' }
chat_with_context <- function(query,
service = "openai",
model = "gpt-4",
@@ -202,14 +198,15 @@ chat_with_context <- function(query,
k_history = 4,
save_history = TRUE,
overwrite = FALSE,
local = FALSE) {
local = FALSE,
embedding_model = NULL) {
arg_match(task, c("Context Only", "Permissive Chat"))

if (local) {
embedding_model <- get_transformer_model()
} else {
embedding_model <- NULL
}
need_context <- is_context_needed(
user_prompt = query,
service = service,
model = model
)

if (rlang::is_true(add_context) || rlang::is_true(add_history)) {
query_embedding <- get_query_embedding(query,
@@ -218,7 +215,7 @@
)
}

if (rlang::is_true(add_context)) {
if (rlang::is_true(add_context) && rlang::is_true(need_context)) {
full_context <-
get_query_context(
query_embedding,
Expand All @@ -229,10 +226,11 @@ chat_with_context <- function(query,
dplyr::pull("chunks") |>
paste(collapse = "\n\n")
} else {
full_context <- "No context provided."
context <- "No additional context provided."
}

if (add_history) {
if (rlang::is_true(add_history) & rlang::is_true(need_context)) {
cli::cli_inform("Attempting to add chat history to query.")
cli::cli_inform("Chat history: {class(chat_history)}")
if (rlang::is_null(chat_history)) {
@@ -261,11 +259,11 @@ chat_with_context <- function(query,
role = "system",
content =
glue(
"You are a helpful chat bot that answers questions based on ",
"the context provided by the user. If the user does not ",
"provide related context, say \"I am not able to answer that ",
"question. Maybe try rephrasing your question in a different ",
"way.\""
"You are a helpful chat bot that answers questions based on
the context provided by the user. If the user does not
provide related context and you need context to respond
accurately, say \"I am not able to answer that question.
Maybe try rephrasing your question in a different way.\""
)
)
),
@@ -275,11 +273,12 @@ chat_with_context <- function(query,
role = "system",
content =
glue(
"You are a helpful chat bot that answers questions based on ",
"on the context provided by the user. If the user does not ",
"provide context, answer the quest but first say \"I am not ",
"able to answer that question with the context you gave me, ",
"but here is my best answer.",
"You are a helpful chat bot that answers questions based on
on the context provided by the user. If the user does not
provide context and you need context to respond correctly,
answer the quest but first say \"I am not able to answer
that question with the context you gave me, but here is my
best but here is my best answer."
)
)
)
@@ -370,3 +369,24 @@ chat_with_context <- function(query,

list(prompt_without_context, full_context, answer$response)
}


is_context_needed <- function(user_prompt,
service = getOption("gpttools.service"),
model = getOption("gpttools.model")) {
prompt <-
glue::glue("Would additional context or history be helpful to respond to
this prompt from the user. If yes, answer TRUE. If no, answer
FALSE. ONLY answer TRUE or FALSE. It is crucial that you only
answer TRUE or FALSE.\n\n{user_prompt}")

gptstudio:::gptstudio_create_skeleton(
service = service,
model = model,
prompt = prompt,
stream = FALSE
) |>
gptstudio:::gptstudio_request_perform() |>
purrr::pluck("response") |>
as.logical()
}
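Two behavioural changes land in this file: chat_with_context() now takes a pre-built embedding_model instead of constructing one when local = TRUE, and the new is_context_needed() helper asks the chat model itself whether retrieval is worth doing before any embedding work happens. A sketch of the new calling pattern (hedged; the full signature is truncated in this diff, and a configured Python environment with transformers is assumed):

    embedding_model <- get_transformer_model()  # jinaai/jina-embeddings-v2-base-en by default
    result <- chat_with_context(
      query           = "What is the capital of France?",
      service         = "openai",
      model           = "gpt-4",
      local           = TRUE,
      embedding_model = embedding_model
    )
    # result is a list: the prompt without context, the retrieved context, and the answer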
33 changes: 31 additions & 2 deletions R/site-index.R
@@ -67,6 +67,25 @@ get_pkgs_to_scrape <- function(local = TRUE,
dplyr::rename(version = installed_version)
}

#' Scrape packaging sites
#'
#' @details This function scrapes the websites for the packages specified in the
#' `sites` dataframe. If `sites` is empty, it alerts the user that there are no
#' packages to scrape and returns `NULL` invisibly. If the user confirms, it
#' scrapes each package site using the supplied details.
#'
#' @param sites A data frame containing the package sites to be scraped. If not
#' provided, it defaults to `get_pkgs_to_scrape(local = TRUE)`.
#' @param service The service to be used for scraping, defaults to "local".
#' @param index_create Logical indicating whether to create an index, defaults
#' to `TRUE`.
#' @param overwrite Logical indicating whether to overwrite existing content,
#' defaults to `TRUE`.
#' @return Invisible `NULL`. The function is called for its side effects.
#' @examplesIf rlang::is_interactive()
#' scrape_pkg_sites()
#' @export
scrape_pkg_sites <- function(sites = get_pkgs_to_scrape(local = TRUE),
service = "local",
index_create = TRUE,
@@ -76,12 +95,22 @@ scrape_pkg_sites <- function(sites = get_pkgs_to_scrape(local = TRUE),
return(invisible())
}

cli::cli_text("You are about to scrape {nrow(sites)} package site page{?s}")
usethis::ui_yeah("Do you want to continue?")
if (rlang::is_interactive()) {
cli::cli_text("You are about to scrape {nrow(sites)} package site page{?s}")
continue <- usethis::ui_yeah("Do you want to continue?")
} else {
continue <- TRUE
}

if (!continue) {
cli_alert_info("Scraping aborted.")
return(invisible())
}

sites |>
dplyr::select(url, version, name) |>
purrr::pmap(.f = \(url, version, name) {
# crawl() scrapes a single site and optionally builds its index
crawl(
url = url,
index_create = index_create,
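Because the confirmation prompt is now skipped in non-interactive sessions, scrape_pkg_sites() can also be driven from a script with a hand-built sites data frame. The column names below (name, url, version) are inferred from the pmap() call above, so treat this as a sketch rather than a documented interface:

    my_sites <- data.frame(
      name    = "dplyr",
      url     = "https://dplyr.tidyverse.org",
      version = "1.1.4"
    )
    scrape_pkg_sites(sites = my_sites, service = "local", index_create = TRUE)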