diff --git a/browser/about_flags.cc b/browser/about_flags.cc index 3467c11bd9dc..68b675bfa56c 100644 --- a/browser/about_flags.cc +++ b/browser/about_flags.cc @@ -356,51 +356,58 @@ #define BRAVE_MIDDLE_CLICK_AUTOSCROLL_FEATURE_ENTRY #endif -#define BRAVE_AI_CHAT_FEATURE_ENTRIES \ - EXPAND_FEATURE_ENTRIES( \ - { \ - "brave-ai-chat", \ - "Brave AI Chat", \ - "Summarize articles and engage in conversation with AI", \ - kOsWin | kOsMac | kOsLinux | kOsAndroid, \ - FEATURE_VALUE_TYPE(ai_chat::features::kAIChat), \ - }, \ - { \ - "brave-ai-chat-history", \ - "Brave AI Chat History", \ - "Enables AI Chat History persistence and management", \ - kOsWin | kOsMac | kOsLinux, \ - FEATURE_VALUE_TYPE(ai_chat::features::kAIChatHistory), \ - }, \ - { \ - "brave-ai-chat-context-menu-rewrite-in-place", \ - "Brave AI Chat Rewrite In Place From Context Menu", \ - "Enables AI Chat rewrite in place feature from the context menu", \ - kOsDesktop, \ - FEATURE_VALUE_TYPE(ai_chat::features::kContextMenuRewriteInPlace), \ - }, \ - { \ - "brave-ai-chat-page-content-refine", \ - "Brave AI Chat Page Content Refine", \ - "Enable local text embedding for long page content in order to " \ - "find " \ - "most relevant parts to the prompt within context limit.", \ - kOsDesktop | kOsAndroid, \ - FEATURE_VALUE_TYPE(ai_chat::features::kPageContentRefine), \ - }, \ - { \ - "brave-ai-chat-allow-private-ips", \ - "Private IP Addresses for Custom Model Endpoints", \ - "Permits the use of private IP addresses as model endpoint URLs", \ - kOsWin | kOsMac | kOsLinux | kOsAndroid, \ - FEATURE_VALUE_TYPE(ai_chat::features::kAllowPrivateIPs), \ - }, \ - { \ - "brave-ai-chat-open-leo-from-brave-search", \ - "Open Leo AI Chat from Brave Search", \ - "Enables opening Leo AI Chat from Brave Search", \ - kOsDesktop | kOsAndroid, \ - FEATURE_VALUE_TYPE(ai_chat::features::kOpenAIChatFromBraveSearch), \ +#define BRAVE_AI_CHAT_FEATURE_ENTRIES \ + EXPAND_FEATURE_ENTRIES( \ + { \ + "brave-ai-chat", \ + "Brave AI Chat", \ 
+ "Summarize articles and engage in conversation with AI", \ + kOsWin | kOsMac | kOsLinux | kOsAndroid, \ + FEATURE_VALUE_TYPE(ai_chat::features::kAIChat), \ + }, \ + { \ + "brave-ai-chat-history", \ + "Brave AI Chat History", \ + "Enables AI Chat History persistence and management", \ + kOsWin | kOsMac | kOsLinux, \ + FEATURE_VALUE_TYPE(ai_chat::features::kAIChatHistory), \ + }, \ + { \ + "brave-ai-host-specific-distillation", \ + "Brave AI Host-Specific Distillation", \ + "Enables support for host-specific distillation scripts", \ + kOsWin | kOsMac | kOsLinux, \ + FEATURE_VALUE_TYPE(ai_chat::features::kCustomSiteDistillerScripts), \ + }, \ + { \ + "brave-ai-chat-context-menu-rewrite-in-place", \ + "Brave AI Chat Rewrite In Place From Context Menu", \ + "Enables AI Chat rewrite in place feature from the context menu", \ + kOsDesktop, \ + FEATURE_VALUE_TYPE(ai_chat::features::kContextMenuRewriteInPlace), \ + }, \ + { \ + "brave-ai-chat-page-content-refine", \ + "Brave AI Chat Page Content Refine", \ + "Enable local text embedding for long page content in order to " \ + "find " \ + "most relevant parts to the prompt within context limit.", \ + kOsDesktop | kOsAndroid, \ + FEATURE_VALUE_TYPE(ai_chat::features::kPageContentRefine), \ + }, \ + { \ + "brave-ai-chat-allow-private-ips", \ + "Private IP Addresses for Custom Model Endpoints", \ + "Permits the use of private IP addresses as model endpoint URLs", \ + kOsWin | kOsMac | kOsLinux | kOsAndroid, \ + FEATURE_VALUE_TYPE(ai_chat::features::kAllowPrivateIPs), \ + }, \ + { \ + "brave-ai-chat-open-leo-from-brave-search", \ + "Open Leo AI Chat from Brave Search", \ + "Enables opening Leo AI Chat from Brave Search", \ + kOsDesktop | kOsAndroid, \ + FEATURE_VALUE_TYPE(ai_chat::features::kOpenAIChatFromBraveSearch), \ }) #if BUILDFLAG(ENABLE_AI_REWRITER) diff --git a/browser/ui/BUILD.gn b/browser/ui/BUILD.gn index cd400c78f3d8..a0a6dbbbf851 100644 --- a/browser/ui/BUILD.gn +++ b/browser/ui/BUILD.gn @@ -769,6 +769,7 @@ 
source_set("ui") { "//brave/components/ai_chat/core/browser", "//brave/components/ai_chat/core/common", "//brave/components/ai_chat/core/common/mojom", + "//brave/components/ai_chat/resources/custom_site_distiller_scripts:generated_resources", "//brave/components/ai_chat/resources/page:generated_resources", "//brave/components/ai_rewriter/common/buildflags", "//brave/components/brave_adaptive_captcha", diff --git a/components/ai_chat/core/common/features.cc b/components/ai_chat/core/common/features.cc index 9004ee11ac5e..1b82c2f8fb30 100644 --- a/components/ai_chat/core/common/features.cc +++ b/components/ai_chat/core/common/features.cc @@ -41,6 +41,14 @@ bool IsAIChatHistoryEnabled() { return base::FeatureList::IsEnabled(features::kAIChatHistory); } +BASE_FEATURE(kCustomSiteDistillerScripts, + "CustomSiteDistillerScripts", + base::FEATURE_DISABLED_BY_DEFAULT); + +bool IsCustomSiteDistillerScriptsEnabled() { + return base::FeatureList::IsEnabled(features::kCustomSiteDistillerScripts); +} + BASE_FEATURE(kContextMenuRewriteInPlace, "AIChatContextMenuRewriteInPlace", base::FEATURE_ENABLED_BY_DEFAULT); diff --git a/components/ai_chat/core/common/features.h b/components/ai_chat/core/common/features.h index f98e2c8f0015..d77ace4194ce 100644 --- a/components/ai_chat/core/common/features.h +++ b/components/ai_chat/core/common/features.h @@ -38,6 +38,10 @@ COMPONENT_EXPORT(AI_CHAT_COMMON) BASE_DECLARE_FEATURE(kAIChatHistory); COMPONENT_EXPORT(AI_CHAT_COMMON) bool IsAIChatHistoryEnabled(); +COMPONENT_EXPORT(AI_CHAT_COMMON) +BASE_DECLARE_FEATURE(kCustomSiteDistillerScripts); +COMPONENT_EXPORT(AI_CHAT_COMMON) bool IsCustomSiteDistillerScriptsEnabled(); + COMPONENT_EXPORT(AI_CHAT_COMMON) BASE_DECLARE_FEATURE(kContextMenuRewriteInPlace); COMPONENT_EXPORT(AI_CHAT_COMMON) bool IsContextMenuRewriteInPlaceEnabled(); diff --git a/components/ai_chat/renderer/BUILD.gn b/components/ai_chat/renderer/BUILD.gn index 153021f4b1a7..2739e5afa3a4 100644 --- 
a/components/ai_chat/renderer/BUILD.gn +++ b/components/ai_chat/renderer/BUILD.gn @@ -20,6 +20,7 @@ static_library("renderer") { "//base", "//brave/components/ai_chat/core/common", "//brave/components/ai_chat/core/common/mojom", + "//brave/components/ai_chat/resources/custom_site_distiller_scripts:generated_resources", "//content/public/renderer", "//gin", "//mojo/public/cpp/bindings", diff --git a/components/ai_chat/renderer/page_content_extractor.cc b/components/ai_chat/renderer/page_content_extractor.cc index 1644a96d8cb1..69fa6ec20b7e 100644 --- a/components/ai_chat/renderer/page_content_extractor.cc +++ b/components/ai_chat/renderer/page_content_extractor.cc @@ -221,7 +221,7 @@ void PageContentExtractor::ExtractPageContent( VLOG(1) << "Text transcript type"; // Do text extraction DistillPageText( - render_frame(), isolated_world_id_, + render_frame(), global_world_id_, isolated_world_id_, base::BindOnce(&PageContentExtractor::OnDistillResult, weak_ptr_factory_.GetWeakPtr(), std::move(callback))); } diff --git a/components/ai_chat/renderer/page_text_distilling.cc b/components/ai_chat/renderer/page_text_distilling.cc index 6ae6b46bc8bb..16df470f7444 100644 --- a/components/ai_chat/renderer/page_text_distilling.cc +++ b/components/ai_chat/renderer/page_text_distilling.cc @@ -6,6 +6,7 @@ #include "brave/components/ai_chat/renderer/page_text_distilling.h" #include +#include #include #include #include @@ -17,6 +18,7 @@ #include "base/compiler_specific.h" #include "base/containers/contains.h" +#include "base/containers/fixed_flat_map.h" #include "base/containers/span.h" #include "base/functional/bind.h" #include "base/functional/callback.h" @@ -27,7 +29,10 @@ #include "base/strings/utf_string_conversions.h" #include "base/time/time.h" #include "base/values.h" +#include "brave/components/ai_chat/core/common/features.h" +#include "brave/components/ai_chat/resources/custom_site_distiller_scripts/grit/custom_site_distiller_scripts_generated.h" #include 
"content/public/renderer/render_frame.h" +#include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "third_party/blink/public/mojom/script/script_evaluation_params.mojom-shared.h" #include "third_party/blink/public/platform/web_string.h" #include "third_party/blink/public/web/web_document.h" @@ -39,6 +44,7 @@ #include "ui/accessibility/ax_node_data.h" #include "ui/accessibility/ax_tree.h" #include "ui/accessibility/ax_tree_update.h" +#include "ui/base/resource/resource_bundle.h" namespace ai_chat { @@ -139,8 +145,24 @@ void AddTextNodesToVector(const ui::AXNode* node, void DistillPageText( content::RenderFrame* render_frame, + int32_t global_world_id, int32_t isolated_world_id, base::OnceCallback&)> callback) { + if (ai_chat::features::IsCustomSiteDistillerScriptsEnabled()) { + std::string host = + render_frame->GetWebFrame()->GetSecurityOrigin().Host().Utf8(); + std::optional> site_script = + LoadSiteScriptForHost(host); + + if (site_script.has_value()) { + int32_t world_id = + site_script->second ? global_world_id : isolated_world_id; + DistillPageTextViaSiteScript(render_frame, site_script->first, world_id, + std::move(callback)); + return; + } + } + auto snapshotter = render_frame->CreateAXTreeSnapshotter( ui::AXMode::kWebContents | ui::AXMode::kHTML | ui::AXMode::kScreenReader); ui::AXTreeUpdate snapshot; @@ -204,4 +226,77 @@ void DistillPageText( std::move(callback).Run(contents_text); } +void DistillPageTextViaSiteScript( + content::RenderFrame* render_frame, + std::string_view script_content, + int32_t world_id, + base::OnceCallback&)> callback) { + CHECK(ai_chat::features::IsCustomSiteDistillerScriptsEnabled()); + // TODO (jonathansampson): Wrap scripts at build/transpile-time instead + // This produces an injected script that resembles the following: + // (() => { + // function distillPrimaryColumn (level) { ... 
} + // function distill(level) { + // return distillPrimaryColumn(level); + // } + // return distill(3); + // })() + std::string script = absl::StrFormat( + R"((()=> { + %s + return distill(3); + })())", + script_content); + + blink::WebScriptSource source = + blink::WebScriptSource(blink::WebString::FromUTF8(script)); + + auto on_script_executed = + [](base::OnceCallback&)> callback, + std::optional value, base::TimeTicks start_time) { + if (value && value->is_string() && !value->GetString().empty()) { + std::move(callback).Run(value->GetString()); + } else { + std::move(callback).Run({}); + } + }; + + // Execute the combined script as a single source + render_frame->GetWebFrame()->RequestExecuteScript( + world_id, base::span_from_ref(source), + blink::mojom::UserActivationOption::kDoNotActivate, + blink::mojom::EvaluationTiming::kAsynchronous, + blink::mojom::LoadEventBlockingOption::kDoNotBlock, + base::BindOnce(on_script_executed, std::move(callback)), + blink::BackForwardCacheAware::kAllow, + blink::mojom::WantResultOption::kWantResult, + // Because we are using a promise to resolve the result, we will use the + // `kAwait` option to ensure the promise is resolved before the callback + // is invoked. 
+ blink::mojom::PromiseResultOption::kAwait); +} + +std::optional> LoadSiteScriptForHost( + std::string_view host) { + static constexpr auto kHostToScriptResource = + base::MakeFixedFlatMap>({ + {"github.com", + {IDR_CUSTOM_SITE_DISTILLER_SCRIPTS_GITHUB_COM_BUNDLE_JS, false}}, + {"x.com", {IDR_CUSTOM_SITE_DISTILLER_SCRIPTS_X_COM_BUNDLE_JS, true}}, + }); + + auto it = kHostToScriptResource.find( + net::registry_controlled_domains::GetDomainAndRegistry( + host, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES)); + + if (it == kHostToScriptResource.end()) { + return std::nullopt; + } + + return std::make_optional(std::make_pair( + ui::ResourceBundle::GetSharedInstance().LoadDataResourceString( + it->second.first), + it->second.second)); +} + } // namespace ai_chat diff --git a/components/ai_chat/renderer/page_text_distilling.h b/components/ai_chat/renderer/page_text_distilling.h index 78864ecddd72..7ca56de3c66a 100644 --- a/components/ai_chat/renderer/page_text_distilling.h +++ b/components/ai_chat/renderer/page_text_distilling.h @@ -9,8 +9,10 @@ #include #include #include +#include #include "base/functional/callback_forward.h" +#include "url/gurl.h" namespace content { class RenderFrame; @@ -18,11 +20,27 @@ class RenderFrame; namespace ai_chat { +// Distills the text content of a page. If possible, it will use a custom site +// distiller script. Otherwise, it will fall back to a more general approach. void DistillPageText( content::RenderFrame* render_frame, + int32_t global_world_id, int32_t isolated_world_id, base::OnceCallback&)>); +// Attempts to retrieve a custom site distiller script for the given host. +// Returns a pair consisting of the script content, and a boolean indicating if +// it is intended for the main world or not. +std::optional> LoadSiteScriptForHost( + std::string_view host); + +// Attempts to distill a page based on the retrieval of a host-specific script. 
+void DistillPageTextViaSiteScript( + content::RenderFrame* render_frame, + std::string_view script_content, + int32_t world_id, + base::OnceCallback&)>); + } // namespace ai_chat #endif // BRAVE_COMPONENTS_AI_CHAT_RENDERER_PAGE_TEXT_DISTILLING_H_ diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/BUILD.gn b/components/ai_chat/resources/custom_site_distiller_scripts/BUILD.gn new file mode 100644 index 000000000000..c52826853e72 --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/BUILD.gn @@ -0,0 +1,29 @@ +# Copyright (c) 2024 The Brave Authors. All rights reserved. +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at https://mozilla.org/MPL/2.0/. + +import("//brave/components/common/typescript.gni") +import("//tools/grit/repack.gni") + +transpile_web_ui("custom_site_distiller_scripts") { + resource_name = "custom_site_distiller_scripts" + visibility = [ ":*" ] + entry_points = [ + [ + "x_com", + rebase_path("scripts/x.com/index.ts"), + ], + [ + "github_com", + rebase_path("scripts/github.com/index.ts"), + ], + ] + output_module = true +} + +pack_web_resources("generated_resources") { + resource_name = "custom_site_distiller_scripts" + output_dir = "$root_gen_dir/brave/components/ai_chat/resources/custom_site_distiller_scripts" + deps = [ ":custom_site_distiller_scripts" ] +} diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/distillation.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/distillation.ts new file mode 100644 index 000000000000..0352273479e9 --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/distillation.ts @@ -0,0 +1,11 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +export enum LEO_DISTILLATION_LEVEL { + LOW = 0, + MEDIUM = 1, + HIGH = 2, + FULL = 3 +} diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/branches.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/branches.ts new file mode 100644 index 000000000000..920604045adc --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/branches.ts @@ -0,0 +1,119 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +export function distillBranches() { + const tables = document.querySelectorAll('table') + const output = [...tables].map(distillBranchesTable) + return output.join('\n') +} + +function distillBranchesTable(table: HTMLTableElement) { + const pageURL = document.location.toString() + const pageTitle = document.title + const tableTitle = getBranchesTableTitle(table) + + // Extract headers from the first row + const headers = [...table.rows[0].cells].map((cell) => { + return cell.innerText.trim() + }) + + // Define custom processing for specific headers + const cellProcessors = { + 'updated': (cell: HTMLTableCellElement) => ({ + author: cell.querySelector('a')?.getAttribute('href'), + date: cell.querySelector('relative-time')?.getAttribute('title') + }), + 'pull request': (cell: HTMLTableCellElement) => { + const link = cell.querySelector('a') + const href = link?.getAttribute('href') + // We can infer the state from the presence and type of icon + const state = link + ?.querySelector('[data-testid$="-pull-request-icon"]') + ?.dataset.testid?.split('-')[0] + if (href && state) { + return { href, state } + } + return null + }, + 
'action menu': null // Skip this cell
+  } as Record<string, ((cell: HTMLTableCellElement) => any) | null>
+
+  const data = [] as Array<Record<string, any>>
+
+  // Iterate over each row, starting from the second one
+  for (let i = 1; i < table.rows.length; i++) {
+    const row = table.rows[i]
+    const rowData = {} as Record<string, any>
+
+    // Iterate over each cell in the row
+    for (let j = 0; j < row.cells.length; j++) {
+      const cell = row.cells[j]
+      const label = headers[j]
+
+      // A body row may hold more cells than the header row; without a header
+      // there is no label to store the value under, so skip the cell.
+      if (label === undefined) {
+        continue
+      }
+
+      const loweredLabel = label.toLowerCase()
+
+      // Check if there's a custom processor for the cell
+      const processor = cellProcessors[loweredLabel]
+      const trimmedText = cell.innerText.trim()
+
+      if (typeof processor === 'function') {
+        const result = processor(cell)
+        if (result !== null) {
+          rowData[label] = result
+        }
+      } else if (processor === null || trimmedText === '') {
+        // Either the column is explicitly skipped or the cell is empty.
+        continue
+      } else {
+        rowData[label] = trimmedText
+      }
+    }
+
+    data.push(rowData)
+  }
+
+  return [
+    `Current time: ${new Date().toLocaleString()}`,
+    `Current URL: ${pageURL}`,
+    `# Page Title: ${pageTitle}`,
+    `## Data Title: ${tableTitle}`,
+    ...rDistillObject(data)
+  ].join('\n')
+}
+
+/**
+ * Recursively distills an object (or array) into a list of `- key: value`
+ * lines, indenting the lines of nested objects to maintain hierarchy:
+ * - Prop A: Value of Prop A
+ *   - Subprop A: Value of Subprop A
+ *   - Subprop B: Value of Subprop B
+ * - Prop B: Value of Prop B
+ * - Prop C: Value of Prop C
+ *
+ * Only string leaves and nested objects are emitted; every other value
+ * (null, undefined, numbers, booleans) is skipped.
+ */
+function rDistillObject(obj: any) {
+  const output = [] as string[]
+
+  // Object.entries throws on null/undefined, so treat those as empty.
+  for (const [k, v] of Object.entries(obj ?? {})) {
+    // `typeof null` is 'object'; exclude it so we never recurse into null.
+    if (v !== null && typeof v === 'object') {
+      output.push(`- ${k}:`)
+      output.push(...rDistillObject(v).map((line) => ` ${line}`))
+    } else if (typeof v === 'string') {
+      // Replace every line break (not just the first) so that each entry
+      // stays on a single line.
+      output.push(`- ${k.replace(/\n/g, '/')}: ${v.replace(/\n/g, '/')}`)
+    }
+  }
+
+  return output
+}
+
+/**
+ * Finds a title for the given branches table: the trimmed text of the
+ * `.TableTitle` or selected `.TabNav-item` element inside the closest
+ * ancestor that contains one, falling back to the table's
+ * `aria-labelledby` attribute.
+ */
+function getBranchesTableTitle(table: HTMLTableElement) {
+  // Both :has() groups are closed explicitly rather than relying on the
+  // selector parser to recover from an unbalanced parenthesis.
+  const holderSelector = ':has(.TableTitle), :has(.TabNav-item.selected)'
+  const titleSelector = '.TableTitle, .TabNav-item.selected'
+
+  const titleHolder = table.closest(holderSelector)
+  const tableTitle =
titleHolder?.querySelector(titleSelector)?.textContent + + if (tableTitle) { + return tableTitle.trim() + } + + return table.getAttribute('aria-labelledby') +} diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/distiller.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/distiller.ts new file mode 100644 index 000000000000..db605aa10668 --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/distiller.ts @@ -0,0 +1,25 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +import { LEO_DISTILLATION_LEVEL } from '../distillation' +import { distillBranches } from './branches' +import { GetPageType, SupportedPage } from './utils' + +let _DISTILLATION_LEVEL = LEO_DISTILLATION_LEVEL.LOW + +export function getDistillationLevel() { + return _DISTILLATION_LEVEL +} + +export default function distill(level: LEO_DISTILLATION_LEVEL) { + _DISTILLATION_LEVEL = level + + switch (GetPageType(document)) { + case SupportedPage.BRANCHES: + return distillBranches() + default: + return null + } +} diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/index.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/index.ts new file mode 100644 index 000000000000..7c481b9118d2 --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/index.ts @@ -0,0 +1,8 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ + +import distill from './distiller' + +export { distill } diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/utils.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/utils.ts new file mode 100644 index 000000000000..f0396ecef806 --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/github.com/utils.ts @@ -0,0 +1,59 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +export enum SupportedPage { + ORG = 'org', + REPO = 'repo', + PULL_REQUESTS = 'pull_requests', + PULL_REQUEST = 'pull_request', + BRANCHES = 'branches', + PROFILE = 'profile' +} + +export function GetPageType(document: Document): SupportedPage | null { + if (isOrgPage(document)) { + return SupportedPage.ORG + } else if (isRepoPage(document)) { + return SupportedPage.REPO + } else if (isPullRequestsPage(document)) { + return SupportedPage.PULL_REQUESTS + } else if (isPullRequestPage(document)) { + return SupportedPage.PULL_REQUEST + } else if (isBranchesPage(document)) { + return SupportedPage.BRANCHES + } else if (isProfilePage(document)) { + return SupportedPage.PROFILE + } + + return null +} + +function isOrgPage(document: Document) { + const key = 'itemtype' + const val = 'Organization' + return document.querySelector(`[${key}]`)?.getAttribute(key)?.endsWith(val) +} + +function isRepoPage(document: Document) { + return location.pathname.split('/').filter(Boolean).length === 2 +} + +function isPullRequestsPage(document: Document) { + const pathParts = location.pathname.split('/').filter(Boolean) + return pathParts.length === 3 && pathParts[2] === 'pulls' +} + +function isPullRequestPage(document: Document) { + const pathParts = location.pathname.split('/').filter(Boolean) 
+  // A pull request lives at /<owner>/<repo>/pull/<number>. Require the
+  // literal "pull" segment and an all-digit id: an unanchored /\d+/ also
+  // accepted any four-segment path whose last segment merely contained a
+  // digit (for example /owner/repo/tree/v2).
+  return (
+    pathParts.length === 4 &&
+    pathParts[2] === 'pull' &&
+    /^\d+$/.test(pathParts[3])
+  )
+}
+
+/**
+ * True when the current path ends in /branches, optionally followed by
+ * /active, /stale or /all, and an optional trailing slash.
+ */
+function isBranchesPage(document: Document) {
+  return /\/branches(\/(active|stale|all))?\/?$/.test(location.pathname)
+}
+
+/** True when the document body carries the `page-profile` class. */
+function isProfilePage(document: Document) {
+  return document.body.matches('.page-profile')
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/tsconfig.json b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/tsconfig.json
new file mode 100644
index 000000000000..868d3fa89936
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "extends": "../../../../tsconfig",
+  "include": [
+    "**/*.ts",
+    "**/*.tsx",
+    "**/*.d.ts",
+    "../../definitions/*.d.ts"
+  ]
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/cards.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/cards.ts
new file mode 100644
index 000000000000..1e0ce415486e
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/cards.ts
@@ -0,0 +1,140 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/.
*/ + +import { store } from './data' +import { getUserSignature } from './user' + +export function distillCard(post: any) { + const card = store.access('cards', post.card) + if (card) { + const title = card.binding_values.title + const cardURL = card.binding_values.card_url + const unifiedCard = card.binding_values.unified_card + const host = card.binding_values.domain + const desc = card.binding_values.description + + if (card.name.endsWith('audiospace')) { + /* Not Implemented */ + return null + } else if (card.name === 'unified_card' && unifiedCard) { + return distillUnifiedCard(unifiedCard) + } + + return [ + 'Resource:', + title ? ` - Title: "${title.string_value}"` : null, + host && cardURL + ? ` - Address: [${host.string_value}](${cardURL.string_value})` + : null, + desc ? ` - Description: "${desc.string_value}"` : null + ] + .filter(Boolean) + .join('\n') + } + return null +} + +function distillUnifiedCard(cardString: any) { + const card = JSON.parse(cardString.string_value) + return distillUnifiedCardComponents(card) +} + +function distillUnifiedCardComponents(card: any) { + return card.components + .map((key: string) => { + const component = card.component_objects[key] + + switch (component.type) { + case 'media': + return distillUnifiedCardMedia(component, card) + case 'grok_share': + return distillUnifiedCardGrokShare(component, card) + case 'community_details': + return distillUnifiedCardCommunityDetails(component, card) + default: + return null + } + }) + .filter(Boolean) + .join('\n') +} + +function distillUnifiedCardMedia(component: any, card: any) { + const { + data: { id } + } = component + const { + type, + original_info: { width, height } + } = card.media_entities[id] + + if (type !== 'photo') { + /** + * TODO (Sampson): Handle other media types + */ + console.warn(`Unhandled media type: ${type}`, card.media_entities[id]) + } + return ['Media:', ` - Type: ${type}`, ` - Dimensions: ${width}x${height}`] + .filter(Boolean) + .join('\n') +} + +type 
ConvoEntry = {
+  sender: 'USER' | 'AGENT'
+  grokMode: 'NORMAL' | 'FUN'
+  message: string
+}
+
+/**
+ * Distills a shared Grok conversation card into one `signature: message`
+ * entry per previewed message, followed by the URL of the card's
+ * destination. Messages sent by 'USER' are attributed to the profile user;
+ * all others are attributed to the Grok user.
+ */
+function distillUnifiedCardGrokShare(component: any, card: any) {
+  const conversationPreview = component.data.conversation_preview
+  const destination = component.data.destination
+  const grokUser = component.data.grok_user
+  const profileUser = component.data.profile_user
+  const urlData = card.destination_objects[destination].data.url_data
+  const grokSig = getUserSignature(grokUser)
+  const userSig = getUserSignature(profileUser)
+
+  const conversation = conversationPreview.map((entry: ConvoEntry) => {
+    const message = entry.message
+    const signature = entry.sender === 'USER' ? userSig : grokSig
+    return `${signature}: ${message}\n`
+  })
+
+  conversation.push(`URL: ${urlData.url}`)
+
+  return conversation.filter(Boolean).join('\n')
+}
+
+/**
+ * Distills a community card into its name, the signatures of the members
+ * shown in its facepile (plus a count of the remaining members), and the
+ * URL of the card's destination.
+ */
+function distillUnifiedCardCommunityDetails(component: any, card: any) {
+  const {
+    data: {
+      destination,
+      member_count: memberCount,
+      members_facepile: membersFacepile,
+      name
+    }
+  } = component
+
+  const {
+    data: { url_data: urlData }
+  } = card.destination_objects[destination]
+
+  const facepileSignatures = (membersFacepile ?? [])
+    .map((id: string) => {
+      // `card` is parsed JSON, not a function, so `card('users')` always
+      // threw a TypeError.
+      // NOTE(review): assumes the unified card exposes its users as a map
+      // keyed by id, like media_entities and destination_objects - confirm.
+      const user = card.users?.[id]
+      return (user && getUserSignature(user)) || null
+    })
+    .filter(Boolean)
+
+  // Members that are not individually listed in the facepile.
+  const count = memberCount - facepileSignatures.length
+
+  return [
+    'Community:',
+    ` - Name: ${name}`,
+    ` - Members: ${facepileSignatures.join(', ')}, and ${count} others`,
+    ` - URL: ${urlData.url}`
+  ]
+    .filter(Boolean)
+    .join('\n')
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/data.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/data.ts
new file mode 100644
index 000000000000..f321b6c84950
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/data.ts
@@ -0,0 +1,41 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +class XStateStore { + private snapshot: Record | null + + constructor() { + const reactRoot = document.querySelector('#react-root') + const storeHost = reactRoot?.firstElementChild?.firstElementChild + if (storeHost instanceof HTMLElement) { + const xProps = getXProps(storeHost) + const reduxStore = xProps?.children?.props?.store ?? {} + this.snapshot = + reduxStore.getState instanceof Function ? reduxStore.getState() : null + } + } + + access(type: string, id: string) { + type = type === 'notifications' ? 'genericNotifications' : type + return this.snapshot?.entities?.[type]?.entities?.[id] ?? null + } +} + +export const store = new XStateStore() + +/** + * Retrieves React-specific properties from an element by + * finding the first property starting with "__reactProps" + */ +export function getXProps(element: HTMLElement | null): any { + if (element instanceof HTMLElement) { + for (const property in element) { + if (property.startsWith('__reactProps')) { + return element[property as keyof typeof element] + } + } + } + return null +} diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/distiller.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/distiller.ts new file mode 100644 index 000000000000..cdb6720e1852 --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/distiller.ts @@ -0,0 +1,74 @@ +// Copyright (c) 2024 The Brave Authors. All rights reserved. +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+import selectors from './selectors'
+import { isSupportedPage } from './utils'
+import { distillNotificationElement } from './notifications'
+import { distillPostElement } from './post'
+import { distillSeenUsers } from './user'
+import { LEO_DISTILLATION_LEVEL } from '../distillation'
+
+let _DISTILLATION_LEVEL = LEO_DISTILLATION_LEVEL.LOW
+
+/**
+ * Returns the level most recently passed to distill(), or LOW if distill()
+ * has not yet run on a supported page.
+ */
+export function getDistillationLevel() {
+  return _DISTILLATION_LEVEL
+}
+
+/**
+ * Entry point of the x.com distiller. Returns null on unsupported pages;
+ * otherwise records the requested level and returns metadata about the
+ * users seen on the page followed by the distilled primary column.
+ */
+export default function distill(distillLevel = LEO_DISTILLATION_LEVEL.LOW) {
+  if (!isSupportedPage(document)) {
+    return null
+  }
+
+  _DISTILLATION_LEVEL = distillLevel
+
+  const column = distillPrimaryColumn()
+  const seenUsers = distillSeenUsers(distillLevel)
+
+  /**
+   * Include helpful metadata about users seen on the page, but in a way that
+   * it isn't confused with the original content.
+   */
+  return (
+    `${seenUsers}\n\nNote: The above user information is supplemental ` +
+    `metadata and not part of the original page content.\n\n--- Page ` +
+    `Content Below ---\n\n${column}`
+  )
+}
+
+// Maps an item's `data-testid` value to the selector that finds such items
+// and the function that distills them.
+const config = {
+  'tweet': {
+    'selector': selectors.tweet,
+    'distiller': distillPostElement
+  },
+  'notification': {
+    'selector': selectors.notification,
+    'distiller': distillNotificationElement
+  }
+} as Record<string, any>
+
+/**
+ * Extracts and processes all items (like tweets and
+ * notifications) from the primary column of the page, using
+ * configuration to determine which elements to distill.
+ * TODO (Sampson): Add support for other primary column items,
+ * such as "show more" buttons, "this post is from a suspended
+ * account", trends, and more.
+ */
+function distillPrimaryColumn() {
+  const primaryColumn = document.querySelector(selectors.primaryColumn)
+  const selectorList = Object.values(config)
+    .map((item) => item.selector)
+    .join(', ')
+  const timelineItems = primaryColumn?.querySelectorAll(selectorList) ?? []
+
+  return Array.from(timelineItems)
+    .map((item) => {
+      const type = item.getAttribute('data-testid')
+      // An item may have no data-testid, or one that is not a config key;
+      // destructuring `type && config[type]` threw in both cases, so look
+      // the distiller up defensively and skip such items instead.
+      const distiller = type ? config[type]?.distiller : undefined
+      return distiller ? distiller(item) : null
+    })
+    .filter(Boolean)
+    .join('\n\n---\n\n')
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/entities.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/entities.ts
new file mode 100644
index 000000000000..0569798f584b
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/entities.ts
@@ -0,0 +1,92 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+import { msToMinutesAndSeconds } from './utils'
+
+/**
+ * In this case, "Attachments" refers to any of the media
+ * entities that are associated with the post. Returns one
+ * distilled entry per photo, video or animated GIF, joined by
+ * newlines (an empty string when the post has no media).
+ * TODO (Sampson): Consider combining "cards" into this
+ * category as well.
+ */
+export function distillPostMediaEntities(post: any) {
+  // Posts without attachments may carry no `extended_entities` at all.
+  const entities = post.extended_entities?.media ?? []
+
+  return entities
+    .map((entity: any) => {
+      switch (entity.type) {
+        case 'photo':
+        case 'video':
+        case 'animated_gif':
+          return distillPostMediaEntity(entity)
+        default:
+          console.warn(`Unhandled media type: ${entity.type}`, entity)
+          return null
+      }
+    })
+    .filter(Boolean)
+    .join('\n')
+}
+
+function distillPostMediaEntity(media: any) {
+  const { type, expanded_url: expandedURL, video_info: videoInfo } = media
+
+  const { width, height } = media.original_info ?? {}
+  const { duration_millis: durationMillis, variants } = videoInfo ?? {}
+
+  const labels = {
+    photo: 'Photo',
+    video: 'Video',
+    animated_gif: 'Animated GIF'
+  } as Record<string, string>
+
+  const downloadableVariant = variants && getDownloadableVariant(variants)
+  const output = [` - Type: ${labels[type] ??
type}`] + + if (width && height) output.push(` - Dimensions: ${width}x${height}`) + + if (durationMillis) + output.push(` - Duration: ${msToMinutesAndSeconds(durationMillis)}s`) + + if (expandedURL) output.push(` - URL: ${expandedURL}`) + + if (downloadableVariant) + output.push(` - Download URL: ${downloadableVariant.url}`) + + return output.filter(Boolean).join('\n') +} + +function getDownloadableVariant(variants: any) { + return variants.reduce((best: any, variant: any) => { + if (variant.content_type === 'video/mp4') { + return best === null || variant.bitrate > best.bitrate ? variant : best + } + return best + }, null) +} + +/** + * We will use this to replace shortened URLs + * in both tweet bodies and profile descriptions. + */ +export function expandEntities(text: string, entities: any) { + let output = text + + for (const name in entities) { + if (['media', 'urls'].includes(name)) { + for (const { url, expanded_url: xURL } of entities[name]) { + /** + * Media attachments are already promoted to a top-level + * listing, so we will simply remove them as inline + * mentions. URLs, on the other hand will be replaced + * with their fully-expanded value. + */ + output = output.replace(url, name === 'media' ? '' : xURL) + } + } + } + + return output.trim() +} diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/index.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/index.ts new file mode 100644 index 000000000000..7c481b9118d2 --- /dev/null +++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/index.ts @@ -0,0 +1,8 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. 
*/
+
+import distill from './distiller'
+
+export { distill }
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/notifications.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/notifications.ts
new file mode 100644
index 000000000000..b57772271e6d
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/notifications.ts
@@ -0,0 +1,103 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+import { getXProps, store } from './data'
+import { getUserSignature } from './user'
+import { wrapLines } from './utils'
+
+/**
+ * Distills a notification DOM element into text.
+ *
+ * The notification id and URL are read from the props attached to the
+ * element's parent; the notification itself is looked up in the store.
+ *
+ * @returns "Notification: …" and "URL: …" lines (each only when available),
+ *   or an empty string when the notification is not in the store.
+ */
+export function distillNotificationElement(element: HTMLElement) {
+  const notificationXProps = getXProps(element.parentElement)
+  const content = notificationXProps?.children?.props?.entry?.content
+  const notificationId = content?.id
+  const notificationURL = content?.url?.url
+  const notificationData = store.access('notifications', notificationId)
+
+  const distilled: Array<string | null> = []
+
+  if (notificationData) {
+    const message = distillNotificationData(notificationData)
+    distilled.push(
+      message && `Notification: ${message}`,
+      notificationURL && `URL: ${notificationURL}`
+    )
+  }
+
+  return distilled.filter(Boolean).join('\n')
+}
+
+/**
+ * Dispatches on the notification template. Only `aggregateUserActionsV1`
+ * is handled; any other template yields null.
+ */
+function distillNotificationData(notification: any) {
+  // TODO (Sampson): Possibly handle other notification templates
+  if (notification.template?.aggregateUserActionsV1) {
+    return distillNotificationDataAggregateUserActionsV1(notification)
+  }
+  return null
+}
+
+/**
+ * Distills an `aggregateUserActionsV1` notification: applies the message
+ * entities (strong text becomes uppercase, user references become user
+ * signatures) and appends the text of the first target post, if any.
+ *
+ * @returns The distilled message, or null when the notification carries no
+ *   message text.
+ */
+function distillNotificationDataAggregateUserActionsV1(notification: any) {
+  const text = notification.message?.text
+  const entities = notification.message?.entities
+  const aggregateUserActionsV1 = notification.template?.aggregateUserActionsV1
+
+  // Without message text there is nothing to apply the entities to.
+  if (typeof text !== 'string') {
+    return null
+  }
+
+  let distilled: string = text
+
+  if (Array.isArray(entities)) {
+    /**
+     * Walk through the entities in reverse order so as not to cause
+     * issues with indexes as we modify the string.
+     * NOTE(review): this presumes the entities are ordered by ascending
+     * `fromIndex` - confirm against the data source.
+     * TODO (Sampson): Indexes do not account for code points, so this
+     * could potentially cause issues with multi-byte characters. We'll
+     * need to revisit this to make sure signatures with emojis are
+     * handled correctly.
+     */
+    for (let i = entities.length - 1; i >= 0; i--) {
+      const { fromIndex, toIndex, format, ref } = entities[i]
+
+      if (typeof format === 'string') {
+        if (format.toLowerCase() === 'strong') {
+          // We'll convert strong text to uppercase
+          const strong = distilled.slice(fromIndex, toIndex).toUpperCase()
+          const front = distilled.slice(0, fromIndex)
+          const back = distilled.slice(toIndex)
+          distilled = front + strong + back
+        } else {
+          console.warn(`Unhandled format: ${format}`)
+        }
+      }
+
+      if (ref) {
+        if (ref.user) {
+          const user = store.access('users', ref.user.id)
+          if (user) {
+            const signature = getUserSignature(user)
+            const front = distilled.slice(0, fromIndex)
+            const back = distilled.slice(toIndex)
+            distilled = front + signature + back
+          }
+        } else {
+          // Pass the object itself: interpolating it into the message
+          // would only print "[object Object]".
+          console.warn('Unhandled ref:', ref)
+        }
+      }
+    }
+  }
+
+  /**
+   * For now we'll just pull in the text of the first associated post (if any).
+   */
+  const first = aggregateUserActionsV1?.targetObjects?.[0]
+  if (first?.tweet) {
+    const tweet = store.access('tweets', first.tweet)
+    if (tweet && typeof tweet.full_text === 'string') {
+      const quotedText = wrapLines(tweet.full_text)
+      if (quotedText) {
+        distilled += `\n\n"${quotedText}"`
+      }
+    }
+  }
+
+  return distilled
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/post.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/post.ts
new file mode 100644
index 000000000000..c5aca284b4f9
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/post.ts
@@ -0,0 +1,213 @@
+/* Copyright (c) 2024 The Brave Authors. 
All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+import { getXProps, store } from './data'
+import { getUserSignature } from './user'
+import {
+  indentLines,
+  wrapLines,
+  isString,
+  decodeHTMLSpecialChars,
+  getDateString
+} from './utils'
+import { distillCard } from './cards'
+import { distillPostMediaEntities } from './entities'
+import { getDistillationLevel } from './distiller'
+import { LEO_DISTILLATION_LEVEL } from '../distillation'
+
+/**
+ * Generates a list of formatted metrics (like Likes,
+ * Quotes, Replies) for a given tweet, considering the
+ * plurality of values.
+ * Metrics whose `<key>_count` property is undefined are skipped; at low
+ * distillation, zero-valued metrics are skipped as well. Quotes and
+ * reposts are reported once, as a single combined value.
+ * TODO (Sampson): Come back and define some types, such
+ * as the `post` parameter.
+ */
+function getPostMetrics(post: any) {
+  const metrics = [
+    { key: 'favorite', singular: 'Like', plural: 'Likes' },
+    { key: 'quote', singular: 'Quote', plural: 'Quotes' },
+    { key: 'reply', singular: 'Reply', plural: 'Replies' },
+    { key: 'retweet', singular: 'Repost', plural: 'Reposts' },
+    { key: 'bookmark', singular: 'Bookmark', plural: 'Bookmarks' }
+  ]
+
+  /**
+   * We won't include zero values for low distillation
+   */
+  const skipZeroes = getDistillationLevel() === LEO_DISTILLATION_LEVEL.LOW
+
+  /**
+   * X is now combining these two metrics into a single value.
+   * To keep the output consistent, we'll do the same here.
+   */
+  let processedQuotesAndReposts = false
+
+  return metrics
+    .map(({ key, singular, plural }) => {
+      const propValue = post[`${key}_count`]
+
+      if (propValue === undefined) return null
+
+      if (key === 'quote' || key === 'retweet') {
+        // The combined value is emitted for whichever of the two keys is
+        // seen first; the other one must not be reported a second time.
+        if (processedQuotesAndReposts) return null
+        processedQuotesAndReposts = true
+
+        const combinedValue =
+          Number(post.quote_count ?? 0) + Number(post.retweet_count ?? 0)
+
+        if (skipZeroes && combinedValue === 0) return null
+
+        const label = combinedValue === 1 ? 'Quote/Repost' : 'Quotes/Reposts'
+        return `${combinedValue.toLocaleString()} ${label}`
+      }
+
+      if (skipZeroes && Number(propValue) === 0) return null
+
+      const label = propValue === 1 ? singular : plural
+      return `${propValue.toLocaleString()} ${label}`
+    })
+    .filter(Boolean)
+}
+
+/**
+ * Extracts post data from a DOM element and distills
+ * it into a formatted output. Falls back to the element's
+ * `innerText` when the post is not found in the store.
+ */
+export function distillPostElement(element: HTMLElement) {
+  const postXProps = getXProps(element.parentElement)
+  const postID = postXProps?.children?.props?.entry?.content?.id
+  const postData = postID && store.access('tweets', postID)
+
+  return postData ? distillPostData(postData) : element.innerText
+}
+
+/**
+ * Constructs the text output for a given post, handling
+ * reposts, headers, and post body content.
+ *
+ * @param post Post data from the store.
+ * @param level Quote nesting depth, forwarded to the body builder.
+ * @returns The distilled text, or null for a repost whose original post or
+ *   reposting user cannot be resolved.
+ */
+function distillPostData(post: any, level: number = 0): string | null {
+  if (post.retweeted_status) {
+    return distillRepostPost(post)
+  }
+
+  const header = distillPostHeader(post)
+  const body = buildPostBody(post, level)
+
+  return [header, body && indentLines(`\n${body}`)].filter(Boolean).join('\n')
+}
+
+/**
+ * Builds the main content of a post, including
+ * handling quoted content and text wrapping.
+ * The parts appear in this order, each only when present: text, card,
+ * quoted post, attachments.
+ */
+function buildPostBody(post: any, level: number) {
+  const text = getPostText(post)
+  const quoted = post.quoted_status ? buildPostQuotedBody(post, level) : null
+  const attachments = post.extended_entities?.media
+    ? distillPostMediaEntities(post)
+    : null
+  const card = post.card ? distillCard(post) : null
+
+  return [
+    text ? wrapLines(text) : null,
+    card ? `\n${indentLines(wrapLines(card), ' | ')}` : null,
+    quoted ? `\n${indentLines(quoted, ' > ')}` : null,
+    attachments ? `\nAttachments:\n${attachments}` : null
+  ]
+    .filter(Boolean)
+    .join('\n')
+}
+
+/**
+ * Creates the header of a post, including the author
+ * information, action line, and any relevant metrics. 
+ */
+function distillPostHeader(post: any) {
+  const author = store.access('users', post.user)
+  const signature = getUserSignature(author)
+  const metrics = getPostMetrics(post)
+
+  return [
+    `From: ${signature}`,
+    buildPostActionLine(post),
+    metrics.length > 0 ? `Metrics: ${metrics.join(' | ')}` : null
+  ]
+    .filter(Boolean)
+    .join('\n')
+}
+
+/**
+ * Retrieves and decodes the main text of a post, handling
+ * different fields where the text might be located
+ * (`note_tweet.text` is preferred over `full_text`).
+ * Returns null when neither field holds a string.
+ */
+function getPostText(post: any) {
+  const rawText = isString(post.note_tweet?.text)
+    ? post.note_tweet.text
+    : post.full_text
+
+  // Check before touching the value: calling `replace` on a missing
+  // `full_text` would throw.
+  if (!isString(rawText)) {
+    return null
+  }
+
+  // Remove leading references
+  const text =
+    getDistillationLevel() < LEO_DISTILLATION_LEVEL.HIGH
+      ? rawText.replace(/^(@[a-zA-Z0-9_]+\s+)+/, '')
+      : rawText
+
+  return decodeHTMLSpecialChars(text)
+}
+
+/**
+ * Formats a reposted tweet, incorporating the original
+ * tweet’s content and indicating who reposted it.
+ * Returns null when the reposting user or the original post
+ * cannot be found in the store.
+ */
+function distillRepostPost(post: any): string | null {
+  const repostingUser = store.access('users', post.user)
+  const originalPost = store.access('tweets', post.retweeted_status)
+
+  if (!repostingUser || !originalPost) {
+    return null
+  }
+
+  const distilled = distillPostData(originalPost)
+  if (!distilled) {
+    return null
+  }
+
+  // The first line is the "From: …" header; the repost note is appended
+  // to it and the rest of the distilled post is kept as is.
+  const [fromUser, ...rest] = distilled.split('\n')
+  const repostedBy = `Reposted by ${getUserSignature(repostingUser)}`
+  return [`${fromUser} (${repostedBy})`, ...rest].join('\n')
+}
+
+/**
+ * Constructs an action line for a post, such as indicating
+ * a reply or when the post was created.
+ */
+function buildPostActionLine(post: any) {
+  const date = getDateString(post.created_at)
+  if (post.in_reply_to_user_id_str && post.in_reply_to_screen_name) {
+    const user = store.access('users', post.in_reply_to_user_id_str)
+    // `getUserSignature` always returns a string (a placeholder for
+    // unknown users), so the screen-name fallback has to be selected
+    // explicitly when the user is not in the store.
+    const signature = user
+      ? getUserSignature(user)
+      : `@${post.in_reply_to_screen_name}`
+    return `Replied to ${signature}: ${date}`
+  }
+  return `Posted: ${date}`
+}
+
+/**
+ * Handles the content for quoted posts, including truncation
+ * logic and URL continuation if the maximum depth is reached.
+ * Quotes are followed up to three levels deep. Returns null when
+ * the quote cannot be distilled and no URL is available either.
+ */
+function buildPostQuotedBody(post: any, level: number = 0): string | null {
+  const quote = store.access('tweets', post.quoted_status)
+  const distilled =
+    level < 3 && quote ? distillPostData(quote, level + 1) : null
+  if (distilled) {
+    return distilled
+  }
+
+  // Avoid emitting "Thread continues at null".
+  const url = getExpandedPostURL(post)
+  return url ? `Thread continues at ${url}` : null
+}
+
+/**
+ * Retrieves the expanded URL for a post, either from the
+ * provided permalink or by constructing it using user
+ * information. Returns null when neither is available.
+ */
+function getExpandedPostURL(post: any) {
+  const url = post.quoted_status_permalink?.expandedUrl
+  if (url) return url
+  const { screen_name: screenName } = store.access('users', post.user) ?? {}
+  return isString(screenName)
+    ? `https://x.com/${screenName}/status/${post.id_str}`
+    : null
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/selectors.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/selectors.ts
new file mode 100644
index 000000000000..f38768acc63f
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/selectors.ts
@@ -0,0 +1,11 @@
+// Copyright (c) 2024 The Brave Authors. All rights reserved.
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+export default {
+  "tweet": "[data-testid='tweet']",
+  "notification": "[data-testid='notification']",
+  "primaryColumn": "[data-testid='primaryColumn']",
+  "userProfileHeaderItems": "[data-testid='UserProfileHeader_Items']"
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/user.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/user.ts
new file mode 100644
index 000000000000..d6f28b4188a3
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/user.ts
@@ -0,0 +1,131 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+import { getDateString, isString, shortNumberString } from './utils'
+import { store } from './data'
+import { LEO_DISTILLATION_LEVEL } from '../distillation'
+import { expandEntities } from './entities'
+import { getDistillationLevel } from './distiller'
+
+// Users for which a signature was requested; summarized by
+// `distillSeenUsers`. Entries are only ever added in this module.
+const SeenUsersSet = new Set<any>()
+// Verbose signatures already emitted, keyed by the user's `id_str`.
+const UserSignatures = new Map<string, string>()
+
+/**
+ * Generates a formatted signature string for a user,
+ * considering different possible representations (name,
+ * screen name, or ID).
+ *
+ * @param user A user object, or a user id (string or integer) that is
+ *   resolved through the store.
+ * @returns Always a string; unknown users get a placeholder signature.
+ */
+export function getUserSignature(user: any) {
+  if (isString(user) || Number.isInteger(user)) {
+    user = store.access('users', user)
+  }
+
+  // Only resolved user objects are recorded: a missing user in the set
+  // would break the seen-users summaries, which read properties of every
+  // entry.
+  if (user && typeof user === 'object') {
+    SeenUsersSet.add(user)
+  }
+
+  const { name, screen_name: screenName, id_str: idStr } = user ?? {}
+
+  /**
+   * If we have already created a signature for this user, we will
+   * instead return just their screen name. The verbose signature is
+   * already present in the document, so only the shorter version
+   * should suffice (and save us some chars).
+   */
+  if (
+    UserSignatures.has(idStr) ||
+    getDistillationLevel() === LEO_DISTILLATION_LEVEL.LOW
+  ) {
+    return isString(screenName) ? `@${screenName}` : 'Anonymous'
+  }
+
+  if (isString(screenName)) {
+    const signature = isString(name)
+      ? `${name} <@${screenName}>`
+      : `@${screenName}`
+
+    // Users without an id must not share a single cache entry.
+    if (isString(idStr)) {
+      UserSignatures.set(idStr, signature)
+    }
+
+    return signature
+  }
+
+  return idStr ? `User #${idStr}` : 'Unknown User'
+}
+
+/**
+ * Summarizes the users recorded by `getUserSignature`.
+ *
+ * @param level Distillation level; LOW produces the compact listing, all
+ *   other known levels the detailed one.
+ * @returns The summary text (empty when no user qualifies), or null for an
+ *   unknown level.
+ */
+export function distillSeenUsers(level = LEO_DISTILLATION_LEVEL.MEDIUM) {
+  switch (level) {
+    case LEO_DISTILLATION_LEVEL.LOW:
+      return distillLowSeenUsers()
+    case LEO_DISTILLATION_LEVEL.MEDIUM:
+    case LEO_DISTILLATION_LEVEL.HIGH:
+    case LEO_DISTILLATION_LEVEL.FULL:
+      return distillMediumHighSeenUsers()
+    default:
+      return null
+  }
+}
+
+/**
+ * Compact listing: screen name (with a verified marker), join date and
+ * abbreviated follower count. Users without `created_at` are skipped.
+ */
+function distillLowSeenUsers() {
+  const output = [] as string[]
+
+  for (const user of SeenUsersSet) {
+    if (typeof user?.created_at === 'undefined') {
+      continue
+    }
+
+    // A missing count is treated as zero rather than aborting the summary.
+    const followers = shortNumberString(user.followers_count ?? 0)
+    const memberSince = getDateString(user.created_at)
+
+    const entry = [
+      `@${user.screen_name}${isUserVerified(user) ? ' [Verified]' : ''}`,
+      ` - Joined: ${memberSince}`,
+      ` - Followers: ${followers}`
+    ].join('\n')
+
+    output.push(entry)
+  }
+
+  return output.join('\n\n')
+}
+
+/**
+ * Returns true when any of the verification-related fields of the user
+ * (`verified`, `is_blue_verified`, `professional`, `verified_type`) is
+ * truthy.
+ */
+function isUserVerified(user: any): boolean {
+  return Boolean(
+    user.verified ||
+      user.is_blue_verified ||
+      user.professional ||
+      user.verified_type
+  )
+}
+
+/**
+ * Detailed listing: name, username, verification, follower/following
+ * counts, join date and (when present) the description with its shortened
+ * URLs expanded. Users without `created_at` are skipped.
+ */
+function distillMediumHighSeenUsers() {
+  const output = [] as string[]
+
+  for (const user of SeenUsersSet) {
+    if (typeof user?.created_at === 'undefined') {
+      continue
+    }
+
+    const memberSince = getDateString(user.created_at)
+    const description =
+      user.description &&
+      expandEntities(user.description, user.entities?.description)
+
+    // Missing counts are treated as zero rather than aborting the summary.
+    const followersStr = (user.followers_count ?? 0).toLocaleString()
+    const followingStr = (user.friends_count ?? 0).toLocaleString()
+
+    const entry = [
+      `**${user.name}**`,
+      ` - Username: @${user.screen_name}`,
+      ` - Verified: ${isUserVerified(user) ? 'Yes' : 'No'}`,
+      ` - Followers: ${followersStr} | Following: ${followingStr}`,
+      ` - Member Since: ${memberSince}`,
+      description && ` - Description: ${description}`
+    ]
+      .filter(Boolean)
+      .join('\n')
+
+    output.push(entry)
+  }
+
+  return output.join('\n\n')
+}
diff --git a/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/utils.ts b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/utils.ts
new file mode 100644
index 000000000000..40daeb69d128
--- /dev/null
+++ b/components/ai_chat/resources/custom_site_distiller_scripts/scripts/x.com/utils.ts
@@ -0,0 +1,125 @@
+/* Copyright (c) 2024 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+import selectors from './selectors';
+import { LEO_DISTILLATION_LEVEL } from '../distillation'
+import { getDistillationLevel } from './distiller'
+
+/**
+ * Returns true when the value is a primitive string.
+ */
+export function isString(value: any) {
+  return typeof value === 'string'
+}
+
+/**
+ * Tells whether the given document can be distilled: user profile pages
+ * (detected through the profile header element), the home timeline, the
+ * notifications page and post detail pages.
+ */
+export function isSupportedPage(document: Document) {
+  // If this is a user profile page, we can distill it.
+  if (document.querySelector(selectors.userProfileHeaderItems)) {
+    return true
+  }
+
+  const supportedPaths = [
+    '/',
+    '/home',
+    '/notifications',
+    /^\/.*?\/status\/\d+/ // Post detail pages
+  ]
+
+  const { pathname } = document.location
+
+  // Strings must match the path exactly; patterns are tested against it.
+  return supportedPaths.some((entry) =>
+    entry instanceof RegExp ? entry.test(pathname) : entry === pathname
+  )
+}
+
+/**
+ * Decodes HTML entities in a given string using an ad-hoc
+ * `textarea` element, constructed in an ad-hoc document (for
+ * namespace isolation). The `textarea` is then leveraged to
+ * decode the HTML entities in the given string. 
+ */
+export function decodeHTMLSpecialChars(text: string) {
+  const isolatedDocument = document.implementation.createHTMLDocument()
+  const textarea = isolatedDocument.createElement("textarea")
+  // eslint-disable-next-line no-unsanitized/property
+  textarea.innerHTML = text
+  return textarea.value
+}
+
+/**
+ * Indents each line of the given text by a specified
+ * number of spaces or a string prefix.
+ *
+ * @param text Text whose lines are prefixed.
+ * @param indent Number of spaces, or the literal prefix to use.
+ */
+export function indentLines(text: string, indent: number | string = 2) {
+  const prefix = Number.isInteger(indent)
+    ? ' '.repeat(indent as number)
+    : indent
+  return text
+    .split('\n')
+    .map((line) => `${prefix}${line}`)
+    .join('\n')
+}
+
+/**
+ * Wraps lines of text to a specified maximum length,
+ * breaking at spaces to ensure readability.
+ * Existing line breaks are kept. Each input line is trimmed, and a word
+ * longer than the limit is placed on a line of its own.
+ *
+ * @param text Text to wrap.
+ * @param limit Maximum line length aimed for (default 80).
+ */
+export function wrapLines(text: string, limit: number = 80) {
+  return text
+    .split('\n')
+    .map((line) => {
+      const words = line.split(' ')
+      let replace = ''
+      let length = 0
+      for (const word of words) {
+        if (length + word.length > limit) {
+          // Drop the separator that follows the previous word so that
+          // wrapped lines do not end with a trailing space.
+          replace = replace.trimEnd() + '\n'
+          length = 0
+        }
+        replace += word + ' '
+        length += word.length + 1
+      }
+      return replace.trim()
+    })
+    .join('\n')
+}
+
+/**
+ * Formats a duration in milliseconds as "<minutes>m <seconds>s"
+ * (both rounded down).
+ */
+export function msToMinutesAndSeconds(ms: number) {
+  const minutes = Math.floor(ms / 60000)
+  const seconds = Math.floor((ms % 60000) / 1000)
+  return `${minutes}m ${seconds}s`
+}
+
+/**
+ * Formats a date for the distilled output: `YYYY-MM-DD` below medium
+ * distillation, the locale date and time otherwise.
+ * Returns 'Unknown' when the value cannot be parsed as a date.
+ */
+export function getDateString(date: any) {
+  const dateObject = new Date(date)
+
+  // `toISOString` throws a RangeError on an invalid date, which would
+  // abort the whole distillation.
+  if (Number.isNaN(dateObject.getTime())) {
+    return 'Unknown'
+  }
+
+  const distillLevel = getDistillationLevel()
+
+  if (distillLevel < LEO_DISTILLATION_LEVEL.MEDIUM) {
+    return dateObject.toISOString().substring(0, 10)
+  }
+
+  return dateObject.toLocaleString()
+}
+
+/**
+ * Abbreviates a number with a magnitude suffix (k, M, B, T), e.g.
+ * 1000 -> "1k", 100000 -> "100k", 2500000 -> "3M" at the default precision.
+ * Values below 1000 (and non-finite values) are returned unabbreviated.
+ *
+ * @param value Number to abbreviate.
+ * @param precision Number of significant digits kept (default 1).
+ */
+export function shortNumberString(value: number, precision: number = 1) {
+  if (!Number.isFinite(value) || value < 1000) {
+    return String(value)
+  }
+
+  const suffixes = ['k', 'M', 'B', 'T']
+
+  // Scale down by thousands; the number of steps selects the suffix.
+  // Values beyond the last suffix stay expressed in that suffix.
+  let scaled = value
+  let suffixNum = 0
+  while (scaled >= 1000 && suffixNum < suffixes.length) {
+    scaled /= 1000
+    suffixNum++
+  }
+
+  let shortValue = parseFloat(scaled.toPrecision(precision))
+
+  // Rounding may carry into the next magnitude (999999 -> 1000k -> 1M).
+  if (shortValue >= 1000 && suffixNum < suffixes.length) {
+    shortValue = parseFloat((shortValue / 1000).toPrecision(precision))
+    suffixNum++
+  }
+
+  if (shortValue % 1 !== 0) {
+    shortValue = parseFloat(shortValue.toFixed(1))
+  }
+
+  return shortValue + suffixes[suffixNum - 1]
+}
diff --git a/components/resources/BUILD.gn b/components/resources/BUILD.gn
index f54488abe62a..4b2a444d9fba 100644
--- a/components/resources/BUILD.gn
+++ b/components/resources/BUILD.gn
@@ -53,12 +53,14 @@ brave_grit("static_resources") {
 repack("resources") {
   deps = [
     ":static_resources",
+    "//brave/components/ai_chat/resources/custom_site_distiller_scripts:generated_resources",
     "//brave/components/ai_chat/resources/page:generated_resources",
     "//brave/components/ai_rewriter/common/buildflags",
     "//brave/components/brave_ads/browser/resources:generated_resources",
     "//brave/components/skus/browser/resources:generated_resources",
   ]
   sources = [
+    "$root_gen_dir/brave/components/ai_chat/resources/custom_site_distiller_scripts/custom_site_distiller_scripts_generated.pak",
     "$root_gen_dir/brave/components/ai_chat/resources/page/ai_chat_ui_generated.pak",
     "$root_gen_dir/brave/components/brave_ads/browser/resources/ads_internals_generated.pak",
     "$root_gen_dir/brave/components/skus/browser/resources/skus_internals_generated.pak",
diff --git a/resources/resource_ids.spec b/resources/resource_ids.spec
index 84f2ce499bd7..33ecc22a9a67 100644
--- a/resources/resource_ids.spec
+++ b/resources/resource_ids.spec
@@ -233,6 +233,10 @@
     "META": {"sizes": {"includes": [10]}},
     "includes": [34710],
   },
+  "<(SHARED_INTERMEDIATE_DIR)/brave/web-ui-custom_site_distiller_scripts/custom_site_distiller_scripts.grd": {
+    "META": {"sizes": {"includes": [50]}},
+    "includes": [34720],
+  },
   # WARNING: The upstream ChromeOS/Ash strings currently run through 36930. We
   # must be careful not to exceed that maximum when adding new strings here.
 }