Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make Chromium install configurable #2192

Merged
merged 10 commits into from
Sep 5, 2024
16 changes: 14 additions & 2 deletions core/config/util.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { ModelDescription } from "../index.js";
import { editConfigJson } from "../util/paths.js";
import { ContextProviderWithParams, ModelDescription } from "../";
import { editConfigJson } from "../util/paths";

function stringify(obj: any, indentation?: number): string {
return JSON.stringify(
Expand All @@ -11,6 +11,18 @@ function stringify(obj: any, indentation?: number): string {
);
}

/**
 * Registers a context provider in the config JSON.
 *
 * Hands `editConfigJson` a callback that appends `provider` to
 * `config.contextProviders`, creating the array first when it is absent,
 * and then returns the same config object.
 *
 * @param provider - The context provider entry to add.
 */
export function addContextProvider(provider: ContextProviderWithParams) {
  editConfigJson((config) => {
    // An existing list just gets the new entry appended.
    if (config.contextProviders) {
      config.contextProviders.push(provider);
      return config;
    }

    // No list yet: start one containing only this provider.
    config.contextProviders = [provider];
    return config;
  });
}

export function addModel(model: ModelDescription) {
editConfigJson((config) => {
if (config.models?.some((m: any) => stringify(m) === stringify(model))) {
Expand Down
2 changes: 1 addition & 1 deletion core/context/providers/CustomContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import type {
CustomContextProvider,
IContextProvider,
LoadSubmenuItemsArgs,
} from "../../index.js";
} from "../../";

class CustomContextProviderClass implements IContextProvider {
custom: CustomContextProvider;
Expand Down
8 changes: 5 additions & 3 deletions core/context/providers/DocsContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ class DocsContextProvider extends BaseContextProvider {
await docsService.isJetBrainsAndPreIndexedDocsProvider();

if (isJetBrainsAndPreIndexedDocsProvider) {
extras.ide.errorPopup(
extras.ide.showToast(
"error",
`${DocsService.preIndexedDocsEmbeddingsProvider.id} is configured as ` +
"the embeddings provider, but it cannot be used with JetBrains. " +
"Please select a different embeddings provider to use the '@docs' " +
Expand All @@ -108,8 +109,9 @@ class DocsContextProvider extends BaseContextProvider {
});
}

const embeddingsProvider =
await docsService.getEmbeddingsProvider(!!preIndexedDoc);
const embeddingsProvider = await docsService.getEmbeddingsProvider(
!!preIndexedDoc,
);

const [vector] = await embeddingsProvider.embed([extras.fullInput]);

Expand Down
11 changes: 7 additions & 4 deletions core/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ export class Core {
message: err.message,
stack: err.stack,
});
this.messenger.request("errorPopup", { message: err.message });
this.ide.showToast("error", err.message);
});

// New
Expand Down Expand Up @@ -272,9 +272,9 @@ export class Core {
}

if (hasFailed) {
this.ide.infoPopup(`Failed to index ${msg.data.startUrl}`);
this.ide.showToast("info", `Failed to index ${msg.data.startUrl}`);
} else {
this.ide.infoPopup(`Successfully indexed ${msg.data.startUrl}`);
this.ide.showToast("info", `Successfully indexed ${msg.data.startUrl}`);
this.messenger.send("refreshSubmenuItems", undefined);
}
});
Expand Down Expand Up @@ -344,7 +344,10 @@ export class Core {
id,
}));
} catch (e) {
this.ide.errorPopup(`Error getting context items from ${name}: ${e}`);
this.ide.showToast(
"error",
`Error getting context items from ${name}: ${e}`,
);
return [];
}
});
Expand Down
30 changes: 25 additions & 5 deletions core/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ export interface ILLM extends LLMOptions {
export type ContextProviderType = "normal" | "query" | "submenu";

export interface ContextProviderDescription {
title: string;
title: ContextProviderName;
displayTitle: string;
description: string;
renderInlineAs?: string;
Expand Down Expand Up @@ -317,6 +317,9 @@ export interface LLMFullCompletionOptions extends BaseCompletionOptions {

model?: string;
}

/** Kinds of toast that can be passed as the `type` argument of `showToast`. */
export type ToastType =
  | "info"
  | "error"
  | "warning";

export interface LLMOptions {
model: string;

Expand Down Expand Up @@ -497,9 +500,11 @@ export interface IDE {
getBranch(dir: string): Promise<string>;
getTags(artifactId: string): Promise<IndexTag[]>;
getRepoName(dir: string): Promise<string | undefined>;
errorPopup(message: string): Promise<void>;
infoPopup(message: string): Promise<void>;

showToast(
type: ToastType,
message: string,
...otherParams: any[]
): Promise<any>;
getGitRootPath(dir: string): Promise<string | undefined>;
listDir(dir: string): Promise<[string, FileType][]>;
getLastModified(files: string[]): Promise<{ [path: string]: number }>;
Expand Down Expand Up @@ -569,7 +574,15 @@ type ContextProviderName =
| "docs"
| "gitlab-mr"
| "os"
| "currentFile";
| "currentFile"
| "outline"
| "continue-proxy"
| "highlights"
| "file"
| "issue"
| "repo-map"
| "url"
| string;

type TemplateType =
| "llama2"
Expand Down Expand Up @@ -913,6 +926,13 @@ interface ExperimentalConfig {
* Automatically read LLM chat responses aloud using system TTS models
*/
readResponseTTS?: boolean;

/**
* If set to true, we will attempt to pull down and install an instance of Chromium
* that is compatible with the current version of Puppeteer.
* This is needed to crawl a large number of documentation sites that are dynamically rendered.
*/
useChromiumForDocsCrawling?: boolean;
}

interface AnalyticsConfig {
Expand Down
180 changes: 180 additions & 0 deletions core/indexing/docs/DocsCrawler.skip.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// import { SpiedFunction } from "jest-mock";
// import DocsCrawler, {
// ChromiumCrawler,
// ChromiumInstaller,
// type PageData,
// } from "./DocsCrawler";
// import preIndexedDocs from "./preIndexedDocs";
// import { jest } from "@jest/globals";
// import FileSystemIde from "../../util/filesystem";

// // Temporary workaround until we have better caching of Chromium
// // download between test runs
// const TIMEOUT_MS = 1_000_000_000;

// // Skipped until we have a better way to cache Chromium installs
// // between tests and in CI
// describe.skip("crawl", () => {
// const mockIde = new FileSystemIde(process.cwd());

// const chromiumInstaller = new ChromiumInstaller(mockIde);
// beforeAll(async () => {
// // Make sure we have Chromium pulled down before beginning tests
// await ChromiumInstaller.install();
// }, TIMEOUT_MS);

// describe("GitHub Crawler", () => {
// const repoUrl =
// "https://github.com/Patrick-Erichsen/test-github-repo-for-crawling";

// let crawlResults: PageData[];

// beforeAll(async () => {
// const docsCrawler = new DocsCrawler();

// crawlResults = [];

// for await (const page of docsCrawler.crawl(new URL(repoUrl))) {
// crawlResults.push(page);
// }
// }, TIMEOUT_MS);

// test("finds expected markdown files", async () => {
// expect(crawlResults.some((page) => page.path.endsWith("README.md"))).toBe(
// true,
// );

// expect(
// crawlResults.some((page) => page.path.endsWith("docs-depth-1.md")),
// ).toBe(true);
// });

// test("html includes the correct content", async () => {
// const doc = crawlResults.find((page) =>
// page.path.endsWith("docs-depth-1.md"),
// );

// expect(doc?.content.includes("Test body")).toBe(true);
// expect(doc?.content.includes("Test H2 Header")).toBe(true);
// });

// test("ignores non-markdown files, e.g. `test.js` at the root", async () => {
// expect(crawlResults.some((page) => page.path.endsWith("test.js"))).toBe(
// false,
// );
// });
// });

// describe("Chromium Crawler", () => {
// it("Pre-indexed Docs", () => {
// const results: { [url: string]: boolean } = {};
// let totalTests = 0;
// let passedTests = 0;

// Object.keys(preIndexedDocs).forEach((url) => {
// it(
// `Crawl test for ${url}`,
// async () => {
// totalTests++;
// let pageFound = false;

// try {
// const docsCrawler = new DocsCrawler();

// for await (const page of docsCrawler.crawl(new URL(url), 1)) {
// if (page.url === url) {
// pageFound = true;
// break;
// }
// }
// } catch (error) {
// console.error(`Error crawling ${url}:`, error);
// }

// results[url] = pageFound;
// if (pageFound) {
// passedTests++;
// console.log(`✅ ${url}`);
// } else {
// console.log(`❌ ${url}`);
// }

// expect(pageFound).toBe(true);
// },
// TIMEOUT_MS,
// );
// });
// });

// it(
// "succeeds in crawling a list of common sites",
// async () => {
// const TEST_SITES = [
// "https://docs.nestjs.com/",
// "https://docs.nestjs.com/",
// "https://go.dev/doc/",
// "https://clickhouse.com/docs",
// "https://www.tensorflow.org/api_docs",
// "https://www.rust-lang.org/learn",
// "https://docs.anthropic.com/en/docs",
// ];

// const NUM_PAGES_TO_CRAWL = 10;

// for (const site of TEST_SITES) {
// const crawlResults: PageData[] = [];
// const docsCrawler = new DocsCrawler();

// for await (const page of docsCrawler.crawl(
// new URL(site),
// NUM_PAGES_TO_CRAWL,
// )) {
// crawlResults.push(page);
// }

// // `toBeGreaterThanOrEqual` because Crawlee doesn't guarantee
// // an exact number of pages to crawl since it runs in parallel
// expect(crawlResults.length).toBeGreaterThanOrEqual(
// NUM_PAGES_TO_CRAWL,
// );
// }
// },
// TIMEOUT_MS,
// );
// });

// describe("Cheerio Crawler", () => {
// let mockVerifyOrInstallChromium: SpiedFunction<() => Promise<boolean>>;

// beforeAll(() => {
// mockVerifyOrInstallChromium = jest
// .spyOn(ChromiumCrawler, "verifyOrInstallChromium")
// .mockResolvedValue(false);
// });

// afterAll(() => {
// mockVerifyOrInstallChromium.mockRestore();
// });

// it(
// "succeeds in crawling a basic site",
// async () => {
// const NUM_PAGES_TO_CRAWL = 1;
// const site = "https://amplified.dev/";

// const crawlResults: PageData[] = [];
// const docsCrawler = new DocsCrawler();

// for await (const page of docsCrawler.crawl(
// new URL(site),
// NUM_PAGES_TO_CRAWL,
// )) {
// crawlResults.push(page);
// }

// expect(crawlResults.length).toBeGreaterThanOrEqual(NUM_PAGES_TO_CRAWL);
// },
// TIMEOUT_MS,
// );
// });
// });
Loading
Loading