From 677127d2baf18d798f2a4fe91162f38aa633ac7a Mon Sep 17 00:00:00 2001 From: Leo McArdle Date: Mon, 11 Dec 2023 10:37:12 +0100 Subject: [PATCH 1/6] feat(ai-help): add index with rendered macros --- .github/workflows/prod-build.yml | 9 +- .github/workflows/stage-build.yml | 9 +- build/cli.ts | 12 +- build/index.ts | 10 +- build/resolve-bcd.ts | 4 +- scripts/ai-help-macros.ts | 504 ++++++++++++++++++++++++++++++ scripts/ai-help.sql | 18 +- 7 files changed, 557 insertions(+), 9 deletions(-) create mode 100644 scripts/ai-help-macros.ts diff --git a/.github/workflows/prod-build.yml b/.github/workflows/prod-build.yml index 4230d8da10da..e2041def7700 100644 --- a/.github/workflows/prod-build.yml +++ b/.github/workflows/prod-build.yml @@ -307,7 +307,7 @@ jobs: if: ${{ ! vars.SKIP_BUILD }} run: |- gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main + gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - name: Authenticate with GCP if: ${{ ! vars.SKIP_FUNCTION }} @@ -380,6 +380,13 @@ jobs: SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + - name: Update AI Help index with macros + run: yarn ai-help-macros update-index + env: + OPENAI_KEY: ${{ secrets.OPENAI_KEY }} + SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + - name: Slack Notification if: failure() uses: rtCamp/action-slack-notify@v2 diff --git a/.github/workflows/stage-build.yml b/.github/workflows/stage-build.yml index 35c9d7474481..5d8daebc6a7f 100644 --- a/.github/workflows/stage-build.yml +++ b/.github/workflows/stage-build.yml @@ -300,7 +300,7 @@ jobs: if: ${{ ! 
vars.SKIP_BUILD }} run: |- gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main + gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - name: Authenticate with GCP if: ${{ ! vars.SKIP_FUNCTION }} @@ -373,6 +373,13 @@ jobs: SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + - name: Update AI Help index with macros + run: yarn ai-help-macros update-index + env: + OPENAI_KEY: ${{ secrets.OPENAI_KEY }} + SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + - name: Slack Notification if: failure() uses: rtCamp/action-slack-notify@v2 diff --git a/build/cli.ts b/build/cli.ts index 9f9eb6e9cb4e..2d1ad577d654 100644 --- a/build/cli.ts +++ b/build/cli.ts @@ -77,7 +77,11 @@ async function buildDocumentInteractive( ); } - return { document, doc: await buildDocument(document), skip: false }; + return { + document, + doc: await buildDocument(document, { plainHTML: true }), + skip: false, + }; } catch (e) { if (!interactive) { throw e; @@ -208,7 +212,7 @@ async function buildDocuments( } const { - doc: { doc: builtDocument, liveSamples, fileAttachmentMap }, + doc: { doc: builtDocument, liveSamples, fileAttachmentMap, plainHTML }, document, } = result; @@ -230,6 +234,10 @@ async function buildDocuments( ); } + if (plainHTML) { + fs.writeFileSync(path.join(outPath, "plain.html"), plainHTML); + } + // This is exploiting the fact that renderHTML has the side-effect of // mutating the built document which makes this not great and refactor-worthy. 
const docString = JSON.stringify({ doc: builtDocument }); diff --git a/build/index.ts b/build/index.ts index cdc99969f8ac..0cd83e5079db 100644 --- a/build/index.ts +++ b/build/index.ts @@ -164,6 +164,7 @@ export interface BuiltDocument { source?: { github_url: string; }; + plainHTML?: string; } interface DocumentOptions { @@ -171,6 +172,7 @@ interface DocumentOptions { fixFlawsDryRun?: boolean; fixFlawsTypes?: Iterable; fixFlawsVerbose?: boolean; + plainHTML?: boolean; } export async function buildDocument( @@ -445,6 +447,12 @@ export async function buildDocument( throw error; } + // Dump HTML for GPT context + let plainHTML; + if (documentOptions.plainHTML) { + plainHTML = $.html(); + } + // Apply syntax highlighting all
 tags.
   syntaxHighlight($, doc);
 
@@ -555,7 +563,7 @@ export async function buildDocument(
     document.metadata.slug.startsWith("orphaned/") ||
     document.metadata.slug.startsWith("conflicting/");
 
-  return { doc: doc as Doc, liveSamples, fileAttachmentMap };
+  return { doc: doc as Doc, liveSamples, fileAttachmentMap, plainHTML };
 }
 
 function addBaseline(doc: Partial) {
diff --git a/build/resolve-bcd.ts b/build/resolve-bcd.ts
index 72357358446a..af888df86fb6 100644
--- a/build/resolve-bcd.ts
+++ b/build/resolve-bcd.ts
@@ -1,10 +1,10 @@
 import bcdUntyped from "@mdn/browser-compat-data/forLegacyNode";
-import { CompatData } from "@mdn/browser-compat-data/types";
+import { CompatData, Identifier } from "@mdn/browser-compat-data/types";
 
 const bcd = bcdUntyped as CompatData;
 
 export function packageBCD(query) {
-  const data = query.split(".").reduce((prev, curr) => {
+  const data: Identifier = query.split(".").reduce((prev, curr) => {
     return prev && Object.prototype.hasOwnProperty.call(prev, curr)
       ? prev[curr]
       : undefined;
diff --git a/scripts/ai-help-macros.ts b/scripts/ai-help-macros.ts
new file mode 100644
index 000000000000..da493d43d29a
--- /dev/null
+++ b/scripts/ai-help-macros.ts
@@ -0,0 +1,504 @@
+import { createHash } from "node:crypto";
+import { readFile } from "node:fs/promises";
+
+import caporal from "@caporal/core";
+import { SupabaseClient, createClient } from "@supabase/supabase-js";
+import { fdir } from "fdir";
+import OpenAI from "openai";
+import { load as cheerio } from "cheerio";
+
+import { DocMetadata } from "../libs/types/document.js";
+import {
+  BUILD_OUT_ROOT,
+  OPENAI_KEY,
+  SUPABASE_SERVICE_ROLE_KEY,
+  SUPABASE_URL,
+} from "../libs/env/index.js";
+import {
+  getBCDDataForPath,
+  SimpleSupportStatementExtended,
+} from "@mdn/bcd-utils-api";
+import path from "node:path";
+import {
+  BrowserStatement,
+  SimpleSupportStatement,
+  VersionValue,
+} from "@mdn/browser-compat-data/types";
+
+const { program } = caporal;
+
+// const MAX_TABLE_LENGTH = 10000;
+// const IGNORE_SECTIONS = ["Specifications", "Browser compatibility", "See also"];
+
+/**
+ * A document as it is currently stored in the `mdn_doc_macro` table.
+ *
+ * Only the columns needed to detect changes are represented here; the stored
+ * `html` and `embedding` are not part of this shape.
+ */
+interface IndexedDoc {
+  // Primary key of the row; used to address the row when deleting it.
+  id: number;
+  // URL of the document; used as the conflict target when upserting.
+  mdn_url: string;
+  title: string;
+  // Token usage reported when the embedding was created, if any.
+  token_count: number | null;
+  // `hash` taken from the document's built `metadata.json`.
+  hash: string;
+  // Base64-encoded SHA-256 digest of the text the embedding was created from.
+  text_hash: string;
+}
+
+/**
+ * A built document that is about to be written to the index.
+ *
+ * `text` and `text_hash` are only set for documents that need a new
+ * embedding; formatting-only updates carry just the HTML and hash.
+ */
+interface Doc {
+  mdn_url: string;
+  title: string;
+  // `hash` taken from the document's built `metadata.json`.
+  hash: string;
+  // Reformatted HTML of the document, stored alongside the embedding.
+  html: string;
+  // Plain-text version of the document, used as input for the embedding.
+  text?: string;
+  // Base64-encoded SHA-256 digest of `text`.
+  text_hash?: string;
+}
+
+/**
+ * Synchronizes the `mdn_doc_macro` table with the built documents found
+ * below `directory`.
+ *
+ * - Documents whose extracted text changed (or that are not indexed yet) get
+ *   a new OpenAI embedding and are upserted together with their HTML.
+ * - Documents whose text is unchanged but whose `hash` differs (or all of
+ *   them, if `updateFormatting` is set) are upserted without creating a new
+ *   embedding.
+ * - Indexed documents that were not seen in the build are deleted.
+ *
+ * A failure to update an individual document is logged and skipped, whereas
+ * a failed deletion rejects the returned promise.
+ *
+ * @param directory - Directory that is crawled for built documents.
+ * @param updateFormatting - Upload the HTML again even if the hashes match.
+ * @throws If OPENAI_KEY, SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY is unset.
+ */
+export async function updateEmbeddings(
+  directory: string,
+  updateFormatting: boolean
+) {
+  if (!OPENAI_KEY || !SUPABASE_URL || !SUPABASE_SERVICE_ROLE_KEY) {
+    throw Error(
+      "Please set these environment variables: OPENAI_KEY, SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY"
+    );
+  }
+
+  // Supabase.
+  const supabaseClient = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY);
+
+  // Open AI.
+  const openai = new OpenAI({
+    apiKey: OPENAI_KEY,
+  });
+
+  // Creates an embedding for `input`; if the first attempt fails, the error
+  // is logged and a second attempt is made with the input trimmed.
+  const createEmbedding = async (input: string) => {
+    let embeddingResponse: OpenAI.Embeddings.CreateEmbeddingResponse;
+    try {
+      embeddingResponse = await openai.embeddings.create({
+        model: "text-embedding-ada-002",
+        input,
+      });
+    } catch (e: any) {
+      // The shape of the error depends on the client: an axios-style error
+      // nests the details in `e.response`, whereas other errors (including
+      // network failures) have no `response` at all. Read every field
+      // defensively, because a TypeError thrown here would mask the original
+      // error and skip the retry below.
+      // NOTE(review): the `e.status`/`e.error` fallbacks assume the error
+      // shape of the `openai` v4 client - confirm against the installed SDK.
+      const apiError = e?.response?.data?.error ?? e?.error;
+      const status = [
+        e?.response?.status ?? e?.status,
+        e?.response?.statusText,
+      ]
+        .filter((part) => part !== undefined && part !== null && part !== "")
+        .join(" ");
+      const type = apiError?.type ?? e?.type ?? e?.name ?? "unknown";
+      const message = apiError?.message ?? e?.message ?? String(e);
+      console.error(
+        `[!] Failed to create embedding (${
+          status || "no response"
+        }): ${type} - ${message}`
+      );
+      // Try again with trimmed content.
+      embeddingResponse = await openai.embeddings.create({
+        model: "text-embedding-ada-002",
+        input: input.substring(0, 15000),
+      });
+    }
+
+    const {
+      data: [{ embedding }],
+      usage: { total_tokens },
+    } = embeddingResponse;
+
+    return {
+      total_tokens,
+      embedding,
+    };
+  };
+
+  console.log(`Retrieving all indexed documents...`);
+  const existingDocs = await fetchAllExistingDocs(supabaseClient);
+  console.log(`-> Done.`);
+
+  const existingDocByUrl = new Map(
+    existingDocs.map((doc) => [doc.mdn_url, doc])
+  );
+
+  console.log(`Determining changed and deleted documents...`);
+
+  const seenUrls = new Set();
+  const updates: Doc[] = [];
+  const formattingUpdates: Doc[] = [];
+
+  for await (const { mdn_url, title, hash, html, text } of builtDocs(
+    directory
+  )) {
+    seenUrls.add(mdn_url);
+
+    // Check for existing document in DB and compare checksums.
+    const existingDoc = existingDocByUrl.get(mdn_url);
+
+    const text_hash = createHash("sha256").update(text).digest("base64");
+
+    if (existingDoc?.text_hash !== text_hash) {
+      // The text changed (or the document is new): a new embedding is needed.
+      updates.push({
+        mdn_url,
+        title,
+        hash,
+        html,
+        text,
+        text_hash,
+      });
+    } else if (updateFormatting || existingDoc?.hash !== hash) {
+      // Same text, so the stored embedding is kept and only the remaining
+      // columns are refreshed.
+      formattingUpdates.push({
+        mdn_url,
+        title,
+        hash,
+        html,
+      });
+    }
+  }
+
+  console.log(
+    `-> ${updates.length} of ${seenUrls.size} documents were changed (or added).`
+  );
+  // Everything that is indexed but was not seen in the build is stale.
+  const deletions: IndexedDoc[] = [...existingDocByUrl.entries()]
+    .filter(([key]) => !seenUrls.has(key))
+    .map(([, value]) => value);
+  console.log(
+    `-> ${deletions.length} of ${existingDocs.length} indexed documents were deleted (or moved).`
+  );
+
+  if (updates.length > 0 || formattingUpdates.length > 0) {
+    console.log(`Applying updates...`);
+    for (const { mdn_url, title, hash, html, text, text_hash } of updates) {
+      try {
+        console.log(`-> [${mdn_url}] Updating document...`);
+
+        // Embedding for full document.
+        const { total_tokens, embedding } = await createEmbedding(text);
+
+        // Create/update document record.
+        await supabaseClient
+          .from("mdn_doc_macro")
+          .upsert(
+            {
+              mdn_url,
+              title,
+              hash,
+              html,
+              token_count: total_tokens,
+              embedding,
+              text_hash,
+            },
+            { onConflict: "mdn_url" }
+          )
+          .select()
+          .single()
+          .throwOnError();
+      } catch (err: any) {
+        // Best effort: log the failure and continue with the next document.
+        console.error(`!> [${mdn_url}] Failed to update document.`);
+        const context = err?.response?.data ?? err?.response ?? err;
+        console.error(context);
+      }
+    }
+    for (const { mdn_url, title, hash, html } of formattingUpdates) {
+      try {
+        console.log(
+          `-> [${mdn_url}] Updating document without generating new embedding...`
+        );
+
+        // Create/update document record.
+        await supabaseClient
+          .from("mdn_doc_macro")
+          .upsert(
+            {
+              mdn_url,
+              title,
+              hash,
+              html,
+            },
+            { onConflict: "mdn_url" }
+          )
+          .select()
+          .single()
+          .throwOnError();
+      } catch (err: any) {
+        // Best effort: log the failure and continue with the next document.
+        console.error(`!> [${mdn_url}] Failed to update document.`);
+        const context = err?.response?.data ?? err?.response ?? err;
+        console.error(context);
+      }
+    }
+    console.log(`-> Done.`);
+  }
+
+  if (deletions.length > 0) {
+    console.log(`Applying deletions...`);
+    for (const { id, mdn_url } of deletions) {
+      console.log(`-> [${mdn_url}] Deleting indexed document...`);
+      await supabaseClient
+        .from("mdn_doc_macro")
+        .delete()
+        .eq("id", id)
+        .throwOnError();
+    }
+    console.log(`-> Done.`);
+  }
+}
+
+/**
+ * Prints the formatted HTML and text of every built document found below
+ * `directory` to stdout.
+ */
+async function formatDocs(directory: string) {
+  const docs = builtDocs(directory);
+  for await (const doc of docs) {
+    const { html, text } = doc;
+    console.log(html, text);
+  }
+}
+
+/**
+ * Crawls `directory` and yields the full path of every file whose path ends
+ * with "metadata.json".
+ */
+async function* builtPaths(directory: string) {
+  const isMetadataFile = (filePath: string) =>
+    filePath.endsWith("metadata.json");
+
+  const crawler = new fdir()
+    .withFullPaths()
+    .withErrors()
+    .filter(isMetadataFile);
+
+  // The crawl is awaited in full before the first path is yielded.
+  const metadataPaths = await crawler.crawl(directory).withPromise();
+
+  for (const metadataPath of metadataPaths) {
+    yield metadataPath;
+  }
+}
+
+async function* builtDocs(directory: string) {
+  for await (const metadataPath of builtPaths(directory)) {
+    try {
+      const raw = await readFile(metadataPath, "utf-8");
+      const { title, mdn_url, hash } = JSON.parse(raw) as DocMetadata;
+
+      const plainPath = path.join(path.dirname(metadataPath), "plain.html");
+      const plainHTML = await readFile(plainPath, "utf-8");
+
+      // reformat HTML version, used as context
+      const $ = cheerio(plainHTML);
+      $("#specifications, .bc-specs").remove();
+      $("body").prepend(`

${title}

`); + $("head").prepend(`${title}`); + $("head").prepend(``); + $("[width], [height]").each((_, el) => { + $(el).removeAttr("width").removeAttr("height"); + }); + $(".bc-data[data-query]").each((_, el) => { + $(el).replaceWith(buildBCDTable($(el).data("query") as string)); + }); + const html = $.html(); + + // reformat text version, used for embedding + $("title").remove(); + $("#browser_compatibility, .bc-table").remove(); + const text = $.text().trim().replace(/\n+/g, "\n"); + + yield { + mdn_url, + title, + hash, + html, + text, + }; + } catch (e) { + console.error(`Error preparing doc: ${metadataPath}`, e); + } + } +} + +function buildBCDTable(query: string) { + const bcdData = getBCDDataForPath(query); + if (!bcdData) return ""; + const { browsers, data } = bcdData; + return data.__compat?.support + ? ` + + +${Object.entries(data.__compat?.support) + .map( + ([browser, support]) => + `` + ) + .join("\n")} + +
BrowserSupport
${browsers[browser].name}${buildBCDSupportString( + browsers[browser], + support + )}
` + : ""; +} + +function buildBCDSupportString( + browser: BrowserStatement, + support: (SimpleSupportStatement & SimpleSupportStatementExtended)[] +) { + return support + .flatMap((item) => { + return [ + item.version_removed && + !support.some( + (otherItem) => otherItem.version_added === item.version_removed + ) + ? `Removed in ${labelFromString( + item.version_removed, + browser + )} and later` + : null, + item.partial_implementation ? "Partial support" : null, + item.prefix + ? `Implemented with the vendor prefix: ${item.prefix}` + : null, + item.alternative_name + ? `Alternate name: ${item.alternative_name}` + : null, + item.flags ? FlagsNote(item, browser) : null, + item.notes + ? (Array.isArray(item.notes) ? item.notes : [item.notes]).join(". ") + : null, + versionIsPreview(item.version_added, browser) + ? "Preview browser support" + : null, + isFullySupportedWithoutLimitation(item) && + !versionIsPreview(item.version_added, browser) + ? `Full support since version ${item.version_added}${ + item.release_date ? ` (released ${item.release_date})` : "" + }` + : isNotSupportedAtAll(item) + ? "No support" + : null, + ] + .flat() + .filter((x) => Boolean(x)); + }) + .join(". "); +} + +function labelFromString( + version: string | boolean | null | undefined, + browser: BrowserStatement +) { + if (typeof version !== "string") { + return "?"; + } + // Treat BCD ranges as exact versions to avoid confusion for the reader + // See https://github.com/mdn/yari/issues/3238 + if (version.startsWith("≤")) { + return version.slice(1); + } + if (version === "preview") { + return browser.preview_name; + } + return version; +} + +function FlagsNote( + supportItem: SimpleSupportStatement, + browser: BrowserStatement +) { + const hasAddedVersion = typeof supportItem.version_added === "string"; + const hasRemovedVersion = typeof supportItem.version_removed === "string"; + const flags = supportItem.flags || []; + return `${ + hasAddedVersion ? 
`From version ${supportItem.version_added}` : "" + }${ + hasRemovedVersion + ? `${hasAddedVersion ? " until" : "Until"} version ${ + supportItem.version_removed + } (exclusive)` + : "" + }${ + hasAddedVersion || hasRemovedVersion ? ": this" : "This" + } feature is behind the ${flags.map((flag, i) => { + const valueToSet = flag.value_to_set + ? ` (needs to be set to ${flag.value_to_set})` + : ""; + return `${flag.name}${ + flag.type === "preference" ? ` preference${valueToSet}` : "" + }${flag.type === "runtime_flag" ? ` runtime flag${valueToSet}` : ""}${ + i < flags.length - 1 ? " and the " : "" + }`; + })}.${ + browser.pref_url && flags.some((flag) => flag.type === "preference") + ? ` To change preferences in ${browser.name}, visit ${browser.pref_url}.` + : "" + }`; +} + +function versionIsPreview( + version: VersionValue | string | undefined, + browser: BrowserStatement +): boolean { + if (version === "preview") { + return true; + } + + if (browser && typeof version === "string" && browser.releases[version]) { + return ["beta", "nightly", "planned"].includes( + browser.releases[version].status + ); + } + + return false; +} + +export function isFullySupportedWithoutLimitation( + support: SimpleSupportStatement +) { + return support.version_added && !hasLimitation(support); +} + +function hasLimitation(support: SimpleSupportStatement) { + return hasMajorLimitation(support) || support.notes; +} + +function hasMajorLimitation(support: SimpleSupportStatement) { + return ( + support.partial_implementation || + support.alternative_name || + support.flags || + support.prefix || + support.version_removed + ); +} + +export function isNotSupportedAtAll(support: SimpleSupportStatement) { + return !support.version_added && !hasLimitation(support); +} + +async function fetchAllExistingDocs(supabase: SupabaseClient) { + const PAGE_SIZE = 1000; + const selectDocs = () => + supabase + .from("mdn_doc_macro") + .select("id, mdn_url, title, hash, token_count, text_hash") + 
.order("id") + .limit(PAGE_SIZE); + + let { data } = await selectDocs().throwOnError(); + let allData = data; + while (data.length === PAGE_SIZE) { + const lastItem = data[data.length - 1]; + ({ data } = await selectDocs().gt("id", lastItem.id).throwOnError()); + allData = [...allData, ...data]; + } + + return allData; +} + +// CLI. +program + .command( + "update-index", + "Generates OpenAI embeddings for all documents and uploads them to Supabase." + ) + .argument("", "Path in which to execute it", { + default: path.join(BUILD_OUT_ROOT, "en-us", "docs"), + }) + .option( + "--update-formatting", + "Even if hashes match, update without generating a new embedding." + ) + .action(function (params) { + const { directory } = params.args as { directory: string }; + const { updateFormatting } = params.options as { + updateFormatting: boolean; + }; + return updateEmbeddings(directory, updateFormatting); + }) + .command("format-docs", "Generates formatted docs for local debugging") + .argument("", "Path in which to execute it", { + default: path.join(BUILD_OUT_ROOT, "en-us", "docs"), + }) + .action(function (params) { + const { directory } = params.args as { directory: string }; + return formatDocs(directory); + }); + +program.run(); diff --git a/scripts/ai-help.sql b/scripts/ai-help.sql index 7aaafa27a77f..a558f4317caf 100644 --- a/scripts/ai-help.sql +++ b/scripts/ai-help.sql @@ -12,7 +12,7 @@ create table constraint mdn_doc_url_key unique (url), ) tablespace pg_default; - create table +create table public.mdn_doc_section ( id bigint not null default nextval('mdn_doc_section_id_seq'::regclass), doc_id bigint not null, @@ -22,4 +22,18 @@ create table embedding public.vector null, constraint mdn_doc_section_pkey primary key (id), constraint mdn_doc_section_doc_id_fkey foreign key (doc_id) references mdn_doc (id) on delete cascade - ) tablespace pg_default; \ No newline at end of file + ) tablespace pg_default; + +create table + public.mdn_doc_macro ( + id bigserial, + hash 
text null, + title text not null, + mdn_url text not null, + html text null, + token_count integer null, + embedding extensions.vector null, + text_hash text null, + constraint mdn_doc_macro_pkey primary key (id), + constraint mdn_doc_macro_url_key unique (mdn_url) + ) tablespace pg_default; From 4568271a77cf5cc4666a00a8c1c69b340751acf2 Mon Sep 17 00:00:00 2001 From: Claas Augner Date: Mon, 11 Dec 2023 10:39:30 +0100 Subject: [PATCH 2/6] fix(workflows): ignore plain.html everywhere --- .github/workflows/prod-build.yml | 2 +- .github/workflows/stage-build.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/prod-build.yml b/.github/workflows/prod-build.yml index e2041def7700..a55b6a27c55d 100644 --- a/.github/workflows/prod-build.yml +++ b/.github/workflows/prod-build.yml @@ -307,7 +307,7 @@ jobs: if: ${{ ! vars.SKIP_BUILD }} run: |- gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main + gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|/plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - name: Authenticate with GCP if: ${{ ! vars.SKIP_FUNCTION }} diff --git a/.github/workflows/stage-build.yml b/.github/workflows/stage-build.yml index 5d8daebc6a7f..efda88142325 100644 --- a/.github/workflows/stage-build.yml +++ b/.github/workflows/stage-build.yml @@ -300,7 +300,7 @@ jobs: if: ${{ ! 
vars.SKIP_BUILD }} run: |- gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main + gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|/plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - name: Authenticate with GCP if: ${{ ! vars.SKIP_FUNCTION }} From 8b050d5d499f3d146c688e161cccfe70bf17cdc2 Mon Sep 17 00:00:00 2001 From: Claas Augner Date: Mon, 11 Dec 2023 13:15:49 +0100 Subject: [PATCH 3/6] Revert "fix(workflows): ignore plain.html everywhere" This reverts commit 4568271a77cf5cc4666a00a8c1c69b340751acf2. --- .github/workflows/prod-build.yml | 2 +- .github/workflows/stage-build.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/prod-build.yml b/.github/workflows/prod-build.yml index a55b6a27c55d..e2041def7700 100644 --- a/.github/workflows/prod-build.yml +++ b/.github/workflows/prod-build.yml @@ -307,7 +307,7 @@ jobs: if: ${{ ! vars.SKIP_BUILD }} run: |- gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|/plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main + gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - name: Authenticate with GCP if: ${{ ! vars.SKIP_FUNCTION }} diff --git a/.github/workflows/stage-build.yml b/.github/workflows/stage-build.yml index efda88142325..5d8daebc6a7f 100644 --- a/.github/workflows/stage-build.yml +++ b/.github/workflows/stage-build.yml @@ -300,7 +300,7 @@ jobs: if: ${{ ! 
vars.SKIP_BUILD }} run: |- gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|/plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main + gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - name: Authenticate with GCP if: ${{ ! vars.SKIP_FUNCTION }} From f9ebf743aec02f0bc0b0db46ea6236dec64fbbf8 Mon Sep 17 00:00:00 2001 From: Claas Augner Date: Mon, 11 Dec 2023 13:17:53 +0100 Subject: [PATCH 4/6] chore(workflows): only update macro index --- .github/workflows/prod-build.yml | 113 ------------------------------ .github/workflows/stage-build.yml | 113 ------------------------------ 2 files changed, 226 deletions(-) diff --git a/.github/workflows/prod-build.yml b/.github/workflows/prod-build.yml index e2041def7700..db2a8f5684e1 100644 --- a/.github/workflows/prod-build.yml +++ b/.github/workflows/prod-build.yml @@ -283,122 +283,9 @@ jobs: yarn tool whatsdeployed $CONTENT_ROOT --output client/build/_whatsdeployed/content.json yarn tool whatsdeployed $CONTENT_TRANSLATED_ROOT --output client/build/_whatsdeployed/translated-content.json - - name: Update search index - if: ${{ ! vars.SKIP_BUILD }} - env: - DEPLOYER_ELASTICSEARCH_URL: ${{ secrets.DEPLOYER_PROD_ELASTICSEARCH_URL }} - run: | - cd deployer - poetry run deployer search-index ../client/build - - - name: Authenticate with GCP - if: ${{ ! vars.SKIP_BUILD }} - uses: google-github-actions/auth@v2 - with: - token_format: access_token - service_account: deploy-prod-content@${{ secrets.GCP_PROJECT_NAME }}.iam.gserviceaccount.com - workload_identity_provider: projects/${{ secrets.WIP_PROJECT_ID }}/locations/global/workloadIdentityPools/github-actions/providers/github-actions - - - name: Setup gcloud - if: ${{ ! 
vars.SKIP_BUILD }} - uses: google-github-actions/setup-gcloud@v1 - - - name: Sync build - if: ${{ ! vars.SKIP_BUILD }} - run: |- - gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - - - name: Authenticate with GCP - if: ${{ ! vars.SKIP_FUNCTION }} - uses: google-github-actions/auth@v2 - with: - token_format: access_token - service_account: deploy-prod-prod-mdn-ingress@${{ secrets.GCP_PROJECT_NAME }}.iam.gserviceaccount.com - workload_identity_provider: projects/${{ secrets.WIP_PROJECT_ID }}/locations/global/workloadIdentityPools/github-actions/providers/github-actions - - - name: Setup gcloud - if: ${{ ! vars.SKIP_FUNCTION }} - uses: google-github-actions/setup-gcloud@v1 - with: - install_components: "beta" - - - name: Generate redirects map - if: ${{ ! vars.SKIP_FUNCTION }} - working-directory: cloud-function - env: - CONTENT_ROOT: ${{ github.workspace }}/mdn/content/files - CONTENT_TRANSLATED_ROOT: ${{ github.workspace }}/mdn/translated-content/files - run: |- - npm ci - npm run build-redirects - - - name: Deploy Function - if: ${{ ! 
vars.SKIP_FUNCTION }} - run: |- - for region in europe-west1 us-west1 asia-east1; do - gcloud beta functions deploy mdn-prod-prod-$region \ - --gen2 \ - --runtime=nodejs18 \ - --region=$region \ - --source=cloud-function \ - --trigger-http \ - --allow-unauthenticated \ - --entry-point=mdnHandler \ - --concurrency=100 \ - --min-instances=10 \ - --max-instances=1000 \ - --memory=2GB \ - --timeout=60s \ - --set-env-vars="ORIGIN_MAIN=developer.mozilla.org" \ - --set-env-vars="ORIGIN_LIVE_SAMPLES=live.mdnplay.dev" \ - --set-env-vars="ORIGIN_PLAY=mdnplay.dev" \ - --set-env-vars="SOURCE_CONTENT=https://storage.googleapis.com/${{ vars.GCP_BUCKET_NAME }}/main/" \ - --set-env-vars="SOURCE_API=https://api.developer.mozilla.org/" \ - --set-env-vars="BSA_ENABLED=true" \ - --set-env-vars="SENTRY_DSN=${{ secrets.SENTRY_DSN_CLOUD_FUNCTION }}" \ - --set-env-vars="SENTRY_ENVIRONMENT=prod" \ - --set-env-vars="SENTRY_TRACES_SAMPLE_RATE=${{ vars.SENTRY_TRACES_SAMPLE_RATE }}" \ - --set-env-vars="SENTRY_RELEASE=${{ github.sha }}" \ - --set-secrets="KEVEL_SITE_ID=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/prod-kevel-site-id/versions/latest" \ - --set-secrets="KEVEL_NETWORK_ID=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/prod-kevel-network-id/versions/latest" \ - --set-secrets="SIGN_SECRET=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/prod-sign-secret/versions/latest" \ - --set-secrets="BSA_ZONE_KEYS=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/prod-bsa-zone-keys/versions/latest" \ - 2>&1 | sed "s/^/[$region] /" & - pids+=($!) 
- done - - for pid in "${pids[@]}"; do - wait $pid - done - - - name: Update AI Help index - run: yarn ai-help update-index - env: - CONTENT_ROOT: ${{ github.workspace }}/mdn/content/files - OPENAI_KEY: ${{ secrets.OPENAI_KEY }} - SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - - name: Update AI Help index with macros run: yarn ai-help-macros update-index env: OPENAI_KEY: ${{ secrets.OPENAI_KEY }} SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - - - name: Slack Notification - if: failure() - uses: rtCamp/action-slack-notify@v2 - env: - SLACK_CHANNEL: mdn-notifications - SLACK_COLOR: ${{ job.status }} - SLACK_ICON: https://avatars.slack-edge.com/2020-11-17/1513880588420_fedd7f0e9456888e69ff_96.png - SLACK_TITLE: ":rotating_light: Prod :rotating_light:" - SLACK_MESSAGE: "Build failed :collision:" - SLACK_FOOTER: "Powered by prod-build.yml" - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - - - name: Invalidate Google Cloud CDN - if: ${{ github.event.inputs.invalidate }} - run: gcloud compute url-maps invalidate-cdn-cache ${{ secrets.GCP_LOAD_BALANCER_NAME }} --path "/*" --async diff --git a/.github/workflows/stage-build.yml b/.github/workflows/stage-build.yml index 5d8daebc6a7f..de99b025d34e 100644 --- a/.github/workflows/stage-build.yml +++ b/.github/workflows/stage-build.yml @@ -276,122 +276,9 @@ jobs: yarn tool whatsdeployed $CONTENT_ROOT --output client/build/_whatsdeployed/content.json yarn tool whatsdeployed $CONTENT_TRANSLATED_ROOT --output client/build/_whatsdeployed/translated-content.json - - name: Update search index - if: ${{ ! vars.SKIP_BUILD }} - env: - DEPLOYER_ELASTICSEARCH_URL: ${{ secrets.DEPLOYER_STAGE_ELASTICSEARCH_URL }} - run: | - cd deployer - poetry run deployer search-index ../client/build - - - name: Authenticate with GCP - if: ${{ ! 
vars.SKIP_BUILD }} - uses: google-github-actions/auth@v2 - with: - token_format: access_token - service_account: deploy-stage-content@${{ secrets.GCP_PROJECT_NAME }}.iam.gserviceaccount.com - workload_identity_provider: projects/${{ secrets.WIP_PROJECT_ID }}/locations/global/workloadIdentityPools/github-actions/providers/github-actions - - - name: Setup gcloud - if: ${{ ! vars.SKIP_BUILD }} - uses: google-github-actions/setup-gcloud@v1 - - - name: Sync build - if: ${{ ! vars.SKIP_BUILD }} - run: |- - gsutil -q -m -h "Cache-Control: public, max-age=3600" cp -r client/build/static gs://${{ vars.GCP_BUCKET_NAME }}/main/ - gsutil -q -m -h "Cache-Control: public, max-age=3600" rsync -cdrj html,json,txt -y "^static/|^plain.html$" client/build gs://${{ vars.GCP_BUCKET_NAME }}/main - - - name: Authenticate with GCP - if: ${{ ! vars.SKIP_FUNCTION }} - uses: google-github-actions/auth@v2 - with: - token_format: access_token - service_account: deploy-stage-nonprod-mdn-ingre@${{ secrets.GCP_PROJECT_NAME }}.iam.gserviceaccount.com - workload_identity_provider: projects/${{ secrets.WIP_PROJECT_ID }}/locations/global/workloadIdentityPools/github-actions/providers/github-actions - - - name: Setup gcloud - if: ${{ ! vars.SKIP_FUNCTION }} - uses: google-github-actions/setup-gcloud@v1 - with: - install_components: "beta" - - - name: Generate redirects map - if: ${{ ! vars.SKIP_FUNCTION }} - working-directory: cloud-function - env: - CONTENT_ROOT: ${{ github.workspace }}/mdn/content/files - CONTENT_TRANSLATED_ROOT: ${{ github.workspace }}/mdn/translated-content/files - run: | - npm ci - npm run build-redirects - - - name: Deploy Function - if: ${{ ! 
vars.SKIP_FUNCTION }} - run: |- - for region in europe-west1 us-west1 asia-east1; do - gcloud beta functions deploy mdn-nonprod-stage-$region \ - --gen2 \ - --runtime=nodejs18 \ - --region=$region \ - --source=cloud-function \ - --trigger-http \ - --allow-unauthenticated \ - --entry-point=mdnHandler \ - --concurrency=100 \ - --min-instances=1 \ - --max-instances=100 \ - --memory=2GB \ - --timeout=60s \ - --set-env-vars="ORIGIN_MAIN=developer.allizom.org" \ - --set-env-vars="ORIGIN_LIVE_SAMPLES=live.mdnyalp.dev" \ - --set-env-vars="ORIGIN_PLAY=mdnyalp.dev" \ - --set-env-vars="SOURCE_CONTENT=https://storage.googleapis.com/${{ vars.GCP_BUCKET_NAME }}/main/" \ - --set-env-vars="SOURCE_API=https://api.developer.allizom.org/" \ - --set-env-vars="BSA_ENABLED=true" \ - --set-env-vars="SENTRY_DSN=${{ secrets.SENTRY_DSN_CLOUD_FUNCTION }}" \ - --set-env-vars="SENTRY_ENVIRONMENT=stage" \ - --set-env-vars="SENTRY_TRACES_SAMPLE_RATE=${{ vars.SENTRY_TRACES_SAMPLE_RATE }}" \ - --set-env-vars="SENTRY_RELEASE=${{ github.sha }}" \ - --set-secrets="KEVEL_SITE_ID=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/stage-kevel-site-id/versions/latest" \ - --set-secrets="KEVEL_NETWORK_ID=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/stage-kevel-network-id/versions/latest" \ - --set-secrets="SIGN_SECRET=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/stage-sign-secret/versions/latest" \ - --set-secrets="BSA_ZONE_KEYS=projects/${{ secrets.GCP_PROJECT_NAME }}/secrets/stage-bsa-zone-keys/versions/latest" \ - 2>&1 | sed "s/^/[$region] /" & - pids+=($!) 
- done - - for pid in "${pids[@]}"; do - wait $pid - done - - - name: Update AI Help index - run: yarn ai-help update-index - env: - CONTENT_ROOT: ${{ github.workspace }}/mdn/content/files - OPENAI_KEY: ${{ secrets.OPENAI_KEY }} - SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - - name: Update AI Help index with macros run: yarn ai-help-macros update-index env: OPENAI_KEY: ${{ secrets.OPENAI_KEY }} SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - - - name: Slack Notification - if: failure() - uses: rtCamp/action-slack-notify@v2 - env: - SLACK_CHANNEL: mdn-notifications - SLACK_COLOR: ${{ job.status }} - SLACK_ICON: https://avatars.slack-edge.com/2020-11-17/1513880588420_fedd7f0e9456888e69ff_96.png - SLACK_TITLE: "Stage" - SLACK_MESSAGE: "Build failed :collision:" - SLACK_FOOTER: "Powered by stage-build.yml" - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - - - name: Invalidate CDN - if: ${{ github.event.inputs.invalidate }} - run: gcloud compute url-maps invalidate-cdn-cache ${{ secrets.GCP_LOAD_BALANCER_NAME }} --path "/*" --async From ff282479b479518d5a81ae73d454a1d3c4ad730e Mon Sep 17 00:00:00 2001 From: Claas Augner Date: Mon, 11 Dec 2023 14:02:32 +0100 Subject: [PATCH 5/6] fixup! 
feat(ai-help): add index with rendered macros --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index c7f8e9a12628..ce8578864465 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ }, "scripts": { "ai-help": "ts-node scripts/ai-help.ts", + "ai-help-macros": "ts-node scripts/ai-help-macros.ts", "analyze": "source-map-explorer 'client/build/static/js/*.js'", "analyze:css": "source-map-explorer 'client/build/static/css/*.css'", "build": "cross-env NODE_ENV=production NODE_OPTIONS='--no-warnings=ExperimentalWarning --loader ts-node/esm' node build/cli.ts", From 04e881ccb20c0c17d9c781a873f2e73d8f7a60a4 Mon Sep 17 00:00:00 2001 From: Claas Augner Date: Mon, 11 Dec 2023 14:03:28 +0100 Subject: [PATCH 6/6] chore(workflows): only build en-us --- .github/workflows/prod-build.yml | 11 +---------- .github/workflows/stage-build.yml | 7 +------ 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/workflows/prod-build.yml b/.github/workflows/prod-build.yml index db2a8f5684e1..594b6a27b0a7 100644 --- a/.github/workflows/prod-build.yml +++ b/.github/workflows/prod-build.yml @@ -259,16 +259,7 @@ jobs: yarn tool sync-translated-content - # Build using one process per locale. - # Note: We have 4 cores, but 9 processes is a reasonable number. - for locale in en-us es fr ja ko pt-br ru zh-cn zh-tw; do - yarn build --locale $locale 2>&1 | sed "s/^/[$locale] /" & - pids+=($!) - done - - for pid in "${pids[@]}"; do - wait $pid - done + yarn build --locale en-us du -sh client/build diff --git a/.github/workflows/stage-build.yml b/.github/workflows/stage-build.yml index de99b025d34e..72b6dbabde22 100644 --- a/.github/workflows/stage-build.yml +++ b/.github/workflows/stage-build.yml @@ -252,12 +252,7 @@ jobs: yarn tool sync-translated-content - # Build using one process per locale. - # Note: We have 4 cores, but 9 processes is a reasonable number. 
- for locale in en-us es fr ja ko pt-br ru zh-cn zh-tw; do - yarn build --locale $locale 2>&1 | sed "s/^/[$locale] /" & - pids+=($!) - done + yarn build --locale en-us for pid in "${pids[@]}"; do wait $pid