github investigate sample (#727)

* github investigator * a few more commands * Add diff package for log comparison and refactor GitHub Action log handling * Refactor GitHub Action log handling and enhance diff analysis features * Optimize shell output handling and enhance GitHub repository info retrieval logic * Add log URLs to success and failure log outputs in console and downloadRunLog function * Refactor log processing, add log diffing, and improve parsing functions * Rename file and add annotations functionality to script configuration * Enhance GitHub Actions handling by adding PR retrieval and refactoring prompt logic. * artificial build break * Optimize environment parsing and improve markdown handling in CLI and sample scripts * Add failure handling and logging improvements in genai workflow and script * Rename workflow to "genai investigator" in YAML configuration. * Fix typo in function call from downloadRu nLog to downloadRunLog
microsoft · Sep 26, 2024 · 7ec144a · 7ec144a
1 parent 0bf994e
commit 7ec144a
Show file tree

Hide file tree

Showing 7 changed files with 551 additions and 19 deletions.
diff --git a/.github/workflows/genai-investigator.yml b/.github/workflows/genai-investigator.yml
@@ -0,0 +1,27 @@
+# Runs the "gai" script whenever a run of the "build" workflow completes with a failure.
+name: genai investigator
+on:
+    workflow_run:
+        workflows: ["build"]
+        types:
+            - completed
+permissions:
+    actions: read
+    pull-requests: write
+jobs:
+    check_failure:
+        # workflow_run fires for every completed run; only failed runs are investigated
+        if: ${{ github.event.workflow_run.conclusion == 'failure' }}
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/checkout@v4
+              with:
+                  submodules: "recursive"
+                  # NOTE(review): the script runs `git diff` between two run commits;
+                  # confirm that a depth of 10 is enough to contain both of them
+                  fetch-depth: 10
+            - uses: actions/setup-node@v4
+              with:
+                  node-version: "20"
+                  cache: yarn
+            - run: yarn install --frozen-lockfile
+            - name: compile
+              run: yarn compile
+            - name: genai investigator
+              # the id of the failed run is handed to the script as the failure_run_id variable
+              run: yarn genai gai -prc --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
+              env:
+                  # the script creates its Octokit client from process.env.GITHUB_TOKEN;
+                  # the token is not exported to steps unless passed explicitly
+                  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts
@@ -13,6 +13,7 @@ import {
     githubCreatePullRequestReviews,
     githubUpdatePullRequestDescription,
     githubParseEnv,
+    GithubConnectionInfo,
 } from "../../core/src/github"
 import {
     HTTPS_REGEX,
@@ -61,12 +62,14 @@ import { PromptScriptRunOptions } from "../../core/src/server/messages"
 import { writeFileEdits } from "../../core/src/edits"
 import {
     azureDevOpsCreateIssueComment,
+    AzureDevOpsEnv,
     azureDevOpsParseEnv,
     azureDevOpsUpdatePullRequestDescription,
 } from "../../core/src/azuredevops"
 import { resolveTokenEncoder } from "../../core/src/encoders"
 import { writeFile } from "fs/promises"
 import { writeFileSync } from "node:fs"
+import { prettifyMarkdown } from "../../core/src/markdown"
 
 async function setupTraceWriting(trace: MarkdownTrace, filename: string) {
     logVerbose(`trace: ${filename}`)
@@ -424,37 +427,39 @@ export async function runScript(
         }
     }
 
+    let ghInfo: GithubConnectionInfo = undefined
+    let adoInfo: AzureDevOpsEnv = undefined
     if (pullRequestReviews && result.annotations?.length) {
         // github action or repo
-        const info = await githubParseEnv(process.env)
-        if (info.repository && info.issue && info.commitSha) {
+        ghInfo = ghInfo ?? (await githubParseEnv(process.env))
+        if (ghInfo.repository && ghInfo.issue && ghInfo.commitSha) {
             await githubCreatePullRequestReviews(
                 script,
-                info,
+                ghInfo,
                 result.annotations
             )
         }
     }
 
     if (pullRequestComment && result.text) {
         // github action or repo
-        const info = await githubParseEnv(process.env)
-        if (info.repository && info.issue) {
+        ghInfo = ghInfo ?? (await githubParseEnv(process.env))
+        if (ghInfo.repository && ghInfo.issue) {
             await githubCreateIssueComment(
                 script,
-                info,
-                result.text,
+                ghInfo,
+                prettifyMarkdown(result.text),
                 typeof pullRequestComment === "string"
                     ? pullRequestComment
                     : script.id
             )
         } else {
-            const adoinfo = await azureDevOpsParseEnv(process.env)
-            if (adoinfo.collectionUri) {
+            adoInfo = adoInfo ?? (await azureDevOpsParseEnv(process.env))
+            if (adoInfo.collectionUri) {
                 await azureDevOpsCreateIssueComment(
                     script,
-                    adoinfo,
-                    result.text,
+                    adoInfo,
+                    prettifyMarkdown(result.text),
                     typeof pullRequestComment === "string"
                         ? pullRequestComment
                         : script.id
@@ -468,24 +473,24 @@ export async function runScript(
 
     if (pullRequestDescription && result.text) {
         // github action or repo
-        const ghinfo = await githubParseEnv(process.env)
-        if (ghinfo.repository && ghinfo.issue) {
+        ghInfo = ghInfo ?? (await githubParseEnv(process.env))
+        if (ghInfo.repository && ghInfo.issue) {
             await githubUpdatePullRequestDescription(
                 script,
-                ghinfo,
-                result.text,
+                ghInfo,
+                prettifyMarkdown(result.text),
                 typeof pullRequestDescription === "string"
                     ? pullRequestDescription
                     : script.id
             )
         } else {
             // azure devops pipeline
-            const adoinfo = await azureDevOpsParseEnv(process.env)
-            if (adoinfo.collectionUri) {
+            adoInfo = adoInfo ?? (await azureDevOpsParseEnv(process.env))
+            if (adoInfo.collectionUri) {
                 await azureDevOpsUpdatePullRequestDescription(
                     script,
-                    adoinfo,
-                    result.text,
+                    adoInfo,
+                    prettifyMarkdown(result.text),
                     typeof pullRequestDescription === "string"
                         ? pullRequestDescription
                         : script.id

diff --git a/packages/core/package.json b/packages/core/package.json
@@ -33,6 +33,7 @@
     "cross-fetch": "^4.0.0",
     "csv-parse": "^5.5.6",
     "csv-stringify": "^6.5.1",
+    "diff": "^7.0.0",
     "dotenv": "^16.4.5",
     "esbuild": "^0.24.0",
     "fast-xml-parser": "^4.5.0",
@@ -81,6 +82,7 @@
     "test": "node --import tsx --test src/**.test.ts"
   },
   "dependencies": {
+    "@types/diff": "^5.2.2",
     "@types/turndown": "^5.0.5",
     "turndown": "^7.2.0"
   }

diff --git a/packages/core/src/chatrender.ts b/packages/core/src/chatrender.ts
@@ -20,6 +20,7 @@ import { YAMLStringify } from "./yaml"
 export function renderShellOutput(output: ShellOutput) {
     // Destructure the output object to retrieve exitCode, stdout, and stderr.
     const { exitCode, stdout, stderr } = output
+    if (exitCode === 0) return stdout
     return (
         [
             // Include exit code in the output only if it's non-zero.

diff --git a/packages/sample/genaisrc/gai.genai.mts b/packages/sample/genaisrc/gai.genai.mts
@@ -0,0 +1,225 @@
+/* spellchecker: disable */
+import { Octokit } from "octokit"
+import { createPatch } from "diff"
+
+// Script parameters, all optional, read from the script variables.
+const workflow = env.vars.workflow || "build.yml"
+const ffid = env.vars.failure_run_id
+const lsid = env.vars.success_run_id
+const branch =
+    env.vars.branch ||
+    (await host.exec("git branch --show-current")).stdout.trim()
+
+const octokit = new Octokit({
+    auth: process.env.GITHUB_TOKEN,
+})
+const { owner, repo } = await getRepoInfo()
+
+script({
+    system: ["system", "system.files"],
+    cache: "gh-investigator",
+})
+
+const runs = await listRuns(workflow, branch)
+
+// Run ids are numbers in the API response whereas ids supplied through
+// variables may be strings; compare their string forms so both match.
+const sameRunId = (id: number, other: unknown) => String(id) === String(other)
+
+// Locate the requested failure first so that the search for the last success
+// can start after it instead of picking a success that is more recent.
+const ffi = ffid ? runs.findIndex(({ id }) => sameRunId(id, ffid)) : -1
+if (ffid && ffi < 0) cancel("failure run not found")
+
+// last success: the requested run, or the first success listed after the failure.
+// This relies on the same list ordering that `runs[lsi - 1]` below assumes
+// (the run listed just before the last success is the first failure).
+const lsi = lsid
+    ? runs.findIndex(({ id }) => sameRunId(id, lsid))
+    : runs.findIndex(
+          ({ conclusion }, index) => index > ffi && conclusion === "success"
+      )
+const ls = runs[lsi]
+if (!ls) cancel("last success run not found")
+console.log(
+    `> last success: ${ls.id}, ${ls.created_at}, ${ls.head_sha}, ${ls.html_url}`
+)
+const ff = ffid ? runs[ffi] : runs[lsi - 1]
+if (!ff) cancel("failure run not found")
+console.log(
+    `> first failure: ${ff.id}, ${ff.created_at}, ${ff.head_sha}, ${ff.html_url}`
+)
+if (ff.conclusion !== "failure") cancel("failure run not found")
+
+// source changes between the commit of the last success and the commit of the first failure
+const gitDiff = await host.exec(
+    `git diff ${ls.head_sha} ${ff.head_sha} -- . :!**/genaiscript.d.ts`
+)
+console.log(`> source diff: ${(gitDiff.stdout.length / 1000) | 0}kb`)
+
+// download logs
+const lsjobs = await downloadRunLog(ls.id)
+const ffjobs = await downloadRunLog(ff.id)
+// Compare the job that actually failed (falling back to the first job) with
+// the job of the same name in the successful run, so that workflows with
+// several jobs do not end up diffing two unrelated or two passing jobs.
+const ffjob =
+    ffjobs.find(({ conclusion }) => conclusion === "failure") ?? ffjobs[0]
+if (!ffjob) cancel("first failure run has no jobs")
+const lsjob = lsjobs.find(({ name }) => name === ffjob.name) ?? lsjobs[0]
+if (!lsjob) cancel("last success run has no jobs")
+const lslog = lsjob.text
+console.log(
+    `> last success log: ${(lslog.length / 1000) | 0}kb ${lsjob.logUrl}`
+)
+const fflog = ffjob.text
+console.log(
+    `> first failure log: ${(fflog.length / 1000) | 0}kb  ${ffjob.logUrl}`
+)
+
+const logDiff = diffJobLogs(lslog, fflog)
+console.log(`> log diff: ${(logDiff.length / 1000) | 0}kb`)
+
+// include diffs: both are exposed to the prompt as named variables
+def("GIT_DIFF", gitDiff, {
+    language: "diff",
+    maxTokens: 10000,
+    lineNumbers: true,
+})
+def("LOG_DIFF", logDiff, {
+    language: "diff",
+    maxTokens: 20000,
+    lineNumbers: false,
+})
+$`You are an expert software engineer and you are able to analyze the logs and find the root cause of the failure.
+
+- GIT_DIFF contains a diff of 2 run commits
+- LOG_DIFF contains a diff of 2 runs in GitHub Action
+- The first run is the last successful run and the second run is the first failed run
+
+Add links to run logs.
+
+Analyze the diff in LOG_DIFF and provide a summary of the root cause of the failure.
+
+If you cannot find the root cause, stop.
+
+Generate a diff with suggested fixes. Use a diff format.`
+
+// report header with links to both runs and to the commit comparison
+writeText(
+    `## Investigator report
+- [run first failure](${ff.html_url})
+- [run last success](${ls.html_url})
+- [commit diff](https://github.com/${owner}/${repo}/compare/${ls.head_sha}...${ff.head_sha})
+
+`,
+    { assistant: true }
+)
+
+/*-----------------------------------------
+
+GitHub infra
+
+-----------------------------------------*/
+/**
+ * Resolves the owner and name of the GitHub repository to query.
+ *
+ * Uses the `GITHUB_REPOSITORY` environment variable (`owner/repo`) when set;
+ * otherwise parses the URL of the `origin` git remote. Both
+ * `https://github.com/owner/repo` and `git@github.com:owner/repo` forms are
+ * accepted, with or without a trailing `.git`.
+ *
+ * @returns the `owner` and `repo` names
+ * @throws Error when the remote URL is not a recognizable github.com URL
+ */
+async function getRepoInfo() {
+    const repository = process.env.GITHUB_REPOSITORY
+    if (repository) {
+        const [owner, repo] = repository.split("/")
+        return { owner, repo }
+    }
+    const res = await host.exec("git config --get remote.origin.url")
+    // command output may end with a newline, which would defeat the `$` anchor below
+    const remoteUrl = (res.stdout ?? "").trim()
+    const match = remoteUrl.match(
+        // `:` separates host and path in ssh remotes, `/` in https remotes;
+        // the optional `.git` suffix is kept out of the repository name
+        /github\.com[:\/](?<owner>[^\/]+)\/(?<repo>[^\/]+?)(?:\.git)?\/?$/
+    )
+    if (!match) {
+        throw new Error(
+            "Could not parse repository information from remote URL"
+        )
+    }
+    const { owner, repo } = match.groups
+    return { owner, repo }
+}
+
+/**
+ * Lists runs of a workflow on a branch, leaving out skipped runs.
+ *
+ * A single page of at most 100 runs is requested.
+ *
+ * @param workflow_id workflow identifier passed to the GitHub API
+ * @param branch branch whose runs are listed
+ * @returns the runs whose conclusion is not "skipped"
+ */
+async function listRuns(workflow_id: string, branch: string) {
+    const response = await octokit.rest.actions.listWorkflowRuns({
+        owner,
+        repo,
+        workflow_id,
+        branch,
+        per_page: 100,
+    })
+    // every conclusion is kept except "skipped"
+    return response.data.workflow_runs.filter(
+        (run) => run.conclusion !== "skipped"
+    )
+}
+
+/**
+ * Downloads the log of every job of a workflow run.
+ *
+ * Jobs are processed one after the other, in the order returned by the API.
+ *
+ * @param run_id identifier of the workflow run
+ * @returns one entry per job: the job fields plus `logUrl` (where the log
+ * was downloaded from) and `text` (the fetched log content)
+ */
+async function downloadRunLog(run_id: number) {
+    const downloaded = []
+    const listing = await octokit.rest.actions.listJobsForWorkflowRun({
+        owner,
+        repo,
+        run_id,
+    })
+    for (const job of listing.data.jobs) {
+        // the response of the download endpoint carries the log location in `url`
+        const logResponse =
+            await octokit.rest.actions.downloadJobLogsForWorkflowRun({
+                owner,
+                repo,
+                job_id: job.id,
+            })
+        const logUrl = logResponse.url
+        const fetched = await fetchText(logUrl)
+        downloaded.push({ ...job, logUrl, text: fetched.text })
+    }
+    return downloaded
+}
+
+/**
+ * Builds a patch between two job logs.
+ *
+ * Both logs are parsed into groups, truncated to the same number of groups
+ * (the list of steps is assumed not to have changed), rendered back to text
+ * and compared with `createPatch`, ignoring case and whitespace.
+ *
+ * @param firstLog raw text of the reference log
+ * @param otherLog raw text of the log compared against the reference
+ * @returns the patch text for a virtual `log.txt` file
+ */
+function diffJobLogs(firstLog: string, otherLog: string) {
+    const parsedFirst = parseJobLog(firstLog)
+    const parsedOther = parseJobLog(otherLog)
+
+    // only the groups present on both sides are compared
+    const count = Math.min(parsedFirst.length, parsedOther.length)
+
+    // titled groups get their markers back, untitled text is emitted as is
+    const render = (groups: { title: string; text: string }[]) =>
+        groups
+            .slice(0, count)
+            .map(({ title, text }) =>
+                title ? `##[group]${title}\n${text}\n##[endgroup]` : text
+            )
+            .join("\n")
+
+    const before = render(parsedFirst)
+    const after = render(parsedOther)
+    return createPatch("log.txt", before, after, undefined, undefined, {
+        ignoreCase: true,
+        ignoreWhitespace: true,
+    })
+}
+
+/**
+ * Splits a job log into groups.
+ *
+ * The log is cleaned with `cleanLog`, then cut at `##[group]` and
+ * `##[endgroup]` markers. Lines outside of any group are collected into
+ * groups with an empty title. Groups whose title is one of a fixed list of
+ * setup step titles are removed from the result.
+ *
+ * @param text raw log text
+ * @returns the groups in log order, each with its `title` and `text`
+ */
+function parseJobLog(text: string) {
+    const lines = cleanLog(text).split(/\r?\n/g)
+    const groups: { title: string; text: string }[] = []
+    let current: { title: string; text: string } | undefined = undefined
+    for (const line of lines) {
+        if (line.startsWith("##[group]")) {
+            // flush pending content first: text found between two groups (or a
+            // group that was never closed) would otherwise be overwritten and lost
+            if (current) groups.push(current)
+            current = { title: line.slice("##[group]".length), text: "" }
+        } else if (line.startsWith("##[endgroup]")) {
+            if (current) groups.push(current)
+            current = undefined
+        } else {
+            // text outside of a group starts an untitled group
+            if (!current) current = { title: "", text: "" }
+            current.text += line + "\n"
+        }
+    }
+    // keep whatever was still open when the log ended
+    if (current) groups.push(current)
+
+    // group titles that are dropped from the result
+    const ignoreSteps = [
+        "Runner Image",
+        "Fetching the repository",
+        "Checking out the ref",
+        "Setting up auth",
+        "Setting up auth for fetching submodules",
+        "Getting Git version info",
+        "Initializing the repository",
+        "Determining the checkout info",
+        "Persisting credentials for submodules",
+    ]
+    return groups.filter(({ title }) => !ignoreSteps.includes(title))
+}
+
+/**
+ * Removes noise from a log so that two logs can be compared: the timestamp
+ * that prefixes each line and the escape sequences used for colors.
+ *
+ * @param text raw log text
+ * @returns the text without line timestamps and color escape sequences
+ */
+function cleanLog(text: string) {
+    return text
+        .replace(
+            // timestamps at the start of a line, e.g. "2024-09-26T10:11:12.1234567Z ".
+            // NOTE(review): a bare "^?" is not a valid pattern; this assumes the
+            // optional character in front of the timestamp was a byte order mark
+            /^\uFEFF?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2,}Z /gm,
+            ""
+        )
+        .replace(/\x1b\[[0-9;]*m/g, "") // ANSI color escape sequences
+}
diff --git a/packages/sample/package.json b/packages/sample/package.json
@@ -22,6 +22,7 @@
     "@azure/storage-blob": "^12.24.0",
     "@tidyjs/tidy": "^2.5.2",
     "@xenova/transformers": "^2.17.2",
+    "octokit": "^4.0.2",
     "p-all": "^5.0.0",
     "vectorstore": "^0.0.4",
     "zod": "^3.23.8",