Skip to content

Commit

Permalink
Refactor JSONL parsing and logging, improve error handling and comment guidelines
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Sep 24, 2024
1 parent b393cae commit 5c9b8c6
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 37 deletions.
13 changes: 10 additions & 3 deletions packages/cli/src/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,22 @@ import replaceExt from "replace-ext"
import { readFile } from "node:fs/promises"
import { DOCXTryParse } from "../../core/src/docx"
import { extractFenced } from "../../core/src/fence"
import { expandFiles, writeText, readText } from "../../core/src/fs"
import {
expandFiles,
writeText,
readText,
tryReadText,
} from "../../core/src/fs"
import { HTMLToText } from "../../core/src/html"
import { isJSONLFilename, readJSONL } from "../../core/src/jsonl"
import { isJSONLFilename, JSONLTryParse } from "../../core/src/jsonl"
import { parsePdf } from "../../core/src/pdf"
import { estimateTokens } from "../../core/src/tokens"
import { YAMLStringify } from "../../core/src/yaml"
import { resolveTokenEncoder } from "../../core/src/encoders"
import { DEFAULT_MODEL } from "../../core/src/constants"
import { promptyParse, promptyToGenAIScript } from "../../core/src/prompty"
import { basename, join } from "node:path"
import { JSONLLMTryParse } from "../../core/src/json5"

export async function parseFence(language: string, file: string) {
const res = await parsePdf(file)
Expand Down Expand Up @@ -44,7 +50,8 @@ export async function jsonl2json(files: string[]) {
console.log(`skipping ${file}`)
continue
}
const objs = await readJSONL(file)
const content = await tryReadText(file)
const objs = await JSONLTryParse(content, { repair: true })
const out = replaceExt(file, ".json")
await writeText(out, JSON.stringify(objs, null, 2))
console.log(`${file} -> ${out}`)
Expand Down
9 changes: 6 additions & 3 deletions packages/core/src/cache.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
// Import necessary modules and types
import { appendJSONL, readJSONL, writeJSONL } from "./jsonl"
import { host, runtimeHost } from "./host"
import { appendJSONL, JSONLTryParse, writeJSONL } from "./jsonl"
import { host } from "./host"
import { dotGenaiscriptPath, sha256string } from "./util"
import { CHANGE } from "./constants"
import { TraceOptions } from "./trace"
import { CORE_VERSION } from "./version"
import { tryReadText } from "./fs"
import { JSON5TryParse } from "./json5"

/**
* Represents a cache entry with a hashed identifier (`sha`), `key`, and `val`.
Expand Down Expand Up @@ -60,7 +62,8 @@ export class JSONLineCache<K, V> extends EventTarget {
if (this._entries) return
this._entries = {}
await host.createDirectory(this.folder()) // Ensure directory exists
const objs: CacheEntry<K, V>[] = await readJSONL(this.path())
const content = await tryReadText(this.path())
const objs: CacheEntry<K, V>[] = (await JSON5TryParse(content)) ?? []
let numdup = 0 // Counter for duplicates
for (const obj of objs) {
if (this._entries[obj.sha]) numdup++ // Count duplicates
Expand Down
29 changes: 2 additions & 27 deletions packages/core/src/jsonl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,6 @@ export function isJSONLFilename(fn: string) {
return /\.(jsonl|mdjson|ldjson)$/i.test(fn)
}

/**
 * Reads the file `fn` and parses it line by line, treating each
 * non-blank line as an independent JSON document.
 *
 * @param fn - Name of the file to read; it is passed to `tryReadFile` and
 *   also used as the prefix of the warning message.
 * @returns An array with the parsed value of every line that `JSON.parse`
 *   accepted, in file order. The array is empty when `tryReadFile` yields
 *   `null`/`undefined`.
 */
export async function readJSONL(fn: string) {
const buf = await tryReadFile(fn)
const res: any[] = []
// Nothing was read: return the empty result instead of failing.
if (buf == null) return res
// Line counter used only for the warning message (starts at 1).
let line = 1
// Number of lines that failed to parse; only the first one is reported.
let numerr = 0
const decoder = host.createUTF8Decoder()
// Walk the raw buffer one line at a time; `pos` is advanced at the end of the body.
for (let pos = 0; pos < buf.length; ) {
// 10 is the byte value of "\n"; `ep` is the end of the current line.
let ep = buf.indexOf(10, pos)
// No further newline: the last line extends to the end of the buffer.
if (ep < 0) ep = buf.length
const str = decoder.decode(buf.slice(pos, ep))
// Skip empty and whitespace-only lines.
if (!/^\s*$/.test(str)) {
try {
res.push(JSON.parse(str))
} catch (e) {
// Malformed lines are dropped; warn only once, for the first bad line.
if (!numerr) logWarn(`${fn}(${line}): JSON error`)
numerr++
}
}
// Continue just past the newline byte.
pos = ep + 1
line++
}
return res
}

export function JSONLTryParse(
text: string,
options?: {
Expand All @@ -49,15 +24,15 @@ export function JSONLTryParse(
for (const line of text.split("\n")) {
if (!line) continue
const obj = JSON5TryParse(line, options)
res.push(obj)
if (obj !== undefined && obj !== null) res.push(obj)
}
return res
}

export function JSONLStringify(objs: any[]) {
const acc: string[] = []
if (objs?.length)
for (const o of objs) {
for (const o of objs.filter((o) => o !== undefined && o !== null)) {
const s = JSON.stringify(o)
acc.push(s)
}
Expand Down
7 changes: 3 additions & 4 deletions packages/vscode/genaisrc/cmt.genai.mts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ script({
const { format, build } = env.vars
const saveLimit = pLimit(1)

console.log({ format, build })

// Get files from environment or modified files from Git if none provided
let files = env.files
if (files.length === 0) {
Expand Down Expand Up @@ -122,7 +120,8 @@ You should analyze it, and add/update appropriate comments as needed.
To add or update comments to this code, follow these steps:
1. Analyze the code to understand its structure and functionality.
- If you are not familiar with the programming language, ignore the file.
- If you are not familiar with the programming language, emit an empty file.
- If there is no code, emit an empty file.
2. Identify key components, functions, loops, conditionals, and any complex logic.
3. Add comments that explain:
- The purpose of functions or code blocks using the best comment format for that programming language.
Expand Down Expand Up @@ -158,7 +157,7 @@ Your comments should provide insight into the code's purpose, logic, and any imp
)
const { text, fences } = res
const newContent = fences?.[0]?.content ?? text
if (!newContent) return undefined
if (!newContent?.trim()) return undefined
if (newContent === content) break
content = newContent
}
Expand Down

0 comments on commit 5c9b8c6

Please sign in to comment.