Skip to content

Commit

Permalink
llm as judge support
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Dec 19, 2024
1 parent 20aabcb commit 6cce2d1
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 1 deletion.
2 changes: 1 addition & 1 deletion packages/core/src/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,7 @@ async function choicesToLogitBias(
disableFallback: true,
})) || {}
if (!encode) {
logWarn(
logVerbose(

Check failure on line 788 in packages/core/src/chat.ts

View workflow job for this annotation

GitHub Actions / build

Did you mean `unable` instead of `unabled`?
`unabled to compute logit bias, no token encoder found for ${model}`
)
trace.warn(
Expand Down
80 changes: 80 additions & 0 deletions packages/sample/genaisrc/judge.genai.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
*
* @param task
* @param result
* @param categories
* @param options
* @returns
*/
async function judgeClassify(
task: Awaitable<string>,
result: Awaitable<string>,
categories: Record<string, string>,
options?: {
logprobs?: boolean
topLogprobs?: number
model?: ModelType
temperature?: number
}
) {
const unknown = "unknown"
const unknownText =
"if you do not have enough information or certainty to categorize the result"
+
const choices = { ...categories, ...{ [unknown]: unknownText } }
const res = await runPrompt(
async (ctx) => {
ctx.$`## Task
You will be given a task description in <TASK> and a response from a chat bot in <RESULT>.
Your task is to judge and classify <RESULT> according to the categories in <CATEGORY>.
- Rely exclusively on the information provided in the task and the response.
## Output
The output should start with the step-by-step explanation of the classification (one paragraph).
Finish the output with the category name on a single line, no other words.
`.role("system")
ctx.def("TASK", await task)
ctx.def("RESULT", await result)
ctx.def(
"CATEGORY",
Object.entries(choices)
.map((kv) => `- ${kv[0]}: ${kv[1]}`)
.join("\n")
)
},
{
system: ["system.output_plaintext", "system.safety_jailbreak"],
choices: Object.keys(choices),
logprobs: options?.logprobs,
topLogprobs: options?.topLogprobs,
temperature: options?.temperature || 0.1,
model: options?.model || "small",
}
)

// extract the classifiction
const category = Object.keys(choices)
.map((category) => ({
category,
l: res.text.lastIndexOf(category),
}))
.filter(({ l }) => l > -1)
.sort((a, b) => a.l - b.l)[0]?.category

const logprob = res.logprobs?.reverse().find((lp) => lp.token === category)
return {
category,
logprob,
}
}

const task = "Generate a funny joke."
//const { text: result } = await runPrompt(task)
let result = "Why did the tomato turn red? Because boo."

const res = await judgeClassify(task, result, {
pass: "the joke was funny",
fail: "the joke was not funny",
})
console.log(res)

0 comments on commit 6cce2d1

Please sign in to comment.