From e89d84e630cee7dd46467af704961ae940c9fbb7 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 3 Dec 2024 22:57:12 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20=F0=9F=90=9B=20add=20fallback=20for=20to?= =?UTF-8?q?ken=20encoder=20&=20assertions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/core/src/chat.ts | 2 +- packages/core/src/encoders.ts | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index c3e5fea269..83421a15ac 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -749,7 +749,7 @@ async function choicesToLogitBias( if (!choices?.length) return undefined const { encode } = await resolveTokenEncoder(model, { disableFallback: true, - }) + }) || {} if (!encode) { trace.error( `unabled to compute logit bias, no token encoder found for ${model}` diff --git a/packages/core/src/encoders.ts b/packages/core/src/encoders.ts index 49d445d6c6..5c87fb0535 100644 --- a/packages/core/src/encoders.ts +++ b/packages/core/src/encoders.ts @@ -5,6 +5,7 @@ import path from "node:path" import { addLineNumbers, indexToLineNumber } from "./liner" import { resolveFileContent } from "./file" import type { EncodeOptions } from "gpt-tokenizer/GptEncoding" +import { assert } from "./util" /** * Resolves the appropriate token encoder based on the given model ID. @@ -31,6 +32,7 @@ export async function resolveTokenEncoder( decode, default: api, } = await import(`gpt-tokenizer/model/${module}`) + assert(!!encode) const { modelName } = api const size = api.bytePairEncodingCoreProcessor?.mergeableBytePairRankCount + @@ -50,6 +52,7 @@ export async function resolveTokenEncoder( decode, default: api, } = await import("gpt-tokenizer/model/gpt-4o") + assert(!!encode) const { modelName, vocabularySize } = api return Object.freeze({ model: modelName,