diff --git a/docs/public/schemas/llms.json b/docs/public/schemas/llms.json index b750191fa..bc004791d 100644 --- a/docs/public/schemas/llms.json +++ b/docs/public/schemas/llms.json @@ -58,6 +58,48 @@ "bearerToken": { "type": "boolean", "description": "Indicates if bearer token is supported" + }, + "aliases": { + "type": "object", + "description": "List of model aliases for the provider", + "properties": { + "large": { + "type": "string", + "description": "Alias for large model" + }, + "small": { + "type": "string", + "description": "Alias for small model" + }, + "vision": { + "type": "string", + "description": "Alias for vision model" + }, + "reasoning": { + "type": "string", + "description": "Alias for reasoning model" + }, + "reasoning_small": { + "type": "string", + "description": "Alias for reasoning small model" + }, + "long": { + "type": "string", + "description": "Alias for long model" + }, + "agent": { + "type": "string", + "description": "Alias for agent model" + }, + "memory": { + "type": "string", + "description": "Alias for memory model" + }, + "embeddings": { + "type": "string", + "description": "Alias for embeddings model" + } + } } }, "additionalProperties": false, @@ -65,6 +107,15 @@ } } }, + "aliases": { + "type": "object", + "additionalProperties": true, + "patternProperties": { + "^[a-zA-Z0-9:_-]+$": { + "type": "string" + } + } + }, "pricings": { "type": "object", "additionalProperties": false, diff --git a/packages/cli/src/nodehost.ts b/packages/cli/src/nodehost.ts index b09c736e7..100f02d2c 100644 --- a/packages/cli/src/nodehost.ts +++ b/packages/cli/src/nodehost.ts @@ -17,27 +17,16 @@ import { parseTokenFromEnv, } from "../../core/src/connection" import { - DEFAULT_LARGE_MODEL, MODEL_PROVIDER_AZURE_OPENAI, SHELL_EXEC_TIMEOUT, - DEFAULT_EMBEDDINGS_MODEL, - DEFAULT_SMALL_MODEL, AZURE_COGNITIVE_SERVICES_TOKEN_SCOPES, MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, AZURE_AI_INFERENCE_TOKEN_SCOPES, MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, DOT_ENV_FILENAME, - DEFAULT_VISION_MODEL, LARGE_MODEL_ID, SMALL_MODEL_ID, - DEFAULT_SMALL_MODEL_CANDIDATES, - DEFAULT_LARGE_MODEL_CANDIDATES, - DEFAULT_EMBEDDINGS_MODEL_CANDIDATES, - DEFAULT_VISION_MODEL_CANDIDATES, - DEFAULT_REASONING_MODEL, - 
DEFAULT_REASONING_SMALL_MODEL, - DEFAULT_REASONING_SMALL_MODEL_CANDIDATES, - DEFAULT_REASONING_MODEL_CANDIDATES, + VISION_MODEL_ID, } from "../../core/src/constants" import { tryReadText } from "../../core/src/fs" import { @@ -54,7 +43,7 @@ import { ModelConfiguration, } from "../../core/src/host" import { TraceOptions } from "../../core/src/trace" -import { logError, logVerbose } from "../../core/src/util" +import { deleteEmptyValues, logError, logVerbose } from "../../core/src/util" import { parseModelIdentifier } from "../../core/src/models" import { LanguageModel } from "../../core/src/chat" import { errorMessage, NotSupportedError } from "../../core/src/error" @@ -73,6 +62,7 @@ import { resolveGlobalConfiguration } from "../../core/src/config" import { HostConfiguration } from "../../core/src/hostconfiguration" import { resolveLanguageModel } from "../../core/src/lm" import { CancellationOptions } from "../../core/src/cancellation" +import LLMS from "../../core/src/llms.json" class NodeServerManager implements ServerManager { async start(): Promise<void> { @@ -83,6 +73,43 @@ class NodeServerManager implements ServerManager { } } +function readModelAliases(): ModelConfigurations { + const aliases = [ + LARGE_MODEL_ID, + SMALL_MODEL_ID, + VISION_MODEL_ID, + "embeddings", + "reasoning", + "reasoning_small", + ] + const res = { + ...(Object.fromEntries( + aliases.map((alias) => [alias, readModelAlias(alias)]) + ) as ModelConfigurations), + ...Object.fromEntries( + Object.entries(LLMS.aliases).map((kv) => [ + kv[0], + { + model: kv[1], + source: "default", + } satisfies ModelConfiguration, + ]) + ), + } + return res + + function readModelAlias(alias: string) { + const candidates = Object.values(LLMS.providers) + .map(({ aliases }) => (aliases as Record<string, string>)?.[alias]) + .filter((c) => !!c) + return deleteEmptyValues({ + model: candidates[0], + source: "default", + candidates, + }) + } +} + export class NodeHost implements RuntimeHost { private pulledModels: 
string[] = [] readonly dotEnvPath: string @@ -97,41 +124,7 @@ export class NodeHost implements RuntimeHost { "default" | "cli" | "env" | "config", Omit > = { - default: { - large: { - model: DEFAULT_LARGE_MODEL, - source: "default", - candidates: DEFAULT_LARGE_MODEL_CANDIDATES, - }, - small: { - model: DEFAULT_SMALL_MODEL, - source: "default", - candidates: DEFAULT_SMALL_MODEL_CANDIDATES, - }, - vision: { - model: DEFAULT_VISION_MODEL, - source: "default", - candidates: DEFAULT_VISION_MODEL_CANDIDATES, - }, - embeddings: { - model: DEFAULT_EMBEDDINGS_MODEL, - source: "default", - candidates: DEFAULT_EMBEDDINGS_MODEL_CANDIDATES, - }, - reasoning: { - model: DEFAULT_REASONING_MODEL, - source: "default", - candidates: DEFAULT_REASONING_MODEL_CANDIDATES, - }, - ["reasoning_small"]: { - model: DEFAULT_REASONING_SMALL_MODEL, - source: "default", - candidates: DEFAULT_REASONING_SMALL_MODEL_CANDIDATES, - }, - long: { model: LARGE_MODEL_ID, source: "default" }, - agent: { model: LARGE_MODEL_ID, source: "default" }, - memory: { model: SMALL_MODEL_ID, source: "default" }, - }, + default: readModelAliases(), cli: {}, env: {}, config: {}, diff --git a/packages/cli/src/parse.ts b/packages/cli/src/parse.ts index d2de135c4..97e4d15bd 100644 --- a/packages/cli/src/parse.ts +++ b/packages/cli/src/parse.ts @@ -16,7 +16,6 @@ import { YAMLParse, YAMLStringify } from "../../core/src/yaml" import { resolveTokenEncoder } from "../../core/src/encoders" import { CSV_REGEX, - DEFAULT_LARGE_MODEL, INI_REGEX, JSON5_REGEX, MD_REGEX, @@ -203,7 +202,7 @@ export async function parseTokens( filesGlobs: string[], options: { excludedFiles: string[]; model: string } ) { - const { model = DEFAULT_LARGE_MODEL } = options || {} + const { model } = options || {} const { encode: encoder } = await resolveTokenEncoder(model) const files = await expandFiles(filesGlobs, options?.excludedFiles) diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 37e651533..79bcbea21 100644 --- 
a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -57,56 +57,6 @@ export const SMALL_MODEL_ID = "small" export const LARGE_MODEL_ID = "large" export const VISION_MODEL_ID = "vision" export const DEFAULT_FENCE_FORMAT: FenceFormat = "xml" -export const DEFAULT_LARGE_MODEL = "openai:gpt-4o" -export const DEFAULT_LARGE_MODEL_CANDIDATES = [ - "azure_serverless:gpt-4o", - DEFAULT_LARGE_MODEL, - "google:gemini-1.5-pro-latest", - "anthropic:claude-2.1", - "mistral:mistral-large-latest", - "github:gpt-4o", - "client:gpt-4", -] -export const DEFAULT_VISION_MODEL = "openai:gpt-4o" -export const DEFAULT_VISION_MODEL_CANDIDATES = [ - "azure_serverless:gpt-4o", - DEFAULT_VISION_MODEL, - "google:gemini-1.5-flash-latest", - "anthropic:claude-2.1", - "github:gpt-4o", -] -export const DEFAULT_SMALL_MODEL = "openai:gpt-4o-mini" -export const DEFAULT_SMALL_MODEL_CANDIDATES = [ - "azure_serverless:gpt-4o-mini", - DEFAULT_SMALL_MODEL, - "google:gemini-1.5-flash-latest", - "anthropic:claude-instant-1.2", - "mistral:mistral-small-latest", - "github:gpt-4o-mini", - "client:gpt-4-mini", -] -export const DEFAULT_EMBEDDINGS_MODEL_CANDIDATES = [ - "azure:text-embedding-3-small", - "azure:text-embedding-2-small", - "openai:text-embedding-3-small", - "github:text-embedding-3-small", - "client:text-embedding-3-small", -] -export const DEFAULT_REASONING_SMALL_MODEL = "openai:o1-mini" -export const DEFAULT_REASONING_SMALL_MODEL_CANDIDATES = [ - "azure_serverless:o1-mini", - DEFAULT_REASONING_SMALL_MODEL, - "github:o1-mini", - "client:o1-mini", -] -export const DEFAULT_REASONING_MODEL = "openai:o1" -export const DEFAULT_REASONING_MODEL_CANDIDATES = [ - "azure_serverless:o1-preview", - DEFAULT_REASONING_MODEL, - "github:o1-preview", - "client:o1-preview", -] -export const DEFAULT_EMBEDDINGS_MODEL = "openai:text-embedding-ada-002" export const DEFAULT_TEMPERATURE = 0.8 export const BUILTIN_PREFIX = "_builtin/" export const CACHE_LLMREQUEST_PREFIX = "genaiscript/cache/llm/" diff 
--git a/packages/core/src/llms.json b/packages/core/src/llms.json index 4d4c16974..f679be40e 100644 --- a/packages/core/src/llms.json +++ b/packages/core/src/llms.json @@ -4,16 +4,15 @@ { "id": "openai", "detail": "OpenAI (or compatible)", - "bearerToken": true - }, - { - "id": "github", - "detail": "GitHub Models", - "logprobs": false, - "topLogprobs": false, - "limitations": "Smaller context windows, and rate limiting", - "prediction": false, - "bearerToken": true + "bearerToken": true, + "aliases": { + "large": "openai:gpt-4o", + "small": "openai:gpt-4o-mini", + "vision": "openai:gpt-4o", + "embeddings": "openai:text-embedding-3-small", + "reasoning": "openai:o1", + "reasoning_small": "openai:o1-mini" + } }, { "id": "azure", @@ -23,7 +22,14 @@ { "id": "azure_serverless", "detail": "Azure AI OpenAI (serverless deployments)", - "bearerToken": false + "bearerToken": false, + "aliases": { + "large": "azure_serverless:gpt-4o", + "small": "azure_serverless:gpt-4o-mini", + "vision": "azure_serverless:gpt-4o", + "reasoning": "azure_serverless:o1-preview", + "reasoning_small": "azure_serverless:o1-mini" + } }, { "id": "azure_serverless_models", @@ -36,7 +42,12 @@ "detail": "Anthropic models", "logprobs": false, "topLogprobs": false, - "prediction": false + "prediction": false, + "aliases": { + "large": "anthropic:claude-2.1", + "small": "anthropic:claude-instant-1.2", + "vision": "anthropic:claude-2.1" + } }, { "id": "google", @@ -45,7 +56,12 @@ "tools": false, "openaiCompatibility": "https://ai.google.dev/gemini-api/docs/openai", "prediction": false, - "bearerToken": true + "bearerToken": true, + "aliases": { + "large": "google:gemini-1.5-pro-latest", + "small": "google:gemini-1.5-flash-latest", + "vision": "google:gemini-1.5-flash-latest" + } }, { "id": "huggingface", @@ -58,6 +74,31 @@ "prediction": false, "bearerToken": true }, + { + "id": "github", + "detail": "GitHub Models", + "logprobs": false, + "topLogprobs": false, + "limitations": "Smaller context windows, 
and rate limiting", + "prediction": false, + "bearerToken": true, + "aliases": { + "large": "github:gpt-4o", + "small": "github:gpt-4o-mini", + "vision": "github:gpt-4o", + "embeddings": "github:text-embedding-3-small", + "reasoning": "github:o1-preview", + "reasoning_small": "github:o1-mini" + } + }, + { + "id": "client", + "detail": "GitHub Copilot Chat Models", + "aliases": { + "large": "client:gpt-4o", + "small": "client:gpt-4o-mini" + } + }, { "id": "transformers", "detail": "Hugging Face Transformers", @@ -99,6 +140,11 @@ "prediction": false } ], + "aliases": { + "agent": "large", + "long": "large", + "memory": "large" + }, "pricings": { "openai:gpt-4o": { "price_per_million_input_tokens": 2.5, diff --git a/packages/core/src/util.ts b/packages/core/src/util.ts index dbd9aef94..a58b11073 100644 --- a/packages/core/src/util.ts +++ b/packages/core/src/util.ts @@ -66,6 +66,20 @@ export function deleteUndefinedValues<T extends Record<string, any>>(o: T): T { return o } +export function deleteEmptyValues<T extends Record<string, any>>(o: T): T { + for (const k in o) { + const v = o[k] + if ( + v === undefined || + v === null || + v === "" || + (Array.isArray(v) && !v.length) + ) + delete o[k] + } + return o +} + export function collapseEmptyLines(text: string) { return text?.replace(/(\r?\n){2,}/g, "\n\n") }