Add model alias configuration support and documentation (#941)
* feat: πŸ”§ add model alias configuration support

* feat: πŸŽ‰ add model aliases support and documentation
pelikhan authored Dec 13, 2024
1 parent 5b73ef2 commit 4aa8f89
Showing 10 changed files with 155 additions and 25 deletions.
7 changes: 4 additions & 3 deletions docs/src/content/docs/getting-started/configuration.mdx
@@ -37,7 +37,7 @@ script({

### Large, small, vision models

You can also use the `small`, `large`, `vision` aliases to use the default configured small, large and vision-enabled models.
You can also use the `small`, `large`, `vision` [model aliases](/genaiscript/reference/scripts/model-aliases) to use the default configured small, large and vision-enabled models.
Large models are typically in the OpenAI gpt-4 reasoning range and can be used for more complex tasks.
Small models are in the OpenAI gpt-4o-mini range, and are useful for quick and simple tasks.

@@ -49,13 +49,14 @@ script({ model: "small" })
script({ model: "large" })
```

The model can also be overridden from the [cli run command](/genaiscript/reference/cli/run#model)
The model aliases can also be overridden from the [cli run command](/genaiscript/reference/cli/run#model),
or through environment variables or a configuration file. [Learn more about model aliases](/genaiscript/reference/scripts/model-aliases).

```sh
genaiscript run ... --model largemodelid --small-model smallmodelid
```

or by adding the `GENAISCRIPT_LARGE_MODEL` and `GENAISCRIPT_SMALL_MODEL` environment variables.
or by adding the `GENAISCRIPT_MODEL_LARGE` and `GENAISCRIPT_MODEL_SMALL` environment variables.

```txt title=".env"
GENAISCRIPT_MODEL_LARGE="azure_serverless:..."
```
70 changes: 70 additions & 0 deletions docs/src/content/docs/reference/scripts/model-aliases.mdx
@@ -0,0 +1,70 @@
---
title: Model Aliases
description: Give friendly names to models
sidebar:
order: 60
---

You can define **model aliases** in your project to give friendly names to models and abstract away from a particular model version/tag.

So instead of hard-coding a model type,

```js 'model: "openai:gpt-4o"'
script({
model: "openai:gpt-4o",
})
```

You can use/define an alias like `large`.

```js 'model: "large"'
script({
model: "large",
})
```

Model aliases can be defined as environment variables (through the `.env` file),
in a configuration file, or through the [cli](/genaiscript/reference/cli/run).

This `.env` file defines a `llama32` alias for the `ollama:llama3.2:1b` model.

```txt title=".env"
GENAISCRIPT_MODEL_LLAMA32="ollama:llama3.2:1b"
```

You can then use the `llama32` alias in your scripts.

```js 'model: "llama32"'
script({
model: "llama32",
})
```

## Defining aliases

The following configuration sources are supported, in order of importance (last one wins):

- [configuration file](/genaiscript/reference/configuration-files) with the `modelAliases` field

```json title="genaiscript.config.json"
{
"modelAliases": {
"llama32": "ollama:llama3.2:1b"
}
}
```

- environment variables with keys matching the pattern `GENAISCRIPT_MODEL_<ALIAS>=...`
- [cli](/genaiscript/reference/cli/run) with the `--model-alias` flag

```sh
genaiscript run --model-alias llama32=ollama:llama3.2:1b
```
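The precedence above can be sketched as a plain object merge, where later sources override earlier ones. This is an illustrative sketch, not the actual GenAIScript internals; `resolveAliases` and `AliasMap` are hypothetical names:

```typescript
// Illustrative sketch of alias precedence resolution.
type ModelConfig = { model: string; temperature?: number };
type AliasMap = Record<string, ModelConfig>;

// Spread order implements "last one wins": defaults < config < env < cli.
function resolveAliases(
    defaults: AliasMap,
    config: AliasMap,
    env: AliasMap,
    cli: AliasMap
): AliasMap {
    return { ...defaults, ...config, ...env, ...cli };
}

const resolved = resolveAliases(
    { large: { model: "openai:gpt-4o" } },
    { llama32: { model: "ollama:llama3.2:1b" } }, // config file
    {}, // no env overrides
    { llama32: { model: "ollama:llama3.2:3b" } } // cli flag
);
console.log(resolved.llama32.model); // the cli value wins
```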

## Builtin aliases

By default, GenAIScript supports the following model aliases:

- `large`: a `gpt-4o`-class model, used for more complex tasks
- `small`: a `gpt-4o-mini`-class model; a smaller, cheaper, faster model for quick and simple tasks
- `vision`: `gpt-4o-mini` or similar; a model that can analyze images
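Aliases defined through environment variables follow the `GENAISCRIPT_MODEL_<ID>` key pattern described above. As a sketch of that key parsing (the helper name `aliasesFromEnv` is hypothetical; the exact matching rules are defined by the CLI):

```typescript
// Hypothetical sketch: derive lowercase alias ids from GENAISCRIPT_MODEL_* keys.
const rx = /^GENAISCRIPT_MODEL_(?<id>[A-Z0-9_\-]+)$/;

function aliasesFromEnv(env: Record<string, string>): Record<string, string> {
    const out: Record<string, string> = {};
    for (const [key, value] of Object.entries(env)) {
        const m = rx.exec(key);
        // Non-matching keys (e.g. PATH) are ignored.
        if (m?.groups?.id) out[m.groups.id.toLowerCase()] = value;
    }
    return out;
}

const aliases = aliasesFromEnv({
    GENAISCRIPT_MODEL_LLAMA32: "ollama:llama3.2:1b",
    PATH: "/usr/bin",
});
console.log(aliases); // llama32 mapped to ollama:llama3.2:1b
```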
47 changes: 41 additions & 6 deletions packages/cli/src/nodehost.ts
@@ -45,6 +45,7 @@ import {
ResponseStatus,
AzureTokenResolver,
ModelConfigurations,
ModelConfiguration,
} from "../../core/src/host"
import { AbortSignalOptions, TraceOptions } from "../../core/src/trace"
import { logError, logVerbose } from "../../core/src/util"
@@ -144,11 +145,19 @@ export class NodeHost implements RuntimeHost {
readonly workspace = createFileSystem()
readonly containers = new DockerManager()
readonly browsers = new BrowserManager()
readonly modelAliases: ModelConfigurations = {
large: { model: DEFAULT_MODEL },
small: { model: DEFAULT_SMALL_MODEL },
vision: { model: DEFAULT_VISION_MODEL },
embeddings: { model: DEFAULT_EMBEDDINGS_MODEL },
private readonly _modelAliases: Record<
"default" | "cli" | "env" | "config",
Omit<ModelConfigurations, "large" | "small" | "vision" | "embeddings">
> = {
default: {
large: { model: DEFAULT_MODEL },
small: { model: DEFAULT_SMALL_MODEL },
vision: { model: DEFAULT_VISION_MODEL },
embeddings: { model: DEFAULT_EMBEDDINGS_MODEL },
},
cli: {},
env: {},
config: {},
}
readonly userInputQueue = new PLimitPromiseQueue(1)
readonly azureToken: AzureTokenResolver
@@ -168,9 +177,32 @@
)
}

get modelAliases(): Readonly<ModelConfigurations> {
const res = {
...this._modelAliases.default,
...this._modelAliases.config,
...this._modelAliases.env,
...this._modelAliases.cli,
} as ModelConfigurations
return Object.freeze(res)
}

setModelAlias(
source: "cli" | "env" | "config",
id: string,
value: string | ModelConfiguration
): void {
id = id.toLowerCase()
if (typeof value === "string") value = { model: value }
const aliases = this._modelAliases[source]
const c = aliases[id] || (aliases[id] = {})
c.model = value.model
c.temperature = value.temperature
}

async readConfig(): Promise<HostConfiguration> {
const config = await resolveGlobalConfiguration(this.dotEnvPath)
const { envFile } = config
const { envFile, modelAliases } = config
if (existsSync(envFile)) {
if (resolve(envFile) !== resolve(DOT_ENV_FILENAME))
logVerbose(`.env: loading ${envFile}`)
@@ -182,6 +214,9 @@
if (res.error) throw res.error
}
await parseDefaultsFromEnv(process.env)
if (modelAliases)
for (const kv of Object.entries(modelAliases))
this.setModelAlias("config", kv[0], kv[1])
return config
}

3 changes: 1 addition & 2 deletions packages/cli/src/run.ts
@@ -87,7 +87,6 @@ import {
stderr,
stdout,
} from "../../core/src/logging"
import { setModelAlias } from "../../core/src/connection"

async function setupTraceWriting(trace: MarkdownTrace, filename: string) {
logVerbose(`  trace: ${filename}`)
@@ -221,7 +220,7 @@ export async function runScriptInternal(
for (const kv of options.modelAlias || []) {
const aliases = parseKeyValuePair(kv)
for (const [key, value] of Object.entries(aliases))
setModelAlias(key, value)
runtimeHost.setModelAlias("cli", key, value)
}

const fail = (msg: string, exitCode: number, url?: string) => {
11 changes: 2 additions & 9 deletions packages/core/src/connection.ts
@@ -73,26 +73,19 @@ export function findEnvVar(
return undefined
}

export function setModelAlias(id: string, modelid: string) {
id = id.toLowerCase()
const c =
runtimeHost.modelAliases[id] || (runtimeHost.modelAliases[id] = {})
c.model = modelid
}

export async function parseDefaultsFromEnv(env: Record<string, string>) {
// legacy
if (env.GENAISCRIPT_DEFAULT_MODEL)
runtimeHost.modelAliases.large.model = env.GENAISCRIPT_DEFAULT_MODEL

const rx =
/^GENAISCRIPT(_DEFAULT)?_((?<id>[A-Z0-9]+)_MODEL|MODEL_(?<id2>[A-Z0-9]+))$/i
/^GENAISCRIPT(_DEFAULT)?_((?<id>[A-Z0-9_\-]+)_MODEL|MODEL_(?<id2>[A-Z0-9_\-]+))$/i
for (const kv of Object.entries(env)) {
const [k, v] = kv
const m = rx.exec(k)
if (!m) continue
const id = m.groups.id || m.groups.id2
setModelAlias(id, v)
runtimeHost.setModelAlias("env", id, v)
}
const t = normalizeFloat(env.GENAISCRIPT_DEFAULT_TEMPERATURE)
if (!isNaN(t)) runtimeHost.modelAliases.large.temperature = t
4 changes: 3 additions & 1 deletion packages/core/src/host.ts
@@ -166,7 +166,9 @@ export interface RuntimeHost extends Host {
models: ModelService
workspace: Omit<WorkspaceFileSystem, "grep">
azureToken: AzureTokenResolver
modelAliases: ModelConfigurations
modelAliases: Readonly<ModelConfigurations>

setModelAlias(source: "env" | "cli" | "config", id: string, value: string | ModelConfiguration): void

readConfig(): Promise<HostConfiguration>
readSecret(name: string): Promise<string | undefined>
7 changes: 7 additions & 0 deletions packages/core/src/hostconfiguration.ts
@@ -1,3 +1,5 @@
import { ModelConfiguration } from "./host"

/**
* Schema for a global configuration file
*/
@@ -11,4 +13,9 @@ export interface HostConfiguration {
* List of glob paths to scan for genai scripts
*/
include?: string[]

/**
* Configures a list of known aliases. Overridden by environment variables and CLI arguments
*/
modelAliases?: Record<string, string | ModelConfiguration>
}
12 changes: 10 additions & 2 deletions packages/core/src/models.ts
@@ -13,7 +13,7 @@ import {
import { errorMessage } from "./error"
import { LanguageModelConfiguration, host, runtimeHost } from "./host"
import { AbortSignalOptions, MarkdownTrace, TraceOptions } from "./trace"
import { arrayify, assert, logVerbose } from "./util"
import { arrayify, assert, logVerbose, toStringList } from "./util"

/**
* model
@@ -95,7 +95,15 @@ export function traceLanguageModelConnection(

trace.startDetails(`πŸ”— model aliases`)
Object.entries(runtimeHost.modelAliases).forEach(([key, value]) =>
trace.itemValue(key, value.model)
trace.itemValue(
key,
toStringList(
value.model,
isNaN(value.temperature)
? undefined
: `temperature: ${value.temperature}`
)
)
)
trace.endDetails()
} finally {
Expand Down
9 changes: 9 additions & 0 deletions packages/core/src/testhost.ts
@@ -16,6 +16,7 @@ import {
RuntimeHost,
AzureTokenResolver,
ModelConfigurations,
ModelConfiguration,
} from "./host"
import { TraceOptions } from "./trace"
import {
@@ -83,6 +84,14 @@ export class TestHost implements RuntimeHost {
setRuntimeHost(new TestHost())
}

setModelAlias(
source: "cli" | "env" | "config",
id: string,
value: string | ModelConfiguration
): void {
if (typeof value === "string") value = { model: value }
this.modelAliases[id] = value
}
async readConfig() {
return {}
}
10 changes: 8 additions & 2 deletions packages/sample/genaiscript.config.json
@@ -1,4 +1,10 @@
{
"$schema": "../../core/src/schemas/hostconfiguration.schema.json",
"include": ["../../genaisrc/*.genai.mts"]
"include": ["../../genaisrc/*.genai.mts"],
"modelAliases": {
"llama32": "ollama:llama3.2:1b",
"llama32hot": {
"model": "ollama:llama3.2:1b",
"temperature": 2
}
}
}
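The sample config above mixes the string and object forms of an alias. A consumer can normalize both into a single shape, much as the new `setModelAlias` does; this is an illustrative sketch, not the actual implementation:

```typescript
// Normalize string | ModelConfiguration alias values into one shape.
interface ModelConfiguration {
    model: string;
    temperature?: number;
}

function normalizeAlias(value: string | ModelConfiguration): ModelConfiguration {
    // A bare string is shorthand for { model: value }.
    return typeof value === "string" ? { model: value } : value;
}

const plain = normalizeAlias("ollama:llama3.2:1b");
const hot = normalizeAlias({ model: "ollama:llama3.2:1b", temperature: 2 });
console.log(plain.model, hot.temperature);
```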
