Prompt reorg for caching + usage collection (#743)
* Optimize chat caching by repositioning definitions in prompt structure.

* Update node sorting logic to include "image" type in layoutPromptNode function

* Add ephemeral property to PromptNode and update sorting logic

* Fix typo and add prompt caching section to context.md

* Add chat usage tracking and refactor session handling

* Update token usage logging and fix token accumulation logic in chat processing

* Add ChatCompletionUsages to GenerationOptions and refactor usage handling

* Update loop to iterate over 'usages' instead of 'result.usages'

* Update log format for token usage details in CLI output
pelikhan authored Oct 1, 2024
1 parent 820ca82 commit a64b1be
Showing 28 changed files with 243 additions and 47 deletions.
5 changes: 5 additions & 0 deletions docs/genaisrc/genaiscript.d.ts


11 changes: 10 additions & 1 deletion docs/src/content/docs/reference/scripts/context.md
@@ -106,7 +106,7 @@ def("DIFF", gitdiff, { language: "diff" })
### Referencing

The `def` function returns a variable name that can be used in the prompt.
The name might be formatted diferently to accommodate the model's preference.
The name might be formatted differently to accommodate the model's preference.

```js "const f = "
const f = def("FILE", file)
@@ -182,6 +182,15 @@ def("FILE", env.files, { sliceTail: 100 })
def("FILE", env.files, { sliceSample: 100 })
```
### Prompt Caching
You can specify `ephemeral: true` to enable prompt caching optimizations. In particular, a `def` with `ephemeral` will be rendered at the back of the prompt
to persist the [cache prefix](https://openai.com/index/api-prompt-caching/).
```js
def("FILE", env.files, { ephemeral: true })
```
## Data definition (`defData`)
The `defData` function offers additional formatting options for converting a data object into a textual representation. It supports rendering objects as YAML, JSON, or CSV (formatted as a markdown table).
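As an editor's illustration (not part of this diff), here is a minimal sketch of how `defData` might be called, assuming it accepts a `format` option matching the renderings listed above:
```js
// sketch: render a small dataset as a CSV (markdown table) in the prompt
// the `format` option name is an assumption based on the description above
defData("ROWS", [
    { file: "chat.ts", additions: 54, deletions: 27 },
    { file: "run.ts", additions: 14, deletions: 5 },
], { format: "csv" })
```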
5 changes: 5 additions & 0 deletions genaisrc/genaiscript.d.ts


5 changes: 5 additions & 0 deletions packages/auto/genaiscript.d.ts


19 changes: 14 additions & 5 deletions packages/cli/src/run.ts
@@ -6,7 +6,10 @@ import { convertDiagnosticsToSARIF } from "./sarif"
import { buildProject } from "./build"
import { diagnosticsToCSV } from "../../core/src/ast"
import { CancellationOptions } from "../../core/src/cancellation"
import { ChatCompletionsProgressReport } from "../../core/src/chattypes"
import {
ChatCompletionsProgressReport,
ChatCompletionUsages,
} from "../../core/src/chattypes"
import { runTemplate } from "../../core/src/promptrunner"
import {
githubCreateIssueComment,
@@ -244,7 +247,7 @@ export async function runScript(
(acc, v) => ({ ...acc, ...parseKeyValuePair(v) }),
{}
)
let tokens = 0
const usages: ChatCompletionUsages = {}
try {
if (options.label) trace.heading(2, options.label)
const { info } = await resolveModelConnectionInfo(script, {
@@ -262,6 +265,7 @@
trace.options.encoder = await resolveTokenEncoder(info.model)
await runtimeHost.models.pullModel(info.model)
result = await runTemplate(prj, script, fragment, {
usages,
inner: false,
infoCb: (args) => {
const { text } = args
@@ -272,7 +276,6 @@
},
partialCb: (args) => {
const { responseChunk, tokensSoFar, inner } = args
tokens = tokensSoFar
if (responseChunk !== undefined) {
if (stream) {
if (!inner) process.stdout.write(responseChunk)
@@ -523,7 +526,13 @@
if (failOnErrors && result.annotations?.some((a) => a.severity === "error"))
return fail("error annotations found", ANNOTATION_ERROR_CODE)

logVerbose("genaiscript: done\n")
if (outTraceFilename) logVerbose(`trace: ${outTraceFilename}`)
logVerbose("genaiscript: done")
for (const [key, value] of Object.entries(usages)) {
if (value.total_tokens > 0)
logVerbose(
`tokens: ${key}, ${value.total_tokens} (${value.prompt_tokens} => ${value.completion_tokens})`
)
}
if (outTraceFilename) logVerbose(` trace: ${outTraceFilename}`)
return { exitCode: 0, result }
}
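For context (editor's note, not part of the diff): given the format string in the loop above, the per-model token summary printed in verbose mode would look roughly like this, with a hypothetical model name and counts:
```text
genaiscript: done
tokens: gpt-4o-mini, 1532 (1250 => 282)
```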
81 changes: 54 additions & 27 deletions packages/core/src/chat.ts
@@ -28,6 +28,8 @@ import {
ChatCompletionResponse,
ChatCompletionsOptions,
ChatCompletionTool,
ChatCompletionUsage,
ChatCompletionUsages,
ChatCompletionUserMessageParam,
CreateChatCompletionRequest,
} from "./chattypes"
@@ -369,7 +371,7 @@ function structurifyChatSession(
err?: any
}
): RunPromptResult {
const { trace, responseType, responseSchema } = options
const { trace, responseType, responseSchema, usages } = options
const { resp, err } = others || {}
const text = assistantText(messages, responseType)
const annotations = parseAnnotations(text)
@@ -426,10 +428,12 @@
error,
genVars,
schemas,
usages,
}
}

async function processChatMessage(
req: CreateChatCompletionRequest,
resp: ChatCompletionResponse,
messages: ChatCompletionMessageParam[],
tools: ToolCallback[],
@@ -443,8 +447,11 @@
maxToolCalls = MAX_TOOL_CALLS,
trace,
cancellationToken,
usages,
} = options

accumulateChatUsage(usages, req.model, resp.usage)

if (resp.text)
messages.push({
role: "assistant",
@@ -534,11 +541,29 @@ export function mergeGenerationOptions(
}
}

function accumulateChatUsage(
usages: ChatCompletionUsages,
model: string,
usage: ChatCompletionUsage
) {
if (!usage) return

const u =
usages[model] ??
(usages[model] = <ChatCompletionUsage>{
completion_tokens: 0,
prompt_tokens: 0,
total_tokens: 0,
})
u.completion_tokens += usage.completion_tokens ?? 0
u.prompt_tokens += usage.prompt_tokens ?? 0
u.total_tokens += usage.total_tokens ?? 0
}

export async function executeChatSession(
connectionToken: LanguageModelConfiguration,
cancellationToken: CancellationToken,
messages: ChatCompletionMessageParam[],
vars: Partial<ExpansionVariables>,
toolDefinitions: ToolCallback[],
schemas: Record<string, JSONSchema>,
completer: ChatCompletionHandler,
@@ -585,34 +610,35 @@
let resp: ChatCompletionResponse
try {
checkCancelled(cancellationToken)
const req: CreateChatCompletionRequest = {
model,
temperature: temperature,
top_p: topP,
max_tokens: maxTokens,
seed,
stream: true,
messages,
tools,
response_format:
responseType === "json_object"
? { type: responseType }
: responseType === "json_schema"
? {
type: "json_schema",
json_schema: {
name: "result",
schema: toStrictJSONSchema(
responseSchema
),
strict: true,
},
}
: undefined,
}
try {
trace.startDetails(`📤 llm request`)
resp = await completer(
{
model,
temperature: temperature,
top_p: topP,
max_tokens: maxTokens,
seed,
stream: true,
messages,
tools,
response_format:
responseType === "json_object"
? { type: responseType }
: responseType === "json_schema"
? {
type: "json_schema",
json_schema: {
name: "result",
schema: toStrictJSONSchema(
responseSchema
),
strict: true,
},
}
: undefined,
},
req,
connectionToken,
genOptions,
trace
@@ -625,6 +651,7 @@
}

const output = await processChatMessage(
req,
resp,
messages,
toolDefinitions,
10 changes: 10 additions & 0 deletions packages/core/src/chattypes.ts
@@ -18,6 +18,15 @@ export interface AICIRequest {
}

// Aliases for OpenAI chat completion types
export type ChatCompletionUsage = Omit<
OpenAI.Completions.CompletionUsage,
"completion_tokens_details"
>

/**
* Per model storage of chat completion usages.
*/
export type ChatCompletionUsages = Record<string, ChatCompletionUsage>

// Text content part of a chat completion
export type ChatCompletionContentPartText =
@@ -99,6 +108,7 @@
toolCalls?: ChatCompletionToolCall[] // List of tool calls made during the response
finishReason?: // Reason why the chat completion finished
"stop" | "length" | "tool_calls" | "content_filter" | "cancel" | "fail"
usage?: ChatCompletionUsage // Usage information for the completion
}

// Alias for OpenAI's API error type
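As an editor's sketch (not part of the commit), the per-model usage map introduced by these types would hold data shaped roughly like this, with hypothetical model names and token counts:
```ts
// hypothetical contents of a ChatCompletionUsages record after calls to two models
const usages: ChatCompletionUsages = {
    "gpt-4o": { prompt_tokens: 1250, completion_tokens: 282, total_tokens: 1532 },
    "gpt-4o-mini": { prompt_tokens: 310, completion_tokens: 95, total_tokens: 405 },
}
```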
5 changes: 5 additions & 0 deletions packages/core/src/genaisrc/genaiscript.d.ts


12 changes: 11 additions & 1 deletion packages/core/src/generation.ts
@@ -1,7 +1,11 @@
// Import necessary modules and interfaces
import { CancellationToken } from "./cancellation"
import { LanguageModel } from "./chat"
import { ChatCompletionMessageParam, ChatCompletionsOptions } from "./chattypes"
import {
ChatCompletionMessageParam,
ChatCompletionsOptions,
ChatCompletionUsages,
} from "./chattypes"
import { MarkdownTrace } from "./trace"

// Represents a code fragment with associated files
@@ -56,6 +60,11 @@ export interface GenerationResult extends GenerationOutput {
*/
finishReason?: string

/**
* Token usage statistics if reported by LLM
*/
usages?: ChatCompletionUsages

/**
* Optional label for the run
*/
@@ -96,4 +105,5 @@ export interface GenerationOptions
}
vars?: PromptParameters // Variables for prompt customization
stats: GenerationStats // Statistics of the generation
usages: ChatCompletionUsages
}