-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow bot to analyze / generate image (#2)
Also: * Fix incorrect message order if run generates more than 1 * Include instructions, model and tools for each Assistant run * Convert markdown to safe HTML for Telegram * Use `zod` to ensure params schema * Mask file URL for security
- Loading branch information
1 parent
97183d4
commit 6c84021
Showing
15 changed files
with
611 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import { generateSchema } from "@anatine/zod-openapi"; | ||
import { FunctionParameters } from "openai/resources"; | ||
import { RunCreateParams } from "openai/resources/beta/threads/runs/runs"; | ||
import { z } from "zod"; | ||
|
||
/**
 * Zod schema for the arguments of the `analyze_image` function tool.
 *
 * Each field carries a `description`. `temperature` is constrained to the
 * inclusive range 0–2 that its description advertises, so out-of-range values
 * are rejected at validation time.
 */
export const analyzeImageParameters = z.object({
  image_url: z.string({
    description: "The image URL.",
  }),
  prompt: z.string({
    description: "The prompt to ask the Vision AI model to analyze.",
  }),
  temperature: z
    .number({
      description:
        "What sampling temperature to use, between 0 and 2. " +
        "Higher values like 0.8 will make the output more random, " +
        "while lower values like 0.2 will make it more focused and deterministic.",
    })
    // Enforce the documented bounds instead of accepting any number.
    .min(0)
    .max(2),
});
|
||
/**
 * Assistant function-tool definition named `analyze_image`.
 *
 * The tool's parameter schema is generated from `analyzeImageParameters`.
 */
export const analyzeImage: RunCreateParams.AssistantToolsFunction = {
  type: "function",
  function: {
    name: "analyze_image",
    description: "Analyze an image.",
    parameters: generateSchema(analyzeImageParameters) as FunctionParameters,
  },
};
|
||
/** Image sizes accepted by the `size` argument of the `generate_image` tool. */
const generateImageSizes = ["1024x1024", "1792x1024", "1024x1792"] as const;

/**
 * Zod schema for the arguments of the `generate_image` function tool:
 * a free-form `prompt` and one of the fixed `size` values.
 */
export const generateImageParameters = z.object({
  prompt: z.string({
    description: "The prompt to ask the Vision AI model to generate.",
  }),
  size: z.enum(generateImageSizes, {
    description: "The size of the generated images.",
  }),
});
|
||
/**
 * Assistant function-tool definition named `generate_image`.
 *
 * The tool's parameter schema is generated from `generateImageParameters`.
 */
export const generateImage: RunCreateParams.AssistantToolsFunction = {
  type: "function",
  function: {
    name: "generate_image",
    description: "Generate an image.",
    parameters: generateSchema(generateImageParameters) as FunctionParameters,
  },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import { generateSchema } from "@anatine/zod-openapi"; | ||
import { FunctionParameters } from "openai/resources"; | ||
import { RunCreateParams } from "openai/resources/beta/threads/runs/runs"; | ||
import { z } from "zod"; | ||
|
||
/**
 * Assistant function-tool definition named `new_thread`.
 *
 * The tool takes no arguments, so its parameter schema is an object with no
 * properties and nothing required.
 */
export const newThread: RunCreateParams.AssistantToolsFunction = {
  type: "function",
  function: {
    name: "new_thread",
    description: "Discard the recent messages and start a new thread.",
    parameters: { type: "object", properties: {}, required: [] },
  },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import { | ||
ChatCompletionCreateParamsNonStreaming, | ||
ImageGenerateParams, | ||
} from "openai/resources"; | ||
|
||
import { openai } from "./openai"; | ||
import { AssistantThreadInput } from "./assistant_thread"; | ||
|
||
/**
 * Asks the vision chat model to answer `prompt` about an image.
 *
 * `image_url` is first passed to `input.chat.unmaskFileUrl`; when that
 * resolves to a string, the resolved URL is sent to the model in place of the
 * URL that was given. The request is logged using the URL as given, so the
 * unmasked URL is never written to the log.
 *
 * @param input - Thread input; its `chat.unmaskFileUrl` is used to resolve the URL.
 * @param params - Tool arguments: `image_url`, `prompt` and sampling `temperature`.
 * @returns The message content of the first completion choice, or `""` when
 *   there is no choice or the content is null.
 */
export async function visionAnalyzeImage(
  input: AssistantThreadInput,
  {
    image_url,
    prompt,
    temperature,
  }: {
    image_url: string;
    prompt: string;
    temperature: number;
  }
): Promise<string> {
  const unmaskedUrl = await input.chat.unmaskFileUrl(image_url);
  const url = typeof unmaskedUrl === "string" ? unmaskedUrl : image_url;

  // Builds the request for a given image URL so the logged copy and the copy
  // actually sent differ only in that URL.
  const buildBody = (
    imageUrl: string
  ): ChatCompletionCreateParamsNonStreaming => ({
    messages: [
      {
        content: [
          { type: "text", text: prompt },
          { type: "image_url", image_url: { url: imageUrl } },
        ],
        role: "user",
      },
    ],
    model: "gpt-4-vision-preview",
    max_tokens: 1024,
    temperature,
  });

  // Log the request with the URL as received, not the unmasked one.
  console.log(JSON.stringify(buildBody(image_url), null, 2));
  const completion = await openai.chat.completions.create(buildBody(url));
  console.log(JSON.stringify(completion, null, 2));

  // Guard against an empty `choices` array instead of throwing a TypeError.
  return completion.choices[0]?.message.content ?? "";
}
|
||
/**
 * Generates an image with the `dall-e-3` model and returns its URL.
 *
 * The request and the raw response are both logged as pretty-printed JSON.
 *
 * @param params - Tool arguments: the `prompt` and the requested `size`.
 * @returns The first generated image as `{ caption, url }`, where `caption` is
 *   the image's `revised_prompt` when present and the original `prompt`
 *   otherwise; `undefined` when the response has no first image or that image
 *   has no URL.
 */
export async function visionGenerateImage({
  prompt,
  size,
}: {
  prompt: string;
  size: ImageGenerateParams["size"];
}): Promise<{ caption: string; url: string } | undefined> {
  const body: ImageGenerateParams = {
    prompt,
    model: "dall-e-3",
    response_format: "url",
    size,
  };
  console.log(JSON.stringify(body, null, 2));
  const completion = await openai.images.generate(body);
  console.log(JSON.stringify(completion, null, 2));

  const image = completion.data?.[0];
  const url = image?.url;
  // Never hand callers an `undefined` URL typed as `string`: treat a missing
  // image or a missing URL as "nothing generated".
  if (image == null || typeof url !== "string") {
    return undefined;
  }

  return {
    caption: image.revised_prompt ?? prompt,
    url,
  };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import { marked } from "marked"; | ||
import sanitizeHtml from "sanitize-html"; | ||
|
||
/**
 * Renders Markdown to HTML and strips it down to the tags listed in
 * Telegram's Bot API formatting options.
 *
 * Supplying `allowedAttributes` replaces sanitize-html's default attribute
 * allow-list, so every attribute those tags need is listed explicitly;
 * otherwise `<a>` would lose its `href`, `<span>` its `class` and `<tg-emoji>`
 * its `emoji-id`.
 *
 * @param markdown - Markdown source text.
 * @returns Sanitized HTML containing only the allowed tags and attributes.
 */
export function convertMarkdownToSafeHtml(markdown: string): string {
  const html = marked.parse(markdown);
  return sanitizeHtml(html, {
    allowedAttributes: {
      // <a href="http://www.example.com/">inline URL</a>
      a: ["href"],
      // <code class="language-python">…</code>
      code: ["class"],
      // <span class="tg-spoiler">spoiler</span>
      span: ["class"],
      // <tg-emoji emoji-id="5368324170671202286">👍</tg-emoji>
      "tg-emoji": ["emoji-id"],
    },
    allowedTags: [
      // https://core.telegram.org/bots/api#formatting-options
      // <b>bold</b>, <strong>bold</strong>
      "b",
      "strong",
      // <i>italic</i>, <em>italic</em>
      "i",
      "em",
      // <u>underline</u>, <ins>underline</ins>
      "u",
      "ins",
      // <s>strikethrough</s>, <strike>strikethrough</strike>, <del>strikethrough</del>
      "s",
      "strike",
      "del",
      // <span class="tg-spoiler">spoiler</span>, <tg-spoiler>spoiler</tg-spoiler>
      "span",
      "tg-spoiler",
      // <a href="http://www.example.com/">inline URL</a>
      // <a href="tg://user?id=123456789">inline mention of a user</a>
      "a",
      // <tg-emoji emoji-id="5368324170671202286">👍</tg-emoji>
      "tg-emoji",
      // <code>inline fixed-width code</code>
      "code",
      // <pre>pre-formatted fixed-width code block</pre>
      "pre",
    ],
  });
}
Oops, something went wrong.