Skip to content

Commit

Permalink
Add support for voice messages (#11)
Browse files Browse the repository at this point in the history
Also:

* Apply OOP for Chat, User objects
* Improve logging
* Re-use system message id
  • Loading branch information
daohoangson authored Nov 21, 2023
1 parent 6d1980d commit 7486b25
Show file tree
Hide file tree
Showing 14 changed files with 388 additions and 221 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@

## Roadmap

- [x] Integrate with Assistants API
- [x] Use GPT-4 Turbo model for conversation
- [x] Integrate with [Assistants API](https://platform.openai.com/docs/assistants/overview)
- [x] Use [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) for conversation
- [x] Use Vision Preview model for image analysis
- [x] Use DALL-E 3 model for image generation
- [x] Use [DALL-E 3](https://platform.openai.com/docs/models/dall-e) for image generation
- [x] Use [whisper-1](https://platform.openai.com/docs/models/whisper) for speech-to-text transcription
- [x] Use [tts-1](https://platform.openai.com/docs/models/tts) for text-to-speech synthesis
- [x] Add support for memory recall
- [ ] Allow bot to search for old messages, and maybe continue previous conversation
- [ ] Add support for reminders
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/interfaces/ai/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export * from "./agent";
export * from "./speech";
export * from "./tool";
9 changes: 9 additions & 0 deletions packages/core/src/interfaces/ai/speech.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Audio payload exchanged with a {@link Speech} implementation: it is the
 * result of `fromText` and the input of `toText`.
 */
export type SpeechData = {
/** Asynchronously resolves a `Blob` (presumably the raw audio bytes — confirm with implementers). */
blob: () => Promise<Blob>;
/** URL associated with the audio (presumably where it can be fetched from — confirm). */
url: string;
};

/**
 * Contract for a speech service that converts between text and audio.
 */
export type Speech = {
/** Converts `text` into audio, resolving to the resulting {@link SpeechData}. */
fromText(text: string): Promise<SpeechData>;
/** Converts the audio in `input` into text, resolving to a string. */
toText(input: SpeechData): Promise<string>;
};
2 changes: 1 addition & 1 deletion packages/core/src/interfaces/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export type ChatPhoto = Chat & {
};

export type ChatText = Chat & {
getTextMessage: () => string;
getTextMessage: () => Promise<string>;
};

export type Reply =
Expand Down
7 changes: 4 additions & 3 deletions packages/functions/src/handlers/telegram-webhook-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Config } from "sst/node/config";
import { kv } from "@bubby/aws";
import { AppContext } from "@bubby/core/interfaces/app";
import { ChatPhoto, ChatText } from "@bubby/core/interfaces/chat";
import { agent } from "@bubby/openai";
import { agent, speech } from "@bubby/openai";
import { onMessage } from "@bubby/telegram";
import { tools } from "src/tools";

Expand All @@ -18,6 +18,7 @@ export async function handleTelegramWebhook(secretToken: string, update: any) {
await onMessage({
onPhoto: (input) => replyToPhoto({ ...input, kv }),
onText: (input) => replyToText({ ...input, kv }),
speech,
update,
});
}
Expand All @@ -30,8 +31,8 @@ function replyToPhoto(ctx: AppContext<ChatPhoto>): Promise<void> {
return respond(ctx, message);
}

const replyToText = (ctx: AppContext<ChatText>) =>
respond(ctx, ctx.chat.getTextMessage());
const replyToText = async (ctx: AppContext<ChatText>) =>
respond(ctx, await ctx.chat.getTextMessage());

const respond = (ctx: AppContext, message: string) =>
agent.respond({ ctx, message, tools });
2 changes: 1 addition & 1 deletion packages/functions/src/tools/image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export const analyzeImage: Tool<z.infer<typeof analyzeImageParameters>> = {
description: "Analyze an image.",
name: "analyze_image",
handler: async ({ ctx, parameters }) => {
ctx.chat.reply({ type: "system", system: "🚨 Analyzing image..." });
ctx.chat.reply({ type: "system", system: "🚨 Analyzing..." });
return visionAnalyzeImage({ ctx, ...parameters });
},
parametersSchema: analyzeImageParameters,
Expand Down
1 change: 0 additions & 1 deletion packages/openai/src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ export const agent: Agent = {
assistantGetNewMessages(threadId, runId, messageIds).then(
(messages) => {
for (const message of messages) {
console.log(JSON.stringify({ loopCount, message }, null, 2));
for (const messageContent of message.content) {
if (messageContent.type === "text") {
const markdown = messageContent.text.value;
Expand Down
1 change: 1 addition & 0 deletions packages/openai/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
export * from "./agent";
export * from "./speech";

// TODO: avoid exporting internal functions
export { assistantThreadIdInsert } from "./internal/assistant_thread";
Expand Down
28 changes: 28 additions & 0 deletions packages/openai/src/internal/audio.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { toFile } from "openai";
import { SpeechCreateParams } from "openai/resources/audio/speech";
import { TranscriptionCreateParams } from "openai/resources/audio/transcriptions";

import { SpeechData } from "@bubby/core/interfaces/ai";
import { openai } from "./openai";

/**
 * Transcribes speech data using the "whisper-1" model.
 *
 * The speech data is first converted to an uploadable file with `toFile`,
 * then submitted to the transcriptions endpoint. The complete response is
 * logged as pretty-printed JSON before its text is returned.
 *
 * @param speechData audio to transcribe
 * @returns the `text` field of the transcription response
 */
export async function audioCreateTranscription(
  speechData: SpeechData
): Promise<string> {
  const file = await toFile(speechData);
  const params: TranscriptionCreateParams = { file, model: "whisper-1" };

  const result = await openai.audio.transcriptions.create(params);
  console.log(JSON.stringify(result, null, 2));

  return result.text;
}

/**
 * Synthesizes speech for the given text using the "tts-1" model with the
 * "alloy" voice, requesting "opus" as the response format.
 *
 * @param input text to convert to speech
 * @returns the response of the speech endpoint, typed as `SpeechData`
 */
export async function audioCreateSpeech(input: string): Promise<SpeechData> {
  const params: SpeechCreateParams = {
    input,
    model: "tts-1",
    response_format: "opus",
    voice: "alloy",
  };

  const response = await openai.audio.speech.create(params);
  return response;
}
7 changes: 7 additions & 0 deletions packages/openai/src/speech.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { Speech } from "@bubby/core/interfaces/ai";
import { audioCreateSpeech, audioCreateTranscription } from "./internal/audio";

/**
 * `Speech` implementation that delegates to the internal audio helpers:
 * `fromText` calls `audioCreateSpeech`, `toText` calls
 * `audioCreateTranscription`.
 */
export const speech: Speech = {
  async fromText(text) {
    return audioCreateSpeech(text);
  },

  toText(speechData) {
    return audioCreateTranscription(speechData);
  },
};
Loading

0 comments on commit 7486b25

Please sign in to comment.