Commit 13b250e: wip
twitchard committed Nov 1, 2024 (1 parent b9ec240)
Showing 9 changed files with 1,598 additions and 199 deletions.
266 changes: 178 additions & 88 deletions evi-react-native-example/EVIExample/App.tsx
@@ -6,7 +6,12 @@ import {
StyleSheet,
ScrollView,
SafeAreaView,
LayoutAnimation,
} from 'react-native';

let outstanding = 0
// We use Hume's low-level typescript SDK for this example.
// The React SDK (@humeai/voice-react) does not support React Native.
import { HumeClient, type Hume } from 'hume'

import * as NativeAudio from './modules/audio';
@@ -17,122 +22,207 @@ interface ChatEntry {
content: string;
}

const isValidBase64 = (str: string) => {
try {
return btoa(atob(str)) === str;
} catch (err) {
return false;
}
}

const hume = new HumeClient({
apiKey: process.env.EXPO_PUBLIC_HUME_API_KEY || ''
})

// EVI can send audio output messages faster than they can be played back.
// It is important to buffer them in a queue so as not to cut off a clip of
// playing audio with a more recent clip.
class AudioQueue {
private tasks: Array<() => Promise<void>> = []
private currentClip: Promise<void> | null = null;

private advance() {
console.log('Advancing audio queue...')
if (this.tasks.length === 0) {
this.currentClip = null;
return
}
this.currentClip = this.tasks.shift()!().then(() => this.advance())
}

public add(playAudio: () => Promise<void>) {
console.log('Adding to queue...')
this.tasks.push(playAudio)

if (!this.currentClip) {
this.advance()
}
}

public clear() {
console.log('Clearing audio queue...')
this.tasks = []
this.currentClip = null
}
}

const audioQueue = new AudioQueue()
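// Usage note (a sketch of the intended contract, not part of the SDK): each task passed to
// `add` should return a promise that resolves only once its clip has finished playing; this
// example assumes NativeAudio.playAudio behaves that way. Clips then play strictly in order:
//
//   audioQueue.add(() => NativeAudio.playAudio(firstBase64Clip))   // placeholder clip names
//   audioQueue.add(() => NativeAudio.playAudio(secondBase64Clip))
//
// The second clip starts only after the first resolves, and clear() drops any clips that
// have not started yet (it does not stop the clip currently playing).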

const App = () => {
const [isConnected, setIsConnected] = useState(false);
const [isMuted, setIsMuted] = useState(false);
const [chatEntries, setChatEntries] = useState<ChatEntry[]>([]);
const addChatEntry = (entry: ChatEntry) => {
  setChatEntries((prev) => [...prev, entry]);
}

// Scroll to the bottom of the chat display when new messages are added
const scrollViewRef = useRef<ScrollView | null>(null);
useEffect(() => {
  if (scrollViewRef.current) {
    LayoutAnimation.configureNext(LayoutAnimation.Presets.easeInEaseOut);
    scrollViewRef.current.scrollToEnd();
  }
}, [chatEntries]);

const chatSocketRef = useRef<Hume.empathicVoice.chat.ChatSocket | null>(null);

useEffect(() => {
  (async () => {
    if (isConnected) {
      try {
        await NativeAudio.getPermissions()
      } catch (error) {
        console.error('Failed to get permissions:', error)
      }
      try {
        await NativeAudio.startRecording()
      } catch (error) {
        console.error('Failed to start recording:', error)
      }

      const chatSocket = hume.empathicVoice.chat.connect({
        configId: process.env.EXPO_PUBLIC_HUME_CONFIG_ID,
      })
      chatSocket.on('open', () => {
        // The code within the native modules converts the default system audio format
        // to linear16 PCM, a standard format recognized by EVI. For linear16 PCM,
        // you must send a `session_settings` message to inform EVI of the correct
        // sample rate.
        chatSocket.sendSessionSettings({
          audio: {
            encoding: "linear16",
            channels: 1,
            sampleRate: NativeAudio.sampleRate,
          }
        })
      })
      chatSocket.on('message', handleIncomingMessage);

      chatSocket.on('error', (error) => {
        console.error("WebSocket Error:", error);
      });

      console.log('Registering handler')
      chatSocket.on('close', () => {
        console.log('Socket closing')
        setIsConnected(false);
      });

      chatSocketRef.current = chatSocket;

      NativeAudio.onAudioInput(({ base64EncodedAudio }: NativeAudio.AudioEventPayload) => {
        if (chatSocket.readyState !== WebSocket.OPEN) {
          console.log('Socket not open, not sending audio input...')
          return
        }
        chatSocket.sendAudioInput({ data: base64EncodedAudio });
      })
    } else {
      try {
        await NativeAudio.stopRecording()
      } catch (error) {
        console.error('Error while stopping recording', error)
      }
      if (chatSocketRef.current) {
        chatSocketRef.current.close();
      }
    }
  })()
  return () => {
    NativeAudio.stopRecording().catch((error) => {
      console.error('Error while stopping recording', error)
    })
    if (chatSocketRef.current && chatSocketRef.current.readyState === WebSocket.OPEN) {
      chatSocketRef.current?.close();
    }
  }
}, [isConnected]);

useEffect(() => {
  if (isMuted) {
    NativeAudio.mute().catch((error) => {
      console.error('Error while muting', error)
    })
  } else {
    NativeAudio.unmute().catch((error) => {
      console.error('Error while unmuting', error)
    });
  }
}, [isMuted])

const handleInterruption = () => {
  console.log("Clearing audio queue...")
  audioQueue.clear()
  NativeAudio.stopPlayback()
}

const handleIncomingMessage = async (message: Hume.empathicVoice.SubscribeEvent) => {
  switch (message.type) {
    case 'error':
      console.error(message);
      break;
    case 'chat_metadata':
      // Contains useful information:
      //  - chat_id: a unique identifier for the chat session, useful if you want to retrieve transcripts later
      //  - chat_group_id: passing a "chat group" allows you to preserve context and resume the same conversation
      //    with EVI in a new websocket connection, e.g. after a disconnection.
      console.log('Received chat metadata:', message);
      break;
    case 'audio_output':
      audioQueue.add(() => NativeAudio.playAudio(message.data));
      break;
    case 'user_message':
    case 'assistant_message':
      if (message.message.role !== 'user' && message.message.role !== 'assistant') {
        console.error(`Unhandled: received message with role: ${message.message.role}`);
        return;
      }
      if (message.type === 'user_message') {
        handleInterruption()
      }
      addChatEntry({
        role: message.message.role,
        timestamp: new Date().toString(),
        content: message.message.content!,
      });
      break;
    case 'user_interruption':
      handleInterruption()
      break;

    // This message type indicates the end of EVI's "turn" in the conversation. It is not
    // needed in this example, but it could be useful in an audio environment without good
    // echo cancellation, where you might auto-mute the user's microphone while EVI is
    // speaking.
    case 'assistant_end':

    // These messages are not needed in this example. They are for EVI's "tool use" feature:
    // https://dev.hume.ai/docs/empathic-voice-interface-evi/tool-use
    case 'tool_call':
    case 'tool_error':
    case 'tool_response':
      console.log(`Received unhandled message type: ${message.type}`);
      break;
    default:
      const _: never = message;
      console.error(`Unexpected message`);
      console.error(message)
      break;
  }
};
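// As a concrete (hypothetical) illustration of the auto-mute idea mentioned above, you could
// reuse this component's isMuted state inside the switch:
//
//   case 'assistant_message':
//     setIsMuted(true);   // keep the mic closed while EVI is speaking
//     // ...existing transcript handling...
//     break;
//   case 'assistant_end':
//     setIsMuted(false);  // reopen the mic once EVI's turn ends
//     break;
//
// The isMuted effect above would then call NativeAudio.mute() / NativeAudio.unmute() accordingly.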

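// Similarly, to resume an earlier conversation after a disconnect (the chat_group_id mentioned
// in the chat_metadata case above), you could record the group id and pass it back when
// reconnecting. Sketch only: the chatGroupId and resumedChatGroupId names are assumptions based
// on the SDK's camelCase conventions, so check the hume package's types before relying on them.
//
//   let lastChatGroupId: string | undefined
//   // in handleIncomingMessage: case 'chat_metadata': lastChatGroupId = message.chatGroupId;
//   const resumedSocket = hume.empathicVoice.chat.connect({
//     configId: process.env.EXPO_PUBLIC_HUME_CONFIG_ID,
//     resumedChatGroupId: lastChatGroupId,
//   })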
@@ -142,7 +232,7 @@ const App = () => {
<View style={styles.header}>
<Text style={styles.headerText}>You are {isConnected ? 'connected' : 'disconnected'}</Text>
</View>
<ScrollView style={styles.chatDisplay} ref={scrollViewRef}>
{chatEntries.map((entry, index) => (
<View
key={index}
Expand All @@ -158,9 +248,9 @@ const App = () => {
<View style={styles.buttonContainer}>
<Button
title={isConnected ? 'Disconnect' : 'Connect'}
onPress={() => setIsConnected(!isConnected)}
/>
<Button title={isMuted ? 'Unmute' : 'Mute'} onPress={() => setIsMuted(!isMuted)} />
</View>
</SafeAreaView>
</View>
32 changes: 32 additions & 0 deletions evi-react-native-example/EVIExample/AudioQueue.test.ts
@@ -0,0 +1,32 @@
import {AudioQueue} from './AudioQueue'
test('AudioQueue', async () => {
const log: Array<string> = []
const aq = new AudioQueue()
log.push('1') // t=0
aq.add(async () => {
// t=0
log.push('2')
await new Promise((resolve) => setTimeout(resolve, 25))
// t=25
log.push('3')
await new Promise((resolve) => setTimeout(resolve, 75))
// t=100
log.push('5')
})
await new Promise((resolve) => setTimeout(resolve, 50))
// t=50
log.push('4')
aq.add(async () => {
// t=100
log.push('6')
await new Promise((resolve) => setTimeout(resolve, 100))
// t=200
log.push('7')
})
await new Promise((resolve) => setTimeout(resolve, 202))
expect(log.join(',')).toBe('1,2,3,4,5,6,7')
// @ts-ignore
expect(aq.tasks).toEqual([])
// @ts-ignore
expect(aq.currentClip).toEqual(null)
})
29 changes: 29 additions & 0 deletions evi-react-native-example/EVIExample/AudioQueue.ts
@@ -0,0 +1,29 @@
export class AudioQueue {
private tasks: Array<() => Promise<void>>
private currentClip: Promise<void> | null;
constructor() {
this.tasks = []
this.currentClip = null
}

private advance() {
if (this.tasks.length === 0) {
this.currentClip = null;
return
}
this.currentClip = this.tasks.shift()!().then(() => this.advance())
}

public add(playAudio: () => Promise<void>) {
this.tasks.push(playAudio)

if (!this.currentClip) {
this.advance()
}
}

public clear() {
this.tasks = []
this.currentClip = null
}
}
