Add caption renderer #675

Open · wants to merge 8 commits into base: feature/rooms-captions
30 changes: 24 additions & 6 deletions src/components/AudioLevelIndicator/AudioLevelIndicator.tsx
@@ -9,20 +9,30 @@ const getUniqueClipId = () => clipId++;

 // @ts-ignore
 const AudioContext = window.AudioContext || window.webkitAudioContext;
-let audioContext: AudioContext;

 export function initializeAnalyser(stream: MediaStream) {
-  audioContext = audioContext || new AudioContext();
+  const audioContext = new AudioContext(); // Create a new audioContext for each audio indicator
   const audioSource = audioContext.createMediaStreamSource(stream);

   const analyser = audioContext.createAnalyser();
   analyser.smoothingTimeConstant = 0.2;
   analyser.fftSize = 256;

   audioSource.connect(analyser);

+  // Here we provide a way for the audioContext to be closed.
+  // Closing the audioContext allows the unused audioSource to be garbage collected.
+  stream.addEventListener('cleanup', () => {
+    if (audioContext.state !== 'closed') {
+      audioContext.close();
+    }
+  });
+
   return analyser;
 }

+const isIOS = /iPhone|iPad/.test(navigator.userAgent);
+
 function AudioLevelIndicator({ audioTrack, color = 'white' }: { audioTrack?: AudioTrack; color?: string }) {
   const SVGRectRef = useRef<SVGRectElement>(null);
   const [analyser, setAnalyser] = useState<AnalyserNode>();
@@ -33,19 +43,27 @@ function AudioLevelIndicator({ audioTrack, color = 'white' }: { audioTrack?: Aud
     if (audioTrack && mediaStreamTrack && isTrackEnabled) {
       // Here we create a new MediaStream from a clone of the mediaStreamTrack.
       // A clone is created to allow multiple instances of this component for a single
-      // AudioTrack on iOS Safari.
-      let newMediaStream = new MediaStream([mediaStreamTrack.clone()]);
+      // AudioTrack on iOS Safari. We only clone the mediaStreamTrack on iOS.
+      let newMediaStream = new MediaStream([isIOS ? mediaStreamTrack.clone() : mediaStreamTrack]);

       // Here we listen for the 'stopped' event on the audioTrack. When the audioTrack is stopped,
       // we stop the cloned track that is stored in 'newMediaStream'. It is important that we stop
       // all tracks when they are not in use. Browsers like Firefox don't let you create a new stream
       // from a new audio device while the active audio device still has active tracks.
-      const stopAllMediaStreamTracks = () => newMediaStream.getTracks().forEach(track => track.stop());
+      const stopAllMediaStreamTracks = () => {
+        if (isIOS) {
+          // If we are on iOS, then we want to stop the MediaStreamTrack that we have previously cloned.
+          // If we are not on iOS, then we do not stop the MediaStreamTrack, since it is the original and still in use.
+          newMediaStream.getTracks().forEach(track => track.stop());
+        }
+        newMediaStream.dispatchEvent(new Event('cleanup')); // Stop the audioContext
+      };
       audioTrack.on('stopped', stopAllMediaStreamTracks);

       const reinitializeAnalyser = () => {
         stopAllMediaStreamTracks();
-        newMediaStream = new MediaStream([mediaStreamTrack.clone()]);
+        // We only clone the mediaStreamTrack on iOS.
+        newMediaStream = new MediaStream([isIOS ? mediaStreamTrack.clone() : mediaStreamTrack]);
         setAnalyser(initializeAnalyser(newMediaStream));
       };
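Note on the 'cleanup' event used above: it is a custom event, not a built-in MediaStream event. Since a MediaStream is an EventTarget, initializeAnalyser can subscribe to it and stopAllMediaStreamTracks can dispatch it to close the AudioContext. A minimal standalone sketch of the pattern (names are illustrative, not part of this diff):

// Sketch: tie a Web Audio resource's teardown to a custom event on its MediaStream.
function attachAnalyser(stream: MediaStream): AnalyserNode {
  const context = new AudioContext();
  const analyser = context.createAnalyser();
  context.createMediaStreamSource(stream).connect(analyser);

  // Close the AudioContext (and free the MediaStreamAudioSourceNode) on teardown.
  stream.addEventListener('cleanup', () => {
    if (context.state !== 'closed') {
      context.close();
    }
  });
  return analyser;
}

// Owner side: a single dispatch reaches every listener registered above.
declare const stream: MediaStream; // assumed to exist for the sketch
const analyser = attachAnalyser(stream);
// ...later, when the underlying track stops:
stream.dispatchEvent(new Event('cleanup'));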
125 changes: 125 additions & 0 deletions src/components/CaptionRenderer/CaptionRenderer.tsx
@@ -0,0 +1,125 @@
import React, { useCallback, useEffect, useState } from 'react';
import { makeStyles } from '@material-ui/core/styles';
import Snackbar from '../Snackbar/Snackbar';
import { TwilioCaptionResult } from './CaptionTypes';
import { Typography } from '@material-ui/core';
import useParticipants from '../../hooks/useParticipants/useParticipants';
import useParticipantTracks from '../../hooks/useParticipantTracks/useParticipantTracks';
import { useAppState } from '../../state';

interface Caption {
  identity: string;
  id: string;
  timestamp: number;
  transcript: string;
}

const useStyles = makeStyles({
  captionContainer: {
    position: 'fixed',
    left: '15%',
    right: '15%',
    top: 'calc(100% - 300px)',
    zIndex: 100,
  },
  caption: {
    color: 'white',
    background: 'rgba(0, 0, 0, 0.8)',
    padding: '0.2em',
    display: 'inline-block',
  },
});

export function CaptionRenderer() {
  const classes = useStyles();
  const [captions, setCaptions] = useState<Caption[]>([]);
  const participants = useParticipants();
  const transcriberParticipant = participants.find(p => p.identity === 'media-transcriber');
  const transcriberTracks = useParticipantTracks(transcriberParticipant);
  const transcriberDataTrack = transcriberTracks.find(
    track => track.kind === 'data' && track.name !== 'transcriber-error'
  );
  const transcriberError = transcriberTracks.find(track => track.kind === 'data' && track.name === 'transcriber-error');
  const { displayCaptions } = useAppState();

  const registerResult = useCallback((captionResult: TwilioCaptionResult) => {
    if (captionResult.transcriptionResponse.TranscriptEvent.Transcript.Results.length) {
      captionResult.transcriptionResponse.TranscriptEvent.Transcript.Results.forEach(result => {
        const transcript = result.Alternatives[0].Transcript;
        const id = result.ResultId;
        const timestamp = Date.now();
        const identity = result.Identity;

        setCaptions(prevCaptions => {
          // Make a copy of the caption array, keeping only the 4 most recent captions
          const arrayCopy = prevCaptions.slice(-4);

          const existingID = arrayCopy.find(item => item.id === id);
          if (existingID) {
            const existingIdIndex = arrayCopy.indexOf(existingID);
            arrayCopy[existingIdIndex] = { transcript, id, timestamp, identity };
          } else {
            arrayCopy.push({ transcript, id, timestamp, identity });
          }

          return arrayCopy;
        });
      });
    }
  }, []);

  useEffect(() => {
    if (transcriberDataTrack) {
      const handleMessage = (message: string) => {
        try {
          registerResult(JSON.parse(message));
        } catch (e) {
          console.log('received unexpected dataTrack message: ', message);
        }
      };
      transcriberDataTrack.on('message', handleMessage);

      return () => {
        transcriberDataTrack.off('message', handleMessage);
      };
    }
  }, [transcriberDataTrack, registerResult]);

  // Every second, we go through the captions, and remove any that are older than ten seconds
  useEffect(() => {
    const intervalId = setInterval(() => {
      setCaptions(prevCaptions => {
        const now = Date.now();
        const filteredCaptions = prevCaptions.filter(caption => caption.timestamp > now - 10000);
        if (filteredCaptions.length !== prevCaptions.length) {
          return filteredCaptions;
        } else {
          // Return the same array reference when nothing expired, so React skips the re-render
          return prevCaptions;
        }
      });
    }, 1000);
    return () => {
      clearInterval(intervalId);
    };
  }, []);

  if (!displayCaptions) return null;

  return (
    <div className={classes.captionContainer}>
      <Snackbar
        variant="error"
        headline="Transcriber Error"
        message="Media transcriber is not connected."
        open={Boolean(transcriberError)}
      />
      {captions.map(caption => (
        <div key={caption.id}>
          <Typography variant="h6" className={classes.caption}>
            {caption.identity}: {caption.transcript}
          </Typography>
        </div>
      ))}
    </div>
  );
}
37 changes: 37 additions & 0 deletions src/components/CaptionRenderer/CaptionTypes.ts
@@ -0,0 +1,37 @@
export interface TwilioCaptionResult {
  transcriptionResponse: TranscriptionResponse;
}

export interface TranscriptionResponse {
  TranscriptEvent: TranscriptEvent;
}

export interface TranscriptEvent {
  Transcript: Transcript;
}

export interface Transcript {
  Results: Result[];
}

export interface Result {
  Alternatives: Alternative[];
  EndTime: number;
  IsPartial: boolean;
  ResultId: string;
  StartTime: number;
  Identity: string;
}

export interface Alternative {
  Items: Item[];
  Transcript: string;
}

export interface Item {
  Content: string;
  EndTime: number;
  StartTime: number;
  Type: string;
  VocabularyFilterMatch: boolean;
}
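For reference, a message on the transcriber data track decodes into this envelope. Transcription services that stream partial results (the IsPartial flag above) typically reuse the same ResultId until a segment is final, which appears to be why CaptionRenderer updates an existing caption in place instead of appending a duplicate. A small illustrative sketch; the helper, identity, and values are hypothetical, not part of this diff:

import { TwilioCaptionResult } from './CaptionTypes';

// Hypothetical helper: wraps a transcript in the TwilioCaptionResult envelope above.
function makeResult(resultId: string, transcript: string, isPartial = true): TwilioCaptionResult {
  return {
    transcriptionResponse: {
      TranscriptEvent: {
        Transcript: {
          Results: [
            {
              Alternatives: [{ Items: [], Transcript: transcript }],
              EndTime: 1.5,
              IsPartial: isPartial,
              ResultId: resultId,
              StartTime: 0,
              Identity: 'alice', // hypothetical participant identity
            },
          ],
        },
      },
    },
  };
}

// One spoken segment arrives as a series of partials sharing a single ResultId:
makeResult('segment-1', 'hello');                 // first partial: rendered as a new caption
makeResult('segment-1', 'hello world');           // same ResultId: caption updated in place
makeResult('segment-1', 'Hello, world.', false);  // final result for the segment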
10 changes: 9 additions & 1 deletion src/components/MenuBar/Menu/Menu.tsx
@@ -1,6 +1,7 @@
 import React, { useState, useRef } from 'react';
 import AboutDialog from '../../AboutDialog/AboutDialog';
 import BackgroundIcon from '../../../icons/BackgroundIcon';
+import ClosedCaptionsIcon from '@material-ui/icons/ClosedCaption';
 import DeviceSelectionDialog from '../../DeviceSelectionDialog/DeviceSelectionDialog';
 import ExpandMoreIcon from '@material-ui/icons/ExpandMore';
 import InfoIconOutlined from '../../../icons/InfoIconOutlined';
@@ -34,7 +35,7 @@ export default function Menu(props: { buttonClassName?: string }) {
   const [menuOpen, setMenuOpen] = useState(false);
   const [settingsOpen, setSettingsOpen] = useState(false);

-  const { isFetching, updateRecordingRules, roomType } = useAppState();
+  const { isFetching, updateRecordingRules, roomType, displayCaptions, setDisplayCaptions } = useAppState();
   const { setIsChatWindowOpen } = useChatContext();
   const isRecording = useIsRecording();
   const { room, setIsBackgroundSelectionOpen } = useVideoContext();
@@ -133,6 +134,13 @@
             <Typography variant="body1">Room Monitor</Typography>
           </MenuItem>

+          <MenuItem onClick={() => setDisplayCaptions(prevDisplayCaptions => !prevDisplayCaptions)}>
+            <IconContainer>
+              <ClosedCaptionsIcon />
+            </IconContainer>
+            <Typography variant="body1">{displayCaptions ? 'Hide Captions' : 'Show Captions'}</Typography>
+          </MenuItem>
+
           <MenuItem onClick={() => setAboutOpen(true)}>
             <IconContainer>
               <InfoIconOutlined />
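Menu and CaptionRenderer read displayCaptions and setDisplayCaptions from useAppState, but their definition is not part of this diff; presumably it lands with the feature/rooms-captions base branch. A minimal sketch of the assumed state, purely illustrative (the provider name and shape are assumptions, not the app's actual AppState code):

import React, { createContext, useContext, useState } from 'react';

// Hypothetical sketch of the assumed app-state addition (not part of this diff).
interface CaptionsState {
  displayCaptions: boolean;
  setDisplayCaptions: React.Dispatch<React.SetStateAction<boolean>>;
}

const CaptionsContext = createContext<CaptionsState>(null!);

export function CaptionsProvider({ children }: { children: React.ReactNode }) {
  const [displayCaptions, setDisplayCaptions] = useState(true);
  return (
    <CaptionsContext.Provider value={{ displayCaptions, setDisplayCaptions }}>{children}</CaptionsContext.Provider>
  );
}

// Consumers would then read the flag and its setter:
export const useCaptionsState = () => useContext(CaptionsContext);

The setter's React.Dispatch<SetStateAction<boolean>> type matters here: it is what lets Menu toggle with a functional update, setDisplayCaptions(prev => !prev).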
2 changes: 1 addition & 1 deletion src/components/ParticipantList/ParticipantList.tsx
@@ -45,7 +45,7 @@ export default function ParticipantList() {
   const classes = useStyles();
   const { room } = useVideoContext();
   const localParticipant = room!.localParticipant;
-  const participants = useParticipants();
+  const participants = useParticipants().filter(p => p.identity !== 'media-transcriber');
   const [selectedParticipant, setSelectedParticipant] = useSelectedParticipant();
   const screenShareParticipant = useScreenShareParticipant();
   const mainParticipant = useMainParticipant();
2 changes: 2 additions & 0 deletions src/components/Room/Room.tsx
@@ -1,6 +1,7 @@
 import React from 'react';
 import clsx from 'clsx';
 import { makeStyles, Theme } from '@material-ui/core';
+import { CaptionRenderer } from '../CaptionRenderer/CaptionRenderer';
 import ChatWindow from '../ChatWindow/ChatWindow';
 import ParticipantList from '../ParticipantList/ParticipantList';
 import MainParticipant from '../MainParticipant/MainParticipant';
@@ -42,6 +43,7 @@ export default function Room() {
       <ParticipantList />
       <ChatWindow />
       <BackgroundSelectionDialog />
+      <CaptionRenderer />
     </div>
   );
 }
2 changes: 1 addition & 1 deletion src/hooks/useMainParticipant/useMainParticipant.tsx
@@ -8,7 +8,7 @@ export default function useMainParticipant() {
   const [selectedParticipant] = useSelectedParticipant();
   const screenShareParticipant = useScreenShareParticipant();
   const dominantSpeaker = useDominantSpeaker();
-  const participants = useParticipants();
+  const participants = useParticipants().filter(p => p.identity !== 'media-transcriber');
   const { room } = useVideoContext();
   const localParticipant = room?.localParticipant;
   const remoteScreenShareParticipant = screenShareParticipant !== localParticipant ? screenShareParticipant : null;
58 changes: 58 additions & 0 deletions src/hooks/useParticipantTracks/useParticipantTracks.test.ts
@@ -0,0 +1,58 @@
import { renderHook, act } from '@testing-library/react-hooks';
import EventEmitter from 'events';
import useTracks from './useParticipantTracks';

describe('the useParticipantTracks hook', () => {
  let mockParticipant: any;

  beforeEach(() => {
    mockParticipant = new EventEmitter();
    mockParticipant.tracks = new Map([
      [0, { track: 'track1' }],
      [1, { track: null }],
      [2, { track: 'track2' }],
    ]);
  });

  it('should return an array of mockParticipant.tracks by default, filtering out null tracks', () => {
    const { result } = renderHook(() => useTracks(mockParticipant));
    expect(result.current).toEqual(['track1', 'track2']);
  });

  it('should respond to "trackSubscribed" events', async () => {
    const { result } = renderHook(() => useTracks(mockParticipant));
    act(() => {
      mockParticipant.emit('trackSubscribed', 'newMockTrack');
    });
    expect(result.current).toEqual(['track1', 'track2', 'newMockTrack']);
  });

  it('should respond to "trackUnsubscribed" events', async () => {
    const { result } = renderHook(() => useTracks(mockParticipant));
    act(() => {
      mockParticipant.emit('trackUnsubscribed', 'track1');
    });
    expect(result.current).toEqual(['track2']);
  });

  it('should return a new set of tracks if the participant changes', () => {
    const { result, rerender } = renderHook(({ participant }) => useTracks(participant), {
      initialProps: { participant: mockParticipant },
    });
    expect(result.current).toEqual(['track1', 'track2']);
    mockParticipant = new EventEmitter();
    mockParticipant.tracks = new Map([
      [0, { track: 'track3' }],
      [1, { track: 'track4' }],
    ]);
    rerender({ participant: mockParticipant });
    expect(result.current).toEqual(['track3', 'track4']);
  });

  it('should clean up listeners on unmount', () => {
    const { unmount } = renderHook(() => useTracks(mockParticipant));
    unmount();
    expect(mockParticipant.listenerCount('trackSubscribed')).toBe(0);
    expect(mockParticipant.listenerCount('trackUnsubscribed')).toBe(0);
  });
});
29 changes: 29 additions & 0 deletions src/hooks/useParticipantTracks/useParticipantTracks.ts
@@ -0,0 +1,29 @@
import { RemoteParticipant, RemoteTrack } from 'twilio-video';
import { useEffect, useState } from 'react';

export default function useParticipantTracks(participant: RemoteParticipant | undefined) {
  const [tracks, setTracks] = useState<RemoteTrack[]>([]);

  useEffect(() => {
    if (participant) {
      const subscribedTracks = Array.from(participant.tracks.values())
        .filter(trackPublication => trackPublication.track !== null)
        .map(trackPublication => trackPublication.track!);

      setTracks(subscribedTracks);

      const handleTrackSubscribed = (track: RemoteTrack) => setTracks(prevTracks => [...prevTracks, track]);
      const handleTrackUnsubscribed = (track: RemoteTrack) =>
        setTracks(prevTracks => prevTracks.filter(t => t !== track));

      participant.on('trackSubscribed', handleTrackSubscribed);
      participant.on('trackUnsubscribed', handleTrackUnsubscribed);
      return () => {
        participant.off('trackSubscribed', handleTrackSubscribed);
        participant.off('trackUnsubscribed', handleTrackUnsubscribed);
      };
    }
  }, [participant]);

  return tracks;
}
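As a usage note, the hook works for any remote participant, not just the transcriber. A hypothetical consumer (component name, import paths, and identity prop are illustrative, not part of this diff):

import React from 'react';
import useParticipants from '../useParticipants/useParticipants';
import useParticipantTracks from './useParticipantTracks';

// Hypothetical component: lists the subscribed tracks of one remote participant.
export default function TrackKindList({ identity }: { identity: string }) {
  const participant = useParticipants().find(p => p.identity === identity);
  const tracks = useParticipantTracks(participant); // re-renders on subscribe/unsubscribe

  return (
    <ul>
      {tracks.map(track => (
        <li key={track.name}>
          {track.name} ({track.kind})
        </li>
      ))}
    </ul>
  );
}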