diff --git a/example/lib/stt_service.dart b/example/lib/stt_service.dart index 510719f..136b51a 100644 --- a/example/lib/stt_service.dart +++ b/example/lib/stt_service.dart @@ -1,388 +1,389 @@ -// import 'dart:async'; -// import 'dart:collection'; -// import 'dart:math' as math; -// import 'dart:typed_data'; - -// import 'package:flutter/foundation.dart'; -// import 'package:fonnx/models/sileroVad/silero_vad.dart'; -// import 'package:fonnx/models/whisper/whisper.dart'; - -// /// A single frame of audio data. -// /// -// /// Added to allow memoization of the VAD inference and subsequent clipping out -// /// audio frames that are not speech. e.g. getting silence clipped out amounts -// /// to frames.where(!isSilent).map(bytes).toList(). -// class AudioFrame { -// final Uint8List bytes; - -// /// Probability that the frame contains speech. -// /// -// /// The VAD outputs a float from 0 to 1, representing the probability that -// /// the frame contains speech. >= this value is considered speech when -// /// deciding which frames to keep and when to stop recording, and also -// /// the value of the [AudioFrame.isSilent]. -// double? vadP; -// AudioFrame({required this.bytes}); -// } - -// class SttServiceResponse { -// final String transcription; -// final List audioFrames; - -// SttServiceResponse({required this.transcription, required this.audioFrames}); -// } - -// class GetMicrophoneResponse { -// final Stream audioStream; -// final AudioRecorder audioRecorder; - -// GetMicrophoneResponse( -// {required this.audioStream, required this.audioRecorder}); -// } - -// class GetExistingBytesResponse { -// final Stream audioStream; -// final StreamController audioStreamController; - -// GetExistingBytesResponse( -// {required this.audioStream, required this.audioStreamController}); -// } - -// class SttService { -// // Rationale for PCM: -// // - PCM streaming is universally supported on all platforms. -// // - Streaming is not supported for all other codecs. -// // - Not all codecs are supported on all platforms. -// // - Whisper input expects at least WAV/MP3, and PCM is trival to convert -// // to WAV. (only requires adding header) -// // - Observed when using `record` package on 2024 Feb 2. -// /// Format of audio bytes from microphone. -// static const kEncoder = AudioEncoder.pcm16bits; - -// /// Sample rate in Hz -// static const int kSampleRate = 16000; - -// // Rationale for 1 channel: -// // - Whisper needs ORT Extensions in order to decode anything other than -// // signed 16-bit PCM audio in 1 channel at 16kHz. -// // - ORT Extensions are not supported on web. -// // - Generally, 1 channel is sufficient for speech recognition, it is -// // both best practice and supported universally. -// /// Number of audio channels -// static const int kChannels = 1; - -// /// Bits per sample, assuming 16-bit PCM audio -// static const int kBitsPerSample = 16; - -// /// Maximum VAD frame duration in milliseconds -// static const int kMaxVadFrameMs = 30; - -// /// Recommended VAD probability threshold for speech. -// /// Tuned to accept whispering. -// static const double kVadPIsVoiceThreshold = 0.1; - -// final Duration maxDuration; - -// /// If and only if: -// /// - There was at least one frame of speech, and -// /// - The last N frames were silent and their duration is >= this value, -// /// then the recording will stop. -// final Duration maxSilenceDuration; -// final String vadModelPath; -// final String whisperModelPath; - -// /// Values >= this are considered speech. 
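// A quick worked check of the constants above (sketch only; the helper name is
// hypothetical, and it assumes the 1000 ms default for maxSilenceDuration used
// in the constructor below):
void vadTimingSketch() {
  // One 30 ms VAD frame of 16 kHz mono 16-bit PCM:
  // 16000 * 30 * 1 * 2 ~/ 1000 = 960 bytes.
  const bytesPerVadFrame = SttService.kSampleRate *
      SttService.kMaxVadFrameMs *
      SttService.kChannels *
      (SttService.kBitsPerSample ~/ 8) ~/
      1000;
  assert(bytesPerVadFrame == 960);
  // Once any frame has scored >= voiceThreshold, recording stops after the
  // trailing silent frames cover maxSilenceDuration: 34 * 30 ms = 1020 ms >= 1000 ms.
  const framesToStop =
      (1000 + SttService.kMaxVadFrameMs - 1) ~/ SttService.kMaxVadFrameMs;
  assert(framesToStop == 34);
}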
-// /// -// /// The VAD outputs a float from 0 to 1, representing the probability that -// /// the frame contains speech. >= this value is considered speech when -// /// deciding which frames to keep and when to stop recording. -// final double voiceThreshold; - -// final sessionManager = WhisperSessionManager(); - -// var lastVadState = {}; -// var lastVadStateIndex = 0; -// bool stopped = false; -// Timer? stopForMaxDurationTimer; - -// SttService({ -// required this.vadModelPath, -// required this.whisperModelPath, -// this.maxDuration = const Duration(seconds: 10), -// this.maxSilenceDuration = const Duration(milliseconds: 1000), -// this.voiceThreshold = kVadPIsVoiceThreshold, -// }); - -// Stream transcribe() { -// final StreamController controller = -// StreamController(); -// _start(controller); -// return controller.stream; -// } - -// void stop() { -// stopForMaxDurationTimer?.cancel(); -// stopped = true; -// } - -// void _start(StreamController streamController) async { -// Uint8List audioBuffer = Uint8List(0); -// final List frames = []; -// final getMicrophoneResponse = await _getMicrophoneStreamThrows(); -// final audioStream = getMicrophoneResponse.audioStream; - -// stopForMaxDurationTimer = Timer(maxDuration, () { -// debugPrint('[SttService] Stopping due to max duration.'); -// stop(); -// stopForMaxDurationTimer = null; -// }); - -// final vad = SileroVad.load(vadModelPath); -// var stoppedAudioRecorderForStoppedStream = false; -// audioStream.listen((event) { -// if (stopped && !stoppedAudioRecorderForStoppedStream) { -// stoppedAudioRecorderForStoppedStream = true; -// getMicrophoneResponse.audioRecorder.stop(); -// return; -// } -// audioBuffer = Uint8List.fromList(audioBuffer + event); -// const maxVadFrameSizeInBytes = kSampleRate * -// kMaxVadFrameMs * -// kChannels * -// (kBitsPerSample / 8) ~/ -// 1000; -// final remainder = audioBuffer.length % maxVadFrameSizeInBytes; -// final vadBufferLength = audioBuffer.length - remainder; -// final vadBuffer = audioBuffer.sublist(0, vadBufferLength); -// _vadBufferQueue.add(vadBuffer); -// audioBuffer = audioBuffer.sublist(vadBufferLength); -// }); -// _vadInferenceLoop(vad, frames, streamController); -// _whisperInferenceLoop( -// Whisper.load(whisperModelPath), -// frames, -// streamController, -// ); -// } - -// final Queue _vadBufferQueue = Queue(); -// void _vadInferenceLoop( -// SileroVad vad, -// List frames, -// StreamController streamController, -// ) async { -// if (stopped) { -// return; -// } -// final hasBuffer = _vadBufferQueue.isNotEmpty; -// if (hasBuffer) { -// final buffer = _vadBufferQueue.removeFirst(); -// await _processBufferAndVad(vad, buffer, frames, streamController); -// _vadInferenceLoop(vad, frames, streamController); -// } else { -// Future.delayed(const Duration(milliseconds: kMaxVadFrameMs), -// () => _vadInferenceLoop(vad, frames, streamController)); -// } -// } - -// Future _processBufferAndVad( -// SileroVad vad, -// Uint8List buffer, -// List frames, -// StreamController streamController) async { -// // Process buffer into frames for VAD -// final frameSizeInBytes = -// (kSampleRate * kMaxVadFrameMs * kChannels * (kBitsPerSample / 8)) -// .toInt() ~/ -// 1000; -// int index = 0; -// while ((index + 1) * frameSizeInBytes <= buffer.length) { -// final startIdx = index * frameSizeInBytes; -// final endIdx = (index + 1) * frameSizeInBytes; -// final frameBytes = buffer.sublist(startIdx, endIdx); -// final frame = AudioFrame(bytes: frameBytes); -// frames.add(frame); -// final idx = 
frames.length - 1; -// final nextVdState = -// await vad.doInference(frameBytes, previousState: lastVadState); -// lastVadState = nextVdState; -// lastVadStateIndex = idx; -// final p = (nextVdState['output'] as Float32List).first; -// frames[idx].vadP = p; -// if (!stopped) { -// streamController.add(SttServiceResponse( -// transcription: sessionManager.transcription, -// audioFrames: frames, -// )); -// } else { -// break; -// } - -// if (_shouldStopForSilence(frames)) { -// if (kDebugMode) { -// print('[SttService] Stopping due to silence.'); -// } -// stop(); -// } -// index++; -// } -// } - -// bool _shouldStopForSilence(List frames) { -// if (frames.isEmpty) { -// return false; -// } -// final frameThatIsSpeech = frames.any((frame) { -// return frame.vadP != null && frame.vadP! >= voiceThreshold; -// }); -// if (!frameThatIsSpeech) { -// return false; -// } -// final isSilenceThreshold = voiceThreshold; -// final lastNFrames = frames.reversed.takeWhile((frame) { -// return frame.vadP != null && frame.vadP! < isSilenceThreshold; -// }).toList(); -// final lastNSilenceDuration = lastNFrames.length * kMaxVadFrameMs; -// return lastNSilenceDuration >= maxSilenceDuration.inMilliseconds; -// } - -// // Recursively run whisper inference on collected frames -// void _whisperInferenceLoop( -// Whisper whisper, -// List frames, -// StreamController streamController, -// ) async { -// Future doIt() async { -// final voiceFrames = sessionManager.getAudioFrames( -// frames: frames, -// voiceThresholdSegmentEnd: voiceThreshold, -// multipleSegments: false, -// ); -// if (voiceFrames.isEmpty) { -// return; -// } -// final bytesToInferBuilder = BytesBuilder(copy: false); -// for (final frame in voiceFrames) { -// bytesToInferBuilder.add(frame.bytes); -// } -// final bytesToInfer = bytesToInferBuilder.takeBytes(); -// final result = (await whisper.doInference(bytesToInfer)).trim(); -// sessionManager.addInferenceResult(result, voiceFrames.first); -// streamController.add(SttServiceResponse( -// transcription: sessionManager.transcription, -// audioFrames: frames, -// )); -// } - -// void scheduleNextInference() async { -// if (!stopped) { -// Future.delayed(const Duration(milliseconds: 16), -// () => _whisperInferenceLoop(whisper, frames, streamController)); -// return; -// } -// // Stopped. -// // Do one last inference with all audio bytes, then close the stream. -// await doIt(); -// streamController.close(); -// } - -// await doIt(); -// scheduleNextInference(); -// } -// } - -// class WhisperSessionManager { -// var _frozenTranscription = ''; -// String? _lastInferenceResult; -// AudioFrame? _lastFirstInferenceInputFrame; - -// String get transcription { -// if (_lastInferenceResult != null && _lastInferenceResult!.isNotEmpty) { -// final StringBuffer sb = StringBuffer(); -// sb.write(_frozenTranscription); -// sb.write(' '); -// sb.write(_lastInferenceResult); -// return sb.toString(); -// } -// return _frozenTranscription; -// } - - -// List getAudioFrames({ -// required List frames, -// required double voiceThresholdSegmentEnd, -// /// If true, all frames without speech are discarded. -// /// If false, only the first frames without speech are discarded. -// /// This significantly helps inference quality on at least Whisper Tiny. -// /// The downside is, if the audio is > 30s, the inference cannot be -// /// completed. -// required bool multipleSegments, -// }) { -// if (!multipleSegments) { -// final firstVoiceFrameIndex = frames.indexWhere((frame) => -// frame.vadP != null && frame.vadP! 
>= voiceThresholdSegmentEnd); -// if (firstVoiceFrameIndex == -1) { -// return []; -// } -// return frames.sublist(firstVoiceFrameIndex); -// } +import 'dart:async'; +import 'dart:collection'; +import 'dart:math' as math; +import 'dart:typed_data'; + +import 'package:flutter/foundation.dart'; +import 'package:fonnx/models/sileroVad/silero_vad.dart'; +import 'package:fonnx/models/whisper/whisper.dart'; +import 'package:record/record.dart'; + +/// A single frame of audio data. +/// +/// Added to allow memoization of the VAD inference and subsequent clipping out +/// audio frames that are not speech. e.g. getting silence clipped out amounts +/// to frames.where(!isSilent).map(bytes).toList(). +class AudioFrame { + final Uint8List bytes; + + /// Probability that the frame contains speech. + /// + /// The VAD outputs a float from 0 to 1, representing the probability that + /// the frame contains speech. >= this value is considered speech when + /// deciding which frames to keep and when to stop recording, and also + /// the value of the [AudioFrame.isSilent]. + double? vadP; + AudioFrame({required this.bytes}); +} + +class SttServiceResponse { + final String transcription; + final List audioFrames; + + SttServiceResponse({required this.transcription, required this.audioFrames}); +} + +class GetMicrophoneResponse { + final Stream audioStream; + final AudioRecorder audioRecorder; + + GetMicrophoneResponse( + {required this.audioStream, required this.audioRecorder}); +} + +class GetExistingBytesResponse { + final Stream audioStream; + final StreamController audioStreamController; + + GetExistingBytesResponse( + {required this.audioStream, required this.audioStreamController}); +} + +class SttService { + // Rationale for PCM: + // - PCM streaming is universally supported on all platforms. + // - Streaming is not supported for all other codecs. + // - Not all codecs are supported on all platforms. + // - Whisper input expects at least WAV/MP3, and PCM is trival to convert + // to WAV. (only requires adding header) + // - Observed when using `record` package on 2024 Feb 2. + /// Format of audio bytes from microphone. + static const kEncoder = AudioEncoder.pcm16bits; + + /// Sample rate in Hz + static const int kSampleRate = 16000; + + // Rationale for 1 channel: + // - Whisper needs ORT Extensions in order to decode anything other than + // signed 16-bit PCM audio in 1 channel at 16kHz. + // - ORT Extensions are not supported on web. + // - Generally, 1 channel is sufficient for speech recognition, it is + // both best practice and supported universally. + /// Number of audio channels + static const int kChannels = 1; + + /// Bits per sample, assuming 16-bit PCM audio + static const int kBitsPerSample = 16; + + /// Maximum VAD frame duration in milliseconds + static const int kMaxVadFrameMs = 30; + + /// Recommended VAD probability threshold for speech. + /// Tuned to accept whispering. + static const double kVadPIsVoiceThreshold = 0.1; + + final Duration maxDuration; + + /// If and only if: + /// - There was at least one frame of speech, and + /// - The last N frames were silent and their duration is >= this value, + /// then the recording will stop. + final Duration maxSilenceDuration; + final String vadModelPath; + final String whisperModelPath; + + /// Values >= this are considered speech. + /// + /// The VAD outputs a float from 0 to 1, representing the probability that + /// the frame contains speech. 
>= this value is considered speech when + /// deciding which frames to keep and when to stop recording. + final double voiceThreshold; + + final sessionManager = WhisperSessionManager(); + + var lastVadState = {}; + var lastVadStateIndex = 0; + bool stopped = false; + Timer? stopForMaxDurationTimer; + + SttService({ + required this.vadModelPath, + required this.whisperModelPath, + this.maxDuration = const Duration(seconds: 10), + this.maxSilenceDuration = const Duration(milliseconds: 1000), + this.voiceThreshold = kVadPIsVoiceThreshold, + }); + + Stream transcribe() { + final StreamController controller = + StreamController(); + _start(controller); + return controller.stream; + } + + void stop() { + stopForMaxDurationTimer?.cancel(); + stopped = true; + } + + void _start(StreamController streamController) async { + Uint8List audioBuffer = Uint8List(0); + final List frames = []; + final getMicrophoneResponse = await _getMicrophoneStreamThrows(); + final audioStream = getMicrophoneResponse.audioStream; + + stopForMaxDurationTimer = Timer(maxDuration, () { + debugPrint('[SttService] Stopping due to max duration.'); + stop(); + stopForMaxDurationTimer = null; + }); + + final vad = SileroVad.load(vadModelPath); + var stoppedAudioRecorderForStoppedStream = false; + audioStream.listen((event) { + if (stopped && !stoppedAudioRecorderForStoppedStream) { + stoppedAudioRecorderForStoppedStream = true; + getMicrophoneResponse.audioRecorder.stop(); + return; + } + audioBuffer = Uint8List.fromList(audioBuffer + event); + const maxVadFrameSizeInBytes = kSampleRate * + kMaxVadFrameMs * + kChannels * + (kBitsPerSample / 8) ~/ + 1000; + final remainder = audioBuffer.length % maxVadFrameSizeInBytes; + final vadBufferLength = audioBuffer.length - remainder; + final vadBuffer = audioBuffer.sublist(0, vadBufferLength); + _vadBufferQueue.add(vadBuffer); + audioBuffer = audioBuffer.sublist(vadBufferLength); + }); + _vadInferenceLoop(vad, frames, streamController); + _whisperInferenceLoop( + Whisper.load(whisperModelPath), + frames, + streamController, + ); + } + + final Queue _vadBufferQueue = Queue(); + void _vadInferenceLoop( + SileroVad vad, + List frames, + StreamController streamController, + ) async { + if (stopped) { + return; + } + final hasBuffer = _vadBufferQueue.isNotEmpty; + if (hasBuffer) { + final buffer = _vadBufferQueue.removeFirst(); + await _processBufferAndVad(vad, buffer, frames, streamController); + _vadInferenceLoop(vad, frames, streamController); + } else { + Future.delayed(const Duration(milliseconds: kMaxVadFrameMs), + () => _vadInferenceLoop(vad, frames, streamController)); + } + } + + Future _processBufferAndVad( + SileroVad vad, + Uint8List buffer, + List frames, + StreamController streamController) async { + // Process buffer into frames for VAD + final frameSizeInBytes = + (kSampleRate * kMaxVadFrameMs * kChannels * (kBitsPerSample / 8)) + .toInt() ~/ + 1000; + int index = 0; + while ((index + 1) * frameSizeInBytes <= buffer.length) { + final startIdx = index * frameSizeInBytes; + final endIdx = (index + 1) * frameSizeInBytes; + final frameBytes = buffer.sublist(startIdx, endIdx); + final frame = AudioFrame(bytes: frameBytes); + frames.add(frame); + final idx = frames.length - 1; + final nextVdState = + await vad.doInference(frameBytes, previousState: lastVadState); + lastVadState = nextVdState; + lastVadStateIndex = idx; + final p = (nextVdState['output'] as Float32List).first; + frames[idx].vadP = p; + if (!stopped) { + streamController.add(SttServiceResponse( + transcription: 
sessionManager.transcription, + audioFrames: frames, + )); + } else { + break; + } + + if (_shouldStopForSilence(frames)) { + if (kDebugMode) { + print('[SttService] Stopping due to silence.'); + } + stop(); + } + index++; + } + } + + bool _shouldStopForSilence(List frames) { + if (frames.isEmpty) { + return false; + } + final frameThatIsSpeech = frames.any((frame) { + return frame.vadP != null && frame.vadP! >= voiceThreshold; + }); + if (!frameThatIsSpeech) { + return false; + } + final isSilenceThreshold = voiceThreshold; + final lastNFrames = frames.reversed.takeWhile((frame) { + return frame.vadP != null && frame.vadP! < isSilenceThreshold; + }).toList(); + final lastNSilenceDuration = lastNFrames.length * kMaxVadFrameMs; + return lastNSilenceDuration >= maxSilenceDuration.inMilliseconds; + } + + // Recursively run whisper inference on collected frames + void _whisperInferenceLoop( + Whisper whisper, + List frames, + StreamController streamController, + ) async { + Future doIt() async { + final voiceFrames = sessionManager.getAudioFrames( + frames: frames, + voiceThresholdSegmentEnd: voiceThreshold, + multipleSegments: false, + ); + if (voiceFrames.isEmpty) { + return; + } + final bytesToInferBuilder = BytesBuilder(copy: false); + for (final frame in voiceFrames) { + bytesToInferBuilder.add(frame.bytes); + } + final bytesToInfer = bytesToInferBuilder.takeBytes(); + final result = (await whisper.doInference(bytesToInfer)).trim(); + sessionManager.addInferenceResult(result, voiceFrames.first); + streamController.add(SttServiceResponse( + transcription: sessionManager.transcription, + audioFrames: frames, + )); + } + + void scheduleNextInference() async { + if (!stopped) { + Future.delayed(const Duration(milliseconds: 16), + () => _whisperInferenceLoop(whisper, frames, streamController)); + return; + } + // Stopped. + // Do one last inference with all audio bytes, then close the stream. + await doIt(); + streamController.close(); + } + + await doIt(); + scheduleNextInference(); + } +} + +class WhisperSessionManager { + var _frozenTranscription = ''; + String? _lastInferenceResult; + AudioFrame? _lastFirstInferenceInputFrame; + + String get transcription { + if (_lastInferenceResult != null && _lastInferenceResult!.isNotEmpty) { + final StringBuffer sb = StringBuffer(); + sb.write(_frozenTranscription); + sb.write(' '); + sb.write(_lastInferenceResult); + return sb.toString(); + } + return _frozenTranscription; + } + + + List getAudioFrames({ + required List frames, + required double voiceThresholdSegmentEnd, + /// If true, all frames without speech are discarded. + /// If false, only the first frames without speech are discarded. + /// This significantly helps inference quality on at least Whisper Tiny. + /// The downside is, if the audio is > 30s, the inference cannot be + /// completed. + required bool multipleSegments, + }) { + if (!multipleSegments) { + final firstVoiceFrameIndex = frames.indexWhere((frame) => + frame.vadP != null && frame.vadP! >= voiceThresholdSegmentEnd); + if (firstVoiceFrameIndex == -1) { + return []; + } + return frames.sublist(firstVoiceFrameIndex); + } -// var indexOfLastSegmentStart = -1; -// for (var i = frames.length - 1; i >= 0; i--) { -// final currentIndexInSegment = -// frames[i].vadP != null && frames[i].vadP! >= voiceThresholdSegmentEnd; -// final hasPreviousIndex = i - 1 >= 0; -// final previousIndexIsOutsideSegment = hasPreviousIndex && -// frames[i - 1].vadP != null && -// frames[i - 1].vadP! 
< voiceThresholdSegmentEnd; -// if (currentIndexInSegment && previousIndexIsOutsideSegment) { -// indexOfLastSegmentStart = i; -// break; -// } -// } - -// final framesToInference = indexOfLastSegmentStart == -1 -// ? [] -// : frames.sublist(math.max(0, indexOfLastSegmentStart - 3)); -// return framesToInference; -// } - -// void addInferenceResult(String result, AudioFrame firstInferenceInputFrame) { -// final lastResult = _lastInferenceResult; -// final isNewSegment = lastResult != null && -// _lastFirstInferenceInputFrame != firstInferenceInputFrame; -// if (isNewSegment) { -// if (_frozenTranscription.isNotEmpty && lastResult.isNotEmpty) { -// _frozenTranscription += ' '; -// } -// _frozenTranscription += lastResult; -// } -// _lastFirstInferenceInputFrame = firstInferenceInputFrame; -// _lastInferenceResult = result; -// } -// } - -// // Throws an error if the microphone stream cannot be obtained. -// Future _getMicrophoneStreamThrows() async { -// final audioRecorder = AudioRecorder(); - -// final hasPermission = await audioRecorder.hasPermission(); -// if (!hasPermission) { -// throw 'Denied permission to record audio.'; -// } - -// final stream = await audioRecorder.startStream( -// const RecordConfig( -// encoder: AudioEncoder.pcm16bits, -// numChannels: SttService.kChannels, -// sampleRate: SttService.kSampleRate, -// echoCancel: false, -// noiseSuppress: false, -// ), -// ); - -// return GetMicrophoneResponse( -// audioStream: stream, -// audioRecorder: audioRecorder, -// ); -// } + var indexOfLastSegmentStart = -1; + for (var i = frames.length - 1; i >= 0; i--) { + final currentIndexInSegment = + frames[i].vadP != null && frames[i].vadP! >= voiceThresholdSegmentEnd; + final hasPreviousIndex = i - 1 >= 0; + final previousIndexIsOutsideSegment = hasPreviousIndex && + frames[i - 1].vadP != null && + frames[i - 1].vadP! < voiceThresholdSegmentEnd; + if (currentIndexInSegment && previousIndexIsOutsideSegment) { + indexOfLastSegmentStart = i; + break; + } + } + + final framesToInference = indexOfLastSegmentStart == -1 + ? [] + : frames.sublist(math.max(0, indexOfLastSegmentStart - 3)); + return framesToInference; + } + + void addInferenceResult(String result, AudioFrame firstInferenceInputFrame) { + final lastResult = _lastInferenceResult; + final isNewSegment = lastResult != null && + _lastFirstInferenceInputFrame != firstInferenceInputFrame; + if (isNewSegment) { + if (_frozenTranscription.isNotEmpty && lastResult.isNotEmpty) { + _frozenTranscription += ' '; + } + _frozenTranscription += lastResult; + } + _lastFirstInferenceInputFrame = firstInferenceInputFrame; + _lastInferenceResult = result; + } +} + +// Throws an error if the microphone stream cannot be obtained. 
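// On success, the stream yields raw PCM16 chunks with no container. Per the
// rationale at the top of SttService, PCM is used because it streams on every
// platform, and converting it to WAV only means prepending a header (the
// 44-byte RIFF/WAVE header the demo widget's playback path builds).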
+Future _getMicrophoneStreamThrows() async { + final audioRecorder = AudioRecorder(); + + final hasPermission = await audioRecorder.hasPermission(); + if (!hasPermission) { + throw 'Denied permission to record audio.'; + } + + final stream = await audioRecorder.startStream( + const RecordConfig( + encoder: AudioEncoder.pcm16bits, + numChannels: SttService.kChannels, + sampleRate: SttService.kSampleRate, + echoCancel: false, + noiseSuppress: false, + ), + ); + + return GetMicrophoneResponse( + audioStream: stream, + audioRecorder: audioRecorder, + ); +} diff --git a/example/lib/tts_demo_widget.dart b/example/lib/tts_demo_widget.dart index bf51340..eca0bf2 100644 --- a/example/lib/tts_demo_widget.dart +++ b/example/lib/tts_demo_widget.dart @@ -22,342 +22,338 @@ class TtsDemoWidget extends StatefulWidget { } class _TtsDemoWidgetState extends State { + SttServiceResponse? _sttServiceResponse; + StreamSubscription? _sttStreamSubscription; + SttService? _sttService; + AudioPlayer? _audioPlayer; + var _sttIsVoiceThreshold = SttService.kVadPIsVoiceThreshold; + @override Widget build(BuildContext context) { - return Container(); + final lastVoiceFrameIndex = _sttServiceResponse?.audioFrames.lastIndexWhere( + (element) { + final threshold = element?.vadP; + if (threshold == null) { + return false; + } + return threshold > _sttIsVoiceThreshold; + }, + ); + return Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text( + 'Voice Assistant Query Demo', + style: Theme.of(context).textTheme.headlineLarge, + ), + const Text( + 'Mic to Silero VAD for voice detection and Whisper Tiny for STT.\nOther Whisper models are available and much higher quality.'), + heightPadding, + Row( + mainAxisAlignment: MainAxisAlignment.start, + children: [ + ElevatedButton.icon( + onPressed: _runVadDemo, + icon: _sttService == null + ? const Icon(Icons.mic) + : const Icon(Icons.mic_off), + label: _sttService == null + ? const Text('Listen') + : const Text('Stop'), + ), + ], + ), + if (_sttServiceResponse != null) ...[ + heightPadding, + SelectableText( + _sttServiceResponse!.transcription, + style: Theme.of(context).textTheme.bodyMedium, + ), + heightPadding, + SizedBox( + height: 100, + child: Row( + mainAxisAlignment: MainAxisAlignment.start, + crossAxisAlignment: CrossAxisAlignment.end, + children: _sttServiceResponse!.audioFrames + .mapIndexed((index, e) { + final Color color; + if (e?.vadP == null) { + color = Colors.transparent; + } else if (e!.vadP! > _sttIsVoiceThreshold) { + color = Colors.green; + } else { + color = Colors.red; + } + return Flexible( + child: Tooltip( + showDuration: Duration.zero, + waitDuration: Duration.zero, + message: e?.vadP == null + ? 'not recorded yet' + : '${(e!.vadP! * 100).toStringAsFixed(0)}%\n@${index * SttService.kMaxVadFrameMs}ms', + child: Container( + constraints: const BoxConstraints( + minWidth: 0, + maxWidth: 10, + ), + height: 100 * (e?.vadP == null ? 
1 : e!.vadP!), + color: color, + ), + ), + ); + }).toList(), + ), + ), + if (lastVoiceFrameIndex != null && lastVoiceFrameIndex >= 0) ...[ + heightPadding, + Text( + 'Last detected voice: @${lastVoiceFrameIndex * SttService.kMaxVadFrameMs}ms'), + if (_sttService == null && _sttServiceResponse != null) + Text( + 'Endpointer: @${(_sttServiceResponse!.audioFrames.length - 1) * SttService.kMaxVadFrameMs}ms'), + ], + heightPadding, + Text( + 'Voice threshold: ${(_sttIsVoiceThreshold * 100).toStringAsFixed(0)}%'), + Slider( + label: '${(_sttIsVoiceThreshold * 100).toStringAsFixed(0)}%', + value: _sttIsVoiceThreshold, + onChanged: (value) { + setState( + () { + _sttIsVoiceThreshold = value; + }, + ); + }, + ), + ElevatedButton.icon( + icon: const Icon(Icons.play_arrow), + label: const Text("Play"), + onPressed: _playButtonPressed, + ), + ] + ], + ); } -// SttServiceResponse? _sttServiceResponse; -// StreamSubscription? _sttStreamSubscription; -// SttService? _sttService; -// AudioPlayer? _audioPlayer; -// var _sttIsVoiceThreshold = SttService.kVadPIsVoiceThreshold; - -// @override -// Widget build(BuildContext context) { -// final lastVoiceFrameIndex = _sttServiceResponse?.audioFrames.lastIndexWhere( -// (element) { -// final threshold = element?.vadP; -// if (threshold == null) { -// return false; -// } -// return threshold > _sttIsVoiceThreshold; -// }, -// ); -// return Column( -// crossAxisAlignment: CrossAxisAlignment.start, -// children: [ -// Text( -// 'Voice Assistant Query Demo', -// style: Theme.of(context).textTheme.headlineLarge, -// ), -// const Text( -// 'Mic to Silero VAD for voice detection and Whisper Tiny for STT.\nOther Whisper models are available and much higher quality.'), -// heightPadding, -// Row( -// mainAxisAlignment: MainAxisAlignment.start, -// children: [ -// ElevatedButton.icon( -// onPressed: _runVadDemo, -// icon: _sttService == null -// ? const Icon(Icons.mic) -// : const Icon(Icons.mic_off), -// label: _sttService == null -// ? const Text('Listen') -// : const Text('Stop'), -// ), -// ], -// ), -// if (_sttServiceResponse != null) ...[ -// heightPadding, -// SelectableText( -// _sttServiceResponse!.transcription, -// style: Theme.of(context).textTheme.bodyMedium, -// ), -// heightPadding, -// SizedBox( -// height: 100, -// child: Row( -// mainAxisAlignment: MainAxisAlignment.start, -// crossAxisAlignment: CrossAxisAlignment.end, -// children: _sttServiceResponse!.audioFrames -// .mapIndexed((index, e) { -// final Color color; -// if (e?.vadP == null) { -// color = Colors.transparent; -// } else if (e!.vadP! > _sttIsVoiceThreshold) { -// color = Colors.green; -// } else { -// color = Colors.red; -// } -// return Flexible( -// child: Tooltip( -// showDuration: Duration.zero, -// waitDuration: Duration.zero, -// message: e?.vadP == null -// ? 'not recorded yet' -// : '${(e!.vadP! * 100).toStringAsFixed(0)}%\n@${index * SttService.kMaxVadFrameMs}ms', -// child: Container( -// constraints: const BoxConstraints( -// minWidth: 0, -// maxWidth: 10, -// ), -// height: 100 * (e?.vadP == null ? 
1 : e!.vadP!), -// color: color, -// ), -// ), -// ); -// }).toList(), -// ), -// ), -// if (lastVoiceFrameIndex != null && lastVoiceFrameIndex >= 0) ...[ -// heightPadding, -// Text( -// 'Last detected voice: @${lastVoiceFrameIndex * SttService.kMaxVadFrameMs}ms'), -// if (_sttService == null && _sttServiceResponse != null) -// Text( -// 'Endpointer: @${(_sttServiceResponse!.audioFrames.length - 1) * SttService.kMaxVadFrameMs}ms'), -// ], -// heightPadding, -// Text( -// 'Voice threshold: ${(_sttIsVoiceThreshold * 100).toStringAsFixed(0)}%'), -// Slider( -// label: '${(_sttIsVoiceThreshold * 100).toStringAsFixed(0)}%', -// value: _sttIsVoiceThreshold, -// onChanged: (value) { -// setState( -// () { -// _sttIsVoiceThreshold = value; -// }, -// ); -// }, -// ), -// ElevatedButton.icon( -// icon: const Icon(Icons.play_arrow), -// label: const Text("Play"), -// onPressed: _playButtonPressed, -// ), -// ] -// ], -// ); -// } -// void _playButtonPressed() async { -// final frames = _sttServiceResponse!.audioFrames; -// int? firstFrameIndex; -// int? lastFrameIndex; -// for (var i = 0; i < frames.length; i++) { -// final frame = frames[i]; -// if (frame?.vadP == null) { -// // Audio hasn't been processed. -// continue; // Continues to next iteration if audio hasn't been processed, instead of breaking. -// } -// // Check for the first non-silent frame -// if (firstFrameIndex == null && frame!.vadP! > _sttIsVoiceThreshold) { -// firstFrameIndex = i; -// } -// // Update the last non-silent frame index whenever a non-silent frame is encountered -// if (frame!.vadP! > _sttIsVoiceThreshold) { -// lastFrameIndex = i; -// } -// } -// // Return or perform further actions only if both first and last non-silent frames are found -// if (firstFrameIndex == null || lastFrameIndex == null) { -// return; -// } -// final framesToProcess = frames.sublist( -// math.max(firstFrameIndex - 3, 0), -// math.min(lastFrameIndex + 10, frames.length), -// ); -// debugPrint( -// 'Detected ${framesToProcess.length} frames of voice (from ${frames.whereType().toList().length} of audio @ threshold $_sttIsVoiceThreshold)'); -// final indexOfFirstSpeech = frames.indexWhere((frame) { -// return frame?.vadP != null && frame!.vadP! >= _sttIsVoiceThreshold; -// }); -// // Intent: capture ~100ms of audio before the first speech. -// final startIndex = math.max(0, indexOfFirstSpeech - 3); -// final voiceFrames = frames -// .sublist(startIndex) -// .where((e) => e?.vadP != null && e!.vadP! > _sttIsVoiceThreshold); + void _playButtonPressed() async { + final frames = _sttServiceResponse!.audioFrames; + int? firstFrameIndex; + int? lastFrameIndex; + for (var i = 0; i < frames.length; i++) { + final frame = frames[i]; + if (frame?.vadP == null) { + // Audio hasn't been processed. + continue; // Continues to next iteration if audio hasn't been processed, instead of breaking. + } + // Check for the first non-silent frame + if (firstFrameIndex == null && frame!.vadP! > _sttIsVoiceThreshold) { + firstFrameIndex = i; + } + // Update the last non-silent frame index whenever a non-silent frame is encountered + if (frame!.vadP! 
> _sttIsVoiceThreshold) { + lastFrameIndex = i; + } + } +// Return or perform further actions only if both first and last non-silent frames are found + if (firstFrameIndex == null || lastFrameIndex == null) { + return; + } + final framesToProcess = frames.sublist( + math.max(firstFrameIndex - 3, 0), + math.min(lastFrameIndex + 10, frames.length), + ); + debugPrint( + 'Detected ${framesToProcess.length} frames of voice (from ${frames.whereType().toList().length} of audio @ threshold $_sttIsVoiceThreshold)'); + final indexOfFirstSpeech = frames.indexWhere((frame) { + return frame?.vadP != null && frame!.vadP! >= _sttIsVoiceThreshold; + }); + // Intent: capture ~100ms of audio before the first speech. + final startIndex = math.max(0, indexOfFirstSpeech - 3); + final voiceFrames = frames + .sublist(startIndex) + .where((e) => e?.vadP != null && e!.vadP! > _sttIsVoiceThreshold); -// Uint8List generateWavHeader( -// int pcmDataLength, { -// required int bitsPerSample, -// required int numChannels, -// required int sampleRate, -// }) { -// int fileSize = pcmDataLength + -// 44 - -// 8; // Add WAV header size except for 'RIFF' and its size field -// int byteRate = sampleRate * numChannels * bitsPerSample ~/ 8; -// int blockAlign = numChannels * bitsPerSample ~/ 8; + Uint8List generateWavHeader( + int pcmDataLength, { + required int bitsPerSample, + required int numChannels, + required int sampleRate, + }) { + int fileSize = pcmDataLength + + 44 - + 8; // Add WAV header size except for 'RIFF' and its size field + int byteRate = sampleRate * numChannels * bitsPerSample ~/ 8; + int blockAlign = numChannels * bitsPerSample ~/ 8; -// var header = Uint8List(44); -// var buffer = ByteData.view(header.buffer); + var header = Uint8List(44); + var buffer = ByteData.view(header.buffer); -// // RIFF header -// buffer.setUint32(0, 0x52494646, Endian.big); // 'RIFF' -// buffer.setUint32(4, fileSize, Endian.little); -// buffer.setUint32(8, 0x57415645, Endian.big); // 'WAVE' + // RIFF header + buffer.setUint32(0, 0x52494646, Endian.big); // 'RIFF' + buffer.setUint32(4, fileSize, Endian.little); + buffer.setUint32(8, 0x57415645, Endian.big); // 'WAVE' -// // fmt subchunk -// buffer.setUint32(12, 0x666d7420, Endian.big); // 'fmt ' -// buffer.setUint32(16, 16, Endian.little); // Subchunk1 size (16 for PCM) -// buffer.setUint16(20, 1, Endian.little); // Audio format (1 for PCM) -// buffer.setUint16(22, numChannels, Endian.little); // Number of channels -// buffer.setUint32(24, sampleRate, Endian.little); // Sample rate -// buffer.setUint32(28, byteRate, Endian.little); // Byte rate -// buffer.setUint16(32, blockAlign, Endian.little); // Block align -// buffer.setUint16(34, bitsPerSample, Endian.little); // Bits per sample + // fmt subchunk + buffer.setUint32(12, 0x666d7420, Endian.big); // 'fmt ' + buffer.setUint32(16, 16, Endian.little); // Subchunk1 size (16 for PCM) + buffer.setUint16(20, 1, Endian.little); // Audio format (1 for PCM) + buffer.setUint16(22, numChannels, Endian.little); // Number of channels + buffer.setUint32(24, sampleRate, Endian.little); // Sample rate + buffer.setUint32(28, byteRate, Endian.little); // Byte rate + buffer.setUint16(32, blockAlign, Endian.little); // Block align + buffer.setUint16(34, bitsPerSample, Endian.little); // Bits per sample -// // data subchunk -// buffer.setUint32(36, 0x64617461, Endian.big); // 'data' -// buffer.setUint32( -// 40, pcmDataLength, Endian.little); // Subchunk2 size (PCM data size) + // data subchunk + buffer.setUint32(36, 0x64617461, Endian.big); // 
'data' + buffer.setUint32( + 40, pcmDataLength, Endian.little); // Subchunk2 size (PCM data size) -// return header; -// } + return header; + } -// Uint8List generateWavFile( -// List pcmData, { -// required int bitsPerSample, -// required int numChannels, -// required int sampleRate, -// }) { -// final header = generateWavHeader( -// pcmData.length, -// sampleRate: sampleRate, -// numChannels: numChannels, -// bitsPerSample: bitsPerSample, -// ); -// final wavFile = Uint8List(header.length + pcmData.length); -// wavFile.setAll(0, header); -// wavFile.setAll(header.length, pcmData); -// return wavFile; -// } + Uint8List generateWavFile( + List pcmData, { + required int bitsPerSample, + required int numChannels, + required int sampleRate, + }) { + final header = generateWavHeader( + pcmData.length, + sampleRate: sampleRate, + numChannels: numChannels, + bitsPerSample: bitsPerSample, + ); + final wavFile = Uint8List(header.length + pcmData.length); + wavFile.setAll(0, header); + wavFile.setAll(header.length, pcmData); + return wavFile; + } -// /// Returns null if no frames are above the threshold. -// Uint8List? wavFromFrames( -// {required List frames, required double minVadP}) { -// final bytes = frames -// .where((e) => e.vadP != null && e.vadP! >= minVadP) -// .map((e) => e.bytes) -// .expand((element) => element); -// if (bytes.isEmpty) { -// return null; -// } -// final bytesList = bytes.toList(); -// return generateWavFile( -// bytesList, -// bitsPerSample: SttService.kBitsPerSample, -// numChannels: SttService.kChannels, -// sampleRate: SttService.kSampleRate, -// ); -// } + /// Returns null if no frames are above the threshold. + Uint8List? wavFromFrames( + {required List frames, required double minVadP}) { + final bytes = frames + .where((e) => e.vadP != null && e.vadP! 
>= minVadP) + .map((e) => e.bytes) + .expand((element) => element); + if (bytes.isEmpty) { + return null; + } + final bytesList = bytes.toList(); + return generateWavFile( + bytesList, + bitsPerSample: SttService.kBitsPerSample, + numChannels: SttService.kChannels, + sampleRate: SttService.kSampleRate, + ); + } -// final playWav = wavFromFrames( -// frames: voiceFrames.nonNulls.toList(), -// minVadP: 0, -// ); -// if (playWav == null) { -// debugPrint('No frames with voice, skipping WAV creation.'); -// return; -// } -// if (kIsWeb) { -// String base64String = base64Encode(playWav); + final playWav = wavFromFrames( + frames: voiceFrames.nonNulls.toList(), + minVadP: 0, + ); + if (playWav == null) { + debugPrint('No frames with voice, skipping WAV creation.'); + return; + } + if (kIsWeb) { + String base64String = base64Encode(playWav); -// // Step 4: Create the data URL -// final url = 'data:audio/wav;base64,$base64String'; -// _audioPlayer ??= AudioPlayer(); -// _audioPlayer!.play(UrlSource(url)); -// } else { -// final tempDir = await path_provider.getTemporaryDirectory(); -// final playWavPath = path.join(tempDir.path, 'voice.wav'); -// final playWavFile = File(playWavPath); -// await playWavFile.writeAsBytes(playWav); -// debugPrint('Wrote voice to $playWavPath'); -// _audioPlayer ??= AudioPlayer(); -// _audioPlayer!.play(DeviceFileSource(playWavPath)); -// } -// } + // Step 4: Create the data URL + final url = 'data:audio/wav;base64,$base64String'; + _audioPlayer ??= AudioPlayer(); + _audioPlayer!.play(UrlSource(url)); + } else { + final tempDir = await path_provider.getTemporaryDirectory(); + final playWavPath = path.join(tempDir.path, 'voice.wav'); + final playWavFile = File(playWavPath); + await playWavFile.writeAsBytes(playWav); + debugPrint('Wrote voice to $playWavPath'); + _audioPlayer ??= AudioPlayer(); + _audioPlayer!.play(DeviceFileSource(playWavPath)); + } + } -// void _runVadDemo() async { -// if (_sttStreamSubscription != null) { -// setState(() { -// _sttStreamSubscription?.cancel(); -// _sttStreamSubscription = null; -// _sttService?.stop(); -// _sttService = null; -// }); -// return; -// } -// final vadModelPath = await getModelPath('silero_vad.onnx'); -// final whisperModelPath = await getWhisperModelPath('whisper_tiny.onnx'); -// final service = SttService( -// vadModelPath: vadModelPath, -// whisperModelPath: whisperModelPath, -// voiceThreshold: _sttIsVoiceThreshold, -// maxDuration: const Duration(seconds: 10), -// ); -// _sttService = service; -// final subscription = service.transcribe().listen((event) { -// setState(() { -// _sttServiceResponse = event; -// }); -// }); -// _sttStreamSubscription = subscription; -// subscription.onDone(() { -// setState(() { -// _sttStreamSubscription = null; -// _sttService = null; -// }); -// }); -// } + void _runVadDemo() async { + if (_sttStreamSubscription != null) { + setState(() { + _sttStreamSubscription?.cancel(); + _sttStreamSubscription = null; + _sttService?.stop(); + _sttService = null; + }); + return; + } + final vadModelPath = await getModelPath('silero_vad.onnx'); + final whisperModelPath = await getWhisperModelPath('whisper_tiny.onnx'); + final service = SttService( + vadModelPath: vadModelPath, + whisperModelPath: whisperModelPath, + voiceThreshold: _sttIsVoiceThreshold, + maxDuration: const Duration(seconds: 10), + ); + _sttService = service; + final subscription = service.transcribe().listen((event) { + setState(() { + _sttServiceResponse = event; + }); + }); + _sttStreamSubscription = subscription; + 
subscription.onDone(() { + setState(() { + _sttStreamSubscription = null; + _sttService = null; + }); + }); + } -// Future getModelPath(String modelFilenameWithExtension) async { -// if (kIsWeb) { -// return 'assets/models/sileroVad/$modelFilenameWithExtension'; -// } -// final assetCacheDirectory = -// await path_provider.getApplicationSupportDirectory(); -// final modelPath = -// path.join(assetCacheDirectory.path, modelFilenameWithExtension); + Future getModelPath(String modelFilenameWithExtension) async { + if (kIsWeb) { + return 'assets/models/sileroVad/$modelFilenameWithExtension'; + } + final assetCacheDirectory = + await path_provider.getApplicationSupportDirectory(); + final modelPath = + path.join(assetCacheDirectory.path, modelFilenameWithExtension); -// File file = File(modelPath); -// bool fileExists = await file.exists(); -// final fileLength = fileExists ? await file.length() : 0; + File file = File(modelPath); + bool fileExists = await file.exists(); + final fileLength = fileExists ? await file.length() : 0; -// // Do not use path package / path.join for paths. -// // After testing on Windows, it appears that asset paths are _always_ Unix style, i.e. -// // use /, but path.join uses \ on Windows. -// final assetPath = -// 'assets/models/sileroVad/${path.basename(modelFilenameWithExtension)}'; -// final assetByteData = await rootBundle.load(assetPath); -// final assetLength = assetByteData.lengthInBytes; -// final fileSameSize = fileLength == assetLength; -// if (!fileExists || !fileSameSize) { -// debugPrint( -// 'Copying model to $modelPath. Why? Either the file does not exist (${!fileExists}), ' -// 'or it does exist but is not the same size as the one in the assets ' -// 'directory. (${!fileSameSize})'); -// debugPrint('About to get byte data for $modelPath'); + // Do not use path package / path.join for paths. + // After testing on Windows, it appears that asset paths are _always_ Unix style, i.e. + // use /, but path.join uses \ on Windows. + final assetPath = + 'assets/models/sileroVad/${path.basename(modelFilenameWithExtension)}'; + final assetByteData = await rootBundle.load(assetPath); + final assetLength = assetByteData.lengthInBytes; + final fileSameSize = fileLength == assetLength; + if (!fileExists || !fileSameSize) { + debugPrint( + 'Copying model to $modelPath. Why? Either the file does not exist (${!fileExists}), ' + 'or it does exist but is not the same size as the one in the assets ' + 'directory. 
(${!fileSameSize})'); + debugPrint('About to get byte data for $modelPath'); -// List bytes = assetByteData.buffer.asUint8List( -// assetByteData.offsetInBytes, -// assetByteData.lengthInBytes, -// ); -// debugPrint('About to copy model to $modelPath'); -// try { -// if (!fileExists) { -// await file.create(recursive: true); -// } -// await file.writeAsBytes(bytes, flush: true); -// } catch (e) { -// debugPrint('Error writing bytes to $modelPath: $e'); -// rethrow; -// } -// debugPrint('Copied model to $modelPath'); -// } + List bytes = assetByteData.buffer.asUint8List( + assetByteData.offsetInBytes, + assetByteData.lengthInBytes, + ); + debugPrint('About to copy model to $modelPath'); + try { + if (!fileExists) { + await file.create(recursive: true); + } + await file.writeAsBytes(bytes, flush: true); + } catch (e) { + debugPrint('Error writing bytes to $modelPath: $e'); + rethrow; + } + debugPrint('Copied model to $modelPath'); + } -// return modelPath; -// } + return modelPath; + } } diff --git a/example/linux/flutter/generated_plugin_registrant.cc b/example/linux/flutter/generated_plugin_registrant.cc index 86820e1..f1b202c 100644 --- a/example/linux/flutter/generated_plugin_registrant.cc +++ b/example/linux/flutter/generated_plugin_registrant.cc @@ -8,6 +8,7 @@ #include #include +#include void fl_register_plugins(FlPluginRegistry* registry) { g_autoptr(FlPluginRegistrar) audioplayers_linux_registrar = @@ -16,4 +17,7 @@ void fl_register_plugins(FlPluginRegistry* registry) { g_autoptr(FlPluginRegistrar) fonnx_registrar = fl_plugin_registry_get_registrar_for_plugin(registry, "FonnxPlugin"); fonnx_plugin_register_with_registrar(fonnx_registrar); + g_autoptr(FlPluginRegistrar) record_linux_registrar = + fl_plugin_registry_get_registrar_for_plugin(registry, "RecordLinuxPlugin"); + record_linux_plugin_register_with_registrar(record_linux_registrar); } diff --git a/example/linux/flutter/generated_plugins.cmake b/example/linux/flutter/generated_plugins.cmake index b800921..01e4982 100644 --- a/example/linux/flutter/generated_plugins.cmake +++ b/example/linux/flutter/generated_plugins.cmake @@ -5,6 +5,7 @@ list(APPEND FLUTTER_PLUGIN_LIST audioplayers_linux fonnx + record_linux ) list(APPEND FLUTTER_FFI_PLUGIN_LIST diff --git a/example/macos/Flutter/GeneratedPluginRegistrant.swift b/example/macos/Flutter/GeneratedPluginRegistrant.swift index 5127d7b..0fb4802 100644 --- a/example/macos/Flutter/GeneratedPluginRegistrant.swift +++ b/example/macos/Flutter/GeneratedPluginRegistrant.swift @@ -8,9 +8,11 @@ import Foundation import audioplayers_darwin import fonnx import path_provider_foundation +import record_darwin func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) { AudioplayersDarwinPlugin.register(with: registry.registrar(forPlugin: "AudioplayersDarwinPlugin")) FonnxPlugin.register(with: registry.registrar(forPlugin: "FonnxPlugin")) PathProviderPlugin.register(with: registry.registrar(forPlugin: "PathProviderPlugin")) + RecordPlugin.register(with: registry.registrar(forPlugin: "RecordPlugin")) } diff --git a/example/macos/Podfile.lock b/example/macos/Podfile.lock index d716f16..a6da468 100644 --- a/example/macos/Podfile.lock +++ b/example/macos/Podfile.lock @@ -39,4 +39,4 @@ SPEC CHECKSUMS: PODFILE CHECKSUM: 91f0996b85caf3478dfdcf490d95bb166c22642b -COCOAPODS: 1.15.2 +COCOAPODS: 1.16.2 diff --git a/example/pubspec.yaml b/example/pubspec.yaml index 1ee4b7f..c941ee9 100644 --- a/example/pubspec.yaml +++ b/example/pubspec.yaml @@ -25,6 +25,8 @@ dependencies: audioplayers: ^6.0.0 
file_picker: ^8.1.2 collection: ^1.18.0 + record: '<5.2.0' + record_darwin: '<1.2.0' dependency_overrides: audioplayers_web: git: diff --git a/example/windows/flutter/generated_plugin_registrant.cc b/example/windows/flutter/generated_plugin_registrant.cc index 69cbed8..785e498 100644 --- a/example/windows/flutter/generated_plugin_registrant.cc +++ b/example/windows/flutter/generated_plugin_registrant.cc @@ -8,10 +8,13 @@ #include #include +#include void RegisterPlugins(flutter::PluginRegistry* registry) { AudioplayersWindowsPluginRegisterWithRegistrar( registry->GetRegistrarForPlugin("AudioplayersWindowsPlugin")); FonnxPluginCApiRegisterWithRegistrar( registry->GetRegistrarForPlugin("FonnxPluginCApi")); + RecordWindowsPluginCApiRegisterWithRegistrar( + registry->GetRegistrarForPlugin("RecordWindowsPluginCApi")); } diff --git a/example/windows/flutter/generated_plugins.cmake b/example/windows/flutter/generated_plugins.cmake index 60bdc80..6c34c04 100644 --- a/example/windows/flutter/generated_plugins.cmake +++ b/example/windows/flutter/generated_plugins.cmake @@ -5,6 +5,7 @@ list(APPEND FLUTTER_PLUGIN_LIST audioplayers_windows fonnx + record_windows ) list(APPEND FLUTTER_FFI_PLUGIN_LIST
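// For reference, a minimal sketch of driving the newly un-commented SttService
// the same way _runVadDemo does (the model paths below are placeholders; the
// demo resolves them via getModelPath / getWhisperModelPath):
Future<void> sttUsageSketch() async {
  final service = SttService(
    vadModelPath: 'path/to/silero_vad.onnx',
    whisperModelPath: 'path/to/whisper_tiny.onnx',
    maxDuration: const Duration(seconds: 10),
  );
  final subscription = service.transcribe().listen((response) {
    // Each event carries the running transcription plus every AudioFrame seen
    // so far; frame.vadP is filled in once Silero VAD has scored the frame.
    debugPrint(response.transcription);
  });
  // Stop early if needed; the stream also ends on its own after maxDuration
  // or once trailing silence exceeds maxSilenceDuration.
  await Future<void>.delayed(const Duration(seconds: 5));
  service.stop();
  await subscription.asFuture<void>();
}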