Skip to content

Commit

Permalink
feat: Improved callbacks and minor fix (#13)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
jBernavaPrah and github-actions[bot] authored Aug 21, 2024
1 parent 206324f commit 060ee6b
Show file tree
Hide file tree
Showing 24 changed files with 533 additions and 167 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/target
.idea
**/.DS_Store
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.2.3](https://github.com/jBernavaPrah/azure-speech-sdk-rs/compare/v0.2.2...v0.2.3) - 2024-08-16

### Other
- Improve documentation ([#10](https://github.com/jBernavaPrah/azure-speech-sdk-rs/pull/10))

## [0.2.2](https://github.com/jBernavaPrah/azure-speech-sdk-rs/compare/v0.2.1...v0.2.2) - 2024-08-16

### Other
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "azure-speech"
version = "0.2.2"
version = "0.2.3"
authors = ["Jure Bernava Prah <[email protected]>"]
description = "Pure Rust implementation for Microsoft Speech Service"
description = "Pure Rust SDK for Azure Speech Service"
edition = "2021"
rust-version = "1.71.0"
license = "MIT"
Expand Down Expand Up @@ -41,7 +41,7 @@ serde_json = "1.0.114"
os_info = "3"

ssml = "0.1.0"
async-channel = "1.9.0"
async-channel = "1.9.0" # needed for ezsockets 0.6 for call_with;


[dev-dependencies]
Expand Down
79 changes: 79 additions & 0 deletions examples/recognize_callbacks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
use azure_speech::stream::Stream;
use azure_speech::Auth;
use azure_speech::{recognizer, StreamExt};
use std::env;
use std::error::Error;
use std::path::Path;
use tokio::fs::File;
use tokio::io::{AsyncReadExt, BufReader};
use tokio_stream::wrappers::ReceiverStream;

/// Example: speech recognition driven entirely by callbacks.
///
/// Instead of iterating the event stream (see examples/recognize_simple.rs),
/// this registers handlers on a `recognizer::Callback` and hands it to
/// `use_callbacks`, which consumes the events and dispatches them.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    // Log at INFO so the tracing output emitted by the callbacks is visible.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .init();

    // Check on the example recognize_simple.rs for more details on how to set the recognizer.
    // Credentials are read from the environment; both expect() calls abort with
    // a message naming the missing variable.
    let auth = Auth::from_subscription(
        env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
        env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
    );
    let config = recognizer::Config::default();

    let client = recognizer::Client::connect(auth, config)
        .await
        .expect("to connect to azure");

    // Create the callbacks for the recognizer.
    // Each handler is an async closure invoked as the corresponding event
    // arrives during the recognition session.
    let callbacks = recognizer::Callback::default()
        .on_start_detected(|id, offset| async move {
            tracing::info!("Start detected: {:?} - {:?}", id, offset);
        })
        .on_recognized(|id, result, _offset, _duration, _raw| async move {
            tracing::info!("Recognized: {:?} - {:?}", id, result);
        })
        .on_session_end(|id| async move {
            tracing::info!("Session end: {:?}", id);
        });
    //.on_... // check the other callbacks available.

    client
        .recognize(
            create_audio_stream("tests/audios/examples_sample_files_turn_on_the_lamp.wav").await, // Try also the mp3 version of the file.
            recognizer::ContentType::Wav, // Be sure to set it correctly.
            recognizer::Details::file(),
        )
        .await
        .expect("to recognize")
        // When you set the callbacks, the events will be sent to the callbacks and not to the stream.
        .use_callbacks(callbacks)
        // It's important to await here: use_callbacks drives the event loop,
        // so without this await the recognition would not run to completion.
        .await;

    tracing::info!("Completed!");

    Ok(())
}

/// Streams the file at `path` as a sequence of byte chunks.
///
/// Reads the file in 4096-byte blocks on a background task and forwards each
/// block through a bounded channel, so the recognizer can consume the audio
/// as a `Stream` without loading the whole file into memory.
async fn create_audio_stream(path: impl AsRef<Path>) -> impl Stream<Item = Vec<u8>> {
    let (tx, rx) = tokio::sync::mpsc::channel(1024);
    let file = File::open(path).await.expect("Failed to open file");
    let mut reader = BufReader::new(file);

    tokio::spawn(async move {
        let mut chunk = vec![0; 4096];
        // `read` may fill only part of the buffer; `n` is the count of valid
        // bytes for this iteration (0 means end of file).
        while let Ok(n) = reader.read(&mut chunk).await {
            if n == 0 {
                break;
            }
            // Send only the bytes actually read. Sending `chunk.clone()`
            // (the whole 4096-byte buffer) would, on the final partial read,
            // append stale bytes left over from the previous iteration to
            // the audio stream.
            if tx.send(chunk[..n].to_vec()).await.is_err() {
                tracing::error!("Error sending data");
                break;
            }
        }
        // Dropping the sender closes the channel, which ends the stream.
        drop(tx);
    });

    ReceiverStream::new(rx)
}
11 changes: 6 additions & 5 deletions examples/recognize_from_bbc_word_radio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,20 @@ async fn main() {

let client = recognizer::Client::connect(
auth,
recognizer::Config::default().set_detect_languages(
vec![recognizer::Language::EnGb],
recognizer::LanguageDetectMode::Continuous,
),
recognizer::Config::default()
// The BBC World Service stream is in English.
.set_language(recognizer::Language::EnGb),
)
.await
.expect("Failed to connect to Azure");

let mut events = client
.recognize(
// The BBC World Service stream is a good example to test the recognizer.
create_audio_stream("https://stream.live.vc.bbcmedia.co.uk/bbc_world_service").await,
// The content type is MPEG.
recognizer::ContentType::Mpeg,
recognizer::Details::stream("mac", "stream"),
recognizer::Details::stream("unknown", "stream"),
)
.await
.expect("Failed to recognize");
Expand Down
3 changes: 2 additions & 1 deletion examples/recognize_from_microphone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
.with_max_level(tracing::Level::INFO)
.init();

// More information on the configuration can be found in the simple example.
// More information on the configuration can be found in the examples/recognize_simple.rs example.

let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
Expand All @@ -31,6 +31,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
// As the audio is raw, the WAV format is used.
let (stream, microphone) = listen_from_default_input().await;

// Start the microphone.
microphone.play().expect("play failed");

let mut events = client
Expand Down
7 changes: 5 additions & 2 deletions examples/recognize_simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
.await
.expect("to connect to azure");

// Here we are streaming the events from the synthesizer.
// But you can also use the callbacks (see: examples/recognize_callbacks.rs) if you prefer.
let mut stream = client
.recognize(
// Here is your input audio stream. The audio headers needs to be present if required by the content type used.
Expand Down Expand Up @@ -65,8 +67,9 @@ async fn main() -> Result<(), Box<dyn Error>> {
tracing::info!("Result: {:?}", result);
tracing::info!("Offset: {:?}", offset);
tracing::info!("Duration: {:?}", duration);
// the raw message is the raw json message from the service.
// You can use it to extract more information if needed.

// the raw message is the json message received from the service.
// You can use it to extract more information when needed.
tracing::info!("Raw message: {:?}", raw_message);
}
_ => {
Expand Down
43 changes: 22 additions & 21 deletions examples/synthesize_callbacks.rs
Original file line number Diff line number Diff line change
@@ -1,61 +1,62 @@
use azure_speech::{synthesizer, Auth};
use azure_speech::{synthesizer, Auth, StreamExt};
use std::env;
use std::error::Error;
use tokio_stream::StreamExt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();

// Check the examples/synthesize_simple.rs file for the full code.

let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
);

let config = synthesizer::Config::default()
.on_synthesising(|request_id, audio| {
let config = synthesizer::Config::default();
let client = synthesizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

// Create the callbacks for the synthesizer.
let callbacks = synthesizer::Callback::default()
.on_synthesising(|request_id, audio| async move {
tracing::info!(
"Callback - request: {:?}: Synthesising bytes {:?} ",
request_id,
audio.len()
);
})
.on_synthesised(|request_id| {
.on_synthesised(|request_id| async move {
tracing::info!("Callback - request: {:?}: Synthesised", request_id);
})
.on_error(|request_id, error| {
tracing::info!("Callback - request: {:?}: Error {:?}", request_id, error);
})
.on_audio_metadata(|request_id, metadata| {
.on_audio_metadata(|request_id, metadata| async move {
tracing::info!(
"Callback - request: {:?}: Audio metadata {:?}",
request_id,
metadata
);
})
.on_session_start(|request_id| {
.on_session_start(|request_id| async move {
tracing::info!("Callback - request: {:?}: Session started", request_id);
})
.on_session_end(|request_id| {
.on_session_end(|request_id| async move {
tracing::info!("Callback - request: {:?}: Session ended", request_id);
})
.on_error(|request_id, error| async move {
tracing::info!("Callback - request: {:?}: Error {:?}", request_id, error);
});

let client = synthesizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

// you can use both the stream and callback in the same functions.
let mut stream = client
client
// here you put your text to synthesize.
.synthesize("Hello World!")
.await
.expect("to synthesize");

while let Some(event) = stream.next().await {
tracing::info!("Synthesizer Event: {:?}", event);
}
.expect("to synthesize")
.use_callbacks(callbacks)
.await;

Ok(())
}
4 changes: 4 additions & 0 deletions examples/synthesize_simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
// It will understand the en-US language and will use the EnUsJennyNeural voice.
// You can change it by using the Config struct and its methods.
let config = synthesizer::Config::default();
//.with_language(synthesizer::Language::EnGb)
//.with_voice(synthesizer::Voice::EnGbLibbyNeural)

let client = synthesizer::Client::connect(auth, config)
.await
Expand All @@ -36,6 +38,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
.await
.expect("to synthesize");

// Here we are streaming the events from the synthesizer.
// But you can also use the callbacks (see: examples/synthesize_callbacks.rs) if you prefer.
while let Some(event) = stream.next().await {
// Each event is a part of the synthesis process.
match event {
Expand Down
2 changes: 1 addition & 1 deletion examples/synthesize_using_ssml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
// this will print a lot of events to the console.
// you can use the events to create your own audio output.

// check other examples to see how to create an audio output.
// check examples/synthesize_to_standard_output.rs to see how to create an audio output.
tracing::info!("Synthesized: {:?}", event);
}

Expand Down
19 changes: 13 additions & 6 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,20 @@ This library aims to provide an easy-to-install and straightforward interface fo

The library currently supports the following features:

- [X] Speech-to-Text (Speech Recognition)
- [X] Speech Recognition (Speech-to-Text) [examples](examples/recognize_simple.rs)
- [X] Real-time Speech Recognition
- [X] Custom Speech Recognition
- [X] Text-to-Speech (Speech Synthesis)
- [X] Phrase List
- [ ] Conversation Transcriber - Real-time Diarization (Work in Progress)
- [ ] Pronunciation Assessment (Work in Progress)
- [X] Speech Synthesis (Text-to-Speech) [example](examples/synthesize_simple.rs)
- [X] Real-time Speech Synthesis
- [X] Custom Voice
- [X] SSML Support
- [ ] Speech Translation
- [ ] Intent Recognition
- [ ] Speaker Recognition
- [ ] Keyword Recognition
- [ ] Speech Translation (Work in Progress)
- [ ] Intent Recognition (Work in Progress)
- [ ] Keyword Recognition (Work in Progress)


The library is currently in the early stages of development, and I am actively working on adding more features and improving the existing ones.

Expand All @@ -48,6 +51,10 @@ Add this library to your project using the following command:
cargo add azure_speech
```

**And that's it!**

You are now ready to use the Azure Speech SDK in your Rust project.

## Usage
For usage examples, please refer to the [examples folder](https://github.com/jBernavaPrah/azure-speech-sdk-rs/tree/master/examples) in the repository. Or check the [documentation](https://docs.rs/azure-speech).

Expand Down
15 changes: 15 additions & 0 deletions src/callback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// src/callback.rs
use crate::RequestId;
use std::future::Future;
use std::pin::Pin;

// Boxed, type-erased callback signatures shared by the clients.
// Each handler receives the RequestId of the session it belongs to and
// returns a BoxFuture so user implementations can be asynchronous.
pub(crate) type OnSessionStarted = Box<dyn Fn(RequestId) -> BoxFuture>;
pub(crate) type OnSessionEnded = Box<dyn Fn(RequestId) -> BoxFuture>;
pub(crate) type OnError = Box<dyn Fn(RequestId, crate::Error) -> BoxFuture>;
// A pinned, heap-allocated future with no output — the common return type
// of all callbacks above.
pub(crate) type BoxFuture = Pin<Box<dyn Future<Output = ()> + Send + 'static>>;

/// Dispatches a single event of type `Item` to a user-provided handler.
///
/// NOTE(review): `-> impl Future` in a trait method was stabilized in Rust
/// 1.75, but Cargo.toml declares `rust-version = "1.71.0"` — confirm the
/// MSRV still builds. Also, `#[async_trait]` rewrites only `async fn`
/// methods, so the attribute appears to be a no-op on this trait —
/// presumably a leftover; verify whether it can be removed.
#[async_trait::async_trait]
pub trait Callback {
    type Item;
    // Handle one event emitted by the service for this request.
    fn on_event(&self, item: Self::Item) -> impl Future<Output = ()>;
}
11 changes: 6 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@
//! `tokio` runtime, it minimizes external dependencies wherever possible.
//!
//! ## Core Functionalities
//! - [X] Speech to Text
//! - [X] Text to Speech
//! - [X] Speech to Text [recognizer]
//! - [X] Text to Speech [synthesizer]
//!
//! For comprehensive information on Microsoft Speech Service, refer to the official
//! documentation [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-sdk?tabs=windows%2Cubuntu%2Cios-xcode%2Cmac-xcode%2Candroid-studio).
//! documentation [here](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-sdk).
//!
//! ## Notes
//! This crate, in its current version, does not include some features available in the
//! official SDK, such as microphone/file recognition or synthesizer output to speakers.
//! However, examples demonstrating these capabilities can be found in the `examples` directory.
//!
//! ## Usage and Examples
//! Detailed usage instructions and examples are provided in the `examples` directory.
//! Detailed usage instructions and examples are provided in the [examples](https://github.com/jBernavaPrah/azure-speech-sdk-rs/blob/master/examples) folder in the GitHub repository.
//!
mod auth;
Expand All @@ -31,6 +31,7 @@ mod event;
mod stream_ext;
mod utils;

mod callback;
pub mod recognizer;
pub mod synthesizer;

Expand All @@ -39,7 +40,7 @@ pub use connector::*;
pub use error::*;

pub use event::*;
pub use stream_ext::StreamExt;
pub use stream_ext::*;

pub mod stream {
//! Re-export of `tokio_stream` crate.
Expand Down
Loading

0 comments on commit 060ee6b

Please sign in to comment.