Skip to content

Commit

Permalink
feat: Improved callbacks and minor fix (#13)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
jBernavaPrah and github-actions[bot] authored Aug 21, 2024
1 parent 206324f commit 060ee6b
Show file tree
Hide file tree
Showing 24 changed files with 533 additions and 167 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/target
.idea
**/.DS_Store
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.2.3](https://github.com/jBernavaPrah/azure-speech-sdk-rs/compare/v0.2.2...v0.2.3) - 2024-08-16

### Other
- Improve documentation ([#10](https://github.com/jBernavaPrah/azure-speech-sdk-rs/pull/10))

## [0.2.2](https://github.com/jBernavaPrah/azure-speech-sdk-rs/compare/v0.2.1...v0.2.2) - 2024-08-16

### Other
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "azure-speech"
version = "0.2.2"
version = "0.2.3"
authors = ["Jure Bernava Prah <[email protected]>"]
description = "Pure Rust implementation for Microsoft Speech Service"
description = "Pure Rust SDK for Azure Speech Service"
edition = "2021"
rust-version = "1.71.0"
license = "MIT"
Expand Down Expand Up @@ -41,7 +41,7 @@ serde_json = "1.0.114"
os_info = "3"

ssml = "0.1.0"
async-channel = "1.9.0"
async-channel = "1.9.0" # needed for ezsockets 0.6 for call_with;


[dev-dependencies]
Expand Down
79 changes: 79 additions & 0 deletions examples/recognize_callbacks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
use azure_speech::stream::Stream;
use azure_speech::Auth;
use azure_speech::{recognizer, StreamExt};
use std::env;
use std::error::Error;
use std::path::Path;
use tokio::fs::File;
use tokio::io::{AsyncReadExt, BufReader};
use tokio_stream::wrappers::ReceiverStream;

/// Example: speech recognition driven entirely by callbacks.
///
/// Instead of iterating the event stream (see examples/recognize_simple.rs),
/// this registers handlers on a `recognizer::Callback` and hands it to
/// `use_callbacks`, which consumes the events and dispatches them.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    // Log at INFO so the tracing output emitted by the callbacks is visible.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .init();

    // Check on the example recognize_simple.rs for more details on how to set the recognizer.
    // Credentials are read from the environment; both expect() calls abort with
    // a message naming the missing variable.
    let auth = Auth::from_subscription(
        env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
        env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
    );
    let config = recognizer::Config::default();

    let client = recognizer::Client::connect(auth, config)
        .await
        .expect("to connect to azure");

    // Create the callbacks for the recognizer.
    // Each handler is an async closure invoked as the corresponding event
    // arrives during the recognition session.
    let callbacks = recognizer::Callback::default()
        .on_start_detected(|id, offset| async move {
            tracing::info!("Start detected: {:?} - {:?}", id, offset);
        })
        .on_recognized(|id, result, _offset, _duration, _raw| async move {
            tracing::info!("Recognized: {:?} - {:?}", id, result);
        })
        .on_session_end(|id| async move {
            tracing::info!("Session end: {:?}", id);
        });
    //.on_... // check the other callbacks available.

    client
        .recognize(
            create_audio_stream("tests/audios/examples_sample_files_turn_on_the_lamp.wav").await, // Try also the mp3 version of the file.
            recognizer::ContentType::Wav, // Be sure to set it correctly.
            recognizer::Details::file(),
        )
        .await
        .expect("to recognize")
        // When you set the callbacks, the events will be sent to the callbacks and not to the stream.
        .use_callbacks(callbacks)
        // It's important to await here: use_callbacks drives the event loop,
        // so without this await the recognition would not run to completion.
        .await;

    tracing::info!("Completed!");

    Ok(())
}

/// Streams the file at `path` as a sequence of byte chunks.
///
/// Reads the file in 4096-byte blocks on a background task and forwards each
/// block through a bounded channel, so the recognizer can consume the audio
/// as a `Stream` without loading the whole file into memory.
async fn create_audio_stream(path: impl AsRef<Path>) -> impl Stream<Item = Vec<u8>> {
    let (tx, rx) = tokio::sync::mpsc::channel(1024);
    let file = File::open(path).await.expect("Failed to open file");
    let mut reader = BufReader::new(file);

    tokio::spawn(async move {
        let mut chunk = vec![0; 4096];
        // `read` may fill only part of the buffer; `n` is the count of valid
        // bytes for this iteration (0 means end of file).
        while let Ok(n) = reader.read(&mut chunk).await {
            if n == 0 {
                break;
            }
            // Send only the bytes actually read. Sending `chunk.clone()`
            // (the whole 4096-byte buffer) would, on the final partial read,
            // append stale bytes left over from the previous iteration to
            // the audio stream.
            if tx.send(chunk[..n].to_vec()).await.is_err() {
                tracing::error!("Error sending data");
                break;
            }
        }
        // Dropping the sender closes the channel, which ends the stream.
        drop(tx);
    });

    ReceiverStream::new(rx)
}
11 changes: 6 additions & 5 deletions examples/recognize_from_bbc_word_radio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,20 @@ async fn main() {

let client = recognizer::Client::connect(
auth,
recognizer::Config::default().set_detect_languages(
vec![recognizer::Language::EnGb],
recognizer::LanguageDetectMode::Continuous,
),
recognizer::Config::default()
// The BBC World Service stream is in English.
.set_language(recognizer::Language::EnGb),
)
.await
.expect("Failed to connect to Azure");

let mut events = client
.recognize(
// The BBC World Service stream is a good example to test the recognizer.
create_audio_stream("https://stream.live.vc.bbcmedia.co.uk/bbc_world_service").await,
// The content type is MPEG.
recognizer::ContentType::Mpeg,
recognizer::Details::stream("mac", "stream"),
recognizer::Details::stream("unknown", "stream"),
)
.await
.expect("Failed to recognize");
Expand Down
3 changes: 2 additions & 1 deletion examples/recognize_from_microphone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
.with_max_level(tracing::Level::INFO)
.init();

// More information on the configuration can be found in the simple example.
// More information on the configuration can be found in the examples/recognize_simple.rs example.

let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
Expand All @@ -31,6 +31,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
// As the audio is raw, the WAV format is used.
let (stream, microphone) = listen_from_default_input().await;

// Start the microphone.
microphone.play().expect("play failed");

let mut events = client
Expand Down
7 changes: 5 additions & 2 deletions examples/recognize_simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
.await
.expect("to connect to azure");

// Here we are streaming the events from the synthesizer.
// But you can also use the callbacks (see: examples/recognize_callbacks.rs) if you prefer.
let mut stream = client
.recognize(
// Here is your input audio stream. The audio headers needs to be present if required by the content type used.
Expand Down Expand Up @@ -65,8 +67,9 @@ async fn main() -> Result<(), Box<dyn Error>> {
tracing::info!("Result: {:?}", result);
tracing::info!("Offset: {:?}", offset);
tracing::info!("Duration: {:?}", duration);
// the raw message is the raw json message from the service.
// You can use it to extract more information if needed.

// the raw message is the json message received from the service.
// You can use it to extract more information when needed.
tracing::info!("Raw message: {:?}", raw_message);
}
_ => {
Expand Down
43 changes: 22 additions & 21 deletions examples/synthesize_callbacks.rs
Original file line number Diff line number Diff line change
@@ -1,61 +1,62 @@
use azure_speech::{synthesizer, Auth};
use azure_speech::{synthesizer, Auth, StreamExt};
use std::env;
use std::error::Error;
use tokio_stream::StreamExt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();

// Check the examples/synthesize_simple.rs file for the full code.

let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
);

let config = synthesizer::Config::default()
.on_synthesising(|request_id, audio| {
let config = synthesizer::Config::default();
let client = synthesizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

// Create the callbacks for the synthesizer.
let callbacks = synthesizer::Callback::default()
.on_synthesising(|request_id, audio| async move {
tracing::info!(
"Callback - request: {:?}: Synthesising bytes {:?} ",
request_id,
audio.len()
);
})
.on_synthesised(|request_id| {
.on_synthesised(|request_id| async move {
tracing::info!("Callback - request: {:?}: Synthesised", request_id);
})
.on_error(|request_id, error| {
tracing::info!("Callback - request: {:?}: Error {:?}", request_id, error);
})
.on_audio_metadata(|request_id, metadata| {
.on_audio_metadata(|request_id, metadata| async move {
tracing::info!(
"Callback - request: {:?}: Audio metadata {:?}",
request_id,
metadata
);
})
.on_session_start(|request_id| {
.on_session_start(|request_id| async move {
tracing::info!("Callback - request: {:?}: Session started", request_id);
})
.on_session_end(|request_id| {
.on_session_end(|request_id| async move {
tracing::info!("Callback - request: {:?}: Session ended", request_id);
})
.on_error(|request_id, error| async move {
tracing::info!("Callback - request: {:?}: Error {:?}", request_id, error);
});

let client = synthesizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

// you can use both the stream and callback in the same functions.
let mut stream = client
client
// here you put your text to synthesize.
.synthesize("Hello World!")
.await
.expect("to synthesize");

while let Some(event) = stream.next().await {
tracing::info!("Synthesizer Event: {:?}", event);
}
.expect("to synthesize")
.use_callbacks(callbacks)
.await;

Ok(())
}
4 changes: 4 additions & 0 deletions examples/synthesize_simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
// It will understand the en-US language and will use the EnUsJennyNeural voice.
// You can change it by using the Config struct and its methods.
let config = synthesizer::Config::default();
//.with_language(synthesizer::Language::EnGb)
//.with_voice(synthesizer::Voice::EnGbLibbyNeural)

let client = synthesizer::Client::connect(auth, config)
.await
Expand All @@ -36,6 +38,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
.await
.expect("to synthesize");

// Here we are streaming the events from the synthesizer.
// But you can also use the callbacks (see: examples/synthesize_callbacks.rs) if you prefer.
while let Some(event) = stream.next().await {
// Each event is a part of the synthesis process.
match event {
Expand Down
2 changes: 1 addition & 1 deletion examples/synthesize_using_ssml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
// this will print a lot of events to the console.
// you can use the events to create your own audio output.

// check other examples to see how to create an audio output.
// check examples/synthesize_to_standard_output.rs to see how to create an audio output.
tracing::info!("Synthesized: {:?}", event);
}

Expand Down
19 changes: 13 additions & 6 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,20 @@ This library aims to provide an easy-to-install and straightforward interface fo

The library currently supports the following features:

- [X] Speech-to-Text (Speech Recognition)
- [X] Speech Recognition (Speech-to-Text) [examples](examples/recognize_simple.rs)
- [X] Real-time Speech Recognition
- [X] Custom Speech Recognition
- [X] Text-to-Speech (Speech Synthesis)
- [X] Phrase List
- [ ] Conversation Transcriber - Real-time Diarization (Work in Progress)
- [ ] Pronunciation Assessment (Work in Progress)
- [X] Speech Synthesis (Text-to-Speech) [example](examples/synthesize_simple.rs)
- [X] Real-time Speech Synthesis
- [X] Custom Voice
- [X] SSML Support
- [ ] Speech Translation
- [ ] Intent Recognition
- [ ] Speaker Recognition
- [ ] Keyword Recognition
- [ ] Speech Translation (Work in Progress)
- [ ] Intent Recognition (Work in Progress)
- [ ] Keyword Recognition (Work in Progress)


The library is currently in the early stages of development, and I am actively working on adding more features and improving the existing ones.

Expand All @@ -48,6 +51,10 @@ Add this library to your project using the following command:
cargo add azure_speech
```

**And that's it!**

You are now ready to use the Azure Speech SDK in your Rust project.

## Usage
For usage examples, please refer to the [examples folder](https://github.com/jBernavaPrah/azure-speech-sdk-rs/tree/master/examples) in the repository. Or check the [documentation](https://docs.rs/azure-speech).

Expand Down
15 changes: 15 additions & 0 deletions src/callback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// src/callback.rs
use crate::RequestId;
use std::future::Future;
use std::pin::Pin;

// Boxed, type-erased callback signatures shared by the clients.
// Each handler receives the RequestId of the session it belongs to and
// returns a BoxFuture so user implementations can be asynchronous.
pub(crate) type OnSessionStarted = Box<dyn Fn(RequestId) -> BoxFuture>;
pub(crate) type OnSessionEnded = Box<dyn Fn(RequestId) -> BoxFuture>;
pub(crate) type OnError = Box<dyn Fn(RequestId, crate::Error) -> BoxFuture>;
// A pinned, heap-allocated future with no output — the common return type
// of all callbacks above.
pub(crate) type BoxFuture = Pin<Box<dyn Future<Output = ()> + Send + 'static>>;

/// Dispatches a single event of type `Item` to a user-provided handler.
///
/// NOTE(review): `-> impl Future` in a trait method was stabilized in Rust
/// 1.75, but Cargo.toml declares `rust-version = "1.71.0"` — confirm the
/// MSRV still builds. Also, `#[async_trait]` rewrites only `async fn`
/// methods, so the attribute appears to be a no-op on this trait —
/// presumably a leftover; verify whether it can be removed.
#[async_trait::async_trait]
pub trait Callback {
    type Item;
    // Handle one event emitted by the service for this request.
    fn on_event(&self, item: Self::Item) -> impl Future<Output = ()>;
}
11 changes: 6 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@
//! `tokio` runtime, it minimizes external dependencies wherever possible.
//!
//! ## Core Functionalities
//! - [X] Speech to Text
//! - [X] Text to Speech
//! - [X] Speech to Text [recognizer]
//! - [X] Text to Speech [synthesizer]
//!
//! For comprehensive information on Microsoft Speech Service, refer to the official
//! documentation [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-sdk?tabs=windows%2Cubuntu%2Cios-xcode%2Cmac-xcode%2Candroid-studio).
//! documentation [here](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-sdk).
//!
//! ## Notes
//! This crate, in its current version, does not include some features available in the
//! official SDK, such as microphone/file recognition or synthesizer output to speakers.
//! However, examples demonstrating these capabilities can be found in the `examples` directory.
//!
//! ## Usage and Examples
//! Detailed usage instructions and examples are provided in the `examples` directory.
//! Detailed usage instructions and examples are provided in the [examples](https://github.com/jBernavaPrah/azure-speech-sdk-rs/blob/master/examples) folder in the GitHub repository.
//!
mod auth;
Expand All @@ -31,6 +31,7 @@ mod event;
mod stream_ext;
mod utils;

mod callback;
pub mod recognizer;
pub mod synthesizer;

Expand All @@ -39,7 +40,7 @@ pub use connector::*;
pub use error::*;

pub use event::*;
pub use stream_ext::StreamExt;
pub use stream_ext::*;

pub mod stream {
//! Re-export of `tokio_stream` crate.
Expand Down
Loading

0 comments on commit 060ee6b

Please sign in to comment.