From 706a688a2f3e646443be1daed1c6fa2c93e2f8cd Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Wed, 13 Dec 2023 23:32:49 -0800 Subject: [PATCH 1/4] refactor!: Switch to use ICU4X BREAKING CHANGE: This changes the type used for language ids from unic_langid::LanguageIdentifier to icu_locid::LanguageIdentifier. --- Cargo.toml | 7 +-- fluent-bundle/Cargo.toml | 10 ++-- fluent-bundle/README.md | 2 +- fluent-bundle/benches/resolver.rs | 2 +- fluent-bundle/benches/resolver_iai.rs | 2 +- fluent-bundle/examples/custom_formatter.rs | 2 +- fluent-bundle/examples/custom_type.rs | 2 +- fluent-bundle/examples/external_arguments.rs | 2 +- fluent-bundle/examples/functions.rs | 2 +- fluent-bundle/examples/simple-app.rs | 2 +- fluent-bundle/src/bundle.rs | 20 +++---- fluent-bundle/src/concurrent.rs | 4 +- fluent-bundle/src/errors.rs | 2 +- fluent-bundle/src/lib.rs | 2 +- fluent-bundle/src/memoizer.rs | 2 +- fluent-bundle/src/types/mod.rs | 27 +++++----- fluent-bundle/src/types/number.rs | 17 ++---- fluent-bundle/src/types/plural.rs | 20 +++---- fluent-bundle/tests/bundle.rs | 2 +- fluent-bundle/tests/custom_types.rs | 2 +- fluent-bundle/tests/function.rs | 3 +- fluent-bundle/tests/resolver_fixtures.rs | 2 +- fluent-bundle/tests/types_test.rs | 56 +++++++++++++++++--- fluent-fallback/Cargo.toml | 3 +- fluent-fallback/examples/simple-fallback.rs | 2 +- fluent-fallback/src/env.rs | 4 +- fluent-fallback/src/errors.rs | 2 +- fluent-fallback/src/generator.rs | 2 +- fluent-fallback/src/lib.rs | 4 +- fluent-fallback/tests/localization_test.rs | 2 +- fluent-pseudo/README.md | 2 +- fluent-resmgr/Cargo.toml | 3 +- fluent-resmgr/examples/simple-resmgr.rs | 2 +- fluent-resmgr/src/resource_manager.rs | 4 +- fluent-resmgr/tests/localization_test.rs | 2 +- fluent-syntax/benches/parser.rs | 2 +- fluent-syntax/tests/parser_fixtures.rs | 6 +-- fluent/Cargo.toml | 2 +- fluent/README.md | 2 +- fluent/src/lib.rs | 2 +- intl-memoizer/Cargo.toml | 9 +++- intl-memoizer/examples/numberformat.rs | 2 +- 
intl-memoizer/examples/pluralrules.rs | 16 ++++-- intl-memoizer/src/lib.rs | 47 ++++++++-------- 44 files changed, 179 insertions(+), 133 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 89a3dbea..d3bccfe6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,16 +32,17 @@ rust-version = "1.67.0" [workspace.dependencies] criterion = "0.5" -fluent-langneg = "0.13" +fluent-langneg = "0.14" futures = "0.3" iai = "0.1" -intl_pluralrules = "7.0.1" rustc-hash = "1" serde = "1.0" serde_json = "1.0" thiserror = "1.0" tokio = "1.0" -unic-langid = "0.9" +icu_locid = "1.4" +icu_plurals = { version = "1.4", features = ["experimental"] } +icu_provider = "1.4" fluent-bundle = { version = "0.15.3", path = "fluent-bundle" } fluent-fallback = { version = "0.7.1", path = "fluent-fallback" } diff --git a/fluent-bundle/Cargo.toml b/fluent-bundle/Cargo.toml index 0374d2b4..3703b8f8 100644 --- a/fluent-bundle/Cargo.toml +++ b/fluent-bundle/Cargo.toml @@ -26,9 +26,11 @@ include = [ [dependencies] fluent-langneg.workspace = true fluent-syntax.workspace = true -intl_pluralrules.workspace = true rustc-hash.workspace = true -unic-langid.workspace = true +icu_locid.workspace = true +icu_plurals.workspace = true +icu_provider.workspace = true +fixed_decimal = { version = "0.5.5", features = ["ryu"] } intl-memoizer = { version = "0.5.2", path = "../intl-memoizer" } self_cell = "1.0" smallvec = "1.13" @@ -37,12 +39,12 @@ smallvec = "1.13" criterion.workspace = true iai.workspace = true serde = { workspace = true, features = ["derive"] } -unic-langid = { workspace = true, features = ["macros"] } rand = "0.8" serde_yaml = "0.9" [features] -default = [] +default = ["icu_provider/sync"] +sync = ["icu_provider/sync"] all-benchmarks = [] [[bench]] diff --git a/fluent-bundle/README.md b/fluent-bundle/README.md index 488be2ea..3ff0b846 100644 --- a/fluent-bundle/README.md +++ b/fluent-bundle/README.md @@ -23,7 +23,7 @@ Usage ```rust use fluent_bundle::{FluentBundle, FluentResource}; -use 
unic_langid::langid; +use icu_locid::langid; fn main() { let ftl_string = "hello-world = Hello, world!".to_owned(); diff --git a/fluent-bundle/benches/resolver.rs b/fluent-bundle/benches/resolver.rs index a024da05..d98c923a 100644 --- a/fluent-bundle/benches/resolver.rs +++ b/fluent-bundle/benches/resolver.rs @@ -10,7 +10,7 @@ use std::rc::Rc; use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; use fluent_syntax::ast; -use unic_langid::langid; +use icu_locid::langid; fn read_file(path: &str) -> Result { let mut f = File::open(path)?; diff --git a/fluent-bundle/benches/resolver_iai.rs b/fluent-bundle/benches/resolver_iai.rs index 05df9bee..7dd2819f 100644 --- a/fluent-bundle/benches/resolver_iai.rs +++ b/fluent-bundle/benches/resolver_iai.rs @@ -1,6 +1,6 @@ use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; use fluent_syntax::ast; -use unic_langid::{langid, LanguageIdentifier}; +use icu_locid::{langid, LanguageIdentifier}; const LANG_EN: LanguageIdentifier = langid!("en"); diff --git a/fluent-bundle/examples/custom_formatter.rs b/fluent-bundle/examples/custom_formatter.rs index 8fc59f1f..f94ffc09 100644 --- a/fluent-bundle/examples/custom_formatter.rs +++ b/fluent-bundle/examples/custom_formatter.rs @@ -2,7 +2,7 @@ // to format selected types of values. // // This allows users to plug their own number formatter to Fluent. -use unic_langid::LanguageIdentifier; +use icu_locid::LanguageIdentifier; use fluent_bundle::memoizer::MemoizerKind; use fluent_bundle::types::{FluentNumber, FluentNumberOptions}; diff --git a/fluent-bundle/examples/custom_type.rs b/fluent-bundle/examples/custom_type.rs index a6093732..889f786c 100644 --- a/fluent-bundle/examples/custom_type.rs +++ b/fluent-bundle/examples/custom_type.rs @@ -9,8 +9,8 @@ // Lastly, we'll also create a new formatter which will be memoizable. // // The type and its options are modelled after ECMA402 Intl.DateTimeFormat. 
+use icu_locid::LanguageIdentifier; use intl_memoizer::Memoizable; -use unic_langid::LanguageIdentifier; use fluent_bundle::types::FluentType; use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; diff --git a/fluent-bundle/examples/external_arguments.rs b/fluent-bundle/examples/external_arguments.rs index fa9250ab..86bfc9ac 100644 --- a/fluent-bundle/examples/external_arguments.rs +++ b/fluent-bundle/examples/external_arguments.rs @@ -1,5 +1,5 @@ use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; -use unic_langid::langid; +use icu_locid::langid; fn main() { let ftl_string = String::from( diff --git a/fluent-bundle/examples/functions.rs b/fluent-bundle/examples/functions.rs index cfa4f46b..fa1a3d4f 100644 --- a/fluent-bundle/examples/functions.rs +++ b/fluent-bundle/examples/functions.rs @@ -1,5 +1,5 @@ use fluent_bundle::{FluentBundle, FluentResource, FluentValue}; -use unic_langid::langid; +use icu_locid::langid; fn main() { // We define the resources here so that they outlive diff --git a/fluent-bundle/examples/simple-app.rs b/fluent-bundle/examples/simple-app.rs index 8844832c..201195c5 100644 --- a/fluent-bundle/examples/simple-app.rs +++ b/fluent-bundle/examples/simple-app.rs @@ -19,6 +19,7 @@ //! default one. use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; use fluent_langneg::{negotiate_languages, NegotiationStrategy}; +use icu_locid::{langid, LanguageIdentifier}; use std::env; use std::fs; use std::fs::File; @@ -26,7 +27,6 @@ use std::io; use std::io::prelude::*; use std::path::Path; use std::str::FromStr; -use unic_langid::{langid, LanguageIdentifier}; /// We need a generic file read helper function to /// read the localization resource file. 
diff --git a/fluent-bundle/src/bundle.rs b/fluent-bundle/src/bundle.rs index 41a00e24..d30f2f0f 100644 --- a/fluent-bundle/src/bundle.rs +++ b/fluent-bundle/src/bundle.rs @@ -12,8 +12,8 @@ use std::default::Default; use std::fmt; use fluent_syntax::ast; +use icu_locid::LanguageIdentifier; use intl_memoizer::IntlLangMemoizer; -use unic_langid::LanguageIdentifier; use crate::args::FluentArgs; use crate::entry::Entry; @@ -32,7 +32,7 @@ use crate::types::FluentValue; /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource, FluentValue, FluentArgs}; -/// use unic_langid::langid; +/// use icu_locid::langid; /// /// // 1. Create a FluentResource /// @@ -163,7 +163,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from(" /// hello = Hi! @@ -253,7 +253,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from(" /// hello = Hi! 
@@ -359,7 +359,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello = Hi!"); /// let resource = FluentResource::try_new(ftl_string) @@ -384,7 +384,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello-world = Hello World!"); /// let resource = FluentResource::try_new(ftl_string) @@ -412,7 +412,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello-world = Hello World!"); /// let resource = FluentResource::try_new(ftl_string) @@ -459,7 +459,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello-world = Hello World!"); /// let resource = FluentResource::try_new(ftl_string) @@ -508,7 +508,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource, FluentValue}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("length = { STRLEN(\"12345\") }"); /// let resource = FluentResource::try_new(ftl_string) @@ -622,7 +622,7 @@ impl FluentBundle { /// ``` /// use fluent_bundle::FluentBundle; /// use fluent_bundle::FluentResource; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let langid_en = langid!("en-US"); /// let mut bundle: FluentBundle = FluentBundle::new(vec![langid_en]); diff --git a/fluent-bundle/src/concurrent.rs b/fluent-bundle/src/concurrent.rs index de55f0a3..839b0df2 100644 --- a/fluent-bundle/src/concurrent.rs +++ b/fluent-bundle/src/concurrent.rs @@ -1,6 +1,6 @@ +use icu_locid::LanguageIdentifier; use 
intl_memoizer::{concurrent::IntlLangMemoizer, Memoizable}; use rustc_hash::FxHashMap; -use unic_langid::LanguageIdentifier; use crate::memoizer::MemoizerKind; use crate::types::FluentType; @@ -23,7 +23,7 @@ impl FluentBundle { /// ``` /// use fluent_bundle::concurrent::FluentBundle; /// use fluent_bundle::FluentResource; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let langid_en = langid!("en-US"); /// let mut bundle: FluentBundle = diff --git a/fluent-bundle/src/errors.rs b/fluent-bundle/src/errors.rs index 58b1754b..4d2e4601 100644 --- a/fluent-bundle/src/errors.rs +++ b/fluent-bundle/src/errors.rs @@ -33,7 +33,7 @@ pub enum FluentError { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("intro = Welcome, { $name }."); /// let res1 = FluentResource::try_new(ftl_string) diff --git a/fluent-bundle/src/lib.rs b/fluent-bundle/src/lib.rs index 93d7ea53..bcb956bd 100644 --- a/fluent-bundle/src/lib.rs +++ b/fluent-bundle/src/lib.rs @@ -18,7 +18,7 @@ //! ``` //! use fluent_bundle::{FluentBundle, FluentValue, FluentResource, FluentArgs}; //! // Used to provide a locale for the bundle. -//! use unic_langid::langid; +//! use icu_locid::langid; //! //! // 1. Crate a FluentResource //! diff --git a/fluent-bundle/src/memoizer.rs b/fluent-bundle/src/memoizer.rs index 1f03e308..cbffd686 100644 --- a/fluent-bundle/src/memoizer.rs +++ b/fluent-bundle/src/memoizer.rs @@ -1,6 +1,6 @@ use crate::types::FluentType; +use icu_locid::LanguageIdentifier; use intl_memoizer::Memoizable; -use unic_langid::LanguageIdentifier; /// This trait contains thread-safe methods which extend [`intl_memoizer::IntlLangMemoizer`]. /// It is used as the generic bound in this crate when a memoizer is needed. 
diff --git a/fluent-bundle/src/types/mod.rs b/fluent-bundle/src/types/mod.rs index 585e90b6..fbcf1ab0 100644 --- a/fluent-bundle/src/types/mod.rs +++ b/fluent-bundle/src/types/mod.rs @@ -22,7 +22,7 @@ use std::borrow::{Borrow, Cow}; use std::fmt; use std::str::FromStr; -use intl_pluralrules::{PluralCategory, PluralRuleType}; +use icu_plurals::{PluralCategory, PluralRuleType}; use crate::memoizer::MemoizerKind; use crate::resolver::Scope; @@ -157,10 +157,10 @@ impl<'source> FluentValue<'source> { /// ``` /// use fluent_bundle::resolver::Scope; /// use fluent_bundle::{types::FluentValue, FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// - /// let langid_ars = langid!("en"); - /// let bundle: FluentBundle = FluentBundle::new(vec![langid_ars]); + /// let langid_en = langid!("en"); + /// let bundle: FluentBundle = FluentBundle::new(vec![langid_en]); /// let scope = Scope::new(&bundle, None, None); /// /// // Matching examples: @@ -189,12 +189,12 @@ impl<'source> FluentValue<'source> { (FluentValue::Number(a), FluentValue::Number(b)) => a == b, (FluentValue::String(a), FluentValue::Number(b)) => { let cat = match a.as_ref() { - "zero" => PluralCategory::ZERO, - "one" => PluralCategory::ONE, - "two" => PluralCategory::TWO, - "few" => PluralCategory::FEW, - "many" => PluralCategory::MANY, - "other" => PluralCategory::OTHER, + "zero" => PluralCategory::Zero, + "one" => PluralCategory::One, + "two" => PluralCategory::Two, + "few" => PluralCategory::Few, + "many" => PluralCategory::Many, + "other" => PluralCategory::Other, _ => return false, }; // This string matches a plural rule keyword. 
Check if the number @@ -206,9 +206,10 @@ impl<'source> FluentValue<'source> { scope .bundle .intls - .with_try_get_threadsafe::((r#type,), |pr| { - pr.0.select(b) == Ok(cat) - }) + .with_try_get_threadsafe::( + (PluralRuleType::Cardinal,), + |pr| pr.0.category_for(b) == cat, + ) .unwrap() } _ => false, diff --git a/fluent-bundle/src/types/number.rs b/fluent-bundle/src/types/number.rs index b9c3b2de..3d32db15 100644 --- a/fluent-bundle/src/types/number.rs +++ b/fluent-bundle/src/types/number.rs @@ -1,9 +1,8 @@ use std::borrow::Cow; -use std::convert::TryInto; use std::default::Default; use std::str::FromStr; -use intl_pluralrules::operands::PluralOperands; +use icu_plurals::PluralOperands; use crate::args::FluentArgs; use crate::types::FluentValue; @@ -231,18 +230,12 @@ macro_rules! from_num { impl From<&FluentNumber> for PluralOperands { fn from(input: &FluentNumber) -> Self { - let mut operands: Self = input - .value - .try_into() - .expect("Failed to generate operands out of FluentNumber"); + use fixed_decimal::{FixedDecimal, FloatPrecision}; + let mut fd = FixedDecimal::try_from_f64(input.value, FloatPrecision::Floating).unwrap(); if let Some(mfd) = input.options.minimum_fraction_digits { - if mfd > operands.v { - operands.f *= 10_u64.pow(mfd as u32 - operands.v as u32); - operands.v = mfd; - } + fd.pad_end(-(mfd as i16)); } - // XXX: Add support for other options. 
- operands + (&fd).into() } } diff --git a/fluent-bundle/src/types/plural.rs b/fluent-bundle/src/types/plural.rs index 1151fd6d..80fe7cb4 100644 --- a/fluent-bundle/src/types/plural.rs +++ b/fluent-bundle/src/types/plural.rs @@ -1,7 +1,6 @@ -use fluent_langneg::{negotiate_languages, NegotiationStrategy}; +use icu_locid::LanguageIdentifier; +use icu_plurals::{PluralRuleType, PluralRules as IntlPluralRules}; use intl_memoizer::Memoizable; -use intl_pluralrules::{PluralRuleType, PluralRules as IntlPluralRules}; -use unic_langid::LanguageIdentifier; pub struct PluralRules(pub IntlPluralRules); @@ -9,14 +8,11 @@ impl Memoizable for PluralRules { type Args = (PluralRuleType,); type Error = &'static str; fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result { - let default_lang: LanguageIdentifier = "en".parse().unwrap(); - let pr_lang = negotiate_languages( - &[lang], - &IntlPluralRules::get_locales(args.0), - Some(&default_lang), - NegotiationStrategy::Lookup, - )[0] - .clone(); - Ok(Self(IntlPluralRules::create(pr_lang, args.0)?)) + let inner = match args.0 { + PluralRuleType::Cardinal => IntlPluralRules::try_new_cardinal(&lang.into()), + PluralRuleType::Ordinal => IntlPluralRules::try_new_ordinal(&lang.into()), + _ => todo!(), + }; + Ok(Self(inner.unwrap())) } } diff --git a/fluent-bundle/tests/bundle.rs b/fluent-bundle/tests/bundle.rs index 7d3e6206..7311f5cd 100644 --- a/fluent-bundle/tests/bundle.rs +++ b/fluent-bundle/tests/bundle.rs @@ -1,6 +1,6 @@ use fluent_bundle::{FluentArgs, FluentBundle, FluentResource}; +use icu_locid::langid; use std::borrow::Cow; -use unic_langid::langid; #[test] fn add_resource_override() { diff --git a/fluent-bundle/tests/custom_types.rs b/fluent-bundle/tests/custom_types.rs index 082f864a..41d442ad 100644 --- a/fluent-bundle/tests/custom_types.rs +++ b/fluent-bundle/tests/custom_types.rs @@ -4,7 +4,7 @@ use fluent_bundle::FluentArgs; use fluent_bundle::FluentBundle; use fluent_bundle::FluentResource; use 
fluent_bundle::FluentValue; -use unic_langid::langid; +use icu_locid::langid; #[test] fn fluent_custom_type() { diff --git a/fluent-bundle/tests/function.rs b/fluent-bundle/tests/function.rs index 1d403e2f..493ce269 100644 --- a/fluent-bundle/tests/function.rs +++ b/fluent-bundle/tests/function.rs @@ -1,5 +1,6 @@ use fluent_bundle::types::FluentNumber; use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; +use icu_locid::langid; #[test] fn test_function_resolve() { @@ -21,7 +22,7 @@ liked-count2 = { NUMBER($num) -> ); let res = FluentResource::try_new(ftl_string).expect("Could not parse an FTL string."); - let mut bundle = FluentBundle::default(); + let mut bundle = FluentBundle::new(vec![langid!("en")]); bundle .add_function("NUMBER", |positional, named| match positional.first() { diff --git a/fluent-bundle/tests/resolver_fixtures.rs b/fluent-bundle/tests/resolver_fixtures.rs index e242a390..20ed8532 100644 --- a/fluent-bundle/tests/resolver_fixtures.rs +++ b/fluent-bundle/tests/resolver_fixtures.rs @@ -10,9 +10,9 @@ use fluent_bundle::resolver::ResolverError; use fluent_bundle::FluentArgs; use fluent_bundle::FluentError; use fluent_bundle::{FluentBundle, FluentResource, FluentValue}; +use icu_locid::LanguageIdentifier; use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; -use unic_langid::LanguageIdentifier; use helpers::*; diff --git a/fluent-bundle/tests/types_test.rs b/fluent-bundle/tests/types_test.rs index 08d4d9be..b1e9011a 100644 --- a/fluent-bundle/tests/types_test.rs +++ b/fluent-bundle/tests/types_test.rs @@ -6,8 +6,8 @@ use fluent_bundle::FluentArgs; use fluent_bundle::FluentBundle; use fluent_bundle::FluentResource; use fluent_bundle::FluentValue; -use intl_pluralrules::operands::PluralOperands; -use unic_langid::langid; +use icu_locid::langid; +use icu_plurals::PluralOperands; #[test] fn fluent_value_try_number() { @@ -17,10 +17,10 @@ fn fluent_value_try_number() { #[test] fn fluent_value_matches() { - // We'll 
use `ars` locale since it happens to have all + // We'll use `ar` locale since it happens to have all // plural rules categories. - let langid_ars = langid!("ars"); - let bundle: FluentBundle = FluentBundle::new(vec![langid_ars]); + let langid_ar = langid!("ar"); + let bundle: FluentBundle = FluentBundle::new(vec![langid_ar]); let scope = Scope::new(&bundle, None, None); let string_val = FluentValue::from("string1"); @@ -139,18 +139,60 @@ fn fluent_number_style() { #[test] fn fluent_number_to_operands() { + use icu_plurals::rules::RawPluralOperands; + let num = FluentNumber::new(2.81, FluentNumberOptions::default()); let operands: PluralOperands = (&num).into(); assert_eq!( operands, - PluralOperands { - n: 2.81, + RawPluralOperands { i: 2, v: 2, w: 2, f: 81, t: 81, + c: 0, + } + .into() + ); +} + +#[test] +fn fluent_number_to_float_vs_int() { + // This test verifies that we coalesce f64 `1.0` to usize `1`. + // See `From<&FluentNumber> for PluralOperands` for more details. + use icu_plurals::rules::RawPluralOperands; + + let num: FluentNumber = 1.0.into(); + let operands: PluralOperands = (&num).into(); + + assert_eq!( + operands, + RawPluralOperands { + i: 1, + v: 0, + w: 0, + f: 0, + t: 0, + c: 0, + } + .into() + ); + + let num: FluentNumber = 1.into(); + let operands: PluralOperands = (&num).into(); + + assert_eq!( + operands, + RawPluralOperands { + i: 1, + v: 0, + w: 0, + f: 0, + t: 0, + c: 0, } + .into() ); } diff --git a/fluent-fallback/Cargo.toml b/fluent-fallback/Cargo.toml index d017d3b0..0d974fd9 100644 --- a/fluent-fallback/Cargo.toml +++ b/fluent-fallback/Cargo.toml @@ -19,7 +19,7 @@ readme = "README.md" fluent-bundle.workspace = true futures.workspace = true rustc-hash.workspace = true -unic-langid.workspace = true +icu_locid.workspace = true async-trait = "0.1" chunky-vec = "0.1" once_cell = "1.19" @@ -27,6 +27,5 @@ pin-cell = "0.2" [dev-dependencies] fluent-langneg.workspace = true -unic-langid = { workspace = true, features = ["macros"] } tokio = { workspace = 
true, features = ["rt-multi-thread", "macros"] } fluent-resmgr = { path = "../fluent-resmgr" } diff --git a/fluent-fallback/examples/simple-fallback.rs b/fluent-fallback/examples/simple-fallback.rs index 33fc4e82..a11d4877 100644 --- a/fluent-fallback/examples/simple-fallback.rs +++ b/fluent-fallback/examples/simple-fallback.rs @@ -28,8 +28,8 @@ use fluent_fallback::{ }; use fluent_langneg::{negotiate_languages, NegotiationStrategy}; +use icu_locid::{langid, LanguageIdentifier}; use rustc_hash::FxHashSet; -use unic_langid::{langid, LanguageIdentifier}; /// This helper struct holds the scheme for converting /// resource paths into full paths. It is used to customise diff --git a/fluent-fallback/src/env.rs b/fluent-fallback/src/env.rs index cf340fcf..4d69ffa1 100644 --- a/fluent-fallback/src/env.rs +++ b/fluent-fallback/src/env.rs @@ -13,7 +13,7 @@ //! are available. The list should also be sorted according to the user //! preference, as the order is significant for how [`Localization`](crate::Localization) performs //! fallbacking. 
-use unic_langid::LanguageIdentifier; +use icu_locid::LanguageIdentifier; /// A trait used to provide a selection of locales to be used by the /// [`Localization`](crate::Localization) instance for runtime @@ -23,7 +23,7 @@ use unic_langid::LanguageIdentifier; /// ``` /// use fluent_fallback::{Localization, env::LocalesProvider}; /// use fluent_resmgr::ResourceManager; -/// use unic_langid::LanguageIdentifier; +/// use icu_locid::LanguageIdentifier; /// use std::{ /// rc::Rc, /// cell::RefCell diff --git a/fluent-fallback/src/errors.rs b/fluent-fallback/src/errors.rs index 704bc84f..35553f3c 100644 --- a/fluent-fallback/src/errors.rs +++ b/fluent-fallback/src/errors.rs @@ -1,6 +1,6 @@ use fluent_bundle::FluentError; +use icu_locid::LanguageIdentifier; use std::error::Error; -use unic_langid::LanguageIdentifier; #[derive(Debug, PartialEq, Eq)] pub enum LocalizationError { diff --git a/fluent-fallback/src/generator.rs b/fluent-fallback/src/generator.rs index f13af63c..4a02fd01 100644 --- a/fluent-fallback/src/generator.rs +++ b/fluent-fallback/src/generator.rs @@ -1,8 +1,8 @@ use fluent_bundle::{FluentBundle, FluentError, FluentResource}; use futures::Stream; +use icu_locid::LanguageIdentifier; use rustc_hash::FxHashSet; use std::borrow::Borrow; -use unic_langid::LanguageIdentifier; use crate::types::ResourceId; diff --git a/fluent-fallback/src/lib.rs b/fluent-fallback/src/lib.rs index dee5906f..d14b0b1d 100644 --- a/fluent-fallback/src/lib.rs +++ b/fluent-fallback/src/lib.rs @@ -26,7 +26,7 @@ //! ``` //! use fluent_fallback::{Localization, types::{ResourceType, ToResourceId}}; //! use fluent_resmgr::ResourceManager; -//! use unic_langid::langid; +//! use icu_locid::langid; //! //! let res_mgr = ResourceManager::new("./tests/resources/{locale}/".to_string()); //! @@ -96,7 +96,7 @@ //! As a long lived structure, the [`Localization`] is intended to handle runtime locale //! management. //! -//! In the example above, [`Vec`](unic_langid::LanguageIdentifier) +//! 
In the example above, [`Vec`](icu_locid::LanguageIdentifier) //! provides a static list of locales that the [`Localization`] handles, but that's just the //! simplest implementation of the [`env::LocalesProvider`], and one can implement //! a much more sophisticated one that reacts to user or environment driven changes, and diff --git a/fluent-fallback/tests/localization_test.rs b/fluent-fallback/tests/localization_test.rs index ebe57314..5df89de1 100644 --- a/fluent-fallback/tests/localization_test.rs +++ b/fluent-fallback/tests/localization_test.rs @@ -11,10 +11,10 @@ use fluent_fallback::{ types::{L10nKey, ResourceId}, Localization, LocalizationError, }; +use icu_locid::{langid, LanguageIdentifier}; use rustc_hash::FxHashSet; use std::cell::RefCell; use std::rc::Rc; -use unic_langid::{langid, LanguageIdentifier}; struct InnerLocales { locales: RefCell>, diff --git a/fluent-pseudo/README.md b/fluent-pseudo/README.md index 2b97ce12..f15eb5cc 100644 --- a/fluent-pseudo/README.md +++ b/fluent-pseudo/README.md @@ -13,7 +13,7 @@ Usage ```rust use fluent_bundle::{FluentBundle, FluentResource}; -use unic_langid::langid; +use icu_locid::langid; use fluent_pseudo::transform; fn transform_wrapper(s: &str) -> Cow { diff --git a/fluent-resmgr/Cargo.toml b/fluent-resmgr/Cargo.toml index 7029083c..760ccd36 100644 --- a/fluent-resmgr/Cargo.toml +++ b/fluent-resmgr/Cargo.toml @@ -20,9 +20,8 @@ fluent-fallback.workspace = true futures.workspace = true rustc-hash.workspace = true thiserror.workspace = true -unic-langid.workspace = true +icu_locid.workspace = true elsa = "1.10" [dev-dependencies] fluent-langneg.workspace = true -unic-langid = { workspace = true, features = ["macros"] } diff --git a/fluent-resmgr/examples/simple-resmgr.rs b/fluent-resmgr/examples/simple-resmgr.rs index 81b1fdd1..d184d97f 100644 --- a/fluent-resmgr/examples/simple-resmgr.rs +++ b/fluent-resmgr/examples/simple-resmgr.rs @@ -20,12 +20,12 @@ use fluent_bundle::{FluentArgs, FluentValue}; use 
fluent_langneg::{negotiate_languages, NegotiationStrategy}; use fluent_resmgr::resource_manager::ResourceManager; +use icu_locid::LanguageIdentifier; use std::env; use std::fs; use std::io; use std::path::PathBuf; use std::str::FromStr; -use unic_langid::LanguageIdentifier; /// This helper function allows us to read the list /// of available locales by reading the list of diff --git a/fluent-resmgr/src/resource_manager.rs b/fluent-resmgr/src/resource_manager.rs index 9f2cfc57..39a8d493 100644 --- a/fluent-resmgr/src/resource_manager.rs +++ b/fluent-resmgr/src/resource_manager.rs @@ -5,11 +5,11 @@ use fluent_fallback::{ types::ResourceId, }; use futures::stream::Stream; +use icu_locid::LanguageIdentifier; use rustc_hash::FxHashSet; use std::io; use std::{fs, iter}; use thiserror::Error; -use unic_langid::LanguageIdentifier; fn read_file(path: &str) -> Result { fs::read_to_string(path) @@ -222,7 +222,7 @@ impl BundleGenerator for ResourceManager { #[cfg(test)] mod test { use super::*; - use unic_langid::langid; + use icu_locid::langid; #[test] fn caching() { diff --git a/fluent-resmgr/tests/localization_test.rs b/fluent-resmgr/tests/localization_test.rs index d1534f29..d413071d 100644 --- a/fluent-resmgr/tests/localization_test.rs +++ b/fluent-resmgr/tests/localization_test.rs @@ -1,7 +1,7 @@ use fluent_fallback::Localization; use fluent_resmgr::resource_manager::ResourceManager; +use icu_locid::langid; use std::borrow::Cow; -use unic_langid::langid; #[test] fn localization_format_value() { diff --git a/fluent-syntax/benches/parser.rs b/fluent-syntax/benches/parser.rs index 2397044d..71fe96e5 100644 --- a/fluent-syntax/benches/parser.rs +++ b/fluent-syntax/benches/parser.rs @@ -18,7 +18,7 @@ fn get_resources(tests: &[&'static str]) -> HashMap<&'static str, String> { let path = format!("./benches/{}", test); ftl_strings.insert(*test, read_file(&path).expect("Couldn't load file")); } - return ftl_strings; + ftl_strings } fn get_ctxs(tests: &[&'static str]) -> 
HashMap<&'static str, Vec> { diff --git a/fluent-syntax/tests/parser_fixtures.rs b/fluent-syntax/tests/parser_fixtures.rs index eb8b9d1f..a067d38e 100644 --- a/fluent-syntax/tests/parser_fixtures.rs +++ b/fluent-syntax/tests/parser_fixtures.rs @@ -27,7 +27,7 @@ fn parse_fixtures_compare() { let reference_path = path.replace(".ftl", ".json"); let reference_file = read_file(&reference_path, true).unwrap(); - let ftl_file = read_file(&path, false).unwrap(); + let ftl_file = read_file(path, false).unwrap(); println!("Parsing: {:#?}", path); let target_ast = match parse(ftl_file) { @@ -72,7 +72,7 @@ fn parse_bench_fixtures() { file_name.replace(".ftl", ".json") ); let reference_file = read_file(&reference_path, true).unwrap(); - let ftl_file = read_file(&path, false).unwrap(); + let ftl_file = read_file(path, false).unwrap(); println!("Parsing: {:#?}", path); let target_ast = match parse(ftl_file) { @@ -106,7 +106,7 @@ fn parse_bench_fixtures() { file_name.replace(".ftl", ".json") ); let reference_file = read_file(&reference_path, true).unwrap(); - let ftl_file = read_file(&path, false).unwrap(); + let ftl_file = read_file(path, false).unwrap(); println!("Parsing: {:#?}", path); let target_ast = match parse(ftl_file.clone()) { diff --git a/fluent/Cargo.toml b/fluent/Cargo.toml index c57985c0..47049327 100644 --- a/fluent/Cargo.toml +++ b/fluent/Cargo.toml @@ -26,4 +26,4 @@ include = [ [dependencies] fluent-bundle.workspace = true fluent-pseudo = { workspace = true, optional = true } -unic-langid.workspace = true +icu_locid.workspace = true diff --git a/fluent/README.md b/fluent/README.md index 875dd50d..1b15651b 100644 --- a/fluent/README.md +++ b/fluent/README.md @@ -23,7 +23,7 @@ Usage ```rust use fluent::{FluentBundle, FluentResource}; -use unic_langid::langid; +use icu_locid::langid; fn main() { let ftl_string = "hello-world = Hello, world!".to_owned(); diff --git a/fluent/src/lib.rs b/fluent/src/lib.rs index d91e9c52..cd7be073 100644 --- a/fluent/src/lib.rs +++ 
b/fluent/src/lib.rs @@ -20,7 +20,7 @@ //! use fluent::{FluentBundle, FluentValue, FluentResource, FluentArgs}; //! //! // Used to provide a locale for the bundle. -//! use unic_langid::LanguageIdentifier; +//! use icu_locid::LanguageIdentifier; //! //! let ftl_string = String::from(" //! hello-world = Hello, world! diff --git a/intl-memoizer/Cargo.toml b/intl-memoizer/Cargo.toml index b741604b..a0e2483f 100644 --- a/intl-memoizer/Cargo.toml +++ b/intl-memoizer/Cargo.toml @@ -24,9 +24,14 @@ include = [ ] [dependencies] -unic-langid.workspace = true +icu_locid.workspace = true +icu_plurals.workspace = true +icu_provider.workspace = true type-map = "0.5" [dev-dependencies] -intl_pluralrules.workspace = true fluent-langneg.workspace = true + +[features] +default = [] +sync = ["icu_provider/sync"] diff --git a/intl-memoizer/examples/numberformat.rs b/intl-memoizer/examples/numberformat.rs index 793c890c..01aa1519 100644 --- a/intl-memoizer/examples/numberformat.rs +++ b/intl-memoizer/examples/numberformat.rs @@ -1,5 +1,5 @@ +use icu_locid::LanguageIdentifier; use intl_memoizer::{IntlMemoizer, Memoizable}; -use unic_langid::LanguageIdentifier; #[derive(Clone, Hash, PartialEq, Eq)] struct NumberFormatOptions { diff --git a/intl-memoizer/examples/pluralrules.rs b/intl-memoizer/examples/pluralrules.rs index a37f8d1a..b07d3b0d 100644 --- a/intl-memoizer/examples/pluralrules.rs +++ b/intl-memoizer/examples/pluralrules.rs @@ -1,12 +1,18 @@ +use icu_locid::LanguageIdentifier; +use icu_plurals::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; use intl_memoizer::{IntlMemoizer, Memoizable}; -use intl_pluralrules::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; -use unic_langid::LanguageIdentifier; struct PluralRules(pub IntlPluralRules); impl PluralRules { pub fn new(lang: LanguageIdentifier, pr_type: PluralRuleType) -> Result { - Ok(Self(IntlPluralRules::create(lang, pr_type)?)) + let locale = lang.into(); + let inner = match pr_type { + 
PluralRuleType::Cardinal => IntlPluralRules::try_new_cardinal(&locale), + PluralRuleType::Ordinal => IntlPluralRules::try_new_ordinal(&locale), + _ => todo!(), + }; + Ok(Self(inner.unwrap())) } } @@ -24,8 +30,8 @@ fn main() { let lang: LanguageIdentifier = "en".parse().unwrap(); let lang_memoizer = memoizer.get_for_lang(lang); let result = lang_memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |pr| pr.0.select(5)) + .with_try_get::((PluralRuleType::Cardinal,), |pr| pr.0.category_for(5)) .unwrap(); - assert_eq!(result, Ok(PluralCategory::OTHER)); + assert_eq!(result, PluralCategory::Other); } diff --git a/intl-memoizer/src/lib.rs b/intl-memoizer/src/lib.rs index d9986571..184c8e94 100644 --- a/intl-memoizer/src/lib.rs +++ b/intl-memoizer/src/lib.rs @@ -4,12 +4,12 @@ //! //! The [`IntlMemoizer`] is the main struct that creates a per-locale [`IntlLangMemoizer`]. +use icu_locid::LanguageIdentifier; use std::cell::RefCell; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::hash::Hash; use std::rc::{Rc, Weak}; -use unic_langid::LanguageIdentifier; pub mod concurrent; @@ -47,7 +47,7 @@ pub trait Memoizable { /// /// ``` /// use intl_memoizer::{IntlLangMemoizer, Memoizable}; -/// use unic_langid::LanguageIdentifier; +/// use icu_locid::LanguageIdentifier; /// /// // Create a static counter so that we can demonstrate the side effects of when /// // the memoizer re-constructs an API. 
@@ -249,7 +249,7 @@ impl IntlLangMemoizer { /// /// ``` /// # use intl_memoizer::{IntlMemoizer, IntlLangMemoizer, Memoizable}; -/// # use unic_langid::LanguageIdentifier; +/// # use icu_locid::LanguageIdentifier; /// # use std::rc::Rc; /// # /// # struct ExampleFormatter { @@ -353,9 +353,7 @@ impl IntlMemoizer { #[cfg(test)] mod tests { use super::*; - use fluent_langneg::{negotiate_languages, NegotiationStrategy}; - use intl_pluralrules::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; - use std::{sync::Arc, thread}; + use icu_plurals::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; struct PluralRules(pub IntlPluralRules); @@ -364,16 +362,12 @@ mod tests { lang: LanguageIdentifier, pr_type: PluralRuleType, ) -> Result { - let default_lang: LanguageIdentifier = "en".parse().unwrap(); - let pr_lang = negotiate_languages( - &[lang], - &IntlPluralRules::get_locales(pr_type), - Some(&default_lang), - NegotiationStrategy::Lookup, - )[0] - .clone(); - - Ok(Self(IntlPluralRules::create(pr_lang, pr_type)?)) + let inner = match pr_type { + PluralRuleType::Cardinal => IntlPluralRules::try_new_cardinal(&lang.into()), + PluralRuleType::Ordinal => IntlPluralRules::try_new_ordinal(&lang.into()), + _ => todo!(), + }; + Ok(Self(inner.unwrap())) } } @@ -394,23 +388,30 @@ mod tests { let en_memoizer = memoizer.get_for_lang(lang.clone()); let result = en_memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |cb| cb.0.select(5)) + .with_try_get::((PluralRuleType::Cardinal,), |cb| { + cb.0.category_for(5) + }) .unwrap(); - assert_eq!(result, Ok(PluralCategory::OTHER)); + assert_eq!(result, PluralCategory::Other); } { let en_memoizer = memoizer.get_for_lang(lang); let result = en_memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |cb| cb.0.select(5)) + .with_try_get::((PluralRuleType::Cardinal,), |cb| { + cb.0.category_for(5) + }) .unwrap(); - assert_eq!(result, Ok(PluralCategory::OTHER)); + assert_eq!(result, PluralCategory::Other); } } + 
#[cfg(feature = "sync")] #[test] fn test_concurrent() { + use std::{sync::Arc, thread}; + let lang: LanguageIdentifier = "en".parse().unwrap(); let memoizer = Arc::new(concurrent::IntlLangMemoizer::new(lang)); let mut threads = vec![]; @@ -420,8 +421,8 @@ mod tests { let memoizer = Arc::clone(&memoizer); threads.push(thread::spawn(move || { memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |cb| { - cb.0.select(5) + .with_try_get::((PluralRuleType::Cardinal,), |cb| { + cb.0.category_for(5) }) .expect("Failed to get a PluralRules result.") })); @@ -429,7 +430,7 @@ mod tests { for thread in threads.drain(..) { let result = thread.join().expect("Failed to join thread."); - assert_eq!(result, Ok(PluralCategory::OTHER)); + assert_eq!(result, PluralCategory::Other); } } } From f2476cb06cc1bf8337ee6ad2c4918f81262abb9a Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Mon, 18 Dec 2023 12:00:45 -0800 Subject: [PATCH 2/4] refactor: Separate sync feature --- fluent-bundle/Cargo.toml | 4 ++-- fluent-bundle/examples/custom_type.rs | 2 ++ fluent-bundle/src/bundle.rs | 12 ++++++++++++ fluent-bundle/src/concurrent.rs | 20 ++++++++++++++++++-- fluent-bundle/src/lib.rs | 1 + fluent-bundle/src/memoizer.rs | 9 +++++++++ fluent-bundle/src/types/mod.rs | 16 ++++++++++++++-- fluent-bundle/tests/custom_types.rs | 4 ++++ intl-memoizer/src/lib.rs | 1 + 9 files changed, 63 insertions(+), 6 deletions(-) diff --git a/fluent-bundle/Cargo.toml b/fluent-bundle/Cargo.toml index 3703b8f8..2b0890d9 100644 --- a/fluent-bundle/Cargo.toml +++ b/fluent-bundle/Cargo.toml @@ -43,8 +43,8 @@ rand = "0.8" serde_yaml = "0.9" [features] -default = ["icu_provider/sync"] -sync = ["icu_provider/sync"] +default = [] +sync = ["intl-memoizer/sync", "icu_provider/sync"] all-benchmarks = [] [[bench]] diff --git a/fluent-bundle/examples/custom_type.rs b/fluent-bundle/examples/custom_type.rs index 889f786c..88f23e41 100644 --- a/fluent-bundle/examples/custom_type.rs +++ b/fluent-bundle/examples/custom_type.rs @@ 
-107,6 +107,8 @@ impl FluentType for DateTime { }) .expect("Failed to format a date.") } + + #[cfg(feature = "sync")] fn as_string_threadsafe( &self, _: &intl_memoizer::concurrent::IntlLangMemoizer, diff --git a/fluent-bundle/src/bundle.rs b/fluent-bundle/src/bundle.rs index d30f2f0f..65f15603 100644 --- a/fluent-bundle/src/bundle.rs +++ b/fluent-bundle/src/bundle.rs @@ -653,6 +653,7 @@ impl crate::memoizer::MemoizerKind for IntlLangMemoizer { Self::new(lang) } + #[cfg(feature = "sync")] fn with_try_get_threadsafe(&self, args: I::Args, cb: U) -> Result where Self: Sized, @@ -663,6 +664,17 @@ impl crate::memoizer::MemoizerKind for IntlLangMemoizer { self.with_try_get(args, cb) } + #[cfg(not(feature = "sync"))] + fn with_try_get(&self, args: I::Args, cb: U) -> Result + where + Self: Sized, + I: intl_memoizer::Memoizable + 'static, + I::Args: 'static, + U: FnOnce(&I) -> R, + { + self.with_try_get(args, cb) + } + fn stringify_value( &self, value: &dyn crate::types::FluentType, diff --git a/fluent-bundle/src/concurrent.rs b/fluent-bundle/src/concurrent.rs index 839b0df2..64df98c3 100644 --- a/fluent-bundle/src/concurrent.rs +++ b/fluent-bundle/src/concurrent.rs @@ -1,5 +1,9 @@ use icu_locid::LanguageIdentifier; -use intl_memoizer::{concurrent::IntlLangMemoizer, Memoizable}; +#[cfg(feature = "sync")] +use intl_memoizer::concurrent::IntlLangMemoizer; +#[cfg(not(feature = "sync"))] +use intl_memoizer::IntlLangMemoizer; +use intl_memoizer::Memoizable; use rustc_hash::FxHashMap; use crate::memoizer::MemoizerKind; @@ -51,6 +55,7 @@ impl MemoizerKind for IntlLangMemoizer { Self::new(lang) } + #[cfg(feature = "sync")] fn with_try_get_threadsafe(&self, args: I::Args, cb: U) -> Result where Self: Sized, @@ -58,7 +63,18 @@ impl MemoizerKind for IntlLangMemoizer { I::Args: Send + Sync + 'static, U: FnOnce(&I) -> R, { - self.with_try_get(args, cb) + Self::with_try_get(self, args, cb) + } + + #[cfg(not(feature = "sync"))] + fn with_try_get(&self, args: I::Args, cb: U) -> Result + 
where + Self: Sized, + I: Memoizable + 'static, + I::Args: 'static, + U: FnOnce(&I) -> R, + { + Self::with_try_get(self, args, cb) } fn stringify_value(&self, value: &dyn FluentType) -> std::borrow::Cow<'static, str> { diff --git a/fluent-bundle/src/lib.rs b/fluent-bundle/src/lib.rs index bcb956bd..a0db15d5 100644 --- a/fluent-bundle/src/lib.rs +++ b/fluent-bundle/src/lib.rs @@ -101,6 +101,7 @@ mod args; pub mod builtins; pub mod bundle; +#[cfg(feature = "sync")] pub mod concurrent; mod entry; mod errors; diff --git a/fluent-bundle/src/memoizer.rs b/fluent-bundle/src/memoizer.rs index cbffd686..560ba02f 100644 --- a/fluent-bundle/src/memoizer.rs +++ b/fluent-bundle/src/memoizer.rs @@ -18,6 +18,7 @@ pub trait MemoizerKind: 'static { /// /// `U` - The callback that accepts the instance of the intl formatter, and generates /// some kind of results `R`. + #[cfg(feature = "sync")] fn with_try_get_threadsafe(&self, args: I::Args, callback: U) -> Result where Self: Sized, @@ -25,6 +26,14 @@ pub trait MemoizerKind: 'static { I::Args: Send + Sync + 'static, U: FnOnce(&I) -> R; + #[cfg(not(feature = "sync"))] + fn with_try_get(&self, args: I::Args, callback: U) -> Result + where + Self: Sized, + I: Memoizable + 'static, + I::Args: 'static, + U: FnOnce(&I) -> R; + /// Wires up the `as_string` or `as_string_threadsafe` variants for [`FluentType`]. fn stringify_value(&self, value: &dyn FluentType) -> std::borrow::Cow<'static, str>; } diff --git a/fluent-bundle/src/types/mod.rs b/fluent-bundle/src/types/mod.rs index fbcf1ab0..2aab29b2 100644 --- a/fluent-bundle/src/types/mod.rs +++ b/fluent-bundle/src/types/mod.rs @@ -41,6 +41,7 @@ pub trait FluentType: fmt::Debug + AnyEq + 'static { /// Convert the custom type into a string value, for instance a custom `DateTime` /// type could return "Oct. 27, 2022". This operation is provided the threadsafe /// [`IntlLangMemoizer`](intl_memoizer::concurrent::IntlLangMemoizer). 
+ #[cfg(feature = "sync")] fn as_string_threadsafe( &self, intls: &intl_memoizer::concurrent::IntlLangMemoizer, @@ -203,14 +204,25 @@ impl<'source> FluentValue<'source> { FluentNumberType::Cardinal => PluralRuleType::CARDINAL, FluentNumberType::Ordinal => PluralRuleType::ORDINAL, }; - scope + #[cfg(feature = "sync")] + let result = scope .bundle .intls .with_try_get_threadsafe::( (PluralRuleType::Cardinal,), |pr| pr.0.category_for(b) == cat, ) - .unwrap() + .unwrap(); + + #[cfg(not(feature = "sync"))] + let result = scope + .bundle + .intls + .with_try_get::((PluralRuleType::Cardinal,), |pr| { + pr.0.category_for(b) == cat + }) + .unwrap(); + result } _ => false, } diff --git a/fluent-bundle/tests/custom_types.rs b/fluent-bundle/tests/custom_types.rs index 41d442ad..1a153b35 100644 --- a/fluent-bundle/tests/custom_types.rs +++ b/fluent-bundle/tests/custom_types.rs @@ -26,6 +26,8 @@ fn fluent_custom_type() { fn as_string(&self, _: &intl_memoizer::IntlLangMemoizer) -> std::borrow::Cow<'static, str> { format!("{}", self.epoch).into() } + + #[cfg(feature = "sync")] fn as_string_threadsafe( &self, _: &intl_memoizer::concurrent::IntlLangMemoizer, @@ -118,6 +120,8 @@ fn fluent_date_time_builtin() { fn as_string(&self, _: &intl_memoizer::IntlLangMemoizer) -> std::borrow::Cow<'static, str> { format!("2020-01-20 {}:00", self.epoch).into() } + + #[cfg(feature = "sync")] fn as_string_threadsafe( &self, _intls: &intl_memoizer::concurrent::IntlLangMemoizer, diff --git a/intl-memoizer/src/lib.rs b/intl-memoizer/src/lib.rs index 184c8e94..90ab44a5 100644 --- a/intl-memoizer/src/lib.rs +++ b/intl-memoizer/src/lib.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use std::hash::Hash; use std::rc::{Rc, Weak}; +#[cfg(feature = "sync")] pub mod concurrent; /// The trait that needs to be implemented for each intl formatter that needs to be From adef7693a47ead8a2bf43b1bb4bf8d63a58d6cf4 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 21 Dec 2023 14:33:31 +0100 Subject: [PATCH 
3/4] refactor(intl-memoizer): Move intl_memoizer to capture provider --- intl-memoizer/Cargo.toml | 11 + intl-memoizer/benches/single.rs | 153 ++++++++++ intl-memoizer/src/lang_memoizer.rs | 48 ++++ intl-memoizer/src/lib.rs | 442 +---------------------------- intl-memoizer/src/memoizable.rs | 17 ++ intl-memoizer/src/memoizer.rs | 28 ++ intl-memoizer/tests/single.rs | 14 + 7 files changed, 277 insertions(+), 436 deletions(-) create mode 100644 intl-memoizer/benches/single.rs create mode 100644 intl-memoizer/src/lang_memoizer.rs create mode 100644 intl-memoizer/src/memoizable.rs create mode 100644 intl-memoizer/src/memoizer.rs create mode 100644 intl-memoizer/tests/single.rs diff --git a/intl-memoizer/Cargo.toml b/intl-memoizer/Cargo.toml index a0e2483f..65615e07 100644 --- a/intl-memoizer/Cargo.toml +++ b/intl-memoizer/Cargo.toml @@ -28,10 +28,21 @@ icu_locid.workspace = true icu_plurals.workspace = true icu_provider.workspace = true type-map = "0.5" +hashbrown = "0.14" [dev-dependencies] fluent-langneg.workspace = true +criterion.workspace = true +icu_datetime = {version = "1.4", features = ["serde"]} +icu_calendar = "1.4" +icu_decimal = "1.4" +icu_provider_blob = "1.4" [features] default = [] sync = ["icu_provider/sync"] + +[[bench]] +name = "single" +harness = false + diff --git a/intl-memoizer/benches/single.rs b/intl-memoizer/benches/single.rs new file mode 100644 index 00000000..c5e9d621 --- /dev/null +++ b/intl-memoizer/benches/single.rs @@ -0,0 +1,153 @@ +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; +use criterion::{Bencher, BenchmarkId}; +use icu_calendar::DateTime; +use icu_datetime::{options::length::Time, TimeFormatter}; +use icu_locid::LanguageIdentifier; +use intl_memoizer::{IntlLangMemoizer, Memoizable}; + +struct TF(pub TimeFormatter); + +use icu_provider_blob::BlobDataProvider; +const ICU4X_DATA: &[u8] = include_bytes!(concat!( + "/Users/zibi/projects/icu-perf/data/icu4x-1.4-datetime.postcard" +)); +
+impl Memoizable for TF { + type Args = (Time,); + + type Provider = icu_provider_blob::BlobDataProvider; + + /// If the construtor is fallible, than errors can be described here. + type Error = (); + + /// This function wires together the `Args` and `Error` type to construct + /// the intl API. In our example, there is + fn construct( + lang: LanguageIdentifier, + args: Self::Args, + provider: Option<&Self::Provider>, + ) -> Result { + Ok(Self( + TimeFormatter::try_new_with_length_with_buffer_provider( + provider.unwrap(), + &lang.into(), + args.0, + ) + .unwrap(), + )) + } +} + +const SETS: usize = 10; +const REPS: usize = 10; + +fn construct_lang_bench(c: &mut Criterion) { + let lang: LanguageIdentifier = "en-US".parse().unwrap(); + let provider = + BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); + + c.bench_with_input( + BenchmarkId::new("construct_lang", &lang), + &(lang, provider), + |b, (lang, provider)| { + b.iter(|| { + let _ = IntlLangMemoizer::new(lang.clone(), Some(provider)); + }); + }, + ); +} + +fn populate_lang(c: &mut Criterion) { + let lang: LanguageIdentifier = "en".parse().unwrap(); + + let input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); + let provider = + BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); + let construct_args = (Time::Short,); + + c.bench_with_input( + BenchmarkId::new("populate_lang", &lang), + &(construct_args, provider), + |b: &mut Bencher, (construct_args, provider)| { + b.iter(|| { + let memoizer = IntlLangMemoizer::new(lang.clone(), Some(provider)); + for _ in 0..SETS { + for _ in 0..REPS { + let _ = memoizer.with_try_get::(construct_args, |intl_example| { + intl_example.0.format_to_string(&input) + }); + } + } + }); + }, + ); +} + +fn without_memoizer(c: &mut Criterion) { + let lang: LanguageIdentifier = "en".parse().unwrap(); + let provider = + BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to 
load data"); + let construct_args = (Time::Short,); + + let input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); + + c.bench_with_input( + BenchmarkId::new("without_memoizer", &lang), + &(construct_args, provider), + |b: &mut Bencher, (construct_args, provider)| { + b.iter(|| { + for _ in 0..SETS { + for _ in 0..REPS { + let formatter = TimeFormatter::try_new_with_length_with_buffer_provider( + provider, + &lang.clone().into(), + construct_args.0, + ) + .unwrap(); + let _ = formatter.format(&input); + } + } + }); + }, + ); +} + +fn without_memoizer_hoisted(c: &mut Criterion) { + let lang: LanguageIdentifier = "en".parse().unwrap(); + let provider = + BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); + let construct_args = (Time::Short,); + + let input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); + + c.bench_with_input( + BenchmarkId::new("without_memoizer_hoisted", &lang), + &(construct_args, provider), + |b: &mut Bencher, (construct_args, provider)| { + b.iter(|| { + for _ in 0..SETS { + let formatter = TimeFormatter::try_new_with_length_with_buffer_provider( + provider, + &lang.clone().into(), + construct_args.0, + ) + .unwrap(); + for _ in 0..REPS { + let _ = formatter.format(&input); + } + } + }); + }, + ); +} + +criterion_group!( + benches, + construct_lang_bench, + populate_lang, + without_memoizer, + without_memoizer_hoisted +); +criterion_main!(benches); diff --git a/intl-memoizer/src/lang_memoizer.rs b/intl-memoizer/src/lang_memoizer.rs new file mode 100644 index 00000000..80cc06e5 --- /dev/null +++ b/intl-memoizer/src/lang_memoizer.rs @@ -0,0 +1,48 @@ +// use std::collections::HashMap; +use crate::memoizable::Memoizable; +use hashbrown::HashMap; +use icu_locid::LanguageIdentifier; +use std::cell::RefCell; + +pub struct IntlLangMemoizer<'dp, DP = ()> { + lang: LanguageIdentifier, + provider: Option<&'dp DP>, + map: RefCell, +} + +impl<'dp, DP> IntlLangMemoizer<'dp, 
DP> { + pub fn new(lang: LanguageIdentifier, provider: Option<&'dp DP>) -> Self { + Self { + lang, + provider, + map: Default::default(), + } + } + + pub fn with_try_get( + &self, + construct_args: &I::Args, + callback: U, + ) -> Result + where + Self: Sized, + I: Memoizable + 'static, + U: FnOnce(&I) -> R, + { + let mut map = self.map.borrow_mut(); + + let cache = map.entry().or_insert_with(HashMap::::new); + + let (_, e) = cache + .raw_entry_mut() + .from_key(construct_args) + .or_insert_with(|| { + ( + construct_args.clone(), + I::construct(self.lang.clone(), construct_args.clone(), self.provider) + .expect("FOO"), + ) + }); + Ok(callback(e)) + } +} diff --git a/intl-memoizer/src/lib.rs b/intl-memoizer/src/lib.rs index 90ab44a5..2468ac18 100644 --- a/intl-memoizer/src/lib.rs +++ b/intl-memoizer/src/lib.rs @@ -1,437 +1,7 @@ -//! This crate contains a memoizer for internationalization formatters. Often it is -//! expensive (in terms of performance and memory) to construct a formatter, but then -//! relatively cheap to run the format operation. -//! -//! The [`IntlMemoizer`] is the main struct that creates a per-locale [`IntlLangMemoizer`]. +mod lang_memoizer; +mod memoizable; +mod memoizer; -use icu_locid::LanguageIdentifier; -use std::cell::RefCell; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::hash::Hash; -use std::rc::{Rc, Weak}; - -#[cfg(feature = "sync")] -pub mod concurrent; - -/// The trait that needs to be implemented for each intl formatter that needs to be -/// memoized. -pub trait Memoizable { - /// Type of the arguments that are used to construct the formatter. - type Args: 'static + Eq + Hash + Clone; - - /// Type of any errors that can occur during the construction process. - type Error; - - /// Construct a formatter. This maps the [`Self::Args`] type to the actual constructor - /// for an intl formatter. 
- fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result - where - Self: std::marker::Sized; -} - -/// The [`IntlLangMemoizer`] can memoize multiple constructed internationalization -/// formatters, and their configuration for a single locale. For instance, given "en-US", -/// a memorizer could retain 3 `DateTimeFormat` instances, and a `PluralRules`. -/// -/// For memoizing with multiple locales, see [`IntlMemoizer`]. -/// -/// # Example -/// -/// The code example does the following steps: -/// -/// 1. Create a static counter -/// 2. Create an `ExampleFormatter` -/// 3. Implement [`Memoizable`] for `ExampleFormatter`. -/// 4. Use `IntlLangMemoizer::with_try_get` to run `ExampleFormatter::format` -/// 5. Demonstrate the memoization using the static counter -/// -/// ``` -/// use intl_memoizer::{IntlLangMemoizer, Memoizable}; -/// use icu_locid::LanguageIdentifier; -/// -/// // Create a static counter so that we can demonstrate the side effects of when -/// // the memoizer re-constructs an API. -/// -/// static mut INTL_EXAMPLE_CONSTRUCTS: u32 = 0; -/// fn increment_constructs() { -/// unsafe { -/// INTL_EXAMPLE_CONSTRUCTS += 1; -/// } -/// } -/// -/// fn get_constructs_count() -> u32 { -/// unsafe { INTL_EXAMPLE_CONSTRUCTS } -/// } -/// -/// /// Create an example formatter, that doesn't really do anything useful. In a real -/// /// implementation, this could be a PluralRules or DateTimeFormat struct. -/// struct ExampleFormatter { -/// lang: LanguageIdentifier, -/// /// This is here to show how to initiate the API with an argument. -/// prefix: String, -/// } -/// -/// impl ExampleFormatter { -/// /// Perform an example format by printing information about the formatter -/// /// configuration, and the arguments passed into the individual format operation. 
-/// fn format(&self, example_string: &str) -> String { -/// format!( -/// "{} lang({}) string({})", -/// self.prefix, self.lang, example_string -/// ) -/// } -/// } -/// -/// /// Multiple classes of structs may be add1ed to the memoizer, with the restriction -/// /// that they must implement the `Memoizable` trait. -/// impl Memoizable for ExampleFormatter { -/// /// The arguments will be passed into the constructor. Here a single `String` -/// /// will be used as a prefix to the formatting operation. -/// type Args = (String,); -/// -/// /// If the constructor is fallible, than errors can be described here. -/// type Error = (); -/// -/// /// This function wires together the `Args` and `Error` type to construct -/// /// the intl API. In our example, there is -/// fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result { -/// // Keep track for example purposes that this was constructed. -/// increment_constructs(); -/// -/// Ok(Self { -/// lang, -/// prefix: args.0, -/// }) -/// } -/// } -/// -/// // The following demonstrates how these structs are actually used with the memoizer. -/// -/// // Construct a new memoizer. -/// let lang = "en-US".parse().expect("Failed to parse."); -/// let memoizer = IntlLangMemoizer::new(lang); -/// -/// // These arguments are passed into the constructor for `ExampleFormatter`. -/// let construct_args = (String::from("prefix:"),); -/// let message1 = "The format operation will run"; -/// let message2 = "ExampleFormatter will be re-used, when a second format is run"; -/// -/// // Run `IntlLangMemoizer::with_try_get`. The name of the method means "with" an -/// // intl formatter, "try and get" the result. See the method documentation for -/// // more details. -/// -/// let result1 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message1) -/// }); -/// -/// // The memoized instance of `ExampleFormatter` will be re-used. 
-/// let result2 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message2) -/// }); -/// -/// assert_eq!( -/// result1.unwrap(), -/// "prefix: lang(en-US) string(The format operation will run)" -/// ); -/// assert_eq!( -/// result2.unwrap(), -/// "prefix: lang(en-US) string(ExampleFormatter will be re-used, when a second format is run)" -/// ); -/// assert_eq!( -/// get_constructs_count(), -/// 1, -/// "The constructor was only run once." -/// ); -/// -/// let construct_args = (String::from("re-init:"),); -/// -/// // Since the constructor args changed, `ExampleFormatter` will be re-constructed. -/// let result1 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message1) -/// }); -/// -/// // The memoized instance of `ExampleFormatter` will be re-used. -/// let result2 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message2) -/// }); -/// -/// assert_eq!( -/// result1.unwrap(), -/// "re-init: lang(en-US) string(The format operation will run)" -/// ); -/// assert_eq!( -/// result2.unwrap(), -/// "re-init: lang(en-US) string(ExampleFormatter will be re-used, when a second format is run)" -/// ); -/// assert_eq!( -/// get_constructs_count(), -/// 2, -/// "The constructor was invalidated and ran again." -/// ); -/// ``` -#[derive(Debug)] -pub struct IntlLangMemoizer { - lang: LanguageIdentifier, - map: RefCell, -} - -impl IntlLangMemoizer { - /// Create a new [`IntlLangMemoizer`] that is unique to a specific - /// [`LanguageIdentifier`] - pub fn new(lang: LanguageIdentifier) -> Self { - Self { - lang, - map: RefCell::new(type_map::TypeMap::new()), - } - } - - /// `with_try_get` means `with` an internationalization formatter, `try` and `get` a result. 
- /// The (potentially expensive) constructor for the formatter (such as `PluralRules` or - /// `DateTimeFormat`) will be memoized and only constructed once for a given - /// `construct_args`. After that the format operation can be run multiple times - /// inexpensively. - /// - /// The first generic argument `I` must be provided, but the `R` and `U` will be - /// deduced by the typing of the `callback` argument that is provided. - /// - /// I - The memoizable intl object, for instance a `PluralRules` instance. This - /// must implement the Memoizable trait. - /// - /// R - The return result from the callback `U`. - /// - /// U - The callback function. Takes an instance of `I` as the first parameter and - /// returns the R value. - pub fn with_try_get(&self, construct_args: I::Args, callback: U) -> Result - where - Self: Sized, - I: Memoizable + 'static, - U: FnOnce(&I) -> R, - { - let mut map = self - .map - .try_borrow_mut() - .expect("Cannot use memoizer reentrantly"); - let cache = map - .entry::>() - .or_insert_with(HashMap::new); - - let e = match cache.entry(construct_args.clone()) { - Entry::Occupied(entry) => entry.into_mut(), - Entry::Vacant(entry) => { - let val = I::construct(self.lang.clone(), construct_args)?; - entry.insert(val) - } - }; - Ok(callback(e)) - } -} - -/// [`IntlMemoizer`] is designed to handle lazily-initialized references to -/// internationalization formatters. -/// -/// Constructing a new formatter is often expensive in terms of memory and performance, -/// and the instance is often read-only during its lifetime. The format operations in -/// comparison are relatively cheap. -/// -/// Because of this relationship, it can be helpful to memoize the constructors, and -/// re-use them across multiple format operations. This strategy is used where all -/// instances of intl APIs such as `PluralRules`, `DateTimeFormat` etc. are memoized -/// between all `FluentBundle` instances. 
-/// -/// # Example -/// -/// For a more complete example of the memoization, see the [`IntlLangMemoizer`] documentation. -/// This example provides a higher-level overview. -/// -/// ``` -/// # use intl_memoizer::{IntlMemoizer, IntlLangMemoizer, Memoizable}; -/// # use icu_locid::LanguageIdentifier; -/// # use std::rc::Rc; -/// # -/// # struct ExampleFormatter { -/// # lang: LanguageIdentifier, -/// # prefix: String, -/// # } -/// # -/// # impl ExampleFormatter { -/// # fn format(&self, example_string: &str) -> String { -/// # format!( -/// # "{} lang({}) string({})", -/// # self.prefix, self.lang, example_string -/// # ) -/// # } -/// # } -/// # -/// # impl Memoizable for ExampleFormatter { -/// # type Args = (String,); -/// # type Error = (); -/// # fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result { -/// # Ok(Self { -/// # lang, -/// # prefix: args.0, -/// # }) -/// # } -/// # } -/// # -/// let mut memoizer = IntlMemoizer::default(); -/// -/// // The memoziation happens per-locale. -/// let en_us = "en-US".parse().expect("Failed to parse."); -/// let en_us_memoizer: Rc = memoizer.get_for_lang(en_us); -/// -/// // These arguments are passed into the constructor for `ExampleFormatter`. The -/// // construct_args will be used for determining the memoization, but the message -/// // can be different and re-use the constructed instance. -/// let construct_args = (String::from("prefix:"),); -/// let message = "The format operation will run"; -/// -/// // Use the `ExampleFormatter` from the `IntlLangMemoizer` example. It returns a -/// // string that demonstrates the configuration of the formatter. This step will -/// // construct a new formatter if needed, and run the format operation. -/// // -/// // See `IntlLangMemoizer` for more details on this step. 
-/// let en_us_result = en_us_memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message) -/// }); -/// -/// // The example formatter constructs a string with diagnostic information about -/// // the configuration. -/// assert_eq!( -/// en_us_result.unwrap(), -/// "prefix: lang(en-US) string(The format operation will run)" -/// ); -/// -/// // The process can be repeated for a new locale. -/// -/// let de_de = "de-DE".parse().expect("Failed to parse."); -/// let de_de_memoizer: Rc = memoizer.get_for_lang(de_de); -/// -/// let de_de_result = de_de_memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message) -/// }); -/// -/// assert_eq!( -/// de_de_result.unwrap(), -/// "prefix: lang(de-DE) string(The format operation will run)" -/// ); -/// ``` -#[derive(Default)] -pub struct IntlMemoizer { - map: HashMap>, -} - -impl IntlMemoizer { - /// Get a [`IntlLangMemoizer`] for a given language. If one does not exist for - /// a locale, it will be constructed and weakly retained. See [`IntlLangMemoizer`] - /// for more detailed documentation how to use it. 
- pub fn get_for_lang(&mut self, lang: LanguageIdentifier) -> Rc { - match self.map.entry(lang.clone()) { - Entry::Vacant(empty) => { - let entry = Rc::new(IntlLangMemoizer::new(lang)); - empty.insert(Rc::downgrade(&entry)); - entry - } - Entry::Occupied(mut entry) => { - if let Some(entry) = entry.get().upgrade() { - entry - } else { - let e = Rc::new(IntlLangMemoizer::new(lang)); - entry.insert(Rc::downgrade(&e)); - e - } - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use icu_plurals::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; - - struct PluralRules(pub IntlPluralRules); - - impl PluralRules { - pub fn new( - lang: LanguageIdentifier, - pr_type: PluralRuleType, - ) -> Result { - let inner = match pr_type { - PluralRuleType::Cardinal => IntlPluralRules::try_new_cardinal(&lang.into()), - PluralRuleType::Ordinal => IntlPluralRules::try_new_ordinal(&lang.into()), - _ => todo!(), - }; - Ok(Self(inner.unwrap())) - } - } - - impl Memoizable for PluralRules { - type Args = (PluralRuleType,); - type Error = &'static str; - fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result { - Self::new(lang, args.0) - } - } - - #[test] - fn test_single_thread() { - let lang: LanguageIdentifier = "en".parse().unwrap(); - - let mut memoizer = IntlMemoizer::default(); - { - let en_memoizer = memoizer.get_for_lang(lang.clone()); - - let result = en_memoizer - .with_try_get::((PluralRuleType::Cardinal,), |cb| { - cb.0.category_for(5) - }) - .unwrap(); - assert_eq!(result, PluralCategory::Other); - } - - { - let en_memoizer = memoizer.get_for_lang(lang); - - let result = en_memoizer - .with_try_get::((PluralRuleType::Cardinal,), |cb| { - cb.0.category_for(5) - }) - .unwrap(); - assert_eq!(result, PluralCategory::Other); - } - } - - #[cfg(feature = "sync")] - #[test] - fn test_concurrent() { - use std::{sync::Arc, thread}; - - let lang: LanguageIdentifier = "en".parse().unwrap(); - let memoizer = 
Arc::new(concurrent::IntlLangMemoizer::new(lang)); - let mut threads = vec![]; - - // Spawn four threads that all use the PluralRules. - for _ in 0..4 { - let memoizer = Arc::clone(&memoizer); - threads.push(thread::spawn(move || { - memoizer - .with_try_get::((PluralRuleType::Cardinal,), |cb| { - cb.0.category_for(5) - }) - .expect("Failed to get a PluralRules result.") - })); - } - - for thread in threads.drain(..) { - let result = thread.join().expect("Failed to join thread."); - assert_eq!(result, PluralCategory::Other); - } - } -} +pub use lang_memoizer::IntlLangMemoizer; +pub use memoizable::Memoizable; +pub use memoizer::IntlMemoizer; diff --git a/intl-memoizer/src/memoizable.rs b/intl-memoizer/src/memoizable.rs new file mode 100644 index 00000000..793098f4 --- /dev/null +++ b/intl-memoizer/src/memoizable.rs @@ -0,0 +1,17 @@ +use icu_locid::LanguageIdentifier; +use std::hash::Hash; + +pub trait Memoizable { + type Args: 'static + Eq + Hash + Clone; + type Provider; + + type Error: std::fmt::Debug; + + fn construct( + lang: LanguageIdentifier, + args: Self::Args, + provider: Option<&Self::Provider>, + ) -> Result + where + Self: std::marker::Sized; +} diff --git a/intl-memoizer/src/memoizer.rs b/intl-memoizer/src/memoizer.rs new file mode 100644 index 00000000..54772c61 --- /dev/null +++ b/intl-memoizer/src/memoizer.rs @@ -0,0 +1,28 @@ +use crate::IntlLangMemoizer; +use icu_locid::LanguageIdentifier; +use std::collections::HashMap; +use std::rc::Rc; + +pub struct IntlMemoizer<'dp, DP> { + provider: Option<&'dp DP>, + map: HashMap>>, +} + +impl<'dp, DP> IntlMemoizer<'dp, DP> { + pub fn new(provider: Option<&'dp DP>) -> Self { + Self { + provider, + map: HashMap::default(), + } + } + + pub fn get_for_lang(&mut self, lang: LanguageIdentifier) -> Rc> { + if let Some(memoizer) = self.map.get(&lang) { + memoizer.clone() + } else { + let memoizer = Rc::new(IntlLangMemoizer::new(lang.clone(), self.provider)); + self.map.insert(lang, memoizer.clone()); + memoizer + } 
+ } +} diff --git a/intl-memoizer/tests/single.rs b/intl-memoizer/tests/single.rs new file mode 100644 index 00000000..7ee3ae9e --- /dev/null +++ b/intl-memoizer/tests/single.rs @@ -0,0 +1,14 @@ + +static mut INTL_EXAMPLE_CONSTRUCTS: u32 = 0; +fn increment_constructs() { + unsafe { + INTL_EXAMPLE_CONSTRUCTS += 1; + } +} + +fn get_constructs_count() -> u32 { + unsafe { INTL_EXAMPLE_CONSTRUCTS } +} + +#[test] +fn test_memoizable() {} From 30125e998d2cbc05b545e15955570abd74be650b Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 8 Feb 2024 14:29:40 -0800 Subject: [PATCH 4/4] perf(intl-memoizer): Benchmark memoizer --- intl-memoizer/Cargo.toml | 3 + intl-memoizer/benches/single.rs | 305 +++++++++++++++++++------------- 2 files changed, 183 insertions(+), 125 deletions(-) diff --git a/intl-memoizer/Cargo.toml b/intl-memoizer/Cargo.toml index 65615e07..7aa05450 100644 --- a/intl-memoizer/Cargo.toml +++ b/intl-memoizer/Cargo.toml @@ -37,6 +37,9 @@ icu_datetime = {version = "1.4", features = ["serde"]} icu_calendar = "1.4" icu_decimal = "1.4" icu_provider_blob = "1.4" +icu_collator = "1.4" +fixed_decimal = "0.5" +icu_list = { version = "1.4", features = ["serde"]} [features] default = [] diff --git a/intl-memoizer/benches/single.rs b/intl-memoizer/benches/single.rs index c5e9d621..b85d7916 100644 --- a/intl-memoizer/benches/single.rs +++ b/intl-memoizer/benches/single.rs @@ -3,151 +3,206 @@ use criterion::criterion_main; use criterion::Criterion; use criterion::{Bencher, BenchmarkId}; use icu_calendar::DateTime; -use icu_datetime::{options::length::Time, TimeFormatter}; +use icu_datetime::{ + options::length::{Date, Time}, + // DateTimeFormatterOptions, + DateFormatter, + // DateTimeFormatter, + TimeFormatter, +}; +// use icu_collator::{Collator, CollatorOptions}; +// use icu_decimal::{FixedDecimalFormatter, options::FixedDecimalFormatterOptions}; +// use fixed_decimal::FixedDecimal; +use icu_list::{ListFormatter, ListLength}; use icu_locid::LanguageIdentifier; +use 
icu_plurals::{PluralRuleType, PluralRules}; use intl_memoizer::{IntlLangMemoizer, Memoizable}; - -struct TF(pub TimeFormatter); +use std::hint::black_box; use icu_provider_blob::BlobDataProvider; const ICU4X_DATA: &[u8] = include_bytes!(concat!( - "/Users/zibi/projects/icu-perf/data/icu4x-1.4-datetime.postcard" + "/Users/zibi/projects/icu-perf/data/icu4x-1.4.postcard" )); -impl Memoizable for TF { - type Args = (Time,); - - type Provider = icu_provider_blob::BlobDataProvider; - - /// If the construtor is fallible, than errors can be described here. - type Error = (); - - /// This function wires together the `Args` and `Error` type to construct - /// the intl API. In our example, there is - fn construct( - lang: LanguageIdentifier, - args: Self::Args, - provider: Option<&Self::Provider>, - ) -> Result { - Ok(Self( - TimeFormatter::try_new_with_length_with_buffer_provider( - provider.unwrap(), - &lang.into(), - args.0, - ) - .unwrap(), - )) - } +trait Testable { + type Input; + + fn execute(&self, input: Self::Input); } -const SETS: usize = 10; -const REPS: usize = 10; +macro_rules! 
define_testable_type { + ($name:ident, $type:ident, $args:tt, $constructor:ident, $method:ident, $input:ty) => { + define_testable_type!($name, $type, $args, $constructor); + + impl Testable for $name { + type Input = $input; + + fn execute(&self, input: Self::Input) { + let _ = self.0.$method(input); + } + } + }; + + ($name:ident, $type:ident, $args:tt, $constructor:ident, $method:ident, ref $input:ty) => { + define_testable_type!($name, $type, $args, $constructor); + + impl Testable for $name { + type Input = $input; + + fn execute(&self, input: Self::Input) { + let _ = self.0.$method(&input); + } + } + }; + + ($name:ident, $type:ident, $args:tt, $constructor:ident) => { + struct $name($type); + + impl Memoizable for $name { + type Args = $args; + type Provider = icu_provider_blob::BlobDataProvider; + type Error = (); + + fn construct( + lang: LanguageIdentifier, + args: Self::Args, + provider: Option<&Self::Provider>, + ) -> Result { + Ok(Self( + $type::$constructor(provider.unwrap(), &lang.into(), args.0).unwrap(), + )) + } + } + }; +} -fn construct_lang_bench(c: &mut Criterion) { - let lang: LanguageIdentifier = "en-US".parse().unwrap(); - let provider = - BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); +define_testable_type!(TF, TimeFormatter, (Time, ), try_new_with_length_with_buffer_provider, format_to_string, ref DateTime); +define_testable_type!(DF, DateFormatter, (Date, ), try_new_with_length_with_buffer_provider, format_to_string, ref DateTime); +// define_testable_type!(DTF, DateTimeFormatter, (DateTimeFormatterOptions, ), try_new_with_length_with_buffer_provider, format_to_string, ref DateTime); +define_testable_type!( + PR, + PluralRules, + (PluralRuleType,), + try_new_with_buffer_provider, + category_for, + usize +); +// define_testable_type!( +// C, +// Collator, +// (CollatorOptions,), +// try_new_with_buffer_provider, +// compare, +// &str, +// &str, +// ); +// define_testable_type!( +// D, +// 
FixedDecimalFormatter, +// (FixedDecimalFormatterOptions,), +// try_new_with_buffer_provider, +// format_to_string, +// ref FixedDecimal +// ); +define_testable_type!( + LF, + ListFormatter, + (ListLength,), + try_new_and_with_length_with_buffer_provider, + format_to_string, + std::vec::IntoIter +); - c.bench_with_input( - BenchmarkId::new("construct_lang", &lang), - &(lang, provider), - |b, (lang, provider)| { - b.iter(|| { - let _ = IntlLangMemoizer::new(lang.clone(), Some(provider)); - }); - }, - ); +macro_rules! without_memoizer_hoisted { + ($type:ident, $b:ident, $lang:ident, $provider:ident, $args:expr, $count:expr, $input:expr ) => { + $b.iter(|| { + let intl = $type::construct($lang.clone(), black_box($args), Some($provider)).unwrap(); + for _ in 0..$count { + let _ = intl.execute($input); + } + }) + }; } -fn populate_lang(c: &mut Criterion) { - let lang: LanguageIdentifier = "en".parse().unwrap(); - - let input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); - let provider = - BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); - let construct_args = (Time::Short,); - - c.bench_with_input( - BenchmarkId::new("populate_lang", &lang), - &(construct_args, provider), - |b: &mut Bencher, (construct_args, provider)| { - b.iter(|| { - let memoizer = IntlLangMemoizer::new(lang.clone(), Some(provider)); - for _ in 0..SETS { - for _ in 0..REPS { - let _ = memoizer.with_try_get::(construct_args, |intl_example| { - intl_example.0.format_to_string(&input) - }); - } - } - }); - }, - ); +macro_rules! 
without_memoizer { + ($type:ident, $b:ident, $lang:ident, $provider:ident, $args:expr, $count:expr, $input:expr ) => { + $b.iter(|| { + for _ in 0..$count { + let intl = + $type::construct($lang.clone(), black_box($args), Some($provider)).unwrap(); + let _ = intl.execute($input); + } + }) + }; } -fn without_memoizer(c: &mut Criterion) { - let lang: LanguageIdentifier = "en".parse().unwrap(); - let provider = - BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); - let construct_args = (Time::Short,); - - let input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); - - c.bench_with_input( - BenchmarkId::new("without_memoizer", &lang), - &(construct_args, provider), - |b: &mut Bencher, (construct_args, provider)| { - b.iter(|| { - for _ in 0..SETS { - for _ in 0..REPS { - let formatter = TimeFormatter::try_new_with_length_with_buffer_provider( - provider, - &lang.clone().into(), - construct_args.0, - ) - .unwrap(); - let _ = formatter.format(&input); - } - } - }); - }, - ); +macro_rules! 
with_memoizer { + ($type:ident, $b:ident, $lang:ident, $provider:ident, $args:expr, $count:expr, $input:expr ) => { + $b.iter(|| { + let memoizer = + IntlLangMemoizer::new(black_box($lang.clone()), Some(black_box($provider))); + for _ in 0..$count { + let _ = + memoizer.with_try_get(black_box(&$args), |intl: &$type| intl.execute($input)); + } + }) + }; } -fn without_memoizer_hoisted(c: &mut Criterion) { - let lang: LanguageIdentifier = "en".parse().unwrap(); +fn bench_variants(c: &mut Criterion) { + let lang: LanguageIdentifier = "und".parse().unwrap(); + let provider = BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); - let construct_args = (Time::Short,); - - let input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); - - c.bench_with_input( - BenchmarkId::new("without_memoizer_hoisted", &lang), - &(construct_args, provider), - |b: &mut Bencher, (construct_args, provider)| { - b.iter(|| { - for _ in 0..SETS { - let formatter = TimeFormatter::try_new_with_length_with_buffer_provider( - provider, - &lang.clone().into(), - construct_args.0, - ) - .unwrap(); - for _ in 0..REPS { - let _ = formatter.format(&input); + + let tf_input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); + let tf_args = (Time::Short,); + + let pr_input = 5; + let pr_args = (PluralRuleType::Cardinal,); + + for component in ["time", "plurals"] { + let mut group = c.benchmark_group(component); + let counts: &[usize] = &[0, 1, 10, 100, 1000, 10000]; + + for count in counts { + group.bench_with_input( + BenchmarkId::new("without_memoizer_hoisted", count), + &(count, &provider), + |b: &mut Bencher, &(count, provider)| match component { + "time" => { + without_memoizer_hoisted!(TF, b, lang, provider, tf_args, *count, tf_input) + } + "plurals" => { + without_memoizer_hoisted!(PR, b, lang, provider, pr_args, *count, pr_input) } - } - }); - }, - ); + _ => unreachable!(), + }, + ); + group.bench_with_input( + 
BenchmarkId::new("without_memoizer", count), + &(count, &provider), + |b: &mut Bencher, &(count, provider)| match component { + "time" => without_memoizer!(TF, b, lang, provider, tf_args, *count, tf_input), + "plurals" => { + without_memoizer!(PR, b, lang, provider, pr_args, *count, pr_input) + } + _ => unreachable!(), + }, + ); + group.bench_with_input( + BenchmarkId::new("with_memoizer", count), + &(count, &provider), + |b: &mut Bencher, &(count, provider)| match component { + "time" => with_memoizer!(TF, b, lang, provider, tf_args, *count, tf_input), + "plurals" => with_memoizer!(PR, b, lang, provider, pr_args, *count, pr_input), + _ => unreachable!(), + }, + ); + } + group.finish(); + } } -criterion_group!( - benches, - construct_lang_bench, - populate_lang, - without_memoizer, - without_memoizer_hoisted -); +criterion_group!(benches, bench_variants,); criterion_main!(benches);