Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions unic-langid-impl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ repository = "https://github.com/zbraniecki/unic-locale"
license = "MIT/Apache-2.0"
categories = ["internationalization"]

[dependencies]
tinystr = "0.1"

[dev-dependencies]
criterion = "0.2"
serde = { version = "1.0", features = ["derive"] }
Expand Down
166 changes: 83 additions & 83 deletions unic-langid-impl/benches/langid.rs
Original file line number Diff line number Diff line change
@@ -1,95 +1,95 @@
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Criterion;
use criterion::Fun;

use tinystr::{TinyStr4, TinyStr8};
use unic_langid_impl::LanguageIdentifier;

/// Benchmarks parsing a fixed list of locale strings into `LanguageIdentifier`s.
///
/// Each parse result is discarded; the benchmark is registered with criterion
/// under the name "language_identifier_from_str".
fn language_identifier_from_str_bench(c: &mut Criterion) {
    let samples = &[
        "en-US",
        "en-GB",
        "es-AR",
        "it",
        "zh-Hans-CN",
        "de-AT",
        "pl",
        "fr-FR",
        "de-AT",
        "sr-Cyrl-SR",
        "nb-NO",
        "fr-FR",
        "mk",
        "uk",
    ];
    c.bench_function("language_identifier_from_str", move |bencher| {
        bencher.iter(|| {
            samples.iter().for_each(|sample| {
                let _: Result<LanguageIdentifier, _> = sample.parse();
            });
        })
    });
}
// Sample locale strings used as benchmark input. Some entries ("de-AT",
// "fr-FR") appear more than once in the list.
static STRINGS: &[&str] = &[
"en-US",
"en-GB",
"es-AR",
"it",
"zh-Hans-CN",
"de-AT",
"pl",
"fr-FR",
"de-AT",
"sr-Cyrl-SR",
"nb-NO",
"fr-FR",
"mk",
"uk",
];

// NOTE(review): from here to the closing brace after `c.bench_functions(...)`
// two versions of the benchmark appear interleaved: the older
// `language_identifier_from_parts_bench` (with its `entries` / `entries2`
// tables) and the newer `language_identifier_construct_bench`. The span does
// not parse as written; the two versions need to be separated.
fn language_identifier_from_parts_bench(c: &mut Criterion) {
// Tuples are passed positionally as (language, script, region, variants).
let entries: Vec<(Option<&str>, Option<&str>, Option<&str>, Option<&[&&str]>)> = vec![
(Some("en"), None, Some("US"), None),
(Some("en"), None, Some("GB"), None),
(Some("es"), None, Some("AR"), None),
(Some("it"), None, None, None),
(Some("zh"), Some("Hans"), Some("CN"), None),
(Some("de"), None, Some("AT"), None),
(Some("pl"), None, None, None),
(Some("fr"), None, Some("FR"), None),
(Some("de"), None, Some("AT"), None),
(Some("sr"), Some("Cyrl"), Some("SR"), None),
(Some("nb"), None, Some("NO"), None),
(Some("fr"), None, Some("FR"), None),
(Some("mk"), None, None, None),
(Some("uk"), None, None, None),
];
c.bench_function("language_identifier_from_parts", move |b| {
b.iter(|| {
// NOTE(review): the second and third bindings are named `region` and
// `script`, but the tuples above hold the script second ("Hans", "Cyrl")
// and the region third; the values are still forwarded in tuple order.
for (language, region, script, variants) in &entries {
let _ = LanguageIdentifier::from_parts(
language.as_ref(),
region.as_ref(),
script.as_ref(),
*variants,
);
}
})
});
// Benchmarks three ways of constructing a `LanguageIdentifier` ("from_str",
// "from_parts", "from_parts_unchecked"), registered together under
// "language_identifier_construct".
fn language_identifier_construct_bench(c: &mut Criterion) {
// Parse every sample up front; the `unwrap` assumes each entry of STRINGS
// is a valid language identifier.
let langids: Vec<LanguageIdentifier> = STRINGS
.iter()
.map(|s| -> LanguageIdentifier { s.parse().unwrap() })
.collect();

let entries2: Vec<(Option<&str>, Option<&str>, Option<&str>, Option<&[&str]>)> = vec![
(Some("en"), None, Some("US"), None),
(Some("en"), None, Some("GB"), None),
(Some("es"), None, Some("AR"), None),
(Some("it"), None, None, None),
(Some("zh"), Some("Hans"), Some("CN"), None),
(Some("de"), None, Some("AT"), None),
(Some("pl"), None, None, None),
(Some("fr"), None, Some("FR"), None),
(Some("de"), None, Some("AT"), None),
(Some("sr"), Some("Cyrl"), Some("SR"), None),
(Some("nb"), None, Some("NO"), None),
(Some("fr"), None, Some("FR"), None),
(Some("mk"), None, None, None),
(Some("uk"), None, None, None),
let funcs = vec![
// Parses each sample string from scratch.
Fun::new("from_str", |b, _| {
b.iter(|| {
for s in STRINGS {
let _: Result<LanguageIdentifier, _> = s.parse();
}
})
}),
// Rebuilds each identifier from its string subtags via `from_parts`.
Fun::new("from_parts", |b, langids: &Vec<LanguageIdentifier>| {
// Collect the subtags before the `b.iter` loop so only `from_parts` runs
// inside it.
let entries: Vec<(Option<&str>, Option<&str>, Option<&str>, Vec<&str>)> = langids
.iter()
.map(|langid| {
// A language of "und" is passed to `from_parts` as `None`.
let lang = Some(langid.get_language()).and_then(|s| {
if s == "und" {
None
} else {
Some(s)
}
});
(
lang,
langid.get_script(),
langid.get_region(),
langid.get_variants(),
)
})
.collect();
b.iter(|| {
for (language, script, region, variants) in &entries {
let _ = LanguageIdentifier::from_parts(*language, *script, *region, variants);
}
})
}),
// Rebuilds each identifier from raw integer parts without validation.
Fun::new(
"from_parts_unchecked",
|b, langids: &Vec<LanguageIdentifier>| {
// Raw integer parts are extracted before the `b.iter` loop.
let entries = langids
.iter()
.map(|langid| langid.clone().to_raw_parts())
.collect::<Vec<_>>();
b.iter(|| {
for (language, script, region, variants) in &entries {
// SAFETY: every raw value comes from `to_raw_parts()` on an identifier
// that was parsed successfully above. NOTE(review): presumably that
// satisfies the requirements of `new_unchecked` and
// `from_raw_parts_unchecked` -- confirm against their docs.
let _ = unsafe {
LanguageIdentifier::from_raw_parts_unchecked(
language.map(|l| TinyStr8::new_unchecked(l)),
script.map(|s| TinyStr4::new_unchecked(s)),
region.map(|r| TinyStr4::new_unchecked(r)),
variants
.into_iter()
.map(|v| TinyStr8::new_unchecked(*v))
.collect(),
)
};
}
})
},
),
];
c.bench_function("language_identifier_from_parts_unchecked", move |b| {
b.iter(|| {
for (language, region, script, variants) in &entries2 {
let _ = LanguageIdentifier::from_parts_unchecked(
*language, *region, *script, *variants,
);
}
})
});

// `langids` is the shared input handed to every function in `funcs`.
c.bench_functions("language_identifier_construct", funcs, langids);
}

// Benchmark group containing the string-parsing and from-parts benchmarks.
// The target list ends with a single trailing comma.
criterion_group!(
    benches,
    language_identifier_from_str_bench,
    language_identifier_from_parts_bench,
);
// Declares the `benches` group with the construct benchmark as its only
// target, then passes that group to `criterion_main!`.
criterion_group!(benches, language_identifier_construct_bench,);
criterion_main!(benches);
80 changes: 44 additions & 36 deletions unic-langid-impl/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,24 @@ pub mod parser;
pub mod subtags;

use crate::errors::LanguageIdentifierError;
use std::borrow::Cow;
use std::str::FromStr;

use tinystr::{TinyStr4, TinyStr8};

/// A language identifier made of optional language, script and region
/// subtags plus a list of variant subtags.
// NOTE(review): both the `Cow<'static, str>` fields and the `TinyStr`-based
// fields are listed below; each field name appears twice, so one set needs to
// be dropped.
#[derive(Default, Debug, PartialEq, Eq, Clone, Hash)]
pub struct LanguageIdentifier {
language: Option<Cow<'static, str>>,
script: Option<Cow<'static, str>>,
region: Option<Cow<'static, str>>,
variants: Vec<Cow<'static, str>>,
// NOTE(review): presumably `TinyStr8` / `TinyStr4` cap a subtag at 8 / 4
// ASCII bytes -- confirm against the tinystr docs. `to_raw_parts` converts
// language and variants to `u64` and script and region to `u32`.
language: Option<TinyStr8>,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment saying that these are guaranteed to be <=4

script: Option<TinyStr4>,
region: Option<TinyStr4>,
variants: Box<[TinyStr8]>,
}

impl LanguageIdentifier {
pub fn from_parts<S: AsRef<str>>(
language: Option<S>,
script: Option<S>,
region: Option<S>,
variants: Option<&[S]>,
variants: &[S],
) -> Result<Self, LanguageIdentifierError> {
let language = if let Some(subtag) = language {
subtags::parse_language_subtag(subtag.as_ref())?
Expand All @@ -36,38 +37,42 @@ impl LanguageIdentifier {
} else {
None
};
let mut variants_field = vec![];

if let Some(variants) = variants {
for variant in variants {
variants_field.push(subtags::parse_variant_subtag(variant.as_ref())?);
}
variants_field.sort();
let mut vars = Vec::with_capacity(variants.len());
for variant in variants {
vars.push(subtags::parse_variant_subtag(variant.as_ref())?);
}
vars.sort();
vars.dedup();

Ok(Self {
language,
script,
region,
variants: variants_field,
variants: vars.into_boxed_slice(),
})
}

pub fn from_parts_unchecked(
language: Option<&'static str>,
script: Option<&'static str>,
region: Option<&'static str>,
variants: Option<&[&'static str]>,
/// Consumes the identifier and returns its subtags as raw integers.
///
/// The tuple is `(language, script, region, variants)`: the language and
/// every variant are converted to `u64`, the script and region to `u32`.
/// A missing language, script or region stays `None`.
pub fn to_raw_parts(self) -> (Option<u64>, Option<u32>, Option<u32>, Box<[u64]>) {
    (
        self.language.map(|l| l.into()),
        self.script.map(|s| s.into()),
        self.region.map(|r| r.into()),
        // `iter()` borrows the boxed slice explicitly. `into_iter()` on a
        // `Box<[T]>` only yields references through auto-ref, and that
        // resolution differs between editions.
        self.variants.iter().map(|v| (*v).into()).collect(),
    )
}

/// Builds a `LanguageIdentifier` directly from already-constructed subtags.
///
/// The arguments are stored as-is; none of the subtag parsers are run and the
/// variants are neither sorted nor deduplicated here.
///
/// # Safety
///
/// No validation is performed. NOTE(review): callers presumably must pass
/// well-formed subtags and a sorted, deduplicated `variants` slice, matching
/// what `from_parts` produces -- confirm the exact contract.
pub const unsafe fn from_raw_parts_unchecked(
    language: Option<TinyStr8>,
    script: Option<TinyStr4>,
    region: Option<TinyStr4>,
    variants: Box<[TinyStr8]>,
) -> Self {
    // Each field is initialized exactly once, straight from the argument of
    // the same name.
    Self {
        language,
        script,
        region,
        variants,
    }
}

Expand Down Expand Up @@ -137,11 +142,14 @@ impl LanguageIdentifier {
}

/// Replaces the variant subtags with the parsed, sorted and deduplicated
/// contents of `variants`.
///
/// # Errors
///
/// Propagates the error for the first entry that
/// `subtags::parse_variant_subtag` rejects. The new list is built in a
/// temporary vector, so on error the existing variants are left unchanged.
pub fn set_variants(&mut self, variants: &[&str]) -> Result<(), LanguageIdentifierError> {
    let mut result = Vec::with_capacity(variants.len());
    for variant in variants {
        result.push(subtags::parse_variant_subtag(variant)?);
    }
    result.sort();
    result.dedup();

    // Assign only after every subtag parsed successfully.
    self.variants = result.into_boxed_slice();
    Ok(())
}
}
Expand Down Expand Up @@ -169,26 +177,26 @@ impl std::fmt::Display for LanguageIdentifier {
if let Some(region) = self.get_region() {
subtags.push(region);
}
for variant in &self.variants {
for variant in self.variants.iter() {
subtags.push(variant);
}

f.write_str(&subtags.join("-"))
}
}

/// Compares two optional subtags, optionally treating a missing subtag as a
/// wildcard.
///
/// Returns `true` when `as_range1` is set and `subtag1` is `None`, when
/// `as_range2` is set and `subtag2` is `None`, or when the two options are
/// equal. Generic over any `PartialEq` subtag type, so it accepts both
/// string-like and integer-backed subtags.
fn subtag_matches<P: PartialEq>(
    subtag1: &Option<P>,
    subtag2: &Option<P>,
    as_range1: bool,
    as_range2: bool,
) -> bool {
    (as_range1 && subtag1.is_none()) || (as_range2 && subtag2.is_none()) || subtag1 == subtag2
}

fn subtags_match(
subtag1: &[Cow<'static, str>],
subtag2: &[Cow<'static, str>],
fn subtags_match<P: PartialEq>(
subtag1: &[P],
subtag2: &[P],
as_range1: bool,
as_range2: bool,
) -> bool {
Expand Down
3 changes: 2 additions & 1 deletion unic-langid-impl/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ pub fn parse_language_identifier(t: &str) -> Result<LanguageIdentifier, ParserEr
}

variants.sort();
variants.dedup();

Ok(LanguageIdentifier {
language,
script,
region,
variants,
variants: variants.into_boxed_slice(),
})
}
Loading