icu_locale_core/extensions/transform/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Transform Extensions provide information on content transformations in a given locale.
6//!
7//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
8//! optional [`LanguageIdentifier`].
9//!
10//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
11//!
12//! # Examples
13//!
14//! ```
15//! use icu::locale::extensions::transform::{Fields, Key, Transform, Value};
16//! use icu::locale::{LanguageIdentifier, Locale};
17//!
18//! let mut loc: Locale =
19//! "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
20//!
21//! let lang: LanguageIdentifier =
22//! "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
23//!
24//! let key: Key = "h0".parse().expect("Parsing key failed.");
25//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
26//!
27//! assert_eq!(loc.extensions.transform.lang, Some(lang));
28//! assert!(loc.extensions.transform.fields.contains_key(&key));
29//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
30//!
31//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
32//! ```
33mod fields;
34mod key;
35mod value;
36
37use core::cmp::Ordering;
38#[cfg(feature = "alloc")]
39use core::str::FromStr;
40
41pub use fields::Fields;
42#[doc(inline)]
43pub use key::{key, Key};
44pub use value::Value;
45
46#[cfg(feature = "alloc")]
47use super::ExtensionType;
48#[cfg(feature = "alloc")]
49use crate::parser::SubtagIterator;
50#[cfg(feature = "alloc")]
51use crate::parser::{parse_language_identifier_from_iter, ParseError, ParserMode};
52#[cfg(feature = "alloc")]
53use crate::shortvec::ShortBoxSlice;
54use crate::subtags;
55#[cfg(feature = "alloc")]
56use crate::subtags::Language;
57use crate::LanguageIdentifier;
58#[cfg(feature = "alloc")]
59use litemap::LiteMap;
60
/// The transform extension singleton as a string slice, as passed to subtag callbacks.
pub(crate) const TRANSFORM_EXT_STR: &str = "t";
/// The transform extension singleton as a single character, as written by the `Writeable` impl.
pub(crate) const TRANSFORM_EXT_CHAR: char = 't';
63
64/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
65/// Identifier`] specification.
66///
67/// Transform extension carries information about source language or script of
68/// transformed content, including content that has been transliterated, transcribed,
69/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
70///
71/// # Examples
72///
73/// ```
74/// use icu::locale::extensions::transform::{Key, Value};
75/// use icu::locale::{LanguageIdentifier, Locale};
76///
77/// let mut loc: Locale =
78/// "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
79///
80/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
81///
82/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
83/// let key: Key = "h0".parse().expect("Parsing key failed.");
84/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
85/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
86/// ```
87/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
88/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
89/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
90#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
91#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
92pub struct Transform {
93 /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
94 pub lang: Option<LanguageIdentifier>,
95 /// The key-value pairs present in this locale extension, with each extension key subtag
96 /// associated to its provided value subtag.
97 pub fields: Fields,
98}
99
100impl Transform {
101 /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// use icu::locale::extensions::transform::Transform;
107 ///
108 /// assert_eq!(Transform::new(), Transform::default());
109 /// ```
110 #[inline]
111 pub const fn new() -> Self {
112 Self {
113 lang: None,
114 fields: Fields::new(),
115 }
116 }
117
118 /// A constructor which takes a str slice, parses it and
119 /// produces a well-formed [`Transform`].
120 ///
121 /// ✨ *Enabled with the `alloc` Cargo feature.*
122 #[inline]
123 #[cfg(feature = "alloc")]
124 pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
125 Self::try_from_utf8(s.as_bytes())
126 }
127
128 /// See [`Self::try_from_str`]
129 ///
130 /// ✨ *Enabled with the `alloc` Cargo feature.*
131 #[cfg(feature = "alloc")]
132 pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
133 let mut iter = SubtagIterator::new(code_units);
134
135 let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
136 if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? {
137 return Self::try_from_iter(&mut iter);
138 }
139
140 Err(ParseError::InvalidExtension)
141 }
142
143 /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
144 ///
145 /// # Examples
146 ///
147 /// ```
148 /// use icu::locale::Locale;
149 ///
150 /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
151 ///
152 /// assert!(!loc.extensions.transform.is_empty());
153 /// ```
154 pub fn is_empty(&self) -> bool {
155 self.lang.is_none() && self.fields.is_empty()
156 }
157
158 /// Clears the transform extension, effectively removing it from the locale.
159 ///
160 /// # Examples
161 ///
162 /// ```
163 /// use icu::locale::Locale;
164 ///
165 /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
166 /// loc.extensions.transform.clear();
167 /// assert_eq!(loc, "en-US".parse().unwrap());
168 /// ```
169 pub fn clear(&mut self) {
170 self.lang = None;
171 self.fields.clear();
172 }
173
174 #[expect(clippy::type_complexity)]
175 pub(crate) fn as_tuple(
176 &self,
177 ) -> (
178 Option<(
179 subtags::Language,
180 Option<subtags::Script>,
181 Option<subtags::Region>,
182 &subtags::Variants,
183 )>,
184 &Fields,
185 ) {
186 (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
187 }
188
189 /// Returns an ordering suitable for use in [`BTreeSet`].
190 ///
191 /// The ordering may or may not be equivalent to string ordering, and it
192 /// may or may not be stable across ICU4X releases.
193 ///
194 /// [`BTreeSet`]: alloc::collections::BTreeSet
195 pub fn total_cmp(&self, other: &Self) -> Ordering {
196 self.as_tuple().cmp(&other.as_tuple())
197 }
198
199 #[cfg(feature = "alloc")]
200 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
201 let mut tlang = None;
202 let mut tfields = LiteMap::new();
203
204 if let Some(subtag) = iter.peek() {
205 if Language::try_from_utf8(subtag).is_ok() {
206 tlang = Some(parse_language_identifier_from_iter(
207 iter,
208 ParserMode::Partial,
209 )?);
210 }
211 }
212
213 let mut current_tkey = None;
214 let mut current_tvalue = ShortBoxSlice::new();
215 let mut has_current_tvalue = false;
216
217 while let Some(subtag) = iter.peek() {
218 if let Some(tkey) = current_tkey {
219 if let Ok(val) = Value::parse_subtag(subtag) {
220 has_current_tvalue = true;
221 if let Some(val) = val {
222 current_tvalue.push(val);
223 }
224 } else {
225 if !has_current_tvalue {
226 return Err(ParseError::InvalidExtension);
227 }
228 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
229 current_tkey = None;
230 current_tvalue = ShortBoxSlice::new();
231 has_current_tvalue = false;
232 continue;
233 }
234 } else if let Ok(tkey) = Key::try_from_utf8(subtag) {
235 current_tkey = Some(tkey);
236 } else {
237 break;
238 }
239
240 iter.next();
241 }
242
243 if let Some(tkey) = current_tkey {
244 if !has_current_tvalue {
245 return Err(ParseError::InvalidExtension);
246 }
247 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
248 }
249
250 if tlang.is_none() && tfields.is_empty() {
251 Err(ParseError::InvalidExtension)
252 } else {
253 Ok(Self {
254 lang: tlang,
255 fields: tfields.into(),
256 })
257 }
258 }
259
260 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
261 where
262 F: FnMut(&str) -> Result<(), E>,
263 {
264 if self.is_empty() {
265 return Ok(());
266 }
267 if with_ext {
268 f(TRANSFORM_EXT_STR)?;
269 }
270 if let Some(lang) = &self.lang {
271 lang.for_each_subtag_str_lowercased(f)?;
272 }
273 self.fields.for_each_subtag_str(f)
274 }
275}
276
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for Transform {
    type Err = ParseError;

    /// Parses `s` as a transform extension; the string must start with the
    /// extension singleton.
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_utf8(s.as_bytes())
    }
}
287
288writeable::impl_display_with_writeable!(Transform, #[cfg(feature = "alloc")]);
289
290impl writeable::Writeable for Transform {
291 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
292 if self.is_empty() {
293 return Ok(());
294 }
295 sink.write_char(TRANSFORM_EXT_CHAR)?;
296 if let Some(lang) = &self.lang {
297 sink.write_char('-')?;
298 lang.write_lowercased_to(sink)?;
299 }
300 if !self.fields.is_empty() {
301 sink.write_char('-')?;
302 writeable::Writeable::write_to(&self.fields, sink)?;
303 }
304 Ok(())
305 }
306
307 fn writeable_length_hint(&self) -> writeable::LengthHint {
308 if self.is_empty() {
309 return writeable::LengthHint::exact(0);
310 }
311 let mut result = writeable::LengthHint::exact(1);
312 if let Some(lang) = &self.lang {
313 result += writeable::Writeable::writeable_length_hint(lang) + 1;
314 }
315 if !self.fields.is_empty() {
316 result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
317 }
318 result
319 }
320}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// A full transform extension round-trips through parsing and display,
    /// while a bare singleton is rejected.
    #[test]
    fn test_transform_extension_fromstr() {
        let round_tripped = "t-en-us-h0-hybrid"
            .parse::<Transform>()
            .expect("Failed to parse Transform");
        assert_eq!(round_tripped.to_string(), "t-en-us-h0-hybrid");

        let bare_singleton = "t".parse::<Transform>();
        assert!(bare_singleton.is_err());
    }
}
336}