icu_locale_core/extensions/unicode/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Unicode Extensions provide information about user preferences in a given locale.
6//!
7//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
8//! [`Attributes`].
9//!
10//!
11//! # Examples
12//!
13//! ```
14//! use icu::locale::extensions::unicode::{attribute, key, value, Unicode};
15//! use icu::locale::Locale;
16//!
17//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
18//!
19//! assert_eq!(
20//! loc.extensions.unicode.keywords.get(&key!("hc")),
21//! Some(&value!("h12"))
22//! );
23//! assert!(loc
24//! .extensions
25//! .unicode
26//! .attributes
27//! .contains(&attribute!("foobar")));
28//! ```
29mod attribute;
30mod attributes;
31mod key;
32mod keywords;
33mod subdivision;
34mod value;
35
36use core::cmp::Ordering;
37#[cfg(feature = "alloc")]
38use core::str::FromStr;
39
40#[doc(inline)]
41pub use attribute::{attribute, Attribute};
42pub use attributes::Attributes;
43#[doc(inline)]
44pub use key::{key, Key};
45pub use keywords::Keywords;
46#[doc(inline)]
47pub use subdivision::{subdivision_suffix, SubdivisionId, SubdivisionSuffix};
48#[doc(inline)]
49pub use value::{value, Value};
50
51#[cfg(feature = "alloc")]
52use super::ExtensionType;
53#[cfg(feature = "alloc")]
54use crate::parser::ParseError;
55#[cfg(feature = "alloc")]
56use crate::parser::SubtagIterator;
57
/// String form of the singleton subtag that introduces the Unicode extension.
pub(crate) const UNICODE_EXT_STR: &str = "u";
/// Character form of the singleton subtag that introduces the Unicode extension.
pub(crate) const UNICODE_EXT_CHAR: char = 'u';
60
61/// Unicode Extensions provide information about user preferences in a given locale.
62///
63/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
64/// Identifier`] specification.
65///
66/// Unicode extensions provide subtags that specify language and/or locale-based behavior
67/// or refinements to language tags, according to work done by the Unicode Consortium.
68/// (See [`RFC 6067`] for details).
69///
70/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
71/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
72/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
73///
74/// # Examples
75///
76/// ```
77/// use icu::locale::extensions::unicode::{key, value};
78/// use icu::locale::Locale;
79///
80/// let loc: Locale =
81/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
82///
83/// assert_eq!(
84/// loc.extensions.unicode.keywords.get(&key!("ca")),
85/// Some(&value!("buddhist"))
86/// );
87/// ```
88#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
89#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
90pub struct Unicode {
91 /// The key-value pairs present in this locale extension, with each extension key subtag
92 /// associated to its provided value subtag.
93 pub keywords: Keywords,
94 /// A canonically ordered sequence of single standalone subtags for this locale extension.
95 pub attributes: Attributes,
96}
97
98impl Unicode {
99 /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// use icu::locale::extensions::unicode::Unicode;
105 ///
106 /// assert_eq!(Unicode::new(), Unicode::default());
107 /// ```
108 #[inline]
109 pub const fn new() -> Self {
110 Self {
111 keywords: Keywords::new(),
112 attributes: Attributes::new(),
113 }
114 }
115
116 /// A constructor which takes a str slice, parses it and
117 /// produces a well-formed [`Unicode`].
118 ///
119 /// ✨ *Enabled with the `alloc` Cargo feature.*
120 #[inline]
121 #[cfg(feature = "alloc")]
122 pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
123 Self::try_from_utf8(s.as_bytes())
124 }
125
126 /// See [`Self::try_from_str`]
127 ///
128 /// ✨ *Enabled with the `alloc` Cargo feature.*
129 #[cfg(feature = "alloc")]
130 pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
131 let mut iter = SubtagIterator::new(code_units);
132
133 let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
134 if let ExtensionType::Unicode = ExtensionType::try_from_byte_slice(ext)? {
135 return Self::try_from_iter(&mut iter);
136 }
137
138 Err(ParseError::InvalidExtension)
139 }
140
141 /// Returns [`true`] if there list of keywords and attributes is empty.
142 ///
143 /// # Examples
144 ///
145 /// ```
146 /// use icu::locale::Locale;
147 ///
148 /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
149 ///
150 /// assert!(!loc.extensions.unicode.is_empty());
151 /// ```
152 pub fn is_empty(&self) -> bool {
153 self.keywords.is_empty() && self.attributes.is_empty()
154 }
155
156 /// Clears all Unicode extension keywords and attributes, effectively removing
157 /// the Unicode extension.
158 ///
159 /// # Example
160 ///
161 /// ```
162 /// use icu::locale::Locale;
163 ///
164 /// let mut loc: Locale =
165 /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
166 /// loc.extensions.unicode.clear();
167 /// assert_eq!(loc, "und-t-mul".parse().unwrap());
168 /// ```
169 pub fn clear(&mut self) {
170 self.keywords.clear();
171 self.attributes.clear();
172 }
173
174 pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) {
175 (&self.attributes, &self.keywords)
176 }
177
178 /// Returns an ordering suitable for use in [`BTreeSet`].
179 ///
180 /// The ordering may or may not be equivalent to string ordering, and it
181 /// may or may not be stable across ICU4X releases.
182 ///
183 /// [`BTreeSet`]: alloc::collections::BTreeSet
184 pub fn total_cmp(&self, other: &Self) -> Ordering {
185 self.as_tuple().cmp(&other.as_tuple())
186 }
187
188 #[cfg(feature = "alloc")]
189 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
190 let attributes = Attributes::try_from_iter(iter)?;
191 let keywords = Keywords::try_from_iter(iter)?;
192
193 // Ensure we've defined at least one attribute or keyword
194 if attributes.is_empty() && keywords.is_empty() {
195 return Err(ParseError::InvalidExtension);
196 }
197
198 Ok(Self {
199 keywords,
200 attributes,
201 })
202 }
203
204 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
205 where
206 F: FnMut(&str) -> Result<(), E>,
207 {
208 if !self.is_empty() {
209 if with_ext {
210 f(UNICODE_EXT_STR)?;
211 }
212 self.attributes.for_each_subtag_str(f)?;
213 self.keywords.for_each_subtag_str(f)?;
214 }
215 Ok(())
216 }
217
218 /// Extends the `Unicode` with values from another `Unicode`.
219 ///
220 /// # Example
221 ///
222 /// ```
223 /// use icu::locale::extensions::unicode::Unicode;
224 ///
225 /// let mut ue: Unicode = "u-foobar-ca-buddhist".parse().unwrap();
226 /// let ue2: Unicode = "u-ca-gregory-hc-h12".parse().unwrap();
227 ///
228 /// ue.extend(ue2);
229 ///
230 /// assert_eq!(ue, "u-foobar-ca-gregory-hc-h12".parse().unwrap());
231 /// ```
232 #[cfg(feature = "alloc")]
233 pub fn extend(&mut self, other: Unicode) {
234 self.keywords.extend_from_keywords(other.keywords);
235 self.attributes.extend_from_attributes(other.attributes);
236 }
237}
238
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for Unicode {
    type Err = ParseError;

    /// Parses `s` as a Unicode extension from its UTF-8 bytes, which is
    /// exactly what [`Unicode::try_from_str`] does.
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Unicode::try_from_utf8(s.as_bytes())
    }
}
249
250writeable::impl_display_with_writeable!(Unicode, #[cfg(feature = "alloc")]);
251
252impl writeable::Writeable for Unicode {
253 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
254 sink.write_char(UNICODE_EXT_CHAR)?;
255
256 if !self.attributes.is_empty() {
257 sink.write_char('-')?;
258 writeable::Writeable::write_to(&self.attributes, sink)?;
259 }
260 if !self.keywords.is_empty() {
261 sink.write_char('-')?;
262 writeable::Writeable::write_to(&self.keywords, sink)?;
263 }
264 Ok(())
265 }
266
267 fn writeable_length_hint(&self) -> writeable::LengthHint {
268 if self.is_empty() {
269 return writeable::LengthHint::exact(0);
270 }
271 let mut result = writeable::LengthHint::exact(1);
272 if !self.attributes.is_empty() {
273 result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
274 }
275 if !self.keywords.is_empty() {
276 result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
277 }
278 result
279 }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed extension round-trips through parsing and formatting,
    /// while a bare singleton without any subtags is rejected.
    #[test]
    fn test_unicode_extension_fromstr() {
        let parsed = "u-foo-hc-h12"
            .parse::<Unicode>()
            .expect("Failed to parse Unicode");
        assert_eq!(parsed.to_string(), "u-foo-hc-h12");

        assert!("u".parse::<Unicode>().is_err());
    }
}