icu_properties/props.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module defines all available properties.
6//!
7//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8//! and implement [`EnumeratedProperty`].
9//!
10//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12//!
13//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16//! can be constructed.
17//!
18//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Re-emits an `impl` block of open-enum constants and derives boilerplate from it.
///
/// See [`test_enumerated_property_completeness`] for usage.
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
/// The expansion contains:
/// - the input `impl` block, unchanged, with an extra
///   `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`
///   that lists every declared constant in declaration order;
/// - behind the `datagen` feature, a `databake::Bake` impl that bakes a value equal to a
///   declared constant as a path to that constant, and any other value as a
///   `from_icu4c_value(..)` call;
/// - `impl From<EastAsianWidth> for u16`, which casts the first field of the struct to `u16`.
macro_rules! create_const_array {
    (
        $ ( #[$meta:meta] )*
        impl $enum_ty:ident {
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
        }
    ) => {
        // The caller's attributes and constants are passed through verbatim.
        $( #[$meta] )*
        impl $enum_ty {
            $(
                $(#[$const_meta])*
                $v const $i: $t = $e;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $($enum_ty::$i),*
            ];
        }

        #[cfg(feature = "datagen")]
        impl databake::Bake for $enum_ty {
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_properties");
                match *self {
                    // One arm per declared constant: bake as the named constant.
                    $(
                        Self::$i => databake::quote!(icu_properties::props::$enum_ty::$i),
                    )*
                    // Values without a named constant are baked via the raw-value constructor.
                    Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)),
                }
            }
        }


        impl From<$enum_ty> for u16 {
            fn from(other: $enum_ty) -> Self {
                // `as` keeps this usable for any integer field type the callers pass in.
                other.0 as u16
            }
        }
    }
}
77
78pub use crate::code_point_map::EnumeratedProperty;
79
/// Implements the enumerated-property plumbing for a property value type.
///
/// Arguments (all required unless noted, in this exact order):
/// - `name`: string literal exposed as `EnumeratedProperty::NAME` (as bytes);
/// - `short_name`: string literal exposed as `EnumeratedProperty::SHORT_NAME` (as bytes);
/// - `ident`: path of the value type receiving the impls;
/// - `data_marker`: type used as `EnumeratedProperty::DataMarker`;
/// - `singleton`: identifier of the constant on `crate::provider::Baked` used as
///   `EnumeratedProperty::SINGLETON` (only with the `compiled_data` feature);
/// - `ule_ty` (optional): when present, a `zerovec::ule::AsULE` impl with this `ULE` type is
///   also emitted; it converts through field `0` of the value type.
///
/// The macro also implements `crate::private::Sealed` for the value type.
macro_rules! make_enumerated_property {
    (
        name: $name:literal;
        short_name: $short_name:literal;
        ident: $value_ty:path;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        $(ule_ty: $ule_ty:ty;)?
    ) => {
        impl crate::private::Sealed for $value_ty {}

        impl EnumeratedProperty for $value_ty {
            type DataMarker = $data_marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$singleton;
            const NAME: &'static [u8] = $name.as_bytes();
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
        }

        // Emitted only when `ule_ty` was supplied; the value type must then be a tuple struct
        // whose field `0` itself implements `AsULE` with the same `ULE` type.
        $(
            impl zerovec::ule::AsULE for $value_ty {
                type ULE = $ule_ty;

                fn to_unaligned(self) -> Self::ULE {
                    self.0.to_unaligned()
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
114
/// The Bidi_Class enumerated property.
///
/// Its values are the character categories that the Unicode Bidirectional Algorithm requires.
/// The property values are listed under
/// [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values);
/// further information is available in
/// [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// # Example
///
/// ```
/// use icu::properties::{props::BidiClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('y'),
///     BidiClass::LeftToRight
/// ); // U+0079
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('ع'),
///     BidiClass::ArabicLetter
/// ); // U+0639
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct BidiClass(pub(crate) u8);

impl BidiClass {
    /// Converts this value into the matching ICU4C `UBidiClass` integer.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a `BidiClass` out of an ICU4C `UBidiClass` integer.
    pub const fn from_icu4c_value(value: u8) -> Self {
        BidiClass(value)
    }
}
151
152create_const_array! {
153#[allow(non_upper_case_globals)]
154impl BidiClass {
155 /// (`L`) any strong left-to-right character
156 pub const LeftToRight: BidiClass = BidiClass(0);
157 /// (`R`) any strong right-to-left (non-Arabic-type) character
158 pub const RightToLeft: BidiClass = BidiClass(1);
159 /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
160 pub const EuropeanNumber: BidiClass = BidiClass(2);
161 /// (`ES`) plus and minus signs
162 pub const EuropeanSeparator: BidiClass = BidiClass(3);
163 /// (`ET`) a terminator in a numeric format context, includes currency signs
164 pub const EuropeanTerminator: BidiClass = BidiClass(4);
165 /// (`AN`) any Arabic-Indic digit
166 pub const ArabicNumber: BidiClass = BidiClass(5);
167 /// (`CS`) commas, colons, and slashes
168 pub const CommonSeparator: BidiClass = BidiClass(6);
169 /// (`B`) various newline characters
170 pub const ParagraphSeparator: BidiClass = BidiClass(7);
171 /// (`S`) various segment-related control codes
172 pub const SegmentSeparator: BidiClass = BidiClass(8);
173 /// (`WS`) spaces
174 pub const WhiteSpace: BidiClass = BidiClass(9);
175 /// (`ON`) most other symbols and punctuation marks
176 pub const OtherNeutral: BidiClass = BidiClass(10);
177 /// (`LRE`) U+202A: the LR embedding control
178 pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
179 /// (`LRO`) U+202D: the LR override control
180 pub const LeftToRightOverride: BidiClass = BidiClass(12);
181 /// (`AL`) any strong right-to-left (Arabic-type) character
182 pub const ArabicLetter: BidiClass = BidiClass(13);
183 /// (`RLE`) U+202B: the RL embedding control
184 pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
185 /// (`RLO`) U+202E: the RL override control
186 pub const RightToLeftOverride: BidiClass = BidiClass(15);
187 /// (`PDF`) U+202C: terminates an embedding or override control
188 pub const PopDirectionalFormat: BidiClass = BidiClass(16);
189 /// (`NSM`) any nonspacing mark
190 pub const NonspacingMark: BidiClass = BidiClass(17);
191 /// (`BN`) most format characters, control codes, or noncharacters
192 pub const BoundaryNeutral: BidiClass = BidiClass(18);
193 /// (`FSI`) U+2068: the first strong isolate control
194 pub const FirstStrongIsolate: BidiClass = BidiClass(19);
195 /// (`LRI`) U+2066: the LR isolate control
196 pub const LeftToRightIsolate: BidiClass = BidiClass(20);
197 /// (`RLI`) U+2067: the RL isolate control
198 pub const RightToLeftIsolate: BidiClass = BidiClass(21);
199 /// (`PDI`) U+2069: terminates an isolate control
200 pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
201}
202}
203
// Registers `BidiClass` as the enumerated property "Bidi_Class" (short name "bc").
// Passing `ule_ty: u8` additionally requests the `AsULE` impl, which converts through the
// inner `u8` of the newtype.
make_enumerated_property! {
    name: "Bidi_Class";
    short_name: "bc";
    ident: BidiClass;
    data_marker: crate::provider::PropertyEnumBidiClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
    ule_ty: u8;
}
212
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props.
// `GeneralCategory` itself is re-exported from `props` right after this module.
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
    /// crate::props::GeneralCategoryGroup).
    ///
    /// The variants are declared grouped by major class, so the explicit discriminants are not
    /// strictly increasing in declaration order (see the mark and punctuation groups).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
    ///
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
    ///     GeneralCategory::OtherLetter
    /// ); // U+6728
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
    ///     GeneralCategory::OtherSymbol
    /// ); // U+1F383 JACK-O-LANTERN
    /// ```
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    // NOTE(review): `make_ule` presumably also generates the `new_from_u8` constructor used by
    // the `TryFrom<u8>` impl in `props` — confirm against the zerovec documentation.
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV))]
    #[repr(u8)]
    pub enum GeneralCategory {
        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        // Declared in Mn/Mc/Me order, so the values 8 and 7 appear out of sequence here.
        // NOTE(review): the numbering presumably mirrors ICU4C's `UCharCategory` — confirm.
        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        // `InitialPunctuation` (28) and `FinalPunctuation` (29) are numbered after the symbol
        // categories; 29 is the largest discriminant of this enum.
        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
317
318pub use gc::GeneralCategory;
319
320impl GeneralCategory {
321 /// All possible values of this enum
322 pub const ALL_VALUES: &'static [GeneralCategory] = &[
323 GeneralCategory::Unassigned,
324 GeneralCategory::UppercaseLetter,
325 GeneralCategory::LowercaseLetter,
326 GeneralCategory::TitlecaseLetter,
327 GeneralCategory::ModifierLetter,
328 GeneralCategory::OtherLetter,
329 GeneralCategory::NonspacingMark,
330 GeneralCategory::SpacingMark,
331 GeneralCategory::EnclosingMark,
332 GeneralCategory::DecimalNumber,
333 GeneralCategory::LetterNumber,
334 GeneralCategory::OtherNumber,
335 GeneralCategory::SpaceSeparator,
336 GeneralCategory::LineSeparator,
337 GeneralCategory::ParagraphSeparator,
338 GeneralCategory::Control,
339 GeneralCategory::Format,
340 GeneralCategory::PrivateUse,
341 GeneralCategory::Surrogate,
342 GeneralCategory::DashPunctuation,
343 GeneralCategory::OpenPunctuation,
344 GeneralCategory::ClosePunctuation,
345 GeneralCategory::ConnectorPunctuation,
346 GeneralCategory::InitialPunctuation,
347 GeneralCategory::FinalPunctuation,
348 GeneralCategory::OtherPunctuation,
349 GeneralCategory::MathSymbol,
350 GeneralCategory::CurrencySymbol,
351 GeneralCategory::ModifierSymbol,
352 GeneralCategory::OtherSymbol,
353 ];
354}
355
/// The error produced by `impl TryFrom<u8> for GeneralCategory` when the integer does not
/// correspond to a `GeneralCategory`.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
360
361impl TryFrom<u8> for GeneralCategory {
362 type Error = GeneralCategoryOutOfBoundsError;
363 /// Construct this [`GeneralCategory`] from an integer, returning
364 /// an error if it is out of bounds
365 fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
366 GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
367 }
368}
369
// Registers `GeneralCategory` as the enumerated property "General_Category" (short name "gc").
// No `ule_ty` is passed, so the macro emits no `AsULE` impl here.
// NOTE(review): presumably `#[zerovec::make_ule]` on the enum already provides it — confirm.
make_enumerated_property! {
    name: "General_Category";
    short_name: "gc";
    ident: GeneralCategory;
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
}
377
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// The wrapped value is a bit mask in which bit `n` is set when the category with
/// discriminant `n` belongs to the group. The discriminants correspond to the
/// `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);

// NOTE(review): `Sealed` is presumably the crate-private marker used to seal public traits —
// confirm in `crate::private`.
impl crate::private::Sealed for GeneralCategoryGroup {}
399
400use GeneralCategory as GC;
401use GeneralCategoryGroup as GCG;
402
403#[allow(non_upper_case_globals)]
404impl GeneralCategoryGroup {
405 /// (`Lu`) An uppercase letter
406 pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
407 /// (`Ll`) A lowercase letter
408 pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
409 /// (`Lt`) A digraphic letter, with first part uppercase
410 pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
411 /// (`Lm`) A modifier letter
412 pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
413 /// (`Lo`) Other letters, including syllables and ideographs
414 pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
415 /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
416 pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
417 | (1 << (GC::LowercaseLetter as u32))
418 | (1 << (GC::TitlecaseLetter as u32)));
419 /// (`L`) The union of all letter categories
420 pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
421 | (1 << (GC::LowercaseLetter as u32))
422 | (1 << (GC::TitlecaseLetter as u32))
423 | (1 << (GC::ModifierLetter as u32))
424 | (1 << (GC::OtherLetter as u32)));
425
426 /// (`Mn`) A nonspacing combining mark (zero advance width)
427 pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
428 /// (`Mc`) A spacing combining mark (positive advance width)
429 pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
430 /// (`Me`) An enclosing combining mark
431 pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
432 /// (`M`) The union of all mark categories
433 pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
434 | (1 << (GC::EnclosingMark as u32))
435 | (1 << (GC::SpacingMark as u32)));
436
437 /// (`Nd`) A decimal digit
438 pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
439 /// (`Nl`) A letterlike numeric character
440 pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
441 /// (`No`) A numeric character of other type
442 pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
443 /// (`N`) The union of all number categories
444 pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
445 | (1 << (GC::LetterNumber as u32))
446 | (1 << (GC::OtherNumber as u32)));
447
448 /// (`Zs`) A space character (of various non-zero widths)
449 pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
450 /// (`Zl`) U+2028 LINE SEPARATOR only
451 pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
452 /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
453 pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
454 /// (`Z`) The union of all separator categories
455 pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
456 | (1 << (GC::LineSeparator as u32))
457 | (1 << (GC::ParagraphSeparator as u32)));
458
459 /// (`Cc`) A C0 or C1 control code
460 pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
461 /// (`Cf`) A format control character
462 pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
463 /// (`Co`) A private-use character
464 pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
465 /// (`Cs`) A surrogate code point
466 pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
467 /// (`Cn`) A reserved unassigned code point or a noncharacter
468 pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
469 /// (`C`) The union of all control code, reserved, and unassigned categories
470 pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
471 | (1 << (GC::Format as u32))
472 | (1 << (GC::PrivateUse as u32))
473 | (1 << (GC::Surrogate as u32))
474 | (1 << (GC::Unassigned as u32)));
475
476 /// (`Pd`) A dash or hyphen punctuation mark
477 pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
478 /// (`Ps`) An opening punctuation mark (of a pair)
479 pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
480 /// (`Pe`) A closing punctuation mark (of a pair)
481 pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
482 /// (`Pc`) A connecting punctuation mark, like a tie
483 pub const ConnectorPunctuation: GeneralCategoryGroup =
484 GCG(1 << (GC::ConnectorPunctuation as u32));
485 /// (`Pi`) An initial quotation mark
486 pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
487 /// (`Pf`) A final quotation mark
488 pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
489 /// (`Po`) A punctuation mark of other type
490 pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
491 /// (`P`) The union of all punctuation categories
492 pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
493 | (1 << (GC::OpenPunctuation as u32))
494 | (1 << (GC::ClosePunctuation as u32))
495 | (1 << (GC::ConnectorPunctuation as u32))
496 | (1 << (GC::OtherPunctuation as u32))
497 | (1 << (GC::InitialPunctuation as u32))
498 | (1 << (GC::FinalPunctuation as u32)));
499
500 /// (`Sm`) A symbol of mathematical use
501 pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
502 /// (`Sc`) A currency sign
503 pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
504 /// (`Sk`) A non-letterlike modifier symbol
505 pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
506 /// (`So`) A symbol of other type
507 pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
508 /// (`S`) The union of all symbol categories
509 pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
510 | (1 << (GC::CurrencySymbol as u32))
511 | (1 << (GC::ModifierSymbol as u32))
512 | (1 << (GC::OtherSymbol as u32)));
513
514 const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
515
516 /// Return whether the code point belongs in the provided multi-value category.
517 ///
518 /// ```
519 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
520 /// use icu::properties::CodePointMapData;
521 ///
522 /// let gc = CodePointMapData::<GeneralCategory>::new();
523 ///
524 /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
525 /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
526 ///
527 /// // U+0B1E ORIYA LETTER NYA
528 /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
529 /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
530 /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
531 ///
532 /// // U+0301 COMBINING ACUTE ACCENT
533 /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
534 /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
535 /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
536 ///
537 /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
538 /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
539 /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
540 ///
541 /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
542 /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
543 /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
544 ///
545 /// // U+2713 CHECK MARK
546 /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
547 /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
548 /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
549 ///
550 /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
551 /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
552 /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
553 ///
554 /// // U+E007F CANCEL TAG
555 /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
556 /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
557 /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
558 /// ```
559 pub const fn contains(self, val: GeneralCategory) -> bool {
560 0 != (1 << (val as u32)) & self.0
561 }
562
563 /// Produce a GeneralCategoryGroup that is the inverse of this one
564 ///
565 /// # Example
566 ///
567 /// ```rust
568 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
569 ///
570 /// let letter = GeneralCategoryGroup::Letter;
571 /// let not_letter = letter.complement();
572 ///
573 /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
574 /// assert!(!letter.contains(GeneralCategory::MathSymbol));
575 /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
576 /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
577 /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
578 /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
579 /// ```
580 pub const fn complement(self) -> Self {
581 // Mask off things not in Self::ALL to guarantee the mask
582 // values stay in-range
583 GeneralCategoryGroup(!self.0 & Self::ALL)
584 }
585
586 /// Return the group representing all GeneralCategory values
587 ///
588 /// # Example
589 ///
590 /// ```rust
591 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
592 ///
593 /// let all = GeneralCategoryGroup::all();
594 ///
595 /// assert!(all.contains(GeneralCategory::MathSymbol));
596 /// assert!(all.contains(GeneralCategory::OtherPunctuation));
597 /// assert!(all.contains(GeneralCategory::UppercaseLetter));
598 /// ```
599 pub const fn all() -> Self {
600 Self(Self::ALL)
601 }
602
603 /// Return the empty group
604 ///
605 /// # Example
606 ///
607 /// ```rust
608 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
609 ///
610 /// let empty = GeneralCategoryGroup::empty();
611 ///
612 /// assert!(!empty.contains(GeneralCategory::MathSymbol));
613 /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
614 /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
615 /// ```
616 pub const fn empty() -> Self {
617 Self(0)
618 }
619
620 /// Take the union of two groups
621 ///
622 /// # Example
623 ///
624 /// ```rust
625 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
626 ///
627 /// let letter = GeneralCategoryGroup::Letter;
628 /// let symbol = GeneralCategoryGroup::Symbol;
629 /// let union = letter.union(symbol);
630 ///
631 /// assert!(union.contains(GeneralCategory::MathSymbol));
632 /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
633 /// assert!(union.contains(GeneralCategory::UppercaseLetter));
634 /// ```
635 pub const fn union(self, other: Self) -> Self {
636 Self(self.0 | other.0)
637 }
638
639 /// Take the intersection of two groups
640 ///
641 /// # Example
642 ///
643 /// ```rust
644 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
645 ///
646 /// let letter = GeneralCategoryGroup::Letter;
647 /// let lu = GeneralCategoryGroup::UppercaseLetter;
648 /// let intersection = letter.intersection(lu);
649 ///
650 /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
651 /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
652 /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
653 /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
654 /// ```
655 pub const fn intersection(self, other: Self) -> Self {
656 Self(self.0 & other.0)
657 }
658}
659
660impl From<GeneralCategory> for GeneralCategoryGroup {
661 fn from(subcategory: GeneralCategory) -> Self {
662 GeneralCategoryGroup(1 << (subcategory as u32))
663 }
664}
665impl From<u32> for GeneralCategoryGroup {
666 fn from(mask: u32) -> Self {
667 // Mask off things not in Self::ALL to guarantee the mask
668 // values stay in-range
669 GeneralCategoryGroup(mask & Self::ALL)
670 }
671}
672impl From<GeneralCategoryGroup> for u32 {
673 fn from(group: GeneralCategoryGroup) -> Self {
674 group.0
675 }
676}
677
/// The Script enumerated property.
///
/// The same type serves both the Script and the Script_Extensions Unicode properties.
/// Every character has exactly one Script, but a character used in a particular subset of
/// scripts appears in more than one Script_Extensions set. For example,
/// DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the Script_Extensions set
/// for Dogra, Kaithi, and Mahajani. To determine whether a code point belongs to a certain
/// script, use [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN
/// ```
///
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);

impl Script {
    /// Converts this value into the matching ICU4C `UScriptCode` integer.
    pub const fn to_icu4c_value(self) -> u16 {
        let Self(raw) = self;
        raw
    }
    /// Builds a `Script` out of an ICU4C `UScriptCode` integer.
    pub const fn from_icu4c_value(value: u16) -> Self {
        Script(value)
    }
}
716
717create_const_array! {
718#[allow(missing_docs)] // These constants don't need individual documentation.
719#[allow(non_upper_case_globals)]
720impl Script {
721 pub const Adlam: Script = Script(167);
722 pub const Ahom: Script = Script(161);
723 pub const AnatolianHieroglyphs: Script = Script(156);
724 pub const Arabic: Script = Script(2);
725 pub const Armenian: Script = Script(3);
726 pub const Avestan: Script = Script(117);
727 pub const Balinese: Script = Script(62);
728 pub const Bamum: Script = Script(130);
729 pub const BassaVah: Script = Script(134);
730 pub const Batak: Script = Script(63);
731 pub const Bengali: Script = Script(4);
732 pub const BeriaErfe: Script = Script(208);
733 pub const Bhaiksuki: Script = Script(168);
734 pub const Bopomofo: Script = Script(5);
735 pub const Brahmi: Script = Script(65);
736 pub const Braille: Script = Script(46);
737 pub const Buginese: Script = Script(55);
738 pub const Buhid: Script = Script(44);
739 pub const CanadianAboriginal: Script = Script(40);
740 pub const Carian: Script = Script(104);
741 pub const CaucasianAlbanian: Script = Script(159);
742 pub const Chakma: Script = Script(118);
743 pub const Cham: Script = Script(66);
744 pub const Cherokee: Script = Script(6);
745 pub const Chisoi: Script = Script(209);
746 pub const Chorasmian: Script = Script(189);
747 pub const Common: Script = Script(0);
748 pub const Coptic: Script = Script(7);
749 pub const Cuneiform: Script = Script(101);
750 pub const Cypriot: Script = Script(47);
751 pub const CyproMinoan: Script = Script(193);
752 pub const Cyrillic: Script = Script(8);
753 pub const Deseret: Script = Script(9);
754 pub const Devanagari: Script = Script(10);
755 pub const DivesAkuru: Script = Script(190);
756 pub const Dogra: Script = Script(178);
757 pub const Duployan: Script = Script(135);
758 pub const EgyptianHieroglyphs: Script = Script(71);
759 pub const Elbasan: Script = Script(136);
760 pub const Elymaic: Script = Script(185);
761 pub const Ethiopian: Script = Script(11);
762 pub const Georgian: Script = Script(12);
763 pub const Glagolitic: Script = Script(56);
764 pub const Gothic: Script = Script(13);
765 pub const Grantha: Script = Script(137);
766 pub const Greek: Script = Script(14);
767 pub const Gujarati: Script = Script(15);
768 pub const GunjalaGondi: Script = Script(179);
769 pub const Gurmukhi: Script = Script(16);
770 pub const Han: Script = Script(17);
771 pub const Hangul: Script = Script(18);
772 pub const HanifiRohingya: Script = Script(182);
773 pub const Hanunoo: Script = Script(43);
774 pub const Hatran: Script = Script(162);
775 pub const Hebrew: Script = Script(19);
776 pub const Hiragana: Script = Script(20);
777 pub const ImperialAramaic: Script = Script(116);
778 pub const Inherited: Script = Script(1);
779 pub const InscriptionalPahlavi: Script = Script(122);
780 pub const InscriptionalParthian: Script = Script(125);
781 pub const Javanese: Script = Script(78);
782 pub const Kaithi: Script = Script(120);
783 pub const Kannada: Script = Script(21);
784 pub const Katakana: Script = Script(22);
785 pub const Kawi: Script = Script(198);
786 pub const KayahLi: Script = Script(79);
787 pub const Kharoshthi: Script = Script(57);
788 pub const KhitanSmallScript: Script = Script(191);
789 pub const Khmer: Script = Script(23);
790 pub const Khojki: Script = Script(157);
791 pub const Khudawadi: Script = Script(145);
792 pub const Lao: Script = Script(24);
793 pub const Latin: Script = Script(25);
794 pub const Lepcha: Script = Script(82);
795 pub const Limbu: Script = Script(48);
796 pub const LinearA: Script = Script(83);
797 pub const LinearB: Script = Script(49);
798 pub const Lisu: Script = Script(131);
799 pub const Lycian: Script = Script(107);
800 pub const Lydian: Script = Script(108);
801 pub const Mahajani: Script = Script(160);
802 pub const Makasar: Script = Script(180);
803 pub const Malayalam: Script = Script(26);
804 pub const Mandaic: Script = Script(84);
805 pub const Manichaean: Script = Script(121);
806 pub const Marchen: Script = Script(169);
807 pub const MasaramGondi: Script = Script(175);
808 pub const Medefaidrin: Script = Script(181);
809 pub const MeeteiMayek: Script = Script(115);
810 pub const MendeKikakui: Script = Script(140);
811 pub const MeroiticCursive: Script = Script(141);
812 pub const MeroiticHieroglyphs: Script = Script(86);
813 pub const Miao: Script = Script(92);
814 pub const Modi: Script = Script(163);
815 pub const Mongolian: Script = Script(27);
816 pub const Mro: Script = Script(149);
817 pub const Multani: Script = Script(164);
818 pub const Myanmar: Script = Script(28);
819 pub const Nabataean: Script = Script(143);
820 pub const NagMundari: Script = Script(199);
821 pub const Nandinagari: Script = Script(187);
822 pub const Nastaliq: Script = Script(200);
823 pub const Newa: Script = Script(170);
824 pub const NewTaiLue: Script = Script(59);
825 pub const Nko: Script = Script(87);
826 pub const Nushu: Script = Script(150);
827 pub const NyiakengPuachueHmong: Script = Script(186);
828 pub const Ogham: Script = Script(29);
829 pub const OlChiki: Script = Script(109);
830 pub const OldHungarian: Script = Script(76);
831 pub const OldItalic: Script = Script(30);
832 pub const OldNorthArabian: Script = Script(142);
833 pub const OldPermic: Script = Script(89);
834 pub const OldPersian: Script = Script(61);
835 pub const OldSogdian: Script = Script(184);
836 pub const OldSouthArabian: Script = Script(133);
837 pub const OldTurkic: Script = Script(88);
838 pub const OldUyghur: Script = Script(194);
839 pub const Oriya: Script = Script(31);
840 pub const Osage: Script = Script(171);
841 pub const Osmanya: Script = Script(50);
842 pub const PahawhHmong: Script = Script(75);
843 pub const Palmyrene: Script = Script(144);
844 pub const PauCinHau: Script = Script(165);
845 pub const PhagsPa: Script = Script(90);
846 pub const Phoenician: Script = Script(91);
847 pub const PsalterPahlavi: Script = Script(123);
848 pub const Rejang: Script = Script(110);
849 pub const Runic: Script = Script(32);
850 pub const Samaritan: Script = Script(126);
851 pub const Saurashtra: Script = Script(111);
852 pub const Sharada: Script = Script(151);
853 pub const Shavian: Script = Script(51);
854 pub const Siddham: Script = Script(166);
855 pub const Sidetic: Script = Script(210);
856 pub const SignWriting: Script = Script(112);
857 pub const Sinhala: Script = Script(33);
858 pub const Sogdian: Script = Script(183);
859 pub const SoraSompeng: Script = Script(152);
860 pub const Soyombo: Script = Script(176);
861 pub const Sundanese: Script = Script(113);
862 pub const SylotiNagri: Script = Script(58);
863 pub const Syriac: Script = Script(34);
864 pub const Tagalog: Script = Script(42);
865 pub const Tagbanwa: Script = Script(45);
866 pub const TaiLe: Script = Script(52);
867 pub const TaiTham: Script = Script(106);
868 pub const TaiViet: Script = Script(127);
869 pub const TaiYo: Script = Script(211);
870 pub const Takri: Script = Script(153);
871 pub const Tamil: Script = Script(35);
872 pub const Tangsa: Script = Script(195);
873 pub const Tangut: Script = Script(154);
874 pub const Telugu: Script = Script(36);
875 pub const Thaana: Script = Script(37);
876 pub const Thai: Script = Script(38);
877 pub const Tibetan: Script = Script(39);
878 pub const Tifinagh: Script = Script(60);
879 pub const Tirhuta: Script = Script(158);
880 pub const TolongSiki: Script = Script(212);
881 pub const Toto: Script = Script(196);
882 pub const Ugaritic: Script = Script(53);
883 pub const Unknown: Script = Script(103);
884 pub const Vai: Script = Script(99);
885 pub const Vithkuqi: Script = Script(197);
886 pub const Wancho: Script = Script(188);
887 pub const WarangCiti: Script = Script(146);
888 pub const Yezidi: Script = Script(192);
889 pub const Yi: Script = Script(41);
890 pub const ZanabazarSquare: Script = Script(177);
891}
892}
893
// Invokes `make_enumerated_property!` for `Script`, supplying its long name ("Script"),
// short name ("sc"), Rust type, data marker, singleton identifier and storage type
// (the ULE form of `u16`).
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro body.
make_enumerated_property! {
    name: "Script";
    short_name: "sc";
    ident: Script;
    data_marker: crate::provider::PropertyEnumScriptV1;
    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
}
902
/// The `Hangul_Syllable_Type` enumerated property.
///
/// Unicode encodes Hangul both as precomposed syllables and as conjoining Jamo from which
/// arbitrary syllables can be composed. This property classifies Hangul code points
/// according to that ontology.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
///     HangulSyllableType::LeadingJamo
/// ); // U+1100
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
///     HangulSyllableType::LeadingVowelSyllable
/// ); // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Converts this value into the matching ICU4C `UHangulSyllableType` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a `HangulSyllableType` from an ICU4C `UHangulSyllableType` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        HangulSyllableType(value)
    }
}
940
// Named values of `HangulSyllableType`. `create_const_array!` re-emits these associated
// constants and additionally generates an `ALL_VALUES` slice for the type
// (presumably listing every constant declared here — confirm against the macro body).
create_const_array! {
#[allow(non_upper_case_globals)]
impl HangulSyllableType {
    /// (`NA`) not applicable (e.g. not a Hangul code point).
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
    /// (`L`) a conjoining leading consonant Jamo.
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
    /// (`V`) a conjoining vowel Jamo.
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
    /// (`T`) a conjoining trailing consonant Jamo.
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
}
}
958
// Invokes `make_enumerated_property!` for `HangulSyllableType`, supplying its long name
// ("Hangul_Syllable_Type"), short name ("hst"), Rust type, data marker, singleton
// identifier and storage type (`u8`).
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro body.
make_enumerated_property! {
    name: "Hangul_Syllable_Type";
    short_name: "hst";
    ident: HangulSyllableType;
    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
    ule_ty: u8;

}
968
/// The `East_Asian_Width` enumerated property.
///
/// Each property value is summarized under "Definition" in UAX #11:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// ```
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ｱ'),
///     EastAsianWidth::Halfwidth
/// ); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ア'),
///     EastAsianWidth::Wide
/// ); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Converts this value into the matching ICU4C `UEastAsianWidth` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds an `EastAsianWidth` from an ICU4C `UEastAsianWidth` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        EastAsianWidth(value)
    }
}
1004
// Named values of `EastAsianWidth`. `create_const_array!` re-emits these associated
// constants and additionally generates an `ALL_VALUES` slice for the type
// (presumably listing every constant declared here — confirm against the macro body).
// The trailing `name="…"` comments record a short name for each value
// (presumably the short property value alias — confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl EastAsianWidth {
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); // name="N"
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); // name="A"
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); // name="H"
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); // name="F"
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); // name="Na"
    pub const Wide: EastAsianWidth = EastAsianWidth(5); // name="W"
}
}
1017
// Invokes `make_enumerated_property!` for `EastAsianWidth`, supplying its long name
// ("East_Asian_Width"), short name ("ea"), Rust type, data marker, singleton
// identifier and storage type (`u8`).
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro body.
make_enumerated_property! {
    name: "East_Asian_Width";
    short_name: "ea";
    ident: EastAsianWidth;
    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
    ule_ty: u8;
}
1026
/// The `Line_Break` enumerated property.
///
/// Each property value is summarized under "Line Breaking Properties" in UAX #14:
/// <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get(')'),
///     LineBreak::CloseParenthesis
/// ); // U+0029: Right Parenthesis
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
///     LineBreak::ConditionalJapaneseStarter
/// ); // U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Converts this value into the matching ICU4C `ULineBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a `LineBreak` from an ICU4C `ULineBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        LineBreak(value)
    }
}
1066
// Named values of `LineBreak`. `create_const_array!` re-emits these associated
// constants and additionally generates an `ALL_VALUES` slice for the type
// (presumably listing every constant declared here — confirm against the macro body).
// The trailing `name="…"` comments record a short name for each value
// (presumably the short property value alias — confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl LineBreak {
    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
    pub const Glue: LineBreak = LineBreak(12); // name="GL"
    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
    pub const Space: LineBreak = LineBreak(26); // name="SP"
    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
    pub const H2: LineBreak = LineBreak(31); // name="H2"
    pub const H3: LineBreak = LineBreak(32); // name="H3"
    pub const JL: LineBreak = LineBreak(33); // name="JL"
    pub const JT: LineBreak = LineBreak(34); // name="JT"
    pub const JV: LineBreak = LineBreak(35); // name="JV"
    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
    pub const EBase: LineBreak = LineBreak(40); // name="EB"
    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"

    // Added in ICU 74:
    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
    pub const Virama: LineBreak = LineBreak(47); // name="VI"

    // Added in ICU 78:
    pub const UnambiguousHyphen: LineBreak = LineBreak(48); // name="HH"
}
}
1126
// Invokes `make_enumerated_property!` for `LineBreak`, supplying its long name
// ("Line_Break"), short name ("lb"), Rust type, data marker, singleton identifier
// and storage type (`u8`).
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro body.
make_enumerated_property! {
    name: "Line_Break";
    short_name: "lb";
    ident: LineBreak;
    data_marker: crate::provider::PropertyEnumLineBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
    ule_ty: u8;
}
1135
/// The `Grapheme_Cluster_Break` enumerated property.
///
/// Each property value is summarized under "Default Grapheme Cluster Boundary
/// Specification" in UAX #29:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
///     GraphemeClusterBreak::RegionalIndicator
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'),
///     GraphemeClusterBreak::SpacingMark
/// ); // U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // this type is stable
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Converts this value into the matching ICU4C `UGraphemeClusterBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a `GraphemeClusterBreak` from an ICU4C `UGraphemeClusterBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        GraphemeClusterBreak(value)
    }
}
1174
// Named values of `GraphemeClusterBreak`. `create_const_array!` re-emits these associated
// constants and additionally generates an `ALL_VALUES` slice for the type
// (presumably listing every constant declared here — confirm against the macro body).
// The trailing `name="…"` comments record a short name for each value
// (presumably the short property value alias — confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl GraphemeClusterBreak {
    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
    /// This value is obsolete and unused.
    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
}
}
1203
// Invokes `make_enumerated_property!` for `GraphemeClusterBreak`, supplying its long name
// ("Grapheme_Cluster_Break"), short name ("GCB"), Rust type, data marker, singleton
// identifier and storage type (`u8`).
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro body.
make_enumerated_property! {
    name: "Grapheme_Cluster_Break";
    short_name: "GCB";
    ident: GraphemeClusterBreak;
    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
    ule_ty: u8;
}
1212
/// The `Word_Break` enumerated property.
///
/// Each property value is summarized under "Default Word Boundary Specification"
/// in UAX #29:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::WordBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('.'),
///     WordBreak::MidNumLet
/// ); // U+002E: Full Stop
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('，'),
///     WordBreak::MidNum
/// ); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Converts this value into the matching ICU4C `UWordBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a `WordBreak` from an ICU4C `UWordBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        WordBreak(value)
    }
}
1251
// Named values of `WordBreak`. `create_const_array!` re-emits these associated
// constants and additionally generates an `ALL_VALUES` slice for the type
// (presumably listing every constant declared here — confirm against the macro body).
// The trailing `name="…"` comments record a short name for each value
// (presumably the short property value alias — confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl WordBreak {
    pub const Other: WordBreak = WordBreak(0); // name="XX"
    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
    pub const Format: WordBreak = WordBreak(2); // name="FO"
    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
    pub const CR: WordBreak = WordBreak(8); // name="CR"
    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
    pub const LF: WordBreak = WordBreak(10); // name="LF"
    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
    pub const Newline: WordBreak = WordBreak(12); // name="NL"
    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
    pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
    /// This value is obsolete and unused.
    pub const EBase: WordBreak = WordBreak(17); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
}
}
1285
// Invokes `make_enumerated_property!` for `WordBreak`, supplying its long name
// ("Word_Break"), short name ("WB"), Rust type, data marker, singleton identifier
// and storage type (`u8`).
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro body.
make_enumerated_property! {
    name: "Word_Break";
    short_name: "WB";
    ident: WordBreak;
    data_marker: crate::provider::PropertyEnumWordBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
    ule_ty: u8;
}
1294
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get('９'),
///     SentenceBreak::Numeric
/// ); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get(','),
///     SentenceBreak::SContinue
/// ); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns an ICU4C `USentenceBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `USentenceBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1333
// Named values of `SentenceBreak`. `create_const_array!` re-emits these associated
// constants and additionally generates an `ALL_VALUES` slice for the type
// (presumably listing every constant declared here — confirm against the macro body).
// The trailing `name="…"` comments record a short name for each value
// (presumably the short property value alias — confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl SentenceBreak {
    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
}
}
1355
// Invokes `make_enumerated_property!` for `SentenceBreak`, supplying its long name
// ("Sentence_Break"), short name ("SB"), Rust type, data marker, singleton identifier
// and storage type (`u8`).
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro body.
make_enumerated_property! {
    name: "Sentence_Break";
    short_name: "SB";
    ident: SentenceBreak;
    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
    ule_ty: u8;
}
1364
/// Property Canonical_Combining_Class.
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the
/// preferred API to look up the Canonical_Combining_Class property by scalar value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
///     CanonicalCombiningClass::NotReordered
/// ); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
///     CanonicalCombiningClass::Above
/// ); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);

impl CanonicalCombiningClass {
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1410
// Named values of `CanonicalCombiningClass`. `create_const_array!` re-emits these
// associated constants and additionally generates an `ALL_VALUES` slice for the type
// (presumably listing every constant declared here — confirm against the macro body).
// The trailing `name="…"` comments record a short name for each value
// (presumably the short alias from PropertyValueAliases.txt — confirm).
create_const_array! {
// These constant names come from PropertyValueAliases.txt
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
}
1476
// Hands `CanonicalCombiningClass` to `make_enumerated_property!` under the long
// name `Canonical_Combining_Class` and the short name `ccc`, together with its
// data marker, its singleton identifier, and `u8` as `ule_ty`.
// NOTE(review): the generated items depend on the macro definition, which is not
// next to this invocation; confirm there before changing any field.
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1485
/// Enumerated property Indic_Conjunct_Break.
///
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicConjunctBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('a'),
///     IndicConjunctBreak::None
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'),
///     IndicConjunctBreak::Linker
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'),
///     IndicConjunctBreak::Consonant
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'),
///     IndicConjunctBreak::Extend
/// );
/// ```
#[doc(hidden)] // draft API in ICU4C
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicConjunctBreak(pub(crate) u8);

impl IndicConjunctBreak {
    /// Unwraps the stored `u8`, which is the ICU4C `UIndicConjunctBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UIndicConjunctBreak` value without validating it.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicConjunctBreak(value)
    }
}
1529
create_const_array! {
// Named values of `IndicConjunctBreak`. `create_const_array!` re-emits this impl
// with every constant unchanged and additionally generates an `ALL_VALUES` slice,
// so a constant added here is picked up there automatically.
#[doc(hidden)] // draft API in ICU4C
#[allow(non_upper_case_globals)]
impl IndicConjunctBreak {
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
}
}
1540
// Hands `IndicConjunctBreak` to `make_enumerated_property!` under the long name
// `Indic_Conjunct_Break` and the short name `InCB`, together with its data
// marker, its singleton identifier, and `u8` as `ule_ty`.
// NOTE(review): the generated items depend on the macro definition, which is not
// next to this invocation; confirm there before changing any field.
make_enumerated_property! {
    name: "Indic_Conjunct_Break";
    short_name: "InCB";
    ident: IndicConjunctBreak;
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
    ule_ty: u8;
}
1549
/// Enumerated property Indic_Syllabic_Category.
///
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('a'),
///     IndicSyllabicCategory::Other
/// );
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'),
///     IndicSyllabicCategory::Bindu
/// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicSyllabicCategory(pub(crate) u8);

impl IndicSyllabicCategory {
    /// Unwraps the stored `u8`, which is the ICU4C `UIndicSyllabicCategory` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UIndicSyllabicCategory` value without validating it.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicSyllabicCategory(value)
    }
}
1584
create_const_array! {
// Named values of `IndicSyllabicCategory`. `create_const_array!` re-emits this impl
// with every constant unchanged and additionally generates an `ALL_VALUES` slice,
// so a constant added here is picked up there automatically.
// NOTE(review): the numeric values are what `to_icu4c_value` returns; they are
// presumably meant to match ICU4C's `UIndicSyllabicCategory` — confirm before editing.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
}
1628
// Hands `IndicSyllabicCategory` to `make_enumerated_property!` under the long name
// `Indic_Syllabic_Category` and the short name `InSC`, together with its data
// marker, its singleton identifier, and `u8` as `ule_ty`.
// NOTE(review): the generated items depend on the macro definition, which is not
// next to this invocation; confirm there before changing any field.
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
}
1637
/// Enumerated property Joining_Type.
///
/// Each property value is summarised in Section 9.2, Arabic Cursive Joining, of
/// The Unicode Standard.
///
/// # Example
///
/// ```
/// use icu::properties::{props::JoiningType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('ؠ'),
///     JoiningType::DualJoining
/// ); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('𐫍'),
///     JoiningType::LeftJoining
/// ); // U+10ACD: Manichaean Letter Heth
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct JoiningType(pub(crate) u8);

impl JoiningType {
    /// Unwraps the stored `u8`, which is the ICU4C `UJoiningType` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UJoiningType` value without validating it.
    pub const fn from_icu4c_value(value: u8) -> Self {
        JoiningType(value)
    }
}
1673
create_const_array! {
// Named values of `JoiningType`. `create_const_array!` re-emits this impl with
// every constant unchanged and additionally generates an `ALL_VALUES` slice.
// NOTE(review): the trailing `name="…"` comments look like the short Unicode
// value aliases for each constant — confirm before relying on them.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
1686
// Hands `JoiningType` to `make_enumerated_property!` under the long name
// `Joining_Type` and the short name `jt`, together with its data marker, its
// singleton identifier, and `u8` as `ule_ty`.
// NOTE(review): the generated items depend on the macro definition, which is not
// next to this invocation; confirm there before changing any field.
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8;
}
1695
/// Enumerated property Vertical_Orientation.
///
/// See UTR #50:
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// # Example
///
/// ```
/// use icu::properties::{props::VerticalOrientation, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('a'),
///     VerticalOrientation::Rotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('§'),
///     VerticalOrientation::Upright
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x2329),
///     VerticalOrientation::TransformedRotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x3001),
///     VerticalOrientation::TransformedUpright
/// );
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct VerticalOrientation(pub(crate) u8);

impl VerticalOrientation {
    /// Unwraps the stored `u8`, which is the ICU4C `UVerticalOrientation` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UVerticalOrientation` value without validating it.
    pub const fn from_icu4c_value(value: u8) -> Self {
        VerticalOrientation(value)
    }
}
1739
create_const_array! {
// Named values of `VerticalOrientation`. `create_const_array!` re-emits this impl
// with every constant unchanged and additionally generates an `ALL_VALUES` slice.
// NOTE(review): the trailing `name="…"` comments look like the short Unicode
// value aliases for each constant — confirm before relying on them.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
}
1750
// Hands `VerticalOrientation` to `make_enumerated_property!` under the long name
// `Vertical_Orientation` and the short name `vo`, together with its data marker,
// its singleton identifier, and `u8` as `ule_ty`.
// NOTE(review): the generated items depend on the macro definition, which is not
// next to this invocation; confirm there before changing any field.
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8;
}
1759
1760pub use crate::code_point_set::BinaryProperty;
1761
// Declares one binary property.
//
// Input, in this fixed order: the long name and short name as string literals,
// the identifier of the marker type to create, the data marker type, the
// identifier of the baked singleton, and at least one attribute (normally the
// doc comment) to place on the marker type.
//
// Output: a `#[non_exhaustive]` unit struct carrying those attributes, a
// `Sealed` impl for it, and a `BinaryProperty` impl whose `NAME`/`SHORT_NAME`
// are the literals as bytes. `SINGLETON` is only emitted when the
// `compiled_data` feature is enabled.
macro_rules! make_binary_property {
    (
        name: $long_name:literal;
        short_name: $alias:literal;
        ident: $marker:ident;
        data_marker: $marker_ty:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $marker;

        impl crate::private::Sealed for $marker {}

        impl BinaryProperty for $marker {
            type DataMarker = $marker_ty;
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $alias.as_bytes();
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$baked;
        }
    };
}
1788
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// This is the marker type for the binary property `ASCII_Hex_Digit`
    /// (short name `AHex`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1811
make_binary_property! {
    name: "Alnum";
    short_name: "Alnum";
    ident: Alnum;
    data_marker: crate::provider::PropertyBinaryAlnumV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// This is the marker type for the binary property `Alnum`; its short name
    /// is also `Alnum`. Query it with `CodePointSetData::new::<Alnum>()`.
}
1822
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
    /// Alphabetic characters.
    ///
    /// This is the marker type for the binary property `Alphabetic`
    /// (short name `Alpha`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

}
1846
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// This is the marker type for the binary property `Bidi_Control`
    /// (short name `Bidi_C`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

}
1869
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
    /// Characters that are mirrored in bidirectional text.
    ///
    /// This is the marker type for the binary property `Bidi_Mirrored`
    /// (short name `Bidi_M`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

}
1893
make_binary_property! {
    name: "Blank";
    short_name: "Blank";
    ident: Blank;
    data_marker: crate::provider::PropertyBinaryBlankV1;
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
    /// Horizontal whitespace characters.
    ///
    /// This is the marker type for the binary property `Blank`; its short name
    /// is also `Blank`. Query it with `CodePointSetData::new::<Blank>()`.

}
1903
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::PropertyBinaryCasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// This is the marker type for the binary property `Cased`; its short name
    /// is also `Cased`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

}
1925
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
    /// Characters which are ignored for casing purposes.
    ///
    /// This is the marker type for the binary property `Case_Ignorable`
    /// (short name `CI`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
    /// ```

}
1947
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
    ///
    /// This is the marker type for the binary property `Full_Composition_Exclusion`
    /// (short name `Comp_Ex`). Query it with
    /// `CodePointSetData::new::<FullCompositionExclusion>()`.

}
1959
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// This is the marker type for the binary property `Changes_When_Casefolded`
    /// (short name `CWCF`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

}
1981
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
    /// Characters which may change when they undergo case mapping.
    ///
    /// This is the marker type for the binary property `Changes_When_Casemapped`
    /// (short name `CWCM`). Query it with
    /// `CodePointSetData::new::<ChangesWhenCasemapped>()`.

}
1991
make_binary_property! {
    name: "Changes_When_NFKC_Casefolded";
    short_name: "CWKCF";
    ident: ChangesWhenNfkcCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
    ///
    /// This is the marker type for the binary property `Changes_When_NFKC_Casefolded`
    /// (short name `CWKCF`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
    ///
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

}
2013
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// This is the marker type for the binary property `Changes_When_Lowercased`
    /// (short name `CWL`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

}
2035
2036make_binary_property! {
2037 name: "Changes_When_Titlecased";
2038 short_name: "CWT";
2039 ident: ChangesWhenTitlecased;
2040 data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
2041 singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
2042 /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
2043 ///
2044 /// # Example
2045 ///
2046 /// ```
2047 /// use icu::properties::CodePointSetData;
2048 /// use icu::properties::props::ChangesWhenTitlecased;
2049 ///
2050 /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
2051 ///
2052 /// assert!(changes_when_titlecased.contains('æ')); // U+00E6 LATIN SMALL LETTER AE
2053 /// assert!(!changes_when_titlecased.contains('Æ')); // U+00E6 LATIN CAPITAL LETTER AE
2054 /// ```
2055
2056}
2057
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// This is the marker type for the binary property `Changes_When_Uppercased`
    /// (short name `CWU`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

}
2079
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::PropertyBinaryDashV1;
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// This is the marker type for the binary property `Dash`; its short name
    /// is also `Dash`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```

}
2103
make_binary_property! {
    name: "Deprecated";
    short_name: "Dep";
    ident: Deprecated;
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
    /// Deprecated characters.
    ///
    /// No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// This is the marker type for the binary property `Deprecated`
    /// (short name `Dep`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Deprecated;
    ///
    /// let deprecated = CodePointSetData::new::<Deprecated>();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

}
2128
make_binary_property! {
    name: "Default_Ignorable_Code_Point";
    short_name: "DI";
    ident: DefaultIgnorableCodePoint;
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
    /// For programmatic determination of default ignorable code points.
    ///
    /// New characters that
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
    /// ranges, permitting programs to correctly handle the default rendering of such
    /// characters when not otherwise supported.
    ///
    /// This is the marker type for the binary property `Default_Ignorable_Code_Point`
    /// (short name `DI`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::DefaultIgnorableCodePoint;
    ///
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
    ///
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

}
2155
make_binary_property! {
    name: "Diacritic";
    short_name: "Dia";
    ident: Diacritic;
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
    /// Characters that linguistically modify the meaning of another character to which they apply.
    ///
    /// This is the marker type for the binary property `Diacritic`
    /// (short name `Dia`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Diacritic;
    ///
    /// let diacritic = CodePointSetData::new::<Diacritic>();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```

}
2177
make_binary_property! {
    name: "Emoji_Modifier_Base";
    short_name: "EBase";
    ident: EmojiModifierBase;
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
    /// Characters that can serve as a base for emoji modifiers.
    ///
    /// This is the marker type for the binary property `Emoji_Modifier_Base`
    /// (short name `EBase`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifierBase;
    ///
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```

}
2199
make_binary_property! {
    name: "Emoji_Component";
    short_name: "EComp";
    ident: EmojiComponent;
    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
    /// separate choices, such as base characters for emoji keycaps.
    ///
    /// This is the marker type for the binary property `Emoji_Component`
    /// (short name `EComp`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiComponent;
    ///
    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
    ///
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
    /// assert!(emoji_component.contains('7'));
    /// assert!(!emoji_component.contains('T'));
    /// ```

}
2224
make_binary_property! {
    name: "Emoji_Modifier";
    short_name: "EMod";
    ident: EmojiModifier;
    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
    /// Characters that are emoji modifiers.
    ///
    /// This is the marker type for the binary property `Emoji_Modifier`
    /// (short name `EMod`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifier;
    ///
    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
    ///
    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
    /// ```

}
2246
make_binary_property! {
    name: "Emoji";
    short_name: "Emoji";
    ident: Emoji;
    data_marker: crate::provider::PropertyBinaryEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
    /// Characters that are emoji.
    ///
    /// This is the marker type for the binary property `Emoji`; its short name
    /// is also `Emoji`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Emoji;
    ///
    /// let emoji = CodePointSetData::new::<Emoji>();
    ///
    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
    /// assert!(!emoji.contains('V'));
    /// ```

}
2268
make_binary_property! {
    name: "Emoji_Presentation";
    short_name: "EPres";
    ident: EmojiPresentation;
    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
    /// Characters that have emoji presentation by default.
    ///
    /// This is the marker type for the binary property `Emoji_Presentation`
    /// (short name `EPres`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiPresentation;
    ///
    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
    ///
    /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
    /// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
    /// ```

}
2290
make_binary_property! {
    name: "Extender";
    short_name: "Ext";
    ident: Extender;
    data_marker: crate::provider::PropertyBinaryExtenderV1;
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
    /// Characters whose principal function is to extend the value of a preceding alphabetic
    /// character or to extend the shape of adjacent characters.
    ///
    /// This is the marker type for the binary property `Extender`
    /// (short name `Ext`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Extender;
    ///
    /// let extender = CodePointSetData::new::<Extender>();
    ///
    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
    /// ```

}
2314
make_binary_property! {
    name: "Extended_Pictographic";
    short_name: "ExtPict";
    ident: ExtendedPictographic;
    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
    /// emoji characters.
    ///
    /// This is the marker type for the binary property `Extended_Pictographic`
    /// (short name `ExtPict`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ExtendedPictographic;
    ///
    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
    ///
    /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
    /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
    /// ```

}
2337
2338make_binary_property! {
2339 name: "Graph";
2340 short_name: "Graph";
2341 ident: Graph;
2342 data_marker: crate::provider::PropertyBinaryGraphV1;
2343 singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2344 /// Invisible characters.
2345 ///
2346 /// This is defined for POSIX compatibility.
2347
2348}
2349
make_binary_property! {
    name: "Grapheme_Base";
    short_name: "Gr_Base";
    ident: GraphemeBase;
    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
    /// Property used together with the definition of Standard Korean Syllable Block to define
    /// "Grapheme base".
    ///
    /// See D58 in Chapter 3, Conformance in the Unicode Standard.
    ///
    /// This is the marker type for the binary property `Grapheme_Base`
    /// (short name `Gr_Base`).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeBase;
    ///
    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
    ///
    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```

}
2375
make_binary_property! {
    name: "Grapheme_Extend";
    short_name: "Gr_Ext";
    ident: GraphemeExtend;
    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
    /// Property used to define "Grapheme extender".
    ///
    /// See D59 in Chapter 3, Conformance in the Unicode Standard. Compare [`GraphemeBase`],
    /// which is used to define "Grapheme base".
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeExtend;
    ///
    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
    ///
    /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA
    /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```

}
2401
make_binary_property! {
    name: "Grapheme_Link";
    short_name: "Gr_Link";
    ident: GraphemeLink;
    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
    /// Deprecated property.
    ///
    /// Formerly proposed for programmatic determination of grapheme cluster boundaries.
}
2413
make_binary_property! {
    name: "Hex_Digit";
    short_name: "Hex";
    ident: HexDigit;
    data_marker: crate::provider::PropertyBinaryHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
    /// Characters commonly used for the representation of hexadecimal numbers, plus their
    /// compatibility equivalents.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::HexDigit;
    ///
    /// let hex_digit = CodePointSetData::new::<HexDigit>();
    ///
    /// assert!(hex_digit.contains('0'));
    /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(hex_digit.contains('f'));
    /// // The fullwidth forms are written as escapes so that they cannot be mistaken for
    /// // (or silently folded into) their ASCII counterparts.
    /// assert!(hex_digit.contains('\u{FF46}')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F
    /// assert!(hex_digit.contains('\u{FF26}')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
    /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
2439
make_binary_property! {
    name: "Hyphen";
    short_name: "Hyphen";
    ident: Hyphen;
    data_marker: crate::provider::PropertyBinaryHyphenV1;
    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
    /// Deprecated property.
    ///
    /// Dashes which are used to mark connections between pieces of words, plus the Katakana
    /// middle dot.
}
2451
make_binary_property! {
    name: "ID_Compat_Math_Continue";
    short_name: "ID_Compat_Math_Continue";
    ident: IdCompatMathContinue;
    data_marker: crate::provider::PropertyBinaryIdCompatMathContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_CONTINUE_V1;
    /// The `ID_Compat_Math_Continue` property.
    ///
    /// Used in the mathematical identifier profile described in
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/). See also
    /// [`IdCompatMathStart`].
}
2460
make_binary_property! {
    name: "ID_Compat_Math_Start";
    short_name: "ID_Compat_Math_Start";
    ident: IdCompatMathStart;
    data_marker: crate::provider::PropertyBinaryIdCompatMathStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_START_V1;
    /// The `ID_Compat_Math_Start` property.
    ///
    /// Used in the mathematical identifier profile described in
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/). See also
    /// [`IdCompatMathContinue`].
}
2469
make_binary_property! {
    name: "Id_Continue";
    short_name: "IDC";
    ident: IdContinue;
    data_marker: crate::provider::PropertyBinaryIdContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// If using NFKC to fold differences between characters, use [`XidContinue`] instead. See
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
    /// more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdContinue;
    ///
    /// let id_continue = CodePointSetData::new::<IdContinue>();
    ///
    /// assert!(id_continue.contains('x'));
    /// assert!(id_continue.contains('1'));
    /// assert!(id_continue.contains('_'));
    /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_continue.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
2499
make_binary_property! {
    name: "Ideographic";
    short_name: "Ideo";
    ident: Ideographic;
    data_marker: crate::provider::PropertyBinaryIdeographicV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
    /// ideographs, or related siniform ideographs.
    ///
    /// See also [`UnifiedIdeograph`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Ideographic;
    ///
    /// let ideographic = CodePointSetData::new::<Ideographic>();
    ///
    /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB
    /// ```
}
2521
make_binary_property! {
    name: "Id_Start";
    short_name: "IDS";
    ident: IdStart;
    data_marker: crate::provider::PropertyBinaryIdStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// If using NFKC to fold differences between characters, use [`XidStart`] instead. See
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
    /// more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdStart;
    ///
    /// let id_start = CodePointSetData::new::<IdStart>();
    ///
    /// assert!(id_start.contains('x'));
    /// assert!(!id_start.contains('1'));
    /// assert!(!id_start.contains('_'));
    /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_start.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
2550
make_binary_property! {
    name: "Ids_Binary_Operator";
    short_name: "IDSB";
    ident: IdsBinaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
    /// Binary operator characters used in Ideographic Description Sequences.
    ///
    /// See also [`IdsTrinaryOperator`] and [`IdsUnaryOperator`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsBinaryOperator;
    ///
    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
    ///
    /// assert!(ids_binary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_binary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
    /// ```
}
2571
make_binary_property! {
    name: "Ids_Trinary_Operator";
    short_name: "IDST";
    ident: IdsTrinaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
    /// Trinary operator characters used in Ideographic Description Sequences.
    ///
    /// See also [`IdsBinaryOperator`] and [`IdsUnaryOperator`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsTrinaryOperator;
    ///
    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
    ///
    /// assert!(ids_trinary_operator.contains('\u{2FF2}')); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
    /// assert!(ids_trinary_operator.contains('\u{2FF3}')); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
    /// assert!(!ids_trinary_operator.contains('\u{2FF4}')); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
    /// assert!(!ids_trinary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_trinary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
    /// ```
}
2595
make_binary_property! {
    name: "IDS_Unary_Operator";
    short_name: "IDSU";
    ident: IdsUnaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsUnaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_UNARY_OPERATOR_V1;
    /// The `IDS_Unary_Operator` property: unary operator characters used in Ideographic
    /// Description Sequences.
    ///
    /// See also [`IdsBinaryOperator`] and [`IdsTrinaryOperator`].
}
2604
make_binary_property! {
    name: "Join_Control";
    short_name: "Join_C";
    ident: JoinControl;
    data_marker: crate::provider::PropertyBinaryJoinControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
    /// Format control characters which have specific functions for control of cursive joining
    /// and ligation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::JoinControl;
    ///
    /// let join_control = CodePointSetData::new::<JoinControl>();
    ///
    /// assert!(join_control.contains('\u{200C}')); // ZERO WIDTH NON-JOINER
    /// assert!(join_control.contains('\u{200D}')); // ZERO WIDTH JOINER
    /// assert!(!join_control.contains('\u{200E}')); // LEFT-TO-RIGHT MARK
    /// ```
}
2627
make_binary_property! {
    name: "Logical_Order_Exception";
    short_name: "LOE";
    ident: LogicalOrderException;
    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts
    /// such as Thai and Lao.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::LogicalOrderException;
    ///
    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
    ///
    /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI
    /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A
    /// ```
}
2648
make_binary_property! {
    name: "Lowercase";
    short_name: "Lower";
    ident: Lowercase;
    data_marker: crate::provider::PropertyBinaryLowercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
    /// Lowercase characters.
    ///
    /// See also [`Uppercase`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Lowercase;
    ///
    /// let lowercase = CodePointSetData::new::<Lowercase>();
    ///
    /// assert!(lowercase.contains('a'));
    /// assert!(!lowercase.contains('A'));
    /// ```
}
2669
make_binary_property! {
    name: "Math";
    short_name: "Math";
    ident: Math;
    data_marker: crate::provider::PropertyBinaryMathV1;
    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
    /// Characters used in mathematical notation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Math;
    ///
    /// let math = CodePointSetData::new::<Math>();
    ///
    /// assert!(math.contains('='));
    /// assert!(math.contains('+'));
    /// assert!(!math.contains('-')); // U+002D HYPHEN-MINUS
    /// assert!(math.contains('−')); // U+2212 MINUS SIGN
    /// assert!(!math.contains('/')); // U+002F SOLIDUS
    /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH
    /// ```
}
2694
make_binary_property! {
    name: "Modifier_Combining_Mark";
    short_name: "MCM";
    ident: ModifierCombiningMark;
    data_marker: crate::provider::PropertyBinaryModifierCombiningMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_MODIFIER_COMBINING_MARK_V1;
    /// The `Modifier_Combining_Mark` property.
    ///
    /// Used by the Arabic Mark Transient Reordering Algorithm; see
    /// [`Unicode Technical Report #53`](https://www.unicode.org/reports/tr53/).
}
2703
make_binary_property! {
    name: "Noncharacter_Code_Point";
    short_name: "NChar";
    ident: NoncharacterCodePoint;
    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
    /// Code points permanently reserved for internal use.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::NoncharacterCodePoint;
    ///
    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
    ///
    /// assert!(noncharacter_code_point.contains('\u{FDD0}')); // start of the U+FDD0..U+FDEF noncharacter range
    /// assert!(noncharacter_code_point.contains('\u{FFFF}')); // last code point of the Basic Multilingual Plane
    /// assert!(!noncharacter_code_point.contains('\u{10000}')); // LINEAR B SYLLABLE B008 A
    /// ```
}
2725
make_binary_property! {
    name: "NFC_Inert";
    short_name: "NFC_Inert";
    ident: NfcInert;
    data_marker: crate::provider::PropertyBinaryNfcInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
    ///
    /// This is an ICU-specific property.
}
2734
make_binary_property! {
    name: "NFD_Inert";
    short_name: "NFD_Inert";
    ident: NfdInert;
    data_marker: crate::provider::PropertyBinaryNfdInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
    ///
    /// This is an ICU-specific property.
}
2743
make_binary_property! {
    name: "NFKC_Inert";
    short_name: "NFKC_Inert";
    ident: NfkcInert;
    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
    ///
    /// This is an ICU-specific property.
}
2752
make_binary_property! {
    name: "NFKD_Inert";
    short_name: "NFKD_Inert";
    ident: NfkdInert;
    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
    ///
    /// This is an ICU-specific property.
}
2761
make_binary_property! {
    name: "Pattern_Syntax";
    short_name: "Pat_Syn";
    ident: PatternSyntax;
    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
    /// Characters used as syntax in patterns (such as regular expressions).
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details. See also [`PatternWhiteSpace`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternSyntax;
    ///
    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
    ///
    /// assert!(pattern_syntax.contains('{'));
    /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW
    /// assert!(!pattern_syntax.contains('0'));
    /// ```
}
2787
make_binary_property! {
    name: "Pattern_White_Space";
    short_name: "Pat_WS";
    ident: PatternWhiteSpace;
    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
    /// Characters used as whitespace in patterns (such as regular expressions).
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details. See also [`PatternSyntax`] and [`WhiteSpace`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternWhiteSpace;
    ///
    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
    ///
    /// assert!(pattern_white_space.contains(' ')); // U+0020 SPACE
    /// assert!(pattern_white_space.contains('\u{2029}')); // PARAGRAPH SEPARATOR
    /// assert!(pattern_white_space.contains('\u{000A}')); // NEW LINE
    /// assert!(!pattern_white_space.contains('\u{00A0}')); // NO-BREAK SPACE
    /// ```
}
2814
make_binary_property! {
    name: "Prepended_Concatenation_Mark";
    short_name: "PCM";
    ident: PrependedConcatenationMark;
    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
    /// A small class of visible format controls, which precede and then span a sequence of
    /// other characters, usually digits.
    ///
    /// These have also been known as "subtending marks", because most of them take a form
    /// which visually extends underneath the sequence of following digits.
}
2824
make_binary_property! {
    name: "Print";
    short_name: "Print";
    ident: Print;
    data_marker: crate::provider::PropertyBinaryPrintV1;
    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
    /// Printable characters (visible characters and whitespace).
    ///
    /// This is defined for POSIX compatibility. See also [`Graph`].
}
2835
make_binary_property! {
    name: "Quotation_Mark";
    short_name: "QMark";
    ident: QuotationMark;
    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
    /// Punctuation characters that function as quotation marks.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::QuotationMark;
    ///
    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
    ///
    /// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE
    /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK
    /// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN
    /// ```
}
2857
make_binary_property! {
    name: "Radical";
    short_name: "Radical";
    ident: Radical;
    data_marker: crate::provider::PropertyBinaryRadicalV1;
    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
    /// Characters used in the definition of Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Radical;
    ///
    /// let radical = CodePointSetData::new::<Radical>();
    ///
    /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX
    /// // Written as an escape: a literal U+F95E is canonically equivalent to U+4E39 and is
    /// // replaced by it whenever the source text gets normalized.
    /// assert!(!radical.contains('\u{F95E}')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
    /// ```
}
2878
make_binary_property! {
    name: "Regional_Indicator";
    short_name: "RI";
    ident: RegionalIndicator;
    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::RegionalIndicator;
    ///
    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
    ///
    /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T
    /// assert!(!regional_indicator.contains('T')); // U+0054 LATIN CAPITAL LETTER T
    /// ```
}
2900
make_binary_property! {
    name: "Soft_Dotted";
    short_name: "SD";
    ident: SoftDotted;
    data_marker: crate::provider::PropertyBinarySoftDottedV1;
    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
    /// Characters with a "soft dot", like i or j.
    ///
    /// An accent placed on these characters causes the dot to disappear.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SoftDotted;
    ///
    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
    ///
    /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I
    /// ```
}
2924
make_binary_property! {
    name: "Segment_Starter";
    short_name: "Segment_Starter";
    ident: SegmentStarter;
    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
    /// Characters that are starters in terms of Unicode normalization and combining character
    /// sequences.
    ///
    /// This is an ICU-specific property.
}
2934
make_binary_property! {
    name: "Case_Sensitive";
    short_name: "Case_Sensitive";
    ident: CaseSensitive;
    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
    /// Characters that are either the source of a case mapping or in the target of a case
    /// mapping.
    ///
    /// This is an ICU-specific property. It is not the same as the general category
    /// `Cased_Letter`.
}
2944
make_binary_property! {
    name: "Sentence_Terminal";
    short_name: "STerm";
    ident: SentenceTerminal;
    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
    /// Punctuation characters that generally mark the end of sentences.
    ///
    /// Compare [`TerminalPunctuation`], which covers the end of textual units in general and
    /// so also contains marks such as `,`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SentenceTerminal;
    ///
    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
    ///
    /// assert!(sentence_terminal.contains('.'));
    /// assert!(sentence_terminal.contains('?'));
    /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(!sentence_terminal.contains(','));
    /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```
}
2968
make_binary_property! {
    name: "Terminal_Punctuation";
    short_name: "Term";
    ident: TerminalPunctuation;
    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
    /// Punctuation characters that generally mark the end of textual units.
    ///
    /// Compare [`SentenceTerminal`], which is limited to marks that end sentences and so does
    /// not contain `,`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::TerminalPunctuation;
    ///
    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
    ///
    /// assert!(terminal_punctuation.contains('.'));
    /// assert!(terminal_punctuation.contains('?'));
    /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(terminal_punctuation.contains(','));
    /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```
}
2992
make_binary_property! {
    name: "Unified_Ideograph";
    short_name: "UIdeo";
    ident: UnifiedIdeograph;
    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
    ///
    /// See also [`Ideographic`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::UnifiedIdeograph;
    ///
    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
    ///
    /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728
    /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178
    /// ```
}
3014
make_binary_property! {
    name: "Uppercase";
    short_name: "Upper";
    ident: Uppercase;
    data_marker: crate::provider::PropertyBinaryUppercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
    /// Uppercase characters.
    ///
    /// See also [`Lowercase`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Uppercase;
    ///
    /// let uppercase = CodePointSetData::new::<Uppercase>();
    ///
    /// assert!(uppercase.contains('U'));
    /// assert!(!uppercase.contains('u'));
    /// ```
}
3035
make_binary_property! {
    name: "Variation_Selector";
    short_name: "VS";
    ident: VariationSelector;
    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
    /// Characters that are Variation Selectors.
    ///
    /// See [`Unicode Technical Standard #37`](https://www.unicode.org/reports/tr37/) for the
    /// use of variation selectors in ideographic variation sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::VariationSelector;
    ///
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
    ///
    /// assert!(variation_selector.contains('\u{180D}')); // MONGOLIAN FREE VARIATION SELECTOR THREE
    /// assert!(!variation_selector.contains('\u{303E}')); // IDEOGRAPHIC VARIATION INDICATOR
    /// assert!(variation_selector.contains('\u{FE0F}')); // VARIATION SELECTOR-16
    /// assert!(!variation_selector.contains('\u{FE10}')); // PRESENTATION FORM FOR VERTICAL COMMA
    /// assert!(variation_selector.contains('\u{E01EF}')); // VARIATION SELECTOR-256
    /// ```
}
3059
make_binary_property! {
    name: "White_Space";
    short_name: "space";
    ident: WhiteSpace;
    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
    /// Spaces, separator characters and other control characters which should be treated by
    /// programming languages as "white space" for the purpose of parsing elements.
    ///
    /// See also [`PatternWhiteSpace`], the set of characters used as whitespace in patterns.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' ')); // U+0020 SPACE
    /// assert!(white_space.contains('\u{000A}')); // NEW LINE
    /// assert!(white_space.contains('\u{00A0}')); // NO-BREAK SPACE
    /// assert!(!white_space.contains('\u{200B}')); // ZERO WIDTH SPACE
    /// ```
}
3083
make_binary_property! {
    name: "Xdigit";
    short_name: "Xdigit";
    ident: Xdigit;
    data_marker: crate::provider::PropertyBinaryXdigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
    /// Hexadecimal digits.
    ///
    /// This is defined for POSIX compatibility. See also [`HexDigit`].
}
3094
make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::PropertyBinaryXidContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// Use this instead of [`IdContinue`] when NFKC is used to fold differences between
    /// characters. See
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
    /// more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));
    /// assert!(xid_continue.contains('1'));
    /// assert!(xid_continue.contains('_'));
    /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3122
make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::PropertyBinaryXidStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// Use this instead of [`IdStart`] when NFKC is used to fold differences between
    /// characters. See
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
    /// more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3151
3152pub use crate::emoji::EmojiSet;
3153
/// Declares a marker type for an emoji set property.
///
/// Input, in this order: the type name (`ident:`), the data marker type (`data_marker:`),
/// the name of the baked singleton (`singleton:`), then one or more attributes (in practice
/// the doc comment) to attach to the generated type.
///
/// Expands to a public, `#[non_exhaustive]` unit struct deriving `Debug`, a `Sealed` impl
/// for it, and an `EmojiSet` impl whose `SINGLETON` is only present with the
/// `compiled_data` feature.
macro_rules! make_emoji_set {
    (
        ident: $name:ident;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $name;

        impl crate::private::Sealed for $name {}

        impl EmojiSet for $name {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked;
        }
    }
}
3176
make_emoji_set! {
    ident: BasicEmoji;
    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
    /// Characters and character sequences intended for general-purpose, independent, direct input.
    ///
    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
    /// details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::EmojiSetData;
    /// use icu::properties::props::BasicEmoji;
    ///
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
    ///
    /// assert!(!basic_emoji.contains('\u{0020}')); // SPACE
    /// assert!(!basic_emoji.contains('\n'));
    /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
    /// assert!(basic_emoji.contains_str("\u{1F983}")); // the same character, queried as a string
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3
    /// ```
}
3202
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;

    /// Asserts that the values present in the name-parsing data `lookup` and the values
    /// listed in `consts` form the same set.
    ///
    /// Every value found on only one side is reported in the panic message, tagged with the
    /// side it came from (`Data` or `Consts`) and a name for it.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Data side, keyed by numeric value. Entries are struck off below as the matching
        // constant is seen; whatever remains exists only in the data.
        let mut data: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        let names = crate::PropertyNamesLong::<T>::new();

        let mut diff = Vec::new();
        for constant in consts {
            let value = u16::from(*constant) as usize;
            let name = names.get(*constant).unwrap_or("<unknown>").to_string();
            if data.remove(&value).is_none() {
                // Declared as a constant but absent from the data.
                diff.push((value, (name, "Consts")));
            }
        }
        // Present in the data but never declared as a constant.
        diff.extend(data);

        let fmt_diff: String = diff
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}"
        );
    }

    /// Checks [`EastAsianWidth`].
    #[test]
    fn test_ea() {
        let lookup = crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1;
        check_enum(lookup, EastAsianWidth::ALL_VALUES);
    }

    /// Checks [`CanonicalCombiningClass`].
    #[test]
    fn test_ccc() {
        let lookup =
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1;
        check_enum(lookup, CanonicalCombiningClass::ALL_VALUES);
    }

    /// Checks [`JoiningType`].
    #[test]
    fn test_jt() {
        let lookup = crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1;
        check_enum(lookup, JoiningType::ALL_VALUES);
    }

    /// Checks [`IndicSyllabicCategory`].
    #[test]
    fn test_insc() {
        let lookup =
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1;
        check_enum(lookup, IndicSyllabicCategory::ALL_VALUES);
    }

    /// Checks [`SentenceBreak`].
    #[test]
    fn test_sb() {
        let lookup = crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1;
        check_enum(lookup, SentenceBreak::ALL_VALUES);
    }

    /// Checks [`WordBreak`].
    #[test]
    fn test_wb() {
        let lookup = crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1;
        check_enum(lookup, WordBreak::ALL_VALUES);
    }

    /// Checks [`BidiClass`].
    #[test]
    fn test_bc() {
        let lookup = crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1;
        check_enum(lookup, BidiClass::ALL_VALUES);
    }

    /// Checks [`HangulSyllableType`].
    #[test]
    fn test_hst() {
        let lookup =
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1;
        check_enum(lookup, HangulSyllableType::ALL_VALUES);
    }

    /// Checks [`VerticalOrientation`].
    #[test]
    fn test_vo() {
        let lookup =
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1;
        check_enum(lookup, VerticalOrientation::ALL_VALUES);
    }
}