icu_properties/
props.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module defines all available properties.
6//!
7//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8//! and implement [`EnumeratedProperty`].
9//!
10//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12//!
13//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16//! can be constructed.
17//!
18//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Wraps an inherent `impl` block of associated constants on an open-enum
/// newtype and generates boilerplate from the list of constants.
///
/// See [`test_enumerated_property_completeness`] for usage.
///
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
///
/// The input `impl` is re-emitted with every attribute and constant intact, plus:
///
/// - `pub const ALL_VALUES: &'static [EastAsianWidth]`, which lists every declared
///   constant in declaration order (`&[EastAsianWidth::Neutral, ...]`);
/// - with the `datagen` feature, a `databake::Bake` impl that bakes a value equal to
///   a declared constant as that constant, and any other value as a call to
///   `from_icu4c_value`, which the type therefore has to provide;
/// - `impl From<EastAsianWidth> for u16`, which casts the first field of the
///   struct to `u16`.
macro_rules! create_const_array {
    (
        $( #[$impl_attr:meta] )*
        impl $enum_ty:ident {
            $( $( #[$const_attr:meta] )* $vis:vis const $name:ident: $ty:ty = $value:expr; )*
        }
    ) => {
        $( #[$impl_attr] )*
        impl $enum_ty {
            $(
                $( #[$const_attr] )*
                $vis const $name: $ty = $value;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $( $enum_ty::$name ),*
            ];
        }

        #[cfg(feature = "datagen")]
        impl databake::Bake for $enum_ty {
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_properties");
                match *self {
                    $(
                        Self::$name => databake::quote!(icu_properties::props::$enum_ty::$name),
                    )*
                    // No declared constant matched: bake the raw value through the constructor.
                    Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)),
                }
            }
        }

        impl From<$enum_ty> for u16 {
            fn from(other: $enum_ty) -> Self {
                other.0 as u16
            }
        }
    }
}
77
78pub use crate::code_point_map::EnumeratedProperty;
79
/// Generates the trait impls that register a value type as an enumerated property.
///
/// Fields, in the order they must be written:
///
/// - `name` / `short_name`: string literals, stored as bytes in `NAME` / `SHORT_NAME`;
/// - `ident`: path of the value type that receives the impls;
/// - `data_marker`: the type used as `EnumeratedProperty::DataMarker`;
/// - `singleton`: name of the item on `crate::provider::Baked` used for `SINGLETON`
///   (only compiled with the `compiled_data` feature);
/// - `ule_ty` (optional): when present, `zerovec::ule::AsULE` is also implemented
///   by delegating to field `.0` of the value type.
macro_rules! make_enumerated_property {
    (
        name: $long_name:literal;
        short_name: $abbrev:literal;
        ident: $prop:path;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(ule_ty: $unaligned_ty:ty;)?
    ) => {
        impl crate::private::Sealed for $prop {}

        impl EnumeratedProperty for $prop {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$baked;
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $abbrev.as_bytes();
        }

        $(
            impl zerovec::ule::AsULE for $prop {
                type ULE = $unaligned_ty;

                fn to_unaligned(self) -> Self::ULE {
                    zerovec::ule::AsULE::to_unaligned(self.0)
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
114
/// Enumerated property Bidi_Class.
///
/// These are the categories required by the Unicode Bidirectional Algorithm.
/// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values).
/// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// This is an open enum: a transparent newtype over `u8` whose named values are
/// associated constants. Any other `u8` can still be wrapped with
/// [`BidiClass::from_icu4c_value`].
///
/// # Example
///
/// ```
/// use icu::properties::{props::BidiClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('y'),
///     BidiClass::LeftToRight
/// ); // U+0079
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('ع'),
///     BidiClass::ArabicLetter
/// ); // U+0639
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct BidiClass(pub(crate) u8);
140
141impl BidiClass {
142    /// Returns an ICU4C `UBidiClass` value.
143    pub const fn to_icu4c_value(self) -> u8 {
144        self.0
145    }
146    /// Constructor from an ICU4C `UBidiClass` value.
147    pub const fn from_icu4c_value(value: u8) -> Self {
148        Self(value)
149    }
150}
151
// Named `Bidi_Class` values. Wrapping the `impl` in `create_const_array!` re-emits it
// and additionally generates `BidiClass::ALL_VALUES` and `From<BidiClass> for u16`
// (plus a `databake::Bake` impl when the `datagen` feature is enabled).
create_const_array! {
#[allow(non_upper_case_globals)]
impl BidiClass {
    /// (`L`) any strong left-to-right character
    pub const LeftToRight: BidiClass = BidiClass(0);
    /// (`R`) any strong right-to-left (non-Arabic-type) character
    pub const RightToLeft: BidiClass = BidiClass(1);
    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
    pub const EuropeanNumber: BidiClass = BidiClass(2);
    /// (`ES`) plus and minus signs
    pub const EuropeanSeparator: BidiClass = BidiClass(3);
    /// (`ET`) a terminator in a numeric format context, includes currency signs
    pub const EuropeanTerminator: BidiClass = BidiClass(4);
    /// (`AN`) any Arabic-Indic digit
    pub const ArabicNumber: BidiClass = BidiClass(5);
    /// (`CS`) commas, colons, and slashes
    pub const CommonSeparator: BidiClass = BidiClass(6);
    /// (`B`) various newline characters
    pub const ParagraphSeparator: BidiClass = BidiClass(7);
    /// (`S`) various segment-related control codes
    pub const SegmentSeparator: BidiClass = BidiClass(8);
    /// (`WS`) spaces
    pub const WhiteSpace: BidiClass = BidiClass(9);
    /// (`ON`) most other symbols and punctuation marks
    pub const OtherNeutral: BidiClass = BidiClass(10);
    /// (`LRE`) U+202A: the LR embedding control
    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
    /// (`LRO`) U+202D: the LR override control
    pub const LeftToRightOverride: BidiClass = BidiClass(12);
    /// (`AL`) any strong right-to-left (Arabic-type) character
    pub const ArabicLetter: BidiClass = BidiClass(13);
    /// (`RLE`) U+202B: the RL embedding control
    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
    /// (`RLO`) U+202E: the RL override control
    pub const RightToLeftOverride: BidiClass = BidiClass(15);
    /// (`PDF`) U+202C: terminates an embedding or override control
    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
    /// (`NSM`) any nonspacing mark
    pub const NonspacingMark: BidiClass = BidiClass(17);
    /// (`BN`) most format characters, control codes, or noncharacters
    pub const BoundaryNeutral: BidiClass = BidiClass(18);
    /// (`FSI`) U+2068: the first strong isolate control
    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
    /// (`LRI`) U+2066: the LR isolate control
    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
    /// (`RLI`) U+2067: the RL isolate control
    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
    /// (`PDI`) U+2069: terminates an isolate control
    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
}
}
203
// Registers `BidiClass` as the value type of the `Bidi_Class` (`bc`) property: this
// generates its `Sealed` and `EnumeratedProperty` impls and, because `ule_ty` is
// given, an `AsULE` impl with `u8` as the unaligned representation.
make_enumerated_property! {
    name: "Bidi_Class";
    short_name: "bc";
    ident: BidiClass;
    data_marker: crate::provider::PropertyEnumBidiClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
    ule_ty: u8;
}
212
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
    /// crate::props::GeneralCategoryGroup).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
    ///
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
    ///     GeneralCategory::OtherLetter
    /// ); // U+6728
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
    ///     GeneralCategory::OtherSymbol
    /// ); // U+1F383 JACK-O-LANTERN
    /// ```
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV))]
    #[repr(u8)]
    pub enum GeneralCategory {
        // NOTE: the variants are grouped by major class, so the explicit discriminants
        // are not ascending everywhere (`SpacingMark`/`EnclosingMark`, and
        // `InitialPunctuation`/`FinalPunctuation`/`OtherPunctuation`).
        // `GeneralCategoryGroup` uses each discriminant as a bit position, and its `ALL`
        // mask is computed from `FinalPunctuation`, which has the largest value here.
        // NOTE(review): the numbering presumably follows ICU4C's `UCharCategory` —
        // confirm before renumbering anything.

        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
317
318pub use gc::GeneralCategory;
319
320impl GeneralCategory {
321    /// All possible values of this enum
322    pub const ALL_VALUES: &'static [GeneralCategory] = &[
323        GeneralCategory::Unassigned,
324        GeneralCategory::UppercaseLetter,
325        GeneralCategory::LowercaseLetter,
326        GeneralCategory::TitlecaseLetter,
327        GeneralCategory::ModifierLetter,
328        GeneralCategory::OtherLetter,
329        GeneralCategory::NonspacingMark,
330        GeneralCategory::SpacingMark,
331        GeneralCategory::EnclosingMark,
332        GeneralCategory::DecimalNumber,
333        GeneralCategory::LetterNumber,
334        GeneralCategory::OtherNumber,
335        GeneralCategory::SpaceSeparator,
336        GeneralCategory::LineSeparator,
337        GeneralCategory::ParagraphSeparator,
338        GeneralCategory::Control,
339        GeneralCategory::Format,
340        GeneralCategory::PrivateUse,
341        GeneralCategory::Surrogate,
342        GeneralCategory::DashPunctuation,
343        GeneralCategory::OpenPunctuation,
344        GeneralCategory::ClosePunctuation,
345        GeneralCategory::ConnectorPunctuation,
346        GeneralCategory::InitialPunctuation,
347        GeneralCategory::FinalPunctuation,
348        GeneralCategory::OtherPunctuation,
349        GeneralCategory::MathSymbol,
350        GeneralCategory::CurrencySymbol,
351        GeneralCategory::ModifierSymbol,
352        GeneralCategory::OtherSymbol,
353    ];
354}
355
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
///
/// That conversion returns it when `GeneralCategory::new_from_u8` yields `None`
/// for the given integer.
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
360
361impl TryFrom<u8> for GeneralCategory {
362    type Error = GeneralCategoryOutOfBoundsError;
363    /// Construct this [`GeneralCategory`] from an integer, returning
364    /// an error if it is out of bounds
365    fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
366        GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
367    }
368}
369
// Registers `GeneralCategory` as the value type of the `General_Category` (`gc`)
// property. No `ule_ty` is passed, so the macro emits no `AsULE` impl here; the enum
// definition carries `#[zerovec::make_ule(GeneralCategoryULE)]` instead.
make_enumerated_property! {
    name: "General_Category";
    short_name: "gc";
    ident: GeneralCategory;
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
}
377
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// The value is a `u32` bit mask: bit `n` is set when the [`GeneralCategory`] whose
/// discriminant is `n` belongs to the group. The constant values correspond to the
/// `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);

// Marker impl of the crate-private `Sealed` trait.
impl crate::private::Sealed for GeneralCategoryGroup {}
399
400use GeneralCategory as GC;
401use GeneralCategoryGroup as GCG;
402
403#[allow(non_upper_case_globals)]
404impl GeneralCategoryGroup {
405    /// (`Lu`) An uppercase letter
406    pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
407    /// (`Ll`) A lowercase letter
408    pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
409    /// (`Lt`) A digraphic letter, with first part uppercase
410    pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
411    /// (`Lm`) A modifier letter
412    pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
413    /// (`Lo`) Other letters, including syllables and ideographs
414    pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
415    /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
416    pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
417        | (1 << (GC::LowercaseLetter as u32))
418        | (1 << (GC::TitlecaseLetter as u32)));
419    /// (`L`) The union of all letter categories
420    pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
421        | (1 << (GC::LowercaseLetter as u32))
422        | (1 << (GC::TitlecaseLetter as u32))
423        | (1 << (GC::ModifierLetter as u32))
424        | (1 << (GC::OtherLetter as u32)));
425
426    /// (`Mn`) A nonspacing combining mark (zero advance width)
427    pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
428    /// (`Mc`) A spacing combining mark (positive advance width)
429    pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
430    /// (`Me`) An enclosing combining mark
431    pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
432    /// (`M`) The union of all mark categories
433    pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
434        | (1 << (GC::EnclosingMark as u32))
435        | (1 << (GC::SpacingMark as u32)));
436
437    /// (`Nd`) A decimal digit
438    pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
439    /// (`Nl`) A letterlike numeric character
440    pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
441    /// (`No`) A numeric character of other type
442    pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
443    /// (`N`) The union of all number categories
444    pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
445        | (1 << (GC::LetterNumber as u32))
446        | (1 << (GC::OtherNumber as u32)));
447
448    /// (`Zs`) A space character (of various non-zero widths)
449    pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
450    /// (`Zl`) U+2028 LINE SEPARATOR only
451    pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
452    /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
453    pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
454    /// (`Z`) The union of all separator categories
455    pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
456        | (1 << (GC::LineSeparator as u32))
457        | (1 << (GC::ParagraphSeparator as u32)));
458
459    /// (`Cc`) A C0 or C1 control code
460    pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
461    /// (`Cf`) A format control character
462    pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
463    /// (`Co`) A private-use character
464    pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
465    /// (`Cs`) A surrogate code point
466    pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
467    /// (`Cn`) A reserved unassigned code point or a noncharacter
468    pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
469    /// (`C`) The union of all control code, reserved, and unassigned categories
470    pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
471        | (1 << (GC::Format as u32))
472        | (1 << (GC::PrivateUse as u32))
473        | (1 << (GC::Surrogate as u32))
474        | (1 << (GC::Unassigned as u32)));
475
476    /// (`Pd`) A dash or hyphen punctuation mark
477    pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
478    /// (`Ps`) An opening punctuation mark (of a pair)
479    pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
480    /// (`Pe`) A closing punctuation mark (of a pair)
481    pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
482    /// (`Pc`) A connecting punctuation mark, like a tie
483    pub const ConnectorPunctuation: GeneralCategoryGroup =
484        GCG(1 << (GC::ConnectorPunctuation as u32));
485    /// (`Pi`) An initial quotation mark
486    pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
487    /// (`Pf`) A final quotation mark
488    pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
489    /// (`Po`) A punctuation mark of other type
490    pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
491    /// (`P`) The union of all punctuation categories
492    pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
493        | (1 << (GC::OpenPunctuation as u32))
494        | (1 << (GC::ClosePunctuation as u32))
495        | (1 << (GC::ConnectorPunctuation as u32))
496        | (1 << (GC::OtherPunctuation as u32))
497        | (1 << (GC::InitialPunctuation as u32))
498        | (1 << (GC::FinalPunctuation as u32)));
499
500    /// (`Sm`) A symbol of mathematical use
501    pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
502    /// (`Sc`) A currency sign
503    pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
504    /// (`Sk`) A non-letterlike modifier symbol
505    pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
506    /// (`So`) A symbol of other type
507    pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
508    /// (`S`) The union of all symbol categories
509    pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
510        | (1 << (GC::CurrencySymbol as u32))
511        | (1 << (GC::ModifierSymbol as u32))
512        | (1 << (GC::OtherSymbol as u32)));
513
514    const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
515
516    /// Return whether the code point belongs in the provided multi-value category.
517    ///
518    /// ```
519    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
520    /// use icu::properties::CodePointMapData;
521    ///
522    /// let gc = CodePointMapData::<GeneralCategory>::new();
523    ///
524    /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
525    /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
526    ///
527    /// // U+0B1E ORIYA LETTER NYA
528    /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
529    /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
530    /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
531    ///
532    /// // U+0301 COMBINING ACUTE ACCENT
533    /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
534    /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
535    /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
536    ///
537    /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
538    /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
539    /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
540    ///
541    /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
542    /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
543    /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
544    ///
545    /// // U+2713 CHECK MARK
546    /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
547    /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
548    /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
549    ///
550    /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
551    /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
552    /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
553    ///
554    /// // U+E007F CANCEL TAG
555    /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
556    /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
557    /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
558    /// ```
559    pub const fn contains(self, val: GeneralCategory) -> bool {
560        0 != (1 << (val as u32)) & self.0
561    }
562
563    /// Produce a GeneralCategoryGroup that is the inverse of this one
564    ///
565    /// # Example
566    ///
567    /// ```rust
568    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
569    ///
570    /// let letter = GeneralCategoryGroup::Letter;
571    /// let not_letter = letter.complement();
572    ///
573    /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
574    /// assert!(!letter.contains(GeneralCategory::MathSymbol));
575    /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
576    /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
577    /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
578    /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
579    /// ```
580    pub const fn complement(self) -> Self {
581        // Mask off things not in Self::ALL to guarantee the mask
582        // values stay in-range
583        GeneralCategoryGroup(!self.0 & Self::ALL)
584    }
585
586    /// Return the group representing all GeneralCategory values
587    ///
588    /// # Example
589    ///
590    /// ```rust
591    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
592    ///
593    /// let all = GeneralCategoryGroup::all();
594    ///
595    /// assert!(all.contains(GeneralCategory::MathSymbol));
596    /// assert!(all.contains(GeneralCategory::OtherPunctuation));
597    /// assert!(all.contains(GeneralCategory::UppercaseLetter));
598    /// ```
599    pub const fn all() -> Self {
600        Self(Self::ALL)
601    }
602
603    /// Return the empty group
604    ///
605    /// # Example
606    ///
607    /// ```rust
608    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
609    ///
610    /// let empty = GeneralCategoryGroup::empty();
611    ///
612    /// assert!(!empty.contains(GeneralCategory::MathSymbol));
613    /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
614    /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
615    /// ```
616    pub const fn empty() -> Self {
617        Self(0)
618    }
619
620    /// Take the union of two groups
621    ///
622    /// # Example
623    ///
624    /// ```rust
625    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
626    ///
627    /// let letter = GeneralCategoryGroup::Letter;
628    /// let symbol = GeneralCategoryGroup::Symbol;
629    /// let union = letter.union(symbol);
630    ///
631    /// assert!(union.contains(GeneralCategory::MathSymbol));
632    /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
633    /// assert!(union.contains(GeneralCategory::UppercaseLetter));
634    /// ```
635    pub const fn union(self, other: Self) -> Self {
636        Self(self.0 | other.0)
637    }
638
639    /// Take the intersection of two groups
640    ///
641    /// # Example
642    ///
643    /// ```rust
644    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
645    ///
646    /// let letter = GeneralCategoryGroup::Letter;
647    /// let lu = GeneralCategoryGroup::UppercaseLetter;
648    /// let intersection = letter.intersection(lu);
649    ///
650    /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
651    /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
652    /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
653    /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
654    /// ```
655    pub const fn intersection(self, other: Self) -> Self {
656        Self(self.0 & other.0)
657    }
658}
659
660impl From<GeneralCategory> for GeneralCategoryGroup {
661    fn from(subcategory: GeneralCategory) -> Self {
662        GeneralCategoryGroup(1 << (subcategory as u32))
663    }
664}
665impl From<u32> for GeneralCategoryGroup {
666    fn from(mask: u32) -> Self {
667        // Mask off things not in Self::ALL to guarantee the mask
668        // values stay in-range
669        GeneralCategoryGroup(mask & Self::ALL)
670    }
671}
672impl From<GeneralCategoryGroup> for u32 {
673    fn from(group: GeneralCategoryGroup) -> Self {
674        group.0
675    }
676}
677
/// Enumerated property Script.
///
/// This is used with both the Script and Script_Extensions Unicode properties.
/// Each character is assigned a single Script, but characters that are used in
/// a particular subset of scripts will be in more than one Script_Extensions set.
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. If you are trying to
/// determine whether a code point belongs to a certain script, you should use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// This is an open enum: a transparent newtype over `u16` whose named values are
/// associated constants. Any other `u16` can still be wrapped with
/// [`Script::from_icu4c_value`].
///
/// For more information, see UAX #24: <https://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han);  // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common);  // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);
705
706impl Script {
707    /// Returns an ICU4C `UScriptCode` value.
708    pub const fn to_icu4c_value(self) -> u16 {
709        self.0
710    }
711    /// Constructor from an ICU4C `UScriptCode` value.
712    pub const fn from_icu4c_value(value: u16) -> Self {
713        Self(value)
714    }
715}
716
717create_const_array! {
718#[allow(missing_docs)] // These constants don't need individual documentation.
719#[allow(non_upper_case_globals)]
720impl Script {
721    pub const Adlam: Script = Script(167);
722    pub const Ahom: Script = Script(161);
723    pub const AnatolianHieroglyphs: Script = Script(156);
724    pub const Arabic: Script = Script(2);
725    pub const Armenian: Script = Script(3);
726    pub const Avestan: Script = Script(117);
727    pub const Balinese: Script = Script(62);
728    pub const Bamum: Script = Script(130);
729    pub const BassaVah: Script = Script(134);
730    pub const Batak: Script = Script(63);
731    pub const Bengali: Script = Script(4);
732    pub const BeriaErfe: Script = Script(208);
733    pub const Bhaiksuki: Script = Script(168);
734    pub const Bopomofo: Script = Script(5);
735    pub const Brahmi: Script = Script(65);
736    pub const Braille: Script = Script(46);
737    pub const Buginese: Script = Script(55);
738    pub const Buhid: Script = Script(44);
739    pub const CanadianAboriginal: Script = Script(40);
740    pub const Carian: Script = Script(104);
741    pub const CaucasianAlbanian: Script = Script(159);
742    pub const Chakma: Script = Script(118);
743    pub const Cham: Script = Script(66);
744    pub const Cherokee: Script = Script(6);
745    pub const Chisoi: Script = Script(209);
746    pub const Chorasmian: Script = Script(189);
747    pub const Common: Script = Script(0);
748    pub const Coptic: Script = Script(7);
749    pub const Cuneiform: Script = Script(101);
750    pub const Cypriot: Script = Script(47);
751    pub const CyproMinoan: Script = Script(193);
752    pub const Cyrillic: Script = Script(8);
753    pub const Deseret: Script = Script(9);
754    pub const Devanagari: Script = Script(10);
755    pub const DivesAkuru: Script = Script(190);
756    pub const Dogra: Script = Script(178);
757    pub const Duployan: Script = Script(135);
758    pub const EgyptianHieroglyphs: Script = Script(71);
759    pub const Elbasan: Script = Script(136);
760    pub const Elymaic: Script = Script(185);
761    pub const Ethiopian: Script = Script(11);
762    pub const Georgian: Script = Script(12);
763    pub const Glagolitic: Script = Script(56);
764    pub const Gothic: Script = Script(13);
765    pub const Grantha: Script = Script(137);
766    pub const Greek: Script = Script(14);
767    pub const Gujarati: Script = Script(15);
768    pub const GunjalaGondi: Script = Script(179);
769    pub const Gurmukhi: Script = Script(16);
770    pub const Han: Script = Script(17);
771    pub const Hangul: Script = Script(18);
772    pub const HanifiRohingya: Script = Script(182);
773    pub const Hanunoo: Script = Script(43);
774    pub const Hatran: Script = Script(162);
775    pub const Hebrew: Script = Script(19);
776    pub const Hiragana: Script = Script(20);
777    pub const ImperialAramaic: Script = Script(116);
778    pub const Inherited: Script = Script(1);
779    pub const InscriptionalPahlavi: Script = Script(122);
780    pub const InscriptionalParthian: Script = Script(125);
781    pub const Javanese: Script = Script(78);
782    pub const Kaithi: Script = Script(120);
783    pub const Kannada: Script = Script(21);
784    pub const Katakana: Script = Script(22);
785    pub const Kawi: Script = Script(198);
786    pub const KayahLi: Script = Script(79);
787    pub const Kharoshthi: Script = Script(57);
788    pub const KhitanSmallScript: Script = Script(191);
789    pub const Khmer: Script = Script(23);
790    pub const Khojki: Script = Script(157);
791    pub const Khudawadi: Script = Script(145);
792    pub const Lao: Script = Script(24);
793    pub const Latin: Script = Script(25);
794    pub const Lepcha: Script = Script(82);
795    pub const Limbu: Script = Script(48);
796    pub const LinearA: Script = Script(83);
797    pub const LinearB: Script = Script(49);
798    pub const Lisu: Script = Script(131);
799    pub const Lycian: Script = Script(107);
800    pub const Lydian: Script = Script(108);
801    pub const Mahajani: Script = Script(160);
802    pub const Makasar: Script = Script(180);
803    pub const Malayalam: Script = Script(26);
804    pub const Mandaic: Script = Script(84);
805    pub const Manichaean: Script = Script(121);
806    pub const Marchen: Script = Script(169);
807    pub const MasaramGondi: Script = Script(175);
808    pub const Medefaidrin: Script = Script(181);
809    pub const MeeteiMayek: Script = Script(115);
810    pub const MendeKikakui: Script = Script(140);
811    pub const MeroiticCursive: Script = Script(141);
812    pub const MeroiticHieroglyphs: Script = Script(86);
813    pub const Miao: Script = Script(92);
814    pub const Modi: Script = Script(163);
815    pub const Mongolian: Script = Script(27);
816    pub const Mro: Script = Script(149);
817    pub const Multani: Script = Script(164);
818    pub const Myanmar: Script = Script(28);
819    pub const Nabataean: Script = Script(143);
820    pub const NagMundari: Script = Script(199);
821    pub const Nandinagari: Script = Script(187);
822    pub const Nastaliq: Script = Script(200);
823    pub const Newa: Script = Script(170);
824    pub const NewTaiLue: Script = Script(59);
825    pub const Nko: Script = Script(87);
826    pub const Nushu: Script = Script(150);
827    pub const NyiakengPuachueHmong: Script = Script(186);
828    pub const Ogham: Script = Script(29);
829    pub const OlChiki: Script = Script(109);
830    pub const OldHungarian: Script = Script(76);
831    pub const OldItalic: Script = Script(30);
832    pub const OldNorthArabian: Script = Script(142);
833    pub const OldPermic: Script = Script(89);
834    pub const OldPersian: Script = Script(61);
835    pub const OldSogdian: Script = Script(184);
836    pub const OldSouthArabian: Script = Script(133);
837    pub const OldTurkic: Script = Script(88);
838    pub const OldUyghur: Script = Script(194);
839    pub const Oriya: Script = Script(31);
840    pub const Osage: Script = Script(171);
841    pub const Osmanya: Script = Script(50);
842    pub const PahawhHmong: Script = Script(75);
843    pub const Palmyrene: Script = Script(144);
844    pub const PauCinHau: Script = Script(165);
845    pub const PhagsPa: Script = Script(90);
846    pub const Phoenician: Script = Script(91);
847    pub const PsalterPahlavi: Script = Script(123);
848    pub const Rejang: Script = Script(110);
849    pub const Runic: Script = Script(32);
850    pub const Samaritan: Script = Script(126);
851    pub const Saurashtra: Script = Script(111);
852    pub const Sharada: Script = Script(151);
853    pub const Shavian: Script = Script(51);
854    pub const Siddham: Script = Script(166);
855    pub const Sidetic: Script = Script(210);
856    pub const SignWriting: Script = Script(112);
857    pub const Sinhala: Script = Script(33);
858    pub const Sogdian: Script = Script(183);
859    pub const SoraSompeng: Script = Script(152);
860    pub const Soyombo: Script = Script(176);
861    pub const Sundanese: Script = Script(113);
862    pub const SylotiNagri: Script = Script(58);
863    pub const Syriac: Script = Script(34);
864    pub const Tagalog: Script = Script(42);
865    pub const Tagbanwa: Script = Script(45);
866    pub const TaiLe: Script = Script(52);
867    pub const TaiTham: Script = Script(106);
868    pub const TaiViet: Script = Script(127);
869    pub const TaiYo: Script = Script(211);
870    pub const Takri: Script = Script(153);
871    pub const Tamil: Script = Script(35);
872    pub const Tangsa: Script = Script(195);
873    pub const Tangut: Script = Script(154);
874    pub const Telugu: Script = Script(36);
875    pub const Thaana: Script = Script(37);
876    pub const Thai: Script = Script(38);
877    pub const Tibetan: Script = Script(39);
878    pub const Tifinagh: Script = Script(60);
879    pub const Tirhuta: Script = Script(158);
880    pub const TolongSiki: Script = Script(212);
881    pub const Toto: Script = Script(196);
882    pub const Ugaritic: Script = Script(53);
883    pub const Unknown: Script = Script(103);
884    pub const Vai: Script = Script(99);
885    pub const Vithkuqi: Script = Script(197);
886    pub const Wancho: Script = Script(188);
887    pub const WarangCiti: Script = Script(146);
888    pub const Yezidi: Script = Script(192);
889    pub const Yi: Script = Script(41);
890    pub const ZanabazarSquare: Script = Script(177);
891}
892}
893
894make_enumerated_property! {
895    name: "Script";
896    short_name: "sc";
897    ident: Script;
898    data_marker: crate::provider::PropertyEnumScriptV1;
899    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
900    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
901}
902
/// Enumerated property Hangul_Syllable_Type
///
/// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose
/// arbitrary Hangul syllables. This property provides that ontology of Hangul code points.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
///     HangulSyllableType::LeadingJamo
/// ); // U+1100
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
///     HangulSyllableType::LeadingVowelSyllable
/// ); // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Returns an ICU4C `UHangulSyllableType` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UHangulSyllableType` value.
    ///
    /// The value is stored as-is; no validation is performed, so any `u8`
    /// is accepted.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
940
941create_const_array! {
942#[allow(non_upper_case_globals)]
943impl HangulSyllableType {
944    /// (`NA`) not applicable (e.g. not a Hangul code point).
945    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
946    /// (`L`) a conjoining leading consonant Jamo.
947    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
948    /// (`V`) a conjoining vowel Jamo.
949    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
950    /// (`T`) a conjoining trailing consonant Jamo.
951    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
952    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
953    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
954    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
955    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
956}
957}
958
959make_enumerated_property! {
960    name: "Hangul_Syllable_Type";
961    short_name: "hst";
962    ident: HangulSyllableType;
963    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
964    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
965    ule_ty: u8;
966
967}
968
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// ```
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ｱ'),
///     EastAsianWidth::Halfwidth
/// ); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ア'),
///     EastAsianWidth::Wide
/// ); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns an ICU4C `UEastAsianWidth` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UEastAsianWidth` value.
    ///
    /// The value is stored as-is; no validation is performed, so any `u8`
    /// is accepted.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1004
1005create_const_array! {
1006#[allow(missing_docs)] // These constants don't need individual documentation.
1007#[allow(non_upper_case_globals)]
1008impl EastAsianWidth {
1009    pub const Neutral: EastAsianWidth = EastAsianWidth(0); //name="N"
1010    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); //name="A"
1011    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); //name="H"
1012    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); //name="F"
1013    pub const Narrow: EastAsianWidth = EastAsianWidth(4); //name="Na"
1014    pub const Wide: EastAsianWidth = EastAsianWidth(5); //name="W"
1015}
1016}
1017
1018make_enumerated_property! {
1019    name: "East_Asian_Width";
1020    short_name: "ea";
1021    ident: EastAsianWidth;
1022    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
1023    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
1024    ule_ty: u8;
1025}
1026
/// Enumerated property Line_Break.
///
/// See "Line Breaking Properties" in UAX #14 for the summary of each property
/// value: <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get(')'),
///     LineBreak::CloseParenthesis
/// ); // U+0029: Right Parenthesis
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
///     LineBreak::ConditionalJapaneseStarter
/// ); // U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Returns an ICU4C `ULineBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `ULineBreak` value.
    ///
    /// The value is stored as-is; no validation is performed, so any `u8`
    /// is accepted.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1066
1067create_const_array! {
1068#[allow(missing_docs)] // These constants don't need individual documentation.
1069#[allow(non_upper_case_globals)]
1070impl LineBreak {
1071    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
1072    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
1073    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
1074    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
1075    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
1076    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
1077    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
1078    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
1079    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
1080    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
1081    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
1082    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
1083    pub const Glue: LineBreak = LineBreak(12); // name="GL"
1084    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
1085    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
1086    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
1087    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
1088    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
1089    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
1090    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
1091    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
1092    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
1093    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
1094    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
1095    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
1096    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
1097    pub const Space: LineBreak = LineBreak(26); // name="SP"
1098    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
1099    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
1100    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
1101    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
1102    pub const H2: LineBreak = LineBreak(31); // name="H2"
1103    pub const H3: LineBreak = LineBreak(32); // name="H3"
1104    pub const JL: LineBreak = LineBreak(33); // name="JL"
1105    pub const JT: LineBreak = LineBreak(34); // name="JT"
1106    pub const JV: LineBreak = LineBreak(35); // name="JV"
1107    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
1108    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
1109    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
1110    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
1111    pub const EBase: LineBreak = LineBreak(40); // name="EB"
1112    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
1113    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"
1114
1115    // Added in ICU 74:
1116    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
1117    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
1118    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
1119    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
1120    pub const Virama: LineBreak = LineBreak(47); // name="VI"
1121
1122    // Added in ICU 78:
1123    pub const UnambiguousHyphen: LineBreak = LineBreak(48); // name="HH"
1124}
1125}
1126
1127make_enumerated_property! {
1128    name: "Line_Break";
1129    short_name: "lb";
1130    ident: LineBreak;
1131    data_marker: crate::provider::PropertyEnumLineBreakV1;
1132    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
1133    ule_ty: u8;
1134}
1135
/// Enumerated property Grapheme_Cluster_Break.
///
/// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the
/// summary of each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
///     GraphemeClusterBreak::RegionalIndicator
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'),
///     GraphemeClusterBreak::SpacingMark
/// ); // U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // this type is stable
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Returns an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// The value is stored as-is; no validation is performed, so any `u8`
    /// is accepted.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1174
1175create_const_array! {
1176#[allow(missing_docs)] // These constants don't need individual documentation.
1177#[allow(non_upper_case_globals)]
1178impl GraphemeClusterBreak {
1179    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
1180    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
1181    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
1182    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
1183    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
1184    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
1185    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
1186    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
1187    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
1188    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
1189    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
1190    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
1191    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
1192    /// This value is obsolete and unused.
1193    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
1194    /// This value is obsolete and unused.
1195    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
1196    /// This value is obsolete and unused.
1197    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
1198    /// This value is obsolete and unused.
1199    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
1200    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
1201}
1202}
1203
1204make_enumerated_property! {
1205    name: "Grapheme_Cluster_Break";
1206    short_name: "GCB";
1207    ident: GraphemeClusterBreak;
1208    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
1209    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
1210    ule_ty: u8;
1211}
1212
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::WordBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('.'),
///     WordBreak::MidNumLet
/// ); // U+002E: Full Stop
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('，'),
///     WordBreak::MidNum
/// ); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Returns an ICU4C `UWordBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UWordBreak` value.
    ///
    /// The value is stored as-is; no validation is performed, so any `u8`
    /// is accepted.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1251
1252create_const_array! {
1253#[allow(missing_docs)] // These constants don't need individual documentation.
1254#[allow(non_upper_case_globals)]
1255impl WordBreak {
1256    pub const Other: WordBreak = WordBreak(0); // name="XX"
1257    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
1258    pub const Format: WordBreak = WordBreak(2); // name="FO"
1259    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
1260    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
1261    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
1262    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
1263    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
1264    pub const CR: WordBreak = WordBreak(8); // name="CR"
1265    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
1266    pub const LF: WordBreak = WordBreak(10); // name="LF"
1267    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
1268    pub const Newline: WordBreak = WordBreak(12); // name="NL"
1269    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
1270    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
1271    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
1272    pub const DoubleQuote: WordBreak = WordBreak(16); // name=DQ
1273    /// This value is obsolete and unused.
1274    pub const EBase: WordBreak = WordBreak(17); // name="EB"
1275    /// This value is obsolete and unused.
1276    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
1277    /// This value is obsolete and unused.
1278    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
1279    /// This value is obsolete and unused.
1280    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
1281    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
1282    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
1283}
1284}
1285
1286make_enumerated_property! {
1287    name: "Word_Break";
1288    short_name: "WB";
1289    ident: WordBreak;
1290    data_marker: crate::provider::PropertyEnumWordBreakV1;
1291    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
1292    ule_ty: u8;
1293}
1294
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get('９'),
///     SentenceBreak::Numeric
/// ); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get(','),
///     SentenceBreak::SContinue
/// ); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns an ICU4C `USentenceBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `USentenceBreak` value.
    ///
    /// The value is stored as-is; no validation is performed, so any `u8`
    /// is accepted.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1333
1334create_const_array! {
1335#[allow(missing_docs)] // These constants don't need individual documentation.
1336#[allow(non_upper_case_globals)]
1337impl SentenceBreak {
1338    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
1339    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
1340    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
1341    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
1342    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
1343    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
1344    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
1345    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
1346    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
1347    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
1348    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
1349    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
1350    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
1351    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
1352    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
1353}
1354}
1355
1356make_enumerated_property! {
1357    name: "Sentence_Break";
1358    short_name: "SB";
1359    ident: SentenceBreak;
1360    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
1361    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
1362    ule_ty: u8;
1363}
1364
/// Property Canonical_Combining_Class.
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// See `icu::normalizer::properties::CanonicalCombiningClassMap` for the API
/// to look up the Canonical_Combining_Class property by scalar value.
///
/// **Note:** See `icu::normalizer::CanonicalCombiningClassMap` for the preferred API
/// to look up the Canonical_Combining_Class property by scalar value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
///     CanonicalCombiningClass::NotReordered
/// ); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
///     CanonicalCombiningClass::Above
/// ); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE(review): the two "See ..." paragraphs in the docs above name different
// module paths for `CanonicalCombiningClassMap`; confirm which one is current
// and drop the other.
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);

impl CanonicalCombiningClass {
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// The value is stored as-is; no validation is performed, so any `u8`
    /// is accepted.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1410
// Named values of `CanonicalCombiningClass`, declared as associated constants so the type
// behaves like an open enum. `create_const_array!` re-emits this `impl` block unchanged and
// additionally derives a (name, value) table from it; see the macro's documentation.
create_const_array! {
// These constant names come from PropertyValueAliases.txt
// The trailing `name="..."` comments record a short alias for each value
// (presumably the abbreviation from PropertyValueAliases.txt; confirm before relying on it).
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    // The `CCCnn` constants below are named after their numeric value.
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    // Positional classes: the identifier describes where the mark sits relative to its base.
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
}
1476
// Hooks `CanonicalCombiningClass` up as an enumerated property through
// `make_enumerated_property!` (defined elsewhere in this file). The invocation supplies the
// long and short Unicode property names, the data marker type, the singleton identifier, and
// `ule_ty` (presumably the byte-level storage type for values; confirm against the macro).
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1485
/// The `Indic_Conjunct_Break` enumerated property, represented as an open enum over `u8`.
///
/// The property is described in UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
///
/// # Example
///
/// ```
/// use icu::properties::props::IndicConjunctBreak;
/// use icu::properties::CodePointMapData;
///
/// let incb = CodePointMapData::<IndicConjunctBreak>::new();
///
/// assert_eq!(incb.get('a'), IndicConjunctBreak::None);
/// assert_eq!(incb.get('\u{094d}'), IndicConjunctBreak::Linker);
/// assert_eq!(incb.get('\u{0915}'), IndicConjunctBreak::Consonant);
/// assert_eq!(incb.get('\u{0300}'), IndicConjunctBreak::Extend);
/// ```
#[doc(hidden)] // draft API in ICU4C
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct IndicConjunctBreak(pub(crate) u8);
1518
1519impl IndicConjunctBreak {
1520    /// Returns an ICU4C `UIndicConjunctBreak` value.
1521    pub const fn to_icu4c_value(self) -> u8 {
1522        self.0
1523    }
1524    /// Constructor from an ICU4C `UIndicConjunctBreak` value.
1525    pub const fn from_icu4c_value(value: u8) -> Self {
1526        Self(value)
1527    }
1528}
1529
// Named values of `IndicConjunctBreak`, declared as associated constants (open enum).
// `create_const_array!` re-emits this `impl` block and also derives a (name, value) table
// from it; see the macro's documentation.
create_const_array! {
#[doc(hidden)] // draft API in ICU4C
#[allow(non_upper_case_globals)]
impl IndicConjunctBreak {
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
}
}
1540
// Hooks `IndicConjunctBreak` up as an enumerated property through
// `make_enumerated_property!` (defined elsewhere in this file). The invocation supplies the
// long and short Unicode property names, the data marker type, the singleton identifier, and
// `ule_ty` (presumably the byte-level storage type for values; confirm against the macro).
make_enumerated_property! {
    name: "Indic_Conjunct_Break";
    short_name: "InCB";
    ident: IndicConjunctBreak;
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
    ule_ty: u8;
}
1549
/// The `Indic_Syllabic_Category` enumerated property, represented as an open enum over `u8`.
///
/// The property is described in UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// # Example
///
/// ```
/// use icu::properties::props::IndicSyllabicCategory;
/// use icu::properties::CodePointMapData;
///
/// let insc = CodePointMapData::<IndicSyllabicCategory>::new();
///
/// assert_eq!(insc.get('a'), IndicSyllabicCategory::Other);
/// // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// assert_eq!(insc.get('\u{0900}'), IndicSyllabicCategory::Bindu);
/// ```
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct IndicSyllabicCategory(pub(crate) u8);
1573
1574impl IndicSyllabicCategory {
1575    /// Returns an ICU4C `UIndicSyllabicCategory` value.
1576    pub const fn to_icu4c_value(self) -> u8 {
1577        self.0
1578    }
1579    /// Constructor from an ICU4C `UIndicSyllabicCategory` value.
1580    pub const fn from_icu4c_value(value: u8) -> Self {
1581        Self(value)
1582    }
1583}
1584
// Named values of `IndicSyllabicCategory`, declared as associated constants (open enum).
// `create_const_array!` re-emits this `impl` block and also derives a (name, value) table
// from it; see the macro's documentation.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    // Listed after the otherwise name-ordered values (presumably a later addition that
    // received the next free number; confirm).
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
}
1628
// Hooks `IndicSyllabicCategory` up as an enumerated property through
// `make_enumerated_property!` (defined elsewhere in this file). The invocation supplies the
// long and short Unicode property names, the data marker type, the singleton identifier, and
// `ule_ty` (presumably the byte-level storage type for values; confirm against the macro).
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
}
1637
/// The `Joining_Type` enumerated property, represented as an open enum over `u8`.
///
/// A summary of each property value can be found in Section 9.2, Arabic Cursive Joining,
/// of The Unicode Standard.
///
/// # Example
///
/// ```
/// use icu::properties::props::JoiningType;
/// use icu::properties::CodePointMapData;
///
/// let joining_type = CodePointMapData::<JoiningType>::new();
///
/// // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(joining_type.get('ؠ'), JoiningType::DualJoining);
/// // U+10ACD: Manichaean Letter Heth
/// assert_eq!(joining_type.get('𐫍'), JoiningType::LeftJoining);
/// ```
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct JoiningType(pub(crate) u8);
1662
1663impl JoiningType {
1664    /// Returns an ICU4C `UJoiningType` value.
1665    pub const fn to_icu4c_value(self) -> u8 {
1666        self.0
1667    }
1668    /// Constructor from an ICU4C `UJoiningType` value.
1669    pub const fn from_icu4c_value(value: u8) -> Self {
1670        Self(value)
1671    }
1672}
1673
// Named values of `JoiningType`, declared as associated constants (open enum).
// `create_const_array!` re-emits this `impl` block and also derives a (name, value) table
// from it; see the macro's documentation.
create_const_array! {
// The trailing `name="..."` comments record a one-letter alias for each value
// (presumably the abbreviation from PropertyValueAliases.txt; confirm before relying on it).
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
1686
// Hooks `JoiningType` up as an enumerated property through `make_enumerated_property!`
// (defined elsewhere in this file). The invocation supplies the long and short Unicode
// property names, the data marker type, the singleton identifier, and `ule_ty` (presumably
// the byte-level storage type for values; confirm against the macro).
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8;
}
1695
/// The `Vertical_Orientation` enumerated property, represented as an open enum over `u8`.
///
/// The property is described in UTR #50:
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// # Example
///
/// ```
/// use icu::properties::props::VerticalOrientation;
/// use icu::properties::CodePointMapData;
///
/// let vo = CodePointMapData::<VerticalOrientation>::new();
///
/// assert_eq!(vo.get('a'), VerticalOrientation::Rotated);
/// assert_eq!(vo.get('§'), VerticalOrientation::Upright);
/// assert_eq!(vo.get32(0x2329), VerticalOrientation::TransformedRotated);
/// assert_eq!(vo.get32(0x3001), VerticalOrientation::TransformedUpright);
/// ```
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VerticalOrientation(pub(crate) u8);
1728
1729impl VerticalOrientation {
1730    /// Returns an ICU4C `UVerticalOrientation` value.
1731    pub const fn to_icu4c_value(self) -> u8 {
1732        self.0
1733    }
1734    /// Constructor from an ICU4C `UVerticalOrientation` value.
1735    pub const fn from_icu4c_value(value: u8) -> Self {
1736        Self(value)
1737    }
1738}
1739
// Named values of `VerticalOrientation`, declared as associated constants (open enum).
// `create_const_array!` re-emits this `impl` block and also derives a (name, value) table
// from it; see the macro's documentation.
create_const_array! {
// The trailing `name="..."` comments record a short alias for each value
// (presumably the abbreviation from PropertyValueAliases.txt; confirm before relying on it).
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
}
1750
// Hooks `VerticalOrientation` up as an enumerated property through
// `make_enumerated_property!` (defined elsewhere in this file). The invocation supplies the
// long and short Unicode property names, the data marker type, the singleton identifier, and
// `ule_ty` (presumably the byte-level storage type for values; confirm against the macro).
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8;
}
1759
1760pub use crate::code_point_set::BinaryProperty;
1761
/// Declares the marker type for a single binary Unicode property.
///
/// An invocation supplies, in this fixed order: the long property name, its short name, the
/// identifier of the marker type to generate, the data marker type, the identifier of the
/// baked singleton, and finally one or more attributes (in practice the doc comments) that
/// are attached to the generated type.
///
/// The expansion is a `#[non_exhaustive]` unit struct that implements the crate-private
/// `Sealed` trait and [`BinaryProperty`].
macro_rules! make_binary_property {
    (
        name: $long_name:literal;
        short_name: $alias:literal;
        ident: $marker:ident;
        data_marker: $marker_data:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[non_exhaustive]
        #[derive(Debug)]
        pub struct $marker;

        impl crate::private::Sealed for $marker {}

        impl BinaryProperty for $marker {
            type DataMarker = $marker_data;
            // The baked singleton only exists when the `compiled_data` feature is enabled.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$baked;
            // Both names are exposed as raw bytes of the string literals given above.
            const SHORT_NAME: &'static [u8] = $alias.as_bytes();
            const NAME: &'static [u8] = $long_name.as_bytes();
        }
    };
}
1788
// Generates the `AsciiHexDigit` marker type for the binary property `ASCII_Hex_Digit`
// (short name `AHex`). The doc comments inside the invocation become the type's rustdoc.
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1811
// Generates the `Alnum` marker type for the binary property `Alnum`
// (long and short name are identical).
make_binary_property! {
    name: "Alnum";
    short_name: "Alnum";
    ident: Alnum;
    data_marker: crate::provider::PropertyBinaryAlnumV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
}
1822
// Generates the `Alphabetic` marker type for the binary property `Alphabetic`
// (short name `Alpha`).
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
    /// Alphabetic characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

}
1846
// Generates the `BidiControl` marker type for the binary property `Bidi_Control`
// (short name `Bidi_C`).
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

}
1869
// Generates the `BidiMirrored` marker type for the binary property `Bidi_Mirrored`
// (short name `Bidi_M`).
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
    /// Characters that are mirrored in bidirectional text.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

}
1893
// Generates the `Blank` marker type for the binary property `Blank`
// (long and short name are identical).
make_binary_property! {
    name: "Blank";
    short_name: "Blank";
    ident: Blank;
    data_marker: crate::provider::PropertyBinaryBlankV1;
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
    /// Horizontal whitespace characters.

}
1903
// Generates the `Cased` marker type for the binary property `Cased`
// (long and short name are identical).
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::PropertyBinaryCasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

}
1925
// Generates the `CaseIgnorable` marker type for the binary property `Case_Ignorable`
// (short name `CI`).
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
    /// Characters which are ignored for casing purposes.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
    /// ```

}
1947
// Generates the `FullCompositionExclusion` marker type for the binary property
// `Full_Composition_Exclusion` (short name `Comp_Ex`).
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>

}
1959
// Generates the `ChangesWhenCasefolded` marker type for the binary property
// `Changes_When_Casefolded` (short name `CWCF`).
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

}
1981
// Generates the `ChangesWhenCasemapped` marker type for the binary property
// `Changes_When_Casemapped` (short name `CWCM`).
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
    /// Characters which may change when they undergo case mapping.

}
1991
// Generates the `ChangesWhenNfkcCasefolded` marker type for the binary property
// `Changes_When_NFKC_Casefolded` (short name `CWKCF`).
make_binary_property! {
    name: "Changes_When_NFKC_Casefolded";
    short_name: "CWKCF";
    ident: ChangesWhenNfkcCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
    ///
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

}
2013
// Generates the `ChangesWhenLowercased` marker type for the binary property
// `Changes_When_Lowercased` (short name `CWL`).
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

}
2035
// Generates the `ChangesWhenTitlecased` marker type for the binary property
// `Changes_When_Titlecased` (short name `CWT`).
make_binary_property! {
    name: "Changes_When_Titlecased";
    short_name: "CWT";
    ident: ChangesWhenTitlecased;
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenTitlecased;
    ///
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

}
2057
// Generates the `ChangesWhenUppercased` marker type for the binary property
// `Changes_When_Uppercased` (short name `CWU`).
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

}
2079
// Generates the `Dash` marker type for the binary property `Dash`
// (long and short name are identical).
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::PropertyBinaryDashV1;
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D HYPHEN-MINUS
    /// assert!(!dash.contains('='));  // U+003D EQUALS SIGN
    /// ```

}
2103
// Generates the `Deprecated` marker type for the binary property `Deprecated`
// (short name `Dep`).
make_binary_property! {
    name: "Deprecated";
    short_name: "Dep";
    ident: Deprecated;
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
    /// Deprecated characters.
    ///
    /// No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Deprecated;
    ///
    /// let deprecated = CodePointSetData::new::<Deprecated>();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

}
2128
// Generates the `DefaultIgnorableCodePoint` marker type for the binary property
// `Default_Ignorable_Code_Point` (short name `DI`).
make_binary_property! {
    name: "Default_Ignorable_Code_Point";
    short_name: "DI";
    ident: DefaultIgnorableCodePoint;
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
    /// For programmatic determination of default ignorable code points.
    ///
    /// New characters that should be ignored in rendering (unless explicitly supported)
    /// will be assigned in these ranges, permitting programs to correctly handle the
    /// default rendering of such characters when not otherwise supported.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::DefaultIgnorableCodePoint;
    ///
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
    ///
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

}
2155
// Generates the `Diacritic` marker type for the binary property `Diacritic`
// (short name `Dia`).
make_binary_property! {
    name: "Diacritic";
    short_name: "Dia";
    ident: Diacritic;
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
    /// Characters that linguistically modify the meaning of another character to which they apply.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Diacritic;
    ///
    /// let diacritic = CodePointSetData::new::<Diacritic>();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```

}
2177
// Generates the `EmojiModifierBase` marker type for the binary property
// `Emoji_Modifier_Base` (short name `EBase`).
make_binary_property! {
    name: "Emoji_Modifier_Base";
    short_name: "EBase";
    ident: EmojiModifierBase;
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
    /// Characters that can serve as a base for emoji modifiers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifierBase;
    ///
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```

}
2199
2200make_binary_property! {
2201    name: "Emoji_Component";
2202    short_name: "EComp";
2203    ident: EmojiComponent;
2204    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
2205    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
2206    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
2207    /// separate choices, such as base characters for emoji keycaps.
2208    ///
2209    /// # Example
2210    ///
2211    /// ```
2212    /// use icu::properties::CodePointSetData;
2213    /// use icu::properties::props::EmojiComponent;
2214    ///
2215    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
2216    ///
2217    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2218    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
2219    /// assert!(emoji_component.contains('7'));  // U+0037 DIGIT SEVEN (a keycap base character)
2220    /// assert!(!emoji_component.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
2221    /// ```
2222
2223}
2224
2225make_binary_property! {
2226    name: "Emoji_Modifier";
2227    short_name: "EMod";
2228    ident: EmojiModifier;
2229    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
2230    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
2231    /// Characters that are emoji modifiers.
2232    ///
2233    /// # Example
2234    ///
2235    /// ```
2236    /// use icu::properties::CodePointSetData;
2237    /// use icu::properties::props::EmojiModifier;
2238    ///
2239    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
2240    /// // An `EMOJI MODIFIER FITZPATRICK` character is included; ZERO WIDTH NON-JOINER is not.
2241    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
2242    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2243    /// ```
2244
2245}
2246
2247make_binary_property! {
2248    name: "Emoji";
2249    short_name: "Emoji";
2250    ident: Emoji;
2251    data_marker: crate::provider::PropertyBinaryEmojiV1;
2252    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
2253    /// Characters that are emoji.
2254    ///
2255    /// # Example
2256    ///
2257    /// ```
2258    /// use icu::properties::CodePointSetData;
2259    /// use icu::properties::props::Emoji;
2260    ///
2261    /// let emoji = CodePointSetData::new::<Emoji>();
2262    ///
2263    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
2264    /// assert!(!emoji.contains('V'));  // U+0056 LATIN CAPITAL LETTER V
2265    /// ```
2266
2267}
2268
2269make_binary_property! {
2270    name: "Emoji_Presentation";
2271    short_name: "EPres";
2272    ident: EmojiPresentation;
2273    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
2274    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
2275    /// Characters that have emoji presentation by default.
2276    ///
2277    /// # Example
2278    ///
2279    /// ```
2280    /// use icu::properties::CodePointSetData;
2281    /// use icu::properties::props::EmojiPresentation;
2282    ///
2283    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2284    /// // BISON has emoji presentation by default; the recycling symbol does not.
2285    /// assert!(emoji_presentation.contains('🦬'));  // U+1F9AC BISON
2286    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2287    /// ```
2288
2289}
2290
2291make_binary_property! {
2292    name: "Extender";
2293    short_name: "Ext";
2294    ident: Extender;
2295    data_marker: crate::provider::PropertyBinaryExtenderV1;
2296    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
2297    /// Characters whose principal function is to extend the value of a preceding alphabetic
2298    /// character or to extend the shape of adjacent characters.
2299    ///
2300    /// # Example
2301    ///
2302    /// ```
2303    /// use icu::properties::CodePointSetData;
2304    /// use icu::properties::props::Extender;
2305    ///
2306    /// let extender = CodePointSetData::new::<Extender>();
2307    /// // The iteration mark and prolonged sound mark are extenders; the middle dot is not.
2308    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
2309    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2310    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
2311    /// ```
2312
2313}
2314
2315make_binary_property! {
2316    name: "Extended_Pictographic";
2317    short_name: "ExtPict";
2318    ident: ExtendedPictographic;
2319    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
2320    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
2321    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2322    /// emoji characters.
2323    ///
2324    /// # Example
2325    ///
2326    /// ```
2327    /// use icu::properties::CodePointSetData;
2328    /// use icu::properties::props::ExtendedPictographic;
2329    ///
2330    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2331    ///
2332    /// assert!(extended_pictographic.contains('🥳'));  // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2333    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2334    /// ```
2335
2336}
2337
2338make_binary_property! {
2339    name: "Graph";
2340    short_name: "Graph";
2341    ident: Graph;
2342    data_marker: crate::provider::PropertyBinaryGraphV1;
2343    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2344    /// Visible characters.
2345    ///
2346    /// This is defined for POSIX compatibility.
2347
2348}
2349
2350make_binary_property! {
2351    name: "Grapheme_Base";
2352    short_name: "Gr_Base";
2353    ident: GraphemeBase;
2354    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
2355    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
2356    /// Property used together with the definition of Standard Korean Syllable Block to define
2357    /// "Grapheme base".
2358    ///
2359    /// See D58 in Chapter 3, Conformance in the Unicode Standard.
2360    ///
2361    /// # Example
2362    ///
2363    /// ```
2364    /// use icu::properties::CodePointSetData;
2365    /// use icu::properties::props::GraphemeBase;
2366    ///
2367    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2368    /// // KA and VOWEL SIGN I are grapheme bases; VOWEL SIGN AA is not.
2369    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2370    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2371    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2372    /// ```
2373
2374}
2375
2376make_binary_property! {
2377    name: "Grapheme_Extend";
2378    short_name: "Gr_Ext";
2379    ident: GraphemeExtend;
2380    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
2381    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
2382    /// Property used to define "Grapheme extender".
2383    ///
2384    /// See D59 in Chapter 3, Conformance in the
2385    /// Unicode Standard.
2386    ///
2387    /// # Example
2388    ///
2389    /// ```
2390    /// use icu::properties::CodePointSetData;
2391    /// use icu::properties::props::GraphemeExtend;
2392    ///
2393    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2394    /// // Only VOWEL SIGN AA is a grapheme extender; KA and VOWEL SIGN I are not.
2395    /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2396    /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2397    /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2398    /// ```
2399
2400}
2401
2402make_binary_property! {
2403    name: "Grapheme_Link";
2404    short_name: "Gr_Link";
2405    ident: GraphemeLink;
2406    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
2407    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
2408    /// Deprecated property.
2409    ///
2410    /// Formerly proposed for programmatic determination of grapheme cluster boundaries.
2411    /// Query it via [`CodePointSetData`](crate::CodePointSetData).
2412}
2413
2414make_binary_property! {
2415    name: "Hex_Digit";
2416    short_name: "Hex";
2417    ident: HexDigit;
2418    data_marker: crate::provider::PropertyBinaryHexDigitV1;
2419    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
2420    /// Characters commonly used for the representation of hexadecimal numbers, plus their
2421    /// compatibility equivalents.
2422    ///
2423    /// # Example
2424    ///
2425    /// ```
2426    /// use icu::properties::CodePointSetData;
2427    /// use icu::properties::props::HexDigit;
2428    ///
2429    /// let hex_digit = CodePointSetData::new::<HexDigit>();
2430    ///
2431    /// assert!(hex_digit.contains('0'));  // U+0030 DIGIT ZERO
2432    /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
2433    /// assert!(hex_digit.contains('f'));  // U+0066 LATIN SMALL LETTER F
2434    /// assert!(hex_digit.contains('ｆ'));  // U+FF46 FULLWIDTH LATIN SMALL LETTER F
2435    /// assert!(hex_digit.contains('Ｆ'));  // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
2436    /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2437    /// ```
2438}
2439
2440make_binary_property! {
2441    name: "Hyphen";
2442    short_name: "Hyphen";
2443    ident: Hyphen;
2444    data_marker: crate::provider::PropertyBinaryHyphenV1;
2445    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
2446    /// Deprecated property.
2447    ///
2448    /// Dashes which are used to mark connections between pieces of words, plus the Katakana
2449    /// middle dot. Query it via [`CodePointSetData`](crate::CodePointSetData).
2450}
2451
2452make_binary_property! {
2453    name: "ID_Compat_Math_Continue";
2454    short_name: "ID_Compat_Math_Continue";
2455    ident: IdCompatMathContinue;
2456    data_marker: crate::provider::PropertyBinaryIdCompatMathContinueV1;
2457    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_CONTINUE_V1;
2458    /// The `ID_Compat_Math_Continue` binary property (query via [`CodePointSetData`](crate::CodePointSetData)).
2459}
2460
2461make_binary_property! {
2462    name: "ID_Compat_Math_Start";
2463    short_name: "ID_Compat_Math_Start";
2464    ident: IdCompatMathStart;
2465    data_marker: crate::provider::PropertyBinaryIdCompatMathStartV1;
2466    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_START_V1;
2467    /// The `ID_Compat_Math_Start` binary property (query via [`CodePointSetData`](crate::CodePointSetData)).
2468}
2469
2470make_binary_property! {
2471    name: "Id_Continue";
2472    short_name: "IDC";
2473    ident: IdContinue;
2474    data_marker: crate::provider::PropertyBinaryIdContinueV1;
2475    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
2476    /// Characters that can come after the first character in an identifier.
2477    ///
2478    /// If using NFKC to fold differences between characters, use [`XidContinue`] instead.
2479    ///
2480    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2481    /// more details.
2482    ///
2483    /// # Example
2484    ///
2485    /// ```
2486    /// use icu::properties::CodePointSetData;
2487    /// use icu::properties::props::IdContinue;
2488    ///
2489    /// let id_continue = CodePointSetData::new::<IdContinue>();
2490    ///
2491    /// assert!(id_continue.contains('x'));
2492    /// assert!(id_continue.contains('1'));
2493    /// assert!(id_continue.contains('_'));
2494    /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2495    /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2496    /// assert!(id_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2497    /// ```
2498}
2499
2500make_binary_property! {
2501    name: "Ideographic";
2502    short_name: "Ideo";
2503    ident: Ideographic;
2504    data_marker: crate::provider::PropertyBinaryIdeographicV1;
2505    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
2506    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2507    /// ideographs, or related siniform ideographs.
2508    ///
2509    /// # Example
2510    ///
2511    /// ```
2512    /// use icu::properties::CodePointSetData;
2513    /// use icu::properties::props::Ideographic;
2514    ///
2515    /// let ideographic = CodePointSetData::new::<Ideographic>();
2516    ///
2517    /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2518    /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
2519    /// ```
2520}
2521
2522make_binary_property! {
2523    name: "Id_Start";
2524    short_name: "IDS";
2525    ident: IdStart;
2526    data_marker: crate::provider::PropertyBinaryIdStartV1;
2527    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
2528    /// Characters that can begin an identifier.
2529    ///
2530    /// If using NFKC to fold differences between characters, use [`XidStart`] instead.
2531    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
2532    /// for more details.
2533    ///
2534    /// # Example
2535    ///
2536    /// ```
2537    /// use icu::properties::CodePointSetData;
2538    /// use icu::properties::props::IdStart;
2539    ///
2540    /// let id_start = CodePointSetData::new::<IdStart>();
2541    ///
2542    /// assert!(id_start.contains('x'));
2543    /// assert!(!id_start.contains('1'));
2544    /// assert!(!id_start.contains('_'));
2545    /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2546    /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2547    /// assert!(id_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2548    /// ```
2549}
2550
2551make_binary_property! {
2552    name: "Ids_Binary_Operator";
2553    short_name: "IDSB";
2554    ident: IdsBinaryOperator;
2555    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
2556    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
2557    /// Binary operator characters used in Ideographic Description Sequences.
2558    ///
2559    /// # Example
2560    ///
2561    /// ```
2562    /// use icu::properties::CodePointSetData;
2563    /// use icu::properties::props::IdsBinaryOperator;
2564    ///
2565    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2566    ///
2567    /// assert!(ids_binary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2568    /// assert!(!ids_binary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2569    /// ```
2570}
2571
2572make_binary_property! {
2573    name: "Ids_Trinary_Operator";
2574    short_name: "IDST";
2575    ident: IdsTrinaryOperator;
2576    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
2577    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
2578    /// Trinary operator characters used in Ideographic Description Sequences.
2579    ///
2580    /// # Example
2581    ///
2582    /// ```
2583    /// use icu::properties::CodePointSetData;
2584    /// use icu::properties::props::IdsTrinaryOperator;
2585    ///
2586    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2587    ///
2588    /// assert!(ids_trinary_operator.contains('\u{2FF2}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2589    /// assert!(ids_trinary_operator.contains('\u{2FF3}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2590    /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
2591    /// assert!(!ids_trinary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2592    /// assert!(!ids_trinary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2593    /// ```
2594}
2595
2596make_binary_property! {
2597    name: "IDS_Unary_Operator";
2598    short_name: "IDSU";
2599    ident: IdsUnaryOperator;
2600    data_marker: crate::provider::PropertyBinaryIdsUnaryOperatorV1;
2601    singleton: SINGLETON_PROPERTY_BINARY_IDS_UNARY_OPERATOR_V1;
2602    /// The `IDS_Unary_Operator` binary property (query via [`CodePointSetData`](crate::CodePointSetData)).
2603}
2604
2605make_binary_property! {
2606    name: "Join_Control";
2607    short_name: "Join_C";
2608    ident: JoinControl;
2609    data_marker: crate::provider::PropertyBinaryJoinControlV1;
2610    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
2611    /// Format control characters which have specific functions for control of cursive joining
2612    /// and ligation.
2613    ///
2614    /// # Example
2615    ///
2616    /// ```
2617    /// use icu::properties::CodePointSetData;
2618    /// use icu::properties::props::JoinControl;
2619    ///
2620    /// let join_control = CodePointSetData::new::<JoinControl>();
2621    ///
2622    /// assert!(join_control.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2623    /// assert!(join_control.contains('\u{200D}'));  // ZERO WIDTH JOINER
2624    /// assert!(!join_control.contains('\u{200E}'));  // LEFT-TO-RIGHT MARK
2625    /// ```
2626}
2627
2628make_binary_property! {
2629    name: "Logical_Order_Exception";
2630    short_name: "LOE";
2631    ident: LogicalOrderException;
2632    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
2633    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
2634    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
2635    ///
2636    /// # Example
2637    ///
2638    /// ```
2639    /// use icu::properties::CodePointSetData;
2640    /// use icu::properties::props::LogicalOrderException;
2641    ///
2642    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2643    /// // LAO VOWEL SIGN EI is a logical order exception; LAO VOWEL SIGN A is not.
2644    /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
2645    /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
2646    /// ```
2647}
2648
2649make_binary_property! {
2650    name: "Lowercase";
2651    short_name: "Lower";
2652    ident: Lowercase;
2653    data_marker: crate::provider::PropertyBinaryLowercaseV1;
2654    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
2655    /// Lowercase characters.
2656    ///
2657    /// # Example
2658    ///
2659    /// ```
2660    /// use icu::properties::CodePointSetData;
2661    /// use icu::properties::props::Lowercase;
2662    ///
2663    /// let lowercase = CodePointSetData::new::<Lowercase>();
2664    ///
2665    /// assert!(lowercase.contains('a'));  // U+0061 LATIN SMALL LETTER A
2666    /// assert!(!lowercase.contains('A'));  // U+0041 LATIN CAPITAL LETTER A
2667    /// ```
2668}
2669
2670make_binary_property! {
2671    name: "Math";
2672    short_name: "Math";
2673    ident: Math;
2674    data_marker: crate::provider::PropertyBinaryMathV1;
2675    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
2676    /// Characters used in mathematical notation.
2677    ///
2678    /// # Example
2679    ///
2680    /// ```
2681    /// use icu::properties::CodePointSetData;
2682    /// use icu::properties::props::Math;
2683    ///
2684    /// let math = CodePointSetData::new::<Math>();
2685    ///
2686    /// assert!(math.contains('='));  // U+003D EQUALS SIGN
2687    /// assert!(math.contains('+'));  // U+002B PLUS SIGN
2688    /// assert!(!math.contains('-'));  // U+002D HYPHEN-MINUS
2689    /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
2690    /// assert!(!math.contains('/'));  // U+002F SOLIDUS
2691    /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
2692    /// ```
2693}
2694
2695make_binary_property! {
2696    name: "Modifier_Combining_Mark";
2697    short_name: "MCM";
2698    ident: ModifierCombiningMark;
2699    data_marker: crate::provider::PropertyBinaryModifierCombiningMarkV1;
2700    singleton: SINGLETON_PROPERTY_BINARY_MODIFIER_COMBINING_MARK_V1;
2701    /// The `Modifier_Combining_Mark` binary property (query via [`CodePointSetData`](crate::CodePointSetData)).
2702}
2703
2704make_binary_property! {
2705    name: "Noncharacter_Code_Point";
2706    short_name: "NChar";
2707    ident: NoncharacterCodePoint;
2708    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
2709    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
2710    /// Code points permanently reserved for internal use.
2711    ///
2712    /// # Example
2713    ///
2714    /// ```
2715    /// use icu::properties::CodePointSetData;
2716    /// use icu::properties::props::NoncharacterCodePoint;
2717    ///
2718    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2719    /// // U+FDD0 and U+FFFF are noncharacters; U+10000 is an assigned character.
2720    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));
2721    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));
2722    /// assert!(!noncharacter_code_point.contains('\u{10000}'));  // LINEAR B SYLLABLE B008 A
2723    /// ```
2724}
2725
2726make_binary_property! {
2727    name: "NFC_Inert";
2728    short_name: "NFC_Inert";
2729    ident: NfcInert;
2730    data_marker: crate::provider::PropertyBinaryNfcInertV1;
2731    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
2732    /// Characters that are inert under NFC (Normalization Form C), i.e., they do not interact with adjacent characters.
2733}
2734
2735make_binary_property! {
2736    name: "NFD_Inert";
2737    short_name: "NFD_Inert";
2738    ident: NfdInert;
2739    data_marker: crate::provider::PropertyBinaryNfdInertV1;
2740    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
2741    /// Characters that are inert under NFD (Normalization Form D), i.e., they do not interact with adjacent characters.
2742}
2743
2744make_binary_property! {
2745    name: "NFKC_Inert";
2746    short_name: "NFKC_Inert";
2747    ident: NfkcInert;
2748    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
2749    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
2750    /// Characters that are inert under NFKC (Normalization Form KC), i.e., they do not interact with adjacent characters.
2751}
2752
2753make_binary_property! {
2754    name: "NFKD_Inert";
2755    short_name: "NFKD_Inert";
2756    ident: NfkdInert;
2757    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
2758    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
2759    /// Characters that are inert under NFKD (Normalization Form KD), i.e., they do not interact with adjacent characters.
2760}
2761
2762make_binary_property! {
2763    name: "Pattern_Syntax";
2764    short_name: "Pat_Syn";
2765    ident: PatternSyntax;
2766    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
2767    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
2768    /// Characters used as syntax in patterns (such as regular expressions).
2769    ///
2770    /// See [`Unicode Standard Annex #31`] for more details.
2771    ///
2772    /// [`Unicode Standard Annex #31`]: https://www.unicode.org/reports/tr31/tr31-35.html
2773    ///
2774    /// # Example
2775    ///
2776    /// ```
2777    /// use icu::properties::CodePointSetData;
2778    /// use icu::properties::props::PatternSyntax;
2779    ///
2780    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2781    ///
2782    /// assert!(pattern_syntax.contains('{'));  // U+007B LEFT CURLY BRACKET
2783    /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
2784    /// assert!(!pattern_syntax.contains('0'));  // U+0030 DIGIT ZERO
2785    /// ```
2786}
2787
2788make_binary_property! {
2789    name: "Pattern_White_Space";
2790    short_name: "Pat_WS";
2791    ident: PatternWhiteSpace;
2792    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
2793    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
2794    /// Characters used as whitespace in patterns (such as regular expressions).
2795    ///
2796    /// See [`Unicode Standard Annex #31`] for more details.
2797    ///
2798    /// [`Unicode Standard Annex #31`]: https://www.unicode.org/reports/tr31/tr31-35.html
2799    ///
2800    /// # Example
2801    ///
2802    /// ```
2803    /// use icu::properties::CodePointSetData;
2804    /// use icu::properties::props::PatternWhiteSpace;
2805    ///
2806    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2807    ///
2808    /// assert!(pattern_white_space.contains(' '));  // U+0020 SPACE
2809    /// assert!(pattern_white_space.contains('\u{2029}'));  // PARAGRAPH SEPARATOR
2810    /// assert!(pattern_white_space.contains('\u{000A}'));  // NEW LINE
2811    /// assert!(!pattern_white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
2812    /// ```
2813}
2814
2815make_binary_property! {
2816    name: "Prepended_Concatenation_Mark";
2817    short_name: "PCM";
2818    ident: PrependedConcatenationMark;
2819    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
2820    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
2821    /// A small class of visible format controls, which precede and then span a sequence of
2822    /// other characters, usually digits. Query it via [`CodePointSetData`](crate::CodePointSetData).
2823}
2824
2825make_binary_property! {
2826    name: "Print";
2827    short_name: "Print";
2828    ident: Print;
2829    data_marker: crate::provider::PropertyBinaryPrintV1;
2830    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
2831    /// Printable characters (visible characters and whitespace).
2832    ///
2833    /// This is defined for POSIX compatibility. Query it via [`CodePointSetData`](crate::CodePointSetData).
2834}
2835
2836make_binary_property! {
2837    name: "Quotation_Mark";
2838    short_name: "QMark";
2839    ident: QuotationMark;
2840    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
2841    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
2842    /// Punctuation characters that function as quotation marks.
2843    ///
2844    /// # Example
2845    ///
2846    /// ```
2847    /// use icu::properties::CodePointSetData;
2848    /// use icu::properties::props::QuotationMark;
2849    ///
2850    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2851    ///
2852    /// assert!(quotation_mark.contains('\''));  // U+0027 APOSTROPHE
2853    /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
2854    /// assert!(!quotation_mark.contains('<'));  // U+003C LESS-THAN SIGN
2855    /// ```
2856}
2857
2858make_binary_property! {
2859    name: "Radical";
2860    short_name: "Radical";
2861    ident: Radical;
2862    data_marker: crate::provider::PropertyBinaryRadicalV1;
2863    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
2864    /// Characters used in the definition of Ideographic Description Sequences.
2865    ///
2866    /// # Example
2867    ///
2868    /// ```
2869    /// use icu::properties::CodePointSetData;
2870    /// use icu::properties::props::Radical;
2871    ///
2872    /// let radical = CodePointSetData::new::<Radical>();
2873    /// // A CJK radical is included; a CJK compatibility ideograph is not.
2874    /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
2875    /// assert!(!radical.contains('丹'));  // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
2876    /// ```
2877}
2878
2879make_binary_property! {
2880    name: "Regional_Indicator";
2881    short_name: "RI";
2882    ident: RegionalIndicator;
2883    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
2884    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
2885    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
2886    ///
2887    /// # Example
2888    ///
2889    /// ```
2890    /// use icu::properties::CodePointSetData;
2891    /// use icu::properties::props::RegionalIndicator;
2892    ///
2893    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
2894    ///
2895    /// assert!(regional_indicator.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2896    /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
2897    /// assert!(!regional_indicator.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
2898    /// ```
2899}
2900
2901make_binary_property! {
2902    name: "Soft_Dotted";
2903    short_name: "SD";
2904    ident: SoftDotted;
2905    data_marker: crate::provider::PropertyBinarySoftDottedV1;
2906    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
2907    /// Characters with a "soft dot", like i or j.
2908    ///
2909    /// An accent placed on these characters causes
2910    /// the dot to disappear.
2911    ///
2912    /// # Example
2913    ///
2914    /// ```
2915    /// use icu::properties::CodePointSetData;
2916    /// use icu::properties::props::SoftDotted;
2917    ///
2918    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
2919    ///
2920    /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
2921    /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
2922    /// ```
2923}
2924
2925make_binary_property! {
2926    name: "Segment_Starter";
2927    short_name: "Segment_Starter";
2928    ident: SegmentStarter;
2929    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
2930    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
2931    /// Characters that are starters in terms of Unicode normalization and combining character
2932    /// sequences. Query it via [`CodePointSetData`](crate::CodePointSetData).
2933}
2934
2935make_binary_property! {
2936    name: "Case_Sensitive";
2937    short_name: "Case_Sensitive";
2938    ident: CaseSensitive;
2939    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
2940    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
2941    /// Characters that are either the source of a case mapping or in the target of a case
2942    /// mapping. Query it via [`CodePointSetData`](crate::CodePointSetData).
2943}
2944
2945make_binary_property! {
2946    name: "Sentence_Terminal";
2947    short_name: "STerm";
2948    ident: SentenceTerminal;
2949    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
2950    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
2951    /// Punctuation characters that generally mark the end of sentences.
2952    ///
2953    /// # Example
2954    ///
2955    /// ```
2956    /// use icu::properties::CodePointSetData;
2957    /// use icu::properties::props::SentenceTerminal;
2958    ///
2959    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
2960    ///
2961    /// assert!(sentence_terminal.contains('.'));  // U+002E FULL STOP
2962    /// assert!(sentence_terminal.contains('?'));  // U+003F QUESTION MARK
2963    /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2964    /// assert!(!sentence_terminal.contains(','));  // U+002C COMMA
2965    /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2966    /// ```
2967}
2968
2969make_binary_property! {
2970    name: "Terminal_Punctuation";
2971    short_name: "Term";
2972    ident: TerminalPunctuation;
2973    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
2974    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
2975    /// Punctuation characters that generally mark the end of textual units.
2976    ///
2977    /// # Example
2978    ///
2979    /// ```
2980    /// use icu::properties::CodePointSetData;
2981    /// use icu::properties::props::TerminalPunctuation;
2982    ///
2983    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
2984    ///
2985    /// assert!(terminal_punctuation.contains('.'));  // U+002E FULL STOP
2986    /// assert!(terminal_punctuation.contains('?'));  // U+003F QUESTION MARK
2987    /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2988    /// assert!(terminal_punctuation.contains(','));  // U+002C COMMA
2989    /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2990    /// ```
2991}
2992
2993make_binary_property! {
2994    name: "Unified_Ideograph";
2995    short_name: "UIdeo";
2996    ident: UnifiedIdeograph;
2997    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
2998    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
2999    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
3000    ///
3001    /// # Example
3002    ///
3003    /// ```
3004    /// use icu::properties::CodePointSetData;
3005    /// use icu::properties::props::UnifiedIdeograph;
3006    ///
3007    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
3008    /// // CJK unified ideographs are included; a Nushu character is not.
3009    /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
3010    /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
3011    /// assert!(!unified_ideograph.contains('𛅸'));  // U+1B178 NUSHU CHARACTER-1B178
3012    /// ```
3013}
3014
make_binary_property! {
    name: "Uppercase";
    short_name: "Upper";
    ident: Uppercase;
    data_marker: crate::provider::PropertyBinaryUppercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
    /// Uppercase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Uppercase;
    ///
    /// let uppercase = CodePointSetData::new::<Uppercase>();
    ///
    /// assert!(uppercase.contains('U'));  // U+0055 LATIN CAPITAL LETTER U
    /// assert!(!uppercase.contains('u'));  // U+0075 LATIN SMALL LETTER U
    /// ```
}
3035
make_binary_property! {
    name: "Variation_Selector";
    short_name: "VS";
    ident: VariationSelector;
    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
    /// Characters that are Variation Selectors.
    ///
    /// # Example
    ///
    /// The negative cases below are a similarly named character (U+303E) and the code point
    /// directly following VARIATION SELECTOR-16 (U+FE10).
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::VariationSelector;
    ///
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
    ///
    /// assert!(variation_selector.contains('\u{180D}'));  // MONGOLIAN FREE VARIATION SELECTOR THREE
    /// assert!(!variation_selector.contains('\u{303E}'));  // IDEOGRAPHIC VARIATION INDICATOR
    /// assert!(variation_selector.contains('\u{FE0F}'));  // VARIATION SELECTOR-16
    /// assert!(!variation_selector.contains('\u{FE10}'));  // PRESENTATION FORM FOR VERTICAL COMMA
    /// assert!(variation_selector.contains('\u{E01EF}'));  // VARIATION SELECTOR-256
    /// ```
}
3059
make_binary_property! {
    name: "White_Space";
    short_name: "space";
    ident: WhiteSpace;
    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
    /// Spaces, separator characters and other control characters which should be treated by
    /// programming languages as "white space" for the purpose of parsing elements.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' '));  // U+0020 SPACE
    /// assert!(white_space.contains('\u{000A}'));  // NEW LINE
    /// assert!(white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
    /// // Not every character with "SPACE" in its name has this property.
    /// assert!(!white_space.contains('\u{200B}'));  // ZERO WIDTH SPACE
    /// ```
}
3083
make_binary_property! {
    name: "Xdigit";
    short_name: "Xdigit";
    ident: Xdigit;
    data_marker: crate::provider::PropertyBinaryXdigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
    /// Hexadecimal digits.
    ///
    /// This is defined for POSIX compatibility.
}
3094
make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::PropertyBinaryXidContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));  // U+0078 LATIN SMALL LETTER X
    /// assert!(xid_continue.contains('1'));  // U+0031 DIGIT ONE
    /// assert!(xid_continue.contains('_'));  // U+005F LOW LINE
    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3122
make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::PropertyBinaryXidStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));  // U+0078 LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('1'));  // U+0031 DIGIT ONE
    /// assert!(!xid_start.contains('_'));  // U+005F LOW LINE
    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3151
3152pub use crate::emoji::EmojiSet;
3153
/// Declares the public marker type for an emoji set property.
///
/// The expansion produces a unit struct named by `ident` that is `Debug`, `#[non_exhaustive]`,
/// sealed through `crate::private::Sealed`, and implements `EmojiSet` with `data_marker` as its
/// `DataMarker`. When the `compiled_data` feature is enabled, `SINGLETON` refers to the
/// constant named by `singleton` on `crate::provider::Baked`.
///
/// At least one attribute (typically the doc comment) must follow the `singleton` entry; every
/// such attribute is forwarded onto the generated struct.
macro_rules! make_emoji_set {
    (
        ident: $marker_ty:ident;
        data_marker: $marker:ty;
        singleton: $baked_const:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $marker_ty;

        impl EmojiSet for $marker_ty {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked_const;
        }

        impl crate::private::Sealed for $marker_ty {}
    };
}
3176
make_emoji_set! {
    ident: BasicEmoji;
    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
    /// Characters and character sequences intended for general-purpose, independent, direct input.
    ///
    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
    /// details.
    ///
    /// # Example
    ///
    /// The set can be queried both with a single character (`contains`) and with a string
    /// (`contains_str`).
    ///
    /// ```
    /// use icu::properties::EmojiSetData;
    /// use icu::properties::props::BasicEmoji;
    ///
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
    ///
    /// assert!(!basic_emoji.contains('\u{0020}'));  // SPACE
    /// assert!(!basic_emoji.contains('\n'));  // U+000A LINE FEED
    /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
    /// assert!(basic_emoji.contains_str("\u{1F983}"));  // the same character, as a string
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
    /// ```
}
3202
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use crate::provider::Baked;
    use std::collections::BTreeMap;

    /// Asserts that the values present in the name-parsing data (`lookup`) and the values
    /// declared as associated constants (`consts`) are the same set.
    ///
    /// Any value found on only one side is listed in the panic message, tagged with the side
    /// it came from (`Data` or `Consts`) and a name for it.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Index the data entries by numeric value. If several names share a value, the one
        // yielded last is kept. Whatever is left after the pass over `consts` has no constant.
        let mut unmatched_data = BTreeMap::new();
        for (name, value) in lookup.map.iter() {
            unmatched_data.insert(value, (name, "Data"));
        }

        let long_names = crate::PropertyNamesLong::<T>::new();

        // Constants without a data entry come first, in declaration order ...
        let mut mismatches = Vec::new();
        for constant in consts {
            let value = u16::from(*constant) as usize;
            let name = long_names
                .get(*constant)
                .unwrap_or("<unknown>")
                .to_string();
            if unmatched_data.remove(&value).is_none() {
                mismatches.push((value, (name, "Consts")));
            }
        }
        // ... followed by data entries without a constant, in ascending value order.
        mismatches.extend(unmatched_data);

        let fmt_diff: String = mismatches
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}"
        );
    }

    /// `East_Asian_Width` constants agree with the baked data.
    #[test]
    fn test_ea() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
            EastAsianWidth::ALL_VALUES,
        );
    }

    /// `Canonical_Combining_Class` constants agree with the baked data.
    #[test]
    fn test_ccc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    /// `Joining_Type` constants agree with the baked data.
    #[test]
    fn test_jt() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
            JoiningType::ALL_VALUES,
        );
    }

    /// `Indic_Syllabic_Category` constants agree with the baked data.
    #[test]
    fn test_insc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    /// `Sentence_Break` constants agree with the baked data.
    #[test]
    fn test_sb() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
            SentenceBreak::ALL_VALUES,
        );
    }

    /// `Word_Break` constants agree with the baked data.
    #[test]
    fn test_wb() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
            WordBreak::ALL_VALUES,
        );
    }

    /// `Bidi_Class` constants agree with the baked data.
    #[test]
    fn test_bc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
            BidiClass::ALL_VALUES,
        );
    }

    /// `Hangul_Syllable_Type` constants agree with the baked data.
    #[test]
    fn test_hst() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
            HangulSyllableType::ALL_VALUES,
        );
    }

    /// `Vertical_Orientation` constants agree with the baked data.
    #[test]
    fn test_vo() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
            VerticalOrientation::ALL_VALUES,
        );
    }
}
icu_properties/props.rs

icu_properties/
props.rs