icu_provider/marker.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::fallback::{LocaleFallbackConfig, LocaleFallbackPriority};
6use crate::{DataError, DataErrorKind, DataLocale, DataProvider, DataProviderWithMarker};
7use core::fmt;
8use core::marker::PhantomData;
9use icu_locale_core::preferences::LocalePreferences;
10use yoke::Yokeable;
11use zerovec::ule::*;
12
13/// Trait marker for data structs. All types delivered by the data provider must be associated with
14/// something implementing this trait.
15///
16/// Data markers normally generated with the [`data_marker`](crate::data_marker) macro.
17///
18/// Also see [`DataMarker`].
19///
20/// Note: `DynamicDataMarker`s are quasi-const-generic compile-time objects, and as such are expected
21/// to be unit structs. As this is not something that can be enforced by the type system, we
22/// currently only have a `'static` bound on them (which is needed by a lot of our code).
23///
24/// # Examples
25///
26/// Manually implementing DynamicDataMarker for a custom type:
27///
28/// ```
29/// use icu_provider::prelude::*;
30/// use std::borrow::Cow;
31///
32/// #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
33/// struct MyDataStruct<'data> {
34/// message: Cow<'data, str>,
35/// }
36///
37/// struct MyDataStructMarker;
38///
39/// impl DynamicDataMarker for MyDataStructMarker {
40/// type DataStruct = MyDataStruct<'static>;
41/// }
42///
43/// // We can now use MyDataStruct with DataProvider:
44/// let s = MyDataStruct {
45/// message: Cow::Owned("Hello World".into()),
46/// };
47/// let payload = DataPayload::<MyDataStructMarker>::from_owned(s);
48/// assert_eq!(payload.get().message, "Hello World");
49/// ```
50///
51/// [`data_struct`]: crate::data_struct
52pub trait DynamicDataMarker: 'static {
53 /// A type that implements [`Yokeable`]. This should typically be the `'static` version of a
54 /// data struct.
55 type DataStruct: for<'a> Yokeable<'a>;
56}
57
58/// A [`DynamicDataMarker`] with a [`DataMarkerInfo`] attached.
59///
60/// Structs implementing this trait are normally generated with the [`data_struct!`] macro.
61///
62/// Implementing this trait enables this marker to be used with the main [`DataProvider`] trait.
63/// Most markers should be associated with a specific marker and should therefore implement this
64/// trait.
65///
66/// [`BufferMarker`] is an example of a marker that does _not_ implement this trait.
67///
68/// Note: `DataMarker`s are quasi-const-generic compile-time objects, and as such are expected
69/// to be unit structs. As this is not something that can be enforced by the type system, we
70/// currently only have a `'static` bound on them (which is needed by a lot of our code).
71///
72/// [`data_struct!`]: crate::data_struct
73/// [`DataProvider`]: crate::DataProvider
74/// [`BufferMarker`]: crate::buf::BufferMarker
75pub trait DataMarker: DynamicDataMarker {
76 /// The single [`DataMarkerInfo`] associated with this marker.
77 const INFO: DataMarkerInfo;
78}
79
80/// Extension trait for methods on [`DataMarker`]
81pub trait DataMarkerExt: DataMarker + Sized {
82 /// Binds a [`DataMarker`] to a provider supporting it.
83 fn bind<P>(provider: P) -> DataProviderWithMarker<Self, P>
84 where
85 P: DataProvider<Self>;
86 /// Constructs a [`DataLocale`] using fallback preferences from this [`DataMarker`].
87 fn make_locale(locale: LocalePreferences) -> DataLocale;
88}
89
90impl<M: DataMarker + Sized> DataMarkerExt for M {
91 fn bind<P>(provider: P) -> DataProviderWithMarker<Self, P>
92 where
93 P: DataProvider<Self>,
94 {
95 DataProviderWithMarker::new(provider)
96 }
97
98 fn make_locale(locale: LocalePreferences) -> DataLocale {
99 M::INFO.make_locale(locale)
100 }
101}
102
/// A [`DynamicDataMarker`] for which no data is ever returned.
///
/// All types that have non-blanket impls of `DataProvider<M>` are expected to explicitly
/// implement `DataProvider<NeverMarker<Y>>`, returning [`DataErrorKind::MarkerNotFound`].
/// See [`impl_data_provider_never_marker!`].
///
/// [`DataErrorKind::MarkerNotFound`]: crate::DataErrorKind::MarkerNotFound
/// [`impl_data_provider_never_marker!`]: crate::marker::impl_data_provider_never_marker
///
/// # Examples
///
/// ```
/// use icu_locale_core::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::marker::NeverMarker;
/// use icu_provider::prelude::*;
///
/// let buffer_provider = HelloWorldProvider.into_json_provider();
///
/// let result = DataProvider::<NeverMarker<HelloWorld<'static>>>::load(
///     &buffer_provider.as_deserializing(),
///     DataRequest {
///         id: DataIdentifierBorrowed::for_locale(&langid!("en").into()),
///         ..Default::default()
///     },
/// );
///
/// assert!(matches!(
///     result,
///     Err(DataError {
///         kind: DataErrorKind::MarkerNotFound,
///         ..
///     })
/// ));
/// ```
#[derive(Clone, Copy, Debug)]
pub struct NeverMarker<Y>(PhantomData<Y>);
140
141impl<Y> DynamicDataMarker for NeverMarker<Y>
142where
143 for<'a> Y: Yokeable<'a>,
144{
145 type DataStruct = Y;
146}
147
148impl<Y> DataMarker for NeverMarker<Y>
149where
150 for<'a> Y: Yokeable<'a>,
151{
152 const INFO: DataMarkerInfo = DataMarkerInfo::from_id(DataMarkerId {
153 #[cfg(any(feature = "export", debug_assertions))]
154 debug: "NeverMarker",
155 hash: *b"nevermar",
156 });
157}
158
/// Implements `DataProvider<NeverMarker<Y>>` on a struct.
///
/// The generated `load` always fails with `DataErrorKind::MarkerNotFound`.
/// For more information, see [`NeverMarker`].
///
/// # Examples
///
/// ```
/// use icu_locale_core::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::marker::NeverMarker;
/// use icu_provider::prelude::*;
///
/// struct MyProvider;
///
/// icu_provider::marker::impl_data_provider_never_marker!(MyProvider);
///
/// let result = DataProvider::<NeverMarker<HelloWorld<'static>>>::load(
///     &MyProvider,
///     DataRequest {
///         id: DataIdentifierBorrowed::for_locale(&langid!("und").into()),
///         ..Default::default()
///     },
/// );
///
/// assert!(matches!(
///     result,
///     Err(DataError {
///         kind: DataErrorKind::MarkerNotFound,
///         ..
///     })
/// ));
/// ```
#[doc(hidden)] // macro
#[macro_export]
macro_rules! __impl_data_provider_never_marker {
    ($ty:path) => {
        impl<Y> $crate::DataProvider<$crate::marker::NeverMarker<Y>> for $ty
        where
            Y: for<'a> $crate::prelude::yoke::Yokeable<'a>,
        {
            fn load(
                &self,
                req: $crate::DataRequest,
            ) -> Result<$crate::DataResponse<$crate::marker::NeverMarker<Y>>, $crate::DataError>
            {
                // Attach the marker info and the request to the error for context.
                let info = <$crate::marker::NeverMarker<Y> as $crate::DataMarker>::INFO;
                Err($crate::DataErrorKind::MarkerNotFound.with_req(info, req))
            }
        }
    };
}
#[doc(inline)]
pub use __impl_data_provider_never_marker as impl_data_provider_never_marker;
214
215/// A compact hash of a [`DataMarkerInfo`]. Useful for keys in maps.
216///
217/// The hash will be stable over time within major releases.
218#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, ULE)]
219#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
220#[repr(transparent)]
221pub struct DataMarkerIdHash([u8; 4]);
222
223impl DataMarkerIdHash {
224 /// Magic bytes to locate [`DataMarkerIdHash`]es in binaries.
225 pub const LEADING_TAG: &[u8] = b"tdmh";
226
227 /// Gets the hash value as a byte array.
228 pub const fn to_bytes(self) -> [u8; 4] {
229 self.0
230 }
231}
232
/// Computes the 32-bit FxHash of a byte slice in a `const` context.
///
/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our
/// use case since the strings being hashed originate from a trusted source (the ICU4X
/// components), and the hashes are computed at compile time, so we can check for collisions.
///
/// We could have considered a SHA or other cryptographic hash function. However, we are using
/// FxHash because:
///
/// 1. There is precedent for this algorithm in Rust
/// 2. The algorithm is easy to implement as a const function
/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree
/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits,
///    such that truncation would be required in order to fit into a u32, partially reducing
///    the benefit of a cryptographically secure algorithm
// The indexing operations in this function have been reviewed in detail and won't panic.
#[expect(clippy::indexing_slicing)]
const fn fxhash_32(bytes: &[u8]) -> u32 {
    // This code is adapted from https://github.com/rust-lang/rustc-hash,
    // whose license text is reproduced below.
    //
    // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
    // file at the top-level directory of this distribution and at
    // http://rust-lang.org/COPYRIGHT.
    //
    // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
    // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
    // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
    // option. This file may not be copied, modified, or distributed
    // except according to those terms.

    /// Folds one 32-bit word into the running hash: rotate, xor, multiply.
    #[inline]
    const fn hash_word_32(hash: u32, word: u32) -> u32 {
        const ROTATE: u32 = 5;
        const SEED32: u32 = 0x9e_37_79_b9;
        (hash.rotate_left(ROTATE) ^ word).wrapping_mul(SEED32)
    }

    let mut offset: usize = 0;
    let mut remaining: usize = bytes.len();
    let mut state: u32 = 0;

    // Consume full little-endian 4-byte words first.
    while remaining >= 4 {
        let word = u32::from_le_bytes([
            bytes[offset],
            bytes[offset + 1],
            bytes[offset + 2],
            bytes[offset + 3],
        ]);
        state = hash_word_32(state, word);
        offset += 4;
        remaining -= 4;
    }

    // At most three bytes are left: an optional 2-byte half-word ...
    if remaining >= 2 {
        let half = u16::from_le_bytes([bytes[offset], bytes[offset + 1]]);
        state = hash_word_32(state, half as u32);
        offset += 2;
        remaining -= 2;
    }

    // ... followed by an optional single trailing byte.
    if remaining >= 1 {
        state = hash_word_32(state, bytes[offset] as u32);
    }

    state
}
301
// Lets `DataMarkerIdHash` be used as a key or value in `ZeroMap`s (needs `alloc`).
#[cfg(feature = "alloc")]
impl<'a> zerovec::maps::ZeroMapKV<'a> for DataMarkerIdHash {
    type Container = zerovec::ZeroVec<'a, Self>;
    type Slice = zerovec::ZeroSlice<Self>;
    type GetType = <Self as AsULE>::ULE;
    type OwnedType = Self;
}
309
310impl AsULE for DataMarkerIdHash {
311 type ULE = Self;
312 #[inline]
313 fn to_unaligned(self) -> Self::ULE {
314 self
315 }
316 #[inline]
317 fn from_unaligned(unaligned: Self::ULE) -> Self {
318 unaligned
319 }
320}
321
322// Safe since the ULE type is `self`.
323unsafe impl EqULE for DataMarkerIdHash {}
324
325/// The ID of a data marker.
326///
327/// This is generally a [`DataMarkerIdHash`]. If debug assertions or the `export` Cargo feature
328/// are enabled, this also contains a human-readable string for an improved `Debug` implementation.
329#[derive(Debug, Copy, Clone, Eq)]
330pub struct DataMarkerId {
331 #[cfg(any(feature = "export", debug_assertions))]
332 debug: &'static str,
333 hash: [u8; 8],
334}
335
336impl PartialEq for DataMarkerId {
337 #[inline]
338 fn eq(&self, other: &Self) -> bool {
339 self.hash == other.hash
340 }
341}
342
343impl Ord for DataMarkerId {
344 #[inline]
345 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
346 self.hash.cmp(&other.hash)
347 }
348}
349
350impl PartialOrd for DataMarkerId {
351 #[inline]
352 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
353 Some(self.cmp(other))
354 }
355}
356
357impl core::hash::Hash for DataMarkerId {
358 #[inline]
359 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
360 self.hash.hash(state)
361 }
362}
363
364impl DataMarkerId {
365 #[doc(hidden)]
366 // macro use
367 // Error is a str of the expected character class and the index where it wasn't encountered
368 // The indexing operations in this function have been reviewed in detail and won't panic.
369 pub const fn from_name(name: &'static str) -> Result<Self, (&'static str, usize)> {
370 #![allow(clippy::indexing_slicing)]
371 if !name.as_bytes()[name.len() - 1].is_ascii_digit() {
372 return Err(("[0-9]", name.len()));
373 }
374 let mut i = name.len() - 1;
375 while name.as_bytes()[i - 1].is_ascii_digit() {
376 i -= 1;
377 }
378 if name.as_bytes()[i - 1] != b'V' {
379 return Err(("V", i));
380 }
381
382 let magic = DataMarkerIdHash::LEADING_TAG;
383 let hash = fxhash_32(name.as_bytes()).to_le_bytes();
384
385 Ok(Self {
386 #[cfg(any(feature = "export", debug_assertions))]
387 debug: name,
388 hash: [
389 magic[0], magic[1], magic[2], magic[3], hash[0], hash[1], hash[2], hash[3],
390 ],
391 })
392 }
393
394 /// Gets a platform-independent hash of a [`DataMarkerId`].
395 ///
396 /// The hash is 4 bytes and allows for fast comparison.
397 ///
398 /// # Example
399 ///
400 /// ```
401 /// use icu_provider::prelude::*;
402 ///
403 /// icu_provider::data_marker!(FooV1, &'static str);
404 ///
405 /// assert_eq!(FooV1::INFO.id.hashed().to_bytes(), [198, 217, 86, 48]);
406 /// ```
407 #[inline]
408 pub const fn hashed(self) -> DataMarkerIdHash {
409 let [.., h1, h2, h3, h4] = self.hash;
410 DataMarkerIdHash([h1, h2, h3, h4])
411 }
412
413 /// Returns the marker name.
414 ///
415 /// For size reasons, this is only available with the `export` Cargo feature.
416 #[cfg(feature = "export")]
417 pub const fn name(self) -> &'static str {
418 self.debug
419 }
420}
421
422/// Used for loading data from a dynamic ICU4X data provider.
423///
424/// A data marker is tightly coupled with the code that uses it to load data at runtime.
425/// Executables can be searched for `DataMarkerInfo` instances to produce optimized data files.
426/// Therefore, users should not generally create DataMarkerInfo instances; they should instead use
427/// the ones exported by a component.
428#[derive(Copy, Clone, PartialEq, Eq)]
429#[non_exhaustive]
430pub struct DataMarkerInfo {
431 /// The ID of this marker.
432 pub id: DataMarkerId,
433 /// Whether this data marker only has a single payload, not keyed by a data identifier.
434 pub is_singleton: bool,
435 /// Whether this data marker uses checksums for integrity purposes.
436 pub has_checksum: bool,
437 /// The fallback to use for this data marker.
438 pub fallback_config: LocaleFallbackConfig,
439 /// The attributes domain for this data marker. This can be used for filtering marker
440 /// attributes during provider export.
441 #[cfg(feature = "export")]
442 pub attributes_domain: &'static str,
443 /// Whether to create constants for each data struct in baked data.
444 #[cfg(feature = "export")]
445 pub expose_baked_consts: bool,
446}
447
448impl PartialOrd for DataMarkerInfo {
449 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
450 Some(self.cmp(other))
451 }
452}
453
454impl Ord for DataMarkerInfo {
455 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
456 self.id.cmp(&other.id)
457 }
458}
459
460impl core::hash::Hash for DataMarkerInfo {
461 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
462 self.id.hash(state)
463 }
464}
465
466impl DataMarkerInfo {
467 /// See [`Default::default`]
468 pub const fn from_id(id: DataMarkerId) -> Self {
469 Self {
470 id,
471 fallback_config: LocaleFallbackConfig::default(),
472 is_singleton: false,
473 has_checksum: false,
474 #[cfg(feature = "export")]
475 attributes_domain: "",
476 #[cfg(feature = "export")]
477 expose_baked_consts: false,
478 }
479 }
480
481 /// Returns [`Ok`] if this data marker matches the argument, or the appropriate error.
482 ///
483 /// Convenience method for data providers that support a single [`DataMarkerInfo`].
484 ///
485 /// # Examples
486 ///
487 /// ```
488 /// use icu_provider::hello_world::*;
489 /// use icu_provider::prelude::*;
490 ///
491 /// icu_provider::data_marker!(
492 /// DummyV1,
493 /// <HelloWorldV1 as DynamicDataMarker>::DataStruct
494 /// );
495 ///
496 /// assert!(matches!(
497 /// HelloWorldV1::INFO.match_marker(HelloWorldV1::INFO),
498 /// Ok(())
499 /// ));
500 /// assert!(matches!(
501 /// HelloWorldV1::INFO.match_marker(DummyV1::INFO),
502 /// Err(DataError {
503 /// kind: DataErrorKind::MarkerNotFound,
504 /// ..
505 /// })
506 /// ));
507 ///
508 /// // The error context contains the argument:
509 /// assert_eq!(
510 /// HelloWorldV1::INFO
511 /// .match_marker(DummyV1::INFO)
512 /// .unwrap_err()
513 /// .marker,
514 /// Some(DummyV1::INFO.id)
515 /// );
516 /// ```
517 pub fn match_marker(self, marker: Self) -> Result<(), DataError> {
518 if self == marker {
519 Ok(())
520 } else {
521 Err(DataErrorKind::MarkerNotFound.with_marker(marker))
522 }
523 }
524
525 /// Constructs a [`DataLocale`] for this [`DataMarkerInfo`].
526 pub fn make_locale(self, locale: LocalePreferences) -> DataLocale {
527 if self.fallback_config.priority == LocaleFallbackPriority::Region {
528 locale.to_data_locale_region_priority()
529 } else {
530 locale.to_data_locale_language_priority()
531 }
532 }
533}
534
/// Creates a data marker: a public unit struct together with its
/// `DynamicDataMarker` and `DataMarker` implementations.
///
/// # Examples
///
/// ```
/// icu_provider::data_marker!(DummyV1, &'static str);
/// ```
///
/// The identifier needs to end with a `V` followed by one or more digits (the version number).
///
/// Invalid identifiers are compile-time errors (as [`data_marker!`](crate::data_marker) uses `const`).
///
/// ```compile_fail,E0080
/// icu_provider::data_marker!(Dummy, &'static str);
/// ```
#[macro_export] // canonical location is crate root
macro_rules! data_marker {
    (
        $(#[$outer:meta])*
        $name:ident,
        $($debug:literal,)?
        $struct:ty
        $(, $(#[$field_attr:meta])* $info_field:ident = $info_val:expr)*
        $(,)?
    ) => {
        $(#[$outer])*
        #[non_exhaustive]
        pub struct $name;
        impl $crate::DynamicDataMarker for $name {
            type DataStruct = $struct;
        }
        impl $crate::DataMarker for $name {
            const INFO: $crate::DataMarkerInfo = {
                // Only emitted when a debug literal was supplied: a doctest checking that the
                // literal, with its `/`-separated segments capitalized and joined, equals the
                // marker identifier.
                $(
                    /// ```rust
                    #[doc = concat!("let ident = \"", stringify!($name), "\";")]
                    #[doc = concat!("let debug = \"", $debug, "\";")]
                    /// assert_eq!(
                    ///     debug.split('/').map(|s| {
                    ///         let mut b = s.to_ascii_lowercase().into_bytes();
                    ///         b[0] = b[0].to_ascii_uppercase();
                    ///         String::from_utf8(b).unwrap()
                    ///     })
                    ///     .collect::<Vec<_>>()
                    ///     .join(""),
                    ///     ident
                    /// );
                    /// ```
                    #[allow(dead_code)]
                    struct DebugTest;
                )?
                #[allow(unused_mut)]
                // Force evaluation even if marker is unused
                let mut info = const {
                    $crate::DataMarkerInfo::from_id(
                        match $crate::marker::DataMarkerId::from_name(stringify!($name)) {
                            Ok(id) => id,
                            Err(_) => panic!(concat!("Invalid marker name: ", stringify!($name))),
                        }
                    )
                };
                // Apply the caller-supplied `field = value` overrides, in order.
                $(
                    $(#[$field_attr])*
                    {info.$info_field = $info_val;}
                )*
                info
            };
        }
    }
}
595
596impl fmt::Debug for DataMarkerInfo {
597 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
598 #[cfg(any(feature = "export", debug_assertions))]
599 return f.write_str(self.id.debug);
600 #[cfg(not(any(feature = "export", debug_assertions)))]
601 return write!(f, "{:?}", self.id);
602 }
603}
604
605/// A marker for the given `DataStruct`.
606#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
607pub struct ErasedMarker<DataStruct: for<'a> Yokeable<'a>>(PhantomData<DataStruct>);
608impl<DataStruct: for<'a> Yokeable<'a>> DynamicDataMarker for ErasedMarker<DataStruct> {
609 type DataStruct = DataStruct;
610}
611
#[test]
fn test_marker_syntax() {
    // Valid markers:
    for valid in [
        "HelloWorldV1",
        "HelloWorldFooV1",
        "HelloWorldV999",
        "Hello485FooV1",
    ] {
        DataMarkerId::from_name(valid).unwrap();
    }

    // Names without a trailing version digit: the error reports the expected digit class
    // at the end of the name.
    for invalid in [
        "HelloWorld",
        "HelloWorldV",
        "HelloWorldVFoo",
        "HelloWorldV1Foo",
    ] {
        assert_eq!(
            DataMarkerId::from_name(invalid),
            Err(("[0-9]", invalid.len()))
        );
    }
}
639
#[test]
fn test_id_debug() {
    // The debug string stored in the ID is the name it was built from.
    let id = DataMarkerId::from_name("BarV1").unwrap();
    assert_eq!(id.debug, "BarV1");
}
644
#[test]
fn test_hash_word_32() {
    // Known hash values for inputs of length 0 through 9, covering the word, half-word and
    // single-byte paths of `fxhash_32`.
    let cases: [(&str, u32); 10] = [
        ("", 0),
        ("a", 0xF3051F19),
        ("ab", 0x2F9DF119),
        ("abc", 0xCB1D9396),
        ("abcd", 0x8628F119),
        ("abcde", 0xBEBDB56D),
        ("abcdef", 0x1CE8476D),
        ("abcdefg", 0xC0F176A4),
        ("abcdefgh", 0x09AB476D),
        ("abcdefghi", 0xB72F5D88),
    ];
    for (input, expected) in cases {
        assert_eq!(expected, fxhash_32(input.as_bytes()));
    }
}
658
#[test]
fn test_id_hash() {
    // Pins the 4-byte hash derived from the name "BarV1".
    let id = DataMarkerId::from_name("BarV1").unwrap();
    let expected = DataMarkerIdHash([212, 77, 158, 241]);
    assert_eq!(id.hashed(), expected);
}
665}