tinystr/
ascii.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::asciibyte::AsciiByte;
6use crate::int_ops::{Aligned4, Aligned8};
7use crate::ParseError;
8use core::borrow::Borrow;
9use core::fmt;
10use core::ops::Deref;
11use core::str::{self, FromStr};
12
13#[repr(transparent)]
14#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
15pub struct TinyAsciiStr<const N: usize> {
16    bytes: [AsciiByte; N],
17}
18
19impl<const N: usize> TinyAsciiStr<N> {
20    #[inline]
21    pub const fn try_from_str(s: &str) -> Result<Self, ParseError> {
22        Self::try_from_utf8(s.as_bytes())
23    }
24
25    /// Creates a `TinyAsciiStr<N>` from the given UTF-8 slice.
26    /// `code_units` may contain at most `N` non-null ASCII code points.
27    #[inline]
28    pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
29        Self::try_from_utf8_inner(code_units, false)
30    }
31
32    /// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
33    /// `code_units` may contain at most `N` non-null ASCII code points.
34    #[inline]
35    pub const fn try_from_utf16(code_units: &[u16]) -> Result<Self, ParseError> {
36        Self::try_from_utf16_inner(code_units, 0, code_units.len(), false)
37    }
38
39    /// Creates a `TinyAsciiStr<N>` from a UTF-8 slice, replacing invalid code units.
40    ///
41    /// Invalid code units, as well as null or non-ASCII code points
42    /// (i.e. those outside the range U+0001..=U+007F`)
43    /// will be replaced with the replacement byte.
44    ///
45    /// The input slice will be truncated if its length exceeds `N`.
46    pub const fn from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self {
47        let mut out = [0; N];
48        let mut i = 0;
49        // Ord is not available in const, so no `.min(N)`
50        let len = if code_units.len() > N {
51            N
52        } else {
53            code_units.len()
54        };
55
56        // Indexing is protected by the len check above
57        #[expect(clippy::indexing_slicing)]
58        while i < len {
59            let b = code_units[i];
60            if b > 0 && b < 0x80 {
61                out[i] = b;
62            } else {
63                out[i] = replacement;
64            }
65            i += 1;
66        }
67
68        Self {
69            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
70            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
71        }
72    }
73
74    /// Creates a `TinyAsciiStr<N>` from a UTF-16 slice, replacing invalid code units.
75    ///
76    /// Invalid code units, as well as null or non-ASCII code points
77    /// (i.e. those outside the range U+0001..=U+007F`)
78    /// will be replaced with the replacement byte.
79    ///
80    /// The input slice will be truncated if its length exceeds `N`.
81    pub const fn from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self {
82        let mut out = [0; N];
83        let mut i = 0;
84        // Ord is not available in const, so no `.min(N)`
85        let len = if code_units.len() > N {
86            N
87        } else {
88            code_units.len()
89        };
90
91        // Indexing is protected by the len check above
92        #[expect(clippy::indexing_slicing)]
93        while i < len {
94            let b = code_units[i];
95            if b > 0 && b < 0x80 {
96                out[i] = b as u8;
97            } else {
98                out[i] = replacement;
99            }
100            i += 1;
101        }
102
103        Self {
104            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
105            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
106        }
107    }
108
109    /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
110    ///
111    /// The byte array may contain trailing NUL bytes.
112    ///
113    /// # Example
114    ///
115    /// ```
116    /// use tinystr::tinystr;
117    /// use tinystr::TinyAsciiStr;
118    ///
119    /// assert_eq!(
120    ///     TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
121    ///     Ok(tinystr!(3, "GB"))
122    /// );
123    /// assert_eq!(
124    ///     TinyAsciiStr::<3>::try_from_raw(*b"USD"),
125    ///     Ok(tinystr!(3, "USD"))
126    /// );
127    /// assert!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0").is_err());
128    /// ```
129    pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, ParseError> {
130        Self::try_from_utf8_inner(&raw, true)
131    }
132
133    pub(crate) const fn try_from_utf8_inner(
134        code_units: &[u8],
135        allow_trailing_null: bool,
136    ) -> Result<Self, ParseError> {
137        if code_units.len() > N {
138            return Err(ParseError::TooLong {
139                max: N,
140                len: code_units.len(),
141            });
142        }
143
144        let mut out = [0; N];
145        let mut i = 0;
146        let mut found_null = false;
147        // Indexing is protected by TinyStrError::TooLarge
148        #[expect(clippy::indexing_slicing)]
149        while i < code_units.len() {
150            let b = code_units[i];
151
152            if b == 0 {
153                found_null = true;
154            } else if b >= 0x80 {
155                return Err(ParseError::NonAscii);
156            } else if found_null {
157                // Error if there are contentful bytes after null
158                return Err(ParseError::ContainsNull);
159            }
160            out[i] = b;
161
162            i += 1;
163        }
164
165        if !allow_trailing_null && found_null {
166            // We found some trailing nulls, error
167            return Err(ParseError::ContainsNull);
168        }
169
170        Ok(Self {
171            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
172            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
173        })
174    }
175
176    pub(crate) const fn try_from_utf16_inner(
177        code_units: &[u16],
178        start: usize,
179        end: usize,
180        allow_trailing_null: bool,
181    ) -> Result<Self, ParseError> {
182        let len = end - start;
183        if len > N {
184            return Err(ParseError::TooLong { max: N, len });
185        }
186
187        let mut out = [0; N];
188        let mut i = 0;
189        let mut found_null = false;
190        // Indexing is protected by TinyStrError::TooLarge
191        #[expect(clippy::indexing_slicing)]
192        while i < len {
193            let b = code_units[start + i];
194
195            if b == 0 {
196                found_null = true;
197            } else if b >= 0x80 {
198                return Err(ParseError::NonAscii);
199            } else if found_null {
200                // Error if there are contentful bytes after null
201                return Err(ParseError::ContainsNull);
202            }
203            out[i] = b as u8;
204
205            i += 1;
206        }
207
208        if !allow_trailing_null && found_null {
209            // We found some trailing nulls, error
210            return Err(ParseError::ContainsNull);
211        }
212
213        Ok(Self {
214            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
215            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
216        })
217    }
218
219    /// Creates a `TinyAsciiStr<N>` containing the decimal representation of
220    /// the given unsigned integer.
221    ///
222    /// If the number of decimal digits exceeds `N`, the highest-magnitude
223    /// digits are truncated, and the lowest-magnitude digits are returned
224    /// as the error.
225    ///
226    /// Note: this function takes a u32. Larger integer types should probably
227    /// not be stored in a `TinyAsciiStr`.
228    ///
229    /// # Examples
230    ///
231    /// ```
232    /// use tinystr::tinystr;
233    /// use tinystr::TinyAsciiStr;
234    ///
235    /// let s0_4 = TinyAsciiStr::<4>::new_unsigned_decimal(0).unwrap();
236    /// let s456_4 = TinyAsciiStr::<4>::new_unsigned_decimal(456).unwrap();
237    /// let s456_3 = TinyAsciiStr::<3>::new_unsigned_decimal(456).unwrap();
238    /// let s456_2 = TinyAsciiStr::<2>::new_unsigned_decimal(456).unwrap_err();
239    ///
240    /// assert_eq!(s0_4, tinystr!(4, "0"));
241    /// assert_eq!(s456_4, tinystr!(4, "456"));
242    /// assert_eq!(s456_3, tinystr!(3, "456"));
243    /// assert_eq!(s456_2, tinystr!(2, "56"));
244    /// ```
245    ///
246    /// Example with saturating the value:
247    ///
248    /// ```
249    /// use tinystr::tinystr;
250    /// use tinystr::TinyAsciiStr;
251    ///
252    /// let str_truncated =
253    ///     TinyAsciiStr::<2>::new_unsigned_decimal(456).unwrap_or_else(|s| s);
254    /// let str_saturated = TinyAsciiStr::<2>::new_unsigned_decimal(456)
255    ///     .unwrap_or(tinystr!(2, "99"));
256    ///
257    /// assert_eq!(str_truncated, tinystr!(2, "56"));
258    /// assert_eq!(str_saturated, tinystr!(2, "99"));
259    /// ```
260    pub fn new_unsigned_decimal(number: u32) -> Result<Self, Self> {
261        let mut bytes = [AsciiByte::B0; N];
262        let mut x = number;
263        let mut i = 0usize;
264        #[expect(clippy::indexing_slicing)] // in-range: i < N
265        while i < N && (x != 0 || i == 0) {
266            bytes[N - i - 1] = AsciiByte::from_decimal_digit((x % 10) as u8);
267            x /= 10;
268            i += 1;
269        }
270        if i < N {
271            bytes.copy_within((N - i)..N, 0);
272            bytes[i..N].fill(AsciiByte::B0);
273        }
274        let s = Self { bytes };
275        if x != 0 {
276            Err(s)
277        } else {
278            Ok(s)
279        }
280    }
281
282    #[inline]
283    pub const fn as_str(&self) -> &str {
284        // as_utf8 is valid utf8
285        unsafe { str::from_utf8_unchecked(self.as_utf8()) }
286    }
287
288    #[inline]
289    #[must_use]
290    pub const fn len(&self) -> usize {
291        if N <= 4 {
292            Aligned4::from_ascii_bytes(&self.bytes).len()
293        } else if N <= 8 {
294            Aligned8::from_ascii_bytes(&self.bytes).len()
295        } else {
296            let mut i = 0;
297            #[expect(clippy::indexing_slicing)] // < N is safe
298            while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
299                i += 1
300            }
301            i
302        }
303    }
304
305    #[inline]
306    #[must_use]
307    pub const fn is_empty(&self) -> bool {
308        self.bytes[0] as u8 == AsciiByte::B0 as u8
309    }
310
311    #[inline]
312    #[must_use]
313    pub const fn as_utf8(&self) -> &[u8] {
314        // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`,
315        // and changing the length of that slice to self.len() < N is safe.
316        unsafe {
317            core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
318        }
319    }
320
321    #[inline]
322    #[must_use]
323    pub const fn all_bytes(&self) -> &[u8; N] {
324        // SAFETY: `self.bytes` has same size as [u8; N]
325        unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
326    }
327
328    #[inline]
329    #[must_use]
330    /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
331    ///
332    /// If `M < len()` the string gets truncated, otherwise only the
333    /// memory representation changes.
334    pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
335        let mut bytes = [0; M];
336        let mut i = 0;
337        // Indexing is protected by the loop guard
338        #[expect(clippy::indexing_slicing)]
339        while i < M && i < N {
340            bytes[i] = self.bytes[i] as u8;
341            i += 1;
342        }
343        // `self.bytes` only contains ASCII bytes, with no null bytes between
344        // ASCII characters, so this also holds for `bytes`.
345        unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }
346    }
347
348    #[inline]
349    #[must_use]
350    /// Returns a `TinyAsciiStr<Q>` with the concatenation of this string,
351    /// `TinyAsciiStr<N>`, and another string, `TinyAsciiStr<M>`.
352    ///
353    /// If `Q < N + M`, the string gets truncated.
354    ///
355    /// # Examples
356    ///
357    /// ```
358    /// use tinystr::tinystr;
359    /// use tinystr::TinyAsciiStr;
360    ///
361    /// let abc = tinystr!(6, "abc");
362    /// let defg = tinystr!(6, "defg");
363    ///
364    /// // The concatenation is successful if Q is large enough...
365    /// assert_eq!(abc.concat(defg), tinystr!(16, "abcdefg"));
366    /// assert_eq!(abc.concat(defg), tinystr!(12, "abcdefg"));
367    /// assert_eq!(abc.concat(defg), tinystr!(8, "abcdefg"));
368    /// assert_eq!(abc.concat(defg), tinystr!(7, "abcdefg"));
369    ///
370    /// /// ...but it truncates of Q is too small.
371    /// assert_eq!(abc.concat(defg), tinystr!(6, "abcdef"));
372    /// assert_eq!(abc.concat(defg), tinystr!(2, "ab"));
373    /// ```
374    pub const fn concat<const M: usize, const Q: usize>(
375        self,
376        other: TinyAsciiStr<M>,
377    ) -> TinyAsciiStr<Q> {
378        let mut result = self.resize::<Q>();
379        let mut i = self.len();
380        let mut j = 0;
381        // Indexing is protected by the loop guard
382        #[expect(clippy::indexing_slicing)]
383        while i < Q && j < M {
384            result.bytes[i] = other.bytes[j];
385            i += 1;
386            j += 1;
387        }
388        result
389    }
390
391    /// # Safety
392    /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
393    /// between ASCII characters
394    #[must_use]
395    pub const unsafe fn from_utf8_unchecked(code_units: [u8; N]) -> Self {
396        Self {
397            bytes: AsciiByte::to_ascii_byte_array(&code_units),
398        }
399    }
400}
401
// Expands to the body of an `is_ascii_*` predicate on `TinyAsciiStr<N>`.
//
// For `N <= 4` / `N <= 8` the check is delegated to the `$check_int` method of
// `Aligned4` / `Aligned8`. For larger `N` the content bytes (everything before
// the first NUL) are checked one by one with `u8` methods. An empty string
// passes every form of the check in the byte-wise path.
macro_rules! check_is {
    // Every content byte must satisfy `$check_u8`.
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let mut i = 0;
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    // Negated form: the first byte must NOT satisfy `$check_u8_0_inv` and no
    // later content byte may satisfy `$check_u8_1_inv`.
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if ($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            let mut i = 1;
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if ($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    // Positive form: the first byte must satisfy `$check_u8_0` and every later
    // content byte must satisfy `$check_u8_1`.
    ($self:ident, $check_int:ident, $check_u8_0:ident, $check_u8_1:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Indexing won't panic because N is > 8.
            // A NUL first byte means the string is empty: there is no first
            // character to test, so the check must not fail on the padding byte.
            if $self.bytes[0] as u8 != AsciiByte::B0 as u8
                && !($self.bytes[0] as u8).$check_u8_0()
            {
                return false;
            }
            let mut i = 1;
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8_1() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
}
460
461impl<const N: usize> TinyAsciiStr<N> {
462    /// Checks if the value is composed of ASCII alphabetic characters:
463    ///
464    ///  * U+0041 'A' ..= U+005A 'Z', or
465    ///  * U+0061 'a' ..= U+007A 'z'.
466    ///
467    /// # Examples
468    ///
469    /// ```
470    /// use tinystr::TinyAsciiStr;
471    ///
472    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
473    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
474    ///
475    /// assert!(s1.is_ascii_alphabetic());
476    /// assert!(!s2.is_ascii_alphabetic());
477    /// ```
478    #[inline]
479    #[must_use]
480    pub const fn is_ascii_alphabetic(&self) -> bool {
481        check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
482    }
483
484    /// Checks if the value is composed of ASCII alphanumeric characters:
485    ///
486    ///  * U+0041 'A' ..= U+005A 'Z', or
487    ///  * U+0061 'a' ..= U+007A 'z', or
488    ///  * U+0030 '0' ..= U+0039 '9'.
489    ///
490    /// # Examples
491    ///
492    /// ```
493    /// use tinystr::TinyAsciiStr;
494    ///
495    /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
496    /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
497    ///
498    /// assert!(s1.is_ascii_alphanumeric());
499    /// assert!(!s2.is_ascii_alphanumeric());
500    /// ```
501    #[inline]
502    #[must_use]
503    pub const fn is_ascii_alphanumeric(&self) -> bool {
504        check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
505    }
506
507    /// Checks if the value is composed of ASCII decimal digits:
508    ///
509    ///  * U+0030 '0' ..= U+0039 '9'.
510    ///
511    /// # Examples
512    ///
513    /// ```
514    /// use tinystr::TinyAsciiStr;
515    ///
516    /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
517    /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
518    ///
519    /// assert!(s1.is_ascii_numeric());
520    /// assert!(!s2.is_ascii_numeric());
521    /// ```
522    #[inline]
523    #[must_use]
524    pub const fn is_ascii_numeric(&self) -> bool {
525        check_is!(self, is_ascii_numeric, is_ascii_digit)
526    }
527
528    /// Checks if the value is in ASCII lower case.
529    ///
530    /// All letter characters are checked for case. Non-letter characters are ignored.
531    ///
532    /// # Examples
533    ///
534    /// ```
535    /// use tinystr::TinyAsciiStr;
536    ///
537    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
538    /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
539    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
540    ///
541    /// assert!(!s1.is_ascii_lowercase());
542    /// assert!(s2.is_ascii_lowercase());
543    /// assert!(s3.is_ascii_lowercase());
544    /// ```
545    #[inline]
546    #[must_use]
547    pub const fn is_ascii_lowercase(&self) -> bool {
548        check_is!(
549            self,
550            is_ascii_lowercase,
551            !is_ascii_uppercase,
552            !is_ascii_uppercase
553        )
554    }
555
556    /// Checks if the value is in ASCII title case.
557    ///
558    /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
559    /// Non-letter characters are ignored.
560    ///
561    /// # Examples
562    ///
563    /// ```
564    /// use tinystr::TinyAsciiStr;
565    ///
566    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
567    /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
568    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
569    ///
570    /// assert!(!s1.is_ascii_titlecase());
571    /// assert!(s2.is_ascii_titlecase());
572    /// assert!(s3.is_ascii_titlecase());
573    /// ```
574    #[inline]
575    #[must_use]
576    pub const fn is_ascii_titlecase(&self) -> bool {
577        check_is!(
578            self,
579            is_ascii_titlecase,
580            !is_ascii_lowercase,
581            !is_ascii_uppercase
582        )
583    }
584
585    /// Checks if the value is in ASCII upper case.
586    ///
587    /// All letter characters are checked for case. Non-letter characters are ignored.
588    ///
589    /// # Examples
590    ///
591    /// ```
592    /// use tinystr::TinyAsciiStr;
593    ///
594    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
595    /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
596    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
597    ///
598    /// assert!(!s1.is_ascii_uppercase());
599    /// assert!(s2.is_ascii_uppercase());
600    /// assert!(!s3.is_ascii_uppercase());
601    /// ```
602    #[inline]
603    #[must_use]
604    pub const fn is_ascii_uppercase(&self) -> bool {
605        check_is!(
606            self,
607            is_ascii_uppercase,
608            !is_ascii_lowercase,
609            !is_ascii_lowercase
610        )
611    }
612
613    /// Checks if the value is composed of ASCII alphabetic lower case characters:
614    ///
615    ///  * U+0061 'a' ..= U+007A 'z',
616    ///
617    /// # Examples
618    ///
619    /// ```
620    /// use tinystr::TinyAsciiStr;
621    ///
622    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
623    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
624    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
625    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
626    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
627    ///
628    /// assert!(!s1.is_ascii_alphabetic_lowercase());
629    /// assert!(!s2.is_ascii_alphabetic_lowercase());
630    /// assert!(!s3.is_ascii_alphabetic_lowercase());
631    /// assert!(s4.is_ascii_alphabetic_lowercase());
632    /// assert!(!s5.is_ascii_alphabetic_lowercase());
633    /// ```
634    #[inline]
635    #[must_use]
636    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
637        check_is!(
638            self,
639            is_ascii_alphabetic_lowercase,
640            is_ascii_lowercase,
641            is_ascii_lowercase
642        )
643    }
644
645    /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
646    ///
647    /// # Examples
648    ///
649    /// ```
650    /// use tinystr::TinyAsciiStr;
651    ///
652    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
653    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
654    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
655    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
656    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
657    ///
658    /// assert!(s1.is_ascii_alphabetic_titlecase());
659    /// assert!(!s2.is_ascii_alphabetic_titlecase());
660    /// assert!(!s3.is_ascii_alphabetic_titlecase());
661    /// assert!(!s4.is_ascii_alphabetic_titlecase());
662    /// assert!(!s5.is_ascii_alphabetic_titlecase());
663    /// ```
664    #[inline]
665    #[must_use]
666    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
667        check_is!(
668            self,
669            is_ascii_alphabetic_titlecase,
670            is_ascii_uppercase,
671            is_ascii_lowercase
672        )
673    }
674
675    /// Checks if the value is composed of ASCII alphabetic upper case characters:
676    ///
677    ///  * U+0041 'A' ..= U+005A 'Z',
678    ///
679    /// # Examples
680    ///
681    /// ```
682    /// use tinystr::TinyAsciiStr;
683    ///
684    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
685    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
686    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
687    /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
688    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
689    ///
690    /// assert!(!s1.is_ascii_alphabetic_uppercase());
691    /// assert!(!s2.is_ascii_alphabetic_uppercase());
692    /// assert!(!s3.is_ascii_alphabetic_uppercase());
693    /// assert!(s4.is_ascii_alphabetic_uppercase());
694    /// assert!(!s5.is_ascii_alphabetic_uppercase());
695    /// ```
696    #[inline]
697    #[must_use]
698    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
699        check_is!(
700            self,
701            is_ascii_alphabetic_uppercase,
702            is_ascii_uppercase,
703            is_ascii_uppercase
704        )
705    }
706}
707
// Expands to the body of a `to_ascii_*` case-conversion method on
// `TinyAsciiStr<N>` and evaluates to the converted `$self`.
//
// * `$to` is the word-level conversion called on `Aligned4` / `Aligned8`
//   when `N <= 4` / `N <= 8`.
// * `$later_char_to` is the `u8` method applied to every content byte in the
//   byte-wise path (`N > 8`).
// * `$first_char_to`, when given, is a `u8` method applied to the first byte
//   afterwards in the byte-wise path.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        if N <= 4 {
            let converted = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            let mut idx = 0;
            // Won't panic because self.bytes has length N and converted has length >= N
            #[expect(clippy::indexing_slicing)]
            while idx < N {
                $self.bytes[idx] = converted[idx];
                idx += 1;
            }
        } else if N <= 8 {
            let converted = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            let mut idx = 0;
            // Won't panic because self.bytes has length N and converted has length >= N
            #[expect(clippy::indexing_slicing)]
            while idx < N {
                $self.bytes[idx] = converted[idx];
                idx += 1;
            }
        } else {
            let mut idx = 0;
            // Convert content bytes only; stop at the NUL padding.
            while idx < N && $self.bytes[idx] as u8 != AsciiByte::B0 as u8 {
                let mapped: u8 = ($self.bytes[idx] as u8).$later_char_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[idx] = unsafe { core::mem::transmute::<u8, AsciiByte>(mapped) };
                idx += 1;
            }
            $(
                // The first byte gets its own conversion on top of the one above.
                let first: u8 = ($self.bytes[0] as u8).$first_char_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[0] = unsafe { core::mem::transmute::<u8, AsciiByte>(first) };
            )?
        }
        $self
    }};
}
747
748impl<const N: usize> TinyAsciiStr<N> {
749    /// Converts this type to its ASCII lower case equivalent in-place.
750    ///
751    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
752    ///
753    /// # Examples
754    ///
755    /// ```
756    /// use tinystr::TinyAsciiStr;
757    ///
758    /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
759    ///
760    /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
761    /// ```
762    #[inline]
763    #[must_use]
764    pub const fn to_ascii_lowercase(mut self) -> Self {
765        to!(self, to_ascii_lowercase, to_ascii_lowercase)
766    }
767
768    /// Converts this type to its ASCII title case equivalent in-place.
769    ///
770    /// The first character is converted to ASCII uppercase; the remaining characters
771    /// are converted to ASCII lowercase.
772    ///
773    /// # Examples
774    ///
775    /// ```
776    /// use tinystr::TinyAsciiStr;
777    ///
778    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
779    ///
780    /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
781    /// ```
782    #[inline]
783    #[must_use]
784    pub const fn to_ascii_titlecase(mut self) -> Self {
785        to!(
786            self,
787            to_ascii_titlecase,
788            to_ascii_lowercase,
789            to_ascii_uppercase
790        )
791    }
792
793    /// Converts this type to its ASCII upper case equivalent in-place.
794    ///
795    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
796    ///
797    /// # Examples
798    ///
799    /// ```
800    /// use tinystr::TinyAsciiStr;
801    ///
802    /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
803    ///
804    /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
805    /// ```
806    #[inline]
807    #[must_use]
808    pub const fn to_ascii_uppercase(mut self) -> Self {
809        to!(self, to_ascii_uppercase, to_ascii_uppercase)
810    }
811}
812
813impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
814    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
815        fmt::Debug::fmt(self.as_str(), f)
816    }
817}
818
819impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
820    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
821        fmt::Display::fmt(self.as_str(), f)
822    }
823}
824
825impl<const N: usize> Deref for TinyAsciiStr<N> {
826    type Target = str;
827    #[inline]
828    fn deref(&self) -> &str {
829        self.as_str()
830    }
831}
832
833impl<const N: usize> Borrow<str> for TinyAsciiStr<N> {
834    #[inline]
835    fn borrow(&self) -> &str {
836        self.as_str()
837    }
838}
839
840impl<const N: usize> FromStr for TinyAsciiStr<N> {
841    type Err = ParseError;
842    #[inline]
843    fn from_str(s: &str) -> Result<Self, Self::Err> {
844        Self::try_from_str(s)
845    }
846}
847
848impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
849    fn eq(&self, other: &str) -> bool {
850        self.deref() == other
851    }
852}
853
854impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
855    fn eq(&self, other: &&str) -> bool {
856        self.deref() == *other
857    }
858}
859
/// Compares the string content with an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        let (lhs, rhs): (&str, &str) = (self.as_str(), other.as_str());
        lhs == rhs
    }
}
866
/// Compares an owned `String` with the content of a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        let (lhs, rhs): (&str, &str) = (self.as_str(), other.as_str());
        lhs == rhs
    }
}
873
#[cfg(test)]
mod test {
    use super::*;
    use rand::distr::Distribution;
    use rand::distr::StandardUniform;
    use rand::rngs::SmallRng;
    use rand::SeedableRng;

    /// Hand-picked inputs mixing upper/lower case letters, digits, and the
    /// punctuation string `"@@[`{"`, whose characters sit directly next to the
    /// ASCII letter ranges (`@` before `A`, `[` after `Z`, `` ` `` before `a`,
    /// `{` after `z`).
    const STRINGS: [&str; 26] = [
        "Latn",
        "laTn",
        "windows",
        "AR",
        "Hans",
        "macos",
        "AT",
        "infiniband",
        "FR",
        "en",
        "Cyrl",
        "FromIntegral",
        "NO",
        "419",
        "MacintoshOSX2019",
        "a3z",
        "A3z",
        "A3Z",
        "a3Z",
        "3A",
        "3Z",
        "3a",
        "3z",
        "@@[`{",
        "UK",
        "E12",
    ];

    /// Invokes a local `check::<N>()` helper once for every string width
    /// exercised by this test module.
    macro_rules! check_all_widths {
        ($check:ident) => {
            $check::<2>();
            $check::<3>();
            $check::<4>();
            $check::<5>();
            $check::<8>();
            $check::<16>();
        };
    }

    /// Generates `num_strings` pseudo-random strings of non-null ASCII bytes.
    ///
    /// Each string's length is picked from `allowed_lengths`. The RNG is seeded
    /// with a fixed value, so the output is the same on every run.
    ///
    /// # Panics
    ///
    /// Panics if `allowed_lengths` is empty while `num_strings` is non-zero.
    fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
        use rand::seq::IndexedRandom;
        let mut rng = SmallRng::seed_from_u64(2022);
        // All lengths are drawn up front: the RNG is borrowed mutably here and
        // again below for the contents, so the two uses cannot be interleaved.
        let lengths: Vec<usize> =
            core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap())
                .take(num_strings)
                .collect();
        lengths
            .into_iter()
            .map(|len| {
                let bytes: Vec<u8> = StandardUniform
                    .sample_iter(&mut rng)
                    // Keep only non-null ASCII bytes.
                    .filter(|b: &u8| *b > 0 && *b < 0x80)
                    .take(len)
                    .collect();
                String::from_utf8(bytes).expect("All ASCII")
            })
            .collect()
    }

    /// Checks that `tinystr_f` agrees with `reference_f` on every test input.
    ///
    /// Inputs are the fixed [`STRINGS`] followed by 100 generated strings. Each
    /// input is parsed into a `TinyAsciiStr<N>` twice, once from UTF-8 and once
    /// from UTF-16, and both results must match the reference. Inputs that do
    /// not fit into `N` bytes are skipped; any other parse error fails the test.
    fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
    where
        F1: Fn(&str) -> T,
        F2: Fn(TinyAsciiStr<N>) -> T,
        T: core::fmt::Debug + core::cmp::PartialEq,
    {
        for s in STRINGS
            .into_iter()
            .map(str::to_owned)
            .chain(gen_strings(100, &[3, 4, 5, 8, 12]))
        {
            let s_utf16: Vec<u16> = s.encode_utf16().collect();
            let candidates = [
                TinyAsciiStr::<N>::from_str(&s),
                TinyAsciiStr::<N>::try_from_utf16(&s_utf16),
            ];
            for candidate in candidates {
                let t = match candidate {
                    Ok(t) => t,
                    // Too long for this width: move on to the next input.
                    Err(ParseError::TooLong { .. }) => break,
                    Err(e) => panic!("{}", e),
                };
                let expected = reference_f(&s);
                let actual = tinystr_f(t);
                assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
            }
        }
    }

    #[test]
    fn test_is_ascii_alphabetic() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.bytes().all(|b| b.is_ascii_alphabetic()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_alphanumeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.bytes().all(|b| b.is_ascii_alphanumeric()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_numeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.bytes().all(|b| b.is_ascii_digit()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // A string is lowercase iff lowercasing leaves it unchanged.
                    let lowered = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_lowercase();
                    s == lowered.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // A string is titlecase iff titlecasing leaves it unchanged.
                    let titled = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_titlecase();
                    s == titled.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // A string is uppercase iff uppercasing leaves it unchanged.
                    let raised = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_uppercase();
                    s == raised.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    s.bytes().all(|b| b.is_ascii_alphabetic()) && {
                        // Check lowercase
                        let lowered = TinyAsciiStr::<16>::try_from_str(s)
                            .unwrap()
                            .to_ascii_lowercase();
                        s == lowered.as_str()
                    }
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    s.bytes().all(|b| b.is_ascii_alphabetic()) && {
                        // Check titlecase
                        let titled = TinyAsciiStr::<16>::try_from_str(s)
                            .unwrap()
                            .to_ascii_titlecase();
                        s == titled.as_str()
                    }
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    s.bytes().all(|b| b.is_ascii_alphabetic()) && {
                        // Check uppercase
                        let raised = TinyAsciiStr::<16>::try_from_str(s)
                            .unwrap()
                            .to_ascii_uppercase();
                        s == raised.as_str()
                    }
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_to_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.to_ascii_lowercase(),
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned(),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_to_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let mut expected = s.to_ascii_lowercase();
                    // Uppercase only the first character. `get_mut(..1)` returns
                    // `None` for an empty string, so no `unsafe` byte access or
                    // panicking index is needed.
                    if let Some(head) = expected.get_mut(..1) {
                        head.make_ascii_uppercase();
                    }
                    expected
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
            )
        }
        check_all_widths!(check);
    }

    #[test]
    fn test_to_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.to_ascii_uppercase(),
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned(),
            )
        }
        check_all_widths!(check);
    }

    /// `from_utf8_lossy` must substitute the replacement byte for null and
    /// non-ASCII bytes and truncate input longer than `N`.
    #[test]
    fn lossy_constructor() {
        let lossy = |input: &[u8]| TinyAsciiStr::<4>::from_utf8_lossy(input, b'?');

        // Empty input stays empty.
        assert_eq!(lossy(b"").as_str(), "");
        // Null bytes are replaced, wherever they occur.
        assert_eq!(lossy(b"oh\0o").as_str(), "oh?o");
        assert_eq!(lossy(b"\0").as_str(), "?");
        // Over-long input is cut to N bytes.
        assert_eq!(lossy(b"toolong").as_str(), "tool");
        // Non-ASCII bytes are replaced.
        assert_eq!(lossy(&[b'a', 0x80, 0xFF, b'1']).as_str(), "a??1");
    }
}
tinystr/ascii.rs

tinystr/
ascii.rs