tinystr/ascii.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::asciibyte::AsciiByte;
6use crate::int_ops::{Aligned4, Aligned8};
7use crate::ParseError;
8use core::borrow::Borrow;
9use core::fmt;
10use core::ops::Deref;
11use core::str::{self, FromStr};
12
13#[repr(transparent)]
14#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
15pub struct TinyAsciiStr<const N: usize> {
16 bytes: [AsciiByte; N],
17}
18
19impl<const N: usize> TinyAsciiStr<N> {
20 #[inline]
21 pub const fn try_from_str(s: &str) -> Result<Self, ParseError> {
22 Self::try_from_utf8(s.as_bytes())
23 }
24
25 /// Creates a `TinyAsciiStr<N>` from the given UTF-8 slice.
26 /// `code_units` may contain at most `N` non-null ASCII code points.
27 #[inline]
28 pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
29 Self::try_from_utf8_inner(code_units, false)
30 }
31
32 /// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
33 /// `code_units` may contain at most `N` non-null ASCII code points.
34 #[inline]
35 pub const fn try_from_utf16(code_units: &[u16]) -> Result<Self, ParseError> {
36 Self::try_from_utf16_inner(code_units, 0, code_units.len(), false)
37 }
38
39 /// Creates a `TinyAsciiStr<N>` from a UTF-8 slice, replacing invalid code units.
40 ///
41 /// Invalid code units, as well as null or non-ASCII code points
42 /// (i.e. those outside the range U+0001..=U+007F`)
43 /// will be replaced with the replacement byte.
44 ///
45 /// The input slice will be truncated if its length exceeds `N`.
46 pub const fn from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self {
47 let mut out = [0; N];
48 let mut i = 0;
49 // Ord is not available in const, so no `.min(N)`
50 let len = if code_units.len() > N {
51 N
52 } else {
53 code_units.len()
54 };
55
56 // Indexing is protected by the len check above
57 #[expect(clippy::indexing_slicing)]
58 while i < len {
59 let b = code_units[i];
60 if b > 0 && b < 0x80 {
61 out[i] = b;
62 } else {
63 out[i] = replacement;
64 }
65 i += 1;
66 }
67
68 Self {
69 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
70 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
71 }
72 }
73
74 /// Creates a `TinyAsciiStr<N>` from a UTF-16 slice, replacing invalid code units.
75 ///
76 /// Invalid code units, as well as null or non-ASCII code points
77 /// (i.e. those outside the range U+0001..=U+007F`)
78 /// will be replaced with the replacement byte.
79 ///
80 /// The input slice will be truncated if its length exceeds `N`.
81 pub const fn from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self {
82 let mut out = [0; N];
83 let mut i = 0;
84 // Ord is not available in const, so no `.min(N)`
85 let len = if code_units.len() > N {
86 N
87 } else {
88 code_units.len()
89 };
90
91 // Indexing is protected by the len check above
92 #[expect(clippy::indexing_slicing)]
93 while i < len {
94 let b = code_units[i];
95 if b > 0 && b < 0x80 {
96 out[i] = b as u8;
97 } else {
98 out[i] = replacement;
99 }
100 i += 1;
101 }
102
103 Self {
104 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
105 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
106 }
107 }
108
109 /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
110 ///
111 /// The byte array may contain trailing NUL bytes.
112 ///
113 /// # Example
114 ///
115 /// ```
116 /// use tinystr::tinystr;
117 /// use tinystr::TinyAsciiStr;
118 ///
119 /// assert_eq!(
120 /// TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
121 /// Ok(tinystr!(3, "GB"))
122 /// );
123 /// assert_eq!(
124 /// TinyAsciiStr::<3>::try_from_raw(*b"USD"),
125 /// Ok(tinystr!(3, "USD"))
126 /// );
127 /// assert!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0").is_err());
128 /// ```
129 pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, ParseError> {
130 Self::try_from_utf8_inner(&raw, true)
131 }
132
133 pub(crate) const fn try_from_utf8_inner(
134 code_units: &[u8],
135 allow_trailing_null: bool,
136 ) -> Result<Self, ParseError> {
137 if code_units.len() > N {
138 return Err(ParseError::TooLong {
139 max: N,
140 len: code_units.len(),
141 });
142 }
143
144 let mut out = [0; N];
145 let mut i = 0;
146 let mut found_null = false;
147 // Indexing is protected by TinyStrError::TooLarge
148 #[expect(clippy::indexing_slicing)]
149 while i < code_units.len() {
150 let b = code_units[i];
151
152 if b == 0 {
153 found_null = true;
154 } else if b >= 0x80 {
155 return Err(ParseError::NonAscii);
156 } else if found_null {
157 // Error if there are contentful bytes after null
158 return Err(ParseError::ContainsNull);
159 }
160 out[i] = b;
161
162 i += 1;
163 }
164
165 if !allow_trailing_null && found_null {
166 // We found some trailing nulls, error
167 return Err(ParseError::ContainsNull);
168 }
169
170 Ok(Self {
171 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
172 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
173 })
174 }
175
176 pub(crate) const fn try_from_utf16_inner(
177 code_units: &[u16],
178 start: usize,
179 end: usize,
180 allow_trailing_null: bool,
181 ) -> Result<Self, ParseError> {
182 let len = end - start;
183 if len > N {
184 return Err(ParseError::TooLong { max: N, len });
185 }
186
187 let mut out = [0; N];
188 let mut i = 0;
189 let mut found_null = false;
190 // Indexing is protected by TinyStrError::TooLarge
191 #[expect(clippy::indexing_slicing)]
192 while i < len {
193 let b = code_units[start + i];
194
195 if b == 0 {
196 found_null = true;
197 } else if b >= 0x80 {
198 return Err(ParseError::NonAscii);
199 } else if found_null {
200 // Error if there are contentful bytes after null
201 return Err(ParseError::ContainsNull);
202 }
203 out[i] = b as u8;
204
205 i += 1;
206 }
207
208 if !allow_trailing_null && found_null {
209 // We found some trailing nulls, error
210 return Err(ParseError::ContainsNull);
211 }
212
213 Ok(Self {
214 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
215 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
216 })
217 }
218
219 /// Creates a `TinyAsciiStr<N>` containing the decimal representation of
220 /// the given unsigned integer.
221 ///
222 /// If the number of decimal digits exceeds `N`, the highest-magnitude
223 /// digits are truncated, and the lowest-magnitude digits are returned
224 /// as the error.
225 ///
226 /// Note: this function takes a u32. Larger integer types should probably
227 /// not be stored in a `TinyAsciiStr`.
228 ///
229 /// # Examples
230 ///
231 /// ```
232 /// use tinystr::tinystr;
233 /// use tinystr::TinyAsciiStr;
234 ///
235 /// let s0_4 = TinyAsciiStr::<4>::new_unsigned_decimal(0).unwrap();
236 /// let s456_4 = TinyAsciiStr::<4>::new_unsigned_decimal(456).unwrap();
237 /// let s456_3 = TinyAsciiStr::<3>::new_unsigned_decimal(456).unwrap();
238 /// let s456_2 = TinyAsciiStr::<2>::new_unsigned_decimal(456).unwrap_err();
239 ///
240 /// assert_eq!(s0_4, tinystr!(4, "0"));
241 /// assert_eq!(s456_4, tinystr!(4, "456"));
242 /// assert_eq!(s456_3, tinystr!(3, "456"));
243 /// assert_eq!(s456_2, tinystr!(2, "56"));
244 /// ```
245 ///
246 /// Example with saturating the value:
247 ///
248 /// ```
249 /// use tinystr::tinystr;
250 /// use tinystr::TinyAsciiStr;
251 ///
252 /// let str_truncated =
253 /// TinyAsciiStr::<2>::new_unsigned_decimal(456).unwrap_or_else(|s| s);
254 /// let str_saturated = TinyAsciiStr::<2>::new_unsigned_decimal(456)
255 /// .unwrap_or(tinystr!(2, "99"));
256 ///
257 /// assert_eq!(str_truncated, tinystr!(2, "56"));
258 /// assert_eq!(str_saturated, tinystr!(2, "99"));
259 /// ```
260 pub fn new_unsigned_decimal(number: u32) -> Result<Self, Self> {
261 let mut bytes = [AsciiByte::B0; N];
262 let mut x = number;
263 let mut i = 0usize;
264 #[expect(clippy::indexing_slicing)] // in-range: i < N
265 while i < N && (x != 0 || i == 0) {
266 bytes[N - i - 1] = AsciiByte::from_decimal_digit((x % 10) as u8);
267 x /= 10;
268 i += 1;
269 }
270 if i < N {
271 bytes.copy_within((N - i)..N, 0);
272 bytes[i..N].fill(AsciiByte::B0);
273 }
274 let s = Self { bytes };
275 if x != 0 {
276 Err(s)
277 } else {
278 Ok(s)
279 }
280 }
281
282 #[inline]
283 pub const fn as_str(&self) -> &str {
284 // as_utf8 is valid utf8
285 unsafe { str::from_utf8_unchecked(self.as_utf8()) }
286 }
287
288 #[inline]
289 #[must_use]
290 pub const fn len(&self) -> usize {
291 if N <= 4 {
292 Aligned4::from_ascii_bytes(&self.bytes).len()
293 } else if N <= 8 {
294 Aligned8::from_ascii_bytes(&self.bytes).len()
295 } else {
296 let mut i = 0;
297 #[expect(clippy::indexing_slicing)] // < N is safe
298 while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
299 i += 1
300 }
301 i
302 }
303 }
304
305 #[inline]
306 #[must_use]
307 pub const fn is_empty(&self) -> bool {
308 self.bytes[0] as u8 == AsciiByte::B0 as u8
309 }
310
311 #[inline]
312 #[must_use]
313 pub const fn as_utf8(&self) -> &[u8] {
314 // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`,
315 // and changing the length of that slice to self.len() < N is safe.
316 unsafe {
317 core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
318 }
319 }
320
321 #[inline]
322 #[must_use]
323 pub const fn all_bytes(&self) -> &[u8; N] {
324 // SAFETY: `self.bytes` has same size as [u8; N]
325 unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
326 }
327
328 #[inline]
329 #[must_use]
330 /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
331 ///
332 /// If `M < len()` the string gets truncated, otherwise only the
333 /// memory representation changes.
334 pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
335 let mut bytes = [0; M];
336 let mut i = 0;
337 // Indexing is protected by the loop guard
338 #[expect(clippy::indexing_slicing)]
339 while i < M && i < N {
340 bytes[i] = self.bytes[i] as u8;
341 i += 1;
342 }
343 // `self.bytes` only contains ASCII bytes, with no null bytes between
344 // ASCII characters, so this also holds for `bytes`.
345 unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }
346 }
347
348 #[inline]
349 #[must_use]
350 /// Returns a `TinyAsciiStr<Q>` with the concatenation of this string,
351 /// `TinyAsciiStr<N>`, and another string, `TinyAsciiStr<M>`.
352 ///
353 /// If `Q < N + M`, the string gets truncated.
354 ///
355 /// # Examples
356 ///
357 /// ```
358 /// use tinystr::tinystr;
359 /// use tinystr::TinyAsciiStr;
360 ///
361 /// let abc = tinystr!(6, "abc");
362 /// let defg = tinystr!(6, "defg");
363 ///
364 /// // The concatenation is successful if Q is large enough...
365 /// assert_eq!(abc.concat(defg), tinystr!(16, "abcdefg"));
366 /// assert_eq!(abc.concat(defg), tinystr!(12, "abcdefg"));
367 /// assert_eq!(abc.concat(defg), tinystr!(8, "abcdefg"));
368 /// assert_eq!(abc.concat(defg), tinystr!(7, "abcdefg"));
369 ///
370 /// /// ...but it truncates of Q is too small.
371 /// assert_eq!(abc.concat(defg), tinystr!(6, "abcdef"));
372 /// assert_eq!(abc.concat(defg), tinystr!(2, "ab"));
373 /// ```
374 pub const fn concat<const M: usize, const Q: usize>(
375 self,
376 other: TinyAsciiStr<M>,
377 ) -> TinyAsciiStr<Q> {
378 let mut result = self.resize::<Q>();
379 let mut i = self.len();
380 let mut j = 0;
381 // Indexing is protected by the loop guard
382 #[expect(clippy::indexing_slicing)]
383 while i < Q && j < M {
384 result.bytes[i] = other.bytes[j];
385 i += 1;
386 j += 1;
387 }
388 result
389 }
390
391 /// # Safety
392 /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
393 /// between ASCII characters
394 #[must_use]
395 pub const unsafe fn from_utf8_unchecked(code_units: [u8; N]) -> Self {
396 Self {
397 bytes: AsciiByte::to_ascii_byte_array(&code_units),
398 }
399 }
400}
401
// Expands to the body of a `const fn(&self) -> bool` predicate.
//
// For `N <= 4` and `N <= 8` the check is delegated to the `$check_int` method
// of `Aligned4` / `Aligned8`. For larger `N`, the bytes before the first NUL
// are tested one by one with `u8` predicates:
//
// * `(self, int, pred)`: every byte must satisfy `pred`.
// * `(self, int, !first, !rest)`: the first byte must NOT satisfy `first`
//   and no later byte may satisfy `rest`.
// * `(self, int, first, rest)`: the first byte must satisfy `first` and every
//   later byte must satisfy `rest`.
//
// A failing byte makes the enclosing function `return false`.
macro_rules! check_is {
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let nul = AsciiByte::B0 as u8;
            let mut pos = 0;
            while pos < N {
                let byte = $self.bytes[pos] as u8;
                if byte == nul {
                    break;
                }
                if !byte.$check_u8() {
                    return false;
                }
                pos += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            let first = $self.bytes[0] as u8;
            if first.$check_u8_0_inv() {
                return false;
            }
            let nul = AsciiByte::B0 as u8;
            let mut pos = 1;
            while pos < N {
                let byte = $self.bytes[pos] as u8;
                if byte == nul {
                    break;
                }
                if byte.$check_u8_1_inv() {
                    return false;
                }
                pos += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            let first = $self.bytes[0] as u8;
            if !first.$check_u8_0_inv() {
                return false;
            }
            let nul = AsciiByte::B0 as u8;
            let mut pos = 1;
            while pos < N {
                let byte = $self.bytes[pos] as u8;
                if byte == nul {
                    break;
                }
                if !byte.$check_u8_1_inv() {
                    return false;
                }
                pos += 1;
            }
            true
        }
    };
}
460
461impl<const N: usize> TinyAsciiStr<N> {
462 /// Checks if the value is composed of ASCII alphabetic characters:
463 ///
464 /// * U+0041 'A' ..= U+005A 'Z', or
465 /// * U+0061 'a' ..= U+007A 'z'.
466 ///
467 /// # Examples
468 ///
469 /// ```
470 /// use tinystr::TinyAsciiStr;
471 ///
472 /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
473 /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
474 ///
475 /// assert!(s1.is_ascii_alphabetic());
476 /// assert!(!s2.is_ascii_alphabetic());
477 /// ```
478 #[inline]
479 #[must_use]
480 pub const fn is_ascii_alphabetic(&self) -> bool {
481 check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
482 }
483
484 /// Checks if the value is composed of ASCII alphanumeric characters:
485 ///
486 /// * U+0041 'A' ..= U+005A 'Z', or
487 /// * U+0061 'a' ..= U+007A 'z', or
488 /// * U+0030 '0' ..= U+0039 '9'.
489 ///
490 /// # Examples
491 ///
492 /// ```
493 /// use tinystr::TinyAsciiStr;
494 ///
495 /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
496 /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
497 ///
498 /// assert!(s1.is_ascii_alphanumeric());
499 /// assert!(!s2.is_ascii_alphanumeric());
500 /// ```
501 #[inline]
502 #[must_use]
503 pub const fn is_ascii_alphanumeric(&self) -> bool {
504 check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
505 }
506
507 /// Checks if the value is composed of ASCII decimal digits:
508 ///
509 /// * U+0030 '0' ..= U+0039 '9'.
510 ///
511 /// # Examples
512 ///
513 /// ```
514 /// use tinystr::TinyAsciiStr;
515 ///
516 /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
517 /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
518 ///
519 /// assert!(s1.is_ascii_numeric());
520 /// assert!(!s2.is_ascii_numeric());
521 /// ```
522 #[inline]
523 #[must_use]
524 pub const fn is_ascii_numeric(&self) -> bool {
525 check_is!(self, is_ascii_numeric, is_ascii_digit)
526 }
527
528 /// Checks if the value is in ASCII lower case.
529 ///
530 /// All letter characters are checked for case. Non-letter characters are ignored.
531 ///
532 /// # Examples
533 ///
534 /// ```
535 /// use tinystr::TinyAsciiStr;
536 ///
537 /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
538 /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
539 /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
540 ///
541 /// assert!(!s1.is_ascii_lowercase());
542 /// assert!(s2.is_ascii_lowercase());
543 /// assert!(s3.is_ascii_lowercase());
544 /// ```
545 #[inline]
546 #[must_use]
547 pub const fn is_ascii_lowercase(&self) -> bool {
548 check_is!(
549 self,
550 is_ascii_lowercase,
551 !is_ascii_uppercase,
552 !is_ascii_uppercase
553 )
554 }
555
556 /// Checks if the value is in ASCII title case.
557 ///
558 /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
559 /// Non-letter characters are ignored.
560 ///
561 /// # Examples
562 ///
563 /// ```
564 /// use tinystr::TinyAsciiStr;
565 ///
566 /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
567 /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
568 /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
569 ///
570 /// assert!(!s1.is_ascii_titlecase());
571 /// assert!(s2.is_ascii_titlecase());
572 /// assert!(s3.is_ascii_titlecase());
573 /// ```
574 #[inline]
575 #[must_use]
576 pub const fn is_ascii_titlecase(&self) -> bool {
577 check_is!(
578 self,
579 is_ascii_titlecase,
580 !is_ascii_lowercase,
581 !is_ascii_uppercase
582 )
583 }
584
585 /// Checks if the value is in ASCII upper case.
586 ///
587 /// All letter characters are checked for case. Non-letter characters are ignored.
588 ///
589 /// # Examples
590 ///
591 /// ```
592 /// use tinystr::TinyAsciiStr;
593 ///
594 /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
595 /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
596 /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
597 ///
598 /// assert!(!s1.is_ascii_uppercase());
599 /// assert!(s2.is_ascii_uppercase());
600 /// assert!(!s3.is_ascii_uppercase());
601 /// ```
602 #[inline]
603 #[must_use]
604 pub const fn is_ascii_uppercase(&self) -> bool {
605 check_is!(
606 self,
607 is_ascii_uppercase,
608 !is_ascii_lowercase,
609 !is_ascii_lowercase
610 )
611 }
612
613 /// Checks if the value is composed of ASCII alphabetic lower case characters:
614 ///
615 /// * U+0061 'a' ..= U+007A 'z',
616 ///
617 /// # Examples
618 ///
619 /// ```
620 /// use tinystr::TinyAsciiStr;
621 ///
622 /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
623 /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
624 /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
625 /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
626 /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
627 ///
628 /// assert!(!s1.is_ascii_alphabetic_lowercase());
629 /// assert!(!s2.is_ascii_alphabetic_lowercase());
630 /// assert!(!s3.is_ascii_alphabetic_lowercase());
631 /// assert!(s4.is_ascii_alphabetic_lowercase());
632 /// assert!(!s5.is_ascii_alphabetic_lowercase());
633 /// ```
634 #[inline]
635 #[must_use]
636 pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
637 check_is!(
638 self,
639 is_ascii_alphabetic_lowercase,
640 is_ascii_lowercase,
641 is_ascii_lowercase
642 )
643 }
644
645 /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
646 ///
647 /// # Examples
648 ///
649 /// ```
650 /// use tinystr::TinyAsciiStr;
651 ///
652 /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
653 /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
654 /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
655 /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
656 /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
657 ///
658 /// assert!(s1.is_ascii_alphabetic_titlecase());
659 /// assert!(!s2.is_ascii_alphabetic_titlecase());
660 /// assert!(!s3.is_ascii_alphabetic_titlecase());
661 /// assert!(!s4.is_ascii_alphabetic_titlecase());
662 /// assert!(!s5.is_ascii_alphabetic_titlecase());
663 /// ```
664 #[inline]
665 #[must_use]
666 pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
667 check_is!(
668 self,
669 is_ascii_alphabetic_titlecase,
670 is_ascii_uppercase,
671 is_ascii_lowercase
672 )
673 }
674
675 /// Checks if the value is composed of ASCII alphabetic upper case characters:
676 ///
677 /// * U+0041 'A' ..= U+005A 'Z',
678 ///
679 /// # Examples
680 ///
681 /// ```
682 /// use tinystr::TinyAsciiStr;
683 ///
684 /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
685 /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
686 /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
687 /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
688 /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
689 ///
690 /// assert!(!s1.is_ascii_alphabetic_uppercase());
691 /// assert!(!s2.is_ascii_alphabetic_uppercase());
692 /// assert!(!s3.is_ascii_alphabetic_uppercase());
693 /// assert!(s4.is_ascii_alphabetic_uppercase());
694 /// assert!(!s5.is_ascii_alphabetic_uppercase());
695 /// ```
696 #[inline]
697 #[must_use]
698 pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
699 check_is!(
700 self,
701 is_ascii_alphabetic_uppercase,
702 is_ascii_uppercase,
703 is_ascii_uppercase
704 )
705 }
706}
707
// Expands to the body of a `const fn(mut self) -> Self` case conversion.
//
// For `N <= 4` and `N <= 8` the conversion is delegated to the `$to` method of
// `Aligned4` / `Aligned8` and the converted bytes are copied back. For larger
// `N`, every byte before the first NUL is mapped through `$later_char_to`,
// and afterwards, if `$first_char_to` is given, the first byte is mapped
// through it. The macro evaluates to the modified `$self`.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        let mut pos = 0;
        if N <= 4 {
            let converted = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and converted has length >= N
            #[expect(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else if N <= 8 {
            let converted = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and converted has length >= N
            #[expect(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else {
            while pos < N && $self.bytes[pos] as u8 != AsciiByte::B0 as u8 {
                let mapped = ($self.bytes[pos] as u8).$later_char_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[pos] = unsafe { core::mem::transmute::<u8, AsciiByte>(mapped) };
                pos += 1;
            }
            $(
                let mapped_first = ($self.bytes[0] as u8).$first_char_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[0] = unsafe { core::mem::transmute::<u8, AsciiByte>(mapped_first) };
            )?
        }
        $self
    }};
}
747
748impl<const N: usize> TinyAsciiStr<N> {
749 /// Converts this type to its ASCII lower case equivalent in-place.
750 ///
751 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
752 ///
753 /// # Examples
754 ///
755 /// ```
756 /// use tinystr::TinyAsciiStr;
757 ///
758 /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
759 ///
760 /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
761 /// ```
762 #[inline]
763 #[must_use]
764 pub const fn to_ascii_lowercase(mut self) -> Self {
765 to!(self, to_ascii_lowercase, to_ascii_lowercase)
766 }
767
768 /// Converts this type to its ASCII title case equivalent in-place.
769 ///
770 /// The first character is converted to ASCII uppercase; the remaining characters
771 /// are converted to ASCII lowercase.
772 ///
773 /// # Examples
774 ///
775 /// ```
776 /// use tinystr::TinyAsciiStr;
777 ///
778 /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
779 ///
780 /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
781 /// ```
782 #[inline]
783 #[must_use]
784 pub const fn to_ascii_titlecase(mut self) -> Self {
785 to!(
786 self,
787 to_ascii_titlecase,
788 to_ascii_lowercase,
789 to_ascii_uppercase
790 )
791 }
792
793 /// Converts this type to its ASCII upper case equivalent in-place.
794 ///
795 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
796 ///
797 /// # Examples
798 ///
799 /// ```
800 /// use tinystr::TinyAsciiStr;
801 ///
802 /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
803 ///
804 /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
805 /// ```
806 #[inline]
807 #[must_use]
808 pub const fn to_ascii_uppercase(mut self) -> Self {
809 to!(self, to_ascii_uppercase, to_ascii_uppercase)
810 }
811}
812
813impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
814 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
815 fmt::Debug::fmt(self.as_str(), f)
816 }
817}
818
819impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
820 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
821 fmt::Display::fmt(self.as_str(), f)
822 }
823}
824
825impl<const N: usize> Deref for TinyAsciiStr<N> {
826 type Target = str;
827 #[inline]
828 fn deref(&self) -> &str {
829 self.as_str()
830 }
831}
832
833impl<const N: usize> Borrow<str> for TinyAsciiStr<N> {
834 #[inline]
835 fn borrow(&self) -> &str {
836 self.as_str()
837 }
838}
839
840impl<const N: usize> FromStr for TinyAsciiStr<N> {
841 type Err = ParseError;
842 #[inline]
843 fn from_str(s: &str) -> Result<Self, Self::Err> {
844 Self::try_from_str(s)
845 }
846}
847
848impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
849 fn eq(&self, other: &str) -> bool {
850 self.deref() == other
851 }
852}
853
854impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
855 fn eq(&self, other: &&str) -> bool {
856 self.deref() == *other
857 }
858}
859
/// Compares the string contents with an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        self.as_str() == other.as_str()
    }
}
866
/// Compares an owned `String` with the contents of a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        self.as_str() == other.as_str()
    }
}
873
874#[cfg(test)]
875mod test {
876 use super::*;
877 use rand::distr::Distribution;
878 use rand::distr::StandardUniform;
879 use rand::rngs::SmallRng;
880 use rand::SeedableRng;
881
/// Hand-picked inputs: typical subtags and identifiers plus mixed-case,
/// digit-leading and punctuation-only strings.
#[rustfmt::skip]
const STRINGS: [&str; 26] = [
    // Letters only, in various cases and lengths
    "Latn", "laTn", "windows", "AR", "Hans", "macos", "AT", "infiniband",
    "FR", "en", "Cyrl", "FromIntegral", "NO",
    // Digits, alone and mixed with letters
    "419", "MacintoshOSX2019",
    "a3z", "A3z", "A3Z", "a3Z",
    "3A", "3Z", "3a", "3z",
    // Characters adjacent to the ASCII letter ranges
    "@@[`{",
    "UK", "E12",
];
910
911 fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
912 use rand::seq::IndexedRandom;
913 let mut rng = SmallRng::seed_from_u64(2022);
914 // Need to do this in 2 steps since the RNG is needed twice
915 let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap())
916 .take(num_strings)
917 .collect::<Vec<usize>>();
918 string_lengths
919 .iter()
920 .map(|len| {
921 StandardUniform
922 .sample_iter(&mut rng)
923 .filter(|b: &u8| *b > 0 && *b < 0x80)
924 .take(*len)
925 .collect::<Vec<u8>>()
926 })
927 .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII"))
928 .collect()
929 }
930
931 fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
932 where
933 F1: Fn(&str) -> T,
934 F2: Fn(TinyAsciiStr<N>) -> T,
935 T: core::fmt::Debug + core::cmp::PartialEq,
936 {
937 for s in STRINGS
938 .into_iter()
939 .map(str::to_owned)
940 .chain(gen_strings(100, &[3, 4, 5, 8, 12]))
941 {
942 let t = match TinyAsciiStr::<N>::from_str(&s) {
943 Ok(t) => t,
944 Err(ParseError::TooLong { .. }) => continue,
945 Err(e) => panic!("{}", e),
946 };
947 let expected = reference_f(&s);
948 let actual = tinystr_f(t);
949 assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
950
951 let s_utf16: Vec<u16> = s.encode_utf16().collect();
952 let t = match TinyAsciiStr::<N>::try_from_utf16(&s_utf16) {
953 Ok(t) => t,
954 Err(ParseError::TooLong { .. }) => continue,
955 Err(e) => panic!("{}", e),
956 };
957 let expected = reference_f(&s);
958 let actual = tinystr_f(t);
959 assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
960 }
961 }
962
/// `is_ascii_alphabetic` must agree with a byte-wise check on the `&str`.
#[test]
fn test_is_ascii_alphabetic() {
    fn run<const N: usize>() {
        check_operation(
            |s| s.bytes().all(|b| b.is_ascii_alphabetic()),
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
978
/// `is_ascii_alphanumeric` must agree with a byte-wise check on the `&str`.
#[test]
fn test_is_ascii_alphanumeric() {
    fn run<const N: usize>() {
        check_operation(
            |s| s.bytes().all(|b| b.is_ascii_alphanumeric()),
            |t: TinyAsciiStr<N>| t.is_ascii_alphanumeric(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
994
/// `is_ascii_numeric` must agree with a byte-wise digit check on the `&str`.
#[test]
fn test_is_ascii_numeric() {
    fn run<const N: usize>() {
        check_operation(
            |s| s.bytes().all(|b| b.is_ascii_digit()),
            |t: TinyAsciiStr<N>| t.is_ascii_numeric(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1010
/// A string is lowercase exactly when lowercasing it is a no-op.
#[test]
fn test_is_ascii_lowercase() {
    fn run<const N: usize>() {
        check_operation(
            |s| {
                let lowered = TinyAsciiStr::<16>::try_from_str(s)
                    .unwrap()
                    .to_ascii_lowercase();
                s == lowered.as_str()
            },
            |t: TinyAsciiStr<N>| t.is_ascii_lowercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1031
/// A string is titlecase exactly when titlecasing it is a no-op.
#[test]
fn test_is_ascii_titlecase() {
    fn run<const N: usize>() {
        check_operation(
            |s| {
                let titled = TinyAsciiStr::<16>::try_from_str(s)
                    .unwrap()
                    .to_ascii_titlecase();
                s == titled.as_str()
            },
            |t: TinyAsciiStr<N>| t.is_ascii_titlecase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1052
/// A string is uppercase exactly when uppercasing it is a no-op.
#[test]
fn test_is_ascii_uppercase() {
    fn run<const N: usize>() {
        check_operation(
            |s| {
                let raised = TinyAsciiStr::<16>::try_from_str(s)
                    .unwrap()
                    .to_ascii_uppercase();
                s == raised.as_str()
            },
            |t: TinyAsciiStr<N>| t.is_ascii_uppercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1073
/// Alphabetic-lowercase means: all letters, and lowercasing is a no-op.
#[test]
fn test_is_ascii_alphabetic_lowercase() {
    fn run<const N: usize>() {
        check_operation(
            |s| {
                // Letters only...
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                // ...and already lowercase
                let lowered = TinyAsciiStr::<16>::try_from_str(s)
                    .unwrap()
                    .to_ascii_lowercase();
                s == lowered.as_str()
            },
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_lowercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1097
/// Alphabetic-titlecase means: all letters, and titlecasing is a no-op.
#[test]
fn test_is_ascii_alphabetic_titlecase() {
    fn run<const N: usize>() {
        check_operation(
            |s| {
                // Letters only...
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                // ...and already titlecase
                let titled = TinyAsciiStr::<16>::try_from_str(s)
                    .unwrap()
                    .to_ascii_titlecase();
                s == titled.as_str()
            },
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_titlecase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1121
/// Alphabetic-uppercase means: all letters, and uppercasing is a no-op.
#[test]
fn test_is_ascii_alphabetic_uppercase() {
    fn run<const N: usize>() {
        check_operation(
            |s| {
                // Letters only...
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                // ...and already uppercase
                let raised = TinyAsciiStr::<16>::try_from_str(s)
                    .unwrap()
                    .to_ascii_uppercase();
                s == raised.as_str()
            },
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_uppercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1145
/// `to_ascii_lowercase` must match the standard library's `str` conversion.
#[test]
fn test_to_ascii_lowercase() {
    fn run<const N: usize>() {
        check_operation(
            |s| s.to_ascii_lowercase(),
            |t: TinyAsciiStr<N>| {
                let lowered = t.to_ascii_lowercase();
                String::from(lowered.as_str())
            },
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1165
/// `to_ascii_titlecase` must match "lowercase everything, then uppercase the
/// first byte" done with the standard library.
#[test]
fn test_to_ascii_titlecase() {
    fn run<const N: usize>() {
        check_operation(
            |s| {
                let mut titled = s.to_ascii_lowercase();
                // SAFETY: all test inputs are nonempty ASCII strings, and
                // changing the case of an ASCII byte keeps the string valid.
                unsafe { titled.as_bytes_mut()[0].make_ascii_uppercase() };
                titled
            },
            |t: TinyAsciiStr<N>| {
                let titled = t.to_ascii_titlecase();
                String::from(titled.as_str())
            },
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1189
/// `to_ascii_uppercase` must match the standard library's `str` conversion.
#[test]
fn test_to_ascii_uppercase() {
    fn run<const N: usize>() {
        check_operation(
            |s| s.to_ascii_uppercase(),
            |t: TinyAsciiStr<N>| {
                let raised = t.to_ascii_uppercase();
                String::from(raised.as_str())
            },
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1209
/// `from_utf8_lossy` replaces NUL and non-ASCII bytes and truncates input
/// that is longer than the capacity.
#[test]
fn lossy_constructor() {
    // All cases use a capacity of 4 and `?` as the replacement byte.
    let lossy = |input: &[u8]| TinyAsciiStr::<4>::from_utf8_lossy(input, b'?');

    // Empty input stays empty
    assert_eq!(lossy(b"").as_str(), "");
    // Interior and lone NULs are replaced
    assert_eq!(lossy(b"oh\0o").as_str(), "oh?o");
    assert_eq!(lossy(b"\0").as_str(), "?");
    // Overlong input is cut at the capacity
    assert_eq!(lossy(b"toolong").as_str(), "tool");
    // Non-ASCII bytes are replaced
    assert_eq!(lossy(&[b'a', 0x80, 0xFF, b'1']).as_str(), "a??1");
}
1229 }
1230}