aws_smithy_xml/
unescape.rs1use crate::decode::XmlDecodeError;
7use std::borrow::Cow;
8
9pub(crate) fn unescape(s: &str) -> Result<Cow<'_, str>, XmlDecodeError> {
19 if !s.contains('&') {
21 return Ok(Cow::Borrowed(s));
22 }
23 let mut res = String::with_capacity(s.len());
25 let mut sections = s.split('&');
27 if let Some(prefix) = sections.next() {
29 res.push_str(prefix);
30 }
31 for section in sections {
32 match section.find(';') {
34 Some(idx) => {
35 let entity = §ion[..idx];
36 match entity {
37 "lt" => res.push('<'),
38 "gt" => res.push('>'),
39 "amp" => res.push('&'),
40 "quot" => res.push('"'),
41 "apos" => res.push('\''),
42 entity => {
43 let (entity, radix) = if let Some(entity) = entity.strip_prefix("#x") {
45 (entity, 16)
46 } else if let Some(entity) = entity.strip_prefix('#') {
47 (entity, 10)
49 } else {
50 return Err(XmlDecodeError::invalid_escape(entity));
51 };
52 let char_code = u32::from_str_radix(entity, radix).map_err(|_| {
53 XmlDecodeError::invalid_escape(format!(
54 "expected numeric escape in base {}; got: {}",
55 radix, &entity
56 ))
57 })?;
58 let chr = std::char::from_u32(char_code).ok_or_else(|| {
59 XmlDecodeError::invalid_escape(format!(
60 "invalid char code: {char_code}"
61 ))
62 })?;
63 res.push(chr);
64 }
65 }
66 res.push_str(§ion[idx + 1..])
68 }
69 None => return Err(XmlDecodeError::invalid_escape("unterminated pattern")),
70 }
71 }
72 Ok(Cow::Owned(res))
73}
74
#[cfg(test)]
mod test {
    use crate::unescape::unescape;
    use std::borrow::Cow;

    /// The five predefined entities decode to their literal characters.
    #[test]
    fn basic_unescape() {
        assert_eq!(
            unescape("&lt; &gt; &apos; &quot; &amp;").unwrap(),
            "< > ' \" &"
        );
        assert_eq!(
            unescape("Since a &gt; b, b is less than a").unwrap(),
            "Since a > b, b is less than a"
        );
    }

    /// Input without any `&` is handed back borrowed.
    #[test]
    fn no_need_to_escape() {
        assert_eq!(unescape("hello 🍕!").unwrap(), Cow::Borrowed("hello 🍕!"));
    }

    /// Adjacent entities, trailing semicolons, and an escaped ampersand.
    #[test]
    fn complex_unescape() {
        assert_eq!(
            unescape("a&lt;b&gt;c&quot;d&apos;e&amp;f;;").unwrap(),
            "a<b>c\"d'e&f;;"
        );
        // Decoding is a single pass: the `&` produced by `&amp;` is not re-interpreted.
        assert_eq!(unescape("&amp;lt;").unwrap(), "&lt;")
    }

    /// Decimal and hexadecimal numeric references for line-ending characters.
    #[test]
    fn newline_encoding() {
        assert_eq!(unescape("&#10;").unwrap(), "\n");
        assert_eq!(unescape("&#xD;").unwrap(), "\r");
    }

    /// Escaped end-of-line characters come back as the exact characters they name.
    #[test]
    fn xml_eol_encoding() {
        assert_eq!(unescape("&#xA; &#xA;").unwrap(), "\n \n");
        assert_eq!(
            unescape("a&#xD;&#xA; b&#xA; c&#xD;").unwrap(),
            "a\r\n b\n c\r"
        );
        assert_eq!(
            unescape("a&#xD;&#x85; b&#x85;").unwrap(),
            "a\r\u{0085} b\u{0085}"
        );
        assert_eq!(
            unescape("a&#xD;&#x2028; b&#x85; c&#x2028;").unwrap(),
            "a\r\u{2028} b\u{0085} c\u{2028}"
        );
    }

    /// Unknown entities, unterminated escapes, and malformed numeric references are errors.
    #[test]
    fn invalid_escapes() {
        unescape("&lte;").expect_err("lte does not make a ≤");
        unescape("&lt").expect_err("unterminated escape sequence");
        unescape("&#Q1234;").expect_err("Q does not began a numeric sequence");
        unescape("&#3.14;").expect_err("decimal escape");
        unescape("&#xZZ").expect_err("Z is not hex");
        unescape("here is a & but without an escape sequence...").expect_err("naked &");
    }

    use proptest::prelude::*;
    proptest! {
        /// Arbitrary input never panics, and any input containing `&` is either rejected
        /// or returned as an owned (rebuilt) string.
        #[test]
        fn no_panics(s: String) {
            let unescaped = unescape(&s);
            if s.contains('&') {
                assert!(
                    matches!(unescaped, Ok(Cow::Owned(_)) | Err(_))
                );
            }
        }
    }
}