quick_xml/reader/
state.rs1#[cfg(feature = "encoding")]
2use encoding_rs::UTF_8;
3
4use crate::encoding::Decoder;
5use crate::errors::{Error, Result};
6use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
7#[cfg(feature = "encoding")]
8use crate::reader::EncodingRef;
9use crate::reader::{is_whitespace, BangType, ParseState};
10
11use memchr;
12
13#[derive(Clone)]
17pub(super) struct ReaderState {
18 pub offset: usize,
20 pub state: ParseState,
22 pub expand_empty_elements: bool,
24 pub trim_text_start: bool,
26 pub trim_text_end: bool,
28 pub trim_markup_names_in_closing_tags: bool,
30 pub check_end_names: bool,
32 pub check_comments: bool,
34 opened_buffer: Vec<u8>,
52 opened_starts: Vec<usize>,
55
56 #[cfg(feature = "encoding")]
57 pub encoding: EncodingRef,
59}
60
61impl ReaderState {
62 pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> Result<Event<'b>> {
69 let mut content = bytes;
70
71 if self.trim_text_end {
72 let len = bytes
74 .iter()
75 .rposition(|&b| !is_whitespace(b))
76 .map_or_else(|| bytes.len(), |p| p + 1);
77 content = &bytes[..len];
78 }
79
80 Ok(Event::Text(BytesText::wrap(content, self.decoder())))
81 }
82
83 pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result<Event<'b>> {
86 let uncased_starts_with = |string: &[u8], prefix: &[u8]| {
87 string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix)
88 };
89
90 let len = buf.len();
91 match bang_type {
92 BangType::Comment if buf.starts_with(b"!--") => {
93 debug_assert!(buf.ends_with(b"--"));
94 if self.check_comments {
95 if let Some(p) = memchr::memchr_iter(b'-', &buf[3..len - 2])
97 .position(|p| buf[3 + p + 1] == b'-')
98 {
99 self.offset += len - p;
100 return Err(Error::UnexpectedToken("--".to_string()));
101 }
102 }
103 Ok(Event::Comment(BytesText::wrap(
104 &buf[3..len - 2],
105 self.decoder(),
106 )))
107 }
108 BangType::CData if uncased_starts_with(buf, b"![CDATA[") => {
109 debug_assert!(buf.ends_with(b"]]"));
110 Ok(Event::CData(BytesCData::wrap(
111 &buf[8..len - 2],
112 self.decoder(),
113 )))
114 }
115 BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => {
116 let start = buf[8..]
117 .iter()
118 .position(|b| !is_whitespace(*b))
119 .unwrap_or(len - 8);
120 if start + 8 >= len {
121 return Err(Error::EmptyDocType);
122 }
123 Ok(Event::DocType(BytesText::wrap(
124 &buf[8 + start..],
125 self.decoder(),
126 )))
127 }
128 _ => Err(bang_type.to_err()),
129 }
130 }
131
132 pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
135 let content = &buf[1..];
137 let name = if self.trim_markup_names_in_closing_tags {
140 if let Some(pos_end_name) = content.iter().rposition(|&b| !is_whitespace(b)) {
141 &content[..pos_end_name + 1]
142 } else {
143 content
144 }
145 } else {
146 content
147 };
148
149 let decoder = self.decoder();
150 let mismatch_err = |expected: String, found: &[u8], offset: &mut usize| {
151 *offset -= buf.len();
152 Err(Error::EndEventMismatch {
153 expected,
154 found: decoder.decode(found).unwrap_or_default().into_owned(),
155 })
156 };
157
158 match self.opened_starts.pop() {
160 Some(start) => {
161 if self.check_end_names {
162 let expected = &self.opened_buffer[start..];
163 if name != expected {
164 let expected = decoder.decode(expected).unwrap_or_default().into_owned();
165 self.opened_buffer.truncate(start);
167
168 return mismatch_err(expected, name, &mut self.offset);
169 }
170 }
171
172 self.opened_buffer.truncate(start);
173 }
174 None => {
175 if self.check_end_names {
176 return mismatch_err("".to_string(), &buf[1..], &mut self.offset);
177 }
178 }
179 }
180
181 Ok(Event::End(BytesEnd::wrap(name.into())))
182 }
183
184 pub fn emit_question_mark<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
187 let len = buf.len();
188 if len > 2 && buf[len - 1] == b'?' {
189 if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) {
190 let event = BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3));
191
192 #[cfg(feature = "encoding")]
194 if self.encoding.can_be_refined() {
195 if let Some(encoding) = event.encoder() {
196 self.encoding = EncodingRef::XmlDetected(encoding);
197 }
198 }
199
200 Ok(Event::Decl(event))
201 } else {
202 Ok(Event::PI(BytesText::wrap(&buf[1..len - 1], self.decoder())))
203 }
204 } else {
205 self.offset -= len;
206 Err(Error::UnexpectedEof("XmlDecl".to_string()))
207 }
208 }
209
210 pub fn emit_start<'b>(&mut self, content: &'b [u8]) -> Result<Event<'b>> {
215 let len = content.len();
216 let name_end = content
217 .iter()
218 .position(|&b| is_whitespace(b))
219 .unwrap_or(len);
220 if let Some(&b'/') = content.last() {
221 let name_len = if name_end < len { name_end } else { len - 1 };
223 let event = BytesStart::wrap(&content[..len - 1], name_len);
224
225 if self.expand_empty_elements {
226 self.state = ParseState::Empty;
227 self.opened_starts.push(self.opened_buffer.len());
228 self.opened_buffer.extend(&content[..name_len]);
229 Ok(Event::Start(event))
230 } else {
231 Ok(Event::Empty(event))
232 }
233 } else {
234 self.opened_starts.push(self.opened_buffer.len());
238 self.opened_buffer.extend(&content[..name_end]);
239 Ok(Event::Start(BytesStart::wrap(content, name_end)))
240 }
241 }
242
243 #[inline]
244 pub fn close_expanded_empty(&mut self) -> Result<Event<'static>> {
245 self.state = ParseState::ClosedTag;
246 let name = self
247 .opened_buffer
248 .split_off(self.opened_starts.pop().unwrap());
249 Ok(Event::End(BytesEnd::wrap(name.into())))
250 }
251
252 pub fn decoder(&self) -> Decoder {
262 Decoder {
263 #[cfg(feature = "encoding")]
264 encoding: self.encoding.encoding(),
265 }
266 }
267}
268
269impl Default for ReaderState {
270 fn default() -> Self {
271 Self {
272 offset: 0,
273 state: ParseState::Init,
274 expand_empty_elements: false,
275 trim_text_start: false,
276 trim_text_end: false,
277 trim_markup_names_in_closing_tags: true,
278 check_end_names: true,
279 check_comments: false,
280 opened_buffer: Vec::new(),
281 opened_starts: Vec::new(),
282
283 #[cfg(feature = "encoding")]
284 encoding: EncodingRef::Implicit(UTF_8),
285 }
286 }
287}