wasmparser/parser.rs
1#[cfg(feature = "features")]
2use crate::WasmFeatures;
3use crate::binary_reader::WASM_MAGIC_NUMBER;
4use crate::prelude::*;
5use crate::{
6 BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,
7 ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,
8 ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,
9 TypeSectionReader,
10};
11#[cfg(feature = "component-model")]
12use crate::{
13 ComponentCanonicalSectionReader, ComponentExportSectionReader, ComponentImportSectionReader,
14 ComponentInstanceSectionReader, ComponentStartFunction, ComponentTypeSectionReader,
15 CoreTypeSectionReader, InstanceSectionReader, SectionLimited, limits::MAX_WASM_MODULE_SIZE,
16};
17use core::fmt;
18use core::iter;
19use core::ops::Range;
20
/// Version number carried in the header of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// The component version started at `0xa` and is bumped whenever the binary
// format changes. Once the component model is stabilized it will become 0x1.
// History so far:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
/// Version number carried in the header of a WebAssembly component.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

/// Value of the upper 16 bits of the header version word for a core module.
const KIND_MODULE: u16 = 0x00;
/// Value of the upper 16 bits of the header version word for a component.
const KIND_COMPONENT: u16 = 0x01;
35
/// The binary formats that the parser is able to recognize.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is a core WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
44
/// Item counts remembered while walking a module so that related sections can
/// be cross-checked against each other.
///
/// Every count starts out as `None` and is filled in when the corresponding
/// section is encountered.
#[derive(Clone, Debug, Default)]
struct ParserCounts {
    /// Number of entries declared by the function section, once seen.
    function_entries: Option<u32>,
    /// Number of bodies declared by the code section, once seen.
    code_entries: Option<u32>,
    /// Number of segments declared by the data section, once seen.
    data_entries: Option<u32>,
    /// Value stored in the data count section, once seen.
    data_count: Option<u32>,
    // NOTE(review): presumably records whether a component start section has
    // already been seen; its use is not visible here, confirm before relying
    // on it.
    #[cfg(feature = "component-model")]
    component_start_sections: bool,
}
54
/// Relative ordering of the sections of a core WebAssembly module.
///
/// The variants are declared in the order the sections are expected to
/// appear, so the derived `Ord` implementation can be used directly to detect
/// out-of-order sections. Do not reorder the variants.
///
/// Sections of a component are unordered and may be repeated, which is why
/// this type plays no role when parsing components.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Order {
    /// No ordered section has been seen yet.
    #[default]
    Initial,
    Type,
    Import,
    Function,
    Table,
    Memory,
    Tag,
    Global,
    Export,
    Start,
    Element,
    DataCount,
    Code,
    Data,
}
77
78/// An incremental parser of a binary WebAssembly module or component.
79///
80/// This type is intended to be used to incrementally parse a WebAssembly module
81/// or component as bytes become available for the module. This can also be used
82/// to parse modules or components that are already entirely resident within memory.
83///
84/// This primary function for a parser is the [`Parser::parse`] function which
85/// will incrementally consume input. You can also use the [`Parser::parse_all`]
86/// function to parse a module or component that is entirely resident in memory.
87#[derive(Debug, Clone)]
88pub struct Parser {
89 state: State,
90 offset: u64,
91 max_size: u64,
92 encoding: Encoding,
93 #[cfg(feature = "features")]
94 features: WasmFeatures,
95 counts: ParserCounts,
96 order: (Order, u64),
97}
98
/// Internal position of the parser within the binary.
#[derive(Clone, Debug)]
enum State {
    /// The 8-byte header has not been read yet.
    Header,
    /// The parser sits at the boundary between two sections.
    SectionStart,
    /// The parser is inside the code section and is handing out function
    /// bodies one at a time.
    FunctionBody {
        /// Number of function bodies still to be produced.
        remaining: u32,
        /// Number of bytes of the code section not yet consumed.
        len: u32,
    },
}
105
106/// A successful return payload from [`Parser::parse`].
107///
108/// On success one of two possible values can be returned, either that more data
109/// is needed to continue parsing or a chunk of the input was parsed, indicating
110/// how much of it was parsed.
111#[derive(Debug)]
112pub enum Chunk<'a> {
113 /// This can be returned at any time and indicates that more data is needed
114 /// to proceed with parsing. Zero bytes were consumed from the input to
115 /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
116 /// bytes are needed to continue parsing.
117 NeedMoreData(u64),
118
119 /// A chunk was successfully parsed.
120 Parsed {
121 /// This many bytes of the `data` input to [`Parser::parse`] were
122 /// consumed to produce `payload`.
123 consumed: usize,
124 /// The value that we actually parsed.
125 payload: Payload<'a>,
126 },
127}
128
129/// Values that can be parsed from a WebAssembly module or component.
130///
131/// This enumeration is all possible chunks of pieces that can be parsed by a
132/// [`Parser`] from a binary WebAssembly module or component. Note that for many
133/// sections the entire section is parsed all at once, whereas other functions,
134/// like the code section, are parsed incrementally. This is a distinction where some
135/// sections, like the type section, are required to be fully resident in memory
136/// (fully downloaded) before proceeding. Other sections, like the code section,
137/// can be processed in a streaming fashion where each function is extracted
138/// individually so it can possibly be shipped to another thread while you wait
139/// for more functions to get downloaded.
140///
141/// Note that payloads, when returned, do not indicate that the module or component
142/// is valid. For example when you receive a `Payload::TypeSection` the type
143/// section itself has not yet actually been parsed. The reader returned will be
144/// able to parse it, but you'll have to actually iterate the reader to do the
145/// full parse. Each payload returned is intended to be a *window* into the
146/// original `data` passed to [`Parser::parse`] which can be further processed
147/// if necessary.
148#[non_exhaustive]
149pub enum Payload<'a> {
150 /// Indicates the header of a WebAssembly module or component.
151 Version {
152 /// The version number found in the header.
153 num: u16,
154 /// The encoding format being parsed.
155 encoding: Encoding,
156 /// The range of bytes that were parsed to consume the header of the
157 /// module or component. Note that this range is relative to the start
158 /// of the byte stream.
159 range: Range<usize>,
160 },
161
162 /// A module type section was received and the provided reader can be
163 /// used to parse the contents of the type section.
164 TypeSection(TypeSectionReader<'a>),
165 /// A module import section was received and the provided reader can be
166 /// used to parse the contents of the import section.
167 ImportSection(ImportSectionReader<'a>),
168 /// A module function section was received and the provided reader can be
169 /// used to parse the contents of the function section.
170 FunctionSection(FunctionSectionReader<'a>),
171 /// A module table section was received and the provided reader can be
172 /// used to parse the contents of the table section.
173 TableSection(TableSectionReader<'a>),
174 /// A module memory section was received and the provided reader can be
175 /// used to parse the contents of the memory section.
176 MemorySection(MemorySectionReader<'a>),
177 /// A module tag section was received, and the provided reader can be
178 /// used to parse the contents of the tag section.
179 TagSection(TagSectionReader<'a>),
180 /// A module global section was received and the provided reader can be
181 /// used to parse the contents of the global section.
182 GlobalSection(GlobalSectionReader<'a>),
183 /// A module export section was received, and the provided reader can be
184 /// used to parse the contents of the export section.
185 ExportSection(ExportSectionReader<'a>),
186 /// A module start section was received.
187 StartSection {
188 /// The start function index
189 func: u32,
190 /// The range of bytes that specify the `func` field, specified in
191 /// offsets relative to the start of the byte stream.
192 range: Range<usize>,
193 },
194 /// A module element section was received and the provided reader can be
195 /// used to parse the contents of the element section.
196 ElementSection(ElementSectionReader<'a>),
197 /// A module data count section was received.
198 DataCountSection {
199 /// The number of data segments.
200 count: u32,
201 /// The range of bytes that specify the `count` field, specified in
202 /// offsets relative to the start of the byte stream.
203 range: Range<usize>,
204 },
205 /// A module data section was received and the provided reader can be
206 /// used to parse the contents of the data section.
207 DataSection(DataSectionReader<'a>),
208 /// Indicator of the start of the code section of a WebAssembly module.
209 ///
210 /// This entry is returned whenever the code section starts. The `count`
211 /// field indicates how many entries are in this code section. After
212 /// receiving this start marker you're guaranteed that the next `count`
213 /// items will be either `CodeSectionEntry` or an error will be returned.
214 ///
215 /// This, unlike other sections, is intended to be used for streaming the
216 /// contents of the code section. The code section is not required to be
217 /// fully resident in memory when we parse it. Instead a [`Parser`] is
218 /// capable of parsing piece-by-piece of a code section.
219 CodeSectionStart {
220 /// The number of functions in this section.
221 count: u32,
222 /// The range of bytes that represent this section, specified in
223 /// offsets relative to the start of the byte stream.
224 range: Range<usize>,
225 /// The size, in bytes, of the remaining contents of this section.
226 ///
227 /// This can be used in combination with [`Parser::skip_section`]
228 /// where the caller will know how many bytes to skip before feeding
229 /// bytes into `Parser` again.
230 size: u32,
231 },
232 /// An entry of the code section, a function, was parsed from a WebAssembly
233 /// module.
234 ///
235 /// This entry indicates that a function was successfully received from the
236 /// code section, and the payload here is the window into the original input
237 /// where the function resides. Note that the function itself has not been
238 /// parsed, it's only been outlined. You'll need to process the
239 /// `FunctionBody` provided to test whether it parses and/or is valid.
240 CodeSectionEntry(FunctionBody<'a>),
241
242 /// A core module section was received and the provided parser can be
243 /// used to parse the nested module.
244 ///
245 /// This variant is special in that it returns a sub-`Parser`. Upon
246 /// receiving a `ModuleSection` it is expected that the returned
247 /// `Parser` will be used instead of the parent `Parser` until the parse has
248 /// finished. You'll need to feed data into the `Parser` returned until it
249 /// returns `Payload::End`. After that you'll switch back to the parent
250 /// parser to resume parsing the rest of the current component.
251 ///
252 /// Note that binaries will not be parsed correctly if you feed the data for
253 /// a nested module into the parent [`Parser`].
254 #[cfg(feature = "component-model")]
255 ModuleSection {
256 /// The parser for the nested module.
257 parser: Parser,
258 /// The range of bytes that represent the nested module in the
259 /// original byte stream.
260 ///
261 /// Note that, to better support streaming parsing and validation, the
262 /// validator does *not* check that this range is in bounds.
263 unchecked_range: Range<usize>,
264 },
265 /// A core instance section was received and the provided parser can be
266 /// used to parse the contents of the core instance section.
267 ///
268 /// Currently this section is only parsed in a component.
269 #[cfg(feature = "component-model")]
270 InstanceSection(InstanceSectionReader<'a>),
271 /// A core type section was received and the provided parser can be
272 /// used to parse the contents of the core type section.
273 ///
274 /// Currently this section is only parsed in a component.
275 #[cfg(feature = "component-model")]
276 CoreTypeSection(CoreTypeSectionReader<'a>),
277 /// A component section from a WebAssembly component was received and the
278 /// provided parser can be used to parse the nested component.
279 ///
280 /// This variant is special in that it returns a sub-`Parser`. Upon
281 /// receiving a `ComponentSection` it is expected that the returned
282 /// `Parser` will be used instead of the parent `Parser` until the parse has
283 /// finished. You'll need to feed data into the `Parser` returned until it
284 /// returns `Payload::End`. After that you'll switch back to the parent
285 /// parser to resume parsing the rest of the current component.
286 ///
287 /// Note that binaries will not be parsed correctly if you feed the data for
288 /// a nested component into the parent [`Parser`].
289 #[cfg(feature = "component-model")]
290 ComponentSection {
291 /// The parser for the nested component.
292 parser: Parser,
293 /// The range of bytes that represent the nested component in the
294 /// original byte stream.
295 ///
296 /// Note that, to better support streaming parsing and validation, the
297 /// validator does *not* check that this range is in bounds.
298 unchecked_range: Range<usize>,
299 },
300 /// A component instance section was received and the provided reader can be
301 /// used to parse the contents of the component instance section.
302 #[cfg(feature = "component-model")]
303 ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
304 /// A component alias section was received and the provided reader can be
305 /// used to parse the contents of the component alias section.
306 #[cfg(feature = "component-model")]
307 ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
308 /// A component type section was received and the provided reader can be
309 /// used to parse the contents of the component type section.
310 #[cfg(feature = "component-model")]
311 ComponentTypeSection(ComponentTypeSectionReader<'a>),
312 /// A component canonical section was received and the provided reader can be
313 /// used to parse the contents of the component canonical section.
314 #[cfg(feature = "component-model")]
315 ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
316 /// A component start section was received.
317 #[cfg(feature = "component-model")]
318 ComponentStartSection {
319 /// The start function description.
320 start: ComponentStartFunction,
321 /// The range of bytes that specify the `start` field.
322 range: Range<usize>,
323 },
324 /// A component import section was received and the provided reader can be
325 /// used to parse the contents of the component import section.
326 #[cfg(feature = "component-model")]
327 ComponentImportSection(ComponentImportSectionReader<'a>),
328 /// A component export section was received, and the provided reader can be
329 /// used to parse the contents of the component export section.
330 #[cfg(feature = "component-model")]
331 ComponentExportSection(ComponentExportSectionReader<'a>),
332
333 /// A module or component custom section was received.
334 CustomSection(CustomSectionReader<'a>),
335
336 /// An unknown section was found.
337 ///
338 /// This variant is returned for all unknown sections encountered. This
339 /// likely wants to be interpreted as an error by consumers of the parser,
340 /// but this can also be used to parse sections currently unsupported by
341 /// the parser.
342 UnknownSection {
343 /// The 8-bit identifier for this section.
344 id: u8,
345 /// The contents of this section.
346 contents: &'a [u8],
347 /// The range of bytes, relative to the start of the original data
348 /// stream, that the contents of this section reside in.
349 range: Range<usize>,
350 },
351
352 /// The end of the WebAssembly module or component was reached.
353 ///
354 /// The value is the offset in the input byte stream where the end
355 /// was reached.
356 End(usize),
357}
358
// Section identifiers used by core WebAssembly modules. The custom section id
// is shared with components.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section identifiers used by WebAssembly components. They reuse the same
// numeric space as the module identifiers above and are told apart by the
// encoding announced in the header.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;
396
397impl Parser {
398 /// Creates a new parser.
399 ///
400 /// Reports errors and ranges relative to `offset` provided, where `offset`
401 /// is some logical offset within the input stream that we're parsing.
402 pub fn new(offset: u64) -> Parser {
403 Parser {
404 state: State::Header,
405 offset,
406 max_size: u64::MAX,
407 // Assume the encoding is a module until we know otherwise
408 encoding: Encoding::Module,
409 #[cfg(feature = "features")]
410 features: WasmFeatures::all(),
411 counts: ParserCounts::default(),
412 order: (Order::default(), offset),
413 }
414 }
415
416 /// Tests whether `bytes` looks like a core WebAssembly module.
417 ///
418 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
419 /// starts with the standard core WebAssembly header.
420 pub fn is_core_wasm(bytes: &[u8]) -> bool {
421 const HEADER: [u8; 8] = [
422 WASM_MAGIC_NUMBER[0],
423 WASM_MAGIC_NUMBER[1],
424 WASM_MAGIC_NUMBER[2],
425 WASM_MAGIC_NUMBER[3],
426 WASM_MODULE_VERSION.to_le_bytes()[0],
427 WASM_MODULE_VERSION.to_le_bytes()[1],
428 KIND_MODULE.to_le_bytes()[0],
429 KIND_MODULE.to_le_bytes()[1],
430 ];
431 bytes.starts_with(&HEADER)
432 }
433
434 /// Tests whether `bytes` looks like a WebAssembly component.
435 ///
436 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
437 /// starts with the standard WebAssembly component header.
438 pub fn is_component(bytes: &[u8]) -> bool {
439 const HEADER: [u8; 8] = [
440 WASM_MAGIC_NUMBER[0],
441 WASM_MAGIC_NUMBER[1],
442 WASM_MAGIC_NUMBER[2],
443 WASM_MAGIC_NUMBER[3],
444 WASM_COMPONENT_VERSION.to_le_bytes()[0],
445 WASM_COMPONENT_VERSION.to_le_bytes()[1],
446 KIND_COMPONENT.to_le_bytes()[0],
447 KIND_COMPONENT.to_le_bytes()[1],
448 ];
449 bytes.starts_with(&HEADER)
450 }
451
452 /// Returns the currently active set of wasm features that this parser is
453 /// using while parsing.
454 ///
455 /// The default set of features is [`WasmFeatures::all()`] for new parsers.
456 ///
457 /// For more information see [`BinaryReader::new`].
458 #[cfg(feature = "features")]
459 pub fn features(&self) -> WasmFeatures {
460 self.features
461 }
462
463 /// Sets the wasm features active while parsing to the `features` specified.
464 ///
465 /// The default set of features is [`WasmFeatures::all()`] for new parsers.
466 ///
467 /// For more information see [`BinaryReader::new`].
468 #[cfg(feature = "features")]
469 pub fn set_features(&mut self, features: WasmFeatures) {
470 self.features = features;
471 }
472
473 /// Returns the original offset that this parser is currently at.
474 pub fn offset(&self) -> u64 {
475 self.offset
476 }
477
478 /// Attempts to parse a chunk of data.
479 ///
480 /// This method will attempt to parse the next incremental portion of a
481 /// WebAssembly binary. Data available for the module or component is
482 /// provided as `data`, and the data can be incomplete if more data has yet
483 /// to arrive. The `eof` flag indicates whether more data will ever be received.
484 ///
485 /// There are two ways parsing can succeed with this method:
486 ///
487 /// * `Chunk::NeedMoreData` - this indicates that there is not enough bytes
488 /// in `data` to parse a payload. The caller needs to wait for more data to
489 /// be available in this situation before calling this method again. It is
490 /// guaranteed that this is only returned if `eof` is `false`.
491 ///
492 /// * `Chunk::Parsed` - this indicates that a chunk of the input was
493 /// successfully parsed. The payload is available in this variant of what
494 /// was parsed, and this also indicates how many bytes of `data` was
495 /// consumed. It's expected that the caller will not provide these bytes
496 /// back to the [`Parser`] again.
497 ///
498 /// Note that all `Chunk` return values are connected, with a lifetime, to
499 /// the input buffer. Each parsed chunk borrows the input buffer and is a
500 /// view into it for successfully parsed chunks.
501 ///
502 /// It is expected that you'll call this method until `Payload::End` is
503 /// reached, at which point you're guaranteed that the parse has completed.
504 /// Note that complete parsing, for the top-level module or component,
505 /// implies that `data` is empty and `eof` is `true`.
506 ///
507 /// # Errors
508 ///
509 /// Parse errors are returned as an `Err`. Errors can happen when the
510 /// structure of the data is unexpected or if sections are too large for
511 /// example. Note that errors are not returned for malformed *contents* of
512 /// sections here. Sections are generally not individually parsed and each
513 /// returned [`Payload`] needs to be iterated over further to detect all
514 /// errors.
515 ///
516 /// # Examples
517 ///
518 /// An example of reading a wasm file from a stream (`std::io::Read`) and
519 /// incrementally parsing it.
520 ///
521 /// ```
522 /// use std::io::Read;
523 /// use anyhow::Result;
524 /// use wasmparser::{Parser, Chunk, Payload::*};
525 ///
526 /// fn parse(mut reader: impl Read) -> Result<()> {
527 /// let mut buf = Vec::new();
528 /// let mut cur = Parser::new(0);
529 /// let mut eof = false;
530 /// let mut stack = Vec::new();
531 ///
532 /// loop {
533 /// let (payload, consumed) = match cur.parse(&buf, eof)? {
534 /// Chunk::NeedMoreData(hint) => {
535 /// assert!(!eof); // otherwise an error would be returned
536 ///
537 /// // Use the hint to preallocate more space, then read
538 /// // some more data into our buffer.
539 /// //
540 /// // Note that the buffer management here is not ideal,
541 /// // but it's compact enough to fit in an example!
542 /// let len = buf.len();
543 /// buf.extend((0..hint).map(|_| 0u8));
544 /// let n = reader.read(&mut buf[len..])?;
545 /// buf.truncate(len + n);
546 /// eof = n == 0;
547 /// continue;
548 /// }
549 ///
550 /// Chunk::Parsed { consumed, payload } => (payload, consumed),
551 /// };
552 ///
553 /// match payload {
554 /// // Sections for WebAssembly modules
555 /// Version { .. } => { /* ... */ }
556 /// TypeSection(_) => { /* ... */ }
557 /// ImportSection(_) => { /* ... */ }
558 /// FunctionSection(_) => { /* ... */ }
559 /// TableSection(_) => { /* ... */ }
560 /// MemorySection(_) => { /* ... */ }
561 /// TagSection(_) => { /* ... */ }
562 /// GlobalSection(_) => { /* ... */ }
563 /// ExportSection(_) => { /* ... */ }
564 /// StartSection { .. } => { /* ... */ }
565 /// ElementSection(_) => { /* ... */ }
566 /// DataCountSection { .. } => { /* ... */ }
567 /// DataSection(_) => { /* ... */ }
568 ///
569 /// // Here we know how many functions we'll be receiving as
570 /// // `CodeSectionEntry`, so we can prepare for that, and
571 /// // afterwards we can parse and handle each function
572 /// // individually.
573 /// CodeSectionStart { .. } => { /* ... */ }
574 /// CodeSectionEntry(body) => {
575 /// // here we can iterate over `body` to parse the function
576 /// // and its locals
577 /// }
578 ///
579 /// // Sections for WebAssembly components
580 /// InstanceSection(_) => { /* ... */ }
581 /// CoreTypeSection(_) => { /* ... */ }
582 /// ComponentInstanceSection(_) => { /* ... */ }
583 /// ComponentAliasSection(_) => { /* ... */ }
584 /// ComponentTypeSection(_) => { /* ... */ }
585 /// ComponentCanonicalSection(_) => { /* ... */ }
586 /// ComponentStartSection { .. } => { /* ... */ }
587 /// ComponentImportSection(_) => { /* ... */ }
588 /// ComponentExportSection(_) => { /* ... */ }
589 ///
590 /// ModuleSection { parser, .. }
591 /// | ComponentSection { parser, .. } => {
592 /// stack.push(cur.clone());
593 /// cur = parser.clone();
594 /// }
595 ///
596 /// CustomSection(_) => { /* ... */ }
597 ///
598 /// // Once we've reached the end of a parser we either resume
599 /// // at the parent parser or we break out of the loop because
600 /// // we're done.
601 /// End(_) => {
602 /// if let Some(parent_parser) = stack.pop() {
603 /// cur = parent_parser;
604 /// } else {
605 /// break;
606 /// }
607 /// }
608 ///
609 /// // most likely you'd return an error here
610 /// _ => { /* ... */ }
611 /// }
612 ///
613 /// // once we're done processing the payload we can forget the
614 /// // original.
615 /// buf.drain(..consumed);
616 /// }
617 ///
618 /// Ok(())
619 /// }
620 ///
621 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
622 /// ```
623 pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
624 let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
625 (&data[..(self.max_size as usize)], true)
626 } else {
627 (data, eof)
628 };
629 // TODO: thread through `offset: u64` to `BinaryReader`, remove
630 // the cast here.
631 let starting_offset = self.offset as usize;
632 let mut reader = BinaryReader::new(data, starting_offset);
633 #[cfg(feature = "features")]
634 {
635 reader.set_features(self.features);
636 }
637 match self.parse_reader(&mut reader, eof) {
638 Ok(payload) => {
639 // Be sure to update our offset with how far we got in the
640 // reader
641 let consumed = reader.original_position() - starting_offset;
642 self.offset += usize_to_u64(consumed);
643 self.max_size -= usize_to_u64(consumed);
644 Ok(Chunk::Parsed {
645 consumed: consumed,
646 payload,
647 })
648 }
649 Err(e) => {
650 // If we're at EOF then there's no way we can recover from any
651 // error, so continue to propagate it.
652 if eof {
653 return Err(e);
654 }
655
656 // If our error doesn't look like it can be resolved with more
657 // data being pulled down, then propagate it, otherwise switch
658 // the error to "feed me please"
659 match e.inner.needed_hint {
660 Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
661 None => Err(e),
662 }
663 }
664 }
665 }
666
667 fn update_order(&mut self, order: Order, pos: usize) -> Result<()> {
668 let pos_u64 = usize_to_u64(pos);
669 if self.encoding == Encoding::Module {
670 match self.order {
671 (last_order, last_pos) if last_order >= order && last_pos < pos_u64 => {
672 bail!(pos, "section out of order")
673 }
674 _ => (),
675 }
676 }
677
678 self.order = (order, pos_u64);
679
680 Ok(())
681 }
682
683 fn parse_reader<'a>(
684 &mut self,
685 reader: &mut BinaryReader<'a>,
686 eof: bool,
687 ) -> Result<Payload<'a>> {
688 use Payload::*;
689
690 match self.state {
691 State::Header => {
692 let start = reader.original_position();
693 let header_version = reader.read_header_version()?;
694 let num = header_version as u16;
695 self.encoding = match (num, (header_version >> 16) as u16) {
696 (WASM_MODULE_VERSION, KIND_MODULE) => Encoding::Module,
697 (WASM_COMPONENT_VERSION, KIND_COMPONENT) => Encoding::Component,
698 _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
699 };
700 self.state = State::SectionStart;
701 Ok(Version {
702 num,
703 encoding: self.encoding,
704 range: start..reader.original_position(),
705 })
706 }
707 State::SectionStart => {
708 // If we're at eof and there are no bytes in our buffer, then
709 // that means we reached the end of the data since it's
710 // just a bunch of sections concatenated after the header.
711 if eof && reader.bytes_remaining() == 0 {
712 self.check_function_code_counts(reader.original_position())?;
713 self.check_data_count(reader.original_position())?;
714 return Ok(Payload::End(reader.original_position()));
715 }
716
717 // Corrupted binaries containing multiple modules or
718 // components will fail because a section can never start with
719 // the magic number: 0 is custom section, 'a' is section len
720 // of 97, `s` is section name string len of 115, at which
721 // point validation will fail because name string is bigger
722 // than section. Report a better error instead:
723 match reader.peek_bytes(4) {
724 Ok(peek) if peek == WASM_MAGIC_NUMBER => {
725 return Err(BinaryReaderError::new(
726 "expected section, got wasm magic number",
727 reader.original_position(),
728 ));
729 }
730 _ => {}
731 }
732
733 let id_pos = reader.original_position();
734 let id = reader.read_u8()?;
735 if id & 0x80 != 0 {
736 return Err(BinaryReaderError::new("malformed section id", id_pos));
737 }
738 let len_pos = reader.original_position();
739 let mut len = reader.read_var_u32()?;
740
741 // Test to make sure that this section actually fits within
742 // `Parser::max_size`. This doesn't matter for top-level modules
743 // but it is required for nested modules/components to correctly ensure
744 // that all sections live entirely within their section of the
745 // file.
746 let consumed = reader.original_position() - id_pos;
747 let section_overflow = self
748 .max_size
749 .checked_sub(usize_to_u64(consumed))
750 .and_then(|s| s.checked_sub(len.into()))
751 .is_none();
752 if section_overflow {
753 return Err(BinaryReaderError::new("section too large", len_pos));
754 }
755
756 match (self.encoding, id) {
757 // Custom sections for both modules and components.
758 (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
759
760 // Module sections
761 (Encoding::Module, TYPE_SECTION) => {
762 self.update_order(Order::Type, reader.original_position())?;
763 section(reader, len, TypeSectionReader::new, TypeSection)
764 }
765 (Encoding::Module, IMPORT_SECTION) => {
766 self.update_order(Order::Import, reader.original_position())?;
767 section(reader, len, ImportSectionReader::new, ImportSection)
768 }
769 (Encoding::Module, FUNCTION_SECTION) => {
770 self.update_order(Order::Function, reader.original_position())?;
771 let s = section(reader, len, FunctionSectionReader::new, FunctionSection)?;
772 match &s {
773 FunctionSection(f) => self.counts.function_entries = Some(f.count()),
774 _ => unreachable!(),
775 }
776 Ok(s)
777 }
778 (Encoding::Module, TABLE_SECTION) => {
779 self.update_order(Order::Table, reader.original_position())?;
780 section(reader, len, TableSectionReader::new, TableSection)
781 }
782 (Encoding::Module, MEMORY_SECTION) => {
783 self.update_order(Order::Memory, reader.original_position())?;
784 section(reader, len, MemorySectionReader::new, MemorySection)
785 }
786 (Encoding::Module, GLOBAL_SECTION) => {
787 self.update_order(Order::Global, reader.original_position())?;
788 section(reader, len, GlobalSectionReader::new, GlobalSection)
789 }
790 (Encoding::Module, EXPORT_SECTION) => {
791 self.update_order(Order::Export, reader.original_position())?;
792 section(reader, len, ExportSectionReader::new, ExportSection)
793 }
794 (Encoding::Module, START_SECTION) => {
795 self.update_order(Order::Start, reader.original_position())?;
796 let (func, range) = single_item(reader, len, "start")?;
797 Ok(StartSection { func, range })
798 }
799 (Encoding::Module, ELEMENT_SECTION) => {
800 self.update_order(Order::Element, reader.original_position())?;
801 section(reader, len, ElementSectionReader::new, ElementSection)
802 }
803 (Encoding::Module, CODE_SECTION) => {
804 self.update_order(Order::Code, reader.original_position())?;
805 let start = reader.original_position();
806 let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
807 self.counts.code_entries = Some(count);
808 self.check_function_code_counts(start)?;
809 let range = start..reader.original_position() + len as usize;
810 self.state = State::FunctionBody {
811 remaining: count,
812 len,
813 };
814 Ok(CodeSectionStart {
815 count,
816 range,
817 size: len,
818 })
819 }
820 (Encoding::Module, DATA_SECTION) => {
821 self.update_order(Order::Data, reader.original_position())?;
822 let s = section(reader, len, DataSectionReader::new, DataSection)?;
823 match &s {
824 DataSection(d) => self.counts.data_entries = Some(d.count()),
825 _ => unreachable!(),
826 }
827 self.check_data_count(reader.original_position())?;
828 Ok(s)
829 }
830 (Encoding::Module, DATA_COUNT_SECTION) => {
831 self.update_order(Order::DataCount, reader.original_position())?;
832 let (count, range) = single_item(reader, len, "data count")?;
833 self.counts.data_count = Some(count);
834 Ok(DataCountSection { count, range })
835 }
836 (Encoding::Module, TAG_SECTION) => {
837 self.update_order(Order::Tag, reader.original_position())?;
838 section(reader, len, TagSectionReader::new, TagSection)
839 }
840
841 // Component sections
842 #[cfg(feature = "component-model")]
843 (Encoding::Component, COMPONENT_MODULE_SECTION)
844 | (Encoding::Component, COMPONENT_SECTION) => {
845 if len as usize > MAX_WASM_MODULE_SIZE {
846 bail!(
847 len_pos,
848 "{} section is too large",
849 if id == 1 { "module" } else { "component " }
850 );
851 }
852
853 let range = reader.original_position()
854 ..reader.original_position() + usize::try_from(len).unwrap();
855 self.max_size -= u64::from(len);
856 self.offset += u64::from(len);
857 let mut parser = Parser::new(usize_to_u64(reader.original_position()));
858 #[cfg(feature = "features")]
859 {
860 parser.features = self.features;
861 }
862 parser.max_size = u64::from(len);
863
864 Ok(match id {
865 1 => ModuleSection {
866 parser,
867 unchecked_range: range,
868 },
869 4 => ComponentSection {
870 parser,
871 unchecked_range: range,
872 },
873 _ => unreachable!(),
874 })
875 }
876 #[cfg(feature = "component-model")]
877 (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
878 section(reader, len, InstanceSectionReader::new, InstanceSection)
879 }
880 #[cfg(feature = "component-model")]
881 (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
882 section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
883 }
884 #[cfg(feature = "component-model")]
885 (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
886 reader,
887 len,
888 ComponentInstanceSectionReader::new,
889 ComponentInstanceSection,
890 ),
891 #[cfg(feature = "component-model")]
892 (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
893 section(reader, len, SectionLimited::new, ComponentAliasSection)
894 }
895 #[cfg(feature = "component-model")]
896 (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
897 reader,
898 len,
899 ComponentTypeSectionReader::new,
900 ComponentTypeSection,
901 ),
902 #[cfg(feature = "component-model")]
903 (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
904 reader,
905 len,
906 ComponentCanonicalSectionReader::new,
907 ComponentCanonicalSection,
908 ),
909 #[cfg(feature = "component-model")]
910 (Encoding::Component, COMPONENT_START_SECTION) => {
911 match self.counts.component_start_sections {
912 false => self.counts.component_start_sections = true,
913 true => {
914 bail!(
915 reader.original_position(),
916 "component cannot have more than one start function"
917 )
918 }
919 }
920 let (start, range) = single_item(reader, len, "component start")?;
921 Ok(ComponentStartSection { start, range })
922 }
923 #[cfg(feature = "component-model")]
924 (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
925 reader,
926 len,
927 ComponentImportSectionReader::new,
928 ComponentImportSection,
929 ),
930 #[cfg(feature = "component-model")]
931 (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
932 reader,
933 len,
934 ComponentExportSectionReader::new,
935 ComponentExportSection,
936 ),
937 (_, id) => {
938 let offset = reader.original_position();
939 let contents = reader.read_bytes(len as usize)?;
940 let range = offset..offset + len as usize;
941 Ok(UnknownSection {
942 id,
943 contents,
944 range,
945 })
946 }
947 }
948 }
949
950 // Once we hit 0 remaining incrementally parsed items, with 0
951 // remaining bytes in each section, we're done and can switch back
952 // to parsing sections.
953 State::FunctionBody {
954 remaining: 0,
955 len: 0,
956 } => {
957 self.state = State::SectionStart;
958 self.parse_reader(reader, eof)
959 }
960
961 // ... otherwise trailing bytes with no remaining entries in these
962 // sections indicates an error.
963 State::FunctionBody { remaining: 0, len } => {
964 debug_assert!(len > 0);
965 let offset = reader.original_position();
966 Err(BinaryReaderError::new(
967 "trailing bytes at end of section",
968 offset,
969 ))
970 }
971
972 // Functions are relatively easy to parse when we know there's at
973 // least one remaining and at least one byte available to read
974 // things.
975 //
976 // We use the remaining length try to read a u32 size of the
977 // function, and using that size we require the entire function be
978 // resident in memory. This means that we're reading whole chunks of
979 // functions at a time.
980 //
981 // Limiting via `Parser::max_size` (nested parsing) happens above in
982 // `fn parse`, and limiting by our section size happens via
983 // `delimited`. Actual parsing of the function body is delegated to
984 // the caller to iterate over the `FunctionBody` structure.
985 State::FunctionBody { remaining, mut len } => {
986 let body = delimited(reader, &mut len, |r| {
987 Ok(FunctionBody::new(r.read_reader()?))
988 })?;
989 self.state = State::FunctionBody {
990 remaining: remaining - 1,
991 len,
992 };
993 Ok(CodeSectionEntry(body))
994 }
995 }
996 }
997
998 /// Convenience function that can be used to parse a module or component
999 /// that is entirely resident in memory.
1000 ///
1001 /// This function will parse the `data` provided as a WebAssembly module
1002 /// or component.
1003 ///
1004 /// Note that when this function yields sections that provide parsers,
1005 /// no further action is required for those sections as payloads from
1006 /// those parsers will be automatically returned.
1007 ///
1008 /// # Examples
1009 ///
1010 /// An example of reading a wasm file from a stream (`std::io::Read`) into
1011 /// a buffer and then parsing it.
1012 ///
1013 /// ```
1014 /// use std::io::Read;
1015 /// use anyhow::Result;
1016 /// use wasmparser::{Parser, Chunk, Payload::*};
1017 ///
1018 /// fn parse(mut reader: impl Read) -> Result<()> {
1019 /// let mut buf = Vec::new();
1020 /// reader.read_to_end(&mut buf)?;
1021 /// let parser = Parser::new(0);
1022 ///
1023 /// for payload in parser.parse_all(&buf) {
1024 /// match payload? {
1025 /// // Sections for WebAssembly modules
1026 /// Version { .. } => { /* ... */ }
1027 /// TypeSection(_) => { /* ... */ }
1028 /// ImportSection(_) => { /* ... */ }
1029 /// FunctionSection(_) => { /* ... */ }
1030 /// TableSection(_) => { /* ... */ }
1031 /// MemorySection(_) => { /* ... */ }
1032 /// TagSection(_) => { /* ... */ }
1033 /// GlobalSection(_) => { /* ... */ }
1034 /// ExportSection(_) => { /* ... */ }
1035 /// StartSection { .. } => { /* ... */ }
1036 /// ElementSection(_) => { /* ... */ }
1037 /// DataCountSection { .. } => { /* ... */ }
1038 /// DataSection(_) => { /* ... */ }
1039 ///
1040 /// // Here we know how many functions we'll be receiving as
1041 /// // `CodeSectionEntry`, so we can prepare for that, and
1042 /// // afterwards we can parse and handle each function
1043 /// // individually.
1044 /// CodeSectionStart { .. } => { /* ... */ }
1045 /// CodeSectionEntry(body) => {
1046 /// // here we can iterate over `body` to parse the function
1047 /// // and its locals
1048 /// }
1049 ///
1050 /// // Sections for WebAssembly components
1051 /// ModuleSection { .. } => { /* ... */ }
1052 /// InstanceSection(_) => { /* ... */ }
1053 /// CoreTypeSection(_) => { /* ... */ }
1054 /// ComponentSection { .. } => { /* ... */ }
1055 /// ComponentInstanceSection(_) => { /* ... */ }
1056 /// ComponentAliasSection(_) => { /* ... */ }
1057 /// ComponentTypeSection(_) => { /* ... */ }
1058 /// ComponentCanonicalSection(_) => { /* ... */ }
1059 /// ComponentStartSection { .. } => { /* ... */ }
1060 /// ComponentImportSection(_) => { /* ... */ }
1061 /// ComponentExportSection(_) => { /* ... */ }
1062 ///
1063 /// CustomSection(_) => { /* ... */ }
1064 ///
1065 /// // Once we've reached the end of a parser we either resume
1066 /// // at the parent parser or the payload iterator is at its
1067 /// // end and we're done.
1068 /// End(_) => {}
1069 ///
1070 /// // most likely you'd return an error here, but if you want
1071 /// // you can also inspect the raw contents of unknown sections
1072 /// other => {
1073 /// match other.as_section() {
1074 /// Some((id, range)) => { /* ... */ }
1075 /// None => { /* ... */ }
1076 /// }
1077 /// }
1078 /// }
1079 /// }
1080 ///
1081 /// Ok(())
1082 /// }
1083 ///
1084 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
1085 /// ```
1086 pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload<'_>>> {
1087 let mut stack = Vec::new();
1088 let mut cur = self;
1089 let mut done = false;
1090 iter::from_fn(move || {
1091 if done {
1092 return None;
1093 }
1094 let payload = match cur.parse(data, true) {
1095 // Propagate all errors
1096 Err(e) => {
1097 done = true;
1098 return Some(Err(e));
1099 }
1100
1101 // This isn't possible because `eof` is always true.
1102 Ok(Chunk::NeedMoreData(_)) => unreachable!(),
1103
1104 Ok(Chunk::Parsed { payload, consumed }) => {
1105 data = &data[consumed..];
1106 payload
1107 }
1108 };
1109
1110 match &payload {
1111 #[cfg(feature = "component-model")]
1112 Payload::ModuleSection { parser, .. }
1113 | Payload::ComponentSection { parser, .. } => {
1114 stack.push(cur.clone());
1115 cur = parser.clone();
1116 }
1117 Payload::End(_) => match stack.pop() {
1118 Some(p) => cur = p,
1119 None => done = true,
1120 },
1121
1122 _ => {}
1123 }
1124
1125 Some(Ok(payload))
1126 })
1127 }
1128
1129 /// Skip parsing the code section entirely.
1130 ///
1131 /// This function can be used to indicate, after receiving
1132 /// `CodeSectionStart`, that the section will not be parsed.
1133 ///
1134 /// The caller will be responsible for skipping `size` bytes (found in the
1135 /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
1136 /// after the `size` bytes have been skipped.
1137 ///
1138 /// # Panics
1139 ///
1140 /// This function will panic if the parser is not in a state where it's
1141 /// parsing the code section.
1142 ///
1143 /// # Examples
1144 ///
1145 /// ```
1146 /// use wasmparser::{Result, Parser, Chunk, Payload::*};
1147 /// use core::ops::Range;
1148 ///
1149 /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
1150 /// let mut parser = Parser::new(0);
1151 /// loop {
1152 /// let payload = match parser.parse(wasm, true)? {
1153 /// Chunk::Parsed { consumed, payload } => {
1154 /// wasm = &wasm[consumed..];
1155 /// payload
1156 /// }
1157 /// // this state isn't possible with `eof = true`
1158 /// Chunk::NeedMoreData(_) => unreachable!(),
1159 /// };
1160 /// match payload {
1161 /// TypeSection(s) => print_range("type section", &s.range()),
1162 /// ImportSection(s) => print_range("import section", &s.range()),
1163 /// // .. other sections
1164 ///
1165 /// // Print the range of the code section we see, but don't
1166 /// // actually iterate over each individual function.
1167 /// CodeSectionStart { range, size, .. } => {
1168 /// print_range("code section", &range);
1169 /// parser.skip_section();
1170 /// wasm = &wasm[size as usize..];
1171 /// }
1172 /// End(_) => break,
1173 /// _ => {}
1174 /// }
1175 /// }
1176 /// Ok(())
1177 /// }
1178 ///
1179 /// fn print_range(section: &str, range: &Range<usize>) {
1180 /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1181 /// }
1182 /// ```
1183 pub fn skip_section(&mut self) {
1184 let skip = match self.state {
1185 State::FunctionBody { remaining: _, len } => len,
1186 _ => panic!("wrong state to call `skip_section`"),
1187 };
1188 self.offset += u64::from(skip);
1189 self.max_size -= u64::from(skip);
1190 self.state = State::SectionStart;
1191 }
1192
1193 fn check_function_code_counts(&self, pos: usize) -> Result<()> {
1194 match (self.counts.function_entries, self.counts.code_entries) {
1195 (Some(n), Some(m)) if n != m => {
1196 bail!(pos, "function and code section have inconsistent lengths")
1197 }
1198 (Some(n), None) if n > 0 => bail!(
1199 pos,
1200 "function section has non-zero count but code section is absent"
1201 ),
1202 (None, Some(m)) if m > 0 => bail!(
1203 pos,
1204 "function section is absent but code section has non-zero count"
1205 ),
1206 _ => Ok(()),
1207 }
1208 }
1209
1210 fn check_data_count(&self, pos: usize) -> Result<()> {
1211 match (self.counts.data_count, self.counts.data_entries) {
1212 (Some(n), Some(m)) if n != m => {
1213 bail!(pos, "data count and data section have inconsistent lengths")
1214 }
1215 (Some(n), None) if n > 0 => {
1216 bail!(pos, "data count is non-zero but data section is absent")
1217 }
1218 _ => Ok(()),
1219 }
1220 }
1221}
1222
/// Converts a `usize` into a `u64`.
///
/// Panics if the value does not fit into a `u64`.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1226
1227/// Parses an entire section resident in memory into a `Payload`.
1228///
1229/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1230/// to construct the section to return.
1231fn section<'a, T>(
1232 reader: &mut BinaryReader<'a>,
1233 len: u32,
1234 ctor: fn(BinaryReader<'a>) -> Result<T>,
1235 variant: fn(T) -> Payload<'a>,
1236) -> Result<Payload<'a>> {
1237 let reader = reader.skip(|r| {
1238 r.read_bytes(len as usize)?;
1239 Ok(())
1240 })?;
1241 // clear the hint for "need this many more bytes" here because we already
1242 // read all the bytes, so it's not possible to read more bytes if this
1243 // fails.
1244 let reader = ctor(reader).map_err(clear_hint)?;
1245 Ok(variant(reader))
1246}
1247
1248/// Reads a section that is represented by a single uleb-encoded `u32`.
1249fn single_item<'a, T>(
1250 reader: &mut BinaryReader<'a>,
1251 len: u32,
1252 desc: &str,
1253) -> Result<(T, Range<usize>)>
1254where
1255 T: FromReader<'a>,
1256{
1257 let range = reader.original_position()..reader.original_position() + len as usize;
1258 let mut content = reader.skip(|r| {
1259 r.read_bytes(len as usize)?;
1260 Ok(())
1261 })?;
1262 // We can't recover from "unexpected eof" here because our entire section is
1263 // already resident in memory, so clear the hint for how many more bytes are
1264 // expected.
1265 let ret = content.read().map_err(clear_hint)?;
1266 if !content.eof() {
1267 bail!(
1268 content.original_position(),
1269 "unexpected content in the {desc} section",
1270 );
1271 }
1272 Ok((ret, range))
1273}
1274
1275/// Attempts to parse using `f`.
1276///
1277/// This will update `*len` with the number of bytes consumed, and it will cause
1278/// a failure to be returned instead of the number of bytes consumed exceeds
1279/// what `*len` currently is.
1280fn delimited<'a, T>(
1281 reader: &mut BinaryReader<'a>,
1282 len: &mut u32,
1283 f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1284) -> Result<T> {
1285 let start = reader.original_position();
1286 let ret = f(reader)?;
1287 *len = match (reader.original_position() - start)
1288 .try_into()
1289 .ok()
1290 .and_then(|i| len.checked_sub(i))
1291 {
1292 Some(i) => i,
1293 None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1294 };
1295 Ok(ret)
1296}
1297
1298impl Default for Parser {
1299 fn default() -> Parser {
1300 Parser::new(0)
1301 }
1302}
1303
1304impl Payload<'_> {
1305 /// If this `Payload` represents a section in the original wasm module then
1306 /// the section's id and range within the original wasm binary are returned.
1307 ///
1308 /// Not all payloads refer to entire sections, such as the `Version` and
1309 /// `CodeSectionEntry` variants. These variants will return `None` from this
1310 /// function.
1311 ///
1312 /// Otherwise this function will return `Some` where the first element is
1313 /// the byte identifier for the section and the second element is the range
1314 /// of the contents of the section within the original wasm binary.
1315 ///
1316 /// The purpose of this method is to enable tools to easily iterate over
1317 /// entire sections if necessary and handle sections uniformly, for example
1318 /// dropping custom sections while preserving all other sections.
1319 pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1320 use Payload::*;
1321
1322 match self {
1323 Version { .. } => None,
1324 TypeSection(s) => Some((TYPE_SECTION, s.range())),
1325 ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1326 FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1327 TableSection(s) => Some((TABLE_SECTION, s.range())),
1328 MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1329 TagSection(s) => Some((TAG_SECTION, s.range())),
1330 GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1331 ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1332 ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1333 DataSection(s) => Some((DATA_SECTION, s.range())),
1334 StartSection { range, .. } => Some((START_SECTION, range.clone())),
1335 DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1336 CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1337 CodeSectionEntry(_) => None,
1338
1339 #[cfg(feature = "component-model")]
1340 ModuleSection {
1341 unchecked_range: range,
1342 ..
1343 } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1344 #[cfg(feature = "component-model")]
1345 InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1346 #[cfg(feature = "component-model")]
1347 CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1348 #[cfg(feature = "component-model")]
1349 ComponentSection {
1350 unchecked_range: range,
1351 ..
1352 } => Some((COMPONENT_SECTION, range.clone())),
1353 #[cfg(feature = "component-model")]
1354 ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1355 #[cfg(feature = "component-model")]
1356 ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1357 #[cfg(feature = "component-model")]
1358 ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1359 #[cfg(feature = "component-model")]
1360 ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1361 #[cfg(feature = "component-model")]
1362 ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1363 #[cfg(feature = "component-model")]
1364 ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1365 #[cfg(feature = "component-model")]
1366 ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1367
1368 CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1369
1370 UnknownSection { id, range, .. } => Some((*id, range.clone())),
1371
1372 End(_) => None,
1373 }
1374 }
1375}
1376
1377impl fmt::Debug for Payload<'_> {
1378 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1379 use Payload::*;
1380 match self {
1381 Version {
1382 num,
1383 encoding,
1384 range,
1385 } => f
1386 .debug_struct("Version")
1387 .field("num", num)
1388 .field("encoding", encoding)
1389 .field("range", range)
1390 .finish(),
1391
1392 // Module sections
1393 TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1394 ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1395 FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1396 TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1397 MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1398 TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1399 GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1400 ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1401 ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1402 DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1403 StartSection { func, range } => f
1404 .debug_struct("StartSection")
1405 .field("func", func)
1406 .field("range", range)
1407 .finish(),
1408 DataCountSection { count, range } => f
1409 .debug_struct("DataCountSection")
1410 .field("count", count)
1411 .field("range", range)
1412 .finish(),
1413 CodeSectionStart { count, range, size } => f
1414 .debug_struct("CodeSectionStart")
1415 .field("count", count)
1416 .field("range", range)
1417 .field("size", size)
1418 .finish(),
1419 CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1420
1421 // Component sections
1422 #[cfg(feature = "component-model")]
1423 ModuleSection {
1424 parser: _,
1425 unchecked_range: range,
1426 } => f
1427 .debug_struct("ModuleSection")
1428 .field("range", range)
1429 .finish(),
1430 #[cfg(feature = "component-model")]
1431 InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1432 #[cfg(feature = "component-model")]
1433 CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1434 #[cfg(feature = "component-model")]
1435 ComponentSection {
1436 parser: _,
1437 unchecked_range: range,
1438 } => f
1439 .debug_struct("ComponentSection")
1440 .field("range", range)
1441 .finish(),
1442 #[cfg(feature = "component-model")]
1443 ComponentInstanceSection(_) => f
1444 .debug_tuple("ComponentInstanceSection")
1445 .field(&"...")
1446 .finish(),
1447 #[cfg(feature = "component-model")]
1448 ComponentAliasSection(_) => f
1449 .debug_tuple("ComponentAliasSection")
1450 .field(&"...")
1451 .finish(),
1452 #[cfg(feature = "component-model")]
1453 ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1454 #[cfg(feature = "component-model")]
1455 ComponentCanonicalSection(_) => f
1456 .debug_tuple("ComponentCanonicalSection")
1457 .field(&"...")
1458 .finish(),
1459 #[cfg(feature = "component-model")]
1460 ComponentStartSection { .. } => f
1461 .debug_tuple("ComponentStartSection")
1462 .field(&"...")
1463 .finish(),
1464 #[cfg(feature = "component-model")]
1465 ComponentImportSection(_) => f
1466 .debug_tuple("ComponentImportSection")
1467 .field(&"...")
1468 .finish(),
1469 #[cfg(feature = "component-model")]
1470 ComponentExportSection(_) => f
1471 .debug_tuple("ComponentExportSection")
1472 .field(&"...")
1473 .finish(),
1474
1475 CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1476
1477 UnknownSection { id, range, .. } => f
1478 .debug_struct("UnknownSection")
1479 .field("id", id)
1480 .field("range", range)
1481 .finish(),
1482
1483 End(offset) => f.debug_tuple("End").field(offset).finish(),
1484 }
1485 }
1486}
1487
1488fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1489 err.inner.needed_hint = None;
1490 err
1491}
1492
1493#[cfg(test)]
1494mod tests {
1495 use super::*;
1496
1497 macro_rules! assert_matches {
1498 ($a:expr, $b:pat $(,)?) => {
1499 match $a {
1500 $b => {}
1501 a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1502 }
1503 };
1504 }
1505
1506 #[test]
1507 fn header() {
1508 assert!(Parser::default().parse(&[], true).is_err());
1509 assert_matches!(
1510 Parser::default().parse(&[], false),
1511 Ok(Chunk::NeedMoreData(4)),
1512 );
1513 assert_matches!(
1514 Parser::default().parse(b"\0", false),
1515 Ok(Chunk::NeedMoreData(3)),
1516 );
1517 assert_matches!(
1518 Parser::default().parse(b"\0asm", false),
1519 Ok(Chunk::NeedMoreData(4)),
1520 );
1521 assert_matches!(
1522 Parser::default().parse(b"\0asm\x01\0\0\0", false),
1523 Ok(Chunk::Parsed {
1524 consumed: 8,
1525 payload: Payload::Version { num: 1, .. },
1526 }),
1527 );
1528 }
1529
1530 #[test]
1531 fn header_iter() {
1532 for _ in Parser::default().parse_all(&[]) {}
1533 for _ in Parser::default().parse_all(b"\0") {}
1534 for _ in Parser::default().parse_all(b"\0asm") {}
1535 for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1536 }
1537
1538 fn parser_after_header() -> Parser {
1539 let mut p = Parser::default();
1540 assert_matches!(
1541 p.parse(b"\0asm\x01\0\0\0", false),
1542 Ok(Chunk::Parsed {
1543 consumed: 8,
1544 payload: Payload::Version {
1545 num: WASM_MODULE_VERSION,
1546 encoding: Encoding::Module,
1547 ..
1548 },
1549 }),
1550 );
1551 p
1552 }
1553
1554 fn parser_after_component_header() -> Parser {
1555 let mut p = Parser::default();
1556 assert_matches!(
1557 p.parse(b"\0asm\x0d\0\x01\0", false),
1558 Ok(Chunk::Parsed {
1559 consumed: 8,
1560 payload: Payload::Version {
1561 num: WASM_COMPONENT_VERSION,
1562 encoding: Encoding::Component,
1563 ..
1564 },
1565 }),
1566 );
1567 p
1568 }
1569
1570 #[test]
1571 fn start_section() {
1572 assert_matches!(
1573 parser_after_header().parse(&[], false),
1574 Ok(Chunk::NeedMoreData(1)),
1575 );
1576 assert!(parser_after_header().parse(&[8], true).is_err());
1577 assert!(parser_after_header().parse(&[8, 1], true).is_err());
1578 assert!(parser_after_header().parse(&[8, 2], true).is_err());
1579 assert_matches!(
1580 parser_after_header().parse(&[8], false),
1581 Ok(Chunk::NeedMoreData(1)),
1582 );
1583 assert_matches!(
1584 parser_after_header().parse(&[8, 1], false),
1585 Ok(Chunk::NeedMoreData(1)),
1586 );
1587 assert_matches!(
1588 parser_after_header().parse(&[8, 2], false),
1589 Ok(Chunk::NeedMoreData(2)),
1590 );
1591 assert_matches!(
1592 parser_after_header().parse(&[8, 1, 1], false),
1593 Ok(Chunk::Parsed {
1594 consumed: 3,
1595 payload: Payload::StartSection { func: 1, .. },
1596 }),
1597 );
1598 assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1599 assert!(parser_after_header().parse(&[8, 0], false).is_err());
1600 }
1601
1602 #[test]
1603 fn end_works() {
1604 assert_matches!(
1605 parser_after_header().parse(&[], true),
1606 Ok(Chunk::Parsed {
1607 consumed: 0,
1608 payload: Payload::End(8),
1609 }),
1610 );
1611 }
1612
1613 #[test]
1614 fn type_section() {
1615 assert!(parser_after_header().parse(&[1], true).is_err());
1616 assert!(parser_after_header().parse(&[1, 0], false).is_err());
1617 assert!(parser_after_header().parse(&[8, 2], true).is_err());
1618 assert_matches!(
1619 parser_after_header().parse(&[1], false),
1620 Ok(Chunk::NeedMoreData(1)),
1621 );
1622 assert_matches!(
1623 parser_after_header().parse(&[1, 1], false),
1624 Ok(Chunk::NeedMoreData(1)),
1625 );
1626 assert_matches!(
1627 parser_after_header().parse(&[1, 1, 1], false),
1628 Ok(Chunk::Parsed {
1629 consumed: 3,
1630 payload: Payload::TypeSection(_),
1631 }),
1632 );
1633 assert_matches!(
1634 parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1635 Ok(Chunk::Parsed {
1636 consumed: 3,
1637 payload: Payload::TypeSection(_),
1638 }),
1639 );
1640 }
1641
1642 #[test]
1643 fn custom_section() {
1644 assert!(parser_after_header().parse(&[0], true).is_err());
1645 assert!(parser_after_header().parse(&[0, 0], false).is_err());
1646 assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1647 assert_matches!(
1648 parser_after_header().parse(&[0, 2, 1], false),
1649 Ok(Chunk::NeedMoreData(1)),
1650 );
1651 assert_custom(
1652 parser_after_header().parse(&[0, 1, 0], false).unwrap(),
1653 3,
1654 "",
1655 11,
1656 b"",
1657 Range { start: 10, end: 11 },
1658 );
1659 assert_custom(
1660 parser_after_header()
1661 .parse(&[0, 2, 1, b'a'], false)
1662 .unwrap(),
1663 4,
1664 "a",
1665 12,
1666 b"",
1667 Range { start: 10, end: 12 },
1668 );
1669 assert_custom(
1670 parser_after_header()
1671 .parse(&[0, 2, 0, b'a'], false)
1672 .unwrap(),
1673 4,
1674 "",
1675 11,
1676 b"a",
1677 Range { start: 10, end: 12 },
1678 );
1679 }
1680
1681 fn assert_custom(
1682 chunk: Chunk<'_>,
1683 expected_consumed: usize,
1684 expected_name: &str,
1685 expected_data_offset: usize,
1686 expected_data: &[u8],
1687 expected_range: Range<usize>,
1688 ) {
1689 let (consumed, s) = match chunk {
1690 Chunk::Parsed {
1691 consumed,
1692 payload: Payload::CustomSection(s),
1693 } => (consumed, s),
1694 _ => panic!("not a custom section payload"),
1695 };
1696 assert_eq!(consumed, expected_consumed);
1697 assert_eq!(s.name(), expected_name);
1698 assert_eq!(s.data_offset(), expected_data_offset);
1699 assert_eq!(s.data(), expected_data);
1700 assert_eq!(s.range(), expected_range);
1701 }
1702
    // Exercises the code section (id 10), on its own and after a function
    // section (id 3): truncated headers, entry-by-entry parsing of function
    // bodies, and the errors for sections that are too small or have trailing
    // bytes.
    #[test]
    fn function_section() {
        // Truncated or empty code sections are errors at eof...
        assert!(parser_after_header().parse(&[10], true).is_err());
        assert!(parser_after_header().parse(&[10, 0], true).is_err());
        assert!(parser_after_header().parse(&[10, 1], true).is_err());
        // ... and requests for more data otherwise.
        assert_matches!(
            parser_after_header().parse(&[10], false),
            Ok(Chunk::NeedMoreData(1))
        );
        assert_matches!(
            parser_after_header().parse(&[10, 1], false),
            Ok(Chunk::NeedMoreData(1))
        );

        // A code section with zero entries and no function section is fine.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 0, .. },
            }),
        );
        // 8 header bytes + 3 section bytes
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(11),
            }),
        );

        // One function in the function section, one matching body in the code
        // section. Only the section header and count are consumed by
        // `CodeSectionStart`; the body is returned by the next `parse` call.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[3, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::FunctionSection { .. },
            }),
        );
        assert_matches!(
            p.parse(&[10, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        // 8 header bytes + 4 + 3 + 1 bytes parsed above
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(16),
            }),
        );

        // 1 byte section with 1 function can't read the function body because
        // the section is too small
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[3, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::FunctionSection { .. },
            }),
        );
        assert_matches!(
            p.parse(&[10, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file"
        );

        // section with 2 functions but section is cut off
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[3, 2, 2, 0], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::FunctionSection { .. },
            }),
        );
        assert_matches!(
            p.parse(&[10, 2, 2], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 2, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        // The second body is still expected, but the section's 2 bytes have
        // already been used up by the count and the first body.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file",
        );

        // trailing data is bad
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[3, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::FunctionSection { .. },
            }),
        );
        assert_matches!(
            p.parse(&[10, 3, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        // All entries have been read but one byte of the section is left.
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "trailing bytes at end of section",
        );
    }
1840
#[test]
fn single_module() {
    // Preamble of a core module: the `\0asm` magic followed by version 1.
    let module_header = b"\0asm\x01\0\0\0";

    let mut parent = parser_after_component_header();

    // One byte alone is not enough for the parent to make progress.
    assert_matches!(parent.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

    // Open a module section whose declared size is 8 bytes and take the
    // sub-parser that comes with it.
    let section_start = parent.parse(&[1, 8], false);
    let mut sub = match section_start {
        Ok(Chunk::Parsed {
            consumed: 2,
            payload: Payload::ModuleSection { parser, .. },
        }) => parser,
        other => panic!("bad parse {other:?}"),
    };

    // Feed the nested module's header to the sub-parser in stages: nothing,
    // then only the magic, then the full 8-byte header.
    assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
    assert_matches!(
        sub.parse(&module_header[..4], false),
        Ok(Chunk::NeedMoreData(4))
    );
    assert_matches!(
        sub.parse(module_header, false),
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version {
                num: 1,
                encoding: Encoding::Module,
                ..
            },
        }),
    );

    // The sub-parser is limited to the bytes of its section, so this extra
    // byte belongs to the parent: the sub-parser must report the end of the
    // module without consuming it.
    assert_matches!(
        sub.parse(&[10], false),
        Ok(Chunk::Parsed {
            consumed: 0,
            payload: Payload::End(18),
        }),
    );

    // Control is back with the parent parser. With no input it asks for
    // more data, and once end-of-input is signalled it finishes cleanly,
    // which closes out the module section.
    assert_matches!(parent.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
    assert_matches!(
        parent.parse(&[], true),
        Ok(Chunk::Parsed {
            consumed: 0,
            payload: Payload::End(18),
        }),
    );
}
1892
#[test]
fn nested_section_too_big() {
    let mut parent = parser_after_component_header();

    // Open a module section that declares a 10-byte module.
    let section_start = parent.parse(&[1, 10], false);
    let mut sub = match section_start {
        Ok(Chunk::Parsed {
            consumed: 2,
            payload: Payload::ModuleSection { parser, .. },
        }) => parser,
        other => panic!("bad parse {other:?}"),
    };

    // The module header takes 8 of those bytes, which leaves only 2 bytes
    // for the rest of the nested module.
    let module_header = b"\0asm\x01\0\0\0";
    assert_matches!(
        sub.parse(module_header, false),
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version { num: 1, .. },
        }),
    );

    // A section that declares it's bigger than the space left in the outer
    // module must be rejected. Here a custom section claims one byte of
    // content, but that content byte would live outside of the parent's
    // module section.
    let err = sub.parse(&[0, 1, 0], false).unwrap_err();
    assert_eq!(err.message(), "section too large");
}
1925}