// wasmparser/parser.rs

1#[cfg(feature = "features")]
2use crate::WasmFeatures;
3use crate::binary_reader::WASM_MAGIC_NUMBER;
4use crate::prelude::*;
5use crate::{
6    BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,
7    ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,
8    ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,
9    TypeSectionReader,
10};
11#[cfg(feature = "component-model")]
12use crate::{
13    ComponentCanonicalSectionReader, ComponentExportSectionReader, ComponentImportSectionReader,
14    ComponentInstanceSectionReader, ComponentStartFunction, ComponentTypeSectionReader,
15    CoreTypeSectionReader, InstanceSectionReader, SectionLimited, limits::MAX_WASM_MODULE_SIZE,
16};
17use core::fmt;
18use core::iter;
19use core::ops::Range;
20
/// Version number expected in the low 16 bits of the header of a core
/// WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
/// Version number expected in the low 16 bits of the header of a WebAssembly
/// component.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

// The high 16 bits of the 32-bit header word that follows the magic number
// say which kind of binary this is; the low 16 bits hold the version above.
const KIND_MODULE: u16 = 0x00;
const KIND_COMPONENT: u16 = 0x01;
35
/// The supported encoding formats for the parser.
///
/// The encoding is determined from the binary's header and reported through
/// [`Payload::Version`].
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The encoding format is a WebAssembly module.
    Module,
    /// The encoding format is a WebAssembly component.
    Component,
}
44
// Bookkeeping gathered while parsing. Every counter starts out as `None`
// (via `Default`).
//
// NOTE(review): presumably each `Option` is filled in when the corresponding
// section is encountered so that the totals can be cross-checked once the end
// of the binary is reached -- confirm against the code that updates them.
#[derive(Debug, Clone, Default)]
struct ParserCounts {
    function_entries: Option<u32>,
    code_entries: Option<u32>,
    data_entries: Option<u32>,
    data_count: Option<u32>,
    #[cfg(feature = "component-model")]
    component_start_sections: bool,
}
54
// Section order for WebAssembly modules.
//
// Component sections are unordered and allow for duplicates,
// so this isn't used for components.
//
// The derived `Ord` follows declaration order, so the order of the variants
// below is significant: a section is compared against the previously seen one
// using `>=`. Do not reorder variants without intending to change which
// section orders are accepted.
#[derive(Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Debug)]
pub(crate) enum Order {
    // No ordered section has been seen yet.
    #[default]
    Initial,
    Type,
    Import,
    Function,
    Table,
    Memory,
    Tag,
    Global,
    Export,
    Start,
    Element,
    DataCount,
    Code,
    Data,
}
77
78/// An incremental parser of a binary WebAssembly module or component.
79///
80/// This type is intended to be used to incrementally parse a WebAssembly module
81/// or component as bytes become available for the module. This can also be used
82/// to parse modules or components that are already entirely resident within memory.
83///
84/// This primary function for a parser is the [`Parser::parse`] function which
85/// will incrementally consume input. You can also use the [`Parser::parse_all`]
86/// function to parse a module or component that is entirely resident in memory.
87#[derive(Debug, Clone)]
88pub struct Parser {
89    state: State,
90    offset: u64,
91    max_size: u64,
92    encoding: Encoding,
93    #[cfg(feature = "features")]
94    features: WasmFeatures,
95    counts: ParserCounts,
96    order: (Order, u64),
97}
98
// Where the parser currently is within the binary.
#[derive(Debug, Clone)]
enum State {
    // Nothing has been parsed yet: the magic number and version come next.
    Header,
    // Positioned at a section boundary (or at the end of the input).
    SectionStart,
    // Inside the code section.
    //
    // NOTE(review): presumably `remaining` is the number of function bodies
    // still to be yielded and `len` the number of code-section bytes left --
    // confirm against the code that consumes this state.
    FunctionBody { remaining: u32, len: u32 },
}
105
106/// A successful return payload from [`Parser::parse`].
107///
108/// On success one of two possible values can be returned, either that more data
109/// is needed to continue parsing or a chunk of the input was parsed, indicating
110/// how much of it was parsed.
111#[derive(Debug)]
112pub enum Chunk<'a> {
113    /// This can be returned at any time and indicates that more data is needed
114    /// to proceed with parsing. Zero bytes were consumed from the input to
115    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
116    /// bytes are needed to continue parsing.
117    NeedMoreData(u64),
118
119    /// A chunk was successfully parsed.
120    Parsed {
121        /// This many bytes of the `data` input to [`Parser::parse`] were
122        /// consumed to produce `payload`.
123        consumed: usize,
124        /// The value that we actually parsed.
125        payload: Payload<'a>,
126    },
127}
128
129/// Values that can be parsed from a WebAssembly module or component.
130///
131/// This enumeration is all possible chunks of pieces that can be parsed by a
132/// [`Parser`] from a binary WebAssembly module or component. Note that for many
133/// sections the entire section is parsed all at once, whereas other functions,
134/// like the code section, are parsed incrementally. This is a distinction where some
135/// sections, like the type section, are required to be fully resident in memory
136/// (fully downloaded) before proceeding. Other sections, like the code section,
137/// can be processed in a streaming fashion where each function is extracted
138/// individually so it can possibly be shipped to another thread while you wait
139/// for more functions to get downloaded.
140///
141/// Note that payloads, when returned, do not indicate that the module or component
142/// is valid. For example when you receive a `Payload::TypeSection` the type
143/// section itself has not yet actually been parsed. The reader returned will be
144/// able to parse it, but you'll have to actually iterate the reader to do the
145/// full parse. Each payload returned is intended to be a *window* into the
146/// original `data` passed to [`Parser::parse`] which can be further processed
147/// if necessary.
148#[non_exhaustive]
149pub enum Payload<'a> {
150    /// Indicates the header of a WebAssembly module or component.
151    Version {
152        /// The version number found in the header.
153        num: u16,
154        /// The encoding format being parsed.
155        encoding: Encoding,
156        /// The range of bytes that were parsed to consume the header of the
157        /// module or component. Note that this range is relative to the start
158        /// of the byte stream.
159        range: Range<usize>,
160    },
161
162    /// A module type section was received and the provided reader can be
163    /// used to parse the contents of the type section.
164    TypeSection(TypeSectionReader<'a>),
165    /// A module import section was received and the provided reader can be
166    /// used to parse the contents of the import section.
167    ImportSection(ImportSectionReader<'a>),
168    /// A module function section was received and the provided reader can be
169    /// used to parse the contents of the function section.
170    FunctionSection(FunctionSectionReader<'a>),
171    /// A module table section was received and the provided reader can be
172    /// used to parse the contents of the table section.
173    TableSection(TableSectionReader<'a>),
174    /// A module memory section was received and the provided reader can be
175    /// used to parse the contents of the memory section.
176    MemorySection(MemorySectionReader<'a>),
177    /// A module tag section was received, and the provided reader can be
178    /// used to parse the contents of the tag section.
179    TagSection(TagSectionReader<'a>),
180    /// A module global section was received and the provided reader can be
181    /// used to parse the contents of the global section.
182    GlobalSection(GlobalSectionReader<'a>),
183    /// A module export section was received, and the provided reader can be
184    /// used to parse the contents of the export section.
185    ExportSection(ExportSectionReader<'a>),
186    /// A module start section was received.
187    StartSection {
188        /// The start function index
189        func: u32,
190        /// The range of bytes that specify the `func` field, specified in
191        /// offsets relative to the start of the byte stream.
192        range: Range<usize>,
193    },
194    /// A module element section was received and the provided reader can be
195    /// used to parse the contents of the element section.
196    ElementSection(ElementSectionReader<'a>),
197    /// A module data count section was received.
198    DataCountSection {
199        /// The number of data segments.
200        count: u32,
201        /// The range of bytes that specify the `count` field, specified in
202        /// offsets relative to the start of the byte stream.
203        range: Range<usize>,
204    },
205    /// A module data section was received and the provided reader can be
206    /// used to parse the contents of the data section.
207    DataSection(DataSectionReader<'a>),
208    /// Indicator of the start of the code section of a WebAssembly module.
209    ///
210    /// This entry is returned whenever the code section starts. The `count`
211    /// field indicates how many entries are in this code section. After
212    /// receiving this start marker you're guaranteed that the next `count`
213    /// items will be either `CodeSectionEntry` or an error will be returned.
214    ///
215    /// This, unlike other sections, is intended to be used for streaming the
216    /// contents of the code section. The code section is not required to be
217    /// fully resident in memory when we parse it. Instead a [`Parser`] is
218    /// capable of parsing piece-by-piece of a code section.
219    CodeSectionStart {
220        /// The number of functions in this section.
221        count: u32,
222        /// The range of bytes that represent this section, specified in
223        /// offsets relative to the start of the byte stream.
224        range: Range<usize>,
225        /// The size, in bytes, of the remaining contents of this section.
226        ///
227        /// This can be used in combination with [`Parser::skip_section`]
228        /// where the caller will know how many bytes to skip before feeding
229        /// bytes into `Parser` again.
230        size: u32,
231    },
232    /// An entry of the code section, a function, was parsed from a WebAssembly
233    /// module.
234    ///
235    /// This entry indicates that a function was successfully received from the
236    /// code section, and the payload here is the window into the original input
237    /// where the function resides. Note that the function itself has not been
238    /// parsed, it's only been outlined. You'll need to process the
239    /// `FunctionBody` provided to test whether it parses and/or is valid.
240    CodeSectionEntry(FunctionBody<'a>),
241
242    /// A core module section was received and the provided parser can be
243    /// used to parse the nested module.
244    ///
245    /// This variant is special in that it returns a sub-`Parser`. Upon
246    /// receiving a `ModuleSection` it is expected that the returned
247    /// `Parser` will be used instead of the parent `Parser` until the parse has
248    /// finished. You'll need to feed data into the `Parser` returned until it
249    /// returns `Payload::End`. After that you'll switch back to the parent
250    /// parser to resume parsing the rest of the current component.
251    ///
252    /// Note that binaries will not be parsed correctly if you feed the data for
253    /// a nested module into the parent [`Parser`].
254    #[cfg(feature = "component-model")]
255    ModuleSection {
256        /// The parser for the nested module.
257        parser: Parser,
258        /// The range of bytes that represent the nested module in the
259        /// original byte stream.
260        ///
261        /// Note that, to better support streaming parsing and validation, the
262        /// validator does *not* check that this range is in bounds.
263        unchecked_range: Range<usize>,
264    },
265    /// A core instance section was received and the provided parser can be
266    /// used to parse the contents of the core instance section.
267    ///
268    /// Currently this section is only parsed in a component.
269    #[cfg(feature = "component-model")]
270    InstanceSection(InstanceSectionReader<'a>),
271    /// A core type section was received and the provided parser can be
272    /// used to parse the contents of the core type section.
273    ///
274    /// Currently this section is only parsed in a component.
275    #[cfg(feature = "component-model")]
276    CoreTypeSection(CoreTypeSectionReader<'a>),
277    /// A component section from a WebAssembly component was received and the
278    /// provided parser can be used to parse the nested component.
279    ///
280    /// This variant is special in that it returns a sub-`Parser`. Upon
281    /// receiving a `ComponentSection` it is expected that the returned
282    /// `Parser` will be used instead of the parent `Parser` until the parse has
283    /// finished. You'll need to feed data into the `Parser` returned until it
284    /// returns `Payload::End`. After that you'll switch back to the parent
285    /// parser to resume parsing the rest of the current component.
286    ///
287    /// Note that binaries will not be parsed correctly if you feed the data for
288    /// a nested component into the parent [`Parser`].
289    #[cfg(feature = "component-model")]
290    ComponentSection {
291        /// The parser for the nested component.
292        parser: Parser,
293        /// The range of bytes that represent the nested component in the
294        /// original byte stream.
295        ///
296        /// Note that, to better support streaming parsing and validation, the
297        /// validator does *not* check that this range is in bounds.
298        unchecked_range: Range<usize>,
299    },
300    /// A component instance section was received and the provided reader can be
301    /// used to parse the contents of the component instance section.
302    #[cfg(feature = "component-model")]
303    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
304    /// A component alias section was received and the provided reader can be
305    /// used to parse the contents of the component alias section.
306    #[cfg(feature = "component-model")]
307    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
308    /// A component type section was received and the provided reader can be
309    /// used to parse the contents of the component type section.
310    #[cfg(feature = "component-model")]
311    ComponentTypeSection(ComponentTypeSectionReader<'a>),
312    /// A component canonical section was received and the provided reader can be
313    /// used to parse the contents of the component canonical section.
314    #[cfg(feature = "component-model")]
315    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
316    /// A component start section was received.
317    #[cfg(feature = "component-model")]
318    ComponentStartSection {
319        /// The start function description.
320        start: ComponentStartFunction,
321        /// The range of bytes that specify the `start` field.
322        range: Range<usize>,
323    },
324    /// A component import section was received and the provided reader can be
325    /// used to parse the contents of the component import section.
326    #[cfg(feature = "component-model")]
327    ComponentImportSection(ComponentImportSectionReader<'a>),
328    /// A component export section was received, and the provided reader can be
329    /// used to parse the contents of the component export section.
330    #[cfg(feature = "component-model")]
331    ComponentExportSection(ComponentExportSectionReader<'a>),
332
333    /// A module or component custom section was received.
334    CustomSection(CustomSectionReader<'a>),
335
336    /// An unknown section was found.
337    ///
338    /// This variant is returned for all unknown sections encountered. This
339    /// likely wants to be interpreted as an error by consumers of the parser,
340    /// but this can also be used to parse sections currently unsupported by
341    /// the parser.
342    UnknownSection {
343        /// The 8-bit identifier for this section.
344        id: u8,
345        /// The contents of this section.
346        contents: &'a [u8],
347        /// The range of bytes, relative to the start of the original data
348        /// stream, that the contents of this section reside in.
349        range: Range<usize>,
350    },
351
352    /// The end of the WebAssembly module or component was reached.
353    ///
354    /// The value is the offset in the input byte stream where the end
355    /// was reached.
356    End(usize),
357}
358
// Section ids used by core WebAssembly modules.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids used by WebAssembly components. These numerically overlap with
// the core module ids above, so an id is only meaningful together with the
// encoding of the binary being parsed.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;
396
397impl Parser {
398    /// Creates a new parser.
399    ///
400    /// Reports errors and ranges relative to `offset` provided, where `offset`
401    /// is some logical offset within the input stream that we're parsing.
402    pub fn new(offset: u64) -> Parser {
403        Parser {
404            state: State::Header,
405            offset,
406            max_size: u64::MAX,
407            // Assume the encoding is a module until we know otherwise
408            encoding: Encoding::Module,
409            #[cfg(feature = "features")]
410            features: WasmFeatures::all(),
411            counts: ParserCounts::default(),
412            order: (Order::default(), offset),
413        }
414    }
415
416    /// Tests whether `bytes` looks like a core WebAssembly module.
417    ///
418    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
419    /// starts with the standard core WebAssembly header.
420    pub fn is_core_wasm(bytes: &[u8]) -> bool {
421        const HEADER: [u8; 8] = [
422            WASM_MAGIC_NUMBER[0],
423            WASM_MAGIC_NUMBER[1],
424            WASM_MAGIC_NUMBER[2],
425            WASM_MAGIC_NUMBER[3],
426            WASM_MODULE_VERSION.to_le_bytes()[0],
427            WASM_MODULE_VERSION.to_le_bytes()[1],
428            KIND_MODULE.to_le_bytes()[0],
429            KIND_MODULE.to_le_bytes()[1],
430        ];
431        bytes.starts_with(&HEADER)
432    }
433
434    /// Tests whether `bytes` looks like a WebAssembly component.
435    ///
436    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
437    /// starts with the standard WebAssembly component header.
438    pub fn is_component(bytes: &[u8]) -> bool {
439        const HEADER: [u8; 8] = [
440            WASM_MAGIC_NUMBER[0],
441            WASM_MAGIC_NUMBER[1],
442            WASM_MAGIC_NUMBER[2],
443            WASM_MAGIC_NUMBER[3],
444            WASM_COMPONENT_VERSION.to_le_bytes()[0],
445            WASM_COMPONENT_VERSION.to_le_bytes()[1],
446            KIND_COMPONENT.to_le_bytes()[0],
447            KIND_COMPONENT.to_le_bytes()[1],
448        ];
449        bytes.starts_with(&HEADER)
450    }
451
452    /// Returns the currently active set of wasm features that this parser is
453    /// using while parsing.
454    ///
455    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
456    ///
457    /// For more information see [`BinaryReader::new`].
458    #[cfg(feature = "features")]
459    pub fn features(&self) -> WasmFeatures {
460        self.features
461    }
462
463    /// Sets the wasm features active while parsing to the `features` specified.
464    ///
465    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
466    ///
467    /// For more information see [`BinaryReader::new`].
468    #[cfg(feature = "features")]
469    pub fn set_features(&mut self, features: WasmFeatures) {
470        self.features = features;
471    }
472
473    /// Returns the original offset that this parser is currently at.
474    pub fn offset(&self) -> u64 {
475        self.offset
476    }
477
478    /// Attempts to parse a chunk of data.
479    ///
480    /// This method will attempt to parse the next incremental portion of a
481    /// WebAssembly binary. Data available for the module or component is
482    /// provided as `data`, and the data can be incomplete if more data has yet
483    /// to arrive. The `eof` flag indicates whether more data will ever be received.
484    ///
485    /// There are two ways parsing can succeed with this method:
486    ///
487    /// * `Chunk::NeedMoreData` - this indicates that there is not enough bytes
488    ///   in `data` to parse a payload. The caller needs to wait for more data to
489    ///   be available in this situation before calling this method again. It is
490    ///   guaranteed that this is only returned if `eof` is `false`.
491    ///
492    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
493    ///   successfully parsed. The payload is available in this variant of what
494    ///   was parsed, and this also indicates how many bytes of `data` was
495    ///   consumed. It's expected that the caller will not provide these bytes
496    ///   back to the [`Parser`] again.
497    ///
498    /// Note that all `Chunk` return values are connected, with a lifetime, to
499    /// the input buffer. Each parsed chunk borrows the input buffer and is a
500    /// view into it for successfully parsed chunks.
501    ///
502    /// It is expected that you'll call this method until `Payload::End` is
503    /// reached, at which point you're guaranteed that the parse has completed.
504    /// Note that complete parsing, for the top-level module or component,
505    /// implies that `data` is empty and `eof` is `true`.
506    ///
507    /// # Errors
508    ///
509    /// Parse errors are returned as an `Err`. Errors can happen when the
510    /// structure of the data is unexpected or if sections are too large for
511    /// example. Note that errors are not returned for malformed *contents* of
512    /// sections here. Sections are generally not individually parsed and each
513    /// returned [`Payload`] needs to be iterated over further to detect all
514    /// errors.
515    ///
516    /// # Examples
517    ///
518    /// An example of reading a wasm file from a stream (`std::io::Read`) and
519    /// incrementally parsing it.
520    ///
521    /// ```
522    /// use std::io::Read;
523    /// use anyhow::Result;
524    /// use wasmparser::{Parser, Chunk, Payload::*};
525    ///
526    /// fn parse(mut reader: impl Read) -> Result<()> {
527    ///     let mut buf = Vec::new();
528    ///     let mut cur = Parser::new(0);
529    ///     let mut eof = false;
530    ///     let mut stack = Vec::new();
531    ///
532    ///     loop {
533    ///         let (payload, consumed) = match cur.parse(&buf, eof)? {
534    ///             Chunk::NeedMoreData(hint) => {
535    ///                 assert!(!eof); // otherwise an error would be returned
536    ///
537    ///                 // Use the hint to preallocate more space, then read
538    ///                 // some more data into our buffer.
539    ///                 //
540    ///                 // Note that the buffer management here is not ideal,
541    ///                 // but it's compact enough to fit in an example!
542    ///                 let len = buf.len();
543    ///                 buf.extend((0..hint).map(|_| 0u8));
544    ///                 let n = reader.read(&mut buf[len..])?;
545    ///                 buf.truncate(len + n);
546    ///                 eof = n == 0;
547    ///                 continue;
548    ///             }
549    ///
550    ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
551    ///         };
552    ///
553    ///         match payload {
554    ///             // Sections for WebAssembly modules
555    ///             Version { .. } => { /* ... */ }
556    ///             TypeSection(_) => { /* ... */ }
557    ///             ImportSection(_) => { /* ... */ }
558    ///             FunctionSection(_) => { /* ... */ }
559    ///             TableSection(_) => { /* ... */ }
560    ///             MemorySection(_) => { /* ... */ }
561    ///             TagSection(_) => { /* ... */ }
562    ///             GlobalSection(_) => { /* ... */ }
563    ///             ExportSection(_) => { /* ... */ }
564    ///             StartSection { .. } => { /* ... */ }
565    ///             ElementSection(_) => { /* ... */ }
566    ///             DataCountSection { .. } => { /* ... */ }
567    ///             DataSection(_) => { /* ... */ }
568    ///
569    ///             // Here we know how many functions we'll be receiving as
570    ///             // `CodeSectionEntry`, so we can prepare for that, and
571    ///             // afterwards we can parse and handle each function
572    ///             // individually.
573    ///             CodeSectionStart { .. } => { /* ... */ }
574    ///             CodeSectionEntry(body) => {
575    ///                 // here we can iterate over `body` to parse the function
576    ///                 // and its locals
577    ///             }
578    ///
579    ///             // Sections for WebAssembly components
580    ///             InstanceSection(_) => { /* ... */ }
581    ///             CoreTypeSection(_) => { /* ... */ }
582    ///             ComponentInstanceSection(_) => { /* ... */ }
583    ///             ComponentAliasSection(_) => { /* ... */ }
584    ///             ComponentTypeSection(_) => { /* ... */ }
585    ///             ComponentCanonicalSection(_) => { /* ... */ }
586    ///             ComponentStartSection { .. } => { /* ... */ }
587    ///             ComponentImportSection(_) => { /* ... */ }
588    ///             ComponentExportSection(_) => { /* ... */ }
589    ///
590    ///             ModuleSection { parser, .. }
591    ///             | ComponentSection { parser, .. } => {
592    ///                 stack.push(cur.clone());
593    ///                 cur = parser.clone();
594    ///             }
595    ///
596    ///             CustomSection(_) => { /* ... */ }
597    ///
598    ///             // Once we've reached the end of a parser we either resume
599    ///             // at the parent parser or we break out of the loop because
600    ///             // we're done.
601    ///             End(_) => {
602    ///                 if let Some(parent_parser) = stack.pop() {
603    ///                     cur = parent_parser;
604    ///                 } else {
605    ///                     break;
606    ///                 }
607    ///             }
608    ///
609    ///             // most likely you'd return an error here
610    ///             _ => { /* ... */ }
611    ///         }
612    ///
613    ///         // once we're done processing the payload we can forget the
614    ///         // original.
615    ///         buf.drain(..consumed);
616    ///     }
617    ///
618    ///     Ok(())
619    /// }
620    ///
621    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
622    /// ```
623    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
624        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
625            (&data[..(self.max_size as usize)], true)
626        } else {
627            (data, eof)
628        };
629        // TODO: thread through `offset: u64` to `BinaryReader`, remove
630        // the cast here.
631        let starting_offset = self.offset as usize;
632        let mut reader = BinaryReader::new(data, starting_offset);
633        #[cfg(feature = "features")]
634        {
635            reader.set_features(self.features);
636        }
637        match self.parse_reader(&mut reader, eof) {
638            Ok(payload) => {
639                // Be sure to update our offset with how far we got in the
640                // reader
641                let consumed = reader.original_position() - starting_offset;
642                self.offset += usize_to_u64(consumed);
643                self.max_size -= usize_to_u64(consumed);
644                Ok(Chunk::Parsed {
645                    consumed: consumed,
646                    payload,
647                })
648            }
649            Err(e) => {
650                // If we're at EOF then there's no way we can recover from any
651                // error, so continue to propagate it.
652                if eof {
653                    return Err(e);
654                }
655
656                // If our error doesn't look like it can be resolved with more
657                // data being pulled down, then propagate it, otherwise switch
658                // the error to "feed me please"
659                match e.inner.needed_hint {
660                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
661                    None => Err(e),
662                }
663            }
664        }
665    }
666
667    fn update_order(&mut self, order: Order, pos: usize) -> Result<()> {
668        let pos_u64 = usize_to_u64(pos);
669        if self.encoding == Encoding::Module {
670            match self.order {
671                (last_order, last_pos) if last_order >= order && last_pos < pos_u64 => {
672                    bail!(pos, "section out of order")
673                }
674                _ => (),
675            }
676        }
677
678        self.order = (order, pos_u64);
679
680        Ok(())
681    }
682
683    fn parse_reader<'a>(
684        &mut self,
685        reader: &mut BinaryReader<'a>,
686        eof: bool,
687    ) -> Result<Payload<'a>> {
688        use Payload::*;
689
690        match self.state {
691            State::Header => {
692                let start = reader.original_position();
693                let header_version = reader.read_header_version()?;
694                let num = header_version as u16;
695                self.encoding = match (num, (header_version >> 16) as u16) {
696                    (WASM_MODULE_VERSION, KIND_MODULE) => Encoding::Module,
697                    (WASM_COMPONENT_VERSION, KIND_COMPONENT) => Encoding::Component,
698                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
699                };
700                self.state = State::SectionStart;
701                Ok(Version {
702                    num,
703                    encoding: self.encoding,
704                    range: start..reader.original_position(),
705                })
706            }
707            State::SectionStart => {
708                // If we're at eof and there are no bytes in our buffer, then
709                // that means we reached the end of the data since it's
710                // just a bunch of sections concatenated after the header.
711                if eof && reader.bytes_remaining() == 0 {
712                    self.check_function_code_counts(reader.original_position())?;
713                    self.check_data_count(reader.original_position())?;
714                    return Ok(Payload::End(reader.original_position()));
715                }
716
717                // Corrupted binaries containing multiple modules or
718                // components will fail because a section can never start with
719                // the magic number: 0 is custom section, 'a' is section len
720                // of 97, `s` is section name string len of 115, at which
721                // point validation will fail because name string is bigger
722                // than section. Report a better error instead:
723                match reader.peek_bytes(4) {
724                    Ok(peek) if peek == WASM_MAGIC_NUMBER => {
725                        return Err(BinaryReaderError::new(
726                            "expected section, got wasm magic number",
727                            reader.original_position(),
728                        ));
729                    }
730                    _ => {}
731                }
732
733                let id_pos = reader.original_position();
734                let id = reader.read_u8()?;
735                if id & 0x80 != 0 {
736                    return Err(BinaryReaderError::new("malformed section id", id_pos));
737                }
738                let len_pos = reader.original_position();
739                let mut len = reader.read_var_u32()?;
740
741                // Test to make sure that this section actually fits within
742                // `Parser::max_size`. This doesn't matter for top-level modules
743                // but it is required for nested modules/components to correctly ensure
744                // that all sections live entirely within their section of the
745                // file.
746                let consumed = reader.original_position() - id_pos;
747                let section_overflow = self
748                    .max_size
749                    .checked_sub(usize_to_u64(consumed))
750                    .and_then(|s| s.checked_sub(len.into()))
751                    .is_none();
752                if section_overflow {
753                    return Err(BinaryReaderError::new("section too large", len_pos));
754                }
755
756                match (self.encoding, id) {
757                    // Custom sections for both modules and components.
758                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
759
760                    // Module sections
761                    (Encoding::Module, TYPE_SECTION) => {
762                        self.update_order(Order::Type, reader.original_position())?;
763                        section(reader, len, TypeSectionReader::new, TypeSection)
764                    }
765                    (Encoding::Module, IMPORT_SECTION) => {
766                        self.update_order(Order::Import, reader.original_position())?;
767                        section(reader, len, ImportSectionReader::new, ImportSection)
768                    }
769                    (Encoding::Module, FUNCTION_SECTION) => {
770                        self.update_order(Order::Function, reader.original_position())?;
771                        let s = section(reader, len, FunctionSectionReader::new, FunctionSection)?;
772                        match &s {
773                            FunctionSection(f) => self.counts.function_entries = Some(f.count()),
774                            _ => unreachable!(),
775                        }
776                        Ok(s)
777                    }
778                    (Encoding::Module, TABLE_SECTION) => {
779                        self.update_order(Order::Table, reader.original_position())?;
780                        section(reader, len, TableSectionReader::new, TableSection)
781                    }
782                    (Encoding::Module, MEMORY_SECTION) => {
783                        self.update_order(Order::Memory, reader.original_position())?;
784                        section(reader, len, MemorySectionReader::new, MemorySection)
785                    }
786                    (Encoding::Module, GLOBAL_SECTION) => {
787                        self.update_order(Order::Global, reader.original_position())?;
788                        section(reader, len, GlobalSectionReader::new, GlobalSection)
789                    }
790                    (Encoding::Module, EXPORT_SECTION) => {
791                        self.update_order(Order::Export, reader.original_position())?;
792                        section(reader, len, ExportSectionReader::new, ExportSection)
793                    }
794                    (Encoding::Module, START_SECTION) => {
795                        self.update_order(Order::Start, reader.original_position())?;
796                        let (func, range) = single_item(reader, len, "start")?;
797                        Ok(StartSection { func, range })
798                    }
799                    (Encoding::Module, ELEMENT_SECTION) => {
800                        self.update_order(Order::Element, reader.original_position())?;
801                        section(reader, len, ElementSectionReader::new, ElementSection)
802                    }
803                    (Encoding::Module, CODE_SECTION) => {
804                        self.update_order(Order::Code, reader.original_position())?;
805                        let start = reader.original_position();
806                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
807                        self.counts.code_entries = Some(count);
808                        self.check_function_code_counts(start)?;
809                        let range = start..reader.original_position() + len as usize;
810                        self.state = State::FunctionBody {
811                            remaining: count,
812                            len,
813                        };
814                        Ok(CodeSectionStart {
815                            count,
816                            range,
817                            size: len,
818                        })
819                    }
820                    (Encoding::Module, DATA_SECTION) => {
821                        self.update_order(Order::Data, reader.original_position())?;
822                        let s = section(reader, len, DataSectionReader::new, DataSection)?;
823                        match &s {
824                            DataSection(d) => self.counts.data_entries = Some(d.count()),
825                            _ => unreachable!(),
826                        }
827                        self.check_data_count(reader.original_position())?;
828                        Ok(s)
829                    }
830                    (Encoding::Module, DATA_COUNT_SECTION) => {
831                        self.update_order(Order::DataCount, reader.original_position())?;
832                        let (count, range) = single_item(reader, len, "data count")?;
833                        self.counts.data_count = Some(count);
834                        Ok(DataCountSection { count, range })
835                    }
836                    (Encoding::Module, TAG_SECTION) => {
837                        self.update_order(Order::Tag, reader.original_position())?;
838                        section(reader, len, TagSectionReader::new, TagSection)
839                    }
840
841                    // Component sections
842                    #[cfg(feature = "component-model")]
843                    (Encoding::Component, COMPONENT_MODULE_SECTION)
844                    | (Encoding::Component, COMPONENT_SECTION) => {
845                        if len as usize > MAX_WASM_MODULE_SIZE {
846                            bail!(
847                                len_pos,
848                                "{} section is too large",
849                                if id == 1 { "module" } else { "component " }
850                            );
851                        }
852
853                        let range = reader.original_position()
854                            ..reader.original_position() + usize::try_from(len).unwrap();
855                        self.max_size -= u64::from(len);
856                        self.offset += u64::from(len);
857                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));
858                        #[cfg(feature = "features")]
859                        {
860                            parser.features = self.features;
861                        }
862                        parser.max_size = u64::from(len);
863
864                        Ok(match id {
865                            1 => ModuleSection {
866                                parser,
867                                unchecked_range: range,
868                            },
869                            4 => ComponentSection {
870                                parser,
871                                unchecked_range: range,
872                            },
873                            _ => unreachable!(),
874                        })
875                    }
876                    #[cfg(feature = "component-model")]
877                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
878                        section(reader, len, InstanceSectionReader::new, InstanceSection)
879                    }
880                    #[cfg(feature = "component-model")]
881                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
882                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
883                    }
884                    #[cfg(feature = "component-model")]
885                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
886                        reader,
887                        len,
888                        ComponentInstanceSectionReader::new,
889                        ComponentInstanceSection,
890                    ),
891                    #[cfg(feature = "component-model")]
892                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
893                        section(reader, len, SectionLimited::new, ComponentAliasSection)
894                    }
895                    #[cfg(feature = "component-model")]
896                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
897                        reader,
898                        len,
899                        ComponentTypeSectionReader::new,
900                        ComponentTypeSection,
901                    ),
902                    #[cfg(feature = "component-model")]
903                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
904                        reader,
905                        len,
906                        ComponentCanonicalSectionReader::new,
907                        ComponentCanonicalSection,
908                    ),
909                    #[cfg(feature = "component-model")]
910                    (Encoding::Component, COMPONENT_START_SECTION) => {
911                        match self.counts.component_start_sections {
912                            false => self.counts.component_start_sections = true,
913                            true => {
914                                bail!(
915                                    reader.original_position(),
916                                    "component cannot have more than one start function"
917                                )
918                            }
919                        }
920                        let (start, range) = single_item(reader, len, "component start")?;
921                        Ok(ComponentStartSection { start, range })
922                    }
923                    #[cfg(feature = "component-model")]
924                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
925                        reader,
926                        len,
927                        ComponentImportSectionReader::new,
928                        ComponentImportSection,
929                    ),
930                    #[cfg(feature = "component-model")]
931                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
932                        reader,
933                        len,
934                        ComponentExportSectionReader::new,
935                        ComponentExportSection,
936                    ),
937                    (_, id) => {
938                        let offset = reader.original_position();
939                        let contents = reader.read_bytes(len as usize)?;
940                        let range = offset..offset + len as usize;
941                        Ok(UnknownSection {
942                            id,
943                            contents,
944                            range,
945                        })
946                    }
947                }
948            }
949
950            // Once we hit 0 remaining incrementally parsed items, with 0
951            // remaining bytes in each section, we're done and can switch back
952            // to parsing sections.
953            State::FunctionBody {
954                remaining: 0,
955                len: 0,
956            } => {
957                self.state = State::SectionStart;
958                self.parse_reader(reader, eof)
959            }
960
961            // ... otherwise trailing bytes with no remaining entries in these
962            // sections indicates an error.
963            State::FunctionBody { remaining: 0, len } => {
964                debug_assert!(len > 0);
965                let offset = reader.original_position();
966                Err(BinaryReaderError::new(
967                    "trailing bytes at end of section",
968                    offset,
969                ))
970            }
971
972            // Functions are relatively easy to parse when we know there's at
973            // least one remaining and at least one byte available to read
974            // things.
975            //
976            // We use the remaining length try to read a u32 size of the
977            // function, and using that size we require the entire function be
978            // resident in memory. This means that we're reading whole chunks of
979            // functions at a time.
980            //
981            // Limiting via `Parser::max_size` (nested parsing) happens above in
982            // `fn parse`, and limiting by our section size happens via
983            // `delimited`. Actual parsing of the function body is delegated to
984            // the caller to iterate over the `FunctionBody` structure.
985            State::FunctionBody { remaining, mut len } => {
986                let body = delimited(reader, &mut len, |r| {
987                    Ok(FunctionBody::new(r.read_reader()?))
988                })?;
989                self.state = State::FunctionBody {
990                    remaining: remaining - 1,
991                    len,
992                };
993                Ok(CodeSectionEntry(body))
994            }
995        }
996    }
997
998    /// Convenience function that can be used to parse a module or component
999    /// that is entirely resident in memory.
1000    ///
1001    /// This function will parse the `data` provided as a WebAssembly module
1002    /// or component.
1003    ///
1004    /// Note that when this function yields sections that provide parsers,
1005    /// no further action is required for those sections as payloads from
1006    /// those parsers will be automatically returned.
1007    ///
1008    /// # Examples
1009    ///
1010    /// An example of reading a wasm file from a stream (`std::io::Read`) into
1011    /// a buffer and then parsing it.
1012    ///
1013    /// ```
1014    /// use std::io::Read;
1015    /// use anyhow::Result;
1016    /// use wasmparser::{Parser, Chunk, Payload::*};
1017    ///
1018    /// fn parse(mut reader: impl Read) -> Result<()> {
1019    ///     let mut buf = Vec::new();
1020    ///     reader.read_to_end(&mut buf)?;
1021    ///     let parser = Parser::new(0);
1022    ///
1023    ///     for payload in parser.parse_all(&buf) {
1024    ///         match payload? {
1025    ///             // Sections for WebAssembly modules
1026    ///             Version { .. } => { /* ... */ }
1027    ///             TypeSection(_) => { /* ... */ }
1028    ///             ImportSection(_) => { /* ... */ }
1029    ///             FunctionSection(_) => { /* ... */ }
1030    ///             TableSection(_) => { /* ... */ }
1031    ///             MemorySection(_) => { /* ... */ }
1032    ///             TagSection(_) => { /* ... */ }
1033    ///             GlobalSection(_) => { /* ... */ }
1034    ///             ExportSection(_) => { /* ... */ }
1035    ///             StartSection { .. } => { /* ... */ }
1036    ///             ElementSection(_) => { /* ... */ }
1037    ///             DataCountSection { .. } => { /* ... */ }
1038    ///             DataSection(_) => { /* ... */ }
1039    ///
1040    ///             // Here we know how many functions we'll be receiving as
1041    ///             // `CodeSectionEntry`, so we can prepare for that, and
1042    ///             // afterwards we can parse and handle each function
1043    ///             // individually.
1044    ///             CodeSectionStart { .. } => { /* ... */ }
1045    ///             CodeSectionEntry(body) => {
1046    ///                 // here we can iterate over `body` to parse the function
1047    ///                 // and its locals
1048    ///             }
1049    ///
1050    ///             // Sections for WebAssembly components
1051    ///             ModuleSection { .. } => { /* ... */ }
1052    ///             InstanceSection(_) => { /* ... */ }
1053    ///             CoreTypeSection(_) => { /* ... */ }
1054    ///             ComponentSection { .. } => { /* ... */ }
1055    ///             ComponentInstanceSection(_) => { /* ... */ }
1056    ///             ComponentAliasSection(_) => { /* ... */ }
1057    ///             ComponentTypeSection(_) => { /* ... */ }
1058    ///             ComponentCanonicalSection(_) => { /* ... */ }
1059    ///             ComponentStartSection { .. } => { /* ... */ }
1060    ///             ComponentImportSection(_) => { /* ... */ }
1061    ///             ComponentExportSection(_) => { /* ... */ }
1062    ///
1063    ///             CustomSection(_) => { /* ... */ }
1064    ///
1065    ///             // Once we've reached the end of a parser we either resume
1066    ///             // at the parent parser or the payload iterator is at its
1067    ///             // end and we're done.
1068    ///             End(_) => {}
1069    ///
1070    ///             // most likely you'd return an error here, but if you want
1071    ///             // you can also inspect the raw contents of unknown sections
1072    ///             other => {
1073    ///                 match other.as_section() {
1074    ///                     Some((id, range)) => { /* ... */ }
1075    ///                     None => { /* ... */ }
1076    ///                 }
1077    ///             }
1078    ///         }
1079    ///     }
1080    ///
1081    ///     Ok(())
1082    /// }
1083    ///
1084    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
1085    /// ```
1086    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload<'_>>> {
1087        let mut stack = Vec::new();
1088        let mut cur = self;
1089        let mut done = false;
1090        iter::from_fn(move || {
1091            if done {
1092                return None;
1093            }
1094            let payload = match cur.parse(data, true) {
1095                // Propagate all errors
1096                Err(e) => {
1097                    done = true;
1098                    return Some(Err(e));
1099                }
1100
1101                // This isn't possible because `eof` is always true.
1102                Ok(Chunk::NeedMoreData(_)) => unreachable!(),
1103
1104                Ok(Chunk::Parsed { payload, consumed }) => {
1105                    data = &data[consumed..];
1106                    payload
1107                }
1108            };
1109
1110            match &payload {
1111                #[cfg(feature = "component-model")]
1112                Payload::ModuleSection { parser, .. }
1113                | Payload::ComponentSection { parser, .. } => {
1114                    stack.push(cur.clone());
1115                    cur = parser.clone();
1116                }
1117                Payload::End(_) => match stack.pop() {
1118                    Some(p) => cur = p,
1119                    None => done = true,
1120                },
1121
1122                _ => {}
1123            }
1124
1125            Some(Ok(payload))
1126        })
1127    }
1128
1129    /// Skip parsing the code section entirely.
1130    ///
1131    /// This function can be used to indicate, after receiving
1132    /// `CodeSectionStart`, that the section will not be parsed.
1133    ///
1134    /// The caller will be responsible for skipping `size` bytes (found in the
1135    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
1136    /// after the `size` bytes have been skipped.
1137    ///
1138    /// # Panics
1139    ///
1140    /// This function will panic if the parser is not in a state where it's
1141    /// parsing the code section.
1142    ///
1143    /// # Examples
1144    ///
1145    /// ```
1146    /// use wasmparser::{Result, Parser, Chunk, Payload::*};
1147    /// use core::ops::Range;
1148    ///
1149    /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
1150    ///     let mut parser = Parser::new(0);
1151    ///     loop {
1152    ///         let payload = match parser.parse(wasm, true)? {
1153    ///             Chunk::Parsed { consumed, payload } => {
1154    ///                 wasm = &wasm[consumed..];
1155    ///                 payload
1156    ///             }
1157    ///             // this state isn't possible with `eof = true`
1158    ///             Chunk::NeedMoreData(_) => unreachable!(),
1159    ///         };
1160    ///         match payload {
1161    ///             TypeSection(s) => print_range("type section", &s.range()),
1162    ///             ImportSection(s) => print_range("import section", &s.range()),
1163    ///             // .. other sections
1164    ///
1165    ///             // Print the range of the code section we see, but don't
1166    ///             // actually iterate over each individual function.
1167    ///             CodeSectionStart { range, size, .. } => {
1168    ///                 print_range("code section", &range);
1169    ///                 parser.skip_section();
1170    ///                 wasm = &wasm[size as usize..];
1171    ///             }
1172    ///             End(_) => break,
1173    ///             _ => {}
1174    ///         }
1175    ///     }
1176    ///     Ok(())
1177    /// }
1178    ///
1179    /// fn print_range(section: &str, range: &Range<usize>) {
1180    ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1181    /// }
1182    /// ```
1183    pub fn skip_section(&mut self) {
1184        let skip = match self.state {
1185            State::FunctionBody { remaining: _, len } => len,
1186            _ => panic!("wrong state to call `skip_section`"),
1187        };
1188        self.offset += u64::from(skip);
1189        self.max_size -= u64::from(skip);
1190        self.state = State::SectionStart;
1191    }
1192
1193    fn check_function_code_counts(&self, pos: usize) -> Result<()> {
1194        match (self.counts.function_entries, self.counts.code_entries) {
1195            (Some(n), Some(m)) if n != m => {
1196                bail!(pos, "function and code section have inconsistent lengths")
1197            }
1198            (Some(n), None) if n > 0 => bail!(
1199                pos,
1200                "function section has non-zero count but code section is absent"
1201            ),
1202            (None, Some(m)) if m > 0 => bail!(
1203                pos,
1204                "function section is absent but code section has non-zero count"
1205            ),
1206            _ => Ok(()),
1207        }
1208    }
1209
1210    fn check_data_count(&self, pos: usize) -> Result<()> {
1211        match (self.counts.data_count, self.counts.data_entries) {
1212            (Some(n), Some(m)) if n != m => {
1213                bail!(pos, "data count and data section have inconsistent lengths")
1214            }
1215            (Some(n), None) if n > 0 => {
1216                bail!(pos, "data count is non-zero but data section is absent")
1217            }
1218            _ => Ok(()),
1219        }
1220    }
1221}
1222
/// Converts a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1226
1227/// Parses an entire section resident in memory into a `Payload`.
1228///
1229/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1230/// to construct the section to return.
1231fn section<'a, T>(
1232    reader: &mut BinaryReader<'a>,
1233    len: u32,
1234    ctor: fn(BinaryReader<'a>) -> Result<T>,
1235    variant: fn(T) -> Payload<'a>,
1236) -> Result<Payload<'a>> {
1237    let reader = reader.skip(|r| {
1238        r.read_bytes(len as usize)?;
1239        Ok(())
1240    })?;
1241    // clear the hint for "need this many more bytes" here because we already
1242    // read all the bytes, so it's not possible to read more bytes if this
1243    // fails.
1244    let reader = ctor(reader).map_err(clear_hint)?;
1245    Ok(variant(reader))
1246}
1247
1248/// Reads a section that is represented by a single uleb-encoded `u32`.
1249fn single_item<'a, T>(
1250    reader: &mut BinaryReader<'a>,
1251    len: u32,
1252    desc: &str,
1253) -> Result<(T, Range<usize>)>
1254where
1255    T: FromReader<'a>,
1256{
1257    let range = reader.original_position()..reader.original_position() + len as usize;
1258    let mut content = reader.skip(|r| {
1259        r.read_bytes(len as usize)?;
1260        Ok(())
1261    })?;
1262    // We can't recover from "unexpected eof" here because our entire section is
1263    // already resident in memory, so clear the hint for how many more bytes are
1264    // expected.
1265    let ret = content.read().map_err(clear_hint)?;
1266    if !content.eof() {
1267        bail!(
1268            content.original_position(),
1269            "unexpected content in the {desc} section",
1270        );
1271    }
1272    Ok((ret, range))
1273}
1274
1275/// Attempts to parse using `f`.
1276///
1277/// This will update `*len` with the number of bytes consumed, and it will cause
1278/// a failure to be returned instead of the number of bytes consumed exceeds
1279/// what `*len` currently is.
1280fn delimited<'a, T>(
1281    reader: &mut BinaryReader<'a>,
1282    len: &mut u32,
1283    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1284) -> Result<T> {
1285    let start = reader.original_position();
1286    let ret = f(reader)?;
1287    *len = match (reader.original_position() - start)
1288        .try_into()
1289        .ok()
1290        .and_then(|i| len.checked_sub(i))
1291    {
1292        Some(i) => i,
1293        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1294    };
1295    Ok(ret)
1296}
1297
1298impl Default for Parser {
1299    fn default() -> Parser {
1300        Parser::new(0)
1301    }
1302}
1303
1304impl Payload<'_> {
1305    /// If this `Payload` represents a section in the original wasm module then
1306    /// the section's id and range within the original wasm binary are returned.
1307    ///
1308    /// Not all payloads refer to entire sections, such as the `Version` and
1309    /// `CodeSectionEntry` variants. These variants will return `None` from this
1310    /// function.
1311    ///
1312    /// Otherwise this function will return `Some` where the first element is
1313    /// the byte identifier for the section and the second element is the range
1314    /// of the contents of the section within the original wasm binary.
1315    ///
1316    /// The purpose of this method is to enable tools to easily iterate over
1317    /// entire sections if necessary and handle sections uniformly, for example
1318    /// dropping custom sections while preserving all other sections.
1319    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1320        use Payload::*;
1321
1322        match self {
1323            Version { .. } => None,
1324            TypeSection(s) => Some((TYPE_SECTION, s.range())),
1325            ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1326            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1327            TableSection(s) => Some((TABLE_SECTION, s.range())),
1328            MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1329            TagSection(s) => Some((TAG_SECTION, s.range())),
1330            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1331            ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1332            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1333            DataSection(s) => Some((DATA_SECTION, s.range())),
1334            StartSection { range, .. } => Some((START_SECTION, range.clone())),
1335            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1336            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1337            CodeSectionEntry(_) => None,
1338
1339            #[cfg(feature = "component-model")]
1340            ModuleSection {
1341                unchecked_range: range,
1342                ..
1343            } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1344            #[cfg(feature = "component-model")]
1345            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1346            #[cfg(feature = "component-model")]
1347            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1348            #[cfg(feature = "component-model")]
1349            ComponentSection {
1350                unchecked_range: range,
1351                ..
1352            } => Some((COMPONENT_SECTION, range.clone())),
1353            #[cfg(feature = "component-model")]
1354            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1355            #[cfg(feature = "component-model")]
1356            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1357            #[cfg(feature = "component-model")]
1358            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1359            #[cfg(feature = "component-model")]
1360            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1361            #[cfg(feature = "component-model")]
1362            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1363            #[cfg(feature = "component-model")]
1364            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1365            #[cfg(feature = "component-model")]
1366            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1367
1368            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1369
1370            UnknownSection { id, range, .. } => Some((*id, range.clone())),
1371
1372            End(_) => None,
1373        }
1374    }
1375}
1376
1377impl fmt::Debug for Payload<'_> {
1378    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1379        use Payload::*;
1380        match self {
1381            Version {
1382                num,
1383                encoding,
1384                range,
1385            } => f
1386                .debug_struct("Version")
1387                .field("num", num)
1388                .field("encoding", encoding)
1389                .field("range", range)
1390                .finish(),
1391
1392            // Module sections
1393            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1394            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1395            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1396            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1397            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1398            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1399            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1400            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1401            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1402            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1403            StartSection { func, range } => f
1404                .debug_struct("StartSection")
1405                .field("func", func)
1406                .field("range", range)
1407                .finish(),
1408            DataCountSection { count, range } => f
1409                .debug_struct("DataCountSection")
1410                .field("count", count)
1411                .field("range", range)
1412                .finish(),
1413            CodeSectionStart { count, range, size } => f
1414                .debug_struct("CodeSectionStart")
1415                .field("count", count)
1416                .field("range", range)
1417                .field("size", size)
1418                .finish(),
1419            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1420
1421            // Component sections
1422            #[cfg(feature = "component-model")]
1423            ModuleSection {
1424                parser: _,
1425                unchecked_range: range,
1426            } => f
1427                .debug_struct("ModuleSection")
1428                .field("range", range)
1429                .finish(),
1430            #[cfg(feature = "component-model")]
1431            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1432            #[cfg(feature = "component-model")]
1433            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1434            #[cfg(feature = "component-model")]
1435            ComponentSection {
1436                parser: _,
1437                unchecked_range: range,
1438            } => f
1439                .debug_struct("ComponentSection")
1440                .field("range", range)
1441                .finish(),
1442            #[cfg(feature = "component-model")]
1443            ComponentInstanceSection(_) => f
1444                .debug_tuple("ComponentInstanceSection")
1445                .field(&"...")
1446                .finish(),
1447            #[cfg(feature = "component-model")]
1448            ComponentAliasSection(_) => f
1449                .debug_tuple("ComponentAliasSection")
1450                .field(&"...")
1451                .finish(),
1452            #[cfg(feature = "component-model")]
1453            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1454            #[cfg(feature = "component-model")]
1455            ComponentCanonicalSection(_) => f
1456                .debug_tuple("ComponentCanonicalSection")
1457                .field(&"...")
1458                .finish(),
1459            #[cfg(feature = "component-model")]
1460            ComponentStartSection { .. } => f
1461                .debug_tuple("ComponentStartSection")
1462                .field(&"...")
1463                .finish(),
1464            #[cfg(feature = "component-model")]
1465            ComponentImportSection(_) => f
1466                .debug_tuple("ComponentImportSection")
1467                .field(&"...")
1468                .finish(),
1469            #[cfg(feature = "component-model")]
1470            ComponentExportSection(_) => f
1471                .debug_tuple("ComponentExportSection")
1472                .field(&"...")
1473                .finish(),
1474
1475            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1476
1477            UnknownSection { id, range, .. } => f
1478                .debug_struct("UnknownSection")
1479                .field("id", id)
1480                .field("range", range)
1481                .finish(),
1482
1483            End(offset) => f.debug_tuple("End").field(offset).finish(),
1484        }
1485    }
1486}
1487
1488fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1489    err.inner.needed_hint = None;
1490    err
1491}
1492
1493#[cfg(test)]
1494mod tests {
1495    use super::*;
1496
1497    macro_rules! assert_matches {
1498        ($a:expr, $b:pat $(,)?) => {
1499            match $a {
1500                $b => {}
1501                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1502            }
1503        };
1504    }
1505
1506    #[test]
1507    fn header() {
1508        assert!(Parser::default().parse(&[], true).is_err());
1509        assert_matches!(
1510            Parser::default().parse(&[], false),
1511            Ok(Chunk::NeedMoreData(4)),
1512        );
1513        assert_matches!(
1514            Parser::default().parse(b"\0", false),
1515            Ok(Chunk::NeedMoreData(3)),
1516        );
1517        assert_matches!(
1518            Parser::default().parse(b"\0asm", false),
1519            Ok(Chunk::NeedMoreData(4)),
1520        );
1521        assert_matches!(
1522            Parser::default().parse(b"\0asm\x01\0\0\0", false),
1523            Ok(Chunk::Parsed {
1524                consumed: 8,
1525                payload: Payload::Version { num: 1, .. },
1526            }),
1527        );
1528    }
1529
1530    #[test]
1531    fn header_iter() {
1532        for _ in Parser::default().parse_all(&[]) {}
1533        for _ in Parser::default().parse_all(b"\0") {}
1534        for _ in Parser::default().parse_all(b"\0asm") {}
1535        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1536    }
1537
1538    fn parser_after_header() -> Parser {
1539        let mut p = Parser::default();
1540        assert_matches!(
1541            p.parse(b"\0asm\x01\0\0\0", false),
1542            Ok(Chunk::Parsed {
1543                consumed: 8,
1544                payload: Payload::Version {
1545                    num: WASM_MODULE_VERSION,
1546                    encoding: Encoding::Module,
1547                    ..
1548                },
1549            }),
1550        );
1551        p
1552    }
1553
1554    fn parser_after_component_header() -> Parser {
1555        let mut p = Parser::default();
1556        assert_matches!(
1557            p.parse(b"\0asm\x0d\0\x01\0", false),
1558            Ok(Chunk::Parsed {
1559                consumed: 8,
1560                payload: Payload::Version {
1561                    num: WASM_COMPONENT_VERSION,
1562                    encoding: Encoding::Component,
1563                    ..
1564                },
1565            }),
1566        );
1567        p
1568    }
1569
1570    #[test]
1571    fn start_section() {
1572        assert_matches!(
1573            parser_after_header().parse(&[], false),
1574            Ok(Chunk::NeedMoreData(1)),
1575        );
1576        assert!(parser_after_header().parse(&[8], true).is_err());
1577        assert!(parser_after_header().parse(&[8, 1], true).is_err());
1578        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1579        assert_matches!(
1580            parser_after_header().parse(&[8], false),
1581            Ok(Chunk::NeedMoreData(1)),
1582        );
1583        assert_matches!(
1584            parser_after_header().parse(&[8, 1], false),
1585            Ok(Chunk::NeedMoreData(1)),
1586        );
1587        assert_matches!(
1588            parser_after_header().parse(&[8, 2], false),
1589            Ok(Chunk::NeedMoreData(2)),
1590        );
1591        assert_matches!(
1592            parser_after_header().parse(&[8, 1, 1], false),
1593            Ok(Chunk::Parsed {
1594                consumed: 3,
1595                payload: Payload::StartSection { func: 1, .. },
1596            }),
1597        );
1598        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1599        assert!(parser_after_header().parse(&[8, 0], false).is_err());
1600    }
1601
1602    #[test]
1603    fn end_works() {
1604        assert_matches!(
1605            parser_after_header().parse(&[], true),
1606            Ok(Chunk::Parsed {
1607                consumed: 0,
1608                payload: Payload::End(8),
1609            }),
1610        );
1611    }
1612
1613    #[test]
1614    fn type_section() {
1615        assert!(parser_after_header().parse(&[1], true).is_err());
1616        assert!(parser_after_header().parse(&[1, 0], false).is_err());
1617        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1618        assert_matches!(
1619            parser_after_header().parse(&[1], false),
1620            Ok(Chunk::NeedMoreData(1)),
1621        );
1622        assert_matches!(
1623            parser_after_header().parse(&[1, 1], false),
1624            Ok(Chunk::NeedMoreData(1)),
1625        );
1626        assert_matches!(
1627            parser_after_header().parse(&[1, 1, 1], false),
1628            Ok(Chunk::Parsed {
1629                consumed: 3,
1630                payload: Payload::TypeSection(_),
1631            }),
1632        );
1633        assert_matches!(
1634            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1635            Ok(Chunk::Parsed {
1636                consumed: 3,
1637                payload: Payload::TypeSection(_),
1638            }),
1639        );
1640    }
1641
1642    #[test]
1643    fn custom_section() {
1644        assert!(parser_after_header().parse(&[0], true).is_err());
1645        assert!(parser_after_header().parse(&[0, 0], false).is_err());
1646        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1647        assert_matches!(
1648            parser_after_header().parse(&[0, 2, 1], false),
1649            Ok(Chunk::NeedMoreData(1)),
1650        );
1651        assert_custom(
1652            parser_after_header().parse(&[0, 1, 0], false).unwrap(),
1653            3,
1654            "",
1655            11,
1656            b"",
1657            Range { start: 10, end: 11 },
1658        );
1659        assert_custom(
1660            parser_after_header()
1661                .parse(&[0, 2, 1, b'a'], false)
1662                .unwrap(),
1663            4,
1664            "a",
1665            12,
1666            b"",
1667            Range { start: 10, end: 12 },
1668        );
1669        assert_custom(
1670            parser_after_header()
1671                .parse(&[0, 2, 0, b'a'], false)
1672                .unwrap(),
1673            4,
1674            "",
1675            11,
1676            b"a",
1677            Range { start: 10, end: 12 },
1678        );
1679    }
1680
1681    fn assert_custom(
1682        chunk: Chunk<'_>,
1683        expected_consumed: usize,
1684        expected_name: &str,
1685        expected_data_offset: usize,
1686        expected_data: &[u8],
1687        expected_range: Range<usize>,
1688    ) {
1689        let (consumed, s) = match chunk {
1690            Chunk::Parsed {
1691                consumed,
1692                payload: Payload::CustomSection(s),
1693            } => (consumed, s),
1694            _ => panic!("not a custom section payload"),
1695        };
1696        assert_eq!(consumed, expected_consumed);
1697        assert_eq!(s.name(), expected_name);
1698        assert_eq!(s.data_offset(), expected_data_offset);
1699        assert_eq!(s.data(), expected_data);
1700        assert_eq!(s.range(), expected_range);
1701    }
1702
1703    #[test]
1704    fn function_section() {
1705        assert!(parser_after_header().parse(&[10], true).is_err());
1706        assert!(parser_after_header().parse(&[10, 0], true).is_err());
1707        assert!(parser_after_header().parse(&[10, 1], true).is_err());
1708        assert_matches!(
1709            parser_after_header().parse(&[10], false),
1710            Ok(Chunk::NeedMoreData(1))
1711        );
1712        assert_matches!(
1713            parser_after_header().parse(&[10, 1], false),
1714            Ok(Chunk::NeedMoreData(1))
1715        );
1716        let mut p = parser_after_header();
1717        assert_matches!(
1718            p.parse(&[10, 1, 0], false),
1719            Ok(Chunk::Parsed {
1720                consumed: 3,
1721                payload: Payload::CodeSectionStart { count: 0, .. },
1722            }),
1723        );
1724        assert_matches!(
1725            p.parse(&[], true),
1726            Ok(Chunk::Parsed {
1727                consumed: 0,
1728                payload: Payload::End(11),
1729            }),
1730        );
1731        let mut p = parser_after_header();
1732        assert_matches!(
1733            p.parse(&[3, 2, 1, 0], false),
1734            Ok(Chunk::Parsed {
1735                consumed: 4,
1736                payload: Payload::FunctionSection { .. },
1737            }),
1738        );
1739        assert_matches!(
1740            p.parse(&[10, 2, 1, 0], false),
1741            Ok(Chunk::Parsed {
1742                consumed: 3,
1743                payload: Payload::CodeSectionStart { count: 1, .. },
1744            }),
1745        );
1746        assert_matches!(
1747            p.parse(&[0], false),
1748            Ok(Chunk::Parsed {
1749                consumed: 1,
1750                payload: Payload::CodeSectionEntry(_),
1751            }),
1752        );
1753        assert_matches!(
1754            p.parse(&[], true),
1755            Ok(Chunk::Parsed {
1756                consumed: 0,
1757                payload: Payload::End(16),
1758            }),
1759        );
1760
1761        // 1 byte section with 1 function can't read the function body because
1762        // the section is too small
1763        let mut p = parser_after_header();
1764        assert_matches!(
1765            p.parse(&[3, 2, 1, 0], false),
1766            Ok(Chunk::Parsed {
1767                consumed: 4,
1768                payload: Payload::FunctionSection { .. },
1769            }),
1770        );
1771        assert_matches!(
1772            p.parse(&[10, 1, 1], false),
1773            Ok(Chunk::Parsed {
1774                consumed: 3,
1775                payload: Payload::CodeSectionStart { count: 1, .. },
1776            }),
1777        );
1778        assert_eq!(
1779            p.parse(&[0], false).unwrap_err().message(),
1780            "unexpected end-of-file"
1781        );
1782
1783        // section with 2 functions but section is cut off
1784        let mut p = parser_after_header();
1785        assert_matches!(
1786            p.parse(&[3, 2, 2, 0], false),
1787            Ok(Chunk::Parsed {
1788                consumed: 4,
1789                payload: Payload::FunctionSection { .. },
1790            }),
1791        );
1792        assert_matches!(
1793            p.parse(&[10, 2, 2], false),
1794            Ok(Chunk::Parsed {
1795                consumed: 3,
1796                payload: Payload::CodeSectionStart { count: 2, .. },
1797            }),
1798        );
1799        assert_matches!(
1800            p.parse(&[0], false),
1801            Ok(Chunk::Parsed {
1802                consumed: 1,
1803                payload: Payload::CodeSectionEntry(_),
1804            }),
1805        );
1806        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1807        assert_eq!(
1808            p.parse(&[0], false).unwrap_err().message(),
1809            "unexpected end-of-file",
1810        );
1811
1812        // trailing data is bad
1813        let mut p = parser_after_header();
1814        assert_matches!(
1815            p.parse(&[3, 2, 1, 0], false),
1816            Ok(Chunk::Parsed {
1817                consumed: 4,
1818                payload: Payload::FunctionSection { .. },
1819            }),
1820        );
1821        assert_matches!(
1822            p.parse(&[10, 3, 1], false),
1823            Ok(Chunk::Parsed {
1824                consumed: 3,
1825                payload: Payload::CodeSectionStart { count: 1, .. },
1826            }),
1827        );
1828        assert_matches!(
1829            p.parse(&[0], false),
1830            Ok(Chunk::Parsed {
1831                consumed: 1,
1832                payload: Payload::CodeSectionEntry(_),
1833            }),
1834        );
1835        assert_eq!(
1836            p.parse(&[0], false).unwrap_err().message(),
1837            "trailing bytes at end of section",
1838        );
1839    }
1840
1841    #[test]
1842    fn single_module() {
1843        let mut p = parser_after_component_header();
1844        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));
1845
1846        // A module that's 8 bytes in length
1847        let mut sub = match p.parse(&[1, 8], false) {
1848            Ok(Chunk::Parsed {
1849                consumed: 2,
1850                payload: Payload::ModuleSection { parser, .. },
1851            }) => parser,
1852            other => panic!("bad parse {other:?}"),
1853        };
1854
1855        // Parse the header of the submodule with the sub-parser.
1856        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
1857        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
1858        assert_matches!(
1859            sub.parse(b"\0asm\x01\0\0\0", false),
1860            Ok(Chunk::Parsed {
1861                consumed: 8,
1862                payload: Payload::Version {
1863                    num: 1,
1864                    encoding: Encoding::Module,
1865                    ..
1866                },
1867            }),
1868        );
1869
1870        // The sub-parser should be byte-limited so the next byte shouldn't get
1871        // consumed, it's intended for the parent parser.
1872        assert_matches!(
1873            sub.parse(&[10], false),
1874            Ok(Chunk::Parsed {
1875                consumed: 0,
1876                payload: Payload::End(18),
1877            }),
1878        );
1879
1880        // The parent parser should now be back to resuming, and we simulate it
1881        // being done with bytes to ensure that it's safely at the end,
1882        // completing the module code section.
1883        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1884        assert_matches!(
1885            p.parse(&[], true),
1886            Ok(Chunk::Parsed {
1887                consumed: 0,
1888                payload: Payload::End(18),
1889            }),
1890        );
1891    }
1892
1893    #[test]
1894    fn nested_section_too_big() {
1895        let mut p = parser_after_component_header();
1896
1897        // A module that's 10 bytes in length
1898        let mut sub = match p.parse(&[1, 10], false) {
1899            Ok(Chunk::Parsed {
1900                consumed: 2,
1901                payload: Payload::ModuleSection { parser, .. },
1902            }) => parser,
1903            other => panic!("bad parse {other:?}"),
1904        };
1905
1906        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1907        // module.
1908        assert_matches!(
1909            sub.parse(b"\0asm\x01\0\0\0", false),
1910            Ok(Chunk::Parsed {
1911                consumed: 8,
1912                payload: Payload::Version { num: 1, .. },
1913            }),
1914        );
1915
1916        // We can't parse a section which declares its bigger than the outer
1917        // module. This is a custom section, one byte big, with one content byte. The
1918        // content byte, however, lives outside of the parent's module code
1919        // section.
1920        assert_eq!(
1921            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1922            "section too large",
1923        );
1924    }
1925}
wasmparser/parser.rs

wasmparser/
parser.rs