wasmtime_environ/compile/
module_environ.rs

1use crate::module::{
2    FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,
3    TableSegmentElements,
4};
5use crate::{
6    ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,
7    EntityIndex, EntityType, FuncIndex, GlobalIndex, IndexType, InitMemory, MemoryIndex,
8    ModuleInternedTypeIndex, ModuleTypesBuilder, PrimaryMap, SizeOverflow, StaticMemoryInitializer,
9    TableIndex, TableInitialValue, Tag, TagIndex, Tunables, TypeConvert, TypeIndex, WasmError,
10    WasmHeapTopType, WasmHeapType, WasmResult, WasmValType, WasmparserTypeConverter,
11};
12use crate::{StaticModuleIndex, prelude::*};
13use anyhow::{Result, bail};
14use cranelift_entity::SecondaryMap;
15use cranelift_entity::packed_option::ReservedValue;
16use std::borrow::Cow;
17use std::collections::HashMap;
18use std::mem;
19use std::path::PathBuf;
20use std::sync::Arc;
21use wasmparser::{
22    CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
23    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
24    Validator, ValidatorResources, types::Types,
25};
26
/// Object containing the standalone environment information.
///
/// Bundles the in-progress [`ModuleTranslation`] with the shared type
/// interner, the validator, and the tunables that are consulted while a
/// module's payloads are translated one at a time.
pub struct ModuleEnvironment<'a, 'data> {
    /// The current module being translated
    result: ModuleTranslation<'data>,

    /// Intern'd types for this entire translation, shared by all modules.
    types: &'a mut ModuleTypesBuilder,

    // Various bits and pieces of configuration
    //
    // `validator` is used to validate sections as they are translated, and
    // `tunables` is consulted for the debuginfo settings
    // (`generate_native_debuginfo`, `parse_wasm_debuginfo`).
    validator: &'a mut Validator,
    tunables: &'a Tunables,
}
39
/// The result of translating via `ModuleEnvironment`.
///
/// Function bodies are not yet translated, and data initializers have not yet
/// been copied out of the original buffer.
pub struct ModuleTranslation<'data> {
    /// Module information.
    pub module: Module,

    /// The input wasm binary.
    ///
    /// This can be useful, for example, when modules are parsed from a
    /// component and the embedder wants access to the raw wasm modules
    /// themselves.
    pub wasm: &'data [u8],

    /// References to the function bodies.
    ///
    /// One entry is pushed per code section entry, pairing the raw body with
    /// the validator handed back for it.
    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,

    /// For each imported function, the single statically-known defined function
    /// that satisfies that import, if any. This is used to turn what would
    /// otherwise be indirect calls through the imports table into direct calls,
    /// when possible.
    pub known_imported_functions:
        SecondaryMap<FuncIndex, Option<(StaticModuleIndex, DefinedFuncIndex)>>,

    /// A list of type signatures which are considered exported from this
    /// module, or those that can possibly be called. This list is sorted and
    /// deduplicated, and trampolines for each of these signatures are
    /// required.
    ///
    /// It is computed when the module's `End` payload is processed, from the
    /// signatures of all functions flagged as escaping.
    pub exported_signatures: Vec<ModuleInternedTypeIndex>,

    /// DWARF debug information, if enabled, parsed from the module.
    pub debuginfo: DebugInfoData<'data>,

    /// Set if debuginfo was found but it was not parsed due to `Tunables`
    /// configuration.
    pub has_unparsed_debuginfo: bool,

    /// List of data segments found in this module which should be concatenated
    /// together for the final compiled artifact.
    ///
    /// These data segments, when concatenated, are indexed by the
    /// `MemoryInitializer` type.
    pub data: Vec<Cow<'data, [u8]>>,

    /// The desired alignment of `data` in the final data section of the object
    /// file that we'll emit.
    ///
    /// Note that this is 1 by default but `MemoryInitialization::Static` might
    /// switch this to a higher alignment to facilitate mmap-ing data from
    /// an object file into a linear memory.
    ///
    /// `new` initializes this field to `None`.
    // NOTE(review): presumably `None` is what "1 by default" refers to --
    // confirm against the code that consumes this field.
    pub data_align: Option<u64>,

    /// Total size, in bytes, of all data pushed onto `data` so far.
    ///
    /// Tracked as a `u32`: translating the data section reports an error
    /// rather than letting this counter overflow.
    total_data: u32,

    /// List of passive data segments found in this module which will get
    /// concatenated for the final artifact.
    pub passive_data: Vec<&'data [u8]>,

    /// Total size, in bytes, of all passive data pushed into `passive_data` so
    /// far.
    total_passive_data: u32,

    /// When we're parsing the code section this will be incremented so we know
    /// which function is currently being defined.
    ///
    /// This counts defined functions only; the number of imported functions
    /// is added to it to form the function's index.
    code_index: u32,

    /// The type information of the current module made available at the end of the
    /// validation process.
    ///
    /// This is `None` until the `End` payload has been validated.
    types: Option<Types>,
}
110
111impl<'data> ModuleTranslation<'data> {
112    /// Create a new translation for the module with the given index.
113    pub fn new(module_index: StaticModuleIndex) -> Self {
114        Self {
115            module: Module::new(module_index),
116            wasm: &[],
117            function_body_inputs: PrimaryMap::default(),
118            known_imported_functions: SecondaryMap::default(),
119            exported_signatures: Vec::default(),
120            debuginfo: DebugInfoData::default(),
121            has_unparsed_debuginfo: false,
122            data: Vec::default(),
123            data_align: None,
124            total_data: 0,
125            passive_data: Vec::default(),
126            total_passive_data: 0,
127            code_index: 0,
128            types: None,
129        }
130    }
131
132    /// Returns a reference to the type information of the current module.
133    pub fn get_types(&self) -> &Types {
134        self.types
135            .as_ref()
136            .expect("module type information to be available")
137    }
138
139    /// Get this translation's module's index.
140    pub fn module_index(&self) -> StaticModuleIndex {
141        self.module.module_index
142    }
143}
144
/// Contains function data: byte code and its offset in the module.
///
/// One of these is recorded for every code section entry encountered during
/// translation.
pub struct FunctionBodyData<'a> {
    /// The body of the function, containing code and locals.
    pub body: FunctionBody<'a>,
    /// Validator for the function body, as handed back by the module
    /// validator when this code section entry was registered with it.
    pub validator: FuncToValidate<ValidatorResources>,
}
152
// Debug-related data collected for a module: DWARF section readers, the
// contents of the name section, and bookkeeping about the wasm file itself.
//
// These are deliberately plain `//` comments rather than `///` docs: the item
// carries `#[expect(missing_docs)]`, and fully documenting it would leave that
// expectation unfulfilled.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct DebugInfoData<'a> {
    // The main `gimli` DWARF container.
    pub dwarf: Dwarf<'a>,
    // Module, function, and local names.
    pub name_section: NameSection<'a>,
    // Offsets and per-function metadata describing the wasm binary.
    pub wasm_file: WasmFileInfo,
    // Additional DWARF section readers stored alongside `dwarf`.
    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
}
166
// `gimli::Dwarf` specialized to this module's borrowed little-endian reader.
// (Plain `//` comment on purpose: the alias carries `#[expect(missing_docs)]`.)
#[expect(missing_docs, reason = "self-describing")]
pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;

// Reader used for all DWARF sections: a borrowed byte slice interpreted as
// little-endian.
type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
171
// Names associated with a module, borrowed as string slices with lifetime
// `'a`.
// NOTE(review): presumably populated from the wasm `name` custom section --
// confirm against `name_section`.
//
// Plain `//` comments on purpose: the item carries `#[expect(missing_docs)]`.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct NameSection<'a> {
    // Name of the module itself, if one is present.
    pub module_name: Option<&'a str>,
    // Function names keyed by function index.
    pub func_names: HashMap<FuncIndex, &'a str>,
    // Per-function maps of names keyed by a `u32` (presumably the local's
    // index -- confirm).
    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}
179
// Bookkeeping about the wasm binary that is recorded during translation for
// debuginfo purposes.
//
// Plain `//` comments on purpose: the item carries `#[expect(missing_docs)]`.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct WasmFileInfo {
    // Path of the wasm file, if known.
    pub path: Option<PathBuf>,
    // Start offset of the code section's range, recorded when the code
    // section begins.
    pub code_section_offset: u64,
    // Number of function imports seen in the import section.
    pub imported_func_count: u32,
    // One entry per code section entry; only populated when
    // `generate_native_debuginfo` is enabled.
    pub funcs: Vec<FunctionMetadata>,
}
188
// Parameter and local type information for one defined function.
//
// Plain `//` comments on purpose: the item carries `#[expect(missing_docs)]`.
#[derive(Debug)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct FunctionMetadata {
    // Parameter types taken from the function's signature.
    pub params: Box<[WasmValType]>,
    // `(count, type)` pairs as yielded by the function body's locals reader.
    pub locals: Box<[(u32, WasmValType)]>,
}
195
196impl<'a, 'data> ModuleEnvironment<'a, 'data> {
197    /// Allocates the environment data structures.
198    pub fn new(
199        tunables: &'a Tunables,
200        validator: &'a mut Validator,
201        types: &'a mut ModuleTypesBuilder,
202        module_index: StaticModuleIndex,
203    ) -> Self {
204        Self {
205            result: ModuleTranslation::new(module_index),
206            types,
207            tunables,
208            validator,
209        }
210    }
211
212    /// Translate a wasm module using this environment.
213    ///
214    /// This function will translate the `data` provided with `parser`,
215    /// validating everything along the way with this environment's validator.
216    ///
217    /// The result of translation, [`ModuleTranslation`], contains everything
218    /// necessary to compile functions afterwards as well as learn type
219    /// information about the module at runtime.
220    pub fn translate(
221        mut self,
222        parser: Parser,
223        data: &'data [u8],
224    ) -> Result<ModuleTranslation<'data>> {
225        self.result.wasm = data;
226
227        for payload in parser.parse_all(data) {
228            self.translate_payload(payload?)?;
229        }
230
231        Ok(self.result)
232    }
233
234    fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
235        match payload {
236            Payload::Version {
237                num,
238                encoding,
239                range,
240            } => {
241                self.validator.version(num, encoding, &range)?;
242                match encoding {
243                    Encoding::Module => {}
244                    Encoding::Component => {
245                        bail!("expected a WebAssembly module but was given a WebAssembly component")
246                    }
247                }
248            }
249
250            Payload::End(offset) => {
251                self.result.types = Some(self.validator.end(offset)?);
252
253                // With the `escaped_funcs` set of functions finished
254                // we can calculate the set of signatures that are exported as
255                // the set of exported functions' signatures.
256                self.result.exported_signatures = self
257                    .result
258                    .module
259                    .functions
260                    .iter()
261                    .filter_map(|(_, func)| {
262                        if func.is_escaping() {
263                            Some(func.signature.unwrap_module_type_index())
264                        } else {
265                            None
266                        }
267                    })
268                    .collect();
269                self.result.exported_signatures.sort_unstable();
270                self.result.exported_signatures.dedup();
271            }
272
273            Payload::TypeSection(types) => {
274                self.validator.type_section(&types)?;
275
276                let count = self.validator.types(0).unwrap().core_type_count_in_module();
277                log::trace!("interning {count} Wasm types");
278
279                let capacity = usize::try_from(count).unwrap();
280                self.result.module.types.reserve(capacity);
281                self.types.reserve_wasm_signatures(capacity);
282
283                // Iterate over each *rec group* -- not type -- defined in the
284                // types section. Rec groups are the unit of canonicalization
285                // and therefore the unit at which we need to process at a
286                // time. `wasmparser` has already done the hard work of
287                // de-duplicating and canonicalizing the rec groups within the
288                // module for us, we just need to translate them into our data
289                // structures. Note that, if the Wasm defines duplicate rec
290                // groups, we need copy the duplicates over (shallowly) as well,
291                // so that our types index space doesn't have holes.
292                let mut type_index = 0;
293                while type_index < count {
294                    let validator_types = self.validator.types(0).unwrap();
295
296                    // Get the rec group for the current type index, which is
297                    // always the first type defined in a rec group.
298                    log::trace!("looking up wasmparser type for index {type_index}");
299                    let core_type_id = validator_types.core_type_at_in_module(type_index);
300                    log::trace!(
301                        "  --> {core_type_id:?} = {:?}",
302                        validator_types[core_type_id],
303                    );
304                    let rec_group_id = validator_types.rec_group_id_of(core_type_id);
305                    debug_assert_eq!(
306                        validator_types
307                            .rec_group_elements(rec_group_id)
308                            .position(|id| id == core_type_id),
309                        Some(0)
310                    );
311
312                    // Intern the rec group and then fill in this module's types
313                    // index space.
314                    let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
315                    let elems = self.types.rec_group_elements(interned);
316                    let len = elems.len();
317                    self.result.module.types.reserve(len);
318                    for ty in elems {
319                        self.result.module.types.push(ty.into());
320                    }
321
322                    // Advance `type_index` to the start of the next rec group.
323                    type_index += u32::try_from(len).unwrap();
324                }
325            }
326
327            Payload::ImportSection(imports) => {
328                self.validator.import_section(&imports)?;
329
330                let cnt = usize::try_from(imports.count()).unwrap();
331                self.result.module.initializers.reserve(cnt);
332
333                for entry in imports {
334                    let import = entry?;
335                    let ty = match import.ty {
336                        TypeRef::Func(index) => {
337                            let index = TypeIndex::from_u32(index);
338                            let interned_index = self.result.module.types[index];
339                            self.result.module.num_imported_funcs += 1;
340                            self.result.debuginfo.wasm_file.imported_func_count += 1;
341                            EntityType::Function(interned_index)
342                        }
343                        TypeRef::Memory(ty) => {
344                            self.result.module.num_imported_memories += 1;
345                            EntityType::Memory(ty.into())
346                        }
347                        TypeRef::Global(ty) => {
348                            self.result.module.num_imported_globals += 1;
349                            EntityType::Global(self.convert_global_type(&ty)?)
350                        }
351                        TypeRef::Table(ty) => {
352                            self.result.module.num_imported_tables += 1;
353                            EntityType::Table(self.convert_table_type(&ty)?)
354                        }
355                        TypeRef::Tag(ty) => {
356                            let index = TypeIndex::from_u32(ty.func_type_idx);
357                            let signature = self.result.module.types[index];
358                            let exception = self.types.define_exception_type_for_tag(
359                                signature.unwrap_module_type_index(),
360                            );
361                            let tag = Tag {
362                                signature,
363                                exception: EngineOrModuleTypeIndex::Module(exception),
364                            };
365                            self.result.module.num_imported_tags += 1;
366                            EntityType::Tag(tag)
367                        }
368                    };
369                    self.declare_import(import.module, import.name, ty);
370                }
371            }
372
373            Payload::FunctionSection(functions) => {
374                self.validator.function_section(&functions)?;
375
376                let cnt = usize::try_from(functions.count()).unwrap();
377                self.result.module.functions.reserve_exact(cnt);
378
379                for entry in functions {
380                    let sigindex = entry?;
381                    let ty = TypeIndex::from_u32(sigindex);
382                    let interned_index = self.result.module.types[ty];
383                    self.result.module.push_function(interned_index);
384                }
385            }
386
387            Payload::TableSection(tables) => {
388                self.validator.table_section(&tables)?;
389                let cnt = usize::try_from(tables.count()).unwrap();
390                self.result.module.tables.reserve_exact(cnt);
391
392                for entry in tables {
393                    let wasmparser::Table { ty, init } = entry?;
394                    let table = self.convert_table_type(&ty)?;
395                    self.result.module.needs_gc_heap |= table.ref_type.is_vmgcref_type();
396                    self.result.module.tables.push(table);
397                    let init = match init {
398                        wasmparser::TableInit::RefNull => TableInitialValue::Null {
399                            precomputed: Vec::new(),
400                        },
401                        wasmparser::TableInit::Expr(expr) => {
402                            let (init, escaped) = ConstExpr::from_wasmparser(self, expr)?;
403                            for f in escaped {
404                                self.flag_func_escaped(f);
405                            }
406                            TableInitialValue::Expr(init)
407                        }
408                    };
409                    self.result
410                        .module
411                        .table_initialization
412                        .initial_values
413                        .push(init);
414                }
415            }
416
417            Payload::MemorySection(memories) => {
418                self.validator.memory_section(&memories)?;
419
420                let cnt = usize::try_from(memories.count()).unwrap();
421                self.result.module.memories.reserve_exact(cnt);
422
423                for entry in memories {
424                    let memory = entry?;
425                    self.result.module.memories.push(memory.into());
426                }
427            }
428
429            Payload::TagSection(tags) => {
430                self.validator.tag_section(&tags)?;
431
432                for entry in tags {
433                    let sigindex = entry?.func_type_idx;
434                    let ty = TypeIndex::from_u32(sigindex);
435                    let interned_index = self.result.module.types[ty];
436                    let exception = self
437                        .types
438                        .define_exception_type_for_tag(interned_index.unwrap_module_type_index());
439                    self.result.module.push_tag(interned_index, exception);
440                }
441            }
442
443            Payload::GlobalSection(globals) => {
444                self.validator.global_section(&globals)?;
445
446                let cnt = usize::try_from(globals.count()).unwrap();
447                self.result.module.globals.reserve_exact(cnt);
448
449                for entry in globals {
450                    let wasmparser::Global { ty, init_expr } = entry?;
451                    let (initializer, escaped) = ConstExpr::from_wasmparser(self, init_expr)?;
452                    for f in escaped {
453                        self.flag_func_escaped(f);
454                    }
455                    let ty = self.convert_global_type(&ty)?;
456                    self.result.module.globals.push(ty);
457                    self.result.module.global_initializers.push(initializer);
458                }
459            }
460
461            Payload::ExportSection(exports) => {
462                self.validator.export_section(&exports)?;
463
464                let cnt = usize::try_from(exports.count()).unwrap();
465                self.result.module.exports.reserve(cnt);
466
467                for entry in exports {
468                    let wasmparser::Export { name, kind, index } = entry?;
469                    let entity = match kind {
470                        ExternalKind::Func => {
471                            let index = FuncIndex::from_u32(index);
472                            self.flag_func_escaped(index);
473                            EntityIndex::Function(index)
474                        }
475                        ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
476                        ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
477                        ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
478                        ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
479                    };
480                    self.result
481                        .module
482                        .exports
483                        .insert(String::from(name), entity);
484                }
485            }
486
487            Payload::StartSection { func, range } => {
488                self.validator.start_section(func, &range)?;
489
490                let func_index = FuncIndex::from_u32(func);
491                self.flag_func_escaped(func_index);
492                debug_assert!(self.result.module.start_func.is_none());
493                self.result.module.start_func = Some(func_index);
494            }
495
496            Payload::ElementSection(elements) => {
497                self.validator.element_section(&elements)?;
498
499                for (index, entry) in elements.into_iter().enumerate() {
500                    let wasmparser::Element {
501                        kind,
502                        items,
503                        range: _,
504                    } = entry?;
505
506                    // Build up a list of `FuncIndex` corresponding to all the
507                    // entries listed in this segment. Note that it's not
508                    // possible to create anything other than a `ref.null
509                    // extern` for externref segments, so those just get
510                    // translated to the reserved value of `FuncIndex`.
511                    let elements = match items {
512                        ElementItems::Functions(funcs) => {
513                            let mut elems =
514                                Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
515                            for func in funcs {
516                                let func = FuncIndex::from_u32(func?);
517                                self.flag_func_escaped(func);
518                                elems.push(func);
519                            }
520                            TableSegmentElements::Functions(elems.into())
521                        }
522                        ElementItems::Expressions(_ty, items) => {
523                            let mut exprs =
524                                Vec::with_capacity(usize::try_from(items.count()).unwrap());
525                            for expr in items {
526                                let (expr, escaped) = ConstExpr::from_wasmparser(self, expr?)?;
527                                exprs.push(expr);
528                                for func in escaped {
529                                    self.flag_func_escaped(func);
530                                }
531                            }
532                            TableSegmentElements::Expressions(exprs.into())
533                        }
534                    };
535
536                    match kind {
537                        ElementKind::Active {
538                            table_index,
539                            offset_expr,
540                        } => {
541                            let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
542                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
543                            debug_assert!(escaped.is_empty());
544
545                            self.result
546                                .module
547                                .table_initialization
548                                .segments
549                                .push(TableSegment {
550                                    table_index,
551                                    offset,
552                                    elements,
553                                });
554                        }
555
556                        ElementKind::Passive => {
557                            let elem_index = ElemIndex::from_u32(index as u32);
558                            let index = self.result.module.passive_elements.len();
559                            self.result.module.passive_elements.push(elements);
560                            self.result
561                                .module
562                                .passive_elements_map
563                                .insert(elem_index, index);
564                        }
565
566                        ElementKind::Declared => {}
567                    }
568                }
569            }
570
571            Payload::CodeSectionStart { count, range, .. } => {
572                self.validator.code_section_start(&range)?;
573                let cnt = usize::try_from(count).unwrap();
574                self.result.function_body_inputs.reserve_exact(cnt);
575                self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
576            }
577
578            Payload::CodeSectionEntry(body) => {
579                let validator = self.validator.code_section_entry(&body)?;
580                let func_index =
581                    self.result.code_index + self.result.module.num_imported_funcs as u32;
582                let func_index = FuncIndex::from_u32(func_index);
583
584                if self.tunables.generate_native_debuginfo {
585                    let sig_index = self.result.module.functions[func_index]
586                        .signature
587                        .unwrap_module_type_index();
588                    let sig = self.types[sig_index].unwrap_func();
589                    let mut locals = Vec::new();
590                    for pair in body.get_locals_reader()? {
591                        let (cnt, ty) = pair?;
592                        let ty = self.convert_valtype(ty)?;
593                        locals.push((cnt, ty));
594                    }
595                    self.result
596                        .debuginfo
597                        .wasm_file
598                        .funcs
599                        .push(FunctionMetadata {
600                            locals: locals.into_boxed_slice(),
601                            params: sig.params().into(),
602                        });
603                }
604                self.result
605                    .function_body_inputs
606                    .push(FunctionBodyData { validator, body });
607                self.result.code_index += 1;
608            }
609
610            Payload::DataSection(data) => {
611                self.validator.data_section(&data)?;
612
613                let initializers = match &mut self.result.module.memory_initialization {
614                    MemoryInitialization::Segmented(i) => i,
615                    _ => unreachable!(),
616                };
617
618                let cnt = usize::try_from(data.count()).unwrap();
619                initializers.reserve_exact(cnt);
620                self.result.data.reserve_exact(cnt);
621
622                for (index, entry) in data.into_iter().enumerate() {
623                    let wasmparser::Data {
624                        kind,
625                        data,
626                        range: _,
627                    } = entry?;
628                    let mk_range = |total: &mut u32| -> Result<_, WasmError> {
629                        let range = u32::try_from(data.len())
630                            .ok()
631                            .and_then(|size| {
632                                let start = *total;
633                                let end = start.checked_add(size)?;
634                                Some(start..end)
635                            })
636                            .ok_or_else(|| {
637                                WasmError::Unsupported(format!(
638                                    "more than 4 gigabytes of data in wasm module",
639                                ))
640                            })?;
641                        *total += range.end - range.start;
642                        Ok(range)
643                    };
644                    match kind {
645                        DataKind::Active {
646                            memory_index,
647                            offset_expr,
648                        } => {
649                            let range = mk_range(&mut self.result.total_data)?;
650                            let memory_index = MemoryIndex::from_u32(memory_index);
651                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
652                            debug_assert!(escaped.is_empty());
653
654                            let initializers = match &mut self.result.module.memory_initialization {
655                                MemoryInitialization::Segmented(i) => i,
656                                _ => unreachable!(),
657                            };
658                            initializers.push(MemoryInitializer {
659                                memory_index,
660                                offset,
661                                data: range,
662                            });
663                            self.result.data.push(data.into());
664                        }
665                        DataKind::Passive => {
666                            let data_index = DataIndex::from_u32(index as u32);
667                            let range = mk_range(&mut self.result.total_passive_data)?;
668                            self.result.passive_data.push(data);
669                            self.result
670                                .module
671                                .passive_data_map
672                                .insert(data_index, range);
673                        }
674                    }
675                }
676            }
677
678            Payload::DataCountSection { count, range } => {
679                self.validator.data_count_section(count, &range)?;
680
681                // Note: the count passed in here is the *total* segment count
682                // There is no way to reserve for just the passive segments as
683                // they are discovered when iterating the data section entries
684                // Given that the total segment count might be much larger than
685                // the passive count, do not reserve anything here.
686            }
687
688            Payload::CustomSection(s)
689                if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
690            {
691                bail!(
692                    "\
693Support for interface types has temporarily been removed from `wasmtime`.
694
695For more information about this temporary change you can read on the issue online:
696
697    https://github.com/bytecodealliance/wasmtime/issues/1271
698
699and for re-adding support for interface types you can see this issue:
700
701    https://github.com/bytecodealliance/wasmtime/issues/677
702"
703                )
704            }
705
706            Payload::CustomSection(s) => {
707                self.register_custom_section(&s);
708            }
709
710            // It's expected that validation will probably reject other
711            // payloads such as `UnknownSection` or those related to the
712            // component model. If, however, something gets past validation then
713            // that's a bug in Wasmtime as we forgot to implement something.
714            other => {
715                self.validator.payload(&other)?;
716                panic!("unimplemented section in wasm file {other:?}");
717            }
718        }
719        Ok(())
720    }
721
722    fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
723        match section.as_known() {
724            KnownCustom::Name(name) => {
725                let result = self.name_section(name);
726                if let Err(e) = result {
727                    log::warn!("failed to parse name section {e:?}");
728                }
729            }
730            _ => {
731                let name = section.name().trim_end_matches(".dwo");
732                if name.starts_with(".debug_") {
733                    self.dwarf_section(name, section);
734                }
735            }
736        }
737    }
738
739    fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
740        if !self.tunables.generate_native_debuginfo && !self.tunables.parse_wasm_debuginfo {
741            self.result.has_unparsed_debuginfo = true;
742            return;
743        }
744        let info = &mut self.result.debuginfo;
745        let dwarf = &mut info.dwarf;
746        let endian = gimli::LittleEndian;
747        let data = section.data();
748        let slice = gimli::EndianSlice::new(data, endian);
749
750        match name {
751            // `gimli::Dwarf` fields.
752            ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
753            ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
754            ".debug_info" => {
755                dwarf.debug_info = gimli::DebugInfo::new(data, endian);
756            }
757            ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
758            ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
759            ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
760            ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
761            ".debug_str_sup" => {
762                let mut dwarf_sup: Dwarf<'data> = Default::default();
763                dwarf_sup.debug_str = gimli::DebugStr::from(slice);
764                dwarf.sup = Some(Arc::new(dwarf_sup));
765            }
766            ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),
767
768            // Additional fields.
769            ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
770            ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
771            ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
772            ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),
773
774            // DWARF package fields
775            ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
776            ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),
777
778            // We don't use these at the moment.
779            ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
780            other => {
781                log::warn!("unknown debug section `{other}`");
782                return;
783            }
784        }
785
786        dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
787        dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
788    }
789
790    /// Declares a new import with the `module` and `field` names, importing the
791    /// `ty` specified.
792    ///
793    /// Note that this method is somewhat tricky due to the implementation of
794    /// the module linking proposal. In the module linking proposal two-level
795    /// imports are recast as single-level imports of instances. That recasting
796    /// happens here by recording an import of an instance for the first time
797    /// we see a two-level import.
798    ///
799    /// When the module linking proposal is disabled, however, disregard this
800    /// logic and instead work directly with two-level imports since no
801    /// instances are defined.
802    fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {
803        let index = self.push_type(ty);
804        self.result.module.initializers.push(Initializer::Import {
805            name: module.to_owned(),
806            field: field.to_owned(),
807            index,
808        });
809    }
810
811    fn push_type(&mut self, ty: EntityType) -> EntityIndex {
812        match ty {
813            EntityType::Function(ty) => EntityIndex::Function({
814                let func_index = self
815                    .result
816                    .module
817                    .push_function(ty.unwrap_module_type_index());
818                // Imported functions can escape; in fact, they've already done
819                // so to get here.
820                self.flag_func_escaped(func_index);
821                func_index
822            }),
823            EntityType::Table(ty) => EntityIndex::Table(self.result.module.tables.push(ty)),
824            EntityType::Memory(ty) => EntityIndex::Memory(self.result.module.memories.push(ty)),
825            EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),
826            EntityType::Tag(ty) => EntityIndex::Tag(self.result.module.tags.push(ty)),
827        }
828    }
829
830    fn flag_func_escaped(&mut self, func: FuncIndex) {
831        let ty = &mut self.result.module.functions[func];
832        // If this was already assigned a funcref index no need to re-assign it.
833        if ty.is_escaping() {
834            return;
835        }
836        let index = self.result.module.num_escaped_funcs as u32;
837        ty.func_ref = FuncRefIndex::from_u32(index);
838        self.result.module.num_escaped_funcs += 1;
839    }
840
841    /// Parses the Name section of the wasm module.
842    fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
843        for subsection in names {
844            match subsection? {
845                wasmparser::Name::Function(names) => {
846                    for name in names {
847                        let Naming { index, name } = name?;
848                        // Skip this naming if it's naming a function that
849                        // doesn't actually exist.
850                        if (index as usize) >= self.result.module.functions.len() {
851                            continue;
852                        }
853
854                        // Store the name unconditionally, regardless of
855                        // whether we're parsing debuginfo, since function
856                        // names are almost always present in the
857                        // final compilation artifact.
858                        let index = FuncIndex::from_u32(index);
859                        self.result
860                            .debuginfo
861                            .name_section
862                            .func_names
863                            .insert(index, name);
864                    }
865                }
866                wasmparser::Name::Module { name, .. } => {
867                    self.result.module.name = Some(name.to_string());
868                    if self.tunables.generate_native_debuginfo {
869                        self.result.debuginfo.name_section.module_name = Some(name);
870                    }
871                }
872                wasmparser::Name::Local(reader) => {
873                    if !self.tunables.generate_native_debuginfo {
874                        continue;
875                    }
876                    for f in reader {
877                        let f = f?;
878                        // Skip this naming if it's naming a function that
879                        // doesn't actually exist.
880                        if (f.index as usize) >= self.result.module.functions.len() {
881                            continue;
882                        }
883                        for name in f.names {
884                            let Naming { index, name } = name?;
885
886                            self.result
887                                .debuginfo
888                                .name_section
889                                .locals_names
890                                .entry(FuncIndex::from_u32(f.index))
891                                .or_insert(HashMap::new())
892                                .insert(index, name);
893                        }
894                    }
895                }
896                wasmparser::Name::Label(_)
897                | wasmparser::Name::Type(_)
898                | wasmparser::Name::Table(_)
899                | wasmparser::Name::Global(_)
900                | wasmparser::Name::Memory(_)
901                | wasmparser::Name::Element(_)
902                | wasmparser::Name::Data(_)
903                | wasmparser::Name::Tag(_)
904                | wasmparser::Name::Field(_)
905                | wasmparser::Name::Unknown { .. } => {}
906            }
907        }
908        Ok(())
909    }
910}
911
912impl TypeConvert for ModuleEnvironment<'_, '_> {
913    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
914        WasmparserTypeConverter::new(&self.types, |idx| {
915            self.result.module.types[idx].unwrap_module_type_index()
916        })
917        .lookup_heap_type(index)
918    }
919
920    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
921        WasmparserTypeConverter::new(&self.types, |idx| {
922            self.result.module.types[idx].unwrap_module_type_index()
923        })
924        .lookup_type_index(index)
925    }
926}
927
928impl ModuleTranslation<'_> {
929    /// Attempts to convert segmented memory initialization into static
930    /// initialization for the module that this translation represents.
931    ///
932    /// If this module's memory initialization is not compatible with paged
933    /// initialization then this won't change anything. Otherwise if it is
934    /// compatible then the `memory_initialization` field will be updated.
935    ///
936    /// Takes a `page_size` argument in order to ensure that all
937    /// initialization is page-aligned for mmap-ability, and
938    /// `max_image_size_always_allowed` to control how we decide
939    /// whether to use static init.
940    ///
941    /// We will try to avoid generating very sparse images, which are
942    /// possible if e.g. a module has an initializer at offset 0 and a
943    /// very high offset (say, 1 GiB). To avoid this, we use a dual
944    /// condition: we always allow images less than
945    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
946    /// can set this if desired to ensure that static init should
947    /// always be done if the size of the module or its heaps is
948    /// otherwise bounded by the system. We also allow images with
949    /// static init data bigger than that, but only if it is "dense",
950    /// defined as having at least half (50%) of its pages with some
951    /// data.
952    ///
953    /// We could do something slightly better by building a dense part
954    /// and keeping a sparse list of outlier/leftover segments (see
955    /// issue #3820). This would also allow mostly-static init of
956    /// modules that have some dynamically-placed data segments. But,
957    /// for now, this is sufficient to allow a system that "knows what
958    /// it's doing" to always get static init.
959    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
960        // This method only attempts to transform a `Segmented` memory init
961        // into a `Static` one, no other state.
962        if !self.module.memory_initialization.is_segmented() {
963            return;
964        }
965
966        // First a dry run of memory initialization is performed. This
967        // collects information about the extent of memory initialized for each
968        // memory as well as the size of all data segments being copied in.
969        struct Memory {
970            data_size: u64,
971            min_addr: u64,
972            max_addr: u64,
973            // The `usize` here is a pointer into `self.data` which is the list
974            // of data segments corresponding to what was found in the original
975            // wasm module.
976            segments: Vec<(usize, StaticMemoryInitializer)>,
977        }
978        let mut info = PrimaryMap::with_capacity(self.module.memories.len());
979        for _ in 0..self.module.memories.len() {
980            info.push(Memory {
981                data_size: 0,
982                min_addr: u64::MAX,
983                max_addr: 0,
984                segments: Vec::new(),
985            });
986        }
987
988        struct InitMemoryAtCompileTime<'a> {
989            module: &'a Module,
990            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
991            idx: usize,
992        }
993        impl InitMemory for InitMemoryAtCompileTime<'_> {
994            fn memory_size_in_bytes(
995                &mut self,
996                memory_index: MemoryIndex,
997            ) -> Result<u64, SizeOverflow> {
998                self.module.memories[memory_index].minimum_byte_size()
999            }
1000
1001            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
1002                match (expr.ops(), self.module.memories[memory_index].idx_type) {
1003                    (&[ConstOp::I32Const(offset)], IndexType::I32) => {
1004                        Some(offset.cast_unsigned().into())
1005                    }
1006                    (&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.cast_unsigned()),
1007                    _ => None,
1008                }
1009            }
1010
1011            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
1012                // Currently `Static` only applies to locally-defined memories,
1013                // so if a data segment references an imported memory then
1014                // transitioning to a `Static` memory initializer is not
1015                // possible.
1016                if self.module.defined_memory_index(memory).is_none() {
1017                    return false;
1018                };
1019                let info = &mut self.info[memory];
1020                let data_len = u64::from(init.data.end - init.data.start);
1021                if data_len > 0 {
1022                    info.data_size += data_len;
1023                    info.min_addr = info.min_addr.min(init.offset);
1024                    info.max_addr = info.max_addr.max(init.offset + data_len);
1025                    info.segments.push((self.idx, init.clone()));
1026                }
1027                self.idx += 1;
1028                true
1029            }
1030        }
1031        let ok = self
1032            .module
1033            .memory_initialization
1034            .init_memory(&mut InitMemoryAtCompileTime {
1035                idx: 0,
1036                module: &self.module,
1037                info: &mut info,
1038            });
1039        if !ok {
1040            return;
1041        }
1042
1043        // Validate that the memory information collected is indeed valid for
1044        // static memory initialization.
1045        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
1046            let image_size = info.max_addr - info.min_addr;
1047
1048            // Simplify things for now by bailing out entirely if any memory has
1049            // a page size smaller than the host's page size. This fixes a case
1050            // where currently initializers are created in host-page-size units
1051            // of length which means that a larger-than-the-entire-memory
1052            // initializer can be created. This can be handled technically but
1053            // would require some more changes to help fix the assert elsewhere
1054            // that this protects against.
1055            if self.module.memories[i].page_size() < page_size {
1056                return;
1057            }
1058
1059            // If the range of memory being initialized is less than twice the
1060            // total size of the data itself then it's assumed that static
1061            // initialization is ok. This means we'll at most double memory
1062            // consumption during the memory image creation process, which is
1063            // currently assumed to "probably be ok" but this will likely need
1064            // tweaks over time.
1065            if image_size < info.data_size.saturating_mul(2) {
1066                continue;
1067            }
1068
1069            // If the memory initialization image is larger than the size of all
1070            // data, then we still allow memory initialization if the image will
1071            // be of a relatively modest size, such as 1MB here.
1072            if image_size < max_image_size_always_allowed {
1073                continue;
1074            }
1075
1076            // At this point memory initialization is concluded to be too
1077            // expensive to do at compile time so it's entirely deferred to
1078            // happen at runtime.
1079            return;
1080        }
1081
1082        // Here's where we've now committed to changing to static memory. The
1083        // memory initialization image is built here from the page data and then
1084        // it's converted to a single initializer.
1085        let data = mem::replace(&mut self.data, Vec::new());
1086        let mut map = PrimaryMap::with_capacity(info.len());
1087        let mut module_data_size = 0u32;
1088        for (memory, info) in info.iter() {
1089            // Create the in-memory `image` which is the initialized contents of
1090            // this linear memory.
1091            let extent = if info.segments.len() > 0 {
1092                (info.max_addr - info.min_addr) as usize
1093            } else {
1094                0
1095            };
1096            let mut image = Vec::with_capacity(extent);
1097            for (idx, init) in info.segments.iter() {
1098                let data = &data[*idx];
1099                assert_eq!(data.len(), init.data.len());
1100                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
1101                if image.len() < offset {
1102                    image.resize(offset, 0u8);
1103                    image.extend_from_slice(data);
1104                } else {
1105                    image.splice(
1106                        offset..(offset + data.len()).min(image.len()),
1107                        data.iter().copied(),
1108                    );
1109                }
1110            }
1111            assert_eq!(image.len(), extent);
1112            assert_eq!(image.capacity(), extent);
1113            let mut offset = if info.segments.len() > 0 {
1114                info.min_addr
1115            } else {
1116                0
1117            };
1118
1119            // Chop off trailing zeros from the image as memory is already
1120            // zero-initialized. Note that `i` is the position of a nonzero
1121            // entry here, so to not lose it we truncate to `i + 1`.
1122            if let Some(i) = image.iter().rposition(|i| *i != 0) {
1123                image.truncate(i + 1);
1124            }
1125
1126            // Also chop off leading zeros, if any.
1127            if let Some(i) = image.iter().position(|i| *i != 0) {
1128                offset += i as u64;
1129                image.drain(..i);
1130            }
1131            let mut len = u64::try_from(image.len()).unwrap();
1132
1133            // The goal is to enable mapping this image directly into memory, so
1134            // the offset into linear memory must be a multiple of the page
1135            // size. If that's not already the case then the image is padded at
1136            // the front and back with extra zeros as necessary
1137            if offset % page_size != 0 {
1138                let zero_padding = offset % page_size;
1139                self.data.push(vec![0; zero_padding as usize].into());
1140                offset -= zero_padding;
1141                len += zero_padding;
1142            }
1143            self.data.push(image.into());
1144            if len % page_size != 0 {
1145                let zero_padding = page_size - (len % page_size);
1146                self.data.push(vec![0; zero_padding as usize].into());
1147                len += zero_padding;
1148            }
1149
1150            // Offset/length should now always be page-aligned.
1151            assert!(offset % page_size == 0);
1152            assert!(len % page_size == 0);
1153
1154            // Create the `StaticMemoryInitializer` which describes this image,
1155            // only needed if the image is actually present and has a nonzero
1156            // length. The `offset` has been calculates above, originally
1157            // sourced from `info.min_addr`. The `data` field is the extent
1158            // within the final data segment we'll emit to an ELF image, which
1159            // is the concatenation of `self.data`, so here it's the size of
1160            // the section-so-far plus the current segment we're appending.
1161            let len = u32::try_from(len).unwrap();
1162            let init = if len > 0 {
1163                Some(StaticMemoryInitializer {
1164                    offset,
1165                    data: module_data_size..module_data_size + len,
1166                })
1167            } else {
1168                None
1169            };
1170            let idx = map.push(init);
1171            assert_eq!(idx, memory);
1172            module_data_size += len;
1173        }
1174        self.data_align = Some(page_size);
1175        self.module.memory_initialization = MemoryInitialization::Static { map };
1176    }
1177
1178    /// Attempts to convert the module's table initializers to
1179    /// FuncTable form where possible. This enables lazy table
1180    /// initialization later by providing a one-to-one map of initial
1181    /// table values, without having to parse all segments.
1182    pub fn try_func_table_init(&mut self) {
1183        // This should be large enough to support very large Wasm
1184        // modules with huge funcref tables, but small enough to avoid
1185        // OOMs or DoS on truly sparse tables.
1186        const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;
1187
1188        // First convert any element-initialized tables to images of just that
1189        // single function if the minimum size of the table allows doing so.
1190        for ((_, init), (_, table)) in self
1191            .module
1192            .table_initialization
1193            .initial_values
1194            .iter_mut()
1195            .zip(
1196                self.module
1197                    .tables
1198                    .iter()
1199                    .skip(self.module.num_imported_tables),
1200            )
1201        {
1202            let table_size = table.limits.min;
1203            if table_size > MAX_FUNC_TABLE_SIZE {
1204                continue;
1205            }
1206            if let TableInitialValue::Expr(expr) = init {
1207                if let [ConstOp::RefFunc(f)] = expr.ops() {
1208                    *init = TableInitialValue::Null {
1209                        precomputed: vec![*f; table_size as usize],
1210                    };
1211                }
1212            }
1213        }
1214
1215        let mut segments = mem::take(&mut self.module.table_initialization.segments)
1216            .into_iter()
1217            .peekable();
1218
1219        // The goal of this loop is to interpret a table segment and apply it
1220        // "statically" to a local table. This will iterate over segments and
1221        // apply them one-by-one to each table.
1222        //
1223        // If any segment can't be applied, however, then this loop exits and
1224        // all remaining segments are placed back into the segment list. This is
1225        // because segments are supposed to be initialized one-at-a-time which
1226        // means that intermediate state is visible with respect to traps. If
1227        // anything isn't statically known to not trap it's pessimistically
1228        // assumed to trap meaning all further segment initializers must be
1229        // applied manually at instantiation time.
1230        while let Some(segment) = segments.peek() {
1231            let defined_index = match self.module.defined_table_index(segment.table_index) {
1232                Some(index) => index,
1233                // Skip imported tables: we can't provide a preconstructed
1234                // table for them, because their values depend on the
1235                // imported table overlaid with whatever segments we have.
1236                None => break,
1237            };
1238
1239            // If the base of this segment is dynamic, then we can't
1240            // include it in the statically-built array of initial
1241            // contents.
1242            let offset = match segment.offset.ops() {
1243                &[ConstOp::I32Const(offset)] => u64::from(offset.cast_unsigned()),
1244                &[ConstOp::I64Const(offset)] => offset.cast_unsigned(),
1245                _ => break,
1246            };
1247
1248            // Get the end of this segment. If out-of-bounds, or too
1249            // large for our dense table representation, then skip the
1250            // segment.
1251            let top = match offset.checked_add(segment.elements.len()) {
1252                Some(top) => top,
1253                None => break,
1254            };
1255            let table_size = self.module.tables[segment.table_index].limits.min;
1256            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
1257                break;
1258            }
1259
1260            match self.module.tables[segment.table_index]
1261                .ref_type
1262                .heap_type
1263                .top()
1264            {
1265                WasmHeapTopType::Func => {}
1266                // If this is not a funcref table, then we can't support a
1267                // pre-computed table of function indices. Technically this
1268                // initializer won't trap so we could continue processing
1269                // segments, but that's left as a future optimization if
1270                // necessary.
1271                WasmHeapTopType::Any
1272                | WasmHeapTopType::Extern
1273                | WasmHeapTopType::Cont
1274                | WasmHeapTopType::Exn => break,
1275            }
1276
1277            // Function indices can be optimized here, but fully general
1278            // expressions are deferred to get evaluated at runtime.
1279            let function_elements = match &segment.elements {
1280                TableSegmentElements::Functions(indices) => indices,
1281                TableSegmentElements::Expressions(_) => break,
1282            };
1283
1284            let precomputed =
1285                match &mut self.module.table_initialization.initial_values[defined_index] {
1286                    TableInitialValue::Null { precomputed } => precomputed,
1287
1288                    // If this table is still listed as an initial value here
1289                    // then that means the initial size of the table doesn't
1290                    // support a precomputed function list, so skip this.
1291                    // Technically this won't trap so it's possible to process
1292                    // further initializers, but that's left as a future
1293                    // optimization.
1294                    TableInitialValue::Expr(_) => break,
1295                };
1296
1297            // At this point we're committing to pre-initializing the table
1298            // with the `segment` that's being iterated over. This segment is
1299            // applied to the `precomputed` list for the table by ensuring
1300            // it's large enough to hold the segment and then copying the
1301            // segment into the precomputed list.
1302            if precomputed.len() < top as usize {
1303                precomputed.resize(top as usize, FuncIndex::reserved_value());
1304            }
1305            let dst = &mut precomputed[offset as usize..top as usize];
1306            dst.copy_from_slice(&function_elements);
1307
1308            // advance the iterator to see the next segment
1309            let _ = segments.next();
1310        }
1311        self.module.table_initialization.segments = segments.collect();
1312    }
1313}
wasmtime_environ/compile/module_environ.rs

wasmtime_environ/compile/
module_environ.rs