wasmparser/validator/core/
canonical.rs

1//! Canonicalization of types.
2//!
3//! The unit of canonicalization is a recursion group. Having "unnecessary"
4//! types in a recursion group can "break" canonicalization of other types
5//! within that same recursion group, as can reordering types within a recursion
6//! group.
7//!
8//! It is an invariant that all types defined before the recursion group we are
9//! currently canonicalizing have already been canonicalized themselves.
10//!
11//! Canonicalizing a recursion group then proceeds as follows:
12//!
13//! * First we walk each of its `SubType` elements and put their type references
14//!   (i.e. their `PackedIndex`es) into canonical form. Canonicalizing a
15//!   `PackedIndex` means switching it from indexing into the Wasm module's
16//!   types space into either
17//!
18//!   1. Referencing an already-canonicalized type, for types outside of this
19//!      recursion group. Because inter-group type references can only go
20//!      towards types defined before this recursion group, we know the type is
21//!      already canonicalized and we have a `CoreTypeId` for each of those
22//!      types. This updates the `PackedIndex` into a `CoreTypeId`.
23//!
24//!   2. Indexing into the current recursion group, for intra-group type
25//!      references.
26//!
27//!   Note that (2) has the effect of making the "same" structure of mutual type
28//!   recursion look identical across recursion groups:
29//!
30//!   ```wat
31//!   ;; Before
32//!   (rec (struct (field (module-type 1))) (struct (field (module-type 0))))
33//!   (rec (struct (field (module-type 3))) (struct (field (module-type 2))))
34//!
35//!   ;; After
36//!   (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0))))
37//!   (rec (struct (field (rec-group-type 1))) (struct (field (rec-group-type 0))))
38//!   ```
39//!
40//! * Now that the recursion group's elements are in canonical form, we can
41//!   "simply" hash cons whole rec groups at a time. The `TypesList` morally
42//!   maintains a hash map from `Vec<SubType>` to `RecGroupId` and we can do
43//!   get-or-create operations on it. I say "morally" because we don't actually
44//!   duplicate the `Vec<SubType>` key in that hash map since those elements are
45//!   already stored in the `TypeList`'s internal `SnapshotList<CoreType>`. This
46//!   means we need to do some low-level hash table fiddling with the
47//!   `hashbrown` crate.
48//!
49//! And that's it! That is the whole canonicalization algorithm.
50//!
51//! Some more random things to note:
52//!
53//! * Because we essentially already have to do the check to canonicalize, and
54//!   to avoid additional passes over the types, the canonicalization pass also
55//!   checks that type references are in bounds. These are the only errors that
56//!   can be returned from canonicalization.
57//!
58//! * Canonicalizing requires the `Module` to translate type indices to
59//!   actual `CoreTypeId`s.
60//!
61//! * It is important that *after* we have canonicalized all types, we don't
62//!   need the `Module` anymore. This makes sure that we can, for example,
63//!   intern all types from the same store into the same `TypeList`. Which in
64//!   turn lets us type check function imports of a same-store instance's
65//!   exported functions and we don't need to translate from one module's
66//!   canonical representation to another module's canonical representation or
67//!   perform additional expensive checks to see if the types match or not
68//!   (since the whole point of canonicalization is to avoid that!).
69
70use super::{RecGroupId, TypeAlloc, TypeList};
71use crate::{
72    BinaryReaderError, CompositeInnerType, CompositeType, PackedIndex, RecGroup, Result,
73    StorageType, UnpackedIndex, ValType, WasmFeatures,
74    types::{CoreTypeId, TypeIdentifier},
75};
76
77pub(crate) trait InternRecGroup {
78    fn add_type_id(&mut self, id: CoreTypeId);
79    fn type_id_at(&self, idx: u32, offset: usize) -> Result<CoreTypeId>;
80    fn types_len(&self) -> u32;
81    fn features(&self) -> &WasmFeatures;
82
83    /// Canonicalize the rec group and return its id and whether it is a new group
84    /// (we added its types to the `TypeAlloc`) or not (we deduplicated it with an
85    /// existing canonical rec group).
86    fn canonicalize_and_intern_rec_group(
87        &mut self,
88        types: &mut TypeAlloc,
89        mut rec_group: RecGroup,
90        offset: usize,
91    ) -> Result<()>
92    where
93        Self: Sized,
94    {
95        debug_assert!(rec_group.is_explicit_rec_group() || rec_group.types().len() == 1);
96        if rec_group.is_explicit_rec_group() && !self.features().gc() {
97            bail!(
98                offset,
99                "rec group usage requires `gc` proposal to be enabled"
100            );
101        }
102        if self.features().needs_type_canonicalization() {
103            TypeCanonicalizer::new(self, offset).canonicalize_rec_group(&mut rec_group)?;
104        }
105        let (is_new, rec_group_id) = types
106            .intern_canonical_rec_group(self.features().needs_type_canonicalization(), rec_group);
107        let range = &types[rec_group_id];
108        let start = range.start.index();
109        let end = range.end.index();
110
111        for i in start..end {
112            let i = u32::try_from(i).unwrap();
113            let id = CoreTypeId::from_index(i);
114            debug_assert!(types.get(id).is_some());
115            self.add_type_id(id);
116            if is_new {
117                self.check_subtype(rec_group_id, id, types, offset)?;
118            }
119        }
120
121        Ok(())
122    }
123
124    fn check_subtype(
125        &mut self,
126        rec_group: RecGroupId,
127        id: CoreTypeId,
128        types: &mut TypeAlloc,
129        offset: usize,
130    ) -> Result<()> {
131        let ty = &types[id];
132        if !self.features().gc() && (!ty.is_final || ty.supertype_idx.is_some()) {
133            bail!(offset, "gc proposal must be enabled to use subtypes");
134        }
135
136        self.check_composite_type(&ty.composite_type, &types, offset)?;
137
138        let depth = if let Some(supertype_index) = ty.supertype_idx {
139            debug_assert!(supertype_index.is_canonical());
140            let sup_id = self.at_packed_index(types, rec_group, supertype_index, offset)?;
141            if types[sup_id].is_final {
142                bail!(offset, "sub type cannot have a final super type");
143            }
144            if !types.matches(id, sup_id) {
145                bail!(offset, "sub type must match super type");
146            }
147            let depth = types.get_subtyping_depth(sup_id) + 1;
148            if usize::from(depth) > crate::limits::MAX_WASM_SUBTYPING_DEPTH {
149                bail!(
150                    offset,
151                    "sub type hierarchy too deep: found depth {}, cannot exceed depth {}",
152                    depth,
153                    crate::limits::MAX_WASM_SUBTYPING_DEPTH,
154                );
155            }
156            depth
157        } else {
158            0
159        };
160        types.set_subtyping_depth(id, depth);
161
162        Ok(())
163    }
164
165    fn check_composite_type(
166        &mut self,
167        ty: &CompositeType,
168        types: &TypeList,
169        offset: usize,
170    ) -> Result<()> {
171        let features = self.features();
172        let check = |ty: &ValType, shared: bool| {
173            features
174                .check_value_type(*ty)
175                .map_err(|e| BinaryReaderError::new(e, offset))?;
176            if shared && !types.valtype_is_shared(*ty) {
177                return Err(BinaryReaderError::new(
178                    "shared composite type must contain shared types",
179                    offset,
180                ));
181                // The other cases are fine:
182                // - both shared or unshared: good to go
183                // - the func type is unshared, `ty` is shared: though
184                //   odd, we _can_ in fact use shared values in
185                //   unshared composite types (e.g., functions).
186            }
187            Ok(())
188        };
189        if !features.shared_everything_threads() && ty.shared {
190            return Err(BinaryReaderError::new(
191                "shared composite types require the shared-everything-threads proposal",
192                offset,
193            ));
194        }
195        match &ty.inner {
196            CompositeInnerType::Func(t) => {
197                for vt in t.params().iter().chain(t.results()) {
198                    check(vt, ty.shared)?;
199                }
200                if t.results().len() > 1 && !features.multi_value() {
201                    return Err(BinaryReaderError::new(
202                        "func type returns multiple values but the multi-value feature is not enabled",
203                        offset,
204                    ));
205                }
206            }
207            CompositeInnerType::Array(t) => {
208                if !features.gc() {
209                    bail!(
210                        offset,
211                        "array indexed types not supported without the gc feature",
212                    );
213                }
214                if !features.gc_types() {
215                    bail!(
216                        offset,
217                        "cannot define array types when gc types are disabled",
218                    );
219                }
220                match &t.0.element_type {
221                    StorageType::I8 | StorageType::I16 => {
222                        // Note: scalar types are always `shared`.
223                    }
224                    StorageType::Val(value_type) => check(value_type, ty.shared)?,
225                };
226            }
227            CompositeInnerType::Struct(t) => {
228                if !features.gc() {
229                    bail!(
230                        offset,
231                        "struct indexed types not supported without the gc feature",
232                    );
233                }
234                if !features.gc_types() {
235                    bail!(
236                        offset,
237                        "cannot define struct types when gc types are disabled",
238                    );
239                }
240                for ft in t.fields.iter() {
241                    match &ft.element_type {
242                        StorageType::I8 | StorageType::I16 => {
243                            // Note: scalar types are always `shared`.
244                        }
245                        StorageType::Val(value_type) => check(value_type, ty.shared)?,
246                    }
247                }
248            }
249            CompositeInnerType::Cont(t) => {
250                if !features.stack_switching() {
251                    bail!(
252                        offset,
253                        "cannot define continuation types when stack switching is disabled",
254                    );
255                }
256                if !features.gc_types() {
257                    bail!(
258                        offset,
259                        "cannot define continuation types when gc types are disabled",
260                    );
261                }
262                // Check that the type index points to a valid function type.
263                let id = t.0.as_core_type_id().unwrap();
264                match types[id].composite_type.inner {
265                    CompositeInnerType::Func(_) => (),
266                    _ => bail!(offset, "non-function type {}", id.index()),
267                }
268            }
269        }
270        Ok(())
271    }
272
273    fn at_packed_index(
274        &self,
275        types: &TypeList,
276        rec_group: RecGroupId,
277        index: PackedIndex,
278        offset: usize,
279    ) -> Result<CoreTypeId> {
280        match index.unpack() {
281            UnpackedIndex::Id(id) => Ok(id),
282            UnpackedIndex::Module(idx) => self.type_id_at(idx, offset),
283            UnpackedIndex::RecGroup(idx) => types.rec_group_local_id(rec_group, idx, offset),
284        }
285    }
286}
287
288/// The kind of canonicalization we are doing.
289#[derive(Clone, Copy, Debug, PartialEq, Eq)]
290enum CanonicalizationMode {
291    /// Standard canonicalization: turns module indices into either (1)
292    /// `CoreTypeId`s for inter-group references or (2) rec-group-local indices
293    /// for intra-group references.
294    HashConsing,
295
296    /// Turns all type reference indices into `CoreTypeId`s, even from within
297    /// the same rec group. Not useful for hash consing, but useful when
298    /// exposing types to end users so they don't have to deal with
299    /// rec-group-local indices.
300    OnlyIds,
301}
302
303pub(crate) struct TypeCanonicalizer<'a> {
304    module: &'a dyn InternRecGroup,
305    rec_group_start: u32,
306    rec_group_len: u32,
307    offset: usize,
308    mode: CanonicalizationMode,
309    within_rec_group: Option<core::ops::Range<CoreTypeId>>,
310}
311
312impl<'a> TypeCanonicalizer<'a> {
313    pub fn new(module: &'a dyn InternRecGroup, offset: usize) -> Self {
314        // These defaults will work for when we are canonicalizing types from
315        // outside of a rec group definition, forcing all `PackedIndex`es to be
316        // canonicalized to `CoreTypeId`s.
317        let rec_group_start = u32::MAX;
318        let rec_group_len = 0;
319
320        Self {
321            module,
322            rec_group_start,
323            rec_group_len,
324            offset,
325            mode: CanonicalizationMode::HashConsing,
326            within_rec_group: None,
327        }
328    }
329
330    fn allow_gc(&self) -> bool {
331        self.module.features().gc()
332    }
333
334    fn canonicalize_rec_group(&mut self, rec_group: &mut RecGroup) -> Result<()> {
335        // Re-initialize these fields so that we properly canonicalize
336        // intra-rec-group type references into indices into the rec group
337        // rather than as `CoreTypeId`s.
338        self.rec_group_start = self.module.types_len();
339        self.rec_group_len = u32::try_from(rec_group.types().len()).unwrap();
340
341        for (rec_group_local_index, ty) in rec_group.types_mut().enumerate() {
342            let rec_group_local_index = u32::try_from(rec_group_local_index).unwrap();
343            let type_index = self.rec_group_start + rec_group_local_index;
344
345            if let Some(sup) = ty.supertype_idx.as_mut() {
346                if sup.as_module_index().map_or(false, |i| i >= type_index) {
347                    bail!(self.offset, "supertypes must be defined before subtypes");
348                }
349            }
350
351            ty.remap_indices(&mut |idx| self.canonicalize_type_index(idx))?;
352        }
353
354        Ok(())
355    }
356
357    fn canonicalize_type_index(&self, ty: &mut PackedIndex) -> Result<()> {
358        match ty.unpack() {
359            UnpackedIndex::Id(_) => Ok(()),
360            UnpackedIndex::Module(index) => {
361                if index < self.rec_group_start || self.mode == CanonicalizationMode::OnlyIds {
362                    let id = self.module.type_id_at(index, self.offset)?;
363                    if let Some(id) = PackedIndex::from_id(id) {
364                        *ty = id;
365                        return Ok(());
366                    } else {
367                        bail!(
368                            self.offset,
369                            "implementation limit: too many types in `TypeList`"
370                        )
371                    }
372                }
373
374                // When GC is not enabled the `rec_group_len == 1` so any rec group
375                // local type references will be direct self references. But any kind of
376                // type recursion, including self references, is not allowed in the
377                // typed function references proposal, only the GC proposal.
378                debug_assert!(self.allow_gc() || self.rec_group_len == 1);
379                let local = index - self.rec_group_start;
380                if local < self.rec_group_len {
381                    if self.allow_gc() {
382                        if let Some(id) = PackedIndex::from_rec_group_index(local) {
383                            *ty = id;
384                            return Ok(());
385                        } else {
386                            bail!(
387                                self.offset,
388                                "implementation limit: too many types in a recursion group"
389                            )
390                        }
391                    } else {
392                        bail!(
393                            self.offset,
394                            "unknown type {index}: type index out of bounds because the GC proposal is disabled"
395                        )
396                    }
397                }
398
399                bail!(
400                    self.offset,
401                    "unknown type {index}: type index out of bounds"
402                )
403            }
404            UnpackedIndex::RecGroup(local_index) => match self.mode {
405                CanonicalizationMode::HashConsing => Ok(()),
406                CanonicalizationMode::OnlyIds => {
407                    let rec_group_elems = self.within_rec_group.as_ref().expect(
408                        "configured to canonicalize all type reference indices to `CoreTypeId`s \
409                         and found rec-group-local index, but missing `within_rec_group` context",
410                    );
411
412                    let rec_group_len = rec_group_elems.end.index() - rec_group_elems.start.index();
413                    let rec_group_len = u32::try_from(rec_group_len).unwrap();
414                    assert!(local_index < rec_group_len);
415
416                    let rec_group_start = u32::try_from(rec_group_elems.start.index()).unwrap();
417
418                    let id = CoreTypeId::from_index(rec_group_start + local_index);
419                    *ty = PackedIndex::from_id(id).expect(
420                        "should fit in impl limits since we already have the end of the rec group \
421                         constructed successfully",
422                    );
423                    Ok(())
424                }
425            },
426        }
427    }
428}