wasmtime_internal_unwinder/
exception_table.rs

1//! Compact representation of exception handlers associated with
2//! callsites, for use when searching a Cranelift stack for a handler.
3//!
4//! This module implements (i) conversion from the metadata provided
5//! alongside Cranelift's compilation result (as provided by
6//! [`cranelift_codegen::MachBufferFinalized::call_sites`]) to its
7//! format, and (ii) use of its format to find a handler efficiently.
8//!
9//! The format has been designed so that it can be mapped in from disk
10//! and used without post-processing; this enables efficient
11//! module-loading in runtimes such as Wasmtime.
12
13use object::{Bytes, LittleEndian, U32Bytes};
14
15#[cfg(feature = "cranelift")]
16use alloc::vec;
17use alloc::vec::Vec;
18#[cfg(feature = "cranelift")]
19use cranelift_codegen::{
20    ExceptionContextLoc, FinalizedMachCallSite, FinalizedMachExceptionHandler, binemit::CodeOffset,
21};
22
23/// Collector struct for exception handlers per call site.
24///
25/// # Format
26///
27/// We keep six different arrays (`Vec`s) that we build as we visit
28/// callsites, in ascending offset (address relative to beginning of
29/// code segment) order: callsite offsets, frame offsets,
30/// tag/destination ranges, tags, tag context SP offset, destination
31/// offsets.
32///
33/// The callsite offsets, frame offsets, and tag/destination ranges
34/// logically form a sorted lookup array, allowing us to find
35/// information for any single callsite. The frame offset specifies
36/// distance down to the SP value at the callsite (in bytes), relative
37/// to the FP of that frame. The range denotes a range of indices in
38/// the tag/context and destination offset arrays. Ranges are stored
39/// with the (exclusive) *end* index only; the start index is implicit
40/// as the previous end, or zero if first element.
41///
42/// The slices of tag, context, and handlers arrays named by `ranges`
43/// for each callsite specify a series of handler items for that
44/// callsite. The tag and context together allow a
45/// dynamic-tag-instance match in the unwinder: the context specifies
46/// an offset from SP at the callsite that contains a machine word
47/// (e.g. with vmctx) that, together with the static tag index, can be
48/// used to perform a dynamic match. A context of `-1` indicates no
49/// dynamic context, and a tag of `-1` indicates a catch-all
50/// handler. If a handler item matches, control should be transferred
51/// to the code offset given in the last array, `handlers`.
52///
53/// # Example
54///
55/// An example of this data format:
56///
57/// ```plain
58/// callsites: [0x10, 0x50, 0xf0] // callsites (return addrs) at offsets 0x10, 0x50, 0xf0
59/// ranges: [2, 4, 5]             // corresponding ranges for each callsite
60/// frame_offsets: [0, 0x10, 0]   // corresponding SP-to-FP offsets for each callsite
61/// tags: [1, 5, 1, -1, -1]       // tags for each handler at each callsite
62/// contexts: [-1, -1, 0x10, 0x20, 0x30] // SP-offset for context for each tag
63/// handlers: [0x40, 0x42, 0x6f, 0x71, 0xf5] // handler destinations at each callsite
64/// ```
65///
66/// Expanding this out:
67///
68/// ```plain
69/// callsites: [0x10, 0x50, 0xf0],  # PCs relative to some start of return-points.
70/// frame_offsets: [0, 0x10, 0],    # SP-to-FP offsets at each callsite.
71/// ranges: [
72///     2,  # callsite 0x10 has tags/handlers indices 0..2
73///     4,  # callsite 0x50 has tags/handlers indices 2..4
74///     5,  # callsite 0xf0 has tags/handlers indices 4..5
75/// ],
76/// tags: [
77///     # tags for callsite 0x10:
78///     1,
79///     5,
80///     # tags for callsite 0x50:
81///     1,
82///     -1,  # "catch-all"
83///     # tags for callsite 0xf0:
84///     -1,  # "catch-all"
85/// ]
86/// contexts: [
87///     # SP-offsets for context for each tag at callsite 0x10:
88///     -1,
89///     -1,
90///     # for callsite 0x50:
91///     0x10,
92///     0x20,
93///     # for callsite 0xf0:
94///     0x30,
95/// ]
96/// handlers: [
97///     # handlers for callsite 0x10:
98///     0x40,  # relative PC to handle tag 1 (above)
99///     0x42,  # relative PC to handle tag 5
100///     # handlers for callsite 0x50:
101///     0x6f,  # relative PC to handle tag 1
102///     0x71,  # relative PC to handle all other tags
103///     # handlers for callsite 0xf0:
104///     0xf5,  # relative PC to handle all other tags
105/// ]
106/// ```
#[cfg(feature = "cranelift")]
#[derive(Clone, Debug, Default)]
pub struct ExceptionTableBuilder {
    /// Section-relative return address of every recorded callsite,
    /// one entry per callsite.
    pub callsites: Vec<U32Bytes<LittleEndian>>,
    /// Frame offset for every recorded callsite, parallel to
    /// `callsites`; `u32::MAX` is stored when the callsite carries no
    /// frame offset.
    pub frame_offsets: Vec<U32Bytes<LittleEndian>>,
    /// Exclusive end index into `tags` / `contexts` / `handlers` for
    /// every recorded callsite, parallel to `callsites`. The start
    /// index is the previous entry's value (or zero for the first).
    pub ranges: Vec<U32Bytes<LittleEndian>>,
    /// Tag of every handler item; `u32::MAX` marks a catch-all.
    pub tags: Vec<U32Bytes<LittleEndian>>,
    /// SP-relative offset of the dynamic context for every handler
    /// item, parallel to `tags`; `u32::MAX` means no context.
    pub contexts: Vec<U32Bytes<LittleEndian>>,
    /// Section-relative destination of every handler item, parallel
    /// to `tags`.
    pub handlers: Vec<U32Bytes<LittleEndian>>,
    /// Start offset of the most recently added function; used to
    /// check that functions are added in ascending order.
    last_start_offset: CodeOffset,
}
118
119#[cfg(feature = "cranelift")]
120impl ExceptionTableBuilder {
121    /// Add a function at a given offset from the start of the
122    /// compiled code section, recording information about its call
123    /// sites.
124    ///
125    /// Functions must be added in ascending offset order.
126    pub fn add_func<'a>(
127        &mut self,
128        start_offset: CodeOffset,
129        call_sites: impl Iterator<Item = FinalizedMachCallSite<'a>>,
130    ) -> anyhow::Result<()> {
131        // Ensure that we see functions in offset order.
132        assert!(start_offset >= self.last_start_offset);
133        self.last_start_offset = start_offset;
134
135        // Visit each callsite in turn, translating offsets from
136        // function-local to section-local.
137        let mut handlers = vec![];
138        for call_site in call_sites {
139            let ret_addr = call_site.ret_addr.checked_add(start_offset).unwrap();
140            handlers.extend(call_site.exception_handlers.iter().cloned());
141
142            let start_idx = u32::try_from(self.tags.len()).unwrap();
143            let mut context = u32::MAX;
144            for handler in call_site.exception_handlers {
145                match handler {
146                    FinalizedMachExceptionHandler::Tag(tag, offset) => {
147                        self.tags.push(U32Bytes::new(LittleEndian, tag.as_u32()));
148                        self.contexts.push(U32Bytes::new(LittleEndian, context));
149                        self.handlers.push(U32Bytes::new(
150                            LittleEndian,
151                            offset.checked_add(start_offset).unwrap(),
152                        ));
153                    }
154                    FinalizedMachExceptionHandler::Default(offset) => {
155                        self.tags.push(U32Bytes::new(LittleEndian, u32::MAX));
156                        self.contexts.push(U32Bytes::new(LittleEndian, context));
157                        self.handlers.push(U32Bytes::new(
158                            LittleEndian,
159                            offset.checked_add(start_offset).unwrap(),
160                        ));
161                    }
162                    FinalizedMachExceptionHandler::Context(ExceptionContextLoc::SPOffset(
163                        offset,
164                    )) => {
165                        context = *offset;
166                    }
167                    FinalizedMachExceptionHandler::Context(ExceptionContextLoc::GPR(_)) => {
168                        panic!(
169                            "Wasmtime exception unwind info only supports dynamic contexts on the stack"
170                        );
171                    }
172                }
173            }
174            let end_idx = u32::try_from(self.tags.len()).unwrap();
175
176            // Omit empty callsites for compactness.
177            if end_idx > start_idx {
178                self.ranges.push(U32Bytes::new(LittleEndian, end_idx));
179                self.frame_offsets.push(U32Bytes::new(
180                    LittleEndian,
181                    call_site.frame_offset.unwrap_or(u32::MAX),
182                ));
183                self.callsites.push(U32Bytes::new(LittleEndian, ret_addr));
184            }
185        }
186
187        Ok(())
188    }
189
190    /// Serialize the exception-handler data section, taking a closure
191    /// to consume slices.
192    pub fn serialize<F: FnMut(&[u8])>(&self, mut f: F) {
193        // Serialize the length of `callsites` / `ranges`.
194        let callsite_count = u32::try_from(self.callsites.len()).unwrap();
195        f(&callsite_count.to_le_bytes());
196        // Serialize the length of `tags` / `handlers`.
197        let handler_count = u32::try_from(self.handlers.len()).unwrap();
198        f(&handler_count.to_le_bytes());
199
200        // Serialize `callsites`, `ranges`, `tags`, and `handlers` in
201        // that order.
202        f(object::bytes_of_slice(&self.callsites));
203        f(object::bytes_of_slice(&self.frame_offsets));
204        f(object::bytes_of_slice(&self.ranges));
205        f(object::bytes_of_slice(&self.tags));
206        f(object::bytes_of_slice(&self.contexts));
207        f(object::bytes_of_slice(&self.handlers));
208    }
209
210    /// Serialize the exception-handler data section to a vector of
211    /// bytes.
212    pub fn to_vec(&self) -> Vec<u8> {
213        let mut bytes = vec![];
214        self.serialize(|slice| bytes.extend(slice.iter().cloned()));
215        bytes
216    }
217}
218
/// ExceptionTable deserialized from a serialized slice.
///
/// This struct retains borrows of the various serialized parts of the
/// exception table data as produced by
/// [`ExceptionTableBuilder::serialize`].
#[derive(Clone, Debug)]
pub struct ExceptionTable<'a> {
    /// Return addresses of callsites (offsets into the code section);
    /// binary-searched by the lookup methods.
    callsites: &'a [U32Bytes<LittleEndian>],
    /// Per-callsite exclusive end index into `tags` / `contexts` /
    /// `handlers`; the start is the previous entry, or zero.
    ranges: &'a [U32Bytes<LittleEndian>],
    /// Per-callsite frame offset; `u32::MAX` encodes "none".
    frame_offsets: &'a [U32Bytes<LittleEndian>],
    /// Per-handler tag; `u32::MAX` encodes a catch-all handler.
    tags: &'a [U32Bytes<LittleEndian>],
    /// Per-handler context SP offset; `u32::MAX` encodes "none".
    contexts: &'a [U32Bytes<LittleEndian>],
    /// Per-handler destination code offset.
    handlers: &'a [U32Bytes<LittleEndian>],
}
233
/// Wasmtime exception table item, after parsing.
///
/// Note that this is separately defined from the equivalent type in
/// Cranelift, `cranelift_codegen::FinalizedMachExceptionHandler`,
/// because we need this in runtime-only builds when Cranelift is not
/// included.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ExceptionHandler {
    /// A tag (arbitrary `u32` identifier from CLIF) or `None` for catch-all.
    ///
    /// The serialized value `u32::MAX` is decoded as `None`.
    pub tag: Option<u32>,
    /// Dynamic context, if provided, with which to interpret the
    /// tag. Context is available at the given offset from SP in this
    /// frame.
    ///
    /// The serialized value `u32::MAX` is decoded as `None`.
    pub context_sp_offset: Option<u32>,
    /// Handler code offset.
    pub handler_offset: u32,
}
251
252impl<'a> ExceptionTable<'a> {
253    /// Parse exception tables from a byte-slice as produced by
254    /// [`ExceptionTableBuilder::serialize`].
255    pub fn parse(data: &'a [u8]) -> anyhow::Result<ExceptionTable<'a>> {
256        let mut data = Bytes(data);
257        let callsite_count = data
258            .read::<U32Bytes<LittleEndian>>()
259            .map_err(|_| anyhow::anyhow!("Unable to read callsite count prefix"))?;
260        let callsite_count = usize::try_from(callsite_count.get(LittleEndian))?;
261        let handler_count = data
262            .read::<U32Bytes<LittleEndian>>()
263            .map_err(|_| anyhow::anyhow!("Unable to read handler count prefix"))?;
264        let handler_count = usize::try_from(handler_count.get(LittleEndian))?;
265        let (callsites, data) =
266            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data.0, callsite_count)
267                .map_err(|_| anyhow::anyhow!("Unable to read callsites slice"))?;
268        let (frame_offsets, data) =
269            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, callsite_count)
270                .map_err(|_| anyhow::anyhow!("Unable to read frame_offsets slice"))?;
271        let (ranges, data) =
272            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, callsite_count)
273                .map_err(|_| anyhow::anyhow!("Unable to read ranges slice"))?;
274        let (tags, data) = object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
275            .map_err(|_| anyhow::anyhow!("Unable to read tags slice"))?;
276        let (contexts, data) =
277            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
278                .map_err(|_| anyhow::anyhow!("Unable to read contexts slice"))?;
279        let (handlers, data) =
280            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
281                .map_err(|_| anyhow::anyhow!("Unable to read handlers slice"))?;
282
283        if !data.is_empty() {
284            anyhow::bail!("Unexpected data at end of serialized exception table");
285        }
286
287        Ok(ExceptionTable {
288            callsites,
289            frame_offsets,
290            ranges,
291            tags,
292            contexts,
293            handlers,
294        })
295    }
296
297    /// Look up the set of handlers, if any, for a given return
298    /// address (as an offset into the code section).
299    ///
300    /// The handler for `None` (the catch-all/default handler), if
301    /// any, will always come last.
302    ///
303    /// Note: we use raw `u32` types for code offsets here to avoid
304    /// dependencies on `cranelift-codegen` when this crate is built
305    /// without compiler backend support (runtime-only config).
306    ///
307    /// Returns a tuple of `(frame offset, handler iterator)`. The
308    /// frame offset, if `Some`, specifies the distance from SP to FP
309    /// at this callsite.
310    pub fn lookup_pc(&self, pc: u32) -> (Option<u32>, impl Iterator<Item = ExceptionHandler> + '_) {
311        let callsite_idx = self
312            .callsites
313            .binary_search_by_key(&pc, |callsite| callsite.get(LittleEndian))
314            .ok();
315        let frame_offset = callsite_idx
316            .map(|idx| self.frame_offsets[idx])
317            .and_then(|offset| option_from_u32(offset.get(LittleEndian)));
318
319        (
320            frame_offset,
321            callsite_idx
322                .into_iter()
323                .flat_map(|callsite_idx| self.handlers_for_callsite(callsite_idx)),
324        )
325    }
326
327    /// Look up the frame offset and handler destination if any, for a
328    /// given return address (as an offset into the code section) and
329    /// exception tag.
330    ///
331    /// Note: we use raw `u32` types for code offsets and tags here to
332    /// avoid dependencies on `cranelift-codegen` when this crate is
333    /// built without compiler backend support (runtime-only config).
334    pub fn lookup_pc_tag(&self, pc: u32, tag: u32) -> Option<(u32, u32)> {
335        // First, look up the callsite in the sorted callsites list.
336        let callsite_idx = self
337            .callsites
338            .binary_search_by_key(&pc, |callsite| callsite.get(LittleEndian))
339            .ok()?;
340        let frame_offset =
341            option_from_u32(self.frame_offsets[callsite_idx].get(LittleEndian)).unwrap_or(0);
342
343        let (tags, _, handlers) = self.tags_contexts_handlers_for_callsite(callsite_idx);
344
345        // Is there any handler with an exact tag match?
346        if let Ok(handler_idx) = tags.binary_search_by_key(&tag, |tag| tag.get(LittleEndian)) {
347            return Some((frame_offset, handlers[handler_idx].get(LittleEndian)));
348        }
349
350        // If not, is there a fallback handler? Note that we serialize
351        // it with the tag `u32::MAX`, so it is always last in sorted
352        // order.
353        if tags.last().map(|v| v.get(LittleEndian)) == Some(u32::MAX) {
354            return Some((frame_offset, handlers.last().unwrap().get(LittleEndian)));
355        }
356
357        None
358    }
359
360    fn tags_contexts_handlers_for_callsite(
361        &self,
362        idx: usize,
363    ) -> (
364        &[U32Bytes<LittleEndian>],
365        &[U32Bytes<LittleEndian>],
366        &[U32Bytes<LittleEndian>],
367    ) {
368        let end_idx = self.ranges[idx].get(LittleEndian);
369        let start_idx = if idx > 0 {
370            self.ranges[idx - 1].get(LittleEndian)
371        } else {
372            0
373        };
374
375        // Take the subslices of `tags`, `contexts`, and `handlers`
376        // corresponding to this callsite.
377        let start_idx = usize::try_from(start_idx).unwrap();
378        let end_idx = usize::try_from(end_idx).unwrap();
379        let tags = &self.tags[start_idx..end_idx];
380        let contexts = &self.contexts[start_idx..end_idx];
381        let handlers = &self.handlers[start_idx..end_idx];
382        (tags, contexts, handlers)
383    }
384
385    fn handlers_for_callsite(&self, idx: usize) -> impl Iterator<Item = ExceptionHandler> {
386        let (tags, contexts, handlers) = self.tags_contexts_handlers_for_callsite(idx);
387        tags.iter()
388            .zip(contexts.iter())
389            .zip(handlers.iter())
390            .map(|((tag, context), handler)| {
391                let tag = option_from_u32(tag.get(LittleEndian));
392                let context = option_from_u32(context.get(LittleEndian));
393                let handler = handler.get(LittleEndian);
394                ExceptionHandler {
395                    tag,
396                    context_sp_offset: context,
397                    handler_offset: handler,
398                }
399            })
400    }
401
402    /// Provide an iterator over callsites, and for each callsite, the
403    /// frame offset and arrays of handlers.
404    pub fn into_iter(self) -> impl Iterator<Item = (u32, Option<u32>, Vec<ExceptionHandler>)> + 'a {
405        self.callsites
406            .iter()
407            .map(|pc| pc.get(LittleEndian))
408            .enumerate()
409            .map(move |(i, pc)| {
410                (
411                    pc,
412                    option_from_u32(self.frame_offsets[i].get(LittleEndian)),
413                    self.handlers_for_callsite(i).collect(),
414                )
415            })
416    }
417}
418
/// Decode a serialized `u32` in which `u32::MAX` is the "absent"
/// sentinel: returns `None` for `u32::MAX` and `Some(value)` otherwise.
fn option_from_u32(value: u32) -> Option<u32> {
    (value != u32::MAX).then_some(value)
}
422
#[cfg(all(test, feature = "cranelift"))]
mod test {
    use super::*;
    use cranelift_codegen::entity::EntityRef;
    use cranelift_codegen::ir::ExceptionTag;

    /// Round-trips a small table through the builder, the serialized
    /// byte format and the parser, then checks lookups by return
    /// address.
    #[test]
    fn serialize_exception_table() {
        // All offsets below are function-local; the function itself is
        // placed at 0x100, so lookups use the rebased values.
        let tagged_and_default = [
            FinalizedMachExceptionHandler::Tag(ExceptionTag::new(1), 0x20),
            FinalizedMachExceptionHandler::Tag(ExceptionTag::new(2), 0x30),
            FinalizedMachExceptionHandler::Default(0x40),
        ];
        let default_only = [FinalizedMachExceptionHandler::Default(0x60)];
        let callsites = [
            FinalizedMachCallSite {
                ret_addr: 0x10,
                frame_offset: None,
                exception_handlers: &tagged_and_default,
            },
            // A callsite without handlers: must be dropped from the table.
            FinalizedMachCallSite {
                ret_addr: 0x48,
                frame_offset: None,
                exception_handlers: &[],
            },
            FinalizedMachCallSite {
                ret_addr: 0x50,
                frame_offset: Some(0x20),
                exception_handlers: &default_only,
            },
        ];

        let mut builder = ExceptionTableBuilder::default();
        builder.add_func(0x100, callsites.into_iter()).unwrap();
        let mut bytes = vec![];
        builder.serialize(|slice| bytes.extend(slice.iter().cloned()));

        let table = ExceptionTable::parse(&bytes).unwrap();

        let lookup = |pc: u32| {
            let (frame_offset, handlers) = table.lookup_pc(pc);
            (frame_offset, handlers.collect::<Vec<ExceptionHandler>>())
        };
        let expected = |tag: Option<u32>, handler_offset: u32| ExceptionHandler {
            tag,
            context_sp_offset: None,
            handler_offset,
        };

        // The handler-less callsite is not present.
        assert_eq!(lookup(0x148), (None, vec![]));

        assert_eq!(
            lookup(0x110),
            (
                None,
                vec![
                    expected(Some(1), 0x120),
                    expected(Some(2), 0x130),
                    expected(None, 0x140),
                ]
            )
        );

        assert_eq!(lookup(0x150), (Some(0x20), vec![expected(None, 0x160)]));
    }
}