cranelift_codegen/machinst/blockorder.rs

//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//!              CLIF block 0
//!               /           \
//!       (edge 0->1)         (edge 0->2)
//!              |                |
//!       CLIF block 1         CLIF block 2
//!              \                /
//!           (edge 1->3)   (edge 2->3)
//!                   \      /
//!                 CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//!            +--------------+
//!            | CLIF block 0 |
//!            +--------------+
//!               /           \
//!     +--------------+     +--------------+
//!     | (edge 0->1)  |     | (edge 0->2)  |
//!     | CLIF block 1 |     | CLIF block 2 |
//!     | (edge 1->3)  |     | (edge 2->3)  |
//!     +--------------+     +--------------+
//!                \           /
//!                 \         /
//!                +------------+
//!                |CLIF block 3|
//!                +------------+
//! ```
//!
//! Each `LoweredBlock` names just an original CLIF block, or just an edge block.
//!
//! To compute this lowering, we walk the blocks in reverse postorder (as
//! computed by the dominator-tree pass), inserting a lowered block for each
//! critical edge immediately after its predecessor block. The
//! CLIF-plus-edge-block graph is never actually materialized; it is defined
//! implicitly by a "successors" function.
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
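//!
//! A rough sketch of how this type is meant to be used during lowering (the
//! real call sites live elsewhere in `machinst`; this example is illustrative
//! only and is not compiled as a doctest):
//!
//! ```ignore
//! // `func`, `domtree`, and `ctrl_plane` are assumed to come from the
//! // surrounding lowering context.
//! let order = BlockLoweringOrder::new(&func, &domtree, &mut ctrl_plane);
//! for (i, lb) in order.lowered_order().iter().enumerate() {
//!     let bindex = BlockIndex::new(i);
//!     // `orig_block()` is `Some(..)` for original CLIF blocks and `None`
//!     // for inserted critical-edge blocks.
//!     let _maybe_clif_block = lb.orig_block();
//!     // Successors of this lowered block, plus the branch instruction
//!     // (if any) that transfers control to them.
//!     let (_opt_branch, _succs) = order.succ_indices(bindex);
//!     // Cold blocks are sunk at emission time rather than reordered here.
//!     let _cold = order.is_cold(bindex);
//! }
//! ```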

use crate::dominator_tree::DominatorTree;
use crate::entity::SecondaryMap;
use crate::inst_predicates::visit_block_succs;
use crate::ir::{Block, Function, Inst, Opcode};
use crate::{machinst::*, trace};
use rustc_hash::{FxHashMap, FxHashSet};
use smallvec::SmallVec;

/// Mapping from CLIF BBs to VCode BBs.
#[derive(Debug)]
pub struct BlockLoweringOrder {
    /// Lowered blocks, in BlockIndex order. Each entry is either an original
    /// CLIF block or an inserted critical-edge block; see [LoweredBlock] for
    /// details.
    lowered_order: Vec<LoweredBlock>,
    /// BlockIndex values for successors of all lowered blocks, indexing `lowered_order`.
    lowered_succ_indices: Vec<BlockIndex>,
    /// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
    /// block. Indexed by lowering-order index (`BlockIndex`).
    lowered_succ_ranges: Vec<(Option<Inst>, std::ops::Range<usize>)>,
    /// BlockIndex for each original Block.
    blockindex_by_block: SecondaryMap<Block, BlockIndex>,
    /// Cold blocks. These blocks are not reordered in the
    /// `lowered_order` above; the lowered order must respect RPO
    /// (uses after defs) in order for lowering to be
    /// correct. Instead, this set is used to provide `is_cold()`,
    /// which is used by VCode emission to sink the blocks at the last
    /// moment (when we actually emit bytes into the MachBuffer).
    cold_blocks: FxHashSet<BlockIndex>,
    /// Lowered blocks that are indirect branch targets.
    indirect_branch_targets: FxHashSet<BlockIndex>,
}

/// A block in the lowered order: either an original CLIF block, or an
/// inserted block that splits a critical edge between two CLIF blocks.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
    /// Block in original CLIF.
    Orig {
        /// Original CLIF block.
        block: Block,
    },

    /// Critical edge between two CLIF blocks.
    CriticalEdge {
        /// The predecessor block.
        pred: Block,

        /// The successor block.
        succ: Block,

        /// The index of this branch in the successor edges from `pred`, following the same
        /// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
        /// multiple edges between the same CLIF blocks.
        succ_idx: u32,
    },
}

impl LoweredBlock {
    /// Unwrap an `Orig` block.
    pub fn orig_block(&self) -> Option<Block> {
        match self {
            &LoweredBlock::Orig { block } => Some(block),
            &LoweredBlock::CriticalEdge { .. } => None,
        }
    }

    /// The associated in-edge predecessor, if this is a critical edge.
    #[cfg(test)]
    pub fn in_edge(&self) -> Option<Block> {
        match self {
            &LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
            &LoweredBlock::Orig { .. } => None,
        }
    }

    /// The associated out-edge successor, if this is a critical edge.
    pub fn out_edge(&self) -> Option<Block> {
        match self {
            &LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
            &LoweredBlock::Orig { .. } => None,
        }
    }
}

impl BlockLoweringOrder {
    /// Compute and return a lowered block order for `f`.
    pub fn new(
        f: &Function,
        domtree: &DominatorTree,
        ctrl_plane: &mut ControlPlane,
    ) -> BlockLoweringOrder {
        trace!("BlockLoweringOrder: function body {:?}", f);

        // Step 1: compute the in-edge and out-edge count of every block.
        let mut block_in_count = SecondaryMap::with_default(0);
        let mut block_out_count = SecondaryMap::with_default(0);

        // Block successors are stored as `LoweredBlock`s to simplify the construction of
        // `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
        // updated to be `CriticalEdge` when those cases are identified in step 2 below.
        let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
        let mut block_succ_range = SecondaryMap::with_default(0..0);

        let mut indirect_branch_target_clif_blocks = FxHashSet::default();

        for block in f.layout.blocks() {
            let start = block_succs.len();
            visit_block_succs(f, block, |_, succ, from_table| {
                block_out_count[block] += 1;
                block_in_count[succ] += 1;
                block_succs.push(LoweredBlock::Orig { block: succ });

                if from_table {
                    indirect_branch_target_clif_blocks.insert(succ);
                }
            });

            // Ensure that blocks terminated by br_table instructions
            // with an empty jump table are still treated like
            // conditional blocks from the point of view of critical
            // edge splitting. Also do the same for TryCall and
            // TryCallIndirect: we cannot have edge moves before the
            // branch, even if they have empty handler tables and thus
            // would otherwise have only one successor.
            if let Some(inst) = f.layout.last_inst(block) {
                match f.dfg.insts[inst].opcode() {
                    Opcode::BrTable | Opcode::TryCall | Opcode::TryCallIndirect => {
                        block_out_count[block] = block_out_count[block].max(2);
                    }
                    _ => {}
                }
            }

            let end = block_succs.len();
            block_succ_range[block] = start..end;
        }
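
        // Worked example (the diamond CFG from the module docs, with edges
        // 0->1, 0->2, 1->3, 2->3): after this loop, block 0 has out-count 2,
        // blocks 1 and 2 each have in-count 1 and out-count 1, and block 3
        // has in-count 2. Step 2 below splits an edge only when the
        // predecessor's out-count *and* the successor's in-count both exceed
        // 1, i.e. exactly on critical edges.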

        // Step 2: walk the postorder from the domtree in reverse to produce our desired node
        // lowering order, identifying critical edges to split along the way.

        let mut lowered_order = Vec::new();
        let mut blockindex_by_block = SecondaryMap::with_default(BlockIndex::invalid());
        for &block in domtree.cfg_rpo() {
            let idx = BlockIndex::new(lowered_order.len());
            lowered_order.push(LoweredBlock::Orig { block });
            blockindex_by_block[block] = idx;

            if block_out_count[block] > 1 {
                let range = block_succ_range[block].clone();

                // If chaos-mode is enabled in the control plane, iterate over
                // the successors in an arbitrary order, which should have no
                // impact on correctness. The order of the blocks is generally
                // relevant: Uses must be seen before defs for dead-code
                // elimination.
                let succs = ctrl_plane.shuffled(block_succs[range].iter_mut().enumerate());

                for (succ_ix, lb) in succs {
                    let succ = lb.orig_block().unwrap();
                    if block_in_count[succ] > 1 {
                        // Mutate the successor to be a critical edge, as `block` has multiple
                        // edges leaving it, and `succ` has multiple edges entering it.
                        *lb = LoweredBlock::CriticalEdge {
                            pred: block,
                            succ,
                            succ_idx: succ_ix as u32,
                        };
                        lowered_order.push(*lb);
                    }
                }
            }
        }
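
        // Continuing the diamond example: neither 0->1 nor 0->2 is split
        // (each successor has only one predecessor), and 1->3 and 2->3 are
        // not split (each predecessor has only one successor), so
        // `lowered_order` is just the four original blocks in RPO. See
        // `test_blockorder_critedge` below for a CFG that does get split.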

        let lb_to_bindex = FxHashMap::from_iter(
            lowered_order
                .iter()
                .enumerate()
                .map(|(i, &lb)| (lb, BlockIndex::new(i))),
        );

        // Step 3: build the successor tables given the lowering order. We can't perform this step
        // during the creation of `lowered_order`, as we need `lb_to_bindex` to be fully populated
        // first.
        let mut lowered_succ_indices = Vec::new();
        let mut cold_blocks = FxHashSet::default();
        let mut indirect_branch_targets = FxHashSet::default();
        let lowered_succ_ranges =
            Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
                let bindex = BlockIndex::new(ix);
                let start = lowered_succ_indices.len();
                let opt_inst = match lb {
                    // Block successors are pulled directly over, as they'll have been mutated when
                    // determining the block order already.
                    &LoweredBlock::Orig { block } => {
                        let range = block_succ_range[block].clone();
                        lowered_succ_indices
                            .extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));

                        if f.layout.is_cold(block) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&block) {
                            indirect_branch_targets.insert(bindex);
                        }

                        let last = f.layout.last_inst(block).unwrap();
                        let opcode = f.dfg.insts[last].opcode();

                        assert!(opcode.is_terminator());

                        opcode.is_branch().then_some(last)
                    }

                    // Critical edges won't have successor information in block_succ_range, but
                    // they only have a single known successor to record anyway.
                    &LoweredBlock::CriticalEdge { succ, .. } => {
                        let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
                        lowered_succ_indices.push(succ_index);

                        // Edges inherit indirect branch and cold block metadata from their
                        // successor.

                        if f.layout.is_cold(succ) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&succ) {
                            indirect_branch_targets.insert(bindex);
                        }

                        None
                    }
                };
                let end = lowered_succ_indices.len();
                (opt_inst, start..end)
            }));

        let result = BlockLoweringOrder {
            lowered_order,
            lowered_succ_indices,
            lowered_succ_ranges,
            blockindex_by_block,
            cold_blocks,
            indirect_branch_targets,
        };

        trace!("BlockLoweringOrder: {:#?}", result);
        result
    }

    /// Get the lowered order of blocks.
    pub fn lowered_order(&self) -> &[LoweredBlock] {
        &self.lowered_order[..]
    }

    /// Get the BlockIndex, if any, for a given Block.
    ///
    /// The result will be `None` if the given Block is unreachable
    /// (and thus does not appear in the lowered order).
    pub fn lowered_index_for_block(&self, block: Block) -> Option<BlockIndex> {
        let idx = self.blockindex_by_block[block];
        if idx.is_valid() { Some(idx) } else { None }
    }

    /// Get the successor indices for a lowered block.
    pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
        let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
        (*opt_inst, &self.lowered_succ_indices[range.clone()])
    }

    /// Determine whether the given lowered-block index is cold.
    pub fn is_cold(&self, block: BlockIndex) -> bool {
        self.cold_blocks.contains(&block)
    }

    /// Determine whether the given lowered-block index is an indirect branch
    /// target.
    pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
        self.indirect_branch_targets.contains(&block)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::flowgraph::ControlFlowGraph;
    use crate::ir::UserFuncName;
    use crate::ir::types::*;
    use crate::ir::{AbiParam, InstBuilder, Signature};
    use crate::isa::CallConv;

    /// Build a test function with `n_blocks` blocks and the given edge list
    /// (sorted by source block), then compute its block lowering order.
    fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
        assert!(n_blocks > 0);

        let name = UserFuncName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);
        let blocks = (0..n_blocks)
            .map(|i| {
                let bb = func.dfg.make_block();
                assert!(bb.as_u32() == i as u32);
                bb
            })
            .collect::<Vec<_>>();

        let arg0 = func.dfg.append_block_param(blocks[0], I32);

        let mut pos = FuncCursor::new(&mut func);

        let mut edge = 0;
        for i in 0..n_blocks {
            pos.insert_block(blocks[i]);
            let mut succs = vec![];
            while edge < edges.len() && edges[edge].0 == i {
                succs.push(edges[edge].1);
                edge += 1;
            }
            if succs.len() == 0 {
                pos.ins().return_(&[arg0]);
            } else if succs.len() == 1 {
                pos.ins().jump(blocks[succs[0]], &[]);
            } else if succs.len() == 2 {
                pos.ins()
                    .brif(arg0, blocks[succs[0]], &[], blocks[succs[1]], &[]);
            } else {
                panic!("Too many successors");
            }
        }

        let mut cfg = ControlFlowGraph::new();
        cfg.compute(&func);
        let dom_tree = DominatorTree::with_function(&func, &cfg);

        BlockLoweringOrder::new(&func, &dom_tree, &mut Default::default())
    }

    #[test]
    fn test_blockorder_diamond() {
        let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);

        // This test case doesn't need to introduce any critical edges, as all regalloc allocations
        // can sit on either the entry or exit of blocks 1 and 2.
        assert_eq!(order.lowered_order.len(), 4);
    }

    #[test]
    fn test_blockorder_critedge() {
        //            0
        //          /   \
        //         1     2
        //        /  \     \
        //       3    4    |
        //       |\  _|____|
        //       | \/ |
        //       | /\ |
        //       5    6
        //
        // (3 -> 5 and 3 -> 6 are critical edges and must be split)
        //
        let order = build_test_func(
            7,
            &[
                (0, 1),
                (0, 2),
                (1, 3),
                (1, 4),
                (2, 5),
                (3, 5),
                (3, 6),
                (4, 6),
            ],
        );

        assert_eq!(order.lowered_order.len(), 9);
        println!("ordered = {:?}", order.lowered_order);

        // block 0
        assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
        assert!(order.lowered_order[0].in_edge().is_none());
        assert!(order.lowered_order[0].out_edge().is_none());

        // block 2
        assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2);
        assert!(order.lowered_order[1].in_edge().is_none());
        assert!(order.lowered_order[1].out_edge().is_none());

        // block 1
        assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1);
        assert!(order.lowered_order[2].in_edge().is_none());
        assert!(order.lowered_order[2].out_edge().is_none());

        // block 4
        assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4);
        assert!(order.lowered_order[3].in_edge().is_none());
        assert!(order.lowered_order[3].out_edge().is_none());

        // block 3
        assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3);
        assert!(order.lowered_order[4].in_edge().is_none());
        assert!(order.lowered_order[4].out_edge().is_none());

        // critical edge 3 -> 5
        assert!(order.lowered_order[5].orig_block().is_none());
        assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5);

        // critical edge 3 -> 6
        assert!(order.lowered_order[6].orig_block().is_none());
        assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6);

        // block 6
        assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6);
        assert!(order.lowered_order[7].in_edge().is_none());
        assert!(order.lowered_order[7].out_edge().is_none());

        // block 5
        assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5);
        assert!(order.lowered_order[8].in_edge().is_none());
        assert!(order.lowered_order[8].out_edge().is_none());
    }
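
    // A small additional sanity check (not in the original test suite): a
    // straight-line CFG has no critical edges, so the lowered order is just
    // the original blocks in RPO.
    #[test]
    fn test_blockorder_straight_line() {
        let order = build_test_func(3, &[(0, 1), (1, 2)]);

        assert_eq!(order.lowered_order.len(), 3);
        for (i, lb) in order.lowered_order.iter().enumerate() {
            // Every lowered block is an original CLIF block, in order.
            assert_eq!(lb.orig_block().unwrap().as_u32(), i as u32);
        }
    }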
}