Auto merge of #32210 - Aatch:mir-traversal, r=nikomatsakis

rBreak Critical Edges and other MIR work

This PR is built on top of #32080.

This adds the basic depth-first traversals for MIR, preorder, postorder and reverse postorder. The MIR blocks are now translated using reverse postorder. There is also a transform for breaking critical edges, which includes the edges from `invoke`d calls (`Drop` and `Call`), to account for the fact that we can't add code after an `invoke`. It also stops generating the intermediate block (since the transform essentially does it if necessary already).

The kinds of cases this deals with are difficult to produce, so the test is the one I managed to get. However, it seems to bootstrap with `-Z orbit`, which it didn't before my changes.
This commit is contained in:
bors 2016-04-03 08:58:59 -07:00
commit c0b8c43820
10 changed files with 539 additions and 49 deletions

View File

@ -8,7 +8,10 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::iter::FromIterator;
/// A very simple BitVector type.
#[derive(Clone)]
pub struct BitVector {
data: Vec<u64>,
}
@ -50,7 +53,9 @@ impl BitVector {
pub fn grow(&mut self, num_bits: usize) {
let num_words = u64s(num_bits);
let extra_words = self.data.len() - num_words;
self.data.extend((0..extra_words).map(|_| 0));
if extra_words > 0 {
self.data.extend((0..extra_words).map(|_| 0));
}
}
/// Iterates over indexes of set bits in a sorted order
@ -93,6 +98,27 @@ impl<'a> Iterator for BitVectorIter<'a> {
}
}
impl FromIterator<bool> for BitVector {
fn from_iter<I>(iter: I) -> BitVector where I: IntoIterator<Item=bool> {
let iter = iter.into_iter();
let (len, _) = iter.size_hint();
// Make the minimum length for the bitvector 64 bits since that's
// the smallest non-zero size anyway.
let len = if len < 64 { 64 } else { len };
let mut bv = BitVector::new(len);
for (idx, val) in iter.enumerate() {
if idx > len {
bv.grow(idx);
}
if val {
bv.insert(idx);
}
}
bv
}
}
/// A "bit matrix" is basically a square matrix of booleans
/// represented as one gigantic bitvector. In other words, it is as if
/// you have N bitvectors, each of length N. Note that `elements` here is `N`/

View File

@ -881,10 +881,7 @@ pub fn phase_3_run_analysis_passes<'tcx, F, R>(sess: &'tcx Session,
passes.push_pass(box mir::transform::remove_dead_blocks::RemoveDeadBlocks);
passes.push_pass(box mir::transform::type_check::TypeckMir);
passes.push_pass(box mir::transform::simplify_cfg::SimplifyCfg);
// Late passes
passes.push_pass(box mir::transform::no_landing_pads::NoLandingPads);
passes.push_pass(box mir::transform::remove_dead_blocks::RemoveDeadBlocks);
passes.push_pass(box mir::transform::erase_regions::EraseRegions);
// And run everything.
passes.run_passes(tcx, &mut mir_map);
});
@ -937,16 +934,25 @@ pub fn phase_3_run_analysis_passes<'tcx, F, R>(sess: &'tcx Session,
/// Run the translation phase to LLVM, after which the AST and analysis can
pub fn phase_4_translate_to_llvm<'tcx>(tcx: &TyCtxt<'tcx>,
mir_map: MirMap<'tcx>,
analysis: ty::CrateAnalysis)
-> trans::CrateTranslation {
mut mir_map: MirMap<'tcx>,
analysis: ty::CrateAnalysis) -> trans::CrateTranslation {
let time_passes = tcx.sess.time_passes();
time(time_passes,
"resolving dependency formats",
|| dependency_format::calculate(&tcx.sess));
// Option dance to work around the lack of stack once closures.
// Run the passes that transform the MIR into a more suitable for translation
// to LLVM code.
time(time_passes, "Prepare MIR codegen passes", || {
let mut passes = ::rustc::mir::transform::Passes::new();
passes.push_pass(box mir::transform::no_landing_pads::NoLandingPads);
passes.push_pass(box mir::transform::remove_dead_blocks::RemoveDeadBlocks);
passes.push_pass(box mir::transform::erase_regions::EraseRegions);
passes.push_pass(box mir::transform::break_critical_edges::BreakCriticalEdges);
passes.run_passes(tcx, &mut mir_map);
});
time(time_passes,
"translation",
move || trans::trans_crate(tcx, &mir_map, analysis))

View File

@ -42,3 +42,4 @@ mod hair;
pub mod mir_map;
pub mod pretty;
pub mod transform;
pub mod traversal;

View File

@ -0,0 +1,117 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use rustc::ty::TyCtxt;
use rustc::mir::repr::*;
use rustc::mir::transform::{MirPass, Pass};
use syntax::ast::NodeId;
use rustc_data_structures::bitvec::BitVector;
use traversal;
pub struct BreakCriticalEdges;
/**
* Breaks critical edges in the MIR.
*
* Critical edges are edges that are neither the only edge leaving a
* block, nor the only edge entering one.
*
* When you want something to happen "along" an edge, you can either
* do at the end of the predecessor block, or at the start of the
* successor block. Critical edges have to be broken in order to prevent
* "edge actions" from affecting other edges.
*
* This function will break those edges by inserting new blocks along them.
*
* A special case is Drop and Call terminators with unwind/cleanup successors,
* They use `invoke` in LLVM, which terminates a block, meaning that code cannot
* be inserted after them, so even if an edge is the only edge leaving a block
* like that, we still insert blocks if the edge is one of many entering the
* target.
*
* NOTE: Simplify CFG will happily undo most of the work this pass does.
*
*/
impl<'tcx> MirPass<'tcx> for BreakCriticalEdges {
fn run_pass(&mut self, _: &TyCtxt<'tcx>, _: NodeId, mir: &mut Mir<'tcx>) {
break_critical_edges(mir);
}
}
impl Pass for BreakCriticalEdges {}
fn break_critical_edges(mir: &mut Mir) {
let mut pred_count = vec![0u32; mir.basic_blocks.len()];
// Build the precedecessor map for the MIR
for (_, data) in traversal::preorder(mir) {
if let Some(ref term) = data.terminator {
for &tgt in term.successors().iter() {
pred_count[tgt.index()] += 1;
}
}
}
let cleanup_map : BitVector = mir.basic_blocks
.iter().map(|bb| bb.is_cleanup).collect();
// We need a place to store the new blocks generated
let mut new_blocks = Vec::new();
let bbs = mir.all_basic_blocks();
let cur_len = mir.basic_blocks.len();
for &bb in &bbs {
let data = mir.basic_block_data_mut(bb);
if let Some(ref mut term) = data.terminator {
let is_invoke = term_is_invoke(term);
let term_span = term.span;
let term_scope = term.scope;
let succs = term.successors_mut();
if succs.len() > 1 || (succs.len() > 0 && is_invoke) {
for tgt in succs {
let num_preds = pred_count[tgt.index()];
if num_preds > 1 {
// It's a critical edge, break it
let goto = Terminator {
span: term_span,
scope: term_scope,
kind: TerminatorKind::Goto { target: *tgt }
};
let mut data = BasicBlockData::new(Some(goto));
data.is_cleanup = cleanup_map.contains(tgt.index());
// Get the index it will be when inserted into the MIR
let idx = cur_len + new_blocks.len();
new_blocks.push(data);
*tgt = BasicBlock::new(idx);
}
}
}
}
}
debug!("Broke {} N edges", new_blocks.len());
mir.basic_blocks.extend_from_slice(&new_blocks);
}
// Returns true if the terminator would use an invoke in LLVM.
fn term_is_invoke(term: &Terminator) -> bool {
match term.kind {
TerminatorKind::Call { cleanup: Some(_), .. } |
TerminatorKind::Drop { unwind: Some(_), .. } => true,
_ => false
}
}

View File

@ -13,3 +13,4 @@ pub mod simplify_cfg;
pub mod erase_regions;
pub mod no_landing_pads;
pub mod type_check;
pub mod break_critical_edges;

View File

@ -0,0 +1,276 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::vec;
use rustc_data_structures::bitvec::BitVector;
use rustc::mir::repr::*;
/// Preorder traversal of a graph.
///
/// Preorder traversal is when each node is visited before an of it's
/// successors
///
/// A
/// / \
/// / \
/// B C
/// \ /
/// \ /
/// D
///
/// A preorder traversal of this graph is either `A B D C` or `A C D B`
#[derive(Clone)]
pub struct Preorder<'a, 'tcx: 'a> {
mir: &'a Mir<'tcx>,
visited: BitVector,
worklist: Vec<BasicBlock>,
}
impl<'a, 'tcx> Preorder<'a, 'tcx> {
pub fn new(mir: &'a Mir<'tcx>, root: BasicBlock) -> Preorder<'a, 'tcx> {
let worklist = vec![root];
Preorder {
mir: mir,
visited: BitVector::new(mir.basic_blocks.len()),
worklist: worklist
}
}
}
pub fn preorder<'a, 'tcx>(mir: &'a Mir<'tcx>) -> Preorder<'a, 'tcx> {
Preorder::new(mir, START_BLOCK)
}
impl<'a, 'tcx> Iterator for Preorder<'a, 'tcx> {
type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
while let Some(idx) = self.worklist.pop() {
if !self.visited.insert(idx.index()) {
continue;
}
let data = self.mir.basic_block_data(idx);
if let Some(ref term) = data.terminator {
for &succ in term.successors().iter() {
self.worklist.push(succ);
}
}
return Some((idx, data));
}
None
}
}
/// Postorder traversal of a graph.
///
/// Postorder traversal is when each node is visited after all of it's
/// successors, except when the successor is only reachable by a back-edge
///
/// A
/// / \
/// / \
/// B C
/// \ /
/// \ /
/// D
///
/// A Postorder traversal of this graph is `D B C A` or `D C B A`
pub struct Postorder<'a, 'tcx: 'a> {
mir: &'a Mir<'tcx>,
visited: BitVector,
visit_stack: Vec<(BasicBlock, vec::IntoIter<BasicBlock>)>
}
impl<'a, 'tcx> Postorder<'a, 'tcx> {
pub fn new(mir: &'a Mir<'tcx>, root: BasicBlock) -> Postorder<'a, 'tcx> {
let mut po = Postorder {
mir: mir,
visited: BitVector::new(mir.basic_blocks.len()),
visit_stack: Vec::new()
};
let data = po.mir.basic_block_data(root);
if let Some(ref term) = data.terminator {
po.visited.insert(root.index());
let succs = term.successors().into_owned().into_iter();
po.visit_stack.push((root, succs));
po.traverse_successor();
}
po
}
fn traverse_successor(&mut self) {
// This is quite a complex loop due to 1. the borrow checker not liking it much
// and 2. what exactly is going on is not clear
//
// It does the actual traversal of the graph, while the `next` method on the iterator
// just pops off of the stack. `visit_stack` is a stack containing pairs of nodes and
// iterators over the sucessors of those nodes. Each iteration attempts to get the next
// node from the top of the stack, then pushes that node and an iterator over the
// successors to the top of the stack. This loop only grows `visit_stack`, stopping when
// we reach a child that has no children that we haven't already visited.
//
// For a graph that looks like this:
//
// A
// / \
// / \
// B C
// | |
// | |
// D |
// \ /
// \ /
// E
//
// The state of the stack starts out with just the root node (`A` in this case);
// [(A, [B, C])]
//
// When the first call to `traverse_sucessor` happens, the following happens:
//
// [(B, [D]), // `B` taken from the successors of `A`, pushed to the
// // top of the stack along with the successors of `B`
// (A, [C])]
//
// [(D, [E]), // `D` taken from successors of `B`, pushed to stack
// (B, []),
// (A, [C])]
//
// [(E, []), // `E` taken from successors of `D`, pushed to stack
// (D, []),
// (B, []),
// (A, [C])]
//
// Now that the top of the stack has no successors we can traverse, each item will
// be popped off during iteration until we get back to `A`. This yeilds [E, D, B].
//
// When we yield `B` and call `traverse_successor`, we push `C` to the stack, but
// since we've already visited `E`, that child isn't added to the stack. The last
// two iterations yield `C` and finally `A` for a final traversal of [E, D, B, C, A]
loop {
let bb = if let Some(&mut (_, ref mut iter)) = self.visit_stack.last_mut() {
if let Some(bb) = iter.next() {
bb
} else {
break;
}
} else {
break;
};
if self.visited.insert(bb.index()) {
let data = self.mir.basic_block_data(bb);
if let Some(ref term) = data.terminator {
let succs = term.successors().into_owned().into_iter();
self.visit_stack.push((bb, succs));
}
}
}
}
}
pub fn postorder<'a, 'tcx>(mir: &'a Mir<'tcx>) -> Postorder<'a, 'tcx> {
Postorder::new(mir, START_BLOCK)
}
impl<'a, 'tcx> Iterator for Postorder<'a, 'tcx> {
type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
let next = self.visit_stack.pop();
if next.is_some() {
self.traverse_successor();
}
next.map(|(bb, _)| {
let data = self.mir.basic_block_data(bb);
(bb, data)
})
}
}
/// Reverse postorder traversal of a graph
///
/// Reverse postorder is the reverse order of a postorder traversal.
/// This is different to a preorder traversal and represents a natural
/// linearisation of control-flow.
///
/// A
/// / \
/// / \
/// B C
/// \ /
/// \ /
/// D
///
/// A reverse postorder traversal of this graph is either `A B C D` or `A C B D`
/// Note that for a graph containing no loops (i.e. A DAG), this is equivalent to
/// a topological sort.
///
/// Construction of a `ReversePostorder` traversal requires doing a full
/// postorder traversal of the graph, therefore this traversal should be
/// constructed as few times as possible. Use the `reset` method to be able
/// to re-use the traversal
#[derive(Clone)]
pub struct ReversePostorder<'a, 'tcx: 'a> {
mir: &'a Mir<'tcx>,
blocks: Vec<BasicBlock>,
idx: usize
}
impl<'a, 'tcx> ReversePostorder<'a, 'tcx> {
pub fn new(mir: &'a Mir<'tcx>, root: BasicBlock) -> ReversePostorder<'a, 'tcx> {
let blocks : Vec<_> = Postorder::new(mir, root).map(|(bb, _)| bb).collect();
let len = blocks.len();
ReversePostorder {
mir: mir,
blocks: blocks,
idx: len
}
}
pub fn reset(&mut self) {
self.idx = self.blocks.len();
}
}
pub fn reverse_postorder<'a, 'tcx>(mir: &'a Mir<'tcx>) -> ReversePostorder<'a, 'tcx> {
ReversePostorder::new(mir, START_BLOCK)
}
impl<'a, 'tcx> Iterator for ReversePostorder<'a, 'tcx> {
type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
if self.idx == 0 { return None; }
self.idx -= 1;
self.blocks.get(self.idx).map(|&bb| {
let data = self.mir.basic_block_data(bb);
(bb, data)
})
}
}

View File

@ -49,4 +49,10 @@ impl BasicBlock {
_ => None
}
}
pub fn delete(self) {
unsafe {
llvm::LLVMDeleteBasicBlock(self.0);
}
}
}

View File

@ -300,33 +300,8 @@ impl<'bcx, 'tcx> MirContext<'bcx, 'tcx> {
// Many different ways to call a function handled here
if let Some(cleanup) = cleanup.map(|bb| self.bcx(bb)) {
// We translate the copy into a temporary block. The temporary block is
// necessary because the current block has already been terminated (by
// `invoke`) and we cannot really translate into the target block
// because:
// * The target block may have more than a single precedesor;
// * Some LLVM insns cannot have a preceeding store insn (phi,
// cleanuppad), and adding/prepending the store now may render
// those other instructions invalid.
//
// NB: This approach still may break some LLVM code. For example if the
// target block starts with a `phi` (which may only match on immediate
// precedesors), it cannot know about this temporary block thus
// resulting in an invalid code:
//
// this:
// …
// %0 = …
// %1 = invoke to label %temp …
// temp:
// store ty %1, ty* %dest
// br label %actualtargetblock
// actualtargetblock: ; preds: %temp, …
// phi … [%this, …], [%0, …] ; ERROR: phi requires to match only on
// ; immediate precedesors
let ret_bcx = if destination.is_some() {
self.fcx.new_block("", None)
let ret_bcx = if let Some((_, target)) = *destination {
self.blocks[target.index()]
} else {
self.unreachable_block()
};
@ -343,15 +318,16 @@ impl<'bcx, 'tcx> MirContext<'bcx, 'tcx> {
self.set_operand_dropped(bcx, op);
});
if let Some((_, target)) = *destination {
if destination.is_some() {
let ret_bcx = ret_bcx.build();
if let Some(ret_dest) = ret_dest {
fn_ty.ret.store(&ret_bcx, invokeret, ret_dest.llval);
}
for op in args {
self.set_operand_dropped(&ret_bcx, op);
}
ret_bcx.br(self.llblock(target));
ret_bcx.at_start(|ret_bcx| {
if let Some(ret_dest) = ret_dest {
fn_ty.ret.store(&ret_bcx, invokeret, ret_dest.llval);
}
for op in args {
self.set_operand_dropped(&ret_bcx, op);
}
});
}
} else {
let llret = bcx.call(fn_ptr, &llargs, cleanup_bundle.as_ref());

View File

@ -19,7 +19,13 @@ use common::{self, Block, BlockAndBuilder, FunctionContext};
use std::ops::Deref;
use std::rc::Rc;
use basic_block::BasicBlock;
use rustc_data_structures::bitvec::BitVector;
use self::lvalue::{LvalueRef, get_dataptr, get_meta};
use rustc_mir::traversal;
use self::operand::OperandRef;
#[derive(Clone)]
@ -95,7 +101,7 @@ enum TempRef<'tcx> {
///////////////////////////////////////////////////////////////////////////
pub fn trans_mir<'blk, 'tcx>(fcx: &'blk FunctionContext<'blk, 'tcx>) {
pub fn trans_mir<'blk, 'tcx: 'blk>(fcx: &'blk FunctionContext<'blk, 'tcx>) {
let bcx = fcx.init(false, None).build();
let mir = bcx.mir();
@ -132,8 +138,13 @@ pub fn trans_mir<'blk, 'tcx>(fcx: &'blk FunctionContext<'blk, 'tcx>) {
let block_bcxs: Vec<Block<'blk,'tcx>> =
mir_blocks.iter()
.map(|&bb|{
// FIXME(#30941) this doesn't handle msvc-style exceptions
fcx.new_block(&format!("{:?}", bb), None)
if bb == mir::START_BLOCK {
fcx.new_block("start", None)
} else if bb == mir::END_BLOCK {
fcx.new_block("end", None)
} else {
fcx.new_block(&format!("{:?}", bb), None)
}
})
.collect();
@ -142,7 +153,7 @@ pub fn trans_mir<'blk, 'tcx>(fcx: &'blk FunctionContext<'blk, 'tcx>) {
bcx.br(start_bcx.llbb);
let mut mircx = MirContext {
mir: mir,
mir: mir.clone(),
fcx: fcx,
llpersonalityslot: None,
blocks: block_bcxs,
@ -152,11 +163,28 @@ pub fn trans_mir<'blk, 'tcx>(fcx: &'blk FunctionContext<'blk, 'tcx>) {
args: args,
};
// Translate the body of each block
for &bb in &mir_blocks {
let mut visited = BitVector::new(mir_blocks.len());
let rpo = traversal::reverse_postorder(&mir);
// Translate the body of each block using reverse postorder
for (bb, _) in rpo {
visited.insert(bb.index());
mircx.trans_block(bb);
}
// Remove blocks that haven't been visited, or have no
// predecessors.
for &bb in &mir_blocks {
let block = mircx.blocks[bb.index()];
let block = BasicBlock(block.llbb);
// Unreachable block
if !visited.contains(bb.index()) {
block.delete();
} else if block.pred_iter().count() == 0 {
block.delete();
}
}
fcx.cleanup();
}

View File

@ -0,0 +1,53 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// This code produces a CFG with critical edges that, if we don't
// handle properly, will cause invalid codegen.
#![feature(rustc_attrs)]
enum State {
Both,
Front,
Back
}
pub struct Foo<A: Iterator, B: Iterator> {
state: State,
a: A,
b: B
}
impl<A, B> Foo<A, B>
where A: Iterator, B: Iterator<Item=A::Item>
{
// This is the function we care about
#[rustc_mir]
fn next(&mut self) -> Option<A::Item> {
match self.state {
State::Both => match self.a.next() {
elt @ Some(..) => elt,
None => {
self.state = State::Back;
self.b.next()
}
},
State::Front => self.a.next(),
State::Back => self.b.next(),
}
}
}
// Make sure we actually translate a version of the function
pub fn do_stuff(mut f: Foo<Box<Iterator<Item=u32>>, Box<Iterator<Item=u32>>>) {
let _x = f.next();
}
fn main() {}