diff --git a/src/librustc_data_structures/graph/implementation/tests.rs b/src/librustc_data_structures/graph/implementation/tests.rs index 007704357af..3814827b5df 100644 --- a/src/librustc_data_structures/graph/implementation/tests.rs +++ b/src/librustc_data_structures/graph/implementation/tests.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use graph::*; +use graph::implementation::*; use std::fmt::Debug; type TestGraph = Graph<&'static str, &'static str>; diff --git a/src/librustc_data_structures/graph/mod.rs b/src/librustc_data_structures/graph/mod.rs index bd933e57b39..7265e4e8c7c 100644 --- a/src/librustc_data_structures/graph/mod.rs +++ b/src/librustc_data_structures/graph/mod.rs @@ -14,6 +14,7 @@ pub mod dominators; pub mod implementation; pub mod iterate; mod reference; +pub mod scc; #[cfg(test)] mod test; diff --git a/src/librustc_data_structures/graph/scc/mod.rs b/src/librustc_data_structures/graph/scc/mod.rs new file mode 100644 index 00000000000..b0f098b3d20 --- /dev/null +++ b/src/librustc_data_structures/graph/scc/mod.rs @@ -0,0 +1,341 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Routine to compute the strongly connected components (SCCs) of a +//! graph, as well as the resulting DAG if each SCC is replaced with a +//! node in the graph. This uses Tarjan's algorithm that completes in +//! O(n) time. + +use fx::FxHashSet; +use graph::{DirectedGraph, WithNumNodes, WithSuccessors}; +use indexed_vec::{Idx, IndexVec}; +use std::ops::Range; + +mod test; + +/// Strongly connected components (SCC) of a graph. The type `N` is +/// the index type for the graph nodes and `S` is the index type for +/// the SCCs. We can map from each node to the SCC that it +/// participates in, and we also have the successors of each SCC. +pub struct Sccs { + /// For each node, what is the SCC index of the SCC to which it + /// belongs. + scc_indices: IndexVec, + + /// Data about each SCC. + scc_data: SccData, +} + +struct SccData { + /// For each SCC, the range of `all_successors` where its + /// successors can be found. + ranges: IndexVec>, + + /// Contains the succcessors for all the Sccs, concatenated. The + /// range of indices corresponding to a given SCC is found in its + /// SccData. + all_successors: Vec, +} + +impl Sccs { + pub fn new(graph: &(impl DirectedGraph + WithNumNodes + WithSuccessors)) -> Self { + SccsConstruction::construct(graph) + } + + /// Returns the number of SCCs in the graph. + pub fn num_sccs(&self) -> usize { + self.scc_data.len() + } + + /// Returns the SCC to which a node `r` belongs. + pub fn scc(&self, r: N) -> S { + self.scc_indices[r] + } + + /// Returns the successor of the given SCC. + pub fn successors(&self, scc: S) -> &[S] { + self.scc_data.successors(scc) + } +} + +impl SccData { + /// Number of SCCs, + fn len(&self) -> usize { + self.ranges.len() + } + + /// Returns the successor of the given SCC. + fn successors(&self, scc: S) -> &[S] { + // Annoyingly, `range` does not implement `Copy`, so we have + // to do `range.start..range.end`: + let range = &self.ranges[scc]; + &self.all_successors[range.start..range.end] + } + + /// Creates a new SCC with `successors` as its successors and + /// returns the resulting index. + fn create_scc(&mut self, successors: impl IntoIterator) -> S { + // Store the successors on `scc_successors_vec`, remembering + // the range of indices. + let all_successors_start = self.all_successors.len(); + self.all_successors.extend(successors); + let all_successors_end = self.all_successors.len(); + + debug!( + "create_scc({:?}) successors={:?}", + self.ranges.len(), + &self.all_successors[all_successors_start..all_successors_end], + ); + + self.ranges.push(all_successors_start..all_successors_end) + } +} + +struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors + 'c, S: Idx> { + graph: &'c G, + + /// The state of each node; used during walk to record the stack + /// and after walk to record what cycle each node ended up being + /// in. + node_states: IndexVec>, + + /// The stack of nodes that we are visiting as part of the DFS. + node_stack: Vec, + + /// The stack of successors: as we visit a node, we mark our + /// position in this stack, and when we encounter a successor SCC, + /// we push it on the stack. When we complete an SCC, we can pop + /// everything off the stack that was found along the way. + successors_stack: Vec, + + /// A set used to strip duplicates. As we accumulate successors + /// into the successors_stack, we sometimes get duplicate entries. + /// We use this set to remove those -- we keep it around between + /// successors to amortize memory allocation costs. + duplicate_set: FxHashSet, + + scc_data: SccData, +} + +#[derive(Copy, Clone, Debug)] +enum NodeState { + /// This node has not yet been visited as part of the DFS. + /// + /// After SCC construction is complete, this state ought to be + /// impossible. + NotVisited, + + /// This node is currently being walk as part of our DFS. It is on + /// the stack at the depth `depth`. + /// + /// After SCC construction is complete, this state ought to be + /// impossible. + BeingVisited { depth: usize }, + + /// Indicates that this node is a member of the given cycle. + InCycle { scc_index: S }, + + /// Indicates that this node is a member of whatever cycle + /// `parent` is a member of. This state is transient: whenever we + /// see it, we try to overwrite it with the current state of + /// `parent` (this is the "path compression" step of a union-find + /// algorithm). + InCycleWith { parent: N }, +} + +#[derive(Copy, Clone, Debug)] +enum WalkReturn { + Cycle { min_depth: usize }, + Complete { scc_index: S }, +} + +impl<'c, G, S> SccsConstruction<'c, G, S> +where + G: DirectedGraph + WithNumNodes + WithSuccessors, + S: Idx, +{ + /// Identifies SCCs in the graph `G` and computes the resulting + /// DAG. This uses a variant of [Tarjan's + /// algorithm][wikipedia]. The high-level summary of the algorithm + /// is that we do a depth-first search. Along the way, we keep a + /// stack of each node whose successors are being visited. We + /// track the depth of each node on this stack (there is no depth + /// if the node is not on the stack). When we find that some node + /// N with depth D can reach some other node N' with lower depth + /// D' (i.e., D' < D), we know that N, N', and all nodes in + /// between them on the stack are part of an SCC. + /// + /// For each node, we track the lowest depth of any successor we + /// have found, along with that + /// + /// [wikipedia]: https://bit.ly/2EZIx84 + fn construct(graph: &'c G) -> Sccs { + let num_nodes = graph.num_nodes(); + + let mut this = Self { + graph, + node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes), + node_stack: Vec::with_capacity(num_nodes), + successors_stack: Vec::new(), + scc_data: SccData { + ranges: IndexVec::new(), + all_successors: Vec::new(), + }, + duplicate_set: FxHashSet::default(), + }; + + let scc_indices = (0..num_nodes) + .map(G::Node::new) + .map(|node| match this.walk_node(0, node) { + WalkReturn::Complete { scc_index } => scc_index, + WalkReturn::Cycle { min_depth } => panic!( + "`walk_node(0, {:?})` returned cycle with depth {:?}", + node, min_depth + ), + }) + .collect(); + + Sccs { + scc_indices, + scc_data: this.scc_data, + } + } + + fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn { + debug!("walk_node(depth = {:?}, node = {:?})", depth, node); + match self.find_state(node) { + NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index }, + + NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth }, + + NodeState::NotVisited => self.walk_unvisited_node(depth, node), + + NodeState::InCycleWith { parent } => panic!( + "`find_state` returned `InCycleWith({:?})`, which ought to be impossible", + parent + ), + } + } + + /// Fetches the state of the node `r`. If `r` is recorded as being + /// in a cycle with some other node `r2`, then fetches the state + /// of `r2` (and updates `r` to reflect current result). This is + /// basically the "find" part of a standard union-find algorithm + /// (with path compression). + fn find_state(&mut self, r: G::Node) -> NodeState { + debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]); + match self.node_states[r] { + NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index }, + NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth }, + NodeState::NotVisited => NodeState::NotVisited, + NodeState::InCycleWith { parent } => { + let parent_state = self.find_state(parent); + debug!("find_state: parent_state = {:?}", parent_state); + match parent_state { + NodeState::InCycle { .. } => { + self.node_states[r] = parent_state; + parent_state + } + + NodeState::BeingVisited { depth } => { + self.node_states[r] = NodeState::InCycleWith { + parent: self.node_stack[depth], + }; + parent_state + } + + NodeState::NotVisited | NodeState::InCycleWith { .. } => { + panic!("invalid parent state: {:?}", parent_state) + } + } + } + } + } + + /// Walks a node that has never been visited before. + fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn { + debug!( + "walk_unvisited_node(depth = {:?}, node = {:?})", + depth, node + ); + + debug_assert!(match self.node_states[node] { + NodeState::NotVisited => true, + _ => false, + }); + + self.node_states[node] = NodeState::BeingVisited { depth }; + self.node_stack.push(node); + + // Walk each successor of the node, looking to see if any of + // them can reach a node that is presently on the stack. If + // so, that means they can also reach us. + let mut min_depth = depth; + let mut min_cycle_root = node; + let successors_len = self.successors_stack.len(); + for successor_node in self.graph.successors(node) { + debug!( + "walk_unvisited_node: node = {:?} successor_ode = {:?}", + node, successor_node + ); + match self.walk_node(depth + 1, successor_node) { + WalkReturn::Cycle { + min_depth: successor_min_depth, + } => { + assert!(successor_min_depth <= depth); + if successor_min_depth < min_depth { + debug!( + "walk_unvisited_node: node = {:?} successor_min_depth = {:?}", + node, successor_min_depth + ); + min_depth = successor_min_depth; + min_cycle_root = successor_node; + } + } + + WalkReturn::Complete { + scc_index: successor_scc_index, + } => { + debug!( + "walk_unvisited_node: node = {:?} successor_scc_index = {:?}", + node, successor_scc_index + ); + self.successors_stack.push(successor_scc_index); + } + } + } + + let r = self.node_stack.pop(); + debug_assert_eq!(r, Some(node)); + + if min_depth == depth { + // Note that successor stack may have duplicates, so we + // want to remove those: + let deduplicated_successors = { + let duplicate_set = &mut self.duplicate_set; + duplicate_set.clear(); + self.successors_stack + .drain(successors_len..) + .filter(move |&i| duplicate_set.insert(i)) + }; + let scc_index = self.scc_data.create_scc(deduplicated_successors); + self.node_states[node] = NodeState::InCycle { scc_index }; + WalkReturn::Complete { scc_index } + } else { + // We are not the head of the cycle. Return back to our + // caller. They will take ownership of the + // `self.successors` data that we pushed. + self.node_states[node] = NodeState::InCycleWith { + parent: min_cycle_root, + }; + WalkReturn::Cycle { min_depth } + } + } +} diff --git a/src/librustc_data_structures/graph/scc/test.rs b/src/librustc_data_structures/graph/scc/test.rs new file mode 100644 index 00000000000..a273d64a40c --- /dev/null +++ b/src/librustc_data_structures/graph/scc/test.rs @@ -0,0 +1,178 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![cfg(test)] + +use graph::test::TestGraph; +use super::*; + +#[test] +fn diamond() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 4); + assert_eq!(sccs.num_sccs(), 4); +} + +#[test] +fn test_big_scc() { + // The order in which things will be visited is important to this + // test. + // + // We will visit: + // + // 0 -> 1 -> 2 -> 0 + // + // and at this point detect a cycle. 2 will return back to 1 which + // will visit 3. 3 will visit 2 before the cycle is complete, and + // hence it too will return a cycle. + + /* ++-> 0 +| | +| v +| 1 -> 3 +| | | +| v | ++-- 2 <--+ + */ + let graph = TestGraph::new(0, &[ + (0, 1), + (1, 2), + (1, 3), + (2, 0), + (3, 2), + ]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 1); +} + +#[test] +fn test_three_sccs() { + /* + 0 + | + v ++-> 1 3 +| | | +| v | ++-- 2 <--+ + */ + let graph = TestGraph::new(0, &[ + (0, 1), + (1, 2), + (2, 1), + (3, 2), + ]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 3); + assert_eq!(sccs.scc(0), 1); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 2); + assert_eq!(sccs.successors(0), &[]); + assert_eq!(sccs.successors(1), &[0]); + assert_eq!(sccs.successors(2), &[0]); +} + +#[test] +fn test_find_state_2() { + // The order in which things will be visited is important to this + // test. It tests part of the `find_state` behavior. + // + // We will start in our DFS by visiting: + // + // 0 -> 1 -> 2 -> 1 + // + // and at this point detect a cycle. The state of 2 will thus be + // `InCycleWith { 1 }`. We will then visit the 1 -> 3 edge, which + // will attempt to visit 0 as well, thus going to the state + // `InCycleWith { 0 }`. Finally, node 1 will complete; the lowest + // depth of any successor was 3 which had depth 0, and thus it + // will be in the state `InCycleWith { 3 }`. + // + // When we finally traverse the `0 -> 4` edge and then visit node 2, + // the states of the nodes are: + // + // 0 BeingVisited { 0 } + // 1 InCycleWith { 3 } + // 2 InCycleWith { 1 } + // 3 InCycleWith { 0 } + // + // and hence 4 will traverse the links, finding an ultimate depth of 0. + // If will also collapse the states to the following: + // + // 0 BeingVisited { 0 } + // 1 InCycleWith { 3 } + // 2 InCycleWith { 1 } + // 3 InCycleWith { 0 } + + /* + /----+ + 0 <--+ | + | | | + v | | ++-> 1 -> 3 4 +| | | +| v | ++-- 2 <----+ + */ + let graph = TestGraph::new(0, &[ + (0, 1), + (0, 4), + (1, 2), + (1, 3), + (2, 1), + (3, 0), + (4, 2), + ]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 1); + assert_eq!(sccs.scc(0), 0); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 0); + assert_eq!(sccs.scc(4), 0); + assert_eq!(sccs.successors(0), &[]); +} + +#[test] +fn test_find_state_3() { + /* + /----+ + 0 <--+ | + | | | + v | | ++-> 1 -> 3 4 5 +| | | | +| v | | ++-- 2 <----+-+ + */ + let graph = TestGraph::new(0, &[ + (0, 1), + (0, 4), + (1, 2), + (1, 3), + (2, 1), + (3, 0), + (4, 2), + (5, 2), + ]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 2); + assert_eq!(sccs.scc(0), 0); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 0); + assert_eq!(sccs.scc(4), 0); + assert_eq!(sccs.scc(5), 1); + assert_eq!(sccs.successors(0), &[]); + assert_eq!(sccs.successors(1), &[0]); +} diff --git a/src/librustc_data_structures/graph/test.rs b/src/librustc_data_structures/graph/test.rs index f04b536bc18..48b654726b8 100644 --- a/src/librustc_data_structures/graph/test.rs +++ b/src/librustc_data_structures/graph/test.rs @@ -13,7 +13,7 @@ use std::cmp::max; use std::slice; use std::iter; -use super::{ControlFlowGraph, GraphPredecessors, GraphSuccessors}; +use super::*; pub struct TestGraph { num_nodes: usize, @@ -44,23 +44,31 @@ impl TestGraph { } } -impl ControlFlowGraph for TestGraph { +impl DirectedGraph for TestGraph { type Node = usize; +} +impl WithStartNode for TestGraph { fn start_node(&self) -> usize { self.start_node } +} +impl WithNumNodes for TestGraph { fn num_nodes(&self) -> usize { self.num_nodes } +} +impl WithPredecessors for TestGraph { fn predecessors<'graph>(&'graph self, node: usize) -> >::Iter { self.predecessors[&node].iter().cloned() } +} +impl WithSuccessors for TestGraph { fn successors<'graph>(&'graph self, node: usize) -> >::Iter { self.successors[&node].iter().cloned() }