introduce a generic SCC computation

2018-07-02 10:40:03 -04:00 · 2018-07-02 10:40:03 -04:00 · 0052ddd8ae
parent dab206f8b5
commit 0052ddd8ae
5 changed files with 531 additions and 3 deletions
--- a/src/librustc_data_structures/graph/implementation/tests.rs
+++ b/src/librustc_data_structures/graph/implementation/tests.rs
@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-use graph::*;
+use graph::implementation::*;
 use std::fmt::Debug;

 type TestGraph = Graph<&'static str, &'static str>;
--- a/src/librustc_data_structures/graph/mod.rs
+++ b/src/librustc_data_structures/graph/mod.rs
@ -14,6 +14,7 @@ pub mod dominators;
 pub mod implementation;
 pub mod iterate;
 mod reference;
+pub mod scc;

 #[cfg(test)]
 mod test;
--- a/src/librustc_data_structures/graph/scc/mod.rs
+++ b/src/librustc_data_structures/graph/scc/mod.rs
@ -0,0 +1,341 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Routine to compute the strongly connected components (SCCs) of a
+//! graph, as well as the resulting DAG if each SCC is replaced with a
+//! node in the graph. This uses Tarjan's algorithm that completes in
+//! O(n) time.
+
+use fx::FxHashSet;
+use graph::{DirectedGraph, WithNumNodes, WithSuccessors};
+use indexed_vec::{Idx, IndexVec};
+use std::ops::Range;
+
+mod test;
+
+/// Strongly connected components (SCC) of a graph. The type `N` is
+/// the index type for the graph nodes and `S` is the index type for
+/// the SCCs. We can map from each node to the SCC that it
+/// participates in, and we also have the successors of each SCC.
+pub struct Sccs<N: Idx, S: Idx> {
+    /// For each node, what is the SCC index of the SCC to which it
+    /// belongs.
+    scc_indices: IndexVec<N, S>,
+
+    /// Data about each SCC.
+    scc_data: SccData<S>,
+}
+
+struct SccData<S: Idx> {
+    /// For each SCC, the range of `all_successors` where its
+    /// successors can be found.
+    ranges: IndexVec<S, Range<usize>>,
+
+    /// Contains the succcessors for all the Sccs, concatenated. The
+    /// range of indices corresponding to a given SCC is found in its
+    /// SccData.
+    all_successors: Vec<S>,
+}
+
+impl<N: Idx, S: Idx> Sccs<N, S> {
+    pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
+        SccsConstruction::construct(graph)
+    }
+
+    /// Returns the number of SCCs in the graph.
+    pub fn num_sccs(&self) -> usize {
+        self.scc_data.len()
+    }
+
+    /// Returns the SCC to which a node `r` belongs.
+    pub fn scc(&self, r: N) -> S {
+        self.scc_indices[r]
+    }
+
+    /// Returns the successor of the given SCC.
+    pub fn successors(&self, scc: S) -> &[S] {
+        self.scc_data.successors(scc)
+    }
+}
+
+impl<S: Idx> SccData<S> {
+    /// Number of SCCs,
+    fn len(&self) -> usize {
+        self.ranges.len()
+    }
+
+    /// Returns the successor of the given SCC.
+    fn successors(&self, scc: S) -> &[S] {
+        // Annoyingly, `range` does not implement `Copy`, so we have
+        // to do `range.start..range.end`:
+        let range = &self.ranges[scc];
+        &self.all_successors[range.start..range.end]
+    }
+
+    /// Creates a new SCC with `successors` as its successors and
+    /// returns the resulting index.
+    fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
+        // Store the successors on `scc_successors_vec`, remembering
+        // the range of indices.
+        let all_successors_start = self.all_successors.len();
+        self.all_successors.extend(successors);
+        let all_successors_end = self.all_successors.len();
+
+        debug!(
+            "create_scc({:?}) successors={:?}",
+            self.ranges.len(),
+            &self.all_successors[all_successors_start..all_successors_end],
+        );
+
+        self.ranges.push(all_successors_start..all_successors_end)
+    }
+}
+
+struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors + 'c, S: Idx> {
+    graph: &'c G,
+
+    /// The state of each node; used during walk to record the stack
+    /// and after walk to record what cycle each node ended up being
+    /// in.
+    node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
+
+    /// The stack of nodes that we are visiting as part of the DFS.
+    node_stack: Vec<G::Node>,
+
+    /// The stack of successors: as we visit a node, we mark our
+    /// position in this stack, and when we encounter a successor SCC,
+    /// we push it on the stack. When we complete an SCC, we can pop
+    /// everything off the stack that was found along the way.
+    successors_stack: Vec<S>,
+
+    /// A set used to strip duplicates. As we accumulate successors
+    /// into the successors_stack, we sometimes get duplicate entries.
+    /// We use this set to remove those -- we keep it around between
+    /// successors to amortize memory allocation costs.
+    duplicate_set: FxHashSet<S>,
+
+    scc_data: SccData<S>,
+}
+
+#[derive(Copy, Clone, Debug)]
+enum NodeState<N, S> {
+    /// This node has not yet been visited as part of the DFS.
+    ///
+    /// After SCC construction is complete, this state ought to be
+    /// impossible.
+    NotVisited,
+
+    /// This node is currently being walk as part of our DFS. It is on
+    /// the stack at the depth `depth`.
+    ///
+    /// After SCC construction is complete, this state ought to be
+    /// impossible.
+    BeingVisited { depth: usize },
+
+    /// Indicates that this node is a member of the given cycle.
+    InCycle { scc_index: S },
+
+    /// Indicates that this node is a member of whatever cycle
+    /// `parent` is a member of. This state is transient: whenever we
+    /// see it, we try to overwrite it with the current state of
+    /// `parent` (this is the "path compression" step of a union-find
+    /// algorithm).
+    InCycleWith { parent: N },
+}
+
+#[derive(Copy, Clone, Debug)]
+enum WalkReturn<S> {
+    Cycle { min_depth: usize },
+    Complete { scc_index: S },
+}
+
+impl<'c, G, S> SccsConstruction<'c, G, S>
+where
+    G: DirectedGraph + WithNumNodes + WithSuccessors,
+    S: Idx,
+{
+    /// Identifies SCCs in the graph `G` and computes the resulting
+    /// DAG. This uses a variant of [Tarjan's
+    /// algorithm][wikipedia]. The high-level summary of the algorithm
+    /// is that we do a depth-first search. Along the way, we keep a
+    /// stack of each node whose successors are being visited. We
+    /// track the depth of each node on this stack (there is no depth
+    /// if the node is not on the stack). When we find that some node
+    /// N with depth D can reach some other node N' with lower depth
+    /// D' (i.e., D' < D), we know that N, N', and all nodes in
+    /// between them on the stack are part of an SCC.
+    ///
+    /// For each node, we track the lowest depth of any successor we
+    /// have found, along with that
+    ///
+    /// [wikipedia]: https://bit.ly/2EZIx84
+    fn construct(graph: &'c G) -> Sccs<G::Node, S> {
+        let num_nodes = graph.num_nodes();
+
+        let mut this = Self {
+            graph,
+            node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
+            node_stack: Vec::with_capacity(num_nodes),
+            successors_stack: Vec::new(),
+            scc_data: SccData {
+                ranges: IndexVec::new(),
+                all_successors: Vec::new(),
+            },
+            duplicate_set: FxHashSet::default(),
+        };
+
+        let scc_indices = (0..num_nodes)
+            .map(G::Node::new)
+            .map(|node| match this.walk_node(0, node) {
+                WalkReturn::Complete { scc_index } => scc_index,
+                WalkReturn::Cycle { min_depth } => panic!(
+                    "`walk_node(0, {:?})` returned cycle with depth {:?}",
+                    node, min_depth
+                ),
+            })
+            .collect();
+
+        Sccs {
+            scc_indices,
+            scc_data: this.scc_data,
+        }
+    }
+
+    fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
+        debug!("walk_node(depth = {:?}, node = {:?})", depth, node);
+        match self.find_state(node) {
+            NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
+
+            NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
+
+            NodeState::NotVisited => self.walk_unvisited_node(depth, node),
+
+            NodeState::InCycleWith { parent } => panic!(
+                "`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
+                parent
+            ),
+        }
+    }
+
+    /// Fetches the state of the node `r`. If `r` is recorded as being
+    /// in a cycle with some other node `r2`, then fetches the state
+    /// of `r2` (and updates `r` to reflect current result). This is
+    /// basically the "find" part of a standard union-find algorithm
+    /// (with path compression).
+    fn find_state(&mut self, r: G::Node) -> NodeState<G::Node, S> {
+        debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]);
+        match self.node_states[r] {
+            NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index },
+            NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth },
+            NodeState::NotVisited => NodeState::NotVisited,
+            NodeState::InCycleWith { parent } => {
+                let parent_state = self.find_state(parent);
+                debug!("find_state: parent_state = {:?}", parent_state);
+                match parent_state {
+                    NodeState::InCycle { .. } => {
+                        self.node_states[r] = parent_state;
+                        parent_state
+                    }
+
+                    NodeState::BeingVisited { depth } => {
+                        self.node_states[r] = NodeState::InCycleWith {
+                            parent: self.node_stack[depth],
+                        };
+                        parent_state
+                    }
+
+                    NodeState::NotVisited | NodeState::InCycleWith { .. } => {
+                        panic!("invalid parent state: {:?}", parent_state)
+                    }
+                }
+            }
+        }
+    }
+
+    /// Walks a node that has never been visited before.
+    fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
+        debug!(
+            "walk_unvisited_node(depth = {:?}, node = {:?})",
+            depth, node
+        );
+
+        debug_assert!(match self.node_states[node] {
+            NodeState::NotVisited => true,
+            _ => false,
+        });
+
+        self.node_states[node] = NodeState::BeingVisited { depth };
+        self.node_stack.push(node);
+
+        // Walk each successor of the node, looking to see if any of
+        // them can reach a node that is presently on the stack. If
+        // so, that means they can also reach us.
+        let mut min_depth = depth;
+        let mut min_cycle_root = node;
+        let successors_len = self.successors_stack.len();
+        for successor_node in self.graph.successors(node) {
+            debug!(
+                "walk_unvisited_node: node = {:?} successor_ode = {:?}",
+                node, successor_node
+            );
+            match self.walk_node(depth + 1, successor_node) {
+                WalkReturn::Cycle {
+                    min_depth: successor_min_depth,
+                } => {
+                    assert!(successor_min_depth <= depth);
+                    if successor_min_depth < min_depth {
+                        debug!(
+                            "walk_unvisited_node: node = {:?} successor_min_depth = {:?}",
+                            node, successor_min_depth
+                        );
+                        min_depth = successor_min_depth;
+                        min_cycle_root = successor_node;
+                    }
+                }
+
+                WalkReturn::Complete {
+                    scc_index: successor_scc_index,
+                } => {
+                    debug!(
+                        "walk_unvisited_node: node = {:?} successor_scc_index = {:?}",
+                        node, successor_scc_index
+                    );
+                    self.successors_stack.push(successor_scc_index);
+                }
+            }
+        }
+
+        let r = self.node_stack.pop();
+        debug_assert_eq!(r, Some(node));
+
+        if min_depth == depth {
+            // Note that successor stack may have duplicates, so we
+            // want to remove those:
+            let deduplicated_successors = {
+                let duplicate_set = &mut self.duplicate_set;
+                duplicate_set.clear();
+                self.successors_stack
+                    .drain(successors_len..)
+                    .filter(move |&i| duplicate_set.insert(i))
+            };
+            let scc_index = self.scc_data.create_scc(deduplicated_successors);
+            self.node_states[node] = NodeState::InCycle { scc_index };
+            WalkReturn::Complete { scc_index }
+        } else {
+            // We are not the head of the cycle. Return back to our
+            // caller.  They will take ownership of the
+            // `self.successors` data that we pushed.
+            self.node_states[node] = NodeState::InCycleWith {
+                parent: min_cycle_root,
+            };
+            WalkReturn::Cycle { min_depth }
+        }
+    }
+}
--- a/src/librustc_data_structures/graph/scc/test.rs
+++ b/src/librustc_data_structures/graph/scc/test.rs
@ -0,0 +1,178 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![cfg(test)]
+
+use graph::test::TestGraph;
+use super::*;
+
+#[test]
+fn diamond() {
+    let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
+    let sccs: Sccs<_, usize> = Sccs::new(&graph);
+    assert_eq!(sccs.num_sccs(), 4);
+    assert_eq!(sccs.num_sccs(), 4);
+}
+
+#[test]
+fn test_big_scc() {
+    // The order in which things will be visited is important to this
+    // test.
+    //
+    // We will visit:
+    //
+    // 0 -> 1 -> 2 -> 0
+    //
+    // and at this point detect a cycle. 2 will return back to 1 which
+    // will visit 3. 3 will visit 2 before the cycle is complete, and
+    // hence it too will return a cycle.
+
+    /*
+-> 0
+|   |
+|   v
+|   1 -> 3
+|   |    |
+|   v    |
+-- 2 <--+
+     */
+    let graph = TestGraph::new(0, &[
+        (0, 1),
+        (1, 2),
+        (1, 3),
+        (2, 0),
+        (3, 2),
+    ]);
+    let sccs: Sccs<_, usize> = Sccs::new(&graph);
+    assert_eq!(sccs.num_sccs(), 1);
+}
+
+#[test]
+fn test_three_sccs() {
+    /*
+    0
+    |
+    v
+-> 1    3
+|   |    |
+|   v    |
+-- 2 <--+
+     */
+    let graph = TestGraph::new(0, &[
+        (0, 1),
+        (1, 2),
+        (2, 1),
+        (3, 2),
+    ]);
+    let sccs: Sccs<_, usize> = Sccs::new(&graph);
+    assert_eq!(sccs.num_sccs(), 3);
+    assert_eq!(sccs.scc(0), 1);
+    assert_eq!(sccs.scc(1), 0);
+    assert_eq!(sccs.scc(2), 0);
+    assert_eq!(sccs.scc(3), 2);
+    assert_eq!(sccs.successors(0), &[]);
+    assert_eq!(sccs.successors(1), &[0]);
+    assert_eq!(sccs.successors(2), &[0]);
+}
+
+#[test]
+fn test_find_state_2() {
+    // The order in which things will be visited is important to this
+    // test. It tests part of the `find_state` behavior.
+    //
+    // We will start in our DFS by visiting:
+    //
+    // 0 -> 1 -> 2 -> 1
+    //
+    // and at this point detect a cycle. The state of 2 will thus be
+    // `InCycleWith { 1 }`.  We will then visit the 1 -> 3 edge, which
+    // will attempt to visit 0 as well, thus going to the state
+    // `InCycleWith { 0 }`. Finally, node 1 will complete; the lowest
+    // depth of any successor was 3 which had depth 0, and thus it
+    // will be in the state `InCycleWith { 3 }`.
+    //
+    // When we finally traverse the `0 -> 4` edge and then visit node 2,
+    // the states of the nodes are:
+    //
+    // 0 BeingVisited { 0 }
+    // 1 InCycleWith { 3 }
+    // 2 InCycleWith { 1 }
+    // 3 InCycleWith { 0 }
+    //
+    // and hence 4 will traverse the links, finding an ultimate depth of 0.
+    // If will also collapse the states to the following:
+    //
+    // 0 BeingVisited { 0 }
+    // 1 InCycleWith { 3 }
+    // 2 InCycleWith { 1 }
+    // 3 InCycleWith { 0 }
+
+    /*
+      /----+
+    0 <--+ |
+    |    | |
+    v    | |
+-> 1 -> 3 4
+|   |      |
+|   v      |
+-- 2 <----+
+     */
+    let graph = TestGraph::new(0, &[
+        (0, 1),
+        (0, 4),
+        (1, 2),
+        (1, 3),
+        (2, 1),
+        (3, 0),
+        (4, 2),
+    ]);
+    let sccs: Sccs<_, usize> = Sccs::new(&graph);
+    assert_eq!(sccs.num_sccs(), 1);
+    assert_eq!(sccs.scc(0), 0);
+    assert_eq!(sccs.scc(1), 0);
+    assert_eq!(sccs.scc(2), 0);
+    assert_eq!(sccs.scc(3), 0);
+    assert_eq!(sccs.scc(4), 0);
+    assert_eq!(sccs.successors(0), &[]);
+}
+
+#[test]
+fn test_find_state_3() {
+    /*
+      /----+
+    0 <--+ |
+    |    | |
+    v    | |
+-> 1 -> 3 4 5
+|   |      | |
+|   v      | |
+-- 2 <----+-+
+     */
+    let graph = TestGraph::new(0, &[
+        (0, 1),
+        (0, 4),
+        (1, 2),
+        (1, 3),
+        (2, 1),
+        (3, 0),
+        (4, 2),
+        (5, 2),
+    ]);
+    let sccs: Sccs<_, usize> = Sccs::new(&graph);
+    assert_eq!(sccs.num_sccs(), 2);
+    assert_eq!(sccs.scc(0), 0);
+    assert_eq!(sccs.scc(1), 0);
+    assert_eq!(sccs.scc(2), 0);
+    assert_eq!(sccs.scc(3), 0);
+    assert_eq!(sccs.scc(4), 0);
+    assert_eq!(sccs.scc(5), 1);
+    assert_eq!(sccs.successors(0), &[]);
+    assert_eq!(sccs.successors(1), &[0]);
+}
--- a/src/librustc_data_structures/graph/test.rs
+++ b/src/librustc_data_structures/graph/test.rs
@ -13,7 +13,7 @@ use std::cmp::max;
 use std::slice;
 use std::iter;

-use super::{ControlFlowGraph, GraphPredecessors, GraphSuccessors};
+use super::*;

 pub struct TestGraph {
    num_nodes: usize,
@ -44,23 +44,31 @@ impl TestGraph {
    }
 }

-impl ControlFlowGraph for TestGraph {
+impl DirectedGraph for TestGraph {
    type Node = usize;
+}

+impl WithStartNode for TestGraph {
    fn start_node(&self) -> usize {
        self.start_node
    }
+}

+impl WithNumNodes for TestGraph {
    fn num_nodes(&self) -> usize {
        self.num_nodes
    }
+}

+impl WithPredecessors for TestGraph {
    fn predecessors<'graph>(&'graph self,
                            node: usize)
                            -> <Self as GraphPredecessors<'graph>>::Iter {
        self.predecessors[&node].iter().cloned()
    }
+}

+impl WithSuccessors for TestGraph {
    fn successors<'graph>(&'graph self, node: usize) -> <Self as GraphSuccessors<'graph>>::Iter {
        self.successors[&node].iter().cloned()
    }