introduce a generic SCC computation

This commit is contained in:
Niko Matsakis 2018-07-02 10:40:03 -04:00
parent dab206f8b5
commit 0052ddd8ae
5 changed files with 531 additions and 3 deletions

View File

@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use graph::*;
use graph::implementation::*;
use std::fmt::Debug;
type TestGraph = Graph<&'static str, &'static str>;

View File

@ -14,6 +14,7 @@ pub mod dominators;
pub mod implementation;
pub mod iterate;
mod reference;
pub mod scc;
#[cfg(test)]
mod test;

View File

@ -0,0 +1,341 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Routine to compute the strongly connected components (SCCs) of a
//! graph, as well as the resulting DAG if each SCC is replaced with a
//! node in the graph. This uses Tarjan's algorithm that completes in
//! O(n) time.
use fx::FxHashSet;
use graph::{DirectedGraph, WithNumNodes, WithSuccessors};
use indexed_vec::{Idx, IndexVec};
use std::ops::Range;
mod test;
/// Strongly connected components (SCC) of a graph. The type `N` is
/// the index type for the graph nodes and `S` is the index type for
/// the SCCs. We can map from each node to the SCC that it
/// participates in, and we also have the successors of each SCC.
pub struct Sccs<N: Idx, S: Idx> {
/// For each node, what is the SCC index of the SCC to which it
/// belongs.
scc_indices: IndexVec<N, S>,
/// Data about each SCC.
scc_data: SccData<S>,
}
struct SccData<S: Idx> {
/// For each SCC, the range of `all_successors` where its
/// successors can be found.
ranges: IndexVec<S, Range<usize>>,
/// Contains the succcessors for all the Sccs, concatenated. The
/// range of indices corresponding to a given SCC is found in its
/// SccData.
all_successors: Vec<S>,
}
impl<N: Idx, S: Idx> Sccs<N, S> {
pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
SccsConstruction::construct(graph)
}
/// Returns the number of SCCs in the graph.
pub fn num_sccs(&self) -> usize {
self.scc_data.len()
}
/// Returns the SCC to which a node `r` belongs.
pub fn scc(&self, r: N) -> S {
self.scc_indices[r]
}
/// Returns the successor of the given SCC.
pub fn successors(&self, scc: S) -> &[S] {
self.scc_data.successors(scc)
}
}
impl<S: Idx> SccData<S> {
/// Number of SCCs,
fn len(&self) -> usize {
self.ranges.len()
}
/// Returns the successor of the given SCC.
fn successors(&self, scc: S) -> &[S] {
// Annoyingly, `range` does not implement `Copy`, so we have
// to do `range.start..range.end`:
let range = &self.ranges[scc];
&self.all_successors[range.start..range.end]
}
/// Creates a new SCC with `successors` as its successors and
/// returns the resulting index.
fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
// Store the successors on `scc_successors_vec`, remembering
// the range of indices.
let all_successors_start = self.all_successors.len();
self.all_successors.extend(successors);
let all_successors_end = self.all_successors.len();
debug!(
"create_scc({:?}) successors={:?}",
self.ranges.len(),
&self.all_successors[all_successors_start..all_successors_end],
);
self.ranges.push(all_successors_start..all_successors_end)
}
}
struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors + 'c, S: Idx> {
graph: &'c G,
/// The state of each node; used during walk to record the stack
/// and after walk to record what cycle each node ended up being
/// in.
node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
/// The stack of nodes that we are visiting as part of the DFS.
node_stack: Vec<G::Node>,
/// The stack of successors: as we visit a node, we mark our
/// position in this stack, and when we encounter a successor SCC,
/// we push it on the stack. When we complete an SCC, we can pop
/// everything off the stack that was found along the way.
successors_stack: Vec<S>,
/// A set used to strip duplicates. As we accumulate successors
/// into the successors_stack, we sometimes get duplicate entries.
/// We use this set to remove those -- we keep it around between
/// successors to amortize memory allocation costs.
duplicate_set: FxHashSet<S>,
scc_data: SccData<S>,
}
#[derive(Copy, Clone, Debug)]
enum NodeState<N, S> {
/// This node has not yet been visited as part of the DFS.
///
/// After SCC construction is complete, this state ought to be
/// impossible.
NotVisited,
/// This node is currently being walk as part of our DFS. It is on
/// the stack at the depth `depth`.
///
/// After SCC construction is complete, this state ought to be
/// impossible.
BeingVisited { depth: usize },
/// Indicates that this node is a member of the given cycle.
InCycle { scc_index: S },
/// Indicates that this node is a member of whatever cycle
/// `parent` is a member of. This state is transient: whenever we
/// see it, we try to overwrite it with the current state of
/// `parent` (this is the "path compression" step of a union-find
/// algorithm).
InCycleWith { parent: N },
}
#[derive(Copy, Clone, Debug)]
enum WalkReturn<S> {
Cycle { min_depth: usize },
Complete { scc_index: S },
}
impl<'c, G, S> SccsConstruction<'c, G, S>
where
G: DirectedGraph + WithNumNodes + WithSuccessors,
S: Idx,
{
/// Identifies SCCs in the graph `G` and computes the resulting
/// DAG. This uses a variant of [Tarjan's
/// algorithm][wikipedia]. The high-level summary of the algorithm
/// is that we do a depth-first search. Along the way, we keep a
/// stack of each node whose successors are being visited. We
/// track the depth of each node on this stack (there is no depth
/// if the node is not on the stack). When we find that some node
/// N with depth D can reach some other node N' with lower depth
/// D' (i.e., D' < D), we know that N, N', and all nodes in
/// between them on the stack are part of an SCC.
///
/// For each node, we track the lowest depth of any successor we
/// have found, along with that
///
/// [wikipedia]: https://bit.ly/2EZIx84
fn construct(graph: &'c G) -> Sccs<G::Node, S> {
let num_nodes = graph.num_nodes();
let mut this = Self {
graph,
node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
node_stack: Vec::with_capacity(num_nodes),
successors_stack: Vec::new(),
scc_data: SccData {
ranges: IndexVec::new(),
all_successors: Vec::new(),
},
duplicate_set: FxHashSet::default(),
};
let scc_indices = (0..num_nodes)
.map(G::Node::new)
.map(|node| match this.walk_node(0, node) {
WalkReturn::Complete { scc_index } => scc_index,
WalkReturn::Cycle { min_depth } => panic!(
"`walk_node(0, {:?})` returned cycle with depth {:?}",
node, min_depth
),
})
.collect();
Sccs {
scc_indices,
scc_data: this.scc_data,
}
}
fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
debug!("walk_node(depth = {:?}, node = {:?})", depth, node);
match self.find_state(node) {
NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
NodeState::NotVisited => self.walk_unvisited_node(depth, node),
NodeState::InCycleWith { parent } => panic!(
"`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
parent
),
}
}
/// Fetches the state of the node `r`. If `r` is recorded as being
/// in a cycle with some other node `r2`, then fetches the state
/// of `r2` (and updates `r` to reflect current result). This is
/// basically the "find" part of a standard union-find algorithm
/// (with path compression).
fn find_state(&mut self, r: G::Node) -> NodeState<G::Node, S> {
debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]);
match self.node_states[r] {
NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index },
NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth },
NodeState::NotVisited => NodeState::NotVisited,
NodeState::InCycleWith { parent } => {
let parent_state = self.find_state(parent);
debug!("find_state: parent_state = {:?}", parent_state);
match parent_state {
NodeState::InCycle { .. } => {
self.node_states[r] = parent_state;
parent_state
}
NodeState::BeingVisited { depth } => {
self.node_states[r] = NodeState::InCycleWith {
parent: self.node_stack[depth],
};
parent_state
}
NodeState::NotVisited | NodeState::InCycleWith { .. } => {
panic!("invalid parent state: {:?}", parent_state)
}
}
}
}
}
/// Walks a node that has never been visited before.
fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
debug!(
"walk_unvisited_node(depth = {:?}, node = {:?})",
depth, node
);
debug_assert!(match self.node_states[node] {
NodeState::NotVisited => true,
_ => false,
});
self.node_states[node] = NodeState::BeingVisited { depth };
self.node_stack.push(node);
// Walk each successor of the node, looking to see if any of
// them can reach a node that is presently on the stack. If
// so, that means they can also reach us.
let mut min_depth = depth;
let mut min_cycle_root = node;
let successors_len = self.successors_stack.len();
for successor_node in self.graph.successors(node) {
debug!(
"walk_unvisited_node: node = {:?} successor_ode = {:?}",
node, successor_node
);
match self.walk_node(depth + 1, successor_node) {
WalkReturn::Cycle {
min_depth: successor_min_depth,
} => {
assert!(successor_min_depth <= depth);
if successor_min_depth < min_depth {
debug!(
"walk_unvisited_node: node = {:?} successor_min_depth = {:?}",
node, successor_min_depth
);
min_depth = successor_min_depth;
min_cycle_root = successor_node;
}
}
WalkReturn::Complete {
scc_index: successor_scc_index,
} => {
debug!(
"walk_unvisited_node: node = {:?} successor_scc_index = {:?}",
node, successor_scc_index
);
self.successors_stack.push(successor_scc_index);
}
}
}
let r = self.node_stack.pop();
debug_assert_eq!(r, Some(node));
if min_depth == depth {
// Note that successor stack may have duplicates, so we
// want to remove those:
let deduplicated_successors = {
let duplicate_set = &mut self.duplicate_set;
duplicate_set.clear();
self.successors_stack
.drain(successors_len..)
.filter(move |&i| duplicate_set.insert(i))
};
let scc_index = self.scc_data.create_scc(deduplicated_successors);
self.node_states[node] = NodeState::InCycle { scc_index };
WalkReturn::Complete { scc_index }
} else {
// We are not the head of the cycle. Return back to our
// caller. They will take ownership of the
// `self.successors` data that we pushed.
self.node_states[node] = NodeState::InCycleWith {
parent: min_cycle_root,
};
WalkReturn::Cycle { min_depth }
}
}
}

View File

@ -0,0 +1,178 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![cfg(test)]
use graph::test::TestGraph;
use super::*;
#[test]
fn diamond() {
let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
let sccs: Sccs<_, usize> = Sccs::new(&graph);
assert_eq!(sccs.num_sccs(), 4);
assert_eq!(sccs.num_sccs(), 4);
}
#[test]
fn test_big_scc() {
// The order in which things will be visited is important to this
// test.
//
// We will visit:
//
// 0 -> 1 -> 2 -> 0
//
// and at this point detect a cycle. 2 will return back to 1 which
// will visit 3. 3 will visit 2 before the cycle is complete, and
// hence it too will return a cycle.
/*
+-> 0
| |
| v
| 1 -> 3
| | |
| v |
+-- 2 <--+
*/
let graph = TestGraph::new(0, &[
(0, 1),
(1, 2),
(1, 3),
(2, 0),
(3, 2),
]);
let sccs: Sccs<_, usize> = Sccs::new(&graph);
assert_eq!(sccs.num_sccs(), 1);
}
#[test]
fn test_three_sccs() {
/*
0
|
v
+-> 1 3
| | |
| v |
+-- 2 <--+
*/
let graph = TestGraph::new(0, &[
(0, 1),
(1, 2),
(2, 1),
(3, 2),
]);
let sccs: Sccs<_, usize> = Sccs::new(&graph);
assert_eq!(sccs.num_sccs(), 3);
assert_eq!(sccs.scc(0), 1);
assert_eq!(sccs.scc(1), 0);
assert_eq!(sccs.scc(2), 0);
assert_eq!(sccs.scc(3), 2);
assert_eq!(sccs.successors(0), &[]);
assert_eq!(sccs.successors(1), &[0]);
assert_eq!(sccs.successors(2), &[0]);
}
#[test]
fn test_find_state_2() {
// The order in which things will be visited is important to this
// test. It tests part of the `find_state` behavior.
//
// We will start in our DFS by visiting:
//
// 0 -> 1 -> 2 -> 1
//
// and at this point detect a cycle. The state of 2 will thus be
// `InCycleWith { 1 }`. We will then visit the 1 -> 3 edge, which
// will attempt to visit 0 as well, thus going to the state
// `InCycleWith { 0 }`. Finally, node 1 will complete; the lowest
// depth of any successor was 3 which had depth 0, and thus it
// will be in the state `InCycleWith { 3 }`.
//
// When we finally traverse the `0 -> 4` edge and then visit node 2,
// the states of the nodes are:
//
// 0 BeingVisited { 0 }
// 1 InCycleWith { 3 }
// 2 InCycleWith { 1 }
// 3 InCycleWith { 0 }
//
// and hence 4 will traverse the links, finding an ultimate depth of 0.
// If will also collapse the states to the following:
//
// 0 BeingVisited { 0 }
// 1 InCycleWith { 3 }
// 2 InCycleWith { 1 }
// 3 InCycleWith { 0 }
/*
/----+
0 <--+ |
| | |
v | |
+-> 1 -> 3 4
| | |
| v |
+-- 2 <----+
*/
let graph = TestGraph::new(0, &[
(0, 1),
(0, 4),
(1, 2),
(1, 3),
(2, 1),
(3, 0),
(4, 2),
]);
let sccs: Sccs<_, usize> = Sccs::new(&graph);
assert_eq!(sccs.num_sccs(), 1);
assert_eq!(sccs.scc(0), 0);
assert_eq!(sccs.scc(1), 0);
assert_eq!(sccs.scc(2), 0);
assert_eq!(sccs.scc(3), 0);
assert_eq!(sccs.scc(4), 0);
assert_eq!(sccs.successors(0), &[]);
}
#[test]
fn test_find_state_3() {
/*
/----+
0 <--+ |
| | |
v | |
+-> 1 -> 3 4 5
| | | |
| v | |
+-- 2 <----+-+
*/
let graph = TestGraph::new(0, &[
(0, 1),
(0, 4),
(1, 2),
(1, 3),
(2, 1),
(3, 0),
(4, 2),
(5, 2),
]);
let sccs: Sccs<_, usize> = Sccs::new(&graph);
assert_eq!(sccs.num_sccs(), 2);
assert_eq!(sccs.scc(0), 0);
assert_eq!(sccs.scc(1), 0);
assert_eq!(sccs.scc(2), 0);
assert_eq!(sccs.scc(3), 0);
assert_eq!(sccs.scc(4), 0);
assert_eq!(sccs.scc(5), 1);
assert_eq!(sccs.successors(0), &[]);
assert_eq!(sccs.successors(1), &[0]);
}

View File

@ -13,7 +13,7 @@ use std::cmp::max;
use std::slice;
use std::iter;
use super::{ControlFlowGraph, GraphPredecessors, GraphSuccessors};
use super::*;
pub struct TestGraph {
num_nodes: usize,
@ -44,23 +44,31 @@ impl TestGraph {
}
}
impl ControlFlowGraph for TestGraph {
impl DirectedGraph for TestGraph {
type Node = usize;
}
impl WithStartNode for TestGraph {
fn start_node(&self) -> usize {
self.start_node
}
}
impl WithNumNodes for TestGraph {
fn num_nodes(&self) -> usize {
self.num_nodes
}
}
impl WithPredecessors for TestGraph {
fn predecessors<'graph>(&'graph self,
node: usize)
-> <Self as GraphPredecessors<'graph>>::Iter {
self.predecessors[&node].iter().cloned()
}
}
impl WithSuccessors for TestGraph {
fn successors<'graph>(&'graph self, node: usize) -> <Self as GraphSuccessors<'graph>>::Iter {
self.successors[&node].iter().cloned()
}