diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 584e02227671..d7ea96218516 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -591,9 +591,100 @@ unlock: raw_spin_unlock(&trie->lock); } -static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) +static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) { - return -ENOTSUPP; + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; + struct lpm_trie_node *node, *next_node = NULL, *parent; + struct lpm_trie_node **node_stack = NULL; + struct lpm_trie_node __rcu **root; + int err = 0, stack_ptr = -1; + unsigned int next_bit; + size_t matchlen; + + /* The get_next_key follows postorder. For the 4 node example in + * the top of this file, the trie_get_next_key() returns the following + * one after another: + * 192.168.0.0/24 + * 192.168.1.0/24 + * 192.168.128.0/24 + * 192.168.0.0/16 + * + * The idea is to return more specific keys before less specific ones. + */ + + /* Empty trie */ + if (!rcu_dereference(trie->root)) + return -ENOENT; + + /* For invalid key, find the leftmost node in the trie */ + if (!key || key->prefixlen > trie->max_prefixlen) { + root = &trie->root; + goto find_leftmost; + } + + node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *), + GFP_USER | __GFP_NOWARN); + if (!node_stack) + return -ENOMEM; + + /* Try to find the exact node for the given key */ + for (node = rcu_dereference(trie->root); node;) { + node_stack[++stack_ptr] = node; + matchlen = longest_prefix_match(trie, node, key); + if (node->prefixlen != matchlen || + node->prefixlen == key->prefixlen) + break; + + next_bit = extract_bit(key->data, node->prefixlen); + node = rcu_dereference(node->child[next_bit]); + } + if (!node || node->prefixlen != key->prefixlen || + (node->flags & LPM_TREE_NODE_FLAG_IM)) { + root = &trie->root; + goto find_leftmost; + } + + /* The node with the exactly-matching key has been found, + * find the first node in postorder after the matched node. + */ + node = node_stack[stack_ptr]; + while (stack_ptr > 0) { + parent = node_stack[stack_ptr - 1]; + if (rcu_dereference(parent->child[0]) == node && + rcu_dereference(parent->child[1])) { + root = &parent->child[1]; + goto find_leftmost; + } + if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { + next_node = parent; + goto do_copy; + } + + node = parent; + stack_ptr--; + } + + /* did not find anything */ + err = -ENOENT; + goto free_stack; + +find_leftmost: + /* Find the leftmost non-intermediate node, all intermediate nodes + * have exact two children, so this function will never return NULL. + */ + for (node = rcu_dereference(*root); node;) { + if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + next_node = node; + node = rcu_dereference(node->child[0]); + } +do_copy: + next_key->prefixlen = next_node->prefixlen; + memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), + next_node->data, trie->data_size); +free_stack: + kfree(node_stack); + return err; } const struct bpf_map_ops trie_map_ops = {