78c1d78488
A series of radix tree cleanups, and usage of them in the core pagecache
code.
Micro-benchmark:
lookup 14 slots (typical page-vector size)
in a radix-tree where each <step>-th slot is filled and tagged
before/after - nsec per full scan through tree
* Intel Sandy Bridge i7-2620M 4Mb L3
New code always faster
* AMD Athlon 6000+ 2x1Mb L2, without L3
New code generally faster,
Minor degradation (marked with "*") for huge sparse trees
* i386 on Sandy Bridge
New code faster for common cases: tagged and dense trees.
Some degradations for non-tagged lookup on sparse trees.
Ideally, an __ffs() analog for finding the first non-zero long element
in an array would help here; gcc sometimes cannot optimize this loop correctly.
Numbers:
CPU: Intel Sandy Bridge i7-2620M 4Mb L3
radix-tree with 1024 slots:
tagged lookup
step 1 before 7156 after 3613
step 2 before 5399 after 2696
step 3 before 4779 after 1928
step 4 before 4456 after 1429
step 5 before 4292 after 1213
step 6 before 4183 after 1052
step 7 before 4157 after 951
step 8 before 4016 after 812
step 9 before 3952 after 851
step 10 before 3937 after 732
step 11 before 4023 after 709
step 12 before 3872 after 657
step 13 before 3892 after 633
step 14 before 3720 after 591
step 15 before 3879 after 578
step 16 before 3561 after 513
normal lookup
step 1 before 4266 after 3301
step 2 before 2695 after 2129
step 3 before 2083 after 1712
step 4 before 1801 after 1534
step 5 before 1628 after 1313
step 6 before 1551 after 1263
step 7 before 1475 after 1185
step 8 before 1432 after 1167
step 9 before 1373 after 1092
step 10 before 1339 after 1134
step 11 before 1292 after 1056
step 12 before 1319 after 1030
step 13 before 1276 after 1004
step 14 before 1256 after 987
step 15 before 1228 after 992
step 16 before 1247 after 999
radix-tree with 1024*1024*128 slots:
tagged lookup
step 1 before 1086102841 after 674196409
step 2 before 816839155 after 498138306
step 7 before 599728907 after 240676762
step 15 before 555729253 after 185219677
step 63 before 606637748 after 128585664
step 64 before 608384432 after 102945089
step 65 before 596987114 after 123996019
step 128 before 304459225 after 56783056
step 256 before 158846855 after 31232481
step 512 before 86085652 after 18950595
step 12345 before 6517189 after 1674057
normal lookup
step 1 before 626064869 after 544418266
step 2 before 418809975 after 336321473
step 7 before 242303598 after 207755560
step 15 before 208380563 after 176496355
step 63 before 186854206 after 167283638
step 64 before 176188060 after 170143976
step 65 before 185139608 after 167487116
step 128 before 88181865 after 86913490
step 256 before 45733628 after 45143534
step 512 before 24506038 after 23859036
step 12345 before 2177425 after 2018662
* AMD Athlon 6000+ 2x1Mb L2, without L3
radix-tree with 1024 slots:
tag-lookup
step 1 before 8164 after 5379
step 2 before 5818 after 5581
step 3 before 4959 after 4213
step 4 before 4371 after 3386
step 5 before 4204 after 2997
step 6 before 4950 after 2744
step 7 before 4598 after 2480
step 8 before 4251 after 2288
step 9 before 4262 after 2243
step 10 before 4175 after 2131
step 11 before 3999 after 2024
step 12 before 3979 after 1994
step 13 before 3842 after 1929
step 14 before 3750 after 1810
step 15 before 3735 after 1810
step 16 before 3532 after 1660
normal-lookup
step 1 before 7875 after 5847
step 2 before 4808 after 4071
step 3 before 4073 after 3462
step 4 before 3677 after 3074
step 5 before 4308 after 2978
step 6 before 3911 after 3807
step 7 before 3635 after 3522
step 8 before 3313 after 3202
step 9 before 3280 after 3257
step 10 before 3166 after 3083
step 11 before 3066 after 3026
step 12 before 2985 after 2982
step 13 before 2925 after 2924
step 14 before 2834 after 2808
step 15 before 2805 after 2803
step 16 before 2647 after 2622
radix-tree with 1024*1024*128 slots:
tag-lookup
step 1 before 1288059720 after 951736580
step 2 before 961292300 after 884212140
step 7 before 768905140 after 547267580
step 15 before 771319480 after 456550640
step 63 before 504847640 after 242704304
step 64 before 392484800 after 177920786
step 65 before 491162160 after 246895264
step 128 before 208084064 after 97348392
step 256 before 112401035 after 51408126
step 512 before 75825834 after 29145070
step 12345 before 5603166 after 2847330
normal-lookup
step 1 before 1025677120 after 861375100
step 2 before 647220080 after 572258540
step 7 before 505518960 after 484041813
step 15 before 430483053 after 444815320 *
step 63 before 388113453 after 404250546 *
step 64 before 374154666 after 396027440 *
step 65 before 381423973 after 396704853 *
step 128 before 190078700 after 202619384 *
step 256 before 100886756 after 102829108 *
step 512 before 64074505 after 56158720
step 12345 before 4237289 after 4422299 *
* i686 on Sandy bridge
radix-tree with 1024 slots:
tagged lookup
step 1 before 7990 after 4019
step 2 before 5698 after 2897
step 3 before 5013 after 2475
step 4 before 4630 after 1721
step 5 before 4346 after 1759
step 6 before 4299 after 1556
step 7 before 4098 after 1513
step 8 before 4115 after 1222
step 9 before 3983 after 1390
step 10 before 4077 after 1207
step 11 before 3921 after 1231
step 12 before 3894 after 1116
step 13 before 3840 after 1147
step 14 before 3799 after 1090
step 15 before 3797 after 1059
step 16 before 3783 after 745
normal lookup
step 1 before 5103 after 3499
step 2 before 3299 after 2550
step 3 before 2489 after 2370
step 4 before 2034 after 2302 *
step 5 before 1846 after 2268 *
step 6 before 1752 after 2249 *
step 7 before 1679 after 2164 *
step 8 before 1627 after 2153 *
step 9 before 1542 after 2095 *
step 10 before 1479 after 2109 *
step 11 before 1469 after 2009 *
step 12 before 1445 after 2039 *
step 13 before 1411 after 2013 *
step 14 before 1374 after 2046 *
step 15 before 1340 after 1975 *
step 16 before 1331 after 2000 *
radix-tree with 1024*1024*128 slots:
tagged lookup
step 1 before 1225865377 after 667153553
step 2 before 842427423 after 471533007
step 7 before 609296153 after 276260116
step 15 before 544232060 after 226859105
step 63 before 519209199 after 141343043
step 64 before 588980279 after 141951339
step 65 before 521099710 after 138282060
step 128 before 298476778 after 83390628
step 256 before 149358342 after 43602609
step 512 before 76994713 after 22911077
step 12345 before 5328666 after 1472111
normal lookup
step 1 before 819284564 after 533635310
step 2 before 512421605 after 364956155
step 7 before 271443305 after 305721345 *
step 15 before 223591630 after 273960216 *
step 63 before 190320247 after 217770207 *
step 64 before 178538168 after 267411372 *
step 65 before 186400423 after 215347937 *
step 128 before 88106045 after 140540612 *
step 256 before 44812420 after 70660377 *
step 512 before 24435438 after 36328275 *
step 12345 before 2123924 after 2148062 *
bloat-o-meter delta for this patchset + patchset with related shmem cleanups
bloat-o-meter: x86_64
add/remove: 4/3 grow/shrink: 5/6 up/down: 928/-939 (-11)
function old new delta
radix_tree_next_chunk - 499 +499
shmem_unuse 428 554 +126
shmem_radix_tree_replace 131 227 +96
find_get_pages_tag 354 419 +65
find_get_pages_contig 345 407 +62
find_get_pages 362 396 +34
__kstrtab_radix_tree_next_chunk - 22 +22
__ksymtab_radix_tree_next_chunk - 16 +16
__kcrctab_radix_tree_next_chunk - 8 +8
radix_tree_gang_lookup_slot 204 203 -1
static.shmem_xattr_set 384 381 -3
radix_tree_gang_lookup_tag_slot 208 191 -17
radix_tree_gang_lookup 231 187 -44
radix_tree_gang_lookup_tag 247 199 -48
shmem_unlock_mapping 278 190 -88
__lookup 217 - -217
__lookup_tag 242 - -242
radix_tree_locate_item 279 - -279
bloat-o-meter: i386
add/remove: 3/3 grow/shrink: 8/9 up/down: 1075/-1275 (-200)
function old new delta
radix_tree_next_chunk - 757 +757
shmem_unuse 352 449 +97
find_get_pages_contig 269 322 +53
shmem_radix_tree_replace 113 154 +41
find_get_pages_tag 277 318 +41
dcache_dir_lseek 426 458 +32
__kstrtab_radix_tree_next_chunk - 22 +22
vc_do_resize 968 977 +9
snd_pcm_lib_read1 725 733 +8
__ksymtab_radix_tree_next_chunk - 8 +8
netlbl_cipsov4_list 1120 1127 +7
find_get_pages 293 291 -2
new_slab 467 459 -8
bitfill_unaligned_rev 425 417 -8
radix_tree_gang_lookup_tag_slot 177 146 -31
blk_dump_cmd 267 229 -38
radix_tree_gang_lookup_slot 212 134 -78
shmem_unlock_mapping 221 128 -93
radix_tree_gang_lookup_tag 275 162 -113
radix_tree_gang_lookup 255 126 -129
__lookup 227 - -227
__lookup_tag 271 - -271
radix_tree_locate_item 277 - -277
This patch:
Implement a clean, simple and effective radix-tree iteration routine.
Iteration is divided into two phases:
* lookup next chunk in radix-tree leaf node
* iterating through slots in this chunk
The main iterator function radix_tree_next_chunk() returns a pointer to the
first slot, and stores in struct radix_tree_iter the index of the next-to-last slot.
For tagged iteration it also constructs a bitmask of tags for the returned chunk.
All additional logic is implemented as static-inline functions and macros.
Also add radix_tree_find_next_bit(), a static-inline variant of
find_next_bit() optimized for small constant-size arrays, because
find_next_bit() is too heavy for searching in an array with one or two long
elements.
[akpm@linux-foundation.org: rework comments a bit]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Tested-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1665 lines
43 KiB
C
1665 lines
43 KiB
C
/*
|
|
* Copyright (C) 2001 Momchil Velikov
|
|
* Portions Copyright (C) 2001 Christoph Hellwig
|
|
* Copyright (C) 2005 SGI, Christoph Lameter
|
|
* Copyright (C) 2006 Nick Piggin
|
|
* Copyright (C) 2012 Konstantin Khlebnikov
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation; either version 2, or (at
|
|
* your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/radix-tree.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/string.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/rcupdate.h>
|
|
|
|
|
|
/*
 * Fan-out of a node: each node holds 2^RADIX_TREE_MAP_SHIFT slots.
 * Kernel builds derive the shift from CONFIG_BASE_SMALL; any other build
 * uses a small shift for more stressful testing.
 */
#if defined(__KERNEL__)
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

/* Slots per node, and the mask that extracts a slot offset from an index. */
#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

/* Number of longs needed to keep one tag bit per slot. */
#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
|
|
|
|
struct radix_tree_node {
|
|
unsigned int height; /* Height from the bottom */
|
|
unsigned int count;
|
|
union {
|
|
struct radix_tree_node *parent; /* Used when ascending tree */
|
|
struct rcu_head rcu_head; /* Used when freeing node */
|
|
};
|
|
void __rcu *slots[RADIX_TREE_MAP_SIZE];
|
|
unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
|
|
};
|
|
|
|
#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
|
|
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
|
|
RADIX_TREE_MAP_SHIFT))
|
|
|
|
/*
|
|
* The height_to_maxindex array needs to be one deeper than the maximum
|
|
* path as height 0 holds only 1 entry.
|
|
*/
|
|
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
|
|
|
|
/*
|
|
* Radix tree node cache.
|
|
*/
|
|
static struct kmem_cache *radix_tree_node_cachep;
|
|
|
|
/*
|
|
* Per-cpu pool of preloaded nodes
|
|
*/
|
|
struct radix_tree_preload {
|
|
int nr;
|
|
struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
|
|
};
|
|
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
|
|
|
|
static inline void *ptr_to_indirect(void *ptr)
|
|
{
|
|
return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
|
|
}
|
|
|
|
static inline void *indirect_to_ptr(void *ptr)
|
|
{
|
|
return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
|
|
}
|
|
|
|
static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
|
|
{
|
|
return root->gfp_mask & __GFP_BITS_MASK;
|
|
}
|
|
|
|
static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
|
|
int offset)
|
|
{
|
|
__set_bit(offset, node->tags[tag]);
|
|
}
|
|
|
|
static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
|
|
int offset)
|
|
{
|
|
__clear_bit(offset, node->tags[tag]);
|
|
}
|
|
|
|
static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
|
|
int offset)
|
|
{
|
|
return test_bit(offset, node->tags[tag]);
|
|
}
|
|
|
|
static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
|
|
{
|
|
root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
|
|
}
|
|
|
|
static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
|
|
{
|
|
root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
|
|
}
|
|
|
|
static inline void root_tag_clear_all(struct radix_tree_root *root)
|
|
{
|
|
root->gfp_mask &= __GFP_BITS_MASK;
|
|
}
|
|
|
|
static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
|
|
{
|
|
return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
|
|
}
|
|
|
|
/*
|
|
* Returns 1 if any slot in the node has this tag set.
|
|
* Otherwise returns 0.
|
|
*/
|
|
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
|
|
{
|
|
int idx;
|
|
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
|
|
if (node->tags[tag][idx])
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* radix_tree_find_next_bit - find the next set bit in a memory region
|
|
*
|
|
* @addr: The address to base the search on
|
|
* @size: The bitmap size in bits
|
|
* @offset: The bitnumber to start searching at
|
|
*
|
|
* Unrollable variant of find_next_bit() for constant size arrays.
|
|
* Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
|
|
* Returns next bit offset, or size if nothing found.
|
|
*/
|
|
static __always_inline unsigned long
|
|
radix_tree_find_next_bit(const unsigned long *addr,
|
|
unsigned long size, unsigned long offset)
|
|
{
|
|
if (!__builtin_constant_p(size))
|
|
return find_next_bit(addr, size, offset);
|
|
|
|
if (offset < size) {
|
|
unsigned long tmp;
|
|
|
|
addr += offset / BITS_PER_LONG;
|
|
tmp = *addr >> (offset % BITS_PER_LONG);
|
|
if (tmp)
|
|
return __ffs(tmp) + offset;
|
|
offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
|
|
while (offset < size) {
|
|
tmp = *++addr;
|
|
if (tmp)
|
|
return __ffs(tmp) + offset;
|
|
offset += BITS_PER_LONG;
|
|
}
|
|
}
|
|
return size;
|
|
}
|
|
|
|
/*
|
|
* This assumes that the caller has performed appropriate preallocation, and
|
|
* that the caller has pinned this thread of control to the current CPU.
|
|
*/
|
|
static struct radix_tree_node *
|
|
radix_tree_node_alloc(struct radix_tree_root *root)
|
|
{
|
|
struct radix_tree_node *ret = NULL;
|
|
gfp_t gfp_mask = root_gfp_mask(root);
|
|
|
|
if (!(gfp_mask & __GFP_WAIT)) {
|
|
struct radix_tree_preload *rtp;
|
|
|
|
/*
|
|
* Provided the caller has preloaded here, we will always
|
|
* succeed in getting a node here (and never reach
|
|
* kmem_cache_alloc)
|
|
*/
|
|
rtp = &__get_cpu_var(radix_tree_preloads);
|
|
if (rtp->nr) {
|
|
ret = rtp->nodes[rtp->nr - 1];
|
|
rtp->nodes[rtp->nr - 1] = NULL;
|
|
rtp->nr--;
|
|
}
|
|
}
|
|
if (ret == NULL)
|
|
ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
|
|
|
|
BUG_ON(radix_tree_is_indirect_ptr(ret));
|
|
return ret;
|
|
}
|
|
|
|
static void radix_tree_node_rcu_free(struct rcu_head *head)
|
|
{
|
|
struct radix_tree_node *node =
|
|
container_of(head, struct radix_tree_node, rcu_head);
|
|
int i;
|
|
|
|
/*
|
|
* must only free zeroed nodes into the slab. radix_tree_shrink
|
|
* can leave us with a non-NULL entry in the first slot, so clear
|
|
* that here to make sure.
|
|
*/
|
|
for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
|
|
tag_clear(node, i, 0);
|
|
|
|
node->slots[0] = NULL;
|
|
node->count = 0;
|
|
|
|
kmem_cache_free(radix_tree_node_cachep, node);
|
|
}
|
|
|
|
static inline void
|
|
radix_tree_node_free(struct radix_tree_node *node)
|
|
{
|
|
call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
|
|
}
|
|
|
|
/*
|
|
* Load up this CPU's radix_tree_node buffer with sufficient objects to
|
|
* ensure that the addition of a single element in the tree cannot fail. On
|
|
* success, return zero, with preemption disabled. On error, return -ENOMEM
|
|
* with preemption not disabled.
|
|
*
|
|
* To make use of this facility, the radix tree must be initialised without
|
|
* __GFP_WAIT being passed to INIT_RADIX_TREE().
|
|
*/
|
|
int radix_tree_preload(gfp_t gfp_mask)
|
|
{
|
|
struct radix_tree_preload *rtp;
|
|
struct radix_tree_node *node;
|
|
int ret = -ENOMEM;
|
|
|
|
preempt_disable();
|
|
rtp = &__get_cpu_var(radix_tree_preloads);
|
|
while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
|
|
preempt_enable();
|
|
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
|
|
if (node == NULL)
|
|
goto out;
|
|
preempt_disable();
|
|
rtp = &__get_cpu_var(radix_tree_preloads);
|
|
if (rtp->nr < ARRAY_SIZE(rtp->nodes))
|
|
rtp->nodes[rtp->nr++] = node;
|
|
else
|
|
kmem_cache_free(radix_tree_node_cachep, node);
|
|
}
|
|
ret = 0;
|
|
out:
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_preload);
|
|
|
|
/*
|
|
* Return the maximum key which can be store into a
|
|
* radix tree with height HEIGHT.
|
|
*/
|
|
static inline unsigned long radix_tree_maxindex(unsigned int height)
|
|
{
|
|
return height_to_maxindex[height];
|
|
}
|
|
|
|
/*
|
|
* Extend a radix tree so it can store key @index.
|
|
*/
|
|
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
|
|
{
|
|
struct radix_tree_node *node;
|
|
struct radix_tree_node *slot;
|
|
unsigned int height;
|
|
int tag;
|
|
|
|
/* Figure out what the height should be. */
|
|
height = root->height + 1;
|
|
while (index > radix_tree_maxindex(height))
|
|
height++;
|
|
|
|
if (root->rnode == NULL) {
|
|
root->height = height;
|
|
goto out;
|
|
}
|
|
|
|
do {
|
|
unsigned int newheight;
|
|
if (!(node = radix_tree_node_alloc(root)))
|
|
return -ENOMEM;
|
|
|
|
/* Propagate the aggregated tag info into the new root */
|
|
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
|
|
if (root_tag_get(root, tag))
|
|
tag_set(node, tag, 0);
|
|
}
|
|
|
|
/* Increase the height. */
|
|
newheight = root->height+1;
|
|
node->height = newheight;
|
|
node->count = 1;
|
|
node->parent = NULL;
|
|
slot = root->rnode;
|
|
if (newheight > 1) {
|
|
slot = indirect_to_ptr(slot);
|
|
slot->parent = node;
|
|
}
|
|
node->slots[0] = slot;
|
|
node = ptr_to_indirect(node);
|
|
rcu_assign_pointer(root->rnode, node);
|
|
root->height = newheight;
|
|
} while (height > root->height);
|
|
out:
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* radix_tree_insert - insert into a radix tree
|
|
* @root: radix tree root
|
|
* @index: index key
|
|
* @item: item to insert
|
|
*
|
|
* Insert an item into the radix tree at position @index.
|
|
*/
|
|
int radix_tree_insert(struct radix_tree_root *root,
|
|
unsigned long index, void *item)
|
|
{
|
|
struct radix_tree_node *node = NULL, *slot;
|
|
unsigned int height, shift;
|
|
int offset;
|
|
int error;
|
|
|
|
BUG_ON(radix_tree_is_indirect_ptr(item));
|
|
|
|
/* Make sure the tree is high enough. */
|
|
if (index > radix_tree_maxindex(root->height)) {
|
|
error = radix_tree_extend(root, index);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
slot = indirect_to_ptr(root->rnode);
|
|
|
|
height = root->height;
|
|
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
|
|
|
|
offset = 0; /* uninitialised var warning */
|
|
while (height > 0) {
|
|
if (slot == NULL) {
|
|
/* Have to add a child node. */
|
|
if (!(slot = radix_tree_node_alloc(root)))
|
|
return -ENOMEM;
|
|
slot->height = height;
|
|
slot->parent = node;
|
|
if (node) {
|
|
rcu_assign_pointer(node->slots[offset], slot);
|
|
node->count++;
|
|
} else
|
|
rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
|
|
}
|
|
|
|
/* Go a level down */
|
|
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
node = slot;
|
|
slot = node->slots[offset];
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
height--;
|
|
}
|
|
|
|
if (slot != NULL)
|
|
return -EEXIST;
|
|
|
|
if (node) {
|
|
node->count++;
|
|
rcu_assign_pointer(node->slots[offset], item);
|
|
BUG_ON(tag_get(node, 0, offset));
|
|
BUG_ON(tag_get(node, 1, offset));
|
|
} else {
|
|
rcu_assign_pointer(root->rnode, item);
|
|
BUG_ON(root_tag_get(root, 0));
|
|
BUG_ON(root_tag_get(root, 1));
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_insert);
|
|
|
|
/*
|
|
* is_slot == 1 : search for the slot.
|
|
* is_slot == 0 : search for the node.
|
|
*/
|
|
static void *radix_tree_lookup_element(struct radix_tree_root *root,
|
|
unsigned long index, int is_slot)
|
|
{
|
|
unsigned int height, shift;
|
|
struct radix_tree_node *node, **slot;
|
|
|
|
node = rcu_dereference_raw(root->rnode);
|
|
if (node == NULL)
|
|
return NULL;
|
|
|
|
if (!radix_tree_is_indirect_ptr(node)) {
|
|
if (index > 0)
|
|
return NULL;
|
|
return is_slot ? (void *)&root->rnode : node;
|
|
}
|
|
node = indirect_to_ptr(node);
|
|
|
|
height = node->height;
|
|
if (index > radix_tree_maxindex(height))
|
|
return NULL;
|
|
|
|
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
|
|
|
|
do {
|
|
slot = (struct radix_tree_node **)
|
|
(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
|
|
node = rcu_dereference_raw(*slot);
|
|
if (node == NULL)
|
|
return NULL;
|
|
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
height--;
|
|
} while (height > 0);
|
|
|
|
return is_slot ? (void *)slot : indirect_to_ptr(node);
|
|
}
|
|
|
|
/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
	void *slot = radix_tree_lookup_element(root, index, 1);

	return (void **)slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);
|
|
|
|
/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
	void *item = radix_tree_lookup_element(root, index, 0);

	return item;
}
EXPORT_SYMBOL(radix_tree_lookup);
|
|
|
|
/**
|
|
* radix_tree_tag_set - set a tag on a radix tree node
|
|
* @root: radix tree root
|
|
* @index: index key
|
|
* @tag: tag index
|
|
*
|
|
* Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
|
|
* corresponding to @index in the radix tree. From
|
|
* the root all the way down to the leaf node.
|
|
*
|
|
* Returns the address of the tagged item. Setting a tag on a not-present
|
|
* item is a bug.
|
|
*/
|
|
void *radix_tree_tag_set(struct radix_tree_root *root,
|
|
unsigned long index, unsigned int tag)
|
|
{
|
|
unsigned int height, shift;
|
|
struct radix_tree_node *slot;
|
|
|
|
height = root->height;
|
|
BUG_ON(index > radix_tree_maxindex(height));
|
|
|
|
slot = indirect_to_ptr(root->rnode);
|
|
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
|
|
|
|
while (height > 0) {
|
|
int offset;
|
|
|
|
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
if (!tag_get(slot, tag, offset))
|
|
tag_set(slot, tag, offset);
|
|
slot = slot->slots[offset];
|
|
BUG_ON(slot == NULL);
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
height--;
|
|
}
|
|
|
|
/* set the root's tag bit */
|
|
if (slot && !root_tag_get(root, tag))
|
|
root_tag_set(root, tag);
|
|
|
|
return slot;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_tag_set);
|
|
|
|
/**
|
|
* radix_tree_tag_clear - clear a tag on a radix tree node
|
|
* @root: radix tree root
|
|
* @index: index key
|
|
* @tag: tag index
|
|
*
|
|
* Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
|
|
* corresponding to @index in the radix tree. If
|
|
* this causes the leaf node to have no tags set then clear the tag in the
|
|
* next-to-leaf node, etc.
|
|
*
|
|
* Returns the address of the tagged item on success, else NULL. ie:
|
|
* has the same return value and semantics as radix_tree_lookup().
|
|
*/
|
|
void *radix_tree_tag_clear(struct radix_tree_root *root,
|
|
unsigned long index, unsigned int tag)
|
|
{
|
|
struct radix_tree_node *node = NULL;
|
|
struct radix_tree_node *slot = NULL;
|
|
unsigned int height, shift;
|
|
int uninitialized_var(offset);
|
|
|
|
height = root->height;
|
|
if (index > radix_tree_maxindex(height))
|
|
goto out;
|
|
|
|
shift = height * RADIX_TREE_MAP_SHIFT;
|
|
slot = indirect_to_ptr(root->rnode);
|
|
|
|
while (shift) {
|
|
if (slot == NULL)
|
|
goto out;
|
|
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
node = slot;
|
|
slot = slot->slots[offset];
|
|
}
|
|
|
|
if (slot == NULL)
|
|
goto out;
|
|
|
|
while (node) {
|
|
if (!tag_get(node, tag, offset))
|
|
goto out;
|
|
tag_clear(node, tag, offset);
|
|
if (any_tag_set(node, tag))
|
|
goto out;
|
|
|
|
index >>= RADIX_TREE_MAP_SHIFT;
|
|
offset = index & RADIX_TREE_MAP_MASK;
|
|
node = node->parent;
|
|
}
|
|
|
|
/* clear the root's tag bit */
|
|
if (root_tag_get(root, tag))
|
|
root_tag_clear(root, tag);
|
|
|
|
out:
|
|
return slot;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_tag_clear);
|
|
|
|
/**
|
|
* radix_tree_tag_get - get a tag on a radix tree node
|
|
* @root: radix tree root
|
|
* @index: index key
|
|
* @tag: tag index (< RADIX_TREE_MAX_TAGS)
|
|
*
|
|
* Return values:
|
|
*
|
|
* 0: tag not present or not set
|
|
* 1: tag set
|
|
*
|
|
* Note that the return value of this function may not be relied on, even if
|
|
* the RCU lock is held, unless tag modification and node deletion are excluded
|
|
* from concurrency.
|
|
*/
|
|
int radix_tree_tag_get(struct radix_tree_root *root,
|
|
unsigned long index, unsigned int tag)
|
|
{
|
|
unsigned int height, shift;
|
|
struct radix_tree_node *node;
|
|
|
|
/* check the root's tag bit */
|
|
if (!root_tag_get(root, tag))
|
|
return 0;
|
|
|
|
node = rcu_dereference_raw(root->rnode);
|
|
if (node == NULL)
|
|
return 0;
|
|
|
|
if (!radix_tree_is_indirect_ptr(node))
|
|
return (index == 0);
|
|
node = indirect_to_ptr(node);
|
|
|
|
height = node->height;
|
|
if (index > radix_tree_maxindex(height))
|
|
return 0;
|
|
|
|
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
|
|
|
|
for ( ; ; ) {
|
|
int offset;
|
|
|
|
if (node == NULL)
|
|
return 0;
|
|
|
|
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
if (!tag_get(node, tag, offset))
|
|
return 0;
|
|
if (height == 1)
|
|
return 1;
|
|
node = rcu_dereference_raw(node->slots[offset]);
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
height--;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_tag_get);
|
|
|
|
/**
|
|
* radix_tree_next_chunk - find next chunk of slots for iteration
|
|
*
|
|
* @root: radix tree root
|
|
* @iter: iterator state
|
|
* @flags: RADIX_TREE_ITER_* flags and tag index
|
|
* Returns: pointer to chunk first slot, or NULL if iteration is over
|
|
*/
|
|
void **radix_tree_next_chunk(struct radix_tree_root *root,
|
|
struct radix_tree_iter *iter, unsigned flags)
|
|
{
|
|
unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
|
|
struct radix_tree_node *rnode, *node;
|
|
unsigned long index, offset;
|
|
|
|
if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
|
|
return NULL;
|
|
|
|
/*
|
|
* Catch next_index overflow after ~0UL. iter->index never overflows
|
|
* during iterating; it can be zero only at the beginning.
|
|
* And we cannot overflow iter->next_index in a single step,
|
|
* because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
|
|
*/
|
|
index = iter->next_index;
|
|
if (!index && iter->index)
|
|
return NULL;
|
|
|
|
rnode = rcu_dereference_raw(root->rnode);
|
|
if (radix_tree_is_indirect_ptr(rnode)) {
|
|
rnode = indirect_to_ptr(rnode);
|
|
} else if (rnode && !index) {
|
|
/* Single-slot tree */
|
|
iter->index = 0;
|
|
iter->next_index = 1;
|
|
iter->tags = 1;
|
|
return (void **)&root->rnode;
|
|
} else
|
|
return NULL;
|
|
|
|
restart:
|
|
shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
|
|
offset = index >> shift;
|
|
|
|
/* Index outside of the tree */
|
|
if (offset >= RADIX_TREE_MAP_SIZE)
|
|
return NULL;
|
|
|
|
node = rnode;
|
|
while (1) {
|
|
if ((flags & RADIX_TREE_ITER_TAGGED) ?
|
|
!test_bit(offset, node->tags[tag]) :
|
|
!node->slots[offset]) {
|
|
/* Hole detected */
|
|
if (flags & RADIX_TREE_ITER_CONTIG)
|
|
return NULL;
|
|
|
|
if (flags & RADIX_TREE_ITER_TAGGED)
|
|
offset = radix_tree_find_next_bit(
|
|
node->tags[tag],
|
|
RADIX_TREE_MAP_SIZE,
|
|
offset + 1);
|
|
else
|
|
while (++offset < RADIX_TREE_MAP_SIZE) {
|
|
if (node->slots[offset])
|
|
break;
|
|
}
|
|
index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
|
|
index += offset << shift;
|
|
/* Overflow after ~0UL */
|
|
if (!index)
|
|
return NULL;
|
|
if (offset == RADIX_TREE_MAP_SIZE)
|
|
goto restart;
|
|
}
|
|
|
|
/* This is leaf-node */
|
|
if (!shift)
|
|
break;
|
|
|
|
node = rcu_dereference_raw(node->slots[offset]);
|
|
if (node == NULL)
|
|
goto restart;
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
}
|
|
|
|
/* Update the iterator state */
|
|
iter->index = index;
|
|
iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;
|
|
|
|
/* Construct iter->tags bit-mask from node->tags[tag] array */
|
|
if (flags & RADIX_TREE_ITER_TAGGED) {
|
|
unsigned tag_long, tag_bit;
|
|
|
|
tag_long = offset / BITS_PER_LONG;
|
|
tag_bit = offset % BITS_PER_LONG;
|
|
iter->tags = node->tags[tag][tag_long] >> tag_bit;
|
|
/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
|
|
if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
|
|
/* Pick tags from next element */
|
|
if (tag_bit)
|
|
iter->tags |= node->tags[tag][tag_long + 1] <<
|
|
(BITS_PER_LONG - tag_bit);
|
|
/* Clip chunk size, here only BITS_PER_LONG tags */
|
|
iter->next_index = index + BITS_PER_LONG;
|
|
}
|
|
}
|
|
|
|
return node->slots + offset;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_next_chunk);
|
|
|
|
/**
|
|
* radix_tree_range_tag_if_tagged - for each item in given range set given
|
|
* tag if item has another tag set
|
|
* @root: radix tree root
|
|
* @first_indexp: pointer to a starting index of a range to scan
|
|
* @last_index: last index of a range to scan
|
|
* @nr_to_tag: maximum number items to tag
|
|
* @iftag: tag index to test
|
|
* @settag: tag index to set if tested tag is set
|
|
*
|
|
* This function scans range of radix tree from first_index to last_index
|
|
* (inclusive). For each item in the range if iftag is set, the function sets
|
|
* also settag. The function stops either after tagging nr_to_tag items or
|
|
* after reaching last_index.
|
|
*
|
|
* The tags must be set from the leaf level only and propagated back up the
|
|
* path to the root. We must do this so that we resolve the full path before
|
|
* setting any tags on intermediate nodes. If we set tags as we descend, then
|
|
* we can get to the leaf node and find that the index that has the iftag
|
|
* set is outside the range we are scanning. This results in dangling tags and
|
|
* can lead to problems with later tag operations (e.g. livelocks on lookups).
|
|
*
|
|
* The function returns number of leaves where the tag was set and sets
|
|
* *first_indexp to the first unscanned index.
|
|
* WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
|
|
* be prepared to handle that.
|
|
*/
|
|
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
|
|
unsigned long *first_indexp, unsigned long last_index,
|
|
unsigned long nr_to_tag,
|
|
unsigned int iftag, unsigned int settag)
|
|
{
|
|
unsigned int height = root->height;
|
|
struct radix_tree_node *node = NULL;
|
|
struct radix_tree_node *slot;
|
|
unsigned int shift;
|
|
unsigned long tagged = 0;
|
|
unsigned long index = *first_indexp;
|
|
|
|
last_index = min(last_index, radix_tree_maxindex(height));
|
|
if (index > last_index)
|
|
return 0;
|
|
if (!nr_to_tag)
|
|
return 0;
|
|
if (!root_tag_get(root, iftag)) {
|
|
*first_indexp = last_index + 1;
|
|
return 0;
|
|
}
|
|
if (height == 0) {
|
|
*first_indexp = last_index + 1;
|
|
root_tag_set(root, settag);
|
|
return 1;
|
|
}
|
|
|
|
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
|
|
slot = indirect_to_ptr(root->rnode);
|
|
|
|
for (;;) {
|
|
unsigned long upindex;
|
|
int offset;
|
|
|
|
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
if (!slot->slots[offset])
|
|
goto next;
|
|
if (!tag_get(slot, iftag, offset))
|
|
goto next;
|
|
if (shift) {
|
|
/* Go down one level */
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
node = slot;
|
|
slot = slot->slots[offset];
|
|
continue;
|
|
}
|
|
|
|
/* tag the leaf */
|
|
tagged++;
|
|
tag_set(slot, settag, offset);
|
|
|
|
/* walk back up the path tagging interior nodes */
|
|
upindex = index;
|
|
while (node) {
|
|
upindex >>= RADIX_TREE_MAP_SHIFT;
|
|
offset = upindex & RADIX_TREE_MAP_MASK;
|
|
|
|
/* stop if we find a node with the tag already set */
|
|
if (tag_get(node, settag, offset))
|
|
break;
|
|
tag_set(node, settag, offset);
|
|
node = node->parent;
|
|
}
|
|
|
|
/*
|
|
* Small optimization: now clear that node pointer.
|
|
* Since all of this slot's ancestors now have the tag set
|
|
* from setting it above, we have no further need to walk
|
|
* back up the tree setting tags, until we update slot to
|
|
* point to another radix_tree_node.
|
|
*/
|
|
node = NULL;
|
|
|
|
next:
|
|
/* Go to next item at level determined by 'shift' */
|
|
index = ((index >> shift) + 1) << shift;
|
|
/* Overflow can happen when last_index is ~0UL... */
|
|
if (index > last_index || !index)
|
|
break;
|
|
if (tagged >= nr_to_tag)
|
|
break;
|
|
while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
|
|
/*
|
|
* We've fully scanned this node. Go up. Because
|
|
* last_index is guaranteed to be in the tree, what
|
|
* we do below cannot wander astray.
|
|
*/
|
|
slot = slot->parent;
|
|
shift += RADIX_TREE_MAP_SHIFT;
|
|
}
|
|
}
|
|
/*
|
|
* We need not to tag the root tag if there is no tag which is set with
|
|
* settag within the range from *first_indexp to last_index.
|
|
*/
|
|
if (tagged > 0)
|
|
root_tag_set(root, settag);
|
|
*first_indexp = index;
|
|
|
|
return tagged;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
|
|
|
|
|
|
/**
 * radix_tree_next_hole - find the next hole (not-present entry)
 * @root:	tree root
 * @index:	index key to start from
 * @max_scan:	maximum number of indexes to examine
 *
 * Searches [index, min(index + max_scan - 1, MAX_INDEX)] upwards for the
 * lowest index with no entry.
 *
 * Returns: the index of the hole if one was found; otherwise an index
 * outside the searched set (then 'return - index >= max_scan' holds).
 * If the index wraps around, 0 is returned.
 *
 * May be called under rcu_read_lock.  As with radix_tree_gang_lookup, the
 * scan is not an atomic snapshot of the tree: if a hole is created at
 * index 5 and afterwards one at index 10, a scan covering both may
 * return 10.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long scanned = 0;

	/* Step forward while entries are present and budget remains. */
	while (scanned < max_scan && radix_tree_lookup(root, index)) {
		scanned++;
		if (++index == 0)	/* wrapped past the last index */
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);
|
|
|
|
/**
 * radix_tree_prev_hole - find the prev hole (not-present entry)
 * @root:	tree root
 * @index:	index key to start from
 * @max_scan:	maximum number of indexes to examine
 *
 * Searches [max(index - max_scan + 1, 0), index] downwards for the first
 * index with no entry.
 *
 * Returns: the index of the hole if one was found; otherwise an index
 * outside the searched set (then 'index - return >= max_scan' holds).
 * If the index wraps around, ULONG_MAX is returned.
 *
 * radix_tree_prev_hole may be called under rcu_read_lock.  As with
 * radix_tree_gang_lookup, the scan is not an atomic snapshot of the tree:
 * if a hole is created at index 10 and afterwards one at index 5, a scan
 * covering both may return 5.
 */
unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
				   unsigned long index, unsigned long max_scan)
{
	unsigned long scanned = 0;

	/* Step backward while entries are present and budget remains. */
	while (scanned < max_scan && radix_tree_lookup(root, index)) {
		scanned++;
		if (--index == ULONG_MAX)	/* wrapped below index 0 */
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_prev_hole);
|
|
|
|
static unsigned int
|
|
__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
|
|
unsigned long index, unsigned int max_items, unsigned long *next_index)
|
|
{
|
|
unsigned int nr_found = 0;
|
|
unsigned int shift, height;
|
|
unsigned long i;
|
|
|
|
height = slot->height;
|
|
if (height == 0)
|
|
goto out;
|
|
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
|
|
|
|
for ( ; height > 1; height--) {
|
|
i = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
for (;;) {
|
|
if (slot->slots[i] != NULL)
|
|
break;
|
|
index &= ~((1UL << shift) - 1);
|
|
index += 1UL << shift;
|
|
if (index == 0)
|
|
goto out; /* 32-bit wraparound */
|
|
i++;
|
|
if (i == RADIX_TREE_MAP_SIZE)
|
|
goto out;
|
|
}
|
|
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
slot = rcu_dereference_raw(slot->slots[i]);
|
|
if (slot == NULL)
|
|
goto out;
|
|
}
|
|
|
|
/* Bottom level: grab some items */
|
|
for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
|
|
if (slot->slots[i]) {
|
|
results[nr_found] = &(slot->slots[i]);
|
|
if (indices)
|
|
indices[nr_found] = index;
|
|
if (++nr_found == max_items) {
|
|
index++;
|
|
goto out;
|
|
}
|
|
}
|
|
index++;
|
|
}
|
|
out:
|
|
*next_index = index;
|
|
return nr_found;
|
|
}
|
|
|
|
/**
|
|
* radix_tree_gang_lookup - perform multiple lookup on a radix tree
|
|
* @root: radix tree root
|
|
* @results: where the results of the lookup are placed
|
|
* @first_index: start the lookup from this key
|
|
* @max_items: place up to this many items at *results
|
|
*
|
|
* Performs an index-ascending scan of the tree for present items. Places
|
|
* them at *@results and returns the number of items which were placed at
|
|
* *@results.
|
|
*
|
|
* The implementation is naive.
|
|
*
|
|
* Like radix_tree_lookup, radix_tree_gang_lookup may be called under
|
|
* rcu_read_lock. In this case, rather than the returned results being
|
|
* an atomic snapshot of the tree at a single point in time, the semantics
|
|
* of an RCU protected gang lookup are as though multiple radix_tree_lookups
|
|
* have been issued in individual locks, and results stored in 'results'.
|
|
*/
|
|
unsigned int
|
|
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
|
|
unsigned long first_index, unsigned int max_items)
|
|
{
|
|
unsigned long max_index;
|
|
struct radix_tree_node *node;
|
|
unsigned long cur_index = first_index;
|
|
unsigned int ret;
|
|
|
|
node = rcu_dereference_raw(root->rnode);
|
|
if (!node)
|
|
return 0;
|
|
|
|
if (!radix_tree_is_indirect_ptr(node)) {
|
|
if (first_index > 0)
|
|
return 0;
|
|
results[0] = node;
|
|
return 1;
|
|
}
|
|
node = indirect_to_ptr(node);
|
|
|
|
max_index = radix_tree_maxindex(node->height);
|
|
|
|
ret = 0;
|
|
while (ret < max_items) {
|
|
unsigned int nr_found, slots_found, i;
|
|
unsigned long next_index; /* Index of next search */
|
|
|
|
if (cur_index > max_index)
|
|
break;
|
|
slots_found = __lookup(node, (void ***)results + ret, NULL,
|
|
cur_index, max_items - ret, &next_index);
|
|
nr_found = 0;
|
|
for (i = 0; i < slots_found; i++) {
|
|
struct radix_tree_node *slot;
|
|
slot = *(((void ***)results)[ret + i]);
|
|
if (!slot)
|
|
continue;
|
|
results[ret + nr_found] =
|
|
indirect_to_ptr(rcu_dereference_raw(slot));
|
|
nr_found++;
|
|
}
|
|
ret += nr_found;
|
|
if (next_index == 0)
|
|
break;
|
|
cur_index = next_index;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_gang_lookup);
|
|
|
|
/**
|
|
* radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
|
|
* @root: radix tree root
|
|
* @results: where the results of the lookup are placed
|
|
* @indices: where their indices should be placed (but usually NULL)
|
|
* @first_index: start the lookup from this key
|
|
* @max_items: place up to this many items at *results
|
|
*
|
|
* Performs an index-ascending scan of the tree for present items. Places
|
|
* their slots at *@results and returns the number of items which were
|
|
* placed at *@results.
|
|
*
|
|
* The implementation is naive.
|
|
*
|
|
* Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
|
|
* be dereferenced with radix_tree_deref_slot, and if using only RCU
|
|
* protection, radix_tree_deref_slot may fail requiring a retry.
|
|
*/
|
|
unsigned int
|
|
radix_tree_gang_lookup_slot(struct radix_tree_root *root,
|
|
void ***results, unsigned long *indices,
|
|
unsigned long first_index, unsigned int max_items)
|
|
{
|
|
unsigned long max_index;
|
|
struct radix_tree_node *node;
|
|
unsigned long cur_index = first_index;
|
|
unsigned int ret;
|
|
|
|
node = rcu_dereference_raw(root->rnode);
|
|
if (!node)
|
|
return 0;
|
|
|
|
if (!radix_tree_is_indirect_ptr(node)) {
|
|
if (first_index > 0)
|
|
return 0;
|
|
results[0] = (void **)&root->rnode;
|
|
if (indices)
|
|
indices[0] = 0;
|
|
return 1;
|
|
}
|
|
node = indirect_to_ptr(node);
|
|
|
|
max_index = radix_tree_maxindex(node->height);
|
|
|
|
ret = 0;
|
|
while (ret < max_items) {
|
|
unsigned int slots_found;
|
|
unsigned long next_index; /* Index of next search */
|
|
|
|
if (cur_index > max_index)
|
|
break;
|
|
slots_found = __lookup(node, results + ret,
|
|
indices ? indices + ret : NULL,
|
|
cur_index, max_items - ret, &next_index);
|
|
ret += slots_found;
|
|
if (next_index == 0)
|
|
break;
|
|
cur_index = next_index;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
|
|
|
|
/*
|
|
* FIXME: the two tag_get()s here should use find_next_bit() instead of
|
|
* open-coding the search.
|
|
*/
|
|
static unsigned int
|
|
__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
|
|
unsigned int max_items, unsigned long *next_index, unsigned int tag)
|
|
{
|
|
unsigned int nr_found = 0;
|
|
unsigned int shift, height;
|
|
|
|
height = slot->height;
|
|
if (height == 0)
|
|
goto out;
|
|
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
|
|
|
|
while (height > 0) {
|
|
unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
|
|
|
|
for (;;) {
|
|
if (tag_get(slot, tag, i))
|
|
break;
|
|
index &= ~((1UL << shift) - 1);
|
|
index += 1UL << shift;
|
|
if (index == 0)
|
|
goto out; /* 32-bit wraparound */
|
|
i++;
|
|
if (i == RADIX_TREE_MAP_SIZE)
|
|
goto out;
|
|
}
|
|
height--;
|
|
if (height == 0) { /* Bottom level: grab some items */
|
|
unsigned long j = index & RADIX_TREE_MAP_MASK;
|
|
|
|
for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
|
|
index++;
|
|
if (!tag_get(slot, tag, j))
|
|
continue;
|
|
/*
|
|
* Even though the tag was found set, we need to
|
|
* recheck that we have a non-NULL node, because
|
|
* if this lookup is lockless, it may have been
|
|
* subsequently deleted.
|
|
*
|
|
* Similar care must be taken in any place that
|
|
* lookup ->slots[x] without a lock (ie. can't
|
|
* rely on its value remaining the same).
|
|
*/
|
|
if (slot->slots[j]) {
|
|
results[nr_found++] = &(slot->slots[j]);
|
|
if (nr_found == max_items)
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
slot = rcu_dereference_raw(slot->slots[i]);
|
|
if (slot == NULL)
|
|
break;
|
|
}
|
|
out:
|
|
*next_index = index;
|
|
return nr_found;
|
|
}
|
|
|
|
/**
|
|
* radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
|
|
* based on a tag
|
|
* @root: radix tree root
|
|
* @results: where the results of the lookup are placed
|
|
* @first_index: start the lookup from this key
|
|
* @max_items: place up to this many items at *results
|
|
* @tag: the tag index (< RADIX_TREE_MAX_TAGS)
|
|
*
|
|
* Performs an index-ascending scan of the tree for present items which
|
|
* have the tag indexed by @tag set. Places the items at *@results and
|
|
* returns the number of items which were placed at *@results.
|
|
*/
|
|
unsigned int
|
|
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
|
|
unsigned long first_index, unsigned int max_items,
|
|
unsigned int tag)
|
|
{
|
|
struct radix_tree_node *node;
|
|
unsigned long max_index;
|
|
unsigned long cur_index = first_index;
|
|
unsigned int ret;
|
|
|
|
/* check the root's tag bit */
|
|
if (!root_tag_get(root, tag))
|
|
return 0;
|
|
|
|
node = rcu_dereference_raw(root->rnode);
|
|
if (!node)
|
|
return 0;
|
|
|
|
if (!radix_tree_is_indirect_ptr(node)) {
|
|
if (first_index > 0)
|
|
return 0;
|
|
results[0] = node;
|
|
return 1;
|
|
}
|
|
node = indirect_to_ptr(node);
|
|
|
|
max_index = radix_tree_maxindex(node->height);
|
|
|
|
ret = 0;
|
|
while (ret < max_items) {
|
|
unsigned int nr_found, slots_found, i;
|
|
unsigned long next_index; /* Index of next search */
|
|
|
|
if (cur_index > max_index)
|
|
break;
|
|
slots_found = __lookup_tag(node, (void ***)results + ret,
|
|
cur_index, max_items - ret, &next_index, tag);
|
|
nr_found = 0;
|
|
for (i = 0; i < slots_found; i++) {
|
|
struct radix_tree_node *slot;
|
|
slot = *(((void ***)results)[ret + i]);
|
|
if (!slot)
|
|
continue;
|
|
results[ret + nr_found] =
|
|
indirect_to_ptr(rcu_dereference_raw(slot));
|
|
nr_found++;
|
|
}
|
|
ret += nr_found;
|
|
if (next_index == 0)
|
|
break;
|
|
cur_index = next_index;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
|
|
|
|
/**
|
|
* radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
|
|
* radix tree based on a tag
|
|
* @root: radix tree root
|
|
* @results: where the results of the lookup are placed
|
|
* @first_index: start the lookup from this key
|
|
* @max_items: place up to this many items at *results
|
|
* @tag: the tag index (< RADIX_TREE_MAX_TAGS)
|
|
*
|
|
* Performs an index-ascending scan of the tree for present items which
|
|
* have the tag indexed by @tag set. Places the slots at *@results and
|
|
* returns the number of slots which were placed at *@results.
|
|
*/
|
|
unsigned int
|
|
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
|
|
unsigned long first_index, unsigned int max_items,
|
|
unsigned int tag)
|
|
{
|
|
struct radix_tree_node *node;
|
|
unsigned long max_index;
|
|
unsigned long cur_index = first_index;
|
|
unsigned int ret;
|
|
|
|
/* check the root's tag bit */
|
|
if (!root_tag_get(root, tag))
|
|
return 0;
|
|
|
|
node = rcu_dereference_raw(root->rnode);
|
|
if (!node)
|
|
return 0;
|
|
|
|
if (!radix_tree_is_indirect_ptr(node)) {
|
|
if (first_index > 0)
|
|
return 0;
|
|
results[0] = (void **)&root->rnode;
|
|
return 1;
|
|
}
|
|
node = indirect_to_ptr(node);
|
|
|
|
max_index = radix_tree_maxindex(node->height);
|
|
|
|
ret = 0;
|
|
while (ret < max_items) {
|
|
unsigned int slots_found;
|
|
unsigned long next_index; /* Index of next search */
|
|
|
|
if (cur_index > max_index)
|
|
break;
|
|
slots_found = __lookup_tag(node, results + ret,
|
|
cur_index, max_items - ret, &next_index, tag);
|
|
ret += slots_found;
|
|
if (next_index == 0)
|
|
break;
|
|
cur_index = next_index;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
|
|
|
|
#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
|
|
#include <linux/sched.h> /* for cond_resched() */
|
|
|
|
/*
|
|
* This linear search is at present only useful to shmem_unuse_inode().
|
|
*/
|
|
static unsigned long __locate(struct radix_tree_node *slot, void *item,
|
|
unsigned long index, unsigned long *found_index)
|
|
{
|
|
unsigned int shift, height;
|
|
unsigned long i;
|
|
|
|
height = slot->height;
|
|
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
|
|
|
|
for ( ; height > 1; height--) {
|
|
i = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
for (;;) {
|
|
if (slot->slots[i] != NULL)
|
|
break;
|
|
index &= ~((1UL << shift) - 1);
|
|
index += 1UL << shift;
|
|
if (index == 0)
|
|
goto out; /* 32-bit wraparound */
|
|
i++;
|
|
if (i == RADIX_TREE_MAP_SIZE)
|
|
goto out;
|
|
}
|
|
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
slot = rcu_dereference_raw(slot->slots[i]);
|
|
if (slot == NULL)
|
|
goto out;
|
|
}
|
|
|
|
/* Bottom level: check items */
|
|
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
|
|
if (slot->slots[i] == item) {
|
|
*found_index = index + i;
|
|
index = 0;
|
|
goto out;
|
|
}
|
|
}
|
|
index += RADIX_TREE_MAP_SIZE;
|
|
out:
|
|
return index;
|
|
}
|
|
|
|
/**
|
|
* radix_tree_locate_item - search through radix tree for item
|
|
* @root: radix tree root
|
|
* @item: item to be found
|
|
*
|
|
* Returns index where item was found, or -1 if not found.
|
|
* Caller must hold no lock (since this time-consuming function needs
|
|
* to be preemptible), and must check afterwards if item is still there.
|
|
*/
|
|
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
|
|
{
|
|
struct radix_tree_node *node;
|
|
unsigned long max_index;
|
|
unsigned long cur_index = 0;
|
|
unsigned long found_index = -1;
|
|
|
|
do {
|
|
rcu_read_lock();
|
|
node = rcu_dereference_raw(root->rnode);
|
|
if (!radix_tree_is_indirect_ptr(node)) {
|
|
rcu_read_unlock();
|
|
if (node == item)
|
|
found_index = 0;
|
|
break;
|
|
}
|
|
|
|
node = indirect_to_ptr(node);
|
|
max_index = radix_tree_maxindex(node->height);
|
|
if (cur_index > max_index)
|
|
break;
|
|
|
|
cur_index = __locate(node, item, cur_index, &found_index);
|
|
rcu_read_unlock();
|
|
cond_resched();
|
|
} while (cur_index != 0 && cur_index <= max_index);
|
|
|
|
return found_index;
|
|
}
|
|
#else
|
|
/*
 * Stub used when CONFIG_SHMEM and CONFIG_SWAP are not both enabled:
 * no search is performed and "not found" (-1) is always reported.
 */
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	return (unsigned long)-1;
}
|
|
#endif /* CONFIG_SHMEM && CONFIG_SWAP */
|
|
|
|
/**
|
|
* radix_tree_shrink - shrink height of a radix tree to minimal
|
|
* @root radix tree root
|
|
*/
|
|
static inline void radix_tree_shrink(struct radix_tree_root *root)
|
|
{
|
|
/* try to shrink tree height */
|
|
while (root->height > 0) {
|
|
struct radix_tree_node *to_free = root->rnode;
|
|
struct radix_tree_node *slot;
|
|
|
|
BUG_ON(!radix_tree_is_indirect_ptr(to_free));
|
|
to_free = indirect_to_ptr(to_free);
|
|
|
|
/*
|
|
* The candidate node has more than one child, or its child
|
|
* is not at the leftmost slot, we cannot shrink.
|
|
*/
|
|
if (to_free->count != 1)
|
|
break;
|
|
if (!to_free->slots[0])
|
|
break;
|
|
|
|
/*
|
|
* We don't need rcu_assign_pointer(), since we are simply
|
|
* moving the node from one part of the tree to another: if it
|
|
* was safe to dereference the old pointer to it
|
|
* (to_free->slots[0]), it will be safe to dereference the new
|
|
* one (root->rnode) as far as dependent read barriers go.
|
|
*/
|
|
slot = to_free->slots[0];
|
|
if (root->height > 1) {
|
|
slot->parent = NULL;
|
|
slot = ptr_to_indirect(slot);
|
|
}
|
|
root->rnode = slot;
|
|
root->height--;
|
|
|
|
/*
|
|
* We have a dilemma here. The node's slot[0] must not be
|
|
* NULLed in case there are concurrent lookups expecting to
|
|
* find the item. However if this was a bottom-level node,
|
|
* then it may be subject to the slot pointer being visible
|
|
* to callers dereferencing it. If item corresponding to
|
|
* slot[0] is subsequently deleted, these callers would expect
|
|
* their slot to become empty sooner or later.
|
|
*
|
|
* For example, lockless pagecache will look up a slot, deref
|
|
* the page pointer, and if the page is 0 refcount it means it
|
|
* was concurrently deleted from pagecache so try the deref
|
|
* again. Fortunately there is already a requirement for logic
|
|
* to retry the entire slot lookup -- the indirect pointer
|
|
* problem (replacing direct root node with an indirect pointer
|
|
* also results in a stale slot). So tag the slot as indirect
|
|
* to force callers to retry.
|
|
*/
|
|
if (root->height == 0)
|
|
*((unsigned long *)&to_free->slots[0]) |=
|
|
RADIX_TREE_INDIRECT_PTR;
|
|
|
|
radix_tree_node_free(to_free);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* radix_tree_delete - delete an item from a radix tree
|
|
* @root: radix tree root
|
|
* @index: index key
|
|
*
|
|
* Remove the item at @index from the radix tree rooted at @root.
|
|
*
|
|
* Returns the address of the deleted item, or NULL if it was not present.
|
|
*/
|
|
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
|
|
{
|
|
struct radix_tree_node *node = NULL;
|
|
struct radix_tree_node *slot = NULL;
|
|
struct radix_tree_node *to_free;
|
|
unsigned int height, shift;
|
|
int tag;
|
|
int uninitialized_var(offset);
|
|
|
|
height = root->height;
|
|
if (index > radix_tree_maxindex(height))
|
|
goto out;
|
|
|
|
slot = root->rnode;
|
|
if (height == 0) {
|
|
root_tag_clear_all(root);
|
|
root->rnode = NULL;
|
|
goto out;
|
|
}
|
|
slot = indirect_to_ptr(slot);
|
|
shift = height * RADIX_TREE_MAP_SHIFT;
|
|
|
|
do {
|
|
if (slot == NULL)
|
|
goto out;
|
|
|
|
shift -= RADIX_TREE_MAP_SHIFT;
|
|
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
|
|
node = slot;
|
|
slot = slot->slots[offset];
|
|
} while (shift);
|
|
|
|
if (slot == NULL)
|
|
goto out;
|
|
|
|
/*
|
|
* Clear all tags associated with the item to be deleted.
|
|
* This way of doing it would be inefficient, but seldom is any set.
|
|
*/
|
|
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
|
|
if (tag_get(node, tag, offset))
|
|
radix_tree_tag_clear(root, index, tag);
|
|
}
|
|
|
|
to_free = NULL;
|
|
/* Now free the nodes we do not need anymore */
|
|
while (node) {
|
|
node->slots[offset] = NULL;
|
|
node->count--;
|
|
/*
|
|
* Queue the node for deferred freeing after the
|
|
* last reference to it disappears (set NULL, above).
|
|
*/
|
|
if (to_free)
|
|
radix_tree_node_free(to_free);
|
|
|
|
if (node->count) {
|
|
if (node == indirect_to_ptr(root->rnode))
|
|
radix_tree_shrink(root);
|
|
goto out;
|
|
}
|
|
|
|
/* Node with zero slots in use so free it */
|
|
to_free = node;
|
|
|
|
index >>= RADIX_TREE_MAP_SHIFT;
|
|
offset = index & RADIX_TREE_MAP_MASK;
|
|
node = node->parent;
|
|
}
|
|
|
|
root_tag_clear_all(root);
|
|
root->height = 0;
|
|
root->rnode = NULL;
|
|
if (to_free)
|
|
radix_tree_node_free(to_free);
|
|
|
|
out:
|
|
return slot;
|
|
}
|
|
EXPORT_SYMBOL(radix_tree_delete);
|
|
|
|
/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 *
 *	Returns the result of testing @tag on the root.
 */
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
	int tagged = root_tag_get(root, tag);

	return tagged;
}
EXPORT_SYMBOL(radix_tree_tagged);
|
|
|
|
static void
|
|
radix_tree_node_ctor(void *node)
|
|
{
|
|
memset(node, 0, sizeof(struct radix_tree_node));
|
|
}
|
|
|
|
static __init unsigned long __maxindex(unsigned int height)
|
|
{
|
|
unsigned int width = height * RADIX_TREE_MAP_SHIFT;
|
|
int shift = RADIX_TREE_INDEX_BITS - width;
|
|
|
|
if (shift < 0)
|
|
return ~0UL;
|
|
if (shift >= BITS_PER_LONG)
|
|
return 0UL;
|
|
return ~0UL >> shift;
|
|
}
|
|
|
|
static __init void radix_tree_init_maxindex(void)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
|
|
height_to_maxindex[i] = __maxindex(i);
|
|
}
|
|
|
|
static int radix_tree_callback(struct notifier_block *nfb,
|
|
unsigned long action,
|
|
void *hcpu)
|
|
{
|
|
int cpu = (long)hcpu;
|
|
struct radix_tree_preload *rtp;
|
|
|
|
/* Free per-cpu pool of perloaded nodes */
|
|
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
|
|
rtp = &per_cpu(radix_tree_preloads, cpu);
|
|
while (rtp->nr) {
|
|
kmem_cache_free(radix_tree_node_cachep,
|
|
rtp->nodes[rtp->nr-1]);
|
|
rtp->nodes[rtp->nr-1] = NULL;
|
|
rtp->nr--;
|
|
}
|
|
}
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
/*
 * One-time initialisation: create the slab cache for tree nodes, fill the
 * height-to-maxindex table and register the CPU notifier callback.
 */
void __init radix_tree_init(void)
{
	/* SLAB_PANIC is passed, so no failure check is made here. */
	radix_tree_node_cachep =
		kmem_cache_create("radix_tree_node",
				  sizeof(struct radix_tree_node), 0,
				  SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
				  radix_tree_node_ctor);

	radix_tree_init_maxindex();

	hotcpu_notifier(radix_tree_callback, 0);
}
|