percpu_ida: make percpu_ida percpu size/batch configurable
Make the percpu_ida per-cpu freelist size and batch size configurable. The block-mq tag allocator will use this. After block-mq uses percpu_ida to manage tags, performance is improved. My test was done on a 2-socket machine with 12 processes spread across the 2 sockets, so any lock contention or IPI overhead should be stressed heavily. Testing was done with null-blk.

hw_queue_depth    nopatch iops    patch iops
64                ~800k/s         ~1470k/s
2048              ~4470k/s        ~4340k/s

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
098faf5805
commit
e26b53d0b2
|
@ -16,6 +16,8 @@ struct percpu_ida {
|
||||||
* percpu_ida_init()
|
* percpu_ida_init()
|
||||||
*/
|
*/
|
||||||
unsigned nr_tags;
|
unsigned nr_tags;
|
||||||
|
unsigned percpu_max_size;
|
||||||
|
unsigned percpu_batch_size;
|
||||||
|
|
||||||
struct percpu_ida_cpu __percpu *tag_cpu;
|
struct percpu_ida_cpu __percpu *tag_cpu;
|
||||||
|
|
||||||
|
@ -51,10 +53,24 @@ struct percpu_ida {
|
||||||
} ____cacheline_aligned_in_smp;
|
} ____cacheline_aligned_in_smp;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of tags we move between the percpu freelist and the global freelist at
|
||||||
|
* a time
|
||||||
|
*/
|
||||||
|
#define IDA_DEFAULT_PCPU_BATCH_MOVE 32U
|
||||||
|
/* Max size of percpu freelist, */
|
||||||
|
#define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
|
||||||
|
|
||||||
int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
|
int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
|
||||||
void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
|
void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
|
||||||
|
|
||||||
void percpu_ida_destroy(struct percpu_ida *pool);
|
void percpu_ida_destroy(struct percpu_ida *pool);
|
||||||
int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags);
|
int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
|
||||||
|
unsigned long max_size, unsigned long batch_size);
|
||||||
|
static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
|
||||||
|
{
|
||||||
|
return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
|
||||||
|
IDA_DEFAULT_PCPU_BATCH_MOVE);
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* __PERCPU_IDA_H__ */
|
#endif /* __PERCPU_IDA_H__ */
|
||||||
|
|
|
@ -30,15 +30,6 @@
|
||||||
#include <linux/spinlock.h>
|
#include <linux/spinlock.h>
|
||||||
#include <linux/percpu_ida.h>
|
#include <linux/percpu_ida.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* Number of tags we move between the percpu freelist and the global freelist at
|
|
||||||
* a time
|
|
||||||
*/
|
|
||||||
#define IDA_PCPU_BATCH_MOVE 32U
|
|
||||||
|
|
||||||
/* Max size of percpu freelist, */
|
|
||||||
#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2)
|
|
||||||
|
|
||||||
struct percpu_ida_cpu {
|
struct percpu_ida_cpu {
|
||||||
/*
|
/*
|
||||||
* Even though this is percpu, we need a lock for tag stealing by remote
|
* Even though this is percpu, we need a lock for tag stealing by remote
|
||||||
|
@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool,
|
||||||
struct percpu_ida_cpu *remote;
|
struct percpu_ida_cpu *remote;
|
||||||
|
|
||||||
for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
|
for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
|
||||||
cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2;
|
cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
|
||||||
cpus_have_tags--) {
|
cpus_have_tags--) {
|
||||||
cpu = cpumask_next(cpu, &pool->cpus_have_tags);
|
cpu = cpumask_next(cpu, &pool->cpus_have_tags);
|
||||||
|
|
||||||
|
@ -123,7 +114,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
|
||||||
{
|
{
|
||||||
move_tags(tags->freelist, &tags->nr_free,
|
move_tags(tags->freelist, &tags->nr_free,
|
||||||
pool->freelist, &pool->nr_free,
|
pool->freelist, &pool->nr_free,
|
||||||
min(pool->nr_free, IDA_PCPU_BATCH_MOVE));
|
min(pool->nr_free, pool->percpu_batch_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned alloc_local_tag(struct percpu_ida *pool,
|
static inline unsigned alloc_local_tag(struct percpu_ida *pool,
|
||||||
|
@ -245,17 +236,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
|
||||||
wake_up(&pool->wait);
|
wake_up(&pool->wait);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nr_free == IDA_PCPU_SIZE) {
|
if (nr_free == pool->percpu_max_size) {
|
||||||
spin_lock(&pool->lock);
|
spin_lock(&pool->lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Global lock held and irqs disabled, don't need percpu
|
* Global lock held and irqs disabled, don't need percpu
|
||||||
* lock
|
* lock
|
||||||
*/
|
*/
|
||||||
if (tags->nr_free == IDA_PCPU_SIZE) {
|
if (tags->nr_free == pool->percpu_max_size) {
|
||||||
move_tags(pool->freelist, &pool->nr_free,
|
move_tags(pool->freelist, &pool->nr_free,
|
||||||
tags->freelist, &tags->nr_free,
|
tags->freelist, &tags->nr_free,
|
||||||
IDA_PCPU_BATCH_MOVE);
|
pool->percpu_batch_size);
|
||||||
|
|
||||||
wake_up(&pool->wait);
|
wake_up(&pool->wait);
|
||||||
}
|
}
|
||||||
|
@ -292,7 +283,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy);
|
||||||
* Allocation is percpu, but sharding is limited by nr_tags - for best
|
* Allocation is percpu, but sharding is limited by nr_tags - for best
|
||||||
* performance, the workload should not span more cpus than nr_tags / 128.
|
* performance, the workload should not span more cpus than nr_tags / 128.
|
||||||
*/
|
*/
|
||||||
int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
|
int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
|
||||||
|
unsigned long max_size, unsigned long batch_size)
|
||||||
{
|
{
|
||||||
unsigned i, cpu, order;
|
unsigned i, cpu, order;
|
||||||
|
|
||||||
|
@ -301,6 +293,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
|
||||||
init_waitqueue_head(&pool->wait);
|
init_waitqueue_head(&pool->wait);
|
||||||
spin_lock_init(&pool->lock);
|
spin_lock_init(&pool->lock);
|
||||||
pool->nr_tags = nr_tags;
|
pool->nr_tags = nr_tags;
|
||||||
|
pool->percpu_max_size = max_size;
|
||||||
|
pool->percpu_batch_size = batch_size;
|
||||||
|
|
||||||
/* Guard against overflow */
|
/* Guard against overflow */
|
||||||
if (nr_tags > (unsigned) INT_MAX + 1) {
|
if (nr_tags > (unsigned) INT_MAX + 1) {
|
||||||
|
@ -319,7 +313,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
|
||||||
pool->nr_free = nr_tags;
|
pool->nr_free = nr_tags;
|
||||||
|
|
||||||
pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
|
pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
|
||||||
IDA_PCPU_SIZE * sizeof(unsigned),
|
pool->percpu_max_size * sizeof(unsigned),
|
||||||
sizeof(unsigned));
|
sizeof(unsigned));
|
||||||
if (!pool->tag_cpu)
|
if (!pool->tag_cpu)
|
||||||
goto err;
|
goto err;
|
||||||
|
@ -332,4 +326,4 @@ err:
|
||||||
percpu_ida_destroy(pool);
|
percpu_ida_destroy(pool);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(percpu_ida_init);
|
EXPORT_SYMBOL_GPL(__percpu_ida_init);
|
||||||
|
|
Loading…
Reference in New Issue