966a967116
struct call_single_data is used in IPIs to transfer information between CPUs. Its size is bigger than sizeof(unsigned long) and less than cache line size. Currently it is not allocated with any explicit alignment requirements. This makes it possible for allocated call_single_data to cross two cache lines, which results in double the number of the cache lines that need to be transferred among CPUs. This can be fixed by requiring call_single_data to be aligned with the size of call_single_data. Currently the size of call_single_data is the power of 2. If we add new fields to call_single_data, we may need to add padding to make sure the size of new definition is the power of 2 as well. Fortunately, this is enforced by GCC, which will report bad sizes. To set alignment requirements of call_single_data to the size of call_single_data, a struct definition and a typedef is used. To test the effect of the patch, I used the vm-scalability multiple thread swap test case (swap-w-seq-mt). The test will create multiple threads and each thread will eat memory until all RAM and part of swap is used, so that huge number of IPIs are triggered when unmapping memory. In the test, the throughput of memory writing improves ~5% compared with misaligned call_single_data, because of faster IPIs. Suggested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Huang, Ying <ying.huang@intel.com> [ Add call_single_data_t and align with size of call_single_data. ] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Aaron Lu <aaron.lu@intel.com> Cc: Borislav Petkov <bp@suse.de> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/87bmnqd6lz.fsf@yhuang-mobile.sh.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
181 lines
4.3 KiB
C
181 lines
4.3 KiB
C
/*
|
|
* Functions related to softirq rq completions
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/init.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/topology.h>
|
|
|
|
#include "blk.h"
|
|
|
|
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
|
|
|
|
/*
|
|
* Softirq action handler - move entries to local list and loop over them
|
|
* while passing them to the queue registered handler.
|
|
*/
|
|
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
|
|
{
|
|
struct list_head *cpu_list, local_list;
|
|
|
|
local_irq_disable();
|
|
cpu_list = this_cpu_ptr(&blk_cpu_done);
|
|
list_replace_init(cpu_list, &local_list);
|
|
local_irq_enable();
|
|
|
|
while (!list_empty(&local_list)) {
|
|
struct request *rq;
|
|
|
|
rq = list_entry(local_list.next, struct request, ipi_list);
|
|
list_del_init(&rq->ipi_list);
|
|
rq->q->softirq_done_fn(rq);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
static void trigger_softirq(void *data)
|
|
{
|
|
struct request *rq = data;
|
|
unsigned long flags;
|
|
struct list_head *list;
|
|
|
|
local_irq_save(flags);
|
|
list = this_cpu_ptr(&blk_cpu_done);
|
|
list_add_tail(&rq->ipi_list, list);
|
|
|
|
if (list->next == &rq->ipi_list)
|
|
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
|
|
|
local_irq_restore(flags);
|
|
}
|
|
|
|
/*
|
|
* Setup and invoke a run of 'trigger_softirq' on the given cpu.
|
|
*/
|
|
static int raise_blk_irq(int cpu, struct request *rq)
|
|
{
|
|
if (cpu_online(cpu)) {
|
|
call_single_data_t *data = &rq->csd;
|
|
|
|
data->func = trigger_softirq;
|
|
data->info = rq;
|
|
data->flags = 0;
|
|
|
|
smp_call_function_single_async(cpu, data);
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
#else /* CONFIG_SMP */
|
|
static int raise_blk_irq(int cpu, struct request *rq)
|
|
{
|
|
return 1;
|
|
}
|
|
#endif
|
|
|
|
static int blk_softirq_cpu_dead(unsigned int cpu)
|
|
{
|
|
/*
|
|
* If a CPU goes away, splice its entries to the current CPU
|
|
* and trigger a run of the softirq
|
|
*/
|
|
local_irq_disable();
|
|
list_splice_init(&per_cpu(blk_cpu_done, cpu),
|
|
this_cpu_ptr(&blk_cpu_done));
|
|
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
|
local_irq_enable();
|
|
|
|
return 0;
|
|
}
|
|
|
|
void __blk_complete_request(struct request *req)
|
|
{
|
|
int ccpu, cpu;
|
|
struct request_queue *q = req->q;
|
|
unsigned long flags;
|
|
bool shared = false;
|
|
|
|
BUG_ON(!q->softirq_done_fn);
|
|
|
|
local_irq_save(flags);
|
|
cpu = smp_processor_id();
|
|
|
|
/*
|
|
* Select completion CPU
|
|
*/
|
|
if (req->cpu != -1) {
|
|
ccpu = req->cpu;
|
|
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
|
|
shared = cpus_share_cache(cpu, ccpu);
|
|
} else
|
|
ccpu = cpu;
|
|
|
|
/*
|
|
* If current CPU and requested CPU share a cache, run the softirq on
|
|
* the current CPU. One might concern this is just like
|
|
* QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
|
|
* running in interrupt handler, and currently I/O controller doesn't
|
|
* support multiple interrupts, so current CPU is unique actually. This
|
|
* avoids IPI sending from current CPU to the first CPU of a group.
|
|
*/
|
|
if (ccpu == cpu || shared) {
|
|
struct list_head *list;
|
|
do_local:
|
|
list = this_cpu_ptr(&blk_cpu_done);
|
|
list_add_tail(&req->ipi_list, list);
|
|
|
|
/*
|
|
* if the list only contains our just added request,
|
|
* signal a raise of the softirq. If there are already
|
|
* entries there, someone already raised the irq but it
|
|
* hasn't run yet.
|
|
*/
|
|
if (list->next == &req->ipi_list)
|
|
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
|
} else if (raise_blk_irq(ccpu, req))
|
|
goto do_local;
|
|
|
|
local_irq_restore(flags);
|
|
}
|
|
|
|
/**
|
|
* blk_complete_request - end I/O on a request
|
|
* @req: the request being processed
|
|
*
|
|
* Description:
|
|
* Ends all I/O on a request. It does not handle partial completions,
|
|
* unless the driver actually implements this in its completion callback
|
|
* through requeueing. The actual completion happens out-of-order,
|
|
* through a softirq handler. The user must have registered a completion
|
|
* callback through blk_queue_softirq_done().
|
|
**/
|
|
void blk_complete_request(struct request *req)
|
|
{
|
|
if (unlikely(blk_should_fake_timeout(req->q)))
|
|
return;
|
|
if (!blk_mark_rq_complete(req))
|
|
__blk_complete_request(req);
|
|
}
|
|
EXPORT_SYMBOL(blk_complete_request);
|
|
|
|
static __init int blk_softirq_init(void)
|
|
{
|
|
int i;
|
|
|
|
for_each_possible_cpu(i)
|
|
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
|
|
|
|
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
|
|
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
|
|
"block/softirq:dead", NULL,
|
|
blk_softirq_cpu_dead);
|
|
return 0;
|
|
}
|
|
subsys_initcall(blk_softirq_init);
|