s390/udelay: make udelay have busy loop semantics

When using systemtap it was observed that our udelay implementation is
rather suboptimal if being called from a kprobe handler installed by
systemtap.

The problem was observed when a kprobe was installed on lock_acquired().
When the probe was hit, the kprobe handler called udelay, which set
up an (internal) timer, reenabled interrupts (only the clock comparator
interrupt) and waited for the interrupt.
This is an optimization to avoid having the cpu busy loop while waiting
for enough time to pass. The problem is that the interrupt handler still
calls irq_enter()/irq_exit(), which in turn can lead to a deadlock,
since some accounting functions may take locks as well.

If one of these locks is the same lock that caused lock_acquired() to be
called, we have a nice deadlock.

This patch reworks the udelay code for the interrupts disabled case to
immediately leave the low level interrupt handler when the clock
comparator interrupt happens. That way no C code is being called and the
deadlock cannot happen anymore.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Heiko Carstens 2015-08-15 11:42:21 +02:00 committed by Martin Schwidefsky
parent 0c5a69f432
commit db7e007fd6
6 changed files with 35 additions and 17 deletions

View File

@ -24,4 +24,6 @@ struct s390_idle_data {
extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;
void psw_idle(struct s390_idle_data *, unsigned long);
#endif /* _S390_IDLE_H */

View File

@ -96,6 +96,19 @@ enum irq_subclass {
IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
};
#define CR0_IRQ_SUBCLASS_MASK \
((1UL << (63 - 30)) /* Warning Track */ | \
(1UL << (63 - 48)) /* Malfunction Alert */ | \
(1UL << (63 - 49)) /* Emergency Signal */ | \
(1UL << (63 - 50)) /* External Call */ | \
(1UL << (63 - 52)) /* Clock Comparator */ | \
(1UL << (63 - 53)) /* CPU Timer */ | \
(1UL << (63 - 54)) /* Service Signal */ | \
(1UL << (63 - 57)) /* Interrupt Key */ | \
(1UL << (63 - 58)) /* Measurement Alert */ | \
(1UL << (63 - 59)) /* Timing Alert */ | \
(1UL << (63 - 62))) /* IUCV */
void irq_subclass_register(enum irq_subclass subclass);
void irq_subclass_unregister(enum irq_subclass subclass);

View File

@ -15,11 +15,13 @@
#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* restore vector registers */
#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
#define _CIF_ASCE (1<<CIF_ASCE)
#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY)
#define _CIF_FPU (1<<CIF_FPU)
#define _CIF_IGNORE_IRQ (1<<CIF_IGNORE_IRQ)
#ifndef __ASSEMBLY__

View File

@ -563,6 +563,8 @@ ENTRY(io_int_handler)
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
jo .Lio_restore
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
.Lio_loop:
@ -738,6 +740,8 @@ ENTRY(ext_int_handler)
mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
jo .Lio_restore
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs

View File

@ -16,7 +16,6 @@ void io_int_handler(void);
void mcck_int_handler(void);
void restart_int_handler(void);
void restart_call_handler(void);
void psw_idle(struct s390_idle_data *, unsigned long);
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);

View File

@ -12,8 +12,10 @@
#include <linux/module.h>
#include <linux/irqflags.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/vtimer.h>
#include <asm/div64.h>
#include <asm/idle.h>
void __delay(unsigned long loops)
{
@ -30,26 +32,22 @@ EXPORT_SYMBOL(__delay);
static void __udelay_disabled(unsigned long long usecs)
{
unsigned long cr0, cr6, new;
u64 clock_saved, end;
unsigned long cr0, cr0_new, psw_mask;
struct s390_idle_data idle;
u64 end;
end = get_tod_clock() + (usecs << 12);
clock_saved = local_tick_disable();
__ctl_store(cr0, 0, 0);
__ctl_store(cr6, 6, 6);
new = (cr0 & 0xffff00e0) | 0x00000800;
__ctl_load(new , 0, 0);
new = 0;
__ctl_load(new, 6, 6);
lockdep_off();
do {
set_clock_comparator(end);
enabled_wait();
} while (get_tod_clock_fast() < end);
lockdep_on();
cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK;
cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */
__ctl_load(cr0_new, 0, 0);
psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
set_clock_comparator(end);
set_cpu_flag(CIF_IGNORE_IRQ);
psw_idle(&idle, psw_mask);
clear_cpu_flag(CIF_IGNORE_IRQ);
set_clock_comparator(S390_lowcore.clock_comparator);
__ctl_load(cr0, 0, 0);
__ctl_load(cr6, 6, 6);
local_tick_enable(clock_saved);
}
static void __udelay_enabled(unsigned long long usecs)