Merge branches 'x86/mm', 'x86/build', 'x86/apic' and 'x86/platform' into x86/core, to apply dependent patch

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2015-06-03 10:05:18 +02:00
122 changed files with 3699 additions and 3233 deletions

View File

@ -746,6 +746,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
cpuidle.off=1 [CPU_IDLE]
disable the cpuidle sub-system
cpu_init_udelay=N
[X86] Delay for N microseconds between the assert and de-assert
of APIC INIT used to start processors. This delay occurs
during every CPU online operation, such as boot and resume from suspend.
Default: 10000
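As an illustrative (hypothetical) example, a platform whose
processors are known not to need the INIT de-assert delay
could be booted with:
	cpu_init_udelay=0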
cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]

View File

@ -1,7 +1,19 @@
MTRR (Memory Type Range Register) control
3 Jun 1999
Richard Gooch
<rgooch@atnf.csiro.au>
Richard Gooch <rgooch@atnf.csiro.au> - 3 Jun 1999
Luis R. Rodriguez <mcgrof@do-not-panic.com> - April 9, 2015
===============================================================================
Phasing out MTRR use
MTRR use is replaced on modern x86 hardware with PAT. Over time, write-combining
is expected to remain the only effective MTRR type still supported.
As MTRR use is phased out, device drivers should use arch_phys_wc_add(), which
adds an effective MTRR on non-PAT systems and is a no-op on PAT-enabled systems
(a minimal sketch of this pattern follows this note).
For details refer to Documentation/x86/pat.txt.
===============================================================================
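A minimal sketch of the recommended pattern, assuming a hypothetical driver
whose write-combining aperture lives at aperture_base/aperture_size (both
names invented for illustration):

	#include <linux/io.h>

	static void __iomem *aperture;	/* WC mapping of the aperture */
	static int wc_cookie;		/* MTRR handle, <= 0 when unused */

	static int example_map_aperture(unsigned long aperture_base,
					unsigned long aperture_size)
	{
		/* On PAT systems the page tables provide write-combining. */
		aperture = ioremap_wc(aperture_base, aperture_size);
		if (!aperture)
			return -ENOMEM;

		/* On non-PAT systems this adds a WC MTRR; with PAT it is a no-op. */
		wc_cookie = arch_phys_wc_add(aperture_base, aperture_size);
		return 0;
	}

	static void example_unmap_aperture(void)
	{
		arch_phys_wc_del(wc_cookie);
		iounmap(aperture);
	}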
On Intel P6 family processors (Pentium Pro, Pentium II and later)
the Memory Type Range Registers (MTRRs) may be used to control

View File

@ -34,6 +34,8 @@ ioremap | -- | UC- | UC- |
| | | |
ioremap_cache | -- | WB | WB |
| | | |
ioremap_uc | -- | UC | UC |
| | | |
ioremap_nocache | -- | UC- | UC- |
| | | |
ioremap_wc | -- | -- | WC |
@ -102,7 +104,38 @@ wants to export a RAM region, it has to do set_memory_uc() or set_memory_wc()
as step 0 above and also track the usage of those pages and use set_memory_wb()
before the page is freed to free pool.
MTRR effects on PAT / non-PAT systems
-------------------------------------
The following table shows the effects of using write-combining MTRRs with
ioremap*() calls on x86 for both non-PAT and PAT systems. Ideally mtrr_add()
usage will be phased out in favor of arch_phys_wc_add(), which is a no-op on
PAT-enabled systems. The region covered by an arch_phys_wc_add() call should
already have been ioremapped with WC attributes or PAT entries; this can be
done by using ioremap_wc() / set_memory_wc(). Devices that combine areas of
IO memory which must remain uncacheable with areas where write-combining is
desirable should consider using ioremap_uc() followed by set_memory_wc() to
white-list the effective write-combined areas (a sketch of this pattern
follows the table). Such use is nevertheless discouraged, as the effective
memory type is considered implementation defined; this strategy can still be
used as a last resort on devices with size-constrained regions where MTRR
write-combining would otherwise not be effective.
----------------------------------------------------------------------
MTRR Non-PAT   PAT    Linux ioremap value        Effective memory type
----------------------------------------------------------------------
                                                  Non-PAT |  PAT
     PAT
     |PCD
     ||PWT
     |||
WC   000      WB      _PAGE_CACHE_MODE_WB            WC   |   WC
WC   001      WC      _PAGE_CACHE_MODE_WC            WC*  |   WC
WC   010      UC-     _PAGE_CACHE_MODE_UC_MINUS      WC*  |   UC
WC   011      UC      _PAGE_CACHE_MODE_UC            UC   |   UC
----------------------------------------------------------------------

(*) denotes implementation defined and is discouraged
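A minimal sketch of the white-listing strategy described above, assuming a
hypothetical device whose single BAR mixes registers (which must stay
uncacheable) with a small, page-aligned framebuffer that benefits from
write-combining; mmio_base, mmio_size, fb_offset and fb_size are invented
names used only for illustration:

	#include <linux/io.h>
	#include <linux/printk.h>
	#include <asm/cacheflush.h>	/* set_memory_wc() */

	static void __iomem *example_map_mixed_bar(resource_size_t mmio_base,
						   unsigned long mmio_size,
						   unsigned long fb_offset,
						   unsigned long fb_size)
	{
		void __iomem *base;
		int ret;

		/* Map the whole BAR strongly uncached (UC rather than UC-). */
		base = ioremap_uc(mmio_base, mmio_size);
		if (!base)
			return NULL;

		/* White-list only the framebuffer pages as write-combined. */
		ret = set_memory_wc((unsigned long)base + fb_offset,
				    fb_size >> PAGE_SHIFT);
		if (ret)
			pr_warn("example: could not white-list WC area (%d)\n", ret);

		return base;
	}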
Notes:

View File

@ -1,6 +1,4 @@
#ifndef __IA64_INTR_REMAPPING_H
#define __IA64_INTR_REMAPPING_H
#define irq_remapping_enabled 0
#define dmar_alloc_hwirq create_irq
#define dmar_free_hwirq destroy_irq
#endif

View File

@ -165,7 +165,7 @@ static struct irq_chip dmar_msi_type = {
.irq_retrigger = ia64_msi_retrigger_irq,
};
static int
static void
msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
struct irq_cfg *cfg = irq_cfg + irq;
@ -186,21 +186,29 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
MSI_DATA_LEVEL_ASSERT |
MSI_DATA_DELIVERY_FIXED |
MSI_DATA_VECTOR(cfg->vector);
return 0;
}
int arch_setup_dmar_msi(unsigned int irq)
int dmar_alloc_hwirq(int id, int node, void *arg)
{
int ret;
int irq;
struct msi_msg msg;
ret = msi_compose_msg(NULL, irq, &msg);
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
"edge");
return 0;
irq = create_irq();
if (irq > 0) {
irq_set_handler_data(irq, arg);
irq_set_chip_and_handler_name(irq, &dmar_msi_type,
handle_edge_irq, "edge");
msi_compose_msg(NULL, irq, &msg);
dmar_msi_write(irq, &msg);
}
return irq;
}
void dmar_free_hwirq(int irq)
{
irq_set_handler_data(irq, NULL);
destroy_irq(irq);
}
#endif /* CONFIG_INTEL_IOMMU */

View File

@ -100,7 +100,7 @@ config X86
select IRQ_FORCED_THREADING
select HAVE_BPF_JIT if X86_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE)
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select ARCH_HAS_SG_CHAIN
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
@ -341,7 +341,7 @@ config X86_FEATURE_NAMES
config X86_X2APIC
bool "Support x2apic"
depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
---help---
This enables x2apic support on CPUs that have this feature.
@ -441,6 +441,7 @@ config X86_UV
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on X86_X2APIC
depends on PCI
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
@ -466,7 +467,6 @@ config X86_INTEL_CE
select X86_REBOOTFIXUPS
select OF
select OF_EARLY_FLATTREE
select IRQ_DOMAIN
---help---
Select for the Intel CE media processor (CE4100) SOC.
This option compiles in support for the CE4100 SOC for settop
@ -851,11 +851,12 @@ config NR_CPUS
default "1" if !SMP
default "8192" if MAXSMP
default "32" if SMP && X86_BIGSMP
default "8" if SMP
default "8" if SMP && X86_32
default "64" if SMP
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
supported value is 4096, otherwise the maximum value is 512. The
supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
@ -914,12 +915,12 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
select IRQ_DOMAIN_HIERARCHY
select PCI_MSI_IRQ_DOMAIN if PCI_MSI
config X86_IO_APIC
def_bool y
depends on X86_LOCAL_APIC || X86_UP_IOAPIC
select IRQ_DOMAIN
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
bool "Reroute for broken boot IRQs"

View File

@ -332,4 +332,15 @@ config X86_DEBUG_STATIC_CPU_HAS
If unsure, say N.
config PUNIT_ATOM_DEBUG
tristate "ATOM Punit debug driver"
select DEBUG_FS
select IOSF_MBI
---help---
This is a debug driver which reports the power states
of all Punit North Complex devices. The power state of
each device is exposed as part of the debugfs interface.
The current power state can be read from
/sys/kernel/debug/punit_atom/dev_power_state
endmenu

View File

@ -77,6 +77,12 @@ else
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
# Align jump targets to 1 byte, not the default 16 bytes:
KBUILD_CFLAGS += -falign-jumps=1
# Pack loops tightly as well:
KBUILD_CFLAGS += -falign-loops=1
# Don't autogenerate traditional x87 instructions
KBUILD_CFLAGS += $(call cc-option,-mno-80387)
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
@ -84,6 +90,9 @@ else
# Use -mpreferred-stack-boundary=3 if supported.
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)

View File

@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32)
swapgs
sysretl
ENDPROC(native_usergs_sysret32)
ENTRY(native_irq_enable_sysexit)
swapgs
sti
sysexit
ENDPROC(native_irq_enable_sysexit)
#endif
/*
@ -119,7 +113,7 @@ ENTRY(ia32_sysenter_target)
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK
movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
ENABLE_INTERRUPTS(CLBR_NONE)
/* Zero-extending 32-bit regs, do not remove */
@ -142,7 +136,7 @@ ENTRY(ia32_sysenter_target)
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
pushq_cfi_reg rax /* pt_regs->ax */
pushq_cfi $-ENOSYS /* pt_regs->ax */
cld
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
@ -169,8 +163,6 @@ sysenter_flags_fixed:
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@ -179,8 +171,11 @@ sysenter_do_call:
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
sysenter_dispatch:
cmpq $(IA32_NR_syscalls-1),%rax
ja 1f
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX(%rsp)
1:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@ -247,9 +242,7 @@ sysexit_from_sys_call:
movl %ebx,%esi /* 2nd arg: 1st syscall arg */
movl %eax,%edi /* 1st arg: syscall number */
call __audit_syscall_entry
movl RAX(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl ORIG_RAX(%rsp),%eax /* reload syscall number */
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX(%rsp),%esi /* reload 2nd syscall arg */
movl RDX(%rsp),%edx /* reload 3rd syscall arg */
@ -300,13 +293,10 @@ sysenter_tracesys:
#endif
SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
ENDPROC(ia32_sysenter_target)
@ -356,7 +346,7 @@ ENTRY(ia32_cstar_target)
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
movq PER_CPU_VAR(kernel_stack),%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp
ENABLE_INTERRUPTS(CLBR_NONE)
/* Zero-extending 32-bit regs, do not remove */
@ -376,7 +366,7 @@ ENTRY(ia32_cstar_target)
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rbp /* pt_regs->cx */
movl %ebp,%ecx
pushq_cfi_reg rax /* pt_regs->ax */
pushq_cfi $-ENOSYS /* pt_regs->ax */
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
@ -392,8 +382,6 @@ ENTRY(ia32_cstar_target)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz cstar_tracesys
cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@ -402,8 +390,11 @@ cstar_do_call:
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
cstar_dispatch:
cmpq $(IA32_NR_syscalls-1),%rax
ja 1f
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX(%rsp)
1:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@ -457,14 +448,11 @@ cstar_tracesys:
xchgl %r9d,%ebp
SAVE_EXTRA_REGS
CLEAR_RREGS r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
xchgl %ebp,%r9d
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@ -523,7 +511,7 @@ ENTRY(ia32_syscall)
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
pushq_cfi_reg rax /* pt_regs->ax */
pushq_cfi $-ENOSYS /* pt_regs->ax */
cld
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
CFI_ADJUST_CFA_OFFSET 10*8
@ -531,8 +519,6 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
/* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
@ -540,9 +526,12 @@ ia32_do_call:
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
cmpq $(IA32_NR_syscalls-1),%rax
ja 1f
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX(%rsp)
1:
ia32_ret_from_sys_call:
CLEAR_RREGS
jmp int_ret_from_sys_call
@ -550,23 +539,14 @@ ia32_ret_from_sys_call:
ia32_tracesys:
SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
CFI_ENDPROC
END(ia32_syscall)
ia32_badsys:
movq $0,ORIG_RAX(%rsp)
movq $-ENOSYS,%rax
jmp ia32_sysret
CFI_ENDPROC
.macro PTREGSCALL label, func
ALIGN
GLOBAL(\label)

View File

@ -18,6 +18,12 @@
.endm
#endif
/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
* instruction. See apply_alternatives().
*/
.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
.long \orig - .
.long \alt - .
@ -27,6 +33,12 @@
.byte \pad_len
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
* @newinstr. The ".skip" directive takes care of proper instruction padding
* in case @newinstr is longer than @oldinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, feature
140:
\oldinstr
@ -55,6 +67,12 @@
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
/*
* Same as ALTERNATIVE macro above but for two alternatives. If CPU
* has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
* @feature2, it replaces @oldinstr with @newinstr2.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr

View File

@ -644,6 +644,12 @@ static inline void entering_ack_irq(void)
entering_irq();
}
static inline void ipi_entering_ack_irq(void)
{
ack_APIC_irq();
irq_enter();
}
static inline void exiting_irq(void)
{
irq_exit();

View File

@ -63,6 +63,31 @@
_ASM_ALIGN ; \
_ASM_PTR (entry); \
.popsection
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
jz 102f /* already aligned */
subl $8,%ecx
negl %ecx
subl %ecx,%edx
100: movb (%rsi),%al
101: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 100b
102:
.section .fixup,"ax"
103: addl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
_ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b)
.endm
#else
# define _ASM_EXTABLE(from,to) \
" .pushsection \"__ex_table\",\"a\"\n" \

View File

@ -22,7 +22,7 @@
*
* Atomically reads the value of @v.
*/
static inline int atomic_read(const atomic_t *v)
static __always_inline int atomic_read(const atomic_t *v)
{
return ACCESS_ONCE((v)->counter);
}
@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v)
*
* Atomically sets the value of @v to @i.
*/
static inline void atomic_set(atomic_t *v, int i)
static __always_inline void atomic_set(atomic_t *v, int i)
{
v->counter = i;
}
@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i)
*
* Atomically adds @i to @v.
*/
static inline void atomic_add(int i, atomic_t *v)
static __always_inline void atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter)
@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v)
*
* Atomically subtracts @i from @v.
*/
static inline void atomic_sub(int i, atomic_t *v)
static __always_inline void atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter)
@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
static inline int atomic_sub_and_test(int i, atomic_t *v)
static __always_inline int atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
}
@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
static inline void atomic_inc(atomic_t *v)
static __always_inline void atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter));
@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
static inline void atomic_dec(atomic_t *v)
static __always_inline void atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter));
@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
static inline int atomic_dec_and_test(atomic_t *v)
static __always_inline int atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
}
@ -126,7 +126,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
static inline int atomic_inc_and_test(atomic_t *v)
static __always_inline int atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
}
@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
static inline int atomic_add_negative(int i, atomic_t *v)
static __always_inline int atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
}
@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
static inline int atomic_add_return(int i, atomic_t *v)
static __always_inline int atomic_add_return(int i, atomic_t *v)
{
return i + xadd(&v->counter, i);
}
@ -164,7 +164,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
*
* Atomically subtracts @i from @v and returns @v - @i
*/
static inline int atomic_sub_return(int i, atomic_t *v)
static __always_inline int atomic_sub_return(int i, atomic_t *v)
{
return atomic_add_return(-i, v);
}
@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
#define atomic_inc_return(v) (atomic_add_return(1, v))
#define atomic_dec_return(v) (atomic_sub_return(1, v))
static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
return cmpxchg(&v->counter, old, new);
}
@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
* Atomically adds @a to @v, so long as @v was not already @u.
* Returns the old value of @v.
*/
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int c, old;
c = atomic_read(v);
@ -213,7 +213,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
* Atomically adds 1 to @v
* Returns the new value of @u
*/
static inline short int atomic_inc_short(short int *v)
static __always_inline short int atomic_inc_short(short int *v)
{
asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
return *v;

View File

@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i)
*
* Atomically adds @i to @v.
*/
static inline void atomic64_add(long i, atomic64_t *v)
static __always_inline void atomic64_add(long i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
: "=m" (v->counter)
@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
static inline void atomic64_inc(atomic64_t *v)
static __always_inline void atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
static inline void atomic64_dec(atomic64_t *v)
static __always_inline void atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
static inline long atomic64_add_return(long i, atomic64_t *v)
static __always_inline long atomic64_add_return(long i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
}

View File

@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_HAVE_KVM
BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
smp_kvm_posted_intr_ipi)
BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
smp_kvm_posted_intr_wakeup_ipi)
#endif
/*

View File

@ -14,6 +14,7 @@ typedef struct {
#endif
#ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
unsigned int kvm_posted_intr_wakeup_ipis;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;

View File

@ -74,20 +74,16 @@ extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
struct irq_data;
struct hpet_dev;
struct irq_domain;
extern void hpet_msi_unmask(struct irq_data *data);
extern void hpet_msi_mask(struct irq_data *data);
struct hpet_dev;
extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
#ifdef CONFIG_PCI_MSI
extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
#else
static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
{
return -EINVAL;
}
#endif
extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
extern int hpet_assign_irq(struct irq_domain *domain,
struct hpet_dev *dev, int dev_num);
#ifdef CONFIG_HPET_EMULATE_RTC

View File

@ -29,6 +29,7 @@
extern asmlinkage void apic_timer_interrupt(void);
extern asmlinkage void x86_platform_ipi(void);
extern asmlinkage void kvm_posted_intr_ipi(void);
extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
@ -36,40 +37,6 @@ extern asmlinkage void spurious_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
extern asmlinkage void invalidate_interrupt(void);
extern asmlinkage void invalidate_interrupt0(void);
extern asmlinkage void invalidate_interrupt1(void);
extern asmlinkage void invalidate_interrupt2(void);
extern asmlinkage void invalidate_interrupt3(void);
extern asmlinkage void invalidate_interrupt4(void);
extern asmlinkage void invalidate_interrupt5(void);
extern asmlinkage void invalidate_interrupt6(void);
extern asmlinkage void invalidate_interrupt7(void);
extern asmlinkage void invalidate_interrupt8(void);
extern asmlinkage void invalidate_interrupt9(void);
extern asmlinkage void invalidate_interrupt10(void);
extern asmlinkage void invalidate_interrupt11(void);
extern asmlinkage void invalidate_interrupt12(void);
extern asmlinkage void invalidate_interrupt13(void);
extern asmlinkage void invalidate_interrupt14(void);
extern asmlinkage void invalidate_interrupt15(void);
extern asmlinkage void invalidate_interrupt16(void);
extern asmlinkage void invalidate_interrupt17(void);
extern asmlinkage void invalidate_interrupt18(void);
extern asmlinkage void invalidate_interrupt19(void);
extern asmlinkage void invalidate_interrupt20(void);
extern asmlinkage void invalidate_interrupt21(void);
extern asmlinkage void invalidate_interrupt22(void);
extern asmlinkage void invalidate_interrupt23(void);
extern asmlinkage void invalidate_interrupt24(void);
extern asmlinkage void invalidate_interrupt25(void);
extern asmlinkage void invalidate_interrupt26(void);
extern asmlinkage void invalidate_interrupt27(void);
extern asmlinkage void invalidate_interrupt28(void);
extern asmlinkage void invalidate_interrupt29(void);
extern asmlinkage void invalidate_interrupt30(void);
extern asmlinkage void invalidate_interrupt31(void);
extern asmlinkage void irq_move_cleanup_interrupt(void);
extern asmlinkage void reboot_interrupt(void);
extern asmlinkage void threshold_interrupt(void);
@ -92,55 +59,87 @@ extern void trace_call_function_single_interrupt(void);
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
#define trace_reboot_interrupt reboot_interrupt
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
#endif /* CONFIG_TRACING */
#ifdef CONFIG_IRQ_REMAP
/* Intel specific interrupt remapping information */
struct irq_2_iommu {
struct intel_iommu *iommu;
u16 irte_index;
u16 sub_handle;
u8 irte_mask;
};
/* AMD specific interrupt remapping information */
struct irq_2_irte {
u16 devid; /* Device ID for IRTE table */
u16 index; /* Index into IRTE table*/
};
#endif /* CONFIG_IRQ_REMAP */
#ifdef CONFIG_X86_LOCAL_APIC
struct irq_data;
struct pci_dev;
struct msi_desc;
struct irq_cfg {
cpumask_var_t domain;
cpumask_var_t old_domain;
u8 vector;
u8 move_in_progress : 1;
#ifdef CONFIG_IRQ_REMAP
u8 remapped : 1;
enum irq_alloc_type {
X86_IRQ_ALLOC_TYPE_IOAPIC = 1,
X86_IRQ_ALLOC_TYPE_HPET,
X86_IRQ_ALLOC_TYPE_MSI,
X86_IRQ_ALLOC_TYPE_MSIX,
X86_IRQ_ALLOC_TYPE_DMAR,
X86_IRQ_ALLOC_TYPE_UV,
};
struct irq_alloc_info {
enum irq_alloc_type type;
u32 flags;
const struct cpumask *mask; /* CPU mask for vector allocation */
union {
struct irq_2_iommu irq_2_iommu;
struct irq_2_irte irq_2_irte;
};
#endif
union {
#ifdef CONFIG_X86_IO_APIC
int unused;
#ifdef CONFIG_HPET_TIMER
struct {
struct list_head irq_2_pin;
int hpet_id;
int hpet_index;
void *hpet_data;
};
#endif
#ifdef CONFIG_PCI_MSI
struct {
struct pci_dev *msi_dev;
irq_hw_number_t msi_hwirq;
};
#endif
#ifdef CONFIG_X86_IO_APIC
struct {
int ioapic_id;
int ioapic_pin;
int ioapic_node;
u32 ioapic_trigger : 1;
u32 ioapic_polarity : 1;
u32 ioapic_valid : 1;
struct IO_APIC_route_entry *ioapic_entry;
};
#endif
#ifdef CONFIG_DMAR_TABLE
struct {
int dmar_id;
void *dmar_data;
};
#endif
#ifdef CONFIG_HT_IRQ
struct {
int ht_pos;
int ht_idx;
struct pci_dev *ht_dev;
void *ht_update;
};
#endif
#ifdef CONFIG_X86_UV
struct {
int uv_limit;
int uv_blade;
unsigned long uv_offset;
char *uv_name;
};
#endif
};
};
struct irq_cfg {
unsigned int dest_apicid;
u8 vector;
};
extern struct irq_cfg *irq_cfg(unsigned int irq);
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
extern void clear_irq_vector(int irq, struct irq_cfg *cfg);
extern void setup_vector_irq(int cpu);
#ifdef CONFIG_SMP
extern void send_cleanup_vector(struct irq_cfg *);
@ -150,10 +149,7 @@ static inline void send_cleanup_vector(struct irq_cfg *c) { }
static inline void irq_complete_move(struct irq_cfg *c) { }
#endif
extern int apic_retrigger_irq(struct irq_data *data);
extern void apic_ack_edge(struct irq_data *data);
extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
unsigned int *dest_id);
#else /* CONFIG_X86_LOCAL_APIC */
static inline void lock_vector_lock(void) {}
static inline void unlock_vector_lock(void) {}
@ -163,8 +159,7 @@ static inline void unlock_vector_lock(void) {}
extern atomic_t irq_err_count;
extern atomic_t irq_mis_count;
/* EISA */
extern void eisa_set_level_irq(unsigned int irq);
extern void elcr_set_level_irq(unsigned int irq);
/* SMP */
extern __visible void smp_apic_timer_interrupt(struct pt_regs *);
@ -178,7 +173,6 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
extern __visible void smp_reschedule_interrupt(struct pt_regs *);
extern __visible void smp_call_function_interrupt(struct pt_regs *);
extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif
extern char irq_entries_start[];

View File

@ -177,6 +177,7 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
* look at pci_iomap().
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
unsigned long prot_val);
@ -338,6 +339,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
#define IO_SPACE_LIMIT 0xffff
#ifdef CONFIG_MTRR
extern int __must_check arch_phys_wc_index(int handle);
#define arch_phys_wc_index arch_phys_wc_index
extern int __must_check arch_phys_wc_add(unsigned long base,
unsigned long size);
extern void arch_phys_wc_del(int handle);

View File

@ -95,9 +95,22 @@ struct IR_IO_APIC_route_entry {
index : 15;
} __attribute__ ((packed));
#define IOAPIC_AUTO -1
#define IOAPIC_EDGE 0
#define IOAPIC_LEVEL 1
struct irq_alloc_info;
struct ioapic_domain_cfg;
#define IOAPIC_AUTO -1
#define IOAPIC_EDGE 0
#define IOAPIC_LEVEL 1
#define IOAPIC_MASKED 1
#define IOAPIC_UNMASKED 0
#define IOAPIC_POL_HIGH 0
#define IOAPIC_POL_LOW 1
#define IOAPIC_DEST_MODE_PHYSICAL 0
#define IOAPIC_DEST_MODE_LOGICAL 1
#define IOAPIC_MAP_ALLOC 0x1
#define IOAPIC_MAP_CHECK 0x2
@ -110,9 +123,6 @@ extern int nr_ioapics;
extern int mpc_ioapic_id(int ioapic);
extern unsigned int mpc_ioapic_addr(int ioapic);
extern struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic);
#define MP_MAX_IOAPIC_PIN 127
/* # of MP IRQ source entries */
extern int mp_irq_entries;
@ -120,9 +130,6 @@ extern int mp_irq_entries;
/* MP IRQ source entries */
extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* Older SiS APIC requires we rewrite the index register */
extern int sis_apic_bug;
/* 1 if "noapic" boot option passed */
extern int skip_ioapic_setup;
@ -132,6 +139,8 @@ extern int noioapicquirk;
/* -1 if "noapic" boot option passed */
extern int noioapicreroute;
extern u32 gsi_top;
extern unsigned long io_apic_irqs;
#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs))
@ -147,13 +156,6 @@ struct irq_cfg;
extern void ioapic_insert_resources(void);
extern int arch_early_ioapic_init(void);
extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
unsigned int, int,
struct io_apic_irq_attr *);
extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
extern int save_ioapic_entries(void);
extern void mask_ioapic_entries(void);
extern int restore_ioapic_entries(void);
@ -161,82 +163,32 @@ extern int restore_ioapic_entries(void);
extern void setup_ioapic_ids_from_mpc(void);
extern void setup_ioapic_ids_from_mpc_nocheck(void);
struct io_apic_irq_attr {
int ioapic;
int ioapic_pin;
int trigger;
int polarity;
};
enum ioapic_domain_type {
IOAPIC_DOMAIN_INVALID,
IOAPIC_DOMAIN_LEGACY,
IOAPIC_DOMAIN_STRICT,
IOAPIC_DOMAIN_DYNAMIC,
};
struct device_node;
struct irq_domain;
struct irq_domain_ops;
struct ioapic_domain_cfg {
enum ioapic_domain_type type;
const struct irq_domain_ops *ops;
struct device_node *dev;
};
struct mp_ioapic_gsi{
u32 gsi_base;
u32 gsi_end;
};
extern u32 gsi_top;
extern int mp_find_ioapic(u32 gsi);
extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
extern u32 mp_pin_to_gsi(int ioapic, int pin);
extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags,
struct irq_alloc_info *info);
extern void mp_unmap_irq(int irq);
extern int mp_register_ioapic(int id, u32 address, u32 gsi_base,
struct ioapic_domain_cfg *cfg);
extern int mp_unregister_ioapic(u32 gsi_base);
extern int mp_ioapic_registered(u32 gsi_base);
extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq);
extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node);
extern void __init pre_init_apic_IRQ0(void);
extern void ioapic_set_alloc_attr(struct irq_alloc_info *info,
int node, int trigger, int polarity);
extern void mp_save_irq(struct mpc_intsrc *m);
extern void disable_ioapic_support(void);
extern void __init native_io_apic_init_mappings(void);
extern void __init io_apic_init_mappings(void);
extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
extern void native_disable_io_apic(void);
extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
extern int native_ioapic_set_affinity(struct irq_data *,
const struct cpumask *,
bool);
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
return x86_io_apic_ops.read(apic, reg);
}
static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
x86_io_apic_ops.write(apic, reg, value);
}
static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
x86_io_apic_ops.modify(apic, reg, value);
}
extern void io_apic_eoi(unsigned int apic, unsigned int vector);
extern void setup_IO_APIC(void);
extern void enable_IO_APIC(void);
extern void disable_IO_APIC(void);
@ -253,8 +205,12 @@ static inline int arch_early_ioapic_init(void) { return 0; }
static inline void print_IO_APICs(void) {}
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags,
struct irq_alloc_info *info)
{
return gsi;
}
static inline void mp_unmap_irq(int irq) { }
static inline int save_ioapic_entries(void)
@ -268,17 +224,11 @@ static inline int restore_ioapic_entries(void)
return -ENOMEM;
}
static inline void mp_save_irq(struct mpc_intsrc *m) { };
static inline void mp_save_irq(struct mpc_intsrc *m) { }
static inline void disable_ioapic_support(void) { }
#define native_io_apic_init_mappings NULL
static inline void io_apic_init_mappings(void) { }
#define native_io_apic_read NULL
#define native_io_apic_write NULL
#define native_io_apic_modify NULL
#define native_disable_io_apic NULL
#define native_io_apic_print_entries NULL
#define native_ioapic_set_affinity NULL
#define native_setup_ioapic_entry NULL
#define native_eoi_ioapic_pin NULL
static inline void setup_IO_APIC(void) { }
static inline void enable_IO_APIC(void) { }

View File

@ -30,6 +30,10 @@ extern void fixup_irqs(void);
extern void irq_force_complete_move(int);
#endif
#ifdef CONFIG_HAVE_KVM
extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
#endif
extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
extern bool handle_irq(unsigned irq, struct pt_regs *regs);

View File

@ -22,14 +22,12 @@
#ifndef __X86_IRQ_REMAPPING_H
#define __X86_IRQ_REMAPPING_H
#include <asm/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/io_apic.h>
struct IO_APIC_route_entry;
struct io_apic_irq_attr;
struct irq_chip;
struct msi_msg;
struct pci_dev;
struct irq_cfg;
struct irq_alloc_info;
#ifdef CONFIG_IRQ_REMAP
@ -39,22 +37,21 @@ extern int irq_remapping_enable(void);
extern void irq_remapping_disable(void);
extern int irq_remapping_reenable(int);
extern int irq_remap_enable_fault_handling(void);
extern int setup_ioapic_remapped_entry(int irq,
struct IO_APIC_route_entry *entry,
unsigned int destination,
int vector,
struct io_apic_irq_attr *attr);
extern void free_remapped_irq(int irq);
extern void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id);
extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
extern void panic_if_irq_remap(const char *msg);
extern bool setup_remapped_irq(int irq,
struct irq_cfg *cfg,
struct irq_chip *chip);
void irq_remap_modify_chip_defaults(struct irq_chip *chip);
extern struct irq_domain *
irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info);
extern struct irq_domain *
irq_remapping_get_irq_domain(struct irq_alloc_info *info);
/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent);
/* Get parent irqdomain for interrupt remapping irqdomain */
static inline struct irq_domain *arch_get_ir_parent_domain(void)
{
return x86_vector_domain;
}
#else /* CONFIG_IRQ_REMAP */
@ -64,42 +61,22 @@ static inline int irq_remapping_enable(void) { return -ENODEV; }
static inline void irq_remapping_disable(void) { }
static inline int irq_remapping_reenable(int eim) { return -ENODEV; }
static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; }
static inline int setup_ioapic_remapped_entry(int irq,
struct IO_APIC_route_entry *entry,
unsigned int destination,
int vector,
struct io_apic_irq_attr *attr)
{
return -ENODEV;
}
static inline void free_remapped_irq(int irq) { }
static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id)
{
}
static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
{
return -ENODEV;
}
static inline void panic_if_irq_remap(const char *msg)
{
}
static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
static inline struct irq_domain *
irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info)
{
return NULL;
}
static inline bool setup_remapped_irq(int irq,
struct irq_cfg *cfg,
struct irq_chip *chip)
static inline struct irq_domain *
irq_remapping_get_irq_domain(struct irq_alloc_info *info)
{
return false;
return NULL;
}
#endif /* CONFIG_IRQ_REMAP */
#define dmar_alloc_hwirq() irq_alloc_hwirq(-1)
#define dmar_free_hwirq irq_free_hwirq
#endif /* __X86_IRQ_REMAPPING_H */

View File

@ -47,31 +47,12 @@
#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
#define IA32_SYSCALL_VECTOR 0x80
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
#endif
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
* round up to the next 16-vector boundary
*/
#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15)
#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq)
/*
* Special IRQ vectors used by the SMP architecture, 0xf0-0xff
@ -105,6 +86,7 @@
/* Vector for KVM to deliver posted interrupt IPI */
#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#endif
/*
@ -155,18 +137,22 @@ static inline int invalid_vm86_irq(int irq)
* static arrays.
*/
#define NR_IRQS_LEGACY 16
#define NR_IRQS_LEGACY 16
#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
#define CPU_VECTOR_LIMIT (64 * NR_CPUS)
#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS)
#ifdef CONFIG_X86_IO_APIC
# define CPU_VECTOR_LIMIT (64 * NR_CPUS)
# define NR_IRQS \
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI)
#define NR_IRQS \
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
#else /* !CONFIG_X86_IO_APIC: */
# define NR_IRQS NR_IRQS_LEGACY
#elif defined(CONFIG_X86_IO_APIC)
#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
#elif defined(CONFIG_PCI_MSI)
#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT)
#else
#define NR_IRQS NR_IRQS_LEGACY
#endif
#endif /* _ASM_X86_IRQ_VECTORS_H */

View File

@ -0,0 +1,63 @@
#ifndef _ASM_IRQDOMAIN_H
#define _ASM_IRQDOMAIN_H
#include <linux/irqdomain.h>
#include <asm/hw_irq.h>
#ifdef CONFIG_X86_LOCAL_APIC
enum {
/* Allocate contiguous CPU vectors */
X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1,
};
extern struct irq_domain *x86_vector_domain;
extern void init_irq_alloc_info(struct irq_alloc_info *info,
const struct cpumask *mask);
extern void copy_irq_alloc_info(struct irq_alloc_info *dst,
struct irq_alloc_info *src);
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
struct device_node;
struct irq_data;
enum ioapic_domain_type {
IOAPIC_DOMAIN_INVALID,
IOAPIC_DOMAIN_LEGACY,
IOAPIC_DOMAIN_STRICT,
IOAPIC_DOMAIN_DYNAMIC,
};
struct ioapic_domain_cfg {
enum ioapic_domain_type type;
const struct irq_domain_ops *ops;
struct device_node *dev;
};
extern const struct irq_domain_ops mp_ioapic_irqdomain_ops;
extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg);
extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs);
extern void mp_irqdomain_activate(struct irq_domain *domain,
struct irq_data *irq_data);
extern void mp_irqdomain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data);
extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
#endif /* CONFIG_X86_IO_APIC */
#ifdef CONFIG_PCI_MSI
extern void arch_init_msi_domain(struct irq_domain *domain);
#else
static inline void arch_init_msi_domain(struct irq_domain *domain) { }
#endif
#ifdef CONFIG_HT_IRQ
extern void arch_init_htirq_domain(struct irq_domain *domain);
#else
static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
#endif
#endif

View File

@ -0,0 +1,7 @@
#ifndef _ASM_X86_MSI_H
#define _ASM_X86_MSI_H
#include <asm/hw_irq.h>
typedef struct irq_alloc_info msi_alloc_info_t;
#endif /* _ASM_X86_MSI_H */

View File

@ -31,7 +31,7 @@
* arch_phys_wc_add and arch_phys_wc_del.
*/
# ifdef CONFIG_MTRR
extern u8 mtrr_type_lookup(u64 addr, u64 end);
extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
extern int mtrr_add(unsigned long base, unsigned long size,
@ -48,14 +48,13 @@ extern void mtrr_aps_init(void);
extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
extern int phys_wc_to_mtrr_index(int handle);
# else
static inline u8 mtrr_type_lookup(u64 addr, u64 end)
static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform)
{
/*
* Return no-MTRRs:
*/
return 0xff;
return MTRR_TYPE_INVALID;
}
#define mtrr_save_fixed_ranges(arg) do {} while (0)
#define mtrr_save_state() do {} while (0)
@ -84,10 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
static inline int phys_wc_to_mtrr_index(int handle)
{
return -1;
}
#define mtrr_ap_init() do {} while (0)
#define mtrr_bp_init() do {} while (0)
@ -127,4 +122,8 @@ struct mtrr_gentry32 {
_IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32)
#endif /* CONFIG_COMPAT */
/* Bit fields for enabled in struct mtrr_state_type */
#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01
#define MTRR_STATE_MTRR_ENABLED 0x02
#endif /* _ASM_X86_MTRR_H */

View File

@ -160,13 +160,14 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
unsigned long long (*read_tscp)(unsigned int *aux);
#ifdef CONFIG_X86_32
/*
* Atomically enable interrupts and return to userspace. This
* is only ever used to return to 32-bit processes; in a
* 64-bit kernel, it's used for 32-on-64 compat processes, but
* never native 64-bit processes. (Jump, not call.)
* is only used in 32-bit kernels. 64-bit kernels use
* usergs_sysret32 instead.
*/
void (*irq_enable_sysexit)(void);
#endif
/*
* Switch to usermode gs and return to 64-bit usermode using

View File

@ -4,12 +4,7 @@
#include <linux/types.h>
#include <asm/pgtable_types.h>
#ifdef CONFIG_X86_PAT
extern int pat_enabled;
#else
static const int pat_enabled;
#endif
bool pat_enabled(void);
extern void pat_init(void);
void pat_init_cache_modes(void);

View File

@ -96,15 +96,10 @@ extern void pci_iommu_alloc(void);
#ifdef CONFIG_PCI_MSI
/* implemented in arch/x86/kernel/apic/io_apic. */
struct msi_desc;
void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
unsigned int dest, struct msi_msg *msg, u8 hpet_id);
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
void native_restore_msi_irqs(struct pci_dev *dev);
int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
unsigned int irq_base, unsigned int irq_offset);
#else
#define native_compose_msi_msg NULL
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#endif

View File

@ -215,6 +215,44 @@ static inline void clwb(volatile void *__p)
: [pax] "a" (p));
}
/**
* pcommit_sfence() - persistent commit and fence
*
* The PCOMMIT instruction ensures that data that has been flushed from the
* processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
* memory and is durable on the DIMM. The primary use case for this is
* persistent memory.
*
* This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
* with appropriate fencing.
*
* Example:
* void flush_and_commit_buffer(void *vaddr, unsigned int size)
* {
* unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
* void *vend = vaddr + size;
* void *p;
*
* for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
* p < vend; p += boot_cpu_data.x86_clflush_size)
* clwb(p);
*
* // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
* // MFENCE via mb() also works
* wmb();
*
* // PCOMMIT and the required SFENCE for ordering
* pcommit_sfence();
* }
*
* After this function completes the data pointed to by 'vaddr' has been
* accepted to memory and will be durable if the 'vaddr' points to persistent
* memory.
*
* PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
* things we include both the PCOMMIT and the required SFENCE in the
* alternatives generated by pcommit_sfence().
*/
static inline void pcommit_sfence(void)
{
alternative(ASM_NOP7,

View File

@ -177,8 +177,6 @@ struct thread_info {
*/
#ifndef __ASSEMBLY__
DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
@ -197,9 +195,13 @@ static inline unsigned long current_stack_pointer(void)
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
#endif
/* Load thread_info address into "reg" */
#define GET_THREAD_INFO(reg) \
_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
_ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
_ASM_SUB $(THREAD_SIZE),reg ;
/*

View File

@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
__put_user_size(*(u32 *)from, (u32 __user *)to,
4, ret, 4);
return ret;
case 8:
__put_user_size(*(u64 *)from, (u64 __user *)to,
8, ret, 8);
return ret;
}
}
return __copy_to_user_ll(to, from, n);

View File

@ -171,38 +171,17 @@ struct x86_platform_ops {
};
struct pci_dev;
struct msi_msg;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
unsigned int dest, struct msi_msg *msg,
u8 hpet_id);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev);
int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
};
struct IO_APIC_route_entry;
struct io_apic_irq_attr;
struct irq_data;
struct cpumask;
struct x86_io_apic_ops {
void (*init) (void);
unsigned int (*read) (unsigned int apic, unsigned int reg);
void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
void (*disable)(void);
void (*print_entries)(unsigned int apic, unsigned int nr_entries);
int (*set_affinity)(struct irq_data *data,
const struct cpumask *mask,
bool force);
int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
unsigned int destination, int vector,
struct io_apic_irq_attr *attr);
void (*eoi_ioapic_pin)(int apic, int pin, int vector);
};
extern struct x86_init_ops x86_init;

View File

@ -103,7 +103,7 @@ struct mtrr_state_type {
#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry)
/* These are the region types */
/* MTRR memory types, as defined in the Intel SDM */
#define MTRR_TYPE_UNCACHABLE 0
#define MTRR_TYPE_WRCOMB 1
/*#define MTRR_TYPE_ 2*/
@ -113,5 +113,11 @@ struct mtrr_state_type {
#define MTRR_TYPE_WRBACK 6
#define MTRR_NUM_TYPES 7
/*
* Invalid MTRR memory type. mtrr_type_lookup() returns this value when
* MTRRs are disabled. Note, this value is allocated from the reserved
* values (0x7-0xff) of the MTRR memory types.
*/
#define MTRR_TYPE_INVALID 0xff
#endif /* _UAPI_ASM_X86_MTRR_H */
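As an illustration of the MTRR_TYPE_INVALID convention described in the
comment above, a hedged sketch of a hypothetical in-kernel caller of
mtrr_type_lookup() (start, end and the policy comments are invented; the
real callers live in the PAT / huge-page mapping code):

	u8 uniform, type;

	type = mtrr_type_lookup(start, end, &uniform);
	if (type == MTRR_TYPE_INVALID) {
		/* MTRRs are disabled; only the PAT attribute applies. */
	} else if (uniform) {
		/* The whole range has a single, uniform MTRR type. */
	} else {
		/* Mixed MTRR types over the range; be conservative. */
	}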

View File

@ -31,12 +31,12 @@
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <asm/irqdomain.h>
#include <asm/pci_x86.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
@ -400,57 +400,13 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0;
}
static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
int polarity)
{
int irq, node;
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
if (mp_set_gsi_attr(gsi, trigger, polarity, node)) {
pr_warn("Failed to set pin attr for GSI%d\n", gsi);
return -1;
}
irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
if (irq < 0)
return irq;
/* Don't set up the ACPI SCI because it's already set up */
if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi)
mp_config_acpi_gsi(dev, gsi, trigger, polarity);
return irq;
}
static void mp_unregister_gsi(u32 gsi)
{
int irq;
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return;
irq = mp_map_gsi_to_irq(gsi, 0);
if (irq > 0)
mp_unmap_irq(irq);
}
static struct irq_domain_ops acpi_irqdomain_ops = {
.map = mp_irqdomain_map,
.unmap = mp_irqdomain_unmap,
};
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_io_apic *ioapic = NULL;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_DYNAMIC,
.ops = &acpi_irqdomain_ops,
.ops = &mp_ioapic_irqdomain_ops,
};
ioapic = (struct acpi_madt_io_apic *)header;
@ -652,7 +608,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
* Make sure all (legacy) PCI IRQs are set as level-triggered.
*/
if (trigger == ACPI_LEVEL_SENSITIVE)
eisa_set_level_irq(gsi);
elcr_set_level_irq(gsi);
#endif
return gsi;
@ -663,10 +619,21 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
int irq = gsi;
#ifdef CONFIG_X86_IO_APIC
int node;
struct irq_alloc_info info;
node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
ioapic_set_alloc_attr(&info, node, trigger, polarity);
mutex_lock(&acpi_ioapic_lock);
irq = mp_register_gsi(dev, gsi, trigger, polarity);
irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
/* Don't set up the ACPI SCI because it's already set up */
if (irq >= 0 && enable_update_mptable &&
acpi_gbl_FADT.sci_interrupt != gsi)
mp_config_acpi_gsi(dev, gsi, trigger, polarity);
mutex_unlock(&acpi_ioapic_lock);
#endif
@ -676,8 +643,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
static void acpi_unregister_gsi_ioapic(u32 gsi)
{
#ifdef CONFIG_X86_IO_APIC
int irq;
mutex_lock(&acpi_ioapic_lock);
mp_unregister_gsi(gsi);
irq = mp_map_gsi_to_irq(gsi, 0, NULL);
if (irq > 0)
mp_unmap_irq(irq);
mutex_unlock(&acpi_ioapic_lock);
#endif
}
@ -786,7 +757,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
u64 addr;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_DYNAMIC,
.ops = &acpi_irqdomain_ops,
.ops = &mp_ioapic_irqdomain_ops,
};
ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);

View File

@ -62,7 +62,7 @@ ENTRY(do_suspend_lowlevel)
pushfq
popq pt_regs_flags(%rax)
movq $resume_point, saved_rip(%rip)
movq $.Lresume_point, saved_rip(%rip)
movq %rsp, saved_rsp
movq %rbp, saved_rbp
@ -75,10 +75,10 @@ ENTRY(do_suspend_lowlevel)
xorl %eax, %eax
call x86_acpi_enter_sleep_state
/* in case something went wrong, restore the machine status and go on */
jmp resume_point
jmp .Lresume_point
.align 4
resume_point:
.Lresume_point:
/* We don't restore %rax, it must be 0 anyway */
movq $saved_context, %rax
movq saved_context_cr4(%rax), %rbx

View File

@ -227,6 +227,15 @@ void __init arch_init_ideal_nops(void)
#endif
}
break;
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 > 0xf) {
ideal_nops = p6_nops;
return;
}
/* fall through */
default:
#ifdef CONFIG_X86_64
ideal_nops = k8_nops;

View File

@ -171,10 +171,6 @@ static int __init apbt_clockevent_register(void)
static void apbt_setup_irq(struct apbt_dev *adev)
{
/* timer0 irq has been setup early */
if (adev->irq == 0)
return;
irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
}

View File

@ -3,6 +3,8 @@
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
* Jiang Liu <jiang.liu@linux.intel.com>
* Add support of hierarchical irqdomain
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -14,78 +16,112 @@
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/htirq.h>
#include <asm/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/hypertransport.h>
static struct irq_domain *htirq_domain;
/*
* Hypertransport interrupt support
*/
static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
struct ht_irq_msg msg;
fetch_ht_irq_msg(irq, &msg);
msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
write_ht_irq_msg(irq, &msg);
}
static int
ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
struct irq_cfg *cfg = irqd_cfg(data);
unsigned int dest;
struct irq_data *parent = data->parent_data;
int ret;
ret = apic_set_affinity(data, mask, &dest);
if (ret)
return ret;
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret >= 0) {
struct ht_irq_msg msg;
struct irq_cfg *cfg = irqd_cfg(data);
target_ht_irq(data->irq, dest, cfg->vector);
return IRQ_SET_MASK_OK_NOCOPY;
fetch_ht_irq_msg(data->irq, &msg);
msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
HT_IRQ_LOW_DEST_ID_MASK);
msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
write_ht_irq_msg(data->irq, &msg);
}
return ret;
}
static struct irq_chip ht_irq_chip = {
.name = "PCI-HT",
.irq_mask = mask_ht_irq,
.irq_unmask = unmask_ht_irq,
.irq_ack = apic_ack_edge,
.irq_ack = irq_chip_ack_parent,
.irq_set_affinity = ht_set_affinity,
.irq_retrigger = apic_retrigger_irq,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct ht_irq_cfg *ht_cfg;
struct irq_alloc_info *info = arg;
struct pci_dev *dev;
irq_hw_number_t hwirq;
int ret;
if (nr_irqs > 1 || !info)
return -EINVAL;
dev = info->ht_dev;
hwirq = (info->ht_idx & 0xFF) |
PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
if (irq_find_mapping(domain, hwirq) > 0)
return -EEXIST;
ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
if (!ht_cfg)
return -ENOMEM;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
if (ret < 0) {
kfree(ht_cfg);
return ret;
}
/* Initialize msg to a value that will never match the first write. */
ht_cfg->msg.address_lo = 0xffffffff;
ht_cfg->msg.address_hi = 0xffffffff;
ht_cfg->dev = info->ht_dev;
ht_cfg->update = info->ht_update;
ht_cfg->pos = info->ht_pos;
ht_cfg->idx = 0x10 + (info->ht_idx * 2);
irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
handle_edge_irq, ht_cfg, "edge");
return 0;
}
static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
BUG_ON(nr_irqs != 1);
kfree(irq_data->chip_data);
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
static void htirq_domain_activate(struct irq_domain *domain,
struct irq_data *irq_data)
{
struct irq_cfg *cfg;
struct ht_irq_msg msg;
unsigned dest;
int err;
if (disable_apic)
return -ENXIO;
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, apic->target_cpus());
if (err)
return err;
err = apic->cpu_mask_to_apicid_and(cfg->domain,
apic->target_cpus(), &dest);
if (err)
return err;
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
struct irq_cfg *cfg = irqd_cfg(irq_data);
msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
msg.address_lo =
HT_IRQ_LOW_BASE |
HT_IRQ_LOW_DEST_ID(dest) |
HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
HT_IRQ_LOW_VECTOR(cfg->vector) |
((apic->irq_dest_mode == 0) ?
HT_IRQ_LOW_DM_PHYSICAL :
@ -95,13 +131,56 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
HT_IRQ_LOW_MT_FIXED :
HT_IRQ_LOW_MT_ARBITRATED) |
HT_IRQ_LOW_IRQ_MASKED;
write_ht_irq_msg(irq, &msg);
irq_set_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
dev_dbg(&dev->dev, "irq %d for HT\n", irq);
return 0;
write_ht_irq_msg(irq_data->irq, &msg);
}
static void htirq_domain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data)
{
struct ht_irq_msg msg;
memset(&msg, 0, sizeof(msg));
write_ht_irq_msg(irq_data->irq, &msg);
}
static const struct irq_domain_ops htirq_domain_ops = {
.alloc = htirq_domain_alloc,
.free = htirq_domain_free,
.activate = htirq_domain_activate,
.deactivate = htirq_domain_deactivate,
};
void arch_init_htirq_domain(struct irq_domain *parent)
{
if (disable_apic)
return;
htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL);
if (!htirq_domain)
pr_warn("failed to initialize irqdomain for HTIRQ.\n");
else
htirq_domain->parent = parent;
}
int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
ht_irq_update_t *update)
{
struct irq_alloc_info info;
if (!htirq_domain)
return -ENOSYS;
init_irq_alloc_info(&info, NULL);
info.ht_idx = idx;
info.ht_pos = pos;
info.ht_dev = dev;
info.ht_update = update;
return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
&info);
}
void arch_teardown_ht_irq(unsigned int irq)
{
irq_domain_free_irqs(irq, 1);
}

File diff suppressed because it is too large

View File

@ -3,6 +3,8 @@
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
* Jiang Liu <jiang.liu@linux.intel.com>
* Convert to hierarchical irqdomain
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -14,22 +16,23 @@
#include <linux/dmar.h>
#include <linux/hpet.h>
#include <linux/msi.h>
#include <asm/irqdomain.h>
#include <asm/msidef.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
void native_compose_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id)
static struct irq_domain *msi_default_domain;
static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
struct irq_cfg *cfg = irq_cfg(irq);
struct irq_cfg *cfg = irqd_cfg(data);
msg->address_hi = MSI_ADDR_BASE_HI;
if (x2apic_enabled())
msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
msg->address_lo =
MSI_ADDR_BASE_LO |
@ -39,7 +42,7 @@ void native_compose_msi_msg(struct pci_dev *pdev,
((apic->irq_delivery_mode != dest_LowestPrio) ?
MSI_ADDR_REDIRECTION_CPU :
MSI_ADDR_REDIRECTION_LOWPRI) |
MSI_ADDR_DEST_ID(dest);
MSI_ADDR_DEST_ID(cfg->dest_apicid);
msg->data =
MSI_DATA_TRIGGER_EDGE |
@ -50,237 +53,305 @@ void native_compose_msi_msg(struct pci_dev *pdev,
MSI_DATA_VECTOR(cfg->vector);
}
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
struct msi_msg *msg, u8 hpet_id)
{
struct irq_cfg *cfg;
int err;
unsigned dest;
if (disable_apic)
return -ENXIO;
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, apic->target_cpus());
if (err)
return err;
err = apic->cpu_mask_to_apicid_and(cfg->domain,
apic->target_cpus(), &dest);
if (err)
return err;
x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
return 0;
}
static int
msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
struct irq_cfg *cfg = irqd_cfg(data);
struct msi_msg msg;
unsigned int dest;
int ret;
ret = apic_set_affinity(data, mask, &dest);
if (ret)
return ret;
__get_cached_msi_msg(data->msi_desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
__pci_write_msi_msg(data->msi_desc, &msg);
return IRQ_SET_MASK_OK_NOCOPY;
}
/*
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
* which implement the MSI or MSI-X Capability Structure.
*/
static struct irq_chip msi_chip = {
static struct irq_chip pci_msi_controller = {
.name = "PCI-MSI",
.irq_unmask = pci_msi_unmask_irq,
.irq_mask = pci_msi_mask_irq,
.irq_ack = apic_ack_edge,
.irq_set_affinity = msi_set_affinity,
.irq_retrigger = apic_retrigger_irq,
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = irq_msi_compose_msg,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
unsigned int irq_base, unsigned int irq_offset)
{
struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
unsigned int irq = irq_base + irq_offset;
int ret;
ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
/*
* MSI-X message is written per-IRQ, the offset is always 0.
* MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
*/
if (!irq_offset)
pci_write_msi_msg(irq, &msg);
setup_remapped_irq(irq, irq_cfg(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq);
return 0;
}
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
struct msi_desc *msidesc;
unsigned int irq;
int node, ret;
struct irq_domain *domain;
struct irq_alloc_info info;
/* Multiple MSI vectors only supported with interrupt remapping */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_MSI;
info.msi_dev = dev;
node = dev_to_node(&dev->dev);
domain = irq_remapping_get_irq_domain(&info);
if (domain == NULL)
domain = msi_default_domain;
if (domain == NULL)
return -ENOSYS;
list_for_each_entry(msidesc, &dev->msi_list, list) {
irq = irq_alloc_hwirq(node);
if (!irq)
return -ENOSPC;
ret = setup_msi_irq(dev, msidesc, irq, 0);
if (ret < 0) {
irq_free_hwirq(irq);
return ret;
}
}
return 0;
return pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
}
void native_teardown_msi_irq(unsigned int irq)
{
irq_free_hwirq(irq);
irq_domain_free_irqs(irq, 1);
}
#ifdef CONFIG_DMAR_TABLE
static int
dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info,
msi_alloc_info_t *arg)
{
struct irq_cfg *cfg = irqd_cfg(data);
unsigned int dest, irq = data->irq;
struct msi_msg msg;
int ret;
ret = apic_set_affinity(data, mask, &dest);
if (ret)
return ret;
dmar_msi_read(irq, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
dmar_msi_write(irq, &msg);
return IRQ_SET_MASK_OK_NOCOPY;
return arg->msi_hwirq;
}
static struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI",
.irq_unmask = dmar_msi_unmask,
.irq_mask = dmar_msi_mask,
.irq_ack = apic_ack_edge,
.irq_set_affinity = dmar_msi_set_affinity,
.irq_retrigger = apic_retrigger_irq,
static int pci_msi_prepare(struct irq_domain *domain, struct device *dev,
int nvec, msi_alloc_info_t *arg)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct msi_desc *desc = first_pci_msi_entry(pdev);
init_irq_alloc_info(arg, NULL);
arg->msi_dev = pdev;
if (desc->msi_attrib.is_msix) {
arg->type = X86_IRQ_ALLOC_TYPE_MSIX;
} else {
arg->type = X86_IRQ_ALLOC_TYPE_MSI;
arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
}
return 0;
}
static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
{
arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc);
}
static struct msi_domain_ops pci_msi_domain_ops = {
.get_hwirq = pci_msi_get_hwirq,
.msi_prepare = pci_msi_prepare,
.set_desc = pci_msi_set_desc,
};
static struct msi_domain_info pci_msi_domain_info = {
.flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
MSI_FLAG_PCI_MSIX,
.ops = &pci_msi_domain_ops,
.chip = &pci_msi_controller,
.handler = handle_edge_irq,
.handler_name = "edge",
};
void arch_init_msi_domain(struct irq_domain *parent)
{
if (disable_apic)
return;
msi_default_domain = pci_msi_create_irq_domain(NULL,
&pci_msi_domain_info, parent);
if (!msi_default_domain)
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
}
#ifdef CONFIG_IRQ_REMAP
static struct irq_chip pci_msi_ir_controller = {
.name = "IR-PCI-MSI",
.irq_unmask = pci_msi_unmask_irq,
.irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
int arch_setup_dmar_msi(unsigned int irq)
{
int ret;
struct msi_msg msg;
static struct msi_domain_info pci_msi_ir_domain_info = {
.flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
.ops = &pci_msi_domain_ops,
.chip = &pci_msi_ir_controller,
.handler = handle_edge_irq,
.handler_name = "edge",
};
struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent)
{
return pci_msi_create_irq_domain(NULL, &pci_msi_ir_domain_info, parent);
}
#endif
#ifdef CONFIG_DMAR_TABLE
static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
dmar_msi_write(data->irq, msg);
}
static struct irq_chip dmar_msi_controller = {
.name = "DMAR-MSI",
.irq_unmask = dmar_msi_unmask,
.irq_mask = dmar_msi_mask,
.irq_ack = irq_chip_ack_parent,
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = irq_msi_compose_msg,
.irq_write_msi_msg = dmar_msi_write_msg,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info,
msi_alloc_info_t *arg)
{
return arg->dmar_id;
}
static int dmar_msi_init(struct irq_domain *domain,
struct msi_domain_info *info, unsigned int virq,
irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
irq_domain_set_info(domain, virq, arg->dmar_id, info->chip, NULL,
handle_edge_irq, arg->dmar_data, "edge");
ret = msi_compose_msg(NULL, irq, &msg, -1);
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
"edge");
return 0;
}
static struct msi_domain_ops dmar_msi_domain_ops = {
.get_hwirq = dmar_msi_get_hwirq,
.msi_init = dmar_msi_init,
};
static struct msi_domain_info dmar_msi_domain_info = {
.ops = &dmar_msi_domain_ops,
.chip = &dmar_msi_controller,
};
static struct irq_domain *dmar_get_irq_domain(void)
{
static struct irq_domain *dmar_domain;
static DEFINE_MUTEX(dmar_lock);
mutex_lock(&dmar_lock);
if (dmar_domain == NULL)
dmar_domain = msi_create_irq_domain(NULL, &dmar_msi_domain_info,
x86_vector_domain);
mutex_unlock(&dmar_lock);
return dmar_domain;
}
int dmar_alloc_hwirq(int id, int node, void *arg)
{
struct irq_domain *domain = dmar_get_irq_domain();
struct irq_alloc_info info;
if (!domain)
return -1;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_DMAR;
info.dmar_id = id;
info.dmar_data = arg;
return irq_domain_alloc_irqs(domain, 1, node, &info);
}
void dmar_free_hwirq(int irq)
{
irq_domain_free_irqs(irq, 1);
}
#endif
/*
* MSI message composition
*/
#ifdef CONFIG_HPET_TIMER
static int hpet_msi_set_affinity(struct irq_data *data,
const struct cpumask *mask, bool force)
static inline int hpet_dev_id(struct irq_domain *domain)
{
struct irq_cfg *cfg = irqd_cfg(data);
struct msi_msg msg;
unsigned int dest;
int ret;
struct msi_domain_info *info = msi_get_domain_info(domain);
ret = apic_set_affinity(data, mask, &dest);
if (ret)
return ret;
hpet_msi_read(data->handler_data, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(data->handler_data, &msg);
return IRQ_SET_MASK_OK_NOCOPY;
return (int)(long)info->data;
}
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
hpet_msi_write(data->handler_data, msg);
}
static struct irq_chip hpet_msi_controller = {
.name = "HPET-MSI",
.irq_unmask = hpet_msi_unmask,
.irq_mask = hpet_msi_mask,
.irq_ack = apic_ack_edge,
.irq_set_affinity = hpet_msi_set_affinity,
.irq_retrigger = apic_retrigger_irq,
.irq_ack = irq_chip_ack_parent,
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = irq_msi_compose_msg,
.irq_write_msi_msg = hpet_msi_write_msg,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
int default_setup_hpet_msi(unsigned int irq, unsigned int id)
static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info,
msi_alloc_info_t *arg)
{
struct irq_chip *chip = &hpet_msi_type;
struct msi_msg msg;
int ret;
return arg->hpet_index;
}
ret = msi_compose_msg(NULL, irq, &msg, id);
if (ret < 0)
return ret;
static int hpet_msi_init(struct irq_domain *domain,
struct msi_domain_info *info, unsigned int virq,
irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
irq_domain_set_info(domain, virq, arg->hpet_index, info->chip, NULL,
handle_edge_irq, arg->hpet_data, "edge");
hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
setup_remapped_irq(irq, irq_cfg(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
}
static void hpet_msi_free(struct irq_domain *domain,
struct msi_domain_info *info, unsigned int virq)
{
irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
}
static struct msi_domain_ops hpet_msi_domain_ops = {
.get_hwirq = hpet_msi_get_hwirq,
.msi_init = hpet_msi_init,
.msi_free = hpet_msi_free,
};
static struct msi_domain_info hpet_msi_domain_info = {
.ops = &hpet_msi_domain_ops,
.chip = &hpet_msi_controller,
};
struct irq_domain *hpet_create_irq_domain(int hpet_id)
{
struct irq_domain *parent;
struct irq_alloc_info info;
struct msi_domain_info *domain_info;
if (x86_vector_domain == NULL)
return NULL;
domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL);
if (!domain_info)
return NULL;
*domain_info = hpet_msi_domain_info;
domain_info->data = (void *)(long)hpet_id;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_HPET;
info.hpet_id = hpet_id;
parent = irq_remapping_get_ir_irq_domain(&info);
if (parent == NULL)
parent = x86_vector_domain;
else
hpet_msi_controller.name = "IR-HPET-MSI";
return msi_create_irq_domain(NULL, domain_info, parent);
}
int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
int dev_num)
{
struct irq_alloc_info info;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_HPET;
info.hpet_data = dev;
info.hpet_id = hpet_dev_id(domain);
info.hpet_index = dev_num;
return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
}
#endif

View File

@ -3,6 +3,8 @@
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
* Jiang Liu <jiang.liu@linux.intel.com>
* Enable support of hierarchical irqdomains
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -11,15 +13,28 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <asm/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/i8259.h>
#include <asm/desc.h>
#include <asm/irq_remapping.h>
struct apic_chip_data {
struct irq_cfg cfg;
cpumask_var_t domain;
cpumask_var_t old_domain;
u8 move_in_progress : 1;
};
struct irq_domain *x86_vector_domain;
static DEFINE_RAW_SPINLOCK(vector_lock);
static cpumask_var_t vector_cpumask;
static struct irq_chip lapic_controller;
#ifdef CONFIG_X86_IO_APIC
static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
#endif
void lock_vector_lock(void)
{
@ -34,71 +49,59 @@ void unlock_vector_lock(void)
raw_spin_unlock(&vector_lock);
}
struct irq_cfg *irq_cfg(unsigned int irq)
static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data)
{
return irq_get_chip_data(irq);
if (!irq_data)
return NULL;
while (irq_data->parent_data)
irq_data = irq_data->parent_data;
return irq_data->chip_data;
}
struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
{
return irq_data->chip_data;
struct apic_chip_data *data = apic_chip_data(irq_data);
return data ? &data->cfg : NULL;
}
static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg;
return irqd_cfg(irq_get_irq_data(irq));
}
cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
if (!cfg)
static struct apic_chip_data *alloc_apic_chip_data(int node)
{
struct apic_chip_data *data;
data = kzalloc_node(sizeof(*data), GFP_KERNEL, node);
if (!data)
return NULL;
if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
goto out_cfg;
if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node))
goto out_data;
if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node))
goto out_domain;
#ifdef CONFIG_X86_IO_APIC
INIT_LIST_HEAD(&cfg->irq_2_pin);
#endif
return cfg;
return data;
out_domain:
free_cpumask_var(cfg->domain);
out_cfg:
kfree(cfg);
free_cpumask_var(data->domain);
out_data:
kfree(data);
return NULL;
}
struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
static void free_apic_chip_data(struct apic_chip_data *data)
{
int res = irq_alloc_desc_at(at, node);
struct irq_cfg *cfg;
if (res < 0) {
if (res != -EEXIST)
return NULL;
cfg = irq_cfg(at);
if (cfg)
return cfg;
if (data) {
free_cpumask_var(data->domain);
free_cpumask_var(data->old_domain);
kfree(data);
}
cfg = alloc_irq_cfg(at, node);
if (cfg)
irq_set_chip_data(at, cfg);
else
irq_free_desc(at);
return cfg;
}
static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
{
if (!cfg)
return;
irq_set_chip_data(at, NULL);
free_cpumask_var(cfg->domain);
free_cpumask_var(cfg->old_domain);
kfree(cfg);
}
static int
__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
static int __assign_irq_vector(int irq, struct apic_chip_data *d,
const struct cpumask *mask)
{
/*
* NOTE! The local APIC isn't very good at handling
@ -114,36 +117,33 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
static int current_offset = VECTOR_OFFSET_START % 16;
int cpu, err;
cpumask_var_t tmp_mask;
if (cfg->move_in_progress)
if (d->move_in_progress)
return -EBUSY;
if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return -ENOMEM;
/* Only try and allocate irqs on cpus that are present */
err = -ENOSPC;
cpumask_clear(cfg->old_domain);
cpumask_clear(d->old_domain);
cpu = cpumask_first_and(mask, cpu_online_mask);
while (cpu < nr_cpu_ids) {
int new_cpu, vector, offset;
apic->vector_allocation_domain(cpu, tmp_mask, mask);
apic->vector_allocation_domain(cpu, vector_cpumask, mask);
if (cpumask_subset(tmp_mask, cfg->domain)) {
if (cpumask_subset(vector_cpumask, d->domain)) {
err = 0;
if (cpumask_equal(tmp_mask, cfg->domain))
if (cpumask_equal(vector_cpumask, d->domain))
break;
/*
* New cpumask using the vector is a proper subset of
* the current in use mask. So cleanup the vector
* allocation for the members that are not used anymore.
*/
cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
cfg->move_in_progress =
cpumask_intersects(cfg->old_domain, cpu_online_mask);
cpumask_and(cfg->domain, cfg->domain, tmp_mask);
cpumask_andnot(d->old_domain, d->domain,
vector_cpumask);
d->move_in_progress =
cpumask_intersects(d->old_domain, cpu_online_mask);
cpumask_and(d->domain, d->domain, vector_cpumask);
break;
}
@ -157,16 +157,18 @@ next:
}
if (unlikely(current_vector == vector)) {
cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
cpumask_andnot(tmp_mask, mask, cfg->old_domain);
cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
cpumask_or(d->old_domain, d->old_domain,
vector_cpumask);
cpumask_andnot(vector_cpumask, mask, d->old_domain);
cpu = cpumask_first_and(vector_cpumask,
cpu_online_mask);
continue;
}
if (test_bit(vector, used_vectors))
goto next;
for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
if (per_cpu(vector_irq, new_cpu)[vector] >
VECTOR_UNDEFINED)
goto next;
@ -174,55 +176,73 @@ next:
/* Found one! */
current_vector = vector;
current_offset = offset;
if (cfg->vector) {
cpumask_copy(cfg->old_domain, cfg->domain);
cfg->move_in_progress =
cpumask_intersects(cfg->old_domain, cpu_online_mask);
if (d->cfg.vector) {
cpumask_copy(d->old_domain, d->domain);
d->move_in_progress =
cpumask_intersects(d->old_domain, cpu_online_mask);
}
for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
cpumask_copy(cfg->domain, tmp_mask);
d->cfg.vector = vector;
cpumask_copy(d->domain, vector_cpumask);
err = 0;
break;
}
free_cpumask_var(tmp_mask);
if (!err) {
/* cache destination APIC IDs into cfg->dest_apicid */
err = apic->cpu_mask_to_apicid_and(mask, d->domain,
&d->cfg.dest_apicid);
}
return err;
}
int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
static int assign_irq_vector(int irq, struct apic_chip_data *data,
const struct cpumask *mask)
{
int err;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
err = __assign_irq_vector(irq, cfg, mask);
err = __assign_irq_vector(irq, data, mask);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
void clear_irq_vector(int irq, struct irq_cfg *cfg)
static int assign_irq_vector_policy(int irq, int node,
struct apic_chip_data *data,
struct irq_alloc_info *info)
{
if (info && info->mask)
return assign_irq_vector(irq, data, info->mask);
if (node != NUMA_NO_NODE &&
assign_irq_vector(irq, data, cpumask_of_node(node)) == 0)
return 0;
return assign_irq_vector(irq, data, apic->target_cpus());
}
static void clear_irq_vector(int irq, struct apic_chip_data *data)
{
int cpu, vector;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
BUG_ON(!cfg->vector);
BUG_ON(!data->cfg.vector);
vector = cfg->vector;
for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
vector = data->cfg.vector;
for_each_cpu_and(cpu, data->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
cfg->vector = 0;
cpumask_clear(cfg->domain);
data->cfg.vector = 0;
cpumask_clear(data->domain);
if (likely(!cfg->move_in_progress)) {
if (likely(!data->move_in_progress)) {
raw_spin_unlock_irqrestore(&vector_lock, flags);
return;
}
for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
@ -231,10 +251,95 @@ void clear_irq_vector(int irq, struct irq_cfg *cfg)
break;
}
}
cfg->move_in_progress = 0;
data->move_in_progress = 0;
raw_spin_unlock_irqrestore(&vector_lock, flags);
}
void init_irq_alloc_info(struct irq_alloc_info *info,
const struct cpumask *mask)
{
memset(info, 0, sizeof(*info));
info->mask = mask;
}
void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
{
if (src)
*dst = *src;
else
memset(dst, 0, sizeof(*dst));
}
static void x86_vector_free_irqs(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
struct irq_data *irq_data;
int i;
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
if (irq_data && irq_data->chip_data) {
clear_irq_vector(virq + i, irq_data->chip_data);
free_apic_chip_data(irq_data->chip_data);
#ifdef CONFIG_X86_IO_APIC
if (virq + i < nr_legacy_irqs())
legacy_irq_data[virq + i] = NULL;
#endif
irq_domain_reset_irq_data(irq_data);
}
}
}
static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct irq_alloc_info *info = arg;
struct apic_chip_data *data;
struct irq_data *irq_data;
int i, err;
if (disable_apic)
return -ENXIO;
/* Currently vector allocator can't guarantee contiguous allocations */
if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
return -ENOSYS;
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
BUG_ON(!irq_data);
#ifdef CONFIG_X86_IO_APIC
if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i])
data = legacy_irq_data[virq + i];
else
#endif
data = alloc_apic_chip_data(irq_data->node);
if (!data) {
err = -ENOMEM;
goto error;
}
irq_data->chip = &lapic_controller;
irq_data->chip_data = data;
irq_data->hwirq = virq + i;
err = assign_irq_vector_policy(virq, irq_data->node, data,
info);
if (err)
goto error;
}
return 0;
error:
x86_vector_free_irqs(domain, virq, i + 1);
return err;
}
static const struct irq_domain_ops x86_vector_domain_ops = {
.alloc = x86_vector_alloc_irqs,
.free = x86_vector_free_irqs,
};
int __init arch_probe_nr_irqs(void)
{
int nr;
@ -258,8 +363,43 @@ int __init arch_probe_nr_irqs(void)
return nr_legacy_irqs();
}
#ifdef CONFIG_X86_IO_APIC
static void init_legacy_irqs(void)
{
int i, node = cpu_to_node(0);
struct apic_chip_data *data;
/*
* For legacy IRQs, start by assigning irq0 to irq15 to
* ISA_IRQ_VECTOR(i) for all CPUs.
*/
for (i = 0; i < nr_legacy_irqs(); i++) {
data = legacy_irq_data[i] = alloc_apic_chip_data(node);
BUG_ON(!data);
data->cfg.vector = ISA_IRQ_VECTOR(i);
cpumask_setall(data->domain);
irq_set_chip_data(i, data);
}
}
#else
static void init_legacy_irqs(void) { }
#endif
int __init arch_early_irq_init(void)
{
init_legacy_irqs();
x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops,
NULL);
BUG_ON(x86_vector_domain == NULL);
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
arch_init_htirq_domain(x86_vector_domain);
BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
return arch_early_ioapic_init();
}
@ -267,7 +407,7 @@ static void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
int irq, vector;
struct irq_cfg *cfg;
struct apic_chip_data *data;
/*
* vector_lock will make sure that we don't run into irq vector
@ -277,13 +417,13 @@ static void __setup_vector_irq(int cpu)
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
cfg = irq_cfg(irq);
if (!cfg)
data = apic_chip_data(irq_get_irq_data(irq));
if (!data)
continue;
if (!cpumask_test_cpu(cpu, cfg->domain))
if (!cpumask_test_cpu(cpu, data->domain))
continue;
vector = cfg->vector;
vector = data->cfg.vector;
per_cpu(vector_irq, cpu)[vector] = irq;
}
/* Mark the free vectors */
@ -292,8 +432,8 @@ static void __setup_vector_irq(int cpu)
if (irq <= VECTOR_UNDEFINED)
continue;
cfg = irq_cfg(irq);
if (!cpumask_test_cpu(cpu, cfg->domain))
data = apic_chip_data(irq_get_irq_data(irq));
if (!cpumask_test_cpu(cpu, data->domain))
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
}
raw_spin_unlock(&vector_lock);
@ -314,20 +454,20 @@ void setup_vector_irq(int cpu)
* legacy vector to irq mapping:
*/
for (irq = 0; irq < nr_legacy_irqs(); irq++)
per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq;
__setup_vector_irq(cpu);
}
int apic_retrigger_irq(struct irq_data *data)
static int apic_retrigger_irq(struct irq_data *irq_data)
{
struct irq_cfg *cfg = irqd_cfg(data);
struct apic_chip_data *data = apic_chip_data(irq_data);
unsigned long flags;
int cpu;
raw_spin_lock_irqsave(&vector_lock, flags);
cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
cpu = cpumask_first_and(data->domain, cpu_online_mask);
apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@ -340,73 +480,76 @@ void apic_ack_edge(struct irq_data *data)
ack_APIC_irq();
}
/*
* Either sets data->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
* leaves data->affinity untouched.
*/
int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
unsigned int *dest_id)
static int apic_set_affinity(struct irq_data *irq_data,
const struct cpumask *dest, bool force)
{
struct irq_cfg *cfg = irqd_cfg(data);
unsigned int irq = data->irq;
int err;
struct apic_chip_data *data = irq_data->chip_data;
int err, irq = irq_data->irq;
if (!config_enabled(CONFIG_SMP))
return -EPERM;
if (!cpumask_intersects(mask, cpu_online_mask))
if (!cpumask_intersects(dest, cpu_online_mask))
return -EINVAL;
err = assign_irq_vector(irq, cfg, mask);
if (err)
return err;
err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
err = assign_irq_vector(irq, data, dest);
if (err) {
if (assign_irq_vector(irq, cfg, data->affinity))
struct irq_data *top = irq_get_irq_data(irq);
if (assign_irq_vector(irq, data, top->affinity))
pr_err("Failed to recover vector for irq %d\n", irq);
return err;
}
cpumask_copy(data->affinity, mask);
return 0;
return IRQ_SET_MASK_OK;
}
static struct irq_chip lapic_controller = {
.irq_ack = apic_ack_edge,
.irq_set_affinity = apic_set_affinity,
.irq_retrigger = apic_retrigger_irq,
};
#ifdef CONFIG_SMP
void send_cleanup_vector(struct irq_cfg *cfg)
static void __send_cleanup_vector(struct apic_chip_data *data)
{
cpumask_var_t cleanup_mask;
if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
unsigned int i;
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
for_each_cpu_and(i, data->old_domain, cpu_online_mask)
apic->send_IPI_mask(cpumask_of(i),
IRQ_MOVE_CLEANUP_VECTOR);
} else {
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
free_cpumask_var(cleanup_mask);
}
cfg->move_in_progress = 0;
data->move_in_progress = 0;
}
void send_cleanup_vector(struct irq_cfg *cfg)
{
struct apic_chip_data *data;
data = container_of(cfg, struct apic_chip_data, cfg);
if (data->move_in_progress)
__send_cleanup_vector(data);
}
asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
ack_APIC_irq();
irq_enter();
exit_idle();
entering_ack_irq();
me = smp_processor_id();
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
int irq;
unsigned int irr;
struct irq_desc *desc;
struct irq_cfg *cfg;
struct apic_chip_data *data;
irq = __this_cpu_read(vector_irq[vector]);
@ -417,8 +560,8 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
if (!desc)
continue;
cfg = irq_cfg(irq);
if (!cfg)
data = apic_chip_data(&desc->irq_data);
if (!data)
continue;
raw_spin_lock(&desc->lock);
@ -427,10 +570,11 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
* Check if the irq migration is in progress. If so, we
* haven't received the cleanup request yet for this irq.
*/
if (cfg->move_in_progress)
if (data->move_in_progress)
goto unlock;
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
if (vector == data->cfg.vector &&
cpumask_test_cpu(me, data->domain))
goto unlock;
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@ -450,20 +594,21 @@ unlock:
raw_spin_unlock(&desc->lock);
}
irq_exit();
exiting_irq();
}
static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
{
unsigned me;
struct apic_chip_data *data;
if (likely(!cfg->move_in_progress))
data = container_of(cfg, struct apic_chip_data, cfg);
if (likely(!data->move_in_progress))
return;
me = smp_processor_id();
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain))
__send_cleanup_vector(data);
}
void irq_complete_move(struct irq_cfg *cfg)
@ -475,46 +620,11 @@ void irq_force_complete_move(int irq)
{
struct irq_cfg *cfg = irq_cfg(irq);
if (!cfg)
return;
__irq_complete_move(cfg, cfg->vector);
if (cfg)
__irq_complete_move(cfg, cfg->vector);
}
#endif
/*
* Dynamic irq allocation and deallocation. Should be replaced by irq domains!
*/
int arch_setup_hwirq(unsigned int irq, int node)
{
struct irq_cfg *cfg;
unsigned long flags;
int ret;
cfg = alloc_irq_cfg(irq, node);
if (!cfg)
return -ENOMEM;
raw_spin_lock_irqsave(&vector_lock, flags);
ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
raw_spin_unlock_irqrestore(&vector_lock, flags);
if (!ret)
irq_set_chip_data(irq, cfg);
else
free_irq_cfg(irq, cfg);
return ret;
}
void arch_teardown_hwirq(unsigned int irq)
{
struct irq_cfg *cfg = irq_cfg(irq);
free_remapped_irq(irq);
clear_irq_vector(irq, cfg);
free_irq_cfg(irq, cfg);
}
static void __init print_APIC_field(int base)
{
int i;

View File

@ -21,11 +21,13 @@ early_param("x2apic_phys", set_x2apic_phys_mode);
static bool x2apic_fadt_phys(void)
{
#ifdef CONFIG_ACPI
if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "System requires x2apic physical mode\n");
return true;
}
#endif
return false;
}

View File

@ -41,6 +41,25 @@ void common(void) {
OFFSET(pbe_orig_address, pbe, orig_address);
OFFSET(pbe_next, pbe, next);
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
BLANK();
OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax);
OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx);
OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx);
OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx);
OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si);
OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di);
OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp);
OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp);
OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip);
BLANK();
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
BLANK();
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
#endif
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
@ -49,7 +68,9 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
#ifdef CONFIG_X86_32
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif

View File

@ -17,17 +17,6 @@ void foo(void);
void foo(void)
{
OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx);
OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx);
OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx);
OFFSET(IA32_SIGCONTEXT_si, sigcontext, si);
OFFSET(IA32_SIGCONTEXT_di, sigcontext, di);
OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp);
OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp);
OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip);
BLANK();
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
@ -37,10 +26,6 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
OFFSET(TI_cpu, thread_info, cpu);
BLANK();
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@ -60,9 +45,6 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
OFFSET(saved_context_gdt_desc, saved_context, gdt_desc);
BLANK();

View File

@ -29,27 +29,6 @@ int main(void)
BLANK();
#endif
#ifdef CONFIG_IA32_EMULATION
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
BLANK();
#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
ENTRY(ax);
ENTRY(bx);
ENTRY(cx);
ENTRY(dx);
ENTRY(si);
ENTRY(di);
ENTRY(bp);
ENTRY(sp);
ENTRY(ip);
BLANK();
#undef ENTRY
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
BLANK();
#endif
#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(cx);

View File

@ -1155,10 +1155,6 @@ static __init int setup_disablecpuid(char *arg)
}
__setup("clearcpuid=", setup_disablecpuid);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,

View File

@ -39,14 +39,12 @@ void hyperv_vector_handler(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
exit_idle();
entering_irq();
inc_irq_stat(irq_hv_callback_count);
if (vmbus_handler)
vmbus_handler();
irq_exit();
exiting_irq();
set_irq_regs(old_regs);
}

View File

@ -98,7 +98,8 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
continue;
base = range_state[i].base_pfn;
if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
(mtrr_state.enabled & 1)) {
(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
(mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
/* Var MTRR contains UC entry below 1M? Skip it: */
printk(BIOS_BUG_MSG, i);
if (base + size <= (1<<(20-PAGE_SHIFT)))

View File

@ -102,59 +102,76 @@ static int check_type_overlap(u8 *prev, u8 *curr)
return 0;
}
/*
* Error/Semi-error returns:
* 0xFF - when MTRR is not enabled
* *repeat == 1 implies [start:end] spanned across MTRR range and type returned
* corresponds only to [start:*partial_end].
* Caller has to lookup again for [*partial_end:end].
/**
* mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries
*
* Return the MTRR fixed memory type of 'start'.
*
* MTRR fixed entries are divided up as follows:
* 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges
* 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges
* 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges
*
* Return Values:
* MTRR_TYPE_(type) - Matched memory type
* MTRR_TYPE_INVALID - Unmatched
*/
static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
static u8 mtrr_type_lookup_fixed(u64 start, u64 end)
{
int idx;
if (start >= 0x100000)
return MTRR_TYPE_INVALID;
/* 0x0 - 0x7FFFF */
if (start < 0x80000) {
idx = 0;
idx += (start >> 16);
return mtrr_state.fixed_ranges[idx];
/* 0x80000 - 0xBFFFF */
} else if (start < 0xC0000) {
idx = 1 * 8;
idx += ((start - 0x80000) >> 14);
return mtrr_state.fixed_ranges[idx];
}
/* 0xC0000 - 0xFFFFF */
idx = 3 * 8;
idx += ((start - 0xC0000) >> 12);
return mtrr_state.fixed_ranges[idx];
}
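For illustration, the sub-range arithmetic documented above can be exercised in isolation. The following is a stand-alone sketch (not the kernel helper itself); the layout and sub-range sizes are taken from the comment above:
/* Illustrative only: the same index arithmetic as mtrr_type_lookup_fixed(),
 * written as a free-standing function for clarity. */
static int fixed_range_index(unsigned long addr)
{
	if (addr >= 0x100000)
		return -1;				/* no fixed entry above 1M */
	if (addr < 0x80000)
		return addr >> 16;			/* eight 64KB sub-ranges */
	if (addr < 0xC0000)
		return 1 * 8 + ((addr - 0x80000) >> 14);	/* sixteen 16KB sub-ranges */
	return 3 * 8 + ((addr - 0xC0000) >> 12);		/* sixty-four 4KB sub-ranges */
}
For example, fixed_range_index(0xB8000) evaluates to 8 + 14 = 22, i.e. one of the 16KB sub-range entries covering legacy VGA memory.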
/**
* mtrr_type_lookup_variable - look up memory type in MTRR variable entries
*
* Return Value:
* MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched)
*
* Output Arguments:
* repeat - Set to 1 when [start:end] spanned across MTRR range and type
* returned corresponds only to [start:*partial_end]. Caller has
* to lookup again for [*partial_end:end].
*
* uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
* region is fully covered by a single MTRR entry or the default
* type.
*/
static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
int *repeat, u8 *uniform)
{
int i;
u64 base, mask;
u8 prev_match, curr_match;
*repeat = 0;
if (!mtrr_state_set)
return 0xFF;
*uniform = 1;
if (!mtrr_state.enabled)
return 0xFF;
/* Make end inclusive end, instead of exclusive */
/* Make end inclusive instead of exclusive */
end--;
/* Look in fixed ranges. Just return the type as per start */
if (mtrr_state.have_fixed && (start < 0x100000)) {
int idx;
if (start < 0x80000) {
idx = 0;
idx += (start >> 16);
return mtrr_state.fixed_ranges[idx];
} else if (start < 0xC0000) {
idx = 1 * 8;
idx += ((start - 0x80000) >> 14);
return mtrr_state.fixed_ranges[idx];
} else if (start < 0x1000000) {
idx = 3 * 8;
idx += ((start - 0xC0000) >> 12);
return mtrr_state.fixed_ranges[idx];
}
}
/*
* Look in variable ranges
* Look for multiple ranges matching this address and pick type
* as per MTRR precedence
*/
if (!(mtrr_state.enabled & 2))
return mtrr_state.def_type;
prev_match = 0xFF;
prev_match = MTRR_TYPE_INVALID;
for (i = 0; i < num_var_ranges; ++i) {
unsigned short start_state, end_state;
unsigned short start_state, end_state, inclusive;
if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
continue;
@ -166,20 +183,29 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
start_state = ((start & mask) == (base & mask));
end_state = ((end & mask) == (base & mask));
inclusive = ((start < base) && (end > base));
if (start_state != end_state) {
if ((start_state != end_state) || inclusive) {
/*
* We have start:end spanning across an MTRR.
* We split the region into
* either
* (start:mtrr_end) (mtrr_end:end)
* or
* (start:mtrr_start) (mtrr_start:end)
* We split the region into either
*
* - start_state:1
* (start:mtrr_end)(mtrr_end:end)
* - end_state:1
* (start:mtrr_start)(mtrr_start:end)
* - inclusive:1
* (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end)
*
* depending on kind of overlap.
* Return the type for first region and a pointer to
* the start of second region so that caller will
* lookup again on the second region.
* Note: This way we handle multiple overlaps as well.
*
* Return the type of the first region and a pointer
* to the start of next region so that caller will be
* advised to lookup again after having adjusted start
* and end.
*
* Note: This way we handle overlaps with multiple
* entries and the default type properly.
*/
if (start_state)
*partial_end = base + get_mtrr_size(mask);
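To make the splitting concrete, here is a worked example (the addresses are invented for illustration, not taken from real hardware):
/*
 * Worked example (invented addresses): a single WC MTRR covers
 * 0xD0000000-0xD7FFFFFF and the lookup is for [0xCF000000, 0xD9000000).
 * start_state and end_state are both 0 but "inclusive" is set, so the
 * first pass returns the default type for [0xCF000000, 0xD0000000) with
 * *partial_end = 0xD0000000 and *repeat = 1.  The caller then re-runs
 * the lookup for [0xD0000000, 0xD9000000), which splits again at
 * 0xD8000000, and check_type_overlap() folds the per-piece types into
 * the effective type for the whole request.
 */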
@ -193,59 +219,94 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
end = *partial_end - 1; /* end is inclusive */
*repeat = 1;
*uniform = 0;
}
if ((start & mask) != (base & mask))
continue;
curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
if (prev_match == 0xFF) {
if (prev_match == MTRR_TYPE_INVALID) {
prev_match = curr_match;
continue;
}
*uniform = 0;
if (check_type_overlap(&prev_match, &curr_match))
return curr_match;
}
if (mtrr_tom2) {
if (start >= (1ULL<<32) && (end < mtrr_tom2))
return MTRR_TYPE_WRBACK;
}
if (prev_match != 0xFF)
if (prev_match != MTRR_TYPE_INVALID)
return prev_match;
return mtrr_state.def_type;
}
/*
* Returns the effective MTRR type for the region
* Error return:
* 0xFF - when MTRR is not enabled
/**
* mtrr_type_lookup - look up memory type in MTRR
*
* Return Values:
* MTRR_TYPE_(type) - The effective MTRR type for the region
* MTRR_TYPE_INVALID - MTRR is disabled
*
* Output Argument:
* uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
* region is fully covered by a single MTRR entry or the default
* type.
*/
u8 mtrr_type_lookup(u64 start, u64 end)
u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
{
u8 type, prev_type;
u8 type, prev_type, is_uniform = 1, dummy;
int repeat;
u64 partial_end;
type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
if (!mtrr_state_set)
return MTRR_TYPE_INVALID;
if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED))
return MTRR_TYPE_INVALID;
/*
* Look up the fixed ranges first, which take priority over
* the variable ranges.
*/
if ((start < 0x100000) &&
(mtrr_state.have_fixed) &&
(mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
is_uniform = 0;
type = mtrr_type_lookup_fixed(start, end);
goto out;
}
/*
* Look up the variable ranges. Look for multiple ranges matching
* this address and pick the type as per MTRR precedence.
*/
type = mtrr_type_lookup_variable(start, end, &partial_end,
&repeat, &is_uniform);
/*
* Common path is with repeat = 0.
* However, we can have cases where [start:end] spans across some
* MTRR range. Do repeated lookups for that case here.
* MTRR ranges and/or the default type. Do repeated lookups for
* that case here.
*/
while (repeat) {
prev_type = type;
start = partial_end;
type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
is_uniform = 0;
type = mtrr_type_lookup_variable(start, end, &partial_end,
&repeat, &dummy);
if (check_type_overlap(&prev_type, &type))
return type;
goto out;
}
if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2))
type = MTRR_TYPE_WRBACK;
out:
*uniform = is_uniform;
return type;
}
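A minimal sketch of how a caller can use the new "uniform" output (the helper below is hypothetical and not part of this series; the MTRR_TYPE_* names are the standard type values):
/* Hypothetical caller: only trust the looked-up type when one MTRR (or
 * the default type) covers the whole region uniformly. */
static bool range_is_uniform_wc(u64 start, u64 end)
{
	u8 uniform;
	u8 type = mtrr_type_lookup(start, end, &uniform);

	if (type == MTRR_TYPE_INVALID)		/* MTRRs disabled */
		return false;
	return uniform && type == MTRR_TYPE_WRCOMB;
}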
@ -347,7 +408,9 @@ static void __init print_mtrr_state(void)
mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
pr_debug("MTRR fixed ranges %sabled:\n",
mtrr_state.enabled & 1 ? "en" : "dis");
((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
(mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ?
"en" : "dis");
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
for (i = 0; i < 2; ++i)
print_fixed(0x80000 + i * 0x20000, 0x04000,
@ -360,7 +423,7 @@ static void __init print_mtrr_state(void)
print_fixed_last();
}
pr_debug("MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
@ -382,7 +445,7 @@ static void __init print_mtrr_state(void)
}
/* Grab all of the MTRR state for this CPU into *state */
void __init get_mtrr_state(void)
bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
unsigned long flags;
@ -426,6 +489,8 @@ void __init get_mtrr_state(void)
post_set();
local_irq_restore(flags);
return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}
/* Some BIOS's are messed up and don't set all MTRRs the same! */

View File

@ -59,6 +59,12 @@
#define MTRR_TO_PHYS_WC_OFFSET 1000
u32 num_var_ranges;
static bool __mtrr_enabled;
static bool mtrr_enabled(void)
{
return __mtrr_enabled;
}
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
@ -286,7 +292,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
int i, replace, error;
mtrr_type ltype;
if (!mtrr_if)
if (!mtrr_enabled())
return -ENXIO;
error = mtrr_if->validate_add_page(base, size, type);
@ -435,6 +441,8 @@ static int mtrr_check(unsigned long base, unsigned long size)
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
bool increment)
{
if (!mtrr_enabled())
return -ENODEV;
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
@ -463,8 +471,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
unsigned long lbase, lsize;
int error = -EINVAL;
if (!mtrr_if)
return -ENXIO;
if (!mtrr_enabled())
return -ENODEV;
max = num_var_ranges;
/* No CPU hotplug when we change MTRR entries */
@ -523,6 +531,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
*/
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
if (!mtrr_enabled())
return -ENODEV;
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
@ -538,6 +548,9 @@ EXPORT_SYMBOL(mtrr_del);
* attempts to add a WC MTRR covering size bytes starting at base and
* logs an error if this fails.
*
* The caller should provide a power of two size on an equivalent
* power of two boundary.
*
* Drivers must store the return value to pass to mtrr_del_wc_if_needed,
* but drivers should not try to interpret that return value.
*/
@ -545,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size)
{
int ret;
if (pat_enabled)
if (pat_enabled() || !mtrr_enabled())
return 0; /* Success! (We don't need to do anything.) */
ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
@ -577,7 +590,7 @@ void arch_phys_wc_del(int handle)
EXPORT_SYMBOL(arch_phys_wc_del);
/*
* phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
* arch_phys_wc_index - translates arch_phys_wc_add's return value
* @handle: Return value from arch_phys_wc_add
*
* This will turn the return value from arch_phys_wc_add into an mtrr
@ -587,14 +600,14 @@ EXPORT_SYMBOL(arch_phys_wc_del);
* in printk line. Alas there is an illegitimate use in some ancient
* drm ioctls.
*/
int phys_wc_to_mtrr_index(int handle)
int arch_phys_wc_index(int handle)
{
if (handle < MTRR_TO_PHYS_WC_OFFSET)
return -1;
else
return handle - MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
EXPORT_SYMBOL_GPL(arch_phys_wc_index);
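For reference, the driver-side pattern these helpers are meant to support looks roughly like the sketch below (the bar_start/bar_len names and the static variables are placeholders, not taken from any real driver):
/* Sketch of intended driver usage: map the BAR write-combined via PAT,
 * then request a WC MTRR, which is a no-op on PAT-enabled systems.  The
 * returned handle is opaque and is only handed back to arch_phys_wc_del(). */
static void __iomem *fb_map;
static int fb_wc_handle;

static int example_map_fb(resource_size_t bar_start, resource_size_t bar_len)
{
	fb_map = ioremap_wc(bar_start, bar_len);
	if (!fb_map)
		return -ENOMEM;
	fb_wc_handle = arch_phys_wc_add(bar_start, bar_len);
	return 0;
}

static void example_unmap_fb(void)
{
	arch_phys_wc_del(fb_wc_handle);
	iounmap(fb_map);
}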
/*
* HACK ALERT!
@ -734,10 +747,12 @@ void __init mtrr_bp_init(void)
}
if (mtrr_if) {
__mtrr_enabled = true;
set_num_var_ranges();
init_table();
if (use_intel()) {
get_mtrr_state();
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
@ -745,10 +760,16 @@ void __init mtrr_bp_init(void)
}
}
}
if (!mtrr_enabled())
pr_info("MTRR: Disabled\n");
}
void mtrr_ap_init(void)
{
if (!mtrr_enabled())
return;
if (!use_intel() || mtrr_aps_delayed_init)
return;
/*
@ -774,6 +795,9 @@ void mtrr_save_state(void)
{
int first_cpu;
if (!mtrr_enabled())
return;
get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
@ -782,6 +806,8 @@ void mtrr_save_state(void)
void set_mtrr_aps_delayed_init(void)
{
if (!mtrr_enabled())
return;
if (!use_intel())
return;
@ -793,7 +819,7 @@ void set_mtrr_aps_delayed_init(void)
*/
void mtrr_aps_init(void)
{
if (!use_intel())
if (!use_intel() || !mtrr_enabled())
return;
/*
@ -810,7 +836,7 @@ void mtrr_aps_init(void)
void mtrr_bp_restore(void)
{
if (!use_intel())
if (!use_intel() || !mtrr_enabled())
return;
mtrr_if->set_all();
@ -818,7 +844,7 @@ void mtrr_bp_restore(void)
static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)
if (!mtrr_enabled())
return 0;
if (use_intel()) {

View File

@ -51,7 +51,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
void get_mtrr_state(void);
bool get_mtrr_state(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);

View File

@ -4,7 +4,6 @@
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/io.h>
#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/of.h>
@ -17,6 +16,7 @@
#include <linux/of_pci.h>
#include <linux/initrd.h>
#include <asm/irqdomain.h>
#include <asm/hpet.h>
#include <asm/apic.h>
#include <asm/pci_x86.h>
@ -196,38 +196,31 @@ static struct of_ioapic_type of_ioapic_type[] =
},
};
static int ioapic_xlate(struct irq_domain *domain,
struct device_node *controller,
const u32 *intspec, u32 intsize,
irq_hw_number_t *out_hwirq, u32 *out_type)
static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct of_phandle_args *irq_data = (void *)arg;
struct of_ioapic_type *it;
u32 line, idx, gsi;
struct irq_alloc_info tmp;
if (WARN_ON(intsize < 2))
if (WARN_ON(irq_data->args_count < 2))
return -EINVAL;
if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type))
return -EINVAL;
line = intspec[0];
it = &of_ioapic_type[irq_data->args[1]];
ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity);
tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
tmp.ioapic_pin = irq_data->args[0];
if (intspec[1] >= ARRAY_SIZE(of_ioapic_type))
return -EINVAL;
it = &of_ioapic_type[intspec[1]];
idx = (u32)(long)domain->host_data;
gsi = mp_pin_to_gsi(idx, line);
if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0)))
return -EBUSY;
*out_hwirq = line;
*out_type = it->out_type;
return 0;
return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp);
}
const struct irq_domain_ops ioapic_irq_domain_ops = {
.map = mp_irqdomain_map,
.unmap = mp_irqdomain_unmap,
.xlate = ioapic_xlate,
static const struct irq_domain_ops ioapic_irq_domain_ops = {
.alloc = dt_irqdomain_alloc,
.free = mp_irqdomain_free,
.activate = mp_irqdomain_activate,
.deactivate = mp_irqdomain_deactivate,
};
static void __init dtb_add_ioapic(struct device_node *dn)

View File

@ -216,7 +216,7 @@ ENTRY(system_call)
GLOBAL(system_call_after_swapgs)
movq %rsp,PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(kernel_stack),%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp
/* Construct struct pt_regs on stack */
pushq_cfi $__USER_DS /* pt_regs->ss */
@ -419,26 +419,27 @@ syscall_return:
* a completely clean 64-bit userspace context.
*/
movq RCX(%rsp),%rcx
cmpq %rcx,RIP(%rsp) /* RCX == RIP */
movq RIP(%rsp),%r11
cmpq %rcx,%r11 /* RCX == RIP */
jne opportunistic_sysret_failed
/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
* in kernel space. This essentially lets the user take over
* the kernel, since userspace controls RSP. It's not worth
* testing for canonicalness exactly -- this check detects any
* of the 17 high bits set, which is true for non-canonical
* or kernel addresses. (This will pessimize vsyscall=native.
* Big deal.)
* the kernel, since userspace controls RSP.
*
* If virtual addresses ever become wider, this will need
* If the width of the "canonical tail" ever becomes variable, this will need
* to be updated to remain correct on both old and new CPUs.
*/
.ifne __VIRTUAL_MASK_SHIFT - 47
.error "virtual address width changed -- SYSRET checks need update"
.endif
shr $__VIRTUAL_MASK_SHIFT, %rcx
jnz opportunistic_sysret_failed
/* Change top 16 bits to be the sign-extension of 47th bit */
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
jne opportunistic_sysret_failed
cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */
jne opportunistic_sysret_failed
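The sign-extension check above, expressed in C for clarity (a stand-alone sketch assuming the current 48-bit virtual address width, i.e. __VIRTUAL_MASK_SHIFT == 47; it mirrors the shl/sar pair rather than being kernel code):
/* An address is canonical iff bits 63..47 are all copies of bit 47;
 * shifting left by 16 and arithmetically back right reproduces the
 * original value only in that case. */
static inline int canonical_48(unsigned long long addr)
{
	return (unsigned long long)(((long long)addr << 16) >> 16) == addr;
}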
@ -475,8 +476,8 @@ syscall_return:
*/
syscall_return_via_sysret:
CFI_REMEMBER_STATE
/* r11 is already restored (see code above) */
RESTORE_C_REGS_EXCEPT_R11
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp),%rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
@ -533,40 +534,27 @@ GLOBAL(stub_execveat)
CFI_ENDPROC
END(stub_execveat)
#ifdef CONFIG_X86_X32_ABI
#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
.align 8
GLOBAL(stub_x32_execve)
CFI_STARTPROC
DEFAULT_FRAME 0, 8
call compat_sys_execve
jmp return_from_execve
CFI_ENDPROC
END(stub_x32_execve)
.align 8
GLOBAL(stub_x32_execveat)
CFI_STARTPROC
DEFAULT_FRAME 0, 8
call compat_sys_execveat
jmp return_from_execve
CFI_ENDPROC
END(stub_x32_execveat)
#endif
#ifdef CONFIG_IA32_EMULATION
.align 8
GLOBAL(stub32_execve)
CFI_STARTPROC
DEFAULT_FRAME 0, 8
call compat_sys_execve
jmp return_from_execve
CFI_ENDPROC
END(stub32_execve)
END(stub_x32_execve)
.align 8
GLOBAL(stub_x32_execveat)
GLOBAL(stub32_execveat)
CFI_STARTPROC
DEFAULT_FRAME 0, 8
call compat_sys_execveat
jmp return_from_execve
CFI_ENDPROC
END(stub32_execveat)
END(stub_x32_execveat)
#endif
/*
@ -622,7 +610,7 @@ ENTRY(ret_from_fork)
RESTORE_EXTRA_REGS
testl $3,CS(%rsp) # from kernel_thread?
testb $3, CS(%rsp) # from kernel_thread?
/*
* By the time we get here, we have no idea whether our pt_regs,
@ -686,8 +674,8 @@ END(irq_entries_start)
leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */
testl $3, CS-RBP(%rsp)
je 1f
testb $3, CS-RBP(%rsp)
jz 1f
SWAPGS
1:
/*
@ -741,8 +729,8 @@ ret_from_intr:
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET RBP
testl $3,CS(%rsp)
je retint_kernel
testb $3, CS(%rsp)
jz retint_kernel
/* Interrupt came from user space */
GET_THREAD_INFO(%rcx)
@ -928,6 +916,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
#ifdef CONFIG_HAVE_KVM
apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
@ -989,7 +979,7 @@ ENTRY(\sym)
.if \paranoid
.if \paranoid == 1
CFI_REMEMBER_STATE
testl $3, CS(%rsp) /* If coming from userspace, switch */
testb $3, CS(%rsp) /* If coming from userspace, switch */
jnz 1f /* stacks. */
.endif
call paranoid_entry
@ -1202,17 +1192,17 @@ ENTRY(xen_failsafe_callback)
/*CFI_REL_OFFSET ds,DS*/
CFI_REL_OFFSET r11,8
CFI_REL_OFFSET rcx,0
movw %ds,%cx
movl %ds,%ecx
cmpw %cx,0x10(%rsp)
CFI_REMEMBER_STATE
jne 1f
movw %es,%cx
movl %es,%ecx
cmpw %cx,0x18(%rsp)
jne 1f
movw %fs,%cx
movl %fs,%ecx
cmpw %cx,0x20(%rsp)
jne 1f
movw %gs,%cx
movl %gs,%ecx
cmpw %cx,0x28(%rsp)
jne 1f
/* All segments match their saved values => Category 2 (Bad IRET). */
@ -1330,8 +1320,8 @@ ENTRY(error_entry)
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
testb $3, CS+8(%rsp)
jz error_kernelspace
error_swapgs:
SWAPGS
error_sti:
@ -1382,7 +1372,7 @@ ENTRY(error_exit)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
jnz retint_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
@ -1627,7 +1617,6 @@ end_repeat_nmi:
je 1f
movq %r12, %cr2
1:
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:

View File

@ -547,7 +547,7 @@ ENTRY(early_idt_handler)
cld
cmpl $2,(%esp) # X86_TRAP_NMI
je is_nmi # Ignore NMI
je .Lis_nmi # Ignore NMI
cmpl $2,%ss:early_recursion_flag
je hlt_loop
@ -600,7 +600,7 @@ ex_entry:
pop %ecx
pop %eax
decl %ss:early_recursion_flag
is_nmi:
.Lis_nmi:
addl $8,%esp /* drop vector number and error code */
iret
ENDPROC(early_idt_handler)

View File

@ -344,7 +344,7 @@ ENTRY(early_idt_handler)
cld
cmpl $2,(%rsp) # X86_TRAP_NMI
je is_nmi # Ignore NMI
je .Lis_nmi # Ignore NMI
cmpl $2,early_recursion_flag(%rip)
jz 1f
@ -409,7 +409,7 @@ ENTRY(early_idt_handler)
popq %rcx
popq %rax
decl early_recursion_flag(%rip)
is_nmi:
.Lis_nmi:
addq $16,%rsp # drop vector number and error code
INTERRUPT_RETURN
ENDPROC(early_idt_handler)

View File

@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/io.h>
#include <asm/irqdomain.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
#include <asm/time.h>
@ -305,8 +306,6 @@ static void hpet_legacy_clockevent_register(void)
printk(KERN_DEBUG "hpet clockevent registered\n");
}
static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
@ -357,7 +356,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
hpet_enable_legacy_int();
} else {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
hpet_setup_msi_irq(hdev->irq);
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
disable_irq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
@ -423,6 +422,7 @@ static int hpet_legacy_next_event(unsigned long delta,
static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
static struct hpet_dev *hpet_devs;
static struct irq_domain *hpet_domain;
void hpet_msi_unmask(struct irq_data *data)
{
@ -473,31 +473,6 @@ static int hpet_msi_next_event(unsigned long delta,
return hpet_next_event(delta, evt, hdev->num);
}
static int hpet_setup_msi_irq(unsigned int irq)
{
if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
irq_free_hwirq(irq);
return -EINVAL;
}
return 0;
}
static int hpet_assign_irq(struct hpet_dev *dev)
{
unsigned int irq = irq_alloc_hwirq(-1);
if (!irq)
return -EINVAL;
irq_set_handler_data(irq, dev);
if (hpet_setup_msi_irq(irq))
return -EINVAL;
dev->irq = irq;
return 0;
}
static irqreturn_t hpet_interrupt_handler(int irq, void *data)
{
struct hpet_dev *dev = (struct hpet_dev *)data;
@ -540,9 +515,6 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
if (!(hdev->flags & HPET_DEV_VALID))
return;
if (hpet_setup_msi_irq(hdev->irq))
return;
hdev->cpu = cpu;
per_cpu(cpu_hpet_dev, cpu) = hdev;
evt->name = hdev->name;
@ -574,7 +546,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
unsigned int id;
unsigned int num_timers;
unsigned int num_timers_used = 0;
int i;
int i, irq;
if (hpet_msi_disable)
return;
@ -587,6 +559,10 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
num_timers++; /* Value read out starts from 0 */
hpet_print_config();
hpet_domain = hpet_create_irq_domain(hpet_blockid);
if (!hpet_domain)
return;
hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
if (!hpet_devs)
return;
@ -601,15 +577,16 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
if (!(cfg & HPET_TN_FSB_CAP))
continue;
irq = hpet_assign_irq(hpet_domain, hdev, hdev->num);
if (irq < 0)
continue;
sprintf(hdev->name, "hpet%d", i);
hdev->num = i;
hdev->irq = irq;
hdev->flags = 0;
if (cfg & HPET_TN_PERIODIC_CAP)
hdev->flags |= HPET_DEV_PERI_CAP;
hdev->num = i;
sprintf(hdev->name, "hpet%d", i);
if (hpet_assign_irq(hdev))
continue;
hdev->flags |= HPET_DEV_FSB_CAP;
hdev->flags |= HPET_DEV_VALID;
num_timers_used++;
@ -709,10 +686,6 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
}
#else
static int hpet_setup_msi_irq(unsigned int irq)
{
return 0;
}
static void hpet_msi_capability_lookup(unsigned int start_timer)
{
return;

View File

@ -329,8 +329,8 @@ static void init_8259A(int auto_eoi)
*/
outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
/* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */
outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR);
/* 8259A-1 (the master) has a slave on IR2 */
outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);
@ -342,8 +342,8 @@ static void init_8259A(int auto_eoi)
outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
/* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */
outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
/* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */
outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR);
/* 8259A-2 is a slave on master's IR2 */
outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
/* (slave's support for AEOI in flat mode is to be investigated) */

View File

@ -22,6 +22,12 @@
#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
atomic_t irq_err_count;
/* Function pointer for generic interrupt vector handling */
@ -135,6 +141,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
#endif
#ifdef CONFIG_HAVE_KVM
seq_printf(p, "%*s: ", prec, "PIN");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
seq_puts(p, " Posted-interrupt notification event\n");
seq_printf(p, "%*s: ", prec, "PIW");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
irq_stats(j)->kvm_posted_intr_wakeup_ipis);
seq_puts(p, " Posted-interrupt wakeup event\n");
#endif
return 0;
}
@ -192,8 +210,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
unsigned vector = ~regs->orig_ax;
unsigned irq;
irq_enter();
exit_idle();
entering_irq();
irq = __this_cpu_read(vector_irq[vector]);
@ -209,7 +226,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
}
}
irq_exit();
exiting_irq();
set_irq_regs(old_regs);
return 1;
@ -237,6 +254,18 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs)
}
#ifdef CONFIG_HAVE_KVM
static void dummy_handler(void) {}
static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
void kvm_set_posted_intr_wakeup_handler(void (*handler)(void))
{
if (handler)
kvm_posted_intr_wakeup_handler = handler;
else
kvm_posted_intr_wakeup_handler = dummy_handler;
}
EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler);
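A hypothetical consumer of this registration API would look roughly like the sketch below; wakeup_fn and the example_* wrappers are made up for illustration and are not part of this patch:
/* Hypothetical consumer of kvm_set_posted_intr_wakeup_handler() */
static void wakeup_fn(void)
{
	/* wake up whatever is blocked waiting for the posted interrupt */
}
static void example_register(void)
{
	kvm_set_posted_intr_wakeup_handler(wakeup_fn);
}
static void example_unregister(void)
{
	kvm_set_posted_intr_wakeup_handler(NULL);	/* falls back to dummy_handler */
}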
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
@ -244,16 +273,23 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
irq_enter();
exit_idle();
entering_ack_irq();
inc_irq_stat(kvm_posted_intr_ipis);
exiting_irq();
set_irq_regs(old_regs);
}
irq_exit();
/*
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
*/
__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
entering_ack_irq();
inc_irq_stat(kvm_posted_intr_wakeup_ipis);
kvm_posted_intr_wakeup_handler();
exiting_irq();
set_irq_regs(old_regs);
}
#endif

View File

@ -21,12 +21,6 @@
#include <asm/apic.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
int sysctl_panic_on_stackoverflow __read_mostly;

View File

@ -20,12 +20,6 @@
#include <asm/idle.h>
#include <asm/apic.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
int sysctl_panic_on_stackoverflow;
/*

View File

@ -10,12 +10,6 @@
#include <asm/apic.h>
#include <asm/trace/irq_vectors.h>
static inline void irq_work_entering_irq(void)
{
irq_enter();
ack_APIC_irq();
}
static inline void __smp_irq_work_interrupt(void)
{
inc_irq_stat(apic_irq_work_irqs);
@ -24,14 +18,14 @@ static inline void __smp_irq_work_interrupt(void)
__visible void smp_irq_work_interrupt(struct pt_regs *regs)
{
irq_work_entering_irq();
ipi_entering_ack_irq();
__smp_irq_work_interrupt();
exiting_irq();
}
__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
{
irq_work_entering_irq();
ipi_entering_ack_irq();
trace_irq_work_entry(IRQ_WORK_VECTOR);
__smp_irq_work_interrupt();
trace_irq_work_exit(IRQ_WORK_VECTOR);

View File

@ -86,7 +86,7 @@ void __init init_IRQ(void)
int i;
/*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
* these IRQ's are handled by more modern controllers like IO-APIC,
@ -94,7 +94,7 @@ void __init init_IRQ(void)
* irq's migrate etc.
*/
for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i;
x86_init.irqs.intr_init();
}
@ -144,6 +144,8 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
/* IPI for KVM to deliver interrupt to wake up tasks */
alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
#endif
/* IPI vectors for APIC spurious and error interrupts */

View File

@ -19,8 +19,8 @@
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/pci.h>
#include <linux/irqdomain.h>
#include <asm/irqdomain.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/pgalloc.h>
@ -113,11 +113,6 @@ static void __init MP_bus_info(struct mpc_bus *m)
pr_warn("Unknown bustype %s - ignoring\n", str);
}
static struct irq_domain_ops mp_ioapic_irqdomain_ops = {
.map = mp_irqdomain_map,
.unmap = mp_irqdomain_unmap,
};
static void __init MP_ioapic_info(struct mpc_ioapic *m)
{
struct ioapic_domain_cfg cfg = {

View File

@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
#ifdef CONFIG_X86_32
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
#endif
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
#if defined(CONFIG_X86_32)
.irq_enable_sysexit = native_irq_enable_sysexit,
#endif
#ifdef CONFIG_X86_64

View File

@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs);

View File

@ -302,13 +302,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
arch_end_context_switch(next_p);
/*
* Reload esp0, kernel_stack, and current_top_of_stack. This changes
* Reload esp0 and cpu_current_top_of_stack. This changes
* current_thread_info().
*/
load_sp0(tss, next);
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);

View File

@ -409,9 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Reload esp0 and ss1. This changes current_thread_info(). */
load_sp0(tss, next);
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) + THREAD_SIZE);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
*/

View File

@ -1222,8 +1222,7 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
init_apic_mappings();
if (x86_io_apic_ops.init)
x86_io_apic_ops.init();
io_apic_init_mappings();
kvm_guest_init();

View File

@ -170,8 +170,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
asmlinkage __visible void smp_reboot_interrupt(void)
{
ack_APIC_irq();
irq_enter();
ipi_entering_ack_irq();
stop_this_cpu(NULL);
irq_exit();
}
@ -265,12 +264,6 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs)
*/
}
static inline void smp_entering_irq(void)
{
ack_APIC_irq();
irq_enter();
}
__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
{
/*
@ -279,7 +272,7 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
* scheduler_ipi(). This is OK, since those functions are allowed
* to nest.
*/
smp_entering_irq();
ipi_entering_ack_irq();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
@ -297,14 +290,14 @@ static inline void __smp_call_function_interrupt(void)
__visible void smp_call_function_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
ipi_entering_ack_irq();
__smp_call_function_interrupt();
exiting_irq();
}
__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
ipi_entering_ack_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
@ -319,14 +312,14 @@ static inline void __smp_call_function_single_interrupt(void)
__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
ipi_entering_ack_irq();
__smp_call_function_single_interrupt();
exiting_irq();
}
__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
ipi_entering_ack_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);

View File

@ -513,6 +513,40 @@ void __inquire_remote_apic(int apicid)
}
}
/*
* The Multiprocessor Specification 1.4 (1997) example code suggests
* that there should be a 10ms delay between the BSP asserting INIT
* and de-asserting INIT, when starting a remote processor.
* But that slows boot and resume on modern processors, which include
* many cores and don't require that delay.
*
* Cmdline "cpu_init_udelay=" is available to override this delay.
* Modern processor families are quirked to remove the delay entirely.
*/
#define UDELAY_10MS_DEFAULT 10000
static unsigned int init_udelay = UDELAY_10MS_DEFAULT;
static int __init cpu_init_udelay(char *str)
{
get_option(&str, &init_udelay);
return 0;
}
early_param("cpu_init_udelay", cpu_init_udelay);
static void __init smp_quirk_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
if (init_udelay != UDELAY_10MS_DEFAULT)
return;
/* if modern processor, use no delay */
if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
init_udelay = 0;
}
/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
@ -555,7 +589,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
unsigned long send_status = 0, accept_status = 0;
int maxlvt, num_starts, j;
maxlvt = lapic_get_maxlvt();
@ -583,7 +617,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
mdelay(10);
udelay(init_udelay);
pr_debug("Deasserting INIT\n");
@ -651,6 +685,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
accept_status = (apic_read(APIC_ESR) & 0xEF);
@ -792,8 +827,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
}
/*
@ -1176,6 +1209,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
uv_system_init();
set_mtrr_aps_delayed_init();
smp_quirk_init_udelay();
}
void arch_enable_nonboot_cpus_begin(void)

View File

@ -997,8 +997,8 @@ void __init trap_init(void)
#endif
#ifdef CONFIG_X86_32
set_system_trap_gate(SYSCALL_VECTOR, &system_call);
set_bit(SYSCALL_VECTOR, used_vectors);
set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
/*

View File

@ -111,11 +111,9 @@ EXPORT_SYMBOL_GPL(x86_platform);
#if defined(CONFIG_PCI_MSI)
struct x86_msi_ops x86_msi = {
.setup_msi_irqs = native_setup_msi_irqs,
.compose_msi_msg = native_compose_msi_msg,
.teardown_msi_irq = native_teardown_msi_irq,
.teardown_msi_irqs = default_teardown_msi_irqs,
.restore_msi_irqs = default_restore_msi_irqs,
.setup_hpet_msi = default_setup_hpet_msi,
};
/* MSI arch specific hooks */
@ -141,13 +139,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
#endif
struct x86_io_apic_ops x86_io_apic_ops = {
.init = native_io_apic_init_mappings,
.read = native_io_apic_read,
.write = native_io_apic_write,
.modify = native_io_apic_modify,
.disable = native_disable_io_apic,
.print_entries = native_io_apic_print_entries,
.set_affinity = native_ioapic_set_affinity,
.setup_entry = native_setup_ioapic_entry,
.eoi_ioapic_pin = native_eoi_ioapic_pin,
};

View File

@ -90,7 +90,7 @@ struct lguest_data lguest_data = {
.noirq_iret = (u32)lguest_noirq_iret,
.kernel_address = PAGE_OFFSET,
.blocked_interrupts = { 1 }, /* Block timer interrupts */
.syscall_vec = SYSCALL_VECTOR,
.syscall_vec = IA32_SYSCALL_VECTOR,
};
/*G:037
@ -866,7 +866,7 @@ static void __init lguest_init_IRQ(void)
for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
/* Some systems map "vectors" to interrupts weirdly. Not us! */
__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
if (i != SYSCALL_VECTOR)
if (i != IA32_SYSCALL_VECTOR)
set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
}

View File

@ -40,6 +40,6 @@ else
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
lib-y += clear_page_64.o copy_page_64.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o copy_user_nocache_64.o
lib-y += copy_user_64.o
lib-y += cmpxchg16b_emu.o
endif

View File

@ -16,30 +16,6 @@
#include <asm/asm.h>
#include <asm/smap.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
jz 102f /* already aligned */
subl $8,%ecx
negl %ecx
subl %ecx,%edx
100: movb (%rsi),%al
101: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 100b
102:
.section .fixup,"ax"
103: addl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
_ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b)
.endm
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
CFI_STARTPROC
@ -266,3 +242,95 @@ ENTRY(copy_user_enhanced_fast_string)
_ASM_EXTABLE(1b,12b)
CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination/source out of cache for more performance.
*/
ENTRY(__copy_user_nocache)
CFI_STARTPROC
ASM_STAC
cmpl $8,%edx
jb 20f /* less than 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz 17f
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movnti %r8,(%rdi)
6: movnti %r9,1*8(%rdi)
7: movnti %r10,2*8(%rdi)
8: movnti %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movnti %r8,4*8(%rdi)
14: movnti %r9,5*8(%rdi)
15: movnti %r10,6*8(%rdi)
16: movnti %r11,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
17: movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
18: movq (%rsi),%r8
19: movnti %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz 18b
20: andl %edx,%edx
jz 23f
movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 21b
23: xorl %eax,%eax
ASM_CLAC
sfence
ret
.section .fixup,"ax"
30: shll $6,%ecx
addl %ecx,%edx
jmp 60f
40: lea (%rdx,%rcx,8),%rdx
jmp 60f
50: movl %ecx,%edx
60: sfence
jmp copy_user_handle_tail
.previous
_ASM_EXTABLE(1b,30b)
_ASM_EXTABLE(2b,30b)
_ASM_EXTABLE(3b,30b)
_ASM_EXTABLE(4b,30b)
_ASM_EXTABLE(5b,30b)
_ASM_EXTABLE(6b,30b)
_ASM_EXTABLE(7b,30b)
_ASM_EXTABLE(8b,30b)
_ASM_EXTABLE(9b,30b)
_ASM_EXTABLE(10b,30b)
_ASM_EXTABLE(11b,30b)
_ASM_EXTABLE(12b,30b)
_ASM_EXTABLE(13b,30b)
_ASM_EXTABLE(14b,30b)
_ASM_EXTABLE(15b,30b)
_ASM_EXTABLE(16b,30b)
_ASM_EXTABLE(18b,40b)
_ASM_EXTABLE(19b,40b)
_ASM_EXTABLE(21b,50b)
_ASM_EXTABLE(22b,50b)
CFI_ENDPROC
ENDPROC(__copy_user_nocache)
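For readers who prefer C, here is a rough userspace sketch of the same non-temporal store pattern using SSE2 intrinsics. It is only an illustration: it omits the exception handling, SMAP (stac/clac) and user-pointer semantics of the assembly above, streams one quadword at a time instead of the 4x2 register schedule, and assumes an x86-64 build with an 8-byte aligned destination:
#include <emmintrin.h>
#include <stddef.h>
static void stream_copy(void *dst, const void *src, size_t len)
{
	long long *d = dst;
	const long long *s = src;
	unsigned char *db;
	const unsigned char *sb;
	while (len >= 64) {			/* 64-byte blocks, labels 1..16 above */
		_mm_stream_si64(d++, *s++);
		_mm_stream_si64(d++, *s++);
		_mm_stream_si64(d++, *s++);
		_mm_stream_si64(d++, *s++);
		_mm_stream_si64(d++, *s++);
		_mm_stream_si64(d++, *s++);
		_mm_stream_si64(d++, *s++);
		_mm_stream_si64(d++, *s++);
		len -= 64;
	}
	while (len >= 8) {			/* quadword tail, labels 18..19 */
		_mm_stream_si64(d++, *s++);
		len -= 8;
	}
	db = (unsigned char *)d;		/* byte tail, labels 21..22 */
	sb = (const unsigned char *)s;
	while (len--)
		*db++ = *sb++;
	_mm_sfence();				/* order the non-temporal stores */
}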

View File

@ -1,136 +0,0 @@
/*
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
* Copyright 2002 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v2.
*
* Functions to copy from and to user space.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#define FIX_ALIGNMENT 1
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/asm.h>
#include <asm/smap.h>
.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
jz 102f /* already aligned */
subl $8,%ecx
negl %ecx
subl %ecx,%edx
100: movb (%rsi),%al
101: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 100b
102:
.section .fixup,"ax"
103: addl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
_ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b)
#endif
.endm
/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination/source out of cache for more performance.
*/
ENTRY(__copy_user_nocache)
CFI_STARTPROC
ASM_STAC
cmpl $8,%edx
jb 20f /* less than 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz 17f
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movnti %r8,(%rdi)
6: movnti %r9,1*8(%rdi)
7: movnti %r10,2*8(%rdi)
8: movnti %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movnti %r8,4*8(%rdi)
14: movnti %r9,5*8(%rdi)
15: movnti %r10,6*8(%rdi)
16: movnti %r11,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
17: movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
18: movq (%rsi),%r8
19: movnti %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz 18b
20: andl %edx,%edx
jz 23f
movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 21b
23: xorl %eax,%eax
ASM_CLAC
sfence
ret
.section .fixup,"ax"
30: shll $6,%ecx
addl %ecx,%edx
jmp 60f
40: lea (%rdx,%rcx,8),%rdx
jmp 60f
50: movl %ecx,%edx
60: sfence
jmp copy_user_handle_tail
.previous
_ASM_EXTABLE(1b,30b)
_ASM_EXTABLE(2b,30b)
_ASM_EXTABLE(3b,30b)
_ASM_EXTABLE(4b,30b)
_ASM_EXTABLE(5b,30b)
_ASM_EXTABLE(6b,30b)
_ASM_EXTABLE(7b,30b)
_ASM_EXTABLE(8b,30b)
_ASM_EXTABLE(9b,30b)
_ASM_EXTABLE(10b,30b)
_ASM_EXTABLE(11b,30b)
_ASM_EXTABLE(12b,30b)
_ASM_EXTABLE(13b,30b)
_ASM_EXTABLE(14b,30b)
_ASM_EXTABLE(15b,30b)
_ASM_EXTABLE(16b,30b)
_ASM_EXTABLE(18b,40b)
_ASM_EXTABLE(19b,40b)
_ASM_EXTABLE(21b,50b)
_ASM_EXTABLE(22b,50b)
CFI_ENDPROC
ENDPROC(__copy_user_nocache)

View File

@ -82,7 +82,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
* MTRR is UC or WC. UC_MINUS gets the real intention, of the
* user, which is "WC if the MTRR is WC, UC if you can't do that."
*/
if (!pat_enabled && pgprot_val(prot) ==
if (!pat_enabled() && pgprot_val(prot) ==
(__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC)))
prot = __pgprot(__PAGE_KERNEL |
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));

View File

@ -234,10 +234,11 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
/*
* Ideally, this should be:
* pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
* pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
*
* Till we fix all X drivers to use ioremap_wc(), we will use
* UC MINUS.
* UC MINUS. Drivers that are certain they need or can already
* be converted over to strong UC can use ioremap_uc().
*/
enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
@ -246,6 +247,39 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
}
EXPORT_SYMBOL(ioremap_nocache);
/**
* ioremap_uc - map bus memory into CPU space as strongly uncachable
* @phys_addr: bus address of the memory
* @size: size of the resource to map
*
* ioremap_uc performs a platform specific sequence of operations to
* make bus memory CPU accessible via the readb/readw/readl/writeb/
* writew/writel functions and the other mmio helpers. The returned
* address is not guaranteed to be usable directly as a virtual
* address.
*
* This version of ioremap ensures that the memory is marked with a strong
* preference as completely uncachable on the CPU when possible. For non-PAT
* systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
* systems this will set the PAT entry for the pages as strong UC. This call
* will honor existing caching rules from things like the PCI bus. Note that
* there are other caches and buffers on many busses. In particular driver
* authors should read up on PCI writes.
*
* It's useful if some control registers are in such an area and
* write combining or read caching is not desirable:
*
* Must be freed with iounmap.
*/
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
return __ioremap_caller(phys_addr, size, pcm,
__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(ioremap_uc);
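A minimal, hypothetical driver-side sketch of the intended use follows; EXAMPLE_BAR_PHYS, EXAMPLE_BAR_LEN and the register offset are made up for illustration and are not part of this patch:
#include <linux/io.h>
#include <linux/errno.h>
#define EXAMPLE_BAR_PHYS	0xfed00000UL	/* hypothetical register block */
#define EXAMPLE_BAR_LEN		0x1000UL
static void __iomem *example_regs;
static int example_map_regs(void)
{
	/* Strongly uncachable on both PAT and non-PAT systems */
	example_regs = ioremap_uc(EXAMPLE_BAR_PHYS, EXAMPLE_BAR_LEN);
	if (!example_regs)
		return -ENOMEM;
	writel(0x1, example_regs + 0x10);	/* hypothetical control register */
	(void)readl(example_regs + 0x10);	/* read back to post the write */
	return 0;
}
static void example_unmap_regs(void)
{
	iounmap(example_regs);			/* must be freed with iounmap */
}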
/**
* ioremap_wc - map memory into CPU space write combined
* @phys_addr: bus address of the memory
@ -258,7 +292,7 @@ EXPORT_SYMBOL(ioremap_nocache);
*/
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
if (pat_enabled)
if (pat_enabled())
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
__builtin_return_address(0));
else
@ -331,7 +365,7 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
int arch_ioremap_pud_supported(void)
int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
return cpu_has_gbpages;
@ -340,7 +374,7 @@ int arch_ioremap_pud_supported(void)
#endif
}
int arch_ioremap_pmd_supported(void)
int __init arch_ioremap_pmd_supported(void)
{
return cpu_has_pse;
}

View File

@ -129,16 +129,15 @@ within(unsigned long addr, unsigned long start, unsigned long end)
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
void *vend = vaddr + size - 1;
unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
void *vend = vaddr + size;
void *p;
mb();
for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
clflushopt(vaddr);
/*
* Flush any possible final partial cacheline:
*/
clflushopt(vend);
for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
p < vend; p += boot_cpu_data.x86_clflush_size)
clflushopt(p);
mb();
}
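The rewritten loop's arithmetic can be illustrated with a small userspace sketch (not kernel code); the 64-byte line size, start address and length are hypothetical:
#include <stdint.h>
#include <stdio.h>
int main(void)
{
	const uintptr_t clflush_size = 64;	/* stand-in for boot_cpu_data.x86_clflush_size */
	const uintptr_t clflush_mask = clflush_size - 1;
	uintptr_t vaddr = 0x1030;		/* hypothetical, not line aligned */
	uintptr_t vend  = vaddr + 200;		/* exclusive end, as in the new code */
	uintptr_t p;
	for (p = vaddr & ~clflush_mask; p < vend; p += clflush_size)
		printf("flush line at %#lx\n", (unsigned long)p);
	/* Prints 0x1000, 0x1040, 0x1080 and 0x10c0: every line overlapping
	   [0x1030, 0x10f8), including the partial lines at both ends, so the
	   separate trailing clflushopt() of the old code is no longer needed. */
	return 0;
}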
@ -418,13 +417,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
phys_addr_t phys_addr;
unsigned long offset;
enum pg_level level;
unsigned long psize;
unsigned long pmask;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
psize = page_level_size(level);
pmask = page_level_mask(level);
offset = virt_addr & ~pmask;
phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
@ -1468,6 +1465,9 @@ int _set_memory_uc(unsigned long addr, int numpages)
{
/*
* for now UC MINUS. see comments in ioremap_nocache()
* If you really need strong UC use ioremap_uc(), but note
* that you cannot override IO areas with set_memory_*() as
* these helpers cannot work with IO memory.
*/
return change_page_attr_set(&addr, numpages,
cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
@ -1571,7 +1571,7 @@ int set_memory_wc(unsigned long addr, int numpages)
{
int ret;
if (!pat_enabled)
if (!pat_enabled())
return set_memory_uc(addr, numpages);
ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,

View File

@ -33,13 +33,15 @@
#include "pat_internal.h"
#include "mm_internal.h"
#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
static inline void pat_disable(const char *reason)
{
pat_enabled = 0;
printk(KERN_INFO "%s\n", reason);
__pat_enabled = 0;
pr_info("x86/PAT: %s\n", reason);
}
static int __init nopat(char *str)
@ -48,13 +50,12 @@ static int __init nopat(char *str)
return 0;
}
early_param("nopat", nopat);
#else
static inline void pat_disable(const char *reason)
{
(void)reason;
}
#endif
bool pat_enabled(void)
{
return !!__pat_enabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
int pat_debug_enable;
@ -188,7 +189,7 @@ void pat_init_cache_modes(void)
pat_msg + 4 * i);
update_cache_mode_entry(i, cache);
}
pr_info("PAT configuration [0-7]: %s\n", pat_msg);
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
@ -198,7 +199,7 @@ void pat_init(void)
u64 pat;
bool boot_cpu = !boot_pat_state;
if (!pat_enabled)
if (!pat_enabled())
return;
if (!cpu_has_pat) {
@ -211,8 +212,7 @@ void pat_init(void)
* switched to PAT on the boot CPU. We have no way to
* undo PAT.
*/
printk(KERN_ERR "PAT enabled, "
"but not supported by secondary CPU\n");
pr_err("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
BUG();
}
}
@ -267,9 +267,9 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end,
* request is for WB.
*/
if (req_type == _PAGE_CACHE_MODE_WB) {
u8 mtrr_type;
u8 mtrr_type, uniform;
mtrr_type = mtrr_type_lookup(start, end);
mtrr_type = mtrr_type_lookup(start, end, &uniform);
if (mtrr_type != MTRR_TYPE_WRBACK)
return _PAGE_CACHE_MODE_UC_MINUS;
@ -347,7 +347,7 @@ static int reserve_ram_pages_type(u64 start, u64 end,
page = pfn_to_page(pfn);
type = get_page_memtype(page);
if (type != -1) {
pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
start, end - 1, type, req_type);
if (new_type)
*new_type = type;
@ -400,7 +400,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
BUG_ON(start >= end); /* end is exclusive */
if (!pat_enabled) {
if (!pat_enabled()) {
/* This is identical to page table setting without PAT */
if (new_type) {
if (req_type == _PAGE_CACHE_MODE_WC)
@ -451,9 +451,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
err = rbt_memtype_check_insert(new, new_type);
if (err) {
printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
cattr_name(new->type), cattr_name(req_type));
pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
cattr_name(new->type), cattr_name(req_type));
kfree(new);
spin_unlock(&memtype_lock);
@ -475,7 +475,7 @@ int free_memtype(u64 start, u64 end)
int is_range_ram;
struct memtype *entry;
if (!pat_enabled)
if (!pat_enabled())
return 0;
/* Low ISA region is always mapped WB. No need to track */
@ -497,8 +497,8 @@ int free_memtype(u64 start, u64 end)
spin_unlock(&memtype_lock);
if (!entry) {
printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
}
@ -623,13 +623,13 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
u64 to = from + size;
u64 cursor = from;
if (!pat_enabled)
if (!pat_enabled())
return 1;
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
current->comm, from, to - 1);
pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;
@ -659,7 +659,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
* caching for the high addresses through the KEN pin, but
* we maintain the tradition of paranoia in this code.
*/
if (!pat_enabled &&
if (!pat_enabled() &&
!(boot_cpu_has(X86_FEATURE_MTRR) ||
boot_cpu_has(X86_FEATURE_K6_MTRR) ||
boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
@ -698,8 +698,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
"for [mem %#010Lx-%#010Lx]\n",
pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid,
cattr_name(pcm),
base, (unsigned long long)(base + size-1));
@ -729,12 +728,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
* the type requested matches the type of first page in the range.
*/
if (is_ram) {
if (!pat_enabled)
if (!pat_enabled())
return 0;
pcm = lookup_memtype(paddr);
if (want_pcm != pcm) {
printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
(unsigned long long)paddr,
@ -755,13 +754,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
if (strict_prot ||
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
" for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
(unsigned long long)paddr,
(unsigned long long)(paddr + size - 1),
cattr_name(pcm));
pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
(unsigned long long)paddr,
(unsigned long long)(paddr + size - 1),
cattr_name(pcm));
return -EINVAL;
}
/*
@ -844,7 +842,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return ret;
}
if (!pat_enabled)
if (!pat_enabled())
return 0;
/*
@ -872,7 +870,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
{
enum page_cache_mode pcm;
if (!pat_enabled)
if (!pat_enabled())
return 0;
/* Set prot based on lookup */
@ -913,7 +911,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
pgprot_t pgprot_writecombine(pgprot_t prot)
{
if (pat_enabled)
if (pat_enabled())
return __pgprot(pgprot_val(prot) |
cachemode2protval(_PAGE_CACHE_MODE_WC));
else
@ -996,7 +994,7 @@ static const struct file_operations memtype_fops = {
static int __init pat_memtype_list_init(void)
{
if (pat_enabled) {
if (pat_enabled()) {
debugfs_create_file("pat_memtype_list", S_IRUSR,
arch_debugfs_dir, NULL, &memtype_fops);
}

View File

@ -4,7 +4,7 @@
extern int pat_debug_enable;
#define dprintk(fmt, arg...) \
do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0)
struct memtype {
u64 start;

View File

@ -160,9 +160,9 @@ success:
return 0;
failure:
printk(KERN_INFO "%s:%d conflicting memory types "
"%Lx-%Lx %s<->%s\n", current->comm, current->pid, start,
end, cattr_name(found_type), cattr_name(match->type));
pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
current->comm, current->pid, start, end,
cattr_name(found_type), cattr_name(match->type));
return -EBUSY;
}

View File

@ -563,16 +563,31 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
}
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
/**
* pud_set_huge - setup kernel PUD mapping
*
* MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
* function sets up a huge page only if any of the following conditions are met:
*
* - MTRRs are disabled, or
*
* - MTRRs are enabled and the range is completely covered by a single MTRR, or
*
* - MTRRs are enabled and the corresponding MTRR memory type is WB, which
* has no effect on the requested PAT memory type.
*
* Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
* page mapping attempt fails.
*
* Returns 1 on success and 0 on failure.
*/
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
u8 mtrr;
u8 mtrr, uniform;
/*
* Do not use a huge page when the range is covered by non-WB type
* of MTRRs.
*/
mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE);
if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
(mtrr != MTRR_TYPE_WRBACK))
return 0;
prot = pgprot_4k_2_large(prot);
@ -584,17 +599,24 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
return 1;
}
/**
* pmd_set_huge - setup kernel PMD mapping
*
* See text over pud_set_huge() above.
*
* Returns 1 on success and 0 on failure.
*/
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
u8 mtrr;
u8 mtrr, uniform;
/*
* Do not use a huge page when the range is covered by non-WB type
* of MTRRs.
*/
mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE);
if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
(mtrr != MTRR_TYPE_WRBACK)) {
pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
__func__, addr, addr + PMD_SIZE);
return 0;
}
prot = pgprot_4k_2_large(prot);
@ -605,6 +627,11 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
return 1;
}
/**
* pud_clear_huge - clear kernel PUD mapping when it is set
*
* Returns 1 on success and 0 on failure (no PUD map is found).
*/
int pud_clear_huge(pud_t *pud)
{
if (pud_large(*pud)) {
@ -615,6 +642,11 @@ int pud_clear_huge(pud_t *pud)
return 0;
}
/**
* pmd_clear_huge - clear kernel PMD mapping when it is set
*
* Returns 1 on success and 0 on failure (no PMD map is found).
*/
int pmd_clear_huge(pmd_t *pmd)
{
if (pmd_large(*pmd)) {

View File

@ -429,12 +429,12 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
* Caller can followup with UC MINUS request and add a WC mtrr if there
* is a free mtrr slot.
*/
if (!pat_enabled && write_combine)
if (!pat_enabled() && write_combine)
return -EINVAL;
if (pat_enabled && write_combine)
if (pat_enabled() && write_combine)
prot |= cachemode2protval(_PAGE_CACHE_MODE_WC);
else if (pat_enabled || boot_cpu_data.x86 > 3)
else if (pat_enabled() || boot_cpu_data.x86 > 3)
/*
* ioremap() and ioremap_nocache() defaults to UC MINUS for now.
* To avoid attribute conflicts, request UC MINUS here

View File

@ -208,6 +208,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
static int intel_mid_pci_irq_enable(struct pci_dev *dev)
{
struct irq_alloc_info info;
int polarity;
if (dev->irq_managed && dev->irq > 0)
@ -217,14 +218,13 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
polarity = 0; /* active high */
else
polarity = 1; /* active low */
ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity);
/*
* MRST only has an IOAPIC, the PCI irq lines are 1:1 mapped to
* IOAPIC RTE entries, so we just enable RTE for the device.
*/
if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev)))
return -EBUSY;
if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info) < 0)
return -EBUSY;
dev->irq_managed = 1;

View File

@ -146,19 +146,20 @@ static void __init pirq_peer_trick(void)
/*
* Code for querying and setting of IRQ routes on various interrupt routers.
* PIC Edge/Level Control Registers (ELCR) 0x4d0 & 0x4d1.
*/
void eisa_set_level_irq(unsigned int irq)
void elcr_set_level_irq(unsigned int irq)
{
unsigned char mask = 1 << (irq & 7);
unsigned int port = 0x4d0 + (irq >> 3);
unsigned char val;
static u16 eisa_irq_mask;
static u16 elcr_irq_mask;
if (irq >= 16 || (1 << irq) & eisa_irq_mask)
if (irq >= 16 || (1 << irq) & elcr_irq_mask)
return;
eisa_irq_mask |= (1 << irq);
elcr_irq_mask |= (1 << irq);
printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq);
val = inb(port);
if (!(val & mask)) {
@ -965,11 +966,11 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
} else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
msg = "found";
eisa_set_level_irq(irq);
elcr_set_level_irq(irq);
} else if (newirq && r->set &&
(dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
if (r->set(pirq_router_dev, dev, pirq, newirq)) {
eisa_set_level_irq(newirq);
elcr_set_level_irq(newirq);
msg = "assigned";
irq = newirq;
}

View File

@ -1,4 +1,5 @@
# Platform specific code goes here
obj-y += atom/
obj-y += ce4100/
obj-y += efi/
obj-y += geode/

View File

@ -0,0 +1 @@
obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o

View File

@ -0,0 +1,183 @@
/*
* Intel SOC Punit device state debug driver
* Punit controls power management for North Complex devices (Graphics
* blocks, Image Signal Processing, video processing, display, DSP etc.)
*
* Copyright (c) 2015, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/io.h>
#include <asm/cpu_device_id.h>
#include <asm/iosf_mbi.h>
/* Side band Interface port */
#define PUNIT_PORT 0x04
/* Power gate status reg */
#define PWRGT_STATUS 0x61
/* Subsystem config/status Video processor */
#define VED_SS_PM0 0x32
/* Subsystem config/status ISP (Image Signal Processor) */
#define ISP_SS_PM0 0x39
/* Subsystem config/status Input/output controller */
#define MIO_SS_PM 0x3B
/* Shift bits for getting status for video, isp and i/o */
#define SSS_SHIFT 24
/* Shift bits for getting status for graphics rendering */
#define RENDER_POS 0
/* Shift bits for getting status for media control */
#define MEDIA_POS 2
/* Shift bits for getting status for Valley View/Baytrail display */
#define VLV_DISPLAY_POS 6
/* Subsystem config/status display for Cherry Trail SOC */
#define CHT_DSP_SSS 0x36
/* Shift bits for getting status for display */
#define CHT_DSP_SSS_POS 16
struct punit_device {
char *name;
int reg;
int sss_pos;
};
static const struct punit_device punit_device_byt[] = {
{ "GFX RENDER", PWRGT_STATUS, RENDER_POS },
{ "GFX MEDIA", PWRGT_STATUS, MEDIA_POS },
{ "DISPLAY", PWRGT_STATUS, VLV_DISPLAY_POS },
{ "VED", VED_SS_PM0, SSS_SHIFT },
{ "ISP", ISP_SS_PM0, SSS_SHIFT },
{ "MIO", MIO_SS_PM, SSS_SHIFT },
{ NULL }
};
static const struct punit_device punit_device_cht[] = {
{ "GFX RENDER", PWRGT_STATUS, RENDER_POS },
{ "GFX MEDIA", PWRGT_STATUS, MEDIA_POS },
{ "DISPLAY", CHT_DSP_SSS, CHT_DSP_SSS_POS },
{ "VED", VED_SS_PM0, SSS_SHIFT },
{ "ISP", ISP_SS_PM0, SSS_SHIFT },
{ "MIO", MIO_SS_PM, SSS_SHIFT },
{ NULL }
};
static const char * const dstates[] = {"D0", "D0i1", "D0i2", "D0i3"};
static int punit_dev_state_show(struct seq_file *seq_file, void *unused)
{
u32 punit_pwr_status;
struct punit_device *punit_devp = seq_file->private;
int index;
int status;
seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n");
while (punit_devp->name) {
status = iosf_mbi_read(PUNIT_PORT, BT_MBI_PMC_READ,
punit_devp->reg,
&punit_pwr_status);
if (status) {
seq_printf(seq_file, "%9s : Read Failed\n",
punit_devp->name);
} else {
index = (punit_pwr_status >> punit_devp->sss_pos) & 3;
seq_printf(seq_file, "%9s : %s\n", punit_devp->name,
dstates[index]);
}
punit_devp++;
}
return 0;
}
static int punit_dev_state_open(struct inode *inode, struct file *file)
{
return single_open(file, punit_dev_state_show, inode->i_private);
}
static const struct file_operations punit_dev_state_ops = {
.open = punit_dev_state_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *punit_dbg_file;
static int punit_dbgfs_register(struct punit_device *punit_device)
{
static struct dentry *dev_state;
punit_dbg_file = debugfs_create_dir("punit_atom", NULL);
if (!punit_dbg_file)
return -ENXIO;
dev_state = debugfs_create_file("dev_power_state", S_IFREG | S_IRUGO,
punit_dbg_file, punit_device,
&punit_dev_state_ops);
if (!dev_state) {
pr_err("punit_dev_state register failed\n");
debugfs_remove(punit_dbg_file);
return -ENXIO;
}
return 0;
}
static void punit_dbgfs_unregister(void)
{
debugfs_remove_recursive(punit_dbg_file);
}
#define ICPU(model, drv_data) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\
(kernel_ulong_t)&drv_data }
static const struct x86_cpu_id intel_punit_cpu_ids[] = {
ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */
ICPU(76, punit_device_cht), /* Braswell, Cherry Trail */
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids);
static int __init punit_atom_debug_init(void)
{
const struct x86_cpu_id *id;
int ret;
id = x86_match_cpu(intel_punit_cpu_ids);
if (!id)
return -ENODEV;
ret = punit_dbgfs_register((struct punit_device *)id->driver_data);
if (ret < 0)
return ret;
return 0;
}
static void __exit punit_atom_debug_exit(void)
{
punit_dbgfs_unregister();
}
module_init(punit_atom_debug_init);
module_exit(punit_atom_debug_exit);
MODULE_AUTHOR("Kumar P, Mahesh <mahesh.kumar.p@intel.com>");
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_DESCRIPTION("Driver for Punit devices states debugging");
MODULE_LICENSE("GPL v2");

View File

@ -27,6 +27,7 @@ static struct platform_device wdt_dev = {
static int tangier_probe(struct platform_device *pdev)
{
int gsi;
struct irq_alloc_info info;
struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
if (!pdata)
@ -34,8 +35,8 @@ static int tangier_probe(struct platform_device *pdev)
/* IOAPIC builds identity mapping between GSI and IRQ on MID */
gsi = pdata->irq;
if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) ||
mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) {
ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0);
if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) {
dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
gsi);
return -EINVAL;

View File

@ -81,26 +81,34 @@ static unsigned long __init intel_mid_calibrate_tsc(void)
return 0;
}
static void __init intel_mid_setup_bp_timer(void)
{
apbt_time_init();
setup_boot_APIC_clock();
}
static void __init intel_mid_time_init(void)
{
sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
switch (intel_mid_timer_options) {
case INTEL_MID_TIMER_APBT_ONLY:
break;
case INTEL_MID_TIMER_LAPIC_APBT:
x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
/* Use apbt and local apic */
x86_init.timers.setup_percpu_clockev = intel_mid_setup_bp_timer;
x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
break;
return;
default:
if (!boot_cpu_has(X86_FEATURE_ARAT))
break;
/* Lapic only, no apbt */
x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
return;
}
/* we need at least one APB timer */
pre_init_apic_IRQ0();
apbt_time_init();
x86_init.timers.setup_percpu_clockev = apbt_time_init;
}
static void intel_mid_arch_setup(void)

View File

@ -95,18 +95,16 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table)
pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz, irq = %d\n",
totallen, (u32)pentry->phys_addr,
pentry->freq_hz, pentry->irq);
if (!pentry->irq)
continue;
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
mp_irq.irqflag = 5;
mp_irq.srcbus = MP_BUS_ISA;
mp_irq.srcbusirq = pentry->irq; /* IRQ */
mp_irq.dstapic = MP_APIC_ALL;
mp_irq.dstirq = pentry->irq;
mp_save_irq(&mp_irq);
mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
mp_irq.irqflag = 5;
mp_irq.srcbus = MP_BUS_ISA;
mp_irq.srcbusirq = pentry->irq; /* IRQ */
mp_irq.dstapic = MP_APIC_ALL;
mp_irq.dstirq = pentry->irq;
mp_save_irq(&mp_irq);
mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL);
}
return 0;
@ -177,7 +175,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
mp_irq.dstapic = MP_APIC_ALL;
mp_irq.dstirq = pentry->irq;
mp_save_irq(&mp_irq);
mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL);
}
return 0;
}
@ -436,6 +434,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
struct devs_id *dev = NULL;
int num, i, ret;
int polarity;
struct irq_alloc_info info;
sb = (struct sfi_table_simple *)table;
num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
@ -469,9 +468,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
polarity = 1;
}
ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE);
if (ret == 0)
ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC);
ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, polarity);
ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC, &info);
WARN_ON(ret < 0);
}

View File

@ -25,8 +25,8 @@
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/io.h>
#include <linux/irqdomain.h>
#include <asm/irqdomain.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@ -71,9 +71,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
static struct irq_domain_ops sfi_ioapic_irqdomain_ops = {
.map = mp_irqdomain_map,
};
static int __init sfi_parse_ioapic(struct sfi_table_header *table)
{
@ -82,7 +79,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table)
int i, num;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_STRICT,
.ops = &sfi_ioapic_irqdomain_ops,
.ops = &mp_ioapic_irqdomain_ops,
};
sb = (struct sfi_table_simple *)table;

View File

@ -13,22 +13,37 @@
#include <linux/slab.h>
#include <linux/irq.h>
#include <asm/irqdomain.h>
#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
#include <asm/uv/uv_hub.h>
/* MMR offset and pnode of hub sourcing interrupts for a given irq */
struct uv_irq_2_mmr_pnode{
struct rb_node list;
struct uv_irq_2_mmr_pnode {
unsigned long offset;
int pnode;
int irq;
};
static DEFINE_SPINLOCK(uv_irq_lock);
static struct rb_root uv_irq_root;
static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = cfg->dest_apicid;
uv_write_global_mmr64(info->pnode, info->offset, mmr_value);
}
static void uv_noop(struct irq_data *data) { }
@ -37,6 +52,23 @@ static void uv_ack_apic(struct irq_data *data)
ack_APIC_irq();
}
static int
uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
struct irq_data *parent = data->parent_data;
struct irq_cfg *cfg = irqd_cfg(data);
int ret;
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret >= 0) {
uv_program_mmr(cfg, data->chip_data);
send_cleanup_vector(cfg);
}
return ret;
}
static struct irq_chip uv_irq_chip = {
.name = "UV-CORE",
.irq_mask = uv_noop,
@ -45,189 +77,99 @@ static struct irq_chip uv_irq_chip = {
.irq_set_affinity = uv_set_irq_affinity,
};
/*
* Add offset and pnode information of the hub sourcing interrupts to the
* rb tree for a specific irq.
*/
static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct rb_node **link = &uv_irq_root.rb_node;
struct rb_node *parent = NULL;
struct uv_irq_2_mmr_pnode *n;
struct uv_irq_2_mmr_pnode *e;
unsigned long irqflags;
struct uv_irq_2_mmr_pnode *chip_data;
struct irq_alloc_info *info = arg;
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
int ret;
n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
uv_blade_to_memory_nid(blade));
if (!n)
if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV)
return -EINVAL;
chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL,
irq_data->node);
if (!chip_data)
return -ENOMEM;
n->irq = irq;
n->offset = offset;
n->pnode = uv_blade_to_pnode(blade);
spin_lock_irqsave(&uv_irq_lock, irqflags);
/* Find the right place in the rbtree: */
while (*link) {
parent = *link;
e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
if (unlikely(irq == e->irq)) {
/* irq entry exists */
e->pnode = uv_blade_to_pnode(blade);
e->offset = offset;
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
kfree(n);
return 0;
}
if (irq < e->irq)
link = &(*link)->rb_left;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret >= 0) {
if (info->uv_limit == UV_AFFINITY_CPU)
irq_set_status_flags(virq, IRQ_NO_BALANCING);
else
link = &(*link)->rb_right;
irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
chip_data->pnode = uv_blade_to_pnode(info->uv_blade);
chip_data->offset = info->uv_offset;
irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data,
handle_percpu_irq, NULL, info->uv_name);
} else {
kfree(chip_data);
}
/* Insert the node into the rbtree. */
rb_link_node(&n->list, parent, link);
rb_insert_color(&n->list, &uv_irq_root);
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return 0;
return ret;
}
/* Retrieve offset and pnode information from the rb tree for a specific irq */
int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct uv_irq_2_mmr_pnode *e;
struct rb_node *n;
unsigned long irqflags;
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
spin_lock_irqsave(&uv_irq_lock, irqflags);
n = uv_irq_root.rb_node;
while (n) {
e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
if (e->irq == irq) {
*offset = e->offset;
*pnode = e->pnode;
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return 0;
}
if (irq < e->irq)
n = n->rb_left;
else
n = n->rb_right;
}
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return -1;
BUG_ON(nr_irqs != 1);
kfree(irq_data->chip_data);
irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
irq_clear_status_flags(virq, IRQ_NO_BALANCING);
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
static int
arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset, int limit)
static void uv_domain_activate(struct irq_domain *domain,
struct irq_data *irq_data)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
struct irq_cfg *cfg = irq_cfg(irq);
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode, err;
unsigned int dest;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
err = assign_irq_vector(irq, cfg, eligible_cpu);
if (err != 0)
return err;
err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest);
if (err != 0)
return err;
if (limit == UV_AFFINITY_CPU)
irq_set_status_flags(irq, IRQ_NO_BALANCING);
else
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = dest;
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return irq;
uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
}
/*
* Disable the specified MMR located on the specified blade so that MSIs are
* no longer allowed to be sent.
*/
static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
static void uv_domain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
}
static int
uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
static const struct irq_domain_ops uv_domain_ops = {
.alloc = uv_domain_alloc,
.free = uv_domain_free,
.activate = uv_domain_activate,
.deactivate = uv_domain_deactivate,
};
static struct irq_domain *uv_get_irq_domain(void)
{
struct irq_cfg *cfg = irqd_cfg(data);
unsigned int dest;
unsigned long mmr_value, mmr_offset;
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode;
static struct irq_domain *uv_domain;
static DEFINE_MUTEX(uv_lock);
if (apic_set_affinity(data, mask, &dest))
return -1;
mutex_lock(&uv_lock);
if (uv_domain == NULL) {
uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL);
if (uv_domain)
uv_domain->parent = x86_vector_domain;
}
mutex_unlock(&uv_lock);
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = dest;
/* Get previously stored MMR and pnode of hub sourcing interrupts */
if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
return -1;
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return IRQ_SET_MASK_OK_NOCOPY;
return uv_domain;
}
/*
@ -238,19 +180,21 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
unsigned long mmr_offset, int limit)
{
int ret, irq = irq_alloc_hwirq(uv_blade_to_memory_nid(mmr_blade));
struct irq_alloc_info info;
struct irq_domain *domain = uv_get_irq_domain();
if (!irq)
return -EBUSY;
if (!domain)
return -ENOMEM;
ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
limit);
if (ret == irq)
uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
else
irq_free_hwirq(irq);
init_irq_alloc_info(&info, cpumask_of(cpu));
info.type = X86_IRQ_ALLOC_TYPE_UV;
info.uv_limit = limit;
info.uv_blade = mmr_blade;
info.uv_offset = mmr_offset;
info.uv_name = irq_name;
return ret;
return irq_domain_alloc_irqs(domain, 1,
uv_blade_to_memory_nid(mmr_blade), &info);
}
EXPORT_SYMBOL_GPL(uv_setup_irq);
@ -263,26 +207,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
*/
void uv_teardown_irq(unsigned int irq)
{
struct uv_irq_2_mmr_pnode *e;
struct rb_node *n;
unsigned long irqflags;
spin_lock_irqsave(&uv_irq_lock, irqflags);
n = uv_irq_root.rb_node;
while (n) {
e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
if (e->irq == irq) {
arch_disable_uv_irq(e->pnode, e->offset);
rb_erase(n, &uv_irq_root);
kfree(e);
break;
}
if (irq < e->irq)
n = n->rb_left;
else
n = n->rb_right;
}
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
irq_free_hwirq(irq);
irq_domain_free_irqs(irq, 1);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
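The rework above keeps the uv_setup_irq()/uv_teardown_irq() interface intact while moving allocation onto the hierarchical irq domain. As a hedged sketch only, not code from this commit (the handler, the "example-uv" name and the cpu/blade/offset parameters are made-up placeholders), a UV driver might pair the two helpers roughly like this:

#include <linux/interrupt.h>
#include <asm/uv/uv_irq.h>

static irqreturn_t example_uv_handler(int irq, void *dev_id)
{
	/* Handle the hub-sourced MSI. */
	return IRQ_HANDLED;
}

static int example_uv_attach(int cpu, int blade, unsigned long mmr_offset)
{
	/* Allocates a virq from the UV irq domain and programs the hub MMR. */
	int irq = uv_setup_irq("example-uv", cpu, blade, mmr_offset,
			       UV_AFFINITY_CPU);

	if (irq < 0)
		return irq;

	if (request_irq(irq, example_uv_handler, 0, "example-uv", NULL)) {
		/* Frees the virq again via irq_domain_free_irqs(). */
		uv_teardown_irq(irq);
		return -EBUSY;
	}
	return irq;
}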
View File
@ -78,9 +78,9 @@ ENTRY(restore_image)
/* code below has been relocated to a safe page */
ENTRY(core_restore_code)
loop:
.Lloop:
testq %rdx, %rdx
jz done
jz .Ldone
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
@ -91,8 +91,8 @@ loop:
/* progress to the next pbe */
movq pbe_next(%rdx), %rdx
jmp loop
done:
jmp .Lloop
.Ldone:
/* jump to the restore_registers address from the image header */
jmpq *%rax
/*
View File
@ -1181,10 +1181,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_tscp = native_read_tscp,
.iret = xen_iret,
.irq_enable_sysexit = xen_sysexit,
#ifdef CONFIG_X86_64
.usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64,
#else
.irq_enable_sysexit = xen_sysexit,
#endif
.load_tr_desc = paravirt_nop,
View File
@ -15,6 +15,8 @@
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <xen/interface/xen.h>
@ -47,29 +49,13 @@ ENTRY(xen_iret)
ENDPATCH(xen_iret)
RELOC(xen_iret, 1b+1)
/*
* sysexit is not used for 64-bit processes, so it's only ever used to
* return to 32-bit compat userspace.
*/
ENTRY(xen_sysexit)
pushq $__USER32_DS
pushq %rcx
pushq $X86_EFLAGS_IF
pushq $__USER32_CS
pushq %rdx
pushq $0
1: jmp hypercall_iret
ENDPATCH(xen_sysexit)
RELOC(xen_sysexit, 1b+1)
ENTRY(xen_sysret64)
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(kernel_stack), %rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER_DS
pushq PER_CPU_VAR(rsp_scratch)
@ -88,7 +74,7 @@ ENTRY(xen_sysret32)
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(kernel_stack), %rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER32_DS
pushq PER_CPU_VAR(rsp_scratch)
View File
@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
#ifdef CONFIG_X86_32
__visible void xen_sysexit(void);
#endif
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void);
View File
@ -36,9 +36,6 @@
#include <linux/pci.h>
#include <linux/export.h>
#ifdef CONFIG_X86
#include <asm/mtrr.h>
#endif
static int drm_version(struct drm_device *dev, void *data,
struct drm_file *file_priv);
@ -197,16 +194,7 @@ static int drm_getmap(struct drm_device *dev, void *data,
map->type = r_list->map->type;
map->flags = r_list->map->flags;
map->handle = (void *)(unsigned long) r_list->user_token;
#ifdef CONFIG_X86
/*
* There appears to be exactly one user of the mtrr index: dritest.
* It's easy enough to keep it working on non-PAT systems.
*/
map->mtrr = phys_wc_to_mtrr_index(r_list->map->mtrr);
#else
map->mtrr = -1;
#endif
map->mtrr = arch_phys_wc_index(r_list->map->mtrr);
mutex_unlock(&dev->struct_mutex);
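The drm_getmap() change above leans on arch_phys_wc_index() to translate the cookie returned by arch_phys_wc_add() into an MTRR slot for userspace (or -1 when PAT made the MTRR unnecessary). As a hedged sketch, not code from this commit (struct and function names are illustrative), a driver mapping a write-combined aperture would pair the helpers roughly as follows:

#include <linux/io.h>

struct example_aperture {
	void __iomem	*mmio;
	int		wc_handle;
};

static int example_aperture_map(struct example_aperture *ap,
				unsigned long base, unsigned long size)
{
	/* Request WC page attributes for the mapping itself. */
	ap->mmio = ioremap_wc(base, size);
	if (!ap->mmio)
		return -ENOMEM;

	/* No-op on PAT systems; adds a write-combining MTRR on non-PAT hardware. */
	ap->wc_handle = arch_phys_wc_add(base, size);
	return 0;
}

static void example_aperture_unmap(struct example_aperture *ap)
{
	arch_phys_wc_del(ap->wc_handle);
	iounmap(ap->mmio);
}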
Some files were not shown because too many files have changed in this diff.