From 9bf05098ce34e68a9e15f09ad6cdfea4ed64057a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 23 May 2011 10:23:32 +0200 Subject: [PATCH 01/29] [S390] cio: fix unreg race in set_online path In ccw_device_set_online we basically start path verification and wait for the device to reach a final state. If it turns out that the device has no useable path we schedule the deregistration of the device (which is still in an non-final state) and wake up the waiting process. The deregistration process will set a final state, but if the wake up happens to be prior to this, the device will hang forever in ccw_device_set_online. To fix this just set the final NOT_OPER state prior to the scheduled deregistration of the device. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_fsm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 6084103672b5..e087a8685f95 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -408,9 +408,10 @@ ccw_device_done(struct ccw_device *cdev, int state) CIO_MSG_EVENT(0, "Disconnected device %04x on subchannel " "%04x\n", cdev->private->dev_id.devno, sch->schid.sch_no); - if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK) + if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK) { + cdev->private->state = DEV_STATE_NOT_OPER; ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - else + } else ccw_device_set_disconnected(cdev); cdev->private->flags.donotify = 0; break; From 043d07084b5347a26eab0a07aa13a4a929ad9e71 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:23 +0200 Subject: [PATCH 02/29] [S390] Remove data execution protection The noexec support on s390 does not rely on a bit in the page table entry but utilizes the secondary space mode to distinguish between memory accesses for instructions vs. data. The noexec code relies on the assumption that the cpu will always use the secondary space page table for data accesses while it is running in the secondary space mode. Up to the z9-109 class machines this has been the case. Unfortunately this is not true anymore with z10 and later machines. The load-relative-long instructions lrl, lgrl and lgfrl access the memory operand using the same addressing-space mode that has been used to fetch the instruction. This breaks the noexec mode for all user space binaries compiled with march=z10 or later. The only option is to remove the current noexec support. Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 11 ----- arch/s390/include/asm/elf.h | 12 ----- arch/s390/include/asm/hugetlb.h | 17 ++----- arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/mmu.h | 3 -- arch/s390/include/asm/mmu_context.h | 6 +-- arch/s390/include/asm/pgalloc.h | 28 ++--------- arch/s390/include/asm/pgtable.h | 75 +++++------------------------ arch/s390/include/asm/tlbflush.h | 11 ++--- arch/s390/kernel/asm-offsets.c | 3 -- arch/s390/kernel/setup.c | 31 +----------- arch/s390/mm/fault.c | 39 --------------- arch/s390/mm/hugetlbpage.c | 10 ---- arch/s390/mm/pgtable.c | 68 ++++---------------------- 14 files changed, 35 insertions(+), 283 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a7f14079e03..ff2d2371b2e9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -230,17 +230,6 @@ config SYSVIPC_COMPAT config AUDIT_ARCH def_bool y -config S390_EXEC_PROTECT - def_bool y - prompt "Data execute protection" - help - This option allows to enable a buffer overflow protection for user - space programs and it also selects the addressing mode option above. - The kernel parameter noexec=on will enable this feature and also - switch the addressing modes, default is disabled. Enabling this (via - kernel parameter) on machines earlier than IBM System z9 this will - reduce system performance. - comment "Code generation options" choice diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 10c029cfcc7d..64b61bf72e93 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -196,18 +196,6 @@ do { \ } while (0) #endif /* __s390x__ */ -/* - * An executable for which elf_read_implies_exec() returns TRUE will - * have the READ_IMPLIES_EXEC personality flag set automatically. - */ -#define elf_read_implies_exec(ex, executable_stack) \ -({ \ - if (current->mm->context.noexec && \ - executable_stack != EXSTACK_DISABLE_X) \ - disable_noexec(current->mm, current); \ - current->mm->context.noexec == 0; \ -}) - #define STACK_RND_MASK 0x7ffUL #define ARCH_DLINFO \ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index b56403c2df28..799ed0f1643d 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -111,21 +111,10 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, { pmd_t *pmdp = (pmd_t *) ptep; - if (!MACHINE_HAS_IDTE) { - __pmd_csp(pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - __pmd_csp(pmdp); - } - return; - } - - __pmd_idte(address, pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); + if (MACHINE_HAS_IDTE) __pmd_idte(address, pmdp); - } - return; + else + __pmd_csp(pmdp); } #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 65e172f8209d..b8624d53c379 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,7 @@ struct _lowcore { /* Address space pointer. */ __u32 kernel_asce; /* 0x02ac */ __u32 user_asce; /* 0x02b0 */ - __u32 user_exec_asce; /* 0x02b4 */ + __u8 pad_0x02b4[0x02b8-0x02b4]; /* 0x02b4 */ /* SMP info area */ __u32 cpu_nr; /* 0x02b8 */ @@ -255,7 +255,7 @@ struct _lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0310 */ __u64 user_asce; /* 0x0318 */ - __u64 user_exec_asce; /* 0x0320 */ + __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ /* SMP info area */ __u32 cpu_nr; /* 0x0328 */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 78522cdefdd4..818e8298a6bd 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -5,19 +5,16 @@ typedef struct { atomic_t attach_count; unsigned int flush_mm; spinlock_t list_lock; - struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - int noexec; int has_pgste; /* The mmu context has extended page tables */ int alloc_pgste; /* cloned contexts will have extended page tables */ } mm_context_t; #define INIT_MM_CONTEXT(name) \ .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ - .context.crst_list = LIST_HEAD_INIT(name.context.crst_list), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8c277caa8d3a..5682f160ff82 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -35,11 +35,9 @@ static inline int init_new_context(struct task_struct *tsk, * and if has_pgste is set, it will create extended page * tables. */ - mm->context.noexec = 0; mm->context.has_pgste = 1; mm->context.alloc_pgste = 1; } else { - mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE); mm->context.has_pgste = 0; mm->context.alloc_pgste = 0; } @@ -63,10 +61,8 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); if (user_mode != HOME_SPACE_MODE) { /* Load primary space page table origin. */ - pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd; - S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd); asm volatile(LCTL_OPCODE" 1,1,%0\n" - : : "m" (S390_lowcore.user_exec_asce) ); + : : "m" (S390_lowcore.user_asce) ); } else /* Load home space page table origin. */ asm volatile(LCTL_OPCODE" 13,13,%0" diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 082eb4e50e8b..739ff9ec1395 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -19,14 +19,13 @@ #define check_pgt_cache() do {} while (0) -unsigned long *crst_table_alloc(struct mm_struct *, int); +unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mm_struct *, unsigned long *); -void disable_noexec(struct mm_struct *, struct task_struct *); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { @@ -50,9 +49,6 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n) static inline void crst_table_init(unsigned long *crst, unsigned long entry) { clear_table(crst, entry, sizeof(unsigned long)*2048); - crst = get_shadow_table(crst); - if (crst) - clear_table(crst, entry, sizeof(unsigned long)*2048); } #ifndef __s390x__ @@ -90,7 +86,7 @@ void crst_table_downgrade(struct mm_struct *, unsigned long limit); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; @@ -99,7 +95,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _SEGMENT_ENTRY_EMPTY); return (pmd_t *) table; @@ -115,11 +111,6 @@ static inline void pgd_populate_kernel(struct mm_struct *mm, static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { pgd_populate_kernel(mm, pgd, pud); - if (mm->context.noexec) { - pgd = get_shadow_table(pgd); - pud = get_shadow_table(pud); - pgd_populate_kernel(mm, pgd, pud); - } } static inline void pud_populate_kernel(struct mm_struct *mm, @@ -131,11 +122,6 @@ static inline void pud_populate_kernel(struct mm_struct *mm, static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { pud_populate_kernel(mm, pud, pmd); - if (mm->context.noexec) { - pud = get_shadow_table(pud); - pmd = get_shadow_table(pmd); - pud_populate_kernel(mm, pud, pmd); - } } #endif /* __s390x__ */ @@ -143,10 +129,8 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - return (pgd_t *) - crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE); + return (pgd_t *) crst_table_alloc(mm); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) @@ -160,10 +144,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { pmd_populate_kernel(mm, pmd, pte); - if (mm->context.noexec) { - pmd = get_shadow_table(pmd); - pmd_populate_kernel(mm, pmd, pte + PTRS_PER_PTE); - } } #define pmd_pgtable(pmd) \ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 02ace3491c51..763620ec7925 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -256,8 +256,6 @@ extern unsigned long VMALLOC_START; #define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ #define _PAGE_TYPE_RO 0x200 #define _PAGE_TYPE_RW 0x000 -#define _PAGE_TYPE_EX_RO 0x202 -#define _PAGE_TYPE_EX_RW 0x002 /* * Only four types for huge pages, using the invalid bit and protection bit @@ -287,8 +285,6 @@ extern unsigned long VMALLOC_START; * _PAGE_TYPE_FILE 11?1 -> 11?1 * _PAGE_TYPE_RO 0100 -> 1100 * _PAGE_TYPE_RW 0000 -> 1000 - * _PAGE_TYPE_EX_RO 0110 -> 1110 - * _PAGE_TYPE_EX_RW 0010 -> 1010 * * pte_none is true for bits combinations 1000, 1010, 1100, 1110 * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 @@ -387,55 +383,33 @@ extern unsigned long VMALLOC_START; #define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) #define PAGE_RO __pgprot(_PAGE_TYPE_RO) #define PAGE_RW __pgprot(_PAGE_TYPE_RW) -#define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO) -#define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW) #define PAGE_KERNEL PAGE_RW #define PAGE_COPY PAGE_RO /* - * Dependent on the EXEC_PROTECT option s390 can do execute protection. - * Write permission always implies read permission. In theory with a - * primary/secondary page table execute only can be implemented but - * it would cost an additional bit in the pte to distinguish all the - * different pte types. To avoid that execute permission currently - * implies read permission as well. + * On s390 the page table entry has an invalid bit and a read-only bit. + * Read permission implies execute permission and write permission + * implies read permission. */ /*xwr*/ #define __P000 PAGE_NONE #define __P001 PAGE_RO #define __P010 PAGE_RO #define __P011 PAGE_RO -#define __P100 PAGE_EX_RO -#define __P101 PAGE_EX_RO -#define __P110 PAGE_EX_RO -#define __P111 PAGE_EX_RO +#define __P100 PAGE_RO +#define __P101 PAGE_RO +#define __P110 PAGE_RO +#define __P111 PAGE_RO #define __S000 PAGE_NONE #define __S001 PAGE_RO #define __S010 PAGE_RW #define __S011 PAGE_RW -#define __S100 PAGE_EX_RO -#define __S101 PAGE_EX_RO -#define __S110 PAGE_EX_RW -#define __S111 PAGE_EX_RW - -#ifndef __s390x__ -# define PxD_SHADOW_SHIFT 1 -#else /* __s390x__ */ -# define PxD_SHADOW_SHIFT 2 -#endif /* __s390x__ */ - -static inline void *get_shadow_table(void *table) -{ - unsigned long addr, offset; - struct page *page; - - addr = (unsigned long) table; - offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1); - page = virt_to_page((void *)(addr ^ offset)); - return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL); -} +#define __S100 PAGE_RO +#define __S101 PAGE_RO +#define __S110 PAGE_RW +#define __S111 PAGE_RW /* * Certain architectures need to do special things when PTEs @@ -446,14 +420,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { *ptep = entry; - if (mm->context.noexec) { - if (!(pte_val(entry) & _PAGE_INVALID) && - (pte_val(entry) & _PAGE_SWX)) - pte_val(entry) |= _PAGE_RO; - else - pte_val(entry) = _PAGE_TYPE_EMPTY; - ptep[PTRS_PER_PTE] = entry; - } } /* @@ -662,11 +628,7 @@ static inline void pgd_clear_kernel(pgd_t * pgd) static inline void pgd_clear(pgd_t * pgd) { - pgd_t *shadow = get_shadow_table(pgd); - pgd_clear_kernel(pgd); - if (shadow) - pgd_clear_kernel(shadow); } static inline void pud_clear_kernel(pud_t *pud) @@ -677,13 +639,8 @@ static inline void pud_clear_kernel(pud_t *pud) static inline void pud_clear(pud_t *pud) { - pud_t *shadow = get_shadow_table(pud); - pud_clear_kernel(pud); - if (shadow) - pud_clear_kernel(shadow); } - #endif /* __s390x__ */ static inline void pmd_clear_kernel(pmd_t * pmdp) @@ -693,18 +650,12 @@ static inline void pmd_clear_kernel(pmd_t * pmdp) static inline void pmd_clear(pmd_t *pmd) { - pmd_t *shadow = get_shadow_table(pmd); - pmd_clear_kernel(pmd); - if (shadow) - pmd_clear_kernel(shadow); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) - pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; } /* @@ -903,10 +854,6 @@ static inline void ptep_invalidate(struct mm_struct *mm, } __ptep_ipte(address, ptep); pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) { - __ptep_ipte(address, ptep + PTRS_PER_PTE); - pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; - } } /* diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 29d5d6d4becc..4fdcefc1a98d 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -80,16 +80,11 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * on all cpus instead of doing a local flush if the mm * only ran on the local cpu. */ - if (MACHINE_HAS_IDTE) { - if (mm->context.noexec) - __tlb_flush_idte((unsigned long) - get_shadow_table(mm->pgd) | - mm->context.asce_bits); + if (MACHINE_HAS_IDTE) __tlb_flush_idte((unsigned long) mm->pgd | mm->context.asce_bits); - return; - } - __tlb_flush_full(mm); + else + __tlb_flush_full(mm); } static inline void __tlb_flush_mm_cond(struct mm_struct * mm) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index fe03c140002a..ef4555611013 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -128,9 +128,6 @@ int main(void) DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); - DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); - DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); - DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f5434d1ecb31..0c35dee10b00 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,8 +305,7 @@ static int set_amode_and_uaccess(unsigned long user_amode, */ static int __init early_parse_switch_amode(char *p) { - if (user_mode != SECONDARY_SPACE_MODE) - user_mode = PRIMARY_SPACE_MODE; + user_mode = PRIMARY_SPACE_MODE; return 0; } early_param("switch_amode", early_parse_switch_amode); @@ -315,10 +314,6 @@ static int __init early_parse_user_mode(char *p) { if (p && strcmp(p, "primary") == 0) user_mode = PRIMARY_SPACE_MODE; -#ifdef CONFIG_S390_EXEC_PROTECT - else if (p && strcmp(p, "secondary") == 0) - user_mode = SECONDARY_SPACE_MODE; -#endif else if (!p || strcmp(p, "home") == 0) user_mode = HOME_SPACE_MODE; else @@ -327,31 +322,9 @@ static int __init early_parse_user_mode(char *p) } early_param("user_mode", early_parse_user_mode); -#ifdef CONFIG_S390_EXEC_PROTECT -/* - * Enable execute protection? - */ -static int __init early_parse_noexec(char *p) -{ - if (!strncmp(p, "off", 3)) - return 0; - user_mode = SECONDARY_SPACE_MODE; - return 0; -} -early_param("noexec", early_parse_noexec); -#endif /* CONFIG_S390_EXEC_PROTECT */ - static void setup_addressing_mode(void) { - if (user_mode == SECONDARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_SECONDARY, - PSW32_ASC_SECONDARY)) - pr_info("Execute protection active, " - "mvcos available\n"); - else - pr_info("Execute protection active, " - "mvcos not available\n"); - } else if (user_mode == PRIMARY_SPACE_MODE) { + if (user_mode == PRIMARY_SPACE_MODE) { if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) pr_info("Address spaces switched, " "mvcos available\n"); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ab988135e5c6..177745c520ca 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -225,33 +225,6 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, force_sig_info(SIGBUS, &si, tsk); } -#ifdef CONFIG_S390_EXEC_PROTECT -static noinline int signal_return(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) -{ - u16 instruction; - int rc; - - rc = __get_user(instruction, (u16 __user *) regs->psw.addr); - - if (!rc && instruction == 0x0a77) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_sigreturn(); - else - sys_sigreturn(); - } else if (!rc && instruction == 0x0aad) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_rt_sigreturn(); - else - sys_rt_sigreturn(); - } else - do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); - return 0; -} -#endif /* CONFIG_S390_EXEC_PROTECT */ - static noinline void do_fault_error(struct pt_regs *regs, long int_code, unsigned long trans_exc_code, int fault) { @@ -259,13 +232,6 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, switch (fault) { case VM_FAULT_BADACCESS: -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) { - signal_return(regs, int_code, trans_exc_code); - break; - } -#endif /* CONFIG_S390_EXEC_PROTECT */ case VM_FAULT_BADMAP: /* Bad memory access. Check if it is kernel or user space. */ if (regs->psw.mask & PSW_MASK_PSTATE) { @@ -414,11 +380,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) - access = VM_EXEC; -#endif fault = do_exception(regs, access, trans_exc_code); if (unlikely(fault)) do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 639cd21f2218..a4d856db9154 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -13,7 +13,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *pteptr, pte_t pteval) { pmd_t *pmdp = (pmd_t *) pteptr; - pte_t shadow_pteval = pteval; unsigned long mask; if (!MACHINE_HAS_HPAGE) { @@ -21,18 +20,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, mask = pte_val(pteval) & (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - if (mm->context.noexec) { - pteptr += PTRS_PER_PTE; - pte_val(shadow_pteval) = - (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - } } pmd_val(*pmdp) = pte_val(pteval); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - pmd_val(*pmdp) = pte_val(shadow_pteval); - } } int arch_prepare_hugepage(struct page *page) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e1850c28cd68..8d4330642512 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -40,7 +40,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); static void __page_table_free(struct mm_struct *mm, unsigned long *table); -static void __crst_table_free(struct mm_struct *mm, unsigned long *table); static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) { @@ -67,7 +66,7 @@ static void rcu_table_freelist_callback(struct rcu_head *head) while (batch->pgt_index > 0) __page_table_free(batch->mm, batch->table[--batch->pgt_index]); while (batch->crst_index < RCU_FREELIST_SIZE) - __crst_table_free(batch->mm, batch->table[batch->crst_index++]); + crst_table_free(batch->mm, batch->table[batch->crst_index++]); free_page((unsigned long) batch); } @@ -125,63 +124,33 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) +unsigned long *crst_table_alloc(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); if (!page) return NULL; - page->index = 0; - if (noexec) { - struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); - if (!shadow) { - __free_pages(page, ALLOC_ORDER); - return NULL; - } - page->index = page_to_phys(shadow); - } - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.crst_list); - spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } -static void __crst_table_free(struct mm_struct *mm, unsigned long *table) -{ - unsigned long *shadow = get_shadow_table(table); - - if (shadow) - free_pages((unsigned long) shadow, ALLOC_ORDER); - free_pages((unsigned long) table, ALLOC_ORDER); -} - void crst_table_free(struct mm_struct *mm, unsigned long *table) { - struct page *page = virt_to_page(table); - - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - __crst_table_free(mm, table); + free_pages((unsigned long) table, ALLOC_ORDER); } void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) { struct rcu_table_freelist *batch; - struct page *page = virt_to_page(table); - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); if (atomic_read(&mm->mm_users) < 2 && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - __crst_table_free(mm, table); + crst_table_free(mm, table); return; } batch = rcu_table_freelist_get(mm); if (!batch) { smp_call_function(smp_sync, NULL, 1); - __crst_table_free(mm, table); + crst_table_free(mm, table); return; } batch->table[--batch->crst_index] = table; @@ -197,7 +166,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) BUG_ON(limit > (1UL << 53)); repeat: - table = crst_table_alloc(mm, mm->context.noexec); + table = crst_table_alloc(mm); if (!table) return -ENOMEM; spin_lock_bh(&mm->page_table_lock); @@ -273,7 +242,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; spin_lock_bh(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -329,7 +298,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); @@ -366,7 +335,7 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) page_table_free(mm, table); return; } - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); @@ -379,25 +348,6 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) rcu_table_freelist_finish(); } -void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) -{ - struct page *page; - - spin_lock_bh(&mm->context.list_lock); - /* Free shadow region and segment tables. */ - list_for_each_entry(page, &mm->context.crst_list, lru) - if (page->index) { - free_pages((unsigned long) page->index, ALLOC_ORDER); - page->index = 0; - } - /* "Free" second halves of page tables. */ - list_for_each_entry(page, &mm->context.pgtable_list, lru) - page->flags &= ~SECOND_HALVES; - spin_unlock_bh(&mm->context.list_lock); - mm->context.noexec = 0; - update_mm(mm, tsk); -} - /* * switch on pgstes for its userspace process (for kvm) */ From 66ceed5ad1318863c21710f316942bcefff8081c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:24 +0200 Subject: [PATCH 03/29] [S390] Remove tape block device driver. Remove the tape block device driver. It's not of real use but has already created some confusion when users wanted to access tape devices and used the block device nodes instead of the character device nodes. Also remove the whole tape documentation since it's completely outdated and we have the device drivers book which is the place where everything is properly documented. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/Kconfig | 12 - drivers/s390/char/Makefile | 1 - drivers/s390/char/tape_block.c | 444 --------------------------------- 3 files changed, 457 deletions(-) delete mode 100644 drivers/s390/char/tape_block.c diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index dcee3c5c8954..a4f117d9fdc6 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -119,18 +119,6 @@ config S390_TAPE comment "S/390 tape interface support" depends on S390_TAPE -config S390_TAPE_BLOCK - def_bool y - prompt "Support for tape block devices" - depends on S390_TAPE && BLOCK - help - Select this option if you want to access your channel-attached tape - devices using the block device interface. This interface is similar - to CD-ROM devices on other platforms. The tapes can only be - accessed read-only when using this interface. Have a look at - for further information about creating - volumes for and using this interface. It is safe to say "Y" here. - comment "S/390 tape hardware support" depends on S390_TAPE diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index efb500ab66c0..af01b5abd069 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_ZVM_WATCHDOG) += vmwatchdog.o obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o obj-$(CONFIG_VMCP) += vmcp.o -tape-$(CONFIG_S390_TAPE_BLOCK) += tape_block.o tape-$(CONFIG_PROC_FS) += tape_proc.o tape-objs := tape_core.o tape_std.o tape_char.o $(tape-y) obj-$(CONFIG_S390_TAPE) += tape.o tape_class.o diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c deleted file mode 100644 index 1b3924c2fffd..000000000000 --- a/drivers/s390/char/tape_block.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * drivers/s390/char/tape_block.c - * block device frontend for tape device driver - * - * S390 and zSeries version - * Copyright (C) 2001,2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Carsten Otte - * Tuan Ngo-Anh - * Martin Schwidefsky - * Stefan Bader - */ - -#define KMSG_COMPONENT "tape" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include -#include - -#include - -#define TAPE_DBF_AREA tape_core_dbf - -#include "tape.h" - -#define TAPEBLOCK_MAX_SEC 100 -#define TAPEBLOCK_MIN_REQUEUE 3 - -/* - * 2003/11/25 Stefan Bader - * - * In 2.5/2.6 the block device request function is very likely to be called - * with disabled interrupts (e.g. generic_unplug_device). So the driver can't - * just call any function that tries to allocate CCW requests from that con- - * text since it might sleep. There are two choices to work around this: - * a) do not allocate with kmalloc but use its own memory pool - * b) take requests from the queue outside that context, knowing that - * allocation might sleep - */ - -/* - * file operation structure for tape block frontend - */ -static DEFINE_MUTEX(tape_block_mutex); -static int tapeblock_open(struct block_device *, fmode_t); -static int tapeblock_release(struct gendisk *, fmode_t); -static unsigned int tapeblock_check_events(struct gendisk *, unsigned int); -static int tapeblock_revalidate_disk(struct gendisk *); - -static const struct block_device_operations tapeblock_fops = { - .owner = THIS_MODULE, - .open = tapeblock_open, - .release = tapeblock_release, - .check_events = tapeblock_check_events, - .revalidate_disk = tapeblock_revalidate_disk, -}; - -static int tapeblock_major = 0; - -static void -tapeblock_trigger_requeue(struct tape_device *device) -{ - /* Protect against rescheduling. */ - if (atomic_cmpxchg(&device->blk_data.requeue_scheduled, 0, 1) != 0) - return; - schedule_work(&device->blk_data.requeue_task); -} - -/* - * Post finished request. - */ -static void -__tapeblock_end_request(struct tape_request *ccw_req, void *data) -{ - struct tape_device *device; - struct request *req; - - DBF_LH(6, "__tapeblock_end_request()\n"); - - device = ccw_req->device; - req = (struct request *) data; - blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO); - if (ccw_req->rc == 0) - /* Update position. */ - device->blk_data.block_position = - (blk_rq_pos(req) + blk_rq_sectors(req)) >> TAPEBLOCK_HSEC_S2B; - else - /* We lost the position information due to an error. */ - device->blk_data.block_position = -1; - device->discipline->free_bread(ccw_req); - if (!list_empty(&device->req_queue) || - blk_peek_request(device->blk_data.request_queue)) - tapeblock_trigger_requeue(device); -} - -/* - * Feed the tape device CCW queue with requests supplied in a list. - */ -static int -tapeblock_start_request(struct tape_device *device, struct request *req) -{ - struct tape_request * ccw_req; - int rc; - - DBF_LH(6, "tapeblock_start_request(%p, %p)\n", device, req); - - ccw_req = device->discipline->bread(device, req); - if (IS_ERR(ccw_req)) { - DBF_EVENT(1, "TBLOCK: bread failed\n"); - blk_end_request_all(req, -EIO); - return PTR_ERR(ccw_req); - } - ccw_req->callback = __tapeblock_end_request; - ccw_req->callback_data = (void *) req; - ccw_req->retries = TAPEBLOCK_RETRIES; - - rc = tape_do_io_async(device, ccw_req); - if (rc) { - /* - * Start/enqueueing failed. No retries in - * this case. - */ - blk_end_request_all(req, -EIO); - device->discipline->free_bread(ccw_req); - } - - return rc; -} - -/* - * Move requests from the block device request queue to the tape device ccw - * queue. - */ -static void -tapeblock_requeue(struct work_struct *work) { - struct tape_blk_data * blkdat; - struct tape_device * device; - struct request_queue * queue; - int nr_queued; - struct request * req; - struct list_head * l; - int rc; - - blkdat = container_of(work, struct tape_blk_data, requeue_task); - device = blkdat->device; - if (!device) - return; - - spin_lock_irq(get_ccwdev_lock(device->cdev)); - queue = device->blk_data.request_queue; - - /* Count number of requests on ccw queue. */ - nr_queued = 0; - list_for_each(l, &device->req_queue) - nr_queued++; - spin_unlock(get_ccwdev_lock(device->cdev)); - - spin_lock_irq(&device->blk_data.request_queue_lock); - while ( - blk_peek_request(queue) && - nr_queued < TAPEBLOCK_MIN_REQUEUE - ) { - req = blk_fetch_request(queue); - if (rq_data_dir(req) == WRITE) { - DBF_EVENT(1, "TBLOCK: Rejecting write request\n"); - spin_unlock_irq(&device->blk_data.request_queue_lock); - blk_end_request_all(req, -EIO); - spin_lock_irq(&device->blk_data.request_queue_lock); - continue; - } - nr_queued++; - spin_unlock_irq(&device->blk_data.request_queue_lock); - rc = tapeblock_start_request(device, req); - spin_lock_irq(&device->blk_data.request_queue_lock); - } - spin_unlock_irq(&device->blk_data.request_queue_lock); - atomic_set(&device->blk_data.requeue_scheduled, 0); -} - -/* - * Tape request queue function. Called from ll_rw_blk.c - */ -static void -tapeblock_request_fn(struct request_queue *queue) -{ - struct tape_device *device; - - device = (struct tape_device *) queue->queuedata; - DBF_LH(6, "tapeblock_request_fn(device=%p)\n", device); - BUG_ON(device == NULL); - tapeblock_trigger_requeue(device); -} - -/* - * This function is called for every new tapedevice - */ -int -tapeblock_setup_device(struct tape_device * device) -{ - struct tape_blk_data * blkdat; - struct gendisk * disk; - int rc; - - blkdat = &device->blk_data; - blkdat->device = device; - spin_lock_init(&blkdat->request_queue_lock); - atomic_set(&blkdat->requeue_scheduled, 0); - - blkdat->request_queue = blk_init_queue( - tapeblock_request_fn, - &blkdat->request_queue_lock - ); - if (!blkdat->request_queue) - return -ENOMEM; - - rc = elevator_change(blkdat->request_queue, "noop"); - if (rc) - goto cleanup_queue; - - blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE); - blk_queue_max_hw_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC); - blk_queue_max_segments(blkdat->request_queue, -1L); - blk_queue_max_segment_size(blkdat->request_queue, -1L); - blk_queue_segment_boundary(blkdat->request_queue, -1L); - - disk = alloc_disk(1); - if (!disk) { - rc = -ENOMEM; - goto cleanup_queue; - } - - disk->major = tapeblock_major; - disk->first_minor = device->first_minor; - disk->fops = &tapeblock_fops; - disk->private_data = tape_get_device(device); - disk->queue = blkdat->request_queue; - set_capacity(disk, 0); - sprintf(disk->disk_name, "btibm%d", - device->first_minor / TAPE_MINORS_PER_DEV); - - blkdat->disk = disk; - blkdat->medium_changed = 1; - blkdat->request_queue->queuedata = tape_get_device(device); - - add_disk(disk); - - tape_get_device(device); - INIT_WORK(&blkdat->requeue_task, tapeblock_requeue); - - return 0; - -cleanup_queue: - blk_cleanup_queue(blkdat->request_queue); - blkdat->request_queue = NULL; - - return rc; -} - -void -tapeblock_cleanup_device(struct tape_device *device) -{ - flush_work_sync(&device->blk_data.requeue_task); - tape_put_device(device); - - if (!device->blk_data.disk) { - goto cleanup_queue; - } - - del_gendisk(device->blk_data.disk); - device->blk_data.disk->private_data = NULL; - tape_put_device(device); - put_disk(device->blk_data.disk); - - device->blk_data.disk = NULL; -cleanup_queue: - device->blk_data.request_queue->queuedata = NULL; - tape_put_device(device); - - blk_cleanup_queue(device->blk_data.request_queue); - device->blk_data.request_queue = NULL; -} - -/* - * Detect number of blocks of the tape. - * FIXME: can we extent this to detect the blocks size as well ? - */ -static int -tapeblock_revalidate_disk(struct gendisk *disk) -{ - struct tape_device * device; - unsigned int nr_of_blks; - int rc; - - device = (struct tape_device *) disk->private_data; - BUG_ON(!device); - - if (!device->blk_data.medium_changed) - return 0; - - rc = tape_mtop(device, MTFSFM, 1); - if (rc) - return rc; - - rc = tape_mtop(device, MTTELL, 1); - if (rc < 0) - return rc; - - pr_info("%s: Determining the size of the recorded area...\n", - dev_name(&device->cdev->dev)); - DBF_LH(3, "Image file ends at %d\n", rc); - nr_of_blks = rc; - - /* This will fail for the first file. Catch the error by checking the - * position. */ - tape_mtop(device, MTBSF, 1); - - rc = tape_mtop(device, MTTELL, 1); - if (rc < 0) - return rc; - - if (rc > nr_of_blks) - return -EINVAL; - - DBF_LH(3, "Image file starts at %d\n", rc); - device->bof = rc; - nr_of_blks -= rc; - - pr_info("%s: The size of the recorded area is %i blocks\n", - dev_name(&device->cdev->dev), nr_of_blks); - set_capacity(device->blk_data.disk, - nr_of_blks*(TAPEBLOCK_HSEC_SIZE/512)); - - device->blk_data.block_position = 0; - device->blk_data.medium_changed = 0; - return 0; -} - -static unsigned int -tapeblock_check_events(struct gendisk *disk, unsigned int clearing) -{ - struct tape_device *device; - - device = (struct tape_device *) disk->private_data; - DBF_LH(6, "tapeblock_medium_changed(%p) = %d\n", - device, device->blk_data.medium_changed); - - return device->blk_data.medium_changed ? DISK_EVENT_MEDIA_CHANGE : 0; -} - -/* - * Block frontend tape device open function. - */ -static int -tapeblock_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk * disk = bdev->bd_disk; - struct tape_device * device; - int rc; - - mutex_lock(&tape_block_mutex); - device = tape_get_device(disk->private_data); - - if (device->required_tapemarks) { - DBF_EVENT(2, "TBLOCK: missing tapemarks\n"); - pr_warning("%s: Opening the tape failed because of missing " - "end-of-file marks\n", dev_name(&device->cdev->dev)); - rc = -EPERM; - goto put_device; - } - - rc = tape_open(device); - if (rc) - goto put_device; - - rc = tapeblock_revalidate_disk(disk); - if (rc) - goto release; - - /* - * Note: The reference to is hold until the release function - * is called. - */ - tape_state_set(device, TS_BLKUSE); - mutex_unlock(&tape_block_mutex); - return 0; - -release: - tape_release(device); - put_device: - tape_put_device(device); - mutex_unlock(&tape_block_mutex); - return rc; -} - -/* - * Block frontend tape device release function. - * - * Note: One reference to the tape device was made by the open function. So - * we just get the pointer here and release the reference. - */ -static int -tapeblock_release(struct gendisk *disk, fmode_t mode) -{ - struct tape_device *device = disk->private_data; - - mutex_lock(&tape_block_mutex); - tape_state_set(device, TS_IN_USE); - tape_release(device); - tape_put_device(device); - mutex_unlock(&tape_block_mutex); - - return 0; -} - -/* - * Initialize block device frontend. - */ -int -tapeblock_init(void) -{ - int rc; - - /* Register the tape major number to the kernel */ - rc = register_blkdev(tapeblock_major, "tBLK"); - if (rc < 0) - return rc; - - if (tapeblock_major == 0) - tapeblock_major = rc; - return 0; -} - -/* - * Deregister major for block device frontend - */ -void -tapeblock_exit(void) -{ - unregister_blkdev(tapeblock_major, "tBLK"); -} From 7712f83aa904fef0d7d6e5ba7684c3272bddbb19 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:25 +0200 Subject: [PATCH 04/29] [S390] get rid of unused variables Remove trivially unused variables as detected with -Wunused-but-set-variable. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 2 -- arch/s390/mm/extmem.c | 4 ---- drivers/s390/block/dasd_alias.c | 4 ---- drivers/s390/block/dasd_eckd.c | 8 -------- drivers/s390/char/raw3270.c | 2 -- drivers/s390/char/sclp_sdias.c | 3 --- drivers/s390/char/tape_3590.c | 6 ------ drivers/s390/cio/device_fsm.c | 3 --- drivers/s390/cio/device_ops.c | 3 --- 9 files changed, 35 deletions(-) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 5c91995b74e4..24bff4f1cc52 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -130,9 +130,7 @@ static void appldata_work_fn(struct work_struct *work) { struct list_head *lh; struct appldata_ops *ops; - int i; - i = 0; get_online_cpus(); mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 3cc95dd0a3a6..214dd7b23934 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -681,8 +681,6 @@ void segment_save(char *name) { struct dcss_segment *seg; - int startpfn = 0; - int endpfn = 0; char cmd1[160]; char cmd2[80]; int i, response; @@ -698,8 +696,6 @@ segment_save(char *name) goto out; } - startpfn = seg->start_addr >> PAGE_SHIFT; - endpfn = (seg->end) >> PAGE_SHIFT; sprintf(cmd1, "DEFSEG %s", name); for (i=0; isegcnt; i++) { sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 2b771f18d1ad..c388eda1e2b1 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -253,13 +253,11 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) */ void dasd_alias_lcu_setup_complete(struct dasd_device *device) { - struct dasd_eckd_private *private; unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); @@ -279,13 +277,11 @@ void dasd_alias_lcu_setup_complete(struct dasd_device *device) void dasd_alias_wait_for_lcu_setup(struct dasd_device *device) { - struct dasd_eckd_private *private; unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 3ebdf5f92f8f..58209434c992 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1611,10 +1611,8 @@ static void dasd_eckd_analysis_callback(struct dasd_ccw_req *init_cqr, static int dasd_eckd_start_analysis(struct dasd_block *block) { - struct dasd_eckd_private *private; struct dasd_ccw_req *init_cqr; - private = (struct dasd_eckd_private *) block->base->private; init_cqr = dasd_eckd_analysis_ccw(block->base); if (IS_ERR(init_cqr)) return PTR_ERR(init_cqr); @@ -2264,7 +2262,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( unsigned int blk_per_trk, unsigned int blksize) { - struct dasd_eckd_private *private; unsigned long *idaws; struct dasd_ccw_req *cqr; struct ccw1 *ccw; @@ -2283,7 +2280,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( unsigned int recoffs; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK_DATA; else if (rq_data_dir(req) == WRITE) @@ -2573,7 +2569,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( unsigned int blk_per_trk, unsigned int blksize) { - struct dasd_eckd_private *private; struct dasd_ccw_req *cqr; struct req_iterator iter; struct bio_vec *bv; @@ -2594,7 +2589,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( unsigned int count, count_to_trk_end; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) { cmd = DASD_ECKD_CCW_READ_TRACK_DATA; itcw_op = ITCW_OP_READ; @@ -2801,7 +2795,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, struct dasd_block *block, struct request *req) { - struct dasd_eckd_private *private; unsigned long *idaws; struct dasd_device *basedev; struct dasd_ccw_req *cqr; @@ -2836,7 +2829,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, trkcount = last_trk - first_trk + 1; first_offs = 0; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index e21a5c39ef20..810ac38631c3 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -598,7 +598,6 @@ __raw3270_size_device(struct raw3270 *rp) static const unsigned char wbuf[] = { 0x00, 0x07, 0x01, 0xff, 0x03, 0x00, 0x81 }; struct raw3270_ua *uap; - unsigned short count; int rc; /* @@ -653,7 +652,6 @@ __raw3270_size_device(struct raw3270 *rp) if (rc) return rc; /* Got a Query Reply */ - count = sizeof(rp->init_data) - rp->init_request.rescnt; uap = (struct raw3270_ua *) (rp->init_data + 1); /* Paranoia check. */ if (rp->init_data[0] != 0x88 || uap->uab.qcode != 0x81) diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 6a1c58dc61a7..fa733ecd3d70 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -69,9 +69,6 @@ static DEFINE_MUTEX(sdias_mutex); static void sdias_callback(struct sclp_req *request, void *data) { - struct sdias_sccb *cbsccb; - - cbsccb = (struct sdias_sccb *) request->sccb; sclp_req_done = 1; wake_up(&sdias_wq); /* Inform caller, that request is complete */ TRACE("callback done\n"); diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index b98dcbd16711..22db2529e1b2 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -796,10 +796,8 @@ static void tape_3590_med_state_set(struct tape_device *device, static int tape_3590_done(struct tape_device *device, struct tape_request *request) { - struct tape_3590_disc_data *disc_data; DBF_EVENT(6, "%s done\n", tape_op_verbose[request->op]); - disc_data = device->discdata; switch (request->op) { case TO_BSB: @@ -1397,11 +1395,9 @@ static int tape_3590_crypt_error(struct tape_device *device, u8 cu_rc, ekm_rc1; u16 ekm_rc2; u32 drv_rc; - const char *bus_id; char *sense; sense = ((struct tape_3590_sense *) irb->ecw)->fmt.data; - bus_id = dev_name(&device->cdev->dev); cu_rc = sense[0]; drv_rc = *((u32*) &sense[5]) & 0xffffff; ekm_rc1 = sense[9]; @@ -1429,7 +1425,6 @@ tape_3590_unit_check(struct tape_device *device, struct tape_request *request, struct irb *irb) { struct tape_3590_sense *sense; - int rc; #ifdef CONFIG_S390_TAPE_BLOCK if (request->op == TO_BLOCK) { @@ -1454,7 +1449,6 @@ tape_3590_unit_check(struct tape_device *device, struct tape_request *request, * - "break": basic error recovery is done * - "goto out:": just print error message if available */ - rc = -EIO; switch (sense->rc_rqc) { case 0x1110: diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index e087a8685f95..52c233fa2b12 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -841,9 +841,6 @@ call_handler: static void ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) { - struct subchannel *sch; - - sch = to_subchannel(cdev->dev.parent); ccw_device_set_timeout(cdev, 0); /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 651976b54af8..f98698d5735e 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -418,12 +418,9 @@ int ccw_device_resume(struct ccw_device *cdev) int ccw_device_call_handler(struct ccw_device *cdev) { - struct subchannel *sch; unsigned int stctl; int ending_status; - sch = to_subchannel(cdev->dev.parent); - /* * we allow for the device action handler if . * - we received ending status From 364c18d817a6beb3303e6b8f28ea37d9c06ff382 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:26 +0200 Subject: [PATCH 05/29] [S390] dasd: fix return code handling Check return value of itcw_add_dcw() for error code and return it. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 58209434c992..30fb979d684d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2552,8 +2552,7 @@ static int prepare_itcw(struct itcw *itcw, dcw = itcw_add_dcw(itcw, pfx_cmd, 0, &pfxdata, sizeof(pfxdata), total_data_size); - - return rc; + return IS_ERR(dcw) ? PTR_ERR(dcw) : 0; } static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( From 37fa9975b28a2d86a5b47ec17e6d845dbd899c60 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:27 +0200 Subject: [PATCH 06/29] [S390] monwriter: fix return code handling Fix return code handling within monwrite_new_hdr(). Return code handling is everwhere implemented, the return code of the diagnose function was just not passed. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/monwriter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index e0702d3ea33b..4600aa10a1c6 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -97,7 +97,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) { struct monwrite_hdr *monhdr = &monpriv->hdr; struct mon_buf *monbuf; - int rc; + int rc = 0; if (monhdr->datalen > MONWRITE_MAX_DATALEN || monhdr->mon_function > MONWRITE_START_CONFIG || @@ -135,7 +135,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) mon_buf_count++; } monpriv->current_buf = monbuf; - return 0; + return rc; } static int monwrite_new_data(struct mon_private *monpriv) From 160fbf2e92cccbc1dccb4f837f5174808b2636d2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:28 +0200 Subject: [PATCH 07/29] [S390] tape: remove unused/not handled return codes Return codes are on purpose not handled or used. So remove them. Acked-by: Michael Holzheu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/tape_3590.c | 5 +---- drivers/s390/char/tape_std.c | 3 +-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 22db2529e1b2..a7d570728882 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1392,15 +1392,12 @@ tape_3590_print_era_msg(struct tape_device *device, struct irb *irb) static int tape_3590_crypt_error(struct tape_device *device, struct tape_request *request, struct irb *irb) { - u8 cu_rc, ekm_rc1; + u8 cu_rc; u16 ekm_rc2; - u32 drv_rc; char *sense; sense = ((struct tape_3590_sense *) irb->ecw)->fmt.data; cu_rc = sense[0]; - drv_rc = *((u32*) &sense[5]) & 0xffffff; - ekm_rc1 = sense[9]; ekm_rc2 = *((u16*) &sense[10]); if ((cu_rc == 0) && (ekm_rc2 == 0xee31)) /* key not defined on EKM */ diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c index 3c3f342149ec..e7650170274a 100644 --- a/drivers/s390/char/tape_std.c +++ b/drivers/s390/char/tape_std.c @@ -564,7 +564,6 @@ int tape_std_mtreten(struct tape_device *device, int mt_count) { struct tape_request *request; - int rc; request = tape_alloc_request(4, 0); if (IS_ERR(request)) @@ -576,7 +575,7 @@ tape_std_mtreten(struct tape_device *device, int mt_count) tape_ccw_cc(request->cpaddr + 2, NOP, 0, NULL); tape_ccw_end(request->cpaddr + 3, CCW_CMD_TIC, 0, request->cpaddr); /* execute it, MTRETEN rc gets ignored */ - rc = tape_do_io_interruptible(device, request); + tape_do_io_interruptible(device, request); tape_free_request(request); return tape_mtop(device, MTREW, 1); } From 89db4df160948d005b5efce82ef10f25ab5aac8b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:29 +0200 Subject: [PATCH 08/29] [S390] extmem: get rid of compile warning Get rid of these: arch/s390/mm/extmem.c: In function 'segment_modify_shared': arch/s390/mm/extmem.c:622:3: warning: 'end_addr' may be used uninitialized in this function [-Wuninitialized] arch/s390/mm/extmem.c:627:18: warning: 'start_addr' may be used uninitialized in this function [-Wuninitialized] arch/s390/mm/extmem.c: In function 'segment_load': arch/s390/mm/extmem.c:481:11: warning: 'end_addr' may be used uninitialized in this function [-Wuninitialized] arch/s390/mm/extmem.c:480:18: warning: 'start_addr' may be used uninitialized in this function [-Wuninitialized] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 214dd7b23934..075ddada4911 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -412,6 +412,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long struct dcss_segment *seg; int rc, diag_cc; + start_addr = end_addr = 0; seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); if (seg == NULL) { rc = -ENOMEM; @@ -573,6 +574,7 @@ segment_modify_shared (char *name, int do_nonshared) unsigned long start_addr, end_addr, dummy; int rc, diag_cc; + start_addr = end_addr = 0; mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { From cf2d007bd43bc254d5254fe9d30af3e73ed5b98a Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Mon, 23 May 2011 10:24:30 +0200 Subject: [PATCH 09/29] [S390] ap: skip device registration on type probe failure The registration of an ap device will be skipped, if the device type probing fails. Add names of current crypto adapters to the Kconfig help. Signed-off-by: Holger Dengler Signed-off-by: Martin Schwidefsky --- drivers/crypto/Kconfig | 2 ++ drivers/s390/crypto/ap_bus.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c64c3807f516..e0b25de1e339 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -74,6 +74,8 @@ config ZCRYPT + PCI-X Cryptographic Coprocessor (PCIXCC) + Crypto Express2 Coprocessor (CEX2C) + Crypto Express2 Accelerator (CEX2A) + + Crypto Express3 Coprocessor (CEX3C) + + Crypto Express3 Accelerator (CEX3A) config ZCRYPT_MONOLITHIC bool "Monolithic zcrypt module" diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 67302b944ab3..16e4a25596e7 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1183,8 +1183,12 @@ static void ap_scan_bus(struct work_struct *unused) INIT_LIST_HEAD(&ap_dev->list); setup_timer(&ap_dev->timeout, ap_request_timeout, (unsigned long) ap_dev); - if (device_type == 0) - ap_probe_device_type(ap_dev); + if (device_type == 0) { + if (ap_probe_device_type(ap_dev)) { + kfree(ap_dev); + continue; + } + } else ap_dev->device_type = device_type; From add7490c273650c3aa8012f3e082aa64af86f17c Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 23 May 2011 10:24:31 +0200 Subject: [PATCH 10/29] [S390] vdso: disable gcov profiling The concepts of VDSO and gcov-based profiling don't mix: the former includes kernel-provided code running in userspace, the latter adds instructions that modify counters in kernel data segments. On s390 this has not been a problem so far due to VDSO code being written in all-assembler which is exempt from gcov-based profiling. This could change in the future, so disable profiling excplicitly for VDSO code. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vdso32/Makefile | 3 +++ arch/s390/kernel/vdso64/Makefile | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index d13e8755a8cc..8ad2b34ad151 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,6 +22,9 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 449352dda9cd..2a8ddfd12a5b 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -22,6 +22,9 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so From 4c2241fd42298007d7c3a92318806a4a9490a93c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:32 +0200 Subject: [PATCH 11/29] [S390] percpu: implement arch specific irqsafe_cpu_ops Implement arch specific irqsafe_cpu ops. The arch specific ops do not disable/enable interrupts since that is an expensive operation. Instead we disable preemption and perform a compare and swap loop. Since on server distros (the ones we care about) preemption is disabled the preempt_disable()/preempt_enable() pair is a nop. In the end this code should be faster than the generic one. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cmpxchg.h | 1 - arch/s390/include/asm/percpu.h | 68 +++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 7488e52efa97..81d7908416cf 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -167,7 +167,6 @@ static inline unsigned long __cmpxchg(void *ptr, unsigned long old, #ifdef CONFIG_64BIT #define cmpxchg64(ptr, o, n) \ ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) #else /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index f7ad8719d02d..5325c89a5843 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -1,6 +1,9 @@ #ifndef __ARCH_S390_PERCPU__ #define __ARCH_S390_PERCPU__ +#include +#include + /* * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. @@ -16,6 +19,71 @@ #define ARCH_NEEDS_WEAK_PER_CPU #endif +#define arch_irqsafe_cpu_to_op(pcp, val, op) \ +do { \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ old__, new__, prev__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + prev__ = *ptr__; \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + prev__ = cmpxchg64(ptr__, old__, new__); \ + break; \ + default: \ + prev__ = cmpxchg(ptr__, old__, new__); \ + } \ + } while (prev__ != old__); \ + preempt_enable(); \ +} while (0) + +#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) + +#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) + +#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) + +#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) + +#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ ret__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + ret__ = cmpxchg64(ptr__, oval, nval); \ + break; \ + default: \ + ret__ = cmpxchg(ptr__, oval, nval); \ + } \ + preempt_enable(); \ + ret__; \ +}) + +#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) + #include #endif /* __ARCH_S390_PERCPU__ */ From b456d94a9757db54eca4677c1b3a13e7170c9bb3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:33 +0200 Subject: [PATCH 12/29] [S390] smp: add __noreturn attribute to cpu_die() Add missing __noreturn attribute to cpu_die(): arch/s390/kernel/smp.c:691:6: error: symbol 'cpu_die' redeclared with different type Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63c7d9ff220d..2310f07aaadb 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -681,7 +681,7 @@ void __cpu_die(unsigned int cpu) atomic_dec(&init_mm.context.attach_count); } -void cpu_die(void) +void __noreturn cpu_die(void) { idle_task_exit(); while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) From f2db2e6cb3f5f766cbb3788af44705685ff2445a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:34 +0200 Subject: [PATCH 13/29] [S390] pfault: cpu hotplug vs missing completion interrupts On cpu hot remove a PFAULT CANCEL command is sent to the hypervisor which in turn will cancel all outstanding pfault requests that have been issued on that cpu (the same happens with a SIGP cpu reset). The result is that we end up with uninterruptible processes where the interrupt that would wake up these processes never arrives. In order to solve this all processes which wait for a pfault completion interrupt get woken up after a cpu hot remove. The worst case that could happen is that they fault again and in turn need to wait again. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/processor.h | 1 + arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 1 + arch/s390/kernel/entry64.S | 1 + arch/s390/mm/fault.c | 89 ++++++++++++++++++++++--------- 6 files changed, 71 insertions(+), 26 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index b8624d53c379..228cf0b295db 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,7 @@ struct _lowcore { /* Address space pointer. */ __u32 kernel_asce; /* 0x02ac */ __u32 user_asce; /* 0x02b0 */ - __u8 pad_0x02b4[0x02b8-0x02b4]; /* 0x02b4 */ + __u32 current_pid; /* 0x02b4 */ /* SMP info area */ __u32 cpu_nr; /* 0x02b8 */ @@ -255,7 +255,7 @@ struct _lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0310 */ __u64 user_asce; /* 0x0318 */ - __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ + __u64 current_pid; /* 0x0320 */ /* SMP info area */ __u32 cpu_nr; /* 0x0328 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2c79b6416271..1300c3025334 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -84,6 +84,7 @@ struct thread_struct { struct per_event per_event; /* Cause of the last PER trap */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; + struct list_head list; }; typedef struct thread_struct thread_struct; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ef4555611013..edfbd17d7082 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -124,6 +124,7 @@ int main(void) DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); + DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1b67fc6ebdc2..0476174dfff5 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -212,6 +212,7 @@ __switch_to: lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task st %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next st %r5,__LC_THREAD_INFO # store thread info of next ahi %r5,STACK_SIZE # end of kernel stack of next st %r5,__LC_KERNEL_STACK # store end of kernel stack diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9fd864563499..d61967e2eab0 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -220,6 +220,7 @@ __switch_to: lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task stg %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next stg %r5,__LC_THREAD_INFO # store thread info of next aghi %r5,STACK_SIZE # end of kernel stack of next stg %r5,__LC_KERNEL_STACK # store end of kernel stack diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 177745c520ca..1ca656478326 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -466,7 +466,7 @@ typedef struct { int pfault_init(void) { pfault_refbk_t refbk = - { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, + { 0x258, 0, 5, 2, __LC_CURRENT_PID, 1ULL << 48, 1ULL << 48, __PF_RES_FIELD }; int rc; @@ -498,11 +498,15 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + static void pfault_interrupt(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { struct task_struct *tsk; __u16 subcode; + pid_t pid; /* * Get the external interruption subcode & pfault @@ -514,44 +518,79 @@ static void pfault_interrupt(unsigned int ext_int_code, if ((subcode & 0xff00) != __SUBCODE_MASK) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; - - /* - * Get the token (= address of the task structure of the affected task). - */ -#ifdef CONFIG_64BIT - tsk = (struct task_struct *) param64; -#else - tsk = (struct task_struct *) param32; -#endif - + if (subcode & 0x0080) { + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + } else { + tsk = current; + } + spin_lock(&pfault_lock); if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ - if (xchg(&tsk->thread.pfault_wait, -1) != 0) { + if (tsk->thread.pfault_wait == 1) { /* Initial interrupt was faster than the completion * interrupt. pfault_wait is valid. Set pfault_wait * back to zero and wake up the process. This can * safely be done because the task is still sleeping * and can't produce new pfaults. */ tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); wake_up_process(tsk); - put_task_struct(tsk); + } else { + /* Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. */ + tsk->thread.pfault_wait = -1; } + put_task_struct(tsk); } else { /* signal bit not set -> a real page is missing. */ - get_task_struct(tsk); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (xchg(&tsk->thread.pfault_wait, 1) != 0) { + if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial - * interrupt (swapped in a -1 for pfault_wait). Set - * pfault_wait back to zero and exit. This can be - * done safely because tsk is running in kernel - * mode and can't produce new pfaults. */ + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. */ tsk->thread.pfault_wait = 0; - set_task_state(tsk, TASK_RUNNING); - put_task_struct(tsk); - } else + } else { + /* Initial interrupt arrived before completion + * interrupt. Let the task sleep. */ + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); + } } + spin_unlock(&pfault_lock); +} + +static int __cpuinit pfault_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + } + spin_unlock_irq(&pfault_lock); + break; + default: + break; + } + return NOTIFY_OK; } static int __init pfault_irq_init(void) @@ -568,8 +607,10 @@ static int __init pfault_irq_init(void) pfault_disable = 1; return rc; } - if (pfault_init() == 0) + if (pfault_init() == 0) { + hotcpu_notifier(pfault_cpu_notify, 0); return 0; + } /* Tough luck, no pfault. */ pfault_disable = 1; From 7dd8fe1f910f9644167ef91ddab44107d0d668c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:35 +0200 Subject: [PATCH 14/29] [S390] pfault: cleanup code Small code cleanup. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 69 ++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1ca656478326..a0f9e730f26a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -452,22 +452,28 @@ static int __init nopfault(char *str) __setup("nopfault", nopfault); -typedef struct { - __u16 refdiagc; - __u16 reffcode; - __u16 refdwlen; - __u16 refversn; - __u64 refgaddr; - __u64 refselmk; - __u64 refcmpmk; - __u64 reserved; -} __attribute__ ((packed, aligned(8))) pfault_refbk_t; +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +} __attribute__ ((packed, aligned(8))); int pfault_init(void) { - pfault_refbk_t refbk = - { 0x258, 0, 5, 2, __LC_CURRENT_PID, 1ULL << 48, 1ULL << 48, - __PF_RES_FIELD }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_CURRENT_PID, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD }; int rc; if (!MACHINE_IS_VM || pfault_disable) @@ -485,8 +491,12 @@ int pfault_init(void) void pfault_fini(void) { - pfault_refbk_t refbk = - { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, + }; if (!MACHINE_IS_VM || pfault_disable) return; @@ -599,24 +609,21 @@ static int __init pfault_irq_init(void) if (!MACHINE_IS_VM) return 0; - /* - * Try to get pfault pseudo page faults going. - */ rc = register_external_interrupt(0x2603, pfault_interrupt); - if (rc) { - pfault_disable = 1; - return rc; - } - if (pfault_init() == 0) { - hotcpu_notifier(pfault_cpu_notify, 0); - return 0; - } - - /* Tough luck, no pfault. */ - pfault_disable = 1; - unregister_external_interrupt(0x2603, pfault_interrupt); + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + hotcpu_notifier(pfault_cpu_notify, 0); return 0; + +out_pfault: + unregister_external_interrupt(0x2603, pfault_interrupt); +out_extint: + pfault_disable = 1; + return rc; } early_initcall(pfault_irq_init); -#endif +#endif /* CONFIG_PFAULT */ From 0f1959f50646612b247d624bdbf8b0c8816f2a93 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 23 May 2011 10:24:36 +0200 Subject: [PATCH 15/29] [S390] convert old cpumask API into new one Adapt new API. Signed-off-by: KOSAKI Motohiro Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlbflush.h | 2 +- arch/s390/kernel/smp.c | 24 ++++++++++++------------ arch/s390/kernel/time.c | 4 ++-- arch/s390/kernel/topology.c | 16 ++++++++-------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 4fdcefc1a98d..b7a4f2eb0057 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -50,7 +50,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) /* * If the process only ran on the local cpu, do a local flush. */ - local_cpumask = cpumask_of_cpu(smp_processor_id()); + cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id())); if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) __tlb_flush_local(); else diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2310f07aaadb..fed71dcaa0d6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -335,7 +335,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; if (!cpu_stopped(logical_cpu)) continue; - cpu_set(logical_cpu, cpu_present_map); + set_cpu_present(logical_cpu, true); smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; logical_cpu = cpumask_next(logical_cpu, &avail); if (logical_cpu >= nr_cpu_ids) @@ -367,7 +367,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail) continue; __cpu_logical_map[logical_cpu] = cpu_id; smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; - cpu_set(logical_cpu, cpu_present_map); + set_cpu_present(logical_cpu, true); if (cpu >= info->configured) smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; else @@ -385,7 +385,7 @@ static int __smp_rescan_cpus(void) { cpumask_t avail; - cpus_xor(avail, cpu_possible_map, cpu_present_map); + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); if (smp_use_sigp_detection) return smp_rescan_cpus_sigp(avail); else @@ -467,7 +467,7 @@ int __cpuinit start_secondary(void *cpuvoid) notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ ipi_call_lock(); - cpu_set(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); /* Switch on interrupts */ local_irq_enable(); @@ -644,7 +644,7 @@ int __cpu_disable(void) struct ec_creg_mask_parms cr_parms; int cpu = smp_processor_id(); - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); /* Disable pfault pseudo page faults on this cpu. */ pfault_fini(); @@ -738,8 +738,8 @@ void __init smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); current_thread_info()->cpu = 0; - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_online_map); + set_cpu_present(0, true); + set_cpu_online(0, true); S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; smp_cpu_state[0] = CPU_STATE_CONFIGURED; @@ -1016,21 +1016,21 @@ int __ref smp_rescan_cpus(void) get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - newcpus = cpu_present_map; + cpumask_copy(&newcpus, cpu_present_mask); rc = __smp_rescan_cpus(); if (rc) goto out; - cpus_andnot(newcpus, cpu_present_map, newcpus); - for_each_cpu_mask(cpu, newcpus) { + cpumask_andnot(&newcpus, cpu_present_mask, &newcpus); + for_each_cpu(cpu, &newcpus) { rc = smp_add_present_cpu(cpu); if (rc) - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); } rc = 0; out: mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); - if (!cpus_empty(newcpus)) + if (!cpumask_empty(&newcpus)) topology_schedule_update(); return rc; } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 87be655557aa..a59557f1fb5f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -810,7 +810,7 @@ static int etr_sync_clock_stop(struct etr_aib *aib, int port) etr_sync.etr_port = port; get_online_cpus(); atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map); + rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); put_online_cpus(); return rc; } @@ -1579,7 +1579,7 @@ static void stp_work_fn(struct work_struct *work) memset(&stp_sync, 0, sizeof(stp_sync)); get_online_cpus(); atomic_set(&stp_sync.cpus, num_online_cpus() - 1); - stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map); + stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); put_online_cpus(); if (!check_sync_clock()) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 94b06c31fc8a..2eafb8c7a746 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -52,20 +52,20 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { cpumask_t mask; - cpus_clear(mask); + cpumask_clear(&mask); if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) { cpumask_copy(&mask, cpumask_of(cpu)); return mask; } while (info) { - if (cpu_isset(cpu, info->mask)) { + if (cpumask_test_cpu(cpu, &info->mask)) { mask = info->mask; break; } info = info->next; } - if (cpus_empty(mask)) - mask = cpumask_of_cpu(cpu); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpumask_of(cpu)); return mask; } @@ -85,10 +85,10 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu, if (cpu_logical_map(lcpu) != rcpu) continue; #ifdef CONFIG_SCHED_BOOK - cpu_set(lcpu, book->mask); + cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; #endif - cpu_set(lcpu, core->mask); + cpumask_set_cpu(lcpu, &core->mask); cpu_core_id[lcpu] = core->id; smp_cpu_polarization[lcpu] = tl_cpu->pp; } @@ -101,13 +101,13 @@ static void clear_masks(void) info = &core_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } #ifdef CONFIG_SCHED_BOOK info = &book_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } #endif From 1b60f68f6674ecc5c86f7dfe7e87941bc4236b9b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:37 +0200 Subject: [PATCH 16/29] [S390] sclp: remove unnecessary sendmask check The sendmask check is not needed. Remove it. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_config.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 16e232a99fb7..95b909ac2b73 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -71,21 +71,9 @@ static struct sclp_register sclp_conf_register = static int __init sclp_conf_init(void) { - int rc; - INIT_WORK(&sclp_cpu_capability_work, sclp_cpu_capability_notify); INIT_WORK(&sclp_cpu_change_work, sclp_cpu_change_notify); - - rc = sclp_register(&sclp_conf_register); - if (rc) - return rc; - - if (!(sclp_conf_register.sclp_send_mask & EVTYP_CONFMGMDATA_MASK)) { - pr_warning("no configuration management.\n"); - sclp_unregister(&sclp_conf_register); - rc = -ENOSYS; - } - return rc; + return sclp_register(&sclp_conf_register); } __initcall(sclp_conf_init); From c26001d4e9133fe45e47eee18cfd826219e71fb9 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:38 +0200 Subject: [PATCH 17/29] [S390] qdio: prevent compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent the following compile warning for !CONFIG_64BIT: CC drivers/s390/cio/qdio_main.o drivers/s390/cio/qdio_main.c: In function ‘handle_outbound’: drivers/s390/cio/qdio_main.c:1449: warning: ‘state’ may be used uninitialized in this function Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index e8f267eb8887..55e8f721e38a 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1446,7 +1446,7 @@ set: static int handle_outbound(struct qdio_q *q, unsigned int callflags, int bufnr, int count) { - unsigned char state; + unsigned char state = 0; int used, rc = 0; qperf_inc(q, outbound_call); From 2d42552d1c1659b014851cf449ad2fe458509128 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:39 +0200 Subject: [PATCH 18/29] [S390] merge page_test_dirty and page_clear_dirty The page_clear_dirty primitive always sets the default storage key which resets the access control bits and the fetch protection bit. That will surprise a KVM guest that sets non-zero access control bits or the fetch protection bit. Merge page_test_dirty and page_clear_dirty back to a single function and only clear the dirty bit from the storage key. In addition move the function page_test_and_clear_dirty and page_test_and_clear_young to page.h where they belong. This requires to change the parameter from a struct page * to a page frame number. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 56 +++++++++++++++++++++++++++---- arch/s390/include/asm/pgtable.h | 58 +++++---------------------------- include/asm-generic/pgtable.h | 12 +++---- include/linux/page-flags.h | 2 +- mm/rmap.c | 11 +++---- 5 files changed, 68 insertions(+), 71 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3c987e9ec8d6..81ee2776088d 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -107,8 +107,8 @@ typedef pte_t *pgtable_t; #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -static inline void -page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) { if (!mapped) asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" @@ -117,15 +117,59 @@ page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } -static inline unsigned int -page_get_storage_key(unsigned long addr) +static inline unsigned char page_get_storage_key(unsigned long addr) { - unsigned int skey; + unsigned char skey; - asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0)); + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); return skey; } +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits int the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + +/* + * Test and clear dirty bit in storage key. + * We can't clear the changed bit atomically. This is a potential + * race against modification of the referenced bit. This function + * should therefore only be called if it is not mapped in any + * address space. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped) +{ + unsigned char skey; + + skey = page_get_storage_key(pfn << PAGE_SHIFT); + if (!(skey & _PAGE_CHANGED)) + return 0; + page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped); + return 1; +} + +/* + * Test and clear referenced bit in storage key. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG +static inline int page_test_and_clear_young(unsigned long pfn) +{ + return page_reset_referenced(pfn << PAGE_SHIFT); +} + struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 763620ec7925..4ca4dd2b329a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -373,10 +373,6 @@ extern unsigned long VMALLOC_START; #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ _ASCE_ALT_EVENT) -/* Bits int the storage key */ -#define _PAGE_CHANGED 0x02 /* HW changed bit */ -#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ - /* * Page protection definitions. */ @@ -555,8 +551,6 @@ static inline void rcp_unlock(pte_t *ptep) #endif } -/* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page, int mapped); #include static inline void ptep_rcp_copy(pte_t *ptep) @@ -566,7 +560,7 @@ static inline void ptep_rcp_copy(pte_t *ptep) unsigned int skey; unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - skey = page_get_storage_key(page_to_phys(page)); + skey = page_get_storage_key(pte_val(*ptep) >> PAGE_SHIFT); if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); set_bit_simple(KVM_UD_BIT, pgste); @@ -760,6 +754,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, { int dirty; unsigned long *pgste; + unsigned long pfn; struct page *page; unsigned int skey; @@ -767,8 +762,9 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, return -EINVAL; rcp_lock(ptep); pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - page = virt_to_page(pte_val(*ptep)); - skey = page_get_storage_key(page_to_phys(page)); + pfn = pte_val(*ptep) >> PAGE_SHIFT; + page = pfn_to_page(pfn); + skey = page_get_storage_key(pfn); if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); set_bit_simple(KVM_UD_BIT, pgste); @@ -779,7 +775,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, } dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); if (skey & _PAGE_CHANGED) - page_clear_dirty(page, 1); + page_set_storage_key(pfn, skey & ~_PAGE_CHANGED, 1); rcp_unlock(ptep); return dirty; } @@ -790,16 +786,16 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { #ifdef CONFIG_PGSTE - unsigned long physpage; + unsigned long pfn; int young; unsigned long *pgste; if (!vma->vm_mm->context.has_pgste) return 0; - physpage = pte_val(*ptep) & PAGE_MASK; + pfn = pte_val(*ptep) >> PAGE_SHIFT; pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); + young = ((page_get_storage_key(pfn) & _PAGE_REFERENCED) != 0); rcp_lock(ptep); if (young) set_bit_simple(RCP_GR_BIT, pgste); @@ -936,42 +932,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, __changed; \ }) -/* - * Test and clear dirty bit in storage key. - * We can't clear the changed bit atomically. This is a potential - * race against modification of the referenced bit. This function - * should therefore only be called if it is not mapped in any - * address space. - */ -#define __HAVE_ARCH_PAGE_TEST_DIRTY -static inline int page_test_dirty(struct page *page) -{ - return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; -} - -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page, int mapped) -{ - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); -} - -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(struct page *page) -{ - unsigned long physpage = page_to_phys(page); - int ccode; - - asm volatile( - " rrbe 0,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) : "a" (physpage) : "cc" ); - return ccode & 2; -} - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b4bfe338ea0e..e9b8e5926bef 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -184,22 +184,18 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY -#define page_test_dirty(page) (0) +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +#define page_test_and_clear_dirty(pfn, mapped) (0) #endif -#ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY -#define page_clear_dirty(page, mapped) do { } while (0) -#endif - -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY #define pte_maybe_dirty(pte) pte_dirty(pte) #else #define pte_maybe_dirty(pte) (1) #endif #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -#define page_test_and_clear_young(page) (0) +#define page_test_and_clear_young(pfn) (0) #endif #ifndef __HAVE_ARCH_PGD_OFFSET_GATE diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 811183de1ef5..79a6700b7162 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_clear_dirty(page, 0); + page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, diff --git a/mm/rmap.c b/mm/rmap.c index 8da044a1db0f..522e4a93cadd 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -719,7 +719,7 @@ int page_referenced(struct page *page, unlock_page(page); } out: - if (page_test_and_clear_young(page)) + if (page_test_and_clear_young(page_to_pfn(page))) referenced++; return referenced; @@ -785,10 +785,8 @@ int page_mkclean(struct page *page) struct address_space *mapping = page_mapping(page); if (mapping) { ret = page_mkclean_file(mapping, page); - if (page_test_dirty(page)) { - page_clear_dirty(page, 1); + if (page_test_and_clear_dirty(page_to_pfn(page), 1)) ret = 1; - } } } @@ -981,10 +979,9 @@ void page_remove_rmap(struct page *page) * not if it's in swapcache - there might be another pte slot * containing the swap entry, but page not yet written to swap. */ - if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) { - page_clear_dirty(page, 1); + if ((!PageAnon(page) || PageSwapCache(page)) && + page_test_and_clear_dirty(page_to_pfn(page), 1)) set_page_dirty(page); - } /* * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED * and not charged by memcg for now. From b2fa47e6bf5148aa6dbf22ec79f18141b421eeba Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:40 +0200 Subject: [PATCH 19/29] [S390] refactor page table functions for better pgste support Rework the architecture page table functions to access the bits in the page table extension array (pgste). There are a number of changes: 1) Fix missing pgste update if the attach_count for the mm is <= 1. 2) For every operation that affects the invalid bit in the pte or the rcp byte in the pgste the pcl lock needs to be acquired. The function pgste_get_lock gets the pcl lock and returns the current pgste value for a pte pointer. The function pgste_set_unlock stores the pgste and releases the lock. Between these two calls the bits in the pgste can be shuffled. 3) Define two software bits in the pte _PAGE_SWR and _PAGE_SWC to avoid calling SetPageDirty and SetPageReferenced from pgtable.h. If the host reference backup bit or the host change backup bit has been set the dirty/referenced state is transfered to the pte. The common code will pick up the state from the pte. 4) Add ptep_modify_prot_start and ptep_modify_prot_commit for mprotect. 5) Remove pgd_populate_kernel, pud_populate_kernel, pmd_populate_kernel pgd_clear_kernel, pud_clear_kernel, pmd_clear_kernel and ptep_invalidate. 6) Rename kvm_s390_test_and_clear_page_dirty to ptep_test_and_clear_user_dirty and add ptep_test_and_clear_user_young. 7) Define mm_exclusive() and mm_has_pgste() helper to improve readability. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 6 +- arch/s390/include/asm/page.h | 4 + arch/s390/include/asm/pgalloc.h | 29 +- arch/s390/include/asm/pgtable.h | 510 +++++++++++++++++++------------- arch/s390/mm/init.c | 3 +- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/vmem.c | 14 +- 7 files changed, 333 insertions(+), 235 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 818e8298a6bd..82d0847896a0 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -9,8 +9,10 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - int has_pgste; /* The mmu context has extended page tables */ - int alloc_pgste; /* cloned contexts will have extended page tables */ + /* Cloned contexts will be created with extended page tables. */ + unsigned int alloc_pgste:1; + /* The mmu context has extended page tables. */ + unsigned int has_pgste:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 81ee2776088d..accb372ddc7e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -90,6 +90,7 @@ static inline void copy_page(void *to, void *from) */ typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long pgste; } pgste_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; @@ -97,13 +98,16 @@ typedef struct { unsigned long pgd; } pgd_t; typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) +#define pgste_val(x) ((x).pgste) #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) +#define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 739ff9ec1395..f6314af3b354 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -65,10 +65,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) #define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pgd, pud) BUG() -#define pgd_populate_kernel(mm, pgd, pud) BUG() - #define pud_populate(mm, pud, pmd) BUG() -#define pud_populate_kernel(mm, pud, pmd) BUG() #else /* __s390x__ */ @@ -102,26 +99,14 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) } #define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) -static inline void pgd_populate_kernel(struct mm_struct *mm, - pgd_t *pgd, pud_t *pud) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) -{ - pgd_populate_kernel(mm, pgd, pud); -} - -static inline void pud_populate_kernel(struct mm_struct *mm, - pud_t *pud, pmd_t *pmd) -{ - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); -} - static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_populate_kernel(mm, pud, pmd); + pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); } #endif /* __s390x__ */ @@ -134,17 +119,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) -static inline void pmd_populate_kernel(struct mm_struct *mm, - pmd_t *pmd, pte_t *pte) +static inline void pmd_populate(struct mm_struct *mm, + pmd_t *pmd, pgtable_t pte) { pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); } -static inline void pmd_populate(struct mm_struct *mm, - pmd_t *pmd, pgtable_t pte) -{ - pmd_populate_kernel(mm, pmd, pte); -} +#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) #define pmd_pgtable(pmd) \ (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4ca4dd2b329a..c4773a2ef3d3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -31,9 +31,8 @@ #ifndef __ASSEMBLY__ #include #include -#include #include -#include +#include extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); @@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START; /* Software bits in the page table entry */ #define _PAGE_SWT 0x001 /* SW pte type bit t */ #define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SPECIAL 0x004 /* SW associated with special page */ +#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */ +#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */ +#define _PAGE_SPECIAL 0x010 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR) /* Six different types of pages. */ #define _PAGE_TYPE_EMPTY 0x400 @@ -293,14 +294,17 @@ extern unsigned long VMALLOC_START; */ /* Page status table bits for virtualization */ -#define RCP_PCL_BIT 55 -#define RCP_HR_BIT 54 -#define RCP_HC_BIT 53 -#define RCP_GR_BIT 50 -#define RCP_GC_BIT 49 +#define RCP_ACC_BITS 0xf000000000000000UL +#define RCP_FP_BIT 0x0800000000000000UL +#define RCP_PCL_BIT 0x0080000000000000UL +#define RCP_HR_BIT 0x0040000000000000UL +#define RCP_HC_BIT 0x0020000000000000UL +#define RCP_GR_BIT 0x0004000000000000UL +#define RCP_GC_BIT 0x0002000000000000UL -/* User dirty bit for KVM's migration feature */ -#define KVM_UD_BIT 47 +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x0000800000000000UL +#define KVM_UC_BIT 0x0000400000000000UL #ifndef __s390x__ @@ -407,17 +411,20 @@ extern unsigned long VMALLOC_START; #define __S110 PAGE_RW #define __S111 PAGE_RW -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) +static inline int mm_exclusive(struct mm_struct *mm) { - *ptep = entry; + return likely(mm == current->active_mm && + atomic_read(&mm->context.attach_count) <= 1); } +static inline int mm_has_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.has_pgste)) + return 1; +#endif + return 0; +} /* * pgd/pmd/pte query functions */ @@ -530,50 +537,127 @@ static inline int pte_special(pte_t pte) } #define __HAVE_ARCH_PTE_SAME -#define pte_same(a,b) (pte_val(a) == pte_val(b)) - -static inline void rcp_lock(pte_t *ptep) +static inline int pte_same(pte_t a, pte_t b) { -#ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - preempt_disable(); - while (test_and_set_bit(RCP_PCL_BIT, pgste)) - ; -#endif + return pte_val(a) == pte_val(b); } -static inline void rcp_unlock(pte_t *ptep) +static inline pgste_t pgste_get_lock(pte_t *ptep) +{ + unsigned long new = 0; +#ifdef CONFIG_PGSTE + unsigned long old; + + preempt_disable(); + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ + " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc"); +#endif + return __pgste(new); +} + +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - clear_bit(RCP_PCL_BIT, pgste); + asm( + " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc"); preempt_enable(); #endif } -#include - -static inline void ptep_rcp_copy(pte_t *ptep) +static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - struct page *page = virt_to_page(pte_val(*ptep)); - unsigned int skey; - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + unsigned long pfn, bits; + unsigned char skey; - skey = page_get_storage_key(pte_val(*ptep) >> PAGE_SHIFT); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); + pfn = pte_val(*ptep) >> PAGE_SHIFT; + skey = page_get_storage_key(pfn); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Clear page changed & referenced bit in the storage key */ + if (bits) { + skey ^= bits; + page_set_storage_key(pfn, skey, 1); } - if (skey & _PAGE_REFERENCED) - set_bit_simple(RCP_GR_BIT, pgste); - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); - } - if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) - SetPageReferenced(page); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + /* Get host changed & referenced bits from pgste */ + bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + /* Clear host bits in pgste. */ + pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); + pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) |= + (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + /* Transfer changed and referenced to kvm user bits */ + pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + /* Transfer changed & referenced to pte sofware bits */ + pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ #endif + return pgste; + +} + +static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + int young; + + young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Transfer page referenced bit to pte software bit (host view) */ + if (young || (pgste_val(pgste) & RCP_HR_BIT)) + pte_val(*ptep) |= _PAGE_SWR; + /* Clear host referenced bit in pgste. */ + pgste_val(pgste) &= ~RCP_HR_BIT; + /* Transfer page referenced bit to guest bit in pgste */ + pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ +#endif + return pgste; + +} + +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + unsigned long pfn; + unsigned long okey, nkey; + + pfn = pte_val(*ptep) >> PAGE_SHIFT; + okey = nkey = page_get_storage_key(pfn); + nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set page access key and fetch protection bit from pgste */ + nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; + if (okey != nkey) + page_set_storage_key(pfn, nkey, 1); +#endif +} + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_set_pte(ptep, pgste); + *ptep = entry; + pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; } /* @@ -587,19 +671,19 @@ static inline int pte_write(pte_t pte) static inline int pte_dirty(pte_t pte) { - /* A pte is neither clean nor dirty on s/390. The dirty bit - * is in the storage key. See page_test_and_clear_dirty for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWC) + return 1; +#endif return 0; } static inline int pte_young(pte_t pte) { - /* A pte is neither young nor old on s/390. The young bit - * is in the storage key. See page_test_and_clear_young for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWR) + return 1; +#endif return 0; } @@ -607,46 +691,27 @@ static inline int pte_young(pte_t pte) * pgd/pmd/pte modification functions */ -#ifndef __s390x__ - -#define pgd_clear(pgd) do { } while (0) -#define pud_clear(pud) do { } while (0) - -#else /* __s390x__ */ - -static inline void pgd_clear_kernel(pgd_t * pgd) +static inline void pgd_clear(pgd_t *pgd) { +#ifdef __s390x__ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; -} - -static inline void pgd_clear(pgd_t * pgd) -{ - pgd_clear_kernel(pgd); -} - -static inline void pud_clear_kernel(pud_t *pud) -{ - if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif } static inline void pud_clear(pud_t *pud) { - pud_clear_kernel(pud); +#ifdef __s390x__ + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif } -#endif /* __s390x__ */ -static inline void pmd_clear_kernel(pmd_t * pmdp) +static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; } -static inline void pmd_clear(pmd_t *pmd) -{ - pmd_clear_kernel(pmd); -} - static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_val(*ptep) = _PAGE_TYPE_EMPTY; @@ -679,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - /* The only user of pte_mkclean is the fork() code. - We must *not* clear the *physical* page dirty bit - just because fork() wants to clear the dirty bit in - *one* of the page's mappings. So we just do nothing. */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWC; +#endif return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - /* We do not explicitly set the dirty bit because the - * sske instruction is slow. It is faster to let the - * next instruction set the dirty bit. - */ return pte; } static inline pte_t pte_mkold(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in clearing the real referenced bit. - */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWR; +#endif return pte; } static inline pte_t pte_mkyoung(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in setting the real referenced bit. - */ return pte; } @@ -745,64 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte) } #endif -#ifdef CONFIG_PGSTE /* - * Get (and clear) the user dirty bit for a PTE. + * Get (and clear) the user dirty bit for a pte. */ -static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, - pte_t *ptep) +static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, + pte_t *ptep) { - int dirty; - unsigned long *pgste; - unsigned long pfn; - struct page *page; - unsigned int skey; + pgste_t pgste; + int dirty = 0; - if (!mm->context.has_pgste) - return -EINVAL; - rcp_lock(ptep); - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - pfn = pte_val(*ptep) >> PAGE_SHIFT; - page = pfn_to_page(pfn); - skey = page_get_storage_key(pfn); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_all(ptep, pgste); + dirty = !!(pgste_val(pgste) & KVM_UC_BIT); + pgste_val(pgste) &= ~KVM_UC_BIT; + pgste_set_unlock(ptep, pgste); + return dirty; } - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); - } - dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); - if (skey & _PAGE_CHANGED) - page_set_storage_key(pfn, skey & ~_PAGE_CHANGED, 1); - rcp_unlock(ptep); return dirty; } -#endif + +/* + * Get (and clear) the user referenced bit for a pte. + */ +static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, + pte_t *ptep) +{ + pgste_t pgste; + int young = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + young = !!(pgste_val(pgste) & KVM_UR_BIT); + pgste_val(pgste) &= ~KVM_UR_BIT; + pgste_set_unlock(ptep, pgste); + } + return young; +} #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PGSTE - unsigned long pfn; - int young; - unsigned long *pgste; + pgste_t pgste; + pte_t pte; - if (!vma->vm_mm->context.has_pgste) - return 0; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - young = ((page_get_storage_key(pfn) & _PAGE_REFERENCED) != 0); - rcp_lock(ptep); - if (young) - set_bit_simple(RCP_GR_BIT, pgste); - young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); - rcp_unlock(ptep); - return young; -#endif + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + pte = *ptep; + *ptep = pte_mkold(pte); + pgste_set_unlock(ptep, pgste); + return pte_young(pte); + } return 0; } @@ -814,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, * On s390 reference bits are in storage key and never in TLB * With virtualization we handle the reference bit, without we * we can simply return */ -#ifdef CONFIG_PGSTE return ptep_test_and_clear_young(vma, address, ptep); -#endif - return 0; } static inline void __ptep_ipte(unsigned long address, pte_t *ptep) @@ -837,21 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) } } -static inline void ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - if (mm->context.has_pgste) { - rcp_lock(ptep); - __ptep_ipte(address, ptep); - ptep_rcp_copy(ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - rcp_unlock(ptep); - return; - } - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; -} - /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -866,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm, * is a nop. */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(__mm, __address, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __address, __ptep); \ - else \ - pte_clear((__mm), (__address), (__ptep)); \ - __pte; \ -}) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long address, + pte_t *ptep) +{ + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + return pte; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, pte_t pte) +{ + *ptep = pte; + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); +} #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - pte_t pte = *ptep; - ptep_invalidate(vma->vm_mm, address, ptep); + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } @@ -896,41 +979,68 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long addr, + unsigned long address, pte_t *ptep, int full) { - pte_t pte = *ptep; + pgste_t pgste; + pte_t pte; - if (full) - pte_clear(mm, addr, ptep); - else - ptep_invalidate(mm, addr, ptep); + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!full) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __addr, __ptep); \ - set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ - } \ -}) +static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte = *ptep; + + if (pte_write(pte)) { + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + *ptep = pte_wrprotect(pte); + + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, pgste); + } + return pte; +} #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed) { \ - ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + pgste_t pgste; + + if (pte_same(*ptep, entry)) + return 0; + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + __ptep_ipte(address, ptep); + *ptep = entry; + + if (mm_has_pgste(vma->vm_mm)) + pgste_set_unlock(ptep, pgste); + return 1; +} /* * Conversion functions: convert a page and protection to a page entry, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index bb409332a484..dfefc2171691 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -175,7 +175,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); if (!enable) { - ptep_invalidate(&init_mm, address, pte); + __ptep_ipte(address, pte); + pte_val(*pte) = _PAGE_TYPE_EMPTY; continue; } *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f05edcc3beff..d013ed39743b 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -28,7 +28,7 @@ static void change_page_attr(unsigned long addr, int numpages, pte = *ptep; pte = set(pte); - ptep_invalidate(&init_mm, addr, ptep); + __ptep_ipte(addr, ptep); *ptep = pte; addr += PAGE_SIZE; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 34c43f23b28c..8c1970d1dd91 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -95,7 +95,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); @@ -103,7 +103,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); @@ -123,7 +123,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); } pt_dir = pte_offset_kernel(pm_dir, address); @@ -159,7 +159,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) continue; if (pmd_huge(*pm_dir)) { - pmd_clear_kernel(pm_dir); + pmd_clear(pm_dir); address += HPAGE_SIZE - PAGE_SIZE; continue; } @@ -192,7 +192,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); @@ -200,7 +200,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } pm_dir = pmd_offset(pu_dir, address); @@ -208,7 +208,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); } pt_dir = pte_offset_kernel(pm_dir, address); From fca894edd7566f5c548598c8fad7f329278c23b4 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 23 May 2011 10:24:41 +0200 Subject: [PATCH 20/29] [S390] chsc: process channel-path-availability information Update affected channel path descriptors when receiving channel path availability information. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 0689fcf23a11..75c3f1f8fd43 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -326,6 +326,36 @@ static void chsc_process_sei_res_acc(struct chsc_sei_area *sei_area) s390_process_res_acc(&link); } +static void chsc_process_sei_chp_avail(struct chsc_sei_area *sei_area) +{ + struct channel_path *chp; + struct chp_id chpid; + u8 *data; + int num; + + CIO_CRW_EVENT(4, "chsc: channel path availability information\n"); + if (sei_area->rs != 0) + return; + data = sei_area->ccdf; + chp_id_init(&chpid); + for (num = 0; num <= __MAX_CHPID; num++) { + if (!chp_test_bit(data, num)) + continue; + chpid.id = num; + + CIO_CRW_EVENT(4, "Update information for channel path " + "%x.%02x\n", chpid.cssid, chpid.id); + chp = chpid_to_chp(chpid); + if (!chp) { + chp_new(chpid); + continue; + } + mutex_lock(&chp->lock); + chsc_determine_base_channel_path_desc(chpid, &chp->desc); + mutex_unlock(&chp->lock); + } +} + struct chp_config_data { u8 map[32]; u8 op; @@ -376,9 +406,12 @@ static void chsc_process_sei(struct chsc_sei_area *sei_area) case 1: /* link incident*/ chsc_process_sei_link_incident(sei_area); break; - case 2: /* i/o resource accessibiliy */ + case 2: /* i/o resource accessibility */ chsc_process_sei_res_acc(sei_area); break; + case 7: /* channel-path-availability information */ + chsc_process_sei_chp_avail(sei_area); + break; case 8: /* channel-path-configuration notification */ chsc_process_sei_chp_config(sei_area); break; From 30c2df51173ea4e4755ad52be7f2914f01e32404 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:42 +0200 Subject: [PATCH 21/29] [S390] sclp: event buffer dissection Move gds vector/subvector find functions to the sclp header file. Simplify event buffer dissection in sclp tty code. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp.h | 22 +++++++ drivers/s390/char/sclp_tty.c | 124 +++++++++++------------------------ 2 files changed, 61 insertions(+), 85 deletions(-) diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6bb5a6bdfab5..bc23b05bfe7d 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -186,4 +186,26 @@ sclp_ascebc_str(unsigned char *str, int nr) (MACHINE_IS_VM) ? ASCEBC(str, nr) : ASCEBC_500(str, nr); } +static inline struct gds_vector * +sclp_find_gds_vector(void *start, void *end, u16 id) +{ + struct gds_vector *v; + + for (v = start; (void *) v < end; v = (void *) v + v->length) + if (v->gds_id == id) + return v; + return NULL; +} + +static inline struct gds_subvector * +sclp_find_gds_subvector(void *start, void *end, u8 key) +{ + struct gds_subvector *sv; + + for (sv = start; (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == key) + return sv; + return NULL; +} + #endif /* __SCLP_H__ */ diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index 8258d590505f..a879c139926a 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -408,118 +408,72 @@ static int sclp_switch_cases(unsigned char *buf, int count) return op - buf; } -static void -sclp_get_input(unsigned char *start, unsigned char *end) +static void sclp_get_input(struct gds_subvector *sv) { + unsigned char *str; int count; - count = end - start; + str = (unsigned char *) (sv + 1); + count = sv->length - sizeof(*sv); if (sclp_tty_tolower) - EBC_TOLOWER(start, count); - count = sclp_switch_cases(start, count); + EBC_TOLOWER(str, count); + count = sclp_switch_cases(str, count); /* convert EBCDIC to ASCII (modify original input in SCCB) */ - sclp_ebcasc_str(start, count); + sclp_ebcasc_str(str, count); /* transfer input to high level driver */ - sclp_tty_input(start, count); + sclp_tty_input(str, count); } -static inline struct gds_vector * -find_gds_vector(struct gds_vector *start, struct gds_vector *end, u16 id) +static inline void sclp_eval_selfdeftextmsg(struct gds_subvector *sv) { - struct gds_vector *vec; + void *end; - for (vec = start; vec < end; vec = (void *) vec + vec->length) - if (vec->gds_id == id) - return vec; - return NULL; + end = (void *) sv + sv->length; + for (sv = sv + 1; (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == 0x30) + sclp_get_input(sv); } -static inline struct gds_subvector * -find_gds_subvector(struct gds_subvector *start, - struct gds_subvector *end, u8 key) +static inline void sclp_eval_textcmd(struct gds_vector *v) { - struct gds_subvector *subvec; + struct gds_subvector *sv; + void *end; + + end = (void *) v + v->length; + for (sv = (struct gds_subvector *) (v + 1); + (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == GDS_KEY_SELFDEFTEXTMSG) + sclp_eval_selfdeftextmsg(sv); - for (subvec = start; subvec < end; - subvec = (void *) subvec + subvec->length) - if (subvec->key == key) - return subvec; - return NULL; } -static inline void -sclp_eval_selfdeftextmsg(struct gds_subvector *start, - struct gds_subvector *end) +static inline void sclp_eval_cpmsu(struct gds_vector *v) { - struct gds_subvector *subvec; + void *end; - subvec = start; - while (subvec < end) { - subvec = find_gds_subvector(subvec, end, 0x30); - if (!subvec) - break; - sclp_get_input((unsigned char *)(subvec + 1), - (unsigned char *) subvec + subvec->length); - subvec = (void *) subvec + subvec->length; - } -} - -static inline void -sclp_eval_textcmd(struct gds_subvector *start, - struct gds_subvector *end) -{ - struct gds_subvector *subvec; - - subvec = start; - while (subvec < end) { - subvec = find_gds_subvector(subvec, end, - GDS_KEY_SELFDEFTEXTMSG); - if (!subvec) - break; - sclp_eval_selfdeftextmsg((struct gds_subvector *)(subvec + 1), - (void *)subvec + subvec->length); - subvec = (void *) subvec + subvec->length; - } -} - -static inline void -sclp_eval_cpmsu(struct gds_vector *start, struct gds_vector *end) -{ - struct gds_vector *vec; - - vec = start; - while (vec < end) { - vec = find_gds_vector(vec, end, GDS_ID_TEXTCMD); - if (!vec) - break; - sclp_eval_textcmd((struct gds_subvector *)(vec + 1), - (void *) vec + vec->length); - vec = (void *) vec + vec->length; - } + end = (void *) v + v->length; + for (v = v + 1; (void *) v < end; v = (void *) v + v->length) + if (v->gds_id == GDS_ID_TEXTCMD) + sclp_eval_textcmd(v); } -static inline void -sclp_eval_mdsmu(struct gds_vector *start, void *end) +static inline void sclp_eval_mdsmu(struct gds_vector *v) { - struct gds_vector *vec; - - vec = find_gds_vector(start, end, GDS_ID_CPMSU); - if (vec) - sclp_eval_cpmsu(vec + 1, (void *) vec + vec->length); + v = sclp_find_gds_vector(v + 1, (void *) v + v->length, GDS_ID_CPMSU); + if (v) + sclp_eval_cpmsu(v); } -static void -sclp_tty_receiver(struct evbuf_header *evbuf) +static void sclp_tty_receiver(struct evbuf_header *evbuf) { - struct gds_vector *start, *end, *vec; + struct gds_vector *v; - start = (struct gds_vector *)(evbuf + 1); - end = (void *) evbuf + evbuf->length; - vec = find_gds_vector(start, end, GDS_ID_MDSMU); - if (vec) - sclp_eval_mdsmu(vec + 1, (void *) vec + vec->length); + v = sclp_find_gds_vector(evbuf + 1, (void *) evbuf + evbuf->length, + GDS_ID_MDSMU); + if (v) + sclp_eval_mdsmu(v); } static void From 7eb9d5bec552eff896ebf079386dc47e9dc2fc89 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:43 +0200 Subject: [PATCH 22/29] [S390] get CPC image name Provide sysfs attributes that contain the CPC name and the HMC network name of the machine the operating system is running on. This information is retrieved with the operation communication parameters (OCF) sclp interface. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/Makefile | 2 +- drivers/s390/char/sclp.h | 2 + drivers/s390/char/sclp_ocf.c | 145 +++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 drivers/s390/char/sclp_ocf.c diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index af01b5abd069..f3c325207445 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -3,7 +3,7 @@ # obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ - sclp_cmd.o sclp_config.o sclp_cpi_sys.o + sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o obj-$(CONFIG_TN3270) += raw3270.o obj-$(CONFIG_TN3270_CONSOLE) += con3270.o diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index bc23b05bfe7d..49a1bb52bc87 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -28,6 +28,7 @@ #define EVTYP_CONFMGMDATA 0x04 #define EVTYP_SDIAS 0x1C #define EVTYP_ASYNC 0x0A +#define EVTYP_OCF 0x1E #define EVTYP_OPCMD_MASK 0x80000000 #define EVTYP_MSG_MASK 0x40000000 @@ -40,6 +41,7 @@ #define EVTYP_CONFMGMDATA_MASK 0x10000000 #define EVTYP_SDIAS_MASK 0x00000010 #define EVTYP_ASYNC_MASK 0x00400000 +#define EVTYP_OCF_MASK 0x00000004 #define GNRLMSGFLGS_DOM 0x8000 #define GNRLMSGFLGS_SNDALRM 0x4000 diff --git a/drivers/s390/char/sclp_ocf.c b/drivers/s390/char/sclp_ocf.c new file mode 100644 index 000000000000..ab294d5a534e --- /dev/null +++ b/drivers/s390/char/sclp_ocf.c @@ -0,0 +1,145 @@ +/* + * drivers/s390/char/sclp_ocf.c + * SCLP OCF communication parameters sysfs interface + * + * Copyright IBM Corp. 2011 + * Author(s): Martin Schwidefsky + */ + +#define KMSG_COMPONENT "sclp_ocf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sclp.h" + +#define OCF_LENGTH_HMC_NETWORK 8UL +#define OCF_LENGTH_CPC_NAME 8UL + +static char hmc_network[OCF_LENGTH_HMC_NETWORK + 1]; +static char cpc_name[OCF_LENGTH_CPC_NAME + 1]; + +static DEFINE_SPINLOCK(sclp_ocf_lock); +static struct work_struct sclp_ocf_change_work; + +static struct kset *ocf_kset; + +static void sclp_ocf_change_notify(struct work_struct *work) +{ + kobject_uevent(&ocf_kset->kobj, KOBJ_CHANGE); +} + +/* Handler for OCF event. Look for the CPC image name. */ +static void sclp_ocf_handler(struct evbuf_header *evbuf) +{ + struct gds_vector *v; + struct gds_subvector *sv, *netid, *cpc; + size_t size; + + /* Find the 0x9f00 block. */ + v = sclp_find_gds_vector(evbuf + 1, (void *) evbuf + evbuf->length, + 0x9f00); + if (!v) + return; + /* Find the 0x9f22 block inside the 0x9f00 block. */ + v = sclp_find_gds_vector(v + 1, (void *) v + v->length, 0x9f22); + if (!v) + return; + /* Find the 0x81 block inside the 0x9f22 block. */ + sv = sclp_find_gds_subvector(v + 1, (void *) v + v->length, 0x81); + if (!sv) + return; + /* Find the 0x01 block inside the 0x81 block. */ + netid = sclp_find_gds_subvector(sv + 1, (void *) sv + sv->length, 1); + /* Find the 0x02 block inside the 0x81 block. */ + cpc = sclp_find_gds_subvector(sv + 1, (void *) sv + sv->length, 2); + /* Copy network name and cpc name. */ + spin_lock(&sclp_ocf_lock); + if (netid) { + size = min(OCF_LENGTH_HMC_NETWORK, (size_t) netid->length); + memcpy(hmc_network, netid + 1, size); + EBCASC(hmc_network, size); + hmc_network[size] = 0; + } + if (cpc) { + size = min(OCF_LENGTH_CPC_NAME, (size_t) cpc->length); + memcpy(cpc_name, cpc + 1, size); + EBCASC(cpc_name, size); + cpc_name[size] = 0; + } + spin_unlock(&sclp_ocf_lock); + schedule_work(&sclp_ocf_change_work); +} + +static struct sclp_register sclp_ocf_event = { + .receive_mask = EVTYP_OCF_MASK, + .receiver_fn = sclp_ocf_handler, +}; + +static ssize_t cpc_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int rc; + + spin_lock_irq(&sclp_ocf_lock); + rc = snprintf(page, PAGE_SIZE, "%s\n", cpc_name); + spin_unlock_irq(&sclp_ocf_lock); + return rc; +} + +static struct kobj_attribute cpc_name_attr = + __ATTR(cpc_name, 0444, cpc_name_show, NULL); + +static ssize_t hmc_network_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int rc; + + spin_lock_irq(&sclp_ocf_lock); + rc = snprintf(page, PAGE_SIZE, "%s\n", hmc_network); + spin_unlock_irq(&sclp_ocf_lock); + return rc; +} + +static struct kobj_attribute hmc_network_attr = + __ATTR(hmc_network, 0444, hmc_network_show, NULL); + +static struct attribute *ocf_attrs[] = { + &cpc_name_attr.attr, + &hmc_network_attr.attr, + NULL, +}; + +static struct attribute_group ocf_attr_group = { + .attrs = ocf_attrs, +}; + +static int __init ocf_init(void) +{ + int rc; + + INIT_WORK(&sclp_ocf_change_work, sclp_ocf_change_notify); + ocf_kset = kset_create_and_add("ocf", NULL, firmware_kobj); + if (!ocf_kset) + return -ENOMEM; + + rc = sysfs_create_group(&ocf_kset->kobj, &ocf_attr_group); + if (rc) { + kset_unregister(ocf_kset); + return rc; + } + + return sclp_register(&sclp_ocf_event); +} + +device_initcall(ocf_init); From 3af6fb687b06393c00390d8d779c5d97ced00cde Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:44 +0200 Subject: [PATCH 23/29] [S390] Remove unused includes in process.c Remove unsused includes from arch/s390/kernel/process.c. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index a895e69379f7..541a7509faeb 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -9,41 +9,26 @@ #include #include -#include #include #include #include -#include #include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include -#include -#include -#include +#include #include #include #include #include #include #include +#include #include #include "entry.h" From 9529cdc51f36ca0c87ce0246627c5951c40cbc01 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:45 +0200 Subject: [PATCH 24/29] [S390] s390,oprofile: fix alert counter increment The counter for requested interrupts should be incremented if the program-request-alert bit is set and not the invalid-address-entry bit. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/oprofile/hwsampler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 33cbd373cce4..90c5a8770a99 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -764,7 +764,7 @@ static int worker_check_error(unsigned int cpu, int ext_params) if (!sdbt || !*sdbt) return -EINVAL; - if (ext_params & EI_IEA) + if (ext_params & EI_PRA) cb->req_alert++; if (ext_params & EI_LSDA) From 43a679d6c9f59d7aa20443d43d8041ea9d9aa363 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:46 +0200 Subject: [PATCH 25/29] [S390] s390,oprofile: fix compile error for !CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ctl_set_bit instead of the smp_ctl_set_bit (likewise for clear bit) to prevent the following build error for !CONFIG_SMP: CC arch/s390/oprofile/hwsampler.o arch/s390/oprofile/hwsampler.c: In function ‘hwsampler_deallocate’: arch/s390/oprofile/hwsampler.c:1012: error: implicit declaration of function ‘smp_ctl_clear_bit’ arch/s390/oprofile/hwsampler.c: In function ‘hwsampler_start_all’: arch/s390/oprofile/hwsampler.c:1201: error: implicit declaration of function ‘smp_ctl_set_bit’ CC kernel/seccomp.o make[1]: *** [arch/s390/oprofile/hwsampler.o] Error 1 make: *** [arch/s390/oprofile] Error 2 Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/oprofile/hwsampler.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 90c5a8770a99..8e686bfe2959 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -1009,7 +1009,7 @@ int hwsampler_deallocate() if (hws_state != HWS_STOPPED) goto deallocate_exit; - smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1123,7 +1123,7 @@ int hwsampler_shutdown() mutex_lock(&hws_sem); if (hws_state == HWS_STOPPED) { - smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ deallocate_sdbt(); } if (hws_wq) { @@ -1198,7 +1198,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ - smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */ + ctl_set_bit(0, 5); /* set CR0 bit 58 */ return 0; } From bfac1d2d34990bf6ce0abf947fd5043cf412256b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:47 +0200 Subject: [PATCH 26/29] [S390] Ignore sendmmsg system call note wired up warning sendmmsg is reachable via the socket system call. We don't enable a second way on s390 to reach the same system call. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/unistd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index e82152572377..9208e69245a0 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -385,6 +385,7 @@ /* Ignore system calls that are also reachable via sys_socket */ #define __IGNORE_recvmmsg +#define __IGNORE_sendmmsg #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR From fcdd65b0e7bbbd6aef9be2f20e7c238759cf0b41 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:48 +0200 Subject: [PATCH 27/29] [S390] oprofile: add missing irq stats counter Count CPU measurement external interrupts as well. Signed-off-by: Heiko Carstens --- arch/s390/include/asm/irq.h | 1 + arch/s390/kernel/irq.c | 1 + arch/s390/oprofile/hwsampler.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index db14a311f1d2..1544b90bd6d6 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -15,6 +15,7 @@ enum interruption_class { EXTINT_VRT, EXTINT_SCP, EXTINT_IUC, + EXTINT_CPM, IOINT_QAI, IOINT_QDI, IOINT_DAS, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ea5099c9709c..e204f9597aaf 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -32,6 +32,7 @@ static const struct irq_class intrclass_names[] = { {.name = "VRT", .desc = "[EXT] Virtio" }, {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, + {.name = "CPM", .desc = "[EXT] CPU Measurement" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 8e686bfe2959..cb4338ccbf7b 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -5,6 +5,7 @@ * Author: Heinz Graalfs */ +#include #include #include #include @@ -677,6 +678,7 @@ static void hws_ext_handler(unsigned int ext_int_code, int cpu; struct hws_cpu_buffer *cb; + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; cpu = smp_processor_id(); cb = &per_cpu(sampler_cpu_buffer, cpu); From 5f420c5bd15c948302aafd3341903bc06895f1bb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:49 +0200 Subject: [PATCH 28/29] [S390] oprofile: dont access lowcore The external interrupt parameter is passed as function call parameter. No need to access lowcore. Signed-off-by: Heiko Carstens --- arch/s390/oprofile/hwsampler.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index cb4338ccbf7b..053caa0fd276 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -675,18 +675,11 @@ int hwsampler_activate(unsigned int cpu) static void hws_ext_handler(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { - int cpu; struct hws_cpu_buffer *cb; kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; - cpu = smp_processor_id(); - cb = &per_cpu(sampler_cpu_buffer, cpu); - - atomic_xchg( - &cb->ext_params, - atomic_read(&cb->ext_params) - | S390_lowcore.ext_params); - + cb = &__get_cpu_var(sampler_cpu_buffer); + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); if (hws_wq) queue_work(hws_wq, &cb->worker); } From 5bd418784a2764a8d9de177a5462bfc008fd334a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:50 +0200 Subject: [PATCH 29/29] [S390] cpu hotplug: fix external interrupt subclass mask handling When disabling a cpu all external interrupt subclass masks in control register 0 get cleared. However instead of the service signal subclass mask bit an unused bit got cleared. Accidently (or luckily) the service subclass mask gets cleared with the pfault_fini() call that happens just before the rest of the subclass mask bits get cleared. Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index fed71dcaa0d6..f8e85ecbc459 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -654,8 +654,8 @@ int __cpu_disable(void) /* disable all external interrupts */ cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 | - 1 << 11 | 1 << 10 | 1 << 6 | 1 << 4); + cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 11 | + 1 << 10 | 1 << 9 | 1 << 6 | 1 << 4); /* disable all I/O interrupts */ cr_parms.orvals[6] = 0; cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 |