From d590faf9e8f8509a0a0aa79c38e87fcc6b913248 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 27 Nov 2015 14:55:56 +0100
Subject: [PATCH 001/262] iio: adis_buffer: Fix out-of-bounds memory access

The SPI tx and rx buffers are both supposed to be scan_bytes amount of
bytes large and a common allocation is used to allocate both buffers. This
puts the beginning of the tx buffer scan_bytes bytes after the rx buffer.
The initialization of the tx buffer pointer is done adding scan_bytes to
the beginning of the rx buffer, but since the rx buffer is of type __be16
this will actually add two times as much and the tx buffer ends up pointing
after the allocated buffer.

Fix this by using scan_count, which is scan_bytes / 2, instead of
scan_bytes when initializing the tx buffer pointer.

Fixes: aacff892cbd5 ("staging:iio:adis: Preallocate transfer message")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/imu/adis_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c
index cb32b593f1c5..36607d52fee0 100644
--- a/drivers/iio/imu/adis_buffer.c
+++ b/drivers/iio/imu/adis_buffer.c
@@ -43,7 +43,7 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
 		return -ENOMEM;
 
 	rx = adis->buffer;
-	tx = rx + indio_dev->scan_bytes;
+	tx = rx + scan_count;
 
 	spi_message_init(&adis->msg);
 

From 1c9e3d51d5410acbc4fad70d59370ec2f13c6748 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 12 Nov 2015 16:43:48 +1100
Subject: [PATCH 002/262] KVM: PPC: Book3S HV: Handle unexpected traps in guest
 entry/exit code better

As we saw with the TM Bad Thing type of program interrupt occurring
on the hrfid that enters the guest, it is not completely impossible
to have a trap occurring in the guest entry/exit code, despite the
fact that the code has been written to avoid taking any traps.

This adds a check in the kvmppc_handle_exit_hv() function to detect
the case when a trap has occurred in the hypervisor-mode code, and
instead of treating it just like a trap in guest code, we now print
a message and return to userspace with a KVM_EXIT_INTERNAL_ERROR
exit reason.

Of the various interrupts that get handled in the assembly code in
the guest exit path and that can return directly to the guest, the
only one that can occur when MSR.HV=1 and MSR.EE=0 is machine check
(other than system call, which we can avoid just by not doing a sc
instruction).  Therefore this adds code to the machine check path to
ensure that if the MCE occurred in hypervisor mode, we exit to the
host rather than trying to continue the guest.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kvm/book3s_hv.c            | 18 ++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a29da44cdc6c..89eafd16f408 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -827,6 +827,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	vcpu->stat.sum_exits++;
 
+	/*
+	 * This can happen if an interrupt occurs in the last stages
+	 * of guest entry or the first stages of guest exit (i.e. after
+	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
+	 * That can happen due to a bug, or due to a machine check
+	 * occurring at just the wrong time.
+	 */
+	if (vcpu->arch.shregs.msr & MSR_HV) {
+		printk(KERN_EMERG "KVM trap in HV mode!\n");
+		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+			vcpu->arch.trap, kvmppc_get_pc(vcpu),
+			vcpu->arch.shregs.msr);
+		kvmppc_dump_regs(vcpu);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		run->hw.hardware_exit_reason = vcpu->arch.trap;
+		return RESUME_HOST;
+	}
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
 	switch (vcpu->arch.trap) {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3c6badcd53ef..b3ce8ff18195 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2404,6 +2404,8 @@ machine_check_realmode:
 	 * guest as machine check causing guest to crash.
 	 */
 	ld	r11, VCPU_MSR(r9)
+	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+	bne	mc_cont			/* if so, exit to host */
 	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
 	beq	1f			/* Deliver a machine check to guest */
 	ld	r10, VCPU_PC(r9)

From 760a7364f27d974d100118d88190e574626e18a6 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 20 Nov 2015 09:11:45 +0100
Subject: [PATCH 003/262] KVM: PPC: Fix emulation of H_SET_DABR/X on POWER8

In the old DABR register, the BT (Breakpoint Translation) bit
is bit number 61. In the new DAWRX register, the WT (Watchpoint
Translation) bit is bit number 59. So to move the DABR-BT bit
into the position of the DAWRX-WT bit, it has to be shifted by
two, not only by one. This fixes hardware watchpoints in gdb of
older guests that only use the H_SET_DABR/X interface instead
of the new H_SET_MODE interface.

Cc: stable@vger.kernel.org # v3.14+
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b3ce8ff18195..6ee26de9a1de 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 2:	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
-	rlwimi	r5, r4, 1, DAWRX_WT
+	rlwimi	r5, r4, 2, DAWRX_WT
 	clrrdi	r4, r4, 3
 	std	r4, VCPU_DAWR(r3)
 	std	r5, VCPU_DAWRX(r3)

From edfaff269f94e81b9fbaa130e0ad7776fbb523e2 Mon Sep 17 00:00:00 2001
From: "Geyslan G. Bem" <geyslan@gmail.com>
Date: Tue, 1 Dec 2015 20:42:10 -0300
Subject: [PATCH 004/262] KVM: PPC: Book3S PR: Remove unused variable
 'vcpu_book3s'

The vcpu_book3s variable is assigned but never used. So remove it.
Found using cppcheck.

Signed-off-by: Geyslan G. Bem <geyslan@gmail.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kvm/book3s_64_mmu.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 774a253ca4e1..9bf7031a67ff 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -377,15 +377,12 @@ no_seg_found:
 
 static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 {
-	struct kvmppc_vcpu_book3s *vcpu_book3s;
 	u64 esid, esid_1t;
 	int slb_nr;
 	struct kvmppc_slb *slbe;
 
 	dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
 
-	vcpu_book3s = to_book3s(vcpu);
-
 	esid = GET_ESID(rb);
 	esid_1t = GET_ESID_1T(rb);
 	slb_nr = rb & 0xfff;

From 696066f875bc86d0a9cdcf9cbc4846d89b1f38db Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Wed, 9 Dec 2015 11:34:07 +0100
Subject: [PATCH 005/262] KVM: PPC: Increase memslots to 512

Only using 32 memslots for KVM on powerpc is way too low, you can
nowadays hit this limit quite fast by adding a couple of PCI devices
and/or pluggable memory DIMMs to the guest.

x86 already increased the KVM_USER_MEM_SLOTS to 509, to satisfy 256
pluggable DIMM slots, 3 private slots and 253 slots for other things
like PCI devices (i.e. resulting in 256 + 3 + 253 = 512 slots in
total). We should do something similar for powerpc, and since we do
not use private slots here, we can set the value to 512 directly.

While we're at it, also remove the KVM_MEM_SLOTS_NUM definition
from the powerpc-specific header since this gets defined in the
generic kvm_host.h header anyway.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_host.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index cfa758c6b4f6..8c8f2435a13d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,8 +38,7 @@
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS	512
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1

From b4d7f161feb3015d6306e1d35b565c888ff70c9d Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Wed, 13 Jan 2016 18:28:17 +0100
Subject: [PATCH 006/262] KVM: PPC: Fix ONE_REG AltiVec support

The get and set operations got exchanged by mistake when moving the
code from book3s.c to powerpc.c.

Fixes: 3840edc8033ad5b86deee309c1c321ca54257452
Cc: stable@vger.kernel.org # 3.18+
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kvm/powerpc.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6fd2405c7f4a..a3b182dcb823 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
 			break;
 		case KVM_REG_PPC_VRSAVE:
-			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-				r = -ENXIO;
-				break;
-			}
-			vcpu->arch.vrsave = set_reg_val(reg->id, val);
+			val = get_reg_val(reg->id, vcpu->arch.vrsave);
 			break;
 #endif /* CONFIG_ALTIVEC */
 		default:
@@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 				r = -ENXIO;
 				break;
 			}
-			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
 		case KVM_REG_PPC_VRSAVE:
-			val = get_reg_val(reg->id, vcpu->arch.vrsave);
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vrsave = set_reg_val(reg->id, val);
 			break;
 #endif /* CONFIG_ALTIVEC */
 		default:

From 7717c6be699975f6733d278b13b7c4295d73caf6 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 13 Jan 2016 15:48:54 -0500
Subject: [PATCH 007/262] tracing: Fix stacktrace skip depth in
 trace_buffer_unlock_commit_regs()

While cleaning the stacktrace code I unintentially changed the skip depth of
trace_buffer_unlock_commit_regs() from 0 to 6. kprobes uses this function,
and with skipping 6 call backs, it can easily produce no stack.

Here's how I tested it:

 # echo 'p:ext4_sync_fs ext4_sync_fs ' > /sys/kernel/debug/tracing/kprobe_events
 # echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable
 # cat /sys/kernel/debug/trace
            sync-2394  [005]   502.457060: ext4_sync_fs: (ffffffff81317650)
            sync-2394  [005]   502.457063: kernel_stack:         <stack trace>
            sync-2394  [005]   502.457086: ext4_sync_fs: (ffffffff81317650)
            sync-2394  [005]   502.457087: kernel_stack:         <stack trace>
            sync-2394  [005]   502.457091: ext4_sync_fs: (ffffffff81317650)

After putting back the skip stack to zero, we have:

            sync-2270  [000]   748.052693: ext4_sync_fs: (ffffffff81317650)
            sync-2270  [000]   748.052695: kernel_stack:         <stack trace>
 => iterate_supers (ffffffff8126412e)
 => sys_sync (ffffffff8129c4b6)
 => entry_SYSCALL_64_fastpath (ffffffff8181f0b2)
            sync-2270  [000]   748.053017: ext4_sync_fs: (ffffffff81317650)
            sync-2270  [000]   748.053019: kernel_stack:         <stack trace>
 => iterate_supers (ffffffff8126412e)
 => sys_sync (ffffffff8129c4b6)
 => entry_SYSCALL_64_fastpath (ffffffff8181f0b2)
            sync-2270  [000]   748.053381: ext4_sync_fs: (ffffffff81317650)
            sync-2270  [000]   748.053383: kernel_stack:         <stack trace>
 => iterate_supers (ffffffff8126412e)
 => sys_sync (ffffffff8129c4b6)
 => entry_SYSCALL_64_fastpath (ffffffff8181f0b2)

Cc: stable@vger.kernel.org # v4.4+
Fixes: 73dddbb57bb0 "tracing: Only create stacktrace option when STACKTRACE is configured"
Reported-by: Brendan Gregg <brendan.d.gregg@gmail.com>
Tested-by: Brendan Gregg <brendan.d.gregg@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87fb9801bd9e..d9293402ee68 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
 	__buffer_unlock_commit(buffer, event);
 
-	ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);

From b7522056ec1a62a5f3246627e12ef48e6274c21c Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Wed, 13 Jan 2016 18:39:58 +0300
Subject: [PATCH 008/262] ftrace: Remove unused nr_trampolines var

It's not needed & not used since introducing old_hash: commit fef5aeeee9e371
("ftrace: Replace tramp_hash with old_*_hash to save space").

Link: http://lkml.kernel.org/r/1452699598-27610-1-git-send-email-0x7f454c46@gmail.com

Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 60048c50404e..65460ad93c7b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -164,7 +164,6 @@ struct ftrace_ops {
 	ftrace_func_t			saved_func;
 	int __percpu			*disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-	int				nr_trampolines;
 	struct ftrace_ops_hash		local_hash;
 	struct ftrace_ops_hash		*func_hash;
 	struct ftrace_ops_hash		old_hash;

From 570540d50710ed192e98e2f7f74578c9486b6b05 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Jan 2016 14:07:25 +0100
Subject: [PATCH 009/262] genirq: Validate action before dereferencing it in
 handle_irq_event_percpu()

commit 71f64340fc0e changed the handling of irq_desc->action from

CPU 0                   CPU 1
free_irq()              lock(desc)
  lock(desc)            handle_edge_irq()
                        if (desc->action) {
                          handle_irq_event()
                            action = desc->action
                            unlock(desc)
  desc->action = NULL       handle_irq_event_percpu(desc, action)
                              action->xxx
to

CPU 0                   CPU 1
free_irq()              lock(desc)
  lock(desc)            handle_edge_irq()
                        if (desc->action) {
                          handle_irq_event()
                            unlock(desc)
  desc->action = NULL       handle_irq_event_percpu(desc, action)
                              action = desc->action
                              action->xxx

So if free_irq manages to set the action to NULL between the unlock and before
the readout, we happily dereference a null pointer.

We could simply revert 71f64340fc0e, but we want to preserve the better code
generation. A simple solution is to change the action loop from a do {} while
to a while {} loop.

This is safe because we either see a valid desc->action or NULL. If the action
is about to be removed it is still valid as free_irq() is blocked on
synchronize_irq().

CPU 0                   CPU 1
free_irq()              lock(desc)
  lock(desc)            handle_edge_irq()
                          handle_irq_event(desc)
                            set(INPROGRESS)
                            unlock(desc)
                            handle_irq_event_percpu(desc)
                            action = desc->action
  desc->action = NULL           while (action) {
                                  action->xxx
                                  ...
                                  action = action->next;
  sychronize_irq()
    while(INPROGRESS);      lock(desc)
                            clr(INPROGRESS)
free(action)

That's basically the same mechanism as we have for shared
interrupts. action->next can become NULL while handle_irq_event_percpu()
runs. Either it sees the action or NULL. It does not matter, because action
itself cannot go away before the interrupt in progress flag has been cleared.

Fixes: commit 71f64340fc0e "genirq: Remove the second parameter from handle_irq_event_percpu()"
Reported-by: zyjzyj2000@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Huang Shijie <shijie.huang@arm.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601131224190.3575@nanos
---
 kernel/irq/handle.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a302cf9a2126..57bff7857e87 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -138,7 +138,8 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 	unsigned int flags = 0, irq = desc->irq_data.irq;
 	struct irqaction *action = desc->action;
 
-	do {
+	/* action might have become NULL since we dropped the lock */
+	while (action) {
 		irqreturn_t res;
 
 		trace_irq_handler_entry(irq, action);
@@ -173,7 +174,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 
 		retval |= res;
 		action = action->next;
-	} while (action);
+	}
 
 	add_interrupt_randomness(irq, flags);
 

From f02b4b72d12cbae7020a959e2ed0410a464b4cc4 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 15 Jan 2016 11:34:21 +0100
Subject: [PATCH 010/262] clockevents/tcb_clksrc: Prevent disabling an already
 disabled clock

clockevents_exchange_device is calling clockevents_shutdown() on the new
clockenvents device but it may have never been enabled in the first place.
This results in the tcb clock being disabled without being enabled first:

------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:680 clk_disable+0x28/0x34()
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 4.4.0+ #6
Hardware name: Atmel AT91SAM9
[<c000f2b8>] (unwind_backtrace) from [<c000d01c>] (show_stack+0x10/0x14)
[<c000d01c>] (show_stack) from [<c00172f0>] (warn_slowpath_common+0x78/0xa0)
[<c00172f0>] (warn_slowpath_common) from [<c00173a8>] (warn_slowpath_null+0x18/0x20)
[<c00173a8>] (warn_slowpath_null) from [<c0361528>] (clk_disable+0x28/0x34)
[<c0361528>] (clk_disable) from [<c034d560>] (tc_shutdown+0x38/0x4c)
[<c034d560>] (tc_shutdown) from [<c0059ad4>] (clockevents_switch_state+0x38/0x6c)
[<c0059ad4>] (clockevents_switch_state) from [<c0059b18>] (clockevents_shutdown+0x10/0x24)
[<c0059b18>] (clockevents_shutdown) from [<c005a458>] (tick_check_new_device+0x84/0xac)
[<c005a458>] (tick_check_new_device) from [<c0059660>] (clockevents_register_device+0x7c/0x108)
[<c0059660>] (clockevents_register_device) from [<c06b5a68>] (tcb_clksrc_init+0x390/0x3e8)
[<c06b5a68>] (tcb_clksrc_init) from [<c00097cc>] (do_one_initcall+0x114/0x1d4)
[<c00097cc>] (do_one_initcall) from [<c069bd54>] (kernel_init_freeable+0xfc/0x1b8)
[<c069bd54>] (kernel_init_freeable) from [<c04c3818>] (kernel_init+0x8/0xe0)
[<c04c3818>] (kernel_init) from [<c000a410>] (ret_from_fork+0x14/0x24)
---[ end trace 0000000000000001 ]---

Check what state we were in before trying to disable the clock.

Fixes: cf4541c101ea ("clockevents/drivers/tcb_clksrc: Migrate to new 'set-state' interface")
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1452854061-30370-1-git-send-email-alexandre.belloni@free-electrons.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/clocksource/tcb_clksrc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index 6ee91401918e..4da2af9694a2 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -98,7 +98,8 @@ static int tc_shutdown(struct clock_event_device *d)
 
 	__raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
 	__raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
-	clk_disable(tcd->clk);
+	if (!clockevent_state_detached(d))
+		clk_disable(tcd->clk);
 
 	return 0;
 }

From e23b257c293ce4bcc8cabb2aa3097b6ed8a8261a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 08:43:38 +0100
Subject: [PATCH 011/262] x86/irq: Call chip->irq_set_affinity in proper
 context

setup_ioapic_dest() calls irqchip->irq_set_affinity() completely
unprotected. That's wrong in several aspects:

 - it opens a race window where irq_set_affinity() can be interrupted and the
   irq chip left in unconsistent state.

 - it triggers a lockdep splat when we fix the vector race for 4.3+ because
   vector lock is taken with interrupts enabled.

The proper calling convention is irq descriptor lock held and interrupts
disabled.

Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601140919420.3575@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f25321894ad2..fdb0fbfb1197 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
 	const struct cpumask *mask;
+	struct irq_desc *desc;
 	struct irq_data *idata;
 	struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
 		if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
 			continue;
 
-		idata = irq_get_irq_data(irq);
+		desc = irq_to_desc(irq);
+		raw_spin_lock_irq(&desc->lock);
+		idata = irq_desc_get_irq_data(desc);
 
 		/*
 		 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
 		/* Might be lapic_chip for irq 0 */
 		if (chip->irq_set_affinity)
 			chip->irq_set_affinity(idata, mask, false);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 }
 #endif

From 111abeba67e0dbdc26537429de9155e4f1d807d8 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Thu, 31 Dec 2015 16:30:44 +0000
Subject: [PATCH 012/262] x86/irq: Fix a race in x86_vector_free_irqs()

There's a race condition between

x86_vector_free_irqs()
{
	free_apic_chip_data(irq_data->chip_data);
	xxxxx	//irq_data->chip_data has been freed, but the pointer
		//hasn't been reset yet
	irq_domain_reset_irq_data(irq_data);
}

and

smp_irq_move_cleanup_interrupt()
{
	raw_spin_lock(&vector_lock);
	data = apic_chip_data(irq_desc_get_irq_data(desc));
	access data->xxxx	// may access freed memory
	raw_spin_unlock(&desc->lock);
}

which may cause smp_irq_move_cleanup_interrupt() to access freed memory.

Call irq_domain_reset_irq_data(), which clears the pointer with vector lock
held.

[ tglx: Free memory outside of lock held region. ]

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/1450880014-11741-3-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..cf1e325b67ee 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -226,10 +226,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
 	struct irq_desc *desc;
-	unsigned long flags;
 	int cpu, vector;
 
-	raw_spin_lock_irqsave(&vector_lock, flags);
 	BUG_ON(!data->cfg.vector);
 
 	vector = data->cfg.vector;
@@ -239,10 +237,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress)) {
-		raw_spin_unlock_irqrestore(&vector_lock, flags);
+	if (likely(!data->move_in_progress))
 		return;
-	}
 
 	desc = irq_to_desc(irq);
 	for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +251,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 		}
 	}
 	data->move_in_progress = 0;
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +271,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
 				 unsigned int virq, unsigned int nr_irqs)
 {
+	struct apic_chip_data *apic_data;
 	struct irq_data *irq_data;
+	unsigned long flags;
 	int i;
 
 	for (i = 0; i < nr_irqs; i++) {
 		irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
 		if (irq_data && irq_data->chip_data) {
+			raw_spin_lock_irqsave(&vector_lock, flags);
 			clear_irq_vector(virq + i, irq_data->chip_data);
-			free_apic_chip_data(irq_data->chip_data);
+			apic_data = irq_data->chip_data;
+			irq_domain_reset_irq_data(irq_data);
+			raw_spin_unlock_irqrestore(&vector_lock, flags);
+			free_apic_chip_data(apic_data);
 #ifdef	CONFIG_X86_IO_APIC
 			if (virq + i < nr_legacy_irqs())
 				legacy_irq_data[virq + i] = NULL;
 #endif
-			irq_domain_reset_irq_data(irq_data);
 		}
 	}
 }

From 36f34c8c63da3e272fd66f91089228c22d2b6e8b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:45 +0000
Subject: [PATCH 013/262] x86/irq: Validate that irq descriptor is still active

In fixup_irqs() we unconditionally dereference the irq chip of an irq
descriptor. The descriptor might still be valid, but already cleaned up,
i.e. the chip removed. Add a check for this condition.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.236423282@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f8062aaf5df9..c0b58dd1ca04 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -470,6 +470,15 @@ void fixup_irqs(void)
 		}
 
 		chip = irq_data_get_irq_chip(data);
+		/*
+		 * The interrupt descriptor might have been cleaned up
+		 * already, but it is not yet removed from the radix tree
+		 */
+		if (!chip) {
+			raw_spin_unlock(&desc->lock);
+			continue;
+		}
+
 		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
 			chip->irq_mask(data);
 

From 8a580f70f6936ec095da217018cdeeb5835c0207 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Thu, 31 Dec 2015 16:30:46 +0000
Subject: [PATCH 014/262] x86/irq: Do not use apic_chip_data.old_domain as
 temporary buffer

Function __assign_irq_vector() makes use of apic_chip_data.old_domain as a
temporary buffer, which is in the way of using apic_chip_data.old_domain for
synchronizing the vector cleanup with the vector assignement code.

Use a proper temporary cpumask for this.

[ tglx: Renamed the mask to searched_cpumask for clarity ]

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/1450880014-11741-1-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index cf1e325b67ee..19082cf56616 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -126,6 +126,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	/* Only try and allocate irqs on cpus that are present */
 	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
+	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
 		int new_cpu, vector, offset;
@@ -159,9 +160,9 @@ next:
 		}
 
 		if (unlikely(current_vector == vector)) {
-			cpumask_or(d->old_domain, d->old_domain,
+			cpumask_or(searched_cpumask, searched_cpumask,
 				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, d->old_domain);
+			cpumask_andnot(vector_cpumask, mask, searched_cpumask);
 			cpu = cpumask_first_and(vector_cpumask,
 						cpu_online_mask);
 			continue;
@@ -406,6 +407,7 @@ int __init arch_early_irq_init(void)
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
 }

From 433cbd57d190a1cdd02f243df41c3d7f55ec4b94 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:46 +0000
Subject: [PATCH 015/262] x86/irq: Reorganize the return path in
 assign_irq_vector

Use an explicit goto for the cases where we have success in the search/update
and return -ENOSPC if the search loop ends due to no space.

Preparatory patch for fixes. No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.403491024@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 19082cf56616..613b1cd8eecb 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -118,13 +118,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
+	int cpu;
 
 	if (d->move_in_progress)
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
 	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -134,9 +133,8 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
 		if (cpumask_subset(vector_cpumask, d->domain)) {
-			err = 0;
 			if (cpumask_equal(vector_cpumask, d->domain))
-				break;
+				goto success;
 			/*
 			 * New cpumask using the vector is a proper subset of
 			 * the current in use mask. So cleanup the vector
@@ -147,7 +145,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
 			cpumask_and(d->domain, d->domain, vector_cpumask);
-			break;
+			goto success;
 		}
 
 		vector = current_vector;
@@ -187,17 +185,13 @@ next:
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
-		err = 0;
-		break;
+		goto success;
 	}
+	return -ENOSPC;
 
-	if (!err) {
-		/* cache destination APIC IDs into cfg->dest_apicid */
-		err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-						   &d->cfg.dest_apicid);
-	}
-
-	return err;
+success:
+	/* cache destination APIC IDs into cfg->dest_apicid */
+	return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid);
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,

From 95ffeb4b5baca266e1d0d2bc90f1513e6f419cdd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:47 +0000
Subject: [PATCH 016/262] x86/irq: Reorganize the search in assign_irq_vector

Split out the code which advances the target cpu for the search so we can
reuse it for the next patch which adds an early validation check for the
vectormask which we get from the apic.

Add comments while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.484562040@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 613b1cd8eecb..cef31955ab18 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -157,14 +157,9 @@ next:
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
 
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(searched_cpumask, searched_cpumask,
-				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, searched_cpumask);
-			cpu = cpumask_first_and(vector_cpumask,
-						cpu_online_mask);
-			continue;
-		}
+		/* If the search wrapped around, try the next cpu */
+		if (unlikely(current_vector == vector))
+			goto next_cpu;
 
 		if (test_bit(vector, used_vectors))
 			goto next;
@@ -186,6 +181,19 @@ next:
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
 		goto success;
+
+next_cpu:
+		/*
+		 * We exclude the current @vector_cpumask from the requested
+		 * @mask and try again with the next online cpu in the
+		 * result. We cannot modify @mask, so we use @vector_cpumask
+		 * as a temporary buffer here as it will be reassigned when
+		 * calling apic->vector_allocation_domain() above.
+		 */
+		cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+		cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+		cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+		continue;
 	}
 	return -ENOSPC;
 

From 3716fd27a604d61a91cda47083504971486b80f1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:48 +0000
Subject: [PATCH 017/262] x86/irq: Check vector allocation early

__assign_irq_vector() uses the vector_cpumask which is assigned by
apic->vector_allocation_domain() without doing basic sanity checks. That can
result in a situation where the final assignement of a newly found vector
fails in apic->cpu_mask_to_apicid_and(). So we have to do rollbacks for no
reason.

apic->cpu_mask_to_apicid_and() only fails if

  vector_cpumask & requested_cpumask & cpu_online_mask

is empty.

Check for this condition right away and if the result is empty try immediately
the next possible cpu in the requested mask. So in case of a failure the old
setting is unchanged and we can remove the rollback code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.561877324@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 38 +++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index cef31955ab18..940e18d4dbcd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask, searched_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -130,8 +130,20 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	while (cpu < nr_cpu_ids) {
 		int new_cpu, vector, offset;
 
+		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+		/*
+		 * Clear the offline cpus from @vector_cpumask for searching
+		 * and verify whether the result overlaps with @mask. If true,
+		 * then the call to apic->cpu_mask_to_apicid_and() will
+		 * succeed as well. If not, no point in trying to find a
+		 * vector in this mask.
+		 */
+		cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+		if (!cpumask_intersects(vector_searchmask, mask))
+			goto next_cpu;
+
 		if (cpumask_subset(vector_cpumask, d->domain)) {
 			if (cpumask_equal(vector_cpumask, d->domain))
 				goto success;
@@ -164,7 +176,7 @@ next:
 		if (test_bit(vector, used_vectors))
 			goto next;
 
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+		for_each_cpu(new_cpu, vector_searchmask) {
 			if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
 				goto next;
 		}
@@ -176,7 +188,7 @@ next:
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
 		}
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
@@ -198,8 +210,14 @@ next_cpu:
 	return -ENOSPC;
 
 success:
-	/* cache destination APIC IDs into cfg->dest_apicid */
-	return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid);
+	/*
+	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+	 * as we already established, that mask & d->domain & cpu_online_mask
+	 * is not empty.
+	 */
+	BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+					    &d->cfg.dest_apicid));
+	return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -409,6 +427,7 @@ int __init arch_early_irq_init(void)
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
@@ -498,14 +517,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
 		return -EINVAL;
 
 	err = assign_irq_vector(irq, data, dest);
-	if (err) {
-		if (assign_irq_vector(irq, data,
-				      irq_data_get_affinity_mask(irq_data)))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	return IRQ_SET_MASK_OK;
+	return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {

From 9ac15b7a8af4cf3337a101498c0ed690d23ade75 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:49 +0000
Subject: [PATCH 018/262] x86/irq: Copy vectormask instead of an AND operation

In the case that the new vector mask is a subset of the existing mask there is
no point to do a AND operation of currentmask & newmask. The result is
newmask. So we can simply copy the new mask to the current mask and be done
with it. Preparatory patch for further consolidation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.640253454@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 940e18d4dbcd..1bd29c624531 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -156,7 +156,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 				       vector_cpumask);
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_and(d->domain, d->domain, vector_cpumask);
+			cpumask_copy(d->domain, vector_cpumask);
 			goto success;
 		}
 

From ab25ac02148b600e645f77cfb8b8ea415ed75bb4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:49 +0000
Subject: [PATCH 019/262] x86/irq: Get rid of code duplication

Reusing an existing vector and assigning a new vector has duplicated
code. Consolidate it.

This is also a preparatory patch for finally plugging the cleanup race.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.721599216@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1bd29c624531..fccfa3f5545c 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -118,7 +118,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu;
+	int cpu, vector;
 
 	if (d->move_in_progress)
 		return -EBUSY;
@@ -128,7 +128,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
+		int new_cpu, offset;
 
 		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
@@ -148,16 +148,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 			if (cpumask_equal(vector_cpumask, d->domain))
 				goto success;
 			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
+			 * Mark the cpus which are not longer in the mask for
+			 * cleanup.
 			 */
-			cpumask_andnot(d->old_domain, d->domain,
-				       vector_cpumask);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_copy(d->domain, vector_cpumask);
-			goto success;
+			cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+			vector = d->cfg.vector;
+			goto update;
 		}
 
 		vector = current_vector;
@@ -183,16 +179,12 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (d->cfg.vector) {
+		/* Schedule the old vector for cleanup on all cpus */
+		if (d->cfg.vector)
 			cpumask_copy(d->old_domain, d->domain);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-		}
 		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-		d->cfg.vector = vector;
-		cpumask_copy(d->domain, vector_cpumask);
-		goto success;
+		goto update;
 
 next_cpu:
 		/*
@@ -209,6 +201,11 @@ next_cpu:
 	}
 	return -ENOSPC;
 
+update:
+	/* Cleanup required ? */
+	d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask);
+	d->cfg.vector = vector;
+	cpumask_copy(d->domain, vector_cpumask);
 success:
 	/*
 	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail

From 847667ef10356b824a11c853fc8a8b1b437b6a8d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:50 +0000
Subject: [PATCH 020/262] x86/irq: Remove offline cpus from vector cleanup

No point of keeping offline cpus in the cleanup mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.808642683@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index fccfa3f5545c..68d18b338e3a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -202,8 +202,12 @@ next_cpu:
 	return -ENOSPC;
 
 update:
-	/* Cleanup required ? */
-	d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask);
+	/*
+	 * Exclude offline cpus from the cleanup mask and set the
+	 * move_in_progress flag when the result is not empty.
+	 */
+	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+	d->move_in_progress = !cpumask_empty(d->old_domain);
 	d->cfg.vector = vector;
 	cpumask_copy(d->domain, vector_cpumask);
 success:

From c1684f5035b60e9f98566493e869496fb5de1d89 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:51 +0000
Subject: [PATCH 021/262] x86/irq: Clear move_in_progress before sending
 cleanup IPI

send_cleanup_vector() fiddles with the old_domain mask unprotected because it
relies on the protection by the move_in_progress flag. But this is fatal, as
the flag is reset after the IPI has been sent. So a cpu which receives the IPI
can still see the flag set and therefor ignores the cleanup request. If no
other cleanup request happens then the vector stays stale on that cpu and in
case of an irq removal the vector still persists. That can lead to use after
free when the next cleanup IPI happens.

Protect the code with vector_lock and clear move_in_progress before sending
the IPI.

This does not plug the race which Joe reported because:

CPU0                          CPU1                      CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()             handle_IPI
                              move_in_progress = 1      lock_vector()
                              unlock_vector()
                                                        move_in_progress == 1

The full fix comes with a later patch.

Reported-and-tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.892412198@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 68d18b338e3a..ed62f9c3f785 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -532,6 +532,8 @@ static void __send_cleanup_vector(struct apic_chip_data *data)
 {
 	cpumask_var_t cleanup_mask;
 
+	raw_spin_lock(&vector_lock);
+	data->move_in_progress = 0;
 	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
 		unsigned int i;
 
@@ -543,7 +545,7 @@ static void __send_cleanup_vector(struct apic_chip_data *data)
 		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		free_cpumask_var(cleanup_mask);
 	}
-	data->move_in_progress = 0;
+	raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)

From 5da0c1217f05d2ccc9a8ed6e6e5c23a8a1d24dd6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:52 +0000
Subject: [PATCH 022/262] x86/irq: Remove the cpumask allocation from
 send_cleanup_vector()

There is no need to allocate a new cpumask for sending the cleanup vector. The
old_domain mask is now protected by the vector_lock, so we can safely remove
the offline cpus from it and send the IPI with the resulting mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.967993932@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ed62f9c3f785..91dc2742cfb1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -530,21 +530,11 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-	cpumask_var_t cleanup_mask;
-
 	raw_spin_lock(&vector_lock);
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
 	data->move_in_progress = 0;
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-
-		for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i),
-					    IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
+	if (!cpumask_empty(data->old_domain))
+		apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
 	raw_spin_unlock(&vector_lock);
 }
 

From 56d7d2f4bbd00fb198b7907cb3ab657d06115a42 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:52 +0000
Subject: [PATCH 023/262] x86/irq: Remove outgoing CPU from vector cleanup mask

We want to synchronize new vector assignments with a pending cleanup. Remove a
dying cpu from a pending cleanup mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.045961667@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 91dc2742cfb1..a7fa11e49582 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -633,9 +633,23 @@ void irq_complete_move(struct irq_cfg *cfg)
 void irq_force_complete_move(int irq)
 {
 	struct irq_cfg *cfg = irq_cfg(irq);
+	struct apic_chip_data *data;
 
-	if (cfg)
-		__irq_complete_move(cfg, cfg->vector);
+	if (!cfg)
+		return;
+
+	__irq_complete_move(cfg, cfg->vector);
+
+	/*
+	 * Remove this cpu from the cleanup mask. The IPI might have been sent
+	 * just before the cpu was removed from the offline mask, but has not
+	 * been processed because the CPU has interrupts disabled and is on
+	 * the way out.
+	 */
+	raw_spin_lock(&vector_lock);
+	data = container_of(cfg, struct apic_chip_data, cfg);
+	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
+	raw_spin_unlock(&vector_lock);
 }
 #endif
 

From 90a2282e23f0522e4b3f797ad447c5e91bf7fe32 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:53 +0000
Subject: [PATCH 024/262] x86/irq: Call irq_force_move_complete with irq
 descriptor

First of all there is no point in looking up the irq descriptor again, but we
also need the descriptor for the final cleanup race fix in the next
patch. Make that change seperate. No functional difference.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.125211743@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/irq.h    |  5 +++--
 arch/x86/kernel/apic/vector.c | 11 +++++++----
 arch/x86/kernel/irq.c         |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 881b4768644a..e7de5c9a4fbd 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu);
 
 #define __ARCH_HAS_DO_SOFTIRQ
 
+struct irq_desc;
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
 extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 
-struct irq_desc;
 extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a7fa11e49582..5f7883578880 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -630,10 +630,14 @@ void irq_complete_move(struct irq_cfg *cfg)
 	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
-	struct apic_chip_data *data;
+	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+	struct apic_chip_data *data = apic_chip_data(irqdata);
+	struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
 	if (!cfg)
 		return;
@@ -647,7 +651,6 @@ void irq_force_complete_move(int irq)
 	 * the way out.
 	 */
 	raw_spin_lock(&vector_lock);
-	data = container_of(cfg, struct apic_chip_data, cfg);
 	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
 	raw_spin_unlock(&vector_lock);
 }
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c0b58dd1ca04..61521dc19c10 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -462,7 +462,7 @@ void fixup_irqs(void)
 		 * non intr-remapping case, we can't wait till this interrupt
 		 * arrives at this cpu before completing the irq move.
 		 */
-		irq_force_complete_move(irq);
+		irq_force_complete_move(desc);
 
 		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;

From 98229aa36caa9c769b13565523de9b813013c703 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:54 +0000
Subject: [PATCH 025/262] x86/irq: Plug vector cleanup race

We still can end up with a stale vector due to the following:

CPU0                          CPU1                      CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()             handle_IPI
                              move_in_progress = 1      lock_vector()
                              unlock_vector()
                                                        move_in_progress == 1

So we need to serialize the vector assignment against a pending cleanup. The
solution is rather simple now. We not only check for the move_in_progress flag
in assign_irq_vector(), we also check whether there is still a cleanup pending
in the old_domain cpumask. If so, we return -EBUSY to the caller and let him
deal with it. Though we have to be careful in the cpu unplug case. If the
cleanout has not yet completed then the following setaffinity() call would
return -EBUSY. Add code which prevents this.

Full context is here: http://lkml.kernel.org/r/5653B688.4050809@stratus.com

Reported-and-tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.207265407@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 63 +++++++++++++++++++++++++++++------
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 5f7883578880..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -120,7 +120,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	static int current_offset = VECTOR_OFFSET_START % 16;
 	int cpu, vector;
 
-	if (d->move_in_progress)
+	/*
+	 * If there is still a move in progress or the previous move has not
+	 * been cleaned up completely, tell the caller to come back later.
+	 */
+	if (d->move_in_progress ||
+	    cpumask_intersects(d->old_domain, cpu_online_mask))
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -259,7 +264,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress))
+	/*
+	 * If move is in progress or the old_domain mask is not empty,
+	 * i.e. the cleanup IPI has not been processed yet, we need to remove
+	 * the old references to desc from all cpus vector tables.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain))
 		return;
 
 	desc = irq_to_desc(irq);
@@ -579,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 
 		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
+		 * Nothing to cleanup if irq migration is in progress
+		 * or this cpu is not set in the cleanup mask.
 		 */
-		if (data->move_in_progress)
+		if (data->move_in_progress ||
+		    !cpumask_test_cpu(me, data->old_domain))
 			goto unlock;
 
+		/*
+		 * We have two cases to handle here:
+		 * 1) vector is unchanged but the target mask got reduced
+		 * 2) vector and the target mask has changed
+		 *
+		 * #1 is obvious, but in #2 we have two vectors with the same
+		 * irq descriptor: the old and the new vector. So we need to
+		 * make sure that we only cleanup the old vector. The new
+		 * vector has the current @vector number in the config and
+		 * this cpu is part of the target mask. We better leave that
+		 * one alone.
+		 */
 		if (vector == data->cfg.vector &&
 		    cpumask_test_cpu(me, data->domain))
 			goto unlock;
@@ -602,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 		}
 		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		cpumask_clear_cpu(me, data->old_domain);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -645,13 +669,32 @@ void irq_force_complete_move(struct irq_desc *desc)
 	__irq_complete_move(cfg, cfg->vector);
 
 	/*
-	 * Remove this cpu from the cleanup mask. The IPI might have been sent
-	 * just before the cpu was removed from the offline mask, but has not
-	 * been processed because the CPU has interrupts disabled and is on
-	 * the way out.
+	 * This is tricky. If the cleanup of @data->old_domain has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * The cleanup cannot make progress because we hold @desc->lock. So in
+	 * case @data->old_domain is not yet cleaned up, we need to drop the
+	 * lock and acquire it again. @desc cannot go away, because the
+	 * hotplug code holds the sparse irq lock.
 	 */
 	raw_spin_lock(&vector_lock);
-	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
+	/* Clean out all offline cpus (including ourself) first. */
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+	while (!cpumask_empty(data->old_domain)) {
+		raw_spin_unlock(&vector_lock);
+		raw_spin_unlock(&desc->lock);
+		cpu_relax();
+		raw_spin_lock(&desc->lock);
+		/*
+		 * Reevaluate apic_chip_data. It might have been cleared after
+		 * we dropped @desc->lock.
+		 */
+		data = apic_chip_data(irqdata);
+		if (!data)
+			return;
+		raw_spin_lock(&vector_lock);
+	}
 	raw_spin_unlock(&vector_lock);
 }
 #endif

From e160e4db833c7e8587ec3c88efaed0d84f1bcf42 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 14 Jan 2016 13:48:24 -0500
Subject: [PATCH 026/262] drm/amdgpu: fix tonga smu resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Need to make sure smu buffers are pinned on resume.  This
matches what Fiji does.

Cc: stable@vger.kernel.org
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/tonga_dpm.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index f4a1346525fe..0497784b3652 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle)
 
 static int tonga_dpm_suspend(void *handle)
 {
-	return 0;
+	return tonga_dpm_hw_fini(handle);
 }
 
 static int tonga_dpm_resume(void *handle)
 {
-	int ret;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	mutex_lock(&adev->pm.mutex);
-
-	ret = tonga_smu_start(adev);
-	if (ret) {
-		DRM_ERROR("SMU start failed\n");
-		goto fail;
-	}
-
-fail:
-	mutex_unlock(&adev->pm.mutex);
-	return ret;
+	return tonga_dpm_hw_init(handle);
 }
 
 static int tonga_dpm_set_clockgating_state(void *handle,

From 7776a69386179ea477f501aa222692b9856ec56c Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 15 Oct 2015 10:59:16 -0400
Subject: [PATCH 027/262] drm/amdgpu: Add some tweaks to gfx 8 soft reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 13235d84e5a6..95c0cdfbd1b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4186,7 +4186,18 @@ static int gfx_v8_0_soft_reset(void *handle)
 		gfx_v8_0_cp_gfx_enable(adev, false);
 
 		/* Disable MEC parsing/prefetching */
-		/* XXX todo */
+		gfx_v8_0_cp_compute_enable(adev, false);
+
+		if (grbm_soft_reset || srbm_soft_reset) {
+			tmp = RREG32(mmGMCON_DEBUG);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_STALL, 1);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_CLEAR, 1);
+			WREG32(mmGMCON_DEBUG, tmp);
+
+			udelay(50);
+		}
 
 		if (grbm_soft_reset) {
 			tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4226,16 @@ static int gfx_v8_0_soft_reset(void *handle)
 			WREG32(mmSRBM_SOFT_RESET, tmp);
 			tmp = RREG32(mmSRBM_SOFT_RESET);
 		}
+
+		if (grbm_soft_reset || srbm_soft_reset) {
+			tmp = RREG32(mmGMCON_DEBUG);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_STALL, 0);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_CLEAR, 0);
+			WREG32(mmGMCON_DEBUG, tmp);
+		}
+
 		/* Wait a little for things to settle down */
 		udelay(50);
 		gfx_v8_0_print_status((void *)adev);

From 3caeaa562733c4836e61086ec07666635006a787 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Mon, 7 Dec 2015 12:25:02 +0530
Subject: [PATCH 028/262] perf kvm record/report: 'unprocessable sample' error
 while recording/reporting guest data

While recording guest samples in host using perf kvm record, it will
populate unprocessable sample error, though samples will be recorded
properly. While generating report using perf kvm report, no samples will
be processed and same error will populate. We have seen this behaviour
with upstream perf(4.4-rc3) on x86 and ppc64 hardware.

Reason behind this failure is, when it tries to fetch machine from
rb_tree of machines, it fails. As a part of tracing a bug, we figured
out that this code was incorrectly refactored in commit 54245fdc3576
("perf session: Remove wrappers to machines__find").

This patch will change the functionality such that if it can't fetch
machine in first trial, it will create one node of machine and add that to
rb_tree. So next time when it tries to fetch same machine from rb_tree,
it won't fail. Actually it was the case before refactoring of code in
aforementioned commit.

This patch is generated from acme perf/core branch.

Below I've mention an example that demonstrate the behaviour before and
after applying patch.

Before applying patch:
[Note: One needs to run guest before recording data in host]

  ravi@ravi-bangoria:~$ ./perf kvm record -a
  Warning:
  5903 unprocessable samples recorded.
  Do you have a KVM guest running and not using 'perf kvm'?
  [ perf record: Captured and wrote 1.409 MB perf.data.guest (285 samples) ]

  ravi@ravi-bangoria:~$ ./perf kvm report --stdio
  Warning:
  5903 unprocessable samples recorded.
  Do you have a KVM guest running and not using 'perf kvm'?
  # To display the perf.data header info, please use --header/--header-only options.
  #
  # Total Lost Samples: 0
  #
  # Samples: 285  of event 'cycles'
  # Event count (approx.): 88715406
  #
  # Overhead  Command  Shared Object  Symbol
  # ........  .......  .............  ......
  #

  # (For a higher level overview, try: perf report --sort comm,dso)
  #

After applying patch:

  ravi@ravi-bangoria:~$ ./perf kvm record -a
  [ perf record: Captured and wrote 1.188 MB perf.data.guest (17 samples) ]

  ravi@ravi-bangoria:~$ ./perf kvm report --stdio
  # To display the perf.data header info, please use --header/--header-only options.
  #
  # Total Lost Samples: 0
  #
  # Samples: 17  of event 'cycles'
  # Event count (approx.): 700746
  #
  # Overhead  Command  Shared Object     Symbol
  # ........  .......  ................  ......................
  #
      34.19%  :5758    [unknown]         [g] 0xffffffff818682ab
      22.79%  :5758    [unknown]         [g] 0xffffffff812dc7f8
      22.79%  :5758    [unknown]         [g] 0xffffffff818650d0
      14.83%  :5758    [unknown]         [g] 0xffffffff8161a1b6
       2.49%  :5758    [unknown]         [g] 0xffffffff818692bf
       0.48%  :5758    [unknown]         [g] 0xffffffff81869253
       0.05%  :5758    [unknown]         [g] 0xffffffff81869250

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: stable@vger.kernel.org # v3.19+
Fixes: 54245fdc3576 ("perf session: Remove wrappers to machines__find")
Link: http://lkml.kernel.org/r/1449471302-11283-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d5636ba94b20..40b7a0d0905b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1149,7 +1149,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 
 		machine = machines__find(machines, pid);
 		if (!machine)
-			machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+			machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
 		return machine;
 	}
 

From 40c4a0f92aed570cc529a1e5c24c7e04a0ce8b85 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 13 Jan 2016 17:23:01 +0000
Subject: [PATCH 029/262] perf symbols: Fix reading of build-id from vDSO

We need to use the long name (the filename) when reading the build-id
from a DSO.  Using the short name doesn't work for (at least) vDSOs.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20160113172301.GT28542@decadent.org.uk
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3b2de6eb3376..ab02209a7cf3 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1466,7 +1466,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	 * Read the build id if possible. This is required for
 	 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
 	 */
-	if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)
+	if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
 		dso__set_build_id(dso, build_id);
 
 	/*

From 7be43dfb1e617b87bf2d936d82c026be39b43910 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 13 Jan 2016 12:17:14 +0000
Subject: [PATCH 030/262] perf build: Set parallel making options build-test

'make build-test' is painful because of time consuming. In a full test,
all test cases are built twice with tools/perf/Makefile and
tools/perf/Makefile.perf. 'Makefile' automatically computes parallel
options for make, but 'Makefile.perf' not, so all test cases is built
with one job. It is very slow.

This patch adds '-j' options to Makefile.perf testing. It computes
parallel building options like what tools/perf/Makefile does, and pass
'-j' option to Makefile.perf test.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1452687442-6186-2-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/make | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index df38decc48c3..c0ee67906e0a 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
 # no target specified, trigger the whole suite
 all:
 	@echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
-	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -14,6 +14,15 @@ endif
 else
 PERF := .
 
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(cores),0)
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
+
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
 LC_COLLATE=C
@@ -252,7 +261,7 @@ clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
 $(run):
 	$(call clean)
 	@TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
+	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) DESTDIR=$$TMP_DEST $($@)"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1; \
 	echo "  test: $(call test,$@)" >> $@ 2>&1; \
@@ -263,7 +272,7 @@ $(run_O):
 	$(call clean)
 	@TMP_O=$$(mktemp -d); \
 	TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
+	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1 && \
 	echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
@@ -277,15 +286,15 @@ tarpkg:
 	rm -f $@
 
 make_kernelsrc:
-	@echo "- make -C <kernelsrc> tools/perf"
+	@echo "- make -C <kernelsrc> $(PARALLEL_OPT) tools/perf"
 	$(call clean); \
-	(make -C ../.. tools/perf) > $@ 2>&1 && \
+	(make -C ../.. $(PARALLEL_OPT) tools/perf) > $@ 2>&1 && \
 	test -x perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-	@echo "- make -C <kernelsrc>/tools perf"
+	@echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) perf"
 	$(call clean); \
-	(make -C ../../tools perf) > $@ 2>&1 && \
+	(make -C ../../tools $(PARALLEL_OPT) perf) > $@ 2>&1 && \
 	test -x perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools

From eb807730c034090599135bc03578015ebf8974af Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 15 Jan 2016 04:00:14 +0000
Subject: [PATCH 031/262] perf build: Pass O option to Makefile.perf in
 build-test

Unlike tools/perf/Makefile, tools/perf/Makefile.perf obey 'O' option
when it is passed through cmdline only, due to code in
tools/scripts/Makefile.include:

 ifneq ($(O),)
 ifeq ($(origin O), command line)
 	...
 	ABSOLUTE_O := $(shell cd $(O) ; pwd)
 	OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
 endif
 endif

This patch passes 'O' to Makefile.perf through cmdline explicitly
to make it follow O variable during build-test.

'make clean' should have identical 'O' option with 'make'. If not,
config-clean may error.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1452830421-77757-3-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/make | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index c0ee67906e0a..fc2a9a31113f 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
 # no target specified, trigger the whole suite
 all:
 	@echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
-	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1
+	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -13,6 +13,14 @@ else
 endif
 else
 PERF := .
+O_OPT :=
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+endif
 
 PARALLEL_OPT=
 ifeq ($(SET_PARALLEL),1)
@@ -256,12 +264,12 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
 
 $(run):
 	$(call clean)
 	@TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) DESTDIR=$$TMP_DEST $($@)"; \
+	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1; \
 	echo "  test: $(call test,$@)" >> $@ 2>&1; \

From 68824de19a0b4cdc17193cb004c55d82c06af8bd Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 15 Jan 2016 04:00:15 +0000
Subject: [PATCH 032/262] perf build: Test correct path of perf in build-test

If an 'O' is passed to 'make build-test', many 'test -x' and 'test -f'
will fail because perf resides in a different directory. Fix this by
computing PERF_OUT according to 'O' and test correct output files.
For make_kernelsrc and make_kernelsrc_tools, set KBUILD_OUTPUT_DIR
instead because the path is different from others ($(O)/perf vs
 $(O)/tools/perf).

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1452830421-77757-4-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/make | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index fc2a9a31113f..29810cf2c117 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -13,10 +13,12 @@ else
 endif
 else
 PERF := .
+PERF_O := $(PERF)
 O_OPT :=
 
 ifneq ($(O),)
   FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
   ifeq ($(SET_O),1)
     O_OPT := 'O=$(FULL_O)'
   endif
@@ -173,11 +175,11 @@ test_make_doc    := $(test_ok)
 test_make_help_O := $(test_ok)
 test_make_doc_O  := $(test_ok)
 
-test_make_python_perf_so := test -f $(PERF)/python/perf.so
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
 
-test_make_perf_o           := test -f $(PERF)/perf.o
-test_make_util_map_o       := test -f $(PERF)/util/map.o
-test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o
+test_make_perf_o           := test -f $(PERF_O)/perf.o
+test_make_util_map_o       := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
 
 define test_dest_files
   for file in $(1); do				\
@@ -244,7 +246,7 @@ test_make_perf_o_O            := test -f $$TMP_O/perf.o
 test_make_util_map_o_O        := test -f $$TMP_O/util/map.o
 test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
 
-test_default = test -x $(PERF)/perf
+test_default = test -x $(PERF_O)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
 test_default_O = test -x $$TMP_O/perf
@@ -293,17 +295,22 @@ tarpkg:
 	( eval $$cmd ) >> $@ 2>&1 && \
 	rm -f $@
 
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
 make_kernelsrc:
 	@echo "- make -C <kernelsrc> $(PARALLEL_OPT) tools/perf"
 	$(call clean); \
 	(make -C ../.. $(PARALLEL_OPT) tools/perf) > $@ 2>&1 && \
-	test -x perf && rm -f $@ || (cat $@ ; false)
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
 	@echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) perf"
 	$(call clean); \
 	(make -C ../../tools $(PARALLEL_OPT) perf) > $@ 2>&1 && \
-	test -x perf && rm -f $@ || (cat $@ ; false)
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
 	@echo OK

From c15e758c4bd7fbe38983e36258541ffcf81e1500 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 15 Jan 2016 04:00:16 +0000
Subject: [PATCH 033/262] perf build: Pass O option to kernel makefile in
 build-test

Kernel makefile only follows an 'O' option passed from command line
explicitely. In build-test with 'O' option set, kernel makefile
contaminate kernel source directory. Build test also fail if we don't
create output directory manually.

K_O_OPT is added and passed to kernel makefile if 'O' is passed
to build-test.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1452830421-77757-5-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/make | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 29810cf2c117..f918015512af 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -22,6 +22,7 @@ ifneq ($(O),)
   ifeq ($(SET_O),1)
     O_OPT := 'O=$(FULL_O)'
   endif
+  K_O_OPT := 'O=$(FULL_O)'
 endif
 
 PARALLEL_OPT=
@@ -301,15 +302,15 @@ ifneq ($(O),)
 endif
 
 make_kernelsrc:
-	@echo "- make -C <kernelsrc> $(PARALLEL_OPT) tools/perf"
+	@echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
 	$(call clean); \
-	(make -C ../.. $(PARALLEL_OPT) tools/perf) > $@ 2>&1 && \
+	(make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
 	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-	@echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) perf"
+	@echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
 	$(call clean); \
-	(make -C ../../tools $(PARALLEL_OPT) perf) > $@ 2>&1 && \
+	(make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
 	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools

From b8e52be00ca7e7659e8ee172ce9e6f0c6af28ec8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 15 Jan 2016 04:00:17 +0000
Subject: [PATCH 034/262] perf build: Add feature-dump target

To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) if defined, with no
further action.

Get feature dump of the current build:
  $ make feature-dump
    BUILD:   Doing 'make -j4' parallel build

  Auto-detecting system features:
  ...                         dwarf: [ on  ]

  FEATURE-DUMP file available in FEATURE-DUMP

Get feature dump static build into /tmp/fd file:

  $ make feature-dump FEATURE_DUMP_COPY=/tmp/fd LDFLAGS=-static
    BUILD:   Doing 'make -j4' parallel build

  Auto-detecting system features:
  ...                         dwarf: [ OFF ]

  SNIP

  FEATURE-DUMP file copied into /tmp/fd

Suggested-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1452830421-77757-6-git-send-email-wangnan0@huawei.com
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0a22407e1d7d..f758a72df1b3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -610,6 +610,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
 	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
 	$(python-clean)
 
+#
+# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
+# file if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+	@cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+	@echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+	@echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
 #
 # Trick: if ../../.git does not exist - we are building out of tree for example,
 # then force version regeneration:

From 96b9e70b8e6cd65f71ee71889143976f3afb038a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 15 Jan 2016 04:00:18 +0000
Subject: [PATCH 035/262] perf build: Introduce FEATURES_DUMP make variable

Introducing FEATURES_DUMP make variable to provide features
detection dump file and bypass the feature detection.

The intention is to use this during build tests to skip
repeated features detection, like:

Get feature dump static build into /tmp/fd file:
  $ make feature-dump FEATURE_DUMP_COPY=/tmp/fd LDFLAGS=-static
    BUILD:   Doing 'make -j4' parallel build

  Auto-detecting system features:
  ...                         dwarf: [ OFF ]

  SNIP

  FEATURE-DUMP file copied into /tmp/fd

Use /tmp/fd to build perf:
  $ make FEATURES_DUMP=/tmp/fd LDFLAGS=-static

  $ file perf
  perf: ELF 64-bit LSB executable, x86-64, version 1 (GNU/Linux), statically linked, for ...

Suggested-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1452830421-77757-7-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf   | 14 +++++++++++++-
 tools/perf/config/Makefile |  4 ++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f758a72df1b3..5d34815c7ccb 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -77,6 +77,9 @@ include config/utilities.mak
 # Define NO_AUXTRACE if you do not want AUX area tracing support
 #
 # Define NO_LIBBPF if you do not want BPF support
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -166,6 +169,15 @@ ifeq ($(config),1)
 include config/Makefile
 endif
 
+# The FEATURE_DUMP_EXPORT holds location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
+endif
+
 export prefix bindir sharedir sysconfdir DESTDIR
 
 # sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +448,7 @@ $(LIBAPI)-clean:
 	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBBPF): fixdep FORCE
-	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
+	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index e5959c136a19..511141b102e8 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -181,7 +181,11 @@ LDFLAGS += -Wl,-z,noexecstack
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
+ifeq ($(FEATURES_DUMP),)
 include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all

From 203cbf77de59fc8f13502dcfd11350c6d4a5c95f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 16:54:46 +0000
Subject: [PATCH 036/262] hrtimer: Handle remaining time proper for
 TIME_LOW_RES

If CONFIG_TIME_LOW_RES is enabled we add a jiffie to the relative timeout to
prevent short sleeps, but we do not account for that in interfaces which
retrieve the remaining time.

Helge observed that timerfd can return a remaining time larger than the
relative timeout. That's not expected and breaks userland test programs.

Store the information that the timer was armed relative and provide functions
to adjust the remaining time. To avoid bloating the hrtimer struct make state
a u8, which as a bonus results in better code on x86 at least.

Reported-and-tested-by: Helge Deller <deller@gmx.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: linux-m68k@lists.linux-m68k.org
Cc: dhowells@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20160114164159.273328486@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h  | 34 ++++++++++++++++++++++---
 kernel/time/hrtimer.c    | 55 +++++++++++++++++++++++++++-------------
 kernel/time/timer_list.c |  2 +-
 3 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 76dd4f0da5ca..2ead22dd74a0 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -87,7 +87,8 @@ enum hrtimer_restart {
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
- * @start_pid: timer statistics field to store the pid of the task which
+ * @is_rel:	Set if the timer was armed relative
+ * @start_pid:  timer statistics field to store the pid of the task which
  *		started the timer
  * @start_site:	timer statistics field to store the site where the timer
  *		was started
@@ -101,7 +102,8 @@ struct hrtimer {
 	ktime_t				_softexpires;
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
-	unsigned long			state;
+	u8				state;
+	u8				is_rel;
 #ifdef CONFIG_TIMER_STATS
 	int				start_pid;
 	void				*start_site;
@@ -321,6 +323,27 @@ static inline void clock_was_set_delayed(void) { }
 
 #endif
 
+static inline ktime_t
+__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
+{
+	ktime_t rem = ktime_sub(timer->node.expires, now);
+
+	/*
+	 * Adjust relative timers for the extra we added in
+	 * hrtimer_start_range_ns() to prevent short timeouts.
+	 */
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
+		rem.tv64 -= hrtimer_resolution;
+	return rem;
+}
+
+static inline ktime_t
+hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
+{
+	return __hrtimer_expires_remaining_adjusted(timer,
+						    timer->base->get_time());
+}
+
 extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
@@ -390,7 +413,12 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 }
 
 /* Query timers: */
-extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+
+static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+	return __hrtimer_get_remaining(timer, false);
+}
 
 extern u64 hrtimer_get_next_event(void);
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 435b8850dd80..fa909f9fd559 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -897,10 +897,10 @@ static int enqueue_hrtimer(struct hrtimer *timer,
  */
 static void __remove_hrtimer(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base,
-			     unsigned long newstate, int reprogram)
+			     u8 newstate, int reprogram)
 {
 	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-	unsigned int state = timer->state;
+	u8 state = timer->state;
 
 	timer->state = newstate;
 	if (!(state & HRTIMER_STATE_ENQUEUED))
@@ -930,7 +930,7 @@ static inline int
 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 {
 	if (hrtimer_is_queued(timer)) {
-		unsigned long state = timer->state;
+		u8 state = timer->state;
 		int reprogram;
 
 		/*
@@ -954,6 +954,22 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 	return 0;
 }
 
+static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
+					    const enum hrtimer_mode mode)
+{
+#ifdef CONFIG_TIME_LOW_RES
+	/*
+	 * CONFIG_TIME_LOW_RES indicates that the system has no way to return
+	 * granular time values. For relative timers we add hrtimer_resolution
+	 * (i.e. one jiffie) to prevent short timeouts.
+	 */
+	timer->is_rel = mode & HRTIMER_MODE_REL;
+	if (timer->is_rel)
+		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+#endif
+	return tim;
+}
+
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
  * @timer:	the timer to be added
@@ -974,19 +990,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	/* Remove an active timer from the queue: */
 	remove_hrtimer(timer, base, true);
 
-	if (mode & HRTIMER_MODE_REL) {
+	if (mode & HRTIMER_MODE_REL)
 		tim = ktime_add_safe(tim, base->get_time());
-		/*
-		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
-		 * to signal that they simply return xtime in
-		 * do_gettimeoffset(). In this case we want to round up by
-		 * resolution when starting a relative timer, to avoid short
-		 * timeouts. This will go away with the GTOD framework.
-		 */
-#ifdef CONFIG_TIME_LOW_RES
-		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
-#endif
-	}
+
+	tim = hrtimer_update_lowres(timer, tim, mode);
 
 	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
@@ -1074,19 +1081,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
 /**
  * hrtimer_get_remaining - get remaining time for the timer
  * @timer:	the timer to read
+ * @adjust:	adjust relative timers when CONFIG_TIME_LOW_RES=y
  */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
 {
 	unsigned long flags;
 	ktime_t rem;
 
 	lock_hrtimer_base(timer, &flags);
-	rem = hrtimer_expires_remaining(timer);
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
+		rem = hrtimer_expires_remaining_adjusted(timer);
+	else
+		rem = hrtimer_expires_remaining(timer);
 	unlock_hrtimer_base(timer, &flags);
 
 	return rem;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_HZ_COMMON
 /**
@@ -1219,6 +1230,14 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
 
+	/*
+	 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
+	 * timer is restarted with a period then it becomes an absolute
+	 * timer. If its not restarted it does not matter.
+	 */
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES))
+		timer->is_rel = false;
+
 	/*
 	 * Because we run timers from hardirq context, there is no chance
 	 * they get migrated to another cpu, therefore its safe to unlock
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index f75e35b60149..ba7d8b288bb3 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -69,7 +69,7 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
 	print_name_offset(m, taddr);
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->function);
-	SEQ_printf(m, ", S:%02lx", timer->state);
+	SEQ_printf(m, ", S:%02x", timer->state);
 #ifdef CONFIG_TIMER_STATS
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->start_site);

From b62526ed11a1fe3861ab98d40b7fdab8981d788a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 16:54:46 +0000
Subject: [PATCH 037/262] timerfd: Handle relative timers with
 CONFIG_TIME_LOW_RES proper

Helge reported that a relative timer can return a remaining time larger than
the programmed relative time on parisc and other architectures which have
CONFIG_TIME_LOW_RES set. This happens because we add a jiffie to the resulting
expiry time to prevent short timeouts.

Use the new function hrtimer_expires_remaining_adjusted() to calculate the
remaining time. It takes that extra added time into account for relative
timers.

Reported-and-tested-by: Helge Deller <deller@gmx.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: linux-m68k@lists.linux-m68k.org
Cc: dhowells@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20160114164159.354500742@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/timerfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/timerfd.c b/fs/timerfd.c
index b94fa6c3c6eb..053818dd6c18 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 	if (isalarm(ctx))
 		remaining = alarm_expires_remaining(&ctx->t.alarm);
 	else
-		remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+		remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
 	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }

From 572c39172684c3711e4a03c9a7380067e2b0661c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 16:54:47 +0000
Subject: [PATCH 038/262] posix-timers: Handle relative timers with
 CONFIG_TIME_LOW_RES proper

As Helge reported for timerfd we have the same issue in posix timers. We
return remaining time larger than the programmed relative time to user space
in case of CONFIG_TIME_LOW_RES=y. Use the proper function to adjust the extra
time added in hrtimer_start_range_ns().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Helge Deller <deller@gmx.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: linux-m68k@lists.linux-m68k.org
Cc: dhowells@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20160114164159.450510905@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/posix-timers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 31d11ac9fa47..f2826c35e918 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -760,7 +760,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
 	    (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
 		timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
-	remaining = ktime_sub(hrtimer_get_expires(timer), now);
+	remaining = __hrtimer_expires_remaining_adjusted(timer, now);
 	/* Return 0 only, when the timer is expired and not pending */
 	if (remaining.tv64 <= 0) {
 		/*

From 51cbb5242a41700a3f250ecfb48dcfb7e4375ea4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 16:54:48 +0000
Subject: [PATCH 039/262] itimers: Handle relative timers with
 CONFIG_TIME_LOW_RES proper

As Helge reported for timerfd we have the same issue in itimers. We return
remaining time larger than the programmed relative time to user space in case
of CONFIG_TIME_LOW_RES=y. Use the proper function to adjust the extra time
added in hrtimer_start_range_ns().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Helge Deller <deller@gmx.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: linux-m68k@lists.linux-m68k.org
Cc: dhowells@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20160114164159.528222587@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/itimer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 8d262b467573..1d5c7204ddc9 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -26,7 +26,7 @@
  */
 static struct timeval itimer_get_remtime(struct hrtimer *timer)
 {
-	ktime_t rem = hrtimer_get_remaining(timer);
+	ktime_t rem = __hrtimer_get_remaining(timer, true);
 
 	/*
 	 * Racy but safe: if the itimer expires after the above

From ce90d092bcd96b646b370121f0f1508270627f98 Mon Sep 17 00:00:00 2001
From: Mark Yao <mark.yao@rock-chips.com>
Date: Tue, 12 Jan 2016 15:51:12 +0800
Subject: [PATCH 040/262] drm/rockchip: Don't build rockchip_drm_vop as modules

rockchip_drm_vop's module init had moved to rockchip_vop_reg.c
so no need to build rockchip_drm_vop.ko

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
---
 drivers/gpu/drm/rockchip/Makefile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index d1dc0f7b01db..a4e03bcde035 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -3,10 +3,9 @@
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
 rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
-		rockchip_drm_gem.o
+		rockchip_drm_gem.o rockchip_drm_vop.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
-				rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o

From 63087aae5a7976a4557d16873146eae03948ec74 Mon Sep 17 00:00:00 2001
From: Mark Yao <mark.yao@rock-chips.com>
Date: Tue, 12 Jan 2016 16:04:39 +0800
Subject: [PATCH 041/262] drm/rockchip: cleanup unnecessary export symbol

Now rockchip_drm_vop.c is build into rockchipdrm.ko, so
no need to export following symbol anymore:
    rockchip_drm_dma_attach_device
    rockchip_drm_dma_detach_device
    rockchip_drm_dma_attach_device
    rockchip_drm_dma_detach_device
    rockchip_register_crtc_funcs
    rockchip_unregister_crtc_funcs
    rockchip_fb_get_gem_obj

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
---
 drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 4 ----
 drivers/gpu/drm/rockchip/rockchip_drm_fb.c  | 1 -
 2 files changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 8397d1b62ef9..a0d51ccb6ea4 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -55,14 +55,12 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
 
 	return arm_iommu_attach_device(dev, mapping);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
 				    struct device *dev)
 {
 	arm_iommu_detach_device(dev);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 				 const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
 
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 {
@@ -89,7 +86,6 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 
 	priv->crtc_funcs[pipe] = NULL;
 }
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
 
 static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
 						int pipe)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index f7844883cb76..dd1f8d3c98d8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -39,7 +39,6 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
 
 	return rk_fb->obj[plane];
 }
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
 
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {

From c7647f8681feeb6c0957e3cf5daed1fbf8b3a5af Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 12 Jan 2016 18:05:18 +0000
Subject: [PATCH 042/262] drm/rockchip: vop: fix mask when updating interrupts

Commit dbb3d94 (drm/rockchip: vop: move interrupt registers into
vop_data) introduced new macros for updating the interrupt control
registers but these always use the mask from the register definition
without refining it for the particular bits that are being changed.

This means that whenever we enable/disable a particular interrupt we end
up disabling all of the others as a side effect.

Signed-off-by: John Keeping <john@metanate.com>
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 46c2a8dfd8aa..fd370548d7d7 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -43,8 +43,8 @@
 
 #define REG_SET(x, base, reg, v, mode) \
 		__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
-		__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+		__REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
 
 #define VOP_WIN_SET(x, win, name, v) \
 		REG_SET(x, win->base, win->phy->name, v, RELAXED)
@@ -58,16 +58,18 @@
 #define VOP_INTR_GET(vop, name) \
 		vop_read_reg(vop, 0, &vop->data->ctrl->name)
 
-#define VOP_INTR_SET(vop, name, v) \
-		REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+		REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
 	do { \
-		int i, reg = 0; \
+		int i, reg = 0, mask = 0; \
 		for (i = 0; i < vop->data->intr->nintrs; i++) { \
-			if (vop->data->intr->intrs[i] & type) \
+			if (vop->data->intr->intrs[i] & type) { \
 				reg |= (v) << i; \
+				mask |= 1 << i; \
+			} \
 		} \
-		VOP_INTR_SET(vop, name, reg); \
+		VOP_INTR_SET(vop, name, mask, reg); \
 	} while (0)
 #define VOP_INTR_GET_TYPE(vop, name, type) \
 		vop_get_intr_type(vop, &vop->data->intr->name, type)

From 484bb6c969523aa547d854bb57104339ee4aa800 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Thu, 14 Jan 2016 09:59:02 +0100
Subject: [PATCH 043/262] drm/rockchip/dsi: fix handling
 mipi_dsi_pixel_format_to_bpp result

The function can return negative value so it should be assigned to signed
variable.

The problem has been detected using patch
scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Reviewed-by: Chris Zhong <zyw@rock-chips.com>
---
 drivers/gpu/drm/rockchip/dw-mipi-dsi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
index 7bfe243c6173..f8f8f29fb7c3 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
@@ -461,10 +461,11 @@ static int dw_mipi_dsi_phy_init(struct dw_mipi_dsi *dsi)
 
 static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
 {
-	unsigned int bpp, i, pre;
+	unsigned int i, pre;
 	unsigned long mpclk, pllref, tmp;
 	unsigned int m = 1, n = 1, target_mbps = 1000;
 	unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+	int bpp;
 
 	bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
 	if (bpp < 0) {

From 3fda5bb420e79b357328b358409e4c547d8f0a18 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 15 Jan 2016 22:11:07 +0200
Subject: [PATCH 044/262] x86/platform/intel-mid: Enable 64-bit build

Intel Tangier SoC is known to have 64-bit dual core CPU. Enable
64-bit build for it.

The kernel has been tested on Intel Edison board:

	Linux buildroot 4.4.0-next-20160115+ #25 SMP Fri Jan 15 22:03:19 EET 2016 x86_64 GNU/Linux

	processor       : 0
	vendor_id       : GenuineIntel
	cpu family      : 6
	model           : 74
	model name      : Genuine Intel(R) CPU   4000  @  500MHz
	stepping        : 8

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1452888668-147116-1-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig         | 3 +--
 arch/x86/kernel/head64.c | 8 ++++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 258965d56beb..07459a6b417d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -495,11 +495,10 @@ config X86_INTEL_CE
 
 config X86_INTEL_MID
 	bool "Intel MID platform support"
-	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
 	depends on X86_PLATFORM_DEVICES
 	depends on PCI
-	depends on PCI_GOANY
+	depends on X86_64 || (PCI_GOANY && X86_32)
 	depends on X86_IO_APIC
 	select SFI
 	select I2C
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f129a9af6357..2c0f3407bd1f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
 	reserve_ebda_region();
 
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_INTEL_MID:
+		x86_intel_mid_early_setup();
+		break;
+	default:
+		break;
+	}
+
 	start_kernel();
 }

From b000de5848441bc4e99c662fe1fd1b854151a84e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 15 Jan 2016 22:11:08 +0200
Subject: [PATCH 045/262] x86/platform/intel-mid: Join string and fix SoC name

Join string back to make grepping a bit easier. While here,
lowering case for Penwell SoC name in one case to be aligned
with the rest messages.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1452888668-147116-2-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-mid/intel-mid.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 1bbc21e2e4ae..90bb997ed0a2 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void)
 		intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
 	else {
 		intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-		pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+		pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
 	}
 
 out:
@@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
 	else if (strcmp("lapic_and_apbt", arg) == 0)
 		intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
 	else {
-		pr_warn("X86 INTEL_MID timer option %s not recognised"
-			   " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-			   arg);
+		pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+			arg);
 		return -EINVAL;
 	}
 	return 0;
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-

From 9c03ee147193645be4c186d3688232fa438c57c7 Mon Sep 17 00:00:00 2001
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Date: Sat, 16 Jan 2016 00:31:23 +0530
Subject: [PATCH 046/262] sched: Fix crash in sched_init_numa()

The following PowerPC commit:

  c118baf80256 ("arch/powerpc/mm/numa.c: do not allocate bootmem memory for non existing nodes")

avoids allocating bootmem memory for non existent nodes.

But when DEBUG_PER_CPU_MAPS=y is enabled, my powerNV system failed to boot
because in sched_init_numa(), cpumask_or() operation was done on
unallocated nodes.

Fix that by making cpumask_or() operation only on existing nodes.

[ Tested with and w/o DEBUG_PER_CPU_MAPS=y on x86 and PowerPC. ]

Reported-by: Jan Stancek <jstancek@redhat.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: <gkurz@linux.vnet.ibm.com>
Cc: <grant.likely@linaro.org>
Cc: <nikunj@linux.vnet.ibm.com>
Cc: <vdavydov@parallels.com>
Cc: <linuxppc-dev@lists.ozlabs.org>
Cc: <linux-mm@kvack.org>
Cc: <peterz@infradead.org>
Cc: <benh@kernel.crashing.org>
Cc: <paulus@samba.org>
Cc: <mpe@ellerman.id.au>
Cc: <anton@samba.org>
Link: http://lkml.kernel.org/r/1452884483-11676-1-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44253adb3c36..474658b51fe6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6840,7 +6840,7 @@ static void sched_init_numa(void)
 
 			sched_domains_numa_masks[i][j] = mask;
 
-			for (k = 0; k < nr_node_ids; k++) {
+			for_each_node(k) {
 				if (node_distance(j, k) > sched_domains_numa_distance[i])
 					continue;
 

From d394f2d9d8e1e7b4959819344baf67b5995da9b0 Mon Sep 17 00:00:00 2001
From: Alex Thorlton <athorlton@sgi.com>
Date: Fri, 11 Dec 2015 14:59:45 -0600
Subject: [PATCH 047/262] x86/platform/UV: Remove EFI memmap quirk for UV2+

Commit a5d90c923bcf ("x86/efi: Quirk out SGI UV") added a quirk
to efi_apply_memmap_quirks to force SGI UV systems to fall back
to the old EFI memmap mechanism.  We have a BIOS fix for this
issue on all systems except for UV1.  This commit fixes up the
EFI quirk/MMR mapping code so that we only apply the special
case to UV1 hardware.

Signed-off-by: Alex Thorlton <athorlton@sgi.com>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hedi Berriche <hedi@sgi.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Travis <travis@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1449867585-189233-2-git-send-email-athorlton@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/x2apic_uv_x.c |  5 ++++-
 arch/x86/platform/efi/quirks.c     | 17 +++++++++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d760c6bb37b5..624db00583f4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
 		return;
 	}
 	pr_info("UV: Found %s hub\n", hub);
-	map_low_mmrs();
+
+	/* We now only need to map the MMRs on UV1 */
+	if (is_uv1_hub())
+		map_low_mmrs();
 
 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 1c7380da65ff..2d66db8f80f9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 
@@ -248,6 +249,16 @@ out:
 	return ret;
 }
 
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+	{ NULL, "SGI UV1",
+		{	DMI_MATCH(DMI_PRODUCT_NAME,	"Stoutland Platform"),
+			DMI_MATCH(DMI_PRODUCT_VERSION,	"1.0"),
+			DMI_MATCH(DMI_BIOS_VENDOR,	"SGI.COM"),
+		}
+	},
+	{ } /* NULL entry stops DMI scanning */
+};
+
 void __init efi_apply_memmap_quirks(void)
 {
 	/*
@@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void)
 		efi_unmap_memmap();
 	}
 
-	/*
-	 * UV doesn't support the new EFI pagetable mapping yet.
-	 */
-	if (is_uv_system())
+	/* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
+	if (dmi_check_system(sgi_uv1_dmi))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
 }
 

From a9d7ab97812cf2cf9fd7c980205ae35f58c52cf5 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Mon, 11 Jan 2016 11:47:12 +0100
Subject: [PATCH 048/262] s390/mm: use TASK_MAX_SIZE where applicable

To improve readability we can use TASK_MAX_SIZE when we just check for the
upper limit.  All places explicitly dealing with 3 vs 4 level pgtables
were left unchanged.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-By: Sascha Silbe <silbe@linux.vnet.ibm.com>
---
 arch/s390/mm/mmap.c    | 12 ++++++------
 arch/s390/mm/pgtable.c |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index ea01477b4aa6..45c4daa49930 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -169,12 +169,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-	if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+	if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
 		return 0;
 	if (!(flags & MAP_FIXED))
 		addr = 0;
 	if ((addr + len) >= TASK_SIZE)
-		return crst_table_upgrade(current->mm, 1UL << 53);
+		return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
 	return 0;
 }
 
@@ -189,9 +189,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
 	area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -211,9 +211,9 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
 	area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area_topdown(filp, addr, len,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a809fa8e6f8b..5109827883ac 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -55,7 +55,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 	unsigned long entry;
 	int flush;
 
-	BUG_ON(limit > (1UL << 53));
+	BUG_ON(limit > TASK_MAX_SIZE);
 	flush = 0;
 repeat:
 	table = crst_table_alloc(mm);

From 204ee2c5643199a25181ec04ea645d00709c2a5a Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 11 Jan 2016 09:17:18 +0100
Subject: [PATCH 049/262] s390/irqflags: optimize irq restore

The ssm instruction takes longer that stnsm/stosm as it is often
used to modify DAT and PER. We know that irqsave/irqrestore only
deals with external and I/O interrupts and we know that irqrestore
can transition only from disabled->disabled or disabled->enabled,
so we can use the faster stosm.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/irqflags.h | 9 +++++++--
 arch/s390/mm/init.c              | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 16aa0c779e07..595a275c36f8 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define ARCH_IRQ_ENABLED	(3UL << (BITS_PER_LONG - 8))
+
 /* store then OR system mask. */
 #define __arch_local_irq_stosm(__or)					\
 ({									\
@@ -54,14 +56,17 @@ static inline notrace void arch_local_irq_enable(void)
 	__arch_local_irq_stosm(0x03);
 }
 
+/* This only restores external and I/O interrupt state */
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
-	__arch_local_irq_ssm(flags);
+	/* only disabled->disabled and disabled->enabled is valid */
+	if (flags & ARCH_IRQ_ENABLED)
+		arch_local_irq_enable();
 }
 
 static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
-	return !(flags & (3UL << (BITS_PER_LONG - 8)));
+	return !(flags & ARCH_IRQ_ENABLED);
 }
 
 static inline notrace bool arch_irqs_disabled(void)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c722400c7697..73e290337092 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -98,7 +98,7 @@ void __init paging_init(void)
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
 	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
 	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
-	arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+	__arch_local_irq_stosm(0x04);
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();

From ef1f7fd7eba19eb64fc424355eb9d4e49ca06fb1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 15 Jan 2016 14:50:25 +0100
Subject: [PATCH 050/262] s390/numa: allocate memory with correct alignment

Allocating memory with a requested minimum alignment of 1 is wrong
since pg_data_t contains a spinlock which requires an alignment of 4
bytes.

Therefore fix this and ask for an alignment of 8 bytes like it is
guarenteed for all kmalloc requests.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/numa/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 43f32ce60aa3..b75ac43060e1 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -57,7 +57,7 @@ static __init pg_data_t *alloc_node_data(void)
 {
 	pg_data_t *res;
 
-	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
+	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
 	if (!res)
 		panic("Could not allocate memory for node data!\n");
 	memset(res, 0, sizeof(pg_data_t));

From cd17e153af499c74d7e347c8ad7eab273f55f20b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 15 Jan 2016 14:52:59 +0100
Subject: [PATCH 051/262] s390: remove superfluous memblock_alloc() return
 value checks

memblock_alloc() and memblock_alloc_base() will panic on their own if
they can't find free memory. Therefore remove some pointless checks.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/crash_dump.c | 2 --
 arch/s390/kernel/smp.c        | 2 --
 arch/s390/numa/numa.c         | 2 --
 3 files changed, 6 deletions(-)

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index a92b39fd0e63..3986c9f62191 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -59,8 +59,6 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
 	struct save_area *sa;
 
 	sa = (void *) memblock_alloc(sizeof(*sa), 8);
-	if (!sa)
-		return NULL;
 	if (is_boot_cpu)
 		list_add(&sa->list, &dump_save_areas);
 	else
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a13468b9a913..3c65a8eae34d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -623,8 +623,6 @@ void __init smp_save_dump_cpus(void)
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
 	page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
-	if (!page)
-		panic("could not allocate memory for save area\n");
 	/* Set multi-threading state to the previous system. */
 	pcpu_set_smt(sclp.mtid_prev);
 	boot_cpu_addr = stap();
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index b75ac43060e1..969b4ed4ed5e 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -58,8 +58,6 @@ static __init pg_data_t *alloc_node_data(void)
 	pg_data_t *res;
 
 	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
-	if (!res)
-		panic("Could not allocate memory for node data!\n");
 	memset(res, 0, sizeof(pg_data_t));
 	return res;
 }

From 696aafd36905a90af87c64d1278f91e39e66ae1e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 19 Jan 2016 10:19:02 +0100
Subject: [PATCH 052/262] s390: wire up copy_file_range syscall

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/uapi/asm/unistd.h | 3 ++-
 arch/s390/kernel/compat_wrapper.c   | 1 +
 arch/s390/kernel/syscalls.S         | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 34ec202472c6..ab3aa6875a59 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -310,7 +310,8 @@
 #define __NR_recvmsg		372
 #define __NR_shutdown		373
 #define __NR_mlock2		374
-#define NR_syscalls 375
+#define __NR_copy_file_range	375
+#define NR_syscalls 376
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index fac4eeddef91..ae2cda5eee5a 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -177,3 +177,4 @@ COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 5378c3ea1b98..293d8b98fd52 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -383,3 +383,4 @@ SYSCALL(sys_recvfrom,compat_sys_recvfrom)
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */

From fecc868a668774b0fc666728c3f5d9f6fceefe64 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Jan 2016 12:49:44 +0100
Subject: [PATCH 053/262] s390: remove all usages of PSW_ADDR_AMODE

This is a leftover from the 31 bit area. For CONFIG_64BIT the usual
operation "y = x | PSW_ADDR_AMODE" is a nop. Therefore remove all
usages of PSW_ADDR_AMODE and make the code a bit less confusing.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 arch/s390/include/asm/processor.h |  4 ++--
 arch/s390/include/asm/ptrace.h    |  2 +-
 arch/s390/kernel/early.c          |  6 +++---
 arch/s390/kernel/ipl.c            |  4 ++--
 arch/s390/kernel/kprobes.c        | 10 +++++-----
 arch/s390/kernel/process.c        |  2 +-
 arch/s390/kernel/setup.c          | 16 ++++++----------
 arch/s390/kernel/signal.c         | 13 ++++++-------
 arch/s390/kernel/traps.c          |  2 +-
 arch/s390/mm/fault.c              |  2 +-
 10 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index f16debf6a612..1c4fe129486d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -166,14 +166,14 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
  */
 #define start_thread(regs, new_psw, new_stackp) do {			\
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;	\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
 	execve_tail();							\
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {			\
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_BA;			\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
 	crst_table_downgrade(current->mm, 1UL << 31);			\
 	execve_tail();							\
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f00cd35c8ac4..7423fd05cb94 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -161,7 +161,7 @@ static inline long regs_return_value(struct pt_regs *regs)
 static inline void instruction_pointer_set(struct pt_regs *regs,
 					   unsigned long val)
 {
-	regs->psw.addr = val | PSW_ADDR_AMODE;
+	regs->psw.addr = val;
 }
 
 int regs_query_register_offset(const char *name);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 20a5caf6d981..43ea2feb85d4 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -259,7 +259,7 @@ static void early_pgm_check_handler(void)
 	__ctl_store(cr0, 0, 0);
 	cr0_new = cr0 & ~(1UL << 28);
 	__ctl_load(cr0_new, 0, 0);
-	S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+	S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
 	__ctl_load(cr0, 0, 0);
 }
 
@@ -268,9 +268,9 @@ static noinline __init void setup_lowcore_early(void)
 	psw_t psw;
 
 	psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+	psw.addr = (unsigned long) s390_base_ext_handler;
 	S390_lowcore.external_new_psw = psw;
-	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+	psw.addr = (unsigned long) s390_base_pgm_handler;
 	S390_lowcore.program_new_psw = psw;
 	s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 0a5a6b661b93..f20abdb5630a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2057,12 +2057,12 @@ void s390_reset_system(void)
 	/* Set new machine check handler */
 	S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
 	S390_lowcore.mcck_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+		(unsigned long) s390_base_mcck_handler;
 
 	/* Set new program check handler */
 	S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
 	S390_lowcore.program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+		(unsigned long) s390_base_pgm_handler;
 
 	/*
 	 * Clear subchannel ID and number to signal new kernel that no CCW or
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 389db56a2208..66490289b028 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -226,7 +226,7 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
 	__ctl_load(per_kprobe, 9, 11);
 	regs->psw.mask |= PSW_MASK_PER;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-	regs->psw.addr = ip | PSW_ADDR_AMODE;
+	regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(enable_singlestep);
 
@@ -238,7 +238,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
 	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
 	regs->psw.mask &= ~PSW_MASK_PER;
 	regs->psw.mask |= kcb->kprobe_saved_imask;
-	regs->psw.addr = ip | PSW_ADDR_AMODE;
+	regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(disable_singlestep);
 
@@ -460,7 +460,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 			break;
 	}
 
-	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+	regs->psw.addr = orig_ret_address;
 
 	pop_kprobe(get_kprobe_ctlblk());
 	kretprobe_hash_unlock(current, &flags);
@@ -607,7 +607,7 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
 		 */
 		entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
 		if (entry) {
-			regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+			regs->psw.addr = extable_fixup(entry);
 			return 1;
 		}
 
@@ -683,7 +683,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
 	/* setup return addr to the jprobe handler routine */
-	regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) jp->entry;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
 	/* r15 is the stack pointer */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 114ee8b96f17..4a41eb7d66cc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 		memset(&frame->childregs, 0, sizeof(struct pt_regs));
 		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
 				PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-		frame->childregs.psw.addr = PSW_ADDR_AMODE |
+		frame->childregs.psw.addr =
 				(unsigned long) kernel_thread_starter;
 		frame->childregs.gprs[9] = new_stackp; /* function */
 		frame->childregs.gprs[10] = arg;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c6878fbbcf13..9220db5c996a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -301,25 +301,21 @@ static void __init setup_lowcore(void)
 	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
 	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
 	lc->restart_psw.mask = PSW_KERNEL_BITS;
-	lc->restart_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+	lc->restart_psw.addr = (unsigned long) restart_int_handler;
 	lc->external_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->external_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
 	lc->svc_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+	lc->svc_new_psw.addr = (unsigned long) system_call;
 	lc->program_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
 	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-	lc->mcck_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
 	lc->io_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+	lc->io_new_psw.addr = (unsigned long) io_int_handler;
 	lc->clock_comparator = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union)
 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 028cc46cb82a..d82562cf0a0e 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -331,13 +331,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+		restorer = (unsigned long) ka->sa.sa_restorer;
 	} else {
 		/* Signal frame without vector registers are short ! */
 		__u16 __user *svc = (void __user *) frame + frame_size - 2;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
 			return -EFAULT;
-		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+		restorer = (unsigned long) svc;
 	}
 
 	/* Set up registers for signal handler */
@@ -347,7 +347,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
 		(PSW_USER_BITS & PSW_MASK_ASC) |
 		(regs->psw.mask & ~PSW_MASK_ASC);
-	regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) ka->sa.sa_handler;
 
 	regs->gprs[2] = sig;
 	regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-		restorer = (unsigned long)
-			ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+		restorer = (unsigned long) ksig->ka.sa.sa_restorer;
 	} else {
 		__u16 __user *svc = &frame->svc_insn;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
 			return -EFAULT;
-		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+		restorer = (unsigned long) svc;
 	}
 
 	/* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
 		(PSW_USER_BITS & PSW_MASK_ASC) |
 		(regs->psw.mask & ~PSW_MASK_ASC);
-	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
 
 	regs->gprs[2] = ksig->sig;
 	regs->gprs[3] = (unsigned long) &frame->info;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index d69d648759c9..6a49b7d90e0d 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -71,7 +71,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
                 const struct exception_table_entry *fixup;
                 fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
                 if (fixup)
-			regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+			regs->psw.addr = extable_fixup(fixup);
 		else {
 			enum bug_trap_type btt;
 
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 1b903f6ad54a..95ed8d48f54e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -258,7 +258,7 @@ static noinline void do_no_context(struct pt_regs *regs)
 	/* Are we prepared to handle this kernel fault?  */
 	fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
 	if (fixup) {
-		regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+		regs->psw.addr = extable_fixup(fixup);
 		return;
 	}
 

From 9cb1ccecb69d133e014b7be4de2609f689398c07 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Jan 2016 13:12:19 +0100
Subject: [PATCH 054/262] s390: remove all usages of PSW_ADDR_INSN

Yet another leftover from the 31 bit era. The usual operation
"y = x & PSW_ADDR_INSN" with the PSW_ADDR_INSN mask is a nop for
CONFIG_64BIT.

Therefore remove all usages and hope the code is a bit less confusing.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 arch/s390/include/asm/ptrace.h |  4 ++--
 arch/s390/kernel/debug.c       |  2 +-
 arch/s390/kernel/dumpstack.c   |  9 ++++-----
 arch/s390/kernel/early.c       |  2 +-
 arch/s390/kernel/ftrace.c      |  2 +-
 arch/s390/kernel/kprobes.c     |  6 +++---
 arch/s390/kernel/perf_event.c  | 12 +++++-------
 arch/s390/kernel/process.c     | 10 +++++-----
 arch/s390/kernel/ptrace.c      |  6 ++----
 arch/s390/kernel/stacktrace.c  | 11 +++++------
 arch/s390/kernel/traps.c       |  9 ++++-----
 arch/s390/kvm/guestdbg.c       |  4 ++--
 arch/s390/mm/fault.c           |  4 ++--
 arch/s390/oprofile/backtrace.c |  9 ++++-----
 14 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 7423fd05cb94..99bc456cc26a 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -149,7 +149,7 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
 #define arch_has_block_step()	(1)
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define profile_pc(regs) instruction_pointer(regs)
 
@@ -171,7 +171,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
-	return regs->gprs[15] & PSW_ADDR_INSN;
+	return regs->gprs[15];
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 6fca0e46464e..c890a5589e59 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1470,7 +1470,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
 		except_str = "*";
 	else
 		except_str = "-";
-	caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+	caller = (unsigned long) entry->caller;
 	rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p  ",
 		      area, (long long)time_spec.tv_sec,
 		      time_spec.tv_nsec / 1000, level, except_str,
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dc8e20473484..02bd02ff648b 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -34,22 +34,21 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
 	unsigned long addr;
 
 	while (1) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
-		addr = sf->gprs[8] & PSW_ADDR_INSN;
+		addr = sf->gprs[8];
 		printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
 		/* Follow the backchain. */
 		while (1) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
-			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			addr = sf->gprs[8];
 			printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
 		}
 		/* Zero backchain detected, check for interrupt frame. */
@@ -57,7 +56,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *) sp;
-		addr = regs->psw.addr & PSW_ADDR_INSN;
+		addr = regs->psw.addr;
 		printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
 		low = sp;
 		sp = regs->gprs[15];
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 43ea2feb85d4..c55576bbaa1f 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -252,7 +252,7 @@ static void early_pgm_check_handler(void)
 	unsigned long addr;
 
 	addr = S390_lowcore.program_old_psw.addr;
-	fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+	fixup = search_exception_tables(addr);
 	if (!fixup)
 		disabled_wait(0);
 	/* Disable low address protection before storing into lowcore. */
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index e0eaf11134b4..0f7bfeba6da6 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -203,7 +203,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
 		goto out;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
-	ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+	ip -= MCOUNT_INSN_SIZE;
 	trace.func = ip;
 	trace.depth = current->curr_ret_stack + 1;
 	/* Only trace if the calling function expects to. */
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 66490289b028..250f5972536a 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -310,7 +310,7 @@ static int kprobe_handler(struct pt_regs *regs)
 	 */
 	preempt_disable();
 	kcb = get_kprobe_ctlblk();
-	p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+	p = get_kprobe((void *)(regs->psw.addr - 2));
 
 	if (p) {
 		if (kprobe_running()) {
@@ -490,7 +490,7 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+	unsigned long ip = regs->psw.addr;
 	int fixup = probe_get_fixup_type(p->ainsn.insn);
 
 	/* Check if the kprobes location is an enabled ftrace caller */
@@ -605,7 +605,7 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		entry = search_exception_tables(regs->psw.addr);
 		if (entry) {
 			regs->psw.addr = extable_fixup(entry);
 			return 1;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 61595c1f0a0f..cfcba2dd9bb5 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -74,7 +74,7 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs)
 
 static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 {
-	return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+	return sie_block(regs)->gpsw.addr;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -231,29 +231,27 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry,
 	struct pt_regs *regs;
 
 	while (1) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
-		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		perf_callchain_store(entry, sf->gprs[8]);
 		/* Follow the backchain. */
 		while (1) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
-			perf_callchain_store(entry,
-					     sf->gprs[8] & PSW_ADDR_INSN);
+			perf_callchain_store(entry, sf->gprs[8]);
 		}
 		/* Zero backchain detected, check for interrupt frame. */
 		sp = (unsigned long) (sf + 1);
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *) sp;
-		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		perf_callchain_store(entry, sf->gprs[8]);
 		low = sp;
 		sp = regs->gprs[15];
 	}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 4a41eb7d66cc..2bba7df4ac51 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -56,10 +56,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 		return 0;
 	low = task_stack_page(tsk);
 	high = (struct stack_frame *) task_pt_regs(tsk);
-	sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) tsk->thread.ksp;
 	if (sf <= low || sf > high)
 		return 0;
-	sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) sf->back_chain;
 	if (sf <= low || sf > high)
 		return 0;
 	return sf->gprs[8];
@@ -220,14 +220,14 @@ unsigned long get_wchan(struct task_struct *p)
 		return 0;
 	low = task_stack_page(p);
 	high = (struct stack_frame *) task_pt_regs(p);
-	sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) p->thread.ksp;
 	if (sf <= low || sf > high)
 		return 0;
 	for (count = 0; count < 16; count++) {
-		sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+		sf = (struct stack_frame *) sf->back_chain;
 		if (sf <= low || sf > high)
 			return 0;
-		return_address = sf->gprs[8] & PSW_ADDR_INSN;
+		return_address = sf->gprs[8];
 		if (!in_sched_functions(return_address))
 			return return_address;
 	}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 01c37b36caf9..49b1c13bf6c9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -84,7 +84,7 @@ void update_cr_regs(struct task_struct *task)
 		if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
 			new.control |= PER_EVENT_IFETCH;
 		new.start = 0;
-		new.end = PSW_ADDR_INSN;
+		new.end = -1UL;
 	}
 
 	/* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
 	else if (addr == (addr_t) &dummy->cr11)
 		/* End address of the active per set. */
 		return test_thread_flag(TIF_SINGLE_STEP) ?
-			PSW_ADDR_INSN : child->thread.per_user.end;
+			-1UL : child->thread.per_user.end;
 	else if (addr == (addr_t) &dummy->bits)
 		/* Single-step bit. */
 		return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@ long arch_ptrace(struct task_struct *child, long request,
 		}
 		return 0;
 	default:
-		/* Removing high order bit from addr (only for 31 bit). */
-		addr &= PSW_ADDR_INSN;
 		return ptrace_request(child, request, addr, data);
 	}
 }
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 1785cd82253c..5acba3cb7220 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -21,12 +21,11 @@ static unsigned long save_context_stack(struct stack_trace *trace,
 	unsigned long addr;
 
 	while(1) {
-		sp &= PSW_ADDR_INSN;
 		if (sp < low || sp > high)
 			return sp;
 		sf = (struct stack_frame *)sp;
 		while(1) {
-			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			addr = sf->gprs[8];
 			if (!trace->skip)
 				trace->entries[trace->nr_entries++] = addr;
 			else
@@ -34,7 +33,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
 			if (trace->nr_entries >= trace->max_entries)
 				return sp;
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
@@ -46,7 +45,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *)sp;
-		addr = regs->psw.addr & PSW_ADDR_INSN;
+		addr = regs->psw.addr;
 		if (savesched || !in_sched_functions(addr)) {
 			if (!trace->skip)
 				trace->entries[trace->nr_entries++] = addr;
@@ -65,7 +64,7 @@ void save_stack_trace(struct stack_trace *trace)
 	register unsigned long sp asm ("15");
 	unsigned long orig_sp, new_sp;
 
-	orig_sp = sp & PSW_ADDR_INSN;
+	orig_sp = sp;
 	new_sp = save_context_stack(trace, orig_sp,
 				    S390_lowcore.panic_stack - PAGE_SIZE,
 				    S390_lowcore.panic_stack, 1);
@@ -86,7 +85,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
 	unsigned long sp, low, high;
 
-	sp = tsk->thread.ksp & PSW_ADDR_INSN;
+	sp = tsk->thread.ksp;
 	low = (unsigned long) task_stack_page(tsk);
 	high = (unsigned long) task_pt_regs(tsk);
 	save_context_stack(trace, sp, low, high, 0);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 6a49b7d90e0d..017eb03daee2 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -32,8 +32,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
 		address = *(unsigned long *)(current->thread.trap_tdb + 24);
 	else
 		address = regs->psw.addr;
-	return (void __user *)
-		((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+	return (void __user *) (address - (regs->int_code >> 16));
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -46,7 +45,7 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
 		return;
 	printk("User process fault: interruption code %04x ilc:%d ",
 	       regs->int_code & 0xffff, regs->int_code >> 17);
-	print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+	print_vma_addr("in ", regs->psw.addr);
 	printk("\n");
 	show_regs(regs);
 }
@@ -69,13 +68,13 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
 		report_user_fault(regs, si_signo);
         } else {
                 const struct exception_table_entry *fixup;
-                fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		fixup = search_exception_tables(regs->psw.addr);
                 if (fixup)
 			regs->psw.addr = extable_fixup(fixup);
 		else {
 			enum bug_trap_type btt;
 
-			btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+			btt = report_bug(regs->psw.addr, regs);
 			if (btt == BUG_TRAP_TYPE_WARN)
 				return;
 			die(regs, str);
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 47518a324d75..d697312ce9ee 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
 	if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
 		*cr9 &= ~PER_CONTROL_ALTERATION;
 		*cr10 = 0;
-		*cr11 = PSW_ADDR_INSN;
+		*cr11 = -1UL;
 	} else {
 		*cr9 &= ~PER_CONTROL_ALTERATION;
 		*cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
 		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
 		vcpu->arch.sie_block->gcr[10] = 0;
-		vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+		vcpu->arch.sie_block->gcr[11] = -1UL;
 	}
 
 	if (guestdbg_hw_bp_enabled(vcpu)) {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 95ed8d48f54e..791a4146052c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -228,7 +228,7 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
 		return;
 	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
 	       regs->int_code & 0xffff, regs->int_code >> 17);
-	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
 	printk(KERN_CONT "\n");
 	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
 	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
@@ -256,7 +256,7 @@ static noinline void do_no_context(struct pt_regs *regs)
 	const struct exception_table_entry *fixup;
 
 	/* Are we prepared to handle this kernel fault?  */
-	fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+	fixup = search_exception_tables(regs->psw.addr);
 	if (fixup) {
 		regs->psw.addr = extable_fixup(fixup);
 		return;
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
index 8a6811b2cdb9..fe0bfe370c45 100644
--- a/arch/s390/oprofile/backtrace.c
+++ b/arch/s390/oprofile/backtrace.c
@@ -16,24 +16,23 @@ __show_trace(unsigned int *depth, unsigned long sp,
 	struct pt_regs *regs;
 
 	while (*depth) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
 		(*depth)--;
-		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		oprofile_add_trace(sf->gprs[8]);
 
 		/* Follow the backchain.  */
 		while (*depth) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
 			(*depth)--;
-			oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+			oprofile_add_trace(sf->gprs[8]);
 
 		}
 
@@ -46,7 +45,7 @@ __show_trace(unsigned int *depth, unsigned long sp,
 			return sp;
 		regs = (struct pt_regs *) sp;
 		(*depth)--;
-		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		oprofile_add_trace(sf->gprs[8]);
 		low = sp;
 		sp = regs->gprs[15];
 	}

From 9571e1d84042f5670df9fabdcbe7dd5da3abe43e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Tue, 19 Jan 2016 17:59:46 +0900
Subject: [PATCH 055/262] drm/amdgpu: Use drm_calloc_large for VM page_tables
 array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It can be big, depending on the VM address space size, which is tunable
via the vm_size module parameter.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93721
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index aefc668e6b5d..9599f7559b3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1282,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT * 8);
-	unsigned pd_size, pd_entries, pts_size;
+	unsigned pd_size, pd_entries;
 	int i, r;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
 	/* allocate page table array */
-	pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
 	if (vm->page_tables == NULL) {
 		DRM_ERROR("Cannot allocate memory for page table array\n");
 		return -ENOMEM;
@@ -1361,7 +1360,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
 		amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
-	kfree(vm->page_tables);
+	drm_free_large(vm->page_tables);
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);

From 3466904d38ff1e63f0a19cb31166db67f2d05c61 Mon Sep 17 00:00:00 2001
From: Jordan Lazare <Jordan.Lazare@amd.com>
Date: Mon, 18 Jan 2016 17:00:03 -0500
Subject: [PATCH 056/262] drm/amdgpu: Allow the driver to load if
 amdgpu.powerplay=1 on asics without powerplay support

Avoid setting pp_enabled if there is no powerplay implementation.

Signed-off-by: Jordan Lazare <Jordan.Lazare@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 25 +++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 5ee9a0690278..b9d0d55f6b47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -99,13 +99,24 @@ static int amdgpu_pp_early_init(void *handle)
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 	switch (adev->asic_type) {
-		case CHIP_TONGA:
-		case CHIP_FIJI:
-			adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-			break;
-		default:
-			adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-			break;
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+		adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+		break;
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+		break;
+	/* These chips don't have powerplay implemenations */
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	case CHIP_KAVERI:
+	case CHIP_TOPAZ:
+	default:
+		adev->pp_enabled = false;
+		break;
 	}
 #else
 	adev->pp_enabled = false;

From 8483d152db61c5baf5452b844ef65b96ee9a6cfb Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 21 Dec 2015 16:11:44 -0800
Subject: [PATCH 057/262] drm/vc4: Remove broken attempt at GPU reset using
 genpd.

I've tested and confirmed that it doesn't actually work.  We'll need
to sort out how to do this properly later, but for now just remove it
since it also caused build breakage due to using CONFIG_PM_SLEEP
functions without our Kconfig depending on PM_SLEEP.

Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
---
 drivers/gpu/drm/vc4/vc4_v3d.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 424d515ffcda..314ff71db978 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -144,19 +144,16 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 }
 #endif /* CONFIG_DEBUG_FS */
 
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
 int
 vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
 {
-	if (on)
-		return pm_generic_poweroff(&vc4->v3d->pdev->dev);
-	else
-		return pm_generic_resume(&vc4->v3d->pdev->dev);
+	/* XXX: This interface is needed for GPU reset, and the way to
+	 * do it is to turn our power domain off and back on.  We
+	 * can't just reset from within the driver, because the reset
+	 * bits are in the power domain's register area, and get set
+	 * during the poweron process.
+	 */
+	return 0;
 }
 
 static void vc4_v3d_init_hw(struct drm_device *dev)

From c240906d36653944d5c049df7ce667a7e8bea6ac Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 19 Jan 2016 10:46:58 +0000
Subject: [PATCH 058/262] drm/atomic-helper: Export framebuffer_changed()

The Rockchip driver cannot use drm_atomic_helper_wait_for_vblanks()
because it has hardware counters for neither vblanks nor scanlines.

In order to simplify re-implementing the functionality for this driver,
export the framebuffer_changed() helper so it can be reused.

Signed-off-by: John Keeping <john@metanate.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_atomic_helper.c | 24 ++++++++++++++++++++----
 include/drm/drm_atomic_helper.h     |  4 ++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 57cccd68ca52..7c523060a076 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -946,9 +946,23 @@ static void wait_for_fences(struct drm_device *dev,
 	}
 }
 
-static bool framebuffer_changed(struct drm_device *dev,
-				struct drm_atomic_state *old_state,
-				struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update.  This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+					   struct drm_atomic_state *old_state,
+					   struct drm_crtc *crtc)
 {
 	struct drm_plane *plane;
 	struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@ static bool framebuffer_changed(struct drm_device *dev,
 
 	return false;
 }
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
 
 /**
  * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
 		if (old_state->legacy_cursor_update)
 			continue;
 
-		if (!framebuffer_changed(dev, old_state, crtc))
+		if (!drm_atomic_helper_framebuffer_changed(dev,
+				old_state, crtc))
 			continue;
 
 		ret = drm_crtc_vblank_get(crtc);
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 89d008dc08e2..fe5efada9d68 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 			     struct drm_atomic_state *state,
 			     bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+					   struct drm_atomic_state *old_state,
+					   struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
 					struct drm_atomic_state *old_state);
 

From f2227f469782e55765deacb8ebcc7ec05fe04013 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 19 Jan 2016 10:46:59 +0000
Subject: [PATCH 059/262] drm/rockchip: don't wait for vblank if fb hasn't
 changed

As commented in drm_atomic_helper_wait_for_vblanks(), userspace relies
on cursor ioctls being unsynced.  Converting the rockchip driver to
atomic has significantly impacted cursor performance by making every
cursor update wait for vblank.

By skipping the vblank sync when the framebuffer has not changed (as is
done in drm_atomic_helper_wait_for_vblanks()) we can avoid this for the
common case of moving the cursor and only need to delay the cursor ioctl
when the cursor icon changes.

We cannot add the check on legacy_cursor_update since that results in
the cursor bo being unreferenced while the hardware may still be reading
it.  Fully supporting unsynced cursor updates is left for the future
when the atomic helper framework supports async updates.

Signed-off-by: John Keeping <john@metanate.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
---
 drivers/gpu/drm/rockchip/rockchip_drm_fb.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index dd1f8d3c98d8..87c77c4b5e50 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -177,7 +177,7 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc)
 }
 
 static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
@@ -193,6 +193,10 @@ rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
 		if (!crtc->state->active)
 			continue;
 
+		if (!drm_atomic_helper_framebuffer_changed(dev,
+				old_state, crtc))
+			continue;
+
 		ret = drm_crtc_vblank_get(crtc);
 		if (ret != 0)
 			continue;
@@ -240,7 +244,7 @@ rockchip_atomic_commit_complete(struct rockchip_atomic_commit *commit)
 
 	drm_atomic_helper_commit_planes(dev, state, true);
 
-	rockchip_atomic_wait_for_complete(state);
+	rockchip_atomic_wait_for_complete(dev, state);
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 

From c9ad1d9946e849ac3d8821d91e136d7fd728dec5 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 19 Jan 2016 10:47:00 +0000
Subject: [PATCH 060/262] drm/rockchip: explain why we can't wait_for_vblanks

Signed-off-by: John Keeping <john@metanate.com>
---
 drivers/gpu/drm/rockchip/rockchip_drm_fb.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index 87c77c4b5e50..3b8f652698f8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -176,6 +176,21 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc)
 		crtc_funcs->wait_for_update(crtc);
 }
 
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ *				| <-- HW vsync irq and reg take effect
+ *	       plane_commit --> |
+ *	get_vblank and wait --> |
+ *				| <-- handle_vblank, vblank->count + 1
+ *		 cleanup_fb --> |
+ *		iommu crash --> |
+ *				| <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
 static void
 rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {

From ba2d084055fd3f67af120070f5620173efd867c8 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 20 Jan 2016 19:07:04 +0100
Subject: [PATCH 061/262] btrfs: sysfs: fix typo in compat_ro attribute
 definition

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9c09522125a6..72408e2c4ea8 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {			     \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 

From 973abd30a14681543ea1edb80eb5ef04bf0bb021 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Fri, 18 Dec 2015 15:51:59 +0800
Subject: [PATCH 062/262] MAINTAINERS: add git URL for swiotlb

CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 233f83464814..15f46cc466a9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10320,6 +10320,7 @@ F:	arch/x86/boot/video*
 SWIOTLB SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 L:	linux-kernel@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git
 S:	Supported
 F:	lib/swiotlb.c
 F:	arch/*/kernel/pci-swiotlb.c

From 386744425e35e04984c6e741c7750fd6eef1a9df Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 1 Jul 2015 14:17:58 +0200
Subject: [PATCH 063/262] swiotlb: Make linux/swiotlb.h standalone includible

This header file uses the enum dma_data_direction and struct page types
without explicitly including the corresponding header files. This makes
it rely on the includer to have included the proper headers before.

To fix this, include linux/dma-direction.h and forward-declare struct
page. The swiotlb_free() function is also annotated __init, therefore
requires linux/init.h to be included as well.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swiotlb.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index e7a018eaf3a2..017fced60242 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;

From fb7a6ae7554f15b5b0b7f31b02ed4ef40a8c2eb6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 15 Jan 2016 14:39:08 +0100
Subject: [PATCH 064/262] hisi_sas: SCSI_HISI_SAS should depend on HAS_DMA

If NO_DMA=y:

    ERROR: "dma_map_sg" [drivers/scsi/hisi_sas/hisi_sas_v1_hw.ko] undefined!
    ERROR: "dma_pool_alloc" [drivers/scsi/hisi_sas/hisi_sas_v1_hw.ko] undefined!
    ERROR: "dma_unmap_sg" [drivers/scsi/hisi_sas/hisi_sas_v1_hw.ko] undefined!
    ERROR: "dma_unmap_sg" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_set_mask" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_map_sg" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_pool_destroy" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_free_coherent" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_pool_free" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_alloc_coherent" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_pool_alloc" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_supported" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!
    ERROR: "dma_pool_create" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined!

Add a dependency on HAS_DMA to fix this.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/hisi_sas/Kconfig b/drivers/scsi/hisi_sas/Kconfig
index 37a0c7156087..2c05021636a5 100644
--- a/drivers/scsi/hisi_sas/Kconfig
+++ b/drivers/scsi/hisi_sas/Kconfig
@@ -1,5 +1,6 @@
 config SCSI_HISI_SAS
 	tristate "HiSilicon SAS"
+	depends on HAS_DMA
 	select SCSI_SAS_LIBSAS
 	select BLK_DEV_INTEGRITY
 	help

From c142ce0d3c6b070c248cef4bbe3a0f58cfd61f3f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 15 Jan 2016 14:39:09 +0100
Subject: [PATCH 065/262] hisi_sas: Restrict SCSI_HISI_SAS to arm64

The HiSilicon SAS HBA is available in HiSilicon arm64 SoCs only.
Restrict it to arm64, unless compile-testing.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/hisi_sas/Kconfig b/drivers/scsi/hisi_sas/Kconfig
index 2c05021636a5..b67661836c9f 100644
--- a/drivers/scsi/hisi_sas/Kconfig
+++ b/drivers/scsi/hisi_sas/Kconfig
@@ -1,6 +1,7 @@
 config SCSI_HISI_SAS
 	tristate "HiSilicon SAS"
 	depends on HAS_DMA
+	depends on ARM64 || COMPILE_TEST
 	select SCSI_SAS_LIBSAS
 	select BLK_DEV_INTEGRITY
 	help

From d0eb20a863ba7dc1d3f4b841639671f134560be2 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 20 Jan 2016 11:01:23 -0500
Subject: [PATCH 066/262] sd: Optimal I/O size is in bytes, not sectors

Commit ca369d51b3e1 ("block/sd: Fix device-imposed transfer length
limits") accidentally switched optimal I/O size reporting from bytes to
block layer sectors.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: ca369d51b3e1649be4a72addd6d6a168cfb3f537
Cc: stable@vger.kernel.org # 4.4+
Reviewed-by: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
---
 drivers/scsi/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4e08d1cd704d..ec163d08f6c3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2893,7 +2893,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
 	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
 	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
 		rw_max = q->limits.io_opt =
-			logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+			sdkp->opt_xfer_blocks * sdp->sector_size;
 	else
 		rw_max = BLK_DEF_MAX_SECTORS;
 

From e3c4abdb3bc9b76bedd416ecc5c27633a2f8afed Mon Sep 17 00:00:00 2001
From: Mark Yao <mark.yao@rock-chips.com>
Date: Wed, 23 Sep 2015 12:34:34 +0800
Subject: [PATCH 067/262] drm/rockchip: fix wrong pitch/size using on gem

args->pitch and args->size may not be set by userspace, sometimes
userspace only malloc args and not memset args to zero, then
args->pitch and args->size is random, it is very danger to use
pitch/size on gem.

pitch's type is u32, and min_pitch's type is int, example,
pitch is 0xffffffff, then pitch < min_pitch return true, then gem will
alloc very very big bufffer, it would eat all the memory and cause kernel
crash.

Stop using pitch/size from args, calc them from other args.

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
---
 drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index d908321b94ce..18e07338c6e5 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -234,13 +234,8 @@ int rockchip_gem_dumb_create(struct drm_file *file_priv,
 	/*
 	 * align to 64 bytes since Mali requires it.
 	 */
-	min_pitch = ALIGN(min_pitch, 64);
-
-	if (args->pitch < min_pitch)
-		args->pitch = min_pitch;
-
-	if (args->size < args->pitch * args->height)
-		args->size = args->pitch * args->height;
+	args->pitch = ALIGN(min_pitch, 64);
+	args->size = args->pitch * args->height;
 
 	rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
 						 &args->handle);

From d7f9ee60a6ebc263861a1d8c06acf100495201b7 Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Wed, 13 Jan 2016 22:20:22 +0530
Subject: [PATCH 068/262] powerpc: Wire up copy_file_range() syscall

Test runs on a ppc64 BE guest succeeded using modified fstests.

Also tested on ppc64 LE using a home made test - mpe.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/systbl.h      | 1 +
 arch/powerpc/include/asm/unistd.h      | 2 +-
 arch/powerpc/include/uapi/asm/unistd.h | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 5654ece02c0d..3fa9df70aa20 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -383,3 +383,4 @@ SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(mlock2)
+SYSCALL(copy_file_range)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 6a5ace5fa0c8..1f2594d45605 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		379
+#define NR_syscalls		380
 
 #define __NR__exit __NR_exit
 
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 12a05652377a..940290d45b08 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -389,5 +389,6 @@
 #define __NR_userfaultfd	364
 #define __NR_membarrier		365
 #define __NR_mlock2		378
+#define __NR_copy_file_range	379
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */

From c153693d7eb9eeb28478aa2deaaf0b4e7b5ff5e9 Mon Sep 17 00:00:00 2001
From: Alan Modra <amodra@gmail.com>
Date: Fri, 15 Jan 2016 20:52:22 +1100
Subject: [PATCH 069/262] powerpc: Simplify module TOC handling

PowerPC64 uses the symbol .TOC. much as other targets use
_GLOBAL_OFFSET_TABLE_. It identifies the value of the GOT pointer (or in
powerpc parlance, the TOC pointer). Global offset tables are generally
local to an executable or shared library, or in the kernel, module. Thus
it does not make sense for a module to resolve a relocation against
.TOC. to the kernel's .TOC. value. A module has its own .TOC., and
indeed the powerpc64 module relocation processing ignores the kernel
value of .TOC. and instead calculates a module-local value.

This patch removes code involved in exporting the kernel .TOC., tweaks
modpost to ignore an undefined .TOC., and the module loader to twiddle
the section symbol so that .TOC. isn't seen as undefined.

Note that if the kernel was compiled with -msingle-pic-base then ELFv2
would not have function global entry code setting up r2. In that case
the module call stubs would need to be modified to set up r2 using the
kernel .TOC. value, requiring some of this code to be reinstated.

mpe: Furthermore a change in binutils master (not yet released) causes
the current way we handle the TOC to no longer work when building with
MODVERSIONS=y and RELOCATABLE=n. The symptom is that modules can not be
loaded due to there being no version found for TOC.

Cc: stable@vger.kernel.org # 3.16+
Signed-off-by: Alan Modra <amodra@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/misc_64.S   | 28 ----------------------------
 arch/powerpc/kernel/module_64.c | 12 +++++++++---
 scripts/mod/modpost.c           |  3 ++-
 3 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index db475d41b57a..f28754c497e5 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -701,31 +701,3 @@ _GLOBAL(kexec_sequence)
 	li	r5,0
 	blr	/* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
-	.llong	__crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
-	.asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
-	.llong 0 /* .value */
-	.llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 59663af9315f..ac64ffdb52c8 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -326,7 +326,10 @@ static void dedotify_versions(struct modversion_info *vers,
 		}
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
 	unsigned int i;
@@ -334,8 +337,11 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 	for (i = 1; i < numsyms; i++) {
 		if (syms[i].st_shndx == SHN_UNDEF) {
 			char *name = strtab + syms[i].st_name;
-			if (name[0] == '.')
+			if (name[0] == '.') {
+				if (strcmp(name+1, "TOC.") == 0)
+					syms[i].st_shndx = SHN_ABS;
 				memmove(name, name+1, strlen(name));
+			}
 		}
 	}
 }
@@ -351,7 +357,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
 	numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
 
 	for (i = 1; i < numsyms; i++) {
-		if (syms[i].st_shndx == SHN_UNDEF
+		if (syms[i].st_shndx == SHN_ABS
 		    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
 			return &syms[i];
 	}
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index e080746e1a6b..48958d3cec9e 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -594,7 +594,8 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname)
 		if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
 		    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
 		    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+		    strcmp(symname, ".TOC.") == 0)
 			return 1;
 	/* Do not ignore this symbol */
 	return 0;

From 0e2bce7411542fa336ef49041471ea3e7dc911f8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 21 Jan 2016 13:05:20 +1100
Subject: [PATCH 070/262] powerpc: Remove newly added extra definition of
 pmd_dirty

Commit d5d6a443b243 ("arch/powerpc/include/asm/pgtable-ppc64.h:
add pmd_[dirty|mkclean] for THP") added a new identical definition
of pmd_dirty(). Remove it again.

Cc: Minchan Kim <minchan@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8204b0c393aa..8d1c41d28318 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -223,7 +223,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))

From 22c43f36b5cf22a7e4506cb3a53f3ad2a43f60f7 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 20 Jan 2016 22:13:41 +0200
Subject: [PATCH 071/262] x86/platform/quark: Print boundaries correctly

When we print values, such as @size, we have to understand that
it's derived from [begin .. end] as:

	size = end - begin + 1

On the opposite the @end is derived from the rest as:

	end = begin + size - 1

Correct the IMR code to print values correctly.

Note that @__end_rodata actually points to the next address
after the aligned .rodata section.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ong, Boon Leong <boon.leong.ong@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453320821-64328-1-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-quark/imr.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index 0ee619f9fcb7..ad5ec6f784e8 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -228,11 +228,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
 		if (imr_is_enabled(&imr)) {
 			base = imr_to_phys(imr.addr_lo);
 			end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+			size = end - base + 1;
 		} else {
 			base = 0;
 			end = 0;
+			size = 0;
 		}
-		size = end - base;
 		seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
 			   "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
 			   &base, &end, size, imr.rmask, imr.wmask,
@@ -587,6 +588,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 {
 	phys_addr_t base = virt_to_phys(&_text);
 	size_t size = virt_to_phys(&__end_rodata) - base;
+	unsigned long start, end;
 	int i;
 	int ret;
 
@@ -594,18 +596,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 	for (i = 0; i < idev->max_imr; i++)
 		imr_clear(i);
 
+	start = (unsigned long)_text;
+	end = (unsigned long)__end_rodata - 1;
+
 	/*
 	 * Setup a locked IMR around the physical extent of the kernel
 	 * from the beginning of the .text secton to the end of the
 	 * .rodata section as one physically contiguous block.
+	 *
+	 * We don't round up @size since it is already PAGE_SIZE aligned.
+	 * See vmlinux.lds.S for details.
 	 */
 	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
 	if (ret < 0) {
-		pr_err("unable to setup IMR for kernel: (%p - %p)\n",
-			&_text, &__end_rodata);
+		pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+			size / 1024, start, end);
 	} else {
-		pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
-			size / 1024, &_text, &__end_rodata);
+		pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+			size / 1024, start, end);
 	}
 
 }

From 3b5bb73bd88d1d90163c91e7cad50b12725dbb1c Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 21 Jan 2016 18:36:46 +0100
Subject: [PATCH 072/262] btrfs: sysfs: add free-space-tree bit attribute

The incompat bit representing the newly added free space tree feature is
missing. Right now it will be listed only among features supported by
the module, not per-fs.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac85949067..906f7ed6fc80 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(raid56),
 	BTRFS_FEAT_ATTR_PTR(skinny_metadata),
 	BTRFS_FEAT_ATTR_PTR(no_holes),
+	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	NULL
 };
 

From 444e75169872f668eb890f19ec1f32dfc632e704 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 21 Jan 2016 18:50:40 +0100
Subject: [PATCH 073/262] btrfs: sysfs: introduce helper for syncing bits with
 sysfs files

The files under /sys/fs/UUID/features get out of sync with the actual
incompat bits set for the filesystem if they change after mount. We're
going to sync them and need a helper to do that.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 30 ++++++++++++++++++++++++++++++
 fs/btrfs/sysfs.h |  3 +++
 2 files changed, 33 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 906f7ed6fc80..6986886243bf 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -782,6 +782,36 @@ failure:
 	return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set)
+{
+	struct btrfs_fs_devices *fs_devs;
+	struct kobject *fsid_kobj;
+	u64 features;
+	int ret;
+
+	if (!fs_info)
+		return;
+
+	features = get_features(fs_info, set);
+	ASSERT(bit & supported_feature_masks[set]);
+
+	fs_devs = fs_info->fs_devices;
+	fsid_kobj = &fs_devs->fsid_kobj;
+
+	/*
+	 * FIXME: this is too heavy to update just one value, ideally we'd like
+	 * to use sysfs_update_group but some refactoring is needed first.
+	 */
+	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 72408e2c4ea8..d7da1a4c2f6c 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
 				struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */

From c994d6136738fd8b24a79f5ad8df40a6a79e2cf7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 09:20:23 +0100
Subject: [PATCH 074/262] perf: Add lockdep assertions

Make various bugs easier to see.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index bf8244190d0f..c77b05d9a37d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1246,6 +1246,8 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+	lockdep_assert_held(&ctx->lock);
+
 	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
 	event->attach_state |= PERF_ATTACH_CONTEXT;
 
@@ -2342,8 +2344,10 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 			  struct perf_cpu_context *cpuctx,
 			  enum event_type_t event_type)
 {
-	struct perf_event *event;
 	int is_active = ctx->is_active;
+	struct perf_event *event;
+
+	lockdep_assert_held(&ctx->lock);
 
 	ctx->is_active &= ~event_type;
 	if (likely(!ctx->nr_events))
@@ -2725,8 +2729,10 @@ ctx_sched_in(struct perf_event_context *ctx,
 	     enum event_type_t event_type,
 	     struct task_struct *task)
 {
-	u64 now;
 	int is_active = ctx->is_active;
+	u64 now;
+
+	lockdep_assert_held(&ctx->lock);
 
 	ctx->is_active |= event_type;
 	if (likely(!ctx->nr_events))

From 7e41d17753e6e0da55d343997454dd4fbe8d28a8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 09:21:40 +0100
Subject: [PATCH 075/262] perf: Fix cgroup event scheduling

There appears to be a problem in __perf_event_task_sched_in() wrt
cgroup event scheduling.

The normal event scheduling order is:

	CPU pinned
	Task pinned
	CPU flexible
	Task flexible

And since perf_cgroup_sched*() only schedules the cpu context, we must
call this _before_ adding the task events.

Note: double check what happens on the ctx switch optimization where
the task ctx isn't scheduled.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index c77b05d9a37d..9d1195af819c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2806,6 +2806,16 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	struct perf_event_context *ctx;
 	int ctxn;
 
+	/*
+	 * If cgroup events exist on this CPU, then we need to check if we have
+	 * to switch in PMU state; cgroup event are system-wide mode only.
+	 *
+	 * Since cgroup events are CPU events, we must schedule these in before
+	 * we schedule in the task events.
+	 */
+	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
+		perf_cgroup_sched_in(prev, task);
+
 	for_each_task_context_nr(ctxn) {
 		ctx = task->perf_event_ctxp[ctxn];
 		if (likely(!ctx))
@@ -2813,13 +2823,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
 		perf_event_context_sched_in(ctx, task);
 	}
-	/*
-	 * if cgroup events exist on this CPU, then we need
-	 * to check if we have to switch in PMU state.
-	 * cgroup event are system-wide mode only
-	 */
-	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-		perf_cgroup_sched_in(prev, task);
 
 	if (atomic_read(&nr_switch_events))
 		perf_event_switch(task, prev, true);

From 70a0165752944e0be0b1de4a9020473079962c18 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 09:29:16 +0100
Subject: [PATCH 076/262] perf: Fix cgroup scheduling in perf_enable_on_exec()

There is a comment that states that perf_event_context_sched_in() will
also switch in the cgroup events, I cannot find it does so. Therefore
all the resulting logic goes out the window too.

Clean that up.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 31 +++++++------------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9d1195af819c..e7bda0ed8d40 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -579,13 +579,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
 	 * we are holding the rcu lock
 	 */
 	cgrp1 = perf_cgroup_from_task(task, NULL);
-
-	/*
-	 * next is NULL when called from perf_event_enable_on_exec()
-	 * that will systematically cause a cgroup_switch()
-	 */
-	if (next)
-		cgrp2 = perf_cgroup_from_task(next, NULL);
+	cgrp2 = perf_cgroup_from_task(next, NULL);
 
 	/*
 	 * only schedule out current cgroup events if we know
@@ -611,8 +605,6 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
 	 * we are holding the rcu lock
 	 */
 	cgrp1 = perf_cgroup_from_task(task, NULL);
-
-	/* prev can never be NULL */
 	cgrp2 = perf_cgroup_from_task(prev, NULL);
 
 	/*
@@ -1450,11 +1442,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
 	if (is_cgroup_event(event)) {
 		ctx->nr_cgroups--;
+		/*
+		 * Because cgroup events are always per-cpu events, this will
+		 * always be called from the right CPU.
+		 */
 		cpuctx = __get_cpu_context(ctx);
 		/*
-		 * if there are no more cgroup events
-		 * then cler cgrp to avoid stale pointer
-		 * in update_cgrp_time_from_cpuctx()
+		 * If there are no more cgroup events then clear cgrp to avoid
+		 * stale pointer in update_cgrp_time_from_cpuctx().
 		 */
 		if (!ctx->nr_cgroups)
 			cpuctx->cgrp = NULL;
@@ -3118,15 +3113,6 @@ static void perf_event_enable_on_exec(int ctxn)
 	if (!ctx || !ctx->nr_events)
 		goto out;
 
-	/*
-	 * We must ctxsw out cgroup events to avoid conflict
-	 * when invoking perf_task_event_sched_in() later on
-	 * in this function. Otherwise we end up trying to
-	 * ctxswin cgroup events which are already scheduled
-	 * in.
-	 */
-	perf_cgroup_sched_out(current, NULL);
-
 	raw_spin_lock(&ctx->lock);
 	task_ctx_sched_out(ctx);
 
@@ -3144,9 +3130,6 @@ static void perf_event_enable_on_exec(int ctxn)
 
 	raw_spin_unlock(&ctx->lock);
 
-	/*
-	 * Also calls ctxswin for cgroup events, if any:
-	 */
 	perf_event_context_sched_in(ctx, ctx->task);
 out:
 	local_irq_restore(flags);

From 5947f6576e2edee1189b00bf5b2a3f2c653caa6b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 09:43:38 +0100
Subject: [PATCH 077/262] perf: Remove stale comment

The comment here is horribly out of date, remove it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index e7bda0ed8d40..751538ce19a7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2130,13 +2130,6 @@ static int  __perf_install_in_context(void *info)
 
 /*
  * Attach a performance event to a context
- *
- * First we add the event to the list with the hardware enable bit
- * in event->hw_config cleared.
- *
- * If the event is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,

From 3e349507d12de93b08b0aa814fc2aa0dee91c5ba Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 10:01:18 +0100
Subject: [PATCH 078/262] perf: Fix perf_enable_on_exec() event scheduling

There are two problems with the current perf_enable_on_exec() event
scheduling:

  - the newly enabled events will be immediately scheduled
    irrespective of their ctx event list order.

  - there's a hole in the ctx->lock between scheduling the events
    out and putting them back on.

Esp. the latter issue is a real problem because a hole in event
scheduling leaves the thing in an observable inconsistent state,
confusing things.

Fix both issues by first doing the enable iteration and at the end,
when there are newly enabled events, reschedule the ctx in one go.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 47 +++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 751538ce19a7..0679e73f5f63 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2036,7 +2036,8 @@ static void add_event_to_ctx(struct perf_event *event,
 	event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx);
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+			       struct perf_event_context *ctx);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
 	     struct perf_cpu_context *cpuctx,
@@ -2067,6 +2068,17 @@ static void ___perf_install_in_context(void *info)
 	add_event_to_ctx(event, ctx);
 }
 
+static void ctx_resched(struct perf_cpu_context *cpuctx,
+			struct perf_event_context *task_ctx)
+{
+	perf_pmu_disable(cpuctx->ctx.pmu);
+	if (task_ctx)
+		task_ctx_sched_out(cpuctx, task_ctx);
+	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+	perf_event_sched_in(cpuctx, task_ctx, current);
+	perf_pmu_enable(cpuctx->ctx.pmu);
+}
+
 /*
  * Cross CPU call to install and enable a performance event
  *
@@ -2087,7 +2099,7 @@ static int  __perf_install_in_context(void *info)
 	 * If there was an active task_ctx schedule it out.
 	 */
 	if (task_ctx)
-		task_ctx_sched_out(task_ctx);
+		task_ctx_sched_out(cpuctx, task_ctx);
 
 	/*
 	 * If the context we're installing events in is not the
@@ -2629,10 +2641,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 		perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+			       struct perf_event_context *ctx)
 {
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
 	if (!cpuctx->task_ctx)
 		return;
 
@@ -3096,34 +3107,30 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
 	struct perf_event_context *ctx, *clone_ctx = NULL;
+	struct perf_cpu_context *cpuctx;
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
-	int ret;
 
 	local_irq_save(flags);
 	ctx = current->perf_event_ctxp[ctxn];
 	if (!ctx || !ctx->nr_events)
 		goto out;
 
-	raw_spin_lock(&ctx->lock);
-	task_ctx_sched_out(ctx);
-
-	list_for_each_entry(event, &ctx->event_list, event_entry) {
-		ret = event_enable_on_exec(event, ctx);
-		if (ret)
-			enabled = 1;
-	}
+	cpuctx = __get_cpu_context(ctx);
+	perf_ctx_lock(cpuctx, ctx);
+	list_for_each_entry(event, &ctx->event_list, event_entry)
+		enabled |= event_enable_on_exec(event, ctx);
 
 	/*
-	 * Unclone this context if we enabled any event.
+	 * Unclone and reschedule this context if we enabled any event.
 	 */
-	if (enabled)
+	if (enabled) {
 		clone_ctx = unclone_ctx(ctx);
+		ctx_resched(cpuctx, ctx);
+	}
+	perf_ctx_unlock(cpuctx, ctx);
 
-	raw_spin_unlock(&ctx->lock);
-
-	perf_event_context_sched_in(ctx, ctx->task);
 out:
 	local_irq_restore(flags);
 
@@ -8737,7 +8744,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 * incremented the context's refcount before we do put_ctx below.
 	 */
 	raw_spin_lock(&child_ctx->lock);
-	task_ctx_sched_out(child_ctx);
+	task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 	child->perf_event_ctxp[ctxn] = NULL;
 
 	/*

From 8833d0e286c12fd4456089a7a553faf4921e4b08 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 10:02:37 +0100
Subject: [PATCH 079/262] perf: Use task_ctx_sched_out()

We have a function that does exactly what we want here, use it. This
reduces the amount of cpuctx->task_ctx muckery.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0679e73f5f63..12f1d4a52da9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2545,8 +2545,7 @@ unlock:
 
 	if (do_switch) {
 		raw_spin_lock(&ctx->lock);
-		ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-		cpuctx->task_ctx = NULL;
+		task_ctx_sched_out(cpuctx, ctx);
 		raw_spin_unlock(&ctx->lock);
 	}
 }

From aee7dbc45f8aa976913de9b352fa6da816f1f3cd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 10:45:11 +0100
Subject: [PATCH 080/262] perf: Simplify/fix perf_event_enable() event
 scheduling

Like perf_enable_on_exec(), perf_event_enable() event scheduling has problems
respecting the context hierarchy when trying to schedule events (for
example, it will try and add a pinned event without first removing
existing flexible events).

So simplify it by using the new ctx_resched() call which will DTRT.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 31 +++++--------------------------
 1 file changed, 5 insertions(+), 26 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 12f1d4a52da9..079eb9fcaaa8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2188,7 +2188,7 @@ static int __perf_event_enable(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-	int err;
+	struct perf_event_context *task_ctx = cpuctx->task_ctx;
 
 	/*
 	 * There's a time window between 'ctx->is_active' check
@@ -2202,7 +2202,8 @@ static int __perf_event_enable(void *info)
 	if (!ctx->is_active)
 		return -EINVAL;
 
-	raw_spin_lock(&ctx->lock);
+	perf_ctx_lock(cpuctx, task_ctx);
+	WARN_ON_ONCE(&cpuctx->ctx != ctx && task_ctx != ctx);
 	update_context_time(ctx);
 
 	if (event->state >= PERF_EVENT_STATE_INACTIVE)
@@ -2228,32 +2229,10 @@ static int __perf_event_enable(void *info)
 	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
 		goto unlock;
 
-	if (!group_can_go_on(event, cpuctx, 1)) {
-		err = -EEXIST;
-	} else {
-		if (event == leader)
-			err = group_sched_in(event, cpuctx, ctx);
-		else
-			err = event_sched_in(event, cpuctx, ctx);
-	}
-
-	if (err) {
-		/*
-		 * If this event can't go on and it's part of a
-		 * group, then the whole group has to come off.
-		 */
-		if (leader != event) {
-			group_sched_out(leader, cpuctx, ctx);
-			perf_mux_hrtimer_restart(cpuctx);
-		}
-		if (leader->attr.pinned) {
-			update_group_times(leader);
-			leader->state = PERF_EVENT_STATE_ERROR;
-		}
-	}
+	ctx_resched(cpuctx, task_ctx);
 
 unlock:
-	raw_spin_unlock(&ctx->lock);
+	perf_ctx_unlock(cpuctx, task_ctx);
 
 	return 0;
 }

From 25432ae96a9889774a05bf5f0f6fd8dbcdec5e72 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 11:05:09 +0100
Subject: [PATCH 081/262] perf: Optimize perf_sched_events() usage

It doesn't make sense to take up-to _4_ references on
perf_sched_events() per event, avoid doing this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 079eb9fcaaa8..935aefd16354 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3491,11 +3491,13 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 
 static void unaccount_event(struct perf_event *event)
 {
+	bool dec = false;
+
 	if (event->parent)
 		return;
 
 	if (event->attach_state & PERF_ATTACH_TASK)
-		static_key_slow_dec_deferred(&perf_sched_events);
+		dec = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_dec(&nr_mmap_events);
 	if (event->attr.comm)
@@ -3505,12 +3507,15 @@ static void unaccount_event(struct perf_event *event)
 	if (event->attr.freq)
 		atomic_dec(&nr_freq_events);
 	if (event->attr.context_switch) {
-		static_key_slow_dec_deferred(&perf_sched_events);
+		dec = true;
 		atomic_dec(&nr_switch_events);
 	}
 	if (is_cgroup_event(event))
-		static_key_slow_dec_deferred(&perf_sched_events);
+		dec = true;
 	if (has_branch_stack(event))
+		dec = true;
+
+	if (dec)
 		static_key_slow_dec_deferred(&perf_sched_events);
 
 	unaccount_event_cpu(event, event->cpu);
@@ -7723,11 +7728,13 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 
 static void account_event(struct perf_event *event)
 {
+	bool inc = false;
+
 	if (event->parent)
 		return;
 
 	if (event->attach_state & PERF_ATTACH_TASK)
-		static_key_slow_inc(&perf_sched_events.key);
+		inc = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_inc(&nr_mmap_events);
 	if (event->attr.comm)
@@ -7740,11 +7747,14 @@ static void account_event(struct perf_event *event)
 	}
 	if (event->attr.context_switch) {
 		atomic_inc(&nr_switch_events);
-		static_key_slow_inc(&perf_sched_events.key);
+		inc = true;
 	}
 	if (has_branch_stack(event))
-		static_key_slow_inc(&perf_sched_events.key);
+		inc = true;
 	if (is_cgroup_event(event))
+		inc = true;
+
+	if (inc)
 		static_key_slow_inc(&perf_sched_events.key);
 
 	account_event_cpu(event, event->cpu);

From 63e30d3e52d4d85854ce6c761ffc6ab55209a630 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Jan 2016 11:39:10 +0100
Subject: [PATCH 082/262] perf: Make ctx->is_active and cpuctx->task_ctx
 consistent

For no apparent reason and to great confusion the rules for
ctx->is_active and cpuctx->task_ctx are different. This means that its
not always possible to find all active (task) contexts.

Fix this such that if ctx->is_active gets set, we also set (or verify)
cpuctx->task_ctx.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 935aefd16354..89b47050a2e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2329,6 +2329,12 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 	lockdep_assert_held(&ctx->lock);
 
 	ctx->is_active &= ~event_type;
+	if (ctx->task) {
+		WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+		if (!ctx->is_active)
+			cpuctx->task_ctx = NULL;
+	}
+
 	if (likely(!ctx->nr_events))
 		return;
 
@@ -2629,7 +2635,6 @@ static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
 		return;
 
 	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-	cpuctx->task_ctx = NULL;
 }
 
 /*
@@ -2712,6 +2717,13 @@ ctx_sched_in(struct perf_event_context *ctx,
 	lockdep_assert_held(&ctx->lock);
 
 	ctx->is_active |= event_type;
+	if (ctx->task) {
+		if (!is_active)
+			cpuctx->task_ctx = ctx;
+		else
+			WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+	}
+
 	if (likely(!ctx->nr_events))
 		return;
 
@@ -2756,12 +2768,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 	 * cpu flexible, task flexible.
 	 */
 	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-
-	if (ctx->nr_events)
-		cpuctx->task_ctx = ctx;
-
-	perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
-
+	perf_event_sched_in(cpuctx, ctx, task);
 	perf_pmu_enable(ctx->pmu);
 	perf_ctx_unlock(cpuctx, ctx);
 }

From 39a4364076921511e212bc42f94fbf062c989576 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 11 Jan 2016 12:46:35 +0100
Subject: [PATCH 083/262] perf: Fix task context scheduling

There is a very nasty problem wrt disabling the perf task scheduling
hooks.

Currently we {set,clear} ctx->is_active on every
__perf_event_task_sched_{in,out}, _however_ this means that if we
disable these calls we'll have task contexts with ->is_active set that
are not active and 'active' task contexts without ->is_active set.

This can result in event_function_call() looping on the ctx->is_active
condition basically indefinitely.

Resolve this by changing things such that contexts without events do
not set ->is_active like we used to. From this invariant it trivially
follows that if there are no (task) events, every task ctx is inactive
and disabling the context switch hooks is harmless.

This leaves two places that need attention (and already had
accumulated weird and wonderful hacks to work around, without
recognising this actual problem).

Namely:

 - perf_install_in_context() will need to deal with installing events
   in an inactive context, meaning it cannot rely on ctx-is_active for
   its IPIs.

 - perf_remove_from_context() will have to mark a context as inactive
   when it removes the last event.

For specific detail, see the patch/comments.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 155 +++++++++++++++++++++++++------------------
 1 file changed, 91 insertions(+), 64 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89b47050a2e8..c27e04655d86 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -126,6 +126,38 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
 	return data.ret;
 }
 
+/*
+ * On task ctx scheduling...
+ *
+ * When !ctx->nr_events a task context will not be scheduled. This means
+ * we can disable the scheduler hooks (for performance) without leaving
+ * pending task ctx state.
+ *
+ * This however results in two special cases:
+ *
+ *  - removing the last event from a task ctx; this is relatively straight
+ *    forward and is done in __perf_remove_from_context.
+ *
+ *  - adding the first event to a task ctx; this is tricky because we cannot
+ *    rely on ctx->is_active and therefore cannot use event_function_call().
+ *    See perf_install_in_context().
+ *
+ * This is because we need a ctx->lock serialized variable (ctx->is_active)
+ * to reliably determine if a particular task/context is scheduled in. The
+ * task_curr() use in task_function_call() is racy in that a remote context
+ * switch is not a single atomic operation.
+ *
+ * As is, the situation is 'safe' because we set rq->curr before we do the
+ * actual context switch. This means that task_curr() will fail early, but
+ * we'll continue spinning on ctx->is_active until we've passed
+ * perf_event_task_sched_out().
+ *
+ * Without this ctx->lock serialized variable we could have race where we find
+ * the task (and hence the context) would not be active while in fact they are.
+ *
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
+ */
+
 static void event_function_call(struct perf_event *event,
 				int (*active)(void *),
 				void (*inactive)(void *),
@@ -1686,9 +1718,13 @@ static int __perf_remove_from_context(void *info)
 	if (re->detach_group)
 		perf_group_detach(event);
 	list_del_event(event, ctx);
-	if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
+
+	if (!ctx->nr_events && ctx->is_active) {
 		ctx->is_active = 0;
-		cpuctx->task_ctx = NULL;
+		if (ctx->task) {
+			WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+			cpuctx->task_ctx = NULL;
+		}
 	}
 	raw_spin_unlock(&ctx->lock);
 
@@ -2056,18 +2092,6 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
 		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
-static void ___perf_install_in_context(void *info)
-{
-	struct perf_event *event = info;
-	struct perf_event_context *ctx = event->ctx;
-
-	/*
-	 * Since the task isn't running, its safe to add the event, us holding
-	 * the ctx->lock ensures the task won't get scheduled in.
-	 */
-	add_event_to_ctx(event, ctx);
-}
-
 static void ctx_resched(struct perf_cpu_context *cpuctx,
 			struct perf_event_context *task_ctx)
 {
@@ -2086,55 +2110,27 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
  */
 static int  __perf_install_in_context(void *info)
 {
-	struct perf_event *event = info;
-	struct perf_event_context *ctx = event->ctx;
+	struct perf_event_context *ctx = info;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	struct task_struct *task = current;
 
-	perf_ctx_lock(cpuctx, task_ctx);
-	perf_pmu_disable(cpuctx->ctx.pmu);
+	if (ctx->task) {
+		/*
+		 * If we hit the 'wrong' task, we've since scheduled and
+		 * everything should be sorted, nothing to do!
+		 */
+		if (ctx->task != current)
+			return 0;
 
-	/*
-	 * If there was an active task_ctx schedule it out.
-	 */
-	if (task_ctx)
-		task_ctx_sched_out(cpuctx, task_ctx);
-
-	/*
-	 * If the context we're installing events in is not the
-	 * active task_ctx, flip them.
-	 */
-	if (ctx->task && task_ctx != ctx) {
-		if (task_ctx)
-			raw_spin_unlock(&task_ctx->lock);
-		raw_spin_lock(&ctx->lock);
+		/*
+		 * If task_ctx is set, it had better be to us.
+		 */
+		WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
 		task_ctx = ctx;
 	}
 
-	if (task_ctx) {
-		cpuctx->task_ctx = task_ctx;
-		task = task_ctx->task;
-	}
-
-	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-
-	update_context_time(ctx);
-	/*
-	 * update cgrp time only if current cgrp
-	 * matches event->cgrp. Must be done before
-	 * calling add_event_to_ctx()
-	 */
-	update_cgrp_time_from_event(event);
-
-	add_event_to_ctx(event, ctx);
-
-	/*
-	 * Schedule everything back in
-	 */
-	perf_event_sched_in(cpuctx, task_ctx, task);
-
-	perf_pmu_enable(cpuctx->ctx.pmu);
+	perf_ctx_lock(cpuctx, task_ctx);
+	ctx_resched(cpuctx, task_ctx);
 	perf_ctx_unlock(cpuctx, task_ctx);
 
 	return 0;
@@ -2148,14 +2144,38 @@ perf_install_in_context(struct perf_event_context *ctx,
 			struct perf_event *event,
 			int cpu)
 {
+	struct task_struct *task = NULL;
+
 	lockdep_assert_held(&ctx->mutex);
 
 	event->ctx = ctx;
 	if (event->cpu != -1)
 		event->cpu = cpu;
 
-	event_function_call(event, __perf_install_in_context,
-			    ___perf_install_in_context, event);
+	/*
+	 * Installing events is tricky because we cannot rely on ctx->is_active
+	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 *
+	 * So what we do is we add the event to the list here, which will allow
+	 * a future context switch to DTRT and then send a racy IPI. If the IPI
+	 * fails to hit the right task, this means a context switch must have
+	 * happened and that will have taken care of business.
+	 */
+	raw_spin_lock_irq(&ctx->lock);
+	update_context_time(ctx);
+	/*
+	 * Update cgrp time only if current cgrp matches event->cgrp.
+	 * Must be done before calling add_event_to_ctx().
+	 */
+	update_cgrp_time_from_event(event);
+	add_event_to_ctx(event, ctx);
+	task = ctx->task;
+	raw_spin_unlock_irq(&ctx->lock);
+
+	if (task)
+		task_function_call(task, __perf_install_in_context, ctx);
+	else
+		cpu_function_call(cpu, __perf_install_in_context, ctx);
 }
 
 /*
@@ -2328,6 +2348,16 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 
 	lockdep_assert_held(&ctx->lock);
 
+	if (likely(!ctx->nr_events)) {
+		/*
+		 * See __perf_remove_from_context().
+		 */
+		WARN_ON_ONCE(ctx->is_active);
+		if (ctx->task)
+			WARN_ON_ONCE(cpuctx->task_ctx);
+		return;
+	}
+
 	ctx->is_active &= ~event_type;
 	if (ctx->task) {
 		WARN_ON_ONCE(cpuctx->task_ctx != ctx);
@@ -2335,9 +2365,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 			cpuctx->task_ctx = NULL;
 	}
 
-	if (likely(!ctx->nr_events))
-		return;
-
 	update_context_time(ctx);
 	update_cgrp_time_from_cpuctx(cpuctx);
 	if (!ctx->nr_active)
@@ -2716,6 +2743,9 @@ ctx_sched_in(struct perf_event_context *ctx,
 
 	lockdep_assert_held(&ctx->lock);
 
+	if (likely(!ctx->nr_events))
+		return;
+
 	ctx->is_active |= event_type;
 	if (ctx->task) {
 		if (!is_active)
@@ -2724,9 +2754,6 @@ ctx_sched_in(struct perf_event_context *ctx,
 			WARN_ON_ONCE(cpuctx->task_ctx != ctx);
 	}
 
-	if (likely(!ctx->nr_events))
-		return;
-
 	now = perf_clock();
 	ctx->timestamp = now;
 	perf_cgroup_set_timestamp(task, ctx);

From 32132a3d0d5d6f127388be3e3fd7759f798c2eb4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 11 Jan 2016 15:40:59 +0100
Subject: [PATCH 084/262] perf: Specialize perf_event_exit_task()

The perf_remove_from_context() usage in __perf_event_exit_task() is
different from the other usages in that this site has already
detached and scheduled out the task context.

This will stand in the way of stronger assertions checking the (task)
context scheduling invariants.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index c27e04655d86..66c9ad4f8707 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8726,7 +8726,13 @@ __perf_event_exit_task(struct perf_event *child_event,
 	 * Do destroy all inherited groups, we don't care about those
 	 * and being thorough is better.
 	 */
-	perf_remove_from_context(child_event, !!child_event->parent);
+	raw_spin_lock_irq(&child_ctx->lock);
+	WARN_ON_ONCE(child_ctx->is_active);
+
+	if (!!child_event->parent)
+		perf_group_detach(child_event);
+	list_del_event(child_event, child_ctx);
+	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
 	 * It can happen that the parent exits first, and has events
@@ -8746,17 +8752,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
 	struct perf_event *child_event, *next;
 	struct perf_event_context *child_ctx, *clone_ctx = NULL;
-	unsigned long flags;
 
 	if (likely(!child->perf_event_ctxp[ctxn]))
 		return;
 
-	local_irq_save(flags);
+	local_irq_disable();
+	WARN_ON_ONCE(child != current);
 	/*
 	 * We can't reschedule here because interrupts are disabled,
-	 * and either child is current or it is a task that can't be
-	 * scheduled, so we are now safe from rescheduling changing
-	 * our context.
+	 * and child must be current.
 	 */
 	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
 
@@ -8776,7 +8780,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 */
 	clone_ctx = unclone_ctx(child_ctx);
 	update_context_time(child_ctx);
-	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+	raw_spin_unlock_irq(&child_ctx->lock);
 
 	if (clone_ctx)
 		put_ctx(clone_ctx);

From fae3fde65138b6071b1b0e0b567d4058a8b6a88c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 11 Jan 2016 15:00:50 +0100
Subject: [PATCH 085/262] perf: Collapse and fix event_function_call() users

There is one common bug left in all the event_function_call() users,
between loading ctx->task and getting to the remote_function(),
ctx->task can already have been changed.

Therefore we need to double check and retry if ctx->task != current.

Insert another trampoline specific to event_function_call() that
checks for this and further validates state. This also allows getting
rid of the active/inactive functions.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h    |   2 +-
 kernel/events/core.c          | 365 ++++++++++++++++------------------
 kernel/events/hw_breakpoint.c |   2 +-
 3 files changed, 168 insertions(+), 201 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f9828a48f16a..6612732d8fd0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1044,7 +1044,7 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 66c9ad4f8707..6620432491f6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -126,6 +126,28 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
 	return data.ret;
 }
 
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+			  struct perf_event_context *ctx)
+{
+	raw_spin_lock(&cpuctx->ctx.lock);
+	if (ctx)
+		raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+			    struct perf_event_context *ctx)
+{
+	if (ctx)
+		raw_spin_unlock(&ctx->lock);
+	raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
 /*
  * On task ctx scheduling...
  *
@@ -158,21 +180,96 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
  * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
  */
 
-static void event_function_call(struct perf_event *event,
-				int (*active)(void *),
-				void (*inactive)(void *),
-				void *data)
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
+			struct perf_event_context *, void *);
+
+struct event_function_struct {
+	struct perf_event *event;
+	event_f func;
+	void *data;
+};
+
+static int event_function(void *info)
+{
+	struct event_function_struct *efs = info;
+	struct perf_event *event = efs->event;
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct perf_event_context *task_ctx = cpuctx->task_ctx;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	/*
+	 * Since we do the IPI call without holding ctx->lock things can have
+	 * changed, double check we hit the task we set out to hit.
+	 *
+	 * If ctx->task == current, we know things must remain valid because
+	 * we have IRQs disabled so we cannot schedule.
+	 */
+	if (ctx->task) {
+		if (ctx->task != current)
+			return -EAGAIN;
+
+		WARN_ON_ONCE(task_ctx != ctx);
+	} else {
+		WARN_ON_ONCE(&cpuctx->ctx != ctx);
+	}
+
+	perf_ctx_lock(cpuctx, task_ctx);
+	/*
+	 * Now that we hold locks, double check state. Paranoia pays.
+	 */
+	if (task_ctx) {
+		WARN_ON_ONCE(task_ctx->task != current);
+		/*
+		 * We only use event_function_call() on established contexts,
+		 * and event_function() is only ever called when active (or
+		 * rather, we'll have bailed in task_function_call() or the
+		 * above ctx->task != current test), therefore we must have
+		 * ctx->is_active here.
+		 */
+		WARN_ON_ONCE(!ctx->is_active);
+		/*
+		 * And since we have ctx->is_active, cpuctx->task_ctx must
+		 * match.
+		 */
+		WARN_ON_ONCE(cpuctx->task_ctx != task_ctx);
+	}
+	efs->func(event, cpuctx, ctx, efs->data);
+	perf_ctx_unlock(cpuctx, task_ctx);
+
+	return 0;
+}
+
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+	struct event_function_struct efs = {
+		.event = event,
+		.func = func,
+		.data = data,
+	};
+
+	int ret = event_function(&efs);
+	WARN_ON_ONCE(ret);
+}
+
+static void event_function_call(struct perf_event *event, event_f func, void *data)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
+	struct event_function_struct efs = {
+		.event = event,
+		.func = func,
+		.data = data,
+	};
 
 	if (!task) {
-		cpu_function_call(event->cpu, active, data);
+		cpu_function_call(event->cpu, event_function, &efs);
 		return;
 	}
 
 again:
-	if (!task_function_call(task, active, data))
+	if (!task_function_call(task, event_function, &efs))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -185,7 +282,7 @@ again:
 		raw_spin_unlock_irq(&ctx->lock);
 		goto again;
 	}
-	inactive(data);
+	func(event, NULL, ctx, data);
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
@@ -400,28 +497,6 @@ static inline u64 perf_event_clock(struct perf_event *event)
 	return event->clock();
 }
 
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
-	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
-			  struct perf_event_context *ctx)
-{
-	raw_spin_lock(&cpuctx->ctx.lock);
-	if (ctx)
-		raw_spin_lock(&ctx->lock);
-}
-
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
-			    struct perf_event_context *ctx)
-{
-	if (ctx)
-		raw_spin_unlock(&ctx->lock);
-	raw_spin_unlock(&cpuctx->ctx.lock);
-}
-
 #ifdef CONFIG_CGROUP_PERF
 
 static inline bool
@@ -1684,38 +1759,22 @@ group_sched_out(struct perf_event *group_event,
 		cpuctx->exclusive = 0;
 }
 
-struct remove_event {
-	struct perf_event *event;
-	bool detach_group;
-};
-
-static void ___perf_remove_from_context(void *info)
-{
-	struct remove_event *re = info;
-	struct perf_event *event = re->event;
-	struct perf_event_context *ctx = event->ctx;
-
-	if (re->detach_group)
-		perf_group_detach(event);
-	list_del_event(event, ctx);
-}
-
 /*
  * Cross CPU call to remove a performance event
  *
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static int __perf_remove_from_context(void *info)
+static void
+__perf_remove_from_context(struct perf_event *event,
+			   struct perf_cpu_context *cpuctx,
+			   struct perf_event_context *ctx,
+			   void *info)
 {
-	struct remove_event *re = info;
-	struct perf_event *event = re->event;
-	struct perf_event_context *ctx = event->ctx;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	bool detach_group = (unsigned long)info;
 
-	raw_spin_lock(&ctx->lock);
 	event_sched_out(event, cpuctx, ctx);
-	if (re->detach_group)
+	if (detach_group)
 		perf_group_detach(event);
 	list_del_event(event, ctx);
 
@@ -1726,17 +1785,11 @@ static int __perf_remove_from_context(void *info)
 			cpuctx->task_ctx = NULL;
 		}
 	}
-	raw_spin_unlock(&ctx->lock);
-
-	return 0;
 }
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * CPU events are removed with a smp call. For task events we only
- * call when the task is on a CPU.
- *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
  * remains valid.  This is OK when called from perf_release since
@@ -1746,71 +1799,31 @@ static int __perf_remove_from_context(void *info)
  */
 static void perf_remove_from_context(struct perf_event *event, bool detach_group)
 {
-	struct perf_event_context *ctx = event->ctx;
-	struct remove_event re = {
-		.event = event,
-		.detach_group = detach_group,
-	};
-
-	lockdep_assert_held(&ctx->mutex);
+	lockdep_assert_held(&event->ctx->mutex);
 
 	event_function_call(event, __perf_remove_from_context,
-			    ___perf_remove_from_context, &re);
+			    (void *)(unsigned long)detach_group);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-int __perf_event_disable(void *info)
+static void __perf_event_disable(struct perf_event *event,
+				 struct perf_cpu_context *cpuctx,
+				 struct perf_event_context *ctx,
+				 void *info)
 {
-	struct perf_event *event = info;
-	struct perf_event_context *ctx = event->ctx;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	if (event->state < PERF_EVENT_STATE_INACTIVE)
+		return;
 
-	/*
-	 * If this is a per-task event, need to check whether this
-	 * event's task is the current task on this cpu.
-	 *
-	 * Can trigger due to concurrent perf_event_context_sched_out()
-	 * flipping contexts around.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return -EINVAL;
-
-	raw_spin_lock(&ctx->lock);
-
-	/*
-	 * If the event is on, turn it off.
-	 * If it is in error state, leave it in error state.
-	 */
-	if (event->state >= PERF_EVENT_STATE_INACTIVE) {
-		update_context_time(ctx);
-		update_cgrp_time_from_event(event);
-		update_group_times(event);
-		if (event == event->group_leader)
-			group_sched_out(event, cpuctx, ctx);
-		else
-			event_sched_out(event, cpuctx, ctx);
-		event->state = PERF_EVENT_STATE_OFF;
-	}
-
-	raw_spin_unlock(&ctx->lock);
-
-	return 0;
-}
-
-void ___perf_event_disable(void *info)
-{
-	struct perf_event *event = info;
-
-	/*
-	 * Since we have the lock this context can't be scheduled
-	 * in, so we can change the state safely.
-	 */
-	if (event->state == PERF_EVENT_STATE_INACTIVE) {
-		update_group_times(event);
-		event->state = PERF_EVENT_STATE_OFF;
-	}
+	update_context_time(ctx);
+	update_cgrp_time_from_event(event);
+	update_group_times(event);
+	if (event == event->group_leader)
+		group_sched_out(event, cpuctx, ctx);
+	else
+		event_sched_out(event, cpuctx, ctx);
+	event->state = PERF_EVENT_STATE_OFF;
 }
 
 /*
@@ -1837,8 +1850,12 @@ static void _perf_event_disable(struct perf_event *event)
 	}
 	raw_spin_unlock_irq(&ctx->lock);
 
-	event_function_call(event, __perf_event_disable,
-			    ___perf_event_disable, event);
+	event_function_call(event, __perf_event_disable, NULL);
+}
+
+void perf_event_disable_local(struct perf_event *event)
+{
+	event_function_local(event, __perf_event_disable, NULL);
 }
 
 /*
@@ -2202,44 +2219,29 @@ static void __perf_event_mark_enabled(struct perf_event *event)
 /*
  * Cross CPU call to enable a performance event
  */
-static int __perf_event_enable(void *info)
+static void __perf_event_enable(struct perf_event *event,
+				struct perf_cpu_context *cpuctx,
+				struct perf_event_context *ctx,
+				void *info)
 {
-	struct perf_event *event = info;
-	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-
-	/*
-	 * There's a time window between 'ctx->is_active' check
-	 * in perf_event_enable function and this place having:
-	 *   - IRQs on
-	 *   - ctx->lock unlocked
-	 *
-	 * where the task could be killed and 'ctx' deactivated
-	 * by perf_event_exit_task.
-	 */
-	if (!ctx->is_active)
-		return -EINVAL;
-
-	perf_ctx_lock(cpuctx, task_ctx);
-	WARN_ON_ONCE(&cpuctx->ctx != ctx && task_ctx != ctx);
-	update_context_time(ctx);
+	struct perf_event_context *task_ctx;
 
 	if (event->state >= PERF_EVENT_STATE_INACTIVE)
-		goto unlock;
-
-	/*
-	 * set current task's cgroup time reference point
-	 */
-	perf_cgroup_set_timestamp(current, ctx);
+		return;
 
+	update_context_time(ctx);
 	__perf_event_mark_enabled(event);
 
+	if (!ctx->is_active)
+		return;
+
 	if (!event_filter_match(event)) {
-		if (is_cgroup_event(event))
+		if (is_cgroup_event(event)) {
+			perf_cgroup_set_timestamp(current, ctx); // XXX ?
 			perf_cgroup_defer_enabled(event);
-		goto unlock;
+		}
+		return;
 	}
 
 	/*
@@ -2247,19 +2249,13 @@ static int __perf_event_enable(void *info)
 	 * then don't put it on unless the group is on.
 	 */
 	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
-		goto unlock;
+		return;
+
+	task_ctx = cpuctx->task_ctx;
+	if (ctx->task)
+		WARN_ON_ONCE(task_ctx != ctx);
 
 	ctx_resched(cpuctx, task_ctx);
-
-unlock:
-	perf_ctx_unlock(cpuctx, task_ctx);
-
-	return 0;
-}
-
-void ___perf_event_enable(void *info)
-{
-	__perf_event_mark_enabled((struct perf_event *)info);
 }
 
 /*
@@ -2292,8 +2288,7 @@ static void _perf_event_enable(struct perf_event *event)
 		event->state = PERF_EVENT_STATE_OFF;
 	raw_spin_unlock_irq(&ctx->lock);
 
-	event_function_call(event, __perf_event_enable,
-			    ___perf_event_enable, event);
+	event_function_call(event, __perf_event_enable, NULL);
 }
 
 /*
@@ -4095,36 +4090,14 @@ static void perf_event_for_each(struct perf_event *event,
 		perf_event_for_each_child(sibling, func);
 }
 
-struct period_event {
-	struct perf_event *event;
-	u64 value;
-};
-
-static void ___perf_event_period(void *info)
+static void __perf_event_period(struct perf_event *event,
+				struct perf_cpu_context *cpuctx,
+				struct perf_event_context *ctx,
+				void *info)
 {
-	struct period_event *pe = info;
-	struct perf_event *event = pe->event;
-	u64 value = pe->value;
-
-	if (event->attr.freq) {
-		event->attr.sample_freq = value;
-	} else {
-		event->attr.sample_period = value;
-		event->hw.sample_period = value;
-	}
-
-	local64_set(&event->hw.period_left, 0);
-}
-
-static int __perf_event_period(void *info)
-{
-	struct period_event *pe = info;
-	struct perf_event *event = pe->event;
-	struct perf_event_context *ctx = event->ctx;
-	u64 value = pe->value;
+	u64 value = *((u64 *)info);
 	bool active;
 
-	raw_spin_lock(&ctx->lock);
 	if (event->attr.freq) {
 		event->attr.sample_freq = value;
 	} else {
@@ -4144,14 +4117,10 @@ static int __perf_event_period(void *info)
 		event->pmu->start(event, PERF_EF_RELOAD);
 		perf_pmu_enable(ctx->pmu);
 	}
-	raw_spin_unlock(&ctx->lock);
-
-	return 0;
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
 {
-	struct period_event pe = { .event = event, };
 	u64 value;
 
 	if (!is_sampling_event(event))
@@ -4166,10 +4135,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	if (event->attr.freq && value > sysctl_perf_event_sample_rate)
 		return -EINVAL;
 
-	pe.value = value;
-
-	event_function_call(event, __perf_event_period,
-			    ___perf_event_period, &pe);
+	event_function_call(event, __perf_event_period, &value);
 
 	return 0;
 }
@@ -4941,7 +4907,7 @@ static void perf_pending_event(struct irq_work *entry)
 
 	if (event->pending_disable) {
 		event->pending_disable = 0;
-		__perf_event_disable(event);
+		perf_event_disable_local(event);
 	}
 
 	if (event->pending_wakeup) {
@@ -9239,13 +9205,14 @@ static void perf_event_init_cpu(int cpu)
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
-	struct remove_event re = { .detach_group = true };
 	struct perf_event_context *ctx = __info;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct perf_event *event;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
-		__perf_remove_from_context(&re);
-	rcu_read_unlock();
+	raw_spin_lock(&ctx->lock);
+	list_for_each_entry(event, &ctx->event_list, event_entry)
+		__perf_remove_from_context(event, cpuctx, ctx, (void *)(unsigned long)true);
+	raw_spin_unlock(&ctx->lock);
 }
 
 static void perf_event_exit_cpu_context(int cpu)
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 92ce5f4ccc26..3f8cb1e14588 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -444,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	 * current task.
 	 */
 	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
-		__perf_event_disable(bp);
+		perf_event_disable_local(bp);
 	else
 		perf_event_disable(bp);
 

From c97f473643a9d3e618c0f0426b926bc3a3e23944 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 14 Jan 2016 10:51:03 +0100
Subject: [PATCH 086/262] perf: Add more assertions

Try to trigger warnings before races do damage.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6620432491f6..3eaf91b104e9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -263,6 +263,15 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
 		.data = data,
 	};
 
+	if (!event->parent) {
+		/*
+		 * If this is a !child event, we must hold ctx::mutex to
+		 * stabilize the the event->ctx relation. See
+		 * perf_event_ctx_lock().
+		 */
+		lockdep_assert_held(&ctx->mutex);
+	}
+
 	if (!task) {
 		cpu_function_call(event->cpu, event_function, &efs);
 		return;

From 63b6da39bb38e8f1a1ef3180d32a39d6baf9da84 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 14 Jan 2016 16:05:37 +0100
Subject: [PATCH 087/262] perf: Fix perf_event_exit_task() race

There is a race against perf_event_exit_task() vs
event_function_call(),find_get_context(),perf_install_in_context()
(iow, everyone).

Since there is no permanent marker on a context that its dead, it is
quite possible that we access (and even modify) a context after its
passed through perf_event_exit_task().

For instance, find_get_context() might find the context still
installed, but by the time we get to perf_install_in_context() it
might already have passed through perf_event_exit_task() and be
considered dead, we will however still add the event to it.

Solve this by marking a ctx dead by setting its ctx->task value to -1,
it must be !0 so we still know its a (former) task context.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 151 ++++++++++++++++++++++++-------------------
 1 file changed, 85 insertions(+), 66 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3eaf91b104e9..9de4d352ba8c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -148,6 +148,13 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
 	raw_spin_unlock(&cpuctx->ctx.lock);
 }
 
+#define TASK_TOMBSTONE ((void *)-1L)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+	return event->owner == TASK_TOMBSTONE;
+}
+
 /*
  * On task ctx scheduling...
  *
@@ -196,31 +203,21 @@ static int event_function(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
+	int ret = 0;
 
 	WARN_ON_ONCE(!irqs_disabled());
 
+	perf_ctx_lock(cpuctx, task_ctx);
 	/*
 	 * Since we do the IPI call without holding ctx->lock things can have
 	 * changed, double check we hit the task we set out to hit.
-	 *
-	 * If ctx->task == current, we know things must remain valid because
-	 * we have IRQs disabled so we cannot schedule.
 	 */
 	if (ctx->task) {
-		if (ctx->task != current)
-			return -EAGAIN;
+		if (ctx->task != current) {
+			ret = -EAGAIN;
+			goto unlock;
+		}
 
-		WARN_ON_ONCE(task_ctx != ctx);
-	} else {
-		WARN_ON_ONCE(&cpuctx->ctx != ctx);
-	}
-
-	perf_ctx_lock(cpuctx, task_ctx);
-	/*
-	 * Now that we hold locks, double check state. Paranoia pays.
-	 */
-	if (task_ctx) {
-		WARN_ON_ONCE(task_ctx->task != current);
 		/*
 		 * We only use event_function_call() on established contexts,
 		 * and event_function() is only ever called when active (or
@@ -233,12 +230,16 @@ static int event_function(void *info)
 		 * And since we have ctx->is_active, cpuctx->task_ctx must
 		 * match.
 		 */
-		WARN_ON_ONCE(cpuctx->task_ctx != task_ctx);
+		WARN_ON_ONCE(task_ctx != ctx);
+	} else {
+		WARN_ON_ONCE(&cpuctx->ctx != ctx);
 	}
+
 	efs->func(event, cpuctx, ctx, efs->data);
+unlock:
 	perf_ctx_unlock(cpuctx, task_ctx);
 
-	return 0;
+	return ret;
 }
 
 static void event_function_local(struct perf_event *event, event_f func, void *data)
@@ -256,7 +257,7 @@ static void event_function_local(struct perf_event *event, event_f func, void *d
 static void event_function_call(struct perf_event *event, event_f func, void *data)
 {
 	struct perf_event_context *ctx = event->ctx;
-	struct task_struct *task = ctx->task;
+	struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
 	struct event_function_struct efs = {
 		.event = event,
 		.func = func,
@@ -278,30 +279,28 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
 	}
 
 again:
+	if (task == TASK_TOMBSTONE)
+		return;
+
 	if (!task_function_call(task, event_function, &efs))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
-	if (ctx->is_active) {
-		/*
-		 * Reload the task pointer, it might have been changed by
-		 * a concurrent perf_event_context_sched_out().
-		 */
-		task = ctx->task;
-		raw_spin_unlock_irq(&ctx->lock);
-		goto again;
+	/*
+	 * Reload the task pointer, it might have been changed by
+	 * a concurrent perf_event_context_sched_out().
+	 */
+	task = ctx->task;
+	if (task != TASK_TOMBSTONE) {
+		if (ctx->is_active) {
+			raw_spin_unlock_irq(&ctx->lock);
+			goto again;
+		}
+		func(event, NULL, ctx, data);
 	}
-	func(event, NULL, ctx, data);
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
-#define EVENT_OWNER_KERNEL ((void *) -1)
-
-static bool is_kernel_event(struct perf_event *event)
-{
-	return event->owner == EVENT_OWNER_KERNEL;
-}
-
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP |\
@@ -1025,7 +1024,7 @@ static void put_ctx(struct perf_event_context *ctx)
 	if (atomic_dec_and_test(&ctx->refcount)) {
 		if (ctx->parent_ctx)
 			put_ctx(ctx->parent_ctx);
-		if (ctx->task)
+		if (ctx->task && ctx->task != TASK_TOMBSTONE)
 			put_task_struct(ctx->task);
 		call_rcu(&ctx->rcu_head, free_ctx);
 	}
@@ -1186,6 +1185,7 @@ static u64 primary_event_id(struct perf_event *event)
 
 /*
  * Get the perf_event_context for a task and lock it.
+ *
  * This has to cope with with the fact that until it is locked,
  * the context could get moved to another task.
  */
@@ -1226,10 +1226,13 @@ retry:
 			goto retry;
 		}
 
-		if (!atomic_inc_not_zero(&ctx->refcount)) {
+		if (ctx->task == TASK_TOMBSTONE ||
+		    !atomic_inc_not_zero(&ctx->refcount)) {
 			raw_spin_unlock(&ctx->lock);
 			ctx = NULL;
 		}
+
+		WARN_ON_ONCE(ctx->task != task);
 	}
 	rcu_read_unlock();
 	if (!ctx)
@@ -2140,23 +2143,27 @@ static int  __perf_install_in_context(void *info)
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
 
+	raw_spin_lock(&cpuctx->ctx.lock);
 	if (ctx->task) {
+		raw_spin_lock(&ctx->lock);
 		/*
 		 * If we hit the 'wrong' task, we've since scheduled and
 		 * everything should be sorted, nothing to do!
 		 */
+		task_ctx = ctx;
 		if (ctx->task != current)
-			return 0;
+			goto unlock;
 
 		/*
 		 * If task_ctx is set, it had better be to us.
 		 */
 		WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
-		task_ctx = ctx;
+	} else if (task_ctx) {
+		raw_spin_lock(&task_ctx->lock);
 	}
 
-	perf_ctx_lock(cpuctx, task_ctx);
 	ctx_resched(cpuctx, task_ctx);
+unlock:
 	perf_ctx_unlock(cpuctx, task_ctx);
 
 	return 0;
@@ -2188,6 +2195,17 @@ perf_install_in_context(struct perf_event_context *ctx,
 	 * happened and that will have taken care of business.
 	 */
 	raw_spin_lock_irq(&ctx->lock);
+	task = ctx->task;
+	/*
+	 * Worse, we cannot even rely on the ctx actually existing anymore. If
+	 * between find_get_context() and perf_install_in_context() the task
+	 * went through perf_event_exit_task() its dead and we should not be
+	 * adding new events.
+	 */
+	if (task == TASK_TOMBSTONE) {
+		raw_spin_unlock_irq(&ctx->lock);
+		return;
+	}
 	update_context_time(ctx);
 	/*
 	 * Update cgrp time only if current cgrp matches event->cgrp.
@@ -2195,7 +2213,6 @@ perf_install_in_context(struct perf_event_context *ctx,
 	 */
 	update_cgrp_time_from_event(event);
 	add_event_to_ctx(event, ctx);
-	task = ctx->task;
 	raw_spin_unlock_irq(&ctx->lock);
 
 	if (task)
@@ -2538,17 +2555,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_lock(&ctx->lock);
 		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
-			/*
-			 * XXX do we need a memory barrier of sorts
-			 * wrt to rcu_dereference() of perf_event_ctxp
-			 */
-			task->perf_event_ctxp[ctxn] = next_ctx;
-			next->perf_event_ctxp[ctxn] = ctx;
-			ctx->task = next;
-			next_ctx->task = task;
+			WRITE_ONCE(ctx->task, next);
+			WRITE_ONCE(next_ctx->task, task);
 
 			swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
+			/*
+			 * RCU_INIT_POINTER here is safe because we've not
+			 * modified the ctx and the above modification of
+			 * ctx->task and ctx->task_ctx_data are immaterial
+			 * since those values are always verified under
+			 * ctx->lock which we're now holding.
+			 */
+			RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
+			RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
+
 			do_switch = 0;
 
 			perf_event_sync_stat(ctx, next_ctx);
@@ -8545,7 +8566,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	}
 
 	/* Mark owner so we could distinguish it from user events. */
-	event->owner = EVENT_OWNER_KERNEL;
+	event->owner = TASK_TOMBSTONE;
 
 	account_event(event);
 
@@ -8725,28 +8746,26 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-	struct perf_event *child_event, *next;
 	struct perf_event_context *child_ctx, *clone_ctx = NULL;
+	struct perf_event *child_event, *next;
+	unsigned long flags;
 
-	if (likely(!child->perf_event_ctxp[ctxn]))
+	WARN_ON_ONCE(child != current);
+
+	child_ctx = perf_lock_task_context(child, ctxn, &flags);
+	if (!child_ctx)
 		return;
 
-	local_irq_disable();
-	WARN_ON_ONCE(child != current);
-	/*
-	 * We can't reschedule here because interrupts are disabled,
-	 * and child must be current.
-	 */
-	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
+	task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
 	/*
-	 * Take the context lock here so that if find_get_context is
-	 * reading child->perf_event_ctxp, we wait until it has
-	 * incremented the context's refcount before we do put_ctx below.
+	 * Now that the context is inactive, destroy the task <-> ctx relation
+	 * and mark the context dead.
 	 */
-	raw_spin_lock(&child_ctx->lock);
-	task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
-	child->perf_event_ctxp[ctxn] = NULL;
+	RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
+	put_ctx(child_ctx); /* cannot be last */
+	WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+	put_task_struct(current); /* cannot be last */
 
 	/*
 	 * If this context is a clone; unclone it so it can't get
@@ -8755,7 +8774,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 */
 	clone_ctx = unclone_ctx(child_ctx);
 	update_context_time(child_ctx);
-	raw_spin_unlock_irq(&child_ctx->lock);
+	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	if (clone_ctx)
 		put_ctx(clone_ctx);

From 0e1eb0a1f5530bd751fe5bd2c62caa470aaa9643 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 7 Jan 2016 08:25:46 +0100
Subject: [PATCH 088/262] perf/x86: add Intel SkyLake uncore IMC PMU support

This patch enables the uncore_imc PMU for Intel
SkyLake Desktop processors (Core i7-6700, model 94).

It is possible to compute memory read/write bandwidth
using:

  $ perf stat -a -e uncore_imc/data_reads/,uncore_imc/data_writes/ ....

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: kan.liang@intel.com
Link: http://lkml.kernel.org/r/1452151546-8853-1-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c |  3 +++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  1 +
 .../kernel/cpu/perf_event_intel_uncore_snb.c  | 20 +++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index f97f8075bf04..3bf41d413775 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
 	case 87: /* Knights Landing */
 		ret = knl_uncore_pci_init();
 		break;
+	case 94: /* SkyLake */
+		ret = skl_uncore_pci_init();
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 07aa2d6bd710..a7086b862156 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 0b934820fafd..2bd030ddd0db 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC	0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC	0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
 	{ /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
 	.name		= "snb_uncore",
 	.id_table	= snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
 	.id_table	= bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+	.name		= "skl_uncore",
+	.id_table	= skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
 	__u32 pci_id;
 	struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
 	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
 	IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+	IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
 	{  /* end marker */ }
 };
 
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
 	return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+	return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */

From 45c815f06b80031659c63d7b93e580015d6024dd Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 19 Jan 2016 17:14:29 +0200
Subject: [PATCH 089/262] perf: Synchronously free aux pages in case of
 allocation failure

We are currently using asynchronous deallocation in the error path in
AUX mmap code, which is unnecessary and also presents a problem for users
that wish to probe for the biggest possible buffer size they can get:
they'll get -EINVAL on all subsequent attemts to allocate a smaller
buffer before the asynchronous deallocation callback frees up the pages
from the previous unsuccessful attempt.

Currently, gdb does that for allocating AUX buffers for Intel PT traces.
More specifically, overwrite mode of AUX pmus that don't support hardware
sg (some implementations of Intel PT, for instance) is limited to only
one contiguous high order allocation for its buffer and there is no way
of knowing its size without trying.

This patch changes error path freeing to be synchronous as there won't
be any contenders for the AUX pages at that point.

Reported-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/1453216469-9509-1-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/ring_buffer.c | 40 ++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index adfdc0536117..1faad2cfdb9e 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -459,6 +459,25 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
 	__free_page(page);
 }
 
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+	int pg;
+
+	if (rb->aux_priv) {
+		rb->free_aux(rb->aux_priv);
+		rb->free_aux = NULL;
+		rb->aux_priv = NULL;
+	}
+
+	if (rb->aux_nr_pages) {
+		for (pg = 0; pg < rb->aux_nr_pages; pg++)
+			rb_free_aux_page(rb, pg);
+
+		kfree(rb->aux_pages);
+		rb->aux_nr_pages = 0;
+	}
+}
+
 int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
@@ -547,30 +566,11 @@ out:
 	if (!ret)
 		rb->aux_pgoff = pgoff;
 	else
-		rb_free_aux(rb);
+		__rb_free_aux(rb);
 
 	return ret;
 }
 
-static void __rb_free_aux(struct ring_buffer *rb)
-{
-	int pg;
-
-	if (rb->aux_priv) {
-		rb->free_aux(rb->aux_priv);
-		rb->free_aux = NULL;
-		rb->aux_priv = NULL;
-	}
-
-	if (rb->aux_nr_pages) {
-		for (pg = 0; pg < rb->aux_nr_pages; pg++)
-			rb_free_aux_page(rb, pg);
-
-		kfree(rb->aux_pages);
-		rb->aux_nr_pages = 0;
-	}
-}
-
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))

From 14e46e04958df740c6c6a94849f176159a333f13 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 21 Jan 2016 18:54:41 +0100
Subject: [PATCH 090/262] btrfs: synchronize incompat feature bits with sysfs
 files

The files under /sys/fs/UUID/features get out of sync with the actual
incompat bits set for the filesystem if they change after mount (eg. the
LZO compression).

Synchronize the feature bits with the sysfs files representing them
right after we set/clear them.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-tree.c | 7 +++++++
 fs/btrfs/ioctl.c           | 4 ++++
 fs/btrfs/super.c           | 4 ++++
 fs/btrfs/volumes.c         | 2 ++
 4 files changed, 17 insertions(+)

diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 393e36bd5845..94e887f5ec4e 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -23,6 +23,7 @@
 #include "locking.h"
 #include "free-space-tree.h"
 #include "transaction.h"
+#include "sysfs.h"
 
 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
 					struct btrfs_fs_info *fs_info,
@@ -1169,6 +1170,9 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
 	}
 
 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_sysfs_feature_update(fs_info,
+		BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
+
 	fs_info->creating_free_space_tree = 0;
 
 	ret = btrfs_commit_transaction(trans, tree_root);
@@ -1237,6 +1241,9 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
 		return PTR_ERR(trans);
 
 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_sysfs_feature_update(fs_info,
+		BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
+
 	fs_info->free_space_root = NULL;
 
 	ret = clear_free_space_tree(trans, free_space_root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e392dd67f0ba..209dcfa9ab33 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1455,6 +1455,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 
 	if (range->compress_type == BTRFS_COMPRESS_LZO) {
 		btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
+		btrfs_sysfs_feature_update(root->fs_info,
+			BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO, FEAT_INCOMPAT);
 	}
 
 	ret = defrag_count;
@@ -4063,6 +4065,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 	btrfs_free_path(path);
 
 	btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
+	btrfs_sysfs_feature_update(root->fs_info,
+		BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL, FEAT_INCOMPAT);
 	btrfs_end_transaction(trans, root);
 out:
 	mnt_drop_write_file(file);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 86f7fdc05633..5a1bab11984d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -58,6 +58,7 @@
 #include "dev-replace.h"
 #include "free-space-cache.h"
 #include "backref.h"
+#include "sysfs.h"
 #include "tests/btrfs-tests.h"
 
 #include "qgroup.h"
@@ -477,6 +478,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 				btrfs_clear_opt(info->mount_opt, NODATACOW);
 				btrfs_clear_opt(info->mount_opt, NODATASUM);
 				btrfs_set_fs_incompat(info, COMPRESS_LZO);
+				btrfs_sysfs_feature_update(root->fs_info,
+					BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO,
+					FEAT_INCOMPAT);
 			} else if (strncmp(args[0].from, "no", 2) == 0) {
 				compress_type = "no";
 				btrfs_clear_opt(info->mount_opt, COMPRESS);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c32abbca9d77..73bcd1322c1d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4468,6 +4468,8 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
 		return;
 
 	btrfs_set_fs_incompat(info, RAID56);
+	btrfs_sysfs_feature_update(info, BTRFS_FEATURE_INCOMPAT_RAID56,
+		FEAT_INCOMPAT);
 }
 
 #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r)		\

From f0442df2156a2171e40f1643c60103e6333f4e7e Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Thu, 21 Jan 2016 18:19:34 +0000
Subject: [PATCH 091/262] drm/rockchip: respect CONFIG_DRM_FBDEV_EMULATION

If DRM_FBDEV_EMULATION is not selected in the config then we can save a
bit of space by not including the framebuffer code.

Signed-off-by: John Keeping <john@metanate.com>
---
 drivers/gpu/drm/rockchip/Makefile             |  3 ++-
 drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h | 11 +++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index a4e03bcde035..f6a809afceec 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -2,8 +2,9 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
 		rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
index 50432e9b5b37..73718c5f5bbf 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
@@ -15,7 +15,18 @@
 #ifndef _ROCKCHIP_DRM_FBDEV_H
 #define _ROCKCHIP_DRM_FBDEV_H
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 int rockchip_drm_fbdev_init(struct drm_device *dev);
 void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+	return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
 
 #endif /* _ROCKCHIP_DRM_FBDEV_H */

From 0106ef5146f9e89e4dc9354f308ecaddb9617310 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 20 Jan 2016 10:13:42 +0100
Subject: [PATCH 092/262] PM / domains: fix lockdep issue for all subdomains

During genpd_poweron, genpd->lock is acquired recursively for each
parent (master) domain, which are separate objects. This confuses
lockdep, which considers every operation on genpd->lock as being done on
the same lock class. This leads to the following false positive warning:

=============================================
[ INFO: possible recursive locking detected ]
4.4.0-rc4-xu3s #32 Not tainted
---------------------------------------------
swapper/0/1 is trying to acquire lock:
 (&genpd->lock){+.+...}, at: [<c0361550>] __genpd_poweron+0x64/0x108

but task is already holding lock:
 (&genpd->lock){+.+...}, at: [<c0361af8>] genpd_dev_pm_attach+0x168/0x1b8

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&genpd->lock);
  lock(&genpd->lock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by swapper/0/1:
 #0:  (&dev->mutex){......}, at: [<c0350910>] __driver_attach+0x48/0x98
 #1:  (&dev->mutex){......}, at: [<c0350920>] __driver_attach+0x58/0x98
 #2:  (&genpd->lock){+.+...}, at: [<c0361af8>] genpd_dev_pm_attach+0x168/0x1b8

stack backtrace:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.4.0-rc4-xu3s #32
Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
[<c0016c98>] (unwind_backtrace) from [<c00139c4>] (show_stack+0x10/0x14)
[<c00139c4>] (show_stack) from [<c0270df0>] (dump_stack+0x84/0xc4)
[<c0270df0>] (dump_stack) from [<c00780b8>] (__lock_acquire+0x1f88/0x215c)
[<c00780b8>] (__lock_acquire) from [<c007886c>] (lock_acquire+0xa4/0xd0)
[<c007886c>] (lock_acquire) from [<c0641f2c>] (mutex_lock_nested+0x70/0x4d4)
[<c0641f2c>] (mutex_lock_nested) from [<c0361550>] (__genpd_poweron+0x64/0x108)
[<c0361550>] (__genpd_poweron) from [<c0361b00>] (genpd_dev_pm_attach+0x170/0x1b8)
[<c0361b00>] (genpd_dev_pm_attach) from [<c03520a8>] (platform_drv_probe+0x2c/0xac)
[<c03520a8>] (platform_drv_probe) from [<c03507d4>] (driver_probe_device+0x208/0x2fc)
[<c03507d4>] (driver_probe_device) from [<c035095c>] (__driver_attach+0x94/0x98)
[<c035095c>] (__driver_attach) from [<c034ec14>] (bus_for_each_dev+0x68/0x9c)
[<c034ec14>] (bus_for_each_dev) from [<c034fec8>] (bus_add_driver+0x1a0/0x218)
[<c034fec8>] (bus_add_driver) from [<c035115c>] (driver_register+0x78/0xf8)
[<c035115c>] (driver_register) from [<c0338488>] (exynos_drm_register_drivers+0x28/0x74)
[<c0338488>] (exynos_drm_register_drivers) from [<c0338594>] (exynos_drm_init+0x6c/0xc4)
[<c0338594>] (exynos_drm_init) from [<c00097f4>] (do_one_initcall+0x90/0x1dc)
[<c00097f4>] (do_one_initcall) from [<c0895e08>] (kernel_init_freeable+0x158/0x1f8)
[<c0895e08>] (kernel_init_freeable) from [<c063ecac>] (kernel_init+0x8/0xe8)
[<c063ecac>] (kernel_init) from [<c000f7d0>] (ret_from_fork+0x14/0x24)

This patch replaces mutex_lock with mutex_lock_nested() and uses
recursion depth to annotate each genpd->lock operation with separate
lockdep subclass.

Reported-by: Anand Moon <linux.amoon@gmail.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Anand Moon <linux.amoon@gmail.com>
Tested-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index b80379012840..e02ddf65bc43 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -170,16 +170,15 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
 	queue_work(pm_wq, &genpd->power_off_work);
 }
 
-static int genpd_poweron(struct generic_pm_domain *genpd);
-
 /**
  * __genpd_poweron - Restore power to a given PM domain and its masters.
  * @genpd: PM domain to power up.
+ * @depth: nesting count for lockdep.
  *
  * Restore power to @genpd and all of its masters so that it is possible to
  * resume a device belonging to it.
  */
-static int __genpd_poweron(struct generic_pm_domain *genpd)
+static int __genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 {
 	struct gpd_link *link;
 	int ret = 0;
@@ -194,11 +193,16 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
 	 * with it.
 	 */
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
-		genpd_sd_counter_inc(link->master);
+		struct generic_pm_domain *master = link->master;
+
+		genpd_sd_counter_inc(master);
+
+		mutex_lock_nested(&master->lock, depth + 1);
+		ret = __genpd_poweron(master, depth + 1);
+		mutex_unlock(&master->lock);
 
-		ret = genpd_poweron(link->master);
 		if (ret) {
-			genpd_sd_counter_dec(link->master);
+			genpd_sd_counter_dec(master);
 			goto err;
 		}
 	}
@@ -230,11 +234,12 @@ static int genpd_poweron(struct generic_pm_domain *genpd)
 	int ret;
 
 	mutex_lock(&genpd->lock);
-	ret = __genpd_poweron(genpd);
+	ret = __genpd_poweron(genpd, 0);
 	mutex_unlock(&genpd->lock);
 	return ret;
 }
 
+
 static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
 {
 	return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
@@ -482,7 +487,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
 	}
 
 	mutex_lock(&genpd->lock);
-	ret = __genpd_poweron(genpd);
+	ret = __genpd_poweron(genpd, 0);
 	mutex_unlock(&genpd->lock);
 
 	if (ret)

From 6f16886b7c050c934305b1f285c3458ff1b6e4e4 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <Sudeep.Holla@arm.com>
Date: Thu, 21 Jan 2016 11:19:29 +0000
Subject: [PATCH 093/262] cpuidle: fix fallback mechanism for suspend to idle
 in absence of enter_freeze

Commit 51164251f5c3 "sched / idle: Drop default_idle_call() fallback
from call_cpuidle()" made find_deepest_state() return non-negative
value and check all the states with index > 0.  Also as a result,
find_deepest_state() returns 0 even when enter_freeze callbacks are not
implemented and enter_freeze_proper() is called which ends up crashing
the kernel.

This patch updates the check for index > 0 in cpuidle_enter_freeze and
cpuidle_idle_call(when idle_should_freeze is true) to restore the
suspend-to-idle functionality in absence of enter_freeze callback.

Fixes: 51164251f5c3 "sched / idle: Drop default_idle_call() fallback from call_cpuidle()"
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c | 2 +-
 kernel/sched/idle.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 046423b0c5ca..f996efc56605 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -153,7 +153,7 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * be frozen safely.
 	 */
 	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-	if (index >= 0)
+	if (index > 0)
 		enter_freeze_proper(drv, dev, index);
 
 	return index;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 34852ee70d54..30f7b6ad3920 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -162,7 +162,7 @@ static void cpuidle_idle_call(void)
 	 */
 	if (idle_should_freeze()) {
 		entered_state = cpuidle_enter_freeze(drv, dev);
-		if (entered_state >= 0) {
+		if (entered_state > 0) {
 			local_irq_enable();
 			goto exit_idle;
 		}

From 8cce83ba5062a301a09e0920df813bbbdd3e9dbf Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 22 Jan 2016 10:28:24 +0100
Subject: [PATCH 094/262] btrfs: tests: switch to GFP_KERNEL

There's no reason to do GFP_NOFS in tests, it's not data-heavy and
memory allocation failures would affect only developers or testers.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tests/btrfs-tests.c     | 10 +++++-----
 fs/btrfs/tests/extent-io-tests.c | 12 ++++++------
 fs/btrfs/tests/inode-tests.c     |  8 ++++----
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index b1d920b30070..0e1e61a7ec23 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
 	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-						GFP_NOFS);
+						GFP_KERNEL);
 
 	if (!fs_info)
 		return fs_info;
 	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->fs_devices) {
 		kfree(fs_info);
 		return NULL;
 	}
 	fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->super_copy) {
 		kfree(fs_info->fs_devices);
 		kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
 	struct btrfs_block_group_cache *cache;
 
-	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 	if (!cache)
 		return NULL;
 	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-					GFP_NOFS);
+					GFP_KERNEL);
 	if (!cache->free_space_ctl) {
 		kfree(cache);
 		return NULL;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index e29fa297e053..669b58201e36 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
 	 * test.
 	 */
 	for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			test_msg("Failed to allocate test page\n");
 			ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
 	 * |--- delalloc ---|
 	 * |---  search  ---|
 	 */
-	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
 	start = 0;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
 	}
-	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
 	 *
 	 * We are re-using our test_start from above since it works out well.
 	 */
-	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
 	}
 	ret = 0;
 out_bits:
-	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
 	if (locked_page)
 		page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
 	test_msg("Running extent buffer bitmap tests\n");
 
-	bitmap = kmalloc(len, GFP_NOFS);
+	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
 		test_msg("Couldn't allocate test bitmap\n");
 		return -ENOMEM;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 5de55fdd28bc..e2d3da02deee 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
 			       BTRFS_MAX_EXTENT_SIZE+8191,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1096,7 +1096,7 @@ out:
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 				 EXTENT_DIRTY | EXTENT_DELALLOC |
 				 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-				 NULL, GFP_NOFS);
+				 NULL, GFP_KERNEL);
 	iput(inode);
 	btrfs_free_dummy_root(root);
 	return ret;

From dd4e17ab704269bce71402285f5e8b9ac24b1eff Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 21 Jan 2016 15:03:34 -0800
Subject: [PATCH 095/262] ntp: Fix ADJ_SETOFFSET being used w/ ADJ_NANO

Recently, in commit 37cf4dc3370f I forgot to check if the timeval being passed
was actually a timespec (as is signaled with ADJ_NANO).

This resulted in that patch breaking ADJ_SETOFFSET users who set
ADJ_NANO, by rejecting valid timespecs that were compared with
valid timeval ranges.

This patch addresses this by checking for the ADJ_NANO flag and
using the timepsec check instead in that case.

Reported-by: Harald Hoyer <harald@redhat.com>
Reported-by: Kay Sievers <kay@vrfy.org>
Fixes: 37cf4dc3370f "time: Verify time values in adjtimex ADJ_SETOFFSET to avoid overflow"
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: David Herrmann <dh.herrmann@gmail.com>
Link: http://lkml.kernel.org/r/1453417415-19110-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/ntp.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 36f2ca09aa5e..6df8927c58a5 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -685,8 +685,18 @@ int ntp_validate_timex(struct timex *txc)
 		if (!capable(CAP_SYS_TIME))
 			return -EPERM;
 
-		if (!timeval_inject_offset_valid(&txc->time))
-			return -EINVAL;
+		if (txc->modes & ADJ_NANO) {
+			struct timespec ts;
+
+			ts.tv_sec = txc->time.tv_sec;
+			ts.tv_nsec = txc->time.tv_usec;
+			if (!timespec_inject_offset_valid(&ts))
+				return -EINVAL;
+
+		} else {
+			if (!timeval_inject_offset_valid(&txc->time))
+				return -EINVAL;
+		}
 	}
 
 	/*

From 1dff76b92f69051e579bdc131e01500da9fa2a91 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavin.guo@canonical.com>
Date: Wed, 20 Jan 2016 12:36:58 +0800
Subject: [PATCH 096/262] sched/numa: Fix use-after-free bug in the
 task_numa_compare

The following message can be observed on the Ubuntu v3.13.0-65 with KASan
backported:

  ==================================================================
  BUG: KASan: use after free in task_numa_find_cpu+0x64c/0x890 at addr ffff880dd393ecd8
  Read of size 8 by task qemu-system-x86/3998900
  =============================================================================
  BUG kmalloc-128 (Tainted: G    B        ): kasan: bad access detected
  -----------------------------------------------------------------------------

  INFO: Allocated in task_numa_fault+0xc1b/0xed0 age=41980 cpu=18 pid=3998890
	__slab_alloc+0x4f8/0x560
	__kmalloc+0x1eb/0x280
	task_numa_fault+0xc1b/0xed0
	do_numa_page+0x192/0x200
	handle_mm_fault+0x808/0x1160
	__do_page_fault+0x218/0x750
	do_page_fault+0x1a/0x70
	page_fault+0x28/0x30
	SyS_poll+0x66/0x1a0
	system_call_fastpath+0x1a/0x1f
  INFO: Freed in task_numa_free+0x1d2/0x200 age=62 cpu=18 pid=0
	__slab_free+0x2ab/0x3f0
	kfree+0x161/0x170
	task_numa_free+0x1d2/0x200
	finish_task_switch+0x1d2/0x210
	__schedule+0x5d4/0xc60
	schedule_preempt_disabled+0x40/0xc0
	cpu_startup_entry+0x2da/0x340
	start_secondary+0x28f/0x360
  Call Trace:
   [<ffffffff81a6ce35>] dump_stack+0x45/0x56
   [<ffffffff81244aed>] print_trailer+0xfd/0x170
   [<ffffffff8124ac36>] object_err+0x36/0x40
   [<ffffffff8124cbf9>] kasan_report_error+0x1e9/0x3a0
   [<ffffffff8124d260>] kasan_report+0x40/0x50
   [<ffffffff810dda7c>] ? task_numa_find_cpu+0x64c/0x890
   [<ffffffff8124bee9>] __asan_load8+0x69/0xa0
   [<ffffffff814f5c38>] ? find_next_bit+0xd8/0x120
   [<ffffffff810dda7c>] task_numa_find_cpu+0x64c/0x890
   [<ffffffff810de16c>] task_numa_migrate+0x4ac/0x7b0
   [<ffffffff810de523>] numa_migrate_preferred+0xb3/0xc0
   [<ffffffff810e0b88>] task_numa_fault+0xb88/0xed0
   [<ffffffff8120ef02>] do_numa_page+0x192/0x200
   [<ffffffff81211038>] handle_mm_fault+0x808/0x1160
   [<ffffffff810d7dbd>] ? sched_clock_cpu+0x10d/0x160
   [<ffffffff81068c52>] ? native_load_tls+0x82/0xa0
   [<ffffffff81a7bd68>] __do_page_fault+0x218/0x750
   [<ffffffff810c2186>] ? hrtimer_try_to_cancel+0x76/0x160
   [<ffffffff81a6f5e7>] ? schedule_hrtimeout_range_clock.part.24+0xf7/0x1c0
   [<ffffffff81a7c2ba>] do_page_fault+0x1a/0x70
   [<ffffffff81a772e8>] page_fault+0x28/0x30
   [<ffffffff8128cbd4>] ? do_sys_poll+0x1c4/0x6d0
   [<ffffffff810e64f6>] ? enqueue_task_fair+0x4b6/0xaa0
   [<ffffffff810233c9>] ? sched_clock+0x9/0x10
   [<ffffffff810cf70a>] ? resched_task+0x7a/0xc0
   [<ffffffff810d0663>] ? check_preempt_curr+0xb3/0x130
   [<ffffffff8128b5c0>] ? poll_select_copy_remaining+0x170/0x170
   [<ffffffff810d3bc0>] ? wake_up_state+0x10/0x20
   [<ffffffff8112a28f>] ? drop_futex_key_refs.isra.14+0x1f/0x90
   [<ffffffff8112d40e>] ? futex_requeue+0x3de/0xba0
   [<ffffffff8112e49e>] ? do_futex+0xbe/0x8f0
   [<ffffffff81022c89>] ? read_tsc+0x9/0x20
   [<ffffffff8111bd9d>] ? ktime_get_ts+0x12d/0x170
   [<ffffffff8108f699>] ? timespec_add_safe+0x59/0xe0
   [<ffffffff8128d1f6>] SyS_poll+0x66/0x1a0
   [<ffffffff81a830dd>] system_call_fastpath+0x1a/0x1f

As commit 1effd9f19324 ("sched/numa: Fix unsafe get_task_struct() in
task_numa_assign()") points out, the rcu_read_lock() cannot protect the
task_struct from being freed in the finish_task_switch(). And the bug
happens in the process of calculation of imp which requires the access of
p->numa_faults being freed in the following path:

do_exit()
        current->flags |= PF_EXITING;
    release_task()
        ~~delayed_put_task_struct()~~
    schedule()
    ...
    ...
rq->curr = next;
    context_switch()
        finish_task_switch()
            put_task_struct()
                __put_task_struct()
		    task_numa_free()

The fix here to get_task_struct() early before end of dst_rq->lock to
protect the calculation process and also put_task_struct() in the
corresponding point if finally the dst_rq->curr somehow cannot be
assigned.

Additional credit to Liang Chen who helped fix the error logic and add the
put_task_struct() to the place it missed.

Signed-off-by: Gavin Guo <gavin.guo@canonical.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jay.vosburgh@canonical.com
Cc: liang.chen@canonical.com
Link: http://lkml.kernel.org/r/1453264618-17645-1-git-send-email-gavin.guo@canonical.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1926606ece80..56b7d4b83947 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1220,8 +1220,6 @@ static void task_numa_assign(struct task_numa_env *env,
 {
 	if (env->best_task)
 		put_task_struct(env->best_task);
-	if (p)
-		get_task_struct(p);
 
 	env->best_task = p;
 	env->best_imp = imp;
@@ -1289,20 +1287,30 @@ static void task_numa_compare(struct task_numa_env *env,
 	long imp = env->p->numa_group ? groupimp : taskimp;
 	long moveimp = imp;
 	int dist = env->dist;
+	bool assigned = false;
 
 	rcu_read_lock();
 
 	raw_spin_lock_irq(&dst_rq->lock);
 	cur = dst_rq->curr;
 	/*
-	 * No need to move the exiting task, and this ensures that ->curr
-	 * wasn't reaped and thus get_task_struct() in task_numa_assign()
-	 * is safe under RCU read lock.
-	 * Note that rcu_read_lock() itself can't protect from the final
-	 * put_task_struct() after the last schedule().
+	 * No need to move the exiting task or idle task.
 	 */
 	if ((cur->flags & PF_EXITING) || is_idle_task(cur))
 		cur = NULL;
+	else {
+		/*
+		 * The task_struct must be protected here to protect the
+		 * p->numa_faults access in the task_weight since the
+		 * numa_faults could already be freed in the following path:
+		 * finish_task_switch()
+		 *     --> put_task_struct()
+		 *         --> __put_task_struct()
+		 *             --> task_numa_free()
+		 */
+		get_task_struct(cur);
+	}
+
 	raw_spin_unlock_irq(&dst_rq->lock);
 
 	/*
@@ -1386,6 +1394,7 @@ balance:
 		 */
 		if (!load_too_imbalanced(src_load, dst_load, env)) {
 			imp = moveimp - 1;
+			put_task_struct(cur);
 			cur = NULL;
 			goto assign;
 		}
@@ -1411,9 +1420,16 @@ balance:
 		env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
 
 assign:
+	assigned = true;
 	task_numa_assign(env, cur, imp);
 unlock:
 	rcu_read_unlock();
+	/*
+	 * The dst_rq->curr isn't assigned. The protection for task_struct is
+	 * finished.
+	 */
+	if (cur && !assigned)
+		put_task_struct(cur);
 }
 
 static void task_numa_find_cpu(struct task_numa_env *env,

From 4b5ece24ce564176f8ad786ad02b681a373bc615 Mon Sep 17 00:00:00 2001
From: Eric Huang <JinHuiEric.Huang@amd.com>
Date: Tue, 19 Jan 2016 14:28:56 -0500
Subject: [PATCH 097/262] drm/amd/amdgpu: Improve amdgpu_dpm* macros to avoid
 unexpected result (v2)

The two macros returns are values which probably are used
in the expression of calculation. Without the brackets
the result of the expression may be wrong.

v2: agd: squash both patches together

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 44 ++++++++++++++---------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 313b0cc8d676..82edf95b7740 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2278,60 +2278,60 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
 
 #define amdgpu_dpm_get_temperature(adev) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
-	      (adev)->pm.funcs->get_temperature((adev))
+	      (adev)->pm.funcs->get_temperature((adev)))
 
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
-	      (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+	      (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
 
 #define amdgpu_dpm_get_fan_control_mode(adev) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
-	      (adev)->pm.funcs->get_fan_control_mode((adev))
+	      (adev)->pm.funcs->get_fan_control_mode((adev)))
 
 #define amdgpu_dpm_set_fan_speed_percent(adev, s) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-	      (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+	      (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_fan_speed_percent(adev, s) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-	      (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+	      (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_sclk(adev, l) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
-		(adev)->pm.funcs->get_sclk((adev), (l))
+		(adev)->pm.funcs->get_sclk((adev), (l)))
 
 #define amdgpu_dpm_get_mclk(adev, l)  \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
-	      (adev)->pm.funcs->get_mclk((adev), (l))
+	      (adev)->pm.funcs->get_mclk((adev), (l)))
 
 
 #define amdgpu_dpm_force_performance_level(adev, l) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
-	      (adev)->pm.funcs->force_performance_level((adev), (l))
+	      (adev)->pm.funcs->force_performance_level((adev), (l)))
 
 #define amdgpu_dpm_powergate_uvd(adev, g) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
-	      (adev)->pm.funcs->powergate_uvd((adev), (g))
+	      (adev)->pm.funcs->powergate_uvd((adev), (g)))
 
 #define amdgpu_dpm_powergate_vce(adev, g) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
-	      (adev)->pm.funcs->powergate_vce((adev), (g))
+	      (adev)->pm.funcs->powergate_vce((adev), (g)))
 
 #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
-	      (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+	      (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 
 #define amdgpu_dpm_get_current_power_state(adev) \
 	(adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)

From 78d0e182b6c1f5336f6e8cbb197f403276dabc7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 19 Jan 2016 12:48:14 +0100
Subject: [PATCH 098/262] drm/amdgpu: fix amdgpu_bo_pin_restricted VRAM placing
 v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We could pin BOs into invisible VRAM otherwise.

v2: make logic more readable as suggested by Michel

Cc: stable@vger.kernel.org
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Rex Zhu <Rex.Zhu@amd.com> (v1)
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c3ce103b6a33..a2a16acee34d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -399,7 +399,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		}
 		if (fpfn > bo->placements[i].fpfn)
 			bo->placements[i].fpfn = fpfn;
-		if (lpfn && lpfn < bo->placements[i].lpfn)
+		if (!bo->placements[i].lpfn ||
+		    (lpfn && lpfn < bo->placements[i].lpfn))
 			bo->placements[i].lpfn = lpfn;
 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	}

From 9441f964f8e39374ab175e0a8fe870e1a2f02af1 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 20 Jan 2016 12:15:09 -0500
Subject: [PATCH 099/262] drm/amdgpu: add a message to indicate when powerplay
 is enabled (v2)

Makes it clear to the user which power management path is in
use.

v2: make consistent with dpm

Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Tom St Denis <tom.stdenis@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8f5d5edcf193..aa67244a77ae 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -64,6 +64,11 @@ static int pp_sw_init(void *handle)
 	if (ret == 0)
 		ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 
+	if (ret)
+		printk("amdgpu: powerplay initialization failed\n");
+	else
+		printk("amdgpu: powerplay initialized\n");
+
 	return ret;
 }
 

From cc78eb22885bba64445cde438ba098de0104920f Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 22 Jan 2016 00:13:15 -0500
Subject: [PATCH 100/262] drm/radeon: properly byte swap vce firmware setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Firmware is LE.  Need to properly byteswap some of the fields
so they are interpreted correctly by the driver on BE systems.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/vce_v1_0.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index 07a0d378e122..a01efe39a820 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
 		return -EINVAL;
 	}
 
-	for (i = 0; i < sign->num; ++i) {
-		if (sign->val[i].chip_id == chip_id)
+	for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+		if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
 			break;
 	}
 
-	if (i == sign->num)
+	if (i == le32_to_cpu(sign->num))
 		return -EINVAL;
 
 	data += (256 - 64) / 4;
@@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
 	data[1] = sign->val[i].nonce[1];
 	data[2] = sign->val[i].nonce[2];
 	data[3] = sign->val[i].nonce[3];
-	data[4] = sign->len + 64;
+	data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
 	memset(&data[5], 0, 44);
 	memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-	data += data[4] / 4;
+	data += le32_to_cpu(data[4]) / 4;
 	data[0] = sign->val[i].sigval[0];
 	data[1] = sign->val[i].sigval[1];
 	data[2] = sign->val[i].sigval[2];
 	data[3] = sign->val[i].sigval[3];
 
-	rdev->vce.keyselect = sign->val[i].keyselect;
+	rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
 
 	return 0;
 }

From 79b134a22b6fc45f48bcc1c27d6bf05b3ede5e06 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 22 Jan 2016 17:16:18 +0100
Subject: [PATCH 101/262] btrfs: tweak free space tree bitmap allocation

The requested bitmap size varies, observed numbers were < 4K up to 16K.
Using vmalloc unconditionally would be too heavy, we'll try contiguous
allocations first and fall back to vmalloc if there's no contig memory.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-tree.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 393e36bd5845..0f33d58cb321 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+	void *mem;
+
+	/*
+	 * The allocation size varies, observed numbers were < 4K up to 16K.
+	 * Using vmalloc unconditionally would be too heavy, we'll try
+	 * contiguous allocations first.
+	 */
+	if  (bitmap_size <= PAGE_SIZE)
+		return kzalloc(bitmap_size, GFP_NOFS);
+
+	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOWARN);
+	if (mem)
+		return mem;
+
 	return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
 			 PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;

From 41f2d9905646b1cd4be5852b273c06b6a6d23a80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 21 Jan 2016 12:56:52 +0100
Subject: [PATCH 102/262] drm/amdgpu: fix next_rptr handling for debugfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That somehow got lost.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 78e9b0f14661..d1f234dd2126 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -487,7 +487,7 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
 	seq_printf(m, "rptr: 0x%08x [%5d]\n",
 		   rptr, rptr);
 
-	rptr_next = ~0;
+	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
 	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
 		   ring->wptr, ring->wptr);

From b186b4dcb79b1914c3dadb27ac72dafaa4267998 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 22 Jan 2016 11:41:05 +0100
Subject: [PATCH 103/262] ACPI: Revert "ACPI / video: Add Dell Inspiron 5737 to
 the blacklist"

The quirk to get "acpi_backlight=vendor" behavior by default on the
Dell Inspiron 5737 was added before we started doing
"acpi_backlight=native" by default on Win8 ready machines.

Since we now avoid using acpi-video as backlight driver on these machines
by default (using the native driver instead) we no longer need this quirk.

Moreover the vendor driver does not work after a suspend/resume where
as the native driver does.

This reverts commit 08a56226d847 (ACPI / video: Add Dell Inspiron 5737
to the blacklist).

Link: https://bugzilla.kernel.org/show_bug.cgi?id=111061
Cc: 3.19+ <stable@vger.kernel.org> # 3.19+
Reported-and-tested-by: erusan@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/video_detect.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 90e2d54be526..1316ddd92fac 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -135,14 +135,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"),
 		},
 	},
-	{
-	.callback = video_detect_force_vendor,
-	.ident = "Dell Inspiron 5737",
-	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"),
-		},
-	},
 
 	/*
 	 * These models have a working acpi_video backlight control, and using

From d0e5fbb01a67e400e82fefe4896ea40c6447ab98 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Sat, 23 Jan 2016 08:05:33 +0800
Subject: [PATCH 104/262] block: fix bio splitting on max sectors

After commit e36f62042880(block: split bios to maxpossible length),
bio can be splitted in the middle of a vector entry, then it
is easy to split out one bio which size isn't aligned with block
size, especially when the block size is bigger than 512.

This patch fixes the issue by making the max io size aligned
to logical block size.

Fixes: e36f62042880(block: split bios to maxpossible length)
Reported-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Cc: Keith Busch <keith.busch@intel.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-merge.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1699df5b0493..888a7fec81f7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -70,6 +70,18 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
 	return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
 }
 
+static inline unsigned get_max_io_size(struct request_queue *q,
+				       struct bio *bio)
+{
+	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+	unsigned mask = queue_logical_block_size(q) - 1;
+
+	/* aligned to logical block size */
+	sectors &= ~(mask >> 9);
+
+	return sectors;
+}
+
 static struct bio *blk_bio_segment_split(struct request_queue *q,
 					 struct bio *bio,
 					 struct bio_set *bs,
@@ -81,6 +93,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	unsigned front_seg_size = bio->bi_seg_front_size;
 	bool do_split = true;
 	struct bio *new = NULL;
+	const unsigned max_sectors = get_max_io_size(q, bio);
 
 	bio_for_each_segment(bv, bio, iter) {
 		/*
@@ -90,20 +103,19 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
 			goto split;
 
-		if (sectors + (bv.bv_len >> 9) >
-				blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+		if (sectors + (bv.bv_len >> 9) > max_sectors) {
 			/*
 			 * Consider this a new segment if we're splitting in
 			 * the middle of this vector.
 			 */
 			if (nsegs < queue_max_segments(q) &&
-			    sectors < blk_max_size_offset(q,
-						bio->bi_iter.bi_sector)) {
+			    sectors < max_sectors) {
 				nsegs++;
-				sectors = blk_max_size_offset(q,
-						bio->bi_iter.bi_sector);
+				sectors = max_sectors;
 			}
-			goto split;
+			if (sectors)
+				goto split;
+			/* Make this single bvec as the 1st segment */
 		}
 
 		if (bvprvp && blk_queue_cluster(q)) {

From 2c3033a0664dfae91e1dee7fabac10f24354b958 Mon Sep 17 00:00:00 2001
From: Insu Yun <wuninsu@gmail.com>
Date: Sat, 23 Jan 2016 15:44:19 -0500
Subject: [PATCH 105/262] ACPI / PCI / hotplug: unlock in error path in
 acpiphp_enable_slot()

In acpiphp_enable_slot(), there is a missing unlock path
when error occurred.  It needs to be unlocked before returning
an error.

Signed-off-by: Insu Yun <wuninsu@gmail.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/hotplug/acpiphp_glue.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index ff538568a617..0b3e0bfa7be5 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -953,8 +953,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot)
 {
 	pci_lock_rescan_remove();
 
-	if (slot->flags & SLOT_IS_GOING_AWAY)
+	if (slot->flags & SLOT_IS_GOING_AWAY) {
+		pci_unlock_rescan_remove();
 		return -ENODEV;
+	}
 
 	/* configure all functions */
 	if (!(slot->flags & SLOT_ENABLED))

From 6220f4ebd7b4db499238c2dc91268a9c473fd01c Mon Sep 17 00:00:00 2001
From: Thorsten Leemhuis <linux@leemhuis.info>
Date: Sun, 17 Jan 2016 16:03:04 +0100
Subject: [PATCH 106/262] hwmon: (dell-smm) Blacklist Dell Studio XPS 8000
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since Linux 4.0 the CPU fan speed is going up and down on Dell Studio
XPS 8000 and 8100 for unknown reasons. The 8100 was already
blacklisted in commit a4b45b25f18d ("hwmon: (dell-smm) Blacklist
Dell Studio XPS 8100"). This patch blacklists the XPS 8000.

Without further debugging on the affected machine, it is not possible
to find the problem. For more details see
https://bugzilla.kernel.org/show_bug.cgi?id=100121

Signed-off-by: Thorsten Leemhuis <linux@leemhuis.info>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Cc: stable@vger.kernel.org # v4.0+, will need backport
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c8487894b312..c43318d3416e 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -930,6 +930,17 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
 static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+	{
+		/*
+		 * CPU fan speed going up and down on Dell Studio XPS 8000
+		 * for unknown reasons.
+		 */
+		.ident = "Dell Studio XPS 8000",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+		},
+	},
 	{
 		/*
 		 * CPU fan speed going up and down on Dell Studio XPS 8100

From 19454462acb1bdef80542061bdc9b410e4ed1ff6 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 30 Dec 2015 12:59:08 +0800
Subject: [PATCH 107/262] usb: cdc-acm: handle unlinked urb in acm read
 callback

In current acm driver, the bulk-in callback function ignores the
URBs unlinked in usb core.

This causes unexpected data loss in some cases. For example,
runtime suspend entry will unlinked all urbs and set urb->status
to -ENOENT even those urbs might have data not processed yet.
Hence, data loss occurs.

This patch lets bulk-in callback function handle unlinked urbs
to avoid data loss.

Signed-off-by: Tang Jian Qiang <jianqiang.tang@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Cc: stable@vger.kernel.org
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 26ca4f910cb0..8cd193bbd044 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -428,7 +428,8 @@ static void acm_read_bulk_callback(struct urb *urb)
 		set_bit(rb->index, &acm->read_urbs_free);
 		dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n",
 							__func__, status);
-		return;
+		if ((status != -ENOENT) || (urb->actual_length == 0))
+			return;
 	}
 
 	usb_mark_last_busy(acm->dev);

From ffdb1e369a73b380fce95b05f8498d92c43842b4 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 6 Jan 2016 15:10:04 +0800
Subject: [PATCH 108/262] usb: cdc-acm: send zero packet for intel 7260 modem

For Intel 7260 modem, it is needed for host side to send zero
packet if the BULK OUT size is equal to USB endpoint max packet
length. Otherwise, modem side may still wait for more data and
cannot give response to host side.

Signed-off-by: Konrad Leszczynski <konrad.leszczynski@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 6 ++++++
 drivers/usb/class/cdc-acm.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 8cd193bbd044..e4c70dce3e7c 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1405,6 +1405,8 @@ made_compressed_probe:
 				usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
 				NULL, acm->writesize, acm_write_bulk, snd);
 		snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+		if (quirks & SEND_ZERO_PACKET)
+			snd->urb->transfer_flags |= URB_ZERO_PACKET;
 		snd->instance = acm;
 	}
 
@@ -1862,6 +1864,10 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_CDMA) },
 
+	{ USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */
+	.driver_info = SEND_ZERO_PACKET,
+	},
+
 	{ }
 };
 
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index dd9af38e7cda..ccfaba9ab4e4 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -134,3 +134,4 @@ struct acm {
 #define IGNORE_DEVICE			BIT(5)
 #define QUIRK_CONTROL_LINE_STATE	BIT(6)
 #define CLEAR_HALT_CONDITIONS		BIT(7)
+#define SEND_ZERO_PACKET		BIT(8)

From e912e685f372ab62a2405a1acd923597f524e94a Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 18 Jan 2016 15:45:18 +0100
Subject: [PATCH 109/262] cdc-acm:exclude Samsung phone 04e8:685d

This phone needs to be handled by a specialised firmware tool
and is reported to crash irrevocably if cdc-acm takes it.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
CC: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index e4c70dce3e7c..fa4e23930614 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1841,6 +1841,11 @@ static const struct usb_device_id acm_ids[] = {
 	},
 #endif
 
+	/*Samsung phone in firmware update mode */
+	{ USB_DEVICE(0x04e8, 0x685d),
+	.driver_info = IGNORE_DEVICE,
+	},
+
 	/* Exclude Infineon Flash Loader utility */
 	{ USB_DEVICE(0x058b, 0x0041),
 	.driver_info = IGNORE_DEVICE,

From d8f00cd685f5c8e0def8593e520a7fef12c22407 Mon Sep 17 00:00:00 2001
From: "Du, Changbin" <changbin.du@intel.com>
Date: Mon, 18 Jan 2016 21:02:42 +0800
Subject: [PATCH 110/262] usb: hub: do not clear BOS field during reset device

In function usb_reset_and_verify_device, the old BOS descriptor may
still be used before allocating a new one. (usb_unlocked_disable_lpm
function uses it under the situation that it fails to disable lpm.)
So we cannot set the udev->bos to NULL before that, just keep what it
was. It will be overwrite when allocating a new one.

Crash log:
BUG: unable to handle kernel NULL pointer dereference at
0000000000000010
IP: [<ffffffff8171f98d>] usb_enable_link_state+0x2d/0x2f0
Call Trace:
[<ffffffff8171ed5b>] ? usb_set_lpm_timeout+0x12b/0x140
[<ffffffff8171fcd1>] usb_enable_lpm+0x81/0xa0
[<ffffffff8171fdd8>] usb_disable_lpm+0xa8/0xc0
[<ffffffff8171fe1c>] usb_unlocked_disable_lpm+0x2c/0x50
[<ffffffff81723933>] usb_reset_and_verify_device+0xc3/0x710
[<ffffffff8172c4ed>] ? usb_sg_wait+0x13d/0x190
[<ffffffff81724743>] usb_reset_device+0x133/0x280
[<ffffffff8179ccd1>] usb_stor_port_reset+0x61/0x70
[<ffffffff8179cd68>] usb_stor_invoke_transport+0x88/0x520

Signed-off-by: Du, Changbin <changbin.du@intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 51b436918f78..350dcd9af5d8 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5401,7 +5401,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 	}
 
 	bos = udev->bos;
-	udev->bos = NULL;
 
 	for (i = 0; i < SET_CONFIG_TRIES; ++i) {
 
@@ -5494,8 +5493,11 @@ done:
 	usb_set_usb2_hardware_lpm(udev, 1);
 	usb_unlocked_enable_lpm(udev);
 	usb_enable_ltm(udev);
-	usb_release_bos_descriptor(udev);
-	udev->bos = bos;
+	/* release the new BOS descriptor allocated  by hub_port_init() */
+	if (udev->bos != bos) {
+		usb_release_bos_descriptor(udev);
+		udev->bos = bos;
+	}
 	return 0;
 
 re_enumerate:

From e256caa7d0515e301f8c8c6e7d1204a2b67b1381 Mon Sep 17 00:00:00 2001
From: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Date: Thu, 21 Jan 2016 21:45:31 +0530
Subject: [PATCH 111/262] powerpc/mm: Allow user space to map rtas_rmo_buf

With commit 90a545e9 (restrict /dev/mem to idle io memory ranges) mapping
rtas_rmo_buf from user space is failing. Hence we are not able to make
RTAS syscall.

This patch calls page_is_rtas_user_buf before calling iomem_is_exclusive
in  devmem_is_allowed(). This will allow user space to map rtas_rmo_buf
and we are able to make RTAS syscall.

Reported-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22d94c3e6fc4..d0f0a514b04e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -560,12 +560,12 @@ subsys_initcall(add_system_ram_resources);
  */
 int devmem_is_allowed(unsigned long pfn)
 {
+	if (page_is_rtas_user_buf(pfn))
+		return 1;
 	if (iomem_is_exclusive(PFN_PHYS(pfn)))
 		return 0;
 	if (!page_is_ram(pfn))
 		return 1;
-	if (page_is_rtas_user_buf(pfn))
-		return 1;
 	return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */

From f487c54ddd544e1c9172cd510954f697b77b76e3 Mon Sep 17 00:00:00 2001
From: Peter Dedecker <peter.dedecker@hotmail.com>
Date: Fri, 8 Jan 2016 12:34:41 +0100
Subject: [PATCH 112/262] USB: cp210x: add ID for IAI USB to RS485 adaptor

Added the USB serial console device ID for IAI Corp. RCB-CV-USB
USB to RS485 adaptor.

Signed-off-by: Peter Dedecker <peter.dedecker@hotmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 9b90ad747d87..987813b8a7f9 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */
 	{ USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */
 	{ USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
+	{ USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. RCB-CV-USB USB to RS485 Adaptor */
 	{ USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
 	{ USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
 	{ USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */

From cac9b50b0d75a1d50d6c056ff65c005f3224c8e0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 12 Jan 2016 12:05:20 +0100
Subject: [PATCH 113/262] USB: visor: fix null-deref at probe

Fix null-pointer dereference at probe should a (malicious) Treo device
lack the expected endpoints.

Specifically, the Treo port-setup hack was dereferencing the bulk-in and
interrupt-in urbs without first making sure they had been allocated by
core.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/visor.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 60afb39eb73c..c53fbb3e0b8c 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -544,6 +544,11 @@ static int treo_attach(struct usb_serial *serial)
 		(serial->num_interrupt_in == 0))
 		return 0;
 
+	if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) {
+		dev_err(&serial->interface->dev, "missing endpoints\n");
+		return -ENODEV;
+	}
+
 	/*
 	* It appears that Treos and Kyoceras want to use the
 	* 1st bulk in endpoint to communicate with the 2nd bulk out endpoint,

From cb3232138e37129e88240a98a1d2aba2187ff57c Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Tue, 12 Jan 2016 15:10:50 +0100
Subject: [PATCH 114/262] USB: serial: visor: fix crash on detecting device
 without write_urbs

The visor driver crashes in clie_5_attach() when a specially crafted USB
device without bulk-out endpoint is detected. This fix adds a check that
the device has proper configuration expected by the driver.

Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Fixes: cfb8da8f69b8 ("USB: visor: fix initialisation of UX50/TH55 devices")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/visor.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index c53fbb3e0b8c..337a0be89fcf 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -602,8 +602,10 @@ static int clie_5_attach(struct usb_serial *serial)
 	 */
 
 	/* some sanity check */
-	if (serial->num_ports < 2)
-		return -1;
+	if (serial->num_bulk_out < 2) {
+		dev_err(&serial->interface->dev, "missing bulk out endpoints\n");
+		return -ENODEV;
+	}
 
 	/* port 0 now uses the modified endpoint Address */
 	port = serial->port[0];

From ff4e2494dc17b173468e1713fdf6237fd8578bc7 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Tue, 12 Jan 2016 17:22:06 +0100
Subject: [PATCH 115/262] USB: serial: option: Adding support for Telit LE922

This patch adds support for two PIDs of LE922.

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index f2280606b73c..75350fc550a1 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_CC864_SINGLE		0x1006
 #define TELIT_PRODUCT_DE910_DUAL		0x1010
 #define TELIT_PRODUCT_UE910_V2			0x1012
+#define TELIT_PRODUCT_LE922_USBCFG0		0x1042
+#define TELIT_PRODUCT_LE922_USBCFG3		0x1043
 #define TELIT_PRODUCT_LE920			0x1200
 #define TELIT_PRODUCT_LE910			0x1201
 
@@ -615,6 +617,16 @@ static const struct option_blacklist_info telit_le920_blacklist = {
 	.reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
+	.sendsetup = BIT(2),
+	.reserved = BIT(0) | BIT(1) | BIT(3),
+};
+
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(1) | BIT(2) | BIT(3),
+};
+
 static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1160,6 +1172,10 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
 		.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),

From 599151336638d57b98d92338aa59c048e3a3e97d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 25 Jan 2016 11:01:47 +0100
Subject: [PATCH 116/262] ALSA: seq: Fix incorrect sanity check at
 snd_seq_oss_synth_cleanup()

ALSA sequencer OSS emulation code has a sanity check for currently
opened devices, but there is a thinko there, eventually it spews
warnings and skips the operation wrongly like:
  WARNING: CPU: 1 PID: 7573 at sound/core/seq/oss/seq_oss_synth.c:311

Fix this off-by-one error.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/oss/seq_oss_synth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c
index 0f3b38184fe5..b16dbef04174 100644
--- a/sound/core/seq/oss/seq_oss_synth.c
+++ b/sound/core/seq/oss/seq_oss_synth.c
@@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp)
 	struct seq_oss_synth *rec;
 	struct seq_oss_synthinfo *info;
 
-	if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+	if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
 		return;
 	for (i = 0; i < dp->max_synthdev; i++) {
 		info = &dp->synths[i];

From da10816e3d923565b470fec78a674baba794ed33 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 25 Jan 2016 11:24:56 +0100
Subject: [PATCH 117/262] ALSA: seq: Degrade the error message for too many
 opens

ALSA OSS sequencer spews a kernel error message ("ALSA: seq_oss: too
many applications") when user-space tries to open more than the
limit.  This means that it can easily fill the log buffer.

Since it's merely a normal error, it's safe to suppress it via
pr_debug() instead.

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/oss/seq_oss_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index b1221b29728e..6779e82b46dd 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c
@@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level)
 
 	dp->index = i;
 	if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
-		pr_err("ALSA: seq_oss: too many applications\n");
+		pr_debug("ALSA: seq_oss: too many applications\n");
 		rc = -ENOMEM;
 		goto _error;
 	}

From e03cdf22a2727c60307be6a729233edab3bfda9c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 19 Jan 2016 23:43:13 -0800
Subject: [PATCH 118/262] USB: serial: ftdi_sio: add support for Yaesu SCU-18
 cable

Harald Linden reports that the ftdi_sio driver works properly for the
Yaesu SCU-18 cable if the device ids are added to the driver.  So let's
add them.

Reported-by: Harald Linden <harald.linden@7183.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ftdi_sio.c     | 1 +
 drivers/usb/serial/ftdi_sio_ids.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index a5a0376bbd48..8c660ae401d8 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -824,6 +824,7 @@ static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) },
+	{ USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) },
 	{ USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) },
 
 	/* Papouch devices based on FTDI chip */
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 67c6d4469730..a84df2513994 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -615,6 +615,7 @@
  */
 #define RATOC_VENDOR_ID		0x0584
 #define RATOC_PRODUCT_ID_USB60F	0xb020
+#define RATOC_PRODUCT_ID_SCU18	0xb03a
 
 /*
  * Infineon Technologies

From 75feee3d9d51775072d3a04f47d4a439a4c4590e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 15 Jan 2016 13:28:57 +0100
Subject: [PATCH 119/262] arm64: hide __efistub_ aliases from kallsyms

Commit e8f3010f7326 ("arm64/efi: isolate EFI stub from the kernel
proper") isolated the EFI stub code from the kernel proper by prefixing
all of its symbols with __efistub_, and selectively allowing access to
core kernel symbols from the stub by emitting __efistub_ aliases for
functions and variables that the stub can access legally.

As an unintended side effect, these aliases are emitted into the
kallsyms symbol table, which means they may turn up in backtraces,
e.g.,

  ...
  PC is at __efistub_memset+0x108/0x200
  LR is at fixup_init+0x3c/0x48
  ...
  [<ffffff8008328608>] __efistub_memset+0x108/0x200
  [<ffffff8008094dcc>] free_initmem+0x2c/0x40
  [<ffffff8008645198>] kernel_init+0x20/0xe0
  [<ffffff8008085cd0>] ret_from_fork+0x10/0x40

The backtrace in question has nothing to do with the EFI stub, but
simply returns one of the several aliases of memset() that have been
recorded in the kallsyms table. This is undesirable, since it may
suggest to people who are not aware of this that the issue they are
seeing is somehow EFI related.

So hide the __efistub_ aliases from kallsyms, by emitting them as
absolute linker symbols explicitly. The distinction between those
and section relative symbols is completely irrelevant to these
definitions, and to the final link we are performing when these
definitions are being taken into account (the distinction is only
relevant to symbols defined inside a section definition when performing
a partial link), and so the resulting values are identical to the
original ones. Since absolute symbols are ignored by kallsyms, this
will result in these values to be omitted from its symbol table.

After this patch, the backtrace generated from the same address looks
like this:
  ...
  PC is at __memset+0x108/0x200
  LR is at fixup_init+0x3c/0x48
  ...
  [<ffffff8008328608>] __memset+0x108/0x200
  [<ffffff8008094dcc>] free_initmem+0x2c/0x40
  [<ffffff8008645198>] kernel_init+0x20/0xe0
  [<ffffff8008085cd0>] ret_from_fork+0x10/0x40

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/image.h | 40 ++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index bc2abb8b1599..999633bd7294 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -64,6 +64,16 @@
 
 #ifdef CONFIG_EFI
 
+/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)	ABSOLUTE(sym)
+
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
@@ -73,25 +83,25 @@
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp		= __pi_memcmp;
-__efistub_memchr		= __pi_memchr;
-__efistub_memcpy		= __pi_memcpy;
-__efistub_memmove		= __pi_memmove;
-__efistub_memset		= __pi_memset;
-__efistub_strlen		= __pi_strlen;
-__efistub_strcmp		= __pi_strcmp;
-__efistub_strncmp		= __pi_strncmp;
-__efistub___flush_dcache_area	= __pi___flush_dcache_area;
+__efistub_memcmp		= KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr		= KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset		= KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen		= KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp		= KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp		= KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area	= KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy		= __pi_memcpy;
-__efistub___memmove		= __pi_memmove;
-__efistub___memset		= __pi_memset;
+__efistub___memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset		= KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text			= _text;
-__efistub__end			= _end;
-__efistub__edata		= _edata;
+__efistub__text			= KALLSYMS_HIDE(_text);
+__efistub__end			= KALLSYMS_HIDE(_end);
+__efistub__edata		= KALLSYMS_HIDE(_edata);
 
 #endif
 

From 7b1af9795773d745c2a8c7d4ca5f2936e8b6adfb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 14:50:21 +0100
Subject: [PATCH 120/262] arm64: kasan: ensure that the KASAN zero page is
 mapped read-only

When switching from the early KASAN shadow region, which maps the
entire shadow space read-write, to the permanent KASAN shadow region,
which uses a zero page to shadow regions that are not subject to
instrumentation, the lowest level table kasan_zero_pte[] may be
reused unmodified, which means that the mappings of the zero page
that it contains will still be read-write.

So update it explicitly to map the zero page read only when we
activate the permanent mapping.

Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/kasan_init.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7d9fa9..cab7a5be40aa 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
+	int i;
 
 	/*
 	 * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
 				pfn_to_nid(virt_to_pfn(start)));
 	}
 
+	/*
+	 * KAsan may reuse the contents of kasan_zero_pte directly, so we
+	 * should make sure that it maps the zero page read-only.
+	 */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte(&kasan_zero_pte[i],
+			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	cpu_set_ttbr1(__pa(swapper_pg_dir));
 	flush_tlb_all();

From f436b2ac90a095746beb6729b8ee8ed87c9eaede Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Wed, 13 Jan 2016 14:50:03 +0000
Subject: [PATCH 121/262] arm64: kernel: fix architected PMU registers
 unconditional access

The Performance Monitors extension is an optional feature of the
AArch64 architecture, therefore, in order to access Performance
Monitors registers safely, the kernel should detect the architected
PMU unit presence through the ID_AA64DFR0_EL1 register PMUVer field
before accessing them.

This patch implements a guard by reading the ID_AA64DFR0_EL1 register
PMUVer field to detect the architected PMU presence and prevent accessing
PMU system registers if the Performance Monitors extension is not
implemented in the core.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: <stable@vger.kernel.org>
Fixes: 60792ad349f3 ("arm64: kernel: enforce pmuserenr_el0 initialization and restore")
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/head.S    |  5 +++++
 arch/arm64/mm/proc-macros.S | 12 ++++++++++++
 arch/arm64/mm/proc.S        |  4 ++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ffe9c2b6431b..917d98108b3f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -514,9 +514,14 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
 #endif
 
 	/* EL2 debug */
+	mrs	x0, id_aa64dfr0_el1		// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	x0, x0, #8, #4
+	cmp	x0, #1
+	b.lt	4f				// Skip if no PMU present
 	mrs	x0, pmcr_el0			// Disable debug access traps
 	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
 	msr	mdcr_el2, x0			// all PMU counters from EL1
+4:
 
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 146bd99a7532..e6a30e1268a8 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -84,3 +84,15 @@
 	b.lo	9998b
 	dsb	\domain
 	.endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+	.macro	reset_pmuserenr_el0, tmpreg
+	mrs	\tmpreg, id_aa64dfr0_el1	// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	\tmpreg, \tmpreg, #8, #4
+	cmp	\tmpreg, #1			// Skip if no PMU present
+	b.lt	9000f
+	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+9000:
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a3d867e723b4..c164d2cb35c0 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -117,7 +117,7 @@ ENTRY(cpu_do_resume)
 	 */
 	ubfx	x11, x11, #1, #1
 	msr	oslar_el1, x11
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	mov	x0, x12
 	dsb	nsh		// Make sure local tlb invalidation completed
 	isb
@@ -154,7 +154,7 @@ ENTRY(__cpu_setup)
 	msr	cpacr_el1, x0			// Enable FP/ASIMD
 	mov	x0, #1 << 12			// Reset mdscr_el1 and disable
 	msr	mdscr_el1, x0			// access to the DCC from EL0
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	/*
 	 * Memory region attributes for LPAE:
 	 *

From ac15bd63bbb24238f763ec5b24ee175ec301e8cd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 7 Jan 2016 16:07:20 +0000
Subject: [PATCH 122/262] arm64: Honour !PTE_WRITE in set_pte_at() for kernel
 mappings

Currently, set_pte_at() only checks the software PTE_WRITE bit for user
mappings when it sets or clears the hardware PTE_RDONLY accordingly. The
kernel ptes are written directly without any modification, relying
solely on the protection bits in macros like PAGE_KERNEL. However,
modifying kernel pte attributes via pte_wrprotect() would be ignored by
set_pte_at(). Since pte_wrprotect() does not set PTE_RDONLY (it only
clears PTE_WRITE), the new permission is not taken into account.

This patch changes set_pte_at() to adjust the read-only permission for
kernel ptes as well. As a side effect, existing PROT_* definitions used
for kernel ioremap*() need to include PTE_DIRTY | PTE_WRITE.

(additionally, white space fix for PTE_KERNEL_ROX)

Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2d545d7aa80b..bf464de33f52 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -67,11 +67,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX		__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -153,6 +153,7 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@ extern struct page *empty_zero_page;
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
@@ -278,13 +277,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_valid_user(pte)) {
-		if (!pte_special(pte) && pte_exec(pte))
-			__sync_icache_dcache(pte, addr);
+	if (pte_valid(pte)) {
 		if (pte_sw_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
 			pte_val(pte) |= PTE_RDONLY;
+		if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+			__sync_icache_dcache(pte, addr);
 	}
 
 	/*

From 028635d6b5b42de0e0fe5f5c92e1b99868e81a29 Mon Sep 17 00:00:00 2001
From: Mathieu OTHACEHE <m.othacehe@gmail.com>
Date: Mon, 4 Jan 2016 19:49:36 +0100
Subject: [PATCH 123/262] USB: mxu11x0: fix memory leak on usb_serial private
 data

On nominal execution, private data allocated on port_probe and attach
are never freed. Add port_remove and release callbacks to free them
respectively.

Signed-off-by: Mathieu OTHACEHE <m.othacehe@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/mxu11x0.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/usb/serial/mxu11x0.c b/drivers/usb/serial/mxu11x0.c
index e3c3f57c2d82..619607323bfd 100644
--- a/drivers/usb/serial/mxu11x0.c
+++ b/drivers/usb/serial/mxu11x0.c
@@ -368,6 +368,16 @@ static int mxu1_port_probe(struct usb_serial_port *port)
 	return 0;
 }
 
+static int mxu1_port_remove(struct usb_serial_port *port)
+{
+	struct mxu1_port *mxport;
+
+	mxport = usb_get_serial_port_data(port);
+	kfree(mxport);
+
+	return 0;
+}
+
 static int mxu1_startup(struct usb_serial *serial)
 {
 	struct mxu1_device *mxdev;
@@ -427,6 +437,14 @@ err_free_mxdev:
 	return err;
 }
 
+static void mxu1_release(struct usb_serial *serial)
+{
+	struct mxu1_device *mxdev;
+
+	mxdev = usb_get_serial_data(serial);
+	kfree(mxdev);
+}
+
 static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
 			   u8 mask, u8 byte)
 {
@@ -957,7 +975,9 @@ static struct usb_serial_driver mxu11x0_device = {
 	.id_table		= mxu1_idtable,
 	.num_ports		= 1,
 	.port_probe             = mxu1_port_probe,
+	.port_remove            = mxu1_port_remove,
 	.attach			= mxu1_startup,
+	.release                = mxu1_release,
 	.open			= mxu1_open,
 	.close			= mxu1_close,
 	.ioctl			= mxu1_ioctl,

From b3122023df935cf14bf951da98ca598d71b9f826 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sun, 24 Jan 2016 15:24:12 +0900
Subject: [PATCH 124/262] arm64: Fix an enum typo in mm/dump.c

This patch fixes a typo in mm/dump.c:
"MODUELS_END_NR" should be "MODULES_END_NR".

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 5a22a119a74c..0adbebbc2803 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -46,7 +46,7 @@ enum address_markers_idx {
 	PCI_START_NR,
 	PCI_END_NR,
 	MODULES_START_NR,
-	MODUELS_END_NR,
+	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 

From 4152b387da81617c80cb2946b2d56e3958906b3e Mon Sep 17 00:00:00 2001
From: John Ernberg <john.ernberg@actia.se>
Date: Mon, 25 Jan 2016 12:27:17 +0000
Subject: [PATCH 125/262] USB: option: fix Cinterion AHxx enumeration

In certain kernel configurations where the cdc_ether and option drivers
are compiled as modules there can occur a race condition in enumeration.
This causes the option driver to enumerate the ethernet(wwan) interface
as usb-serial interfaces.

usb-devices output for the modem:
T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  5 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=1e2d ProdID=0055 Rev=00.00
S:  Manufacturer=Cinterion
S:  Product=AHx
C:  #Ifs= 6 Cfg#= 1 Atr=e0 MxPwr=10mA
I:  If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 4 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether
I:  If#= 5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether

Signed-off-by: John Ernberg <john.ernberg@actia.se>
Fixes: 1941138e1c02 ("USB: added support for Cinterion's products...")
Cc: stable <stable@vger.kernel.org>	# v3.9: 8ff10bdb14a52
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 75350fc550a1..db86e512e0fc 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1695,7 +1695,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) },
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8),
 		.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
-	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) },
+	{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) },
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX),
 		.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, 

From 23a9d5dcb6ae114733fac4757b0b154571c21129 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Fri, 8 Jan 2016 11:52:04 -0200
Subject: [PATCH 126/262] drm/etnaviv: remove owner assignment from
 platform_driver

This platform_driver does not need to set an owner as it will be
populated by the driver core.

Generated by scripts/coccinelle/api/platform_no_drv_owner.cocci.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_drv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 5c89ebb52fd2..e8858985f01e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -668,7 +668,6 @@ static struct platform_driver etnaviv_platform_driver = {
 	.probe      = etnaviv_pdev_probe,
 	.remove     = etnaviv_pdev_remove,
 	.driver     = {
-		.owner  = THIS_MODULE,
 		.name   = "etnaviv",
 		.of_match_table = dt_match,
 	},

From 339073ef77e45e87ec4cc8671b2d2328dcfd31f0 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 22 Jan 2016 12:03:03 +0100
Subject: [PATCH 127/262] drm/etnaviv: hold object lock while getting pages for
 coredump

While all objects that get coredumped have an active IOVA and thus
pages already populated, etnaviv_gem_get_pages() still requires the
object lock to be held.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_dump.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index bf8fa859e8be..fd7d3e989e79 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -201,7 +201,9 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
 		obj = vram->object;
 
+		mutex_lock(&obj->lock);
 		pages = etnaviv_gem_get_pages(obj);
+		mutex_unlock(&obj->lock);
 		if (pages) {
 			int j;
 

From f6427760a29ba3fd968e27ea567b1ebdd500740b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 24 Jan 2016 17:32:13 +0000
Subject: [PATCH 128/262] drm/etnaviv: fix failure path if model is zero

Fix the failure path to call pm_runtime_mark_last_busy() when failing
due to the model field being zero.

Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 056a72e6ed26..e0e68dd7565d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -511,8 +511,8 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
 	if (gpu->identity.model == 0) {
 		dev_err(gpu->dev, "Unknown GPU model\n");
-		pm_runtime_put_autosuspend(gpu->dev);
-		return -ENXIO;
+		ret = -ENXIO;
+		goto fail;
 	}
 
 	ret = etnaviv_hw_reset(gpu);

From 6b20e0ad2e39b77b7d4a48cc06c4de39768c7bb8 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 25 Jan 2016 16:30:22 +0100
Subject: [PATCH 129/262] btrfs: add free space tree to lockdep classes

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c67c129fe99a..ca78bf2ed287 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -176,6 +176,7 @@ static struct btrfs_lockdep_keyset {
 	{ .id = BTRFS_TREE_RELOC_OBJECTID,	.name_stem = "treloc"	},
 	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	.name_stem = "dreloc"	},
 	{ .id = BTRFS_UUID_TREE_OBJECTID,	.name_stem = "uuid"	},
+	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID,	.name_stem = "free-space" },
 	{ .id = 0,				.name_stem = "tree"	},
 };
 

From 3e4c5efbb3ac7c9c4fb5f33b659fa98afe568ab1 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 25 Jan 2016 16:47:10 +0100
Subject: [PATCH 130/262] btrfs: add free space tree to the cow-only list

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/relocation.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ef6d8fc85853..c5540302402d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 	    root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
 	    root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+	    root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
 		return 1;
 	return 0;
 }

From 186bac815227a4c26a0ad2f18c7450015d93ed0a Mon Sep 17 00:00:00 2001
From: Matthew Dawson <matthew@mjdsystems.ca>
Date: Mon, 25 Jan 2016 10:34:12 -0500
Subject: [PATCH 131/262] drm/radeon: Ensure radeon bo is unreserved in
 radeon_gem_va_ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Found with lockdep while testing gpu reset.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Matthew Dawson <matthew@mjdsystems.ca>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_gem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 3dcc5733ff69..e26c963f2e93 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -663,6 +663,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 	bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
 	if (!bo_va) {
 		args->operation = RADEON_VA_RESULT_ERROR;
+		radeon_bo_unreserve(rbo);
 		drm_gem_object_unreference_unlocked(gobj);
 		return -ENOENT;
 	}

From 7fd13615992ae0fc132c9abb24511be43f3b5d9d Mon Sep 17 00:00:00 2001
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Date: Thu, 21 Jan 2016 09:48:14 -0200
Subject: [PATCH 132/262] tracing/dma-buf/fence: Fix timeline str value on
 fence_annotate_wait_on

timeline was wrongly assigned with ->get_driver_name().

Link: http://lkml.kernel.org/r/1453376895-30747-1-git-send-email-gustavo@padovan.org

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/fence.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h
index 98feb1b82896..d6dfa05ba322 100644
--- a/include/trace/events/fence.h
+++ b/include/trace/events/fence.h
@@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on,
 
 	TP_STRUCT__entry(
 		__string(driver, fence->ops->get_driver_name(fence))
-		__string(timeline, fence->ops->get_driver_name(fence))
+		__string(timeline, fence->ops->get_timeline_name(fence))
 		__field(unsigned int, context)
 		__field(unsigned int, seqno)
 

From 462b3f161beb62eeb290f4ec52f5ead29a2f8ac7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 25 Jan 2016 13:59:21 +0100
Subject: [PATCH 133/262] ALSA: compress: Disable GET_CODEC_CAPS ioctl for some
 architectures

Some architectures like PowerPC can handle the maximum struct size in
an ioctl only up to 13 bits, and struct snd_compr_codec_caps used by
SNDRV_COMPRESS_GET_CODEC_CAPS ioctl overflows this limit.  This
problem was revealed recently by a powerpc change, as it's now treated
as a fatal build error.

This patch is a stop-gap for that: for architectures with less than 14
bit ioctl struct size, get rid of the handling of the relevant ioctl.
We should provide an alternative equivalent ioctl code later, but for
now just paper over it.  Luckily, the compress API hasn't been used on
such architectures, so the impact must be effectively zero.

Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/compress_offload.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 18b8dc45bb8f..7fac3cae8abd 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -46,6 +46,13 @@
 #include <sound/compress_offload.h>
 #include <sound/compress_driver.h>
 
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
 /* TODO:
  * - add substream support for multiple devices in case of
  *	SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@ out:
 	return retval;
 }
 
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 static int
 snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
 {
@@ -463,6 +471,7 @@ out:
 	kfree(caps);
 	return retval;
 }
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
 
 /* revisit this with snd_pcm_preallocate_xxx */
 static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
 		retval = snd_compr_get_caps(stream, arg);
 		break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 	case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
 		retval = snd_compr_get_codec_caps(stream, arg);
 		break;
+#endif
 	case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
 		retval = snd_compr_set_params(stream, arg);
 		break;

From de0ee0edb21fbab4c7afa3e94573ecfebfb0244e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 21 Jan 2016 10:17:54 +0000
Subject: [PATCH 134/262] Btrfs: fix race between fsync and lockless direct IO
 writes

An fsync, using the fast path, can race with a concurrent lockless direct
IO write and end up logging a file extent item that points to an extent
that wasn't written to yet. This is because the fast fsync path collects
ordered extents into a local list and then collects all the new extent
maps to log file extent items based on them, while the direct IO write
path creates the new extent map before it creates the corresponding
ordered extent (and submitting the respective bio(s)).

So fix this by making the direct IO write path create ordered extents
before the extent maps and make the fast fsync path collect any new
ordered extents after it collects the extent maps.
Note that making the fsync handler call inode_dio_wait() (after acquiring
the inode's i_mutex) would not work and lead to a deadlock when doing
AIO, as through AIO we end up in a path where the fsync handler is called
(through dio_aio_complete_work() -> dio_complete() -> vfs_fsync_range())
before the inode's dio counter is decremented (inode_dio_wait() waits
for this counter to have a value of zero).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/inode.c    | 36 ++++++++++++++++++++++++++++--------
 fs/btrfs/tree-log.c | 14 +++++++++++---
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b8bb7591ff9f..e4565456eb01 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7115,21 +7115,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	if (ret)
 		return ERR_PTR(ret);
 
-	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-			      ins.offset, ins.offset, ins.offset, 0);
-	if (IS_ERR(em)) {
-		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		return em;
-	}
-
+	/*
+	 * Create the ordered extent before the extent map. This is to avoid
+	 * races with the fast fsync path that would lead to it logging file
+	 * extent items that point to disk extents that were not yet written to.
+	 * The fast fsync path collects ordered extents into a local list and
+	 * then collects all the new extent maps, so we must create the ordered
+	 * extent first and make sure the fast fsync path collects any new
+	 * ordered extents after collecting new extent maps as well.
+	 * The fsync path simply can not rely on inode_dio_wait() because it
+	 * causes deadlock with AIO.
+	 */
 	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
 					   ins.offset, ins.offset, 0);
 	if (ret) {
 		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		free_extent_map(em);
 		return ERR_PTR(ret);
 	}
 
+	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+			      ins.offset, ins.offset, ins.offset, 0);
+	if (IS_ERR(em)) {
+		struct btrfs_ordered_extent *oe;
+
+		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+		oe = btrfs_lookup_ordered_extent(inode, start);
+		ASSERT(oe);
+		if (WARN_ON(!oe))
+			return em;
+		set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+		set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+		btrfs_remove_ordered_extent(inode, oe);
+		/* Once for our lookup and once for the ordered extents tree. */
+		btrfs_put_ordered_extent(oe);
+		btrfs_put_ordered_extent(oe);
+	}
 	return em;
 }
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12cc9d2f..978c3a810893 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     struct btrfs_path *path,
 				     struct list_head *logged_list,
-				     struct btrfs_log_ctx *ctx)
+				     struct btrfs_log_ctx *ctx,
+				     const u64 start,
+				     const u64 end)
 {
 	struct extent_map *em, *n;
 	struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-
+	/*
+	 * Collect any new ordered extents within the range. This is to
+	 * prevent logging file extent items without waiting for the disk
+	 * location they point to being written. We do this only to deal
+	 * with races against concurrent lockless direct IO writes.
+	 */
+	btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
 			goto out_unlock;
 		}
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-						&logged_list, ctx);
+						&logged_list, ctx, start, end);
 		if (ret) {
 			err = ret;
 			goto out_unlock;

From 0a95b851370b84a4b9d92ee6d1fa0926901d0454 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: Fri, 22 Jan 2016 09:28:38 +0800
Subject: [PATCH 135/262] btrfs: async-thread: Fix a use-after-free error for
 trace

Parameter of trace_btrfs_work_queued() can be freed in its workqueue.
So no one use use that pointer after queue_work().

Fix the user-after-free bug by move the trace line before queue_work().

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/async-thread.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 88d9af3d4581..5fb60ea7eee2 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
 		list_add_tail(&work->ordered_list, &wq->ordered_list);
 		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	queue_work(wq->normal_wq, &work->normal_work);
 	trace_btrfs_work_queued(work);
+	queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,

From 80ad623edd2d0ccb47d85357ee31c97e6c684e82 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 25 Jan 2016 11:02:06 +0100
Subject: [PATCH 136/262] Revert "btrfs: clear PF_NOFREEZE in
 cleaner_kthread()"

This reverts commit 696249132158014d594896df3a81390616069c5c. The
cleaner thread can block freezing when there's a snapshot cleaning in
progress and the other threads get suspended first. From the logs
provided by Martin we're waiting for reading extent pages:

kernel: PM: Syncing filesystems ... done.
kernel: Freezing user space processes ... (elapsed 0.015 seconds) done.
kernel: Freezing remaining freezable tasks ...
kernel: Freezing of tasks failed after 20.003 seconds (1 tasks refusing to freeze, wq_busy=0):
kernel: btrfs-cleaner   D ffff88033dd13bc0     0   152      2 0x00000000
kernel: ffff88032ebc2e00 ffff88032e750000 ffff88032e74fa50 7fffffffffffffff
kernel: ffffffff814a58df 0000000000000002 ffffea000934d580 ffffffff814a5451
kernel: 7fffffffffffffff ffffffff814a6e8f 0000000000000000 0000000000000020
kernel: Call Trace:
kernel: [<ffffffff814a58df>] ? bit_wait+0x2c/0x2c
kernel: [<ffffffff814a5451>] ? schedule+0x6f/0x7c
kernel: [<ffffffff814a6e8f>] ? schedule_timeout+0x2f/0xd8
kernel: [<ffffffff81076f94>] ? timekeeping_get_ns+0xa/0x2e
kernel: [<ffffffff81077603>] ? ktime_get+0x36/0x44
kernel: [<ffffffff814a4f6c>] ? io_schedule_timeout+0x94/0xf2
kernel: [<ffffffff814a4f6c>] ? io_schedule_timeout+0x94/0xf2
kernel: [<ffffffff814a590b>] ? bit_wait_io+0x2c/0x30
kernel: [<ffffffff814a5694>] ? __wait_on_bit+0x41/0x73
kernel: [<ffffffff8109eba8>] ? wait_on_page_bit+0x6d/0x72
kernel: [<ffffffff8105d718>] ? autoremove_wake_function+0x2a/0x2a
kernel: [<ffffffff811a02d7>] ? read_extent_buffer_pages+0x1bd/0x203
kernel: [<ffffffff8117d9e9>] ? free_root_pointers+0x4c/0x4c
kernel: [<ffffffff8117e831>] ? btree_read_extent_buffer_pages.constprop.57+0x5a/0xe9
kernel: [<ffffffff8117f4f3>] ? read_tree_block+0x2d/0x45
kernel: [<ffffffff8116782a>] ? read_block_for_search.isra.34+0x22a/0x26b
kernel: [<ffffffff811656c3>] ? btrfs_set_path_blocking+0x1e/0x4a
kernel: [<ffffffff8116919b>] ? btrfs_search_slot+0x648/0x736
kernel: [<ffffffff81170559>] ? btrfs_lookup_extent_info+0xb7/0x2c7
kernel: [<ffffffff81170ee5>] ? walk_down_proc+0x9c/0x1ae
kernel: [<ffffffff81171c9d>] ? walk_down_tree+0x40/0xa4
kernel: [<ffffffff8117375f>] ? btrfs_drop_snapshot+0x2da/0x664
kernel: [<ffffffff8104ff21>] ? finish_task_switch+0x126/0x167
kernel: [<ffffffff811850f8>] ? btrfs_clean_one_deleted_snapshot+0xa6/0xb0
kernel: [<ffffffff8117eaba>] ? cleaner_kthread+0x13e/0x17b
kernel: [<ffffffff8117e97c>] ? btrfs_item_end+0x33/0x33
kernel: [<ffffffff8104d256>] ? kthread+0x95/0x9d
kernel: [<ffffffff8104d1c1>] ? kthread_parkme+0x16/0x16
kernel: [<ffffffff814a7b5f>] ? ret_from_fork+0x3f/0x70
kernel: [<ffffffff8104d1c1>] ? kthread_parkme+0x16/0x16

As this affects a released kernel (4.4) we need a minimal fix for
stable kernels.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=108361
Reported-by: Martin Ziegler <ziegler@uni-freiburg.de>
CC: stable@vger.kernel.org # 4.4
CC: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/disk-io.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 26ef14152093..404e894c33d9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1787,7 +1787,6 @@ static int cleaner_kthread(void *arg)
 	int again;
 	struct btrfs_trans_handle *trans;
 
-	set_freezable();
 	do {
 		again = 0;
 

From f95429eccc570dc45d589c327bfcfddcdc3e8228 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Mon, 25 Jan 2016 18:06:48 -0500
Subject: [PATCH 137/262] drm/radeon: only init fbdev if we have connectors

This fixes an issue that was noticed on an optimus/prime laptop with
a kernel that was old enough to not support the integrated intel gfx
(which was driving all the outputs), but did have support for the
discrete radeon gpu.  The end result was not falling back to VESA and
leaving the user with a black screen.

(Plus it is kind of silly to create an framebuffer device if there
are no outputs hooked up to the gpu.)

Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_display.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index b3bb92368ae0..298ea1c453c3 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1670,8 +1670,10 @@ int radeon_modeset_init(struct radeon_device *rdev)
 	/* setup afmt */
 	radeon_afmt_init(rdev);
 
-	radeon_fbdev_init(rdev);
-	drm_kms_helper_poll_init(rdev->ddev);
+	if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+		radeon_fbdev_init(rdev);
+		drm_kms_helper_poll_init(rdev->ddev);
+	}
 
 	/* do pm late init */
 	ret = radeon_pm_late_init(rdev);

From f49d45c973984eb805eb989b6220aa7c1ee30bc2 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 26 Jan 2016 00:30:33 -0500
Subject: [PATCH 138/262] drm/amdgpu: don't init fbdev if we don't have any
 connectors

Don't init fbdev if we don't have connectors.  E.g., if you have
a PX laptop with the displays attached to an IGP with no driver
support, you may end up with a blank screen rather than falling
back to vesa, etc.

Based on a similar radeon patch from Rob Clark.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index cfb6caad2a73..919146780a15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -333,6 +333,10 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
 	if (!adev->mode_info.mode_config_initialized)
 		return 0;
 
+	/* don't init fbdev if there are no connectors */
+	if (list_empty(&adev->ddev->mode_config.connector_list))
+		return 0;
+
 	/* select 8 bpp console on low vram cards */
 	if (adev->mc.real_vram_size <= (32*1024*1024))
 		bpp_sel = 8;

From a28e35171cb1ff84197e8d271b65aaeb8c404827 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Fri, 22 Jan 2016 14:12:02 +0100
Subject: [PATCH 139/262] i2c: piix4: Fix SB800 locking

We need a single mutex for all 4 shared SMBus ports on the SB800. A
per-port mutex doesn't protect us from concurrent access.

In theory the mutex should be per PCI device, however in practice we
know that there's only ever a single instance of the device in a given
system so we can use a global.

Also take the mutex during initialization, as first port may be already
in use when second port is initialized.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Tested-by: Christian Fetzer <fetzer.ch@gmail.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
[wsa: made mutex static]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-piix4.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index e04598595073..5fd7505b9018 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -137,6 +137,7 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 };
 
 /* SB800 globals */
+static DEFINE_MUTEX(piix4_mutex_sb800);
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
 	"SDA0", "SDA2", "SDA3", "SDA4"
 };
@@ -148,7 +149,6 @@ struct i2c_piix4_adapdata {
 	/* SB800 */
 	bool sb800_main;
 	unsigned short port;
-	struct mutex *mutex;
 };
 
 static int piix4_setup(struct pci_dev *PIIX4_dev,
@@ -275,10 +275,12 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
 	else
 		smb_en = (aux) ? 0x28 : 0x2c;
 
+	mutex_lock(&piix4_mutex_sb800);
 	outb_p(smb_en, SB800_PIIX4_SMB_IDX);
 	smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
 	outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
 	smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
+	mutex_unlock(&piix4_mutex_sb800);
 
 	if (!smb_en) {
 		smb_en_status = smba_en_lo & 0x10;
@@ -559,7 +561,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 	u8 port;
 	int retval;
 
-	mutex_lock(adapdata->mutex);
+	mutex_lock(&piix4_mutex_sb800);
 
 	outb_p(SB800_PIIX4_PORT_IDX, SB800_PIIX4_SMB_IDX);
 	smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
@@ -574,7 +576,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
 	outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
 
-	mutex_unlock(adapdata->mutex);
+	mutex_unlock(&piix4_mutex_sb800);
 
 	return retval;
 }
@@ -673,17 +675,10 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
 static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 {
-	struct mutex *mutex;
 	struct i2c_piix4_adapdata *adapdata;
 	int port;
 	int retval;
 
-	mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
-	if (mutex == NULL)
-		return -ENOMEM;
-
-	mutex_init(mutex);
-
 	for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
 		retval = piix4_add_adapter(dev, smba,
 					   piix4_main_port_names_sb800[port],
@@ -696,7 +691,6 @@ static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 		adapdata = i2c_get_adapdata(piix4_main_adapters[port]);
 		adapdata->sb800_main = true;
 		adapdata->port = port;
-		adapdata->mutex = mutex;
 	}
 
 	return retval;
@@ -714,8 +708,6 @@ error:
 		}
 	}
 
-	kfree(mutex);
-
 	return retval;
 }
 
@@ -798,10 +790,8 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
 		i2c_del_adapter(adap);
 		if (adapdata->port == 0) {
 			release_region(adapdata->smba, SMBIOSIZE);
-			if (adapdata->sb800_main) {
-				kfree(adapdata->mutex);
+			if (adapdata->sb800_main)
 				release_region(SB800_PIIX4_SMB_IDX, 2);
-			}
 		}
 		kfree(adapdata);
 		kfree(adap);

From 83c60158ebf16417af28b338bc0380cf17f2b9f9 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 25 Jan 2016 12:17:07 +0100
Subject: [PATCH 140/262] i2c: piix4: Fully initialize SB800 before it is
 registered

This closes a race window where I2C device drivers attempt to access
I2C buses which aren't fully initialized yet.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Tested-by: Christian Fetzer <fetzer.ch@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-piix4.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 5fd7505b9018..f79a84ef1aa4 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -627,6 +627,7 @@ static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
+			     bool sb800_main, unsigned short port,
 			     const char *name, struct i2c_adapter **padap)
 {
 	struct i2c_adapter *adap;
@@ -641,7 +642,8 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
 	adap->owner = THIS_MODULE;
 	adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
-	adap->algo = &smbus_algorithm;
+	adap->algo = sb800_main ? &piix4_smbus_algorithm_sb800
+				: &smbus_algorithm;
 
 	adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
 	if (adapdata == NULL) {
@@ -651,6 +653,8 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 	}
 
 	adapdata->smba = smba;
+	adapdata->sb800_main = sb800_main;
+	adapdata->port = port;
 
 	/* set up the sysfs linkage to our parent device */
 	adap->dev.parent = &dev->dev;
@@ -680,17 +684,11 @@ static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 	int retval;
 
 	for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
-		retval = piix4_add_adapter(dev, smba,
+		retval = piix4_add_adapter(dev, smba, true, port,
 					   piix4_main_port_names_sb800[port],
 					   &piix4_main_adapters[port]);
 		if (retval < 0)
 			goto error;
-
-		piix4_main_adapters[port]->algo = &piix4_smbus_algorithm_sb800;
-
-		adapdata = i2c_get_adapdata(piix4_main_adapters[port]);
-		adapdata->sb800_main = true;
-		adapdata->port = port;
 	}
 
 	return retval;
@@ -748,7 +746,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			return retval;
 
 		/* Try to register main SMBus adapter, give up if we can't */
-		retval = piix4_add_adapter(dev, retval, "main",
+		retval = piix4_add_adapter(dev, retval, false, 0, "main",
 					   &piix4_main_adapters[0]);
 		if (retval < 0)
 			return retval;
@@ -775,7 +773,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	if (retval > 0) {
 		/* Try to add the aux adapter if it exists,
 		 * piix4_add_adapter will clean up if this fails */
-		piix4_add_adapter(dev, retval, piix4_aux_port_name_sb800,
+		piix4_add_adapter(dev, retval, false, 0,
+				  piix4_aux_port_name_sb800,
 				  &piix4_aux_adapter);
 	}
 

From 5a4ff9ec8d6edd2ab1cfe8ce6a080d6e57cbea9a Mon Sep 17 00:00:00 2001
From: Guillaume Fougnies <guillaume@eulerian.com>
Date: Tue, 26 Jan 2016 00:28:27 +0100
Subject: [PATCH 141/262] ALSA: usb-audio: Fix TEAC UD-501/UD-503/NT-503 usb
 delay

TEAC UD-501/UD-503/NT-503 fail to switch properly between different
rate/format. Similar to 'Playback Design', this patch corrects the
invalid clock source error for TEAC products and avoids complete
freeze of the usb interface of 503 series.

Signed-off-by: Guillaume Fougnies <guillaume@eulerian.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 23ea6d800c4c..a75d9ce7d77a 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1205,8 +1205,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev)
 	 * "Playback Design" products need a 50ms delay after setting the
 	 * USB interface.
 	 */
-	if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+	switch (le16_to_cpu(dev->descriptor.idVendor)) {
+	case 0x23ba: /* Playback Design */
+	case 0x0644: /* TEAC Corp. */
 		mdelay(50);
+		break;
+	}
 }
 
 void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1225,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
 	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		mdelay(20);
 
+	/*
+	 * "TEAC Corp." products need a 20ms delay after each
+	 * class compliant request
+	 */
+	if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+		mdelay(20);
+
 	/* Marantz/Denon devices with USB DAC functionality need a delay
 	 * after each class compliant request
 	 */

From 08c6e8cc66282a082484480c1a5641bc27d26c55 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 15 Jan 2016 22:02:12 +0200
Subject: [PATCH 142/262] i2c: designware-pci: use IRQF_COND_SUSPEND flag

This is effectively reapplies the commit b0898fdaffb2 ("i2c: designware-pci: use
IRQF_COND_SUSPEND flag") after the commit d80d134182ba ("i2c: designware: Move
common probe code into i2c_dw_probe()"). Original message as follows.

The mentioned flag fixes a warning on Intel Edison board since one of the I2C
controller shares IRQ line with watchdog timer.

Fixes: d80d134182ba (i2c: designware: Move common probe code into i2c_dw_probe())
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index ba9732c236c5..10fbd6d841e0 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -874,7 +874,8 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 	i2c_set_adapdata(adap, dev);
 
 	i2c_dw_disable_int(dev);
-	r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, IRQF_SHARED,
+	r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr,
+			     IRQF_SHARED | IRQF_COND_SUSPEND,
 			     dev_name(dev->dev), dev);
 	if (r) {
 		dev_err(dev->dev, "failure requesting irq %i: %d\n",

From 2989be09a8a9d62a785137586ad941f916e08f83 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 14 Jan 2016 16:00:41 +0200
Subject: [PATCH 143/262] virtio_pci: fix use after free on release

KASan detected a use-after-free error in virtio-pci remove code. In
virtio_pci_remove(), vp_dev is still used after being freed in
unregister_virtio_device() (in virtio_pci_release_dev() more
precisely).

To fix, keep a reference until cleanup is done.

Fixes: 63bd62a08ca4 ("virtio_pci: defer kfree until release callback")
Reported-by: Jerome Marchand <jmarchan@redhat.com>
Cc: stable@vger.kernel.org
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Jerome Marchand <jmarchan@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 36205c27c4d0..f6bed86c17f9 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -545,6 +545,7 @@ err_enable_device:
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct device *dev = get_device(&vp_dev->vdev.dev);
 
 	unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 		virtio_pci_modern_remove(vp_dev);
 
 	pci_disable_device(pci_dev);
+	put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {

From a7c490333df3cff5086ddf19a0837529304fa097 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 20 Jan 2016 21:12:58 +0200
Subject: [PATCH 144/262] tools/virtio: use virt_xxx barriers

Fix build after API changes.

Reported-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/asm/barrier.h    | 22 +++++++++++++---------
 tools/virtio/linux/compiler.h |  9 +++++++++
 tools/virtio/linux/kernel.h   |  1 +
 3 files changed, 23 insertions(+), 9 deletions(-)
 create mode 100644 tools/virtio/linux/compiler.h

diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926bda88..ba34f9e96efd 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()	mb()
-# define dma_rmb()	barrier()
-# define dma_wmb()	barrier()
-# define smp_rmb()	barrier()
-# define smp_wmb()	barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+	typeof(var) virt_store_mb_value = (value); \
+	__atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+			  __ATOMIC_SEQ_CST); \
+	barrier(); \
+} while (0);
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb()	abort()
-# define wmb()	abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 000000000000..845960e1cbf2
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+	(*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var))))
+
+#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d5691ba7..033849948215 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>

From fb9b050ce9695cf2769a850d94aa2ab39c8c3cd5 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 27 Dec 2015 15:04:42 +0200
Subject: [PATCH 145/262] sh: fix smp_store_mb for !SMP

sh variant of smp_store_mb() calls xchg() on !SMP which is stronger than
implied by both the name and the documentation.

commit 90a3ccb0be538a914e6a5c51ae919762261563ad ("sh: define __smp_xxx,
fix smp_store_mb for !SMP") was supposed to fix it but
left the bug in place.

Drop smp_store_mb, so that code in asm-generic/barrier.h
will define it correctly depending on CONFIG_SMP.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 arch/sh/include/asm/barrier.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index f887c6465a82..8a84e05adb2e 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
 

From 481eaec37e91e2b33f17275901172f50ce2c71e8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 21 Jan 2016 14:44:10 +0200
Subject: [PATCH 146/262] tools/virtio: add ringtest utilities

This adds micro-benchmarks useful for tuning virtio ring layouts.
Three layouts are currently implemented:

- virtio 0.9 compatible one
- an experimental extension bypassing the ring index, polling ring
  itself instead
- an experimental extension bypassing avail and used ring completely

Typical use:

sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring

It doesn't depend on the kernel directly, but it's handy
to have as much virtio stuff as possible in one tree.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/ringtest/Makefile           |  22 ++
 tools/virtio/ringtest/README             |   2 +
 tools/virtio/ringtest/main.c             | 366 +++++++++++++++++++++++
 tools/virtio/ringtest/main.h             | 119 ++++++++
 tools/virtio/ringtest/ring.c             | 272 +++++++++++++++++
 tools/virtio/ringtest/run-on-all.sh      |  24 ++
 tools/virtio/ringtest/virtio_ring_0_9.c  | 316 +++++++++++++++++++
 tools/virtio/ringtest/virtio_ring_poll.c |   2 +
 8 files changed, 1123 insertions(+)
 create mode 100644 tools/virtio/ringtest/Makefile
 create mode 100644 tools/virtio/ringtest/README
 create mode 100644 tools/virtio/ringtest/main.c
 create mode 100644 tools/virtio/ringtest/main.h
 create mode 100644 tools/virtio/ringtest/ring.c
 create mode 100755 tools/virtio/ringtest/run-on-all.sh
 create mode 100644 tools/virtio/ringtest/virtio_ring_0_9.c
 create mode 100644 tools/virtio/ringtest/virtio_ring_poll.c

diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 000000000000..feaa64ac4630
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+	-rm main.o
+	-rm ring.o ring
+	-rm virtio_ring_0_9.o virtio_ring_0_9
+	-rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 000000000000..34e94c46104f
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 000000000000..3a5ff438bd62
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = write(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = read(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void kick(void)
+{
+	notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+	wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+	notify(callfd);
+}
+
+void wait_for_call(void)
+{
+	wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+	cpu_set_t cpuset;
+	int ret;
+	pthread_t self;
+	long int cpu;
+	char *endptr;
+
+	if (!arg)
+		return;
+
+	cpu = strtol(arg, &endptr, 0);
+	assert(!*endptr);
+
+	assert(cpu >= 0 || cpu < CPU_SETSIZE);
+
+	self = pthread_self();
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+	assert(!ret);
+}
+
+static void run_guest(void)
+{
+	int completed_before;
+	int completed = 0;
+	int started = 0;
+	int bufs = runcycles;
+	int spurious = 0;
+	int r;
+	unsigned len;
+	void *buf;
+	int tokick = batch;
+
+	for (;;) {
+		if (do_sleep)
+			disable_call();
+		completed_before = completed;
+		do {
+			if (started < bufs &&
+			    started - completed < max_outstanding) {
+				r = add_inbuf(0, NULL, "Hello, world!");
+				if (__builtin_expect(r == 0, true)) {
+					++started;
+					if (!--tokick) {
+						tokick = batch;
+						if (do_sleep)
+							kick_available();
+					}
+
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (get_buf(&len, &buf)) {
+				++completed;
+				if (__builtin_expect(completed == bufs, false))
+					return;
+				r = 0;
+			}
+		} while (r == 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (do_sleep) {
+			if (enable_call())
+				wait_for_call();
+		} else {
+			poll_used();
+		}
+	}
+}
+
+static void run_host(void)
+{
+	int completed_before;
+	int completed = 0;
+	int spurious = 0;
+	int bufs = runcycles;
+	unsigned len;
+	void *buf;
+
+	for (;;) {
+		if (do_sleep) {
+			if (enable_kick())
+				wait_for_kick();
+		} else {
+			poll_avail();
+		}
+		if (do_sleep)
+			disable_kick();
+		completed_before = completed;
+		while (__builtin_expect(use_buf(&len, &buf), true)) {
+			if (do_sleep)
+				call_used();
+			++completed;
+			if (__builtin_expect(completed == bufs, false))
+				return;
+		}
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		if (completed == bufs)
+			break;
+	}
+}
+
+void *start_guest(void *arg)
+{
+	set_affinity(arg);
+	run_guest();
+	pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+	set_affinity(arg);
+	run_host();
+	pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.val = 'h',
+	},
+	{
+		.name = "host-affinity",
+		.has_arg = required_argument,
+		.val = 'H',
+	},
+	{
+		.name = "guest-affinity",
+		.has_arg = required_argument,
+		.val = 'G',
+	},
+	{
+		.name = "ring-size",
+		.has_arg = required_argument,
+		.val = 'R',
+	},
+	{
+		.name = "run-cycles",
+		.has_arg = required_argument,
+		.val = 'C',
+	},
+	{
+		.name = "outstanding",
+		.has_arg = required_argument,
+		.val = 'o',
+	},
+	{
+		.name = "batch",
+		.has_arg = required_argument,
+		.val = 'b',
+	},
+	{
+		.name = "sleep",
+		.has_arg = no_argument,
+		.val = 's',
+	},
+	{
+		.name = "relax",
+		.has_arg = no_argument,
+		.val = 'x',
+	},
+	{
+		.name = "exit",
+		.has_arg = no_argument,
+		.val = 'e',
+	},
+	{
+	}
+};
+
+static void help(void)
+{
+	fprintf(stderr, "Usage: <test> [--help]"
+		" [--host-affinity H]"
+		" [--guest-affinity G]"
+		" [--ring-size R (default: %d)]"
+		" [--run-cycles C (default: %d)]"
+		" [--batch b]"
+		" [--outstanding o]"
+		" [--sleep]"
+		" [--relax]"
+		" [--exit]"
+		"\n",
+		ring_size,
+		runcycles);
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	pthread_t host, guest;
+	void *tret;
+	char *host_arg = NULL;
+	char *guest_arg = NULL;
+	char *endptr;
+	long int c;
+
+	kickfd = eventfd(0, 0);
+	assert(kickfd >= 0);
+	callfd = eventfd(0, 0);
+	assert(callfd >= 0);
+
+	for (;;) {
+		int o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'H':
+			host_arg = optarg;
+			break;
+		case 'G':
+			guest_arg = optarg;
+			break;
+		case 'R':
+			ring_size = strtol(optarg, &endptr, 0);
+			assert(ring_size && !(ring_size & (ring_size - 1)));
+			assert(!*endptr);
+			break;
+		case 'C':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			runcycles = c;
+			break;
+		case 'o':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			max_outstanding = c;
+			break;
+		case 'b':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			batch = c;
+			break;
+		case 's':
+			do_sleep = true;
+			break;
+		case 'x':
+			do_relax = true;
+			break;
+		case 'e':
+			do_exit = true;
+			break;
+		default:
+			help();
+			exit(4);
+			break;
+		}
+	}
+
+	/* does nothing here, used to make sure all smp APIs compile */
+	smp_acquire();
+	smp_release();
+	smp_mb();
+done:
+
+	if (batch > max_outstanding)
+		batch = max_outstanding;
+
+	if (optind < argc) {
+		help();
+		exit(4);
+	}
+	alloc_ring();
+
+	ret = pthread_create(&host, NULL, start_host, host_arg);
+	assert(!ret);
+	ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+	assert(!ret);
+
+	ret = pthread_join(guest, &tret);
+	assert(!ret);
+	ret = pthread_join(host, &tret);
+	assert(!ret);
+	return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 000000000000..16917acb0ade
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+	unsigned long long t;
+
+	t = __rdtsc();
+	while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+	_Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+	if (do_relax)
+		cpu_relax();
+	else
+		/* prevent compiler from removing busy loops */
+		barrier();
+} 
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 000000000000..c25c8d248b6b
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+			      unsigned short next,
+			      unsigned short prev)
+{
+	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+	unsigned short flags;
+	unsigned short index;
+	unsigned len;
+	unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+	unsigned short kick_index;
+	unsigned char reserved0[HOST_GUEST_PADDING - 2];
+	unsigned short call_index;
+	unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+	void *buf; /* descriptor is writeable, we can't get buf from there */
+	void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+	unsigned avail_idx;
+	unsigned last_used_idx;
+	unsigned num_free;
+	unsigned kicked_avail_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned used_idx;
+	unsigned called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+
+	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	event = malloc(sizeof *event);
+	if (!event) {
+		perror("Unable to allocate event buffer.\n");
+		exit(3);
+	}
+	memset(event, 0, sizeof *event);
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	for (i = 0; i < ring_size; ++i) {
+		struct desc desc = {
+			.index = i,
+		};
+		ring[i] = desc;
+	}
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, index;
+
+	if (!guest.num_free)
+		return -1;
+
+	guest.num_free--;
+	head = (ring_size - 1) & (guest.avail_idx++);
+
+	/* Start with a write. On MESI architectures this helps
+	 * avoid a shared state with consumer that is polling this descriptor.
+	 */
+	ring[head].addr = (unsigned long)(void*)buf;
+	ring[head].len = len;
+	/* read below might bypass write above. That is OK because it's just an
+	 * optimization. If this happens, we will get the cache line in a
+	 * shared state which is unfortunate, but probably not worth it to
+	 * add an explicit full barrier to avoid this.
+	 */
+	barrier();
+	index = ring[head].index;
+	data[index].buf = buf;
+	data[index].data = datap;
+	/* Barrier A (for pairing) */
+	smp_release();
+	ring[head].flags = DESC_HW;
+
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+	unsigned index;
+	void *datap;
+
+	if (ring[head].flags & DESC_HW)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	*lenp = ring[head].len;
+	index = ring[head].index & (ring_size - 1);
+	datap = data[index].data;
+	*bufp = data[index].buf;
+	data[index].buf = NULL;
+	data[index].data = NULL;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	while (ring[head].flags & DESC_HW)
+		busy_wait();
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	event->call_index = guest.last_used_idx;
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!need_event(event->kick_index,
+			guest.avail_idx,
+			guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	event->kick_index = host.used_idx;
+	/* Barrier C (for pairing) */
+	smp_mb();
+	return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	while (!(ring[head].flags & DESC_HW))
+		busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	if (!(ring[head].flags & DESC_HW))
+		return false;
+
+	/* make sure length read below is not speculated */
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	/* simple in-order completion: we don't need
+	 * to touch index at all. This also means we
+	 * can just modify the descriptor in-place.
+	 */
+	ring[head].len--;
+	/* Make sure len is valid before flags.
+	 * Note: alternative is to write len and flags in one access -
+	 * possible on 64 bit architectures but wmb is free on Intel anyway
+	 * so I have no way to test whether it's a gain.
+	 */
+	/* Barrier B (for pairing) */
+	smp_release();
+	ring[head].flags = 0;
+	host.used_idx++;
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!need_event(event->call_index,
+			host.used_idx,
+			host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 000000000000..52b0f71ffa8d
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+	#Don't run guest and host on same CPU
+	#It actually works ok if using signalling
+	if
+		(echo "$@" | grep -e "--sleep" > /dev/null) || \
+			test $HOST_AFFINITY '!=' $cpu
+	then
+		echo "GUEST AFFINITY $cpu"
+		"$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+	fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 000000000000..47c9a1a18d36
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+	void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+	unsigned short avail_idx;
+	unsigned short last_used_idx;
+	unsigned short num_free;
+	unsigned short kicked_avail_idx;
+	unsigned short free_head;
+	unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned short used_idx;
+	unsigned short called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+	void *p;
+
+	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	memset(p, 0, vring_size(ring_size, 0x1000));
+	vring_init(&ring, ring_size, p, 0x1000);
+
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	/* Put everything in free lists. */
+	guest.free_head = 0;
+	for (i = 0; i < ring_size - 1; i++)
+		ring.desc[i].next = i + 1;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, avail;
+	struct vring_desc *desc;
+
+	if (!guest.num_free)
+		return -1;
+
+	head = guest.free_head;
+	guest.num_free--;
+
+	desc = ring.desc;
+	desc[head].flags = VRING_DESC_F_NEXT;
+	desc[head].addr = (unsigned long)(void *)buf;
+	desc[head].len = len;
+	/* We do it like this to simulate the way
+	 * we'd have to flip it if we had multiple
+	 * descriptors.
+	 */
+	desc[head].flags &= ~VRING_DESC_F_NEXT;
+	guest.free_head = desc[head].next;
+
+	data[head].data = datap;
+
+#ifdef RING_POLL
+	/* Barrier A (for pairing) */
+	smp_release();
+	avail = guest.avail_idx++;
+	ring.avail->ring[avail & (ring_size - 1)] =
+		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+	avail = (ring_size - 1) & (guest.avail_idx++);
+	ring.avail->ring[avail] = head;
+	/* Barrier A (for pairing) */
+	smp_release();
+#endif
+	ring.avail->idx = guest.avail_idx;
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head;
+	unsigned index;
+	void *datap;
+
+#ifdef RING_POLL
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	index &= ring_size - 1;
+#else
+	if (ring.used->idx == guest.last_used_idx)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+#endif
+	*lenp = ring.used->ring[head].len;
+	datap = data[index].data;
+	*bufp = (void*)(unsigned long)ring.desc[index].addr;
+	data[index].data = NULL;
+	ring.desc[index].next = guest.free_head;
+	guest.free_head = index;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	for (;;) {
+		unsigned index = ring.used->ring[head].id;
+
+		if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	unsigned head = guest.last_used_idx;
+
+	while (ring.used->idx == head)
+		busy_wait();
+#endif
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned short last_used_idx;
+
+	vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned short head = last_used_idx & (ring_size - 1);
+		unsigned index = ring.used->ring[head].id;
+
+		return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_avail_event(&ring),
+			      guest.avail_idx,
+			      guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = host.used_idx;
+
+	vring_avail_event(&ring) = head;
+	/* Barrier C (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+		return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+	unsigned head = host.used_idx;
+#ifdef RING_POLL
+	for (;;) {
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+		if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	while (ring.avail->idx == head)
+		busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned used_idx = host.used_idx;
+	struct vring_desc *desc;
+	unsigned head;
+
+#ifdef RING_POLL
+	head = ring.avail->ring[used_idx & (ring_size - 1)];
+	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+		return false;
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	desc = &ring.desc[head & (ring_size - 1)];
+#else
+	if (used_idx == ring.avail->idx)
+		return false;
+
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	head = ring.avail->ring[used_idx];
+	desc = &ring.desc[head];
+#endif
+
+	*lenp = desc->len;
+	*bufp = (void *)(unsigned long)desc->addr;
+
+	/* now update used ring */
+	ring.used->ring[used_idx].id = head;
+	ring.used->ring[used_idx].len = desc->len - 1;
+	/* Barrier B (for pairing) */
+	smp_release();
+	host.used_idx++;
+	ring.used->idx = host.used_idx;
+	
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_used_event(&ring),
+			      host.used_idx,
+			      host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 000000000000..84fc2c557aaa
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"

From b4abf91047cf054f203dcfac97e1038388826937 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Jan 2016 11:25:38 +0100
Subject: [PATCH 147/262] rtmutex: Make wait_lock irq safe

Sasha reported a lockdep splat about a potential deadlock between RCU boosting
rtmutex and the posix timer it_lock.

CPU0					CPU1

rtmutex_lock(&rcu->rt_mutex)
  spin_lock(&rcu->rt_mutex.wait_lock)
					local_irq_disable()
					spin_lock(&timer->it_lock)
					spin_lock(&rcu->mutex.wait_lock)
--> Interrupt
    spin_lock(&timer->it_lock)

This is caused by the following code sequence on CPU1

     rcu_read_lock()
     x = lookup();
     if (x)
     	spin_lock_irqsave(&x->it_lock);
     rcu_read_unlock();
     return x;

We could fix that in the posix timer code by keeping rcu read locked across
the spinlocked and irq disabled section, but the above sequence is common and
there is no reason not to support it.

Taking rt_mutex.wait_lock irq safe prevents the deadlock.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/futex.c           |  18 +++---
 kernel/locking/rtmutex.c | 135 +++++++++++++++++++++------------------
 2 files changed, 81 insertions(+), 72 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 0773f2b23b10..5d6ce6413ef1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1191,7 +1191,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
 	if (pi_state->owner != current)
 		return -EINVAL;
 
-	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -1217,22 +1217,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
 	else if (curval != uval)
 		ret = -EINVAL;
 	if (ret) {
-		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 		return ret;
 	}
 
-	raw_spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock(&pi_state->owner->pi_lock);
 
-	raw_spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	raw_spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock(&new_owner->pi_lock);
 
-	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
 	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -2127,11 +2127,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 		 * we returned due to timeout or signal without taking the
 		 * rt_mutex. Too late.
 		 */
-		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
 		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
 		if (!owner)
 			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
 		ret = fixup_pi_state_owner(uaddr, q, owner);
 		goto out;
 	}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8251e75dd9c0..3e746607abe5 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -99,13 +99,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
 
 	clear_rt_mutex_waiters(lock);
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	/*
 	 * If a new waiter comes in between the unlock and the cmpxchg
 	 * we have two situations:
@@ -147,11 +148,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	lock->owner = NULL;
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	return true;
 }
 #endif
@@ -433,7 +435,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	int ret = 0, depth = 0;
 	struct rt_mutex *lock;
 	bool detect_deadlock;
-	unsigned long flags;
 	bool requeue = true;
 
 	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	/*
 	 * [1] Task cannot go away as we did a get_task() before !
 	 */
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock_irq(&task->pi_lock);
 
 	/*
 	 * [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * operations.
 	 */
 	if (!raw_spin_trylock(&lock->wait_lock)) {
-		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+		raw_spin_unlock_irq(&task->pi_lock);
 		cpu_relax();
 		goto retry;
 	}
@@ -591,7 +592,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		/*
 		 * No requeue[7] here. Just release @task [8]
 		 */
-		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+		raw_spin_unlock(&task->pi_lock);
 		put_task_struct(task);
 
 		/*
@@ -599,14 +600,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		 * If there is no owner of the lock, end of chain.
 		 */
 		if (!rt_mutex_owner(lock)) {
-			raw_spin_unlock(&lock->wait_lock);
+			raw_spin_unlock_irq(&lock->wait_lock);
 			return 0;
 		}
 
 		/* [10] Grab the next task, i.e. owner of @lock */
 		task = rt_mutex_owner(lock);
 		get_task_struct(task);
-		raw_spin_lock_irqsave(&task->pi_lock, flags);
+		raw_spin_lock(&task->pi_lock);
 
 		/*
 		 * No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		top_waiter = rt_mutex_top_waiter(lock);
 
 		/* [13] Drop locks */
-		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock(&task->pi_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 
 		/* If owner is not blocked, end of chain. */
 		if (!next_lock)
@@ -643,7 +644,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	rt_mutex_enqueue(lock, waiter);
 
 	/* [8] Release the task */
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock(&task->pi_lock);
 	put_task_struct(task);
 
 	/*
@@ -661,14 +662,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		 */
 		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
 			wake_up_process(rt_mutex_top_waiter(lock)->task);
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 		return 0;
 	}
 
 	/* [10] Grab the next task, i.e. the owner of @lock */
 	task = rt_mutex_owner(lock);
 	get_task_struct(task);
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock(&task->pi_lock);
 
 	/* [11] requeue the pi waiters if necessary */
 	if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	top_waiter = rt_mutex_top_waiter(lock);
 
 	/* [13] Drop the locks */
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&task->pi_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	/*
 	 * Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	goto again;
 
  out_unlock_pi:
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irq(&task->pi_lock);
  out_put_task:
 	put_task_struct(task);
 
@@ -756,7 +757,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 /*
  * Try to take an rt-mutex
  *
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled
  *
  * @lock:   The lock to be acquired.
  * @task:   The task which wants to acquire the lock
@@ -766,8 +767,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 				struct rt_mutex_waiter *waiter)
 {
-	unsigned long flags;
-
 	/*
 	 * Before testing whether we can acquire @lock, we set the
 	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 	 * case, but conditionals are more expensive than a redundant
 	 * store.
 	 */
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock(&task->pi_lock);
 	task->pi_blocked_on = NULL;
 	/*
 	 * Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 	 */
 	if (rt_mutex_has_waiters(lock))
 		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock(&task->pi_lock);
 
 takeit:
 	/* We got the lock. */
@@ -883,7 +882,7 @@ takeit:
  *
  * Prepare waiter and propagate pi chain
  *
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 				   struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	struct rt_mutex_waiter *top_waiter = waiter;
 	struct rt_mutex *next_lock;
 	int chain_walk = 0, res;
-	unsigned long flags;
 
 	/*
 	 * Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	if (owner == task)
 		return -EDEADLK;
 
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock(&task->pi_lock);
 	__rt_mutex_adjust_prio(task);
 	waiter->task = task;
 	waiter->lock = lock;
@@ -921,12 +919,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
 	task->pi_blocked_on = waiter;
 
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock(&task->pi_lock);
 
 	if (!owner)
 		return 0;
 
-	raw_spin_lock_irqsave(&owner->pi_lock, flags);
+	raw_spin_lock(&owner->pi_lock);
 	if (waiter == rt_mutex_top_waiter(lock)) {
 		rt_mutex_dequeue_pi(owner, top_waiter);
 		rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	/* Store the lock on which owner is blocked or NULL */
 	next_lock = task_blocked_on_lock(owner);
 
-	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+	raw_spin_unlock(&owner->pi_lock);
 	/*
 	 * Even if full deadlock detection is on, if the owner is not
 	 * blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	 */
 	get_task_struct(owner);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
 					 next_lock, waiter, task);
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 
 	return res;
 }
@@ -971,15 +969,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  * Remove the top waiter from the current tasks pi waiter tree and
  * queue it up.
  *
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
  */
 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 				    struct rt_mutex *lock)
 {
 	struct rt_mutex_waiter *waiter;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&current->pi_lock, flags);
+	raw_spin_lock(&current->pi_lock);
 
 	waiter = rt_mutex_top_waiter(lock);
 
@@ -1001,7 +998,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 	 */
 	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
-	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+	raw_spin_unlock(&current->pi_lock);
 
 	wake_q_add(wake_q, waiter->task);
 }
@@ -1009,7 +1006,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 /*
  * Remove a waiter from a lock and give up
  *
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. I must
  * have just failed to try_to_take_rt_mutex().
  */
 static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@ static void remove_waiter(struct rt_mutex *lock,
 	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
 	struct task_struct *owner = rt_mutex_owner(lock);
 	struct rt_mutex *next_lock;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&current->pi_lock, flags);
+	raw_spin_lock(&current->pi_lock);
 	rt_mutex_dequeue(lock, waiter);
 	current->pi_blocked_on = NULL;
-	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+	raw_spin_unlock(&current->pi_lock);
 
 	/*
 	 * Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@ static void remove_waiter(struct rt_mutex *lock,
 	if (!owner || !is_top_waiter)
 		return;
 
-	raw_spin_lock_irqsave(&owner->pi_lock, flags);
+	raw_spin_lock(&owner->pi_lock);
 
 	rt_mutex_dequeue_pi(owner, waiter);
 
@@ -1044,7 +1040,7 @@ static void remove_waiter(struct rt_mutex *lock,
 	/* Store the lock on which owner is blocked or NULL */
 	next_lock = task_blocked_on_lock(owner);
 
-	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+	raw_spin_unlock(&owner->pi_lock);
 
 	/*
 	 * Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@ static void remove_waiter(struct rt_mutex *lock,
 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
 	get_task_struct(owner);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
 				   next_lock, NULL, current);
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 }
 
 /*
@@ -1097,11 +1093,11 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  * @lock:		 the rt_mutex to take
  * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
- * 			 or TASK_UNINTERRUPTIBLE)
+ *			 or TASK_UNINTERRUPTIBLE)
  * @timeout:		 the pre-initialized and started timer, or NULL for none
  * @waiter:		 the pre-initialized rt_mutex_waiter
  *
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled
  */
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 				break;
 		}
 
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(waiter);
 
 		schedule();
 
-		raw_spin_lock(&lock->wait_lock);
+		raw_spin_lock_irq(&lock->wait_lock);
 		set_current_state(state);
 	}
 
@@ -1172,17 +1168,26 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		  enum rtmutex_chainwalk chwalk)
 {
 	struct rt_mutex_waiter waiter;
+	unsigned long flags;
 	int ret = 0;
 
 	debug_rt_mutex_init_waiter(&waiter);
 	RB_CLEAR_NODE(&waiter.pi_tree_entry);
 	RB_CLEAR_NODE(&waiter.tree_entry);
 
-	raw_spin_lock(&lock->wait_lock);
+	/*
+	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
+	 * be called in early boot if the cmpxchg() fast path is disabled
+	 * (debug, no architecture support). In this case we will acquire the
+	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
+	 * enable interrupts in that early boot case. So we need to use the
+	 * irqsave/restore variants.
+	 */
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
 	/* Try to acquire the lock again: */
 	if (try_to_take_rt_mutex(lock, current, NULL)) {
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 		return 0;
 	}
 
@@ -1211,7 +1216,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	/* Remove pending timer: */
 	if (unlikely(timeout))
@@ -1227,6 +1232,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  */
 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
+	unsigned long flags;
 	int ret;
 
 	/*
@@ -1238,10 +1244,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 		return 0;
 
 	/*
-	 * The mutex has currently no owner. Lock the wait lock and
-	 * try to acquire the lock.
+	 * The mutex has currently no owner. Lock the wait lock and try to
+	 * acquire the lock. We use irqsave here to support early boot calls.
 	 */
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
 	ret = try_to_take_rt_mutex(lock, current, NULL);
 
@@ -1251,7 +1257,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	return ret;
 }
@@ -1263,7 +1269,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
 					struct wake_q_head *wake_q)
 {
-	raw_spin_lock(&lock->wait_lock);
+	unsigned long flags;
+
+	/* irqsave required to support early boot calls */
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
 	debug_rt_mutex_unlock(lock);
 
@@ -1302,10 +1311,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
 	 */
 	while (!rt_mutex_has_waiters(lock)) {
 		/* Drops lock->wait_lock ! */
-		if (unlock_rt_mutex_safe(lock) == true)
+		if (unlock_rt_mutex_safe(lock, flags) == true)
 			return false;
 		/* Relock the rtmutex and try again */
-		raw_spin_lock(&lock->wait_lock);
+		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	}
 
 	/*
@@ -1316,7 +1325,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
 	 */
 	mark_wakeup_next_waiter(wake_q, lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	/* check PI boosting */
 	return true;
@@ -1596,10 +1605,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 {
 	int ret;
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 
 	if (try_to_take_rt_mutex(lock, task, NULL)) {
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 		return 1;
 	}
 
@@ -1620,7 +1629,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 	if (unlikely(ret))
 		remove_waiter(lock, waiter);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	debug_rt_mutex_print_deadlock(waiter);
 
@@ -1668,7 +1677,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 {
 	int ret;
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1684,7 +1693,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
 }

From 07905298e4d5777eb58516cdc242f7ac1ca387a2 Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanure@linux.com>
Date: Mon, 25 Jan 2016 19:30:23 -0200
Subject: [PATCH 148/262] ALSA: bebob: Use a signed return type for
 get_formation_index

The return type "unsigned int" was used by the get_formation_index function
despite of the aspect that it will eventually return a negative	error code.
So, change to signed int and get index by reference in the parameters.

Done with the help of Coccinelle.

[Fix the missing braces suggested by Julia Lawall -- tiwai]

Signed-off-by: Lucas Tanure <tanure@linux.com>
Reviewed-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Tested-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/bebob/bebob_stream.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index 926e5dcbb66a..5022c9b97ddf 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = {
 	[6] = 0x07,
 };
 
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
 {
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
-		if (snd_bebob_rate_table[i] == rate)
-			return i;
+		if (snd_bebob_rate_table[i] == rate) {
+			*index = i;
+			return 0;
+		}
 	}
 	return -EINVAL;
 }
@@ -425,7 +427,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate)
 		goto end;
 
 	/* confirm params for both streams */
-	index = get_formation_index(rate);
+	err = get_formation_index(rate, &index);
+	if (err < 0)
+		goto end;
 	pcm_channels = bebob->tx_stream_formations[index].pcm;
 	midi_channels = bebob->tx_stream_formations[index].midi;
 	err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,

From 728dabd6d1751cf5e0f8e0535891393da62396e9 Mon Sep 17 00:00:00 2001
From: William Cohen <wcohen@redhat.com>
Date: Thu, 21 Jan 2016 22:56:26 -0500
Subject: [PATCH 149/262] Eliminate the .eh_frame sections from the aarch64
 vmlinux and kernel modules

By default the aarch64 gcc generates .eh_frame sections.  Unlike
.debug_frame sections, the .eh_frame sections are loaded into memory
when the associated code is loaded.  On an example kernel being built
with this default the .eh_frame section in vmlinux used an extra 1.7MB
of memory.  The x86 disables the creation of the .eh_frame section.
The aarch64 should probably do the same to save some memory.

Signed-off-by: William Cohen <wcohen@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index cd822d8454c0..094a137f6083 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -27,6 +27,7 @@ $(warning LSE atomics not supported by binutils)
 endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_AFLAGS	+= $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)

From 67dfa1751ce71e629aad7c438e1678ad41054677 Mon Sep 17 00:00:00 2001
From: dann frazier <dann.frazier@canonical.com>
Date: Mon, 25 Jan 2016 16:52:16 -0700
Subject: [PATCH 150/262] arm64: errata: Add -mpc-relative-literal-loads to
 build flags

GCC6 (and Linaro's 2015.12 snapshot of GCC5) has a new default that uses
adrp/ldr or adrp/add to address literal pools. When CONFIG_ARM64_ERRATUM_843419
is enabled, modules built with this toolchain fail to load:

  module libahci: unsupported RELA relocation: 275

This patch fixes the problem by passing '-mpc-relative-literal-loads'
to the compiler.

Cc: stable@vger.kernel.org
Fixes: df057cc7b4fa ("arm64: errata: add module build workaround for erratum #843419")
BugLink: http://bugs.launchpad.net/bugs/1533009
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Suggested-by: Christophe Lyon <christophe.lyon@linaro.org>
Signed-off-by: Dann Frazier <dann.frazier@canonical.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 094a137f6083..307237cfe728 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -28,6 +28,7 @@ endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS	+= $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)

From 531d3067314f63d80936c565dcd0fa01c7a1760f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 25 Jan 2016 18:25:59 +0000
Subject: [PATCH 151/262] arm64: defconfig: updates for 4.5

Based on requests, update our defconfig so that:

  - We don't build any modules
  - PL031 is enabled (RTC emulated by qemu)
  - Xen guest support is enabled
  - The Uniphier built-in I2C controller is enabled
  - PCI host controller drivers for the various arm64 SoCs are enabled
  - Device passthrough works on Seattle using SMMU and VFIO
  - The Hisilicon IRQ controller (mbigen) is enabled

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/configs/defconfig | 42 +++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 18ca9fb9e65f..86581f793e39 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -16,7 +16,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@ CONFIG_ARCH_EXYNOS7=y
 CONFIG_ARCH_LAYERSCAPE=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_SPRD=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@ CONFIG_ARCH_XGENE=y
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
 CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
 CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_XEN=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -100,7 +102,11 @@ CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -117,25 +123,23 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_PHY_XGENE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -239,6 +246,7 @@ CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_FTRACE is not set
 CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y

From 2d0f76a6ca1f2cdcffca7ce130f67ec61caa0999 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Wed, 20 Jan 2016 19:22:16 +0100
Subject: [PATCH 152/262] s390/numa: move numa_init_late() from device to
 arch_initcall

Commit 3e89e1c5ea ("hugetlb: make mm and fs code explicitly non-modular")
moves hugetlb_init() from module_init to subsys_initcall.

The hugetlb_init()->hugetlb_register_node() code accesses "node->dev.kobj"
which is initialized in numa_init_late().

Since numa_init_late() is a device_initcall which is called *after*
subsys_initcall the above mentioned patch breaks NUMA on s390.

So fix this and move numa_init_late() to arch_initcall.

Fixes: 3e89e1c5ea ("hugetlb: make mm and fs code explicitly non-modular")
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/numa/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 969b4ed4ed5e..2794845061c6 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -160,7 +160,7 @@ static int __init numa_init_late(void)
 		register_one_node(nid);
 	return 0;
 }
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
 
 static int __init parse_debug(char *parm)
 {

From c2e1fcf3ec39de092e3e84b489b5c0cc39f6dd05 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 13:57:34 +0100
Subject: [PATCH 153/262] s390/pci: adjust IOMAP_MAX_ENTRIES

ZPCI_IOMAP_MAX_ENTRIES is off by one. Let's adjust this
for the sake of correctness.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci_io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 1a9a98de5bde..67f7ddd4617d 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -8,7 +8,7 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES		0x7fff
+#define ZPCI_IOMAP_MAX_ENTRIES		0x8000
 #define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000ULL
 #define ZPCI_IOMAP_ADDR_IDX_MASK	0x7fff000000000000ULL
 #define ZPCI_IOMAP_ADDR_OFF_MASK	0x0000ffffffffffffULL

From 9e00caaea14b90a788c17a2e0c32108a3d7008ec Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 13:58:42 +0100
Subject: [PATCH 154/262] s390/pci: provide ZPCI_ADDR macro

Provide and use a ZPCI_ADDR macro as the complement of ZPCI_IDX
to get rid of some constants in the code.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci_io.h |  1 +
 arch/s390/pci/pci.c            | 10 +++-------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 67f7ddd4617d..27dbc1129dfc 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -21,6 +21,7 @@ struct zpci_iomap_entry {
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48))
 #define ZPCI_IDX(addr)								\
 	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
 #define ZPCI_OFFSET(addr)							\
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 11d4f277e9f6..3942348317da 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -265,7 +265,6 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 			      unsigned long max)
 {
 	struct zpci_dev *zdev =	to_zpci(pdev);
-	u64 addr;
 	int idx;
 
 	if ((bar & 7) != bar)
@@ -284,8 +283,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 	BUG_ON(!zpci_iomap_start[idx].count);
 	spin_unlock(&zpci_iomap_lock);
 
-	addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
-	return (void __iomem *) addr + offset;
+	return (void __iomem *) ZPCI_ADDR(idx) + offset;
 }
 EXPORT_SYMBOL(pci_iomap_range);
 
@@ -297,9 +295,8 @@ EXPORT_SYMBOL(pci_iomap);
 
 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 {
-	unsigned int idx;
+	unsigned int idx = ZPCI_IDX(addr);
 
-	idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
 	spin_lock(&zpci_iomap_lock);
 	/* Detect underrun */
 	BUG_ON(!zpci_iomap_start[idx].count);
@@ -611,8 +608,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
 		if (zdev->bars[i].val & 4)
 			flags |= IORESOURCE_MEM_64;
 
-		addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+		addr = ZPCI_ADDR(entry);
 		size = 1UL << zdev->bars[i].size;
 
 		res = __alloc_res(zdev, addr, size, flags);

From bf19c94d5caa100f22c20d756c57e3550c01cdb8 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 13:59:35 +0100
Subject: [PATCH 155/262] s390/pci: improve ZPCI_* macros

Most of the constants defined in pci_io.h depend on each other
and thus can be calculated.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci_io.h | 15 +++++++++------
 arch/s390/pci/pci.c            |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 27dbc1129dfc..69aa18be61af 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -8,10 +8,13 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES		0x8000
-#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK	0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK	0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT		48
+#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK	((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES							\
+	((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK						\
+	(~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
 
 struct zpci_iomap_entry {
 	u32 fh;
@@ -21,9 +24,9 @@ struct zpci_iomap_entry {
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
-#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48))
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
 #define ZPCI_IDX(addr)								\
-	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
 #define ZPCI_OFFSET(addr)							\
 	((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
 
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 3942348317da..3627ae075f40 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -541,7 +541,7 @@ static void zpci_irq_exit(void)
 
 static int zpci_alloc_iomap(struct zpci_dev *zdev)
 {
-	int entry;
+	unsigned long entry;
 
 	spin_lock(&zpci_iomap_lock);
 	entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);

From c506fff3d3a8a632e8eb2270680548ec415f0357 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 14:01:44 +0100
Subject: [PATCH 156/262] s390/pci: resize iomap

On s390 we need to maintain a mapping between iomem addresses
and arch specific function identifiers. Currently the mapping
table is created as such that we could span the whole iomem
address space. Since we can only map each bar space from each
possible function we have an upper bound for the number of
mapping entries.

This reduces the size of the iomap from 256K to less than 4K
(using the defconfig).

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 3627ae075f40..53139158010d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -68,9 +68,12 @@ static struct airq_struct zpci_airq = {
 	.isc = PCI_ISC,
 };
 
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES						\
+	min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),	\
+	    ZPCI_IOMAP_MAX_ENTRIES)
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
@@ -544,12 +547,12 @@ static int zpci_alloc_iomap(struct zpci_dev *zdev)
 	unsigned long entry;
 
 	spin_lock(&zpci_iomap_lock);
-	entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
-	if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+	entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+	if (entry == ZPCI_IOMAP_ENTRIES) {
 		spin_unlock(&zpci_iomap_lock);
 		return -ENOSPC;
 	}
-	set_bit(entry, zpci_iomap);
+	set_bit(entry, zpci_iomap_bitmap);
 	spin_unlock(&zpci_iomap_lock);
 	return entry;
 }
@@ -558,7 +561,7 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
 {
 	spin_lock(&zpci_iomap_lock);
 	memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
-	clear_bit(entry, zpci_iomap);
+	clear_bit(entry, zpci_iomap_bitmap);
 	spin_unlock(&zpci_iomap_lock);
 }
 
@@ -869,23 +872,30 @@ static int zpci_mem_init(void)
 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
 				16, 0, NULL);
 	if (!zdev_fmb_cache)
-		goto error_zdev;
+		goto error_fmb;
 
-	/* TODO: use realloc */
-	zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
-				   GFP_KERNEL);
+	zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+				   sizeof(*zpci_iomap_start), GFP_KERNEL);
 	if (!zpci_iomap_start)
 		goto error_iomap;
-	return 0;
 
+	zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+				    sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+	if (!zpci_iomap_bitmap)
+		goto error_iomap_bitmap;
+
+	return 0;
+error_iomap_bitmap:
+	kfree(zpci_iomap_start);
 error_iomap:
 	kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
 	return -ENOMEM;
 }
 
 static void zpci_mem_exit(void)
 {
+	kfree(zpci_iomap_bitmap);
 	kfree(zpci_iomap_start);
 	kmem_cache_destroy(zdev_fmb_cache);
 }

From c0cabaddeeeab13db8ef4e275ab5c7c0e8746324 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 14:03:06 +0100
Subject: [PATCH 157/262] s390/pci: fix bar check

Fix the check which bar space we should map to allow available bars only.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 53139158010d..d28d194d580c 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -270,7 +270,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 	struct zpci_dev *zdev =	to_zpci(pdev);
 	int idx;
 
-	if ((bar & 7) != bar)
+	if (!pci_resource_len(pdev, bar))
 		return NULL;
 
 	idx = zdev->bars[bar].map_idx;

From 8ead7efb6379354d0d31efb39342f4399c87cb67 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 14:04:18 +0100
Subject: [PATCH 158/262] s390/pci: set error state for unusable functions

We receive special notifications from firmware when an error was detected
and a pci function became unusable. Set the error_state accordingly to give
device drivers a hint that they don't need to try error recovery.

Suggested-by: Alexander Schmidt <alexschm@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 369a3e05d468..b0e04751c5d5 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -53,6 +53,11 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
 	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+	if (!pdev)
+		return;
+
+	pdev->error_state = pci_channel_io_perm_failure;
 }
 
 void zpci_event_error(void *data)

From f5e44f82c1848d8e55fb0061308f14c0884e5483 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 14:11:21 +0100
Subject: [PATCH 159/262] s390/pci: remove iomap sanity checks

Since each iomap_entry handles only one bar of one pci function
(even when disjunct ranges of a bar are mapped) the sanity check
in pci_iomap_range is not needed and can be removed.

Also convert the remaining BUG_ONs to WARN_ONs.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index d28d194d580c..8f19c8f9d660 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -275,15 +275,10 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 
 	idx = zdev->bars[bar].map_idx;
 	spin_lock(&zpci_iomap_lock);
-	if (zpci_iomap_start[idx].count++) {
-		BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
-		       zpci_iomap_start[idx].bar != bar);
-	} else {
-		zpci_iomap_start[idx].fh = zdev->fh;
-		zpci_iomap_start[idx].bar = bar;
-	}
 	/* Detect overrun */
-	BUG_ON(!zpci_iomap_start[idx].count);
+	WARN_ON(!++zpci_iomap_start[idx].count);
+	zpci_iomap_start[idx].fh = zdev->fh;
+	zpci_iomap_start[idx].bar = bar;
 	spin_unlock(&zpci_iomap_lock);
 
 	return (void __iomem *) ZPCI_ADDR(idx) + offset;
@@ -302,7 +297,7 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 
 	spin_lock(&zpci_iomap_lock);
 	/* Detect underrun */
-	BUG_ON(!zpci_iomap_start[idx].count);
+	WARN_ON(!zpci_iomap_start[idx].count);
 	if (!--zpci_iomap_start[idx].count) {
 		zpci_iomap_start[idx].fh = 0;
 		zpci_iomap_start[idx].bar = 0;

From d8f51227f33fbb34e1e54e315175268f54e573e7 Mon Sep 17 00:00:00 2001
From: Ingo Tuchscherer <ingo.tuchscherer@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 16:18:29 +0100
Subject: [PATCH 160/262] s390/zcrypt: Fix cryptographic device id in kernel
 messages

Currently, on card response failures a combination of card domain and
domain id is recorded in the kernel messages.

According to the message description only the card id will be recorded.
The domain id is not relevant, since the whole card including all domains
is set offline.

Signed-off-by: Ingo Tuchscherer <ingo.tuchscherer@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/zcrypt_error.h     | 15 +++++++++------
 drivers/s390/crypto/zcrypt_msgtype50.c |  9 +++++----
 drivers/s390/crypto/zcrypt_msgtype6.c  | 20 ++++++++++----------
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index 7b23f43c7b08..de1b6c1d172c 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -112,9 +112,10 @@ static inline int convert_error(struct zcrypt_device *zdev,
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;
 	case REP82_ERROR_TRANSPORT_FAIL:
 	case REP82_ERROR_MACHINE_FAILURE:
@@ -123,16 +124,18 @@ static inline int convert_error(struct zcrypt_device *zdev,
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;
 	default:
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 74edf2934e7c..eedfaa2cf715 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -336,9 +336,10 @@ static int convert_type80(struct zcrypt_device *zdev,
 		/* The result is too short, the CEX2A card may not do that.. */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online, t80h->code);
+			       AP_QID_DEVICE(zdev->ap_dev->qid),
+			       zdev->online, t80h->code);
 
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
@@ -368,9 +369,9 @@ static int convert_response(struct zcrypt_device *zdev,
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 9a2dd472c1cc..21959719daef 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -572,9 +572,9 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
 			return -EINVAL;
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online,
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
 			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
@@ -715,9 +715,9 @@ static int convert_response_ica(struct zcrypt_device *zdev,
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -747,9 +747,9 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
 		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -773,9 +773,9 @@ static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN; /* repeat the request on a different device. */
 	}
 }
@@ -800,9 +800,9 @@ static int convert_response_rng(struct zcrypt_device *zdev,
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }

From 0d9bfe9123cfde59bf5c2e375b59d2a7d5061c4c Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 25 Jan 2016 10:30:27 +0100
Subject: [PATCH 161/262] s390/cio: fix measurement characteristics memleak

Measurement characteristics are allocated during channel path
registration but not freed during deregistration. Fix this by
embedding these characteristics inside struct channel_path.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chp.c  |  6 +++---
 drivers/s390/cio/chp.h  |  2 +-
 drivers/s390/cio/chsc.c | 16 ++--------------
 3 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index c692dfebd0ba..3d2b6c48c18e 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -139,11 +139,11 @@ static ssize_t chp_measurement_chars_read(struct file *filp,
 
 	device = container_of(kobj, struct device, kobj);
 	chp = to_channelpath(device);
-	if (!chp->cmg_chars)
+	if (chp->cmg == -1)
 		return 0;
 
-	return memory_read_from_buffer(buf, count, &off,
-				chp->cmg_chars, sizeof(struct cmg_chars));
+	return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars,
+				       sizeof(chp->cmg_chars));
 }
 
 static struct bin_attribute chp_measurement_chars_attr = {
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index 4efd5b867cc3..af0232290dc4 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -48,7 +48,7 @@ struct channel_path {
 	/* Channel-measurement related stuff: */
 	int cmg;
 	int shared;
-	void *cmg_chars;
+	struct cmg_chars cmg_chars;
 };
 
 /* Return channel_path struct for given chpid. */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index a831d18596a5..5df0efee54db 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -967,22 +967,19 @@ static void
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
 			  struct cmg_chars *chars)
 {
-	struct cmg_chars *cmg_chars;
 	int i, mask;
 
-	cmg_chars = chp->cmg_chars;
 	for (i = 0; i < NR_MEASUREMENT_CHARS; i++) {
 		mask = 0x80 >> (i + 3);
 		if (cmcv & mask)
-			cmg_chars->values[i] = chars->values[i];
+			chp->cmg_chars.values[i] = chars->values[i];
 		else
-			cmg_chars->values[i] = 0;
+			chp->cmg_chars.values[i] = 0;
 	}
 }
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
-	struct cmg_chars *cmg_chars;
 	int ccode, ret;
 
 	struct {
@@ -1006,11 +1003,6 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
 		u32 data[NR_MEASUREMENT_CHARS];
 	} __attribute__ ((packed)) *scmc_area;
 
-	chp->cmg_chars = NULL;
-	cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL);
-	if (!cmg_chars)
-		return -ENOMEM;
-
 	spin_lock_irq(&chsc_page_lock);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scmc_area = chsc_page;
@@ -1042,14 +1034,10 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
 		/* No cmg-dependent data. */
 		goto out;
 	}
-	chp->cmg_chars = cmg_chars;
 	chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
 				  (struct cmg_chars *) &scmc_area->data);
 out:
 	spin_unlock_irq(&chsc_page_lock);
-	if (!chp->cmg_chars)
-		kfree(cmg_chars);
-
 	return ret;
 }
 

From 61f0bfcf8020f02eb09adaef96745d1c1d1b3623 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 25 Jan 2016 10:31:33 +0100
Subject: [PATCH 162/262] s390/cio: ensure consistent measurement state

Make sure that in all cases where we could not obtain measurement
characteristics the associated fields are set to invalid values.

Note: without this change the "shared" capability of a channel path
for which we could not obtain the measurement characteristics was
incorrectly displayed as 0 (not shared). We will now correctly
report "unknown" in this case.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chp.c  | 13 +++++--------
 drivers/s390/cio/chsc.c | 12 ++++++++----
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 3d2b6c48c18e..8504629cbf72 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -466,14 +466,11 @@ int chp_new(struct chp_id chpid)
 		ret = -ENODEV;
 		goto out_free;
 	}
-	/* Get channel-measurement characteristics. */
-	if (css_chsc_characteristics.scmc && css_chsc_characteristics.secm) {
-		ret = chsc_get_channel_measurement_chars(chp);
-		if (ret)
-			goto out_free;
-	} else {
-		chp->cmg = -1;
-	}
+
+	ret = chsc_get_channel_measurement_chars(chp);
+	if (ret)
+		goto out_free;
+
 	dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
 	/* make it known to the system */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 5df0efee54db..13747c510c0b 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -1003,6 +1003,12 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
 		u32 data[NR_MEASUREMENT_CHARS];
 	} __attribute__ ((packed)) *scmc_area;
 
+	chp->shared = -1;
+	chp->cmg = -1;
+
+	if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
+		return 0;
+
 	spin_lock_irq(&chsc_page_lock);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scmc_area = chsc_page;
@@ -1023,11 +1029,9 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
 			      scmc_area->response.code);
 		goto out;
 	}
-	if (scmc_area->not_valid) {
-		chp->cmg = -1;
-		chp->shared = -1;
+	if (scmc_area->not_valid)
 		goto out;
-	}
+
 	chp->cmg = scmc_area->cmg;
 	chp->shared = scmc_area->shared;
 	if (chp->cmg != 2 && chp->cmg != 3) {

From 9f3d6d7a40a178b8a5b5274f4e55fec8c30147c9 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 25 Jan 2016 10:32:51 +0100
Subject: [PATCH 163/262] s390/cio: update measurement characteristics

Per channel path measurement characteristics are obtained during channel
path registration. However if some properties of a channel path change
we don't update the measurement characteristics.

Make sure to update the characteristics when we change the properties of
a channel path or receive a notification from FW about such a change.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chp.c  | 12 +++++-------
 drivers/s390/cio/chsc.c | 17 +++++++++++++++--
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 8504629cbf72..50597f9522fe 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -416,7 +416,8 @@ static void chp_release(struct device *dev)
  * chp_update_desc - update channel-path description
  * @chp - channel-path
  *
- * Update the channel-path description of the specified channel-path.
+ * Update the channel-path description of the specified channel-path
+ * including channel measurement related information.
  * Return zero on success, non-zero otherwise.
  */
 int chp_update_desc(struct channel_path *chp)
@@ -428,8 +429,10 @@ int chp_update_desc(struct channel_path *chp)
 		return rc;
 
 	rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+	if (rc)
+		return rc;
 
-	return rc;
+	return chsc_get_channel_measurement_chars(chp);
 }
 
 /**
@@ -466,11 +469,6 @@ int chp_new(struct chp_id chpid)
 		ret = -ENODEV;
 		goto out_free;
 	}
-
-	ret = chsc_get_channel_measurement_chars(chp);
-	if (ret)
-		goto out_free;
-
 	dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
 	/* make it known to the system */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 13747c510c0b..c424c0c7367e 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 
 #include <asm/cio.h>
@@ -224,8 +225,9 @@ out_unreg:
 
 void chsc_chp_offline(struct chp_id chpid)
 {
-	char dbf_txt[15];
+	struct channel_path *chp = chpid_to_chp(chpid);
 	struct chp_link link;
+	char dbf_txt[15];
 
 	sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
@@ -236,6 +238,11 @@ void chsc_chp_offline(struct chp_id chpid)
 	link.chpid = chpid;
 	/* Wait until previous actions have settled. */
 	css_wait_for_slow_path();
+
+	mutex_lock(&chp->lock);
+	chp_update_desc(chp);
+	mutex_unlock(&chp->lock);
+
 	for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link);
 }
 
@@ -690,8 +697,9 @@ static void chsc_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
 
 void chsc_chp_online(struct chp_id chpid)
 {
-	char dbf_txt[15];
+	struct channel_path *chp = chpid_to_chp(chpid);
 	struct chp_link link;
+	char dbf_txt[15];
 
 	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
@@ -701,6 +709,11 @@ void chsc_chp_online(struct chp_id chpid)
 		link.chpid = chpid;
 		/* Wait until previous actions have settled. */
 		css_wait_for_slow_path();
+
+		mutex_lock(&chp->lock);
+		chp_update_desc(chp);
+		mutex_unlock(&chp->lock);
+
 		for_each_subchannel_staged(__s390_process_res_acc, NULL,
 					   &link);
 		css_schedule_reprobe();

From cf89813a5b514bff9b3b5e7eaf2090f22fba62e0 Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Mon, 14 Dec 2015 16:43:35 +0100
Subject: [PATCH 164/262] perf tests: Remove wrong semicolon in while loop in
 CQM test

The while loop was spinning. Fix by removing a semicolon.

The issue was pointed out by gcc-6's -Wmisleading-indentation.

Signed-off-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: 035827e9f2bd ("perf tests: Add Intel CQM test")
Link: http://lkml.kernel.org/r/20151214154335.GA1409@x4
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/tests/intel-cqm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index 3e89ba825f6b..7f064eb37158 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -17,7 +17,7 @@ static pid_t spawn(void)
 	if (pid)
 		return pid;
 
-	while(1);
+	while(1)
 		sleep(5);
 	return 0;
 }

From d4913cbd05bab685e49c8174896e563b2487d054 Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Mon, 14 Dec 2015 16:44:03 +0100
Subject: [PATCH 165/262] perf annotate browser: Fix behaviour of Shift-Tab
 with nothing focussed

The issue was pointed out by gcc-6's -Wmisleading-indentation.

Signed-off-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: c97cf42219b7 ("perf top: Live TUI Annotation")
Link: http://lkml.kernel.org/r/20151214154403.GB1409@x4
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/annotate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index d4d7cc27252f..718bd46d47fa 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
 				nd = browser->curr_hot;
 			break;
 		case K_UNTAB:
-			if (nd != NULL)
+			if (nd != NULL) {
 				nd = rb_next(nd);
 				if (nd == NULL)
 					nd = rb_first(&browser->entries);
-			else
+			} else
 				nd = browser->curr_hot;
 			break;
 		case K_F1:

From 0805909f59e02036a4e2660159f27dbf8b6084ac Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 20 Jan 2016 12:56:33 +0100
Subject: [PATCH 166/262] perf hists: Fix HISTC_MEM_DCACHELINE width setting

Set correct width for unresolved mem_dcacheline addr.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Fixes: 9b32ba71ba90 ("perf tools: Add dcacheline sort")
Link: http://lkml.kernel.org/r/1453290995-18485-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c226303e3da0..68a7612019dc 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 			symlen = unresolved_col_width + 4 + 2;
 			hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
 					   symlen);
+			hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+					   symlen);
 		}
 
 		if (h->mem_info->iaddr.sym) {

From 3f416f22d1e21709a631189ba169f76fd267b374 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 20 Jan 2016 12:56:34 +0100
Subject: [PATCH 167/262] perf stat: Do not clean event's private stats

Mel reported stddev reporting was broken due to following commit:

	106a94a0f8c2 ("perf stat: Introduce read_counters function")

This commit merged interval and overall counters reading into single
read_counters function.

The old interval code cleaned the stddev data for some reason (it's
never displayed in interval mode) and the mentioned commit kept on
cleaning the stddev data in merged function, which resulted in the
stddev not being displayed.

Removing the wrong stddev data cleanup init_stats call.

Reported-and-Tested-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@vger.kernel.org # v4.2+
Fixes: 106a94a0f8c2 ("perf stat: Introduce read_counters function")
Link: http://lkml.kernel.org/r/1453290995-18485-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2f901d15e063..2b58edccd56f 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -310,7 +310,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	int i, ret;
 
 	aggr->val = aggr->ena = aggr->run = 0;
-	init_stats(ps->res_stats);
 
 	if (counter->per_pkg)
 		zero_per_pkg(counter);

From 9c7ebb613bffea2feef4ec562ba1dbcaa810942b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Fri, 22 Jan 2016 14:55:56 +0100
Subject: [PATCH 168/262] KVM: s390: fix guest fprs memory leak

fprs is never freed, therefore resulting in a memory leak if
kvm_vcpu_init() fails or the vcpu is destroyed.

Fixes: 9977e886cbbc ("s390/kernel: lazy restore fpu registers")
Cc: stable@vger.kernel.org # v4.3+
Reported-by: Eric Farman <farman@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Eric Farman <farman@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 835d60bedb54..797dbc4e11c3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1244,6 +1244,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
+	kfree(vcpu->arch.guest_fpregs.fprs);
 	free_page((unsigned long)(vcpu->arch.sie_block));
 
 	kvm_vcpu_uninit(vcpu);
@@ -1661,12 +1662,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
-		goto out_free_sie_block;
+		goto out_free_fprs;
 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
 		 vcpu->arch.sie_block);
 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
 
 	return vcpu;
+out_free_fprs:
+	kfree(vcpu->arch.guest_fpregs.fprs);
 out_free_sie_block:
 	free_page((unsigned long)(vcpu->arch.sie_block));
 out_free_cpu:

From 14b0b4ac37cbb3e57f564da30ece5626d1e3767a Mon Sep 17 00:00:00 2001
From: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Date: Wed, 20 Jan 2016 06:33:42 +0100
Subject: [PATCH 169/262] KVM: s390: Enable the KVM-VFIO device

The KVM-VFIO device is used by the QEMU VFIO device. It is used to
record the list of in-use VFIO groups so that KVM can manipulate
them.
While we don't need this on s390 currently, let's try to be like
everyone else.

Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/Kconfig  | 1 +
 arch/s390/kvm/Makefile | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52cf0e57..5ea5af3c7db7 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select SRCU
+	select KVM_VFIO
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3b553469650..d42fa38c2429 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 

From 9abc2a08a7d665b02bdde974fd6c44aae86e923e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 14 Jan 2016 22:12:47 +0100
Subject: [PATCH 170/262] KVM: s390: fix memory overwrites when vx is disabled

The kernel now always uses vector registers when available, however KVM
has special logic if support is really enabled for a guest. If support
is disabled, guest_fpregs.fregs will only contain memory for the fpu.
The kernel, however, will store vector registers into that area,
resulting in crazy memory overwrites.

Simply extending that area is not enough, because the format of the
registers also changes. We would have to do additional conversions, making
the code even more complex. Therefore let's directly use one place for
the vector/fpu registers + fpc (in kvm_run). We just have to convert the
data properly when accessing it. This makes current code much easier.

Please note that vector/fpu registers are now always stored to
vcpu->run->s.regs.vrs. Although this data is visible to QEMU and
used for migration, we only guarantee valid values to user space  when
KVM_SYNC_VRS is set. As that is only the case when we have vector
register support, we are on the safe side.

Fixes: b5510d9b68c3 ("s390/fpu: always enable the vector facility if it is available")
Cc: stable@vger.kernel.org # v4.4 d9a3a09af54d s390/kvm: remove dependency on struct save_area definition
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
[adopt to d9a3a09af54d]
---
 arch/s390/include/asm/kvm_host.h |   1 -
 arch/s390/kvm/kvm-s390.c         | 126 +++++++++++--------------------
 2 files changed, 43 insertions(+), 84 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6742414dbd6f..8959ebb6d2c9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -546,7 +546,6 @@ struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
 	unsigned int      host_acrs[NUM_ACRS];
 	struct fpu	  host_fpregs;
-	struct fpu	  guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 797dbc4e11c3..4af21c771f9b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1244,7 +1244,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
-	kfree(vcpu->arch.guest_fpregs.fprs);
 	free_page((unsigned long)(vcpu->arch.sie_block));
 
 	kvm_vcpu_uninit(vcpu);
@@ -1424,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination.  Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
-	dst->fpc = current->thread.fpu.fpc;
-	dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
-	current->thread.fpu.fpc = from->fpc;
-	current->thread.fpu.regs = from->regs;
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	/* Save host register state */
 	save_fpu_regs();
-	save_fpu_to(&vcpu->arch.host_fpregs);
-
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-		/*
-		 * Use the register save area in the SIE-control block
-		 * for register restore and save in kvm_arch_vcpu_put()
-		 */
-		current->thread.fpu.vxrs =
-			(__vector128 *)&vcpu->run->s.regs.vrs;
-	} else
-		load_fpu_from(&vcpu->arch.guest_fpregs);
+	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
 
+	/* Depending on MACHINE_HAS_VX, data stored to vrs either
+	 * has vector register or floating point register format.
+	 */
+	current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
 	if (test_fp_ctl(current->thread.fpu.fpc))
 		/* User space provided an invalid FPC, let's clear it */
 		current->thread.fpu.fpc = 0;
@@ -1477,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
 
+	/* Save guest register state */
 	save_fpu_regs();
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 
-	if (test_kvm_facility(vcpu->kvm, 129))
-		/*
-		 * kvm_arch_vcpu_load() set up the register save area to
-		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
-		 * are already saved.  Only the floating-point control must be
-		 * copied.
-		 */
-		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-	else
-		save_fpu_to(&vcpu->arch.guest_fpregs);
-	load_fpu_from(&vcpu->arch.host_fpregs);
+	/* Restore host register state */
+	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
 
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
@@ -1507,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
-	vcpu->arch.guest_fpregs.fpc = 0;
-	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+	/* make sure the new fpc will be lazily loaded */
+	save_fpu_regs();
+	current->thread.fpu.fpc = 0;
 	vcpu->arch.sie_block->gbea = 1;
 	vcpu->arch.sie_block->pp = 0;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1649,27 +1617,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
-	/*
-	 * Allocate a save area for floating-point registers.  If the vector
-	 * extension is available, register contents are saved in the SIE
-	 * control block.  The allocated save area is still required in
-	 * particular places, for example, in kvm_s390_vcpu_store_status().
-	 */
-	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-					       GFP_KERNEL);
-	if (!vcpu->arch.guest_fpregs.fprs)
-		goto out_free_sie_block;
-
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
-		goto out_free_fprs;
+		goto out_free_sie_block;
 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
 		 vcpu->arch.sie_block);
 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
 
 	return vcpu;
-out_free_fprs:
-	kfree(vcpu->arch.guest_fpregs.fprs);
 out_free_sie_block:
 	free_page((unsigned long)(vcpu->arch.sie_block));
 out_free_cpu:
@@ -1882,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
+	/* make sure the new values will be lazily loaded */
+	save_fpu_regs();
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	save_fpu_regs();
-	load_fpu_from(&vcpu->arch.guest_fpregs);
+	current->thread.fpu.fpc = fpu->fpc;
+	if (MACHINE_HAS_VX)
+		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+	else
+		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
-	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+	/* make sure we have the latest values */
+	save_fpu_regs();
+	if (MACHINE_HAS_VX)
+		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+	else
+		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+	fpu->fpc = current->thread.fpu.fpc;
 	return 0;
 }
 
@@ -2399,6 +2362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
 	unsigned char archmode = 1;
+	freg_t fprs[NUM_FPRS];
 	unsigned int px;
 	u64 clkcomp;
 	int rc;
@@ -2414,8 +2378,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 		gpa = px;
 	} else
 		gpa -= __LC_FPREGS_SAVE_AREA;
-	rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
-			     vcpu->arch.guest_fpregs.fprs, 128);
+
+	/* manually convert vector registers if necessary */
+	if (MACHINE_HAS_VX) {
+		convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     fprs, 128);
+	} else {
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     vcpu->run->s.regs.vrs, 128);
+	}
 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
 			      vcpu->run->s.regs.gprs, 128);
 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2423,7 +2395,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
 			      &px, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
-			      &vcpu->arch.guest_fpregs.fpc, 4);
+			      &vcpu->run->s.regs.fpc, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
 			      &vcpu->arch.sie_block->todpr, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2446,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * it into the save area
 	 */
 	save_fpu_regs();
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		/*
-		 * If the vector extension is available, the vector registers
-		 * which overlaps with floating-point registers are saved in
-		 * the SIE-control block.  Hence, extract the floating-point
-		 * registers and the FPC value and store them in the
-		 * guest_fpregs structure.
-		 */
-		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
-		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
-				 current->thread.fpu.vxrs);
-	} else
-		save_fpu_to(&vcpu->arch.guest_fpregs);
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);

From eb249a11913d316fc1a27dd07a1e1e43bda9199d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 16:58:08 +0100
Subject: [PATCH 171/262] irqchip/s3c24xx: Mark init_eint as __maybe_unused

The init_eint array in the s3c24xx irqchip driver is used by
every individual chip variant, but Kconfig allows building
the driver when they are all disabled, and that leads to
a harmless compile-time warning:

drivers/irqchip/irq-s3c24xx.c:608:28: error: 'init_eint' defined but not used [-Werror=unused-variable]

This marks the array as __maybe_unused to avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1453737499-1960073-1-git-send-email-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-s3c24xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c
index c71914e8f596..5dc5a760c723 100644
--- a/drivers/irqchip/irq-s3c24xx.c
+++ b/drivers/irqchip/irq-s3c24xx.c
@@ -605,7 +605,7 @@ err:
 	return ERR_PTR(ret);
 }
 
-static struct s3c_irq_data init_eint[32] = {
+static struct s3c_irq_data __maybe_unused init_eint[32] = {
 	{ .type = S3C_IRQTYPE_NONE, }, /* reserved */
 	{ .type = S3C_IRQTYPE_NONE, }, /* reserved */
 	{ .type = S3C_IRQTYPE_NONE, }, /* reserved */

From 0df337cf92c107fb515c9df6907052a97bd484b9 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 25 Jan 2016 23:24:17 +0100
Subject: [PATCH 172/262] irqchip: Fix dependencies for archs w/o HAS_IOMEM

Not every arch has io memory. So, unbreak the build by fixing the
dependencies.

Signed-off-by: Richard Weinberger <richard@nod.at>
Cc: user-mode-linux-devel@lists.sourceforge.net
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1453760661-1444-19-git-send-email-richard@nod.at
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 11fc2a27fa2e..90ab59107830 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -154,6 +154,7 @@ config TB10X_IRQC
 config TS4800_IRQ
 	tristate "TS-4800 IRQ controller"
 	select IRQ_DOMAIN
+	depends on HAS_IOMEM
 	help
 	  Support for the TS-4800 FPGA IRQ controller
 

From 530cbe100ef7587aa5b5ac3a4b670cda4d50e598 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 26 Jan 2016 13:52:25 +0000
Subject: [PATCH 173/262] irqdomain: Allow domain lookup with DOMAIN_BUS_WIRED
 token

Let's take the (outlandish) example of an interrupt controller
capable of handling both wired interrupts and PCI MSIs.

With the current code, the PCI MSI domain is going to be tagged
with DOMAIN_BUS_PCI_MSI, and the wired domain with DOMAIN_BUS_ANY.

Things get hairy when we start looking up the domain for a wired
interrupt (typically when creating it based on some firmware
information - DT or ACPI).

In irq_create_fwspec_mapping(), we perform the lookup using
DOMAIN_BUS_ANY, which is actually used as a wildcard. This gives
us one chance out of two to end up with the wrong domain, and
we try to configure a wired interrupt with the MSI domain.
Everything grinds to a halt pretty quickly.

What we really need to do is to start looking for a domain that
would uniquely identify a wired interrupt domain, and only use
DOMAIN_BUS_ANY as a fallback.

In order to solve this, let's introduce a new DOMAIN_BUS_WIRED
token, which is going to be used exactly as described above.
Of course, this depends on the irqchip to setup the domain
bus_token, and nobody had to implement this so far.

Only so far.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Link: http://lkml.kernel.org/r/1453816347-32720-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h |  1 +
 kernel/irq/irqdomain.c    | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index f64622ad02c1..04579d9fbce4 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -70,6 +70,7 @@ struct irq_fwspec {
  */
 enum irq_domain_bus_token {
 	DOMAIN_BUS_ANY		= 0,
+	DOMAIN_BUS_WIRED,
 	DOMAIN_BUS_PCI_MSI,
 	DOMAIN_BUS_PLATFORM_MSI,
 	DOMAIN_BUS_NEXUS,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8cf95de1ab3f..d75179735a28 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -575,10 +575,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 	unsigned int type = IRQ_TYPE_NONE;
 	int virq;
 
-	if (fwspec->fwnode)
-		domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY);
-	else
+	if (fwspec->fwnode) {
+		domain = irq_find_matching_fwnode(fwspec->fwnode,
+						  DOMAIN_BUS_WIRED);
+		if (!domain)
+			domain = irq_find_matching_fwnode(fwspec->fwnode,
+							  DOMAIN_BUS_ANY);
+	} else {
 		domain = irq_default_domain;
+	}
 
 	if (!domain) {
 		pr_warn("no irq domain found for %s !\n",

From 14a0db3cdd114da757197193f66786e63649c91e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 26 Jan 2016 13:52:26 +0000
Subject: [PATCH 174/262] of: MSI: Simplify irqdomain lookup

So far, when trying to associate a device with its MSI domain,
we first lookup the domain using a MSI token, and if this
doesn't return anything useful, we pick up any domain matching
the same node.

This logic is broken for two reasons:
1) Only the generic MSI code (PCI or platform) sets this token
   to PCI/MSI or platform MSI. So we're guaranteed that if there
   is something to be found, we will find it with the first call.
2) If we have a convoluted situation where:
   - a single node implements both wired and MSI interrupts
   - MSI support for that HW hasn't been compiled in
   we'll end up using the wired domain for MSIs anyway, and things
   break badly.

So let's just remove __of_get_msi_domain, and replace it by a direct
call to irq_find_matching_host, because that's what we really want.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Rob Herring <robh+dt@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Link: http://lkml.kernel.org/r/1453816347-32720-3-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/of/irq.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 4fa916dffc91..a9ea5525109b 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -680,18 +680,6 @@ u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in)
 	return __of_msi_map_rid(dev, &msi_np, rid_in);
 }
 
-static struct irq_domain *__of_get_msi_domain(struct device_node *np,
-					      enum irq_domain_bus_token token)
-{
-	struct irq_domain *d;
-
-	d = irq_find_matching_host(np, token);
-	if (!d)
-		d = irq_find_host(np);
-
-	return d;
-}
-
 /**
  * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain
  * @dev: device for which the mapping is to be done.
@@ -707,7 +695,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid)
 	struct device_node *np = NULL;
 
 	__of_msi_map_rid(dev, &np, rid);
-	return __of_get_msi_domain(np, DOMAIN_BUS_PCI_MSI);
+	return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
 }
 
 /**
@@ -731,7 +719,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
 	/* Check for a single msi-parent property */
 	msi_np = of_parse_phandle(np, "msi-parent", 0);
 	if (msi_np && !of_property_read_bool(msi_np, "#msi-cells")) {
-		d = __of_get_msi_domain(msi_np, token);
+		d = irq_find_matching_host(msi_np, token);
 		if (!d)
 			of_node_put(msi_np);
 		return d;
@@ -745,7 +733,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
 		while (!of_parse_phandle_with_args(np, "msi-parent",
 						   "#msi-cells",
 						   index, &args)) {
-			d = __of_get_msi_domain(args.np, token);
+			d = irq_find_matching_host(args.np, token);
 			if (d)
 				return d;
 

From e03a58c320e1103ebe97bda8ebdfcc5c9829c53f Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 21 Jan 2016 15:03:35 -0800
Subject: [PATCH 175/262] kselftests: timers: Add adjtimex SETOFFSET validity
 tests

Add some simple tests to check both valid and invalid
offsets when using adjtimex's ADJ_SETOFFSET method.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Harald Hoyer <harald@redhat.com>
Cc: Kay Sievers <kay@vrfy.org>
Cc: David Herrmann <dh.herrmann@gmail.com>
Link: http://lkml.kernel.org/r/1453417415-19110-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 .../testing/selftests/timers/valid-adjtimex.c | 139 +++++++++++++++++-
 1 file changed, 138 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index e86d937cc22c..60fe3c569bd9 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -45,7 +45,17 @@ static inline int ksft_exit_fail(void)
 }
 #endif
 
-#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+	return syscall(__NR_clock_adjtime, id, tx);
+}
+
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
@@ -193,10 +203,137 @@ out:
 }
 
 
+int set_offset(long long offset, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano) {
+		tmx.modes |= ADJ_NANO;
+
+		tmx.time.tv_sec = offset / NSEC_PER_SEC;
+		tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += NSEC_PER_SEC;
+		}
+	} else {
+		tmx.time.tv_sec = offset / USEC_PER_SEC;
+		tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += USEC_PER_SEC;
+		}
+	}
+
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret < 0) {
+		printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano)
+		tmx.modes |= ADJ_NANO;
+
+	tmx.time.tv_sec = sec;
+	tmx.time.tv_usec = usec;
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret >= 0) {
+		printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int validate_set_offset(void)
+{
+	printf("Testing ADJ_SETOFFSET... ");
+
+	/* Test valid values */
+	if (set_offset(NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC + 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC + 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+		return -1;
+
+	/* Test invalid values */
+	if (set_bad_offset(0, -1, 1))
+		return -1;
+	if (set_bad_offset(0, -1, 0))
+		return -1;
+	if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, -USEC_PER_SEC, 0))
+		return -1;
+
+	printf("[OK]\n");
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	if (validate_freq())
 		return ksft_exit_fail();
 
+	if (validate_set_offset())
+		return ksft_exit_fail();
+
 	return ksft_exit_pass();
 }

From 7809998ab1af22602a8463845108edc49dfb9ef0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 16:41:49 +0100
Subject: [PATCH 176/262] tick/sched: Hide unused oneshot timer code

A couple of functions in kernel/time/tick-sched.c are only
relevant for oneshot timer mode, i.e. when hires-timers or
nohz mode are enabled. If both are disabled, we get gcc warnings
about them:

kernel/time/tick-sched.c:98:16: warning: 'tick_init_jiffy_update' defined but not used [-Wunused-function]
 static ktime_t tick_init_jiffy_update(void)
                ^
kernel/time/tick-sched.c:112:13: warning: 'tick_sched_do_timer' defined but not used [-Wunused-function]
 static void tick_sched_do_timer(ktime_t now)
             ^
kernel/time/tick-sched.c:134:13: warning: 'tick_sched_handle' defined but not used [-Wunused-function]
 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
             ^

This encloses the whole set of functions in an appropriate ifdef
to avoid the warning and to make it clearer when they are used.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1453736525-1959191-1-git-send-email-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7ea28ed3109d..cbe5d8dcf15a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -36,16 +36,17 @@
  */
 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
-/*
- * The time, when the last jiffy update happened. Protected by jiffies_lock.
- */
-static ktime_t last_jiffies_update;
-
 struct tick_sched *tick_get_tick_sched(int cpu)
 {
 	return &per_cpu(tick_cpu_sched, cpu);
 }
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+/*
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
+ */
+static ktime_t last_jiffies_update;
+
 /*
  * Must be called with interrupts disabled !
  */
@@ -151,6 +152,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING);
 }
+#endif
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;

From 2be6d9bfef53d185975b44fd808aece36595e83e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 17:02:49 +0100
Subject: [PATCH 177/262] clocksource: Select CLKSRC_MMIO where needed

The Tegra clocksource implementation uses the clocksource_mmio helper
functions, but currently can be configured without them, which fails:

drivers/clocksource/built-in.o: In function `tegra20_init_timer':
:(.init.text+0xac): undefined reference to `clocksource_mmio_init'
:(.init.text+0x140): undefined reference to `clocksource_mmio_readl_up'

The same problem exists for Digicolor:

drivers/clocksource/built-in.o: In function `digicolor_timer_init':
:(.init.text+0xfa): undefined reference to `clocksource_mmio_init'
:(.init.text+0x14c): undefined reference to `clocksource_mmio_readl_down'

I've inspected the Kconfig file to look for other cases that I have not
yet run into, and added an explicit 'select' to each one to ensure we
can successfully link the drivers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: http://lkml.kernel.org/r/1453737776-1960372-1-git-send-email-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/clocksource/Kconfig | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 56777f04d2d9..f70b4f3b4857 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -30,6 +30,7 @@ config CLKSRC_MMIO
 config DIGICOLOR_TIMER
 	bool "Digicolor timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	select CLKSRC_MMIO
 	help
 	  Enables the support for the digicolor timer driver.
 
@@ -55,6 +56,7 @@ config ARMADA_370_XP_TIMER
 	bool "Armada 370 and XP timer driver" if COMPILE_TEST
 	depends on ARM
 	select CLKSRC_OF
+	select CLKSRC_MMIO
 	help
 	  Enables the support for the Armada 370 and XP timer driver.
 
@@ -89,6 +91,7 @@ config SUN5I_HSTIMER
 
 config TEGRA_TIMER
 	bool "Tegra timer driver" if COMPILE_TEST
+	select CLKSRC_MMIO
 	depends on ARM
 	help
 	  Enables support for the Tegra driver.
@@ -263,6 +266,7 @@ config FSL_FTM_TIMER
 
 config VF_PIT_TIMER
 	bool
+	select CLKSRC_MMIO
 	help
 	  Support for Period Interrupt Timer on Freescale Vybrid Family SoCs.
 
@@ -394,6 +398,7 @@ config CLKSRC_ST_LPC
 	bool "Low power clocksource found in the LPC" if COMPILE_TEST
 	select CLKSRC_OF if OF
 	depends on HAS_IOMEM
+	select CLKSRC_MMIO
 	help
 	  Enable this option to use the Low Power controller timer
 	  as clocksource.

From d7023e62c5128bf9f150d792a3ea8c758cb431a3 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 25 Jan 2016 23:24:19 +0100
Subject: [PATCH 178/262] clocksource: Fix dependencies for archs w/o HAS_IOMEM

Not every arch has io memory.
So, unbreak the build by fixing the dependencies.

Signed-off-by: Richard Weinberger <richard@nod.at>
Cc: user-mode-linux-devel@lists.sourceforge.net
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: http://lkml.kernel.org/r/1453760661-1444-21-git-send-email-richard@nod.at
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/clocksource/Kconfig | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index f70b4f3b4857..33db7406c0e2 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -31,6 +31,7 @@ config DIGICOLOR_TIMER
 	bool "Digicolor timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
 	select CLKSRC_MMIO
+	depends on HAS_IOMEM
 	help
 	  Enables the support for the digicolor timer driver.
 
@@ -78,6 +79,7 @@ config ORION_TIMER
 config SUN4I_TIMER
 	bool "Sun4i timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	select CLKSRC_MMIO
 	help
 	  Enables support for the Sun4i timer.
@@ -99,6 +101,7 @@ config TEGRA_TIMER
 config VT8500_TIMER
 	bool "VT8500 timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	help
 	  Enables support for the VT8500 driver.
 
@@ -134,6 +137,7 @@ config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
 config CLKSRC_DBX500_PRCMU
 	bool "Clocksource PRCMU Timer" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	help
 	  Use the always on PRCMU Timer as clocksource
 
@@ -251,6 +255,7 @@ config CLKSRC_EXYNOS_MCT
 config CLKSRC_SAMSUNG_PWM
 	bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	help
 	  This is a new clocksource driver for the PWM timer found in
 	  Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -260,6 +265,7 @@ config CLKSRC_SAMSUNG_PWM
 config FSL_FTM_TIMER
 	bool "Freescale FlexTimer Module driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	select CLKSRC_MMIO
 	help
 	  Support for Freescale FlexTimer Module (FTM) timer.
@@ -364,6 +370,7 @@ config CLKSRC_TANGO_XTAL
 config CLKSRC_PXA
 	bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	select CLKSRC_MMIO
 	help
 	  This enables OST0 support available on PXA and SA-11x0

From ed8e5a242826004d6dcf1c33775e08aa0d017c41 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Tue, 19 Jan 2016 16:12:28 +1100
Subject: [PATCH 179/262] KVM doc: Fix KVM_SMI chapter number

The KVM_SMI capability is following the KVM_S390_SET_IRQ_STATE capability
which is "4.95", this changes the number of the KVM_SMI chapter to 4.96.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 053f613fc9a9..07e4cdf02407 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3025,7 +3025,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
-4.90 KVM_SMI
+4.96 KVM_SMI
 
 Capability: KVM_CAP_X86_SMM
 Architectures: x86

From bb1a793125d9cc61f2d1cff92fe3927fec45d528 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 26 Jan 2016 13:52:27 +0000
Subject: [PATCH 180/262] base: Export platform_msi_domain_[alloc,free]_irqs

The new function platform_msi_domain_{alloc,free}_irqs are meant to be
used in platform drivers, which can be built as modules. Therefore, it
makes sense to export them to be used from kernel modules.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Link: http://lkml.kernel.org/r/1453816347-32720-4-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/base/platform-msi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 47c43386786b..279e53989374 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -284,6 +284,7 @@ out_free_priv_data:
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs);
 
 /**
  * platform_msi_domain_free_irqs - Free MSI interrupts for @dev
@@ -301,6 +302,7 @@ void platform_msi_domain_free_irqs(struct device *dev)
 	msi_domain_free_irqs(dev->msi_domain, dev);
 	platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs);
 
 /**
  * platform_msi_get_host_data - Query the private data associated with

From 18aa60ce2751c95d3412ed06a58b8b6cfb6f88f2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 26 Jan 2016 14:24:15 +0000
Subject: [PATCH 181/262] irqchip/gic-v3-its: Recompute the number of pages on
 page size change

When the programming of a GITS_BASERn register fails because of
an unsupported ITS page size, we retry it with a smaller page size.
Unfortunately, we don't recompute the number of allocated ITS pages,
indicating the wrong value computed in the original allocation.

A convenient fix is to free the pages we allocated, update the
page size, and restart the allocation. This will ensure that
we always allocate the right amount in the case of a device
table, specially if we have to reduce the allocation order
to stay within the boundaries of the ITS maximum allocation.

Reported-and-tested-by: Ma Jun <majun258@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1453818255-1289-1-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-gic-v3-its.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index e23d1d18f9d6..3447549fcc93 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -875,6 +875,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
 		}
 
 		alloc_size = (1 << order) * PAGE_SIZE;
+retry_alloc_baser:
 		alloc_pages = (alloc_size / psz);
 		if (alloc_pages > GITS_BASER_PAGES_MAX) {
 			alloc_pages = GITS_BASER_PAGES_MAX;
@@ -938,13 +939,16 @@ retry_baser:
 			 * size and retry. If we reach 4K, then
 			 * something is horribly wrong...
 			 */
+			free_pages((unsigned long)base, order);
+			its->tables[i] = NULL;
+
 			switch (psz) {
 			case SZ_16K:
 				psz = SZ_4K;
-				goto retry_baser;
+				goto retry_alloc_baser;
 			case SZ_64K:
 				psz = SZ_16K;
-				goto retry_baser;
+				goto retry_alloc_baser;
 			}
 		}
 

From 57adec866c0440976c96a4b8f5b59fb411b1cacb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=20Penttil=C3=A4?= <mika.penttila@nextfour.com>
Date: Tue, 26 Jan 2016 15:47:25 +0000
Subject: [PATCH 182/262] arm64: mm: avoid calling apply_to_page_range on empty
 range
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Calling apply_to_page_range with an empty range results in a BUG_ON
from the core code. This can be triggered by trying to load the st_drv
module with CONFIG_DEBUG_SET_MODULE_RONX enabled:

  kernel BUG at mm/memory.c:1874!
  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
  Modules linked in:
  CPU: 3 PID: 1764 Comm: insmod Not tainted 4.5.0-rc1+ #2
  Hardware name: ARM Juno development board (r0) (DT)
  task: ffffffc9763b8000 ti: ffffffc975af8000 task.ti: ffffffc975af8000
  PC is at apply_to_page_range+0x2cc/0x2d0
  LR is at change_memory_common+0x80/0x108

This patch fixes the issue by making change_memory_common (called by the
set_memory_* functions) a NOP when numpages == 0, therefore avoiding the
erroneous call to apply_to_page_range and bringing us into line with x86
and s390.

Cc: <stable@vger.kernel.org>
Reviewed-by: Laura Abbott <labbott@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Mika Penttilä <mika.penttila@nextfour.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/pageattr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 3571c7309c5e..cf6240741134 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -57,6 +57,9 @@ static int change_memory_common(unsigned long addr, int numpages,
 	if (end < MODULES_VADDR || end >= MODULES_END)
 		return -EINVAL;
 
+	if (!numpages)
+		return 0;
+
 	data.set_mask = set_mask;
 	data.clear_mask = clear_mask;
 

From 25cad69f21f5532d99e2ee73c8ab6512bcab614c Mon Sep 17 00:00:00 2001
From: Martin Wilck <Martin.Wilck@ts.fujitsu.com>
Date: Mon, 30 Nov 2015 12:50:05 +0100
Subject: [PATCH 183/262] base/platform: Fix platform drivers with no probe
 callback

Since b8b2c7d845d5, platform_drv_probe() is called for all platform
devices. If drv->probe is NULL, and dev_pm_domain_attach() fails,
platform_drv_probe() will return the error code from dev_pm_domain_attach().

This causes real_probe() to enter the "probe_failed" path and set
dev->driver to NULL. Before b8b2c7d845d5, real_probe() would assume
success if both dev->bus->probe and drv->probe were missing. As a result,
a device and driver could be "bound" together just by matching their names;
this doesn't work any more after b8b2c7d845d5.

This may cause problems later for certain usage of platform_driver_register()
and platform_device_register_simple(). I observed a panic while loading
the tpm_tis driver with parameter "force=1" (i.e. registering tpm_tis as
a platform driver), because tpm_tis_init's assumption that the device
returned by platform_device_register_simple() was bound didn't hold any more
(tpmm_chip_alloc() dereferences chip->pdev->driver, causing panic).

This patch restores the previous (4.3.0 and earlier) behavior of
platform_drv_probe() in the case when the associated platform driver has
no "probe" function.

Fixes: b8b2c7d845d5 ("base/platform: assert that dev_pm_domain callbacks are called unconditionally")
Signed-off-by: Martin Wilck <Martin.Wilck@ts.fujitsu.com>
Cc: stable <stable@vger.kernel.org> # 4.4
Cc: Martin Fuzzey <mfuzzey@parkeon.com>
Acked-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/platform.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 73d6e5d39e33..f437afa17f2b 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -558,10 +558,15 @@ static int platform_drv_probe(struct device *_dev)
 		return ret;
 
 	ret = dev_pm_domain_attach(_dev, true);
-	if (ret != -EPROBE_DEFER && drv->probe) {
-		ret = drv->probe(dev);
-		if (ret)
-			dev_pm_domain_detach(_dev, true);
+	if (ret != -EPROBE_DEFER) {
+		if (drv->probe) {
+			ret = drv->probe(dev);
+			if (ret)
+				dev_pm_domain_detach(_dev, true);
+		} else {
+			/* don't fail if just dev_pm_domain_attach failed */
+			ret = 0;
+		}
 	}
 
 	if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {

From b98c66887ee1aec661dcb88fe17399d5d112ed98 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 21 Jan 2016 15:19:59 +0000
Subject: [PATCH 184/262] drm/etnaviv: ignore VG GPUs with FE2.0

Ignore GPUs with a 2.0 front end.  These have a different register
layout for the front end, which provokes imprecise aborts from the
register accesses in the 'gpu' debugfs file.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index e0e68dd7565d..a9530f59e6fe 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -515,6 +515,14 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}
 
+	/* Exclude VG cores with FE2.0 */
+	if (gpu->identity.features & chipFeatures_PIPE_VG &&
+	    gpu->identity.features & chipFeatures_FE20) {
+		dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+		ret = -ENXIO;
+		goto fail;
+	}
+
 	ret = etnaviv_hw_reset(gpu);
 	if (ret)
 		goto fail;

From e2a2e263e06a0c153234b3e93fb85612d1c454d3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 24 Jan 2016 17:35:43 +0000
Subject: [PATCH 185/262] drm/etnaviv: update common and state_hi xml.h files

Update the common and state_hi xml.h header files from the etnaviv
repository.

Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/common.xml.h   | 59 ++++++++++++++++++++++----
 drivers/gpu/drm/etnaviv/state_hi.xml.h | 26 +++++++++++-
 2 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h
index 9e585d51fb78..e881482b5971 100644
--- a/drivers/gpu/drm/etnaviv/common.xml.h
+++ b/drivers/gpu/drm/etnaviv/common.xml.h
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18379 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -30,15 +30,19 @@ Copyright (C) 2015
 #define ENDIAN_MODE_NO_SWAP					0x00000000
 #define ENDIAN_MODE_SWAP_16					0x00000001
 #define ENDIAN_MODE_SWAP_32					0x00000002
+#define chipModel_GC200						0x00000200
 #define chipModel_GC300						0x00000300
 #define chipModel_GC320						0x00000320
+#define chipModel_GC328						0x00000328
 #define chipModel_GC350						0x00000350
 #define chipModel_GC355						0x00000355
 #define chipModel_GC400						0x00000400
 #define chipModel_GC410						0x00000410
 #define chipModel_GC420						0x00000420
+#define chipModel_GC428						0x00000428
 #define chipModel_GC450						0x00000450
 #define chipModel_GC500						0x00000500
+#define chipModel_GC520						0x00000520
 #define chipModel_GC530						0x00000530
 #define chipModel_GC600						0x00000600
 #define chipModel_GC700						0x00000700
@@ -46,9 +50,16 @@ Copyright (C) 2015
 #define chipModel_GC860						0x00000860
 #define chipModel_GC880						0x00000880
 #define chipModel_GC1000					0x00001000
+#define chipModel_GC1500					0x00001500
 #define chipModel_GC2000					0x00002000
 #define chipModel_GC2100					0x00002100
+#define chipModel_GC2200					0x00002200
+#define chipModel_GC2500					0x00002500
+#define chipModel_GC3000					0x00003000
 #define chipModel_GC4000					0x00004000
+#define chipModel_GC5000					0x00005000
+#define chipModel_GC5200					0x00005200
+#define chipModel_GC6400					0x00006400
 #define RGBA_BITS_R						0x00000001
 #define RGBA_BITS_G						0x00000002
 #define RGBA_BITS_B						0x00000004
@@ -160,7 +171,7 @@ Copyright (C) 2015
 #define chipMinorFeatures2_UNK8					0x00000100
 #define chipMinorFeatures2_UNK9					0x00000200
 #define chipMinorFeatures2_UNK10				0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16			0x00000800
+#define chipMinorFeatures2_HALTI1				0x00000800
 #define chipMinorFeatures2_UNK12				0x00001000
 #define chipMinorFeatures2_UNK13				0x00002000
 #define chipMinorFeatures2_UNK14				0x00004000
@@ -189,7 +200,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK5					0x00000020
 #define chipMinorFeatures3_UNK6					0x00000040
 #define chipMinorFeatures3_UNK7					0x00000080
-#define chipMinorFeatures3_UNK8					0x00000100
+#define chipMinorFeatures3_FAST_MSAA				0x00000100
 #define chipMinorFeatures3_UNK9					0x00000200
 #define chipMinorFeatures3_BUG_FIXES10				0x00000400
 #define chipMinorFeatures3_UNK11				0x00000800
@@ -199,7 +210,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK15				0x00008000
 #define chipMinorFeatures3_UNK16				0x00010000
 #define chipMinorFeatures3_UNK17				0x00020000
-#define chipMinorFeatures3_UNK18				0x00040000
+#define chipMinorFeatures3_ACE					0x00040000
 #define chipMinorFeatures3_UNK19				0x00080000
 #define chipMinorFeatures3_UNK20				0x00100000
 #define chipMinorFeatures3_UNK21				0x00200000
@@ -207,7 +218,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK23				0x00800000
 #define chipMinorFeatures3_UNK24				0x01000000
 #define chipMinorFeatures3_UNK25				0x02000000
-#define chipMinorFeatures3_UNK26				0x04000000
+#define chipMinorFeatures3_NEW_HZ				0x04000000
 #define chipMinorFeatures3_UNK27				0x08000000
 #define chipMinorFeatures3_UNK28				0x10000000
 #define chipMinorFeatures3_UNK29				0x20000000
@@ -229,9 +240,9 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK13				0x00002000
 #define chipMinorFeatures4_UNK14				0x00004000
 #define chipMinorFeatures4_UNK15				0x00008000
-#define chipMinorFeatures4_UNK16				0x00010000
+#define chipMinorFeatures4_HALTI2				0x00010000
 #define chipMinorFeatures4_UNK17				0x00020000
-#define chipMinorFeatures4_UNK18				0x00040000
+#define chipMinorFeatures4_SMALL_MSAA				0x00040000
 #define chipMinorFeatures4_UNK19				0x00080000
 #define chipMinorFeatures4_UNK20				0x00100000
 #define chipMinorFeatures4_UNK21				0x00200000
@@ -245,5 +256,37 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK29				0x20000000
 #define chipMinorFeatures4_UNK30				0x40000000
 #define chipMinorFeatures4_UNK31				0x80000000
+#define chipMinorFeatures5_UNK0					0x00000001
+#define chipMinorFeatures5_UNK1					0x00000002
+#define chipMinorFeatures5_UNK2					0x00000004
+#define chipMinorFeatures5_UNK3					0x00000008
+#define chipMinorFeatures5_UNK4					0x00000010
+#define chipMinorFeatures5_UNK5					0x00000020
+#define chipMinorFeatures5_UNK6					0x00000040
+#define chipMinorFeatures5_UNK7					0x00000080
+#define chipMinorFeatures5_UNK8					0x00000100
+#define chipMinorFeatures5_HALTI3				0x00000200
+#define chipMinorFeatures5_UNK10				0x00000400
+#define chipMinorFeatures5_UNK11				0x00000800
+#define chipMinorFeatures5_UNK12				0x00001000
+#define chipMinorFeatures5_UNK13				0x00002000
+#define chipMinorFeatures5_UNK14				0x00004000
+#define chipMinorFeatures5_UNK15				0x00008000
+#define chipMinorFeatures5_UNK16				0x00010000
+#define chipMinorFeatures5_UNK17				0x00020000
+#define chipMinorFeatures5_UNK18				0x00040000
+#define chipMinorFeatures5_UNK19				0x00080000
+#define chipMinorFeatures5_UNK20				0x00100000
+#define chipMinorFeatures5_UNK21				0x00200000
+#define chipMinorFeatures5_UNK22				0x00400000
+#define chipMinorFeatures5_UNK23				0x00800000
+#define chipMinorFeatures5_UNK24				0x01000000
+#define chipMinorFeatures5_UNK25				0x02000000
+#define chipMinorFeatures5_UNK26				0x04000000
+#define chipMinorFeatures5_UNK27				0x08000000
+#define chipMinorFeatures5_UNK28				0x10000000
+#define chipMinorFeatures5_UNK29				0x20000000
+#define chipMinorFeatures5_UNK30				0x40000000
+#define chipMinorFeatures5_UNK31				0x80000000
 
 #endif /* COMMON_XML */
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 0064f2640396..6a7de5f1454a 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18437 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -182,8 +182,25 @@ Copyright (C) 2015
 
 #define VIVS_HI_CHIP_MINOR_FEATURE_3				0x00000088
 
+#define VIVS_HI_CHIP_SPECS_3					0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK		0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT		4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK		0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT		0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
 #define VIVS_HI_CHIP_MINOR_FEATURE_4				0x00000094
 
+#define VIVS_HI_CHIP_SPECS_4					0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK			0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT		12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5				0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID					0x000000a8
+
 #define VIVS_PM							0x00000000
 
 #define VIVS_PM_POWER_CONTROLS					0x00000100
@@ -206,6 +223,11 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE		0x00000001
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE		0x00000002
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE		0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH		0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA		0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE		0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA		0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX		0x00000080
 
 #define VIVS_PM_PULSE_EATER					0x0000010c
 

From 507f899137f9e4f1405820b946063a6db78b2295 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 24 Jan 2016 17:35:48 +0000
Subject: [PATCH 186/262] drm/etnaviv: use defined constants for the chip model

Use the defined constants in common.xml.h for the chip model rather
than coding these as hex numbers.

Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 31 +++++++++++++++------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index a9530f59e6fe..7dc355ef7c4e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -173,7 +173,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 	/* Convert the register max value */
 	if (gpu->identity.register_max)
 		gpu->identity.register_max = 1 << gpu->identity.register_max;
-	else if (gpu->identity.model == 0x0400)
+	else if (gpu->identity.model == chipModel_GC400)
 		gpu->identity.register_max = 32;
 	else
 		gpu->identity.register_max = 64;
@@ -181,10 +181,10 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 	/* Convert thread count */
 	if (gpu->identity.thread_count)
 		gpu->identity.thread_count = 1 << gpu->identity.thread_count;
-	else if (gpu->identity.model == 0x0400)
+	else if (gpu->identity.model == chipModel_GC400)
 		gpu->identity.thread_count = 64;
-	else if (gpu->identity.model == 0x0500 ||
-		 gpu->identity.model == 0x0530)
+	else if (gpu->identity.model == chipModel_GC500 ||
+		 gpu->identity.model == chipModel_GC530)
 		gpu->identity.thread_count = 128;
 	else
 		gpu->identity.thread_count = 256;
@@ -206,7 +206,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 	if (gpu->identity.vertex_output_buffer_size) {
 		gpu->identity.vertex_output_buffer_size =
 			1 << gpu->identity.vertex_output_buffer_size;
-	} else if (gpu->identity.model == 0x0400) {
+	} else if (gpu->identity.model == chipModel_GC400) {
 		if (gpu->identity.revision < 0x4000)
 			gpu->identity.vertex_output_buffer_size = 512;
 		else if (gpu->identity.revision < 0x4200)
@@ -219,9 +219,9 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
 	switch (gpu->identity.instruction_count) {
 	case 0:
-		if ((gpu->identity.model == 0x2000 &&
+		if ((gpu->identity.model == chipModel_GC2000 &&
 		     gpu->identity.revision == 0x5108) ||
-		    gpu->identity.model == 0x880)
+		    gpu->identity.model == chipModel_GC880)
 			gpu->identity.instruction_count = 512;
 		else
 			gpu->identity.instruction_count = 256;
@@ -253,7 +253,7 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 	/* Special case for older graphic cores. */
 	if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
 	     >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
-		gpu->identity.model    = 0x500; /* gc500 */
+		gpu->identity.model    = chipModel_GC500;
 		gpu->identity.revision =
 			(chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
 			>> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
@@ -269,12 +269,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 		 * same.  Only for GC400 family.
 		 */
 		if ((gpu->identity.model & 0xff00) == 0x0400 &&
-		    gpu->identity.model != 0x0420) {
+		    gpu->identity.model != chipModel_GC420) {
 			gpu->identity.model = gpu->identity.model & 0x0400;
 		}
 
 		/* Another special case */
-		if (gpu->identity.model == 0x300 &&
+		if (gpu->identity.model == chipModel_GC300 &&
 		    gpu->identity.revision == 0x2201) {
 			u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
 			u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
@@ -295,11 +295,13 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 	gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
 
 	/* Disable fast clear on GC700. */
-	if (gpu->identity.model == 0x700)
+	if (gpu->identity.model == chipModel_GC700)
 		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
-	if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
-	    (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+	if ((gpu->identity.model == chipModel_GC500 &&
+	     gpu->identity.revision < 2) ||
+	    (gpu->identity.model == chipModel_GC300 &&
+	     gpu->identity.revision < 0x2000)) {
 
 		/*
 		 * GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
@@ -466,7 +468,8 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 		  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
 	/* GC2000 rev 5108 needs a special bus config */
-	if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+	if (gpu->identity.model == chipModel_GC2000 &&
+	    gpu->identity.revision == 0x5108) {
 		u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
 		bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
 				VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);

From 52f36ba1d6134f5c1c45deb0da53442a5971358e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 24 Jan 2016 17:35:54 +0000
Subject: [PATCH 187/262] drm/etnaviv: add helper to extract bitfields

Add a helper to extract etnaviv bitfields from register values.

Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 60 ++++++++++++---------------
 1 file changed, 26 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 7dc355ef7c4e..2c27fac0d2de 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -120,6 +120,9 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 	return 0;
 }
 
+#define etnaviv_field(val, field) \
+	(((val) & field##__MASK) >> field##__SHIFT)
+
 static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
 	if (gpu->identity.minor_features0 &
@@ -129,37 +132,28 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 		specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
 		specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
 
-		gpu->identity.stream_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
-		gpu->identity.register_max =
-			(specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
-				>> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
-		gpu->identity.thread_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
-		gpu->identity.vertex_cache_size =
-			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
-		gpu->identity.shader_core_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
-		gpu->identity.pixel_pipes =
-			(specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
-				>> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+		gpu->identity.stream_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+		gpu->identity.register_max = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+		gpu->identity.thread_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+		gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+		gpu->identity.shader_core_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+		gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
 		gpu->identity.vertex_output_buffer_size =
-			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
+			etnaviv_field(specs[0],
+				VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
 
-		gpu->identity.buffer_size =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
-		gpu->identity.instruction_count =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
-		gpu->identity.num_constants =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+		gpu->identity.buffer_size = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+		gpu->identity.instruction_count = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+		gpu->identity.num_constants = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
 	}
 
 	/* Fill in the stream count if not specified */
@@ -251,12 +245,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 	chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
 
 	/* Special case for older graphic cores. */
-	if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
-	     >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
+	if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
 		gpu->identity.model    = chipModel_GC500;
-		gpu->identity.revision =
-			(chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
-			>> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+		gpu->identity.revision = etnaviv_field(chipIdentity,
+					 VIVS_HI_CHIP_IDENTITY_REVISION);
 	} else {
 
 		gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);

From 472f79dcf21d34f4d667910002482efe3ca4ba34 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 24 Jan 2016 17:35:59 +0000
Subject: [PATCH 188/262] drm/etnaviv: add helper for comparing model/revision
 IDs

Add and use a helper for comparing the model and revision IDs.

Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 2c27fac0d2de..d5fad472e91f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -120,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 	return 0;
 }
 
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+	((gpu)->identity.model == chipModel_##mod && \
+	 (gpu)->identity.revision == rev)
 #define etnaviv_field(val, field) \
 	(((val) & field##__MASK) >> field##__SHIFT)
 
@@ -213,8 +217,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
 	switch (gpu->identity.instruction_count) {
 	case 0:
-		if ((gpu->identity.model == chipModel_GC2000 &&
-		     gpu->identity.revision == 0x5108) ||
+		if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
 		    gpu->identity.model == chipModel_GC880)
 			gpu->identity.instruction_count = 512;
 		else
@@ -266,8 +269,7 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 		}
 
 		/* Another special case */
-		if (gpu->identity.model == chipModel_GC300 &&
-		    gpu->identity.revision == 0x2201) {
+		if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
 			u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
 			u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
 
@@ -435,10 +437,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
 	u16 prefetch;
 
-	if (gpu->identity.model == chipModel_GC320 &&
-	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
-	    (gpu->identity.revision == 0x5007 ||
-	     gpu->identity.revision == 0x5220)) {
+	if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+	     etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
 		u32 mc_memory_debug;
 
 		mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -460,8 +461,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 		  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
 	/* GC2000 rev 5108 needs a special bus config */
-	if (gpu->identity.model == chipModel_GC2000 &&
-	    gpu->identity.revision == 0x5108) {
+	if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
 		u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
 		bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
 				VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);

From 602eb48966d7b7f7e64dca8d9ea2842d83bfae73 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 24 Jan 2016 17:36:04 +0000
Subject: [PATCH 189/262] drm/etnaviv: add further minor features and varyings
 count

Export further minor feature bitmasks and the varyings count from
the GPU specifications registers to userspace.

Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 62 ++++++++++++++++++++++++++-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h |  9 ++++
 include/uapi/drm/etnaviv_drm.h        |  3 ++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index d5fad472e91f..7b511ad9f54b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -72,6 +72,14 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 		*value = gpu->identity.minor_features3;
 		break;
 
+	case ETNAVIV_PARAM_GPU_FEATURES_5:
+		*value = gpu->identity.minor_features4;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_6:
+		*value = gpu->identity.minor_features5;
+		break;
+
 	case ETNAVIV_PARAM_GPU_STREAM_COUNT:
 		*value = gpu->identity.stream_count;
 		break;
@@ -112,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 		*value = gpu->identity.num_constants;
 		break;
 
+	case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+		*value = gpu->identity.varyings_count;
+		break;
+
 	default:
 		DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
 		return -EINVAL;
@@ -131,10 +143,13 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
 	if (gpu->identity.minor_features0 &
 	    chipMinorFeatures0_MORE_MINOR_FEATURES) {
-		u32 specs[2];
+		u32 specs[4];
+		unsigned int streams;
 
 		specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
 		specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
+		specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+		specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
 
 		gpu->identity.stream_count = etnaviv_field(specs[0],
 					VIVS_HI_CHIP_SPECS_STREAM_COUNT);
@@ -158,6 +173,15 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 					VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
 		gpu->identity.num_constants = etnaviv_field(specs[1],
 					VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+		gpu->identity.varyings_count = etnaviv_field(specs[2],
+					VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+		/* This overrides the value from older register if non-zero */
+		streams = etnaviv_field(specs[3],
+					VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+		if (streams)
+			gpu->identity.stream_count = streams;
 	}
 
 	/* Fill in the stream count if not specified */
@@ -239,6 +263,30 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
 	if (gpu->identity.num_constants == 0)
 		gpu->identity.num_constants = 168;
+
+	if (gpu->identity.varyings_count == 0) {
+		if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+			gpu->identity.varyings_count = 12;
+		else
+			gpu->identity.varyings_count = 8;
+	}
+
+	/*
+	 * For some cores, two varyings are consumed for position, so the
+	 * maximum varying count needs to be reduced by one.
+	 */
+	if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+	    etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+	    etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+	    etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+	    etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+	    etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+	    etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+	    etnaviv_is_model_rev(gpu, GC880, 0x5106))
+		gpu->identity.varyings_count -= 1;
 }
 
 static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -305,6 +353,8 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 		gpu->identity.minor_features1 = 0;
 		gpu->identity.minor_features2 = 0;
 		gpu->identity.minor_features3 = 0;
+		gpu->identity.minor_features4 = 0;
+		gpu->identity.minor_features5 = 0;
 	} else
 		gpu->identity.minor_features0 =
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -317,6 +367,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
 		gpu->identity.minor_features3 =
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+		gpu->identity.minor_features4 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+		gpu->identity.minor_features5 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
 	}
 
 	/* GC600 idle register reports zero bits where modules aren't present */
@@ -645,6 +699,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 		   gpu->identity.minor_features2);
 	seq_printf(m, "\t minor_features3: 0x%08x\n",
 		   gpu->identity.minor_features3);
+	seq_printf(m, "\t minor_features4: 0x%08x\n",
+		   gpu->identity.minor_features4);
+	seq_printf(m, "\t minor_features5: 0x%08x\n",
+		   gpu->identity.minor_features5);
 
 	seq_puts(m, "\tspecs\n");
 	seq_printf(m, "\t stream_count:  %d\n",
@@ -667,6 +725,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 			gpu->identity.instruction_count);
 	seq_printf(m, "\t num_constants: %d\n",
 			gpu->identity.num_constants);
+	seq_printf(m, "\t varyings_count: %d\n",
+			gpu->identity.varyings_count);
 
 	seq_printf(m, "\taxi: 0x%08x\n", axi);
 	seq_printf(m, "\tidle: 0x%08x\n", idle);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index c75d50359ab0..f233ac4c7c1c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -46,6 +46,12 @@ struct etnaviv_chip_identity {
 	/* Supported minor feature 3 fields. */
 	u32 minor_features3;
 
+	/* Supported minor feature 4 fields. */
+	u32 minor_features4;
+
+	/* Supported minor feature 5 fields. */
+	u32 minor_features5;
+
 	/* Number of streams supported. */
 	u32 stream_count;
 
@@ -75,6 +81,9 @@ struct etnaviv_chip_identity {
 
 	/* Buffer size */
 	u32 buffer_size;
+
+	/* Number of varyings */
+	u8 varyings_count;
 };
 
 struct etnaviv_event {
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 4cc989ad6851..f95e1c43c3fb 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -48,6 +48,8 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 

From 45d16a6d94580cd3c6baed69b5fe441ece599fc4 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 25 Jan 2016 12:41:05 +0100
Subject: [PATCH 190/262] drm/etnaviv: fix memory leak in IOMMU init path

Plug in error handling to free any allocated ressources in the
IOMMU init path.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 7b511ad9f54b..a33162cf4f4c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -596,10 +596,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}
 
-	/* TODO: we will leak here memory - fix it! */
-
 	gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
 	if (!gpu->mmu) {
+		iommu_domain_free(iommu);
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -609,7 +608,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	if (!gpu->buffer) {
 		ret = -ENOMEM;
 		dev_err(gpu->dev, "could not create command buffer\n");
-		goto fail;
+		goto destroy_iommu;
 	}
 	if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
 		ret = -EINVAL;
@@ -639,6 +638,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 free_buffer:
 	etnaviv_gpu_cmdbuf_free(gpu->buffer);
 	gpu->buffer = NULL;
+destroy_iommu:
+	etnaviv_iommu_destroy(gpu->mmu);
+	gpu->mmu = NULL;
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);

From 9f07bb0d4ada68f05b2e51c10720d4688e6adea4 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 25 Jan 2016 15:37:28 +0100
Subject: [PATCH 191/262] drm/etnaviv: fix get pages error path in
 etnaviv_gem_vaddr

In case that etnaviv_gem_get_pages is unable to get the required
pages the object mutex needs to be unlocked. Also return NULL in
this case instead of propagating the error, as callers of this
function might not be prepared to handle a pointer error, but
expect this call to follow the semantics of a plain vmap to return
NULL in case of an error.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_dump.c | 2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c  | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index fd7d3e989e79..09a759e69dd2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -216,7 +216,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		iter.hdr->iova = cpu_to_le64(vram->iova);
 
 		vaddr = etnaviv_gem_vaddr(&obj->base);
-		if (vaddr && !IS_ERR(vaddr))
+		if (vaddr)
 			memcpy(iter.data, vaddr, obj->base.size);
 
 		etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 9f77c3b94cc6..b22712fdd31e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -361,8 +361,10 @@ void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
 	if (!etnaviv_obj->vaddr) {
 		struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
 
-		if (IS_ERR(pages))
-			return ERR_CAST(pages);
+		if (IS_ERR(pages)) {
+			mutex_unlock(&etnaviv_obj->lock);
+			return NULL;
+		}
 
 		etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
 				VM_MAP, pgprot_writecombine(PAGE_KERNEL));

From ce3088fdb51eda7b9ef3d119e7c302c08428f274 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Tue, 26 Jan 2016 18:10:32 +0100
Subject: [PATCH 192/262] drm/etnaviv: rename etnaviv_gem_vaddr to
 etnaviv_gem_vmap

This function follows the semantics of vmap() by returning
NULL in case of an error. To make things less confusing
rename it to make make both functions more closely related.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_drv.h       | 2 +-
 drivers/gpu/drm/etnaviv/etnaviv_dump.c      | 2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c       | 2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index d6bd438bd5be..1cd6046e76b1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -85,7 +85,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
 	struct dma_buf_attachment *attach, struct sg_table *sg);
 int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
 		struct timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 09a759e69dd2..4a29eeadbf1e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -215,7 +215,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
 		iter.hdr->iova = cpu_to_le64(vram->iova);
 
-		vaddr = etnaviv_gem_vaddr(&obj->base);
+		vaddr = etnaviv_gem_vmap(&obj->base);
 		if (vaddr)
 			memcpy(iter.data, vaddr, obj->base.size);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index b22712fdd31e..1da3d48054c2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -353,7 +353,7 @@ void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
 	drm_gem_object_unreference_unlocked(obj);
 }
 
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index e94db4f95770..9c054b6c24d6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -31,7 +31,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
 {
-	return etnaviv_gem_vaddr(obj);
+	return etnaviv_gem_vmap(obj);
 }
 
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)

From a0a5ab3e99b8e617221caabf074dcabd1659b9d8 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 25 Jan 2016 15:47:28 +0100
Subject: [PATCH 193/262] drm/etnaviv: call correct function when trying to
 vmap a DMABUF

When trying to get the vmap address of an imported buffer, we must
call into the appropriate helper function, to allow the exporter to
establish the vmap, instead of trying to vmap the buffer on our own.

Add an indirection through etnaviv_gem_ops to allow the correct
implementation to be called.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c       | 36 ++++++++++++++-------
 drivers/gpu/drm/etnaviv/etnaviv_gem.h       |  1 +
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c |  8 +++++
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 1da3d48054c2..4b519e4309b2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -357,23 +357,35 @@ void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
+	if (etnaviv_obj->vaddr)
+		return etnaviv_obj->vaddr;
+
 	mutex_lock(&etnaviv_obj->lock);
-	if (!etnaviv_obj->vaddr) {
-		struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
-		if (IS_ERR(pages)) {
-			mutex_unlock(&etnaviv_obj->lock);
-			return NULL;
-		}
-
-		etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
-				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-	}
+	/*
+	 * Need to check again, as we might have raced with another thread
+	 * while waiting for the mutex.
+	 */
+	if (!etnaviv_obj->vaddr)
+		etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
 	mutex_unlock(&etnaviv_obj->lock);
 
 	return etnaviv_obj->vaddr;
 }
 
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+	struct page **pages;
+
+	lockdep_assert_held(&obj->lock);
+
+	pages = etnaviv_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return NULL;
+
+	return vmap(pages, obj->base.size >> PAGE_SHIFT,
+			VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
 static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 {
 	if (op & ETNA_PREP_READ)
@@ -524,6 +536,7 @@ static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
 	.get_pages = etnaviv_gem_shmem_get_pages,
 	.release = etnaviv_gem_shmem_release,
+	.vmap = etnaviv_gem_vmap_impl,
 };
 
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -868,6 +881,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
 	.get_pages = etnaviv_gem_userptr_get_pages,
 	.release = etnaviv_gem_userptr_release,
+	.vmap = etnaviv_gem_vmap_impl,
 };
 
 int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index a300b4b3d545..ab5df8147a5f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -78,6 +78,7 @@ struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj)
 struct etnaviv_gem_ops {
 	int (*get_pages)(struct etnaviv_gem_object *);
 	void (*release)(struct etnaviv_gem_object *);
+	void *(*vmap)(struct etnaviv_gem_object *);
 };
 
 static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index 9c054b6c24d6..4e67395f5fa1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -77,9 +77,17 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
 	drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
 }
 
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+	lockdep_assert_held(&etnaviv_obj->lock);
+
+	return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
 static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
 	/* .get_pages should never be called */
 	.release = etnaviv_gem_prime_release,
+	.vmap = etnaviv_gem_prime_vmap_impl,
 };
 
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,

From 3625c2c234ef66acf21a72d47a5ffa94f6c5ebf2 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Tue, 26 Jan 2016 04:15:18 -0700
Subject: [PATCH 194/262] x86/mm: Fix types used in pgprot cacheability flags
 translations

For PAE kernels "unsigned long" is not suitable to hold page protection
flags, since _PAGE_NX doesn't fit there. This is the reason for quite a
few W+X pages getting reported as insecure during boot (observed namely
for the entire initrd range).

Fixes: 281d4078be ("x86: Make page cache mode a real type")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <JGross@suse.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/56A7635602000078000CAFF1@prv-mh.provo.novell.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable_types.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index a471cadb9630..79c91853e50e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -363,20 +363,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
 }
 static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
 {
+	pgprotval_t val = pgprot_val(pgprot);
 	pgprot_t new;
-	unsigned long val;
 
-	val = pgprot_val(pgprot);
 	pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
 		((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
 	return new;
 }
 static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 {
+	pgprotval_t val = pgprot_val(pgprot);
 	pgprot_t new;
-	unsigned long val;
 
-	val = pgprot_val(pgprot);
 	pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
 			  ((val & _PAGE_PAT_LARGE) >>
 			   (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));

From 7f66cd3f5420e7d11abd234033e7cb7a9738fc38 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 26 Jan 2016 01:18:13 +0100
Subject: [PATCH 195/262] ARM: 8500/1: fix atags_to_fdt with
 stack-protector-strong

Building with CONFIG_CC_STACKPROTECTOR_STRONG triggers protection code
generation under CONFIG_ARM_ATAG_DTB_COMPAT but this is too early for
being able to use any of the stack_chk code. Explicitly disable it for
only the atags_to_fdt bits.

Suggested-by: zhxihu <zhxihu@marvell.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/boot/compressed/Makefile | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 4c23a68a0917..7a6a58ef8aaf 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -106,6 +106,15 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp_flags := $(call cc-option, -fno-stack-protector)
+CFLAGS_atags_to_fdt.o := $(nossp_flags)
+CFLAGS_fdt.o := $(nossp_flags)
+CFLAGS_fdt_ro.o := $(nossp_flags)
+CFLAGS_fdt_rw.o := $(nossp_flags)
+CFLAGS_fdt_wip.o := $(nossp_flags)
+
 ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 

From 7e56f627768da4e6480986b5145dc3422bc448a5 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Wed, 2 Dec 2015 16:25:32 +1100
Subject: [PATCH 196/262] powerpc/eeh: Fix PE location code

In eeh_pe_loc_get(), the PE location code is retrieved from the
"ibm,loc-code" property of the device node for the bridge of the
PE's primary bus. It's not correct because the property indicates
the parent PE's location code.

This reads the correct PE location code from "ibm,io-base-loc-code"
or "ibm,slot-location-code" property of PE parent bus's device node.

Cc: stable@vger.kernel.org # v3.16+
Fixes: 357b2f3dd9b7 ("powerpc/eeh: Dump PE location code")
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Tested-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_pe.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 8654cb166c19..ca9e5371930e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -883,32 +883,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
 	struct pci_bus *bus = eeh_pe_bus_get(pe);
-	struct device_node *dn = pci_bus_to_OF_node(bus);
+	struct device_node *dn;
 	const char *loc = NULL;
 
-	if (!dn)
-		goto out;
+	while (bus) {
+		dn = pci_bus_to_OF_node(bus);
+		if (!dn) {
+			bus = bus->parent;
+			continue;
+		}
 
-	/* PHB PE or root PE ? */
-	if (pci_is_root_bus(bus)) {
-		loc = of_get_property(dn, "ibm,loc-code", NULL);
-		if (!loc)
+		if (pci_is_root_bus(bus))
 			loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
-		if (loc)
-			goto out;
+		else
+			loc = of_get_property(dn, "ibm,slot-location-code",
+					      NULL);
 
-		/* Check the root port */
-		dn = dn->child;
-		if (!dn)
-			goto out;
+		if (loc)
+			return loc;
+
+		bus = bus->parent;
 	}
 
-	loc = of_get_property(dn, "ibm,loc-code", NULL);
-	if (!loc)
-		loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
-	return loc ? loc : "N/A";
+	return "N/A";
 }
 
 /**

From 13b4389143413a1f18127c07f72c74cad5b563e8 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 20 Jan 2016 11:26:01 -0500
Subject: [PATCH 197/262] SCSI: fix crashes in sd and sr runtime PM

Runtime suspend during driver probe and removal can cause problems.
The driver's runtime_suspend or runtime_resume callbacks may invoked
before the driver has finished binding to the device or after the
driver has unbound from the device.

This problem shows up with the sd and sr drivers, and can cause disk
or CD/DVD drives to become unusable as a result.  The fix is simple.
The drivers store a pointer to the scsi_disk or scsi_cd structure as
their private device data when probing is finished, so we simply have
to be sure to clear the private data during removal and test it during
runtime suspend/resume.

This fixes <https://bugs.debian.org/801925>.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Paul Menzel <paul.menzel@giantmonkey.de>
Reported-by: Erich Schubert <erich@debian.org>
Reported-by: Alexandre Rossi <alexandre.rossi@gmail.com>
Tested-by: Paul Menzel <paul.menzel@giantmonkey.de>
Tested-by: Erich Schubert <erich@debian.org>
CC: <stable@vger.kernel.org>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/sd.c | 7 +++++--
 drivers/scsi/sr.c | 4 ++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4e08d1cd704d..84fa4c46eaa6 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3268,8 +3268,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 	int ret = 0;
 
-	if (!sdkp)
-		return 0;	/* this can happen */
+	if (!sdkp)	/* E.g.: runtime suspend following sd_remove() */
+		return 0;
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@ static int sd_resume(struct device *dev)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+	if (!sdkp)	/* E.g.: runtime resume at the start of sd_probe() */
+		return 0;
+
 	if (!sdkp->device->manage_start_stop)
 		return 0;
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 8bd54a64efd6..64c867405ad4 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct device *dev)
 {
 	struct scsi_cd *cd = dev_get_drvdata(dev);
 
+	if (!cd)	/* E.g.: runtime suspend following sr_remove() */
+		return 0;
+
 	if (cd->media_present)
 		return -EBUSY;
 	else
@@ -985,6 +988,7 @@ static int sr_remove(struct device *dev)
 	scsi_autopm_get_device(cd->device);
 
 	del_gendisk(cd->disk);
+	dev_set_drvdata(dev, NULL);
 
 	mutex_lock(&sr_ref_mutex);
 	kref_put(&cd->kref, sr_kref_release);

From 61595dca742a9ba9a4c998b9af1f468adc816275 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 27 Jan 2016 07:05:56 +0100
Subject: [PATCH 198/262] ALSA: Add missing dependency on CONFIG_SND_TIMER

Since the build of PCM timer may be disabled via Kconfig now, each
driver that provides a timer interface needs to set CONFIG_SND_TIMER
explicitly.  Otherwise it may get a build error due to missing
symbol.

Fixes: 90bbaf66ee7b ('ALSA: timer: add config item to export PCM timer disabling for expert')
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Cc: <stable@vger.kernel.org> # v4.4+
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/isa/Kconfig   | 4 ++++
 sound/pci/Kconfig   | 3 +++
 sound/sparc/Kconfig | 1 +
 3 files changed, 8 insertions(+)

diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index 0216475fc759..37adcc6cbe6b 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -3,6 +3,7 @@
 config SND_WSS_LIB
         tristate
         select SND_PCM
+	select SND_TIMER
 
 config SND_SB_COMMON
         tristate
@@ -42,6 +43,7 @@ config SND_AD1816A
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Analog Devices SoundPort
 	  AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@ config SND_GUSCLASSIC
 	tristate "Gravis UltraSound Classic"
 	select SND_RAWMIDI
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Gravis UltraSound Classic
 	  soundcards.
@@ -221,6 +224,7 @@ config SND_GUSEXTREME
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Gravis UltraSound Extreme
 	  soundcards.
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 656ce39bddbc..8f6594a7d37f 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -155,6 +155,7 @@ config SND_AZT3328
 	select SND_PCM
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
+	select SND_TIMER
 	depends on ZONE_DMA
 	help
 	  Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@ config SND_EMU10K1
 	select SND_HWDEP
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
+	select SND_TIMER
 	depends on ZONE_DMA
 	help
 	  Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@ config SND_YMFPCI
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
+	select SND_TIMER
 	help
 	  Say Y here to include support for Yamaha PCI audio chips -
 	  YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
diff --git a/sound/sparc/Kconfig b/sound/sparc/Kconfig
index d75deba5617d..dfcd38647606 100644
--- a/sound/sparc/Kconfig
+++ b/sound/sparc/Kconfig
@@ -22,6 +22,7 @@ config SND_SUN_AMD7930
 config SND_SUN_CS4231
 	tristate "Sun CS4231"
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for CS4231 sound device on Sun.
 

From 1ca8ec532fc2d986f1f4a319857bb18e0c9739b4 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <kernellwp@gmail.com>
Date: Wed, 27 Jan 2016 19:26:07 +0800
Subject: [PATCH 199/262] tick/nohz: Set the correct expiry when switching to
 nohz/lowres mode

commit 0ff53d096422 sets the next tick interrupt to the last jiffies update,
i.e. in the past, because the forward operation is invoked before the set
operation. There is no resulting damage (yet), but we get an extra pointless
tick interrupt.

Revert the order so we get the next tick interrupt in the future.

Fixes: commit 0ff53d096422 "tick: sched: Force tick interrupt and get rid of softirq magic"
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1453893967-3458-1-git-send-email-wanpeng.li@hotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index cbe5d8dcf15a..de2d9fef6ea6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -995,9 +995,9 @@ static void tick_nohz_switch_to_nohz(void)
 	/* Get the next period */
 	next = tick_init_jiffy_update();
 
-	hrtimer_forward_now(&ts->sched_timer, tick_period);
 	hrtimer_set_expires(&ts->sched_timer, next);
-	tick_program_event(next, 1);
+	hrtimer_forward_now(&ts->sched_timer, tick_period);
+	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
 	tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
 }
 

From 49f34134aea74f19ca016f055d25ee55ec359dee Mon Sep 17 00:00:00 2001
From: Milo Kim <milo.kim@ti.com>
Date: Wed, 13 Jan 2016 16:19:50 +0900
Subject: [PATCH 200/262] irqchip/atmel-aic: Fix wrong bit operation for IRQ
 priority

Atmel AIC has common structure for SMR (Source Mode Register).

  bit[6:5] Interrupt source type
  bit[2:0] Priority level
  Other bits are unused.

To update new priority value, bit[2:0] should be cleared first and then
new priority level can be written. However, aic_common_set_priority()
helper clears source type bits instead of priority bits.
This patch fixes wrong mask bit operation.

Fixes: b1479ebb7720 "irqchip: atmel-aic: Add atmel AIC/AIC5 drivers"
Signed-off-by: Milo Kim <milo.kim@ti.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Ludovic Desroches <ludovic.desroches@atmel.com>
Cc: Nicholas Ferre <nicolas.ferre@atmel.com>
Cc: stable@vger.kernel.org #v3.17+
Link: http://lkml.kernel.org/r/1452669592-3401-2-git-send-email-milo.kim@ti.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-atmel-aic-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c
index b12a5d58546f..37199b9b2cfa 100644
--- a/drivers/irqchip/irq-atmel-aic-common.c
+++ b/drivers/irqchip/irq-atmel-aic-common.c
@@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val)
 	    priority > AT91_AIC_IRQ_MAX_PRIORITY)
 		return -EINVAL;
 
-	*val &= AT91_AIC_PRIOR;
+	*val &= ~AT91_AIC_PRIOR;
 	*val |= priority;
 
 	return 0;

From bf6092066f80840410e3401cd962b23d54a95713 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 27 Jan 2016 14:06:29 +0100
Subject: [PATCH 201/262] btrfs: sysfs: check initialization state before
 updating features

If the mount phase is not finished, we can't update the sysfs files.

Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/sysfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 6986886243bf..539e7b5e3f86 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -804,6 +804,9 @@ void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
 	fs_devs = fs_info->fs_devices;
 	fsid_kobj = &fs_devs->fsid_kobj;
 
+	if (!fsid_kobj->state_initialized)
+		return;
+
 	/*
 	 * FIXME: this is too heavy to update just one value, ideally we'd like
 	 * to use sysfs_update_group but some refactoring is needed first.

From b3c6de492b9ea30a8dcc535d4dc2eaaf0bb3f116 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Thu, 21 Jan 2016 16:47:29 +0100
Subject: [PATCH 202/262] cleancache: constify cleancache_ops structure

The cleancache_ops structure is never modified, so declare it as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/tmem.c         | 2 +-
 include/linux/cleancache.h | 2 +-
 mm/cleancache.c            | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 945fc4327201..4ac2ca8a7656 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -242,7 +242,7 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
 	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
-static struct cleancache_ops tmem_cleancache_ops = {
+static const struct cleancache_ops tmem_cleancache_ops = {
 	.put_page = tmem_cleancache_put_page,
 	.get_page = tmem_cleancache_get_page,
 	.invalidate_page = tmem_cleancache_flush_page,
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index bda5ec0b4b4d..cb3e14234502 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -37,7 +37,7 @@ struct cleancache_ops {
 	void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 8fc50811119b..ba5d8f3e6d68 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -22,7 +22,7 @@
  * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops *cleancache_ops __read_mostly;
+static const struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/cleancache (if debugfs is
@@ -49,7 +49,7 @@ static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
 /*
  * Register operations for cleancache. Returns 0 on success.
  */
-int cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(const struct cleancache_ops *ops)
 {
 	if (cmpxchg(&cleancache_ops, NULL, ops))
 		return -EBUSY;

From a39bb9a0557a3fc860c3c9d67a7326b0d2f1bd66 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen.5i5j@gmail.com>
Date: Thu, 7 Jan 2016 05:06:13 +0800
Subject: [PATCH 203/262] include/linux/cleancache.h: Clean up code

Let cleancache_fs_enabled() call cleancache_fs_enabled_mapping()
directly.

Remove redundant variable ret in cleancache_get_page().

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/cleancache.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index cb3e14234502..fccf7f44139d 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -48,14 +48,14 @@ extern void __cleancache_invalidate_fs(struct super_block *);
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-	return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
 	return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+	return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@ static inline void cleancache_init_shared_fs(struct super_block *sb)
 
 static inline int cleancache_get_page(struct page *page)
 {
-	int ret = -1;
-
 	if (cleancache_enabled && cleancache_fs_enabled(page))
-		ret = __cleancache_get_page(page);
-	return ret;
+		return __cleancache_get_page(page);
+	return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)

From 0b6ec8c0a3708f0a54b75ee1200772ec5226ec49 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 27 Jan 2016 15:37:58 +0100
Subject: [PATCH 204/262] debugobjects: Allow bigger number of early boot
 objects

On my bigger s390 systems  I always get "Out of memory.
ODEBUG disabled". Since the number of objects is needed at
compile time, we can not change the size dynamically before
the caches etc are available. Doubling the size seems to
do the trick. Since it is init data it will be freed anyway,
this should be ok.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Link: http://lkml.kernel.org/r/1453905478-13409-1-git-send-email-borntraeger@de.ibm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/debugobjects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 547f7f923dbc..519b5a10fd70 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -21,7 +21,7 @@
 #define ODEBUG_HASH_BITS	14
 #define ODEBUG_HASH_SIZE	(1 << ODEBUG_HASH_BITS)
 
-#define ODEBUG_POOL_SIZE	512
+#define ODEBUG_POOL_SIZE	1024
 #define ODEBUG_POOL_MIN_LEVEL	256
 
 #define ODEBUG_CHUNK_SHIFT	PAGE_SHIFT

From e1c0ebad3f77387c6684f8b7e86a4bbaca7577ac Mon Sep 17 00:00:00 2001
From: Chris Mason <clm@fb.com>
Date: Wed, 27 Jan 2016 06:38:45 -0800
Subject: [PATCH 205/262] btrfs: don't use GFP_HIGHMEM for free-space-tree
 bitmap kzalloc

This was copied incorrectly from the __vmalloc call.

Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/free-space-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index dfa8124effb8..81a053d2966b 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -164,7 +164,7 @@ static unsigned long *alloc_bitmap(u32 bitmap_size)
 	if  (bitmap_size <= PAGE_SIZE)
 		return kzalloc(bitmap_size, GFP_NOFS);
 
-	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOWARN);
+	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
 	if (mem)
 		return mem;
 

From 103502a35cfce0710909da874f092cb44823ca03 Mon Sep 17 00:00:00 2001
From: Jann Horn <jann@thejh.net>
Date: Sat, 26 Dec 2015 06:00:48 +0100
Subject: [PATCH 206/262] seccomp: always propagate NO_NEW_PRIVS on tsync

Before this patch, a process with some permissive seccomp filter
that was applied by root without NO_NEW_PRIVS was able to add
more filters to itself without setting NO_NEW_PRIVS by setting
the new filter from a throwaway thread with NO_NEW_PRIVS.

Signed-off-by: Jann Horn <jann@thejh.net>
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/seccomp.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 580ac2d4024f..15a1795bbba1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void)
 		put_seccomp_filter(thread);
 		smp_store_release(&thread->seccomp.filter,
 				  caller->seccomp.filter);
+
+		/*
+		 * Don't let an unprivileged task work around
+		 * the no_new_privs restriction by creating
+		 * a thread that sets it up, enters seccomp,
+		 * then dies.
+		 */
+		if (task_no_new_privs(caller))
+			task_set_no_new_privs(thread);
+
 		/*
 		 * Opt the other thread into seccomp if needed.
 		 * As threads are considered to be trust-realm
 		 * equivalent (see ptrace_may_access), it is safe to
 		 * allow one thread to transition the other.
 		 */
-		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-			/*
-			 * Don't let an unprivileged task work around
-			 * the no_new_privs restriction by creating
-			 * a thread that sets it up, enters seccomp,
-			 * then dies.
-			 */
-			if (task_no_new_privs(caller))
-				task_set_no_new_privs(thread);
-
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
 			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-		}
 	}
 }
 

From 08b21d30c6f619aa3718620a7de91207d28bdbc5 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 21 Jan 2016 19:24:44 +0800
Subject: [PATCH 207/262] drm/amd/powerplay: Update SMU firmware loading for
 Stoney

Fix firmware init on Stoney when powerplay is enabled.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/powerplay/smumgr/cz_smumgr.c  | 41 +++++++++++++++----
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 873a8d264d5c..ec222c665602 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -272,6 +272,9 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
 				UCODE_ID_CP_MEC_JT1_MASK |
 				UCODE_ID_CP_MEC_JT2_MASK;
 
+	if (smumgr->chip_id == CHIP_STONEY)
+		fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
 	cz_request_smu_load_fw(smumgr);
 	cz_check_fw_load_finish(smumgr, fw_to_check);
 
@@ -282,7 +285,7 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
 	return ret;
 }
 
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
 			enum cz_scratch_entry firmware_enum)
 {
 	uint8_t ret = 0;
@@ -292,7 +295,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
 		ret = UCODE_ID_SDMA0;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
-		ret = UCODE_ID_SDMA1;
+		if (smumgr->chip_id == CHIP_STONEY)
+			ret = UCODE_ID_SDMA0;
+		else
+			ret = UCODE_ID_SDMA1;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
 		ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
 		ret = UCODE_ID_CP_MEC_JT1;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
-		ret = UCODE_ID_CP_MEC_JT2;
+		if (smumgr->chip_id == CHIP_STONEY)
+			ret = UCODE_ID_CP_MEC_JT1;
+		else
+			ret = UCODE_ID_CP_MEC_JT2;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
 		ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@ static int cz_smu_populate_single_scratch_task(
 	struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
 	task->type = type;
-	task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+	task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
 	task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
 	for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@ static int cz_smu_populate_single_ucode_load_task(
 	struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
 	task->type = TASK_TYPE_UCODE_LOAD;
-	task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+	task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
 	task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
 	for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct pp_smumgr *smumgr)
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
 
@@ -551,7 +566,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
 
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@ static int cz_smu_populate_firmware_entries(struct pp_smumgr *smumgr)
 
 	for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
 
-		firmware_type = cz_translate_firmware_enum_to_arg(
+		firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
 					firmware_list[i]);
 
 		ucode_id = cz_convert_fw_type_to_cgs(firmware_type);

From c9a392eac18409f51a071520cf508c0b4ad990e2 Mon Sep 17 00:00:00 2001
From: Slava Grigorev <slava.grigorev@amd.com>
Date: Tue, 26 Jan 2016 16:45:10 -0500
Subject: [PATCH 208/262] drm/radeon: cleaned up VCO output settings for DP
 audio
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is preparation for the fixes in the following patches.

Signed-off-by: Slava Grigorev <slava.grigorev@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/dce6_afmt.c       |  2 +-
 drivers/gpu/drm/radeon/radeon.h          |  2 +-
 drivers/gpu/drm/radeon/radeon_atombios.c | 12 +++++++-----
 drivers/gpu/drm/radeon/radeon_audio.c    |  8 +-------
 4 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 6bfc46369db1..ea4e3fc2744f 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -315,7 +315,7 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev,
 			div = 0;
 
 		if (div)
-			clock = rdev->clock.gpupll_outputfreq * 10 / div;
+			clock /= div;
 
 		WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
 		WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5ae6db98aa4d..78a51b3eda10 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -268,7 +268,7 @@ struct radeon_clock {
 	uint32_t current_dispclk;
 	uint32_t dp_extclk;
 	uint32_t max_pixel_clock;
-	uint32_t gpupll_outputfreq;
+	uint32_t vco_freq;
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 08fc1b5effa8..9a9363a7e5b9 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1257,12 +1257,14 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
 		rdev->mode_info.firmware_flags =
 			le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
 
-		if (ASIC_IS_DCE8(rdev)) {
-			rdev->clock.gpupll_outputfreq =
+		if (ASIC_IS_DCE8(rdev))
+			rdev->clock.vco_freq =
 				le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
-			if (rdev->clock.gpupll_outputfreq == 0)
-				rdev->clock.gpupll_outputfreq = 360000;	/* 3.6 GHz */
-		}
+		else
+			rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+		if (rdev->clock.vco_freq == 0)
+			rdev->clock.vco_freq = 360000;	/* 3.6 GHz */
 
 		return true;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 2c02e99b5f95..85e1c234f020 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-	struct radeon_connector_atom_dig *dig_connector =
-		radeon_connector->con_priv;
 
 	if (!dig || !dig->afmt)
 		return;
@@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
 		radeon_audio_write_speaker_allocation(encoder);
 		radeon_audio_write_sad_regs(encoder);
 		radeon_audio_write_latency_fields(encoder, mode);
-		if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
-			radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
-		else
-			radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+		radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
 		radeon_audio_set_audio_packet(encoder);
 		radeon_audio_select_pin(encoder);
 

From a64c9dab1c4d05c87ec8a1cb9b48915816462143 Mon Sep 17 00:00:00 2001
From: Slava Grigorev <slava.grigorev@amd.com>
Date: Tue, 26 Jan 2016 16:56:25 -0500
Subject: [PATCH 209/262] drm/radeon: Add a common function for DFS handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move encoding of DFS (digital frequency synthesizer) divider into a
separate function and improve calculation precision.

Signed-off-by: Slava Grigorev <slava.grigorev@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/dce6_afmt.c    | 12 ++----------
 drivers/gpu/drm/radeon/radeon_audio.c | 12 ++++++++++++
 drivers/gpu/drm/radeon/radeon_audio.h |  1 +
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index ea4e3fc2744f..367a916f364e 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev,
 		unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
 			DENTIST_DPREFCLK_WDIVIDER_MASK) >>
 			DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
-		if (div < 128 && div >= 96)
-			div -= 64;
-		else if (div >= 64)
-			div = div / 2 - 16;
-		else if (div >= 8)
-			div /= 4;
-		else
-			div = 0;
+		div = radeon_audio_decode_dfs_div(div);
 
 		if (div)
-			clock /= div;
+			clock = clock * 100 / div;
 
 		WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
 		WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 85e1c234f020..b214663b370d 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -775,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode)
 	if (radeon_encoder->audio && radeon_encoder->audio->dpms)
 		radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
 }
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+	if (div >= 8 && div < 64)
+		return (div - 8) * 25 + 200;
+	else if (div >= 64 && div < 96)
+		return (div - 64) * 50 + 1600;
+	else if (div >= 96 && div < 128)
+		return (div - 96) * 100 + 3200;
+	else
+		return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 059cc3012062..5c70cceaa4a6 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev);
 void radeon_audio_mode_set(struct drm_encoder *encoder,
 	struct drm_display_mode *mode);
 void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
 
 #endif

From fe6fc1f132b4300c1f6defd43a5d673eb60a820d Mon Sep 17 00:00:00 2001
From: Slava Grigorev <slava.grigorev@amd.com>
Date: Tue, 26 Jan 2016 17:35:57 -0500
Subject: [PATCH 210/262] drm/radeon: fix DP audio support for APU with DCE4.1
 display engine
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Properly setup the DFS divider for DP audio for DCE4.1.

Signed-off-by: Slava Grigorev <slava.grigorev@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/evergreen_hdmi.c  | 10 +++++++
 drivers/gpu/drm/radeon/evergreend.h      |  5 ++++
 drivers/gpu/drm/radeon/radeon_atombios.c | 37 +++++++++++++++++++-----
 3 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 9953356fe263..3cf04a2f44bb 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev,
 	 * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
 	 * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
 	 */
+	if (ASIC_IS_DCE41(rdev)) {
+		unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+			DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+			DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+		div = radeon_audio_decode_dfs_div(div);
+
+		if (div)
+			clock = 100 * clock / div;
+	}
+
 	WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
 	WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
 }
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 4aa5f755572b..13b6029d65cc 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -511,6 +511,11 @@
 #define DCCG_AUDIO_DTO1_CNTL              0x05cc
 #       define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
 
+#define DCE41_DENTIST_DISPCLK_CNTL			0x049c
+#       define DENTIST_DPREFCLK_WDIVIDER(x)		(((x) & 0x7f) << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_MASK		(0x7f << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_SHIFT		24
+
 /* DCE 4.0 AFMT */
 #define HDMI_CONTROL                         0x7030
 #       define HDMI_KEEPOUT_MODE             (1 << 0)
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 9a9363a7e5b9..de9a2ffcf5f7 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1106,6 +1106,31 @@ union firmware_info {
 	ATOM_FIRMWARE_INFO_V2_2 info_22;
 };
 
+union igp_info {
+	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+	union igp_info *igp_info;
+	u8 frev, crev;
+	u16 data_offset;
+
+	if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+			&frev, &crev, &data_offset)) {
+		igp_info = (union igp_info *)(mode_info->atom_context->bios +
+			data_offset);
+		rdev->clock.vco_freq =
+			le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+	}
+}
+
 bool radeon_atom_get_clock_info(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -1260,6 +1285,10 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
 		if (ASIC_IS_DCE8(rdev))
 			rdev->clock.vco_freq =
 				le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
+		else if (ASIC_IS_DCE5(rdev))
+			rdev->clock.vco_freq = rdev->clock.current_dispclk;
+		else if (ASIC_IS_DCE41(rdev))
+			radeon_atombios_get_dentist_vco_freq(rdev);
 		else
 			rdev->clock.vco_freq = rdev->clock.current_dispclk;
 
@@ -1272,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
 	return false;
 }
 
-union igp_info {
-	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
 bool radeon_atombios_sideport_present(struct radeon_device *rdev)
 {
 	struct radeon_mode_info *mode_info = &rdev->mode_info;

From 03590cb56d5df7367ccb9e6e0127593254401c42 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 27 Jan 2016 18:14:13 +0000
Subject: [PATCH 211/262] ARM: wire up copy_file_range() syscall

Add the copy_file_range() syscall to ARM.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/uapi/asm/unistd.h | 1 +
 arch/arm/kernel/calls.S            | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ede692ffa32e..5dd2528e9e45 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -417,6 +417,7 @@
 #define __NR_userfaultfd		(__NR_SYSCALL_BASE+388)
 #define __NR_membarrier			(__NR_SYSCALL_BASE+389)
 #define __NR_mlock2			(__NR_SYSCALL_BASE+390)
+#define __NR_copy_file_range		(__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index ac368bb068d1..dfc7cd6851ad 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -400,6 +400,7 @@
 		CALL(sys_userfaultfd)
 		CALL(sys_membarrier)
 		CALL(sys_mlock2)
+		CALL(sys_copy_file_range)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted

From 16ab8a5cbea463e4d14bf0ce698f11fa64b70ae1 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 27 Jan 2016 11:22:25 -0700
Subject: [PATCH 212/262] vfio/noiommu: Don't use iommu_present() to track fake
 groups

Using iommu_present() to determine whether an IOMMU group is real or
fake has some problems.  First, apparently Power systems don't
register an IOMMU on the device bus, so the groups and containers get
marked as noiommu and then won't bind to their actual IOMMU driver.
Second, I expect we'll run into the same issue as we try to support
vGPUs through vfio, since they're likely to emulate this behavior of
creating an IOMMU group on a virtual device and then providing a vfio
IOMMU backend tailored to the sort of isolation they provide, which
won't necessarily be fully compatible with the IOMMU API.

The solution here is to use the existing iommudata interface to IOMMU
groups, which allows us to easily identify the fake groups we've
created for noiommu purposes.  The iommudata we set is purely
arbitrary since we're only comparing the address, so we use the
address of the noiommu switch itself.

Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <sshukla@mvista.com>
Fixes: 03a76b60f8ba ("vfio: Include No-IOMMU mode")
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 82f25cc1c460..ecca316386f5 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -123,8 +123,8 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
 	/*
 	 * With noiommu enabled, an IOMMU group will be created for a device
 	 * that doesn't already have one and doesn't have an iommu_ops on their
-	 * bus.  We use iommu_present() again in the main code to detect these
-	 * fake groups.
+	 * bus.  We set iommudata simply to be able to identify these groups
+	 * as special use and for reclamation later.
 	 */
 	if (group || !noiommu || iommu_present(dev->bus))
 		return group;
@@ -134,6 +134,7 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
 		return NULL;
 
 	iommu_group_set_name(group, "vfio-noiommu");
+	iommu_group_set_iommudata(group, &noiommu, NULL);
 	ret = iommu_group_add_device(group, dev);
 	iommu_group_put(group);
 	if (ret)
@@ -158,7 +159,7 @@ EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
 {
 #ifdef CONFIG_VFIO_NOIOMMU
-	if (!iommu_present(dev->bus))
+	if (iommu_group_get_iommudata(group) == &noiommu)
 		iommu_group_remove_device(dev);
 #endif
 
@@ -190,16 +191,10 @@ static long vfio_noiommu_ioctl(void *iommu_data,
 	return -ENOTTY;
 }
 
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
-	return iommu_present(dev->bus) ? 1 : 0;
-}
-
 static int vfio_noiommu_attach_group(void *iommu_data,
 				     struct iommu_group *iommu_group)
 {
-	return iommu_group_for_each_dev(iommu_group, NULL,
-					vfio_iommu_present) ? -EINVAL : 0;
+	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
 }
 
 static void vfio_noiommu_detach_group(void *iommu_data,
@@ -323,8 +318,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-					    bool iommu_present)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group, *tmp;
 	struct device *dev;
@@ -342,7 +336,9 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 	atomic_set(&group->container_users, 0);
 	atomic_set(&group->opened, 0);
 	group->iommu_group = iommu_group;
-	group->noiommu = !iommu_present;
+#ifdef CONFIG_VFIO_NOIOMMU
+	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
+#endif
 
 	group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -767,7 +763,7 @@ int vfio_add_group_dev(struct device *dev,
 
 	group = vfio_group_get_from_iommu(iommu_group);
 	if (!group) {
-		group = vfio_create_group(iommu_group, iommu_present(dev->bus));
+		group = vfio_create_group(iommu_group);
 		if (IS_ERR(group)) {
 			iommu_group_put(iommu_group);
 			return PTR_ERR(group);

From 119ae9b7de212e5b31b09c2500a10096b3b5256f Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ao2@ao2.it>
Date: Wed, 27 Jan 2016 13:37:34 +0100
Subject: [PATCH 213/262] mailmap: redirect inactive address
 <ao2@amarulasolutions.com>

The email address <ao2@amarulasolutions.com> is not active anymore, use
Antonio's personal email address <ao2@ao2.it> in case someone wants to
get in touch for the code wrote for Amarula Solutions.

Signed-off-by: Antonio Ospite <ao2@ao2.it>
Cc: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index b1e9a97653dc..7e6c5334c337 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>

From b0a119fd990420d84baf8bc802bd2caf85edb464 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 27 Jan 2016 20:50:36 +0100
Subject: [PATCH 214/262] Revert "MIPS: bcm63xx: nvram: Remove unused
 bcm63xx_nvram_get_psi_size() function"

This reverts commit 5bdb102b3f9785cb88467bc7c75fa0f5cacc8dc5.

Brian Norris <computersforpeace@gmail.com> is reporting:

Ralf,

Please revert this and send it to Linus (or else, I can send it myself).
This is causing build failures, because I didn't take the rest of
Simon's series yet.

drivers/mtd/bcm63xxpart.c: In function 'bcm63xx_parse_cfe_partitions':
drivers/mtd/bcm63xxpart.c:93:2: error: implicit declaration of function
'bcm63xx_nvram_get_psi_size' [-Werror=implicit-function-declaration]

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
References: https://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20160126191607.GA111152%40google.com
---
 arch/mips/bcm63xx/nvram.c                          | 11 +++++++++++
 arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/arch/mips/bcm63xx/nvram.c b/arch/mips/bcm63xx/nvram.c
index 5f2bc1e10eae..05757aed016c 100644
--- a/arch/mips/bcm63xx/nvram.c
+++ b/arch/mips/bcm63xx/nvram.c
@@ -19,6 +19,8 @@
 
 #include <bcm63xx_nvram.h>
 
+#define BCM63XX_DEFAULT_PSI_SIZE	64
+
 static struct bcm963xx_nvram nvram;
 static int mac_addr_used;
 
@@ -85,3 +87,12 @@ int bcm63xx_nvram_get_mac_address(u8 *mac)
 	return 0;
 }
 EXPORT_SYMBOL(bcm63xx_nvram_get_mac_address);
+
+int bcm63xx_nvram_get_psi_size(void)
+{
+	if (nvram.psi_size > 0)
+		return nvram.psi_size;
+
+	return BCM63XX_DEFAULT_PSI_SIZE;
+}
+EXPORT_SYMBOL(bcm63xx_nvram_get_psi_size);
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
index 4e0b6bc1165e..348df49dcc9f 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
@@ -30,4 +30,6 @@ u8 *bcm63xx_nvram_get_name(void);
  */
 int bcm63xx_nvram_get_mac_address(u8 *mac);
 
+int bcm63xx_nvram_get_psi_size(void);
+
 #endif /* BCM63XX_NVRAM_H */

From cdb300a041f5df1dfbde1367f95109b6449d1371 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 27 Jan 2016 09:10:37 +0100
Subject: [PATCH 215/262] PM / Domains: Fix potential deadlock while
 adding/removing subdomains

We must preserve the same order of how we acquire and release the lock for
genpd, as otherwise we may encounter deadlocks.

The power on phase of a genpd starts by acquiring its lock. Then it walks
the hierarchy of its parent domains to be able to power on these first, as
per design of genpd.

From a locking perspective this means the locks of the parents becomes
acquired after the lock of the subdomain.

Let's fix pm_genpd_add|remove_subdomain() to maintain the same order of
acquiring/releasing the genpd lock as being applied in the power on/off
sequence.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e02ddf65bc43..2ff818c2ae6b 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1340,8 +1340,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
 	if (!link)
 		return -ENOMEM;
 
-	mutex_lock(&genpd->lock);
-	mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+	mutex_lock(&subdomain->lock);
+	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
 	if (genpd->status == GPD_STATE_POWER_OFF
 	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1364,8 +1364,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
 		genpd_sd_counter_inc(genpd);
 
  out:
-	mutex_unlock(&subdomain->lock);
 	mutex_unlock(&genpd->lock);
+	mutex_unlock(&subdomain->lock);
 	if (ret)
 		kfree(link);
 	return ret;
@@ -1386,7 +1386,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
 		return -EINVAL;
 
-	mutex_lock(&genpd->lock);
+	mutex_lock(&subdomain->lock);
+	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
 	if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
 		pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1399,22 +1400,19 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 		if (link->slave != subdomain)
 			continue;
 
-		mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
-
 		list_del(&link->master_node);
 		list_del(&link->slave_node);
 		kfree(link);
 		if (subdomain->status != GPD_STATE_POWER_OFF)
 			genpd_sd_counter_dec(genpd);
 
-		mutex_unlock(&subdomain->lock);
-
 		ret = 0;
 		break;
 	}
 
 out:
 	mutex_unlock(&genpd->lock);
+	mutex_unlock(&subdomain->lock);
 
 	return ret;
 }

From a3d09c73492e57a1189e410f67e4d2115b23a3a8 Mon Sep 17 00:00:00 2001
From: Moritz Fischer <moritz.fischer@ettus.com>
Date: Wed, 27 Jan 2016 08:29:27 +0100
Subject: [PATCH 216/262] PM / Domains: Fix typo in comment

Acked-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 2ff818c2ae6b..014024a2fee9 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -160,7 +160,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed)
 
 /**
  * genpd_queue_power_off_work - Queue up the execution of genpd_poweroff().
- * @genpd: PM domait to power off.
+ * @genpd: PM domain to power off.
  *
  * Queue up the execution of genpd_poweroff() unless it's already been done
  * before.

From 75274b33e779ae40a750bcb4bd0b07c4dfef4746 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Wed, 27 Jan 2016 20:26:54 +0100
Subject: [PATCH 217/262] cpuidle: coupled: remove unused define
 cpuidle_coupled_lock

This was found with the -RT patch enabled, but the fix should apply to
non-RT also.

Used multi_v7_defconfig+PREEMPT_RT_FULL=y and this caused a compilation
warning without this fix:
../drivers/cpuidle/coupled.c:122:21: warning: 'cpuidle_coupled_lock'
defined but not used [-Wunused-variable]

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/coupled.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 344058f8501a..d5657d50ac40 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -119,7 +119,6 @@ struct cpuidle_coupled {
 
 #define CPUIDLE_COUPLED_NOT_IDLE	(-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*

From e4b133cc4b30b48d488e4e4fffb132f173ce4358 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 25 Jan 2016 22:33:46 +0530
Subject: [PATCH 218/262] cpufreq: Fix NULL reference crash while accessing
 policy->governor_data

There is a race discovered by Juri, where we are able to:
- create and read a sysfs file before policy->governor_data is being set
  to a non NULL value.
  OR
- set policy->governor_data to NULL, and reading a file before being
  destroyed.

And so such a crash is reported:

Unable to handle kernel NULL pointer dereference at virtual address 0000000c
pgd = edfc8000
[0000000c] *pgd=bfc8c835
Internal error: Oops: 17 [#1] SMP ARM
Modules linked in:
CPU: 4 PID: 1730 Comm: cat Not tainted 4.5.0-rc1+ #463
Hardware name: ARM-Versatile Express
task: ee8e8480 ti: ee930000 task.ti: ee930000
PC is at show_ignore_nice_load_gov_pol+0x24/0x34
LR is at show+0x4c/0x60
pc : [<c058f1bc>]    lr : [<c058ae88>]    psr: a0070013
sp : ee931dd0  ip : ee931de0  fp : ee931ddc
r10: ee4bc290  r9 : 00001000  r8 : ef2cb000
r7 : ee4bc200  r6 : ef2cb000  r5 : c0af57b0  r4 : ee4bc2e0
r3 : 00000000  r2 : 00000000  r1 : c0928df4  r0 : ef2cb000
Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
Control: 10c5387d  Table: adfc806a  DAC: 00000051
Process cat (pid: 1730, stack limit = 0xee930210)
Stack: (0xee931dd0 to 0xee932000)
1dc0:                                     ee931dfc ee931de0 c058ae88 c058f1a4
1de0: edce3bc0 c07bfca4 edce3ac0 00001000 ee931e24 ee931e00 c01fcb90 c058ae48
1e00: 00000001 edce3bc0 00000000 00000001 ee931e50 ee8ff480 ee931e34 ee931e28
1e20: c01fb33c c01fcb0c ee931e8c ee931e38 c01a5210 c01fb314 ee931e9c ee931e48
1e40: 00000000 edce3bf0 befe4a00 ee931f78 00000000 00000000 000001e4 00000000
1e60: c00545a8 edce3ac0 00001000 00001000 befe4a00 ee931f78 00000000 00001000
1e80: ee931ed4 ee931e90 c01fbed8 c01a5038 ed085a58 00020000 00000000 00000000
1ea0: c0ad72e4 ee931f78 ee8ff488 ee8ff480 c077f3fc 00001000 befe4a00 ee931f78
1ec0: 00000000 00001000 ee931f44 ee931ed8 c017c328 c01fbdc4 00001000 00000000
1ee0: ee8ff480 00001000 ee931f44 ee931ef8 c017c65c c03deb10 ee931fac ee931f08
1f00: c0009270 c001f290 c0a8d968 ef2cb000 ef2cb000 ee8ff480 00000020 ee8ff480
1f20: ee8ff480 befe4a00 00001000 ee931f78 00000000 00000000 ee931f74 ee931f48
1f40: c017d1ec c017c2f8 c019c724 c019c684 ee8ff480 ee8ff480 00001000 befe4a00
1f60: 00000000 00000000 ee931fa4 ee931f78 c017d2a8 c017d160 00000000 00000000
1f80: 000a9f20 00001000 befe4a00 00000003 c000ffe4 ee930000 00000000 ee931fa8
1fa0: c000fe40 c017d264 000a9f20 00001000 00000003 befe4a00 00001000 00000000
Unable to handle kernel NULL pointer dereference at virtual address 0000000c
1fc0: 000a9f20 00001000 befe4a00 00000003 00000000 00000000 00000003 00000001
pgd = edfc4000
[0000000c] *pgd=bfcac835
1fe0: 00000000 befe49dc 000197f8 b6e35dfc 60070010 00000003 3065b49d 134ac2c9

[<c058f1bc>] (show_ignore_nice_load_gov_pol) from [<c058ae88>] (show+0x4c/0x60)
[<c058ae88>] (show) from [<c01fcb90>] (sysfs_kf_seq_show+0x90/0xfc)
[<c01fcb90>] (sysfs_kf_seq_show) from [<c01fb33c>] (kernfs_seq_show+0x34/0x38)
[<c01fb33c>] (kernfs_seq_show) from [<c01a5210>] (seq_read+0x1e4/0x4e4)
[<c01a5210>] (seq_read) from [<c01fbed8>] (kernfs_fop_read+0x120/0x1a0)
[<c01fbed8>] (kernfs_fop_read) from [<c017c328>] (__vfs_read+0x3c/0xe0)
[<c017c328>] (__vfs_read) from [<c017d1ec>] (vfs_read+0x98/0x104)
[<c017d1ec>] (vfs_read) from [<c017d2a8>] (SyS_read+0x50/0x90)
[<c017d2a8>] (SyS_read) from [<c000fe40>] (ret_fast_syscall+0x0/0x1c)
Code: e5903044 e1a00001 e3081df4 e34c1092 (e593300c)
---[ end trace 5994b9a5111f35ee ]---

Fix that by making sure, policy->governor_data is updated at the right
places only.

Cc: 4.2+ <stable@vger.kernel.org> # 4.2+
Reported-and-tested-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index bab3a514ec12..e0d111024d48 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -387,16 +387,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
 	if (!have_governor_per_policy())
 		cdata->gdbs_data = dbs_data;
 
+	policy->governor_data = dbs_data;
+
 	ret = sysfs_create_group(get_governor_parent_kobj(policy),
 				 get_sysfs_attr(dbs_data));
 	if (ret)
 		goto reset_gdbs_data;
 
-	policy->governor_data = dbs_data;
-
 	return 0;
 
 reset_gdbs_data:
+	policy->governor_data = NULL;
+
 	if (!have_governor_per_policy())
 		cdata->gdbs_data = NULL;
 	cdata->exit(dbs_data, !policy->governor->initialized);
@@ -417,16 +419,19 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy,
 	if (!cdbs->shared || cdbs->shared->policy)
 		return -EBUSY;
 
-	policy->governor_data = NULL;
 	if (!--dbs_data->usage_count) {
 		sysfs_remove_group(get_governor_parent_kobj(policy),
 				   get_sysfs_attr(dbs_data));
 
+		policy->governor_data = NULL;
+
 		if (!have_governor_per_policy())
 			cdata->gdbs_data = NULL;
 
 		cdata->exit(dbs_data, policy->governor->initialized == 1);
 		kfree(dbs_data);
+	} else {
+		policy->governor_data = NULL;
 	}
 
 	free_common_dbs_info(policy, cdata);

From 2dadfd7564ef28391609c7ef896fd85218799012 Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Date: Wed, 27 Jan 2016 12:02:26 +0530
Subject: [PATCH 219/262] cpufreq: Use list_is_last() to check last entry of
 the policy list

Currently next_policy() explicitly checks if a policy is the last
policy in the cpufreq_policy_list. Use the standard list_is_last
primitive instead.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c35e7da1ed7a..e979ec78b695 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -48,11 +48,11 @@ static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
 					  bool active)
 {
 	do {
-		policy = list_next_entry(policy, policy_list);
-
 		/* No more policies in the list */
-		if (&policy->policy_list == &cpufreq_policy_list)
+		if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
 			return NULL;
+
+		policy = list_next_entry(policy, policy_list);
 	} while (!suitable_policy(policy, active));
 
 	return policy;

From 993e9fe1e70aedbb6e9670211f1ad56beea8373d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 16:48:28 +0100
Subject: [PATCH 220/262] PM: APM_EMULATION does not depend on PM

The APM emulation code does multiple things, and some of them depend on
PM_SLEEP, while the battery management does not. However, selecting
the symbol like SHARPSL_PM does causes a Kconfig warning:

warning: (SHARPSL_PM && PMAC_APM_EMU) selects APM_EMULATION which has unmet direct dependencies (PM && SYS_SUPPORTS_APM_EMULATION)

From all I can tell, this is completely harmless, and we can simply allow
APM_EMULATION to be enabled here, even if PM is not.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 02e8dfaa1ce2..68d3ebc12601 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -235,7 +235,7 @@ config PM_TRACE_RTC
 
 config APM_EMULATION
 	tristate "Advanced Power Management Emulation"
-	depends on PM && SYS_SUPPORTS_APM_EMULATION
+	depends on SYS_SUPPORTS_APM_EMULATION
 	help
 	  APM is a BIOS specification for saving power using several different
 	  techniques. This is mostly useful for battery powered laptops with

From fb2a24a1c6457d21df9fae0dd66b20c63ba56077 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 16:44:38 +0100
Subject: [PATCH 221/262] cpufreq: pxa2xx: fix pxa_cpufreq_change_voltage
 prototype

There are two definitions of pxa_cpufreq_change_voltage, with slightly
different prototypes after one of them had its argument marked 'const'.
Now the other one (for !CONFIG_REGULATOR) produces a harmless warning:

drivers/cpufreq/pxa2xx-cpufreq.c: In function 'pxa_set_target':
drivers/cpufreq/pxa2xx-cpufreq.c:291:36: warning: passing argument 1 of 'pxa_cpufreq_change_voltage' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
   ret = pxa_cpufreq_change_voltage(&pxa_freq_settings[idx]);
                                    ^
drivers/cpufreq/pxa2xx-cpufreq.c:205:12: note: expected 'struct pxa_freqs *' but argument is of type 'const struct pxa_freqs *'
 static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
            ^

This changes the prototype in the same way as the other, which
avoids the warning.

Fixes: 03c229906311 (cpufreq: pxa: make pxa_freqs arrays const)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 4.2+ <stable@vger.kernel.org> # 4.2+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/pxa2xx-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 1d99c97defa9..096377232747 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -202,7 +202,7 @@ static void __init pxa_cpufreq_init_voltages(void)
 	}
 }
 #else
-static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
+static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 {
 	return 0;
 }

From b331bc20d9281213f7fb67912638e0fb5baeb324 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 16:45:48 +0100
Subject: [PATCH 222/262] cpufreq: cpufreq-dt: avoid uninitialized variable
 warnings:

gcc warns quite a bit about values returned from allocate_resources()
in cpufreq-dt.c:

cpufreq-dt.c: In function 'cpufreq_init':
cpufreq-dt.c:327:6: error: 'cpu_dev' may be used uninitialized in this function [-Werror=maybe-uninitialized]
cpufreq-dt.c:197:17: note: 'cpu_dev' was declared here
cpufreq-dt.c:376:2: error: 'cpu_clk' may be used uninitialized in this function [-Werror=maybe-uninitialized]
cpufreq-dt.c:199:14: note: 'cpu_clk' was declared here
cpufreq-dt.c: In function 'dt_cpufreq_probe':
cpufreq-dt.c:461:2: error: 'cpu_clk' may be used uninitialized in this function [-Werror=maybe-uninitialized]
cpufreq-dt.c:447:14: note: 'cpu_clk' was declared here

The problem is that it's slightly hard for gcc to follow return
codes across PTR_ERR() calls.

This patch uses explicit assignments to the "ret" variable to make
it easier for gcc to verify that the code is actually correct,
without the need to add a bogus initialization.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 9bc37c437874..0ca74d070058 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -142,15 +142,16 @@ static int allocate_resources(int cpu, struct device **cdev,
 
 try_again:
 	cpu_reg = regulator_get_optional(cpu_dev, reg);
-	if (IS_ERR(cpu_reg)) {
+	ret = PTR_ERR_OR_ZERO(cpu_reg);
+	if (ret) {
 		/*
 		 * If cpu's regulator supply node is present, but regulator is
 		 * not yet registered, we should try defering probe.
 		 */
-		if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+		if (ret == -EPROBE_DEFER) {
 			dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
 				cpu);
-			return -EPROBE_DEFER;
+			return ret;
 		}
 
 		/* Try with "cpu-supply" */
@@ -159,18 +160,16 @@ try_again:
 			goto try_again;
 		}
 
-		dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-			cpu, PTR_ERR(cpu_reg));
+		dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
 	}
 
 	cpu_clk = clk_get(cpu_dev, NULL);
-	if (IS_ERR(cpu_clk)) {
+	ret = PTR_ERR_OR_ZERO(cpu_clk);
+	if (ret) {
 		/* put regulator */
 		if (!IS_ERR(cpu_reg))
 			regulator_put(cpu_reg);
 
-		ret = PTR_ERR(cpu_clk);
-
 		/*
 		 * If cpu's clk node is present, but clock is not yet
 		 * registered, we should try defering probe.

From eee045021fb22aeac7f5d6f2092430b530c880ee Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Jan 2016 01:02:03 +0000
Subject: [PATCH 223/262] KEYS: Only apply KEY_FLAG_KEEP to a key if a parent
 keyring has it set

KEY_FLAG_KEEP should only be applied to a key if the keyring it is being
linked into has KEY_FLAG_KEEP set.

To this end, partially revert the following patch:

	commit 1d6d167c2efcfe9539d9cffb1a1be9c92e39c2c0
	Author: Mimi Zohar <zohar@linux.vnet.ibm.com>
	Date:   Thu Jan 7 07:46:36 2016 -0500
	KEYS: refcount bug fix

to undo the change that made it unconditional (Mimi got it right the first
time).

Without undoing this change, it becomes impossible to delete, revoke or
invalidate keys added to keyrings through __key_instantiate_and_link()
where the keyring has itself been linked to.  To test this, run the
following command sequence:

    keyctl newring foo @s
    keyctl add user a a %:foo
    keyctl unlink %user:a %:foo
    keyctl clear %:foo

With the commit mentioned above the third and fourth commands fail with
EPERM when they should succeed.

Reported-by: Stephen Gallager <sgallagh@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by:  Mimi Zohar <zohar@linux.vnet.ibm.com>
cc: Mimi Zohar <zohar@linux.vnet.ibm.com>
cc: keyrings@vger.kernel.org
cc: stable@vger.kernel.org
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/keys/key.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/security/keys/key.c b/security/keys/key.c
index 07a87311055c..09ef276c4bdc 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -430,7 +430,8 @@ static int __key_instantiate_and_link(struct key *key,
 
 			/* and link it into the destination keyring */
 			if (keyring) {
-				set_bit(KEY_FLAG_KEEP, &key->flags);
+				if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+					set_bit(KEY_FLAG_KEEP, &key->flags);
 
 				__key_link(key, _edit);
 			}

From 60dee3ca27b3613e6519df52d02eadf71f853859 Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@profitbricks.com>
Date: Wed, 27 Jan 2016 12:02:09 +0100
Subject: [PATCH 224/262] hwmon: (fam15h_power) Add bit masking for tdp_limit

Add bit masking to read ApmTdpLimit precisely

Signed-off-by: Gioh Kim <gi-oh.kim@profitbricks.com>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/fam15h_power.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index f77eb971ce95..4f695d8fcafa 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -90,7 +90,15 @@ static ssize_t show_power(struct device *dev,
 	pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
 				  REG_TDP_LIMIT3, &val);
 
-	tdp_limit = val >> 16;
+	/*
+	 * On Carrizo and later platforms, ApmTdpLimit bit field
+	 * is extended to 16:31 from 16:28.
+	 */
+	if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+		tdp_limit = val >> 16;
+	else
+		tdp_limit = (val >> 16) & 0x1fff;
+
 	curr_pwr_watts = ((u64)(tdp_limit +
 				data->base_tdp)) << running_avg_range;
 	curr_pwr_watts -= running_avg_capture;

From ac1efcfb35f73657ec1e28b1e8a3c895529c1108 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 27 Jan 2016 18:04:10 -0800
Subject: [PATCH 225/262] ALSA: timer: fix SND_PCM_TIMER Kconfig text

Fix spelling and typos for SND_PCM_TIMER.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index e3e949126a56..a2a1e24becc6 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -97,11 +97,11 @@ config SND_PCM_TIMER
 	bool "PCM timer interface" if EXPERT
 	default y
 	help
-	  If you disable this option, pcm timer will be inavailable, so
-	  those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+	  If you disable this option, pcm timer will be unavailable, so
+	  those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
 	  incorrectlly.
 
-	  For some embedded device, we may disable it to reduce memory
+	  For some embedded devices, we may disable it to reduce memory
 	  footprint, about 20KB on x86_64 platform.
 
 config SND_SEQUENCER_OSS

From 7ee96216c31aabe1eb42fb91ff50dae9fcd014b2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 28 Jan 2016 07:54:16 +0100
Subject: [PATCH 226/262] ALSA: dummy: Disable switching timer backend via
 sysfs

ALSA dummy driver can switch the timer backend between system timer
and hrtimer via its hrtimer module option.  This can be also switched
dynamically via sysfs, but it may lead to a memory corruption when
switching is done while a PCM stream is running; the stream instance
for the newly switched timer method tries to access the memory that
was allocated by another timer method although the sizes differ.

As the simplest fix, this patch just disables the switch via sysfs by
dropping the writable bit.

BugLink: http://lkml.kernel.org/r/CACT4Y+ZGEeEBntHW5WHn2GoeE0G_kRrCmUh6=dWyy-wfzvuJLg@mail.gmail.com
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/drivers/dummy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index 75b74850c005..bde33308f0d6 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver.");
 module_param(fake_buffer, bool, 0444);
 MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations.");
 #ifdef CONFIG_HIGH_RES_TIMERS
-module_param(hrtimer, bool, 0644);
+module_param(hrtimer, bool, 0444);
 MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source.");
 #endif
 

From 370f06c88528b3988fe24a372c10e1303bb94cf6 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Mon, 25 Jan 2016 14:03:46 +0530
Subject: [PATCH 227/262] powerpc/perf: Remove PPMU_HAS_SSLOT flag for Power8

Commit 7a7868326d77 ("powerpc/perf: Add an explict flag indicating
presence of SLOT field") introduced the PPMU_HAS_SSLOT flag to remove
the assumption that MMCRA[SLOT] was present when PPMU_ALT_SIPR was not
set.

That commit's changelog also mentions that Power8 does not support
MMCRA[SLOT]. However when the Power8 PMU support was merged, it
errnoeously included the PPMU_HAS_SSLOT flag.

So remove PPMU_HAS_SSLOT from the Power8 flags.

mpe: On systems where MMCRA[SLOT] exists, the field occupies bits 37:39
(IBM numbering). On Power8 bit 37 is reserved, and 38:39 overlap with
the high bits of the Threshold Event Counter Mantissa. I am not aware of
any published events which use the threshold counting mechanism, which
would cause the mantissa bits to be set. So in practice this bug is
unlikely to trigger.

Fixes: e05b9b9e5c10 ("powerpc/perf: Power8 PMU support")
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/power8-pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 7d5e295255b7..9958ba8bf0d2 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -816,7 +816,7 @@ static struct power_pmu power8_pmu = {
 	.get_constraint		= power8_get_constraint,
 	.get_alternatives	= power8_get_alternatives,
 	.disable_pmc		= power8_disable_pmc,
-	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
 	.n_generic		= ARRAY_SIZE(power8_generic_events),
 	.generic_events		= power8_generic_events,
 	.cache_events		= &power8_cache_events,

From 2d19fc639516dc7b4184450b315c931d38549e61 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 27 Jan 2016 12:04:20 +0530
Subject: [PATCH 228/262] powerpc/mm: Fixup _HPAGE_CHG_MASK

This was wrongly updated by commit 7aa9a23c69ea ("powerpc, thp: remove
infrastructure for handling splitting PMDs") during the last merge
window. Fix it up.

This could lead to incorrect behaviour in THP and/or mprotect(), at a
minimum.

Fixes: 7aa9a23c69ea ("powerpc, thp: remove infrastructure for handling splitting PMDs")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 06f17e778c27..8d1c8162f0c1 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -50,7 +50,9 @@
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_THP_HUGE)
+			 _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+			 _PAGE_SOFT_DIRTY)
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>

From 28b8d66e0c1cc6c02b8159ae44aec359e12feefa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle@amd.com>
Date: Wed, 27 Jan 2016 11:04:19 -0500
Subject: [PATCH 229/262] drm/amdgpu: only move pt bos in LRU list on success
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a race condition in the error case: since the pt bos have not
necessarily been reserved in case of an error, we could move a pt bo that
is currently in the middle of being evicted/moved by another process,
which then resulted in a BUG_ON in ttm_bo_add_to_lru.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6f89f8e034d0..b882e8175615 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -478,9 +478,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	unsigned i;
 
-	amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
 	if (!error) {
+		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
 		 * This assures that the smallest buffers are added first

From 78cd2c748f459739ff864dd9308c0f6caf7f6e41 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 25 Jan 2016 14:08:45 +0100
Subject: [PATCH 230/262] perf: Fix orphan hole

We should set event->owner before we install the event,
otherwise there is a hole where the target task can fork() and
we'll not inherit the event because it thinks the event is
orphaned.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9de4d352ba8c..6759f2a332d7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8489,6 +8489,8 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_event__header_size(event);
 	perf_event__id_header_size(event);
 
+	event->owner = current;
+
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 
@@ -8498,8 +8500,6 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	put_online_cpus();
 
-	event->owner = current;
-
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);

From 6a3351b612b72c558910c88a43e2ef6d7d68bc97 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 25 Jan 2016 14:09:54 +0100
Subject: [PATCH 231/262] perf: Fix race in perf_event_exit_task_context()

There is a race between perf_event_exit_task_context() and
orphans_remove_work() which results in a use-after-free.

We mark ctx->task with TASK_TOMBSTONE to indicate a context is
'dead', under ctx->lock. After which point event_function_call()
on any event of that context will NOP

A concurrent orphans_remove_work() will only hold ctx->mutex for
the list iteration and not serialize against this. Therefore its
possible that orphans_remove_work()'s perf_remove_from_context()
call will fail, but we'll continue to free the event, with the
result of free'd memory still being on lists and everything.

Once perf_event_exit_task_context() gets around to acquiring
ctx->mutex it too will iterate the event list, encounter the
already free'd event and proceed to free it _again_. This fails
with the WARN in free_event().

Plug the race by having perf_event_exit_task_context() hold
ctx::mutex over the whole tear-down, thereby 'naturally'
serializing against all other sites, including the orphan work.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: alexander.shishkin@linux.intel.com
Cc: dsahern@gmail.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/20160125130954.GY6357@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 50 +++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6759f2a332d7..1d243fadfd12 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8748,14 +8748,40 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
 	struct perf_event_context *child_ctx, *clone_ctx = NULL;
 	struct perf_event *child_event, *next;
-	unsigned long flags;
 
 	WARN_ON_ONCE(child != current);
 
-	child_ctx = perf_lock_task_context(child, ctxn, &flags);
+	child_ctx = perf_pin_task_context(child, ctxn);
 	if (!child_ctx)
 		return;
 
+	/*
+	 * In order to reduce the amount of tricky in ctx tear-down, we hold
+	 * ctx::mutex over the entire thing. This serializes against almost
+	 * everything that wants to access the ctx.
+	 *
+	 * The exception is sys_perf_event_open() /
+	 * perf_event_create_kernel_count() which does find_get_context()
+	 * without ctx::mutex (it cannot because of the move_group double mutex
+	 * lock thing). See the comments in perf_install_in_context().
+	 *
+	 * We can recurse on the same lock type through:
+	 *
+	 *   __perf_event_exit_task()
+	 *     sync_child_event()
+	 *       put_event()
+	 *         mutex_lock(&ctx->mutex)
+	 *
+	 * But since its the parent context it won't be the same instance.
+	 */
+	mutex_lock(&child_ctx->mutex);
+
+	/*
+	 * In a single ctx::lock section, de-schedule the events and detach the
+	 * context from the task such that we cannot ever get it scheduled back
+	 * in.
+	 */
+	raw_spin_lock_irq(&child_ctx->lock);
 	task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
 	/*
@@ -8767,14 +8793,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
 	put_task_struct(current); /* cannot be last */
 
-	/*
-	 * If this context is a clone; unclone it so it can't get
-	 * swapped to another process while we're removing all
-	 * the events from it.
-	 */
 	clone_ctx = unclone_ctx(child_ctx);
-	update_context_time(child_ctx);
-	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+	raw_spin_unlock_irq(&child_ctx->lock);
 
 	if (clone_ctx)
 		put_ctx(clone_ctx);
@@ -8786,18 +8806,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 */
 	perf_event_task(child, child_ctx, 0);
 
-	/*
-	 * We can recurse on the same lock type through:
-	 *
-	 *   __perf_event_exit_task()
-	 *     sync_child_event()
-	 *       put_event()
-	 *         mutex_lock(&ctx->mutex)
-	 *
-	 * But since its the parent context it won't be the same instance.
-	 */
-	mutex_lock(&child_ctx->mutex);
-
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 

From 96c5d076f0a5e2023ecdb44d8261f87641ee71e0 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Wed, 15 Oct 2014 15:00:47 -0400
Subject: [PATCH 232/262] drm/vmwgfx: respect 'nomodeset'

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>.
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c49812b80dd0..24fb348a44e1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 #include <linux/module.h>
+#include <linux/console.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 static int __init vmwgfx_init(void)
 {
 	int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+	if (vgacon_text_force())
+		return -EINVAL;
+#endif
+
 	ret = drm_pci_init(&driver, &vmw_pci_driver);
 	if (ret)
 		DRM_ERROR("Failed initializing DRM.\n");

From b64a1cbef6df47c986ad622b5b2e4d3d1940070c Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Sat, 23 Jan 2016 14:49:20 +0530
Subject: [PATCH 233/262] Revert "Staging: panel: usleep_range is preferred
 over udelay"

This reverts commit ebd43516d3879f882a403836bba8bc5791f26a28.

We should not be sleeping inside spin_lock.

Fixes: ebd43516d387 ("Staging: panel: usleep_range is preferred over udelay")
Cc: Sirnam Swetha <theonly.ultimate@gmail.com>
Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
Reported-by: Huang, Ying <ying.huang@intel.com>
Tested-by: Huang, Ying <ying.huang@intel.com>
Cc: stable <stable@vger.kernel.org> # 4.4
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/panel/panel.c | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c
index 79ac19246548..70b8f4fabfad 100644
--- a/drivers/staging/panel/panel.c
+++ b/drivers/staging/panel/panel.c
@@ -825,8 +825,7 @@ static void lcd_write_cmd_s(int cmd)
 	lcd_send_serial(0x1F);	/* R/W=W, RS=0 */
 	lcd_send_serial(cmd & 0x0F);
 	lcd_send_serial((cmd >> 4) & 0x0F);
-	/* the shortest command takes at least 40 us */
-	usleep_range(40, 100);
+	udelay(40);		/* the shortest command takes at least 40 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -837,8 +836,7 @@ static void lcd_write_data_s(int data)
 	lcd_send_serial(0x5F);	/* R/W=W, RS=1 */
 	lcd_send_serial(data & 0x0F);
 	lcd_send_serial((data >> 4) & 0x0F);
-	/* the shortest data takes at least 40 us */
-	usleep_range(40, 100);
+	udelay(40);		/* the shortest data takes at least 40 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -848,20 +846,19 @@ static void lcd_write_cmd_p8(int cmd)
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the data port */
 	w_dtr(pprt, cmd);
-	/* maintain the data during 20 us before the strobe */
-	usleep_range(20, 100);
+	udelay(20);	/* maintain the data during 20 us before the strobe */
 
 	bits.e = BIT_SET;
 	bits.rs = BIT_CLR;
 	bits.rw = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(40, 100);	/* maintain the strobe during 40 us */
+	udelay(40);	/* maintain the strobe during 40 us */
 
 	bits.e = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(120, 500);	/* the shortest command takes at least 120 us */
+	udelay(120);	/* the shortest command takes at least 120 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -871,20 +868,19 @@ static void lcd_write_data_p8(int data)
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the data port */
 	w_dtr(pprt, data);
-	/* maintain the data during 20 us before the strobe */
-	usleep_range(20, 100);
+	udelay(20);	/* maintain the data during 20 us before the strobe */
 
 	bits.e = BIT_SET;
 	bits.rs = BIT_SET;
 	bits.rw = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(40, 100);	/* maintain the strobe during 40 us */
+	udelay(40);	/* maintain the strobe during 40 us */
 
 	bits.e = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(45, 100);	/* the shortest data takes at least 45 us */
+	udelay(45);	/* the shortest data takes at least 45 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -894,7 +890,7 @@ static void lcd_write_cmd_tilcd(int cmd)
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the control port */
 	w_ctr(pprt, cmd);
-	usleep_range(60, 120);
+	udelay(60);
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -904,7 +900,7 @@ static void lcd_write_data_tilcd(int data)
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the data port */
 	w_dtr(pprt, data);
-	usleep_range(60, 120);
+	udelay(60);
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -947,7 +943,7 @@ static void lcd_clear_fast_s(void)
 		lcd_send_serial(0x5F);	/* R/W=W, RS=1 */
 		lcd_send_serial(' ' & 0x0F);
 		lcd_send_serial((' ' >> 4) & 0x0F);
-		usleep_range(40, 100);	/* the shortest data takes at least 40 us */
+		udelay(40);	/* the shortest data takes at least 40 us */
 	}
 	spin_unlock_irq(&pprt_lock);
 
@@ -971,7 +967,7 @@ static void lcd_clear_fast_p8(void)
 		w_dtr(pprt, ' ');
 
 		/* maintain the data during 20 us before the strobe */
-		usleep_range(20, 100);
+		udelay(20);
 
 		bits.e = BIT_SET;
 		bits.rs = BIT_SET;
@@ -979,13 +975,13 @@ static void lcd_clear_fast_p8(void)
 		set_ctrl_bits();
 
 		/* maintain the strobe during 40 us */
-		usleep_range(40, 100);
+		udelay(40);
 
 		bits.e = BIT_CLR;
 		set_ctrl_bits();
 
 		/* the shortest data takes at least 45 us */
-		usleep_range(45, 100);
+		udelay(45);
 	}
 	spin_unlock_irq(&pprt_lock);
 
@@ -1007,7 +1003,7 @@ static void lcd_clear_fast_tilcd(void)
 	for (pos = 0; pos < lcd.height * lcd.hwidth; pos++) {
 		/* present the data to the data port */
 		w_dtr(pprt, ' ');
-		usleep_range(60, 120);
+		udelay(60);
 	}
 
 	spin_unlock_irq(&pprt_lock);

From 3ec622f40913ae036f218e5e7e92df9c1f1753d9 Mon Sep 17 00:00:00 2001
From: Aaron Plattner <aplattner@nvidia.com>
Date: Thu, 28 Jan 2016 14:07:38 -0800
Subject: [PATCH 234/262] ALSA: hda - Add new GPU codec ID 0x10de0083 to
 snd-hda

Vendor ID 0x10de0083 is used by a yet-to-be-named GPU chip.

This chip also has the 2-ch audio swapping bug, so patch_nvhdmi is
appropriate here.

Signed-off-by: Aaron Plattner <aplattner@nvidia.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_hdmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 426a29a1c19b..1f52b55d77c9 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -3653,6 +3653,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",	patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",	patch_nvhdmi_2ch),
 HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP",	patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",	patch_via_hdmi),

From 327b882d3bcc1fba82dbd39b5cf5a838c81218e2 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Fri, 15 Jan 2016 00:47:41 +0100
Subject: [PATCH 235/262] Staging: speakup: Fix getting port information

Commit f79b0d9c223c ("staging: speakup: Fixed warning <linux/serial.h>
instead of <asm/serial.h>") broke the port information in the speakup
driver: SERIAL_PORT_DFNS only gets defined if asm/serial.h is included,
and no other header includes asm/serial.h.

We here make sure serialio.c does get the arch-specific definition of
SERIAL_PORT_DFNS from asm/serial.h, if any.

Along the way, this makes sure that we do have information for the
requested serial port number (index)

Fixes: f79b0d9c223c ("staging: speakup: Fixed warning <linux/serial.h> instead of <asm/serial.h>")
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Cc: stable <stable@vger.kernel.org> # 3.18
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/speakup/serialio.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/speakup/serialio.c b/drivers/staging/speakup/serialio.c
index 3b5835b28128..a5bbb338f275 100644
--- a/drivers/staging/speakup/serialio.c
+++ b/drivers/staging/speakup/serialio.c
@@ -6,6 +6,11 @@
 #include "spk_priv.h"
 #include "serialio.h"
 
+#include <linux/serial_core.h>
+/* WARNING:  Do not change this to <linux/serial.h> without testing that
+ * SERIAL_PORT_DFNS does get defined to the appropriate value. */
+#include <asm/serial.h>
+
 #ifndef SERIAL_PORT_DFNS
 #define SERIAL_PORT_DFNS
 #endif
@@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index)
 	int baud = 9600, quot = 0;
 	unsigned int cval = 0;
 	int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8;
-	const struct old_serial_port *ser = rs_table + index;
+	const struct old_serial_port *ser;
 	int err;
 
+	if (index >= ARRAY_SIZE(rs_table)) {
+		pr_info("no port info for ttyS%d\n", index);
+		return NULL;
+	}
+	ser = rs_table + index;
+
 	/*	Divisor, bytesize and parity */
 	quot = ser->baud_base / baud;
 	cval = cflag & (CSIZE | CSTOPB);

From 88867e3d0b7eea256c1cd432b0a3c7a21e8edf07 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Mon, 25 Jan 2016 01:32:08 +0100
Subject: [PATCH 236/262] Staging: speakup: fix read scrolled-back VT

Previously, speakup would always read the non-scrolled part of the VT,
even when the VT is scrolled back with shift-page.  This patch makes
vt.c export screen_pos so that speakup can use it to properly access
the content of the scrolled-back VT.

This was tested with both vgacon and fbcon.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/speakup/main.c | 21 +++++++++++++--------
 drivers/tty/vt/vt.c            |  1 +
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c
index 63c59bc89b04..30cf973f326d 100644
--- a/drivers/staging/speakup/main.c
+++ b/drivers/staging/speakup/main.c
@@ -264,8 +264,9 @@ static struct notifier_block vt_notifier_block = {
 	.notifier_call = vt_notifier_call,
 };
 
-static unsigned char get_attributes(u16 *pos)
+static unsigned char get_attributes(struct vc_data *vc, u16 *pos)
 {
+	pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
 	return (u_char) (scr_readw(pos) >> 8);
 }
 
@@ -275,7 +276,7 @@ static void speakup_date(struct vc_data *vc)
 	spk_y = spk_cy = vc->vc_y;
 	spk_pos = spk_cp = vc->vc_pos;
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) spk_pos);
+	spk_attr = get_attributes(vc, (u_short *)spk_pos);
 }
 
 static void bleep(u_short val)
@@ -469,8 +470,12 @@ static u16 get_char(struct vc_data *vc, u16 *pos, u_char *attribs)
 	u16 ch = ' ';
 
 	if (vc && pos) {
-		u16 w = scr_readw(pos);
-		u16 c = w & 0xff;
+		u16 w;
+		u16 c;
+
+		pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
+		w = scr_readw(pos);
+		c = w & 0xff;
 
 		if (w & vc->vc_hi_font_mask)
 			c |= 0x100;
@@ -746,7 +751,7 @@ static int get_line(struct vc_data *vc)
 	u_char tmp2;
 
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) spk_pos);
+	spk_attr = get_attributes(vc, (u_short *)spk_pos);
 	for (i = 0; i < vc->vc_cols; i++) {
 		buf[i] = (u_char) get_char(vc, (u_short *) tmp, &tmp2);
 		tmp += 2;
@@ -811,7 +816,7 @@ static int say_from_to(struct vc_data *vc, u_long from, u_long to,
 	u_short saved_punc_mask = spk_punc_mask;
 
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) from);
+	spk_attr = get_attributes(vc, (u_short *)from);
 	while (from < to) {
 		buf[i++] = (char)get_char(vc, (u_short *) from, &tmp);
 		from += 2;
@@ -886,7 +891,7 @@ static int get_sentence_buf(struct vc_data *vc, int read_punc)
 	sentmarks[bn][0] = &sentbuf[bn][0];
 	i = 0;
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) start);
+	spk_attr = get_attributes(vc, (u_short *)start);
 
 	while (start < end) {
 		sentbuf[bn][i] = (char)get_char(vc, (u_short *) start, &tmp);
@@ -1585,7 +1590,7 @@ static int count_highlight_color(struct vc_data *vc)
 		u16 *ptr;
 
 		for (ptr = start; ptr < end; ptr++) {
-			ch = get_attributes(ptr);
+			ch = get_attributes(vc, ptr);
 			bg = (ch & 0x70) >> 4;
 			speakup_console[vc_num]->ht.bgcount[bg]++;
 		}
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 4462d167900c..12210dab9ab1 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4250,6 +4250,7 @@ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
 {
 	return screenpos(vc, 2 * w_offset, viewed);
 }
+EXPORT_SYMBOL_GPL(screen_pos);
 
 void getconsxy(struct vc_data *vc, unsigned char *p)
 {

From e01d8718de4170373cd7fbf5cf6f9cb61cebb1e9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 27 Jan 2016 23:24:29 +0100
Subject: [PATCH 237/262] perf/x86: Fix uninitialized value usage

When calling intel_alt_er() with .idx != EXTRA_REG_RSP_* we will not
initialize alt_idx and then use this uninitialized value to index an
array.

When that is not fatal, it can result in an infinite loop in its
caller __intel_shared_reg_get_constraints(), with IRQs disabled.

Alternative error modes are random memory corruption due to the
cpuc->shared_regs->regs[] array overrun, which manifest in either
get_constraints or put_constraints doing weird stuff.

Only took 6 hours of painful debugging to find this. Neither GCC nor
Smatch warnings flagged this bug.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: ae3f011fc251 ("perf/x86/intel: Fix SLM MSR_OFFCORE_RSP1 valid_mask")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a667078a5180..4264a084e59b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx, u64 config)
 {
-	int alt_idx;
+	int alt_idx = idx;
+
 	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
 		return idx;
 

From 8f04b8536f0c94f8999b65cd1c6c7523116a00ae Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 27 Jan 2016 23:31:09 +0100
Subject: [PATCH 238/262] perf/x86: De-obfuscate code

Get rid of the 'onln' obfuscation.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 4264a084e59b..fed2ab1f1065 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2898,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
 		return;
 
 	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-		void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
 		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
 			struct intel_shared_regs *pc;
 
 			pc = per_cpu(cpu_hw_events, i).shared_regs;
 			if (pc && pc->core_id == core_id) {
-				*onln = cpuc->shared_regs;
+				cpuc->kfree_on_online[0] = cpuc->shared_regs;
 				cpuc->shared_regs = pc;
 				break;
 			}

From 828b6f0e26170938d617e99a17177453be4d77a3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 27 Jan 2016 21:59:04 +0100
Subject: [PATCH 239/262] perf: Fix NULL deref

Dan reported:

  1229                  if (ctx->task == TASK_TOMBSTONE ||
  1230                      !atomic_inc_not_zero(&ctx->refcount)) {
  1231                          raw_spin_unlock(&ctx->lock);
  1232                          ctx = NULL;
                                ^^^^^^^^^^
ctx is NULL.

  1233                  }
  1234
  1235                  WARN_ON_ONCE(ctx->task != task);
                                     ^^^^^^^^^^^^^^^^^
The patch adds a NULL dereference.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: 63b6da39bb38 ("perf: Fix perf_event_exit_task() race")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1d243fadfd12..fe97f95f204e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1230,9 +1230,9 @@ retry:
 		    !atomic_inc_not_zero(&ctx->refcount)) {
 			raw_spin_unlock(&ctx->lock);
 			ctx = NULL;
+		} else {
+			WARN_ON_ONCE(ctx->task != task);
 		}
-
-		WARN_ON_ONCE(ctx->task != task);
 	}
 	rcu_read_unlock();
 	if (!ctx)

From e03e7ee34fdd1c3ef494949a75cb8c61c7265fa9 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Date: Mon, 25 Jan 2016 20:59:49 -0800
Subject: [PATCH 240/262] perf/bpf: Convert perf_event_array to use struct file

Robustify refcounting.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/20160126045947.GA40151@ast-mbp.thefacebook.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  4 ++--
 kernel/bpf/arraymap.c      | 21 +++++++++++----------
 kernel/events/core.c       | 21 ++++++++++-----------
 kernel/trace/bpf_trace.c   | 14 ++++++++++----
 4 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6612732d8fd0..4f90434b8d64 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -729,7 +729,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1070,7 +1070,7 @@ static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
-static inline struct perf_event *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	return ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b0799bced518..89ebbc4d1164 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 {
 	struct perf_event *event;
 	const struct perf_event_attr *attr;
+	struct file *file;
 
-	event = perf_event_get(fd);
-	if (IS_ERR(event))
-		return event;
+	file = perf_event_get(fd);
+	if (IS_ERR(file))
+		return file;
+
+	event = file->private_data;
 
 	attr = perf_event_attrs(event);
 	if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 		goto err;
 
 	if (attr->type == PERF_TYPE_RAW)
-		return event;
+		return file;
 
 	if (attr->type == PERF_TYPE_HARDWARE)
-		return event;
+		return file;
 
 	if (attr->type == PERF_TYPE_SOFTWARE &&
 	    attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-		return event;
+		return file;
 err:
-	perf_event_release_kernel(event);
+	fput(file);
 	return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-	struct perf_event *event = ptr;
-
-	perf_event_release_kernel(event);
+	fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fe97f95f204e..eb44730afea5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8916,21 +8916,20 @@ void perf_event_delayed_put(struct task_struct *task)
 		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-	int err;
-	struct fd f;
-	struct perf_event *event;
+	struct file *file;
 
-	err = perf_fget_light(fd, &f);
-	if (err)
-		return ERR_PTR(err);
+	file = fget_raw(fd);
+	if (!file)
+		return ERR_PTR(-EBADF);
 
-	event = f.file->private_data;
-	atomic_long_inc(&event->refcount);
-	fdput(f);
+	if (file->f_op != &perf_fops) {
+		fput(file);
+		return ERR_PTR(-EBADF);
+	}
 
-	return event;
+	return file;
 }
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 45dd798bcd37..326a75e884db 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
 	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct perf_event *event;
+	struct file *file;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	event = (struct perf_event *)array->ptrs[index];
-	if (!event)
+	file = (struct file *)array->ptrs[index];
+	if (unlikely(!file))
 		return -ENOENT;
 
+	event = file->private_data;
+
 	/* make sure event is local and doesn't have pmu::count */
 	if (event->oncpu != smp_processor_id() ||
 	    event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
 	void *data = (void *) (long) r4;
 	struct perf_sample_data sample_data;
 	struct perf_event *event;
+	struct file *file;
 	struct perf_raw_record raw = {
 		.size = size,
 		.data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	event = (struct perf_event *)array->ptrs[index];
-	if (unlikely(!event))
+	file = (struct file *)array->ptrs[index];
+	if (unlikely(!file))
 		return -ENOENT;
 
+	event = file->private_data;
+
 	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
 		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
 		return -EINVAL;

From a0733e695b83a9c31f779e41dcaec8ef924716b5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 12:14:40 +0100
Subject: [PATCH 241/262] perf: Remove __free_event()

There is but a single caller, remove the function - we already have
_free_event(), the extra indirection is nonsensical..

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 45 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index eb44730afea5..024adf0e34eb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3590,7 +3590,7 @@ static void unaccount_event(struct perf_event *event)
  *  3) two matching events on the same context.
  *
  * The former two cases are handled in the allocation path (perf_event_alloc(),
- * __free_event()), the latter -- before the first perf_install_in_context().
+ * _free_event()), the latter -- before the first perf_install_in_context().
  */
 static int exclusive_event_init(struct perf_event *event)
 {
@@ -3665,29 +3665,6 @@ static bool exclusive_event_installable(struct perf_event *event,
 	return true;
 }
 
-static void __free_event(struct perf_event *event)
-{
-	if (!event->parent) {
-		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-			put_callchain_buffers();
-	}
-
-	perf_event_free_bpf_prog(event);
-
-	if (event->destroy)
-		event->destroy(event);
-
-	if (event->ctx)
-		put_ctx(event->ctx);
-
-	if (event->pmu) {
-		exclusive_event_destroy(event);
-		module_put(event->pmu->module);
-	}
-
-	call_rcu(&event->rcu_head, free_event_rcu);
-}
-
 static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
@@ -3709,7 +3686,25 @@ static void _free_event(struct perf_event *event)
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
-	__free_event(event);
+	if (!event->parent) {
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			put_callchain_buffers();
+	}
+
+	perf_event_free_bpf_prog(event);
+
+	if (event->destroy)
+		event->destroy(event);
+
+	if (event->ctx)
+		put_ctx(event->ctx);
+
+	if (event->pmu) {
+		exclusive_event_destroy(event);
+		module_put(event->pmu->module);
+	}
+
+	call_rcu(&event->rcu_head, free_event_rcu);
 }
 
 /*

From 07c4a776135ea6d808ea19aabeb51de6b8648402 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 12:15:37 +0100
Subject: [PATCH 242/262] perf: Update locking order

Update the locking order to note that ctx::lock nests inside of
child_mutex, as per:

  perf_ioctl():                ctx::mutex
  -> perf_event_for_each():    event::child_mutex
    -> _perf_event_enable():   ctx::lock

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 024adf0e34eb..d345964c2bd6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1086,8 +1086,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * Lock order:
  *	task_struct::perf_event_mutex
  *	  perf_event_context::mutex
- *	    perf_event_context::lock
  *	    perf_event::child_mutex;
+ *	      perf_event_context::lock
  *	    perf_event::mmap_mutex
  *	    mmap_sem
  */

From 6e801e016917989ab8a7ddfc4229a15a5621622a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 12:17:08 +0100
Subject: [PATCH 243/262] perf: Fix STATE_EXIT usage

We should never attempt to enable a STATE_EXIT event.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d345964c2bd6..d84374fa44e5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2253,7 +2253,8 @@ static void __perf_event_enable(struct perf_event *event,
 	struct perf_event *leader = event->group_leader;
 	struct perf_event_context *task_ctx;
 
-	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+	if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+	    event->state <= PERF_EVENT_STATE_ERROR)
 		return;
 
 	update_context_time(ctx);
@@ -2298,7 +2299,8 @@ static void _perf_event_enable(struct perf_event *event)
 	struct perf_event_context *ctx = event->ctx;
 
 	raw_spin_lock_irq(&ctx->lock);
-	if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+	if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+	    event->state <  PERF_EVENT_STATE_ERROR) {
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}

From f47c02c0c8403963fbb8c3484e285727305d0f73 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 12:30:14 +0100
Subject: [PATCH 244/262] perf: Robustify event->owner usage and SMP ordering

Use smp_store_release() to clear event->owner and
lockless_dereference() to observe it. Further use READ_ONCE() for all
lockless reads.

This changes perf_remove_from_owner() to leave event->owner cleared.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d84374fa44e5..5f055de90c6d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -152,7 +152,7 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
 
 static bool is_kernel_event(struct perf_event *event)
 {
-	return event->owner == TASK_TOMBSTONE;
+	return READ_ONCE(event->owner) == TASK_TOMBSTONE;
 }
 
 /*
@@ -1651,7 +1651,7 @@ out:
  */
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event && !is_kernel_event(event) && !event->owner;
+	return event && !is_kernel_event(event) && !READ_ONCE(event->owner);
 }
 
 /*
@@ -3733,14 +3733,13 @@ static void perf_remove_from_owner(struct perf_event *event)
 	struct task_struct *owner;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(event->owner);
 	/*
-	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
-	 * !owner it means the list deletion is complete and we can indeed
-	 * free this event, otherwise we need to serialize on
+	 * Matches the smp_store_release() in perf_event_exit_task(). If we
+	 * observe !owner it means the list deletion is complete and we can
+	 * indeed free this event, otherwise we need to serialize on
 	 * owner->perf_event_mutex.
 	 */
-	smp_read_barrier_depends();
+	owner = lockless_dereference(event->owner);
 	if (owner) {
 		/*
 		 * Since delayed_put_task_struct() also drops the last
@@ -3768,8 +3767,10 @@ static void perf_remove_from_owner(struct perf_event *event)
 		 * ensured they're done, and we can proceed with freeing the
 		 * event.
 		 */
-		if (event->owner)
+		if (event->owner) {
 			list_del_init(&event->owner_entry);
+			smp_store_release(&event->owner, NULL);
+		}
 		mutex_unlock(&owner->perf_event_mutex);
 		put_task_struct(owner);
 	}
@@ -8829,8 +8830,7 @@ void perf_event_exit_task(struct task_struct *child)
 		 * the owner, closes a race against perf_release() where
 		 * we need to serialize on the owner->perf_event_mutex.
 		 */
-		smp_wmb();
-		event->owner = NULL;
+		smp_store_release(&event->owner, NULL);
 	}
 	mutex_unlock(&child->perf_event_mutex);
 

From 8ba289b8d4e4dbd1f971fbf0d2085e4776a4ba25 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 13:06:56 +0100
Subject: [PATCH 245/262] perf: Clean up sync_child_event()

sync_child_event() has outgrown its purpose, it does far too much.
Bring it back to its named purpose.

Rename __perf_event_exit_task() to perf_event_exit_event() to better
reflect what it does and move the event->state assignment under the
ctx->lock, like state changes ought to be.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 81 +++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 42 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5f055de90c6d..8c3d95195f05 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1041,9 +1041,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * perf_event_context::mutex nests and those are:
  *
  *  - perf_event_exit_task_context()	[ child , 0 ]
- *      __perf_event_exit_task()
- *        sync_child_event()
- *          put_event()			[ parent, 1 ]
+ *      perf_event_exit_event()
+ *        put_event()			[ parent, 1 ]
  *
  *  - perf_event_init_context()		[ parent, 0 ]
  *      inherit_task_group()
@@ -1846,7 +1845,8 @@ static void __perf_event_disable(struct perf_event *event,
  * remains valid.  This condition is satisifed when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
- * goes to exit will block in sync_child_event.
+ * goes to exit will block in perf_event_exit_event().
+ *
  * When called from perf_pending_event it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
@@ -4086,7 +4086,7 @@ static void _perf_event_reset(struct perf_event *event)
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
- * in sync_child_event if it goes to exit, thus satisfying the
+ * in perf_event_exit_event() if it goes to exit, thus satisfying the
  * task existence requirements of perf_event_enable/disable.
  */
 static void perf_event_for_each_child(struct perf_event *event,
@@ -8681,33 +8681,15 @@ static void sync_child_event(struct perf_event *child_event,
 		     &parent_event->child_total_time_enabled);
 	atomic64_add(child_event->total_time_running,
 		     &parent_event->child_total_time_running);
-
-	/*
-	 * Remove this event from the parent's list
-	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
-	list_del_init(&child_event->child_list);
-	mutex_unlock(&parent_event->child_mutex);
-
-	/*
-	 * Make sure user/parent get notified, that we just
-	 * lost one event.
-	 */
-	perf_event_wakeup(parent_event);
-
-	/*
-	 * Release the parent event, if this was the last
-	 * reference to it.
-	 */
-	put_event(parent_event);
 }
 
 static void
-__perf_event_exit_task(struct perf_event *child_event,
-			 struct perf_event_context *child_ctx,
-			 struct task_struct *child)
+perf_event_exit_event(struct perf_event *child_event,
+		      struct perf_event_context *child_ctx,
+		      struct task_struct *child)
 {
+	struct perf_event *parent_event = child_event->parent;
+
 	/*
 	 * Do not destroy the 'original' grouping; because of the context
 	 * switch optimization the original events could've ended up in a
@@ -8723,23 +8705,39 @@ __perf_event_exit_task(struct perf_event *child_event,
 	raw_spin_lock_irq(&child_ctx->lock);
 	WARN_ON_ONCE(child_ctx->is_active);
 
-	if (!!child_event->parent)
+	if (parent_event)
 		perf_group_detach(child_event);
 	list_del_event(child_event, child_ctx);
+	child_event->state = PERF_EVENT_STATE_EXIT;
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
-	 * It can happen that the parent exits first, and has events
-	 * that are still around due to the child reference. These
-	 * events need to be zapped.
+	 * Parent events are governed by their filedesc, retain them.
 	 */
-	if (child_event->parent) {
-		sync_child_event(child_event, child);
-		free_event(child_event);
-	} else {
-		child_event->state = PERF_EVENT_STATE_EXIT;
+	if (!parent_event) {
 		perf_event_wakeup(child_event);
+		return;
 	}
+	/*
+	 * Child events can be cleaned up.
+	 */
+
+	sync_child_event(child_event, child);
+
+	/*
+	 * Remove this event from the parent's list
+	 */
+	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+	mutex_lock(&parent_event->child_mutex);
+	list_del_init(&child_event->child_list);
+	mutex_unlock(&parent_event->child_mutex);
+
+	/*
+	 * Kick perf_poll() for is_event_hup().
+	 */
+	perf_event_wakeup(parent_event);
+	free_event(child_event);
+	put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
@@ -8765,10 +8763,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 *
 	 * We can recurse on the same lock type through:
 	 *
-	 *   __perf_event_exit_task()
-	 *     sync_child_event()
-	 *       put_event()
-	 *         mutex_lock(&ctx->mutex)
+	 *   perf_event_exit_event()
+	 *     put_event()
+	 *       mutex_lock(&ctx->mutex)
 	 *
 	 * But since its the parent context it won't be the same instance.
 	 */
@@ -8805,7 +8802,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	perf_event_task(child, child_ctx, 0);
 
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		__perf_event_exit_task(child_event, child_ctx, child);
+		perf_event_exit_event(child_event, child_ctx, child);
 
 	mutex_unlock(&child_ctx->mutex);
 

From 45a0e07abf4933490a2d2f81b1a31fe267bd3561 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 13:09:48 +0100
Subject: [PATCH 246/262] perf: Add flags argument to
 perf_remove_from_context()

In preparation to adding more options, convert the boolean argument
into a flags word.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8c3d95195f05..4291a4d27664 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1770,6 +1770,8 @@ group_sched_out(struct perf_event *group_event,
 		cpuctx->exclusive = 0;
 }
 
+#define DETACH_GROUP	0x01UL
+
 /*
  * Cross CPU call to remove a performance event
  *
@@ -1782,10 +1784,10 @@ __perf_remove_from_context(struct perf_event *event,
 			   struct perf_event_context *ctx,
 			   void *info)
 {
-	bool detach_group = (unsigned long)info;
+	unsigned long flags = (unsigned long)info;
 
 	event_sched_out(event, cpuctx, ctx);
-	if (detach_group)
+	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
 	list_del_event(event, ctx);
 
@@ -1808,12 +1810,11 @@ __perf_remove_from_context(struct perf_event *event,
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event, bool detach_group)
+static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
 	lockdep_assert_held(&event->ctx->mutex);
 
-	event_function_call(event, __perf_remove_from_context,
-			    (void *)(unsigned long)detach_group);
+	event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
@@ -3800,7 +3801,7 @@ static void put_event(struct perf_event *event)
 	 */
 	ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
 	WARN_ON_ONCE(ctx->parent_ctx);
-	perf_remove_from_context(event, true);
+	perf_remove_from_context(event, DETACH_GROUP);
 	perf_event_ctx_unlock(event, ctx);
 
 	_free_event(event);
@@ -3840,7 +3841,7 @@ static void orphans_remove_work(struct work_struct *work)
 		if (!is_orphaned_child(event))
 			continue;
 
-		perf_remove_from_context(event, true);
+		perf_remove_from_context(event, DETACH_GROUP);
 
 		mutex_lock(&parent_event->child_mutex);
 		list_del_init(&event->child_list);
@@ -8430,11 +8431,11 @@ SYSCALL_DEFINE5(perf_event_open,
 		 * See perf_event_ctx_lock() for comments on the details
 		 * of swizzling perf_event::ctx.
 		 */
-		perf_remove_from_context(group_leader, false);
+		perf_remove_from_context(group_leader, 0);
 
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_remove_from_context(sibling, false);
+			perf_remove_from_context(sibling, 0);
 			put_ctx(gctx);
 		}
 
@@ -8614,7 +8615,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 	mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
 				 event_entry) {
-		perf_remove_from_context(event, false);
+		perf_remove_from_context(event, 0);
 		unaccount_event_cpu(event, src_cpu);
 		put_ctx(src_ctx);
 		list_add(&event->migrate_entry, &events);
@@ -9240,7 +9241,7 @@ static void __perf_event_exit_context(void *__info)
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry(event, &ctx->event_list, event_entry)
-		__perf_remove_from_context(event, cpuctx, ctx, (void *)(unsigned long)true);
+		__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
 	raw_spin_unlock(&ctx->lock);
 }
 

From 60beda849343494b2a598b927630bbe293c1cc6e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 14:55:02 +0100
Subject: [PATCH 247/262] perf: Untangle 'owner' confusion

There are two concepts of owner wrt an event and they are conflated:

 - event::owner / event::owner_list,
   used by prctl(.option = PR_TASK_PERF_EVENTS_{EN,DIS}ABLE).

 - the 'owner' of the event object, typically the file descriptor.

Currently these two concepts are conflated, which gives trouble with
scm_rights passing of file descriptors. Passing the event and then
closing the creating task would render the event 'orphan' and would
have it cleared out. Unlikely what is expectd.

This patch untangles these two concepts by using PERF_EVENT_STATE_EXIT
to denote the second type.

Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4291a4d27664..e549cf2accdd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1650,7 +1650,7 @@ out:
  */
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event && !is_kernel_event(event) && !READ_ONCE(event->owner);
+	return event && event->state == PERF_EVENT_STATE_EXIT;
 }
 
 /*
@@ -1771,6 +1771,7 @@ group_sched_out(struct perf_event *group_event,
 }
 
 #define DETACH_GROUP	0x01UL
+#define DETACH_STATE	0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1790,6 +1791,8 @@ __perf_remove_from_context(struct perf_event *event,
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
 	list_del_event(event, ctx);
+	if (flags & DETACH_STATE)
+		event->state = PERF_EVENT_STATE_EXIT;
 
 	if (!ctx->nr_events && ctx->is_active) {
 		ctx->is_active = 0;
@@ -3801,9 +3804,16 @@ static void put_event(struct perf_event *event)
 	 */
 	ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
 	WARN_ON_ONCE(ctx->parent_ctx);
-	perf_remove_from_context(event, DETACH_GROUP);
+	perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
 	perf_event_ctx_unlock(event, ctx);
 
+	/*
+	 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
+	 * either from the above perf_remove_from_context() or through
+	 * perf_event_exit_event().
+	 */
+	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
+
 	_free_event(event);
 }
 

From c6e5b73242d2d9172ea880483bc4ba7ffca0cfb2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 15 Jan 2016 16:07:41 +0200
Subject: [PATCH 248/262] perf: Synchronously clean up child events

The orphan cleanup workqueue doesn't always catch orphans, for example,
if they never schedule after they are orphaned. IOW, the event leak is
still very real. It also wouldn't work for kernel counters.

Doing it synchonously is a little hairy due to lock inversion issues,
but is made to work.

Patch based on work by Alexander Shishkin.

Suggested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |   3 -
 kernel/events/core.c       | 174 ++++++++++++++++++-------------------
 2 files changed, 84 insertions(+), 93 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4f90434b8d64..b35a61a481fa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -634,9 +634,6 @@ struct perf_event_context {
 	int				nr_cgroups;	 /* cgroup evts */
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
-
-	struct delayed_work		orphans_remove;
-	bool				orphans_remove_sched;
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e549cf2accdd..98c862aff8fa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -1645,45 +1643,11 @@ out:
 		perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event && event->state == PERF_EVENT_STATE_EXIT;
+	return event->state == PERF_EVENT_STATE_EXIT;
 }
 
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-	return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-		return;
-
-	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-		get_ctx(ctx);
-		ctx->orphans_remove_sched = true;
-	}
-}
-
-static int __init perf_workqueue_init(void)
-{
-	perf_wq = create_singlethread_workqueue("perf");
-	WARN(!perf_wq, "failed to create perf workqueue\n");
-	return perf_wq ? 0 : -1;
-}
-
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
 	struct pmu *pmu = event->pmu;
@@ -1744,9 +1708,6 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1984,9 +1945,6 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3369,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
-	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3782,11 +3739,22 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx;
-
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	_free_event(event);
+}
+
+/*
+ * Kill an event dead; while event:refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *child, *tmp;
+
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
@@ -3811,14 +3779,70 @@ static void put_event(struct perf_event *event)
 	 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
 	 * either from the above perf_remove_from_context() or through
 	 * perf_event_exit_event().
+	 *
+	 * Therefore, anybody acquiring event->child_mutex after the below
+	 * loop _must_ also see this, most importantly inherit_event() which
+	 * will avoid placing more children on the list.
+	 *
+	 * Thus this guarantees that we will in fact observe and kill _ALL_
+	 * child events.
 	 */
 	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
 
-	_free_event(event);
-}
+again:
+	mutex_lock(&event->child_mutex);
+	list_for_each_entry(child, &event->child_list, child_list) {
 
-int perf_event_release_kernel(struct perf_event *event)
-{
+		/*
+		 * Cannot change, child events are not migrated, see the
+		 * comment with perf_event_ctx_lock_nested().
+		 */
+		ctx = lockless_dereference(child->ctx);
+		/*
+		 * Since child_mutex nests inside ctx::mutex, we must jump
+		 * through hoops. We start by grabbing a reference on the ctx.
+		 *
+		 * Since the event cannot get freed while we hold the
+		 * child_mutex, the context must also exist and have a !0
+		 * reference count.
+		 */
+		get_ctx(ctx);
+
+		/*
+		 * Now that we have a ctx ref, we can drop child_mutex, and
+		 * acquire ctx::mutex without fear of it going away. Then we
+		 * can re-acquire child_mutex.
+		 */
+		mutex_unlock(&event->child_mutex);
+		mutex_lock(&ctx->mutex);
+		mutex_lock(&event->child_mutex);
+
+		/*
+		 * Now that we hold ctx::mutex and child_mutex, revalidate our
+		 * state, if child is still the first entry, it didn't get freed
+		 * and we can continue doing so.
+		 */
+		tmp = list_first_entry_or_null(&event->child_list,
+					       struct perf_event, child_list);
+		if (tmp == child) {
+			perf_remove_from_context(child, DETACH_GROUP);
+			list_del(&child->child_list);
+			free_event(child);
+			/*
+			 * This matches the refcount bump in inherit_event();
+			 * this can't be the last reference.
+			 */
+			put_event(event);
+		}
+
+		mutex_unlock(&event->child_mutex);
+		mutex_unlock(&ctx->mutex);
+		put_ctx(ctx);
+		goto again;
+	}
+	mutex_unlock(&event->child_mutex);
+
+	/* Must be the last reference */
 	put_event(event);
 	return 0;
 }
@@ -3829,46 +3853,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-	put_event(file->private_data);
+	perf_event_release_kernel(file->private_data);
 	return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-	struct perf_event_context *ctx;
-	struct perf_event *event, *tmp;
-
-	ctx = container_of(work, struct perf_event_context,
-			   orphans_remove.work);
-
-	mutex_lock(&ctx->mutex);
-	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-		struct perf_event *parent_event = event->parent;
-
-		if (!is_orphaned_child(event))
-			continue;
-
-		perf_remove_from_context(event, DETACH_GROUP);
-
-		mutex_lock(&parent_event->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent_event->child_mutex);
-
-		free_event(event);
-		put_event(parent_event);
-	}
-
-	raw_spin_lock_irq(&ctx->lock);
-	ctx->orphans_remove_sched = false;
-	raw_spin_unlock_irq(&ctx->lock);
-	mutex_unlock(&ctx->mutex);
-
-	put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -8719,7 +8707,7 @@ perf_event_exit_event(struct perf_event *child_event,
 	if (parent_event)
 		perf_group_detach(child_event);
 	list_del_event(child_event, child_ctx);
-	child_event->state = PERF_EVENT_STATE_EXIT;
+	child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
@@ -8977,8 +8965,16 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
+	/*
+	 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+	 * must be under the same lock in order to serialize against
+	 * perf_event_release_kernel(), such that either we must observe
+	 * is_orphaned_event() or they will observe us on the child_list.
+	 */
+	mutex_lock(&parent_event->child_mutex);
 	if (is_orphaned_event(parent_event) ||
 	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
+		mutex_unlock(&parent_event->child_mutex);
 		free_event(child_event);
 		return NULL;
 	}
@@ -9026,8 +9022,6 @@ inherit_event(struct perf_event *parent_event,
 	/*
 	 * Link this into the parent event's child list
 	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
 	list_add_tail(&child_event->child_list, &parent_event->child_list);
 	mutex_unlock(&parent_event->child_mutex);
 

From 5fa7c8ec57f70a7b5c6fe269fa9c51b9e465989c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Jan 2016 15:25:15 +0100
Subject: [PATCH 249/262] perf: Remove/simplify lockdep annotation

Now that the perf_event_ctx_lock_nested() call has moved from
put_event() into perf_event_release_kernel() the first reason is no
longer valid as that can no longer happen.

The second reason seems to have been invalidated when Al Viro made fput()
unconditionally async in the following commit:

  4a9d4b024a31 ("switch fput to task_work_add")

such that munmap()->fput()->release()->perf_release() would no longer happen.

Therefore, remove the annotation. This should increase the efficiency
of lockdep coverage of perf locking.

Suggested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 98c862aff8fa..f1e53e8d4ae2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3758,19 +3758,7 @@ int perf_event_release_kernel(struct perf_event *event)
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
-	/*
-	 * There are two ways this annotation is useful:
-	 *
-	 *  1) there is a lock recursion from perf_event_exit_task
-	 *     see the comment there.
-	 *
-	 *  2) there is a lock-inversion with mmap_sem through
-	 *     perf_read_group(), which takes faults while
-	 *     holding ctx->mutex, however this is called after
-	 *     the last filedesc died, so there is no possibility
-	 *     to trigger the AB-BA case.
-	 */
-	ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
+	ctx = perf_event_ctx_lock(event);
 	WARN_ON_ONCE(ctx->parent_ctx);
 	perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
 	perf_event_ctx_unlock(event, ctx);
@@ -8759,14 +8747,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 * perf_event_create_kernel_count() which does find_get_context()
 	 * without ctx::mutex (it cannot because of the move_group double mutex
 	 * lock thing). See the comments in perf_install_in_context().
-	 *
-	 * We can recurse on the same lock type through:
-	 *
-	 *   perf_event_exit_event()
-	 *     put_event()
-	 *       mutex_lock(&ctx->mutex)
-	 *
-	 * But since its the parent context it won't be the same instance.
 	 */
 	mutex_lock(&child_ctx->mutex);
 

From 52795f6fdeb8a2b98373108ac2838c674bb2cbc4 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Wed, 27 Jan 2016 14:40:33 +0100
Subject: [PATCH 250/262] i2c: piix4: don't regress on bus names

The I2C bus names are supposed to be stable as they can be used by
userspace to uniquely identify a specific I2C bus. So restore the
original names for all legacy (pre-SB800) devices.

For SB800 devices and later, improve the names. "SDA" refers to the
serial data pin of each SMBus port, it's an implementation detail the
user doesn't need to know. Use "port" instead, which is easier to
understand.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Christian Fetzer <fetzer.ch@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-piix4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index f79a84ef1aa4..93f2895383ee 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -139,9 +139,9 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 /* SB800 globals */
 static DEFINE_MUTEX(piix4_mutex_sb800);
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
-	"SDA0", "SDA2", "SDA3", "SDA4"
+	" port 0", " port 2", " port 3", " port 4"
 };
-static const char *piix4_aux_port_name_sb800 = "SDA1";
+static const char *piix4_aux_port_name_sb800 = " port 1";
 
 struct i2c_piix4_adapdata {
 	unsigned short smba;
@@ -660,7 +660,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 	adap->dev.parent = &dev->dev;
 
 	snprintf(adap->name, sizeof(adap->name),
-		"SMBus PIIX4 adapter %s at %04x", name, smba);
+		"SMBus PIIX4 adapter%s at %04x", name, smba);
 
 	i2c_set_adapdata(adap, adapdata);
 
@@ -712,11 +712,14 @@ error:
 static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	int retval;
+	bool is_sb800 = false;
 
 	if ((dev->vendor == PCI_VENDOR_ID_ATI &&
 	     dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
 	     dev->revision >= 0x40) ||
 	    dev->vendor == PCI_VENDOR_ID_AMD) {
+		is_sb800 = true;
+
 		if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) {
 			dev_err(&dev->dev,
 			"SMBus base address index region 0x%x already in use!\n",
@@ -746,7 +749,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			return retval;
 
 		/* Try to register main SMBus adapter, give up if we can't */
-		retval = piix4_add_adapter(dev, retval, false, 0, "main",
+		retval = piix4_add_adapter(dev, retval, false, 0, "",
 					   &piix4_main_adapters[0]);
 		if (retval < 0)
 			return retval;
@@ -774,7 +777,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		/* Try to add the aux adapter if it exists,
 		 * piix4_add_adapter will clean up if this fails */
 		piix4_add_adapter(dev, retval, false, 0,
-				  piix4_aux_port_name_sb800,
+				  is_sb800 ? piix4_aux_port_name_sb800 : "",
 				  &piix4_aux_adapter);
 	}
 

From 8f6aff9858c45525345b92b2a88c2af776c64340 Mon Sep 17 00:00:00 2001
From: Lada Trimasova <Lada.Trimasova@synopsys.com>
Date: Wed, 27 Jan 2016 11:10:32 +0000
Subject: [PATCH 251/262] iommu/io-pgtable-arm: Fix io-pgtable-arm build
 failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trying to build a kernel for ARC with both options CONFIG_COMPILE_TEST
and CONFIG_IOMMU_IO_PGTABLE_LPAE enabled (e.g. as a result of "make
allyesconfig") results in the following build failure:

 | CC drivers/iommu/io-pgtable-arm.o
 | linux/drivers/iommu/io-pgtable-arm.c: In
 | function ‘__arm_lpae_alloc_pages’:
 | linux/drivers/iommu/io-pgtable-arm.c:221:3:
 | error: implicit declaration of function ‘dma_map_single’
 | [-Werror=implicit-function-declaration]
 | dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
 | ^
 | linux/drivers/iommu/io-pgtable-arm.c:221:42:
 | error: ‘DMA_TO_DEVICE’ undeclared (first use in this function)
 | dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
 | ^

Since IOMMU_IO_PGTABLE_LPAE depends on DMA API, io-pgtable-arm.c should
include linux/dma-mapping.h. This fixes the reported failure.

Cc: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Lada Trimasova <ltrimas@synopsys.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgtable-arm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 8bbcbfe7695c..381ca5a37a7b 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/barrier.h>
 

From c5b635203032462603c503ecce91a7021c1ad44a Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <linux@rempel-privat.de>
Date: Fri, 29 Jan 2016 10:57:53 +0100
Subject: [PATCH 252/262] irqchip/mxs: Add missing set_handle_irq()

The rework of the driver missed to move the call to set_handle_irq() into
asm9260_of_init(). As a consequence no interrupt entry point is installed and
no interrupts are delivered

Solution is simple: Install the interrupt entry handler.

Fixes: 7e4ac676ee ("irqchip/mxs: Add Alphascale ASM9260 support")
Signed-off-by: Oleksij Rempel <linux@rempel-privat.de>
Cc: kernel@pengutronix.de
Cc: jason@lakedaemon.net
Cc: marc.zyngier@arm.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1454061473-24957-1-git-send-email-linux@rempel-privat.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-mxs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index c22e2d40cb30..efe50845939d 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -241,6 +241,7 @@ static int __init asm9260_of_init(struct device_node *np,
 		writel(0, icoll_priv.intr + i);
 
 	icoll_add_domain(np, ASM9260_NUM_IRQS);
+	set_handle_irq(icoll_handle_irq);
 
 	return 0;
 }

From da972fb13bc5a1baad450c11f9182e4cd0a091f6 Mon Sep 17 00:00:00 2001
From: Jeremy McNicoll <jmcnicol@redhat.com>
Date: Thu, 14 Jan 2016 21:33:06 -0800
Subject: [PATCH 253/262] iommu/vt-d: Don't skip PCI devices when disabling
 IOTLB

Fix a simple typo when disabling IOTLB on PCI(e) devices.

Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS")
Cc: stable@vger.kernel.org  # v4.4
Signed-off-by: Jeremy McNicoll <jmcnicol@redhat.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ac7387686ddc..986a53e3eb96 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1489,7 +1489,7 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
 {
 	struct pci_dev *pdev;
 
-	if (dev_is_pci(info->dev))
+	if (!dev_is_pci(info->dev))
 		return;
 
 	pdev = to_pci_dev(info->dev);

From 9b1a12d29109234d2b9718d04d4d404b7da4e794 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Wed, 20 Jan 2016 22:01:19 +0800
Subject: [PATCH 254/262] iommu/amd: Correct the wrong setting of alias DTE in
 do_attach

In below commit alias DTE is set when its peripheral is
setting DTE. However there's a code bug here to wrongly
set the alias DTE, correct it in this patch.

commit e25bfb56ea7f046b71414e02f80f620deb5c6362
Author: Joerg Roedel <jroedel@suse.de>
Date:   Tue Oct 20 17:33:38 2015 +0200

    iommu/amd: Set alias DTE in do_attach/do_detach

Signed-off-by: Baoquan He <bhe@redhat.com>
Tested-by: Mark Hounschell <markh@compro.net>
Cc: stable@vger.kernel.org # v4.4
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 539b0dea8034..e5e223938eec 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2049,7 +2049,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
 	/* Update device table */
 	set_dte_entry(dev_data->devid, domain, ats);
 	if (alias != dev_data->devid)
-		set_dte_entry(dev_data->devid, domain, ats);
+		set_dte_entry(alias, domain, ats);
 
 	device_flush_dte(dev_data);
 }

From 6f3cdb380f429af47ad6635568c22ba28df83a48 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 26 Jan 2016 18:33:04 +0200
Subject: [PATCH 255/262] iommu/vt-d: Fix link to Intel IOMMU Specification

Looks like the VT-d spec at intel.com got moved.  Update the link.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 Documentation/Intel-IOMMU.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
index 7b57fc087088..49585b6e1ea2 100644
--- a/Documentation/Intel-IOMMU.txt
+++ b/Documentation/Intel-IOMMU.txt
@@ -3,7 +3,7 @@ Linux IOMMU Support
 
 The architecture spec can be obtained from the below location.
 
-http://www.intel.com/technology/virtualization/
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
 

From 0d9bacb6c8265e7aa75ac28ae9b5d7748065942f Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Tue, 19 Jan 2016 14:28:48 +0900
Subject: [PATCH 256/262] iommu: Update struct iommu_ops comments

Update the comments around struct iommu_ops to match
current state and fix a few typos while at it.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f28dff313b07..a5c539fa5d2b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -133,8 +133,9 @@ struct iommu_dm_region {
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@ struct iommu_dm_region {
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@ struct iommu_ops {
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size, int prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-	/* Set the numer of window per domain */
+	/* Set the number of windows per domain */
 	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-	/* Get the numer of window per domain */
+	/* Get the number of windows per domain */
 	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU

From 6639484ddaf6707b41082c9fa9ca9af342df6402 Mon Sep 17 00:00:00 2001
From: Libin Yang <libin.yang@linux.intel.com>
Date: Fri, 29 Jan 2016 20:39:09 +0800
Subject: [PATCH 257/262] ALSA: hda - disable dynamic clock gating on Broxton
 before reset

On Broxton, to make sure the reset controller works properly,
MISCBDCGE bit (bit 6) in CGCTL (0x48) of PCI configuration space
need be cleared before reset and set back to 1 after reset.
Otherwise, it may prevent the CORB/RIRB logic from being reset.

Signed-off-by: Libin Yang <libin.yang@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.4+
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 256e6cda218f..4045dca3d699 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -90,6 +90,8 @@ enum {
 #define NVIDIA_HDA_ENABLE_COHBIT      0x01
 
 /* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL	 0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE        (0x1 << 6)
 #define INTEL_SCH_HDA_DEVC      0x78
 #define INTEL_SCH_HDA_DEVC_NOSNOOP       (0x1<<11)
 
@@ -534,10 +536,21 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
 {
 	struct hdac_bus *bus = azx_bus(chip);
 	struct pci_dev *pci = chip->pci;
+	u32 val;
 
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, true);
+	if (IS_BROXTON(pci)) {
+		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+		val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+	}
 	azx_init_chip(chip, full_reset);
+	if (IS_BROXTON(pci)) {
+		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+		val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+	}
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, false);
 

From 742563777e8da62197d6cb4b99f4027f59454735 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Fri, 29 Jan 2016 11:36:10 +0000
Subject: [PATCH 258/262] x86/mm/pat: Avoid truncation when converting
 cpa->numpages to address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are a couple of nasty truncation bugs lurking in the pageattr
code that can be triggered when mapping EFI regions, e.g. when we pass
a cpa->pgd pointer. Because cpa->numpages is a 32-bit value, shifting
left by PAGE_SHIFT will truncate the resultant address to 32-bits.

Viorel-Cătălin managed to trigger this bug on his Dell machine that
provides a ~5GB EFI region which requires 1236992 pages to be mapped.
When calling populate_pud() the end of the region gets calculated
incorrectly in the following buggy expression,

  end = start + (cpa->numpages << PAGE_SHIFT);

And only 188416 pages are mapped. Next, populate_pud() gets invoked
for a second time because of the loop in __change_page_attr_set_clr(),
only this time no pages get mapped because shifting the remaining
number of pages (1048576) by PAGE_SHIFT is zero. At which point the
loop in __change_page_attr_set_clr() spins forever because we fail to
map progress.

Hitting this bug depends very much on the virtual address we pick to
map the large region at and how many pages we map on the initial run
through the loop. This explains why this issue was only recently hit
with the introduction of commit

  a5caa209ba9c ("x86/efi: Fix boot crash by mapping EFI memmap
   entries bottom-up at runtime, instead of top-down")

It's interesting to note that safe uses of cpa->numpages do exist in
the pageattr code. If instead of shifting ->numpages we multiply by
PAGE_SIZE, no truncation occurs because PAGE_SIZE is a UL value, and
so the result is unsigned long.

To avoid surprises when users try to convert very large cpa->numpages
values to addresses, change the data type from 'int' to 'unsigned
long', thereby making it suitable for shifting by PAGE_SHIFT without
any type casting.

The alternative would be to make liberal use of casting, but that is
far more likely to cause problems in the future when someone adds more
code and fails to cast properly; this bug was difficult enough to
track down in the first place.

Reported-and-tested-by: Viorel-Cătălin Răpițeanu <rapiteanu.catalin@gmail.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=110131
Link: http://lkml.kernel.org/r/1454067370-10374-1-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fc6a4c8f6e2a..2440814b0069 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -33,7 +33,7 @@ struct cpa_data {
 	pgd_t		*pgd;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
-	int		numpages;
+	unsigned long	numpages;
 	int		flags;
 	unsigned long	pfn;
 	unsigned	force_split : 1;
@@ -1350,7 +1350,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 * CPA operation. Either a large page has been
 		 * preserved or a single page update happened.
 		 */
-		BUG_ON(cpa->numpages > numpages);
+		BUG_ON(cpa->numpages > numpages || !cpa->numpages);
 		numpages -= cpa->numpages;
 		if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
 			cpa->curpage++;

From 6c55d9b98335f7f6bd5f061866ff1633401f3a44 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Fri, 29 Jan 2016 16:49:47 +0200
Subject: [PATCH 259/262] serial: 8250_pci: Add Intel Broadwell ports

Some recent (early 2015) macbooks have Intel Broadwell where LPSS UARTs are
PCI enumerated instead of ACPI. The LPSS UART block is pretty much same as
used on Intel Baytrail so we can reuse the existing Baytrail setup code.

Add both Broadwell LPSS UART ports to the list of supported devices.

Signed-off-by: Leif Liddy <leif.liddy@gmail.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 4097f3f65b3b..e71ec78fc11e 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1379,6 +1379,9 @@ ce4100_serial_setup(struct serial_private *priv,
 #define PCI_DEVICE_ID_INTEL_BSW_UART1	0x228a
 #define PCI_DEVICE_ID_INTEL_BSW_UART2	0x228c
 
+#define PCI_DEVICE_ID_INTEL_BDW_UART1	0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2	0x9ce4
+
 #define BYT_PRV_CLK			0x800
 #define BYT_PRV_CLK_EN			(1 << 0)
 #define BYT_PRV_CLK_M_VAL_SHIFT		1
@@ -1461,11 +1464,13 @@ byt_serial_setup(struct serial_private *priv,
 	switch (pdev->device) {
 	case PCI_DEVICE_ID_INTEL_BYT_UART1:
 	case PCI_DEVICE_ID_INTEL_BSW_UART1:
+	case PCI_DEVICE_ID_INTEL_BDW_UART1:
 		rx_param->src_id = 3;
 		tx_param->dst_id = 2;
 		break;
 	case PCI_DEVICE_ID_INTEL_BYT_UART2:
 	case PCI_DEVICE_ID_INTEL_BSW_UART2:
+	case PCI_DEVICE_ID_INTEL_BDW_UART2:
 		rx_param->src_id = 5;
 		tx_param->dst_id = 4;
 		break;
@@ -2062,6 +2067,20 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.subdevice	= PCI_ANY_ID,
 		.setup		= byt_serial_setup,
 	},
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_BDW_UART1,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= byt_serial_setup,
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_BDW_UART2,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= byt_serial_setup,
+	},
 	/*
 	 * ITE
 	 */
@@ -5506,6 +5525,16 @@ static struct pci_device_id serial_pci_tbl[] = {
 		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
 		pbn_byt },
 
+	/* Intel Broadwell */
+	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
+		PCI_ANY_ID,  PCI_ANY_ID,
+		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+		pbn_byt },
+	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
+		PCI_ANY_ID,  PCI_ANY_ID,
+		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+		pbn_byt },
+
 	/*
 	 * Intel Quark x1000
 	 */

From e410e34fad913dd568ec28d2a9949694324c14db Mon Sep 17 00:00:00 2001
From: Chris Mason <clm@fb.com>
Date: Fri, 29 Jan 2016 08:19:37 -0800
Subject: [PATCH 260/262] Revert "btrfs: synchronize incompat feature bits with
 sysfs files"

This reverts commit 14e46e04958df740c6c6a94849f176159a333f13.

This ends up doing sysfs operations from deep in balance (where we
should be GFP_NOFS) and under heavy balance load, we're making races
against sysfs internals.

Revert it for now while we figure things out.

Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/free-space-tree.c | 7 -------
 fs/btrfs/ioctl.c           | 4 ----
 fs/btrfs/super.c           | 4 ----
 fs/btrfs/volumes.c         | 2 --
 4 files changed, 17 deletions(-)

diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 81a053d2966b..53dbeaf6ce94 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -23,7 +23,6 @@
 #include "locking.h"
 #include "free-space-tree.h"
 #include "transaction.h"
-#include "sysfs.h"
 
 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
 					struct btrfs_fs_info *fs_info,
@@ -1184,9 +1183,6 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
 	}
 
 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
-	btrfs_sysfs_feature_update(fs_info,
-		BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
-
 	fs_info->creating_free_space_tree = 0;
 
 	ret = btrfs_commit_transaction(trans, tree_root);
@@ -1255,9 +1251,6 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
 		return PTR_ERR(trans);
 
 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
-	btrfs_sysfs_feature_update(fs_info,
-		BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
-
 	fs_info->free_space_root = NULL;
 
 	ret = clear_free_space_tree(trans, free_space_root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1568f57a3376..83c9ad3f2621 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1459,8 +1459,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 
 	if (range->compress_type == BTRFS_COMPRESS_LZO) {
 		btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
-		btrfs_sysfs_feature_update(root->fs_info,
-			BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO, FEAT_INCOMPAT);
 	}
 
 	ret = defrag_count;
@@ -4069,8 +4067,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 	btrfs_free_path(path);
 
 	btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
-	btrfs_sysfs_feature_update(root->fs_info,
-		BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL, FEAT_INCOMPAT);
 	btrfs_end_transaction(trans, root);
 out:
 	mnt_drop_write_file(file);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 49b424103c32..a8e049ae933d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -58,7 +58,6 @@
 #include "dev-replace.h"
 #include "free-space-cache.h"
 #include "backref.h"
-#include "sysfs.h"
 #include "tests/btrfs-tests.h"
 
 #include "qgroup.h"
@@ -486,9 +485,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 				btrfs_clear_opt(info->mount_opt, NODATACOW);
 				btrfs_clear_opt(info->mount_opt, NODATASUM);
 				btrfs_set_fs_incompat(info, COMPRESS_LZO);
-				btrfs_sysfs_feature_update(root->fs_info,
-					BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO,
-					FEAT_INCOMPAT);
 				no_compress = 0;
 			} else if (strncmp(args[0].from, "no", 2) == 0) {
 				compress_type = "no";
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5b505f61929e..366b335946fa 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4469,8 +4469,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
 		return;
 
 	btrfs_set_fs_incompat(info, RAID56);
-	btrfs_sysfs_feature_update(info, BTRFS_FEATURE_INCOMPAT_RAID56,
-		FEAT_INCOMPAT);
 }
 
 #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r)		\

From 840d6fe7425ffb6a62d53b2759e01ae6daf90e4e Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 30 Jan 2016 10:04:17 +0800
Subject: [PATCH 261/262] pid: Fix spelling in comments

Accidentally discovered this typo when I studied this module.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tianhong Ding <dingtianhong@huawei.com>
Cc: Xinwei Hu <huxinwei@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1454119457-11272-1-git-send-email-thunder.leizhen@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/pid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index 78b3d9f80d44..e793d091f680 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -588,7 +588,7 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-	/* Veryify no one has done anything silly */
+	/* Verify no one has done anything silly: */
 	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
 
 	/* bump default and minimum pid_max based on number of cpus */

From 36f90b0a2ddd60823fe193a85e60ff1906c2a9b3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 31 Jan 2016 18:12:16 -0800
Subject: [PATCH 262/262] Linux 4.5-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c65fe37c99e5..6c1a3c247988 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*