From 191f57c137bcce0e3e9313acb77b2f114d15afbb Mon Sep 17 00:00:00 2001
From: Qiaowei Ren
Date: Sat, 7 Dec 2013 08:20:57 +0800
Subject: [PATCH 1/6] x86, cpufeature: Define the Intel MPX feature flag

Define the Intel MPX (Memory Protection Extensions) CPU feature flag
in the cpufeature list.

Signed-off-by: Qiaowei Ren
Link: http://lkml.kernel.org/r/1386375658-2191-2-git-send-email-qiaowei.ren@intel.com
Signed-off-by: Xudong Hao
Signed-off-by: Liu Jinsong
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/cpufeature.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 89270b4318db..e099f9502ace 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -216,6 +216,7 @@
 #define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 #define X86_FEATURE_INVPCID	(9*32+10) /* Invalidate Processor Context ID */
 #define X86_FEATURE_RTM		(9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_MPX		(9*32+14) /* Memory Protection Extension */
 #define X86_FEATURE_RDSEED	(9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		(9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	(9*32+20) /* Supervisor Mode Access Prevention */
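Word 9 of the cpufeature array corresponds to CPUID.(EAX=07H,ECX=0):EBX, so bit 14 here mirrors the architectural MPX enumeration bit. The flag is only a label at this point; later MPX code would gate itself on it with the usual cpufeature helpers. A minimal sketch of such a check (illustrative only, not part of this series; the helper name and message are made up):

	#include <linux/printk.h>
	#include <asm/cpufeature.h>

	static void __init report_mpx_support(void)
	{
		/* X86_FEATURE_MPX is word 9, bit 14: CPUID.(EAX=07H,ECX=0):EBX[14] */
		if (boot_cpu_has(X86_FEATURE_MPX))
			pr_info("x86: CPU supports Intel MPX\n");
	}
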
From e7d820a5e549b3eb6c3f9467507566565646a669 Mon Sep 17 00:00:00 2001
From: Qiaowei Ren
Date: Thu, 5 Dec 2013 17:15:34 +0800
Subject: [PATCH 2/6] x86, xsave: Support eager-only xsave features, add MPX support

Some features, like Intel MPX, work only if the kernel uses the
eagerfpu model.  So we should force eagerfpu on unless the user has
explicitly disabled it.

Add definitions for Intel MPX and add it to the supported list.

[ hpa: renamed XSTATE_FLEXIBLE to XSTATE_LAZY and added comments ]

Signed-off-by: Qiaowei Ren
Link: http://lkml.kernel.org/r/9E0BE1322F2F2246BD820DA9FC397ADE014A6115@SHSMSX102.ccr.corp.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/processor.h | 23 +++++++++++++++++++++++
 arch/x86/include/asm/xsave.h     | 14 ++++++++++----
 arch/x86/kernel/xsave.c          | 10 ++++++++++
 3 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..b7845a126792 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -370,6 +370,26 @@ struct ymmh_struct {
 	u32 ymmh_space[64];
 };
 
+struct lwp_struct {
+	u64 lwpcb_addr;
+	u32 flags;
+	u32 buf_head_offset;
+	u64 buf_base;
+	u32 buf_size;
+	u32 filters;
+	u64 saved_event_record[4];
+	u32 event_counter[16];
+};
+
+struct bndregs_struct {
+	u64 bndregs[8];
+} __packed;
+
+struct bndcsr_struct {
+	u64 cfg_reg_u;
+	u64 status_reg;
+} __packed;
+
 struct xsave_hdr_struct {
 	u64 xstate_bv;
 	u64 reserved1[2];
@@ -380,6 +400,9 @@ struct xsave_struct {
 	struct i387_fxsave_struct i387;
 	struct xsave_hdr_struct xsave_hdr;
 	struct ymmh_struct ymmh;
+	struct lwp_struct lwp;
+	struct bndregs_struct bndregs;
+	struct bndcsr_struct bndcsr;
 	/* new processor state extensions will go here */
 } __attribute__ ((packed, aligned (64)));
 
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 0415cdabb5a6..554738963b28 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -9,6 +9,8 @@
 #define XSTATE_FP	0x1
 #define XSTATE_SSE	0x2
 #define XSTATE_YMM	0x4
+#define XSTATE_BNDREGS	0x8
+#define XSTATE_BNDCSR	0x10
 
 #define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)
 
@@ -20,10 +22,14 @@
 #define XSAVE_YMM_SIZE	256
 #define XSAVE_YMM_OFFSET	(XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
 
-/*
- * These are the features that the OS can handle currently.
- */
-#define XCNTXT_MASK	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+/* Supported features which support lazy state saving */
+#define XSTATE_LAZY	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+
+/* Supported features which require eager state saving */
+#define XSTATE_EAGER	(XSTATE_BNDREGS | XSTATE_BNDCSR)
+
+/* All currently supported features */
+#define XCNTXT_MASK	(XSTATE_LAZY | XSTATE_EAGER)
 
 #ifdef CONFIG_X86_64
 #define REX_PREFIX	"0x48, "
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 422fd8223470..a4b451c6addf 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -562,6 +562,16 @@ static void __init xstate_enable_boot_cpu(void)
 	if (cpu_has_xsaveopt && eagerfpu != DISABLE)
 		eagerfpu = ENABLE;
 
+	if (pcntxt_mask & XSTATE_EAGER) {
+		if (eagerfpu == DISABLE) {
+			pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n",
+				pcntxt_mask & XSTATE_EAGER);
+			pcntxt_mask &= ~XSTATE_EAGER;
+		} else {
+			eagerfpu = ENABLE;
+		}
+	}
+
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
 }
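With the new state components declared, MPX code can pull bound-register state out of a task's XSAVE image once it has been saved, which the eagerfpu requirement above guarantees happens eagerly. A minimal sketch of such an accessor, using only the structures and bits added in this patch (the helper name is hypothetical and not part of the series):

	#include <asm/processor.h>
	#include <asm/xsave.h>

	static u64 get_mpx_status(const struct xsave_struct *xsave)
	{
		/*
		 * If XSTATE_BNDCSR is clear in xstate_bv, the component is
		 * in its init (all-zero) configuration, so report 0.
		 */
		if (!(xsave->xsave_hdr.xstate_bv & XSTATE_BNDCSR))
			return 0;

		return xsave->bndcsr.status_reg;
	}
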
From f09174c501f8bb259788cc36d5a7aa5b2831fb5e Mon Sep 17 00:00:00 2001
From: Qiaowei Ren
Date: Sat, 14 Dec 2013 14:25:02 +0800
Subject: [PATCH 3/6] x86: add user_atomic_cmpxchg_inatomic at uaccess.h

This patch adds user_atomic_cmpxchg_inatomic() to use the CMPXCHG
instruction against a user-space address.

This generalizes the already existing futex_atomic_cmpxchg_inatomic()
so it can be used in other contexts.  This will be used in the
upcoming support for Intel MPX (Memory Protection Extensions).

[ hpa: replaced #ifdef inside a macro with IS_ENABLED() ]

Signed-off-by: Qiaowei Ren
Link: http://lkml.kernel.org/r/1387002303-6620-1-git-send-email-qiaowei.ren@intel.com
Signed-off-by: H. Peter Anvin
Cc: Peter Zijlstra
---
 arch/x86/include/asm/uaccess.h | 92 ++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8ec57c07b125..48ff83855268 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -525,6 +525,98 @@ extern __must_check long strnlen_user(const char __user *str, long n);
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
+extern void __cmpxchg_wrong_size(void)
+	__compiletime_error("Bad argument size for cmpxchg");
+
+#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \
+({ \
+	int __ret = 0; \
+	__typeof__(ptr) __uval = (uval); \
+	__typeof__(*(ptr)) __old = (old); \
+	__typeof__(*(ptr)) __new = (new); \
+	switch (size) { \
+	case 1: \
+	{ \
+		asm volatile("\t" ASM_STAC "\n" \
+			"1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \
+			"2:\t" ASM_CLAC "\n" \
+			"\t.section .fixup, \"ax\"\n" \
+			"3:\tmov %3, %0\n" \
+			"\tjmp 2b\n" \
+			"\t.previous\n" \
+			_ASM_EXTABLE(1b, 3b) \
+			: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
+			: "i" (-EFAULT), "q" (__new), "1" (__old) \
+			: "memory" \
+			); \
+		break; \
+	} \
+	case 2: \
+	{ \
+		asm volatile("\t" ASM_STAC "\n" \
+			"1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \
+			"2:\t" ASM_CLAC "\n" \
+			"\t.section .fixup, \"ax\"\n" \
+			"3:\tmov %3, %0\n" \
+			"\tjmp 2b\n" \
+			"\t.previous\n" \
+			_ASM_EXTABLE(1b, 3b) \
+			: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
+			: "i" (-EFAULT), "r" (__new), "1" (__old) \
+			: "memory" \
+			); \
+		break; \
+	} \
+	case 4: \
+	{ \
+		asm volatile("\t" ASM_STAC "\n" \
+			"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \
+			"2:\t" ASM_CLAC "\n" \
+			"\t.section .fixup, \"ax\"\n" \
+			"3:\tmov %3, %0\n" \
+			"\tjmp 2b\n" \
+			"\t.previous\n" \
+			_ASM_EXTABLE(1b, 3b) \
+			: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
+			: "i" (-EFAULT), "r" (__new), "1" (__old) \
+			: "memory" \
+			); \
+		break; \
+	} \
+	case 8: \
+	{ \
+		if (!IS_ENABLED(CONFIG_X86_64)) \
+			__cmpxchg_wrong_size(); \
+ \
+		asm volatile("\t" ASM_STAC "\n" \
+			"1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \
+			"2:\t" ASM_CLAC "\n" \
+			"\t.section .fixup, \"ax\"\n" \
+			"3:\tmov %3, %0\n" \
+			"\tjmp 2b\n" \
+			"\t.previous\n" \
+			_ASM_EXTABLE(1b, 3b) \
+			: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
+			: "i" (-EFAULT), "r" (__new), "1" (__old) \
+			: "memory" \
+			); \
+		break; \
+	} \
+	default: \
+		__cmpxchg_wrong_size(); \
+	} \
+	*__uval = __old; \
+	__ret; \
+})
+
+#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \
+({ \
+	access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \
+		__user_atomic_cmpxchg_inatomic((uval), (ptr), \
+				(old), (new), sizeof(*(ptr))) : \
+		-EFAULT; \
+})
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
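The macro keeps the calling convention of the futex helper: the old value read from user space is stored through the first argument and the return value is 0 or -EFAULT. A sketch of a hypothetical caller (not part of this series) that swaps a 32-bit user slot only if it still holds the expected value:

	static int replace_user_slot(u32 __user *slot, u32 expected, u32 newval)
	{
		u32 actual;
		int ret;

		/* -EFAULT if the access_ok() check or the user access fails */
		ret = user_atomic_cmpxchg_inatomic(&actual, slot, expected, newval);
		if (ret)
			return ret;

		/* 'actual' holds the value that was in the slot; detect a lost race */
		return (actual == expected) ? 0 : -EBUSY;
	}
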
From 0ee3b6f87d4d748d5362cb47ff33fa1553805cb4 Mon Sep 17 00:00:00 2001
From: Qiaowei Ren
Date: Sat, 14 Dec 2013 14:25:03 +0800
Subject: [PATCH 4/6] x86: replace futex_atomic_cmpxchg_inatomic() with user_atomic_cmpxchg_inatomic

futex_atomic_cmpxchg_inatomic() is simply the 32-bit implementation of
user_atomic_cmpxchg_inatomic(), which in turn is a generalization of
the original code in futex_atomic_cmpxchg_inatomic().

Use the newly generalized user_atomic_cmpxchg_inatomic() as the futex
implementation, too.

[ hpa: retain the inline in futex.h rather than changing it to a macro ]

Signed-off-by: Qiaowei Ren
Link: http://lkml.kernel.org/r/1387002303-6620-2-git-send-email-qiaowei.ren@intel.com
Signed-off-by: H. Peter Anvin
Cc: Peter Zijlstra
---
 arch/x86/include/asm/futex.h | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index be27ba1e947a..b4c1f5453436 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -110,26 +110,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 						u32 oldval, u32 newval)
 {
-	int ret = 0;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
-	asm volatile("\t" ASM_STAC "\n"
-		"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
-		"2:\t" ASM_CLAC "\n"
-		"\t.section .fixup, \"ax\"\n"
-		"3:\tmov %3, %0\n"
-		"\tjmp 2b\n"
-		"\t.previous\n"
-		_ASM_EXTABLE(1b, 3b)
-		: "+r" (ret), "=a" (oldval), "+m" (*uaddr)
-		: "i" (-EFAULT), "r" (newval), "1" (oldval)
-		: "memory"
-	);
-
-	*uval = oldval;
-	return ret;
+	return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval);
 }
 
 #endif

From fb09b78151361f5001ad462e4b242b10845830e2 Mon Sep 17 00:00:00 2001
From: Qiaowei Ren
Date: Sun, 12 Jan 2014 17:20:02 +0800
Subject: [PATCH 5/6] x86, mpx: Add MPX related opcodes to the x86 opcode map

This patch adds all the MPX instructions to the x86 opcode map, so the
x86 instruction decoder can decode MPX instructions.

Signed-off-by: Qiaowei Ren
Link: http://lkml.kernel.org/r/1389518403-7715-4-git-send-email-qiaowei.ren@intel.com
Cc: Masami Hiramatsu
Signed-off-by: H. Peter Anvin
---
 arch/x86/lib/x86-opcode-map.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 533a85e3a07e..1a2be7c6895d 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -346,8 +346,8 @@ AVXcode: 1
 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
 18: Grp16 (1A)
 19:
-1a:
-1b:
+1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
+1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv
 1c:
 1d:
 1e:

From 741e3902cd89a7fbc04ae53f29a7ca0da452aa8e Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 20 Jan 2014 19:51:05 +0100
Subject: [PATCH 6/6] x86/intel/mpx: Remove unused LWP structure

We don't support LWP yet, so don't give the impression that we do:
represent the LWP state as 128 opaque bytes, which is how Linux sees
it currently.

Cc: Qiaowei Ren
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-ecarmjtfKpanpAapfck6dj6g@git.kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/processor.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b7845a126792..865c39910cc8 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -370,15 +370,9 @@ struct ymmh_struct {
 	u32 ymmh_space[64];
 };
 
+/* We don't support LWP yet: */
 struct lwp_struct {
-	u64 lwpcb_addr;
-	u32 flags;
-	u32 buf_head_offset;
-	u64 buf_base;
-	u32 buf_size;
-	u32 filters;
-	u64 saved_event_record[4];
-	u32 event_counter[16];
+	u8 reserved[128];
 };
 
 struct bndregs_struct {
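Because struct xsave_struct is packed, replacing the speculative LWP field layout with an opaque 128-byte array keeps the MPX members at the same offsets as before: 512 bytes of legacy i387 state plus the 64-byte header and 256 bytes of YMM state put lwp at byte 832, bndregs at byte 960 and bndcsr at byte 1024. A compile-time check along these lines (hypothetical, not part of the series; it would have to be called from some init path for the assertions to be evaluated) could catch an accidental layout change:

	#include <linux/bug.h>
	#include <linux/stddef.h>
	#include <asm/processor.h>

	static void check_xsave_layout(void)
	{
		/*
		 * Offsets follow from the packed layout: 512 + 64 + 256 = 832,
		 * then 128 opaque LWP bytes, then 64 bytes of bound registers.
		 */
		BUILD_BUG_ON(offsetof(struct xsave_struct, lwp)     !=  832);
		BUILD_BUG_ON(offsetof(struct xsave_struct, bndregs) !=  960);
		BUILD_BUG_ON(offsetof(struct xsave_struct, bndcsr)  != 1024);
	}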