Linux patch 4.14.18

Signed-off-by: Mike Pagano <mpagano@gentoo.org>
author: Mike Pagano <mpagano@gentoo.org> 2018-02-07 19:40:27 -0500
committer: Mike Pagano <mpagano@gentoo.org> 2018-11-14 09:00:39 -0500
commit: bf700be01b4e88d2b151c924c8a3e1be7a47be9d (patch)
tree: af43005304675dc5a3a81159f9c7fa5eff0cc84d /1017_linux-4.14.18.patch
parent: Linux patch 4.14.17 (diff)
download: linux-patches-bf700be01b4e88d2b151c924c8a3e1be7a47be9d.tar.gz
linux-patches-bf700be01b4e88d2b151c924c8a3e1be7a47be9d.tar.bz2
linux-patches-bf700be01b4e88d2b151c924c8a3e1be7a47be9d.zip
1 files changed, 3790 insertions, 0 deletions
diff --git a/1017_linux-4.14.18.patch b/1017_linux-4.14.18.patch
new file mode 100644
index 00000000..07fbf451
--- /dev/null
+++ b/1017_linux-4.14.18.patch
@@ -0,0 +1,3790 @@
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 8122b5f98ea1..c76afdcafbef 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2718,8 +2718,6 @@
+ 	norandmaps	Don't use address space randomization.  Equivalent to
+ 			echo 0 > /proc/sys/kernel/randomize_va_space
+ 
+-	noreplace-paravirt	[X86,IA-64,PV_OPS] Don't patch paravirt_ops
+-
+ 	noreplace-smp	[X86-32,SMP] Don't replace SMP instructions
+ 			with UP alternatives
+ 
+diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt
+new file mode 100644
+index 000000000000..e9e6cbae2841
+--- /dev/null
++++ b/Documentation/speculation.txt
+@@ -0,0 +1,90 @@
++This document explains potential effects of speculation, and how undesirable
++effects can be mitigated portably using common APIs.
++
++===========
++Speculation
++===========
++
++To improve performance and minimize average latencies, many contemporary CPUs
++employ speculative execution techniques such as branch prediction, performing
++work which may be discarded at a later stage.
++
++Typically speculative execution cannot be observed from architectural state,
++such as the contents of registers. However, in some cases it is possible to
++observe its impact on microarchitectural state, such as the presence or
++absence of data in caches. Such state may form side-channels which can be
++observed to extract secret information.
++
++For example, in the presence of branch prediction, it is possible for bounds
++checks to be ignored by code which is speculatively executed. Consider the
++following code:
++
++	int load_array(int *array, unsigned int index)
++	{
++		if (index >= MAX_ARRAY_ELEMS)
++			return 0;
++		else
++			return array[index];
++	}
++
++Which, on arm64, may be compiled to an assembly sequence such as:
++
++	CMP	<index>, #MAX_ARRAY_ELEMS
++	B.LO	less
++	MOV	<returnval>, #0
++	RET
++  less:
++	LDR	<returnval>, [<array>, <index>]
++	RET
++
++It is possible that a CPU mis-predicts the conditional branch, and
++speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
++value will subsequently be discarded, but the speculated load may affect
++microarchitectural state which can be subsequently measured.
++
++More complex sequences involving multiple dependent memory accesses may
++result in sensitive information being leaked. Consider the following
++code, building on the prior example:
++
++	int load_dependent_arrays(int *arr1, int *arr2, int index)
++	{
++		int val1, val2;
++
++		val1 = load_array(arr1, index);
++		val2 = load_array(arr2, val1);
++
++		return val2;
++	}
++
++Under speculation, the first call to load_array() may return the value
++of an out-of-bounds address, while the second call will influence
++microarchitectural state dependent on this value. This may provide an
++arbitrary read primitive.
++
++====================================
++Mitigating speculation side-channels
++====================================
++
++The kernel provides a generic API to ensure that bounds checks are
++respected even under speculation. Architectures which are affected by
++speculation-based side-channels are expected to implement these
++primitives.
++
++The array_index_nospec() helper in <linux/nospec.h> can be used to
++prevent information from being leaked via side-channels.
++
++A call to array_index_nospec(index, size) returns a sanitized index
++value that is bounded to [0, size) even under CPU speculation
++conditions.
++
++This can be used to protect the earlier load_array() example:
++
++	int load_array(int *array, unsigned int index)
++	{
++		if (index >= MAX_ARRAY_ELEMS)
++			return 0;
++		else {
++			index = array_index_nospec(index, MAX_ARRAY_ELEMS);
++			return array[index];
++		}
++	}
+diff --git a/Makefile b/Makefile
+index 7ed993896dd5..a69e5da9ed86 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 4
+ PATCHLEVEL = 14
+-SUBLEVEL = 17
++SUBLEVEL = 18
+ EXTRAVERSION =
+ NAME = Petit Gorille
+ 
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index cb782ac1c35d..fe418226df7f 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -164,6 +164,7 @@ config PPC
+ 	select GENERIC_CLOCKEVENTS_BROADCAST	if SMP
+ 	select GENERIC_CMOS_UPDATE
+ 	select GENERIC_CPU_AUTOPROBE
++	select GENERIC_CPU_VULNERABILITIES	if PPC_BOOK3S_64
+ 	select GENERIC_IRQ_SHOW
+ 	select GENERIC_IRQ_SHOW_LEVEL
+ 	select GENERIC_SMP_IDLE_THREAD
+diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
+index 935059cb9e40..9527a4c6cbc2 100644
+--- a/arch/powerpc/kernel/setup_64.c
++++ b/arch/powerpc/kernel/setup_64.c
+@@ -38,6 +38,7 @@
+ #include <linux/memory.h>
+ #include <linux/nmi.h>
+ 
++#include <asm/debugfs.h>
+ #include <asm/io.h>
+ #include <asm/kdump.h>
+ #include <asm/prom.h>
+@@ -884,4 +885,41 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+ 	if (!no_rfi_flush)
+ 		rfi_flush_enable(enable);
+ }
++
++#ifdef CONFIG_DEBUG_FS
++static int rfi_flush_set(void *data, u64 val)
++{
++	if (val == 1)
++		rfi_flush_enable(true);
++	else if (val == 0)
++		rfi_flush_enable(false);
++	else
++		return -EINVAL;
++
++	return 0;
++}
++
++static int rfi_flush_get(void *data, u64 *val)
++{
++	*val = rfi_flush ? 1 : 0;
++	return 0;
++}
++
++DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
++
++static __init int rfi_flush_debugfs_init(void)
++{
++	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
++	return 0;
++}
++device_initcall(rfi_flush_debugfs_init);
++#endif
++
++ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
++{
++	if (rfi_flush)
++		return sprintf(buf, "Mitigation: RFI Flush\n");
++
++	return sprintf(buf, "Vulnerable\n");
++}
+ #endif /* CONFIG_PPC_BOOK3S_64 */
+diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
+index 03505ffbe1b6..60e21ccfb6d6 100644
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -21,6 +21,7 @@
+ #include <linux/export.h>
+ #include <linux/context_tracking.h>
+ #include <linux/user-return-notifier.h>
++#include <linux/nospec.h>
+ #include <linux/uprobes.h>
+ #include <linux/livepatch.h>
+ #include <linux/syscalls.h>
+@@ -208,7 +209,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
+ 	 * special case only applies after poking regs and before the
+ 	 * very next return to user mode.
+ 	 */
+-	current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
++	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ #endif
+ 
+ 	user_enter_irqoff();
+@@ -284,7 +285,8 @@ __visible void do_syscall_64(struct pt_regs *regs)
+ 	 * regs->orig_ax, which changes the behavior of some syscalls.
+ 	 */
+ 	if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
+-		regs->ax = sys_call_table[nr & __SYSCALL_MASK](
++		nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
++		regs->ax = sys_call_table[nr](
+ 			regs->di, regs->si, regs->dx,
+ 			regs->r10, regs->r8, regs->r9);
+ 	}
+@@ -306,7 +308,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+ 	unsigned int nr = (unsigned int)regs->orig_ax;
+ 
+ #ifdef CONFIG_IA32_EMULATION
+-	current->thread.status |= TS_COMPAT;
++	ti->status |= TS_COMPAT;
+ #endif
+ 
+ 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
+@@ -320,6 +322,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+ 	}
+ 
+ 	if (likely(nr < IA32_NR_syscalls)) {
++		nr = array_index_nospec(nr, IA32_NR_syscalls);
+ 		/*
+ 		 * It's possible that a 32-bit syscall implementation
+ 		 * takes a 64-bit parameter but nonetheless assumes that
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 60c4c342316c..2a35b1e0fb90 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -252,7 +252,8 @@ ENTRY(__switch_to_asm)
+ 	 * exist, overwrite the RSB with entries which capture
+ 	 * speculative execution to prevent attack.
+ 	 */
+-	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++	/* Clobbers %ebx */
++	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ #endif
+ 
+ 	/* restore callee-saved registers */
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index be6b66464f6a..16e2d72e79a0 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -232,91 +232,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
+ 	pushq	%r9				/* pt_regs->r9 */
+ 	pushq	%r10				/* pt_regs->r10 */
+ 	pushq	%r11				/* pt_regs->r11 */
+-	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
+-	UNWIND_HINT_REGS extra=0
+-
+-	TRACE_IRQS_OFF
+-
+-	/*
+-	 * If we need to do entry work or if we guess we'll need to do
+-	 * exit work, go straight to the slow path.
+-	 */
+-	movq	PER_CPU_VAR(current_task), %r11
+-	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+-	jnz	entry_SYSCALL64_slow_path
+-
+-entry_SYSCALL_64_fastpath:
+-	/*
+-	 * Easy case: enable interrupts and issue the syscall.  If the syscall
+-	 * needs pt_regs, we'll call a stub that disables interrupts again
+-	 * and jumps to the slow path.
+-	 */
+-	TRACE_IRQS_ON
+-	ENABLE_INTERRUPTS(CLBR_NONE)
+-#if __SYSCALL_MASK == ~0
+-	cmpq	$__NR_syscall_max, %rax
+-#else
+-	andl	$__SYSCALL_MASK, %eax
+-	cmpl	$__NR_syscall_max, %eax
+-#endif
+-	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
+-	movq	%r10, %rcx
+-
+-	/*
+-	 * This call instruction is handled specially in stub_ptregs_64.
+-	 * It might end up jumping to the slow path.  If it jumps, RAX
+-	 * and all argument registers are clobbered.
+-	 */
+-#ifdef CONFIG_RETPOLINE
+-	movq	sys_call_table(, %rax, 8), %rax
+-	call	__x86_indirect_thunk_rax
+-#else
+-	call	*sys_call_table(, %rax, 8)
+-#endif
+-.Lentry_SYSCALL_64_after_fastpath_call:
+-
+-	movq	%rax, RAX(%rsp)
+-1:
++	pushq	%rbx				/* pt_regs->rbx */
++	pushq	%rbp				/* pt_regs->rbp */
++	pushq	%r12				/* pt_regs->r12 */
++	pushq	%r13				/* pt_regs->r13 */
++	pushq	%r14				/* pt_regs->r14 */
++	pushq	%r15				/* pt_regs->r15 */
++	UNWIND_HINT_REGS
+ 
+-	/*
+-	 * If we get here, then we know that pt_regs is clean for SYSRET64.
+-	 * If we see that no exit work is required (which we are required
+-	 * to check with IRQs off), then we can go straight to SYSRET64.
+-	 */
+-	DISABLE_INTERRUPTS(CLBR_ANY)
+ 	TRACE_IRQS_OFF
+-	movq	PER_CPU_VAR(current_task), %r11
+-	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+-	jnz	1f
+-
+-	LOCKDEP_SYS_EXIT
+-	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
+-	movq	RIP(%rsp), %rcx
+-	movq	EFLAGS(%rsp), %r11
+-	addq	$6*8, %rsp	/* skip extra regs -- they were preserved */
+-	UNWIND_HINT_EMPTY
+-	jmp	.Lpop_c_regs_except_rcx_r11_and_sysret
+ 
+-1:
+-	/*
+-	 * The fast path looked good when we started, but something changed
+-	 * along the way and we need to switch to the slow path.  Calling
+-	 * raise(3) will trigger this, for example.  IRQs are off.
+-	 */
+-	TRACE_IRQS_ON
+-	ENABLE_INTERRUPTS(CLBR_ANY)
+-	SAVE_EXTRA_REGS
+-	movq	%rsp, %rdi
+-	call	syscall_return_slowpath	/* returns with IRQs disabled */
+-	jmp	return_from_SYSCALL_64
+-
+-entry_SYSCALL64_slow_path:
+ 	/* IRQs are off. */
+-	SAVE_EXTRA_REGS
+ 	movq	%rsp, %rdi
+ 	call	do_syscall_64		/* returns with IRQs disabled */
+ 
+-return_from_SYSCALL_64:
+ 	TRACE_IRQS_IRETQ		/* we're about to change IF */
+ 
+ 	/*
+@@ -389,7 +318,6 @@ syscall_return_via_sysret:
+ 	/* rcx and r11 are already restored (see code above) */
+ 	UNWIND_HINT_EMPTY
+ 	POP_EXTRA_REGS
+-.Lpop_c_regs_except_rcx_r11_and_sysret:
+ 	popq	%rsi	/* skip r11 */
+ 	popq	%r10
+ 	popq	%r9
+@@ -420,47 +348,6 @@ syscall_return_via_sysret:
+ 	USERGS_SYSRET64
+ END(entry_SYSCALL_64)
+ 
+-ENTRY(stub_ptregs_64)
+-	/*
+-	 * Syscalls marked as needing ptregs land here.
+-	 * If we are on the fast path, we need to save the extra regs,
+-	 * which we achieve by trying again on the slow path.  If we are on
+-	 * the slow path, the extra regs are already saved.
+-	 *
+-	 * RAX stores a pointer to the C function implementing the syscall.
+-	 * IRQs are on.
+-	 */
+-	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+-	jne	1f
+-
+-	/*
+-	 * Called from fast path -- disable IRQs again, pop return address
+-	 * and jump to slow path
+-	 */
+-	DISABLE_INTERRUPTS(CLBR_ANY)
+-	TRACE_IRQS_OFF
+-	popq	%rax
+-	UNWIND_HINT_REGS extra=0
+-	jmp	entry_SYSCALL64_slow_path
+-
+-1:
+-	JMP_NOSPEC %rax				/* Called from C */
+-END(stub_ptregs_64)
+-
+-.macro ptregs_stub func
+-ENTRY(ptregs_\func)
+-	UNWIND_HINT_FUNC
+-	leaq	\func(%rip), %rax
+-	jmp	stub_ptregs_64
+-END(ptregs_\func)
+-.endm
+-
+-/* Instantiate ptregs_stub for each ptregs-using syscall */
+-#define __SYSCALL_64_QUAL_(sym)
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+-#include <asm/syscalls_64.h>
+-
+ /*
+  * %rdi: prev task
+  * %rsi: next task
+@@ -495,7 +382,8 @@ ENTRY(__switch_to_asm)
+ 	 * exist, overwrite the RSB with entries which capture
+ 	 * speculative execution to prevent attack.
+ 	 */
+-	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++	/* Clobbers %rbx */
++	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ #endif
+ 
+ 	/* restore callee-saved registers */
+diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
+index 9c09775e589d..c176d2fab1da 100644
+--- a/arch/x86/entry/syscall_64.c
++++ b/arch/x86/entry/syscall_64.c
+@@ -7,14 +7,11 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/syscall.h>
+ 
+-#define __SYSCALL_64_QUAL_(sym) sym
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+-
+-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
++#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+ #include <asm/syscalls_64.h>
+ #undef __SYSCALL_64
+ 
+-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
++#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+ 
+ extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+ 
+diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
+index 0927cdc4f946..4d111616524b 100644
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -38,5 +38,7 @@ INDIRECT_THUNK(dx)
+ INDIRECT_THUNK(si)
+ INDIRECT_THUNK(di)
+ INDIRECT_THUNK(bp)
+-INDIRECT_THUNK(sp)
++asmlinkage void __fill_rsb(void);
++asmlinkage void __clear_rsb(void);
++
+ #endif /* CONFIG_RETPOLINE */
+diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
+index 01727dbc294a..1e7c955b6303 100644
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -24,6 +24,34 @@
+ #define wmb()	asm volatile("sfence" ::: "memory")
+ #endif
+ 
++/**
++ * array_index_mask_nospec() - generate a mask that is ~0UL when the
++ * 	bounds check succeeds and 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * Returns:
++ *     0 - (index < size)
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++		unsigned long size)
++{
++	unsigned long mask;
++
++	asm ("cmp %1,%2; sbb %0,%0;"
++			:"=r" (mask)
++			:"r"(size),"r" (index)
++			:"cc");
++	return mask;
++}
++
++/* Override the default implementation from linux/nospec.h. */
++#define array_index_mask_nospec array_index_mask_nospec
++
++/* Prevent speculative execution past this barrier. */
++#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
++					   "lfence", X86_FEATURE_LFENCE_RDTSC)
++
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb()	rmb()
+ #else
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index ea9a7dde62e5..70eddb3922ff 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -29,6 +29,7 @@ enum cpuid_leafs
+ 	CPUID_8000_000A_EDX,
+ 	CPUID_7_ECX,
+ 	CPUID_8000_0007_EBX,
++	CPUID_7_EDX,
+ };
+ 
+ #ifdef CONFIG_X86_FEATURE_NAMES
+@@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||	\
++	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||	\
+ 	   REQUIRED_MASK_CHECK					  ||	\
+-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+ 
+ #define DISABLED_MASK_BIT_SET(feature_bit)				\
+ 	 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||	\
+@@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||	\
+ 	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||	\
++	   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||	\
+ 	   DISABLED_MASK_CHECK					  ||	\
+-	   BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++	   BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+ 
+ #define cpu_has(c, bit)							\
+ 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 25b9375c1484..73b5fff159a4 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -13,7 +13,7 @@
+ /*
+  * Defines x86 CPU feature bits
+  */
+-#define NCAPINTS			18	   /* N 32-bit words worth of info */
++#define NCAPINTS			19	   /* N 32-bit words worth of info */
+ #define NBUGINTS			1	   /* N 32-bit bug flags */
+ 
+ /*
+@@ -203,14 +203,14 @@
+ #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
+-#define X86_FEATURE_RETPOLINE		( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE		( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
+-#define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
+-#define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+ #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
+-#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
++#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
++
++#define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
+@@ -271,6 +271,9 @@
+ #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
+ #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
++#define X86_FEATURE_IBPB		(13*32+12) /* Indirect Branch Prediction Barrier */
++#define X86_FEATURE_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
++#define X86_FEATURE_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
+ 
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
+@@ -319,6 +322,13 @@
+ #define X86_FEATURE_SUCCOR		(17*32+ 1) /* Uncorrectable error containment and recovery */
+ #define X86_FEATURE_SMCA		(17*32+ 3) /* Scalable MCA */
+ 
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
++#define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
++#define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
++
+ /*
+  * BUG word(s)
+  */
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index e428e16dd822..c6a3af198294 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -71,6 +71,7 @@
+ #define DISABLED_MASK15	0
+ #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
+ #define DISABLED_MASK17	0
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define DISABLED_MASK18	0
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+ 
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 64c4a30e0d39..e203169931c7 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -137,8 +137,10 @@ enum fixed_addresses {
+ 
+ extern void reserve_top_address(unsigned long reserve);
+ 
+-#define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+-#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
++#define FIXADDR_SIZE		(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
++#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
++#define FIXADDR_TOT_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
++#define FIXADDR_TOT_START	(FIXADDR_TOP - FIXADDR_TOT_SIZE)
+ 
+ extern int fixmaps_set;
+ 
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index fa11fb1fa570..eb83ff1bae8f 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -39,6 +39,13 @@
+ 
+ /* Intel MSRs. Some also available on other CPUs */
+ 
++#define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
++#define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
++#define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
++
++#define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
++#define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
++
+ #define MSR_PPIN_CTL			0x0000004e
+ #define MSR_PPIN			0x0000004f
+ 
+@@ -57,6 +64,11 @@
+ #define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
+ 
+ #define MSR_MTRRcap			0x000000fe
++
++#define MSR_IA32_ARCH_CAPABILITIES	0x0000010a
++#define ARCH_CAP_RDCL_NO		(1 << 0)   /* Not susceptible to Meltdown */
++#define ARCH_CAP_IBRS_ALL		(1 << 1)   /* Enhanced IBRS support */
++
+ #define MSR_IA32_BBL_CR_CTL		0x00000119
+ #define MSR_IA32_BBL_CR_CTL3		0x0000011e
+ 
+diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
+index 07962f5f6fba..30df295f6d94 100644
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
+ 	 * that some other imaginary CPU is updating continuously with a
+ 	 * time stamp.
+ 	 */
+-	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+-			  "lfence", X86_FEATURE_LFENCE_RDTSC);
++	barrier_nospec();
+ 	return rdtsc();
+ }
+ 
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index 4ad41087ce0e..4d57894635f2 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -1,56 +1,12 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+ 
+-#ifndef __NOSPEC_BRANCH_H__
+-#define __NOSPEC_BRANCH_H__
++#ifndef _ASM_X86_NOSPEC_BRANCH_H_
++#define _ASM_X86_NOSPEC_BRANCH_H_
+ 
+ #include <asm/alternative.h>
+ #include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
+ 
+-/*
+- * Fill the CPU return stack buffer.
+- *
+- * Each entry in the RSB, if used for a speculative 'ret', contains an
+- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+- *
+- * This is required in various cases for retpoline and IBRS-based
+- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+- * eliminate potentially bogus entries from the RSB, and sometimes
+- * purely to ensure that it doesn't get empty, which on some CPUs would
+- * allow predictions from other (unwanted!) sources to be used.
+- *
+- * We define a CPP macro such that it can be used from both .S files and
+- * inline assembly. It's possible to do a .macro and then include that
+- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+- */
+-
+-#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+-#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+-
+-/*
+- * Google experimented with loop-unrolling and this turned out to be
+- * the optimal version — two calls, each with their own speculation
+- * trap should their return address end up getting used, in a loop.
+- */
+-#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+-	mov	$(nr/2), reg;			\
+-771:						\
+-	call	772f;				\
+-773:	/* speculation trap */			\
+-	pause;					\
+-	lfence;					\
+-	jmp	773b;				\
+-772:						\
+-	call	774f;				\
+-775:	/* speculation trap */			\
+-	pause;					\
+-	lfence;					\
+-	jmp	775b;				\
+-774:						\
+-	dec	reg;				\
+-	jnz	771b;				\
+-	add	$(BITS_PER_LONG/8) * nr, sp;
+-
+ #ifdef __ASSEMBLY__
+ 
+ /*
+@@ -121,17 +77,10 @@
+ #endif
+ .endm
+ 
+- /*
+-  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+-  * monstrosity above, manually.
+-  */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
++/* This clobbers the BX register */
++.macro FILL_RETURN_BUFFER nr:req ftr:req
+ #ifdef CONFIG_RETPOLINE
+-	ANNOTATE_NOSPEC_ALTERNATIVE
+-	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
+-		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
+-		\ftr
+-.Lskip_rsb_\@:
++	ALTERNATIVE "", "call __clear_rsb", \ftr
+ #endif
+ .endm
+ 
+@@ -201,22 +150,25 @@ extern char __indirect_thunk_end[];
+  * On VMEXIT we must ensure that no RSB predictions learned in the guest
+  * can be followed in the host, by overwriting the RSB completely. Both
+  * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+- * CPUs with IBRS_ATT *might* it be avoided.
++ * CPUs with IBRS_ALL *might* it be avoided.
+  */
+ static inline void vmexit_fill_RSB(void)
+ {
+ #ifdef CONFIG_RETPOLINE
+-	unsigned long loops;
+-
+-	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+-		      ALTERNATIVE("jmp 910f",
+-				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+-				  X86_FEATURE_RETPOLINE)
+-		      "910:"
+-		      : "=r" (loops), ASM_CALL_CONSTRAINT
+-		      : : "memory" );
++	alternative_input("",
++			  "call __fill_rsb",
++			  X86_FEATURE_RETPOLINE,
++			  ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ #endif
+ }
+ 
++static inline void indirect_branch_prediction_barrier(void)
++{
++	alternative_input("",
++			  "call __ibp_barrier",
++			  X86_FEATURE_USE_IBPB,
++			  ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
++}
++
+ #endif /* __ASSEMBLY__ */
+-#endif /* __NOSPEC_BRANCH_H__ */
++#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
+diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
+index ce245b0cdfca..0777e18a1d23 100644
+--- a/arch/x86/include/asm/pgtable_32_types.h
++++ b/arch/x86/include/asm/pgtable_32_types.h
+@@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
+  */
+ #define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
+ 
+-#define CPU_ENTRY_AREA_BASE				\
+-	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
++#define CPU_ENTRY_AREA_BASE						\
++	((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1))   \
++	 & PMD_MASK)
+ 
+ #define PKMAP_BASE		\
+ 	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 9c18da64daa9..c57c6e77c29f 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -459,8 +459,6 @@ struct thread_struct {
+ 	unsigned short		gsindex;
+ #endif
+ 
+-	u32			status;		/* thread synchronous flags */
+-
+ #ifdef CONFIG_X86_64
+ 	unsigned long		fsbase;
+ 	unsigned long		gsbase;
+@@ -970,4 +968,7 @@ bool xen_set_default_idle(void);
+ 
+ void stop_this_cpu(void *dummy);
+ void df_debug(struct pt_regs *regs, long error_code);
++
++void __ibp_barrier(void);
++
+ #endif /* _ASM_X86_PROCESSOR_H */
+diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
+index d91ba04dd007..fb3a6de7440b 100644
+--- a/arch/x86/include/asm/required-features.h
++++ b/arch/x86/include/asm/required-features.h
+@@ -106,6 +106,7 @@
+ #define REQUIRED_MASK15	0
+ #define REQUIRED_MASK16	(NEED_LA57)
+ #define REQUIRED_MASK17	0
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define REQUIRED_MASK18	0
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+ 
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */
+diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
+index e3c95e8e61c5..03eedc21246d 100644
+--- a/arch/x86/include/asm/syscall.h
++++ b/arch/x86/include/asm/syscall.h
+@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
+ 	 * TS_COMPAT is set for 32-bit syscall entries and then
+ 	 * remains set until we return to user mode.
+ 	 */
+-	if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
++	if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ 		/*
+ 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ 		 * and will match correctly in comparisons.
+@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
+ 					 unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+-	if (task->thread.status & TS_COMPAT)
++	if (task->thread_info.status & TS_COMPAT)
+ 		switch (i) {
+ 		case 0:
+ 			if (!n--) break;
+@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
+ 					 const unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+-	if (task->thread.status & TS_COMPAT)
++	if (task->thread_info.status & TS_COMPAT)
+ 		switch (i) {
+ 		case 0:
+ 			if (!n--) break;
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 00223333821a..eda3b6823ca4 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -55,6 +55,7 @@ struct task_struct;
+ 
+ struct thread_info {
+ 	unsigned long		flags;		/* low level flags */
++	u32			status;		/* thread synchronous flags */
+ };
+ 
+ #define INIT_THREAD_INFO(tsk)			\
+@@ -221,7 +222,7 @@ static inline int arch_within_stack_frames(const void * const stack,
+ #define in_ia32_syscall() true
+ #else
+ #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
+-			   current->thread.status & TS_COMPAT)
++			   current_thread_info()->status & TS_COMPAT)
+ #endif
+ 
+ /*
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 3effd3c994af..4405c4b308e8 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -174,6 +174,8 @@ struct tlb_state {
+ 	struct mm_struct *loaded_mm;
+ 	u16 loaded_mm_asid;
+ 	u16 next_asid;
++	/* last user mm's ctx id */
++	u64 last_ctx_id;
+ 
+ 	/*
+ 	 * We can be in one of several states:
+diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
+index 574dff4d2913..aae77eb8491c 100644
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -124,6 +124,11 @@ extern int __get_user_bad(void);
+ 
+ #define __uaccess_begin() stac()
+ #define __uaccess_end()   clac()
++#define __uaccess_begin_nospec()	\
++({					\
++	stac();				\
++	barrier_nospec();		\
++})
+ 
+ /*
+  * This is a type: either unsigned long, if the argument fits into
+@@ -445,7 +450,7 @@ do {									\
+ ({									\
+ 	int __gu_err;							\
+ 	__inttype(*(ptr)) __gu_val;					\
+-	__uaccess_begin();						\
++	__uaccess_begin_nospec();					\
+ 	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
+ 	__uaccess_end();						\
+ 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
+@@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; };
+ 	__uaccess_begin();						\
+ 	barrier();
+ 
++#define uaccess_try_nospec do {						\
++	current->thread.uaccess_err = 0;				\
++	__uaccess_begin_nospec();					\
++
+ #define uaccess_catch(err)						\
+ 	__uaccess_end();						\
+ 	(err) |= (current->thread.uaccess_err ? -EFAULT : 0);		\
+@@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; };
+  *	get_user_ex(...);
+  * } get_user_catch(err)
+  */
+-#define get_user_try		uaccess_try
++#define get_user_try		uaccess_try_nospec
+ #define get_user_catch(err)	uaccess_catch(err)
+ 
+ #define get_user_ex(x, ptr)	do {					\
+@@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void)
+ 	__typeof__(ptr) __uval = (uval);				\
+ 	__typeof__(*(ptr)) __old = (old);				\
+ 	__typeof__(*(ptr)) __new = (new);				\
+-	__uaccess_begin();						\
++	__uaccess_begin_nospec();					\
+ 	switch (size) {							\
+ 	case 1:								\
+ 	{								\
+diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
+index 72950401b223..ba2dc1930630 100644
+--- a/arch/x86/include/asm/uaccess_32.h
++++ b/arch/x86/include/asm/uaccess_32.h
+@@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+ 		switch (n) {
+ 		case 1:
+ 			ret = 0;
+-			__uaccess_begin();
++			__uaccess_begin_nospec();
+ 			__get_user_asm_nozero(*(u8 *)to, from, ret,
+ 					      "b", "b", "=q", 1);
+ 			__uaccess_end();
+ 			return ret;
+ 		case 2:
+ 			ret = 0;
+-			__uaccess_begin();
++			__uaccess_begin_nospec();
+ 			__get_user_asm_nozero(*(u16 *)to, from, ret,
+ 					      "w", "w", "=r", 2);
+ 			__uaccess_end();
+ 			return ret;
+ 		case 4:
+ 			ret = 0;
+-			__uaccess_begin();
++			__uaccess_begin_nospec();
+ 			__get_user_asm_nozero(*(u32 *)to, from, ret,
+ 					      "l", "k", "=r", 4);
+ 			__uaccess_end();
+diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
+index f07ef3c575db..62546b3a398e 100644
+--- a/arch/x86/include/asm/uaccess_64.h
++++ b/arch/x86/include/asm/uaccess_64.h
+@@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
+ 		return copy_user_generic(dst, (__force void *)src, size);
+ 	switch (size) {
+ 	case 1:
+-		__uaccess_begin();
++		__uaccess_begin_nospec();
+ 		__get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
+ 			      ret, "b", "b", "=q", 1);
+ 		__uaccess_end();
+ 		return ret;
+ 	case 2:
+-		__uaccess_begin();
++		__uaccess_begin_nospec();
+ 		__get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
+ 			      ret, "w", "w", "=r", 2);
+ 		__uaccess_end();
+ 		return ret;
+ 	case 4:
+-		__uaccess_begin();
++		__uaccess_begin_nospec();
+ 		__get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
+ 			      ret, "l", "k", "=r", 4);
+ 		__uaccess_end();
+ 		return ret;
+ 	case 8:
+-		__uaccess_begin();
++		__uaccess_begin_nospec();
+ 		__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
+ 			      ret, "q", "", "=r", 8);
+ 		__uaccess_end();
+ 		return ret;
+ 	case 10:
+-		__uaccess_begin();
++		__uaccess_begin_nospec();
+ 		__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
+ 			       ret, "q", "", "=r", 10);
+ 		if (likely(!ret))
+@@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
+ 		__uaccess_end();
+ 		return ret;
+ 	case 16:
+-		__uaccess_begin();
++		__uaccess_begin_nospec();
+ 		__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
+ 			       ret, "q", "", "=r", 16);
+ 		if (likely(!ret))
+diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
+index e0b97e4d1db5..21be0193d9dc 100644
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str)
+ }
+ __setup("noreplace-smp", setup_noreplace_smp);
+ 
+-#ifdef CONFIG_PARAVIRT
+-static int __initdata_or_module noreplace_paravirt = 0;
+-
+-static int __init setup_noreplace_paravirt(char *str)
+-{
+-	noreplace_paravirt = 1;
+-	return 1;
+-}
+-__setup("noreplace-paravirt", setup_noreplace_paravirt);
+-#endif
+-
+ #define DPRINTK(fmt, args...)						\
+ do {									\
+ 	if (debug_alternative)						\
+@@ -298,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+ 	tgt_rip  = next_rip + o_dspl;
+ 	n_dspl = tgt_rip - orig_insn;
+ 
+-	DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
++	DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
+ 
+ 	if (tgt_rip - orig_insn >= 0) {
+ 		if (n_dspl - 2 <= 127)
+@@ -355,7 +344,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
+ 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+ 	local_irq_restore(flags);
+ 
+-	DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
++	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
+ 		   instr, a->instrlen - a->padlen, a->padlen);
+ }
+ 
+@@ -376,7 +365,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
+ 	u8 *instr, *replacement;
+ 	u8 insnbuf[MAX_PATCH_LEN];
+ 
+-	DPRINTK("alt table %p -> %p", start, end);
++	DPRINTK("alt table %px, -> %px", start, end);
+ 	/*
+ 	 * The scan order should be from start to end. A later scanned
+ 	 * alternative code can overwrite previously scanned alternative code.
+@@ -400,14 +389,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
+ 			continue;
+ 		}
+ 
+-		DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
++		DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d",
+ 			a->cpuid >> 5,
+ 			a->cpuid & 0x1f,
+ 			instr, a->instrlen,
+ 			replacement, a->replacementlen, a->padlen);
+ 
+-		DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
+-		DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
++		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
++		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
+ 
+ 		memcpy(insnbuf, replacement, a->replacementlen);
+ 		insnbuf_sz = a->replacementlen;
+@@ -433,7 +422,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
+ 				 a->instrlen - a->replacementlen);
+ 			insnbuf_sz += a->instrlen - a->replacementlen;
+ 		}
+-		DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
++		DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
+ 
+ 		text_poke_early(instr, insnbuf, insnbuf_sz);
+ 	}
+@@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
+ 	struct paravirt_patch_site *p;
+ 	char insnbuf[MAX_PATCH_LEN];
+ 
+-	if (noreplace_paravirt)
+-		return;
+-
+ 	for (p = start; p < end; p++) {
+ 		unsigned int used;
+ 
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 390b3dc3d438..71949bf2de5a 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -11,6 +11,7 @@
+ #include <linux/init.h>
+ #include <linux/utsname.h>
+ #include <linux/cpu.h>
++#include <linux/module.h>
+ 
+ #include <asm/nospec-branch.h>
+ #include <asm/cmdline.h>
+@@ -90,20 +91,41 @@ static const char *spectre_v2_strings[] = {
+ };
+ 
+ #undef pr_fmt
+-#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
++#define pr_fmt(fmt)     "Spectre V2 : " fmt
+ 
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+ 
++#ifdef RETPOLINE
++static bool spectre_v2_bad_module;
++
++bool retpoline_module_ok(bool has_retpoline)
++{
++	if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
++		return true;
++
++	pr_err("System may be vulnerable to spectre v2\n");
++	spectre_v2_bad_module = true;
++	return false;
++}
++
++static inline const char *spectre_v2_module_string(void)
++{
++	return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
++}
++#else
++static inline const char *spectre_v2_module_string(void) { return ""; }
++#endif
++
+ static void __init spec2_print_if_insecure(const char *reason)
+ {
+ 	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+-		pr_info("%s\n", reason);
++		pr_info("%s selected on command line.\n", reason);
+ }
+ 
+ static void __init spec2_print_if_secure(const char *reason)
+ {
+ 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+-		pr_info("%s\n", reason);
++		pr_info("%s selected on command line.\n", reason);
+ }
+ 
+ static inline bool retp_compiler(void)
+@@ -118,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
+ 	return len == arglen && !strncmp(arg, opt, len);
+ }
+ 
++static const struct {
++	const char *option;
++	enum spectre_v2_mitigation_cmd cmd;
++	bool secure;
++} mitigation_options[] = {
++	{ "off",               SPECTRE_V2_CMD_NONE,              false },
++	{ "on",                SPECTRE_V2_CMD_FORCE,             true },
++	{ "retpoline",         SPECTRE_V2_CMD_RETPOLINE,         false },
++	{ "retpoline,amd",     SPECTRE_V2_CMD_RETPOLINE_AMD,     false },
++	{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
++	{ "auto",              SPECTRE_V2_CMD_AUTO,              false },
++};
++
+ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ {
+ 	char arg[20];
+-	int ret;
+-
+-	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+-				  sizeof(arg));
+-	if (ret > 0)  {
+-		if (match_option(arg, ret, "off")) {
+-			goto disable;
+-		} else if (match_option(arg, ret, "on")) {
+-			spec2_print_if_secure("force enabled on command line.");
+-			return SPECTRE_V2_CMD_FORCE;
+-		} else if (match_option(arg, ret, "retpoline")) {
+-			spec2_print_if_insecure("retpoline selected on command line.");
+-			return SPECTRE_V2_CMD_RETPOLINE;
+-		} else if (match_option(arg, ret, "retpoline,amd")) {
+-			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+-				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+-				return SPECTRE_V2_CMD_AUTO;
+-			}
+-			spec2_print_if_insecure("AMD retpoline selected on command line.");
+-			return SPECTRE_V2_CMD_RETPOLINE_AMD;
+-		} else if (match_option(arg, ret, "retpoline,generic")) {
+-			spec2_print_if_insecure("generic retpoline selected on command line.");
+-			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+-		} else if (match_option(arg, ret, "auto")) {
++	int ret, i;
++	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
++
++	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
++		return SPECTRE_V2_CMD_NONE;
++	else {
++		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
++					  sizeof(arg));
++		if (ret < 0)
++			return SPECTRE_V2_CMD_AUTO;
++
++		for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
++			if (!match_option(arg, ret, mitigation_options[i].option))
++				continue;
++			cmd = mitigation_options[i].cmd;
++			break;
++		}
++
++		if (i >= ARRAY_SIZE(mitigation_options)) {
++			/* Nothing matched, so i is past the table: print arg. */
++			pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ 			return SPECTRE_V2_CMD_AUTO;
+ 		}
+ 	}
+ 
+-	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
++	if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
++	     cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
++	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
++	    !IS_ENABLED(CONFIG_RETPOLINE)) {
++		pr_err("%s selected but not compiled in. Switching to AUTO select\n",
++		       mitigation_options[i].option);
+ 		return SPECTRE_V2_CMD_AUTO;
+-disable:
+-	spec2_print_if_insecure("disabled on command line.");
+-	return SPECTRE_V2_CMD_NONE;
++	}
++
++	if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
++	    boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
++		pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
++		return SPECTRE_V2_CMD_AUTO;
++	}
++
++	if (mitigation_options[i].secure)
++		spec2_print_if_secure(mitigation_options[i].option);
++	else
++		spec2_print_if_insecure(mitigation_options[i].option);
++
++	return cmd;
+ }
+ 
+ /* Check for Skylake-like CPUs (for RSB handling) */
+@@ -191,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void)
+ 		return;
+ 
+ 	case SPECTRE_V2_CMD_FORCE:
+-		/* FALLTRHU */
+ 	case SPECTRE_V2_CMD_AUTO:
+-		goto retpoline_auto;
+-
++		if (IS_ENABLED(CONFIG_RETPOLINE))
++			goto retpoline_auto;
++		break;
+ 	case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ 		if (IS_ENABLED(CONFIG_RETPOLINE))
+ 			goto retpoline_amd;
+@@ -249,6 +297,12 @@ static void __init spectre_v2_select_mitigation(void)
+ 		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ 		pr_info("Filling RSB on context switch\n");
+ 	}
++
++	/* Initialize Indirect Branch Prediction Barrier if supported */
++	if (boot_cpu_has(X86_FEATURE_IBPB)) {
++		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
++		pr_info("Enabling Indirect Branch Prediction Barrier\n");
++	}
+ }
+ 
+ #undef pr_fmt
+@@ -269,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
+ {
+ 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ 		return sprintf(buf, "Not affected\n");
+-	return sprintf(buf, "Vulnerable\n");
++	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ }
+ 
+ ssize_t cpu_show_spectre_v2(struct device *dev,
+@@ -278,6 +332,14 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
+ 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ 		return sprintf(buf, "Not affected\n");
+ 
+-	return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
++	return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
++		       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
++		       spectre_v2_module_string());
+ }
+ #endif
++
++void __ibp_barrier(void)
++{
++	__wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
++}
++EXPORT_SYMBOL_GPL(__ibp_barrier);
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 372ba3fb400f..92b66e21bae5 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -47,6 +47,8 @@
+ #include <asm/pat.h>
+ #include <asm/microcode.h>
+ #include <asm/microcode_intel.h>
++#include <asm/intel-family.h>
++#include <asm/cpu_device_id.h>
+ 
+ #ifdef CONFIG_X86_LOCAL_APIC
+ #include <asm/uv/uv.h>
+@@ -724,6 +726,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
+ 	}
+ }
+ 
++static void init_speculation_control(struct cpuinfo_x86 *c)
++{
++	/*
++	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
++	 * and they also have a different bit for STIBP support. Also,
++	 * a hypervisor might have set the individual AMD bits even on
++	 * Intel CPUs, for finer-grained selection of what's available.
++	 *
++	 * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
++	 * features, which are visible in /proc/cpuinfo and used by the
++	 * kernel. So set those accordingly from the Intel bits.
++	 */
++	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
++		set_cpu_cap(c, X86_FEATURE_IBRS);
++		set_cpu_cap(c, X86_FEATURE_IBPB);
++	}
++	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
++		set_cpu_cap(c, X86_FEATURE_STIBP);
++}
++
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+ {
+ 	u32 eax, ebx, ecx, edx;
+@@ -745,6 +767,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
+ 		c->x86_capability[CPUID_7_0_EBX] = ebx;
+ 		c->x86_capability[CPUID_7_ECX] = ecx;
++		c->x86_capability[CPUID_7_EDX] = edx;
+ 	}
+ 
+ 	/* Extended state features: level 0x0000000d */
+@@ -817,6 +840,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+ 
+ 	init_scattered_cpuid_features(c);
++	init_speculation_control(c);
+ 
+ 	/*
+ 	 * Clear/Set all flags overridden by options, after probe.
+@@ -852,6 +876,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+ #endif
+ }
+ 
++static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CEDARVIEW,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CLOVERVIEW,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_LINCROFT,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_PENWELL,	X86_FEATURE_ANY },
++	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_PINEVIEW,	X86_FEATURE_ANY },
++	{ X86_VENDOR_CENTAUR,	5 },
++	{ X86_VENDOR_INTEL,	5 },
++	{ X86_VENDOR_NSC,	5 },
++	{ X86_VENDOR_ANY,	4 },
++	{}
++};
++
++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
++	{ X86_VENDOR_AMD },
++	{}
++};
++
++static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
++{
++	u64 ia32_cap = 0;
++
++	if (x86_match_cpu(cpu_no_meltdown))
++		return false;
++
++	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
++		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
++
++	/* Rogue Data Cache Load? No! */
++	if (ia32_cap & ARCH_CAP_RDCL_NO)
++		return false;
++
++	return true;
++}
++
+ /*
+  * Do minimum CPU detection early.
+  * Fields really needed: vendor, cpuid_level, family, model, mask,
+@@ -899,11 +958,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+ 
+ 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ 
+-	if (c->x86_vendor != X86_VENDOR_AMD)
+-		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+-
+-	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+-	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++	if (!x86_match_cpu(cpu_no_speculation)) {
++		if (cpu_vulnerable_to_meltdown(c))
++			setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++		setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++	}
+ 
+ 	fpu__init_system(c);
+ 
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index b720dacac051..4cf4f8cbc69d 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
+ 		ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
+ }
+ 
++/*
++ * Early microcode releases for the Spectre v2 mitigation were broken.
++ * Information taken from:
++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
++ * - https://kb.vmware.com/s/article/52345
++ * - Microcode revisions observed in the wild
++ * - Release note from 20180108 microcode release
++ */
++struct sku_microcode {
++	u8 model;	/* matched against c->x86_model */
++	u8 stepping;	/* matched against c->x86_mask */
++	u32 microcode;	/* revisions <= this are reported as bad */
++};
++static const struct sku_microcode spectre_bad_microcodes[] = {
++	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0B,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0A,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x09,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x0A,	0x84 },
++	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x84 },
++	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
++	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
++	{ INTEL_FAM6_SKYLAKE_MOBILE,	0x03,	0xc2 },
++	{ INTEL_FAM6_SKYLAKE_DESKTOP,	0x03,	0xc2 },
++	{ INTEL_FAM6_BROADWELL_CORE,	0x04,	0x28 },
++	{ INTEL_FAM6_BROADWELL_GT3E,	0x01,	0x1b },
++	{ INTEL_FAM6_BROADWELL_XEON_D,	0x02,	0x14 },
++	{ INTEL_FAM6_BROADWELL_XEON_D,	0x03,	0x07000011 },
++	{ INTEL_FAM6_BROADWELL_X,	0x01,	0x0b000025 },
++	{ INTEL_FAM6_HASWELL_ULT,	0x01,	0x21 },
++	{ INTEL_FAM6_HASWELL_GT3E,	0x01,	0x18 },
++	{ INTEL_FAM6_HASWELL_CORE,	0x03,	0x23 },
++	{ INTEL_FAM6_HASWELL_X,		0x02,	0x3b },
++	{ INTEL_FAM6_HASWELL_X,		0x04,	0x10 },
++	{ INTEL_FAM6_IVYBRIDGE_X,	0x04,	0x42a },
++	/* Updated in the 20180108 release; blacklist until we know otherwise */
++	{ INTEL_FAM6_ATOM_GEMINI_LAKE,	0x01,	0x22 },
++	/* Observed in the wild */
++	{ INTEL_FAM6_SANDYBRIDGE_X,	0x06,	0x61b },
++	{ INTEL_FAM6_SANDYBRIDGE_X,	0x07,	0x712 },
++};
++
++static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
++{
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
++		if (c->x86_model == spectre_bad_microcodes[i].model &&
++		    c->x86_mask == spectre_bad_microcodes[i].stepping)
++			return (c->microcode <= spectre_bad_microcodes[i].microcode);
++	}
++	return false;
++}
++
+ static void early_init_intel(struct cpuinfo_x86 *c)
+ {
+ 	u64 misc_enable;
+@@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
+ 	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
+ 		c->microcode = intel_get_microcode_revision();
+ 
++	/* Now if any of them are set, check the blacklist and clear the lot */
++	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
++	     cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
++	     cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
++	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
++		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
++		setup_clear_cpu_cap(X86_FEATURE_IBRS);
++		setup_clear_cpu_cap(X86_FEATURE_IBPB);
++		setup_clear_cpu_cap(X86_FEATURE_STIBP);
++		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
++		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
++	}
++
+ 	/*
+ 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
+ 	 *
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index d0e69769abfd..df11f5d604be 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -21,8 +21,6 @@ struct cpuid_bit {
+ static const struct cpuid_bit cpuid_bits[] = {
+ 	{ X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
+ 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
+-	{ X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
+-	{ X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
+ 	{ X86_FEATURE_CAT_L3,		CPUID_EBX,  1, 0x00000010, 0 },
+ 	{ X86_FEATURE_CAT_L2,		CPUID_EBX,  2, 0x00000010, 0 },
+ 	{ X86_FEATURE_CDP_L3,		CPUID_ECX,  2, 0x00000010, 1 },
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index c75466232016..9eb448c7859d 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -557,7 +557,7 @@ static void __set_personality_x32(void)
+ 	 * Pretend to come from a x32 execve.
+ 	 */
+ 	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
+-	current->thread.status &= ~TS_COMPAT;
++	current_thread_info()->status &= ~TS_COMPAT;
+ #endif
+ }
+ 
+@@ -571,7 +571,7 @@ static void __set_personality_ia32(void)
+ 	current->personality |= force_personality32;
+ 	/* Prepare the first "return" to user space */
+ 	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
+-	current->thread.status |= TS_COMPAT;
++	current_thread_info()->status |= TS_COMPAT;
+ #endif
+ }
+ 
+diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
+index f37d18124648..ed5c4cdf0a34 100644
+--- a/arch/x86/kernel/ptrace.c
++++ b/arch/x86/kernel/ptrace.c
+@@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
+ 		 */
+ 		regs->orig_ax = value;
+ 		if (syscall_get_nr(child, regs) >= 0)
+-			child->thread.status |= TS_I386_REGS_POKED;
++			child->thread_info.status |= TS_I386_REGS_POKED;
+ 		break;
+ 
+ 	case offsetof(struct user32, regs.eflags):
+diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
+index b9e00e8f1c9b..4cdc0b27ec82 100644
+--- a/arch/x86/kernel/signal.c
++++ b/arch/x86/kernel/signal.c
+@@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
+ 	 * than the tracee.
+ 	 */
+ #ifdef CONFIG_IA32_EMULATION
+-	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
++	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+ 		return __NR_ia32_restart_syscall;
+ #endif
+ #ifdef CONFIG_X86_X32_ABI
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 0099e10eb045..13f5d4217e4f 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void)
+ 
+ #define F(x) bit(X86_FEATURE_##x)
+ 
+-/* These are scattered features in cpufeatures.h. */
+-#define KVM_CPUID_BIT_AVX512_4VNNIW     2
+-#define KVM_CPUID_BIT_AVX512_4FMAPS     3
++/* For scattered features from cpufeatures.h; we currently expose none */
+ #define KF(x) bit(KVM_CPUID_BIT_##x)
+ 
+ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
+@@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ 		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
+ 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ 
++	/* cpuid 0x80000008.ebx */
++	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
++		F(IBPB) | F(IBRS);
++
+ 	/* cpuid 0xC0000001.edx */
+ 	const u32 kvm_cpuid_C000_0001_edx_x86_features =
+ 		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+@@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ 
+ 	/* cpuid 7.0.edx*/
+ 	const u32 kvm_cpuid_7_0_edx_x86_features =
+-		KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
++		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
++		F(ARCH_CAPABILITIES);
+ 
+ 	/* all calls to cpuid_count() should be made on the same cpu */
+ 	get_cpu();
+@@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ 			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+ 				entry->ecx &= ~F(PKU);
+ 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+-			entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
++			cpuid_mask(&entry->edx, CPUID_7_EDX);
+ 		} else {
+ 			entry->ebx = 0;
+ 			entry->ecx = 0;
+@@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ 		if (!g_phys_as)
+ 			g_phys_as = phys_as;
+ 		entry->eax = g_phys_as | (virt_as << 8);
+-		entry->ebx = entry->edx = 0;
++		entry->edx = 0;
++		/* IBRS and IBPB aren't necessarily present in hardware cpuid */
++		if (boot_cpu_has(X86_FEATURE_IBPB))
++			entry->ebx |= F(IBPB);
++		if (boot_cpu_has(X86_FEATURE_IBRS))
++			entry->ebx |= F(IBRS);
++		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
++		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ 		break;
+ 	}
+ 	case 0x80000019:
+diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
+index c2cea6651279..9a327d5b6d1f 100644
+--- a/arch/x86/kvm/cpuid.h
++++ b/arch/x86/kvm/cpuid.h
+@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
+ 	[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
+ 	[CPUID_7_ECX]         = {         7, 0, CPUID_ECX},
+ 	[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
++	[CPUID_7_EDX]         = {         7, 0, CPUID_EDX},
+ };
+ 
+ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index eca6a89f2326..fab073b19528 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -25,6 +25,7 @@
+ #include <asm/kvm_emulate.h>
+ #include <linux/stringify.h>
+ #include <asm/debugreg.h>
++#include <asm/nospec-branch.h>
+ 
+ #include "x86.h"
+ #include "tss.h"
+@@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
+ 	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+ 
+ 	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+-	asm("push %[flags]; popf; call *%[fastop]"
+-	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
++	asm("push %[flags]; popf; " CALL_NOSPEC
++	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
+ 	return rc;
+ }
+ 
+@@ -5350,9 +5351,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
+ 	if (!(ctxt->d & ByteOp))
+ 		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+ 
+-	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
++	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
+ 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
+-	      [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
++	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
+ 	    : "c"(ctxt->src2.val));
+ 
+ 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 6a8284f72328..e0bc3ad0f6cd 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -184,6 +184,8 @@ struct vcpu_svm {
+ 		u64 gs_base;
+ 	} host;
+ 
++	u64 spec_ctrl;
++
+ 	u32 *msrpm;
+ 
+ 	ulong nmi_iret_rip;
+@@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs {
+ 	{ .index = MSR_CSTAR,				.always = true  },
+ 	{ .index = MSR_SYSCALL_MASK,			.always = true  },
+ #endif
++	{ .index = MSR_IA32_SPEC_CTRL,			.always = false },
++	{ .index = MSR_IA32_PRED_CMD,			.always = false },
+ 	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
+ 	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
+ 	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
+@@ -529,6 +533,7 @@ struct svm_cpu_data {
+ 	struct kvm_ldttss_desc *tss_desc;
+ 
+ 	struct page *save_area;
++	struct vmcb *current_vmcb;
+ };
+ 
+ static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+@@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index)
+ 	return false;
+ }
+ 
++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
++{
++	u8 bit_write;
++	unsigned long tmp;
++	u32 offset;
++	u32 *msrpm;
++
++	msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
++				      to_svm(vcpu)->msrpm;
++
++	offset    = svm_msrpm_offset(msr);
++	/* Check the offset before it is used to index the bitmap. */
++	BUG_ON(offset == MSR_INVALID);
++
++	bit_write = 2 * (msr & 0x0f) + 1;
++	tmp       = msrpm[offset];
++	return !!test_bit(bit_write,  &tmp);
++}
++
+ static void set_msr_interception(u32 *msrpm, unsigned msr,
+ 				 int read, int write)
+ {
+@@ -1585,6 +1609,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ 	u32 dummy;
+ 	u32 eax = 1;
+ 
++	svm->spec_ctrl = 0;
++
+ 	if (!init_event) {
+ 		svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ 					   MSR_IA32_APICBASE_ENABLE;
+@@ -1706,11 +1732,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
+ 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+ 	kvm_vcpu_uninit(vcpu);
+ 	kmem_cache_free(kvm_vcpu_cache, svm);
++	/*
++	 * The vmcb page can be recycled, causing a false negative in
++	 * svm_vcpu_load(). So do a full IBPB now.
++	 */
++	indirect_branch_prediction_barrier();
+ }
+ 
+ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+ 	struct vcpu_svm *svm = to_svm(vcpu);
++	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ 	int i;
+ 
+ 	if (unlikely(cpu != vcpu->cpu)) {
+@@ -1739,6 +1771,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ 	if (static_cpu_has(X86_FEATURE_RDTSCP))
+ 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ 
++	if (sd->current_vmcb != svm->vmcb) {
++		sd->current_vmcb = svm->vmcb;
++		indirect_branch_prediction_barrier();
++	}
+ 	avic_vcpu_load(vcpu, cpu);
+ }
+ 
+@@ -3579,6 +3615,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ 	case MSR_VM_CR:
+ 		msr_info->data = svm->nested.vm_cr_msr;
+ 		break;
++	case MSR_IA32_SPEC_CTRL:
++		if (!msr_info->host_initiated &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
++			return 1;
++
++		msr_info->data = svm->spec_ctrl;
++		break;
+ 	case MSR_IA32_UCODE_REV:
+ 		msr_info->data = 0x01000065;
+ 		break;
+@@ -3670,6 +3713,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ 	case MSR_IA32_TSC:
+ 		kvm_write_tsc(vcpu, msr);
+ 		break;
++	case MSR_IA32_SPEC_CTRL:
++		if (!msr->host_initiated &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
++			return 1;
++
++		/* The STIBP bit doesn't fault even if it's not advertised */
++		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
++			return 1;
++
++		svm->spec_ctrl = data;
++
++		if (!data)
++			break;
++
++		/*
++		 * For non-nested:
++		 * When it's written (to non-zero) for the first time, pass
++		 * it through.
++		 *
++		 * For nested:
++		 * The handling of the MSR bitmap for L2 guests is done in
++		 * nested_svm_vmrun_msrpm.
++		 * We update the L1 MSR bit as well since it will end up
++		 * touching the MSR anyway now.
++		 */
++		set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
++		break;
++	case MSR_IA32_PRED_CMD:
++		if (!msr->host_initiated &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
++			return 1;
++
++		if (data & ~PRED_CMD_IBPB)
++			return 1;
++
++		if (!data)
++			break;
++
++		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
++		if (is_guest_mode(vcpu))
++			break;
++		set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
++		break;
+ 	case MSR_STAR:
+ 		svm->vmcb->save.star = data;
+ 		break;
+@@ -4922,6 +5008,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ 
+ 	local_irq_enable();
+ 
++	/*
++	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
++	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
++	 * is no need to worry about the conditional branch over the wrmsr
++	 * being speculatively taken.
++	 */
++	if (svm->spec_ctrl)
++		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++
+ 	asm volatile (
+ 		"push %%" _ASM_BP "; \n\t"
+ 		"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+@@ -5014,6 +5109,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ 		);
+ 
++	/*
++	 * We do not use IBRS in the kernel. If this vCPU has used the
++	 * SPEC_CTRL MSR it may have left it on; save the value and
++	 * turn it off. This is much more efficient than blindly adding
++	 * it to the atomic save/restore list. Especially as the former
++	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
++	 *
++	 * For non-nested case:
++	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
++	 * save it.
++	 *
++	 * For nested case:
++	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
++	 * save it.
++	 */
++	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
++		rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++
++	if (svm->spec_ctrl)
++		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+ 	/* Eliminate branch target predictions from guest mode */
+ 	vmexit_fill_RSB();
+ 
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index a45063a9219c..0ae4b1a86168 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -34,6 +34,7 @@
+ #include <linux/tboot.h>
+ #include <linux/hrtimer.h>
+ #include <linux/frame.h>
++#include <linux/nospec.h>
+ #include "kvm_cache_regs.h"
+ #include "x86.h"
+ 
+@@ -108,6 +109,14 @@ static u64 __read_mostly host_xss;
+ static bool __read_mostly enable_pml = 1;
+ module_param_named(pml, enable_pml, bool, S_IRUGO);
+ 
++#define MSR_TYPE_R	1
++#define MSR_TYPE_W	2
++#define MSR_TYPE_RW	3
++
++#define MSR_BITMAP_MODE_X2APIC		1
++#define MSR_BITMAP_MODE_X2APIC_APICV	2
++#define MSR_BITMAP_MODE_LM		4
++
+ #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
+ 
+ /* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
+@@ -182,7 +191,6 @@ module_param(ple_window_max, int, S_IRUGO);
+ extern const ulong vmx_return;
+ 
+ #define NR_AUTOLOAD_MSRS 8
+-#define VMCS02_POOL_SIZE 1
+ 
+ struct vmcs {
+ 	u32 revision_id;
+@@ -207,6 +215,7 @@ struct loaded_vmcs {
+ 	int soft_vnmi_blocked;
+ 	ktime_t entry_time;
+ 	s64 vnmi_blocked_time;
++	unsigned long *msr_bitmap;
+ 	struct list_head loaded_vmcss_on_cpu_link;
+ };
+ 
+@@ -223,7 +232,7 @@ struct shared_msr_entry {
+  * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
+  * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
+  * More than one of these structures may exist, if L1 runs multiple L2 guests.
+- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
+  * underlying hardware which will be used to run L2.
+  * This structure is packed to ensure that its layout is identical across
+  * machines (necessary for live migration).
+@@ -406,13 +415,6 @@ struct __packed vmcs12 {
+  */
+ #define VMCS12_SIZE 0x1000
+ 
+-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
+-struct vmcs02_list {
+-	struct list_head list;
+-	gpa_t vmptr;
+-	struct loaded_vmcs vmcs02;
+-};
+-
+ /*
+  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
+  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
+@@ -437,15 +439,15 @@ struct nested_vmx {
+ 	 */
+ 	bool sync_shadow_vmcs;
+ 
+-	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
+-	struct list_head vmcs02_pool;
+-	int vmcs02_num;
+ 	bool change_vmcs01_virtual_x2apic_mode;
+ 	/* L2 must run next, and mustn't decide to exit to L1. */
+ 	bool nested_run_pending;
++
++	struct loaded_vmcs vmcs02;
++
+ 	/*
+-	 * Guest pages referred to in vmcs02 with host-physical pointers, so
+-	 * we must keep them pinned while L2 runs.
++	 * Guest pages referred to in the vmcs02 with host-physical
++	 * pointers, so we must keep them pinned while L2 runs.
+ 	 */
+ 	struct page *apic_access_page;
+ 	struct page *virtual_apic_page;
+@@ -454,8 +456,6 @@ struct nested_vmx {
+ 	bool pi_pending;
+ 	u16 posted_intr_nv;
+ 
+-	unsigned long *msr_bitmap;
+-
+ 	struct hrtimer preemption_timer;
+ 	bool preemption_timer_expired;
+ 
+@@ -570,6 +570,7 @@ struct vcpu_vmx {
+ 	struct kvm_vcpu       vcpu;
+ 	unsigned long         host_rsp;
+ 	u8                    fail;
++	u8		      msr_bitmap_mode;
+ 	u32                   exit_intr_info;
+ 	u32                   idt_vectoring_info;
+ 	ulong                 rflags;
+@@ -581,6 +582,10 @@ struct vcpu_vmx {
+ 	u64 		      msr_host_kernel_gs_base;
+ 	u64 		      msr_guest_kernel_gs_base;
+ #endif
++
++	u64 		      arch_capabilities;
++	u64 		      spec_ctrl;
++
+ 	u32 vm_entry_controls_shadow;
+ 	u32 vm_exit_controls_shadow;
+ 	u32 secondary_exec_control;
+@@ -887,21 +892,18 @@ static const unsigned short vmcs_field_to_offset_table[] = {
+ 
+ static inline short vmcs_field_to_offset(unsigned long field)
+ {
+-	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
++	const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
++	unsigned short offset;
+ 
+-	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
++	BUILD_BUG_ON(size > SHRT_MAX);
++	if (field >= size)
+ 		return -ENOENT;
+ 
+-	/*
+-	 * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
+-	 * generic mechanism.
+-	 */
+-	asm("lfence");
+-
+-	if (vmcs_field_to_offset_table[field] == 0)
++	field = array_index_nospec(field, size);
++	offset = vmcs_field_to_offset_table[field];
++	if (offset == 0)
+ 		return -ENOENT;
+-
+-	return vmcs_field_to_offset_table[field];
++	return offset;
+ }
+ 
+ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
+@@ -927,6 +929,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
+ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
+ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
+ 					    u16 error_code);
++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
++							  u32 msr, int type);
+ 
+ static DEFINE_PER_CPU(struct vmcs *, vmxarea);
+ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+@@ -946,12 +951,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+ enum {
+ 	VMX_IO_BITMAP_A,
+ 	VMX_IO_BITMAP_B,
+-	VMX_MSR_BITMAP_LEGACY,
+-	VMX_MSR_BITMAP_LONGMODE,
+-	VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
+-	VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
+-	VMX_MSR_BITMAP_LEGACY_X2APIC,
+-	VMX_MSR_BITMAP_LONGMODE_X2APIC,
+ 	VMX_VMREAD_BITMAP,
+ 	VMX_VMWRITE_BITMAP,
+ 	VMX_BITMAP_NR
+@@ -961,12 +960,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
+ 
+ #define vmx_io_bitmap_a                      (vmx_bitmap[VMX_IO_BITMAP_A])
+ #define vmx_io_bitmap_b                      (vmx_bitmap[VMX_IO_BITMAP_B])
+-#define vmx_msr_bitmap_legacy                (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
+-#define vmx_msr_bitmap_longmode              (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
+-#define vmx_msr_bitmap_legacy_x2apic_apicv   (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
+-#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
+-#define vmx_msr_bitmap_legacy_x2apic         (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
+-#define vmx_msr_bitmap_longmode_x2apic       (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
+ #define vmx_vmread_bitmap                    (vmx_bitmap[VMX_VMREAD_BITMAP])
+ #define vmx_vmwrite_bitmap                   (vmx_bitmap[VMX_VMWRITE_BITMAP])
+ 
+@@ -1913,6 +1906,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
+ 	vmcs_write32(EXCEPTION_BITMAP, eb);
+ }
+ 
++/*
++ * True if a write to @msr is intercepted per vmx->loaded_vmcs->msr_bitmap.
++ */
++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
++{
++	unsigned long *msr_bitmap;
++	int f = sizeof(unsigned long);	/* byte offset -> unsigned long index */
++
++	if (!cpu_has_vmx_msr_bitmap())
++		return true;	/* no bitmap support: reported as intercepted */
++
++	msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
++
++	if (msr <= 0x1fff) {
++		return !!test_bit(msr, msr_bitmap + 0x800 / f);	/* write-low */
++	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
++		msr &= 0x1fff;	/* bit index within the high range */
++		return !!test_bit(msr, msr_bitmap + 0xc00 / f);	/* write-high */
++	}
++
++	return true;	/* not covered by either range: reported as intercepted */
++}
++
++/*
++ * True if a write to @msr is intercepted per the vmcs01 (L01) MSR bitmap.
++ */
++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
++{
++	unsigned long *msr_bitmap;
++	int f = sizeof(unsigned long);	/* byte offset -> unsigned long index */
++
++	if (!cpu_has_vmx_msr_bitmap())
++		return true;	/* no bitmap support: reported as intercepted */
++
++	msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;	/* always vmcs01 here */
++
++	if (msr <= 0x1fff) {
++		return !!test_bit(msr, msr_bitmap + 0x800 / f);	/* write-low */
++	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
++		msr &= 0x1fff;	/* bit index within the high range */
++		return !!test_bit(msr, msr_bitmap + 0xc00 / f);	/* write-high */
++	}
++
++	return true;	/* not covered by either range: reported as intercepted */
++}
++
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+ 		unsigned long entry, unsigned long exit)
+ {
+@@ -2291,6 +2330,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ 	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+ 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+ 		vmcs_load(vmx->loaded_vmcs->vmcs);
++		indirect_branch_prediction_barrier();
+ 	}
+ 
+ 	if (!already_loaded) {
+@@ -2567,36 +2607,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
+ 	vmx->guest_msrs[from] = tmp;
+ }
+ 
+-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
+-{
+-	unsigned long *msr_bitmap;
+-
+-	if (is_guest_mode(vcpu))
+-		msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
+-	else if (cpu_has_secondary_exec_ctrls() &&
+-		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+-		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
+-		if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
+-			if (is_long_mode(vcpu))
+-				msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
+-			else
+-				msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
+-		} else {
+-			if (is_long_mode(vcpu))
+-				msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+-			else
+-				msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+-		}
+-	} else {
+-		if (is_long_mode(vcpu))
+-			msr_bitmap = vmx_msr_bitmap_longmode;
+-		else
+-			msr_bitmap = vmx_msr_bitmap_legacy;
+-	}
+-
+-	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+-}
+-
+ /*
+  * Set up the vmcs to automatically save and restore system
+  * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
+@@ -2637,7 +2647,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
+ 	vmx->save_nmsrs = save_nmsrs;
+ 
+ 	if (cpu_has_vmx_msr_bitmap())
+-		vmx_set_msr_bitmap(&vmx->vcpu);
++		vmx_update_msr_bitmap(&vmx->vcpu);
+ }
+ 
+ /*
+@@ -3273,6 +3283,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ 	case MSR_IA32_TSC:
+ 		msr_info->data = guest_read_tsc(vcpu);
+ 		break;
++	case MSR_IA32_SPEC_CTRL:
++		if (!msr_info->host_initiated &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
++			return 1;
++
++		msr_info->data = to_vmx(vcpu)->spec_ctrl;
++		break;
++	case MSR_IA32_ARCH_CAPABILITIES:
++		if (!msr_info->host_initiated &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
++			return 1;
++		msr_info->data = to_vmx(vcpu)->arch_capabilities;
++		break;
+ 	case MSR_IA32_SYSENTER_CS:
+ 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
+ 		break;
+@@ -3380,6 +3404,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ 	case MSR_IA32_TSC:
+ 		kvm_write_tsc(vcpu, msr_info);
+ 		break;
++	case MSR_IA32_SPEC_CTRL:
++		if (!msr_info->host_initiated &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
++			return 1;
++
++		/* The STIBP bit doesn't fault even if it's not advertised */
++		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
++			return 1;
++
++		vmx->spec_ctrl = data;
++
++		if (!data)
++			break;
++
++		/*
++		 * For non-nested:
++		 * When it's written (to non-zero) for the first time, pass
++		 * it through.
++		 *
++		 * For nested:
++		 * The handling of the MSR bitmap for L2 guests is done in
++		 * nested_vmx_merge_msr_bitmap. We should not touch the
++		 * vmcs02.msr_bitmap here since it gets completely overwritten
++		 * in the merging. We update the vmcs01 here for L1 as well
++		 * since it will end up touching the MSR anyway now.
++		 */
++		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
++					      MSR_IA32_SPEC_CTRL,
++					      MSR_TYPE_RW);
++		break;
++	case MSR_IA32_PRED_CMD:
++		if (!msr_info->host_initiated &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
++		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
++			return 1;
++
++		if (data & ~PRED_CMD_IBPB)
++			return 1;
++
++		if (!data)
++			break;
++
++		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
++
++		/*
++		 * For non-nested:
++		 * When it's written (to non-zero) for the first time, pass
++		 * it through.
++		 *
++		 * For nested:
++		 * The handling of the MSR bitmap for L2 guests is done in
++		 * nested_vmx_merge_msr_bitmap. We should not touch the
++		 * vmcs02.msr_bitmap here since it gets completely overwritten
++		 * in the merging.
++		 */
++		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
++					      MSR_TYPE_W);
++		break;
++	case MSR_IA32_ARCH_CAPABILITIES:
++		if (!msr_info->host_initiated)
++			return 1;
++		vmx->arch_capabilities = data;
++		break;
+ 	case MSR_IA32_CR_PAT:
+ 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ 			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+@@ -3822,11 +3910,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
+ 	return vmcs;
+ }
+ 
+-static struct vmcs *alloc_vmcs(void)
+-{
+-	return alloc_vmcs_cpu(raw_smp_processor_id());
+-}
+-
+ static void free_vmcs(struct vmcs *vmcs)
+ {
+ 	free_pages((unsigned long)vmcs, vmcs_config.order);
+@@ -3842,9 +3925,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+ 	loaded_vmcs_clear(loaded_vmcs);
+ 	free_vmcs(loaded_vmcs->vmcs);
+ 	loaded_vmcs->vmcs = NULL;
++	if (loaded_vmcs->msr_bitmap)
++		free_page((unsigned long)loaded_vmcs->msr_bitmap);
+ 	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
+ }
+ 
++static struct vmcs *alloc_vmcs(void)
++{
++	return alloc_vmcs_cpu(raw_smp_processor_id());	/* id of the executing CPU */
++}
++/* Allocate the VMCS plus, if supported, an intercept-all MSR bitmap page. */
++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
++{
++	loaded_vmcs->vmcs = alloc_vmcs();
++	if (!loaded_vmcs->vmcs)
++		return -ENOMEM;	/* nothing allocated yet, nothing to undo */
++
++	loaded_vmcs->shadow_vmcs = NULL;
++	loaded_vmcs_init(loaded_vmcs);
++
++	if (cpu_has_vmx_msr_bitmap()) {
++		loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
++		if (!loaded_vmcs->msr_bitmap)
++			goto out_vmcs;
++		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);	/* every bit set */
++	}
++	return 0;	/* success */
++
++out_vmcs:
++	free_loaded_vmcs(loaded_vmcs);	/* frees the VMCS; msr_bitmap is NULL here */
++	return -ENOMEM;
++}
++
+ static void free_kvm_area(void)
+ {
+ 	int cpu;
+@@ -4917,10 +5029,8 @@ static void free_vpid(int vpid)
+ 	spin_unlock(&vmx_vpid_lock);
+ }
+ 
+-#define MSR_TYPE_R	1
+-#define MSR_TYPE_W	2
+-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+-						u32 msr, int type)
++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
++							  u32 msr, int type)
+ {
+ 	int f = sizeof(unsigned long);
+ 
+@@ -4954,6 +5064,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ 	}
+ }
+ 
++static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
++							 u32 msr, int type)
++{
++	int f = sizeof(unsigned long);	/* byte offset -> unsigned long index */
++
++	if (!cpu_has_vmx_msr_bitmap())
++		return;	/* no MSR bitmap to update */
++
++	/*
++	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
++	 * have the write-low and read-high bitmap offsets the wrong way round.
++	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
++	 */
++	if (msr <= 0x1fff) {
++		if (type & MSR_TYPE_R)
++			/* read-low: a set bit enables the read intercept */
++			__set_bit(msr, msr_bitmap + 0x000 / f);
++
++		if (type & MSR_TYPE_W)
++			/* write-low: a set bit enables the write intercept */
++			__set_bit(msr, msr_bitmap + 0x800 / f);
++
++	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
++		msr &= 0x1fff;	/* bit index within the high range */
++		if (type & MSR_TYPE_R)
++			/* read-high: a set bit enables the read intercept */
++			__set_bit(msr, msr_bitmap + 0x400 / f);
++
++		if (type & MSR_TYPE_W)
++			/* write-high: a set bit enables the write intercept */
++			__set_bit(msr, msr_bitmap + 0xc00 / f);
++
++	}	/* MSRs outside both ranges are left untouched */
++}
++/* Set (@value) or clear (!@value) interception of @type accesses to @msr. */
++static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
++			     			      u32 msr, int type, bool value)
++{
++	if (value)
++		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
++	else
++		vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
++}
++
+ /*
+  * If a msr is allowed by L0, we should check whether it is allowed by L1.
+  * The corresponding bit will be cleared unless both of L0 and L1 allow it.
+@@ -5000,30 +5154,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
+ 	}
+ }
+ 
+-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
+ {
+-	if (!longmode_only)
+-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+-						msr, MSR_TYPE_R | MSR_TYPE_W);
+-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+-						msr, MSR_TYPE_R | MSR_TYPE_W);
++	u8 mode = 0;
++
++	if (cpu_has_secondary_exec_ctrls() &&
++	    (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
++	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
++		mode |= MSR_BITMAP_MODE_X2APIC;
++		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
++			mode |= MSR_BITMAP_MODE_X2APIC_APICV;
++	}
++
++	if (is_long_mode(vcpu))
++		mode |= MSR_BITMAP_MODE_LM;
++
++	return mode;
+ }
+ 
+-static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
++
++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
++					 u8 mode)
+ {
+-	if (apicv_active) {
+-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
+-				msr, type);
+-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
+-				msr, type);
+-	} else {
+-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+-				msr, type);
+-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+-				msr, type);
++	int msr;
++
++	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
++		unsigned word = msr / BITS_PER_LONG;
++		msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
++		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
++	}
++
++	if (mode & MSR_BITMAP_MODE_X2APIC) {
++		/*
++		 * TPR reads and writes can be virtualized even if virtual interrupt
++		 * delivery is not in use.
++		 */
++		vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
++		if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
++			vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
++			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
++			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
++		}
+ 	}
+ }
+ 
++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
++{
++	struct vcpu_vmx *vmx = to_vmx(vcpu);
++	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
++	u8 mode = vmx_msr_bitmap_mode(vcpu);
++	u8 changed = mode ^ vmx->msr_bitmap_mode;
++
++	if (!changed)
++		return;
++
++	vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
++				  !(mode & MSR_BITMAP_MODE_LM));
++
++	if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
++		vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
++
++	vmx->msr_bitmap_mode = mode;
++}
++
+ static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
+ {
+ 	return enable_apicv;
+@@ -5269,7 +5463,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+ 	}
+ 
+ 	if (cpu_has_vmx_msr_bitmap())
+-		vmx_set_msr_bitmap(vcpu);
++		vmx_update_msr_bitmap(vcpu);
+ }
+ 
+ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
+@@ -5456,7 +5650,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ 		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+ 	}
+ 	if (cpu_has_vmx_msr_bitmap())
+-		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
++		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
+ 
+ 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
+ 
+@@ -5534,6 +5728,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ 		++vmx->nmsrs;
+ 	}
+ 
++	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
++		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+ 
+ 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
+ 
+@@ -5564,6 +5760,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ 	u64 cr0;
+ 
+ 	vmx->rmode.vm86_active = 0;
++	vmx->spec_ctrl = 0;
+ 
+ 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
+ 	kvm_set_cr8(vcpu, 0);
+@@ -6739,7 +6936,7 @@ void vmx_enable_tdp(void)
+ 
+ static __init int hardware_setup(void)
+ {
+-	int r = -ENOMEM, i, msr;
++	int r = -ENOMEM, i;
+ 
+ 	rdmsrl_safe(MSR_EFER, &host_efer);
+ 
+@@ -6760,9 +6957,6 @@ static __init int hardware_setup(void)
+ 
+ 	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+ 
+-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+-
+ 	if (setup_vmcs_config(&vmcs_config) < 0) {
+ 		r = -EIO;
+ 		goto out;
+@@ -6825,42 +7019,8 @@ static __init int hardware_setup(void)
+ 		kvm_tsc_scaling_ratio_frac_bits = 48;
+ 	}
+ 
+-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+-
+-	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
+-			vmx_msr_bitmap_legacy, PAGE_SIZE);
+-	memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
+-			vmx_msr_bitmap_longmode, PAGE_SIZE);
+-	memcpy(vmx_msr_bitmap_legacy_x2apic,
+-			vmx_msr_bitmap_legacy, PAGE_SIZE);
+-	memcpy(vmx_msr_bitmap_longmode_x2apic,
+-			vmx_msr_bitmap_longmode, PAGE_SIZE);
+-
+ 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+ 
+-	for (msr = 0x800; msr <= 0x8ff; msr++) {
+-		if (msr == 0x839 /* TMCCT */)
+-			continue;
+-		vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
+-	}
+-
+-	/*
+-	 * TPR reads and writes can be virtualized even if virtual interrupt
+-	 * delivery is not in use.
+-	 */
+-	vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
+-	vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
+-
+-	/* EOI */
+-	vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
+-	/* SELF-IPI */
+-	vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
+-
+ 	if (enable_ept)
+ 		vmx_enable_tdp();
+ 	else
+@@ -6963,94 +7123,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
+ 	return handle_nop(vcpu);
+ }
+ 
+-/*
+- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
+- * We could reuse a single VMCS for all the L2 guests, but we also want the
+- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
+- * allows keeping them loaded on the processor, and in the future will allow
+- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
+- * every entry if they never change.
+- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
+- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
+- *
+- * The following functions allocate and free a vmcs02 in this pool.
+- */
+-
+-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
+-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
+-{
+-	struct vmcs02_list *item;
+-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
+-		if (item->vmptr == vmx->nested.current_vmptr) {
+-			list_move(&item->list, &vmx->nested.vmcs02_pool);
+-			return &item->vmcs02;
+-		}
+-
+-	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
+-		/* Recycle the least recently used VMCS. */
+-		item = list_last_entry(&vmx->nested.vmcs02_pool,
+-				       struct vmcs02_list, list);
+-		item->vmptr = vmx->nested.current_vmptr;
+-		list_move(&item->list, &vmx->nested.vmcs02_pool);
+-		return &item->vmcs02;
+-	}
+-
+-	/* Create a new VMCS */
+-	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+-	if (!item)
+-		return NULL;
+-	item->vmcs02.vmcs = alloc_vmcs();
+-	item->vmcs02.shadow_vmcs = NULL;
+-	if (!item->vmcs02.vmcs) {
+-		kfree(item);
+-		return NULL;
+-	}
+-	loaded_vmcs_init(&item->vmcs02);
+-	item->vmptr = vmx->nested.current_vmptr;
+-	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
+-	vmx->nested.vmcs02_num++;
+-	return &item->vmcs02;
+-}
+-
+-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
+-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
+-{
+-	struct vmcs02_list *item;
+-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
+-		if (item->vmptr == vmptr) {
+-			free_loaded_vmcs(&item->vmcs02);
+-			list_del(&item->list);
+-			kfree(item);
+-			vmx->nested.vmcs02_num--;
+-			return;
+-		}
+-}
+-
+-/*
+- * Free all VMCSs saved for this vcpu, except the one pointed by
+- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
+- * must be &vmx->vmcs01.
+- */
+-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
+-{
+-	struct vmcs02_list *item, *n;
+-
+-	WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
+-	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
+-		/*
+-		 * Something will leak if the above WARN triggers.  Better than
+-		 * a use-after-free.
+-		 */
+-		if (vmx->loaded_vmcs == &item->vmcs02)
+-			continue;
+-
+-		free_loaded_vmcs(&item->vmcs02);
+-		list_del(&item->list);
+-		kfree(item);
+-		vmx->nested.vmcs02_num--;
+-	}
+-}
+-
+ /*
+  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
+  * set the success or error code of an emulated VMX instruction, as specified
+@@ -7231,13 +7303,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
+ {
+ 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+ 	struct vmcs *shadow_vmcs;
++	int r;
+ 
+-	if (cpu_has_vmx_msr_bitmap()) {
+-		vmx->nested.msr_bitmap =
+-				(unsigned long *)__get_free_page(GFP_KERNEL);
+-		if (!vmx->nested.msr_bitmap)
+-			goto out_msr_bitmap;
+-	}
++	r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
++	if (r < 0)
++		goto out_vmcs02;
+ 
+ 	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+ 	if (!vmx->nested.cached_vmcs12)
+@@ -7254,9 +7324,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
+ 		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
+ 	}
+ 
+-	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
+-	vmx->nested.vmcs02_num = 0;
+-
+ 	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+ 		     HRTIMER_MODE_REL_PINNED);
+ 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+@@ -7268,9 +7335,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
+ 	kfree(vmx->nested.cached_vmcs12);
+ 
+ out_cached_vmcs12:
+-	free_page((unsigned long)vmx->nested.msr_bitmap);
++	free_loaded_vmcs(&vmx->nested.vmcs02);
+ 
+-out_msr_bitmap:
++out_vmcs02:
+ 	return -ENOMEM;
+ }
+ 
+@@ -7412,10 +7479,6 @@ static void free_nested(struct vcpu_vmx *vmx)
+ 	free_vpid(vmx->nested.vpid02);
+ 	vmx->nested.posted_intr_nv = -1;
+ 	vmx->nested.current_vmptr = -1ull;
+-	if (vmx->nested.msr_bitmap) {
+-		free_page((unsigned long)vmx->nested.msr_bitmap);
+-		vmx->nested.msr_bitmap = NULL;
+-	}
+ 	if (enable_shadow_vmcs) {
+ 		vmx_disable_shadow_vmcs(vmx);
+ 		vmcs_clear(vmx->vmcs01.shadow_vmcs);
+@@ -7423,7 +7486,7 @@ static void free_nested(struct vcpu_vmx *vmx)
+ 		vmx->vmcs01.shadow_vmcs = NULL;
+ 	}
+ 	kfree(vmx->nested.cached_vmcs12);
+-	/* Unpin physical memory we referred to in current vmcs02 */
++	/* Unpin physical memory we referred to in the vmcs02 */
+ 	if (vmx->nested.apic_access_page) {
+ 		kvm_release_page_dirty(vmx->nested.apic_access_page);
+ 		vmx->nested.apic_access_page = NULL;
+@@ -7439,7 +7502,7 @@ static void free_nested(struct vcpu_vmx *vmx)
+ 		vmx->nested.pi_desc = NULL;
+ 	}
+ 
+-	nested_free_all_saved_vmcss(vmx);
++	free_loaded_vmcs(&vmx->nested.vmcs02);
+ }
+ 
+ /* Emulate the VMXOFF instruction */
+@@ -7482,8 +7545,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
+ 			vmptr + offsetof(struct vmcs12, launch_state),
+ 			&zero, sizeof(zero));
+ 
+-	nested_free_vmcs02(vmx, vmptr);
+-
+ 	nested_vmx_succeed(vcpu);
+ 	return kvm_skip_emulated_instruction(vcpu);
+ }
+@@ -8395,10 +8456,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
+ 
+ 	/*
+ 	 * The host physical addresses of some pages of guest memory
+-	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
+-	 * may write to these pages via their host physical address while
+-	 * L2 is running, bypassing any address-translation-based dirty
+-	 * tracking (e.g. EPT write protection).
++	 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
++	 * Page). The CPU may write to these pages via their host
++	 * physical address while L2 is running, bypassing any
++	 * address-translation-based dirty tracking (e.g. EPT write
++	 * protection).
+ 	 *
+ 	 * Mark them dirty on every exit from L2 to prevent them from
+ 	 * getting out of sync with dirty tracking.
+@@ -8932,7 +8994,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ 	}
+ 	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+ 
+-	vmx_set_msr_bitmap(vcpu);
++	vmx_update_msr_bitmap(vcpu);
+ }
+ 
+ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
+@@ -9118,14 +9180,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+ #endif
+ 			"pushf\n\t"
+ 			__ASM_SIZE(push) " $%c[cs]\n\t"
+-			"call *%[entry]\n\t"
++			CALL_NOSPEC
+ 			:
+ #ifdef CONFIG_X86_64
+ 			[sp]"=&r"(tmp),
+ #endif
+ 			ASM_CALL_CONSTRAINT
+ 			:
+-			[entry]"r"(entry),
++			THUNK_TARGET(entry),
+ 			[ss]"i"(__KERNEL_DS),
+ 			[cs]"i"(__KERNEL_CS)
+ 			);
+@@ -9362,6 +9424,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ 
+ 	vmx_arm_hv_timer(vcpu);
+ 
++	/*
++	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
++	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
++	 * is no need to worry about the conditional branch over the wrmsr
++	 * being speculatively taken.
++	 */
++	if (vmx->spec_ctrl)
++		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
++
+ 	vmx->__launched = vmx->loaded_vmcs->launched;
+ 	asm(
+ 		/* Store host registers */
+@@ -9480,6 +9551,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ 	      );
+ 
++	/*
++	 * We do not use IBRS in the kernel. If this vCPU has used the
++	 * SPEC_CTRL MSR it may have left it on; save the value and
++	 * turn it off. This is much more efficient than blindly adding
++	 * it to the atomic save/restore list. Especially as the former
++	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
++	 *
++	 * For non-nested case:
++	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
++	 * save it.
++	 *
++	 * For nested case:
++	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
++	 * save it.
++	 */
++	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
++		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
++
++	if (vmx->spec_ctrl)
++		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+ 	/* Eliminate branch target predictions from guest mode */
+ 	vmexit_fill_RSB();
+ 
+@@ -9594,6 +9686,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+ {
+ 	int err;
+ 	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
++	unsigned long *msr_bitmap;
+ 	int cpu;
+ 
+ 	if (!vmx)
+@@ -9626,13 +9719,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+ 	if (!vmx->guest_msrs)
+ 		goto free_pml;
+ 
+-	vmx->loaded_vmcs = &vmx->vmcs01;
+-	vmx->loaded_vmcs->vmcs = alloc_vmcs();
+-	vmx->loaded_vmcs->shadow_vmcs = NULL;
+-	if (!vmx->loaded_vmcs->vmcs)
++	err = alloc_loaded_vmcs(&vmx->vmcs01);
++	if (err < 0)
+ 		goto free_msrs;
+-	loaded_vmcs_init(vmx->loaded_vmcs);
+ 
++	msr_bitmap = vmx->vmcs01.msr_bitmap;
++	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
++	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
++	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
++	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
++	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
++	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
++	vmx->msr_bitmap_mode = 0;
++
++	vmx->loaded_vmcs = &vmx->vmcs01;
+ 	cpu = get_cpu();
+ 	vmx_vcpu_load(&vmx->vcpu, cpu);
+ 	vmx->vcpu.cpu = cpu;
+@@ -10101,10 +10201,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ 	int msr;
+ 	struct page *page;
+ 	unsigned long *msr_bitmap_l1;
+-	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
++	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
++	/*
++	 * pred_cmd & spec_ctrl are trying to verify two things:
++	 *
++	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
++	 *    ensures that we do not accidentally generate an L02 MSR bitmap
++	 *    from the L12 MSR bitmap that is too permissive.
++	 * 2. That L1 or L2s have actually used the MSR. This avoids
++	 *    unnecessarily merging of the bitmap if the MSR is unused. This
++	 *    works properly because we only update the L01 MSR bitmap lazily.
++	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
++	 *    updated to reflect this when L1 (or its L2s) actually write to
++	 *    the MSR.
++	 */
++	bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
++	bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+ 
+-	/* This shortcut is ok because we support only x2APIC MSRs so far. */
+-	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
++	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
++	    !pred_cmd && !spec_ctrl)
+ 		return false;
+ 
+ 	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
+@@ -10137,6 +10252,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ 				MSR_TYPE_W);
+ 		}
+ 	}
++
++	if (spec_ctrl)
++		nested_vmx_disable_intercept_for_msr(
++					msr_bitmap_l1, msr_bitmap_l0,
++					MSR_IA32_SPEC_CTRL,
++					MSR_TYPE_R | MSR_TYPE_W);
++
++	if (pred_cmd)
++		nested_vmx_disable_intercept_for_msr(
++					msr_bitmap_l1, msr_bitmap_l0,
++					MSR_IA32_PRED_CMD,
++					MSR_TYPE_W);
++
+ 	kunmap(page);
+ 	kvm_release_page_clean(page);
+ 
+@@ -10678,6 +10806,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ 	if (kvm_has_tsc_control)
+ 		decache_tsc_multiplier(vmx);
+ 
++	if (cpu_has_vmx_msr_bitmap())
++		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
++
+ 	if (enable_vpid) {
+ 		/*
+ 		 * There is no direct mapping between vpid02 and vpid12, the
+@@ -10894,20 +11025,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+ {
+ 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+ 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+-	struct loaded_vmcs *vmcs02;
+ 	u32 msr_entry_idx;
+ 	u32 exit_qual;
+ 
+-	vmcs02 = nested_get_current_vmcs02(vmx);
+-	if (!vmcs02)
+-		return -ENOMEM;
+-
+ 	enter_guest_mode(vcpu);
+ 
+ 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+ 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ 
+-	vmx_switch_vmcs(vcpu, vmcs02);
++	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
+ 	vmx_segment_cache_clear(vmx);
+ 
+ 	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
+@@ -11476,7 +11602,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+ 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ 
+ 	if (cpu_has_vmx_msr_bitmap())
+-		vmx_set_msr_bitmap(vcpu);
++		vmx_update_msr_bitmap(vcpu);
+ 
+ 	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
+ 				vmcs12->vm_exit_msr_load_count))
+@@ -11522,10 +11648,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+ 	vm_exit_controls_reset_shadow(vmx);
+ 	vmx_segment_cache_clear(vmx);
+ 
+-	/* if no vmcs02 cache requested, remove the one we used */
+-	if (VMCS02_POOL_SIZE == 0)
+-		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
+-
+ 	/* Update any VMCS fields that might have changed while L2 ran */
+ 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 8c28023a43b1..f97358423f9c 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1006,6 +1006,7 @@ static u32 msrs_to_save[] = {
+ #endif
+ 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+ 	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
++	MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
+ };
+ 
+ static unsigned num_msrs_to_save;
+diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
+index d435c89875c1..d0a3170e6804 100644
+--- a/arch/x86/lib/Makefile
++++ b/arch/x86/lib/Makefile
+@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+ lib-$(CONFIG_RETPOLINE) += retpoline.o
++OBJECT_FILES_NON_STANDARD_retpoline.o :=y
+ 
+ obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
+ 
+diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
+index c97d935a29e8..49b167f73215 100644
+--- a/arch/x86/lib/getuser.S
++++ b/arch/x86/lib/getuser.S
+@@ -40,6 +40,8 @@ ENTRY(__get_user_1)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 1:	movzbl (%_ASM_AX),%edx
+ 	xor %eax,%eax
+@@ -54,6 +56,8 @@ ENTRY(__get_user_2)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 2:	movzwl -1(%_ASM_AX),%edx
+ 	xor %eax,%eax
+@@ -68,6 +72,8 @@ ENTRY(__get_user_4)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 3:	movl -3(%_ASM_AX),%edx
+ 	xor %eax,%eax
+@@ -83,6 +89,8 @@ ENTRY(__get_user_8)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 4:	movq -7(%_ASM_AX),%rdx
+ 	xor %eax,%eax
+@@ -94,6 +102,8 @@ ENTRY(__get_user_8)
+ 	mov PER_CPU_VAR(current_task), %_ASM_DX
+ 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ 	jae bad_get_user_8
++	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
++	and %_ASM_DX, %_ASM_AX
+ 	ASM_STAC
+ 4:	movl -7(%_ASM_AX),%edx
+ 5:	movl -3(%_ASM_AX),%ecx
+diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
+index dfb2ba91b670..480edc3a5e03 100644
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -7,6 +7,7 @@
+ #include <asm/alternative-asm.h>
+ #include <asm/export.h>
+ #include <asm/nospec-branch.h>
++#include <asm/bitsperlong.h>
+ 
+ .macro THUNK reg
+ 	.section .text.__x86.indirect_thunk
+@@ -36,7 +37,6 @@ GENERATE_THUNK(_ASM_DX)
+ GENERATE_THUNK(_ASM_SI)
+ GENERATE_THUNK(_ASM_DI)
+ GENERATE_THUNK(_ASM_BP)
+-GENERATE_THUNK(_ASM_SP)
+ #ifdef CONFIG_64BIT
+ GENERATE_THUNK(r8)
+ GENERATE_THUNK(r9)
+@@ -47,3 +47,58 @@ GENERATE_THUNK(r13)
+ GENERATE_THUNK(r14)
+ GENERATE_THUNK(r15)
+ #endif
++
++/*
++ * Fill the CPU return stack buffer.
++ *
++ * Each entry in the RSB, if used for a speculative 'ret', contains an
++ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
++ *
++ * This is required in various cases for retpoline and IBRS-based
++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
++ * eliminate potentially bogus entries from the RSB, and sometimes
++ * purely to ensure that it doesn't get empty, which on some CPUs would
++ * allow predictions from other (unwanted!) sources to be used.
++ *
++ * Google experimented with loop-unrolling and this turned out to be
++ * the optimal version - two calls, each with their own speculation
++ * trap should their return address end up getting used, in a loop.
++ */
++.macro STUFF_RSB nr:req sp:req
++	mov	$(\nr / 2), %_ASM_BX
++	.align 16
++771:
++	call	772f
++773:						/* speculation trap */
++	pause
++	lfence
++	jmp	773b
++	.align 16
++772:
++	call	774f
++775:						/* speculation trap */
++	pause
++	lfence
++	jmp	775b
++	.align 16
++774:
++	dec	%_ASM_BX
++	jnz	771b
++	add	$((BITS_PER_LONG/8) * \nr), \sp
++.endm
++
++#define RSB_FILL_LOOPS		16	/* To avoid underflow */
++
++ENTRY(__fill_rsb)
++	STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
++	ret
++END(__fill_rsb)
++EXPORT_SYMBOL_GPL(__fill_rsb)
++
++#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
++
++ENTRY(__clear_rsb)
++	STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
++	ret
++END(__clear_rsb)
++EXPORT_SYMBOL_GPL(__clear_rsb)
+diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
+index 1b377f734e64..7add8ba06887 100644
+--- a/arch/x86/lib/usercopy_32.c
++++ b/arch/x86/lib/usercopy_32.c
+@@ -331,12 +331,12 @@ do {									\
+ 
+ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n)
+ {
+-	stac();
++	__uaccess_begin_nospec();
+ 	if (movsl_is_ok(to, from, n))
+ 		__copy_user(to, from, n);
+ 	else
+ 		n = __copy_user_intel(to, from, n);
+-	clac();
++	__uaccess_end();
+ 	return n;
+ }
+ EXPORT_SYMBOL(__copy_user_ll);
+@@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll);
+ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+ 					unsigned long n)
+ {
+-	stac();
++	__uaccess_begin_nospec();
+ #ifdef CONFIG_X86_INTEL_USERCOPY
+ 	if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
+ 		n = __copy_user_intel_nocache(to, from, n);
+@@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
+ #else
+ 	__copy_user(to, from, n);
+ #endif
+-	clac();
++	__uaccess_end();
+ 	return n;
+ }
+ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 5bfe61a5e8e3..012d02624848 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,13 +6,14 @@
+ #include <linux/interrupt.h>
+ #include <linux/export.h>
+ #include <linux/cpu.h>
++#include <linux/debugfs.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
++#include <asm/nospec-branch.h>
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+-#include <linux/debugfs.h>
+ 
+ /*
+  *	TLB flushing, formerly SMP-only
+@@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 	} else {
+ 		u16 new_asid;
+ 		bool need_flush;
++		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
++
++		/*
++		 * Avoid user/user BTB poisoning by flushing the branch
++		 * predictor when switching between processes. This stops
++		 * one process from doing Spectre-v2 attacks on another.
++		 *
++		 * As an optimization, flush indirect branches only when
++		 * switching into processes that disable dumping. This
++		 * protects high value processes like gpg, without having
++		 * too high performance overhead. IBPB is *expensive*!
++		 *
++		 * This will not flush branches when switching into kernel
++		 * threads. It will also not flush if we switch to idle
++		 * thread and back to the same process. It will flush if we
++		 * switch to a different non-dumpable process.
++		 */
++		if (tsk && tsk->mm &&
++		    tsk->mm->context.ctx_id != last_ctx_id &&
++		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
++			indirect_branch_prediction_barrier();
+ 
+ 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ 			/*
+@@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+ 		}
+ 
++		/*
++		 * Record last user mm's context id, so we can avoid
++		 * flushing branch buffer with IBPB if we switch back
++		 * to the same user.
++		 */
++		if (next != &init_mm)
++			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
++
+ 		this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ 		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+ 	}
+@@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void)
+ 	write_cr3(build_cr3(mm->pgd, 0));
+ 
+ 	/* Reinitialize tlbstate. */
++	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+ 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+ 	this_cpu_write(cpu_tlbstate.next_asid, 1);
+ 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
+index a9020f82eea7..58403052514f 100644
+--- a/drivers/auxdisplay/img-ascii-lcd.c
++++ b/drivers/auxdisplay/img-ascii-lcd.c
+@@ -443,3 +443,7 @@ static struct platform_driver img_ascii_lcd_driver = {
+ 	.remove	= img_ascii_lcd_remove,
+ };
+ module_platform_driver(img_ascii_lcd_driver);
++
++MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display");
++MODULE_AUTHOR("Paul Burton <paul.burton@mips.com>");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
+index d9ab7c75b14f..e0c73ceba2ed 100644
+--- a/drivers/fpga/fpga-region.c
++++ b/drivers/fpga/fpga-region.c
+@@ -147,6 +147,7 @@ static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region)
+ 			mgr_node = of_parse_phandle(np, "fpga-mgr", 0);
+ 			if (mgr_node) {
+ 				mgr = of_fpga_mgr_get(mgr_node);
++				of_node_put(mgr_node);
+ 				of_node_put(np);
+ 				return mgr;
+ 			}
+@@ -192,10 +193,13 @@ static int fpga_region_get_bridges(struct fpga_region *region,
+ 		parent_br = region_np->parent;
+ 
+ 	/* If overlay has a list of bridges, use it. */
+-	if (of_parse_phandle(overlay, "fpga-bridges", 0))
++	br = of_parse_phandle(overlay, "fpga-bridges", 0);
++	if (br) {
++		of_node_put(br);
+ 		np = overlay;
+-	else
++	} else {
+ 		np = region_np;
++	}
+ 
+ 	for (i = 0; ; i++) {
+ 		br = of_parse_phandle(np, "fpga-bridges", i);
+@@ -203,12 +207,15 @@ static int fpga_region_get_bridges(struct fpga_region *region,
+ 			break;
+ 
+ 		/* If parent bridge is in list, skip it. */
+-		if (br == parent_br)
++		if (br == parent_br) {
++			of_node_put(br);
+ 			continue;
++		}
+ 
+ 		/* If node is a bridge, get it and add to list */
+ 		ret = fpga_bridge_get_to_list(br, region->info,
+ 					      &region->bridge_list);
++		of_node_put(br);
+ 
+ 		/* If any of the bridges are in use, give up */
+ 		if (ret == -EBUSY) {
+diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c
+index 98fbb628d5bd..38411e1c155b 100644
+--- a/drivers/iio/accel/kxsd9-i2c.c
++++ b/drivers/iio/accel/kxsd9-i2c.c
+@@ -63,3 +63,6 @@ static struct i2c_driver kxsd9_i2c_driver = {
+ 	.id_table	= kxsd9_i2c_id,
+ };
+ module_i2c_driver(kxsd9_i2c_driver);
++
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("KXSD9 accelerometer I2C interface");
+diff --git a/drivers/iio/adc/qcom-vadc-common.c b/drivers/iio/adc/qcom-vadc-common.c
+index 47d24ae5462f..fe3d7826783c 100644
+--- a/drivers/iio/adc/qcom-vadc-common.c
++++ b/drivers/iio/adc/qcom-vadc-common.c
+@@ -5,6 +5,7 @@
+ #include <linux/math64.h>
+ #include <linux/log2.h>
+ #include <linux/err.h>
++#include <linux/module.h>
+ 
+ #include "qcom-vadc-common.h"
+ 
+@@ -229,3 +230,6 @@ int qcom_vadc_decimation_from_dt(u32 value)
+ 	return __ffs64(value / VADC_DECIMATION_MIN);
+ }
+ EXPORT_SYMBOL(qcom_vadc_decimation_from_dt);
++
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("Qualcomm ADC common functionality");
+diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
+index 866aa3ce1ac9..6cf0006d4c8d 100644
+--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
++++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
+@@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev)
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit);
++
++MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
++MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
+index 3a14cccbd7ff..7948acf14601 100644
+--- a/drivers/tty/serial/serial_core.c
++++ b/drivers/tty/serial/serial_core.c
+@@ -987,6 +987,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
+ 		}
+ 	} else {
+ 		retval = uart_startup(tty, state, 1);
++		if (retval == 0)
++			tty_port_set_initialized(port, true);
+ 		if (retval > 0)
+ 			retval = 0;
+ 	}
+diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
+index 1c65817673db..41615f38bcff 100644
+--- a/include/linux/fdtable.h
++++ b/include/linux/fdtable.h
+@@ -10,6 +10,7 @@
+ #include <linux/compiler.h>
+ #include <linux/spinlock.h>
+ #include <linux/rcupdate.h>
++#include <linux/nospec.h>
+ #include <linux/types.h>
+ #include <linux/init.h>
+ #include <linux/fs.h>
+@@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i
+ {
+ 	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+ 
+-	if (fd < fdt->max_fds)
++	if (fd < fdt->max_fds) {
++		fd = array_index_nospec(fd, fdt->max_fds);
+ 		return rcu_dereference_raw(fdt->fd[fd]);
++	}
+ 	return NULL;
+ }
+ 
+diff --git a/include/linux/init.h b/include/linux/init.h
+index f38b993edacb..943139a563e3 100644
+--- a/include/linux/init.h
++++ b/include/linux/init.h
+@@ -5,6 +5,13 @@
+ #include <linux/compiler.h>
+ #include <linux/types.h>
+ 
++/* Built-in __init functions needn't be compiled with retpoline */
++#if defined(RETPOLINE) && !defined(MODULE)
++#define __noretpoline __attribute__((indirect_branch("keep")))
++#else
++#define __noretpoline
++#endif
++
+ /* These macros are used to mark some functions or 
+  * initialized data (doesn't apply to uninitialized data)
+  * as `initialization' functions. The kernel can take this
+@@ -40,7 +47,7 @@
+ 
+ /* These are for everybody (although not all archs will actually
+    discard it in modules) */
+-#define __init		__section(.init.text) __cold __inittrace __latent_entropy
++#define __init		__section(.init.text) __cold __inittrace __latent_entropy __noretpoline
+ #define __initdata	__section(.init.data)
+ #define __initconst	__section(.init.rodata)
+ #define __exitdata	__section(.exit.data)
+diff --git a/include/linux/module.h b/include/linux/module.h
+index fe5aa3736707..b1cc541f2ddf 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -794,6 +794,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
+ static inline void module_bug_cleanup(struct module *mod) {}
+ #endif	/* CONFIG_GENERIC_BUG */
+ 
++#ifdef RETPOLINE
++extern bool retpoline_module_ok(bool has_retpoline);
++#else
++static inline bool retpoline_module_ok(bool has_retpoline)
++{
++	return true;
++}
++#endif
++
+ #ifdef CONFIG_MODULE_SIG
+ static inline bool module_sig_ok(struct module *module)
+ {
+diff --git a/include/linux/nospec.h b/include/linux/nospec.h
+new file mode 100644
+index 000000000000..b99bced39ac2
+--- /dev/null
++++ b/include/linux/nospec.h
+@@ -0,0 +1,72 @@
++// SPDX-License-Identifier: GPL-2.0
++// Copyright(c) 2018 Linus Torvalds. All rights reserved.
++// Copyright(c) 2018 Alexei Starovoitov. All rights reserved.
++// Copyright(c) 2018 Intel Corporation. All rights reserved.
++
++#ifndef _LINUX_NOSPEC_H
++#define _LINUX_NOSPEC_H
++
++/**
++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * When @index is out of bounds (@index >= @size), the sign bit will be
++ * set.  Extend the sign bit to all bits and invert, giving a result of
++ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
++ */
++#ifndef array_index_mask_nospec
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++						    unsigned long size)
++{
++	/*
++	 * Warn developers about inappropriate array_index_nospec() usage.
++	 *
++	 * Even if the CPU speculates past the WARN_ONCE branch, the
++	 * sign bit of @index is taken into account when generating the
++	 * mask.
++	 *
++	 * This warning is compiled out when the compiler can infer that
++	 * @index and @size are less than LONG_MAX.
++	 */
++	if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
++			"array_index_nospec() limited to range of [0, LONG_MAX]\n"))
++		return 0;
++
++	/*
++	 * Always calculate and emit the mask even if the compiler
++	 * thinks the mask is not needed. The compiler does not take
++	 * into account the value of @index under speculation.
++	 */
++	OPTIMIZER_HIDE_VAR(index);
++	return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
++}
++#endif
++
++/*
++ * array_index_nospec - sanitize an array index after a bounds check
++ *
++ * For a code sequence like:
++ *
++ *     if (index < size) {
++ *         index = array_index_nospec(index, size);
++ *         val = array[index];
++ *     }
++ *
++ * ...if the CPU speculates past the bounds check then
++ * array_index_nospec() will clamp the index within the range of [0,
++ * size).
++ */
++#define array_index_nospec(index, size)					\
++({									\
++	typeof(index) _i = (index);					\
++	typeof(size) _s = (size);					\
++	unsigned long _mask = array_index_mask_nospec(_i, _s);		\
++									\
++	BUILD_BUG_ON(sizeof(_i) > sizeof(long));			\
++	BUILD_BUG_ON(sizeof(_s) > sizeof(long));			\
++									\
++	_i &= _mask;							\
++	_i;								\
++})
++#endif /* _LINUX_NOSPEC_H */
+diff --git a/kernel/module.c b/kernel/module.c
+index de66ec825992..690c0651c40f 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -2855,6 +2855,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
+ }
+ #endif /* CONFIG_LIVEPATCH */
+ 
++static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
++{
++	if (retpoline_module_ok(get_modinfo(info, "retpoline")))
++		return;
++
++	pr_warn("%s: loading module not compiled with retpoline compiler.\n",
++		mod->name);
++}
++
+ /* Sets info->hdr and info->len. */
+ static int copy_module_from_user(const void __user *umod, unsigned long len,
+ 				  struct load_info *info)
+@@ -3021,6 +3030,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
+ 		add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
+ 	}
+ 
++	check_modinfo_retpoline(mod, info);
++
+ 	if (get_modinfo(info, "staging")) {
+ 		add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
+ 		pr_warn("%s: module is from the staging directory, the quality "
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
+index d396cb61a280..81bef0676e1d 100644
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -16,6 +16,7 @@
+ #include <linux/nl80211.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/netlink.h>
++#include <linux/nospec.h>
+ #include <linux/etherdevice.h>
+ #include <net/net_namespace.h>
+ #include <net/genetlink.h>
+@@ -2056,20 +2057,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
+ static int parse_txq_params(struct nlattr *tb[],
+ 			    struct ieee80211_txq_params *txq_params)
+ {
++	u8 ac;
++
+ 	if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
+ 	    !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
+ 	    !tb[NL80211_TXQ_ATTR_AIFS])
+ 		return -EINVAL;
+ 
+-	txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
++	ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
+ 	txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
+ 	txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
+ 	txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
+ 	txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
+ 
+-	if (txq_params->ac >= NL80211_NUM_ACS)
++	if (ac >= NL80211_NUM_ACS)
+ 		return -EINVAL;
+-
++	txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
+ 	return 0;
+ }
+ 
+diff --git a/scripts/faddr2line b/scripts/faddr2line
+index 39e07d8574dd..7721d5b2b0c0 100755
+--- a/scripts/faddr2line
++++ b/scripts/faddr2line
+@@ -44,10 +44,10 @@
+ set -o errexit
+ set -o nounset
+ 
+-READELF="${CROSS_COMPILE}readelf"
+-ADDR2LINE="${CROSS_COMPILE}addr2line"
+-SIZE="${CROSS_COMPILE}size"
+-NM="${CROSS_COMPILE}nm"
++READELF="${CROSS_COMPILE:-}readelf"
++ADDR2LINE="${CROSS_COMPILE:-}addr2line"
++SIZE="${CROSS_COMPILE:-}size"
++NM="${CROSS_COMPILE:-}nm"
+ 
+ command -v awk >/dev/null 2>&1 || die "awk isn't installed"
+ command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index 98314b400a95..54deaa1066cf 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -2165,6 +2165,14 @@ static void add_intree_flag(struct buffer *b, int is_intree)
+ 		buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
+ }
+ 
++/* Cannot check for assembler */
++static void add_retpoline(struct buffer *b)
++{
++	buf_printf(b, "\n#ifdef RETPOLINE\n");
++	buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
++	buf_printf(b, "#endif\n");
++}
++
+ static void add_staging_flag(struct buffer *b, const char *name)
+ {
+ 	static const char *staging_dir = "drivers/staging";
+@@ -2506,6 +2514,7 @@ int main(int argc, char **argv)
+ 		err |= check_modname_len(mod);
+ 		add_header(&buf, mod);
+ 		add_intree_flag(&buf, !external_module);
++		add_retpoline(&buf);
+ 		add_staging_flag(&buf, mod->name);
+ 		err |= add_versions(&buf, mod);
+ 		add_depends(&buf, mod, modules);
+diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c
+index 712ed6598c48..ebdf9bd5a64c 100644
+--- a/sound/soc/codecs/pcm512x-spi.c
++++ b/sound/soc/codecs/pcm512x-spi.c
+@@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = {
+ };
+ 
+ module_spi_driver(pcm512x_spi_driver);
++
++MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI");
++MODULE_AUTHOR("Mark Brown <broonie@kernel.org>");
++MODULE_LICENSE("GPL v2");
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index f40d46e24bcc..9cd028aa1509 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file)
+ 			dest_off = insn->offset + insn->len + insn->immediate;
+ 			insn->call_dest = find_symbol_by_offset(insn->sec,
+ 								dest_off);
+-			/*
+-			 * FIXME: Thanks to retpolines, it's now considered
+-			 * normal for a function to call within itself.  So
+-			 * disable this warning for now.
+-			 */
+-#if 0
+-			if (!insn->call_dest) {
+-				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+-					  insn->sec, insn->offset, dest_off);
++
++			if (!insn->call_dest && !insn->ignore) {
++				WARN_FUNC("unsupported intra-function call",
++					  insn->sec, insn->offset);
++				WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+ 				return -1;
+ 			}
+-#endif
++
+ 		} else if (rela->sym->type == STT_SECTION) {
+ 			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+ 								rela->addend+4);
+@@ -598,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file,
+ 			    struct instruction *orig_insn,
+ 			    struct instruction **new_insn)
+ {
+-	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
++	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+ 	unsigned long dest_off;
+ 
+ 	last_orig_insn = NULL;
+@@ -614,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file,
+ 		last_orig_insn = insn;
+ 	}
+ 
+-	if (!next_insn_same_sec(file, last_orig_insn)) {
+-		WARN("%s: don't know how to handle alternatives at end of section",
+-		     special_alt->orig_sec->name);
+-		return -1;
+-	}
+-
+-	fake_jump = malloc(sizeof(*fake_jump));
+-	if (!fake_jump) {
+-		WARN("malloc failed");
+-		return -1;
++	if (next_insn_same_sec(file, last_orig_insn)) {
++		fake_jump = malloc(sizeof(*fake_jump));
++		if (!fake_jump) {
++			WARN("malloc failed");
++			return -1;
++		}
++		memset(fake_jump, 0, sizeof(*fake_jump));
++		INIT_LIST_HEAD(&fake_jump->alts);
++		clear_insn_state(&fake_jump->state);
++
++		fake_jump->sec = special_alt->new_sec;
++		fake_jump->offset = -1;
++		fake_jump->type = INSN_JUMP_UNCONDITIONAL;
++		fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
++		fake_jump->ignore = true;
+ 	}
+-	memset(fake_jump, 0, sizeof(*fake_jump));
+-	INIT_LIST_HEAD(&fake_jump->alts);
+-	clear_insn_state(&fake_jump->state);
+-
+-	fake_jump->sec = special_alt->new_sec;
+-	fake_jump->offset = -1;
+-	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+-	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+-	fake_jump->ignore = true;
+ 
+ 	if (!special_alt->new_len) {
++		if (!fake_jump) {
++			WARN("%s: empty alternative at end of section",
++			     special_alt->orig_sec->name);
++			return -1;
++		}
++
+ 		*new_insn = fake_jump;
+ 		return 0;
+ 	}
+@@ -648,6 +646,8 @@ static int handle_group_alt(struct objtool_file *file,
+ 
+ 		last_new_insn = insn;
+ 
++		insn->ignore = orig_insn->ignore_alts;
++
+ 		if (insn->type != INSN_JUMP_CONDITIONAL &&
+ 		    insn->type != INSN_JUMP_UNCONDITIONAL)
+ 			continue;
+@@ -656,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file,
+ 			continue;
+ 
+ 		dest_off = insn->offset + insn->len + insn->immediate;
+-		if (dest_off == special_alt->new_off + special_alt->new_len)
++		if (dest_off == special_alt->new_off + special_alt->new_len) {
++			if (!fake_jump) {
++				WARN("%s: alternative jump to end of section",
++				     special_alt->orig_sec->name);
++				return -1;
++			}
+ 			insn->jump_dest = fake_jump;
++		}
+ 
+ 		if (!insn->jump_dest) {
+ 			WARN_FUNC("can't find alternative jump destination",
+@@ -672,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file,
+ 		return -1;
+ 	}
+ 
+-	list_add(&fake_jump->list, &last_new_insn->list);
++	if (fake_jump)
++		list_add(&fake_jump->list, &last_new_insn->list);
+ 
+ 	return 0;
+ }
+@@ -729,10 +736,6 @@ static int add_special_section_alts(struct objtool_file *file)
+ 			goto out;
+ 		}
+ 
+-		/* Ignore retpoline alternatives. */
+-		if (orig_insn->ignore_alts)
+-			continue;
+-
+ 		new_insn = NULL;
+ 		if (!special_alt->group || special_alt->new_len) {
+ 			new_insn = find_insn(file, special_alt->new_sec,
+@@ -1089,11 +1092,11 @@ static int decode_sections(struct objtool_file *file)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = add_call_destinations(file);
++	ret = add_special_section_alts(file);
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = add_special_section_alts(file);
++	ret = add_call_destinations(file);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -1720,10 +1723,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+ 
+ 		insn->visited = true;
+ 
+-		list_for_each_entry(alt, &insn->alts, list) {
+-			ret = validate_branch(file, alt->insn, state);
+-			if (ret)
+-				return 1;
++		if (!insn->ignore_alts) {
++			list_for_each_entry(alt, &insn->alts, list) {
++				ret = validate_branch(file, alt->insn, state);
++				if (ret)
++					return 1;
++			}
+ 		}
+ 
+ 		switch (insn->type) {
+diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
+index e61fe703197b..18384d9be4e1 100644
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
+ 	struct orc_entry *orc;
+ 	struct rela *rela;
+ 
++	if (!insn_sec->sym) {
++		WARN("missing symbol for section %s", insn_sec->name);
++		return -1;
++	}
++
+ 	/* populate ORC data */
+ 	orc = (struct orc_entry *)u_sec->data->d_buf + idx;
+ 	memcpy(orc, o, sizeof(*orc));
author	Mike Pagano <mpagano@gentoo.org>	2018-02-07 19:40:27 -0500
committer	Mike Pagano <mpagano@gentoo.org>	2018-11-14 09:00:39 -0500
commit	bf700be01b4e88d2b151c924c8a3e1be7a47be9d (patch)
tree	af43005304675dc5a3a81159f9c7fa5eff0cc84d /1017_linux-4.14.18.patch
parent	Linux patch 4.14.17 (diff)
download	linux-patches-bf700be01b4e88d2b151c924c8a3e1be7a47be9d.tar.gz linux-patches-bf700be01b4e88d2b151c924c8a3e1be7a47be9d.tar.bz2 linux-patches-bf700be01b4e88d2b151c924c8a3e1be7a47be9d.zip