author | Mike Pagano <mpagano@gentoo.org> | 2018-12-05 14:42:14 -0500 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2018-12-05 14:42:14 -0500 |
commit | a1249a08fe1aead9f7e3e0c0438a14d3c1487981 (patch) | |
tree | 39715dbb2dca6cad88e31ed7c5e001b34132f3e8 | |
parent | proj/linux-patches: Update existing patch for 4.14.85 (diff) | |
download | linux-patches-a1249a08fe1aead9f7e3e0c0438a14d3c1487981.tar.gz linux-patches-a1249a08fe1aead9f7e3e0c0438a14d3c1487981.tar.bz2 linux-patches-a1249a08fe1aead9f7e3e0c0438a14d3c1487981.zip |
proj/linux-patches: Linux patch 4.14.86 (4.14-93)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 14 |
-rw-r--r-- | 1085_linux-4.14.86.patch | 7052 |
2 files changed, 7061 insertions, 5 deletions
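
The bulk of this release is the upstream backport of the Spectre v2 user-space mitigation work: the `spectre_v2_user=` boot option, conditional STIBP/IBPB in the context-switch paths, and the `PR_SPEC_INDIRECT_BRANCH` prctl documented in the `Documentation/userspace-api/spec_ctrl.rst` hunk below. As a minimal sketch of how a user-space task would exercise that new prctl — the `PR_*` constants are the ones listed in the patched spec_ctrl.rst; the error handling and output around them are illustrative only:

```c
/*
 * Minimal sketch of the PR_SPEC_INDIRECT_BRANCH prctl interface documented
 * in the spec_ctrl.rst hunk of this patch. The PR_SPEC_* constants come
 * from <linux/prctl.h> and are only present once this backport (or a newer
 * kernel's headers) is installed.
 */
#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* Query the current indirect branch speculation state of this task. */
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
	if (state < 0) {
		perror("PR_GET_SPECULATION_CTRL");
		return 1;
	}
	printf("indirect branch speculation state: 0x%x\n", (unsigned)state);

	/*
	 * Ask the kernel to disable indirect branch speculation (STIBP) for
	 * this task; per the patch, the setting is inherited on fork().
	 * Depending on the spectre_v2_user= mode this may be refused (EPERM)
	 * or already be in effect.
	 */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
		  PR_SPEC_DISABLE, 0, 0) < 0)
		perror("PR_SET_SPECULATION_CTRL");

	return 0;
}
```

Whether these calls take effect depends on the `spectre_v2_user=` mode described in the kernel-parameters.txt hunk: the prctl and seccomp modes toggle TIF_SPEC_IB per task, strict mode forces STIBP on regardless, and under the seccomp modes sandboxed threads additionally get PR_SPEC_FORCE_DISABLE applied for them (see arch_seccomp_spec_mitigate() in the bugs.c hunk).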
diff --git a/0000_README b/0000_README index b328a3b6..b0b15a3d 100644 --- a/0000_README +++ b/0000_README @@ -363,26 +363,30 @@ Patch: 1079_linux-4.14.80.patch From: http://www.kernel.org Desc: Linux 4.14.80 -Patch: 1080-4.14.81.patch +Patch: 1080_4.14.81.patch From: http://www.kernel.org Desc: Linux 4.14.81 -Patch: 1081-4.14.82.patch +Patch: 1081_4.14.82.patch From: http://www.kernel.org Desc: Linux 4.14.82 -Patch: 1082-4.14.83.patch +Patch: 1082_4.14.83.patch From: http://www.kernel.org Desc: Linux 4.14.83 -Patch: 1083-4.14.84.patch +Patch: 1083_4.14.84.patch From: http://www.kernel.org Desc: Linux 4.14.84 -Patch: 1084-4.14.85.patch +Patch: 1084_4.14.85.patch From: http://www.kernel.org Desc: Linux 4.14.85 +Patch: 1085_4.14.86.patch +From: http://www.kernel.org +Desc: Linux 4.14.86 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1085_linux-4.14.86.patch b/1085_linux-4.14.86.patch new file mode 100644 index 00000000..c1ec4d9e --- /dev/null +++ b/1085_linux-4.14.86.patch @@ -0,0 +1,7052 @@ +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 99a08722124d..5f3d58142600 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -3994,9 +3994,13 @@ + + spectre_v2= [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability. ++ The default operation protects the kernel from ++ user space attacks. + +- on - unconditionally enable +- off - unconditionally disable ++ on - unconditionally enable, implies ++ spectre_v2_user=on ++ off - unconditionally disable, implies ++ spectre_v2_user=off + auto - kernel detects whether your CPU model is + vulnerable + +@@ -4006,6 +4010,12 @@ + CONFIG_RETPOLINE configuration option, and the + compiler with which the kernel was built. + ++ Selecting 'on' will also enable the mitigation ++ against user space to user space task attacks. ++ ++ Selecting 'off' will disable both the kernel and ++ the user space protections. ++ + Specific mitigations can also be selected manually: + + retpoline - replace indirect branches +@@ -4015,6 +4025,48 @@ + Not specifying this option is equivalent to + spectre_v2=auto. + ++ spectre_v2_user= ++ [X86] Control mitigation of Spectre variant 2 ++ (indirect branch speculation) vulnerability between ++ user space tasks ++ ++ on - Unconditionally enable mitigations. Is ++ enforced by spectre_v2=on ++ ++ off - Unconditionally disable mitigations. Is ++ enforced by spectre_v2=off ++ ++ prctl - Indirect branch speculation is enabled, ++ but mitigation can be enabled via prctl ++ per thread. The mitigation control state ++ is inherited on fork. ++ ++ prctl,ibpb ++ - Like "prctl" above, but only STIBP is ++ controlled per thread. IBPB is issued ++ always when switching between different user ++ space processes. ++ ++ seccomp ++ - Same as "prctl" above, but all seccomp ++ threads will enable the mitigation unless ++ they explicitly opt out. ++ ++ seccomp,ibpb ++ - Like "seccomp" above, but only STIBP is ++ controlled per thread. IBPB is issued ++ always when switching between different ++ user space processes. ++ ++ auto - Kernel selects the mitigation depending on ++ the available CPU features and vulnerability. ++ ++ Default mitigation: ++ If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl" ++ ++ Not specifying this option is equivalent to ++ spectre_v2_user=auto. 
++ + spec_store_bypass_disable= + [HW] Control Speculative Store Bypass (SSB) Disable mitigation + (Speculative Store Bypass vulnerability) +diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst +index 32f3d55c54b7..c4dbe6f7cdae 100644 +--- a/Documentation/userspace-api/spec_ctrl.rst ++++ b/Documentation/userspace-api/spec_ctrl.rst +@@ -92,3 +92,12 @@ Speculation misfeature controls + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); ++ ++- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes ++ (Mitigate Spectre V2 style attacks against user processes) ++ ++ Invocations: ++ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); ++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); ++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); ++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); +diff --git a/Makefile b/Makefile +index 58a248264090..572bd98d2344 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 4 + PATCHLEVEL = 14 +-SUBLEVEL = 85 ++SUBLEVEL = 86 + EXTRAVERSION = + NAME = Petit Gorille + +diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi +index 6e5bd8974f22..679b839bb2eb 100644 +--- a/arch/arm/boot/dts/rk3288-veyron.dtsi ++++ b/arch/arm/boot/dts/rk3288-veyron.dtsi +@@ -47,7 +47,11 @@ + #include "rk3288.dtsi" + + / { +- memory@0 { ++ /* ++ * The default coreboot on veyron devices ignores memory@0 nodes ++ * and would instead create another memory node. ++ */ ++ memory { + device_type = "memory"; + reg = <0x0 0x0 0x0 0x80000000>; + }; +diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +index 9a7486058455..eea7f8f070cf 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +@@ -130,7 +130,7 @@ + }; + + &pcie0 { +- ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>; ++ ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>; + num-lanes = <4>; + pinctrl-names = "default"; + pinctrl-0 = <&pcie_clkreqn_cpm>; +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 2af0af33362a..4f393eb9745f 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -440,10 +440,6 @@ config RETPOLINE + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +- Without compiler support, at least indirect branches in assembler +- code are eliminated. Since this includes the syscall entry path, +- it is not entirely pointless. +- + config INTEL_RDT + bool "Intel Resource Director Technology support" + default n +@@ -959,13 +955,7 @@ config NR_CPUS + approximately eight kilobytes to the kernel image. + + config SCHED_SMT +- bool "SMT (Hyperthreading) scheduler support" +- depends on SMP +- ---help--- +- SMT scheduler support improves the CPU scheduler's decision making +- when dealing with Intel Pentium 4 chips with HyperThreading at a +- cost of slightly increased overhead in some places. If unsure say +- N here. 
++ def_bool y if SMP + + config SCHED_MC + def_bool y +diff --git a/arch/x86/Makefile b/arch/x86/Makefile +index 1c4d012550ec..ce3658dd98e8 100644 +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -241,9 +241,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables + + # Avoid indirect branches in kernel to deal with Spectre + ifdef CONFIG_RETPOLINE +-ifneq ($(RETPOLINE_CFLAGS),) +- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE ++ifeq ($(RETPOLINE_CFLAGS),) ++ $(error You are building kernel with non-retpoline compiler, please update your compiler.) + endif ++ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) + endif + + archscripts: scripts_basic +diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c +index e5097dc85a06..7d12b0d1f359 100644 +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event) + if (config == -1LL) + return -EINVAL; + +- /* +- * Branch tracing: +- */ +- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && +- !attr->freq && hwc->sample_period == 1) { +- /* BTS is not supported by this architecture. */ +- if (!x86_pmu.bts_active) +- return -EOPNOTSUPP; +- +- /* BTS is currently only allowed for user-mode. */ +- if (!attr->exclude_kernel) +- return -EOPNOTSUPP; +- +- /* disallow bts if conflicting events are present */ +- if (x86_add_exclusive(x86_lbr_exclusive_lbr)) +- return -EBUSY; +- +- event->destroy = hw_perf_lbr_event_destroy; +- } +- + hwc->config |= config; + + return 0; +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c +index 228732654cfe..7bb80151bfff 100644 +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -2345,16 +2345,7 @@ done: + static struct event_constraint * + intel_bts_constraints(struct perf_event *event) + { +- struct hw_perf_event *hwc = &event->hw; +- unsigned int hw_event, bts_event; +- +- if (event->attr.freq) +- return NULL; +- +- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; +- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); +- +- if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) ++ if (unlikely(intel_pmu_has_bts(event))) + return &bts_constraint; + + return NULL; +@@ -2973,10 +2964,47 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) + return flags; + } + ++static int intel_pmu_bts_config(struct perf_event *event) ++{ ++ struct perf_event_attr *attr = &event->attr; ++ ++ if (unlikely(intel_pmu_has_bts(event))) { ++ /* BTS is not supported by this architecture. */ ++ if (!x86_pmu.bts_active) ++ return -EOPNOTSUPP; ++ ++ /* BTS is currently only allowed for user-mode. 
*/ ++ if (!attr->exclude_kernel) ++ return -EOPNOTSUPP; ++ ++ /* disallow bts if conflicting events are present */ ++ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) ++ return -EBUSY; ++ ++ event->destroy = hw_perf_lbr_event_destroy; ++ } ++ ++ return 0; ++} ++ ++static int core_pmu_hw_config(struct perf_event *event) ++{ ++ int ret = x86_pmu_hw_config(event); ++ ++ if (ret) ++ return ret; ++ ++ return intel_pmu_bts_config(event); ++} ++ + static int intel_pmu_hw_config(struct perf_event *event) + { + int ret = x86_pmu_hw_config(event); + ++ if (ret) ++ return ret; ++ ++ ret = intel_pmu_bts_config(event); + if (ret) + return ret; + +@@ -2999,7 +3027,7 @@ static int intel_pmu_hw_config(struct perf_event *event) + /* + * BTS is set up earlier in this path, so don't account twice + */ +- if (!intel_pmu_has_bts(event)) { ++ if (!unlikely(intel_pmu_has_bts(event))) { + /* disallow lbr if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; +@@ -3462,7 +3490,7 @@ static __initconst const struct x86_pmu core_pmu = { + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, + .disable = x86_pmu_disable_event, +- .hw_config = x86_pmu_hw_config, ++ .hw_config = core_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, +diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h +index c6698c63c047..3c51fcaf1e34 100644 +--- a/arch/x86/events/perf_event.h ++++ b/arch/x86/events/perf_event.h +@@ -850,11 +850,16 @@ static inline int amd_pmu_init(void) + + static inline bool intel_pmu_has_bts(struct perf_event *event) + { +- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && +- !event->attr.freq && event->hw.sample_period == 1) +- return true; ++ struct hw_perf_event *hwc = &event->hw; ++ unsigned int hw_event, bts_event; ++ ++ if (event->attr.freq) ++ return false; ++ ++ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; ++ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + +- return false; ++ return hw_event == bts_event && hwc->sample_period == 1; + } + + int intel_pmu_save_and_restart(struct perf_event *event); +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 673d6e988196..7d910827126b 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -284,7 +284,9 @@ + #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ + #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ + #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ ++#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ ++#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index ef7eec669a1b..62c62d3eb0ff 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -41,9 +41,10 @@ + + #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ + #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +-#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ ++#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ ++#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +-#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 1b4132161c1f..a633767419f2 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -3,6 +3,8 @@ + #ifndef _ASM_X86_NOSPEC_BRANCH_H_ + #define _ASM_X86_NOSPEC_BRANCH_H_ + ++#include <linux/static_key.h> ++ + #include <asm/alternative.h> + #include <asm/alternative-asm.h> + #include <asm/cpufeatures.h> +@@ -162,29 +164,35 @@ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + +-#if defined(CONFIG_X86_64) && defined(RETPOLINE) ++#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_X86_64 + + /* +- * Since the inline asm uses the %V modifier which is only in newer GCC, +- * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. ++ * Inline asm uses the %V modifier which is only in newer GCC ++ * which is ensured when CONFIG_RETPOLINE is defined. + */ + # define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ +- ALTERNATIVE( \ ++ ALTERNATIVE_2( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ +- X86_FEATURE_RETPOLINE) ++ X86_FEATURE_RETPOLINE, \ ++ "lfence;\n" \ ++ ANNOTATE_RETPOLINE_SAFE \ ++ "call *%[thunk_target]\n", \ ++ X86_FEATURE_RETPOLINE_AMD) + # define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +-#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) ++#else /* CONFIG_X86_32 */ + /* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. 
+ */ + # define CALL_NOSPEC \ +- ALTERNATIVE( \ ++ ANNOTATE_NOSPEC_ALTERNATIVE \ ++ ALTERNATIVE_2( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + " jmp 904f;\n" \ +@@ -199,9 +207,14 @@ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ +- X86_FEATURE_RETPOLINE) ++ X86_FEATURE_RETPOLINE, \ ++ "lfence;\n" \ ++ ANNOTATE_RETPOLINE_SAFE \ ++ "call *%[thunk_target]\n", \ ++ X86_FEATURE_RETPOLINE_AMD) + + # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) ++#endif + #else /* No retpoline for C / inline asm */ + # define CALL_NOSPEC "call *%[thunk_target]\n" + # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +@@ -210,14 +223,19 @@ + /* The Spectre V2 mitigation variants */ + enum spectre_v2_mitigation { + SPECTRE_V2_NONE, +- SPECTRE_V2_RETPOLINE_MINIMAL, +- SPECTRE_V2_RETPOLINE_MINIMAL_AMD, + SPECTRE_V2_RETPOLINE_GENERIC, + SPECTRE_V2_RETPOLINE_AMD, +- SPECTRE_V2_IBRS, + SPECTRE_V2_IBRS_ENHANCED, + }; + ++/* The indirect branch speculation control variants */ ++enum spectre_v2_user_mitigation { ++ SPECTRE_V2_USER_NONE, ++ SPECTRE_V2_USER_STRICT, ++ SPECTRE_V2_USER_PRCTL, ++ SPECTRE_V2_USER_SECCOMP, ++}; ++ + /* The Speculative Store Bypass disable variants */ + enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, +@@ -295,6 +313,10 @@ do { \ + preempt_enable(); \ + } while (0) + ++DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); ++DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); ++DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); ++ + #endif /* __ASSEMBLY__ */ + + /* +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index ae7c2c5cd7f0..5393babc0598 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); + } + ++static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) ++{ ++ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); ++ return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); ++} ++ + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) + { + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); + } + ++static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) ++{ ++ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); ++ return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); ++} ++ + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) + { + return (tifn & _TIF_SSBD) ? 
x86_amd_ls_cfg_ssbd_mask : 0ULL; +@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void); + static inline void speculative_store_bypass_ht_init(void) { } + #endif + +-extern void speculative_store_bypass_update(unsigned long tif); +- +-static inline void speculative_store_bypass_update_current(void) +-{ +- speculative_store_bypass_update(current_thread_info()->flags); +-} ++extern void speculation_ctrl_update(unsigned long tif); ++extern void speculation_ctrl_update_current(void); + + #endif +diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h +index 9b6df68d8fd1..12ef2b49d11b 100644 +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, + + __visible struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); +-struct tss_struct; +-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, +- struct tss_struct *tss); + + /* This runs runs on the previous thread's stack. */ + static inline void prepare_switch_to(struct task_struct *prev, +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index 95ff2d7f553f..bf9175d87844 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -81,10 +81,12 @@ struct thread_info { + #define TIF_SIGPENDING 2 /* signal pending */ + #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ + #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +-#define TIF_SSBD 5 /* Reduced data speculation */ ++#define TIF_SSBD 5 /* Speculative store bypass disable */ + #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SECCOMP 8 /* secure computing */ ++#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ ++#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ + #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ + #define TIF_UPROBE 12 /* breakpointed or singlestepping */ + #define TIF_PATCH_PENDING 13 /* pending live patching update */ +@@ -112,6 +114,8 @@ struct thread_info { + #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) ++#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) ++#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) + #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) + #define _TIF_UPROBE (1 << TIF_UPROBE) + #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) +@@ -147,8 +151,18 @@ struct thread_info { + _TIF_FSCHECK) + + /* flags to check in __switch_to() */ +-#define _TIF_WORK_CTXSW \ +- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) ++#define _TIF_WORK_CTXSW_BASE \ ++ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ ++ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) ++ ++/* ++ * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. 
++ */ ++#ifdef CONFIG_SMP ++# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) ++#else ++# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) ++#endif + + #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 2501be609b82..e31040333f0c 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -185,10 +185,14 @@ struct tlb_state { + + #define LOADED_MM_SWITCHING ((struct mm_struct *)1) + ++ /* Last user mm for optimizing IBPB */ ++ union { ++ struct mm_struct *last_user_mm; ++ unsigned long last_user_mm_ibpb; ++ }; ++ + u16 loaded_mm_asid; + u16 next_asid; +- /* last user mm's ctx id */ +- u64 last_ctx_id; + + /* + * We can be in one of several states: +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index dda741bd5789..7e03515662c0 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -554,7 +554,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) + nodes_per_socket = ((value >> 3) & 7) + 1; + } + +- if (c->x86 >= 0x15 && c->x86 <= 0x17) { ++ if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && ++ !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && ++ c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index e92aedd93806..f7a6d6203e13 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -14,6 +14,7 @@ + #include <linux/module.h> + #include <linux/nospec.h> + #include <linux/prctl.h> ++#include <linux/sched/smt.h> + + #include <asm/spec-ctrl.h> + #include <asm/cmdline.h> +@@ -34,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); + +-/* +- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any +- * writes to SPEC_CTRL contain whatever reserved bits have been set. +- */ +-u64 __ro_after_init x86_spec_ctrl_base; ++/* The base value of the SPEC_CTRL MSR that always has to be preserved. 
*/ ++u64 x86_spec_ctrl_base; + EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); ++static DEFINE_MUTEX(spec_ctrl_mutex); + + /* + * The vendor and possibly platform specific bits which can be modified in +@@ -54,6 +53,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; + u64 __ro_after_init x86_amd_ls_cfg_base; + u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; + ++/* Control conditional STIPB in switch_to() */ ++DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); ++/* Control conditional IBPB in switch_mm() */ ++DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); ++/* Control unconditional IBPB in switch_mm() */ ++DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); ++ + void __init check_bugs(void) + { + identify_boot_cpu(); +@@ -124,31 +130,6 @@ void __init check_bugs(void) + #endif + } + +-/* The kernel command line selection */ +-enum spectre_v2_mitigation_cmd { +- SPECTRE_V2_CMD_NONE, +- SPECTRE_V2_CMD_AUTO, +- SPECTRE_V2_CMD_FORCE, +- SPECTRE_V2_CMD_RETPOLINE, +- SPECTRE_V2_CMD_RETPOLINE_GENERIC, +- SPECTRE_V2_CMD_RETPOLINE_AMD, +-}; +- +-static const char *spectre_v2_strings[] = { +- [SPECTRE_V2_NONE] = "Vulnerable", +- [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", +- [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", +- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", +- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", +- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +-}; +- +-#undef pr_fmt +-#define pr_fmt(fmt) "Spectre V2 : " fmt +- +-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = +- SPECTRE_V2_NONE; +- + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +@@ -166,9 +147,14 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + + /* SSBD controlled in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || ++ static_cpu_has(X86_FEATURE_AMD_SSBD)) + hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + ++ /* Conditional STIBP enabled? */ ++ if (static_branch_unlikely(&switch_to_cond_stibp)) ++ hostval |= stibp_tif_to_spec_ctrl(ti->flags); ++ + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -202,7 +188,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + tif = setguest ? 
ssbd_spec_ctrl_to_tif(guestval) : + ssbd_spec_ctrl_to_tif(hostval); + +- speculative_store_bypass_update(tif); ++ speculation_ctrl_update(tif); + } + } + EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); +@@ -217,6 +203,15 @@ static void x86_amd_ssb_disable(void) + wrmsrl(MSR_AMD64_LS_CFG, msrval); + } + ++#undef pr_fmt ++#define pr_fmt(fmt) "Spectre V2 : " fmt ++ ++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = ++ SPECTRE_V2_NONE; ++ ++static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = ++ SPECTRE_V2_USER_NONE; ++ + #ifdef RETPOLINE + static bool spectre_v2_bad_module; + +@@ -238,67 +233,217 @@ static inline const char *spectre_v2_module_string(void) + static inline const char *spectre_v2_module_string(void) { return ""; } + #endif + +-static void __init spec2_print_if_insecure(const char *reason) ++static inline bool match_option(const char *arg, int arglen, const char *opt) + { +- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) +- pr_info("%s selected on command line.\n", reason); ++ int len = strlen(opt); ++ ++ return len == arglen && !strncmp(arg, opt, len); + } + +-static void __init spec2_print_if_secure(const char *reason) ++/* The kernel command line selection for spectre v2 */ ++enum spectre_v2_mitigation_cmd { ++ SPECTRE_V2_CMD_NONE, ++ SPECTRE_V2_CMD_AUTO, ++ SPECTRE_V2_CMD_FORCE, ++ SPECTRE_V2_CMD_RETPOLINE, ++ SPECTRE_V2_CMD_RETPOLINE_GENERIC, ++ SPECTRE_V2_CMD_RETPOLINE_AMD, ++}; ++ ++enum spectre_v2_user_cmd { ++ SPECTRE_V2_USER_CMD_NONE, ++ SPECTRE_V2_USER_CMD_AUTO, ++ SPECTRE_V2_USER_CMD_FORCE, ++ SPECTRE_V2_USER_CMD_PRCTL, ++ SPECTRE_V2_USER_CMD_PRCTL_IBPB, ++ SPECTRE_V2_USER_CMD_SECCOMP, ++ SPECTRE_V2_USER_CMD_SECCOMP_IBPB, ++}; ++ ++static const char * const spectre_v2_user_strings[] = { ++ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", ++ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", ++ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", ++ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", ++}; ++ ++static const struct { ++ const char *option; ++ enum spectre_v2_user_cmd cmd; ++ bool secure; ++} v2_user_options[] __initdata = { ++ { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, ++ { "off", SPECTRE_V2_USER_CMD_NONE, false }, ++ { "on", SPECTRE_V2_USER_CMD_FORCE, true }, ++ { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, ++ { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, ++ { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, ++ { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, ++}; ++ ++static void __init spec_v2_user_print_cond(const char *reason, bool secure) + { +- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) +- pr_info("%s selected on command line.\n", reason); ++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) ++ pr_info("spectre_v2_user=%s forced on command line.\n", reason); + } + +-static inline bool retp_compiler(void) ++static enum spectre_v2_user_cmd __init ++spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) + { +- return __is_defined(RETPOLINE); ++ char arg[20]; ++ int ret, i; ++ ++ switch (v2_cmd) { ++ case SPECTRE_V2_CMD_NONE: ++ return SPECTRE_V2_USER_CMD_NONE; ++ case SPECTRE_V2_CMD_FORCE: ++ return SPECTRE_V2_USER_CMD_FORCE; ++ default: ++ break; ++ } ++ ++ ret = cmdline_find_option(boot_command_line, "spectre_v2_user", ++ arg, sizeof(arg)); ++ if (ret < 0) ++ return SPECTRE_V2_USER_CMD_AUTO; ++ ++ for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { ++ if (match_option(arg, ret, v2_user_options[i].option)) { ++ 
spec_v2_user_print_cond(v2_user_options[i].option, ++ v2_user_options[i].secure); ++ return v2_user_options[i].cmd; ++ } ++ } ++ ++ pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); ++ return SPECTRE_V2_USER_CMD_AUTO; + } + +-static inline bool match_option(const char *arg, int arglen, const char *opt) ++static void __init ++spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) + { +- int len = strlen(opt); ++ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; ++ bool smt_possible = IS_ENABLED(CONFIG_SMP); ++ enum spectre_v2_user_cmd cmd; + +- return len == arglen && !strncmp(arg, opt, len); ++ if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) ++ return; ++ ++ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || ++ cpu_smt_control == CPU_SMT_NOT_SUPPORTED) ++ smt_possible = false; ++ ++ cmd = spectre_v2_parse_user_cmdline(v2_cmd); ++ switch (cmd) { ++ case SPECTRE_V2_USER_CMD_NONE: ++ goto set_mode; ++ case SPECTRE_V2_USER_CMD_FORCE: ++ mode = SPECTRE_V2_USER_STRICT; ++ break; ++ case SPECTRE_V2_USER_CMD_PRCTL: ++ case SPECTRE_V2_USER_CMD_PRCTL_IBPB: ++ mode = SPECTRE_V2_USER_PRCTL; ++ break; ++ case SPECTRE_V2_USER_CMD_AUTO: ++ case SPECTRE_V2_USER_CMD_SECCOMP: ++ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: ++ if (IS_ENABLED(CONFIG_SECCOMP)) ++ mode = SPECTRE_V2_USER_SECCOMP; ++ else ++ mode = SPECTRE_V2_USER_PRCTL; ++ break; ++ } ++ ++ /* Initialize Indirect Branch Prediction Barrier */ ++ if (boot_cpu_has(X86_FEATURE_IBPB)) { ++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB); ++ ++ switch (cmd) { ++ case SPECTRE_V2_USER_CMD_FORCE: ++ case SPECTRE_V2_USER_CMD_PRCTL_IBPB: ++ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: ++ static_branch_enable(&switch_mm_always_ibpb); ++ break; ++ case SPECTRE_V2_USER_CMD_PRCTL: ++ case SPECTRE_V2_USER_CMD_AUTO: ++ case SPECTRE_V2_USER_CMD_SECCOMP: ++ static_branch_enable(&switch_mm_cond_ibpb); ++ break; ++ default: ++ break; ++ } ++ ++ pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", ++ static_key_enabled(&switch_mm_always_ibpb) ? ++ "always-on" : "conditional"); ++ } ++ ++ /* If enhanced IBRS is enabled no STIPB required */ ++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) ++ return; ++ ++ /* ++ * If SMT is not possible or STIBP is not available clear the STIPB ++ * mode. 
++ */ ++ if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) ++ mode = SPECTRE_V2_USER_NONE; ++set_mode: ++ spectre_v2_user = mode; ++ /* Only print the STIBP mode when SMT possible */ ++ if (smt_possible) ++ pr_info("%s\n", spectre_v2_user_strings[mode]); + } + ++static const char * const spectre_v2_strings[] = { ++ [SPECTRE_V2_NONE] = "Vulnerable", ++ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", ++ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", ++ [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", ++}; ++ + static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +-} mitigation_options[] = { +- { "off", SPECTRE_V2_CMD_NONE, false }, +- { "on", SPECTRE_V2_CMD_FORCE, true }, +- { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, +- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, +- { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, +- { "auto", SPECTRE_V2_CMD_AUTO, false }, ++} mitigation_options[] __initdata = { ++ { "off", SPECTRE_V2_CMD_NONE, false }, ++ { "on", SPECTRE_V2_CMD_FORCE, true }, ++ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, ++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, ++ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, ++ { "auto", SPECTRE_V2_CMD_AUTO, false }, + }; + ++static void __init spec_v2_print_cond(const char *reason, bool secure) ++{ ++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) ++ pr_info("%s selected on command line.\n", reason); ++} ++ + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + { ++ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + char arg[20]; + int ret, i; +- enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; +- else { +- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); +- if (ret < 0) +- return SPECTRE_V2_CMD_AUTO; + +- for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { +- if (!match_option(arg, ret, mitigation_options[i].option)) +- continue; +- cmd = mitigation_options[i].cmd; +- break; +- } ++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); ++ if (ret < 0) ++ return SPECTRE_V2_CMD_AUTO; + +- if (i >= ARRAY_SIZE(mitigation_options)) { +- pr_err("unknown option (%s). Switching to AUTO select\n", arg); +- return SPECTRE_V2_CMD_AUTO; +- } ++ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { ++ if (!match_option(arg, ret, mitigation_options[i].option)) ++ continue; ++ cmd = mitigation_options[i].cmd; ++ break; ++ } ++ ++ if (i >= ARRAY_SIZE(mitigation_options)) { ++ pr_err("unknown option (%s). Switching to AUTO select\n", arg); ++ return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || +@@ -315,11 +460,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + return SPECTRE_V2_CMD_AUTO; + } + +- if (mitigation_options[i].secure) +- spec2_print_if_secure(mitigation_options[i].option); +- else +- spec2_print_if_insecure(mitigation_options[i].option); +- ++ spec_v2_print_cond(mitigation_options[i].option, ++ mitigation_options[i].secure); + return cmd; + } + +@@ -375,14 +517,12 @@ retpoline_auto: + pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); + goto retpoline_generic; + } +- mode = retp_compiler() ? 
SPECTRE_V2_RETPOLINE_AMD : +- SPECTRE_V2_RETPOLINE_MINIMAL_AMD; ++ mode = SPECTRE_V2_RETPOLINE_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: +- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : +- SPECTRE_V2_RETPOLINE_MINIMAL; ++ mode = SPECTRE_V2_RETPOLINE_GENERIC; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + +@@ -401,12 +541,6 @@ specv2_set_mode: + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + +- /* Initialize Indirect Branch Prediction Barrier if supported */ +- if (boot_cpu_has(X86_FEATURE_IBPB)) { +- setup_force_cpu_cap(X86_FEATURE_USE_IBPB); +- pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); +- } +- + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. Enhanced IBRS protects firmware too, so, enable restricted +@@ -422,6 +556,66 @@ specv2_set_mode: + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } ++ ++ /* Set up IBPB and STIBP depending on the general spectre V2 command */ ++ spectre_v2_user_select_mitigation(cmd); ++ ++ /* Enable STIBP if appropriate */ ++ arch_smt_update(); ++} ++ ++static void update_stibp_msr(void * __unused) ++{ ++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++} ++ ++/* Update x86_spec_ctrl_base in case SMT state changed. */ ++static void update_stibp_strict(void) ++{ ++ u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; ++ ++ if (sched_smt_active()) ++ mask |= SPEC_CTRL_STIBP; ++ ++ if (mask == x86_spec_ctrl_base) ++ return; ++ ++ pr_info("Update user space SMT mitigation: STIBP %s\n", ++ mask & SPEC_CTRL_STIBP ? "always-on" : "off"); ++ x86_spec_ctrl_base = mask; ++ on_each_cpu(update_stibp_msr, NULL, 1); ++} ++ ++/* Update the static key controlling the evaluation of TIF_SPEC_IB */ ++static void update_indir_branch_cond(void) ++{ ++ if (sched_smt_active()) ++ static_branch_enable(&switch_to_cond_stibp); ++ else ++ static_branch_disable(&switch_to_cond_stibp); ++} ++ ++void arch_smt_update(void) ++{ ++ /* Enhanced IBRS implies STIBP. No update required. 
*/ ++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) ++ return; ++ ++ mutex_lock(&spec_ctrl_mutex); ++ ++ switch (spectre_v2_user) { ++ case SPECTRE_V2_USER_NONE: ++ break; ++ case SPECTRE_V2_USER_STRICT: ++ update_stibp_strict(); ++ break; ++ case SPECTRE_V2_USER_PRCTL: ++ case SPECTRE_V2_USER_SECCOMP: ++ update_indir_branch_cond(); ++ break; ++ } ++ ++ mutex_unlock(&spec_ctrl_mutex); + } + + #undef pr_fmt +@@ -438,7 +632,7 @@ enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_SECCOMP, + }; + +-static const char *ssb_strings[] = { ++static const char * const ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", +@@ -448,7 +642,7 @@ static const char *ssb_strings[] = { + static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +-} ssb_mitigation_options[] = { ++} ssb_mitigation_options[] __initdata = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ +@@ -532,18 +726,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + if (mode == SPEC_STORE_BYPASS_DISABLE) { + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* +- * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses +- * a completely different MSR and bit dependent on family. ++ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may ++ * use a completely different MSR and bit dependent on family. + */ +- switch (boot_cpu_data.x86_vendor) { +- case X86_VENDOR_INTEL: ++ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && ++ !static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ x86_amd_ssb_disable(); ++ } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +- break; +- case X86_VENDOR_AMD: +- x86_amd_ssb_disable(); +- break; + } + } + +@@ -561,10 +753,25 @@ static void ssb_select_mitigation(void) + #undef pr_fmt + #define pr_fmt(fmt) "Speculation prctl: " fmt + +-static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) ++static void task_update_spec_tif(struct task_struct *tsk) + { +- bool update; ++ /* Force the update of the real TIF bits */ ++ set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); + ++ /* ++ * Immediately update the speculation control MSRs for the current ++ * task, but for a non-current task delay setting the CPU ++ * mitigation until it is scheduled next. ++ * ++ * This can only happen for SECCOMP mitigation. For PRCTL it's ++ * always the current task. 
++ */ ++ if (tsk == current) ++ speculation_ctrl_update_current(); ++} ++ ++static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) ++{ + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) + return -ENXIO; +@@ -575,28 +782,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); +- update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); ++ task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); +- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); ++ task_update_spec_tif(task); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); +- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); ++ task_update_spec_tif(task); + break; + default: + return -ERANGE; + } ++ return 0; ++} + +- /* +- * If being set on non-current task, delay setting the CPU +- * mitigation until it is next scheduled. +- */ +- if (task == current && update) +- speculative_store_bypass_update_current(); +- ++static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) ++{ ++ switch (ctrl) { ++ case PR_SPEC_ENABLE: ++ if (spectre_v2_user == SPECTRE_V2_USER_NONE) ++ return 0; ++ /* ++ * Indirect branch speculation is always disabled in strict ++ * mode. ++ */ ++ if (spectre_v2_user == SPECTRE_V2_USER_STRICT) ++ return -EPERM; ++ task_clear_spec_ib_disable(task); ++ task_update_spec_tif(task); ++ break; ++ case PR_SPEC_DISABLE: ++ case PR_SPEC_FORCE_DISABLE: ++ /* ++ * Indirect branch speculation is always allowed when ++ * mitigation is force disabled. ++ */ ++ if (spectre_v2_user == SPECTRE_V2_USER_NONE) ++ return -EPERM; ++ if (spectre_v2_user == SPECTRE_V2_USER_STRICT) ++ return 0; ++ task_set_spec_ib_disable(task); ++ if (ctrl == PR_SPEC_FORCE_DISABLE) ++ task_set_spec_ib_force_disable(task); ++ task_update_spec_tif(task); ++ break; ++ default: ++ return -ERANGE; ++ } + return 0; + } + +@@ -606,6 +841,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); ++ case PR_SPEC_INDIRECT_BRANCH: ++ return ib_prctl_set(task, ctrl); + default: + return -ENODEV; + } +@@ -616,6 +853,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) + { + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); ++ if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) ++ ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); + } + #endif + +@@ -638,11 +877,35 @@ static int ssb_prctl_get(struct task_struct *task) + } + } + ++static int ib_prctl_get(struct task_struct *task) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) ++ return PR_SPEC_NOT_AFFECTED; ++ ++ switch (spectre_v2_user) { ++ case SPECTRE_V2_USER_NONE: ++ return PR_SPEC_ENABLE; ++ case SPECTRE_V2_USER_PRCTL: ++ case SPECTRE_V2_USER_SECCOMP: ++ if (task_spec_ib_force_disable(task)) ++ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; ++ if (task_spec_ib_disable(task)) ++ return PR_SPEC_PRCTL | PR_SPEC_DISABLE; ++ return PR_SPEC_PRCTL | PR_SPEC_ENABLE; ++ case SPECTRE_V2_USER_STRICT: ++ return PR_SPEC_DISABLE; ++ default: ++ return PR_SPEC_NOT_AFFECTED; ++ } ++} ++ + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + { + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); ++ case PR_SPEC_INDIRECT_BRANCH: ++ return ib_prctl_get(task); 
+ default: + return -ENODEV; + } +@@ -780,7 +1043,7 @@ early_param("l1tf", l1tf_cmdline); + #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" + + #if IS_ENABLED(CONFIG_KVM_INTEL) +-static const char *l1tf_vmx_states[] = { ++static const char * const l1tf_vmx_states[] = { + [VMENTER_L1D_FLUSH_AUTO] = "auto", + [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", + [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", +@@ -796,13 +1059,14 @@ static ssize_t l1tf_show_state(char *buf) + + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || + (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && +- cpu_smt_control == CPU_SMT_ENABLED)) ++ sched_smt_active())) { + return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); ++ } + + return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], +- cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); ++ sched_smt_active() ? "vulnerable" : "disabled"); + } + #else + static ssize_t l1tf_show_state(char *buf) +@@ -811,6 +1075,36 @@ static ssize_t l1tf_show_state(char *buf) + } + #endif + ++static char *stibp_state(void) ++{ ++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) ++ return ""; ++ ++ switch (spectre_v2_user) { ++ case SPECTRE_V2_USER_NONE: ++ return ", STIBP: disabled"; ++ case SPECTRE_V2_USER_STRICT: ++ return ", STIBP: forced"; ++ case SPECTRE_V2_USER_PRCTL: ++ case SPECTRE_V2_USER_SECCOMP: ++ if (static_key_enabled(&switch_to_cond_stibp)) ++ return ", STIBP: conditional"; ++ } ++ return ""; ++} ++ ++static char *ibpb_state(void) ++{ ++ if (boot_cpu_has(X86_FEATURE_IBPB)) { ++ if (static_key_enabled(&switch_mm_always_ibpb)) ++ return ", IBPB: always-on"; ++ if (static_key_enabled(&switch_mm_cond_ibpb)) ++ return ", IBPB: conditional"; ++ return ", IBPB: disabled"; ++ } ++ return ""; ++} ++ + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { +@@ -828,9 +1122,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: +- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], +- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", ++ return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ++ ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", ++ stibp_state(), ++ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? 
", RSB filling" : "", + spectre_v2_module_string()); + + case X86_BUG_SPEC_STORE_BYPASS: +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 96643e2c75b8..51e49f6fe8e1 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -760,6 +760,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c) + set_cpu_cap(c, X86_FEATURE_STIBP); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } ++ ++ if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { ++ set_cpu_cap(c, X86_FEATURE_SSBD); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); ++ } + } + + void get_cpu_cap(struct cpuinfo_x86 *c) +@@ -958,7 +964,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + if (!x86_match_cpu(cpu_no_spec_store_bypass) && +- !(ia32_cap & ARCH_CAP_SSB_NO)) ++ !(ia32_cap & ARCH_CAP_SSB_NO) && ++ !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + + if (x86_match_cpu(cpu_no_speculation)) +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index dbcb01006749..beec0daecbc5 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -56,7 +56,7 @@ + /* Threshold LVT offset is at MSR0xC0000410[15:12] */ + #define SMCA_THR_LVT_OFF 0xF000 + +-static bool thresholding_en; ++static bool thresholding_irq_en; + + static const char * const th_names[] = { + "load_store", +@@ -533,9 +533,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + + set_offset: + offset = setup_APIC_mce_threshold(offset, new); +- +- if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) +- mce_threshold_vector = amd_threshold_interrupt; ++ if (offset == new) ++ thresholding_irq_en = true; + + done: + mce_threshold_block_init(&b, offset); +@@ -1356,9 +1355,6 @@ int mce_threshold_remove_device(unsigned int cpu) + { + unsigned int bank; + +- if (!thresholding_en) +- return 0; +- + for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (!(per_cpu(bank_map, cpu) & (1 << bank))) + continue; +@@ -1376,9 +1372,6 @@ int mce_threshold_create_device(unsigned int cpu) + struct threshold_bank **bp; + int err = 0; + +- if (!thresholding_en) +- return 0; +- + bp = per_cpu(threshold_banks, cpu); + if (bp) + return 0; +@@ -1407,9 +1400,6 @@ static __init int threshold_init_device(void) + { + unsigned lcpu = 0; + +- if (mce_threshold_vector == amd_threshold_interrupt) +- thresholding_en = true; +- + /* to hit CPUs online before the notifier is up */ + for_each_online_cpu(lcpu) { + int err = mce_threshold_create_device(lcpu); +@@ -1418,6 +1408,9 @@ static __init int threshold_init_device(void) + return err; + } + ++ if (thresholding_irq_en) ++ mce_threshold_vector = amd_threshold_interrupt; ++ + return 0; + } + /* +diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c +index 61a949d84dfa..d99a8ee9e185 100644 +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) + sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + } + ++ local_bh_disable(); + fpu->initialized = 1; +- preempt_disable(); + fpu__restore(fpu); +- preempt_enable(); ++ local_bh_enable(); + + return err; + } else { +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 988a98f34c66..a98d1cdd6299 100644 +--- a/arch/x86/kernel/process.c ++++ 
b/arch/x86/kernel/process.c +@@ -41,6 +41,8 @@ + #include <asm/prctl.h> + #include <asm/spec-ctrl.h> + ++#include "process.h" ++ + /* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned +@@ -255,11 +257,12 @@ void arch_setup_new_exec(void) + enable_cpuid(); + } + +-static inline void switch_to_bitmap(struct tss_struct *tss, +- struct thread_struct *prev, ++static inline void switch_to_bitmap(struct thread_struct *prev, + struct thread_struct *next, + unsigned long tifp, unsigned long tifn) + { ++ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); ++ + if (tifn & _TIF_IO_BITMAP) { + /* + * Copy the relevant range of the IO bitmap. +@@ -398,32 +401,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); + } + +-static __always_inline void intel_set_ssb_state(unsigned long tifn) ++/* ++ * Update the MSRs managing speculation control, during context switch. ++ * ++ * tifp: Previous task's thread flags ++ * tifn: Next task's thread flags ++ */ ++static __always_inline void __speculation_ctrl_update(unsigned long tifp, ++ unsigned long tifn) + { +- u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); ++ unsigned long tif_diff = tifp ^ tifn; ++ u64 msr = x86_spec_ctrl_base; ++ bool updmsr = false; ++ ++ /* ++ * If TIF_SSBD is different, select the proper mitigation ++ * method. Note that if SSBD mitigation is disabled or permanentely ++ * enabled this branch can't be taken because nothing can set ++ * TIF_SSBD. ++ */ ++ if (tif_diff & _TIF_SSBD) { ++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { ++ amd_set_ssb_virt_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { ++ amd_set_core_ssb_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || ++ static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ msr |= ssbd_tif_to_spec_ctrl(tifn); ++ updmsr = true; ++ } ++ } ++ ++ /* ++ * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, ++ * otherwise avoid the MSR write. ++ */ ++ if (IS_ENABLED(CONFIG_SMP) && ++ static_branch_unlikely(&switch_to_cond_stibp)) { ++ updmsr |= !!(tif_diff & _TIF_SPEC_IB); ++ msr |= stibp_tif_to_spec_ctrl(tifn); ++ } + +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ if (updmsr) ++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } + +-static __always_inline void __speculative_store_bypass_update(unsigned long tifn) ++static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) + { +- if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) +- amd_set_ssb_virt_state(tifn); +- else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) +- amd_set_core_ssb_state(tifn); +- else +- intel_set_ssb_state(tifn); ++ if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { ++ if (task_spec_ssb_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SSBD); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SSBD); ++ ++ if (task_spec_ib_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ } ++ /* Return the updated threadinfo flags*/ ++ return task_thread_info(tsk)->flags; + } + +-void speculative_store_bypass_update(unsigned long tif) ++void speculation_ctrl_update(unsigned long tif) + { ++ /* Forced update. 
Make sure all relevant TIF flags are different */ + preempt_disable(); +- __speculative_store_bypass_update(tif); ++ __speculation_ctrl_update(~tif, tif); + preempt_enable(); + } + +-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, +- struct tss_struct *tss) ++/* Called from seccomp/prctl update */ ++void speculation_ctrl_update_current(void) ++{ ++ preempt_disable(); ++ speculation_ctrl_update(speculation_ctrl_update_tif(current)); ++ preempt_enable(); ++} ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) + { + struct thread_struct *prev, *next; + unsigned long tifp, tifn; +@@ -433,7 +489,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + + tifn = READ_ONCE(task_thread_info(next_p)->flags); + tifp = READ_ONCE(task_thread_info(prev_p)->flags); +- switch_to_bitmap(tss, prev, next, tifp, tifn); ++ switch_to_bitmap(prev, next, tifp, tifn); + + propagate_user_return_notify(prev_p, next_p); + +@@ -454,8 +510,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + if ((tifp ^ tifn) & _TIF_NOCPUID) + set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); + +- if ((tifp ^ tifn) & _TIF_SSBD) +- __speculative_store_bypass_update(tifn); ++ if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { ++ __speculation_ctrl_update(tifp, tifn); ++ } else { ++ speculation_ctrl_update_tif(prev_p); ++ tifn = speculation_ctrl_update_tif(next_p); ++ ++ /* Enforce MSR update to ensure consistent state */ ++ __speculation_ctrl_update(~tifn, tifn); ++ } + } + + /* +diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h +new file mode 100644 +index 000000000000..898e97cf6629 +--- /dev/null ++++ b/arch/x86/kernel/process.h +@@ -0,0 +1,39 @@ ++// SPDX-License-Identifier: GPL-2.0 ++// ++// Code shared between 32 and 64 bit ++ ++#include <asm/spec-ctrl.h> ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); ++ ++/* ++ * This needs to be inline to optimize for the common case where no extra ++ * work needs to be done. ++ */ ++static inline void switch_to_extra(struct task_struct *prev, ++ struct task_struct *next) ++{ ++ unsigned long next_tif = task_thread_info(next)->flags; ++ unsigned long prev_tif = task_thread_info(prev)->flags; ++ ++ if (IS_ENABLED(CONFIG_SMP)) { ++ /* ++ * Avoid __switch_to_xtra() invocation when conditional ++ * STIPB is disabled and the only different bit is ++ * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not ++ * in the TIF_WORK_CTXSW masks. ++ */ ++ if (!static_branch_likely(&switch_to_cond_stibp)) { ++ prev_tif &= ~_TIF_SPEC_IB; ++ next_tif &= ~_TIF_SPEC_IB; ++ } ++ } ++ ++ /* ++ * __switch_to_xtra() handles debug registers, i/o bitmaps, ++ * speculation mitigations etc. 
++ */ ++ if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || ++ prev_tif & _TIF_WORK_CTXSW_PREV)) ++ __switch_to_xtra(prev, next); ++} +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 5224c6099184..c2df91eab573 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -59,6 +59,8 @@ + #include <asm/intel_rdt_sched.h> + #include <asm/proto.h> + ++#include "process.h" ++ + void __show_regs(struct pt_regs *regs, int all) + { + unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; +@@ -234,7 +236,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); + + /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ + +@@ -266,12 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) + set_iopl_mask(next->iopl); + +- /* +- * Now maybe handle debug registers and/or IO bitmaps +- */ +- if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || +- task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) +- __switch_to_xtra(prev_p, next_p, tss); ++ switch_to_extra(prev_p, next_p); + + /* + * Leave lazy mode, flushing any hypercalls made here. +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index cbeecfcc66d6..ec63d6be5e02 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -59,6 +59,8 @@ + #include <asm/unistd_32_ia32.h> + #endif + ++#include "process.h" ++ + __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); + + /* Prints also some state that isn't saved in the pt_regs */ +@@ -400,7 +402,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); + + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && + this_cpu_read(irq_count) != -1); +@@ -467,12 +468,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + /* Reload sp0. */ + update_sp0(next_p); + +- /* +- * Now maybe reload the debug registers and handle I/O bitmaps +- */ +- if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || +- task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) +- __switch_to_xtra(prev_p, next_p, tss); ++ __switch_to_xtra(prev_p, next_p); + + #ifdef CONFIG_XEN_PV + /* +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index d1f5c744142b..bbcd69c76d96 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -367,7 +367,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = +- F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); ++ F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | ++ F(AMD_SSB_NO); + + /* cpuid 0xC0000001.edx */ + const u32 kvm_cpuid_C000_0001_edx_x86_features = +@@ -649,7 +650,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + entry->ebx |= F(VIRT_SSBD); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); +- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) ++ /* ++ * The preference is to use SPEC CTRL MSR instead of the ++ * VIRT_SPEC MSR. 
++ */ ++ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && ++ !boot_cpu_has(X86_FEATURE_AMD_SSBD)) + entry->ebx |= F(VIRT_SSBD); + break; + } +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index d755e0d44ac1..364d9895dd56 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -4734,9 +4734,9 @@ static bool need_remote_flush(u64 old, u64 new) + } + + static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, +- const u8 *new, int *bytes) ++ int *bytes) + { +- u64 gentry; ++ u64 gentry = 0; + int r; + + /* +@@ -4748,22 +4748,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, + /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ + *gpa &= ~(gpa_t)7; + *bytes = 8; +- r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); +- if (r) +- gentry = 0; +- new = (const u8 *)&gentry; + } + +- switch (*bytes) { +- case 4: +- gentry = *(const u32 *)new; +- break; +- case 8: +- gentry = *(const u64 *)new; +- break; +- default: +- gentry = 0; +- break; ++ if (*bytes == 4 || *bytes == 8) { ++ r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); ++ if (r) ++ gentry = 0; + } + + return gentry; +@@ -4876,8 +4866,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + + pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); + +- gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); +- + /* + * No need to care whether allocation memory is successful + * or not since pte prefetch is skiped if it does not have +@@ -4886,6 +4874,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + mmu_topup_memory_caches(vcpu); + + spin_lock(&vcpu->kvm->mmu_lock); ++ ++ gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); ++ + ++vcpu->kvm->stat.mmu_pte_write; + kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index f6bebcec60b4..17f08db34547 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -1733,21 +1733,31 @@ out: + return ERR_PTR(err); + } + ++static void svm_clear_current_vmcb(struct vmcb *vmcb) ++{ ++ int i; ++ ++ for_each_online_cpu(i) ++ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); ++} ++ + static void svm_free_vcpu(struct kvm_vcpu *vcpu) + { + struct vcpu_svm *svm = to_svm(vcpu); + ++ /* ++ * The vmcb page can be recycled, causing a false negative in ++ * svm_vcpu_load(). So, ensure that no logical CPU has this ++ * vmcb page recorded as its current vmcb. ++ */ ++ svm_clear_current_vmcb(svm->vmcb); ++ + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); + __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); + __free_page(virt_to_page(svm->nested.hsave)); + __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, svm); +- /* +- * The vmcb page can be recycled, causing a false negative in +- * svm_vcpu_load(). So do a full IBPB now. 
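The svm_clear_current_vmcb() hunk above replaces a blanket IBPB with a cheaper pointer-invalidation scheme: before the vmcb page is freed, every per-CPU "current vmcb" slot that still points at it is cleared with a compare-and-swap, so a recycled page can never be mistaken for the one a CPU last loaded. A generic user-space sketch of that pattern (C11 atomics; the names and CPU count are made up):

#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

static _Atomic(void *) current_obj[NR_CPUS];

static void clear_cached(void *obj)
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++) {
        void *expected = obj;

        /* Only clears the slot if it still holds @obj. */
        atomic_compare_exchange_strong(&current_obj[cpu], &expected, NULL);
    }
}

int main(void)
{
    int a, b;

    atomic_store(&current_obj[0], &a);
    atomic_store(&current_obj[1], &b);
    clear_cached(&a);   /* slot 0 is cleared, slot 1 is left alone */
    printf("%p %p\n", atomic_load(&current_obj[0]), atomic_load(&current_obj[1]));
    return 0;
}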
+- */ +- indirect_branch_prediction_barrier(); + } + + static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +@@ -3644,7 +3654,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && +- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + return 1; + + msr_info->data = svm->spec_ctrl; +@@ -3749,11 +3760,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) + break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && +- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ +- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + return 1; + + svm->spec_ctrl = data; +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 8d688b213504..f24329659bea 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -6378,6 +6378,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, + clock_pairing.nsec = ts.tv_nsec; + clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); + clock_pairing.flags = 0; ++ memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); + + ret = 0; + if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, +@@ -6884,7 +6885,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) + else { + if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); +- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); ++ if (ioapic_in_kernel(vcpu->kvm)) ++ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + } + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, + vcpu_to_synic(vcpu)->vec_bitmap, 256); +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 83a3f4c935fc..5400a24e1a8c 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -29,6 +29,12 @@ + * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi + */ + ++/* ++ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is ++ * stored in cpu_tlb_state.last_user_mm_ibpb. ++ */ ++#define LAST_USER_MM_IBPB 0x1UL ++ + /* + * We get here when we do something requiring a TLB invalidation + * but could not go invalidate all of the contexts. We do the +@@ -180,6 +186,89 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) + } + } + ++static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) ++{ ++ unsigned long next_tif = task_thread_info(next)->flags; ++ unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; ++ ++ return (unsigned long)next->mm | ibpb; ++} ++ ++static void cond_ibpb(struct task_struct *next) ++{ ++ if (!next || !next->mm) ++ return; ++ ++ /* ++ * Both, the conditional and the always IBPB mode use the mm ++ * pointer to avoid the IBPB when switching between tasks of the ++ * same process. Using the mm pointer instead of mm->context.ctx_id ++ * opens a hypothetical hole vs. mm_struct reuse, which is more or ++ * less impossible to control by an attacker. Aside of that it ++ * would only affect the first schedule so the theoretically ++ * exposed data is not really interesting. 
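mm_mangle_tif_spec_ib() above folds the task's TIF_SPEC_IB state into bit 0 of the mm pointer, which is free because mm_struct is at least word aligned; the combined value is what gets remembered in cpu_tlbstate.last_user_mm_ibpb. A minimal sketch of the encoding (bit numbers are assumed, and a plain static int stands in for a struct mm_struct):

#include <stdint.h>
#include <stdio.h>

#define TIF_SPEC_IB        9
#define LAST_USER_MM_IBPB  0x1UL

static uintptr_t mangle(void *mm, unsigned long tif_flags)
{
    unsigned long ibpb = (tif_flags >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;

    /* Bit 0 of an aligned pointer is unused, so it can carry the flag. */
    return (uintptr_t)mm | ibpb;
}

int main(void)
{
    static int dummy_mm;   /* stands in for a struct mm_struct */

    printf("%#lx\n", (unsigned long)mangle(&dummy_mm, 0));
    printf("%#lx\n", (unsigned long)mangle(&dummy_mm, 1UL << TIF_SPEC_IB));
    return 0;
}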
++ */ ++ if (static_branch_likely(&switch_mm_cond_ibpb)) { ++ unsigned long prev_mm, next_mm; ++ ++ /* ++ * This is a bit more complex than the always mode because ++ * it has to handle two cases: ++ * ++ * 1) Switch from a user space task (potential attacker) ++ * which has TIF_SPEC_IB set to a user space task ++ * (potential victim) which has TIF_SPEC_IB not set. ++ * ++ * 2) Switch from a user space task (potential attacker) ++ * which has TIF_SPEC_IB not set to a user space task ++ * (potential victim) which has TIF_SPEC_IB set. ++ * ++ * This could be done by unconditionally issuing IBPB when ++ * a task which has TIF_SPEC_IB set is either scheduled in ++ * or out. Though that results in two flushes when: ++ * ++ * - the same user space task is scheduled out and later ++ * scheduled in again and only a kernel thread ran in ++ * between. ++ * ++ * - a user space task belonging to the same process is ++ * scheduled in after a kernel thread ran in between ++ * ++ * - a user space task belonging to the same process is ++ * scheduled in immediately. ++ * ++ * Optimize this with reasonably small overhead for the ++ * above cases. Mangle the TIF_SPEC_IB bit into the mm ++ * pointer of the incoming task which is stored in ++ * cpu_tlbstate.last_user_mm_ibpb for comparison. ++ */ ++ next_mm = mm_mangle_tif_spec_ib(next); ++ prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); ++ ++ /* ++ * Issue IBPB only if the mm's are different and one or ++ * both have the IBPB bit set. ++ */ ++ if (next_mm != prev_mm && ++ (next_mm | prev_mm) & LAST_USER_MM_IBPB) ++ indirect_branch_prediction_barrier(); ++ ++ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); ++ } ++ ++ if (static_branch_unlikely(&switch_mm_always_ibpb)) { ++ /* ++ * Only flush when switching to a user space task with a ++ * different context than the user space task which ran ++ * last on this CPU. ++ */ ++ if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { ++ indirect_branch_prediction_barrier(); ++ this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); ++ } ++ } ++} ++ + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) + { +@@ -248,27 +337,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + } else { + u16 new_asid; + bool need_flush; +- u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); + + /* + * Avoid user/user BTB poisoning by flushing the branch + * predictor when switching between processes. This stops + * one process from doing Spectre-v2 attacks on another. +- * +- * As an optimization, flush indirect branches only when +- * switching into processes that disable dumping. This +- * protects high value processes like gpg, without having +- * too high performance overhead. IBPB is *expensive*! +- * +- * This will not flush branches when switching into kernel +- * threads. It will also not flush if we switch to idle +- * thread and back to the same process. It will flush if we +- * switch to a different non-dumpable process. 
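The resulting decision in cond_ibpb() boils down to a single expression: issue the barrier only when the mangled values differ and at least one of them carries the IBPB bit. A tiny truth-table style sketch (the addresses are invented):

#include <stdbool.h>
#include <stdio.h>

#define LAST_USER_MM_IBPB 0x1UL

static bool needs_ibpb(unsigned long prev_mm, unsigned long next_mm)
{
    return next_mm != prev_mm && ((next_mm | prev_mm) & LAST_USER_MM_IBPB);
}

int main(void)
{
    printf("%d\n", needs_ibpb(0x1000, 0x2001)); /* different mm, one opted in: 1 */
    printf("%d\n", needs_ibpb(0x2001, 0x2001)); /* same mm and state: 0 */
    printf("%d\n", needs_ibpb(0x1000, 0x2000)); /* different mm, nobody opted in: 0 */
    return 0;
}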
+ */ +- if (tsk && tsk->mm && +- tsk->mm->context.ctx_id != last_ctx_id && +- get_dumpable(tsk->mm) != SUID_DUMP_USER) +- indirect_branch_prediction_barrier(); ++ cond_ibpb(tsk); + + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + /* +@@ -318,14 +393,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); + } + +- /* +- * Record last user mm's context id, so we can avoid +- * flushing branch buffer with IBPB if we switch back +- * to the same user. +- */ +- if (next != &init_mm) +- this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); +- + /* Make sure we write CR3 before loaded_mm. */ + barrier(); + +@@ -406,7 +473,7 @@ void initialize_tlbstate_and_flush(void) + write_cr3(build_cr3(mm->pgd, 0)); + + /* Reinitialize tlbstate. */ +- this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); ++ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); + this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); + this_cpu_write(cpu_tlbstate.next_asid, 1); + this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); +diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c +index bcb5beb81177..7df02fc934a9 100644 +--- a/arch/xtensa/kernel/asm-offsets.c ++++ b/arch/xtensa/kernel/asm-offsets.c +@@ -91,14 +91,14 @@ int main(void) + DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); + DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); + #if XTENSA_HAVE_COPROCESSORS +- DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp)); +- DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp)); ++ DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); ++ DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); ++ DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2)); ++ DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3)); ++ DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4)); ++ DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5)); ++ DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6)); ++ DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7)); + #endif + DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user)); + DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t)); +diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c +index ff4f0ecb03dd..f1c46bc5d465 100644 +--- a/arch/xtensa/kernel/process.c ++++ b/arch/xtensa/kernel/process.c +@@ -88,18 +88,21 @@ void coprocessor_release_all(struct thread_info *ti) + + void coprocessor_flush_all(struct thread_info *ti) + { +- unsigned long cpenable; ++ unsigned long cpenable, old_cpenable; + int i; + + preempt_disable(); + ++ RSR_CPENABLE(old_cpenable); + cpenable = ti->cpenable; ++ WSR_CPENABLE(cpenable); + + for (i = 0; i < XCHAL_CP_MAX; i++) { + if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti) + coprocessor_flush(ti, i); + cpenable >>= 1; + } ++ WSR_CPENABLE(old_cpenable); + + preempt_enable(); + } +diff --git 
a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c +index e2461968efb2..7c3ed7d78075 100644 +--- a/arch/xtensa/kernel/ptrace.c ++++ b/arch/xtensa/kernel/ptrace.c +@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs) + } + + ++#if XTENSA_HAVE_COPROCESSORS ++#define CP_OFFSETS(cp) \ ++ { \ ++ .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \ ++ .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \ ++ .sz = sizeof(xtregs_ ## cp ## _t), \ ++ } ++ ++static const struct { ++ size_t elf_xtregs_offset; ++ size_t ti_offset; ++ size_t sz; ++} cp_offsets[] = { ++ CP_OFFSETS(cp0), ++ CP_OFFSETS(cp1), ++ CP_OFFSETS(cp2), ++ CP_OFFSETS(cp3), ++ CP_OFFSETS(cp4), ++ CP_OFFSETS(cp5), ++ CP_OFFSETS(cp6), ++ CP_OFFSETS(cp7), ++}; ++#endif ++ + static int ptrace_getxregs(struct task_struct *child, void __user *uregs) + { + struct pt_regs *regs = task_pt_regs(child); + struct thread_info *ti = task_thread_info(child); + elf_xtregs_t __user *xtregs = uregs; + int ret = 0; ++ int i __maybe_unused; + + if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t))) + return -EIO; +@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs) + #if XTENSA_HAVE_COPROCESSORS + /* Flush all coprocessor registers to memory. */ + coprocessor_flush_all(ti); +- ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp, +- sizeof(xtregs_coprocessor_t)); ++ ++ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) ++ ret |= __copy_to_user((char __user *)xtregs + ++ cp_offsets[i].elf_xtregs_offset, ++ (const char *)ti + ++ cp_offsets[i].ti_offset, ++ cp_offsets[i].sz); + #endif + ret |= __copy_to_user(&xtregs->opt, ®s->xtregs_opt, + sizeof(xtregs->opt)); +@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) + struct pt_regs *regs = task_pt_regs(child); + elf_xtregs_t *xtregs = uregs; + int ret = 0; ++ int i __maybe_unused; + + if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t))) + return -EFAULT; +@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) + coprocessor_flush_all(ti); + coprocessor_release_all(ti); + +- ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0, +- sizeof(xtregs_coprocessor_t)); ++ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) ++ ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset, ++ (const char __user *)xtregs + ++ cp_offsets[i].elf_xtregs_offset, ++ cp_offsets[i].sz); + #endif + ret |= __copy_from_user(®s->xtregs_opt, &xtregs->opt, + sizeof(xtregs->opt)); +diff --git a/drivers/android/binder.c b/drivers/android/binder.c +index a86c27948fca..96a0f940e54d 100644 +--- a/drivers/android/binder.c ++++ b/drivers/android/binder.c +@@ -2918,7 +2918,6 @@ static void binder_transaction(struct binder_proc *proc, + t->buffer = NULL; + goto err_binder_alloc_buf_failed; + } +- t->buffer->allow_user_free = 0; + t->buffer->debug_id = t->debug_id; + t->buffer->transaction = t; + t->buffer->target_node = target_node; +@@ -3407,14 +3406,18 @@ static int binder_thread_write(struct binder_proc *proc, + + buffer = binder_alloc_prepare_to_free(&proc->alloc, + data_ptr); +- if (buffer == NULL) { +- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", +- proc->pid, thread->pid, (u64)data_ptr); +- break; +- } +- if (!buffer->allow_user_free) { +- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n", +- proc->pid, thread->pid, (u64)data_ptr); ++ if (IS_ERR_OR_NULL(buffer)) { ++ if (PTR_ERR(buffer) == -EPERM) { ++ binder_user_error( 
++ "%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n", ++ proc->pid, thread->pid, ++ (u64)data_ptr); ++ } else { ++ binder_user_error( ++ "%d:%d BC_FREE_BUFFER u%016llx no match\n", ++ proc->pid, thread->pid, ++ (u64)data_ptr); ++ } + break; + } + binder_debug(BINDER_DEBUG_FREE_BUFFER, +diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c +index 58e4658f9dd6..b9281f2725a6 100644 +--- a/drivers/android/binder_alloc.c ++++ b/drivers/android/binder_alloc.c +@@ -149,14 +149,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked( + else { + /* + * Guard against user threads attempting to +- * free the buffer twice ++ * free the buffer when in use by kernel or ++ * after it's already been freed. + */ +- if (buffer->free_in_progress) { +- pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n", +- alloc->pid, current->pid, (u64)user_ptr); +- return NULL; +- } +- buffer->free_in_progress = 1; ++ if (!buffer->allow_user_free) ++ return ERR_PTR(-EPERM); ++ buffer->allow_user_free = 0; + return buffer; + } + } +@@ -486,7 +484,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, + + rb_erase(best_fit, &alloc->free_buffers); + buffer->free = 0; +- buffer->free_in_progress = 0; ++ buffer->allow_user_free = 0; + binder_insert_allocated_buffer_locked(alloc, buffer); + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got %pK\n", +diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h +index 2dd33b6df104..a3ad7683b6f2 100644 +--- a/drivers/android/binder_alloc.h ++++ b/drivers/android/binder_alloc.h +@@ -50,8 +50,7 @@ struct binder_buffer { + unsigned free:1; + unsigned allow_user_free:1; + unsigned async_transaction:1; +- unsigned free_in_progress:1; +- unsigned debug_id:28; ++ unsigned debug_id:29; + + struct binder_transaction *transaction; + +diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c +index a861b5b4d443..21ed0e20c5d9 100644 +--- a/drivers/dma/at_hdmac.c ++++ b/drivers/dma/at_hdmac.c +@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan) + atchan->descs_allocated = 0; + atchan->status = 0; + ++ /* ++ * Free atslave allocated in at_dma_xlate() ++ */ ++ kfree(chan->private); ++ chan->private = NULL; ++ + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); + } + +@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + +- atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL); ++ atslave = kzalloc(sizeof(*atslave), GFP_KERNEL); + if (!atslave) + return NULL; + +@@ -2000,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev) + struct resource *io; + + at_dma_off(atdma); ++ if (pdev->dev.of_node) ++ of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&atdma->dma_common); + + dma_pool_destroy(atdma->memset_pool); +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index d96b09fea835..e05de5032f0c 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -454,6 +454,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, + } + wait_for_completion(&msginfo->waitevent); + ++ if (msginfo->response.gpadl_created.creation_status != 0) { ++ pr_err("Failed to establish GPADL: err = 0x%x\n", ++ msginfo->response.gpadl_created.creation_status); ++ ++ ret = -EDQUOT; ++ goto cleanup; ++ } ++ + if (channel->rescind) { + ret = -ENODEV; + goto 
cleanup; +diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c +index 0a9e8fadfa9d..37ab30566464 100644 +--- a/drivers/iio/magnetometer/st_magn_buffer.c ++++ b/drivers/iio/magnetometer/st_magn_buffer.c +@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state) + return st_sensors_set_dataready_irq(indio_dev, state); + } + +-static int st_magn_buffer_preenable(struct iio_dev *indio_dev) +-{ +- return st_sensors_set_enable(indio_dev, true); +-} +- + static int st_magn_buffer_postenable(struct iio_dev *indio_dev) + { + int err; +@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev) + if (err < 0) + goto st_magn_buffer_postenable_error; + +- return err; ++ return st_sensors_set_enable(indio_dev, true); + + st_magn_buffer_postenable_error: + kfree(mdata->buffer_data); +@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev) + int err; + struct st_sensor_data *mdata = iio_priv(indio_dev); + +- err = iio_triggered_buffer_predisable(indio_dev); ++ err = st_sensors_set_enable(indio_dev, false); + if (err < 0) + goto st_magn_buffer_predisable_error; + +- err = st_sensors_set_enable(indio_dev, false); ++ err = iio_triggered_buffer_predisable(indio_dev); + + st_magn_buffer_predisable_error: + kfree(mdata->buffer_data); +@@ -75,7 +70,6 @@ st_magn_buffer_predisable_error: + } + + static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = { +- .preenable = &st_magn_buffer_preenable, + .postenable = &st_magn_buffer_postenable, + .predisable = &st_magn_buffer_predisable, + }; +diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c +index 4a7db623fe29..29cdaaf1ed90 100644 +--- a/drivers/media/usb/em28xx/em28xx-dvb.c ++++ b/drivers/media/usb/em28xx/em28xx-dvb.c +@@ -2105,6 +2105,8 @@ static int em28xx_dvb_fini(struct em28xx *dev) + } + } + ++ em28xx_unregister_dvb(dvb); ++ + /* remove I2C SEC */ + client = dvb->i2c_client_sec; + if (client) { +@@ -2126,7 +2128,6 @@ static int em28xx_dvb_fini(struct em28xx *dev) + i2c_unregister_device(client); + } + +- em28xx_unregister_dvb(dvb); + kfree(dvb); + dev->dvb = NULL; + kref_put(&dev->ref, em28xx_free_device); +diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c +index 329727e00e97..95745dc4e0ec 100644 +--- a/drivers/misc/mic/scif/scif_rma.c ++++ b/drivers/misc/mic/scif/scif_rma.c +@@ -417,7 +417,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev, + if (err) + goto error_window; + err = scif_map_page(&window->num_pages_lookup.lookup[j], +- vmalloc_dma_phys ? ++ vmalloc_num_pages ? + vmalloc_to_page(&window->num_pages[i]) : + virt_to_page(&window->num_pages[i]), + remote_dev); +diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c +index 94d7a865b135..7504f430c011 100644 +--- a/drivers/mtd/ubi/vtbl.c ++++ b/drivers/mtd/ubi/vtbl.c +@@ -578,6 +578,16 @@ static int init_volumes(struct ubi_device *ubi, + vol->ubi = ubi; + reserved_pebs += vol->reserved_pebs; + ++ /* ++ * We use ubi->peb_count and not vol->reserved_pebs because ++ * we want to keep the code simple. Otherwise we'd have to ++ * resize/check the bitmap upon volume resize too. ++ * Allocating a few bytes more does not hurt. ++ */ ++ err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); ++ if (err) ++ return err; ++ + /* + * In case of dynamic volume UBI knows nothing about how many + * data is stored there. So assume the whole volume is used. 
+@@ -620,16 +630,6 @@ static int init_volumes(struct ubi_device *ubi, + (long long)(vol->used_ebs - 1) * vol->usable_leb_size; + vol->used_bytes += av->last_data_size; + vol->last_eb_bytes = av->last_data_size; +- +- /* +- * We use ubi->peb_count and not vol->reserved_pebs because +- * we want to keep the code simple. Otherwise we'd have to +- * resize/check the bitmap upon volume resize too. +- * Allocating a few bytes more does not hurt. +- */ +- err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); +- if (err) +- return err; + } + + /* And add the layout volume */ +diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +index 2237ef8e4344..f13256af8031 100644 +--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +@@ -1691,6 +1691,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) + bool if_up = netif_running(nic->netdev); + struct bpf_prog *old_prog; + bool bpf_attached = false; ++ int ret = 0; + + /* For now just support only the usual MTU sized frames */ + if (prog && (dev->mtu > 1500)) { +@@ -1724,8 +1725,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) + if (nic->xdp_prog) { + /* Attach BPF program */ + nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); +- if (!IS_ERR(nic->xdp_prog)) ++ if (!IS_ERR(nic->xdp_prog)) { + bpf_attached = true; ++ } else { ++ ret = PTR_ERR(nic->xdp_prog); ++ nic->xdp_prog = NULL; ++ } + } + + /* Calculate Tx queues needed for XDP and network stack */ +@@ -1737,7 +1742,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) + netif_trans_update(nic->netdev); + } + +- return 0; ++ return ret; + } + + static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +index a3d12dbde95b..09494e1c77c5 100644 +--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) + if (!sq->dmem.base) + return; + +- if (sq->tso_hdrs) ++ if (sq->tso_hdrs) { + dma_free_coherent(&nic->pdev->dev, + sq->dmem.q_len * TSO_HEADER_SIZE, + sq->tso_hdrs, sq->tso_hdrs_phys); ++ sq->tso_hdrs = NULL; ++ } + + /* Free pending skbs in the queue */ + smp_rmb(); +diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c +index e9f101c9bae2..bfbb39f93554 100644 +--- a/drivers/net/rionet.c ++++ b/drivers/net/rionet.c +@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) + * it just report sending a packet to the target + * (without actual packet transfer). 
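The nicvf_free_snd_queue() hunk above is the classic free-and-NULL idiom: clearing sq->tso_hdrs right after freeing it turns a repeated teardown pass into a no-op instead of a double free. A trivial user-space illustration (plain malloc/free stand in for dma_free_coherent):

#include <stdlib.h>

struct queue {
    void *tso_hdrs;
};

static void free_queue_bufs(struct queue *q)
{
    if (q->tso_hdrs) {
        free(q->tso_hdrs);
        q->tso_hdrs = NULL;   /* makes the function safe to call again */
    }
}

int main(void)
{
    struct queue q = { .tso_hdrs = malloc(64) };

    free_queue_bufs(&q);
    free_queue_bufs(&q);      /* second call is now a harmless no-op */
    return 0;
}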
+ */ +- dev_kfree_skb_any(skb); + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += skb->len; ++ dev_kfree_skb_any(skb); + } + } + +diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c +index d49c7103085e..aabbcfb6e6da 100644 +--- a/drivers/net/usb/ipheth.c ++++ b/drivers/net/usb/ipheth.c +@@ -140,7 +140,6 @@ struct ipheth_device { + struct usb_device *udev; + struct usb_interface *intf; + struct net_device *net; +- struct sk_buff *tx_skb; + struct urb *tx_urb; + struct urb *rx_urb; + unsigned char *tx_buf; +@@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb) + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: ++ case -EPROTO: + return; + case 0: + break; +@@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb) + dev_err(&dev->intf->dev, "%s: urb status: %d\n", + __func__, status); + +- dev_kfree_skb_irq(dev->tx_skb); + netif_wake_queue(dev->net); + } + +@@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) + if (skb->len > IPHETH_BUF_SIZE) { + WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len); + dev->net->stats.tx_dropped++; +- dev_kfree_skb_irq(skb); ++ dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + +@@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) + dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", + __func__, retval); + dev->net->stats.tx_errors++; +- dev_kfree_skb_irq(skb); ++ dev_kfree_skb_any(skb); + } else { +- dev->tx_skb = skb; +- + dev->net->stats.tx_packets++; + dev->net->stats.tx_bytes += skb->len; ++ dev_consume_skb_any(skb); + netif_stop_queue(net); + } + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index f528e9ac3413..0e8e3be50332 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -61,7 +61,8 @@ static const unsigned long guest_offloads[] = { + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, +- VIRTIO_NET_F_GUEST_UFO ++ VIRTIO_NET_F_GUEST_UFO, ++ VIRTIO_NET_F_GUEST_CSUM + }; + + struct virtnet_stats { +@@ -1939,9 +1940,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi) + if (!vi->guest_offloads) + return 0; + +- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) +- offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM; +- + return virtnet_set_guest_offloads(vi, offloads); + } + +@@ -1951,8 +1949,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi) + + if (!vi->guest_offloads) + return 0; +- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) +- offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM; + + return virtnet_set_guest_offloads(vi, offloads); + } +@@ -1970,8 +1966,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || +- virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) { +- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first"); ++ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || ++ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { ++ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); + return -EOPNOTSUPP; + } + +diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c +index ffdd2fa401b1..d63d7c326801 100644 +--- a/drivers/net/wireless/ath/wil6210/wmi.c ++++ 
b/drivers/net/wireless/ath/wil6210/wmi.c +@@ -1380,8 +1380,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie) + }; + int rc; + u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len; +- struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL); ++ struct wmi_set_appie_cmd *cmd; + ++ if (len < ie_len) { ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ cmd = kzalloc(len, GFP_KERNEL); + if (!cmd) { + rc = -ENOMEM; + goto out; +diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c +index f48c3f62966d..761cf8573a80 100644 +--- a/drivers/net/wireless/ti/wlcore/cmd.c ++++ b/drivers/net/wireless/ti/wlcore/cmd.c +@@ -35,7 +35,6 @@ + #include "wl12xx_80211.h" + #include "cmd.h" + #include "event.h" +-#include "ps.h" + #include "tx.h" + #include "hw_ops.h" + +@@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, + + timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT); + +- ret = wl1271_ps_elp_wakeup(wl); +- if (ret < 0) +- return ret; +- + do { + if (time_after(jiffies, timeout_time)) { + wl1271_debug(DEBUG_CMD, "timeout waiting for event %d", +@@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, + } while (!event); + + out: +- wl1271_ps_elp_sleep(wl); + kfree(events_vector); + return ret; + } +diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c +index 87fa486bee2c..1ede4b60aac3 100644 +--- a/drivers/pci/dwc/pci-layerscape.c ++++ b/drivers/pci/dwc/pci-layerscape.c +@@ -89,7 +89,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie) + int i; + + for (i = 0; i < PCIE_IATU_NUM; i++) +- dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i); ++ dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND); + } + + static int ls1021_pcie_link_up(struct dw_pcie *pci) +diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c +index 169dd7127f9e..69ef5f4060ed 100644 +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -4545,8 +4545,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card, + { + struct qeth_ipa_cmd *cmd; + struct qeth_arp_query_info *qinfo; +- struct qeth_snmp_cmd *snmp; + unsigned char *data; ++ void *snmp_data; + __u16 data_len; + + QETH_CARD_TEXT(card, 3, "snpcmdcb"); +@@ -4554,7 +4554,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card, + cmd = (struct qeth_ipa_cmd *) sdata; + data = (unsigned char *)((char *)cmd - reply->offset); + qinfo = (struct qeth_arp_query_info *) reply->param; +- snmp = &cmd->data.setadapterparms.data.snmp; + + if (cmd->hdr.return_code) { + QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code); +@@ -4567,10 +4566,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card, + return 0; + } + data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data)); +- if (cmd->data.setadapterparms.hdr.seq_no == 1) +- data_len -= (__u16)((char *)&snmp->data - (char *)cmd); +- else +- data_len -= (__u16)((char *)&snmp->request - (char *)cmd); ++ if (cmd->data.setadapterparms.hdr.seq_no == 1) { ++ snmp_data = &cmd->data.setadapterparms.data.snmp; ++ data_len -= offsetof(struct qeth_ipa_cmd, ++ data.setadapterparms.data.snmp); ++ } else { ++ snmp_data = &cmd->data.setadapterparms.data.snmp.request; ++ data_len -= offsetof(struct qeth_ipa_cmd, ++ data.setadapterparms.data.snmp.request); ++ } + + /* check if there is enough room in userspace */ + if ((qinfo->udata_len - qinfo->udata_offset) < data_len) { +@@ -4583,16 +4587,9 @@ static int 
qeth_snmp_command_cb(struct qeth_card *card, + QETH_CARD_TEXT_(card, 4, "sseqn%i", + cmd->data.setadapterparms.hdr.seq_no); + /*copy entries to user buffer*/ +- if (cmd->data.setadapterparms.hdr.seq_no == 1) { +- memcpy(qinfo->udata + qinfo->udata_offset, +- (char *)snmp, +- data_len + offsetof(struct qeth_snmp_cmd, data)); +- qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data); +- } else { +- memcpy(qinfo->udata + qinfo->udata_offset, +- (char *)&snmp->request, data_len); +- } ++ memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len); + qinfo->udata_offset += data_len; ++ + /* check if all replies received ... */ + QETH_CARD_TEXT_(card, 4, "srtot%i", + cmd->data.setadapterparms.hdr.used_total); +diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +index bd4352fe2de3..83852f323c5e 100644 +--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c ++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +@@ -1293,7 +1293,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy, + + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); + sinfo->tx_packets = psta->sta_stats.tx_pkts; +- ++ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); + } + + /* for Ad-Hoc/AP mode */ +diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +index 314ffac50bb8..f05e9af4fe81 100644 +--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c ++++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +@@ -1461,6 +1461,7 @@ vchiq_compat_ioctl_await_completion(struct file *file, + struct vchiq_await_completion32 args32; + struct vchiq_completion_data32 completion32; + unsigned int *msgbufcount32; ++ unsigned int msgbufcount_native; + compat_uptr_t msgbuf32; + void *msgbuf; + void **msgbufptr; +@@ -1572,7 +1573,11 @@ vchiq_compat_ioctl_await_completion(struct file *file, + sizeof(completion32))) + return -EFAULT; + +- args32.msgbufcount--; ++ if (get_user(msgbufcount_native, &args->msgbufcount)) ++ return -EFAULT; ++ ++ if (!msgbufcount_native) ++ args32.msgbufcount--; + + msgbufcount32 = + &((struct vchiq_await_completion32 __user *)arg)->msgbufcount; +diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c +index 1e8f68960014..808437c5ec49 100644 +--- a/drivers/usb/core/quirks.c ++++ b/drivers/usb/core/quirks.c +@@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = { + /* Microsoft LifeCam-VX700 v2.0 */ + { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + ++ /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ ++ { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, ++ + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ + { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, +diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c +index ac8d619ff887..b8704c0678f9 100644 +--- a/drivers/usb/dwc3/gadget.c ++++ b/drivers/usb/dwc3/gadget.c +@@ -1511,9 +1511,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) + unsigned transfer_in_flight; + unsigned started; + +- if (dep->flags & DWC3_EP_STALL) +- return 0; +- + if (dep->number > 1) + trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue); + else +@@ -1535,8 +1532,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) + else + dep->flags |= DWC3_EP_STALL; + } else 
{ +- if (!(dep->flags & DWC3_EP_STALL)) +- return 0; + + ret = dwc3_send_clear_stall_ep_cmd(dep); + if (ret) +diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h +index 8fe624ad302a..7ca779493671 100644 +--- a/drivers/usb/storage/unusual_realtek.h ++++ b/drivers/usb/storage/unusual_realtek.h +@@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999, + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + ++UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999, ++ "Realtek", ++ "USB Card Reader", ++ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), ++ ++UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999, ++ "Realtek", ++ "USB Card Reader", ++ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), ++ + #endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */ +diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile +index f2cd9dedb037..195229df5ba0 100644 +--- a/fs/btrfs/Makefile ++++ b/fs/btrfs/Makefile +@@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ + compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ + reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ +- uuid-tree.o props.o hash.o free-space-tree.o ++ uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o + + btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o + btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 0e67cee73c53..e42673477c25 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -50,6 +50,7 @@ + #include "sysfs.h" + #include "qgroup.h" + #include "compression.h" ++#include "tree-checker.h" + + #ifdef CONFIG_X86 + #include <asm/cpufeature.h> +@@ -544,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, + return ret; + } + +-#define CORRUPT(reason, eb, root, slot) \ +- btrfs_crit(root->fs_info, \ +- "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ +- btrfs_header_level(eb) == 0 ? "leaf" : "node", \ +- reason, btrfs_header_bytenr(eb), root->objectid, slot) +- +-static noinline int check_leaf(struct btrfs_root *root, +- struct extent_buffer *leaf) +-{ +- struct btrfs_fs_info *fs_info = root->fs_info; +- struct btrfs_key key; +- struct btrfs_key leaf_key; +- u32 nritems = btrfs_header_nritems(leaf); +- int slot; +- +- /* +- * Extent buffers from a relocation tree have a owner field that +- * corresponds to the subvolume tree they are based on. So just from an +- * extent buffer alone we can not find out what is the id of the +- * corresponding subvolume tree, so we can not figure out if the extent +- * buffer corresponds to the root of the relocation tree or not. So skip +- * this check for relocation trees. +- */ +- if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { +- struct btrfs_root *check_root; +- +- key.objectid = btrfs_header_owner(leaf); +- key.type = BTRFS_ROOT_ITEM_KEY; +- key.offset = (u64)-1; +- +- check_root = btrfs_get_fs_root(fs_info, &key, false); +- /* +- * The only reason we also check NULL here is that during +- * open_ctree() some roots has not yet been set up. 
+- */ +- if (!IS_ERR_OR_NULL(check_root)) { +- struct extent_buffer *eb; +- +- eb = btrfs_root_node(check_root); +- /* if leaf is the root, then it's fine */ +- if (leaf != eb) { +- CORRUPT("non-root leaf's nritems is 0", +- leaf, check_root, 0); +- free_extent_buffer(eb); +- return -EIO; +- } +- free_extent_buffer(eb); +- } +- return 0; +- } +- +- if (nritems == 0) +- return 0; +- +- /* Check the 0 item */ +- if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != +- BTRFS_LEAF_DATA_SIZE(fs_info)) { +- CORRUPT("invalid item offset size pair", leaf, root, 0); +- return -EIO; +- } +- +- /* +- * Check to make sure each items keys are in the correct order and their +- * offsets make sense. We only have to loop through nritems-1 because +- * we check the current slot against the next slot, which verifies the +- * next slot's offset+size makes sense and that the current's slot +- * offset is correct. +- */ +- for (slot = 0; slot < nritems - 1; slot++) { +- btrfs_item_key_to_cpu(leaf, &leaf_key, slot); +- btrfs_item_key_to_cpu(leaf, &key, slot + 1); +- +- /* Make sure the keys are in the right order */ +- if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { +- CORRUPT("bad key order", leaf, root, slot); +- return -EIO; +- } +- +- /* +- * Make sure the offset and ends are right, remember that the +- * item data starts at the end of the leaf and grows towards the +- * front. +- */ +- if (btrfs_item_offset_nr(leaf, slot) != +- btrfs_item_end_nr(leaf, slot + 1)) { +- CORRUPT("slot offset bad", leaf, root, slot); +- return -EIO; +- } +- +- /* +- * Check to make sure that we don't point outside of the leaf, +- * just in case all the items are consistent to each other, but +- * all point outside of the leaf. +- */ +- if (btrfs_item_end_nr(leaf, slot) > +- BTRFS_LEAF_DATA_SIZE(fs_info)) { +- CORRUPT("slot end outside of leaf", leaf, root, slot); +- return -EIO; +- } +- } +- +- return 0; +-} +- +-static int check_node(struct btrfs_root *root, struct extent_buffer *node) +-{ +- unsigned long nr = btrfs_header_nritems(node); +- struct btrfs_key key, next_key; +- int slot; +- u64 bytenr; +- int ret = 0; +- +- if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { +- btrfs_crit(root->fs_info, +- "corrupt node: block %llu root %llu nritems %lu", +- node->start, root->objectid, nr); +- return -EIO; +- } +- +- for (slot = 0; slot < nr - 1; slot++) { +- bytenr = btrfs_node_blockptr(node, slot); +- btrfs_node_key_to_cpu(node, &key, slot); +- btrfs_node_key_to_cpu(node, &next_key, slot + 1); +- +- if (!bytenr) { +- CORRUPT("invalid item slot", node, root, slot); +- ret = -EIO; +- goto out; +- } +- +- if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { +- CORRUPT("bad key order", node, root, slot); +- ret = -EIO; +- goto out; +- } +- } +-out: +- return ret; +-} +- + static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) +@@ -749,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + * that we don't try and read the other copies of this block, just + * return -EIO. 
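The leaf checks being removed here (and reintroduced in tree-checker.c below) enforce a simple layout invariant: item data is packed from the end of the leaf towards the front, so each item's offset must equal the end of the next item, and nothing may reach past the data area. A compact stand-alone model of that check (the structure and sizes are invented for the example):

#include <stdio.h>

#define LEAF_DATA_SIZE 4096u

struct item {
    unsigned int offset;
    unsigned int size;
};

static int check_items(const struct item *items, int nritems)
{
    for (int i = 0; i < nritems; i++) {
        if (items[i].offset + items[i].size > LEAF_DATA_SIZE)
            return -1;                          /* points outside the leaf */
        if (i + 1 < nritems &&
            items[i].offset != items[i + 1].offset + items[i + 1].size)
            return -1;                          /* hole or overlap */
    }
    return 0;
}

int main(void)
{
    struct item good[] = { { 4000, 96 }, { 3900, 100 } };
    struct item bad[]  = { { 4000, 96 }, { 3890, 100 } };

    printf("%d %d\n", check_items(good, 2), check_items(bad, 2));  /* 0 -1 */
    return 0;
}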
+ */ +- if (found_level == 0 && check_leaf(root, eb)) { ++ if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { + set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + ret = -EIO; + } + +- if (found_level > 0 && check_node(root, eb)) ++ if (found_level > 0 && btrfs_check_node(root, eb)) + ret = -EIO; + + if (!ret) +@@ -4009,7 +3870,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) + buf->len, + fs_info->dirty_metadata_batch); + #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY +- if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { ++ /* ++ * Since btrfs_mark_buffer_dirty() can be called with item pointer set ++ * but item data not updated. ++ * So here we should only check item pointers, not item data. ++ */ ++ if (btrfs_header_level(buf) == 0 && ++ btrfs_check_leaf_relaxed(root, buf)) { + btrfs_print_leaf(buf); + ASSERT(0); + } +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c +index 2cb3569ac548..83791d13c204 100644 +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -9828,6 +9828,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, + int ret = 0; + struct btrfs_key found_key; + struct extent_buffer *leaf; ++ struct btrfs_block_group_item bg; ++ u64 flags; + int slot; + + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); +@@ -9862,8 +9864,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, + "logical %llu len %llu found bg but no related chunk", + found_key.objectid, found_key.offset); + ret = -ENOENT; ++ } else if (em->start != found_key.objectid || ++ em->len != found_key.offset) { ++ btrfs_err(fs_info, ++ "block group %llu len %llu mismatch with chunk %llu len %llu", ++ found_key.objectid, found_key.offset, ++ em->start, em->len); ++ ret = -EUCLEAN; + } else { +- ret = 0; ++ read_extent_buffer(leaf, &bg, ++ btrfs_item_ptr_offset(leaf, slot), ++ sizeof(bg)); ++ flags = btrfs_block_group_flags(&bg) & ++ BTRFS_BLOCK_GROUP_TYPE_MASK; ++ ++ if (flags != (em->map_lookup->type & ++ BTRFS_BLOCK_GROUP_TYPE_MASK)) { ++ btrfs_err(fs_info, ++"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", ++ found_key.objectid, ++ found_key.offset, flags, ++ (BTRFS_BLOCK_GROUP_TYPE_MASK & ++ em->map_lookup->type)); ++ ret = -EUCLEAN; ++ } else { ++ ret = 0; ++ } + } + free_extent_map(em); + goto out; +@@ -10092,6 +10118,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, + return cache; + } + ++ ++/* ++ * Iterate all chunks and verify that each of them has the corresponding block ++ * group ++ */ ++static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) ++{ ++ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; ++ struct extent_map *em; ++ struct btrfs_block_group_cache *bg; ++ u64 start = 0; ++ int ret = 0; ++ ++ while (1) { ++ read_lock(&map_tree->map_tree.lock); ++ /* ++ * lookup_extent_mapping will return the first extent map ++ * intersecting the range, so setting @len to 1 is enough to ++ * get the first chunk. 
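check_chunk_block_group_mappings() walks every chunk by repeatedly asking for the first extent map at or after @start and then continuing past it; the reverse direction (block group to chunk) is covered by find_first_block_group() above. A stand-alone sketch of just the iteration pattern (a linear search over a sorted array stands in for the extent map tree, and the cross-check body is omitted):

#include <stdio.h>

struct extent { unsigned long long start, len; };

/* Containing-or-next lookup (assumes sorted, non-overlapping extents). */
static const struct extent *lookup(const struct extent *set, int n,
                                   unsigned long long start)
{
    for (int i = 0; i < n; i++)
        if (set[i].start + set[i].len > start)
            return &set[i];
    return NULL;
}

int main(void)
{
    const struct extent chunks[] = { { 0, 16 }, { 16, 32 }, { 64, 8 } };
    const struct extent *em;
    unsigned long long start = 0;

    while ((em = lookup(chunks, 3, start)) != NULL) {
        printf("chunk start=%llu len=%llu\n", em->start, em->len);
        start = em->start + em->len;   /* continue after this chunk */
    }
    return 0;
}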
++ */ ++ em = lookup_extent_mapping(&map_tree->map_tree, start, 1); ++ read_unlock(&map_tree->map_tree.lock); ++ if (!em) ++ break; ++ ++ bg = btrfs_lookup_block_group(fs_info, em->start); ++ if (!bg) { ++ btrfs_err(fs_info, ++ "chunk start=%llu len=%llu doesn't have corresponding block group", ++ em->start, em->len); ++ ret = -EUCLEAN; ++ free_extent_map(em); ++ break; ++ } ++ if (bg->key.objectid != em->start || ++ bg->key.offset != em->len || ++ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != ++ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { ++ btrfs_err(fs_info, ++"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", ++ em->start, em->len, ++ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, ++ bg->key.objectid, bg->key.offset, ++ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); ++ ret = -EUCLEAN; ++ free_extent_map(em); ++ btrfs_put_block_group(bg); ++ break; ++ } ++ start = em->start + em->len; ++ free_extent_map(em); ++ btrfs_put_block_group(bg); ++ } ++ return ret; ++} ++ + int btrfs_read_block_groups(struct btrfs_fs_info *info) + { + struct btrfs_path *path; +@@ -10264,7 +10346,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) + } + + init_global_block_rsv(info); +- ret = 0; ++ ret = check_chunk_block_group_mappings(info); + error: + btrfs_free_path(path); + return ret; +diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c +index eeae2c3ab17e..5feb8b03ffe8 100644 +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -4048,6 +4048,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) + restart: + if (update_backref_cache(trans, &rc->backref_cache)) { + btrfs_end_transaction(trans); ++ trans = NULL; + continue; + } + +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index fe960d5e8913..49a02bf091ae 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -2176,6 +2176,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + vol = memdup_user((void __user *)arg, sizeof(*vol)); + if (IS_ERR(vol)) + return PTR_ERR(vol); ++ vol->name[BTRFS_PATH_NAME_MAX] = '\0'; + + switch (cmd) { + case BTRFS_IOC_SCAN_DEV: +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index f74005ca8f08..73c1fbca0c35 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -1955,6 +1955,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) + return ret; + } + ++ btrfs_trans_release_metadata(trans, fs_info); ++ trans->block_rsv = NULL; ++ + /* make a pass through all the delayed refs we have so far + * any runnings procs may add more while we are here + */ +@@ -1964,9 +1967,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) + return ret; + } + +- btrfs_trans_release_metadata(trans, fs_info); +- trans->block_rsv = NULL; +- + cur_trans = trans->transaction; + + /* +diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c +new file mode 100644 +index 000000000000..f206aec1525d +--- /dev/null ++++ b/fs/btrfs/tree-checker.c +@@ -0,0 +1,649 @@ ++/* ++ * Copyright (C) Qu Wenruo 2017. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public ++ * License v2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public ++ * License along with this program. ++ */ ++ ++/* ++ * The module is used to catch unexpected/corrupted tree block data. ++ * Such behavior can be caused either by a fuzzed image or bugs. ++ * ++ * The objective is to do leaf/node validation checks when tree block is read ++ * from disk, and check *every* possible member, so other code won't ++ * need to checking them again. ++ * ++ * Due to the potential and unwanted damage, every checker needs to be ++ * carefully reviewed otherwise so it does not prevent mount of valid images. ++ */ ++ ++#include "ctree.h" ++#include "tree-checker.h" ++#include "disk-io.h" ++#include "compression.h" ++#include "hash.h" ++#include "volumes.h" ++ ++#define CORRUPT(reason, eb, root, slot) \ ++ btrfs_crit(root->fs_info, \ ++ "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ ++ btrfs_header_level(eb) == 0 ? "leaf" : "node", \ ++ reason, btrfs_header_bytenr(eb), root->objectid, slot) ++ ++/* ++ * Error message should follow the following format: ++ * corrupt <type>: <identifier>, <reason>[, <bad_value>] ++ * ++ * @type: leaf or node ++ * @identifier: the necessary info to locate the leaf/node. ++ * It's recommened to decode key.objecitd/offset if it's ++ * meaningful. ++ * @reason: describe the error ++ * @bad_value: optional, it's recommened to output bad value and its ++ * expected value (range). ++ * ++ * Since comma is used to separate the components, only space is allowed ++ * inside each component. ++ */ ++ ++/* ++ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. ++ * Allows callers to customize the output. ++ */ ++__printf(4, 5) ++static void generic_err(const struct btrfs_root *root, ++ const struct extent_buffer *eb, int slot, ++ const char *fmt, ...) ++{ ++ struct va_format vaf; ++ va_list args; ++ ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ btrfs_crit(root->fs_info, ++ "corrupt %s: root=%llu block=%llu slot=%d, %pV", ++ btrfs_header_level(eb) == 0 ? "leaf" : "node", ++ root->objectid, btrfs_header_bytenr(eb), slot, &vaf); ++ va_end(args); ++} ++ ++static int check_extent_data_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ struct btrfs_file_extent_item *fi; ++ u32 sectorsize = root->fs_info->sectorsize; ++ u32 item_size = btrfs_item_size_nr(leaf, slot); ++ ++ if (!IS_ALIGNED(key->offset, sectorsize)) { ++ CORRUPT("unaligned key offset for file extent", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); ++ ++ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { ++ CORRUPT("invalid file extent type", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Support for new compression/encrption must introduce incompat flag, ++ * and must be caught in open_ctree(). 
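generic_err() and the other report helpers in tree-checker.c are variadic wrappers that prepend the location (root, block, slot), so each checker only has to supply the reason text. A rough user-space analogue using vsnprintf instead of the kernel's %pV va_format mechanism (names invented):

#include <stdarg.h>
#include <stdio.h>

static void tree_err(unsigned long long root, unsigned long long block,
                     int slot, const char *fmt, ...)
{
    char msg[256];
    va_list args;

    va_start(args, fmt);
    vsnprintf(msg, sizeof(msg), fmt, args);
    va_end(args);

    fprintf(stderr, "corrupt leaf: root=%llu block=%llu slot=%d, %s\n",
            root, block, slot, msg);
}

int main(void)
{
    tree_err(5, 30507008, 12, "invalid file extent type, have %u", 42);
    return 0;
}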
++ */ ++ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { ++ CORRUPT("invalid file extent compression", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (btrfs_file_extent_encryption(leaf, fi)) { ++ CORRUPT("invalid file extent encryption", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { ++ /* Inline extent must have 0 as key offset */ ++ if (key->offset) { ++ CORRUPT("inline extent has non-zero key offset", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* Compressed inline extent has no on-disk size, skip it */ ++ if (btrfs_file_extent_compression(leaf, fi) != ++ BTRFS_COMPRESS_NONE) ++ return 0; ++ ++ /* Uncompressed inline extent size must match item size */ ++ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + ++ btrfs_file_extent_ram_bytes(leaf, fi)) { ++ CORRUPT("plaintext inline extent has invalid size", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ return 0; ++ } ++ ++ /* Regular or preallocated extent has fixed item size */ ++ if (item_size != sizeof(*fi)) { ++ CORRUPT( ++ "regluar or preallocated extent data item size is invalid", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { ++ CORRUPT( ++ "regular or preallocated extent data item has unaligned value", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ return 0; ++} ++ ++static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ u32 sectorsize = root->fs_info->sectorsize; ++ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); ++ ++ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { ++ CORRUPT("invalid objectid for csum item", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(key->offset, sectorsize)) { ++ CORRUPT("unaligned key offset for csum item", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { ++ CORRUPT("unaligned csum item size", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ return 0; ++} ++ ++/* ++ * Customized reported for dir_item, only important new info is key->objectid, ++ * which represents inode number ++ */ ++__printf(4, 5) ++static void dir_item_err(const struct btrfs_root *root, ++ const struct extent_buffer *eb, int slot, ++ const char *fmt, ...) ++{ ++ struct btrfs_key key; ++ struct va_format vaf; ++ va_list args; ++ ++ btrfs_item_key_to_cpu(eb, &key, slot); ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ btrfs_crit(root->fs_info, ++ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", ++ btrfs_header_level(eb) == 0 ? 
"leaf" : "node", root->objectid, ++ btrfs_header_bytenr(eb), slot, key.objectid, &vaf); ++ va_end(args); ++} ++ ++static int check_dir_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ struct btrfs_dir_item *di; ++ u32 item_size = btrfs_item_size_nr(leaf, slot); ++ u32 cur = 0; ++ ++ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); ++ while (cur < item_size) { ++ u32 name_len; ++ u32 data_len; ++ u32 max_name_len; ++ u32 total_size; ++ u32 name_hash; ++ u8 dir_type; ++ ++ /* header itself should not cross item boundary */ ++ if (cur + sizeof(*di) > item_size) { ++ dir_item_err(root, leaf, slot, ++ "dir item header crosses item boundary, have %zu boundary %u", ++ cur + sizeof(*di), item_size); ++ return -EUCLEAN; ++ } ++ ++ /* dir type check */ ++ dir_type = btrfs_dir_type(leaf, di); ++ if (dir_type >= BTRFS_FT_MAX) { ++ dir_item_err(root, leaf, slot, ++ "invalid dir item type, have %u expect [0, %u)", ++ dir_type, BTRFS_FT_MAX); ++ return -EUCLEAN; ++ } ++ ++ if (key->type == BTRFS_XATTR_ITEM_KEY && ++ dir_type != BTRFS_FT_XATTR) { ++ dir_item_err(root, leaf, slot, ++ "invalid dir item type for XATTR key, have %u expect %u", ++ dir_type, BTRFS_FT_XATTR); ++ return -EUCLEAN; ++ } ++ if (dir_type == BTRFS_FT_XATTR && ++ key->type != BTRFS_XATTR_ITEM_KEY) { ++ dir_item_err(root, leaf, slot, ++ "xattr dir type found for non-XATTR key"); ++ return -EUCLEAN; ++ } ++ if (dir_type == BTRFS_FT_XATTR) ++ max_name_len = XATTR_NAME_MAX; ++ else ++ max_name_len = BTRFS_NAME_LEN; ++ ++ /* Name/data length check */ ++ name_len = btrfs_dir_name_len(leaf, di); ++ data_len = btrfs_dir_data_len(leaf, di); ++ if (name_len > max_name_len) { ++ dir_item_err(root, leaf, slot, ++ "dir item name len too long, have %u max %u", ++ name_len, max_name_len); ++ return -EUCLEAN; ++ } ++ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) { ++ dir_item_err(root, leaf, slot, ++ "dir item name and data len too long, have %u max %u", ++ name_len + data_len, ++ BTRFS_MAX_XATTR_SIZE(root->fs_info)); ++ return -EUCLEAN; ++ } ++ ++ if (data_len && dir_type != BTRFS_FT_XATTR) { ++ dir_item_err(root, leaf, slot, ++ "dir item with invalid data len, have %u expect 0", ++ data_len); ++ return -EUCLEAN; ++ } ++ ++ total_size = sizeof(*di) + name_len + data_len; ++ ++ /* header and name/data should not cross item boundary */ ++ if (cur + total_size > item_size) { ++ dir_item_err(root, leaf, slot, ++ "dir item data crosses item boundary, have %u boundary %u", ++ cur + total_size, item_size); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Special check for XATTR/DIR_ITEM, as key->offset is name ++ * hash, should match its name ++ */ ++ if (key->type == BTRFS_DIR_ITEM_KEY || ++ key->type == BTRFS_XATTR_ITEM_KEY) { ++ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; ++ ++ read_extent_buffer(leaf, namebuf, ++ (unsigned long)(di + 1), name_len); ++ name_hash = btrfs_name_hash(namebuf, name_len); ++ if (key->offset != name_hash) { ++ dir_item_err(root, leaf, slot, ++ "name hash mismatch with key, have 0x%016x expect 0x%016llx", ++ name_hash, key->offset); ++ return -EUCLEAN; ++ } ++ } ++ cur += total_size; ++ di = (struct btrfs_dir_item *)((void *)di + total_size); ++ } ++ return 0; ++} ++ ++__printf(4, 5) ++__cold ++static void block_group_err(const struct btrfs_fs_info *fs_info, ++ const struct extent_buffer *eb, int slot, ++ const char *fmt, ...) 
++{ ++ struct btrfs_key key; ++ struct va_format vaf; ++ va_list args; ++ ++ btrfs_item_key_to_cpu(eb, &key, slot); ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ btrfs_crit(fs_info, ++ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", ++ btrfs_header_level(eb) == 0 ? "leaf" : "node", ++ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, ++ key.objectid, key.offset, &vaf); ++ va_end(args); ++} ++ ++static int check_block_group_item(struct btrfs_fs_info *fs_info, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ struct btrfs_block_group_item bgi; ++ u32 item_size = btrfs_item_size_nr(leaf, slot); ++ u64 flags; ++ u64 type; ++ ++ /* ++ * Here we don't really care about alignment since extent allocator can ++ * handle it. We care more about the size, as if one block group is ++ * larger than maximum size, it's must be some obvious corruption. ++ */ ++ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid block group size, have %llu expect (0, %llu]", ++ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); ++ return -EUCLEAN; ++ } ++ ++ if (item_size != sizeof(bgi)) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid item size, have %u expect %zu", ++ item_size, sizeof(bgi)); ++ return -EUCLEAN; ++ } ++ ++ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), ++ sizeof(bgi)); ++ if (btrfs_block_group_chunk_objectid(&bgi) != ++ BTRFS_FIRST_CHUNK_TREE_OBJECTID) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid block group chunk objectid, have %llu expect %llu", ++ btrfs_block_group_chunk_objectid(&bgi), ++ BTRFS_FIRST_CHUNK_TREE_OBJECTID); ++ return -EUCLEAN; ++ } ++ ++ if (btrfs_block_group_used(&bgi) > key->offset) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid block group used, have %llu expect [0, %llu)", ++ btrfs_block_group_used(&bgi), key->offset); ++ return -EUCLEAN; ++ } ++ ++ flags = btrfs_block_group_flags(&bgi); ++ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { ++ block_group_err(fs_info, leaf, slot, ++"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", ++ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, ++ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); ++ return -EUCLEAN; ++ } ++ ++ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; ++ if (type != BTRFS_BLOCK_GROUP_DATA && ++ type != BTRFS_BLOCK_GROUP_METADATA && ++ type != BTRFS_BLOCK_GROUP_SYSTEM && ++ type != (BTRFS_BLOCK_GROUP_METADATA | ++ BTRFS_BLOCK_GROUP_DATA)) { ++ block_group_err(fs_info, leaf, slot, ++"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", ++ type, hweight64(type), ++ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, ++ BTRFS_BLOCK_GROUP_SYSTEM, ++ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); ++ return -EUCLEAN; ++ } ++ return 0; ++} ++ ++/* ++ * Common point to switch the item-specific validation. 
++ */ ++static int check_leaf_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ int ret = 0; ++ ++ switch (key->type) { ++ case BTRFS_EXTENT_DATA_KEY: ++ ret = check_extent_data_item(root, leaf, key, slot); ++ break; ++ case BTRFS_EXTENT_CSUM_KEY: ++ ret = check_csum_item(root, leaf, key, slot); ++ break; ++ case BTRFS_DIR_ITEM_KEY: ++ case BTRFS_DIR_INDEX_KEY: ++ case BTRFS_XATTR_ITEM_KEY: ++ ret = check_dir_item(root, leaf, key, slot); ++ break; ++ case BTRFS_BLOCK_GROUP_ITEM_KEY: ++ ret = check_block_group_item(root->fs_info, leaf, key, slot); ++ break; ++ } ++ return ret; ++} ++ ++static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, ++ bool check_item_data) ++{ ++ struct btrfs_fs_info *fs_info = root->fs_info; ++ /* No valid key type is 0, so all key should be larger than this key */ ++ struct btrfs_key prev_key = {0, 0, 0}; ++ struct btrfs_key key; ++ u32 nritems = btrfs_header_nritems(leaf); ++ int slot; ++ ++ if (btrfs_header_level(leaf) != 0) { ++ generic_err(root, leaf, 0, ++ "invalid level for leaf, have %d expect 0", ++ btrfs_header_level(leaf)); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Extent buffers from a relocation tree have a owner field that ++ * corresponds to the subvolume tree they are based on. So just from an ++ * extent buffer alone we can not find out what is the id of the ++ * corresponding subvolume tree, so we can not figure out if the extent ++ * buffer corresponds to the root of the relocation tree or not. So ++ * skip this check for relocation trees. ++ */ ++ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { ++ u64 owner = btrfs_header_owner(leaf); ++ struct btrfs_root *check_root; ++ ++ /* These trees must never be empty */ ++ if (owner == BTRFS_ROOT_TREE_OBJECTID || ++ owner == BTRFS_CHUNK_TREE_OBJECTID || ++ owner == BTRFS_EXTENT_TREE_OBJECTID || ++ owner == BTRFS_DEV_TREE_OBJECTID || ++ owner == BTRFS_FS_TREE_OBJECTID || ++ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { ++ generic_err(root, leaf, 0, ++ "invalid root, root %llu must never be empty", ++ owner); ++ return -EUCLEAN; ++ } ++ key.objectid = owner; ++ key.type = BTRFS_ROOT_ITEM_KEY; ++ key.offset = (u64)-1; ++ ++ check_root = btrfs_get_fs_root(fs_info, &key, false); ++ /* ++ * The only reason we also check NULL here is that during ++ * open_ctree() some roots has not yet been set up. ++ */ ++ if (!IS_ERR_OR_NULL(check_root)) { ++ struct extent_buffer *eb; ++ ++ eb = btrfs_root_node(check_root); ++ /* if leaf is the root, then it's fine */ ++ if (leaf != eb) { ++ CORRUPT("non-root leaf's nritems is 0", ++ leaf, check_root, 0); ++ free_extent_buffer(eb); ++ return -EUCLEAN; ++ } ++ free_extent_buffer(eb); ++ } ++ return 0; ++ } ++ ++ if (nritems == 0) ++ return 0; ++ ++ /* ++ * Check the following things to make sure this is a good leaf, and ++ * leaf users won't need to bother with similar sanity checks: ++ * ++ * 1) key ordering ++ * 2) item offset and size ++ * No overlap, no hole, all inside the leaf. ++ * 3) item content ++ * If possible, do comprehensive sanity check. ++ * NOTE: All checks must only rely on the item data itself. 
++ */ ++ for (slot = 0; slot < nritems; slot++) { ++ u32 item_end_expected; ++ int ret; ++ ++ btrfs_item_key_to_cpu(leaf, &key, slot); ++ ++ /* Make sure the keys are in the right order */ ++ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { ++ CORRUPT("bad key order", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Make sure the offset and ends are right, remember that the ++ * item data starts at the end of the leaf and grows towards the ++ * front. ++ */ ++ if (slot == 0) ++ item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); ++ else ++ item_end_expected = btrfs_item_offset_nr(leaf, ++ slot - 1); ++ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { ++ CORRUPT("slot offset bad", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Check to make sure that we don't point outside of the leaf, ++ * just in case all the items are consistent to each other, but ++ * all point outside of the leaf. ++ */ ++ if (btrfs_item_end_nr(leaf, slot) > ++ BTRFS_LEAF_DATA_SIZE(fs_info)) { ++ CORRUPT("slot end outside of leaf", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* Also check if the item pointer overlaps with btrfs item. */ ++ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > ++ btrfs_item_ptr_offset(leaf, slot)) { ++ CORRUPT("slot overlap with its data", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ if (check_item_data) { ++ /* ++ * Check if the item size and content meet other ++ * criteria ++ */ ++ ret = check_leaf_item(root, leaf, &key, slot); ++ if (ret < 0) ++ return ret; ++ } ++ ++ prev_key.objectid = key.objectid; ++ prev_key.type = key.type; ++ prev_key.offset = key.offset; ++ } ++ ++ return 0; ++} ++ ++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) ++{ ++ return check_leaf(root, leaf, true); ++} ++ ++int btrfs_check_leaf_relaxed(struct btrfs_root *root, ++ struct extent_buffer *leaf) ++{ ++ return check_leaf(root, leaf, false); ++} ++ ++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) ++{ ++ unsigned long nr = btrfs_header_nritems(node); ++ struct btrfs_key key, next_key; ++ int slot; ++ int level = btrfs_header_level(node); ++ u64 bytenr; ++ int ret = 0; ++ ++ if (level <= 0 || level >= BTRFS_MAX_LEVEL) { ++ generic_err(root, node, 0, ++ "invalid level for node, have %d expect [1, %d]", ++ level, BTRFS_MAX_LEVEL - 1); ++ return -EUCLEAN; ++ } ++ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { ++ btrfs_crit(root->fs_info, ++"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", ++ root->objectid, node->start, ++ nr == 0 ? 
"small" : "large", nr, ++ BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)); ++ return -EUCLEAN; ++ } ++ ++ for (slot = 0; slot < nr - 1; slot++) { ++ bytenr = btrfs_node_blockptr(node, slot); ++ btrfs_node_key_to_cpu(node, &key, slot); ++ btrfs_node_key_to_cpu(node, &next_key, slot + 1); ++ ++ if (!bytenr) { ++ generic_err(root, node, slot, ++ "invalid NULL node pointer"); ++ ret = -EUCLEAN; ++ goto out; ++ } ++ if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) { ++ generic_err(root, node, slot, ++ "unaligned pointer, have %llu should be aligned to %u", ++ bytenr, root->fs_info->sectorsize); ++ ret = -EUCLEAN; ++ goto out; ++ } ++ ++ if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { ++ generic_err(root, node, slot, ++ "bad key order, current (%llu %u %llu) next (%llu %u %llu)", ++ key.objectid, key.type, key.offset, ++ next_key.objectid, next_key.type, ++ next_key.offset); ++ ret = -EUCLEAN; ++ goto out; ++ } ++ } ++out: ++ return ret; ++} +diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h +new file mode 100644 +index 000000000000..3d53e8d6fda0 +--- /dev/null ++++ b/fs/btrfs/tree-checker.h +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (C) Qu Wenruo 2017. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public ++ * License v2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public ++ * License along with this program. ++ */ ++ ++#ifndef __BTRFS_TREE_CHECKER__ ++#define __BTRFS_TREE_CHECKER__ ++ ++#include "ctree.h" ++#include "extent_io.h" ++ ++/* ++ * Comprehensive leaf checker. ++ * Will check not only the item pointers, but also every possible member ++ * in item data. ++ */ ++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); ++ ++/* ++ * Less strict leaf checker. ++ * Will only check item pointers, not reading item data. 
++ */ ++int btrfs_check_leaf_relaxed(struct btrfs_root *root, ++ struct extent_buffer *leaf); ++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); ++ ++#endif +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index a0947f4a3e87..9663b6aa2a56 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -4647,7 +4647,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + + if (type & BTRFS_BLOCK_GROUP_DATA) { + max_stripe_size = SZ_1G; +- max_chunk_size = 10 * max_stripe_size; ++ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS(info->chunk_root); + } else if (type & BTRFS_BLOCK_GROUP_METADATA) { +@@ -6353,6 +6353,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + u16 num_stripes; + u16 sub_stripes; + u64 type; ++ u64 features; ++ bool mixed = false; + + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); +@@ -6391,6 +6393,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + btrfs_chunk_type(leaf, chunk)); + return -EIO; + } ++ ++ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { ++ btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); ++ return -EIO; ++ } ++ ++ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && ++ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { ++ btrfs_err(fs_info, ++ "system chunk with data or metadata type: 0x%llx", type); ++ return -EIO; ++ } ++ ++ features = btrfs_super_incompat_flags(fs_info->super_copy); ++ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) ++ mixed = true; ++ ++ if (!mixed) { ++ if ((type & BTRFS_BLOCK_GROUP_METADATA) && ++ (type & BTRFS_BLOCK_GROUP_DATA)) { ++ btrfs_err(fs_info, ++ "mixed chunk type in non-mixed mode: 0x%llx", type); ++ return -EIO; ++ } ++ } ++ + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || +diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h +index c5dd48eb7b3d..76fb6e84f201 100644 +--- a/fs/btrfs/volumes.h ++++ b/fs/btrfs/volumes.h +@@ -24,6 +24,8 @@ + #include <linux/btrfs.h> + #include "async-thread.h" + ++#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) ++ + extern struct mutex uuid_mutex; + + #define BTRFS_STRIPE_LEN SZ_64K +diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c +index bf378ddca4db..a48984dd6426 100644 +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -4079,6 +4079,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, + return auth; + } + ++static int add_authorizer_challenge(struct ceph_connection *con, ++ void *challenge_buf, int challenge_buf_len) ++{ ++ struct ceph_mds_session *s = con->private; ++ struct ceph_mds_client *mdsc = s->s_mdsc; ++ struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; ++ ++ return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, ++ challenge_buf, challenge_buf_len); ++} + + static int verify_authorizer_reply(struct ceph_connection *con) + { +@@ -4142,6 +4152,7 @@ static const struct ceph_connection_operations mds_con_ops = { + .put = con_put, + .dispatch = dispatch, + .get_authorizer = get_authorizer, ++ .add_authorizer_challenge = add_authorizer_challenge, + .verify_authorizer_reply = verify_authorizer_reply, + .invalidate_authorizer = invalidate_authorizer, + .peer_reset = peer_reset, +diff --git a/fs/direct-io.c b/fs/direct-io.c +index 625a84aa6484..40567501015f 100644 +--- a/fs/direct-io.c ++++ 
b/fs/direct-io.c +@@ -304,8 +304,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) + */ + dio->iocb->ki_pos += transferred; + +- if (dio->op == REQ_OP_WRITE) +- ret = generic_write_sync(dio->iocb, transferred); ++ if (ret > 0 && dio->op == REQ_OP_WRITE) ++ ret = generic_write_sync(dio->iocb, ret); + dio->iocb->ki_complete(dio->iocb, ret, 0); + } + +diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c +index 62d9a659a8ff..dd8f10db82e9 100644 +--- a/fs/ext2/xattr.c ++++ b/fs/ext2/xattr.c +@@ -612,9 +612,9 @@ skip_replace: + } + + cleanup: +- brelse(bh); + if (!(bh && header == HDR(bh))) + kfree(header); ++ brelse(bh); + up_write(&EXT2_I(inode)->xattr_sem); + + return error; +diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c +index 41fce930f44c..624817eeb25e 100644 +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -69,6 +69,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, + .old_blkaddr = index, + .new_blkaddr = index, + .encrypted_page = NULL, ++ .is_meta = is_meta, + }; + + if (unlikely(!is_meta)) +@@ -85,8 +86,10 @@ repeat: + fio.page = page; + + if (f2fs_submit_page_bio(&fio)) { +- f2fs_put_page(page, 1); +- goto repeat; ++ memset(page_address(page), 0, PAGE_SIZE); ++ f2fs_stop_checkpoint(sbi, false); ++ f2fs_bug_on(sbi, 1); ++ return page; + } + + lock_page(page); +@@ -117,7 +120,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) + return __get_meta_page(sbi, index, false); + } + +-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) ++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type) + { + switch (type) { + case META_NAT: +@@ -137,8 +141,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) + return false; + break; + case META_POR: ++ case DATA_GENERIC: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || +- blkaddr < MAIN_BLKADDR(sbi))) ++ blkaddr < MAIN_BLKADDR(sbi))) { ++ if (type == DATA_GENERIC) { ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "access invalid blkaddr:%u", blkaddr); ++ WARN_ON(1); ++ } ++ return false; ++ } ++ break; ++ case META_GENERIC: ++ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || ++ blkaddr >= MAIN_BLKADDR(sbi))) + return false; + break; + default: +@@ -163,6 +179,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, + .op_flags = sync ? 
(REQ_META | REQ_PRIO) : REQ_RAHEAD, + .encrypted_page = NULL, + .in_list = false, ++ .is_meta = (type != META_POR), + }; + struct blk_plug plug; + +@@ -172,7 +189,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, + blk_start_plug(&plug); + for (; nrpages-- > 0; blkno++) { + +- if (!is_valid_blkaddr(sbi, blkno, type)) ++ if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) + goto out; + + switch (type) { +@@ -737,6 +754,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, + &cp_page_1, version); + if (err) + return NULL; ++ ++ if (le32_to_cpu(cp_block->cp_pack_total_block_count) > ++ sbi->blocks_per_seg) { ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "invalid cp_pack_total_block_count:%u", ++ le32_to_cpu(cp_block->cp_pack_total_block_count)); ++ goto invalid_cp; ++ } + pre_version = *version; + + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; +@@ -800,15 +825,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) + cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + memcpy(sbi->ckpt, cp_block, blk_size); + +- /* Sanity checking of checkpoint */ +- if (sanity_check_ckpt(sbi)) +- goto free_fail_no_cp; +- + if (cur_page == cp1) + sbi->cur_cp_pack = 1; + else + sbi->cur_cp_pack = 2; + ++ /* Sanity checking of checkpoint */ ++ if (sanity_check_ckpt(sbi)) ++ goto free_fail_no_cp; ++ + if (cp_blks <= 1) + goto done; + +diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c +index 6fbb6d75318a..8f6e7c3a10f8 100644 +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -369,6 +369,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; + ++ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, ++ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) ++ return -EFAULT; ++ + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); + +@@ -412,9 +416,9 @@ next: + spin_unlock(&io->io_lock); + } + +- if (fio->old_blkaddr != NEW_ADDR) +- verify_block_addr(sbi, fio->old_blkaddr); +- verify_block_addr(sbi, fio->new_blkaddr); ++ if (__is_valid_data_blkaddr(fio->old_blkaddr)) ++ verify_block_addr(fio, fio->old_blkaddr); ++ verify_block_addr(fio, fio->new_blkaddr); + + bio_page = fio->encrypted_page ? 
fio->encrypted_page : fio->page; + +@@ -945,7 +949,13 @@ next_dnode: + next_block: + blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + +- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { ++ if (__is_valid_data_blkaddr(blkaddr) && ++ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { ++ err = -EFAULT; ++ goto sync_out; ++ } ++ ++ if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (create) { + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; +@@ -1263,6 +1273,10 @@ got_it: + SetPageUptodate(page); + goto confused; + } ++ ++ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, ++ DATA_GENERIC)) ++ goto set_error_page; + } else { + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) +@@ -1387,15 +1401,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) + return need_inplace_update_policy(inode, fio); + } + +-static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) +-{ +- if (fio->old_blkaddr == NEW_ADDR) +- return false; +- if (fio->old_blkaddr == NULL_ADDR) +- return false; +- return true; +-} +- + int do_write_data_page(struct f2fs_io_info *fio) + { + struct page *page = fio->page; +@@ -1410,11 +1415,13 @@ int do_write_data_page(struct f2fs_io_info *fio) + f2fs_lookup_extent_cache(inode, page->index, &ei)) { + fio->old_blkaddr = ei.blk + page->index - ei.fofs; + +- if (valid_ipu_blkaddr(fio)) { +- ipu_force = true; +- fio->need_lock = LOCK_DONE; +- goto got_it; +- } ++ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, ++ DATA_GENERIC)) ++ return -EFAULT; ++ ++ ipu_force = true; ++ fio->need_lock = LOCK_DONE; ++ goto got_it; + } + + /* Deadlock due to between page->lock and f2fs_lock_op */ +@@ -1433,11 +1440,18 @@ int do_write_data_page(struct f2fs_io_info *fio) + goto out_writepage; + } + got_it: ++ if (__is_valid_data_blkaddr(fio->old_blkaddr) && ++ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, ++ DATA_GENERIC)) { ++ err = -EFAULT; ++ goto out_writepage; ++ } + /* + * If current allocation needs SSR, + * it had better in-place writes for updated data. 
+ */ +- if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { ++ if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && ++ need_inplace_update(fio))) { + err = encrypt_one_page(fio); + if (err) + goto out_writepage; +diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h +index 54f8520ad7a2..3f1a44696036 100644 +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -162,7 +162,7 @@ struct cp_control { + }; + + /* +- * For CP/NAT/SIT/SSA readahead ++ * indicate meta/data type + */ + enum { + META_CP, +@@ -170,6 +170,8 @@ enum { + META_SIT, + META_SSA, + META_POR, ++ DATA_GENERIC, ++ META_GENERIC, + }; + + /* for the list of ino */ +@@ -910,6 +912,7 @@ struct f2fs_io_info { + bool submitted; /* indicate IO submission */ + int need_lock; /* indicate we need to lock cp_rwsem */ + bool in_list; /* indicate fio is in io_list */ ++ bool is_meta; /* indicate borrow meta inode mapping or not */ + enum iostat_type io_type; /* io type */ + }; + +@@ -2354,6 +2357,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, + spin_unlock(&sbi->iostat_lock); + } + ++#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ ++ (!is_read_io(fio->op) || fio->is_meta)) ++ ++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type); ++void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); ++static inline void verify_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type) ++{ ++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "invalid blkaddr: %u, type: %d, run fsck to fix.", ++ blkaddr, type); ++ f2fs_bug_on(sbi, 1); ++ } ++} ++ ++static inline bool __is_valid_data_blkaddr(block_t blkaddr) ++{ ++ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) ++ return false; ++ return true; ++} ++ ++static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr) ++{ ++ if (!__is_valid_data_blkaddr(blkaddr)) ++ return false; ++ verify_blkaddr(sbi, blkaddr, DATA_GENERIC); ++ return true; ++} ++ + /* + * file.c + */ +@@ -2564,7 +2600,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); + struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); + struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); + struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); +-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); ++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type); + int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, + int type, bool sync); + void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c +index 6f589730782d..7d3189f1941c 100644 +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -328,13 +328,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping, + return pgofs; + } + +-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, +- int whence) ++static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, ++ pgoff_t dirty, pgoff_t pgofs, int whence) + { + switch (whence) { + case SEEK_DATA: + if ((blkaddr == NEW_ADDR && dirty == pgofs) || +- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) ++ is_valid_data_blkaddr(sbi, blkaddr)) + return true; + break; + case SEEK_HOLE: +@@ -397,7 +397,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) + blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); + +- if 
(__found_offset(blkaddr, dirty, pgofs, whence)) { ++ if (__is_valid_data_blkaddr(blkaddr) && ++ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), ++ blkaddr, DATA_GENERIC)) { ++ f2fs_put_dnode(&dn); ++ goto fail; ++ } ++ ++ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, ++ pgofs, whence)) { + f2fs_put_dnode(&dn); + goto found; + } +@@ -495,6 +503,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) + + dn->data_blkaddr = NULL_ADDR; + set_data_blkaddr(dn); ++ ++ if (__is_valid_data_blkaddr(blkaddr) && ++ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) ++ continue; ++ + invalidate_blocks(sbi, blkaddr); + if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) + clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); +diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c +index 259b0aa283f0..9a40724dbaa6 100644 +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -62,11 +62,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) + } + } + +-static bool __written_first_block(struct f2fs_inode *ri) ++static bool __written_first_block(struct f2fs_sb_info *sbi, ++ struct f2fs_inode *ri) + { + block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); + +- if (addr != NEW_ADDR && addr != NULL_ADDR) ++ if (is_valid_data_blkaddr(sbi, addr)) + return true; + return false; + } +@@ -179,6 +180,72 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); + } + ++static bool sanity_check_inode(struct inode *inode, struct page *node_page) ++{ ++ struct f2fs_sb_info *sbi = F2FS_I_SB(inode); ++ struct f2fs_inode_info *fi = F2FS_I(inode); ++ unsigned long long iblocks; ++ ++ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); ++ if (!iblocks) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " ++ "run fsck to fix.", ++ __func__, inode->i_ino, iblocks); ++ return false; ++ } ++ ++ if (ino_of_node(node_page) != nid_of_node(node_page)) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: corrupted inode footer i_ino=%lx, ino,nid: " ++ "[%u, %u] run fsck to fix.", ++ __func__, inode->i_ino, ++ ino_of_node(node_page), nid_of_node(node_page)); ++ return false; ++ } ++ ++ if (f2fs_has_extra_attr(inode) && ++ !f2fs_sb_has_extra_attr(sbi->sb)) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: inode (ino=%lx) is with extra_attr, " ++ "but extra_attr feature is off", ++ __func__, inode->i_ino); ++ return false; ++ } ++ ++ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || ++ fi->i_extra_isize % sizeof(__le32)) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, " ++ "max: %zu", ++ __func__, inode->i_ino, fi->i_extra_isize, ++ F2FS_TOTAL_EXTRA_ATTR_SIZE); ++ return false; ++ } ++ ++ if (F2FS_I(inode)->extent_tree) { ++ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; ++ ++ if (ei->len && ++ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) || ++ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, ++ DATA_GENERIC))) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: inode (ino=%lx) extent info [%u, %u, %u] " ++ "is incorrect, run fsck to fix", ++ __func__, inode->i_ino, ++ ei->blk, ei->fofs, ei->len); ++ return false; ++ } ++ } ++ return true; ++} ++ + static int do_read_inode(struct inode *inode) + { + struct f2fs_sb_info *sbi = 
F2FS_I_SB(inode); +@@ -228,6 +295,11 @@ static int do_read_inode(struct inode *inode) + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? + le16_to_cpu(ri->i_extra_isize) : 0; + ++ if (!sanity_check_inode(inode, node_page)) { ++ f2fs_put_page(node_page, 1); ++ return -EINVAL; ++ } ++ + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + __recover_inline_status(inode, node_page); +@@ -235,7 +307,7 @@ static int do_read_inode(struct inode *inode) + /* get rdev by using inline_info */ + __get_inode_rdev(inode, ri); + +- if (__written_first_block(ri)) ++ if (__written_first_block(sbi, ri)) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + + if (!need_inode_block_update(sbi, inode->i_ino)) +diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c +index 712505ec5de4..65de72d65562 100644 +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -334,8 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, + new_blkaddr == NULL_ADDR); + f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && + new_blkaddr == NEW_ADDR); +- f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && +- nat_get_blkaddr(e) != NULL_ADDR && ++ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) && + new_blkaddr == NEW_ADDR); + + /* increment version no as node is removed */ +@@ -350,7 +349,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, + + /* change address */ + nat_set_blkaddr(e, new_blkaddr); +- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) ++ if (!is_valid_data_blkaddr(sbi, new_blkaddr)) + set_nat_flag(e, IS_CHECKPOINTED, false); + __set_nat_cache_dirty(nm_i, e); + +@@ -1399,6 +1398,12 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, + return 0; + } + ++ if (__is_valid_data_blkaddr(ni.blk_addr) && ++ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { ++ up_read(&sbi->node_write); ++ goto redirty_out; ++ } ++ + if (atomic && !test_opt(sbi, NOBARRIER)) + fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + +diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c +index 765fadf954af..6ea445377767 100644 +--- a/fs/f2fs/recovery.c ++++ b/fs/f2fs/recovery.c +@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, + while (1) { + struct fsync_inode_entry *entry; + +- if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) ++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) + return 0; + + page = get_tmp_page(sbi, blkaddr); +@@ -479,7 +479,7 @@ retry_dn: + } + + /* dest is valid block, try to recover from src to dest */ +- if (is_valid_blkaddr(sbi, dest, META_POR)) { ++ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { + + if (src == NULL_ADDR) { + err = reserve_new_block(&dn); +@@ -540,7 +540,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, + while (1) { + struct fsync_inode_entry *entry; + +- if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) ++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) + break; + + ra_meta_pages_cond(sbi, blkaddr); +diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c +index 3c7bbbae0afa..5c698757e116 100644 +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -1758,7 +1758,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) + struct seg_entry *se; + bool is_cp = false; + +- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) ++ if (!is_valid_data_blkaddr(sbi, blkaddr)) + return true; + + mutex_lock(&sit_i->sentry_lock); +@@ -2571,7 +2571,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, 
block_t blkaddr) + { + struct page *cpage; + +- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) ++ if (!is_valid_data_blkaddr(sbi, blkaddr)) + return; + + cpage = find_lock_page(META_MAPPING(sbi), blkaddr); +@@ -3304,6 +3304,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) + unsigned int old_valid_blocks; + + start = le32_to_cpu(segno_in_journal(journal, i)); ++ if (start >= MAIN_SEGS(sbi)) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong journal entry on segno %u", ++ start); ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ err = -EINVAL; ++ break; ++ } ++ + se = &sit_i->sentries[start]; + sit = sit_in_journal(journal, i); + +diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h +index 4dfb5080098f..47348d98165b 100644 +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -53,13 +53,19 @@ + ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + (sbi)->segs_per_sec)) \ + +-#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) +-#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) ++#define MAIN_BLKADDR(sbi) \ ++ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ ++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) ++#define SEG0_BLKADDR(sbi) \ ++ (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ ++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) + + #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) + #define MAIN_SECS(sbi) ((sbi)->total_sections) + +-#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) ++#define TOTAL_SEGS(sbi) \ ++ (SM_I(sbi) ? SM_I(sbi)->segment_count : \ ++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) + #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) + + #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) +@@ -79,7 +85,7 @@ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) + + #define GET_SEGNO(sbi, blk_addr) \ +- ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ ++ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? 
\ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) + #define BLKS_PER_SEC(sbi) \ +@@ -619,10 +625,14 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) + f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); + } + +-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) ++static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) + { +- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) +- || blk_addr >= MAX_BLKADDR(sbi)); ++ struct f2fs_sb_info *sbi = fio->sbi; ++ ++ if (__is_meta_io(fio)) ++ verify_blkaddr(sbi, blk_addr, META_GENERIC); ++ else ++ verify_blkaddr(sbi, blk_addr, DATA_GENERIC); + } + + /* +diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c +index 7cda685296b2..de4de4ebe64c 100644 +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1807,6 +1807,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, + static int sanity_check_raw_super(struct f2fs_sb_info *sbi, + struct buffer_head *bh) + { ++ block_t segment_count, segs_per_sec, secs_per_zone; ++ block_t total_sections, blocks_per_seg; + struct f2fs_super_block *raw_super = (struct f2fs_super_block *) + (bh->b_data + F2FS_SUPER_OFFSET); + struct super_block *sb = sbi->sb; +@@ -1863,6 +1865,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, + return 1; + } + ++ segment_count = le32_to_cpu(raw_super->segment_count); ++ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); ++ secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); ++ total_sections = le32_to_cpu(raw_super->section_count); ++ ++ /* blocks_per_seg should be 512, given the above check */ ++ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg); ++ ++ if (segment_count > F2FS_MAX_SEGMENT || ++ segment_count < F2FS_MIN_SEGMENTS) { ++ f2fs_msg(sb, KERN_INFO, ++ "Invalid segment count (%u)", ++ segment_count); ++ return 1; ++ } ++ ++ if (total_sections > segment_count || ++ total_sections < F2FS_MIN_SEGMENTS || ++ segs_per_sec > segment_count || !segs_per_sec) { ++ f2fs_msg(sb, KERN_INFO, ++ "Invalid segment/section count (%u, %u x %u)", ++ segment_count, total_sections, segs_per_sec); ++ return 1; ++ } ++ ++ if ((segment_count / segs_per_sec) < total_sections) { ++ f2fs_msg(sb, KERN_INFO, ++ "Small segment_count (%u < %u * %u)", ++ segment_count, segs_per_sec, total_sections); ++ return 1; ++ } ++ ++ if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { ++ f2fs_msg(sb, KERN_INFO, ++ "Wrong segment_count / block_count (%u > %u)", ++ segment_count, le32_to_cpu(raw_super->block_count)); ++ return 1; ++ } ++ ++ if (secs_per_zone > total_sections || !secs_per_zone) { ++ f2fs_msg(sb, KERN_INFO, ++ "Wrong secs_per_zone / total_sections (%u, %u)", ++ secs_per_zone, total_sections); ++ return 1; ++ } ++ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) { ++ f2fs_msg(sb, KERN_INFO, ++ "Corrupted extension count (%u > %u)", ++ le32_to_cpu(raw_super->extension_count), ++ F2FS_MAX_EXTENSION); ++ return 1; ++ } ++ ++ if (le32_to_cpu(raw_super->cp_payload) > ++ (blocks_per_seg - F2FS_CP_PACKS)) { ++ f2fs_msg(sb, KERN_INFO, ++ "Insane cp_payload (%u > %u)", ++ le32_to_cpu(raw_super->cp_payload), ++ blocks_per_seg - F2FS_CP_PACKS); ++ return 1; ++ } ++ + /* check reserved ino info */ + if (le32_to_cpu(raw_super->node_ino) != 1 || + le32_to_cpu(raw_super->meta_ino) != 2 || +@@ -1875,13 +1939,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, + return 1; + } + +- if (le32_to_cpu(raw_super->segment_count) > 
F2FS_MAX_SEGMENT) { +- f2fs_msg(sb, KERN_INFO, +- "Invalid segment count (%u)", +- le32_to_cpu(raw_super->segment_count)); +- return 1; +- } +- + /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ + if (sanity_check_area_boundary(sbi, bh)) + return 1; +@@ -1899,6 +1956,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) + unsigned int sit_segs, nat_segs; + unsigned int sit_bitmap_size, nat_bitmap_size; + unsigned int log_blocks_per_seg; ++ unsigned int segment_count_main; ++ unsigned int cp_pack_start_sum, cp_payload; ++ block_t user_block_count; + int i; + + total = le32_to_cpu(raw_super->segment_count); +@@ -1923,6 +1983,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) + return 1; + } + ++ user_block_count = le64_to_cpu(ckpt->user_block_count); ++ segment_count_main = le32_to_cpu(raw_super->segment_count_main); ++ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); ++ if (!user_block_count || user_block_count >= ++ segment_count_main << log_blocks_per_seg) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong user_block_count: %u", user_block_count); ++ return 1; ++ } ++ + main_segs = le32_to_cpu(raw_super->segment_count_main); + blocks_per_seg = sbi->blocks_per_seg; + +@@ -1939,7 +2009,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) + + sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); +- log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + + if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || + nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { +@@ -1949,6 +2018,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) + return 1; + } + ++ cp_pack_start_sum = __start_sum_addr(sbi); ++ cp_payload = __cp_payload(sbi); ++ if (cp_pack_start_sum < cp_payload + 1 || ++ cp_pack_start_sum > blocks_per_seg - 1 - ++ NR_CURSEG_TYPE) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong cp_pack_start_sum: %u", ++ cp_pack_start_sum); ++ return 1; ++ } ++ + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + return 1; +diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c +index 6249c92671de..ea66f04f46f7 100644 +--- a/fs/xfs/libxfs/xfs_attr.c ++++ b/fs/xfs/libxfs/xfs_attr.c +@@ -501,7 +501,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) + if (args->flags & ATTR_CREATE) + return retval; + retval = xfs_attr_shortform_remove(args); +- ASSERT(retval == 0); ++ if (retval) ++ return retval; ++ /* ++ * Since we have removed the old attr, clear ATTR_REPLACE so ++ * that the leaf format add routine won't trip over the attr ++ * not being around. 
++ */ ++ args->flags &= ~ATTR_REPLACE; + } + + if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index a3333004fd2b..8458cc5fbce5 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -113,6 +113,7 @@ struct bpf_insn_aux_data { + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; + int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ ++ int sanitize_stack_off; /* stack slot to be cleared */ + bool seen; /* this insn was processed by the verifier */ + }; + +diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h +index e931da8424a4..6728c2ee0205 100644 +--- a/include/linux/ceph/auth.h ++++ b/include/linux/ceph/auth.h +@@ -64,6 +64,10 @@ struct ceph_auth_client_ops { + /* ensure that an existing authorizer is up to date */ + int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, + struct ceph_auth_handshake *auth); ++ int (*add_authorizer_challenge)(struct ceph_auth_client *ac, ++ struct ceph_authorizer *a, ++ void *challenge_buf, ++ int challenge_buf_len); + int (*verify_authorizer_reply)(struct ceph_auth_client *ac, + struct ceph_authorizer *a); + void (*invalidate_authorizer)(struct ceph_auth_client *ac, +@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); + extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *a); ++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, ++ struct ceph_authorizer *a, ++ void *challenge_buf, ++ int challenge_buf_len); + extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a); + extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, +diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h +index 59042d5ac520..70f42eef813b 100644 +--- a/include/linux/ceph/ceph_features.h ++++ b/include/linux/ceph/ceph_features.h +@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap + DEFINE_CEPH_FEATURE(59, 1, FS_BTIME) + DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap + DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap +-DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit* ++DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit* ++DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit* + +-DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down! 
+ DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal + DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing + +@@ -209,7 +209,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin + CEPH_FEATURE_SERVER_JEWEL | \ + CEPH_FEATURE_MON_STATEFUL_SUB | \ + CEPH_FEATURE_CRUSH_TUNABLES5 | \ +- CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) ++ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ ++ CEPH_FEATURE_CEPHX_V2) + + #define CEPH_FEATURES_REQUIRED_DEFAULT \ + (CEPH_FEATURE_NOSRCADDR | \ +diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h +index ead9d85f1c11..18fbe910ed55 100644 +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -31,6 +31,9 @@ struct ceph_connection_operations { + struct ceph_auth_handshake *(*get_authorizer) ( + struct ceph_connection *con, + int *proto, int force_new); ++ int (*add_authorizer_challenge)(struct ceph_connection *con, ++ void *challenge_buf, ++ int challenge_buf_len); + int (*verify_authorizer_reply) (struct ceph_connection *con); + int (*invalidate_authorizer)(struct ceph_connection *con); + +@@ -203,9 +206,8 @@ struct ceph_connection { + attempt for this connection, client */ + u32 peer_global_seq; /* peer's global seq for this connection */ + ++ struct ceph_auth_handshake *auth; + int auth_retry; /* true if we need a newer authorizer */ +- void *auth_reply_buf; /* where to put the authorizer reply */ +- int auth_reply_buf_len; + + struct mutex mutex; + +diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h +index 73ae2a926548..9e50aede46c8 100644 +--- a/include/linux/ceph/msgr.h ++++ b/include/linux/ceph/msgr.h +@@ -91,7 +91,7 @@ struct ceph_entity_inst { + #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ + #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ + #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ +- ++#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */ + + /* + * connection negotiation +diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h +index 3b7675bcca64..cd0d2270998f 100644 +--- a/include/linux/jump_label.h ++++ b/include/linux/jump_label.h +@@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry, + extern int jump_label_text_reserved(void *start, void *end); + extern void static_key_slow_inc(struct static_key *key); + extern void static_key_slow_dec(struct static_key *key); ++extern void static_key_slow_inc_cpuslocked(struct static_key *key); ++extern void static_key_slow_dec_cpuslocked(struct static_key *key); + extern void jump_label_apply_nops(struct module *mod); + extern int static_key_count(struct static_key *key); + extern void static_key_enable(struct static_key *key); +@@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key) + atomic_dec(&key->enabled); + } + ++#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) ++#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) ++ + static inline int jump_label_text_reserved(void *start, void *end) + { + return 0; +@@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void); + + #define static_branch_inc(x) static_key_slow_inc(&(x)->key) + #define static_branch_dec(x) static_key_slow_dec(&(x)->key) ++#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) ++#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) + + 
/* + * Normal usage; boolean enable/disable. +diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h +index 919b2a0b0307..38342e88b3f3 100644 +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -62,8 +62,8 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); + #define PTRACE_MODE_READ 0x01 + #define PTRACE_MODE_ATTACH 0x02 + #define PTRACE_MODE_NOAUDIT 0x04 +-#define PTRACE_MODE_FSCREDS 0x08 +-#define PTRACE_MODE_REALCREDS 0x10 ++#define PTRACE_MODE_FSCREDS 0x08 ++#define PTRACE_MODE_REALCREDS 0x10 + + /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ + #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) +diff --git a/include/linux/sched.h b/include/linux/sched.h +index e04919aa8201..866439c361a9 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1405,6 +1405,8 @@ static inline bool is_percpu_thread(void) + #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ + #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ + #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ ++#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ ++#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ + + #define TASK_PFA_TEST(name, func) \ + static inline bool task_##func(struct task_struct *p) \ +@@ -1436,6 +1438,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + ++TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) ++ ++TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++ + static inline void + current_restore_flags(unsigned long orig_flags, unsigned long flags) + { +diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h +new file mode 100644 +index 000000000000..59d3736c454c +--- /dev/null ++++ b/include/linux/sched/smt.h +@@ -0,0 +1,20 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_SCHED_SMT_H ++#define _LINUX_SCHED_SMT_H ++ ++#include <linux/static_key.h> ++ ++#ifdef CONFIG_SCHED_SMT ++extern struct static_key_false sched_smt_present; ++ ++static __always_inline bool sched_smt_active(void) ++{ ++ return static_branch_likely(&sched_smt_present); ++} ++#else ++static inline bool sched_smt_active(void) { return false; } ++#endif ++ ++void arch_smt_update(void); ++ ++#endif +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index f64e88444082..f6250555ce7d 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1288,6 +1288,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) + } + } + ++static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) ++{ ++ skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); ++ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; ++} ++ ++static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) ++{ ++ return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; ++} ++ ++static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) ++{ ++ return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); ++} ++ + /* Release a reference on a zerocopy structure */ + static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) + { +@@ -1297,7 
+1313,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) + if (uarg->callback == sock_zerocopy_callback) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); +- } else { ++ } else if (!skb_zcopy_is_nouarg(skb)) { + uarg->callback(uarg, zerocopy); + } + +diff --git a/include/net/tls.h b/include/net/tls.h +index 86ed3dd80fe7..604fd982da19 100644 +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -89,6 +89,8 @@ struct tls_context { + + void *priv_ctx; + ++ u8 tx_conf:2; ++ + u16 prepend_size; + u16 tag_size; + u16 overhead_size; +@@ -104,7 +106,6 @@ struct tls_context { + + u16 pending_open_record_frags; + int (*push_pending_record)(struct sock *sk, int flags); +- void (*free_resources)(struct sock *sk); + + void (*sk_write_space)(struct sock *sk); + void (*sk_proto_close)(struct sock *sk, long timeout); +@@ -129,6 +130,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); + int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); + void tls_sw_close(struct sock *sk, long timeout); ++void tls_sw_free_tx_resources(struct sock *sk); + + void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); + void tls_icsk_clean_acked(struct sock *sk); +diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h +index 7115838fbf2a..38ab0e06259a 100644 +--- a/include/uapi/linux/btrfs_tree.h ++++ b/include/uapi/linux/btrfs_tree.h +@@ -734,6 +734,7 @@ struct btrfs_balance_item { + #define BTRFS_FILE_EXTENT_INLINE 0 + #define BTRFS_FILE_EXTENT_REG 1 + #define BTRFS_FILE_EXTENT_PREALLOC 2 ++#define BTRFS_FILE_EXTENT_TYPES 2 + + struct btrfs_file_extent_item { + /* +diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h +index 3027f943f4b3..214102fab940 100644 +--- a/include/uapi/linux/prctl.h ++++ b/include/uapi/linux/prctl.h +@@ -203,6 +203,7 @@ struct prctl_mm_map { + #define PR_SET_SPECULATION_CTRL 53 + /* Speculation control variants */ + # define PR_SPEC_STORE_BYPASS 0 ++# define PR_SPEC_INDIRECT_BRANCH 1 + /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ + # define PR_SPEC_NOT_AFFECTED 0 + # define PR_SPEC_PRCTL (1UL << 0) +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 013b0cd1958e..f6755fd5bae2 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -717,8 +717,9 @@ static bool is_spillable_regtype(enum bpf_reg_type type) + /* check_stack_read/write functions track spill/fill of registers, + * stack boundary and alignment are checked in check_mem_access() + */ +-static int check_stack_write(struct bpf_verifier_state *state, int off, +- int size, int value_regno) ++static int check_stack_write(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *state, int off, ++ int size, int value_regno, int insn_idx) + { + int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; + /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, +@@ -738,8 +739,32 @@ static int check_stack_write(struct bpf_verifier_state *state, int off, + state->spilled_regs[spi] = state->regs[value_regno]; + state->spilled_regs[spi].live |= REG_LIVE_WRITTEN; + +- for (i = 0; i < BPF_REG_SIZE; i++) ++ for (i = 0; i < BPF_REG_SIZE; i++) { ++ if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && ++ !env->allow_ptr_leaks) { ++ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; ++ int soff = (-spi - 1) * BPF_REG_SIZE; ++ ++ /* detected reuse of integer stack slot with a pointer ++ * which means either llvm is 
reusing stack slot or ++ * an attacker is trying to exploit CVE-2018-3639 ++ * (speculative store bypass) ++ * Have to sanitize that slot with preemptive ++ * store of zero. ++ */ ++ if (*poff && *poff != soff) { ++ /* disallow programs where single insn stores ++ * into two different stack slots, since verifier ++ * cannot sanitize them ++ */ ++ verbose("insn %d cannot access two stack slots fp%d and fp%d", ++ insn_idx, *poff, soff); ++ return -EINVAL; ++ } ++ *poff = soff; ++ } + state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; ++ } + } else { + /* regular write of data into stack */ + state->spilled_regs[spi] = (struct bpf_reg_state) {}; +@@ -1216,7 +1241,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + verbose("attempt to corrupt spilled pointer on stack\n"); + return -EACCES; + } +- err = check_stack_write(state, off, size, value_regno); ++ err = check_stack_write(env, state, off, size, ++ value_regno, insn_idx); + } else { + err = check_stack_read(state, off, size, value_regno); + } +@@ -4270,6 +4296,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) + else + continue; + ++ if (type == BPF_WRITE && ++ env->insn_aux_data[i + delta].sanitize_stack_off) { ++ struct bpf_insn patch[] = { ++ /* Sanitize suspicious stack slot with zero. ++ * There are no memory dependencies for this store, ++ * since it's only using frame pointer and immediate ++ * constant of zero ++ */ ++ BPF_ST_MEM(BPF_DW, BPF_REG_FP, ++ env->insn_aux_data[i + delta].sanitize_stack_off, ++ 0), ++ /* the original STX instruction will immediately ++ * overwrite the same stack slot with appropriate value ++ */ ++ *insn, ++ }; ++ ++ cnt = ARRAY_SIZE(patch); ++ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) + continue; + +diff --git a/kernel/cpu.c b/kernel/cpu.c +index f3f389e33343..5c907d96e3dd 100644 +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -10,6 +10,7 @@ + #include <linux/sched/signal.h> + #include <linux/sched/hotplug.h> + #include <linux/sched/task.h> ++#include <linux/sched/smt.h> + #include <linux/unistd.h> + #include <linux/cpu.h> + #include <linux/oom.h> +@@ -347,6 +348,12 @@ void cpu_hotplug_enable(void) + EXPORT_SYMBOL_GPL(cpu_hotplug_enable); + #endif /* CONFIG_HOTPLUG_CPU */ + ++/* ++ * Architectures that need SMT-specific errata handling during SMT hotplug ++ * should override this. ++ */ ++void __weak arch_smt_update(void) { } ++ + #ifdef CONFIG_HOTPLUG_SMT + enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; + EXPORT_SYMBOL_GPL(cpu_smt_control); +@@ -998,6 +1005,7 @@ out: + * concurrent CPU hotplug via cpu_add_remove_lock. 
+ */ + lockup_detector_cleanup(); ++ arch_smt_update(); + return ret; + } + +@@ -1126,6 +1134,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) + ret = cpuhp_up_callbacks(cpu, st, target); + out: + cpus_write_unlock(); ++ arch_smt_update(); + return ret; + } + +@@ -2071,8 +2080,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) + */ + cpuhp_offline_cpu_device(cpu); + } +- if (!ret) ++ if (!ret) { + cpu_smt_control = ctrlval; ++ arch_smt_update(); ++ } + cpu_maps_update_done(); + return ret; + } +@@ -2083,6 +2094,7 @@ static int cpuhp_smt_enable(void) + + cpu_maps_update_begin(); + cpu_smt_control = CPU_SMT_ENABLED; ++ arch_smt_update(); + for_each_present_cpu(cpu) { + /* Skip online CPUs and CPUs on offline nodes */ + if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) +diff --git a/kernel/jump_label.c b/kernel/jump_label.c +index 7c3774ac1d51..70be35a19be2 100644 +--- a/kernel/jump_label.c ++++ b/kernel/jump_label.c +@@ -79,7 +79,7 @@ int static_key_count(struct static_key *key) + } + EXPORT_SYMBOL_GPL(static_key_count); + +-static void static_key_slow_inc_cpuslocked(struct static_key *key) ++void static_key_slow_inc_cpuslocked(struct static_key *key) + { + int v, v1; + +@@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key) + } + EXPORT_SYMBOL_GPL(static_key_disable); + +-static void static_key_slow_dec_cpuslocked(struct static_key *key, ++static void __static_key_slow_dec_cpuslocked(struct static_key *key, + unsigned long rate_limit, + struct delayed_work *work) + { +@@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key, + struct delayed_work *work) + { + cpus_read_lock(); +- static_key_slow_dec_cpuslocked(key, rate_limit, work); ++ __static_key_slow_dec_cpuslocked(key, rate_limit, work); + cpus_read_unlock(); + } + +@@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key) + } + EXPORT_SYMBOL_GPL(static_key_slow_dec); + ++void static_key_slow_dec_cpuslocked(struct static_key *key) ++{ ++ STATIC_KEY_CHECK_USE(); ++ __static_key_slow_dec_cpuslocked(key, 0, NULL); ++} ++ + void static_key_slow_dec_deferred(struct static_key_deferred *key) + { + STATIC_KEY_CHECK_USE(); +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 3bc664662081..0552ddbb25e2 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5617,15 +5617,10 @@ int sched_cpu_activate(unsigned int cpu) + + #ifdef CONFIG_SCHED_SMT + /* +- * The sched_smt_present static key needs to be evaluated on every +- * hotplug event because at boot time SMT might be disabled when +- * the number of booted CPUs is limited. +- * +- * If then later a sibling gets hotplugged, then the key would stay +- * off and SMT scheduling would never be functional. ++ * When going up, increment the number of cores with SMT present. + */ +- if (cpumask_weight(cpu_smt_mask(cpu)) > 1) +- static_branch_enable_cpuslocked(&sched_smt_present); ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_inc_cpuslocked(&sched_smt_present); + #endif + set_cpu_active(cpu, true); + +@@ -5669,6 +5664,14 @@ int sched_cpu_deactivate(unsigned int cpu) + */ + synchronize_rcu_mult(call_rcu, call_rcu_sched); + ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going down, decrement the number of cores with SMT present. 
++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_dec_cpuslocked(&sched_smt_present); ++#endif ++ + if (!sched_smp_initialized) + return 0; + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 2d4d79420e36..7240bb4a4090 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4040,12 +4040,12 @@ static inline bool cfs_bandwidth_used(void) + + void cfs_bandwidth_usage_inc(void) + { +- static_key_slow_inc(&__cfs_bandwidth_used); ++ static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used); + } + + void cfs_bandwidth_usage_dec(void) + { +- static_key_slow_dec(&__cfs_bandwidth_used); ++ static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used); + } + #else /* HAVE_JUMP_LABEL */ + static bool cfs_bandwidth_used(void) +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 63d999dfec80..b3ba6e5e99f2 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -20,6 +20,7 @@ + #include <linux/sched/task_stack.h> + #include <linux/sched/cputime.h> + #include <linux/sched/init.h> ++#include <linux/sched/smt.h> + + #include <linux/u64_stats_sync.h> + #include <linux/kernel_stat.h> +@@ -825,9 +826,6 @@ static inline int cpu_of(struct rq *rq) + + + #ifdef CONFIG_SCHED_SMT +- +-extern struct static_key_false sched_smt_present; +- + extern void __update_idle_core(struct rq *rq); + + static inline void update_idle_core(struct rq *rq) +diff --git a/lib/test_kmod.c b/lib/test_kmod.c +index 96c304fd656a..7abb59ce6613 100644 +--- a/lib/test_kmod.c ++++ b/lib/test_kmod.c +@@ -1221,7 +1221,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev) + + dev_info(test_dev->dev, "removing interface\n"); + misc_deregister(&test_dev->misc_dev); +- kfree(&test_dev->misc_dev.name); + + mutex_unlock(&test_dev->config_mutex); + mutex_unlock(&test_dev->trigger_mutex); +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index adacfe66cf3d..930f2aa3bb4d 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2280,7 +2280,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, + } + } + +-static void freeze_page(struct page *page) ++static void unmap_page(struct page *page) + { + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | + TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; +@@ -2295,7 +2295,7 @@ static void freeze_page(struct page *page) + VM_BUG_ON_PAGE(!unmap_success, page); + } + +-static void unfreeze_page(struct page *page) ++static void remap_page(struct page *page) + { + int i; + if (PageTransHuge(page)) { +@@ -2312,26 +2312,13 @@ static void __split_huge_page_tail(struct page *head, int tail, + struct page *page_tail = head + tail; + + VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); +- VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail); + + /* +- * tail_page->_refcount is zero and not changing from under us. But +- * get_page_unless_zero() may be running from under us on the +- * tail_page. If we used atomic_set() below instead of atomic_inc() or +- * atomic_add(), we would then run atomic_set() concurrently with +- * get_page_unless_zero(), and atomic_set() is implemented in C not +- * using locked ops. spin_unlock on x86 sometime uses locked ops +- * because of PPro errata 66, 92, so unless somebody can guarantee +- * atomic_set() here would be safe on all archs (and not only on x86), +- * it's safer to use atomic_inc()/atomic_add(). ++ * Clone page flags before unfreezing refcount. ++ * ++ * After successful get_page_unless_zero() might follow flags change, ++ * for exmaple lock_page() which set PG_waiters. 
+ */ +- if (PageAnon(head) && !PageSwapCache(head)) { +- page_ref_inc(page_tail); +- } else { +- /* Additional pin to radix tree */ +- page_ref_add(page_tail, 2); +- } +- + page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; + page_tail->flags |= (head->flags & + ((1L << PG_referenced) | +@@ -2344,36 +2331,42 @@ static void __split_huge_page_tail(struct page *head, int tail, + (1L << PG_unevictable) | + (1L << PG_dirty))); + +- /* +- * After clearing PageTail the gup refcount can be released. +- * Page flags also must be visible before we make the page non-compound. +- */ ++ /* ->mapping in first tail page is compound_mapcount */ ++ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, ++ page_tail); ++ page_tail->mapping = head->mapping; ++ page_tail->index = head->index + tail; ++ ++ /* Page flags must be visible before we make the page non-compound. */ + smp_wmb(); + ++ /* ++ * Clear PageTail before unfreezing page refcount. ++ * ++ * After successful get_page_unless_zero() might follow put_page() ++ * which needs correct compound_head(). ++ */ + clear_compound_head(page_tail); + ++ /* Finally unfreeze refcount. Additional reference from page cache. */ ++ page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) || ++ PageSwapCache(head))); ++ + if (page_is_young(head)) + set_page_young(page_tail); + if (page_is_idle(head)) + set_page_idle(page_tail); + +- /* ->mapping in first tail page is compound_mapcount */ +- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, +- page_tail); +- page_tail->mapping = head->mapping; +- +- page_tail->index = head->index + tail; + page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); + lru_add_page_tail(head, page_tail, lruvec, list); + } + + static void __split_huge_page(struct page *page, struct list_head *list, +- unsigned long flags) ++ pgoff_t end, unsigned long flags) + { + struct page *head = compound_head(page); + struct zone *zone = page_zone(head); + struct lruvec *lruvec; +- pgoff_t end = -1; + int i; + + lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat); +@@ -2381,9 +2374,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, + /* complete memcg works before add pages to LRU */ + mem_cgroup_split_huge_fixup(head); + +- if (!PageAnon(page)) +- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE); +- + for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { + __split_huge_page_tail(head, i, lruvec, list); + /* Some pages can be beyond i_size: drop them from page cache */ +@@ -2412,7 +2402,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, + + spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); + +- unfreeze_page(head); ++ remap_page(head); + + for (i = 0; i < HPAGE_PMD_NR; i++) { + struct page *subpage = head + i; +@@ -2555,6 +2545,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + int count, mapcount, extra_pins, ret; + bool mlocked; + unsigned long flags; ++ pgoff_t end; + + VM_BUG_ON_PAGE(is_huge_zero_page(page), page); + VM_BUG_ON_PAGE(!PageLocked(page), page); +@@ -2577,6 +2568,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + ret = -EBUSY; + goto out; + } ++ end = -1; + mapping = NULL; + anon_vma_lock_write(anon_vma); + } else { +@@ -2590,10 +2582,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + + anon_vma = NULL; + i_mmap_lock_read(mapping); ++ ++ /* ++ *__split_huge_page() may need to trim off pages beyond EOF: ++ * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, ++ * 
which cannot be nested inside the page tree lock. So note ++ * end now: i_size itself may be changed at any moment, but ++ * head page lock is good enough to serialize the trimming. ++ */ ++ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); + } + + /* +- * Racy check if we can split the page, before freeze_page() will ++ * Racy check if we can split the page, before unmap_page() will + * split PMDs + */ + if (!can_split_huge_page(head, &extra_pins)) { +@@ -2602,7 +2603,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + } + + mlocked = PageMlocked(page); +- freeze_page(head); ++ unmap_page(head); + VM_BUG_ON_PAGE(compound_mapcount(head), head); + + /* Make sure the page is not on per-CPU pagevec as it takes pin */ +@@ -2639,7 +2640,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + if (mapping) + __dec_node_page_state(page, NR_SHMEM_THPS); + spin_unlock(&pgdata->split_queue_lock); +- __split_huge_page(page, list, flags); ++ __split_huge_page(page, list, end, flags); + if (PageSwapCache(head)) { + swp_entry_t entry = { .val = page_private(head) }; + +@@ -2659,7 +2660,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + fail: if (mapping) + spin_unlock(&mapping->tree_lock); + spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); +- unfreeze_page(head); ++ remap_page(head); + ret = -EBUSY; + } + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index 0a5bb3e8a8a3..d27a73737f1a 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1288,7 +1288,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + * collapse_shmem - collapse small tmpfs/shmem pages into huge one. + * + * Basic scheme is simple, details are more complex: +- * - allocate and freeze a new huge page; ++ * - allocate and lock a new huge page; + * - scan over radix tree replacing old pages the new one + * + swap in pages if necessary; + * + fill in gaps; +@@ -1296,11 +1296,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + * - if replacing succeed: + * + copy data over; + * + free old pages; +- * + unfreeze huge page; ++ * + unlock huge page; + * - if replacing failed; + * + put all pages back and unfreeze them; + * + restore gaps in the radix-tree; +- * + free huge page; ++ * + unlock and free huge page; + */ + static void collapse_shmem(struct mm_struct *mm, + struct address_space *mapping, pgoff_t start, +@@ -1333,18 +1333,15 @@ static void collapse_shmem(struct mm_struct *mm, + goto out; + } + ++ __SetPageLocked(new_page); ++ __SetPageSwapBacked(new_page); + new_page->index = start; + new_page->mapping = mapping; +- __SetPageSwapBacked(new_page); +- __SetPageLocked(new_page); +- BUG_ON(!page_ref_freeze(new_page, 1)); +- + + /* +- * At this point the new_page is 'frozen' (page_count() is zero), locked +- * and not up-to-date. It's safe to insert it into radix tree, because +- * nobody would be able to map it or use it in other way until we +- * unfreeze it. ++ * At this point the new_page is locked and not up-to-date. ++ * It's safe to insert it into the page cache, because nobody would ++ * be able to map it or use it in another way until we unlock it. 
+ */ + + index = start; +@@ -1352,19 +1349,29 @@ static void collapse_shmem(struct mm_struct *mm, + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { + int n = min(iter.index, end) - index; + ++ /* ++ * Stop if extent has been hole-punched, and is now completely ++ * empty (the more obvious i_size_read() check would take an ++ * irq-unsafe seqlock on 32-bit). ++ */ ++ if (n >= HPAGE_PMD_NR) { ++ result = SCAN_TRUNCATED; ++ goto tree_locked; ++ } ++ + /* + * Handle holes in the radix tree: charge it from shmem and + * insert relevant subpage of new_page into the radix-tree. + */ + if (n && !shmem_charge(mapping->host, n)) { + result = SCAN_FAIL; +- break; ++ goto tree_locked; + } +- nr_none += n; + for (; index < min(iter.index, end); index++) { + radix_tree_insert(&mapping->page_tree, index, + new_page + (index % HPAGE_PMD_NR)); + } ++ nr_none += n; + + /* We are done. */ + if (index >= end) +@@ -1380,12 +1387,12 @@ static void collapse_shmem(struct mm_struct *mm, + result = SCAN_FAIL; + goto tree_unlocked; + } +- spin_lock_irq(&mapping->tree_lock); + } else if (trylock_page(page)) { + get_page(page); ++ spin_unlock_irq(&mapping->tree_lock); + } else { + result = SCAN_PAGE_LOCK; +- break; ++ goto tree_locked; + } + + /* +@@ -1394,17 +1401,24 @@ static void collapse_shmem(struct mm_struct *mm, + */ + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageUptodate(page), page); +- VM_BUG_ON_PAGE(PageTransCompound(page), page); ++ ++ /* ++ * If file was truncated then extended, or hole-punched, before ++ * we locked the first page, then a THP might be there already. ++ */ ++ if (PageTransCompound(page)) { ++ result = SCAN_PAGE_COMPOUND; ++ goto out_unlock; ++ } + + if (page_mapping(page) != mapping) { + result = SCAN_TRUNCATED; + goto out_unlock; + } +- spin_unlock_irq(&mapping->tree_lock); + + if (isolate_lru_page(page)) { + result = SCAN_DEL_PAGE_LRU; +- goto out_isolate_failed; ++ goto out_unlock; + } + + if (page_mapped(page)) +@@ -1426,7 +1440,9 @@ static void collapse_shmem(struct mm_struct *mm, + */ + if (!page_ref_freeze(page, 3)) { + result = SCAN_PAGE_COUNT; +- goto out_lru; ++ spin_unlock_irq(&mapping->tree_lock); ++ putback_lru_page(page); ++ goto out_unlock; + } + + /* +@@ -1442,17 +1458,10 @@ static void collapse_shmem(struct mm_struct *mm, + slot = radix_tree_iter_resume(slot, &iter); + index++; + continue; +-out_lru: +- spin_unlock_irq(&mapping->tree_lock); +- putback_lru_page(page); +-out_isolate_failed: +- unlock_page(page); +- put_page(page); +- goto tree_unlocked; + out_unlock: + unlock_page(page); + put_page(page); +- break; ++ goto tree_unlocked; + } + + /* +@@ -1460,14 +1469,18 @@ out_unlock: + * This code only triggers if there's nothing in radix tree + * beyond 'end'. 
+ */ +- if (result == SCAN_SUCCEED && index < end) { ++ if (index < end) { + int n = end - index; + ++ /* Stop if extent has been truncated, and is now empty */ ++ if (n >= HPAGE_PMD_NR) { ++ result = SCAN_TRUNCATED; ++ goto tree_locked; ++ } + if (!shmem_charge(mapping->host, n)) { + result = SCAN_FAIL; + goto tree_locked; + } +- + for (; index < end; index++) { + radix_tree_insert(&mapping->page_tree, index, + new_page + (index % HPAGE_PMD_NR)); +@@ -1475,57 +1488,62 @@ out_unlock: + nr_none += n; + } + ++ __inc_node_page_state(new_page, NR_SHMEM_THPS); ++ if (nr_none) { ++ struct zone *zone = page_zone(new_page); ++ ++ __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); ++ __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); ++ } ++ + tree_locked: + spin_unlock_irq(&mapping->tree_lock); + tree_unlocked: + + if (result == SCAN_SUCCEED) { +- unsigned long flags; +- struct zone *zone = page_zone(new_page); +- + /* + * Replacing old pages with new one has succeed, now we need to + * copy the content and free old pages. + */ ++ index = start; + list_for_each_entry_safe(page, tmp, &pagelist, lru) { ++ while (index < page->index) { ++ clear_highpage(new_page + (index % HPAGE_PMD_NR)); ++ index++; ++ } + copy_highpage(new_page + (page->index % HPAGE_PMD_NR), + page); + list_del(&page->lru); +- unlock_page(page); +- page_ref_unfreeze(page, 1); + page->mapping = NULL; ++ page_ref_unfreeze(page, 1); + ClearPageActive(page); + ClearPageUnevictable(page); ++ unlock_page(page); + put_page(page); ++ index++; + } +- +- local_irq_save(flags); +- __inc_node_page_state(new_page, NR_SHMEM_THPS); +- if (nr_none) { +- __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); +- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); ++ while (index < end) { ++ clear_highpage(new_page + (index % HPAGE_PMD_NR)); ++ index++; + } +- local_irq_restore(flags); + +- /* +- * Remove pte page tables, so we can re-faulti +- * the page as huge. +- */ +- retract_page_tables(mapping, start); +- +- /* Everything is ready, let's unfreeze the new_page */ +- set_page_dirty(new_page); + SetPageUptodate(new_page); +- page_ref_unfreeze(new_page, HPAGE_PMD_NR); ++ page_ref_add(new_page, HPAGE_PMD_NR - 1); ++ set_page_dirty(new_page); + mem_cgroup_commit_charge(new_page, memcg, false, true); + lru_cache_add_anon(new_page); +- unlock_page(new_page); + ++ /* ++ * Remove pte page tables, so we can re-fault the page as huge. 
++ */ ++ retract_page_tables(mapping, start); + *hpage = NULL; + } else { + /* Something went wrong: rollback changes to the radix-tree */ +- shmem_uncharge(mapping->host, nr_none); + spin_lock_irq(&mapping->tree_lock); ++ mapping->nrpages -= nr_none; ++ shmem_uncharge(mapping->host, nr_none); ++ + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, + start) { + if (iter.index >= end) +@@ -1551,19 +1569,18 @@ tree_unlocked: + slot, page); + slot = radix_tree_iter_resume(slot, &iter); + spin_unlock_irq(&mapping->tree_lock); +- putback_lru_page(page); + unlock_page(page); ++ putback_lru_page(page); + spin_lock_irq(&mapping->tree_lock); + } + VM_BUG_ON(nr_none); + spin_unlock_irq(&mapping->tree_lock); + +- /* Unfreeze new_page, caller would take care about freeing it */ +- page_ref_unfreeze(new_page, 1); + mem_cgroup_cancel_charge(new_page, memcg, true); +- unlock_page(new_page); + new_page->mapping = NULL; + } ++ ++ unlock_page(new_page); + out: + VM_BUG_ON(!list_empty(&pagelist)); + /* TODO: tracepoints */ +diff --git a/mm/shmem.c b/mm/shmem.c +index fa08f56fd5e5..ab7ff0aeae2d 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -296,12 +296,14 @@ bool shmem_charge(struct inode *inode, long pages) + if (!shmem_inode_acct_block(inode, pages)) + return false; + ++ /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ ++ inode->i_mapping->nrpages += pages; ++ + spin_lock_irqsave(&info->lock, flags); + info->alloced += pages; + inode->i_blocks += pages * BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); + spin_unlock_irqrestore(&info->lock, flags); +- inode->i_mapping->nrpages += pages; + + return true; + } +@@ -311,6 +313,8 @@ void shmem_uncharge(struct inode *inode, long pages) + struct shmem_inode_info *info = SHMEM_I(inode); + unsigned long flags; + ++ /* nrpages adjustment done by __delete_from_page_cache() or caller */ ++ + spin_lock_irqsave(&info->lock, flags); + info->alloced -= pages; + inode->i_blocks -= pages * BLOCKS_PER_PAGE; +@@ -1528,11 +1532,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, + { + struct page *oldpage, *newpage; + struct address_space *swap_mapping; ++ swp_entry_t entry; + pgoff_t swap_index; + int error; + + oldpage = *pagep; +- swap_index = page_private(oldpage); ++ entry.val = page_private(oldpage); ++ swap_index = swp_offset(entry); + swap_mapping = page_mapping(oldpage); + + /* +@@ -1551,7 +1557,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, + __SetPageLocked(newpage); + __SetPageSwapBacked(newpage); + SetPageUptodate(newpage); +- set_page_private(newpage, swap_index); ++ set_page_private(newpage, entry.val); + SetPageSwapCache(newpage); + + /* +diff --git a/net/ceph/auth.c b/net/ceph/auth.c +index dbde2b3c3c15..fbeee068ea14 100644 +--- a/net/ceph/auth.c ++++ b/net/ceph/auth.c +@@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac, + } + EXPORT_SYMBOL(ceph_auth_update_authorizer); + ++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, ++ struct ceph_authorizer *a, ++ void *challenge_buf, ++ int challenge_buf_len) ++{ ++ int ret = 0; ++ ++ mutex_lock(&ac->mutex); ++ if (ac->ops && ac->ops->add_authorizer_challenge) ++ ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf, ++ challenge_buf_len); ++ mutex_unlock(&ac->mutex); ++ return ret; ++} ++EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); ++ + int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a) + { +diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c +index 
2f4a1baf5f52..2bf9d9f7ddf3 100644 +--- a/net/ceph/auth_x.c ++++ b/net/ceph/auth_x.c +@@ -9,6 +9,7 @@ + + #include <linux/ceph/decode.h> + #include <linux/ceph/auth.h> ++#include <linux/ceph/ceph_features.h> + #include <linux/ceph/libceph.h> + #include <linux/ceph/messenger.h> + +@@ -70,25 +71,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, + return sizeof(u32) + ciphertext_len; + } + ++static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p, ++ int ciphertext_len) ++{ ++ struct ceph_x_encrypt_header *hdr = p; ++ int plaintext_len; ++ int ret; ++ ++ ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len, ++ &plaintext_len); ++ if (ret) ++ return ret; ++ ++ if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) { ++ pr_err("%s bad magic\n", __func__); ++ return -EINVAL; ++ } ++ ++ return plaintext_len - sizeof(*hdr); ++} ++ + static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) + { +- struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); +- int ciphertext_len, plaintext_len; ++ int ciphertext_len; + int ret; + + ceph_decode_32_safe(p, end, ciphertext_len, e_inval); + ceph_decode_need(p, end, ciphertext_len, e_inval); + +- ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, +- &plaintext_len); +- if (ret) ++ ret = __ceph_x_decrypt(secret, *p, ciphertext_len); ++ if (ret < 0) + return ret; + +- if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) +- return -EPERM; +- + *p += ciphertext_len; +- return plaintext_len - sizeof(struct ceph_x_encrypt_header); ++ return ret; + + e_inval: + return -EINVAL; +@@ -275,6 +291,51 @@ bad: + return -EINVAL; + } + ++/* ++ * Encode and encrypt the second part (ceph_x_authorize_b) of the ++ * authorizer. The first part (ceph_x_authorize_a) should already be ++ * encoded. ++ */ ++static int encrypt_authorizer(struct ceph_x_authorizer *au, ++ u64 *server_challenge) ++{ ++ struct ceph_x_authorize_a *msg_a; ++ struct ceph_x_authorize_b *msg_b; ++ void *p, *end; ++ int ret; ++ ++ msg_a = au->buf->vec.iov_base; ++ WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id)); ++ p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len); ++ end = au->buf->vec.iov_base + au->buf->vec.iov_len; ++ ++ msg_b = p + ceph_x_encrypt_offset(); ++ msg_b->struct_v = 2; ++ msg_b->nonce = cpu_to_le64(au->nonce); ++ if (server_challenge) { ++ msg_b->have_challenge = 1; ++ msg_b->server_challenge_plus_one = ++ cpu_to_le64(*server_challenge + 1); ++ } else { ++ msg_b->have_challenge = 0; ++ msg_b->server_challenge_plus_one = 0; ++ } ++ ++ ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); ++ if (ret < 0) ++ return ret; ++ ++ p += ret; ++ if (server_challenge) { ++ WARN_ON(p != end); ++ } else { ++ WARN_ON(p > end); ++ au->buf->vec.iov_len = p - au->buf->vec.iov_base; ++ } ++ ++ return 0; ++} ++ + static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au) + { + ceph_crypto_key_destroy(&au->session_key); +@@ -291,7 +352,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, + int maxlen; + struct ceph_x_authorize_a *msg_a; + struct ceph_x_authorize_b *msg_b; +- void *p, *end; + int ret; + int ticket_blob_len = + (th->ticket_blob ? 
th->ticket_blob->vec.iov_len : 0); +@@ -335,21 +395,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, + dout(" th %p secret_id %lld %lld\n", th, th->secret_id, + le64_to_cpu(msg_a->ticket_blob.secret_id)); + +- p = msg_a + 1; +- p += ticket_blob_len; +- end = au->buf->vec.iov_base + au->buf->vec.iov_len; +- +- msg_b = p + ceph_x_encrypt_offset(); +- msg_b->struct_v = 1; + get_random_bytes(&au->nonce, sizeof(au->nonce)); +- msg_b->nonce = cpu_to_le64(au->nonce); +- ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); +- if (ret < 0) ++ ret = encrypt_authorizer(au, NULL); ++ if (ret) { ++ pr_err("failed to encrypt authorizer: %d", ret); + goto out_au; ++ } + +- p += ret; +- WARN_ON(p > end); +- au->buf->vec.iov_len = p - au->buf->vec.iov_base; + dout(" built authorizer nonce %llx len %d\n", au->nonce, + (int)au->buf->vec.iov_len); + return 0; +@@ -626,6 +678,54 @@ static int ceph_x_update_authorizer( + return 0; + } + ++static int decrypt_authorize_challenge(struct ceph_x_authorizer *au, ++ void *challenge_buf, ++ int challenge_buf_len, ++ u64 *server_challenge) ++{ ++ struct ceph_x_authorize_challenge *ch = ++ challenge_buf + sizeof(struct ceph_x_encrypt_header); ++ int ret; ++ ++ /* no leading len */ ++ ret = __ceph_x_decrypt(&au->session_key, challenge_buf, ++ challenge_buf_len); ++ if (ret < 0) ++ return ret; ++ if (ret < sizeof(*ch)) { ++ pr_err("bad size %d for ceph_x_authorize_challenge\n", ret); ++ return -EINVAL; ++ } ++ ++ *server_challenge = le64_to_cpu(ch->server_challenge); ++ return 0; ++} ++ ++static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac, ++ struct ceph_authorizer *a, ++ void *challenge_buf, ++ int challenge_buf_len) ++{ ++ struct ceph_x_authorizer *au = (void *)a; ++ u64 server_challenge; ++ int ret; ++ ++ ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len, ++ &server_challenge); ++ if (ret) { ++ pr_err("failed to decrypt authorize challenge: %d", ret); ++ return ret; ++ } ++ ++ ret = encrypt_authorizer(au, &server_challenge); ++ if (ret) { ++ pr_err("failed to encrypt authorizer w/ challenge: %d", ret); ++ return ret; ++ } ++ ++ return 0; ++} ++ + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a) + { +@@ -637,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, + ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); + if (ret < 0) + return ret; +- if (ret != sizeof(*reply)) +- return -EPERM; ++ if (ret < sizeof(*reply)) { ++ pr_err("bad size %d for ceph_x_authorize_reply\n", ret); ++ return -EINVAL; ++ } + + if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) + ret = -EPERM; +@@ -704,26 +806,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, + __le64 *psig) + { + void *enc_buf = au->enc_buf; +- struct { +- __le32 len; +- __le32 header_crc; +- __le32 front_crc; +- __le32 middle_crc; +- __le32 data_crc; +- } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); + int ret; + +- sigblock->len = cpu_to_le32(4*sizeof(u32)); +- sigblock->header_crc = msg->hdr.crc; +- sigblock->front_crc = msg->footer.front_crc; +- sigblock->middle_crc = msg->footer.middle_crc; +- sigblock->data_crc = msg->footer.data_crc; +- ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, +- sizeof(*sigblock)); +- if (ret < 0) +- return ret; ++ if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) { ++ struct { ++ __le32 len; ++ __le32 header_crc; ++ __le32 front_crc; ++ 
__le32 middle_crc; ++ __le32 data_crc; ++ } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); ++ ++ sigblock->len = cpu_to_le32(4*sizeof(u32)); ++ sigblock->header_crc = msg->hdr.crc; ++ sigblock->front_crc = msg->footer.front_crc; ++ sigblock->middle_crc = msg->footer.middle_crc; ++ sigblock->data_crc = msg->footer.data_crc; ++ ++ ret = ceph_x_encrypt(&au->session_key, enc_buf, ++ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock)); ++ if (ret < 0) ++ return ret; ++ ++ *psig = *(__le64 *)(enc_buf + sizeof(u32)); ++ } else { ++ struct { ++ __le32 header_crc; ++ __le32 front_crc; ++ __le32 front_len; ++ __le32 middle_crc; ++ __le32 middle_len; ++ __le32 data_crc; ++ __le32 data_len; ++ __le32 seq_lower_word; ++ } __packed *sigblock = enc_buf; ++ struct { ++ __le64 a, b, c, d; ++ } __packed *penc = enc_buf; ++ int ciphertext_len; ++ ++ sigblock->header_crc = msg->hdr.crc; ++ sigblock->front_crc = msg->footer.front_crc; ++ sigblock->front_len = msg->hdr.front_len; ++ sigblock->middle_crc = msg->footer.middle_crc; ++ sigblock->middle_len = msg->hdr.middle_len; ++ sigblock->data_crc = msg->footer.data_crc; ++ sigblock->data_len = msg->hdr.data_len; ++ sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq; ++ ++ /* no leading len, no ceph_x_encrypt_header */ ++ ret = ceph_crypt(&au->session_key, true, enc_buf, ++ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock), ++ &ciphertext_len); ++ if (ret) ++ return ret; ++ ++ *psig = penc->a ^ penc->b ^ penc->c ^ penc->d; ++ } + +- *psig = *(__le64 *)(enc_buf + sizeof(u32)); + return 0; + } + +@@ -778,6 +918,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = { + .handle_reply = ceph_x_handle_reply, + .create_authorizer = ceph_x_create_authorizer, + .update_authorizer = ceph_x_update_authorizer, ++ .add_authorizer_challenge = ceph_x_add_authorizer_challenge, + .verify_authorizer_reply = ceph_x_verify_authorizer_reply, + .invalidate_authorizer = ceph_x_invalidate_authorizer, + .reset = ceph_x_reset, +diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h +index 32c13d763b9a..24b0b74564d0 100644 +--- a/net/ceph/auth_x_protocol.h ++++ b/net/ceph/auth_x_protocol.h +@@ -70,6 +70,13 @@ struct ceph_x_authorize_a { + struct ceph_x_authorize_b { + __u8 struct_v; + __le64 nonce; ++ __u8 have_challenge; ++ __le64 server_challenge_plus_one; ++} __attribute__ ((packed)); ++ ++struct ceph_x_authorize_challenge { ++ __u8 struct_v; ++ __le64 server_challenge; + } __attribute__ ((packed)); + + struct ceph_x_authorize_reply { +diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c +index 5281da82371a..f864807284d4 100644 +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -1411,24 +1411,26 @@ static void prepare_write_keepalive(struct ceph_connection *con) + * Connection negotiation. 
+ */ + +-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con, +- int *auth_proto) ++static int get_connect_authorizer(struct ceph_connection *con) + { + struct ceph_auth_handshake *auth; ++ int auth_proto; + + if (!con->ops->get_authorizer) { ++ con->auth = NULL; + con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; + con->out_connect.authorizer_len = 0; +- return NULL; ++ return 0; + } + +- auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); ++ auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry); + if (IS_ERR(auth)) +- return auth; ++ return PTR_ERR(auth); + +- con->auth_reply_buf = auth->authorizer_reply_buf; +- con->auth_reply_buf_len = auth->authorizer_reply_buf_len; +- return auth; ++ con->auth = auth; ++ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); ++ con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); ++ return 0; + } + + /* +@@ -1444,12 +1446,22 @@ static void prepare_write_banner(struct ceph_connection *con) + con_flag_set(con, CON_FLAG_WRITE_PENDING); + } + ++static void __prepare_write_connect(struct ceph_connection *con) ++{ ++ con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect); ++ if (con->auth) ++ con_out_kvec_add(con, con->auth->authorizer_buf_len, ++ con->auth->authorizer_buf); ++ ++ con->out_more = 0; ++ con_flag_set(con, CON_FLAG_WRITE_PENDING); ++} ++ + static int prepare_write_connect(struct ceph_connection *con) + { + unsigned int global_seq = get_global_seq(con->msgr, 0); + int proto; +- int auth_proto; +- struct ceph_auth_handshake *auth; ++ int ret; + + switch (con->peer_name.type) { + case CEPH_ENTITY_TYPE_MON: +@@ -1476,24 +1488,11 @@ static int prepare_write_connect(struct ceph_connection *con) + con->out_connect.protocol_version = cpu_to_le32(proto); + con->out_connect.flags = 0; + +- auth_proto = CEPH_AUTH_UNKNOWN; +- auth = get_connect_authorizer(con, &auth_proto); +- if (IS_ERR(auth)) +- return PTR_ERR(auth); +- +- con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); +- con->out_connect.authorizer_len = auth ? 
+- cpu_to_le32(auth->authorizer_buf_len) : 0; +- +- con_out_kvec_add(con, sizeof (con->out_connect), +- &con->out_connect); +- if (auth && auth->authorizer_buf_len) +- con_out_kvec_add(con, auth->authorizer_buf_len, +- auth->authorizer_buf); +- +- con->out_more = 0; +- con_flag_set(con, CON_FLAG_WRITE_PENDING); ++ ret = get_connect_authorizer(con); ++ if (ret) ++ return ret; + ++ __prepare_write_connect(con); + return 0; + } + +@@ -1753,11 +1752,21 @@ static int read_partial_connect(struct ceph_connection *con) + if (ret <= 0) + goto out; + +- size = le32_to_cpu(con->in_reply.authorizer_len); +- end += size; +- ret = read_partial(con, end, size, con->auth_reply_buf); +- if (ret <= 0) +- goto out; ++ if (con->auth) { ++ size = le32_to_cpu(con->in_reply.authorizer_len); ++ if (size > con->auth->authorizer_reply_buf_len) { ++ pr_err("authorizer reply too big: %d > %zu\n", size, ++ con->auth->authorizer_reply_buf_len); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ end += size; ++ ret = read_partial(con, end, size, ++ con->auth->authorizer_reply_buf); ++ if (ret <= 0) ++ goto out; ++ } + + dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", + con, (int)con->in_reply.tag, +@@ -1765,7 +1774,6 @@ static int read_partial_connect(struct ceph_connection *con) + le32_to_cpu(con->in_reply.global_seq)); + out: + return ret; +- + } + + /* +@@ -2048,12 +2056,27 @@ static int process_connect(struct ceph_connection *con) + + dout("process_connect on %p tag %d\n", con, (int)con->in_tag); + +- if (con->auth_reply_buf) { ++ if (con->auth) { + /* + * Any connection that defines ->get_authorizer() +- * should also define ->verify_authorizer_reply(). ++ * should also define ->add_authorizer_challenge() and ++ * ->verify_authorizer_reply(). ++ * + * See get_connect_authorizer(). 
+ */ ++ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ++ ret = con->ops->add_authorizer_challenge( ++ con, con->auth->authorizer_reply_buf, ++ le32_to_cpu(con->in_reply.authorizer_len)); ++ if (ret < 0) ++ return ret; ++ ++ con_out_kvec_reset(con); ++ __prepare_write_connect(con); ++ prepare_read_connect(con); ++ return 0; ++ } ++ + ret = con->ops->verify_authorizer_reply(con); + if (ret < 0) { + con->error_msg = "bad authorize reply"; +diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c +index 2814dba5902d..53ea2d48896c 100644 +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -5292,6 +5292,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, + return auth; + } + ++static int add_authorizer_challenge(struct ceph_connection *con, ++ void *challenge_buf, int challenge_buf_len) ++{ ++ struct ceph_osd *o = con->private; ++ struct ceph_osd_client *osdc = o->o_osdc; ++ struct ceph_auth_client *ac = osdc->client->monc.auth; ++ ++ return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, ++ challenge_buf, challenge_buf_len); ++} + + static int verify_authorizer_reply(struct ceph_connection *con) + { +@@ -5341,6 +5351,7 @@ static const struct ceph_connection_operations osd_con_ops = { + .put = put_osd_con, + .dispatch = dispatch, + .get_authorizer = get_authorizer, ++ .add_authorizer_challenge = add_authorizer_challenge, + .verify_authorizer_reply = verify_authorizer_reply, + .invalidate_authorizer = invalidate_authorizer, + .alloc_msg = alloc_msg, +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index c19a118f9f82..4067fa3fcbb2 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4882,6 +4882,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) + nf_reset(skb); + nf_reset_trace(skb); + ++#ifdef CONFIG_NET_SWITCHDEV ++ skb->offload_fwd_mark = 0; ++#endif ++ + if (!xnet) + return; + +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index 8d1a7c900393..88d5b2645bb0 100644 +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2433,7 +2433,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) + void *ph; + __u32 ts; + +- ph = skb_shinfo(skb)->destructor_arg; ++ ph = skb_zcopy_get_nouarg(skb); + packet_dec_pending(&po->tx_ring); + + ts = __packet_set_timestamp(po, ph, skb); +@@ -2499,7 +2499,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, + skb->priority = po->sk.sk_priority; + skb->mark = po->sk.sk_mark; + sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); +- skb_shinfo(skb)->destructor_arg = ph.raw; ++ skb_zcopy_set_nouarg(skb, ph.raw); + + skb_reserve(skb, hlen); + skb_reset_network_header(skb); +diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c +index 4f2971f528db..e903bdd39b9f 100644 +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -46,8 +46,28 @@ MODULE_DESCRIPTION("Transport Layer Security Support"); + MODULE_LICENSE("Dual BSD/GPL"); + MODULE_ALIAS_TCP_ULP("tls"); + +-static struct proto tls_base_prot; +-static struct proto tls_sw_prot; ++enum { ++ TLSV4, ++ TLSV6, ++ TLS_NUM_PROTS, ++}; ++ ++enum { ++ TLS_BASE_TX, ++ TLS_SW_TX, ++ TLS_NUM_CONFIG, ++}; ++ ++static struct proto *saved_tcpv6_prot; ++static DEFINE_MUTEX(tcpv6_prot_mutex); ++static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG]; ++ ++static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) ++{ ++ int ip_ver = sk->sk_family == AF_INET6 ? 
TLSV6 : TLSV4; ++ ++ sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf]; ++} + + int wait_on_pending_writer(struct sock *sk, long *timeo) + { +@@ -239,6 +259,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) + void (*sk_proto_close)(struct sock *sk, long timeout); + + lock_sock(sk); ++ sk_proto_close = ctx->sk_proto_close; ++ ++ if (ctx->tx_conf == TLS_BASE_TX) { ++ tls_ctx_free(ctx); ++ goto skip_tx_cleanup; ++ } + + if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) + tls_handle_open_record(sk, 0); +@@ -255,13 +281,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) + sg++; + } + } +- ctx->free_resources(sk); ++ + kfree(ctx->rec_seq); + kfree(ctx->iv); + +- sk_proto_close = ctx->sk_proto_close; +- tls_ctx_free(ctx); ++ if (ctx->tx_conf == TLS_SW_TX) { ++ tls_sw_free_tx_resources(sk); ++ tls_ctx_free(ctx); ++ } + ++skip_tx_cleanup: + release_sock(sk); + sk_proto_close(sk, timeout); + } +@@ -362,48 +391,43 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, + static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, + unsigned int optlen) + { +- struct tls_crypto_info *crypto_info, tmp_crypto_info; ++ struct tls_crypto_info *crypto_info; + struct tls_context *ctx = tls_get_ctx(sk); +- struct proto *prot = NULL; + int rc = 0; ++ int tx_conf; + + if (!optval || (optlen < sizeof(*crypto_info))) { + rc = -EINVAL; + goto out; + } + +- rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); ++ crypto_info = &ctx->crypto_send.info; ++ /* Currently we don't support set crypto info more than one time */ ++ if (TLS_CRYPTO_INFO_READY(crypto_info)) { ++ rc = -EBUSY; ++ goto out; ++ } ++ ++ rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto out; + } + + /* check version */ +- if (tmp_crypto_info.version != TLS_1_2_VERSION) { ++ if (crypto_info->version != TLS_1_2_VERSION) { + rc = -ENOTSUPP; +- goto out; +- } +- +- /* get user crypto info */ +- crypto_info = &ctx->crypto_send.info; +- +- /* Currently we don't support set crypto info more than one time */ +- if (TLS_CRYPTO_INFO_READY(crypto_info)) { +- rc = -EBUSY; +- goto out; ++ goto err_crypto_info; + } + +- switch (tmp_crypto_info.cipher_type) { ++ switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { + rc = -EINVAL; + goto err_crypto_info; + } +- rc = copy_from_user( +- crypto_info, +- optval, +- sizeof(struct tls12_crypto_info_aes_gcm_128)); +- ++ rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), ++ optlen - sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto err_crypto_info; +@@ -415,18 +439,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, + goto err_crypto_info; + } + +- ctx->sk_write_space = sk->sk_write_space; +- sk->sk_write_space = tls_write_space; +- +- ctx->sk_proto_close = sk->sk_prot->close; +- + /* currently SW is default, we will have ethtool in future */ + rc = tls_set_sw_offload(sk, ctx); +- prot = &tls_sw_prot; ++ tx_conf = TLS_SW_TX; + if (rc) + goto err_crypto_info; + +- sk->sk_prot = prot; ++ ctx->tx_conf = tx_conf; ++ update_sk_prot(sk, ctx); ++ ctx->sk_write_space = sk->sk_write_space; ++ sk->sk_write_space = tls_write_space; + goto out; + + err_crypto_info: +@@ -464,8 +486,21 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, + return do_tls_setsockopt(sk, optname, optval, optlen); + } + ++static void build_protos(struct proto *prot, struct proto *base) ++{ ++ 
prot[TLS_BASE_TX] = *base; ++ prot[TLS_BASE_TX].setsockopt = tls_setsockopt; ++ prot[TLS_BASE_TX].getsockopt = tls_getsockopt; ++ prot[TLS_BASE_TX].close = tls_sk_proto_close; ++ ++ prot[TLS_SW_TX] = prot[TLS_BASE_TX]; ++ prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; ++ prot[TLS_SW_TX].sendpage = tls_sw_sendpage; ++} ++ + static int tls_init(struct sock *sk) + { ++ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + struct inet_connection_sock *icsk = inet_csk(sk); + struct tls_context *ctx; + int rc = 0; +@@ -488,7 +523,21 @@ static int tls_init(struct sock *sk) + icsk->icsk_ulp_data = ctx; + ctx->setsockopt = sk->sk_prot->setsockopt; + ctx->getsockopt = sk->sk_prot->getsockopt; +- sk->sk_prot = &tls_base_prot; ++ ctx->sk_proto_close = sk->sk_prot->close; ++ ++ /* Build IPv6 TLS whenever the address of tcpv6_prot changes */ ++ if (ip_ver == TLSV6 && ++ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { ++ mutex_lock(&tcpv6_prot_mutex); ++ if (likely(sk->sk_prot != saved_tcpv6_prot)) { ++ build_protos(tls_prots[TLSV6], sk->sk_prot); ++ smp_store_release(&saved_tcpv6_prot, sk->sk_prot); ++ } ++ mutex_unlock(&tcpv6_prot_mutex); ++ } ++ ++ ctx->tx_conf = TLS_BASE_TX; ++ update_sk_prot(sk, ctx); + out: + return rc; + } +@@ -501,14 +550,7 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { + + static int __init tls_register(void) + { +- tls_base_prot = tcp_prot; +- tls_base_prot.setsockopt = tls_setsockopt; +- tls_base_prot.getsockopt = tls_getsockopt; +- +- tls_sw_prot = tls_base_prot; +- tls_sw_prot.sendmsg = tls_sw_sendmsg; +- tls_sw_prot.sendpage = tls_sw_sendpage; +- tls_sw_prot.close = tls_sk_proto_close; ++ build_protos(tls_prots[TLSV4], &tcp_prot); + + tcp_register_ulp(&tcp_tls_ulp_ops); + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 6ae9ca567d6c..d18d4a478e4f 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -388,7 +388,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); +- int ret = 0; ++ int ret; + int required_size; + long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + bool eor = !(msg->msg_flags & MSG_MORE); +@@ -403,7 +403,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) + + lock_sock(sk); + +- if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo)) ++ ret = tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo); ++ if (ret) + goto send_end; + + if (unlikely(msg->msg_controllen)) { +@@ -539,7 +540,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); +- int ret = 0; ++ int ret; + long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + bool eor; + size_t orig_size = size; +@@ -559,7 +560,8 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, + + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + +- if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo)) ++ ret = tls_complete_pending_work(sk, tls_ctx, flags, &timeo); ++ if (ret) + goto sendpage_end; + + /* Call the sk_stream functions to manage the sndbuf mem. 
*/ +@@ -646,7 +648,7 @@ sendpage_end: + return ret; + } + +-static void tls_sw_free_resources(struct sock *sk) ++void tls_sw_free_tx_resources(struct sock *sk) + { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); +@@ -685,7 +687,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) + } + + ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; +- ctx->free_resources = tls_sw_free_resources; + + crypto_info = &ctx->crypto_send.info; + switch (crypto_info->cipher_type) { +diff --git a/scripts/Makefile.build b/scripts/Makefile.build +index 7143da06d702..be9e5deb58ba 100644 +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -272,10 +272,8 @@ else + objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) + endif + ifdef CONFIG_RETPOLINE +-ifneq ($(RETPOLINE_CFLAGS),) + objtool_args += --retpoline + endif +-endif + + + ifdef CONFIG_MODVERSIONS +diff --git a/sound/core/control.c b/sound/core/control.c +index af7e6165e21e..36571cd49be3 100644 +--- a/sound/core/control.c ++++ b/sound/core/control.c +@@ -347,6 +347,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) + return 0; + } + ++/* add a new kcontrol object; call with card->controls_rwsem locked */ ++static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) ++{ ++ struct snd_ctl_elem_id id; ++ unsigned int idx; ++ unsigned int count; ++ ++ id = kcontrol->id; ++ if (id.index > UINT_MAX - kcontrol->count) ++ return -EINVAL; ++ ++ if (snd_ctl_find_id(card, &id)) { ++ dev_err(card->dev, ++ "control %i:%i:%i:%s:%i is already present\n", ++ id.iface, id.device, id.subdevice, id.name, id.index); ++ return -EBUSY; ++ } ++ ++ if (snd_ctl_find_hole(card, kcontrol->count) < 0) ++ return -ENOMEM; ++ ++ list_add_tail(&kcontrol->list, &card->controls); ++ card->controls_count += kcontrol->count; ++ kcontrol->id.numid = card->last_numid + 1; ++ card->last_numid += kcontrol->count; ++ ++ id = kcontrol->id; ++ count = kcontrol->count; ++ for (idx = 0; idx < count; idx++, id.index++, id.numid++) ++ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); ++ ++ return 0; ++} ++ + /** + * snd_ctl_add - add the control instance to the card + * @card: the card instance +@@ -363,45 +397,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) + */ + int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) + { +- struct snd_ctl_elem_id id; +- unsigned int idx; +- unsigned int count; + int err = -EINVAL; + + if (! 
kcontrol) + return err; + if (snd_BUG_ON(!card || !kcontrol->info)) + goto error; +- id = kcontrol->id; +- if (id.index > UINT_MAX - kcontrol->count) +- goto error; + + down_write(&card->controls_rwsem); +- if (snd_ctl_find_id(card, &id)) { +- up_write(&card->controls_rwsem); +- dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n", +- id.iface, +- id.device, +- id.subdevice, +- id.name, +- id.index); +- err = -EBUSY; +- goto error; +- } +- if (snd_ctl_find_hole(card, kcontrol->count) < 0) { +- up_write(&card->controls_rwsem); +- err = -ENOMEM; +- goto error; +- } +- list_add_tail(&kcontrol->list, &card->controls); +- card->controls_count += kcontrol->count; +- kcontrol->id.numid = card->last_numid + 1; +- card->last_numid += kcontrol->count; +- id = kcontrol->id; +- count = kcontrol->count; ++ err = __snd_ctl_add(card, kcontrol); + up_write(&card->controls_rwsem); +- for (idx = 0; idx < count; idx++, id.index++, id.numid++) +- snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); ++ if (err < 0) ++ goto error; + return 0; + + error: +@@ -1360,9 +1367,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, + kctl->tlv.c = snd_ctl_elem_user_tlv; + + /* This function manage to free the instance on failure. */ +- err = snd_ctl_add(card, kctl); +- if (err < 0) +- return err; ++ down_write(&card->controls_rwsem); ++ err = __snd_ctl_add(card, kctl); ++ if (err < 0) { ++ snd_ctl_free_one(kctl); ++ goto unlock; ++ } + offset = snd_ctl_get_ioff(kctl, &info->id); + snd_ctl_build_ioff(&info->id, kctl, offset); + /* +@@ -1373,10 +1383,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, + * which locks the element. + */ + +- down_write(&card->controls_rwsem); + card->user_ctl_count++; +- up_write(&card->controls_rwsem); + ++ unlock: ++ up_write(&card->controls_rwsem); + return 0; + } + +diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c +index 8a852042a066..91cd305cabd7 100644 +--- a/sound/isa/wss/wss_lib.c ++++ b/sound/isa/wss/wss_lib.c +@@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream) + if (err < 0) { + if (chip->release_dma) + chip->release_dma(chip, chip->dma_private_data, chip->dma1); +- snd_free_pages(runtime->dma_area, runtime->dma_bytes); + return err; + } + chip->playback_substream = substream; +@@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream) + if (err < 0) { + if (chip->release_dma) + chip->release_dma(chip, chip->dma_private_data, chip->dma2); +- snd_free_pages(runtime->dma_area, runtime->dma_bytes); + return err; + } + chip->capture_substream = substream; +diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c +index 1ef7cdf1d3e8..38f355ae1863 100644 +--- a/sound/pci/ac97/ac97_codec.c ++++ b/sound/pci/ac97/ac97_codec.c +@@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_ + { + struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; +- int shift = (kcontrol->private_value >> 8) & 0xff; ++ int shift = (kcontrol->private_value >> 8) & 0x0f; + int mask = (kcontrol->private_value >> 16) & 0xff; + // int invert = (kcontrol->private_value >> 24) & 0xff; + unsigned short value, old, new; +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index eb8807de3ebc..66b0a124beae 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -343,6 +343,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) + case 0x10ec0285: + 
case 0x10ec0298: + case 0x10ec0289: ++ case 0x10ec0300: + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + break; + case 0x10ec0275: +@@ -2758,6 +2759,7 @@ enum { + ALC269_TYPE_ALC215, + ALC269_TYPE_ALC225, + ALC269_TYPE_ALC294, ++ ALC269_TYPE_ALC300, + ALC269_TYPE_ALC700, + }; + +@@ -2792,6 +2794,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) + case ALC269_TYPE_ALC215: + case ALC269_TYPE_ALC225: + case ALC269_TYPE_ALC294: ++ case ALC269_TYPE_ALC300: + case ALC269_TYPE_ALC700: + ssids = alc269_ssids; + break; +@@ -6408,6 +6411,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), + SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), ++ SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), + SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), +@@ -7089,6 +7093,10 @@ static int patch_alc269(struct hda_codec *codec) + spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ + alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ + break; ++ case 0x10ec0300: ++ spec->codec_variant = ALC269_TYPE_ALC300; ++ spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ ++ break; + case 0x10ec0700: + case 0x10ec0701: + case 0x10ec0703: +@@ -8160,6 +8168,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { + HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269), ++ HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269), + HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861), + HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd), + HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861), +diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c +index e73c962590eb..079063d8038d 100644 +--- a/sound/sparc/cs4231.c ++++ b/sound/sparc/cs4231.c +@@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream) + runtime->hw = snd_cs4231_playback; + + err = snd_cs4231_open(chip, CS4231_MODE_PLAY); +- if (err < 0) { +- snd_free_pages(runtime->dma_area, runtime->dma_bytes); ++ if (err < 0) + return err; +- } + chip->playback_substream = substream; + chip->p_periods_sent = 0; + snd_pcm_set_sync(substream); +@@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream) + runtime->hw = snd_cs4231_capture; + + err = snd_cs4231_open(chip, CS4231_MODE_RECORD); +- if (err < 0) { +- snd_free_pages(runtime->dma_area, runtime->dma_bytes); ++ if (err < 0) + return err; +- } + chip->capture_substream = substream; + chip->c_periods_sent = 0; + snd_pcm_set_sync(substream); |
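
The kernel/bpf/verifier.c hunks earlier in this patch rewrite the instruction stream so that any stack store flagged with sanitize_stack_off is preceded by a store of zero to the same slot (the speculative-store-bypass / CVE-2018-3639 case). Below is a toy, userspace-only sketch of that prepend-a-zeroing-store pattern; the struct insn layout, field names and helper names are invented for illustration and are not BPF structures.

#include <stdio.h>
#include <string.h>

struct insn {
        const char *op;
        int off;        /* stack offset */
        int imm;
        int sanitize;   /* analogue of insn_aux_data[].sanitize_stack_off */
};

#define MAX_INSNS 16

static int patch_prog(struct insn *prog, int cnt)
{
        for (int i = 0; i < cnt; i++) {
                if (!prog[i].sanitize)
                        continue;
                if (cnt == MAX_INSNS)
                        return -1;
                /* make room, then prepend a "store 0" to the same slot */
                memmove(&prog[i + 1], &prog[i], (cnt - i) * sizeof(*prog));
                prog[i] = (struct insn){ "st_zero", prog[i + 1].off, 0, 0 };
                cnt++;
                i++;            /* skip over the original store we just kept */
        }
        return cnt;
}

int main(void)
{
        struct insn prog[MAX_INSNS] = {
                { "mov",  0, 1, 0 },
                { "stx", -8, 0, 1 },    /* flagged: slot fp-8 was reused */
                { "exit", 0, 0, 0 },
        };
        int cnt = patch_prog(prog, 3);

        for (int i = 0; i < cnt; i++)
                printf("%d: %s off=%d\n", i, prog[i].op, prog[i].off);
        return 0;
}

In the real verifier the replacement is done through bpf_patch_insn_data(), which also adjusts jump offsets; the sketch only shows the prepend-then-skip shape of the rewrite.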
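
The sched_smt_present change in kernel/sched/core.c replaces the one-shot static_branch_enable_cpuslocked() with reference counting: a core bumps the key when its second sibling comes online (sibling-mask weight hits 2) and drops it again on the 2-to-1 transition. A minimal userspace analogue of that counting follows; the names here (online[][], smt_cores, cpu_up/cpu_down) are editorial stand-ins for the cpumask and the static key, not kernel symbols.

#include <assert.h>
#include <stdio.h>

#define NR_CORES 2
#define THREADS_PER_CORE 2

static int online[NR_CORES][THREADS_PER_CORE]; /* 1 = sibling online */
static int smt_cores;                          /* analogue of the static key count */

static int core_weight(int core)
{
        int w = 0;
        for (int t = 0; t < THREADS_PER_CORE; t++)
                w += online[core][t];
        return w;
}

static void cpu_up(int core, int thread)
{
        online[core][thread] = 1;
        if (core_weight(core) == 2)     /* second sibling just arrived */
                smt_cores++;
}

static void cpu_down(int core, int thread)
{
        if (core_weight(core) == 2)     /* about to drop back to one sibling */
                smt_cores--;
        online[core][thread] = 0;
}

int main(void)
{
        cpu_up(0, 0); cpu_up(0, 1);     /* core 0 becomes SMT */
        cpu_up(1, 0);                   /* core 1 single-threaded */
        assert(smt_cores == 1);

        cpu_down(0, 1);                 /* core 0 loses its sibling */
        assert(smt_cores == 0);         /* "sched_smt_present" would go false */

        printf("smt_cores tracking ok\n");
        return 0;
}

The "== 2" test is what keeps the inc/dec pairs balanced: each core contributes exactly one reference, no matter how many times individual siblings are hotplugged.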
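
For peers that advertise CEPHX_V2, the new branch in calc_signature() (net/ceph/auth_x.c) no longer wraps the CRC block in a ceph_x_encrypt() envelope: it encrypts the CRC/length block in place, with no leading length and no cephx header, and folds the first four 64-bit words of the ciphertext into the signature by XOR. A rough sketch of just that folding step is below; fake_crypt() is a placeholder for ceph_crypt() so the example runs stand-alone, and the byte values are arbitrary.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void fake_crypt(uint8_t *buf, size_t len)
{
        /* stand-in for ceph_crypt(); the real code encrypts with the session key */
        for (size_t i = 0; i < len; i++)
                buf[i] ^= 0xA5;
}

static uint64_t fold_signature(const uint8_t *enc)
{
        uint64_t a, b, c, d;

        /* the patch reads the ciphertext as four 64-bit words: a ^ b ^ c ^ d */
        memcpy(&a, enc +  0, 8);
        memcpy(&b, enc +  8, 8);
        memcpy(&c, enc + 16, 8);
        memcpy(&d, enc + 24, 8);
        return a ^ b ^ c ^ d;
}

int main(void)
{
        /* header/front/middle/data CRCs, lengths and seq word: 32 bytes total */
        uint8_t sigblock[32] = { 1, 2, 3 };

        fake_crypt(sigblock, sizeof(sigblock));
        printf("psig = %#llx\n",
               (unsigned long long)fold_signature(sigblock));
        return 0;
}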
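
The net/tls rework stops editing a single global tls_sw_prot and instead builds a small table of proto variants, indexed by address family (TLSV4/TLSV6) and TX configuration (TLS_BASE_TX/TLS_SW_TX), so update_sk_prot() just points each socket at the right precomputed entry. A compact userspace sketch of that table-building shape follows; struct ops and the method names are stand-ins for struct proto, not kernel definitions.

#include <stdio.h>

enum { V4, V6, NUM_FAMS };
enum { BASE_TX, SW_TX, NUM_CONFS };

struct ops {
        const char *(*sendmsg)(void);
        const char *(*close)(void);
};

static const char *tcp_sendmsg(void)    { return "tcp_sendmsg"; }
static const char *tcp_close(void)      { return "tcp_close"; }
static const char *tls_close(void)      { return "tls_close"; }
static const char *tls_sw_sendmsg(void) { return "tls_sw_sendmsg"; }

static struct ops table[NUM_FAMS][NUM_CONFS];

static void build_ops(struct ops *row, const struct ops *base)
{
        row[BASE_TX] = *base;           /* start from the transport's own ops */
        row[BASE_TX].close = tls_close; /* hook close for TLS teardown */

        row[SW_TX] = row[BASE_TX];      /* derive the SW TX variant from it */
        row[SW_TX].sendmsg = tls_sw_sendmsg;
}

int main(void)
{
        struct ops tcp = { tcp_sendmsg, tcp_close };

        build_ops(table[V4], &tcp);
        build_ops(table[V6], &tcp);     /* the patch builds the IPv6 row lazily */

        printf("v4/base: %s, v4/sw: %s\n",
               table[V4][BASE_TX].sendmsg(), table[V4][SW_TX].sendmsg());
        return 0;
}

Deriving the SW_TX row from the BASE_TX row, as build_protos() does, keeps the overrides additive: anything not explicitly hooked falls through to the underlying TCP callbacks.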
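
The sound/core/control.c change factors the duplicate-ID check, list insertion and numid accounting into __snd_ctl_add(), which runs entirely under controls_rwsem, so snd_ctl_elem_add() can take the write lock once instead of dropping it between snd_ctl_add() and the user_ctl_count update. Below is a small pthread sketch of that check-and-insert-under-one-write-lock shape; the integer "registry" and all names are illustrative only, not ALSA code. Build with cc file.c -lpthread.

#include <pthread.h>
#include <stdio.h>

#define MAX_IDS 16

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static int ids[MAX_IDS];
static int nr_ids;

/* analogue of __snd_ctl_add(): caller must hold the write lock */
static int __add_id_locked(int id)
{
        for (int i = 0; i < nr_ids; i++)
                if (ids[i] == id)
                        return -1;      /* already present (-EBUSY in the patch) */
        if (nr_ids == MAX_IDS)
                return -1;
        ids[nr_ids++] = id;
        return 0;
}

/* analogue of snd_ctl_add(): public entry point that takes the lock itself */
static int add_id(int id)
{
        pthread_rwlock_wrlock(&lock);
        int err = __add_id_locked(id);
        pthread_rwlock_unlock(&lock);
        return err;
}

int main(void)
{
        add_id(42);
        printf("second add of 42 -> %d (expected -1)\n", add_id(42));
        return 0;
}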