author     Mike Pagano <mpagano@gentoo.org>   2018-12-05 14:42:14 -0500
committer  Mike Pagano <mpagano@gentoo.org>   2018-12-05 14:42:14 -0500
commit     a1249a08fe1aead9f7e3e0c0438a14d3c1487981 (patch)
tree       39715dbb2dca6cad88e31ed7c5e001b34132f3e8
parent     proj/linux-patches: Update existing patch for 4.14.85 (diff)
download   linux-patches-a1249a08fe1aead9f7e3e0c0438a14d3c1487981.tar.gz
           linux-patches-a1249a08fe1aead9f7e3e0c0438a14d3c1487981.tar.bz2
           linux-patches-a1249a08fe1aead9f7e3e0c0438a14d3c1487981.zip
proj/linux-patches: Linux patch 4.14.86
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r--  0000_README              |   14
-rw-r--r--  1085_linux-4.14.86.patch | 7052
2 files changed, 7061 insertions, 5 deletions
diff --git a/0000_README b/0000_README
index b328a3b6..b0b15a3d 100644
--- a/0000_README
+++ b/0000_README
@@ -363,26 +363,30 @@ Patch: 1079_linux-4.14.80.patch
From: http://www.kernel.org
Desc: Linux 4.14.80
-Patch: 1080-4.14.81.patch
+Patch: 1080_4.14.81.patch
From: http://www.kernel.org
Desc: Linux 4.14.81
-Patch: 1081-4.14.82.patch
+Patch: 1081_4.14.82.patch
From: http://www.kernel.org
Desc: Linux 4.14.82
-Patch: 1082-4.14.83.patch
+Patch: 1082_4.14.83.patch
From: http://www.kernel.org
Desc: Linux 4.14.83
-Patch: 1083-4.14.84.patch
+Patch: 1083_4.14.84.patch
From: http://www.kernel.org
Desc: Linux 4.14.84
-Patch: 1084-4.14.85.patch
+Patch: 1084_4.14.85.patch
From: http://www.kernel.org
Desc: Linux 4.14.85
+Patch: 1085_4.14.86.patch
+From: http://www.kernel.org
+Desc: Linux 4.14.86
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1085_linux-4.14.86.patch b/1085_linux-4.14.86.patch
new file mode 100644
index 00000000..c1ec4d9e
--- /dev/null
+++ b/1085_linux-4.14.86.patch
@@ -0,0 +1,7052 @@
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 99a08722124d..5f3d58142600 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -3994,9 +3994,13 @@
+
+ spectre_v2= [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability.
++ The default operation protects the kernel from
++ user space attacks.
+
+- on - unconditionally enable
+- off - unconditionally disable
++ on - unconditionally enable, implies
++ spectre_v2_user=on
++ off - unconditionally disable, implies
++ spectre_v2_user=off
+ auto - kernel detects whether your CPU model is
+ vulnerable
+
+@@ -4006,6 +4010,12 @@
+ CONFIG_RETPOLINE configuration option, and the
+ compiler with which the kernel was built.
+
++ Selecting 'on' will also enable the mitigation
++ against user space to user space task attacks.
++
++ Selecting 'off' will disable both the kernel and
++ the user space protections.
++
+ Specific mitigations can also be selected manually:
+
+ retpoline - replace indirect branches
+@@ -4015,6 +4025,48 @@
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
++ spectre_v2_user=
++ [X86] Control mitigation of Spectre variant 2
++ (indirect branch speculation) vulnerability between
++ user space tasks
++
++ on - Unconditionally enable mitigations. Is
++ enforced by spectre_v2=on
++
++ off - Unconditionally disable mitigations. Is
++ enforced by spectre_v2=off
++
++ prctl - Indirect branch speculation is enabled,
++ but mitigation can be enabled via prctl
++ per thread. The mitigation control state
++ is inherited on fork.
++
++ prctl,ibpb
++ - Like "prctl" above, but only STIBP is
++ controlled per thread. IBPB is issued
++ always when switching between different user
++ space processes.
++
++ seccomp
++ - Same as "prctl" above, but all seccomp
++ threads will enable the mitigation unless
++ they explicitly opt out.
++
++ seccomp,ibpb
++ - Like "seccomp" above, but only STIBP is
++ controlled per thread. IBPB is issued
++ always when switching between different
++ user space processes.
++
++ auto - Kernel selects the mitigation depending on
++ the available CPU features and vulnerability.
++
++ Default mitigation:
++ If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
++
++ Not specifying this option is equivalent to
++ spectre_v2_user=auto.
++
+ spec_store_bypass_disable=
+ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ (Speculative Store Bypass vulnerability)
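+
+ As a rough illustration of the spectre_v2_user= options documented in the
+ hunk above (an example added here, not text from the patch), a boot command
+ line that leaves the kernel-side choice automatic while restricting the
+ user-to-user mitigation to seccomp threads could look like:
+
+     spectre_v2=auto spectre_v2_user=seccomp
+
+ and, per the same text, passing spectre_v2=on alone already implies
+ spectre_v2_user=on.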
+diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
+index 32f3d55c54b7..c4dbe6f7cdae 100644
+--- a/Documentation/userspace-api/spec_ctrl.rst
++++ b/Documentation/userspace-api/spec_ctrl.rst
+@@ -92,3 +92,12 @@ Speculation misfeature controls
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
++
++- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
++ (Mitigate Spectre V2 style attacks against user processes)
++
++ Invocations:
++ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
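+
+ To show how the new PR_SPEC_INDIRECT_BRANCH control is meant to be used from
+ user space, here is a minimal sketch (an illustration added alongside the
+ patch, not part of it) built only from the invocations listed above. It
+ assumes <linux/prctl.h> from a kernel with this series applied, so that
+ PR_SPEC_INDIRECT_BRANCH and the PR_SPEC_* constants are defined:
+
+	#include <errno.h>
+	#include <stdio.h>
+	#include <string.h>
+	#include <sys/prctl.h>
+	#include <linux/prctl.h>
+
+	int main(void)
+	{
+		/* Query the indirect branch speculation state of this task. */
+		int state = prctl(PR_GET_SPECULATION_CTRL,
+				  PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
+		if (state < 0) {
+			fprintf(stderr, "PR_GET_SPECULATION_CTRL: %s\n",
+				strerror(errno));
+			return 1;
+		}
+		printf("indirect branch speculation state: 0x%x\n", state);
+
+		/* Opt this task out of indirect branch speculation. */
+		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
+			  PR_SPEC_DISABLE, 0, 0) < 0) {
+			fprintf(stderr, "PR_SET_SPECULATION_CTRL: %s\n",
+				strerror(errno));
+			return 1;
+		}
+		return 0;
+	}
+
+ Whether the PR_SPEC_DISABLE call takes effect depends on the
+ spectre_v2_user= mode chosen at boot: per ib_prctl_set() further down in
+ this patch, strict mode reports success without changing anything, and with
+ the user-space mitigation disabled entirely the call fails with EPERM.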
+diff --git a/Makefile b/Makefile
+index 58a248264090..572bd98d2344 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 4
+ PATCHLEVEL = 14
+-SUBLEVEL = 85
++SUBLEVEL = 86
+ EXTRAVERSION =
+ NAME = Petit Gorille
+
+diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
+index 6e5bd8974f22..679b839bb2eb 100644
+--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
++++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
+@@ -47,7 +47,11 @@
+ #include "rk3288.dtsi"
+
+ / {
+- memory@0 {
++ /*
++ * The default coreboot on veyron devices ignores memory@0 nodes
++ * and would instead create another memory node.
++ */
++ memory {
+ device_type = "memory";
+ reg = <0x0 0x0 0x0 0x80000000>;
+ };
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+index 9a7486058455..eea7f8f070cf 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+@@ -130,7 +130,7 @@
+ };
+
+ &pcie0 {
+- ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
++ ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
+ num-lanes = <4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie_clkreqn_cpm>;
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 2af0af33362a..4f393eb9745f 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -440,10 +440,6 @@ config RETPOLINE
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+- Without compiler support, at least indirect branches in assembler
+- code are eliminated. Since this includes the syscall entry path,
+- it is not entirely pointless.
+-
+ config INTEL_RDT
+ bool "Intel Resource Director Technology support"
+ default n
+@@ -959,13 +955,7 @@ config NR_CPUS
+ approximately eight kilobytes to the kernel image.
+
+ config SCHED_SMT
+- bool "SMT (Hyperthreading) scheduler support"
+- depends on SMP
+- ---help---
+- SMT scheduler support improves the CPU scheduler's decision making
+- when dealing with Intel Pentium 4 chips with HyperThreading at a
+- cost of slightly increased overhead in some places. If unsure say
+- N here.
++ def_bool y if SMP
+
+ config SCHED_MC
+ def_bool y
+diff --git a/arch/x86/Makefile b/arch/x86/Makefile
+index 1c4d012550ec..ce3658dd98e8 100644
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -241,9 +241,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+
+ # Avoid indirect branches in kernel to deal with Spectre
+ ifdef CONFIG_RETPOLINE
+-ifneq ($(RETPOLINE_CFLAGS),)
+- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
++ifeq ($(RETPOLINE_CFLAGS),)
++ $(error You are building kernel with non-retpoline compiler, please update your compiler.)
+ endif
++ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
+ endif
+
+ archscripts: scripts_basic
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index e5097dc85a06..7d12b0d1f359 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
+ if (config == -1LL)
+ return -EINVAL;
+
+- /*
+- * Branch tracing:
+- */
+- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+- !attr->freq && hwc->sample_period == 1) {
+- /* BTS is not supported by this architecture. */
+- if (!x86_pmu.bts_active)
+- return -EOPNOTSUPP;
+-
+- /* BTS is currently only allowed for user-mode. */
+- if (!attr->exclude_kernel)
+- return -EOPNOTSUPP;
+-
+- /* disallow bts if conflicting events are present */
+- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+- return -EBUSY;
+-
+- event->destroy = hw_perf_lbr_event_destroy;
+- }
+-
+ hwc->config |= config;
+
+ return 0;
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 228732654cfe..7bb80151bfff 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -2345,16 +2345,7 @@ done:
+ static struct event_constraint *
+ intel_bts_constraints(struct perf_event *event)
+ {
+- struct hw_perf_event *hwc = &event->hw;
+- unsigned int hw_event, bts_event;
+-
+- if (event->attr.freq)
+- return NULL;
+-
+- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+-
+- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
++ if (unlikely(intel_pmu_has_bts(event)))
+ return &bts_constraint;
+
+ return NULL;
+@@ -2973,10 +2964,47 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+ return flags;
+ }
+
++static int intel_pmu_bts_config(struct perf_event *event)
++{
++ struct perf_event_attr *attr = &event->attr;
++
++ if (unlikely(intel_pmu_has_bts(event))) {
++ /* BTS is not supported by this architecture. */
++ if (!x86_pmu.bts_active)
++ return -EOPNOTSUPP;
++
++ /* BTS is currently only allowed for user-mode. */
++ if (!attr->exclude_kernel)
++ return -EOPNOTSUPP;
++
++ /* disallow bts if conflicting events are present */
++ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
++ return -EBUSY;
++
++ event->destroy = hw_perf_lbr_event_destroy;
++ }
++
++ return 0;
++}
++
++static int core_pmu_hw_config(struct perf_event *event)
++{
++ int ret = x86_pmu_hw_config(event);
++
++ if (ret)
++ return ret;
++
++ return intel_pmu_bts_config(event);
++}
++
+ static int intel_pmu_hw_config(struct perf_event *event)
+ {
+ int ret = x86_pmu_hw_config(event);
+
++ if (ret)
++ return ret;
++
++ ret = intel_pmu_bts_config(event);
+ if (ret)
+ return ret;
+
+@@ -2999,7 +3027,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
+ /*
+ * BTS is set up earlier in this path, so don't account twice
+ */
+- if (!intel_pmu_has_bts(event)) {
++ if (!unlikely(intel_pmu_has_bts(event))) {
+ /* disallow lbr if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+@@ -3462,7 +3490,7 @@ static __initconst const struct x86_pmu core_pmu = {
+ .enable_all = core_pmu_enable_all,
+ .enable = core_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+- .hw_config = x86_pmu_hw_config,
++ .hw_config = core_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
+index c6698c63c047..3c51fcaf1e34 100644
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -850,11 +850,16 @@ static inline int amd_pmu_init(void)
+
+ static inline bool intel_pmu_has_bts(struct perf_event *event)
+ {
+- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+- !event->attr.freq && event->hw.sample_period == 1)
+- return true;
++ struct hw_perf_event *hwc = &event->hw;
++ unsigned int hw_event, bts_event;
++
++ if (event->attr.freq)
++ return false;
++
++ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
++ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+- return false;
++ return hw_event == bts_event && hwc->sample_period == 1;
+ }
+
+ int intel_pmu_save_and_restart(struct perf_event *event);
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 673d6e988196..7d910827126b 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -284,7 +284,9 @@
+ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
+ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
+ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
+ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
++#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index ef7eec669a1b..62c62d3eb0ff 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -41,9 +41,10 @@
+
+ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
+ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+-#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
++#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
++#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+-#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index 1b4132161c1f..a633767419f2 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -3,6 +3,8 @@
+ #ifndef _ASM_X86_NOSPEC_BRANCH_H_
+ #define _ASM_X86_NOSPEC_BRANCH_H_
+
++#include <linux/static_key.h>
++
+ #include <asm/alternative.h>
+ #include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
+@@ -162,29 +164,35 @@
+ _ASM_PTR " 999b\n\t" \
+ ".popsection\n\t"
+
+-#if defined(CONFIG_X86_64) && defined(RETPOLINE)
++#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_X86_64
+
+ /*
+- * Since the inline asm uses the %V modifier which is only in newer GCC,
+- * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
++ * Inline asm uses the %V modifier which is only in newer GCC
++ * which is ensured when CONFIG_RETPOLINE is defined.
+ */
+ # define CALL_NOSPEC \
+ ANNOTATE_NOSPEC_ALTERNATIVE \
+- ALTERNATIVE( \
++ ALTERNATIVE_2( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+- X86_FEATURE_RETPOLINE)
++ X86_FEATURE_RETPOLINE, \
++ "lfence;\n" \
++ ANNOTATE_RETPOLINE_SAFE \
++ "call *%[thunk_target]\n", \
++ X86_FEATURE_RETPOLINE_AMD)
+ # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+-#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
++#else /* CONFIG_X86_32 */
+ /*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+ # define CALL_NOSPEC \
+- ALTERNATIVE( \
++ ANNOTATE_NOSPEC_ALTERNATIVE \
++ ALTERNATIVE_2( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+@@ -199,9 +207,14 @@
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+- X86_FEATURE_RETPOLINE)
++ X86_FEATURE_RETPOLINE, \
++ "lfence;\n" \
++ ANNOTATE_RETPOLINE_SAFE \
++ "call *%[thunk_target]\n", \
++ X86_FEATURE_RETPOLINE_AMD)
+
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
++#endif
+ #else /* No retpoline for C / inline asm */
+ # define CALL_NOSPEC "call *%[thunk_target]\n"
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+@@ -210,14 +223,19 @@
+ /* The Spectre V2 mitigation variants */
+ enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+- SPECTRE_V2_RETPOLINE_MINIMAL,
+- SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+ SPECTRE_V2_RETPOLINE_GENERIC,
+ SPECTRE_V2_RETPOLINE_AMD,
+- SPECTRE_V2_IBRS,
+ SPECTRE_V2_IBRS_ENHANCED,
+ };
+
++/* The indirect branch speculation control variants */
++enum spectre_v2_user_mitigation {
++ SPECTRE_V2_USER_NONE,
++ SPECTRE_V2_USER_STRICT,
++ SPECTRE_V2_USER_PRCTL,
++ SPECTRE_V2_USER_SECCOMP,
++};
++
+ /* The Speculative Store Bypass disable variants */
+ enum ssb_mitigation {
+ SPEC_STORE_BYPASS_NONE,
+@@ -295,6 +313,10 @@ do { \
+ preempt_enable(); \
+ } while (0)
+
++DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
++DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
++DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
++
+ #endif /* __ASSEMBLY__ */
+
+ /*
+diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
+index ae7c2c5cd7f0..5393babc0598 100644
+--- a/arch/x86/include/asm/spec-ctrl.h
++++ b/arch/x86/include/asm/spec-ctrl.h
+@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+ }
+
++static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
++{
++ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
++ return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
++}
++
+ static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+ {
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+ }
+
++static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
++{
++ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
++ return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
++}
++
+ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+ {
+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
+ static inline void speculative_store_bypass_ht_init(void) { }
+ #endif
+
+-extern void speculative_store_bypass_update(unsigned long tif);
+-
+-static inline void speculative_store_bypass_update_current(void)
+-{
+- speculative_store_bypass_update(current_thread_info()->flags);
+-}
++extern void speculation_ctrl_update(unsigned long tif);
++extern void speculation_ctrl_update_current(void);
+
+ #endif
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 9b6df68d8fd1..12ef2b49d11b 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
+
+ __visible struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *next);
+-struct tss_struct;
+-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+- struct tss_struct *tss);
+
+ /* This runs runs on the previous thread's stack. */
+ static inline void prepare_switch_to(struct task_struct *prev,
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 95ff2d7f553f..bf9175d87844 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -81,10 +81,12 @@ struct thread_info {
+ #define TIF_SIGPENDING 2 /* signal pending */
+ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+-#define TIF_SSBD 5 /* Reduced data speculation */
++#define TIF_SSBD 5 /* Speculative store bypass disable */
+ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SECCOMP 8 /* secure computing */
++#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
++#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
+ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
+ #define TIF_UPROBE 12 /* breakpointed or singlestepping */
+ #define TIF_PATCH_PENDING 13 /* pending live patching update */
+@@ -112,6 +114,8 @@ struct thread_info {
+ #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
++#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
++#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
+ #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
+ #define _TIF_UPROBE (1 << TIF_UPROBE)
+ #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
+@@ -147,8 +151,18 @@ struct thread_info {
+ _TIF_FSCHECK)
+
+ /* flags to check in __switch_to() */
+-#define _TIF_WORK_CTXSW \
+- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
++#define _TIF_WORK_CTXSW_BASE \
++ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
++ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
++
++/*
++ * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
++ */
++#ifdef CONFIG_SMP
++# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
++#else
++# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
++#endif
+
+ #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
+ #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 2501be609b82..e31040333f0c 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -185,10 +185,14 @@ struct tlb_state {
+
+ #define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
++ /* Last user mm for optimizing IBPB */
++ union {
++ struct mm_struct *last_user_mm;
++ unsigned long last_user_mm_ibpb;
++ };
++
+ u16 loaded_mm_asid;
+ u16 next_asid;
+- /* last user mm's ctx id */
+- u64 last_ctx_id;
+
+ /*
+ * We can be in one of several states:
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index dda741bd5789..7e03515662c0 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -554,7 +554,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
+ nodes_per_socket = ((value >> 3) & 7) + 1;
+ }
+
+- if (c->x86 >= 0x15 && c->x86 <= 0x17) {
++ if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
++ !boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
++ c->x86 >= 0x15 && c->x86 <= 0x17) {
+ unsigned int bit;
+
+ switch (c->x86) {
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index e92aedd93806..f7a6d6203e13 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -14,6 +14,7 @@
+ #include <linux/module.h>
+ #include <linux/nospec.h>
+ #include <linux/prctl.h>
++#include <linux/sched/smt.h>
+
+ #include <asm/spec-ctrl.h>
+ #include <asm/cmdline.h>
+@@ -34,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+
+-/*
+- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+- * writes to SPEC_CTRL contain whatever reserved bits have been set.
+- */
+-u64 __ro_after_init x86_spec_ctrl_base;
++/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
++u64 x86_spec_ctrl_base;
+ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++static DEFINE_MUTEX(spec_ctrl_mutex);
+
+ /*
+ * The vendor and possibly platform specific bits which can be modified in
+@@ -54,6 +53,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+ u64 __ro_after_init x86_amd_ls_cfg_base;
+ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
+
++/* Control conditional STIPB in switch_to() */
++DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
++/* Control conditional IBPB in switch_mm() */
++DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
++/* Control unconditional IBPB in switch_mm() */
++DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
++
+ void __init check_bugs(void)
+ {
+ identify_boot_cpu();
+@@ -124,31 +130,6 @@ void __init check_bugs(void)
+ #endif
+ }
+
+-/* The kernel command line selection */
+-enum spectre_v2_mitigation_cmd {
+- SPECTRE_V2_CMD_NONE,
+- SPECTRE_V2_CMD_AUTO,
+- SPECTRE_V2_CMD_FORCE,
+- SPECTRE_V2_CMD_RETPOLINE,
+- SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+- SPECTRE_V2_CMD_RETPOLINE_AMD,
+-};
+-
+-static const char *spectre_v2_strings[] = {
+- [SPECTRE_V2_NONE] = "Vulnerable",
+- [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+- [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
+-};
+-
+-#undef pr_fmt
+-#define pr_fmt(fmt) "Spectre V2 : " fmt
+-
+-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+- SPECTRE_V2_NONE;
+-
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+@@ -166,9 +147,14 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
+ /* SSBD controlled in MSR_SPEC_CTRL */
+- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
++ static_cpu_has(X86_FEATURE_AMD_SSBD))
+ hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+
++ /* Conditional STIBP enabled? */
++ if (static_branch_unlikely(&switch_to_cond_stibp))
++ hostval |= stibp_tif_to_spec_ctrl(ti->flags);
++
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -202,7 +188,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+ ssbd_spec_ctrl_to_tif(hostval);
+
+- speculative_store_bypass_update(tif);
++ speculation_ctrl_update(tif);
+ }
+ }
+ EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
+@@ -217,6 +203,15 @@ static void x86_amd_ssb_disable(void)
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+
++#undef pr_fmt
++#define pr_fmt(fmt) "Spectre V2 : " fmt
++
++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
++ SPECTRE_V2_NONE;
++
++static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
++ SPECTRE_V2_USER_NONE;
++
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+
+@@ -238,67 +233,217 @@ static inline const char *spectre_v2_module_string(void)
+ static inline const char *spectre_v2_module_string(void) { return ""; }
+ #endif
+
+-static void __init spec2_print_if_insecure(const char *reason)
++static inline bool match_option(const char *arg, int arglen, const char *opt)
+ {
+- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+- pr_info("%s selected on command line.\n", reason);
++ int len = strlen(opt);
++
++ return len == arglen && !strncmp(arg, opt, len);
+ }
+
+-static void __init spec2_print_if_secure(const char *reason)
++/* The kernel command line selection for spectre v2 */
++enum spectre_v2_mitigation_cmd {
++ SPECTRE_V2_CMD_NONE,
++ SPECTRE_V2_CMD_AUTO,
++ SPECTRE_V2_CMD_FORCE,
++ SPECTRE_V2_CMD_RETPOLINE,
++ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
++ SPECTRE_V2_CMD_RETPOLINE_AMD,
++};
++
++enum spectre_v2_user_cmd {
++ SPECTRE_V2_USER_CMD_NONE,
++ SPECTRE_V2_USER_CMD_AUTO,
++ SPECTRE_V2_USER_CMD_FORCE,
++ SPECTRE_V2_USER_CMD_PRCTL,
++ SPECTRE_V2_USER_CMD_PRCTL_IBPB,
++ SPECTRE_V2_USER_CMD_SECCOMP,
++ SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
++};
++
++static const char * const spectre_v2_user_strings[] = {
++ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
++ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
++ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
++ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
++};
++
++static const struct {
++ const char *option;
++ enum spectre_v2_user_cmd cmd;
++ bool secure;
++} v2_user_options[] __initdata = {
++ { "auto", SPECTRE_V2_USER_CMD_AUTO, false },
++ { "off", SPECTRE_V2_USER_CMD_NONE, false },
++ { "on", SPECTRE_V2_USER_CMD_FORCE, true },
++ { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
++ { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
++ { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
++ { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
++};
++
++static void __init spec_v2_user_print_cond(const char *reason, bool secure)
+ {
+- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+- pr_info("%s selected on command line.\n", reason);
++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
++ pr_info("spectre_v2_user=%s forced on command line.\n", reason);
+ }
+
+-static inline bool retp_compiler(void)
++static enum spectre_v2_user_cmd __init
++spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+ {
+- return __is_defined(RETPOLINE);
++ char arg[20];
++ int ret, i;
++
++ switch (v2_cmd) {
++ case SPECTRE_V2_CMD_NONE:
++ return SPECTRE_V2_USER_CMD_NONE;
++ case SPECTRE_V2_CMD_FORCE:
++ return SPECTRE_V2_USER_CMD_FORCE;
++ default:
++ break;
++ }
++
++ ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
++ arg, sizeof(arg));
++ if (ret < 0)
++ return SPECTRE_V2_USER_CMD_AUTO;
++
++ for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
++ if (match_option(arg, ret, v2_user_options[i].option)) {
++ spec_v2_user_print_cond(v2_user_options[i].option,
++ v2_user_options[i].secure);
++ return v2_user_options[i].cmd;
++ }
++ }
++
++ pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
++ return SPECTRE_V2_USER_CMD_AUTO;
+ }
+
+-static inline bool match_option(const char *arg, int arglen, const char *opt)
++static void __init
++spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+ {
+- int len = strlen(opt);
++ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
++ bool smt_possible = IS_ENABLED(CONFIG_SMP);
++ enum spectre_v2_user_cmd cmd;
+
+- return len == arglen && !strncmp(arg, opt, len);
++ if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
++ return;
++
++ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
++ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
++ smt_possible = false;
++
++ cmd = spectre_v2_parse_user_cmdline(v2_cmd);
++ switch (cmd) {
++ case SPECTRE_V2_USER_CMD_NONE:
++ goto set_mode;
++ case SPECTRE_V2_USER_CMD_FORCE:
++ mode = SPECTRE_V2_USER_STRICT;
++ break;
++ case SPECTRE_V2_USER_CMD_PRCTL:
++ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
++ mode = SPECTRE_V2_USER_PRCTL;
++ break;
++ case SPECTRE_V2_USER_CMD_AUTO:
++ case SPECTRE_V2_USER_CMD_SECCOMP:
++ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
++ if (IS_ENABLED(CONFIG_SECCOMP))
++ mode = SPECTRE_V2_USER_SECCOMP;
++ else
++ mode = SPECTRE_V2_USER_PRCTL;
++ break;
++ }
++
++ /* Initialize Indirect Branch Prediction Barrier */
++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
++
++ switch (cmd) {
++ case SPECTRE_V2_USER_CMD_FORCE:
++ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
++ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
++ static_branch_enable(&switch_mm_always_ibpb);
++ break;
++ case SPECTRE_V2_USER_CMD_PRCTL:
++ case SPECTRE_V2_USER_CMD_AUTO:
++ case SPECTRE_V2_USER_CMD_SECCOMP:
++ static_branch_enable(&switch_mm_cond_ibpb);
++ break;
++ default:
++ break;
++ }
++
++ pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
++ static_key_enabled(&switch_mm_always_ibpb) ?
++ "always-on" : "conditional");
++ }
++
++ /* If enhanced IBRS is enabled no STIPB required */
++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
++ return;
++
++ /*
++ * If SMT is not possible or STIBP is not available clear the STIPB
++ * mode.
++ */
++ if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
++ mode = SPECTRE_V2_USER_NONE;
++set_mode:
++ spectre_v2_user = mode;
++ /* Only print the STIBP mode when SMT possible */
++ if (smt_possible)
++ pr_info("%s\n", spectre_v2_user_strings[mode]);
+ }
+
++static const char * const spectre_v2_strings[] = {
++ [SPECTRE_V2_NONE] = "Vulnerable",
++ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
++ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
++ [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
++};
++
+ static const struct {
+ const char *option;
+ enum spectre_v2_mitigation_cmd cmd;
+ bool secure;
+-} mitigation_options[] = {
+- { "off", SPECTRE_V2_CMD_NONE, false },
+- { "on", SPECTRE_V2_CMD_FORCE, true },
+- { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+- { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+- { "auto", SPECTRE_V2_CMD_AUTO, false },
++} mitigation_options[] __initdata = {
++ { "off", SPECTRE_V2_CMD_NONE, false },
++ { "on", SPECTRE_V2_CMD_FORCE, true },
++ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
++ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
++ { "auto", SPECTRE_V2_CMD_AUTO, false },
+ };
+
++static void __init spec_v2_print_cond(const char *reason, bool secure)
++{
++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
++ pr_info("%s selected on command line.\n", reason);
++}
++
+ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ {
++ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+ char arg[20];
+ int ret, i;
+- enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_NONE;
+- else {
+- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+- if (ret < 0)
+- return SPECTRE_V2_CMD_AUTO;
+
+- for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+- if (!match_option(arg, ret, mitigation_options[i].option))
+- continue;
+- cmd = mitigation_options[i].cmd;
+- break;
+- }
++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
++ if (ret < 0)
++ return SPECTRE_V2_CMD_AUTO;
+
+- if (i >= ARRAY_SIZE(mitigation_options)) {
+- pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+- return SPECTRE_V2_CMD_AUTO;
+- }
++ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
++ if (!match_option(arg, ret, mitigation_options[i].option))
++ continue;
++ cmd = mitigation_options[i].cmd;
++ break;
++ }
++
++ if (i >= ARRAY_SIZE(mitigation_options)) {
++ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
++ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
+@@ -315,11 +460,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+- if (mitigation_options[i].secure)
+- spec2_print_if_secure(mitigation_options[i].option);
+- else
+- spec2_print_if_insecure(mitigation_options[i].option);
+-
++ spec_v2_print_cond(mitigation_options[i].option,
++ mitigation_options[i].secure);
+ return cmd;
+ }
+
+@@ -375,14 +517,12 @@ retpoline_auto:
+ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+- SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
++ mode = SPECTRE_V2_RETPOLINE_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+- SPECTRE_V2_RETPOLINE_MINIMAL;
++ mode = SPECTRE_V2_RETPOLINE_GENERIC;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+@@ -401,12 +541,6 @@ specv2_set_mode:
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+
+- /* Initialize Indirect Branch Prediction Barrier if supported */
+- if (boot_cpu_has(X86_FEATURE_IBPB)) {
+- setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+- pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
+- }
+-
+ /*
+ * Retpoline means the kernel is safe because it has no indirect
+ * branches. Enhanced IBRS protects firmware too, so, enable restricted
+@@ -422,6 +556,66 @@ specv2_set_mode:
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
++
++ /* Set up IBPB and STIBP depending on the general spectre V2 command */
++ spectre_v2_user_select_mitigation(cmd);
++
++ /* Enable STIBP if appropriate */
++ arch_smt_update();
++}
++
++static void update_stibp_msr(void * __unused)
++{
++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++}
++
++/* Update x86_spec_ctrl_base in case SMT state changed. */
++static void update_stibp_strict(void)
++{
++ u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
++
++ if (sched_smt_active())
++ mask |= SPEC_CTRL_STIBP;
++
++ if (mask == x86_spec_ctrl_base)
++ return;
++
++ pr_info("Update user space SMT mitigation: STIBP %s\n",
++ mask & SPEC_CTRL_STIBP ? "always-on" : "off");
++ x86_spec_ctrl_base = mask;
++ on_each_cpu(update_stibp_msr, NULL, 1);
++}
++
++/* Update the static key controlling the evaluation of TIF_SPEC_IB */
++static void update_indir_branch_cond(void)
++{
++ if (sched_smt_active())
++ static_branch_enable(&switch_to_cond_stibp);
++ else
++ static_branch_disable(&switch_to_cond_stibp);
++}
++
++void arch_smt_update(void)
++{
++ /* Enhanced IBRS implies STIBP. No update required. */
++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
++ return;
++
++ mutex_lock(&spec_ctrl_mutex);
++
++ switch (spectre_v2_user) {
++ case SPECTRE_V2_USER_NONE:
++ break;
++ case SPECTRE_V2_USER_STRICT:
++ update_stibp_strict();
++ break;
++ case SPECTRE_V2_USER_PRCTL:
++ case SPECTRE_V2_USER_SECCOMP:
++ update_indir_branch_cond();
++ break;
++ }
++
++ mutex_unlock(&spec_ctrl_mutex);
+ }
+
+ #undef pr_fmt
+@@ -438,7 +632,7 @@ enum ssb_mitigation_cmd {
+ SPEC_STORE_BYPASS_CMD_SECCOMP,
+ };
+
+-static const char *ssb_strings[] = {
++static const char * const ssb_strings[] = {
+ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
+@@ -448,7 +642,7 @@ static const char *ssb_strings[] = {
+ static const struct {
+ const char *option;
+ enum ssb_mitigation_cmd cmd;
+-} ssb_mitigation_options[] = {
++} ssb_mitigation_options[] __initdata = {
+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+@@ -532,18 +726,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
+ if (mode == SPEC_STORE_BYPASS_DISABLE) {
+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+ /*
+- * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+- * a completely different MSR and bit dependent on family.
++ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
++ * use a completely different MSR and bit dependent on family.
+ */
+- switch (boot_cpu_data.x86_vendor) {
+- case X86_VENDOR_INTEL:
++ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
++ !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
++ x86_amd_ssb_disable();
++ } else {
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+- break;
+- case X86_VENDOR_AMD:
+- x86_amd_ssb_disable();
+- break;
+ }
+ }
+
+@@ -561,10 +753,25 @@ static void ssb_select_mitigation(void)
+ #undef pr_fmt
+ #define pr_fmt(fmt) "Speculation prctl: " fmt
+
+-static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
++static void task_update_spec_tif(struct task_struct *tsk)
+ {
+- bool update;
++ /* Force the update of the real TIF bits */
++ set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);
+
++ /*
++ * Immediately update the speculation control MSRs for the current
++ * task, but for a non-current task delay setting the CPU
++ * mitigation until it is scheduled next.
++ *
++ * This can only happen for SECCOMP mitigation. For PRCTL it's
++ * always the current task.
++ */
++ if (tsk == current)
++ speculation_ctrl_update_current();
++}
++
++static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
++{
+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+ ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+ return -ENXIO;
+@@ -575,28 +782,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+- update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
++ task_update_spec_tif(task);
+ break;
+ case PR_SPEC_DISABLE:
+ task_set_spec_ssb_disable(task);
+- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
++ task_update_spec_tif(task);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
++ task_update_spec_tif(task);
+ break;
+ default:
+ return -ERANGE;
+ }
++ return 0;
++}
+
+- /*
+- * If being set on non-current task, delay setting the CPU
+- * mitigation until it is next scheduled.
+- */
+- if (task == current && update)
+- speculative_store_bypass_update_current();
+-
++static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
++{
++ switch (ctrl) {
++ case PR_SPEC_ENABLE:
++ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
++ return 0;
++ /*
++ * Indirect branch speculation is always disabled in strict
++ * mode.
++ */
++ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
++ return -EPERM;
++ task_clear_spec_ib_disable(task);
++ task_update_spec_tif(task);
++ break;
++ case PR_SPEC_DISABLE:
++ case PR_SPEC_FORCE_DISABLE:
++ /*
++ * Indirect branch speculation is always allowed when
++ * mitigation is force disabled.
++ */
++ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
++ return -EPERM;
++ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
++ return 0;
++ task_set_spec_ib_disable(task);
++ if (ctrl == PR_SPEC_FORCE_DISABLE)
++ task_set_spec_ib_force_disable(task);
++ task_update_spec_tif(task);
++ break;
++ default:
++ return -ERANGE;
++ }
+ return 0;
+ }
+
+@@ -606,6 +841,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_set(task, ctrl);
++ case PR_SPEC_INDIRECT_BRANCH:
++ return ib_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+@@ -616,6 +853,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
+ {
+ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
++ if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
++ ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+ }
+ #endif
+
+@@ -638,11 +877,35 @@ static int ssb_prctl_get(struct task_struct *task)
+ }
+ }
+
++static int ib_prctl_get(struct task_struct *task)
++{
++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
++ return PR_SPEC_NOT_AFFECTED;
++
++ switch (spectre_v2_user) {
++ case SPECTRE_V2_USER_NONE:
++ return PR_SPEC_ENABLE;
++ case SPECTRE_V2_USER_PRCTL:
++ case SPECTRE_V2_USER_SECCOMP:
++ if (task_spec_ib_force_disable(task))
++ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
++ if (task_spec_ib_disable(task))
++ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
++ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
++ case SPECTRE_V2_USER_STRICT:
++ return PR_SPEC_DISABLE;
++ default:
++ return PR_SPEC_NOT_AFFECTED;
++ }
++}
++
+ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+ {
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get(task);
++ case PR_SPEC_INDIRECT_BRANCH:
++ return ib_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+@@ -780,7 +1043,7 @@ early_param("l1tf", l1tf_cmdline);
+ #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
+
+ #if IS_ENABLED(CONFIG_KVM_INTEL)
+-static const char *l1tf_vmx_states[] = {
++static const char * const l1tf_vmx_states[] = {
+ [VMENTER_L1D_FLUSH_AUTO] = "auto",
+ [VMENTER_L1D_FLUSH_NEVER] = "vulnerable",
+ [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes",
+@@ -796,13 +1059,14 @@ static ssize_t l1tf_show_state(char *buf)
+
+ if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
+ (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
+- cpu_smt_control == CPU_SMT_ENABLED))
++ sched_smt_active())) {
+ return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
+ l1tf_vmx_states[l1tf_vmx_mitigation]);
++ }
+
+ return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
+ l1tf_vmx_states[l1tf_vmx_mitigation],
+- cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
++ sched_smt_active() ? "vulnerable" : "disabled");
+ }
+ #else
+ static ssize_t l1tf_show_state(char *buf)
+@@ -811,6 +1075,36 @@ static ssize_t l1tf_show_state(char *buf)
+ }
+ #endif
+
++static char *stibp_state(void)
++{
++ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
++ return "";
++
++ switch (spectre_v2_user) {
++ case SPECTRE_V2_USER_NONE:
++ return ", STIBP: disabled";
++ case SPECTRE_V2_USER_STRICT:
++ return ", STIBP: forced";
++ case SPECTRE_V2_USER_PRCTL:
++ case SPECTRE_V2_USER_SECCOMP:
++ if (static_key_enabled(&switch_to_cond_stibp))
++ return ", STIBP: conditional";
++ }
++ return "";
++}
++
++static char *ibpb_state(void)
++{
++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ if (static_key_enabled(&switch_mm_always_ibpb))
++ return ", IBPB: always-on";
++ if (static_key_enabled(&switch_mm_cond_ibpb))
++ return ", IBPB: conditional";
++ return ", IBPB: disabled";
++ }
++ return "";
++}
++
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
+ {
+@@ -828,9 +1122,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+ case X86_BUG_SPECTRE_V2:
+- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
++ return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
++ ibpb_state(),
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
++ stibp_state(),
++ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ spectre_v2_module_string());
+
+ case X86_BUG_SPEC_STORE_BYPASS:
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 96643e2c75b8..51e49f6fe8e1 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -760,6 +760,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ }
++
++ if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
++ set_cpu_cap(c, X86_FEATURE_SSBD);
++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
++ clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
++ }
+ }
+
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+@@ -958,7 +964,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+ if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+- !(ia32_cap & ARCH_CAP_SSB_NO))
++ !(ia32_cap & ARCH_CAP_SSB_NO) &&
++ !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
+ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+ if (x86_match_cpu(cpu_no_speculation))
+diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+index dbcb01006749..beec0daecbc5 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+@@ -56,7 +56,7 @@
+ /* Threshold LVT offset is at MSR0xC0000410[15:12] */
+ #define SMCA_THR_LVT_OFF 0xF000
+
+-static bool thresholding_en;
++static bool thresholding_irq_en;
+
+ static const char * const th_names[] = {
+ "load_store",
+@@ -533,9 +533,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+
+ set_offset:
+ offset = setup_APIC_mce_threshold(offset, new);
+-
+- if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
+- mce_threshold_vector = amd_threshold_interrupt;
++ if (offset == new)
++ thresholding_irq_en = true;
+
+ done:
+ mce_threshold_block_init(&b, offset);
+@@ -1356,9 +1355,6 @@ int mce_threshold_remove_device(unsigned int cpu)
+ {
+ unsigned int bank;
+
+- if (!thresholding_en)
+- return 0;
+-
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ if (!(per_cpu(bank_map, cpu) & (1 << bank)))
+ continue;
+@@ -1376,9 +1372,6 @@ int mce_threshold_create_device(unsigned int cpu)
+ struct threshold_bank **bp;
+ int err = 0;
+
+- if (!thresholding_en)
+- return 0;
+-
+ bp = per_cpu(threshold_banks, cpu);
+ if (bp)
+ return 0;
+@@ -1407,9 +1400,6 @@ static __init int threshold_init_device(void)
+ {
+ unsigned lcpu = 0;
+
+- if (mce_threshold_vector == amd_threshold_interrupt)
+- thresholding_en = true;
+-
+ /* to hit CPUs online before the notifier is up */
+ for_each_online_cpu(lcpu) {
+ int err = mce_threshold_create_device(lcpu);
+@@ -1418,6 +1408,9 @@ static __init int threshold_init_device(void)
+ return err;
+ }
+
++ if (thresholding_irq_en)
++ mce_threshold_vector = amd_threshold_interrupt;
++
+ return 0;
+ }
+ /*
+diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
+index 61a949d84dfa..d99a8ee9e185 100644
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
+ sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
+ }
+
++ local_bh_disable();
+ fpu->initialized = 1;
+- preempt_disable();
+ fpu__restore(fpu);
+- preempt_enable();
++ local_bh_enable();
+
+ return err;
+ } else {
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 988a98f34c66..a98d1cdd6299 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -41,6 +41,8 @@
+ #include <asm/prctl.h>
+ #include <asm/spec-ctrl.h>
+
++#include "process.h"
++
+ /*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+@@ -255,11 +257,12 @@ void arch_setup_new_exec(void)
+ enable_cpuid();
+ }
+
+-static inline void switch_to_bitmap(struct tss_struct *tss,
+- struct thread_struct *prev,
++static inline void switch_to_bitmap(struct thread_struct *prev,
+ struct thread_struct *next,
+ unsigned long tifp, unsigned long tifn)
+ {
++ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
++
+ if (tifn & _TIF_IO_BITMAP) {
+ /*
+ * Copy the relevant range of the IO bitmap.
+@@ -398,32 +401,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+ }
+
+-static __always_inline void intel_set_ssb_state(unsigned long tifn)
++/*
++ * Update the MSRs managing speculation control, during context switch.
++ *
++ * tifp: Previous task's thread flags
++ * tifn: Next task's thread flags
++ */
++static __always_inline void __speculation_ctrl_update(unsigned long tifp,
++ unsigned long tifn)
+ {
+- u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
++ unsigned long tif_diff = tifp ^ tifn;
++ u64 msr = x86_spec_ctrl_base;
++ bool updmsr = false;
++
++ /*
++ * If TIF_SSBD is different, select the proper mitigation
++ * method. Note that if SSBD mitigation is disabled or permanentely
++ * enabled this branch can't be taken because nothing can set
++ * TIF_SSBD.
++ */
++ if (tif_diff & _TIF_SSBD) {
++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
++ amd_set_ssb_virt_state(tifn);
++ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
++ amd_set_core_ssb_state(tifn);
++ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
++ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
++ msr |= ssbd_tif_to_spec_ctrl(tifn);
++ updmsr = true;
++ }
++ }
++
++ /*
++ * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
++ * otherwise avoid the MSR write.
++ */
++ if (IS_ENABLED(CONFIG_SMP) &&
++ static_branch_unlikely(&switch_to_cond_stibp)) {
++ updmsr |= !!(tif_diff & _TIF_SPEC_IB);
++ msr |= stibp_tif_to_spec_ctrl(tifn);
++ }
+
+- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
++ if (updmsr)
++ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ }
+
+-static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
++static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
+ {
+- if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
+- amd_set_ssb_virt_state(tifn);
+- else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+- amd_set_core_ssb_state(tifn);
+- else
+- intel_set_ssb_state(tifn);
++ if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
++ if (task_spec_ssb_disable(tsk))
++ set_tsk_thread_flag(tsk, TIF_SSBD);
++ else
++ clear_tsk_thread_flag(tsk, TIF_SSBD);
++
++ if (task_spec_ib_disable(tsk))
++ set_tsk_thread_flag(tsk, TIF_SPEC_IB);
++ else
++ clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
++ }
++ /* Return the updated threadinfo flags*/
++ return task_thread_info(tsk)->flags;
+ }
+
+-void speculative_store_bypass_update(unsigned long tif)
++void speculation_ctrl_update(unsigned long tif)
+ {
++ /* Forced update. Make sure all relevant TIF flags are different */
+ preempt_disable();
+- __speculative_store_bypass_update(tif);
++ __speculation_ctrl_update(~tif, tif);
+ preempt_enable();
+ }
+
+-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+- struct tss_struct *tss)
++/* Called from seccomp/prctl update */
++void speculation_ctrl_update_current(void)
++{
++ preempt_disable();
++ speculation_ctrl_update(speculation_ctrl_update_tif(current));
++ preempt_enable();
++}
++
++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
+ {
+ struct thread_struct *prev, *next;
+ unsigned long tifp, tifn;
+@@ -433,7 +489,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+
+ tifn = READ_ONCE(task_thread_info(next_p)->flags);
+ tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+- switch_to_bitmap(tss, prev, next, tifp, tifn);
++ switch_to_bitmap(prev, next, tifp, tifn);
+
+ propagate_user_return_notify(prev_p, next_p);
+
+@@ -454,8 +510,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ if ((tifp ^ tifn) & _TIF_NOCPUID)
+ set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+
+- if ((tifp ^ tifn) & _TIF_SSBD)
+- __speculative_store_bypass_update(tifn);
++ if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
++ __speculation_ctrl_update(tifp, tifn);
++ } else {
++ speculation_ctrl_update_tif(prev_p);
++ tifn = speculation_ctrl_update_tif(next_p);
++
++ /* Enforce MSR update to ensure consistent state */
++ __speculation_ctrl_update(~tifn, tifn);
++ }
+ }
+
+ /*
+diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
+new file mode 100644
+index 000000000000..898e97cf6629
+--- /dev/null
++++ b/arch/x86/kernel/process.h
+@@ -0,0 +1,39 @@
++// SPDX-License-Identifier: GPL-2.0
++//
++// Code shared between 32 and 64 bit
++
++#include <asm/spec-ctrl.h>
++
++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
++
++/*
++ * This needs to be inline to optimize for the common case where no extra
++ * work needs to be done.
++ */
++static inline void switch_to_extra(struct task_struct *prev,
++ struct task_struct *next)
++{
++ unsigned long next_tif = task_thread_info(next)->flags;
++ unsigned long prev_tif = task_thread_info(prev)->flags;
++
++ if (IS_ENABLED(CONFIG_SMP)) {
++ /*
++ * Avoid __switch_to_xtra() invocation when conditional
++ * STIPB is disabled and the only different bit is
++ * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
++ * in the TIF_WORK_CTXSW masks.
++ */
++ if (!static_branch_likely(&switch_to_cond_stibp)) {
++ prev_tif &= ~_TIF_SPEC_IB;
++ next_tif &= ~_TIF_SPEC_IB;
++ }
++ }
++
++ /*
++ * __switch_to_xtra() handles debug registers, i/o bitmaps,
++ * speculation mitigations etc.
++ */
++ if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
++ prev_tif & _TIF_WORK_CTXSW_PREV))
++ __switch_to_xtra(prev, next);
++}
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 5224c6099184..c2df91eab573 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -59,6 +59,8 @@
+ #include <asm/intel_rdt_sched.h>
+ #include <asm/proto.h>
+
++#include "process.h"
++
+ void __show_regs(struct pt_regs *regs, int all)
+ {
+ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+@@ -234,7 +236,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ struct fpu *prev_fpu = &prev->fpu;
+ struct fpu *next_fpu = &next->fpu;
+ int cpu = smp_processor_id();
+- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
+
+ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+
+@@ -266,12 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
+ set_iopl_mask(next->iopl);
+
+- /*
+- * Now maybe handle debug registers and/or IO bitmaps
+- */
+- if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
+- task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
+- __switch_to_xtra(prev_p, next_p, tss);
++ switch_to_extra(prev_p, next_p);
+
+ /*
+ * Leave lazy mode, flushing any hypercalls made here.
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index cbeecfcc66d6..ec63d6be5e02 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -59,6 +59,8 @@
+ #include <asm/unistd_32_ia32.h>
+ #endif
+
++#include "process.h"
++
+ __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
+
+ /* Prints also some state that isn't saved in the pt_regs */
+@@ -400,7 +402,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ struct fpu *prev_fpu = &prev->fpu;
+ struct fpu *next_fpu = &next->fpu;
+ int cpu = smp_processor_id();
+- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
+
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+ this_cpu_read(irq_count) != -1);
+@@ -467,12 +468,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ /* Reload sp0. */
+ update_sp0(next_p);
+
+- /*
+- * Now maybe reload the debug registers and handle I/O bitmaps
+- */
+- if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
+- task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
+- __switch_to_xtra(prev_p, next_p, tss);
++ __switch_to_xtra(prev_p, next_p);
+
+ #ifdef CONFIG_XEN_PV
+ /*
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index d1f5c744142b..bbcd69c76d96 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -367,7 +367,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+
+ /* cpuid 0x80000008.ebx */
+ const u32 kvm_cpuid_8000_0008_ebx_x86_features =
+- F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
++ F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
++ F(AMD_SSB_NO);
+
+ /* cpuid 0xC0000001.edx */
+ const u32 kvm_cpuid_C000_0001_edx_x86_features =
+@@ -649,7 +650,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ entry->ebx |= F(VIRT_SSBD);
+ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
++ /*
++ * The preference is to use SPEC CTRL MSR instead of the
++ * VIRT_SPEC MSR.
++ */
++ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
++ !boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
+ break;
+ }
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index d755e0d44ac1..364d9895dd56 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -4734,9 +4734,9 @@ static bool need_remote_flush(u64 old, u64 new)
+ }
+
+ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+- const u8 *new, int *bytes)
++ int *bytes)
+ {
+- u64 gentry;
++ u64 gentry = 0;
+ int r;
+
+ /*
+@@ -4748,22 +4748,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+ /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
+ *gpa &= ~(gpa_t)7;
+ *bytes = 8;
+- r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
+- if (r)
+- gentry = 0;
+- new = (const u8 *)&gentry;
+ }
+
+- switch (*bytes) {
+- case 4:
+- gentry = *(const u32 *)new;
+- break;
+- case 8:
+- gentry = *(const u64 *)new;
+- break;
+- default:
+- gentry = 0;
+- break;
++ if (*bytes == 4 || *bytes == 8) {
++ r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
++ if (r)
++ gentry = 0;
+ }
+
+ return gentry;
+@@ -4876,8 +4866,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+
+ pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+- gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+-
+ /*
+ * No need to care whether allocation memory is successful
+ * or not since pte prefetch is skiped if it does not have
+@@ -4886,6 +4874,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+ mmu_topup_memory_caches(vcpu);
+
+ spin_lock(&vcpu->kvm->mmu_lock);
++
++ gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
++
+ ++vcpu->kvm->stat.mmu_pte_write;
+ kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index f6bebcec60b4..17f08db34547 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1733,21 +1733,31 @@ out:
+ return ERR_PTR(err);
+ }
+
++static void svm_clear_current_vmcb(struct vmcb *vmcb)
++{
++ int i;
++
++ for_each_online_cpu(i)
++ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
++}
++
+ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
++ /*
++ * The vmcb page can be recycled, causing a false negative in
++ * svm_vcpu_load(). So, ensure that no logical CPU has this
++ * vmcb page recorded as its current vmcb.
++ */
++ svm_clear_current_vmcb(svm->vmcb);
++
+ __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
+ __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+ __free_page(virt_to_page(svm->nested.hsave));
+ __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, svm);
+- /*
+- * The vmcb page can be recycled, causing a false negative in
+- * svm_vcpu_load(). So do a full IBPB now.
+- */
+- indirect_branch_prediction_barrier();
+ }
+
+ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+@@ -3644,7 +3654,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+ return 1;
+
+ msr_info->data = svm->spec_ctrl;
+@@ -3749,11 +3760,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr->host_initiated &&
+- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+ return 1;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
+ return 1;
+
+ svm->spec_ctrl = data;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 8d688b213504..f24329659bea 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -6378,6 +6378,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
+ clock_pairing.nsec = ts.tv_nsec;
+ clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
+ clock_pairing.flags = 0;
++ memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
+
+ ret = 0;
+ if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
+@@ -6884,7 +6885,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
+ else {
+ if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+ kvm_x86_ops->sync_pir_to_irr(vcpu);
+- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
++ if (ioapic_in_kernel(vcpu->kvm))
++ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+ }
+ bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
+ vcpu_to_synic(vcpu)->vec_bitmap, 256);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 83a3f4c935fc..5400a24e1a8c 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -29,6 +29,12 @@
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ */
+
++/*
++ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
++ * stored in cpu_tlbstate.last_user_mm_ibpb.
++ */
++#define LAST_USER_MM_IBPB 0x1UL
++
+ /*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts. We do the
+@@ -180,6 +186,89 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
+ }
+ }
+
++static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
++{
++ unsigned long next_tif = task_thread_info(next)->flags;
++ unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
++
++ return (unsigned long)next->mm | ibpb;
++}
++
++static void cond_ibpb(struct task_struct *next)
++{
++ if (!next || !next->mm)
++ return;
++
++ /*
++	 * Both the conditional and the always-on IBPB modes use the mm
++	 * pointer to avoid the IBPB when switching between tasks of the
++	 * same process. Using the mm pointer instead of mm->context.ctx_id
++	 * opens a hypothetical hole vs. mm_struct reuse, which is more or
++	 * less impossible to control by an attacker. Aside from that, it
++	 * would only affect the first schedule, so the theoretically
++	 * exposed data is not really interesting.
++ */
++ if (static_branch_likely(&switch_mm_cond_ibpb)) {
++ unsigned long prev_mm, next_mm;
++
++ /*
++ * This is a bit more complex than the always mode because
++ * it has to handle two cases:
++ *
++ * 1) Switch from a user space task (potential attacker)
++ * which has TIF_SPEC_IB set to a user space task
++ * (potential victim) which has TIF_SPEC_IB not set.
++ *
++ * 2) Switch from a user space task (potential attacker)
++ * which has TIF_SPEC_IB not set to a user space task
++ * (potential victim) which has TIF_SPEC_IB set.
++ *
++ * This could be done by unconditionally issuing IBPB when
++ * a task which has TIF_SPEC_IB set is either scheduled in
++ * or out. Though that results in two flushes when:
++ *
++ * - the same user space task is scheduled out and later
++ * scheduled in again and only a kernel thread ran in
++ * between.
++ *
++ * - a user space task belonging to the same process is
++ * scheduled in after a kernel thread ran in between
++ *
++ * - a user space task belonging to the same process is
++ * scheduled in immediately.
++ *
++ * Optimize this with reasonably small overhead for the
++ * above cases. Mangle the TIF_SPEC_IB bit into the mm
++ * pointer of the incoming task which is stored in
++ * cpu_tlbstate.last_user_mm_ibpb for comparison.
++ */
++ next_mm = mm_mangle_tif_spec_ib(next);
++ prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
++
++ /*
++ * Issue IBPB only if the mm's are different and one or
++ * both have the IBPB bit set.
++ */
++ if (next_mm != prev_mm &&
++ (next_mm | prev_mm) & LAST_USER_MM_IBPB)
++ indirect_branch_prediction_barrier();
++
++ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
++ }
++
++ if (static_branch_unlikely(&switch_mm_always_ibpb)) {
++ /*
++ * Only flush when switching to a user space task with a
++ * different context than the user space task which ran
++ * last on this CPU.
++ */
++ if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
++ indirect_branch_prediction_barrier();
++ this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
++ }
++ }
++}
++
+ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+ {
+@@ -248,27 +337,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ } else {
+ u16 new_asid;
+ bool need_flush;
+- u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
+
+ /*
+ * Avoid user/user BTB poisoning by flushing the branch
+ * predictor when switching between processes. This stops
+ * one process from doing Spectre-v2 attacks on another.
+- *
+- * As an optimization, flush indirect branches only when
+- * switching into processes that disable dumping. This
+- * protects high value processes like gpg, without having
+- * too high performance overhead. IBPB is *expensive*!
+- *
+- * This will not flush branches when switching into kernel
+- * threads. It will also not flush if we switch to idle
+- * thread and back to the same process. It will flush if we
+- * switch to a different non-dumpable process.
+ */
+- if (tsk && tsk->mm &&
+- tsk->mm->context.ctx_id != last_ctx_id &&
+- get_dumpable(tsk->mm) != SUID_DUMP_USER)
+- indirect_branch_prediction_barrier();
++ cond_ibpb(tsk);
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ /*
+@@ -318,14 +393,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+ }
+
+- /*
+- * Record last user mm's context id, so we can avoid
+- * flushing branch buffer with IBPB if we switch back
+- * to the same user.
+- */
+- if (next != &init_mm)
+- this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+-
+ /* Make sure we write CR3 before loaded_mm. */
+ barrier();
+
+@@ -406,7 +473,7 @@ void initialize_tlbstate_and_flush(void)
+ write_cr3(build_cr3(mm->pgd, 0));
+
+ /* Reinitialize tlbstate. */
+- this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
++ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+ this_cpu_write(cpu_tlbstate.next_asid, 1);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
+index bcb5beb81177..7df02fc934a9 100644
+--- a/arch/xtensa/kernel/asm-offsets.c
++++ b/arch/xtensa/kernel/asm-offsets.c
+@@ -91,14 +91,14 @@ int main(void)
+ DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
+ DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
+ #if XTENSA_HAVE_COPROCESSORS
+- DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp));
+- DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp));
+- DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp));
+- DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp));
+- DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp));
+- DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp));
+- DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp));
+- DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp));
++ DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
++ DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
++ DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2));
++ DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3));
++ DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4));
++ DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5));
++ DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6));
++ DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7));
+ #endif
+ DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
+ DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));
+diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
+index ff4f0ecb03dd..f1c46bc5d465 100644
+--- a/arch/xtensa/kernel/process.c
++++ b/arch/xtensa/kernel/process.c
+@@ -88,18 +88,21 @@ void coprocessor_release_all(struct thread_info *ti)
+
+ void coprocessor_flush_all(struct thread_info *ti)
+ {
+- unsigned long cpenable;
++ unsigned long cpenable, old_cpenable;
+ int i;
+
+ preempt_disable();
+
++ RSR_CPENABLE(old_cpenable);
+ cpenable = ti->cpenable;
++ WSR_CPENABLE(cpenable);
+
+ for (i = 0; i < XCHAL_CP_MAX; i++) {
+ if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+ coprocessor_flush(ti, i);
+ cpenable >>= 1;
+ }
++ WSR_CPENABLE(old_cpenable);
+
+ preempt_enable();
+ }
+diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
+index e2461968efb2..7c3ed7d78075 100644
+--- a/arch/xtensa/kernel/ptrace.c
++++ b/arch/xtensa/kernel/ptrace.c
+@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs)
+ }
+
+
++#if XTENSA_HAVE_COPROCESSORS
++#define CP_OFFSETS(cp) \
++ { \
++ .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \
++ .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \
++ .sz = sizeof(xtregs_ ## cp ## _t), \
++ }
++
++static const struct {
++ size_t elf_xtregs_offset;
++ size_t ti_offset;
++ size_t sz;
++} cp_offsets[] = {
++ CP_OFFSETS(cp0),
++ CP_OFFSETS(cp1),
++ CP_OFFSETS(cp2),
++ CP_OFFSETS(cp3),
++ CP_OFFSETS(cp4),
++ CP_OFFSETS(cp5),
++ CP_OFFSETS(cp6),
++ CP_OFFSETS(cp7),
++};
++#endif
++
+ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
+ {
+ struct pt_regs *regs = task_pt_regs(child);
+ struct thread_info *ti = task_thread_info(child);
+ elf_xtregs_t __user *xtregs = uregs;
+ int ret = 0;
++ int i __maybe_unused;
+
+ if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t)))
+ return -EIO;
+@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
+ #if XTENSA_HAVE_COPROCESSORS
+ /* Flush all coprocessor registers to memory. */
+ coprocessor_flush_all(ti);
+- ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp,
+- sizeof(xtregs_coprocessor_t));
++
++ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
++ ret |= __copy_to_user((char __user *)xtregs +
++ cp_offsets[i].elf_xtregs_offset,
++ (const char *)ti +
++ cp_offsets[i].ti_offset,
++ cp_offsets[i].sz);
+ #endif
+ ret |= __copy_to_user(&xtregs->opt, &regs->xtregs_opt,
+ sizeof(xtregs->opt));
+@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
+ struct pt_regs *regs = task_pt_regs(child);
+ elf_xtregs_t *xtregs = uregs;
+ int ret = 0;
++ int i __maybe_unused;
+
+ if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t)))
+ return -EFAULT;
+@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
+ coprocessor_flush_all(ti);
+ coprocessor_release_all(ti);
+
+- ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
+- sizeof(xtregs_coprocessor_t));
++ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
++ ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset,
++ (const char __user *)xtregs +
++ cp_offsets[i].elf_xtregs_offset,
++ cp_offsets[i].sz);
+ #endif
+ ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
+ sizeof(xtregs->opt));
+diff --git a/drivers/android/binder.c b/drivers/android/binder.c
+index a86c27948fca..96a0f940e54d 100644
+--- a/drivers/android/binder.c
++++ b/drivers/android/binder.c
+@@ -2918,7 +2918,6 @@ static void binder_transaction(struct binder_proc *proc,
+ t->buffer = NULL;
+ goto err_binder_alloc_buf_failed;
+ }
+- t->buffer->allow_user_free = 0;
+ t->buffer->debug_id = t->debug_id;
+ t->buffer->transaction = t;
+ t->buffer->target_node = target_node;
+@@ -3407,14 +3406,18 @@ static int binder_thread_write(struct binder_proc *proc,
+
+ buffer = binder_alloc_prepare_to_free(&proc->alloc,
+ data_ptr);
+- if (buffer == NULL) {
+- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
+- proc->pid, thread->pid, (u64)data_ptr);
+- break;
+- }
+- if (!buffer->allow_user_free) {
+- binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
+- proc->pid, thread->pid, (u64)data_ptr);
++ if (IS_ERR_OR_NULL(buffer)) {
++ if (PTR_ERR(buffer) == -EPERM) {
++ binder_user_error(
++ "%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n",
++ proc->pid, thread->pid,
++ (u64)data_ptr);
++ } else {
++ binder_user_error(
++ "%d:%d BC_FREE_BUFFER u%016llx no match\n",
++ proc->pid, thread->pid,
++ (u64)data_ptr);
++ }
+ break;
+ }
+ binder_debug(BINDER_DEBUG_FREE_BUFFER,
+diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
+index 58e4658f9dd6..b9281f2725a6 100644
+--- a/drivers/android/binder_alloc.c
++++ b/drivers/android/binder_alloc.c
+@@ -149,14 +149,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
+ else {
+ /*
+ * Guard against user threads attempting to
+- * free the buffer twice
++ * free the buffer when in use by kernel or
++ * after it's already been freed.
+ */
+- if (buffer->free_in_progress) {
+- pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n",
+- alloc->pid, current->pid, (u64)user_ptr);
+- return NULL;
+- }
+- buffer->free_in_progress = 1;
++ if (!buffer->allow_user_free)
++ return ERR_PTR(-EPERM);
++ buffer->allow_user_free = 0;
+ return buffer;
+ }
+ }
+@@ -486,7 +484,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc,
+
+ rb_erase(best_fit, &alloc->free_buffers);
+ buffer->free = 0;
+- buffer->free_in_progress = 0;
++ buffer->allow_user_free = 0;
+ binder_insert_allocated_buffer_locked(alloc, buffer);
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%d: binder_alloc_buf size %zd got %pK\n",
+diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
+index 2dd33b6df104..a3ad7683b6f2 100644
+--- a/drivers/android/binder_alloc.h
++++ b/drivers/android/binder_alloc.h
+@@ -50,8 +50,7 @@ struct binder_buffer {
+ unsigned free:1;
+ unsigned allow_user_free:1;
+ unsigned async_transaction:1;
+- unsigned free_in_progress:1;
+- unsigned debug_id:28;
++ unsigned debug_id:29;
+
+ struct binder_transaction *transaction;
+
+diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
+index a861b5b4d443..21ed0e20c5d9 100644
+--- a/drivers/dma/at_hdmac.c
++++ b/drivers/dma/at_hdmac.c
+@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan)
+ atchan->descs_allocated = 0;
+ atchan->status = 0;
+
++ /*
++ * Free atslave allocated in at_dma_xlate()
++ */
++ kfree(chan->private);
++ chan->private = NULL;
++
+ dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
+ }
+
+@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+- atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL);
++ atslave = kzalloc(sizeof(*atslave), GFP_KERNEL);
+ if (!atslave)
+ return NULL;
+
+@@ -2000,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev)
+ struct resource *io;
+
+ at_dma_off(atdma);
++ if (pdev->dev.of_node)
++ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&atdma->dma_common);
+
+ dma_pool_destroy(atdma->memset_pool);
+diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
+index d96b09fea835..e05de5032f0c 100644
+--- a/drivers/hv/channel.c
++++ b/drivers/hv/channel.c
+@@ -454,6 +454,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
+ }
+ wait_for_completion(&msginfo->waitevent);
+
++ if (msginfo->response.gpadl_created.creation_status != 0) {
++ pr_err("Failed to establish GPADL: err = 0x%x\n",
++ msginfo->response.gpadl_created.creation_status);
++
++ ret = -EDQUOT;
++ goto cleanup;
++ }
++
+ if (channel->rescind) {
+ ret = -ENODEV;
+ goto cleanup;
+diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
+index 0a9e8fadfa9d..37ab30566464 100644
+--- a/drivers/iio/magnetometer/st_magn_buffer.c
++++ b/drivers/iio/magnetometer/st_magn_buffer.c
+@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state)
+ return st_sensors_set_dataready_irq(indio_dev, state);
+ }
+
+-static int st_magn_buffer_preenable(struct iio_dev *indio_dev)
+-{
+- return st_sensors_set_enable(indio_dev, true);
+-}
+-
+ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
+ {
+ int err;
+@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
+ if (err < 0)
+ goto st_magn_buffer_postenable_error;
+
+- return err;
++ return st_sensors_set_enable(indio_dev, true);
+
+ st_magn_buffer_postenable_error:
+ kfree(mdata->buffer_data);
+@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
+ int err;
+ struct st_sensor_data *mdata = iio_priv(indio_dev);
+
+- err = iio_triggered_buffer_predisable(indio_dev);
++ err = st_sensors_set_enable(indio_dev, false);
+ if (err < 0)
+ goto st_magn_buffer_predisable_error;
+
+- err = st_sensors_set_enable(indio_dev, false);
++ err = iio_triggered_buffer_predisable(indio_dev);
+
+ st_magn_buffer_predisable_error:
+ kfree(mdata->buffer_data);
+@@ -75,7 +70,6 @@ st_magn_buffer_predisable_error:
+ }
+
+ static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
+- .preenable = &st_magn_buffer_preenable,
+ .postenable = &st_magn_buffer_postenable,
+ .predisable = &st_magn_buffer_predisable,
+ };
+diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c
+index 4a7db623fe29..29cdaaf1ed90 100644
+--- a/drivers/media/usb/em28xx/em28xx-dvb.c
++++ b/drivers/media/usb/em28xx/em28xx-dvb.c
+@@ -2105,6 +2105,8 @@ static int em28xx_dvb_fini(struct em28xx *dev)
+ }
+ }
+
++ em28xx_unregister_dvb(dvb);
++
+ /* remove I2C SEC */
+ client = dvb->i2c_client_sec;
+ if (client) {
+@@ -2126,7 +2128,6 @@ static int em28xx_dvb_fini(struct em28xx *dev)
+ i2c_unregister_device(client);
+ }
+
+- em28xx_unregister_dvb(dvb);
+ kfree(dvb);
+ dev->dvb = NULL;
+ kref_put(&dev->ref, em28xx_free_device);
+diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
+index 329727e00e97..95745dc4e0ec 100644
+--- a/drivers/misc/mic/scif/scif_rma.c
++++ b/drivers/misc/mic/scif/scif_rma.c
+@@ -417,7 +417,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev,
+ if (err)
+ goto error_window;
+ err = scif_map_page(&window->num_pages_lookup.lookup[j],
+- vmalloc_dma_phys ?
++ vmalloc_num_pages ?
+ vmalloc_to_page(&window->num_pages[i]) :
+ virt_to_page(&window->num_pages[i]),
+ remote_dev);
+diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
+index 94d7a865b135..7504f430c011 100644
+--- a/drivers/mtd/ubi/vtbl.c
++++ b/drivers/mtd/ubi/vtbl.c
+@@ -578,6 +578,16 @@ static int init_volumes(struct ubi_device *ubi,
+ vol->ubi = ubi;
+ reserved_pebs += vol->reserved_pebs;
+
++ /*
++ * We use ubi->peb_count and not vol->reserved_pebs because
++ * we want to keep the code simple. Otherwise we'd have to
++ * resize/check the bitmap upon volume resize too.
++ * Allocating a few bytes more does not hurt.
++ */
++ err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
++ if (err)
++ return err;
++
+ /*
+ * In case of dynamic volume UBI knows nothing about how many
+ * data is stored there. So assume the whole volume is used.
+@@ -620,16 +630,6 @@ static int init_volumes(struct ubi_device *ubi,
+ (long long)(vol->used_ebs - 1) * vol->usable_leb_size;
+ vol->used_bytes += av->last_data_size;
+ vol->last_eb_bytes = av->last_data_size;
+-
+- /*
+- * We use ubi->peb_count and not vol->reserved_pebs because
+- * we want to keep the code simple. Otherwise we'd have to
+- * resize/check the bitmap upon volume resize too.
+- * Allocating a few bytes more does not hurt.
+- */
+- err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
+- if (err)
+- return err;
+ }
+
+ /* And add the layout volume */
+diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+index 2237ef8e4344..f13256af8031 100644
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+@@ -1691,6 +1691,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
+ bool if_up = netif_running(nic->netdev);
+ struct bpf_prog *old_prog;
+ bool bpf_attached = false;
++ int ret = 0;
+
+ /* For now just support only the usual MTU sized frames */
+ if (prog && (dev->mtu > 1500)) {
+@@ -1724,8 +1725,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
+ if (nic->xdp_prog) {
+ /* Attach BPF program */
+ nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
+- if (!IS_ERR(nic->xdp_prog))
++ if (!IS_ERR(nic->xdp_prog)) {
+ bpf_attached = true;
++ } else {
++ ret = PTR_ERR(nic->xdp_prog);
++ nic->xdp_prog = NULL;
++ }
+ }
+
+ /* Calculate Tx queues needed for XDP and network stack */
+@@ -1737,7 +1742,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
+ netif_trans_update(nic->netdev);
+ }
+
+- return 0;
++ return ret;
+ }
+
+ static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
+diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+index a3d12dbde95b..09494e1c77c5 100644
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
+ if (!sq->dmem.base)
+ return;
+
+- if (sq->tso_hdrs)
++ if (sq->tso_hdrs) {
+ dma_free_coherent(&nic->pdev->dev,
+ sq->dmem.q_len * TSO_HEADER_SIZE,
+ sq->tso_hdrs, sq->tso_hdrs_phys);
++ sq->tso_hdrs = NULL;
++ }
+
+ /* Free pending skbs in the queue */
+ smp_rmb();
+diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
+index e9f101c9bae2..bfbb39f93554 100644
+--- a/drivers/net/rionet.c
++++ b/drivers/net/rionet.c
+@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ * it just report sending a packet to the target
+ * (without actual packet transfer).
+ */
+- dev_kfree_skb_any(skb);
+ ndev->stats.tx_packets++;
+ ndev->stats.tx_bytes += skb->len;
++ dev_kfree_skb_any(skb);
+ }
+ }
+
+diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
+index d49c7103085e..aabbcfb6e6da 100644
+--- a/drivers/net/usb/ipheth.c
++++ b/drivers/net/usb/ipheth.c
+@@ -140,7 +140,6 @@ struct ipheth_device {
+ struct usb_device *udev;
+ struct usb_interface *intf;
+ struct net_device *net;
+- struct sk_buff *tx_skb;
+ struct urb *tx_urb;
+ struct urb *rx_urb;
+ unsigned char *tx_buf;
+@@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
+ case -ENOENT:
+ case -ECONNRESET:
+ case -ESHUTDOWN:
++ case -EPROTO:
+ return;
+ case 0:
+ break;
+@@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb)
+ dev_err(&dev->intf->dev, "%s: urb status: %d\n",
+ __func__, status);
+
+- dev_kfree_skb_irq(dev->tx_skb);
+ netif_wake_queue(dev->net);
+ }
+
+@@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
+ if (skb->len > IPHETH_BUF_SIZE) {
+ WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len);
+ dev->net->stats.tx_dropped++;
+- dev_kfree_skb_irq(skb);
++ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+@@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
+ dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n",
+ __func__, retval);
+ dev->net->stats.tx_errors++;
+- dev_kfree_skb_irq(skb);
++ dev_kfree_skb_any(skb);
+ } else {
+- dev->tx_skb = skb;
+-
+ dev->net->stats.tx_packets++;
+ dev->net->stats.tx_bytes += skb->len;
++ dev_consume_skb_any(skb);
+ netif_stop_queue(net);
+ }
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index f528e9ac3413..0e8e3be50332 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -61,7 +61,8 @@ static const unsigned long guest_offloads[] = {
+ VIRTIO_NET_F_GUEST_TSO4,
+ VIRTIO_NET_F_GUEST_TSO6,
+ VIRTIO_NET_F_GUEST_ECN,
+- VIRTIO_NET_F_GUEST_UFO
++ VIRTIO_NET_F_GUEST_UFO,
++ VIRTIO_NET_F_GUEST_CSUM
+ };
+
+ struct virtnet_stats {
+@@ -1939,9 +1940,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
+ if (!vi->guest_offloads)
+ return 0;
+
+- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+- offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+-
+ return virtnet_set_guest_offloads(vi, offloads);
+ }
+
+@@ -1951,8 +1949,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
+
+ if (!vi->guest_offloads)
+ return 0;
+- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+- offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+
+ return virtnet_set_guest_offloads(vi, offloads);
+ }
+@@ -1970,8 +1966,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+- virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
+- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
++ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
++ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
++ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
+ return -EOPNOTSUPP;
+ }
+
+diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
+index ffdd2fa401b1..d63d7c326801 100644
+--- a/drivers/net/wireless/ath/wil6210/wmi.c
++++ b/drivers/net/wireless/ath/wil6210/wmi.c
+@@ -1380,8 +1380,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie)
+ };
+ int rc;
+ u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len;
+- struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL);
++ struct wmi_set_appie_cmd *cmd;
+
++ if (len < ie_len) {
++ rc = -EINVAL;
++ goto out;
++ }
++
++ cmd = kzalloc(len, GFP_KERNEL);
+ if (!cmd) {
+ rc = -ENOMEM;
+ goto out;
+diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c
+index f48c3f62966d..761cf8573a80 100644
+--- a/drivers/net/wireless/ti/wlcore/cmd.c
++++ b/drivers/net/wireless/ti/wlcore/cmd.c
+@@ -35,7 +35,6 @@
+ #include "wl12xx_80211.h"
+ #include "cmd.h"
+ #include "event.h"
+-#include "ps.h"
+ #include "tx.h"
+ #include "hw_ops.h"
+
+@@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
+
+ timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT);
+
+- ret = wl1271_ps_elp_wakeup(wl);
+- if (ret < 0)
+- return ret;
+-
+ do {
+ if (time_after(jiffies, timeout_time)) {
+ wl1271_debug(DEBUG_CMD, "timeout waiting for event %d",
+@@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
+ } while (!event);
+
+ out:
+- wl1271_ps_elp_sleep(wl);
+ kfree(events_vector);
+ return ret;
+ }
+diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c
+index 87fa486bee2c..1ede4b60aac3 100644
+--- a/drivers/pci/dwc/pci-layerscape.c
++++ b/drivers/pci/dwc/pci-layerscape.c
+@@ -89,7 +89,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie)
+ int i;
+
+ for (i = 0; i < PCIE_IATU_NUM; i++)
+- dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i);
++ dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND);
+ }
+
+ static int ls1021_pcie_link_up(struct dw_pcie *pci)
+diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
+index 169dd7127f9e..69ef5f4060ed 100644
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -4545,8 +4545,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
+ {
+ struct qeth_ipa_cmd *cmd;
+ struct qeth_arp_query_info *qinfo;
+- struct qeth_snmp_cmd *snmp;
+ unsigned char *data;
++ void *snmp_data;
+ __u16 data_len;
+
+ QETH_CARD_TEXT(card, 3, "snpcmdcb");
+@@ -4554,7 +4554,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
+ cmd = (struct qeth_ipa_cmd *) sdata;
+ data = (unsigned char *)((char *)cmd - reply->offset);
+ qinfo = (struct qeth_arp_query_info *) reply->param;
+- snmp = &cmd->data.setadapterparms.data.snmp;
+
+ if (cmd->hdr.return_code) {
+ QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
+@@ -4567,10 +4566,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
+ return 0;
+ }
+ data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
+- if (cmd->data.setadapterparms.hdr.seq_no == 1)
+- data_len -= (__u16)((char *)&snmp->data - (char *)cmd);
+- else
+- data_len -= (__u16)((char *)&snmp->request - (char *)cmd);
++ if (cmd->data.setadapterparms.hdr.seq_no == 1) {
++ snmp_data = &cmd->data.setadapterparms.data.snmp;
++ data_len -= offsetof(struct qeth_ipa_cmd,
++ data.setadapterparms.data.snmp);
++ } else {
++ snmp_data = &cmd->data.setadapterparms.data.snmp.request;
++ data_len -= offsetof(struct qeth_ipa_cmd,
++ data.setadapterparms.data.snmp.request);
++ }
+
+ /* check if there is enough room in userspace */
+ if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
+@@ -4583,16 +4587,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
+ QETH_CARD_TEXT_(card, 4, "sseqn%i",
+ cmd->data.setadapterparms.hdr.seq_no);
+ /*copy entries to user buffer*/
+- if (cmd->data.setadapterparms.hdr.seq_no == 1) {
+- memcpy(qinfo->udata + qinfo->udata_offset,
+- (char *)snmp,
+- data_len + offsetof(struct qeth_snmp_cmd, data));
+- qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data);
+- } else {
+- memcpy(qinfo->udata + qinfo->udata_offset,
+- (char *)&snmp->request, data_len);
+- }
++ memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
+ qinfo->udata_offset += data_len;
++
+ /* check if all replies received ... */
+ QETH_CARD_TEXT_(card, 4, "srtot%i",
+ cmd->data.setadapterparms.hdr.used_total);
+diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+index bd4352fe2de3..83852f323c5e 100644
+--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+@@ -1293,7 +1293,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,
+
+ sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
+ sinfo->tx_packets = psta->sta_stats.tx_pkts;
+-
++ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
+ }
+
+ /* for Ad-Hoc/AP mode */
+diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+index 314ffac50bb8..f05e9af4fe81 100644
+--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
++++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+@@ -1461,6 +1461,7 @@ vchiq_compat_ioctl_await_completion(struct file *file,
+ struct vchiq_await_completion32 args32;
+ struct vchiq_completion_data32 completion32;
+ unsigned int *msgbufcount32;
++ unsigned int msgbufcount_native;
+ compat_uptr_t msgbuf32;
+ void *msgbuf;
+ void **msgbufptr;
+@@ -1572,7 +1573,11 @@ vchiq_compat_ioctl_await_completion(struct file *file,
+ sizeof(completion32)))
+ return -EFAULT;
+
+- args32.msgbufcount--;
++ if (get_user(msgbufcount_native, &args->msgbufcount))
++ return -EFAULT;
++
++ if (!msgbufcount_native)
++ args32.msgbufcount--;
+
+ msgbufcount32 =
+ &((struct vchiq_await_completion32 __user *)arg)->msgbufcount;
+diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
+index 1e8f68960014..808437c5ec49 100644
+--- a/drivers/usb/core/quirks.c
++++ b/drivers/usb/core/quirks.c
+@@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* Microsoft LifeCam-VX700 v2.0 */
+ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
+
++ /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
++ { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
++
+ /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
+ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
+ { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index ac8d619ff887..b8704c0678f9 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -1511,9 +1511,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
+ unsigned transfer_in_flight;
+ unsigned started;
+
+- if (dep->flags & DWC3_EP_STALL)
+- return 0;
+-
+ if (dep->number > 1)
+ trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
+ else
+@@ -1535,8 +1532,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
+ else
+ dep->flags |= DWC3_EP_STALL;
+ } else {
+- if (!(dep->flags & DWC3_EP_STALL))
+- return 0;
+
+ ret = dwc3_send_clear_stall_ep_cmd(dep);
+ if (ret)
+diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h
+index 8fe624ad302a..7ca779493671 100644
+--- a/drivers/usb/storage/unusual_realtek.h
++++ b/drivers/usb/storage/unusual_realtek.h
+@@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999,
+ "USB Card Reader",
+ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
++UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999,
++ "Realtek",
++ "USB Card Reader",
++ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
++
++UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999,
++ "Realtek",
++ "USB Card Reader",
++ USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
++
+ #endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */
+diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
+index f2cd9dedb037..195229df5ba0 100644
+--- a/fs/btrfs/Makefile
++++ b/fs/btrfs/Makefile
+@@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+ export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
+ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
+ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
+- uuid-tree.o props.o hash.o free-space-tree.o
++ uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
+
+ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
+ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 0e67cee73c53..e42673477c25 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -50,6 +50,7 @@
+ #include "sysfs.h"
+ #include "qgroup.h"
+ #include "compression.h"
++#include "tree-checker.h"
+
+ #ifdef CONFIG_X86
+ #include <asm/cpufeature.h>
+@@ -544,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
+ return ret;
+ }
+
+-#define CORRUPT(reason, eb, root, slot) \
+- btrfs_crit(root->fs_info, \
+- "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
+- btrfs_header_level(eb) == 0 ? "leaf" : "node", \
+- reason, btrfs_header_bytenr(eb), root->objectid, slot)
+-
+-static noinline int check_leaf(struct btrfs_root *root,
+- struct extent_buffer *leaf)
+-{
+- struct btrfs_fs_info *fs_info = root->fs_info;
+- struct btrfs_key key;
+- struct btrfs_key leaf_key;
+- u32 nritems = btrfs_header_nritems(leaf);
+- int slot;
+-
+- /*
+- * Extent buffers from a relocation tree have a owner field that
+- * corresponds to the subvolume tree they are based on. So just from an
+- * extent buffer alone we can not find out what is the id of the
+- * corresponding subvolume tree, so we can not figure out if the extent
+- * buffer corresponds to the root of the relocation tree or not. So skip
+- * this check for relocation trees.
+- */
+- if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+- struct btrfs_root *check_root;
+-
+- key.objectid = btrfs_header_owner(leaf);
+- key.type = BTRFS_ROOT_ITEM_KEY;
+- key.offset = (u64)-1;
+-
+- check_root = btrfs_get_fs_root(fs_info, &key, false);
+- /*
+- * The only reason we also check NULL here is that during
+- * open_ctree() some roots has not yet been set up.
+- */
+- if (!IS_ERR_OR_NULL(check_root)) {
+- struct extent_buffer *eb;
+-
+- eb = btrfs_root_node(check_root);
+- /* if leaf is the root, then it's fine */
+- if (leaf != eb) {
+- CORRUPT("non-root leaf's nritems is 0",
+- leaf, check_root, 0);
+- free_extent_buffer(eb);
+- return -EIO;
+- }
+- free_extent_buffer(eb);
+- }
+- return 0;
+- }
+-
+- if (nritems == 0)
+- return 0;
+-
+- /* Check the 0 item */
+- if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
+- BTRFS_LEAF_DATA_SIZE(fs_info)) {
+- CORRUPT("invalid item offset size pair", leaf, root, 0);
+- return -EIO;
+- }
+-
+- /*
+- * Check to make sure each items keys are in the correct order and their
+- * offsets make sense. We only have to loop through nritems-1 because
+- * we check the current slot against the next slot, which verifies the
+- * next slot's offset+size makes sense and that the current's slot
+- * offset is correct.
+- */
+- for (slot = 0; slot < nritems - 1; slot++) {
+- btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+- btrfs_item_key_to_cpu(leaf, &key, slot + 1);
+-
+- /* Make sure the keys are in the right order */
+- if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
+- CORRUPT("bad key order", leaf, root, slot);
+- return -EIO;
+- }
+-
+- /*
+- * Make sure the offset and ends are right, remember that the
+- * item data starts at the end of the leaf and grows towards the
+- * front.
+- */
+- if (btrfs_item_offset_nr(leaf, slot) !=
+- btrfs_item_end_nr(leaf, slot + 1)) {
+- CORRUPT("slot offset bad", leaf, root, slot);
+- return -EIO;
+- }
+-
+- /*
+- * Check to make sure that we don't point outside of the leaf,
+- * just in case all the items are consistent to each other, but
+- * all point outside of the leaf.
+- */
+- if (btrfs_item_end_nr(leaf, slot) >
+- BTRFS_LEAF_DATA_SIZE(fs_info)) {
+- CORRUPT("slot end outside of leaf", leaf, root, slot);
+- return -EIO;
+- }
+- }
+-
+- return 0;
+-}
+-
+-static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+-{
+- unsigned long nr = btrfs_header_nritems(node);
+- struct btrfs_key key, next_key;
+- int slot;
+- u64 bytenr;
+- int ret = 0;
+-
+- if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
+- btrfs_crit(root->fs_info,
+- "corrupt node: block %llu root %llu nritems %lu",
+- node->start, root->objectid, nr);
+- return -EIO;
+- }
+-
+- for (slot = 0; slot < nr - 1; slot++) {
+- bytenr = btrfs_node_blockptr(node, slot);
+- btrfs_node_key_to_cpu(node, &key, slot);
+- btrfs_node_key_to_cpu(node, &next_key, slot + 1);
+-
+- if (!bytenr) {
+- CORRUPT("invalid item slot", node, root, slot);
+- ret = -EIO;
+- goto out;
+- }
+-
+- if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
+- CORRUPT("bad key order", node, root, slot);
+- ret = -EIO;
+- goto out;
+- }
+- }
+-out:
+- return ret;
+-}
+-
+ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+ u64 phy_offset, struct page *page,
+ u64 start, u64 end, int mirror)
+@@ -749,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+ * that we don't try and read the other copies of this block, just
+ * return -EIO.
+ */
+- if (found_level == 0 && check_leaf(root, eb)) {
++ if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
+ set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+ ret = -EIO;
+ }
+
+- if (found_level > 0 && check_node(root, eb))
++ if (found_level > 0 && btrfs_check_node(root, eb))
+ ret = -EIO;
+
+ if (!ret)
+@@ -4009,7 +3870,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
+ buf->len,
+ fs_info->dirty_metadata_batch);
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+- if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
++ /*
++	 * btrfs_mark_buffer_dirty() can be called with the item pointer set
++	 * but the item data not yet updated, so here we should only check
++	 * item pointers, not item data.
++ */
++ if (btrfs_header_level(buf) == 0 &&
++ btrfs_check_leaf_relaxed(root, buf)) {
+ btrfs_print_leaf(buf);
+ ASSERT(0);
+ }
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 2cb3569ac548..83791d13c204 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -9828,6 +9828,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
+ int ret = 0;
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
++ struct btrfs_block_group_item bg;
++ u64 flags;
+ int slot;
+
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+@@ -9862,8 +9864,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
+ "logical %llu len %llu found bg but no related chunk",
+ found_key.objectid, found_key.offset);
+ ret = -ENOENT;
++ } else if (em->start != found_key.objectid ||
++ em->len != found_key.offset) {
++ btrfs_err(fs_info,
++ "block group %llu len %llu mismatch with chunk %llu len %llu",
++ found_key.objectid, found_key.offset,
++ em->start, em->len);
++ ret = -EUCLEAN;
+ } else {
+- ret = 0;
++ read_extent_buffer(leaf, &bg,
++ btrfs_item_ptr_offset(leaf, slot),
++ sizeof(bg));
++ flags = btrfs_block_group_flags(&bg) &
++ BTRFS_BLOCK_GROUP_TYPE_MASK;
++
++ if (flags != (em->map_lookup->type &
++ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
++ btrfs_err(fs_info,
++"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
++ found_key.objectid,
++ found_key.offset, flags,
++ (BTRFS_BLOCK_GROUP_TYPE_MASK &
++ em->map_lookup->type));
++ ret = -EUCLEAN;
++ } else {
++ ret = 0;
++ }
+ }
+ free_extent_map(em);
+ goto out;
+@@ -10092,6 +10118,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
+ return cache;
+ }
+
++
++/*
++ * Iterate over all chunks and verify that each of them has a corresponding
++ * block group
++ */
++static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
++{
++ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
++ struct extent_map *em;
++ struct btrfs_block_group_cache *bg;
++ u64 start = 0;
++ int ret = 0;
++
++ while (1) {
++ read_lock(&map_tree->map_tree.lock);
++ /*
++ * lookup_extent_mapping will return the first extent map
++ * intersecting the range, so setting @len to 1 is enough to
++ * get the first chunk.
++ */
++ em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
++ read_unlock(&map_tree->map_tree.lock);
++ if (!em)
++ break;
++
++ bg = btrfs_lookup_block_group(fs_info, em->start);
++ if (!bg) {
++ btrfs_err(fs_info,
++ "chunk start=%llu len=%llu doesn't have corresponding block group",
++ em->start, em->len);
++ ret = -EUCLEAN;
++ free_extent_map(em);
++ break;
++ }
++ if (bg->key.objectid != em->start ||
++ bg->key.offset != em->len ||
++ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
++ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
++ btrfs_err(fs_info,
++"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
++ em->start, em->len,
++ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
++ bg->key.objectid, bg->key.offset,
++ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
++ ret = -EUCLEAN;
++ free_extent_map(em);
++ btrfs_put_block_group(bg);
++ break;
++ }
++ start = em->start + em->len;
++ free_extent_map(em);
++ btrfs_put_block_group(bg);
++ }
++ return ret;
++}
++
+ int btrfs_read_block_groups(struct btrfs_fs_info *info)
+ {
+ struct btrfs_path *path;
+@@ -10264,7 +10346,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
+ }
+
+ init_global_block_rsv(info);
+- ret = 0;
++ ret = check_chunk_block_group_mappings(info);
+ error:
+ btrfs_free_path(path);
+ return ret;
+diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
+index eeae2c3ab17e..5feb8b03ffe8 100644
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -4048,6 +4048,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
+ restart:
+ if (update_backref_cache(trans, &rc->backref_cache)) {
+ btrfs_end_transaction(trans);
++ trans = NULL;
+ continue;
+ }
+
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index fe960d5e8913..49a02bf091ae 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -2176,6 +2176,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
+ vol = memdup_user((void __user *)arg, sizeof(*vol));
+ if (IS_ERR(vol))
+ return PTR_ERR(vol);
++ vol->name[BTRFS_PATH_NAME_MAX] = '\0';
+
+ switch (cmd) {
+ case BTRFS_IOC_SCAN_DEV:
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index f74005ca8f08..73c1fbca0c35 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1955,6 +1955,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
+ return ret;
+ }
+
++ btrfs_trans_release_metadata(trans, fs_info);
++ trans->block_rsv = NULL;
++
+ /* make a pass through all the delayed refs we have so far
+ * any runnings procs may add more while we are here
+ */
+@@ -1964,9 +1967,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
+ return ret;
+ }
+
+- btrfs_trans_release_metadata(trans, fs_info);
+- trans->block_rsv = NULL;
+-
+ cur_trans = trans->transaction;
+
+ /*
+diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
+new file mode 100644
+index 000000000000..f206aec1525d
+--- /dev/null
++++ b/fs/btrfs/tree-checker.c
+@@ -0,0 +1,649 @@
++/*
++ * Copyright (C) Qu Wenruo 2017. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public
++ * License v2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program.
++ */
++
++/*
++ * The module is used to catch unexpected/corrupted tree block data.
++ * Such behavior can be caused either by a fuzzed image or bugs.
++ *
++ * The objective is to do leaf/node validation checks when a tree block is
++ * read from disk, and to check *every* possible member, so other code won't
++ * need to check them again.
++ *
++ * Due to the potential for unwanted damage, every checker needs to be
++ * carefully reviewed; otherwise it could prevent valid images from mounting.
++ */
++
++#include "ctree.h"
++#include "tree-checker.h"
++#include "disk-io.h"
++#include "compression.h"
++#include "hash.h"
++#include "volumes.h"
++
++#define CORRUPT(reason, eb, root, slot) \
++ btrfs_crit(root->fs_info, \
++ "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
++ btrfs_header_level(eb) == 0 ? "leaf" : "node", \
++ reason, btrfs_header_bytenr(eb), root->objectid, slot)
++
++/*
++ * Error messages should follow the following format:
++ * corrupt <type>: <identifier>, <reason>[, <bad_value>]
++ *
++ * @type: leaf or node
++ * @identifier: the necessary info to locate the leaf/node.
++ *		It's recommended to decode key.objectid/offset if it's
++ * meaningful.
++ * @reason: describe the error
++ * @bad_value:	optional, it's recommended to output the bad value and its
++ * expected value (range).
++ *
++ * Since comma is used to separate the components, only space is allowed
++ * inside each component.
++ */
++
++/*
++ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
++ * Allows callers to customize the output.
++ */
++__printf(4, 5)
++static void generic_err(const struct btrfs_root *root,
++ const struct extent_buffer *eb, int slot,
++ const char *fmt, ...)
++{
++ struct va_format vaf;
++ va_list args;
++
++ va_start(args, fmt);
++
++ vaf.fmt = fmt;
++ vaf.va = &args;
++
++ btrfs_crit(root->fs_info,
++ "corrupt %s: root=%llu block=%llu slot=%d, %pV",
++ btrfs_header_level(eb) == 0 ? "leaf" : "node",
++ root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
++ va_end(args);
++}
++
++static int check_extent_data_item(struct btrfs_root *root,
++ struct extent_buffer *leaf,
++ struct btrfs_key *key, int slot)
++{
++ struct btrfs_file_extent_item *fi;
++ u32 sectorsize = root->fs_info->sectorsize;
++ u32 item_size = btrfs_item_size_nr(leaf, slot);
++
++ if (!IS_ALIGNED(key->offset, sectorsize)) {
++ CORRUPT("unaligned key offset for file extent",
++ leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
++
++ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
++ CORRUPT("invalid file extent type", leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ /*
++	 * Support for new compression/encryption must introduce an incompat flag,
++ * and must be caught in open_ctree().
++ */
++ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
++ CORRUPT("invalid file extent compression", leaf, root, slot);
++ return -EUCLEAN;
++ }
++ if (btrfs_file_extent_encryption(leaf, fi)) {
++ CORRUPT("invalid file extent encryption", leaf, root, slot);
++ return -EUCLEAN;
++ }
++ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
++ /* Inline extent must have 0 as key offset */
++ if (key->offset) {
++ CORRUPT("inline extent has non-zero key offset",
++ leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ /* Compressed inline extent has no on-disk size, skip it */
++ if (btrfs_file_extent_compression(leaf, fi) !=
++ BTRFS_COMPRESS_NONE)
++ return 0;
++
++ /* Uncompressed inline extent size must match item size */
++ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
++ btrfs_file_extent_ram_bytes(leaf, fi)) {
++ CORRUPT("plaintext inline extent has invalid size",
++ leaf, root, slot);
++ return -EUCLEAN;
++ }
++ return 0;
++ }
++
++ /* Regular or preallocated extent has fixed item size */
++ if (item_size != sizeof(*fi)) {
++ CORRUPT(
++ "regluar or preallocated extent data item size is invalid",
++ leaf, root, slot);
++ return -EUCLEAN;
++ }
++ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
++ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
++ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
++ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
++ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
++ CORRUPT(
++ "regular or preallocated extent data item has unaligned value",
++ leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ return 0;
++}
++
++static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
++ struct btrfs_key *key, int slot)
++{
++ u32 sectorsize = root->fs_info->sectorsize;
++ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
++
++ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
++ CORRUPT("invalid objectid for csum item", leaf, root, slot);
++ return -EUCLEAN;
++ }
++ if (!IS_ALIGNED(key->offset, sectorsize)) {
++ CORRUPT("unaligned key offset for csum item", leaf, root, slot);
++ return -EUCLEAN;
++ }
++ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
++ CORRUPT("unaligned csum item size", leaf, root, slot);
++ return -EUCLEAN;
++ }
++ return 0;
++}
++
++/*
++ * Customized reporting for dir_item; the only important new info is key->objectid,
++ * which represents the inode number
++ */
++__printf(4, 5)
++static void dir_item_err(const struct btrfs_root *root,
++ const struct extent_buffer *eb, int slot,
++ const char *fmt, ...)
++{
++ struct btrfs_key key;
++ struct va_format vaf;
++ va_list args;
++
++ btrfs_item_key_to_cpu(eb, &key, slot);
++ va_start(args, fmt);
++
++ vaf.fmt = fmt;
++ vaf.va = &args;
++
++ btrfs_crit(root->fs_info,
++ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
++ btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
++ btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
++ va_end(args);
++}
++
++static int check_dir_item(struct btrfs_root *root,
++ struct extent_buffer *leaf,
++ struct btrfs_key *key, int slot)
++{
++ struct btrfs_dir_item *di;
++ u32 item_size = btrfs_item_size_nr(leaf, slot);
++ u32 cur = 0;
++
++ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
++ while (cur < item_size) {
++ u32 name_len;
++ u32 data_len;
++ u32 max_name_len;
++ u32 total_size;
++ u32 name_hash;
++ u8 dir_type;
++
++ /* header itself should not cross item boundary */
++ if (cur + sizeof(*di) > item_size) {
++ dir_item_err(root, leaf, slot,
++ "dir item header crosses item boundary, have %zu boundary %u",
++ cur + sizeof(*di), item_size);
++ return -EUCLEAN;
++ }
++
++ /* dir type check */
++ dir_type = btrfs_dir_type(leaf, di);
++ if (dir_type >= BTRFS_FT_MAX) {
++ dir_item_err(root, leaf, slot,
++ "invalid dir item type, have %u expect [0, %u)",
++ dir_type, BTRFS_FT_MAX);
++ return -EUCLEAN;
++ }
++
++ if (key->type == BTRFS_XATTR_ITEM_KEY &&
++ dir_type != BTRFS_FT_XATTR) {
++ dir_item_err(root, leaf, slot,
++ "invalid dir item type for XATTR key, have %u expect %u",
++ dir_type, BTRFS_FT_XATTR);
++ return -EUCLEAN;
++ }
++ if (dir_type == BTRFS_FT_XATTR &&
++ key->type != BTRFS_XATTR_ITEM_KEY) {
++ dir_item_err(root, leaf, slot,
++ "xattr dir type found for non-XATTR key");
++ return -EUCLEAN;
++ }
++ if (dir_type == BTRFS_FT_XATTR)
++ max_name_len = XATTR_NAME_MAX;
++ else
++ max_name_len = BTRFS_NAME_LEN;
++
++ /* Name/data length check */
++ name_len = btrfs_dir_name_len(leaf, di);
++ data_len = btrfs_dir_data_len(leaf, di);
++ if (name_len > max_name_len) {
++ dir_item_err(root, leaf, slot,
++ "dir item name len too long, have %u max %u",
++ name_len, max_name_len);
++ return -EUCLEAN;
++ }
++ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
++ dir_item_err(root, leaf, slot,
++ "dir item name and data len too long, have %u max %u",
++ name_len + data_len,
++ BTRFS_MAX_XATTR_SIZE(root->fs_info));
++ return -EUCLEAN;
++ }
++
++ if (data_len && dir_type != BTRFS_FT_XATTR) {
++ dir_item_err(root, leaf, slot,
++ "dir item with invalid data len, have %u expect 0",
++ data_len);
++ return -EUCLEAN;
++ }
++
++ total_size = sizeof(*di) + name_len + data_len;
++
++ /* header and name/data should not cross item boundary */
++ if (cur + total_size > item_size) {
++ dir_item_err(root, leaf, slot,
++ "dir item data crosses item boundary, have %u boundary %u",
++ cur + total_size, item_size);
++ return -EUCLEAN;
++ }
++
++ /*
++ * Special check for XATTR/DIR_ITEM, as key->offset is the name
++ * hash, it should match its name
++ */
++ if (key->type == BTRFS_DIR_ITEM_KEY ||
++ key->type == BTRFS_XATTR_ITEM_KEY) {
++ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
++
++ read_extent_buffer(leaf, namebuf,
++ (unsigned long)(di + 1), name_len);
++ name_hash = btrfs_name_hash(namebuf, name_len);
++ if (key->offset != name_hash) {
++ dir_item_err(root, leaf, slot,
++ "name hash mismatch with key, have 0x%016x expect 0x%016llx",
++ name_hash, key->offset);
++ return -EUCLEAN;
++ }
++ }
++ cur += total_size;
++ di = (struct btrfs_dir_item *)((void *)di + total_size);
++ }
++ return 0;
++}
++
++__printf(4, 5)
++__cold
++static void block_group_err(const struct btrfs_fs_info *fs_info,
++ const struct extent_buffer *eb, int slot,
++ const char *fmt, ...)
++{
++ struct btrfs_key key;
++ struct va_format vaf;
++ va_list args;
++
++ btrfs_item_key_to_cpu(eb, &key, slot);
++ va_start(args, fmt);
++
++ vaf.fmt = fmt;
++ vaf.va = &args;
++
++ btrfs_crit(fs_info,
++ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
++ btrfs_header_level(eb) == 0 ? "leaf" : "node",
++ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
++ key.objectid, key.offset, &vaf);
++ va_end(args);
++}
++
++static int check_block_group_item(struct btrfs_fs_info *fs_info,
++ struct extent_buffer *leaf,
++ struct btrfs_key *key, int slot)
++{
++ struct btrfs_block_group_item bgi;
++ u32 item_size = btrfs_item_size_nr(leaf, slot);
++ u64 flags;
++ u64 type;
++
++ /*
++ * Here we don't really care about alignment since the extent allocator can
++ * handle it. We care more about the size, as if one block group is
++ * larger than the maximum size, it must be some obvious corruption.
++ */
++ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
++ block_group_err(fs_info, leaf, slot,
++ "invalid block group size, have %llu expect (0, %llu]",
++ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
++ return -EUCLEAN;
++ }
++
++ if (item_size != sizeof(bgi)) {
++ block_group_err(fs_info, leaf, slot,
++ "invalid item size, have %u expect %zu",
++ item_size, sizeof(bgi));
++ return -EUCLEAN;
++ }
++
++ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
++ sizeof(bgi));
++ if (btrfs_block_group_chunk_objectid(&bgi) !=
++ BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
++ block_group_err(fs_info, leaf, slot,
++ "invalid block group chunk objectid, have %llu expect %llu",
++ btrfs_block_group_chunk_objectid(&bgi),
++ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
++ return -EUCLEAN;
++ }
++
++ if (btrfs_block_group_used(&bgi) > key->offset) {
++ block_group_err(fs_info, leaf, slot,
++ "invalid block group used, have %llu expect [0, %llu)",
++ btrfs_block_group_used(&bgi), key->offset);
++ return -EUCLEAN;
++ }
++
++ flags = btrfs_block_group_flags(&bgi);
++ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
++ block_group_err(fs_info, leaf, slot,
++"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
++ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
++ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
++ return -EUCLEAN;
++ }
++
++ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
++ if (type != BTRFS_BLOCK_GROUP_DATA &&
++ type != BTRFS_BLOCK_GROUP_METADATA &&
++ type != BTRFS_BLOCK_GROUP_SYSTEM &&
++ type != (BTRFS_BLOCK_GROUP_METADATA |
++ BTRFS_BLOCK_GROUP_DATA)) {
++ block_group_err(fs_info, leaf, slot,
++"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
++ type, hweight64(type),
++ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
++ BTRFS_BLOCK_GROUP_SYSTEM,
++ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
++ return -EUCLEAN;
++ }
++ return 0;
++}
++
++/*
++ * Common point to switch the item-specific validation.
++ */
++static int check_leaf_item(struct btrfs_root *root,
++ struct extent_buffer *leaf,
++ struct btrfs_key *key, int slot)
++{
++ int ret = 0;
++
++ switch (key->type) {
++ case BTRFS_EXTENT_DATA_KEY:
++ ret = check_extent_data_item(root, leaf, key, slot);
++ break;
++ case BTRFS_EXTENT_CSUM_KEY:
++ ret = check_csum_item(root, leaf, key, slot);
++ break;
++ case BTRFS_DIR_ITEM_KEY:
++ case BTRFS_DIR_INDEX_KEY:
++ case BTRFS_XATTR_ITEM_KEY:
++ ret = check_dir_item(root, leaf, key, slot);
++ break;
++ case BTRFS_BLOCK_GROUP_ITEM_KEY:
++ ret = check_block_group_item(root->fs_info, leaf, key, slot);
++ break;
++ }
++ return ret;
++}
++
++static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
++ bool check_item_data)
++{
++ struct btrfs_fs_info *fs_info = root->fs_info;
++ /* No valid key type is 0, so all keys should be larger than this key */
++ struct btrfs_key prev_key = {0, 0, 0};
++ struct btrfs_key key;
++ u32 nritems = btrfs_header_nritems(leaf);
++ int slot;
++
++ if (btrfs_header_level(leaf) != 0) {
++ generic_err(root, leaf, 0,
++ "invalid level for leaf, have %d expect 0",
++ btrfs_header_level(leaf));
++ return -EUCLEAN;
++ }
++
++ /*
++ * Extent buffers from a relocation tree have an owner field that
++ * corresponds to the subvolume tree they are based on. So just from an
++ * extent buffer alone we can not find out what is the id of the
++ * corresponding subvolume tree, so we can not figure out if the extent
++ * buffer corresponds to the root of the relocation tree or not. So
++ * skip this check for relocation trees.
++ */
++ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
++ u64 owner = btrfs_header_owner(leaf);
++ struct btrfs_root *check_root;
++
++ /* These trees must never be empty */
++ if (owner == BTRFS_ROOT_TREE_OBJECTID ||
++ owner == BTRFS_CHUNK_TREE_OBJECTID ||
++ owner == BTRFS_EXTENT_TREE_OBJECTID ||
++ owner == BTRFS_DEV_TREE_OBJECTID ||
++ owner == BTRFS_FS_TREE_OBJECTID ||
++ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
++ generic_err(root, leaf, 0,
++ "invalid root, root %llu must never be empty",
++ owner);
++ return -EUCLEAN;
++ }
++ key.objectid = owner;
++ key.type = BTRFS_ROOT_ITEM_KEY;
++ key.offset = (u64)-1;
++
++ check_root = btrfs_get_fs_root(fs_info, &key, false);
++ /*
++ * The only reason we also check NULL here is that during
++ * open_ctree() some roots have not yet been set up.
++ */
++ if (!IS_ERR_OR_NULL(check_root)) {
++ struct extent_buffer *eb;
++
++ eb = btrfs_root_node(check_root);
++ /* if leaf is the root, then it's fine */
++ if (leaf != eb) {
++ CORRUPT("non-root leaf's nritems is 0",
++ leaf, check_root, 0);
++ free_extent_buffer(eb);
++ return -EUCLEAN;
++ }
++ free_extent_buffer(eb);
++ }
++ return 0;
++ }
++
++ if (nritems == 0)
++ return 0;
++
++ /*
++ * Check the following things to make sure this is a good leaf, and
++ * leaf users won't need to bother with similar sanity checks:
++ *
++ * 1) key ordering
++ * 2) item offset and size
++ * No overlap, no hole, all inside the leaf.
++ * 3) item content
++ * If possible, do comprehensive sanity check.
++ * NOTE: All checks must only rely on the item data itself.
++ */
++ for (slot = 0; slot < nritems; slot++) {
++ u32 item_end_expected;
++ int ret;
++
++ btrfs_item_key_to_cpu(leaf, &key, slot);
++
++ /* Make sure the keys are in the right order */
++ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
++ CORRUPT("bad key order", leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ /*
++ * Make sure the offsets and ends are right; remember that the
++ * item data starts at the end of the leaf and grows towards the
++ * front.
++ */
++ if (slot == 0)
++ item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
++ else
++ item_end_expected = btrfs_item_offset_nr(leaf,
++ slot - 1);
++ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
++ CORRUPT("slot offset bad", leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ /*
++ * Check to make sure that we don't point outside of the leaf,
++ * just in case all the items are consistent with each other, but
++ * all point outside of the leaf.
++ */
++ if (btrfs_item_end_nr(leaf, slot) >
++ BTRFS_LEAF_DATA_SIZE(fs_info)) {
++ CORRUPT("slot end outside of leaf", leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ /* Also check if the item pointer overlaps with the btrfs item. */
++ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
++ btrfs_item_ptr_offset(leaf, slot)) {
++ CORRUPT("slot overlap with its data", leaf, root, slot);
++ return -EUCLEAN;
++ }
++
++ if (check_item_data) {
++ /*
++ * Check if the item size and content meet other
++ * criteria
++ */
++ ret = check_leaf_item(root, leaf, &key, slot);
++ if (ret < 0)
++ return ret;
++ }
++
++ prev_key.objectid = key.objectid;
++ prev_key.type = key.type;
++ prev_key.offset = key.offset;
++ }
++
++ return 0;
++}
++
++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
++{
++ return check_leaf(root, leaf, true);
++}
++
++int btrfs_check_leaf_relaxed(struct btrfs_root *root,
++ struct extent_buffer *leaf)
++{
++ return check_leaf(root, leaf, false);
++}
++
++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
++{
++ unsigned long nr = btrfs_header_nritems(node);
++ struct btrfs_key key, next_key;
++ int slot;
++ int level = btrfs_header_level(node);
++ u64 bytenr;
++ int ret = 0;
++
++ if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
++ generic_err(root, node, 0,
++ "invalid level for node, have %d expect [1, %d]",
++ level, BTRFS_MAX_LEVEL - 1);
++ return -EUCLEAN;
++ }
++ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
++ btrfs_crit(root->fs_info,
++"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
++ root->objectid, node->start,
++ nr == 0 ? "small" : "large", nr,
++ BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
++ return -EUCLEAN;
++ }
++
++ for (slot = 0; slot < nr - 1; slot++) {
++ bytenr = btrfs_node_blockptr(node, slot);
++ btrfs_node_key_to_cpu(node, &key, slot);
++ btrfs_node_key_to_cpu(node, &next_key, slot + 1);
++
++ if (!bytenr) {
++ generic_err(root, node, slot,
++ "invalid NULL node pointer");
++ ret = -EUCLEAN;
++ goto out;
++ }
++ if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
++ generic_err(root, node, slot,
++ "unaligned pointer, have %llu should be aligned to %u",
++ bytenr, root->fs_info->sectorsize);
++ ret = -EUCLEAN;
++ goto out;
++ }
++
++ if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
++ generic_err(root, node, slot,
++ "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
++ key.objectid, key.type, key.offset,
++ next_key.objectid, next_key.type,
++ next_key.offset);
++ ret = -EUCLEAN;
++ goto out;
++ }
++ }
++out:
++ return ret;
++}
+diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
+new file mode 100644
+index 000000000000..3d53e8d6fda0
+--- /dev/null
++++ b/fs/btrfs/tree-checker.h
+@@ -0,0 +1,38 @@
++/*
++ * Copyright (C) Qu Wenruo 2017. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public
++ * License v2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program.
++ */
++
++#ifndef __BTRFS_TREE_CHECKER__
++#define __BTRFS_TREE_CHECKER__
++
++#include "ctree.h"
++#include "extent_io.h"
++
++/*
++ * Comprehensive leaf checker.
++ * Will check not only the item pointers, but also every possible member
++ * in item data.
++ */
++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
++
++/*
++ * Less strict leaf checker.
++ * Will only check item pointers, without reading item data.
++ */
++int btrfs_check_leaf_relaxed(struct btrfs_root *root,
++ struct extent_buffer *leaf);
++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
++
++#endif
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index a0947f4a3e87..9663b6aa2a56 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -4647,7 +4647,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+
+ if (type & BTRFS_BLOCK_GROUP_DATA) {
+ max_stripe_size = SZ_1G;
+- max_chunk_size = 10 * max_stripe_size;
++ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
+ if (!devs_max)
+ devs_max = BTRFS_MAX_DEVS(info->chunk_root);
+ } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+@@ -6353,6 +6353,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
++ u64 features;
++ bool mixed = false;
+
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+@@ -6391,6 +6393,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+ btrfs_chunk_type(leaf, chunk));
+ return -EIO;
+ }
++
++ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
++ btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
++ return -EIO;
++ }
++
++ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
++ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
++ btrfs_err(fs_info,
++ "system chunk with data or metadata type: 0x%llx", type);
++ return -EIO;
++ }
++
++ features = btrfs_super_incompat_flags(fs_info->super_copy);
++ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
++ mixed = true;
++
++ if (!mixed) {
++ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
++ (type & BTRFS_BLOCK_GROUP_DATA)) {
++ btrfs_err(fs_info,
++ "mixed chunk type in non-mixed mode: 0x%llx", type);
++ return -EIO;
++ }
++ }
++
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
+index c5dd48eb7b3d..76fb6e84f201 100644
+--- a/fs/btrfs/volumes.h
++++ b/fs/btrfs/volumes.h
+@@ -24,6 +24,8 @@
+ #include <linux/btrfs.h>
+ #include "async-thread.h"
+
++#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
++
+ extern struct mutex uuid_mutex;
+
+ #define BTRFS_STRIPE_LEN SZ_64K
+diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
+index bf378ddca4db..a48984dd6426 100644
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -4079,6 +4079,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+ return auth;
+ }
+
++static int add_authorizer_challenge(struct ceph_connection *con,
++ void *challenge_buf, int challenge_buf_len)
++{
++ struct ceph_mds_session *s = con->private;
++ struct ceph_mds_client *mdsc = s->s_mdsc;
++ struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
++
++ return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
++ challenge_buf, challenge_buf_len);
++}
+
+ static int verify_authorizer_reply(struct ceph_connection *con)
+ {
+@@ -4142,6 +4152,7 @@ static const struct ceph_connection_operations mds_con_ops = {
+ .put = con_put,
+ .dispatch = dispatch,
+ .get_authorizer = get_authorizer,
++ .add_authorizer_challenge = add_authorizer_challenge,
+ .verify_authorizer_reply = verify_authorizer_reply,
+ .invalidate_authorizer = invalidate_authorizer,
+ .peer_reset = peer_reset,
+diff --git a/fs/direct-io.c b/fs/direct-io.c
+index 625a84aa6484..40567501015f 100644
+--- a/fs/direct-io.c
++++ b/fs/direct-io.c
+@@ -304,8 +304,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
+ */
+ dio->iocb->ki_pos += transferred;
+
+- if (dio->op == REQ_OP_WRITE)
+- ret = generic_write_sync(dio->iocb, transferred);
++ if (ret > 0 && dio->op == REQ_OP_WRITE)
++ ret = generic_write_sync(dio->iocb, ret);
+ dio->iocb->ki_complete(dio->iocb, ret, 0);
+ }
+
+diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
+index 62d9a659a8ff..dd8f10db82e9 100644
+--- a/fs/ext2/xattr.c
++++ b/fs/ext2/xattr.c
+@@ -612,9 +612,9 @@ skip_replace:
+ }
+
+ cleanup:
+- brelse(bh);
+ if (!(bh && header == HDR(bh)))
+ kfree(header);
++ brelse(bh);
+ up_write(&EXT2_I(inode)->xattr_sem);
+
+ return error;
+diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
+index 41fce930f44c..624817eeb25e 100644
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -69,6 +69,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
+ .old_blkaddr = index,
+ .new_blkaddr = index,
+ .encrypted_page = NULL,
++ .is_meta = is_meta,
+ };
+
+ if (unlikely(!is_meta))
+@@ -85,8 +86,10 @@ repeat:
+ fio.page = page;
+
+ if (f2fs_submit_page_bio(&fio)) {
+- f2fs_put_page(page, 1);
+- goto repeat;
++ memset(page_address(page), 0, PAGE_SIZE);
++ f2fs_stop_checkpoint(sbi, false);
++ f2fs_bug_on(sbi, 1);
++ return page;
+ }
+
+ lock_page(page);
+@@ -117,7 +120,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
+ return __get_meta_page(sbi, index, false);
+ }
+
+-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
++ block_t blkaddr, int type)
+ {
+ switch (type) {
+ case META_NAT:
+@@ -137,8 +141,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
+ return false;
+ break;
+ case META_POR:
++ case DATA_GENERIC:
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+- blkaddr < MAIN_BLKADDR(sbi)))
++ blkaddr < MAIN_BLKADDR(sbi))) {
++ if (type == DATA_GENERIC) {
++ f2fs_msg(sbi->sb, KERN_WARNING,
++ "access invalid blkaddr:%u", blkaddr);
++ WARN_ON(1);
++ }
++ return false;
++ }
++ break;
++ case META_GENERIC:
++ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
++ blkaddr >= MAIN_BLKADDR(sbi)))
+ return false;
+ break;
+ default:
+@@ -163,6 +179,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+ .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
+ .encrypted_page = NULL,
+ .in_list = false,
++ .is_meta = (type != META_POR),
+ };
+ struct blk_plug plug;
+
+@@ -172,7 +189,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+ blk_start_plug(&plug);
+ for (; nrpages-- > 0; blkno++) {
+
+- if (!is_valid_blkaddr(sbi, blkno, type))
++ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
+ goto out;
+
+ switch (type) {
+@@ -737,6 +754,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
+ &cp_page_1, version);
+ if (err)
+ return NULL;
++
++ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
++ sbi->blocks_per_seg) {
++ f2fs_msg(sbi->sb, KERN_WARNING,
++ "invalid cp_pack_total_block_count:%u",
++ le32_to_cpu(cp_block->cp_pack_total_block_count));
++ goto invalid_cp;
++ }
+ pre_version = *version;
+
+ cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+@@ -800,15 +825,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
+ cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
+ memcpy(sbi->ckpt, cp_block, blk_size);
+
+- /* Sanity checking of checkpoint */
+- if (sanity_check_ckpt(sbi))
+- goto free_fail_no_cp;
+-
+ if (cur_page == cp1)
+ sbi->cur_cp_pack = 1;
+ else
+ sbi->cur_cp_pack = 2;
+
++ /* Sanity checking of checkpoint */
++ if (sanity_check_ckpt(sbi))
++ goto free_fail_no_cp;
++
+ if (cp_blks <= 1)
+ goto done;
+
+diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
+index 6fbb6d75318a..8f6e7c3a10f8 100644
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -369,6 +369,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
+ struct page *page = fio->encrypted_page ?
+ fio->encrypted_page : fio->page;
+
++ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
++ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
++ return -EFAULT;
++
+ trace_f2fs_submit_page_bio(page, fio);
+ f2fs_trace_ios(fio, 0);
+
+@@ -412,9 +416,9 @@ next:
+ spin_unlock(&io->io_lock);
+ }
+
+- if (fio->old_blkaddr != NEW_ADDR)
+- verify_block_addr(sbi, fio->old_blkaddr);
+- verify_block_addr(sbi, fio->new_blkaddr);
++ if (__is_valid_data_blkaddr(fio->old_blkaddr))
++ verify_block_addr(fio, fio->old_blkaddr);
++ verify_block_addr(fio, fio->new_blkaddr);
+
+ bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
+@@ -945,7 +949,13 @@ next_dnode:
+ next_block:
+ blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
+
+- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
++ if (__is_valid_data_blkaddr(blkaddr) &&
++ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
++ err = -EFAULT;
++ goto sync_out;
++ }
++
++ if (!is_valid_data_blkaddr(sbi, blkaddr)) {
+ if (create) {
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+@@ -1263,6 +1273,10 @@ got_it:
+ SetPageUptodate(page);
+ goto confused;
+ }
++
++ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
++ DATA_GENERIC))
++ goto set_error_page;
+ } else {
+ zero_user_segment(page, 0, PAGE_SIZE);
+ if (!PageUptodate(page))
+@@ -1387,15 +1401,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
+ return need_inplace_update_policy(inode, fio);
+ }
+
+-static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
+-{
+- if (fio->old_blkaddr == NEW_ADDR)
+- return false;
+- if (fio->old_blkaddr == NULL_ADDR)
+- return false;
+- return true;
+-}
+-
+ int do_write_data_page(struct f2fs_io_info *fio)
+ {
+ struct page *page = fio->page;
+@@ -1410,11 +1415,13 @@ int do_write_data_page(struct f2fs_io_info *fio)
+ f2fs_lookup_extent_cache(inode, page->index, &ei)) {
+ fio->old_blkaddr = ei.blk + page->index - ei.fofs;
+
+- if (valid_ipu_blkaddr(fio)) {
+- ipu_force = true;
+- fio->need_lock = LOCK_DONE;
+- goto got_it;
+- }
++ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
++ DATA_GENERIC))
++ return -EFAULT;
++
++ ipu_force = true;
++ fio->need_lock = LOCK_DONE;
++ goto got_it;
+ }
+
+ /* Deadlock due to between page->lock and f2fs_lock_op */
+@@ -1433,11 +1440,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
+ goto out_writepage;
+ }
+ got_it:
++ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
++ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
++ DATA_GENERIC)) {
++ err = -EFAULT;
++ goto out_writepage;
++ }
+ /*
+ * If current allocation needs SSR,
+ * it had better in-place writes for updated data.
+ */
+- if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
++ if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
++ need_inplace_update(fio))) {
+ err = encrypt_one_page(fio);
+ if (err)
+ goto out_writepage;
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index 54f8520ad7a2..3f1a44696036 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -162,7 +162,7 @@ struct cp_control {
+ };
+
+ /*
+- * For CP/NAT/SIT/SSA readahead
++ * indicate meta/data type
+ */
+ enum {
+ META_CP,
+@@ -170,6 +170,8 @@ enum {
+ META_SIT,
+ META_SSA,
+ META_POR,
++ DATA_GENERIC,
++ META_GENERIC,
+ };
+
+ /* for the list of ino */
+@@ -910,6 +912,7 @@ struct f2fs_io_info {
+ bool submitted; /* indicate IO submission */
+ int need_lock; /* indicate we need to lock cp_rwsem */
+ bool in_list; /* indicate fio is in io_list */
++ bool is_meta; /* indicate whether to borrow the meta inode mapping or not */
+ enum iostat_type io_type; /* io type */
+ };
+
+@@ -2354,6 +2357,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
+ spin_unlock(&sbi->iostat_lock);
+ }
+
++#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
++ (!is_read_io(fio->op) || fio->is_meta))
++
++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
++ block_t blkaddr, int type);
++void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
++static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
++ block_t blkaddr, int type)
++{
++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
++ f2fs_msg(sbi->sb, KERN_ERR,
++ "invalid blkaddr: %u, type: %d, run fsck to fix.",
++ blkaddr, type);
++ f2fs_bug_on(sbi, 1);
++ }
++}
++
++static inline bool __is_valid_data_blkaddr(block_t blkaddr)
++{
++ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
++ return false;
++ return true;
++}
++
++static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
++ block_t blkaddr)
++{
++ if (!__is_valid_data_blkaddr(blkaddr))
++ return false;
++ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
++ return true;
++}
++
+ /*
+ * file.c
+ */
+@@ -2564,7 +2600,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
+ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
+-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
++ block_t blkaddr, int type);
+ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+ int type, bool sync);
+ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
+diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
+index 6f589730782d..7d3189f1941c 100644
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -328,13 +328,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
+ return pgofs;
+ }
+
+-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+- int whence)
++static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
++ pgoff_t dirty, pgoff_t pgofs, int whence)
+ {
+ switch (whence) {
+ case SEEK_DATA:
+ if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
++ is_valid_data_blkaddr(sbi, blkaddr))
+ return true;
+ break;
+ case SEEK_HOLE:
+@@ -397,7 +397,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
+ blkaddr = datablock_addr(dn.inode,
+ dn.node_page, dn.ofs_in_node);
+
+- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
++ if (__is_valid_data_blkaddr(blkaddr) &&
++ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
++ blkaddr, DATA_GENERIC)) {
++ f2fs_put_dnode(&dn);
++ goto fail;
++ }
++
++ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
++ pgofs, whence)) {
+ f2fs_put_dnode(&dn);
+ goto found;
+ }
+@@ -495,6 +503,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+
+ dn->data_blkaddr = NULL_ADDR;
+ set_data_blkaddr(dn);
++
++ if (__is_valid_data_blkaddr(blkaddr) &&
++ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
++ continue;
++
+ invalidate_blocks(sbi, blkaddr);
+ if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
+ clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
+diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
+index 259b0aa283f0..9a40724dbaa6 100644
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -62,11 +62,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+ }
+ }
+
+-static bool __written_first_block(struct f2fs_inode *ri)
++static bool __written_first_block(struct f2fs_sb_info *sbi,
++ struct f2fs_inode *ri)
+ {
+ block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
+
+- if (addr != NEW_ADDR && addr != NULL_ADDR)
++ if (is_valid_data_blkaddr(sbi, addr))
+ return true;
+ return false;
+ }
+@@ -179,6 +180,72 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
+ ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
+ }
+
++static bool sanity_check_inode(struct inode *inode, struct page *node_page)
++{
++ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
++ struct f2fs_inode_info *fi = F2FS_I(inode);
++ unsigned long long iblocks;
++
++ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
++ if (!iblocks) {
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ f2fs_msg(sbi->sb, KERN_WARNING,
++ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
++ "run fsck to fix.",
++ __func__, inode->i_ino, iblocks);
++ return false;
++ }
++
++ if (ino_of_node(node_page) != nid_of_node(node_page)) {
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ f2fs_msg(sbi->sb, KERN_WARNING,
++ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
++ "[%u, %u] run fsck to fix.",
++ __func__, inode->i_ino,
++ ino_of_node(node_page), nid_of_node(node_page));
++ return false;
++ }
++
++ if (f2fs_has_extra_attr(inode) &&
++ !f2fs_sb_has_extra_attr(sbi->sb)) {
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ f2fs_msg(sbi->sb, KERN_WARNING,
++ "%s: inode (ino=%lx) is with extra_attr, "
++ "but extra_attr feature is off",
++ __func__, inode->i_ino);
++ return false;
++ }
++
++ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
++ fi->i_extra_isize % sizeof(__le32)) {
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ f2fs_msg(sbi->sb, KERN_WARNING,
++ "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
++ "max: %zu",
++ __func__, inode->i_ino, fi->i_extra_isize,
++ F2FS_TOTAL_EXTRA_ATTR_SIZE);
++ return false;
++ }
++
++ if (F2FS_I(inode)->extent_tree) {
++ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
++
++ if (ei->len &&
++ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
++ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
++ DATA_GENERIC))) {
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ f2fs_msg(sbi->sb, KERN_WARNING,
++ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
++ "is incorrect, run fsck to fix",
++ __func__, inode->i_ino,
++ ei->blk, ei->fofs, ei->len);
++ return false;
++ }
++ }
++ return true;
++}
++
+ static int do_read_inode(struct inode *inode)
+ {
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+@@ -228,6 +295,11 @@ static int do_read_inode(struct inode *inode)
+ fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
+ le16_to_cpu(ri->i_extra_isize) : 0;
+
++ if (!sanity_check_inode(inode, node_page)) {
++ f2fs_put_page(node_page, 1);
++ return -EINVAL;
++ }
++
+ /* check data exist */
+ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
+ __recover_inline_status(inode, node_page);
+@@ -235,7 +307,7 @@ static int do_read_inode(struct inode *inode)
+ /* get rdev by using inline_info */
+ __get_inode_rdev(inode, ri);
+
+- if (__written_first_block(ri))
++ if (__written_first_block(sbi, ri))
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+
+ if (!need_inode_block_update(sbi, inode->i_ino))
+diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
+index 712505ec5de4..65de72d65562 100644
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -334,8 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
+ new_blkaddr == NULL_ADDR);
+ f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
+ new_blkaddr == NEW_ADDR);
+- f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
+- nat_get_blkaddr(e) != NULL_ADDR &&
++ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
+ new_blkaddr == NEW_ADDR);
+
+ /* increment version no as node is removed */
+@@ -350,7 +349,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
+
+ /* change address */
+ nat_set_blkaddr(e, new_blkaddr);
+- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
++ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
+ set_nat_flag(e, IS_CHECKPOINTED, false);
+ __set_nat_cache_dirty(nm_i, e);
+
+@@ -1399,6 +1398,12 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
+ return 0;
+ }
+
++ if (__is_valid_data_blkaddr(ni.blk_addr) &&
++ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
++ up_read(&sbi->node_write);
++ goto redirty_out;
++ }
++
+ if (atomic && !test_opt(sbi, NOBARRIER))
+ fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
+
+diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
+index 765fadf954af..6ea445377767 100644
+--- a/fs/f2fs/recovery.c
++++ b/fs/f2fs/recovery.c
+@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
+ while (1) {
+ struct fsync_inode_entry *entry;
+
+- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
+ return 0;
+
+ page = get_tmp_page(sbi, blkaddr);
+@@ -479,7 +479,7 @@ retry_dn:
+ }
+
+ /* dest is valid block, try to recover from src to dest */
+- if (is_valid_blkaddr(sbi, dest, META_POR)) {
++ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
+
+ if (src == NULL_ADDR) {
+ err = reserve_new_block(&dn);
+@@ -540,7 +540,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
+ while (1) {
+ struct fsync_inode_entry *entry;
+
+- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
+ break;
+
+ ra_meta_pages_cond(sbi, blkaddr);
+diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
+index 3c7bbbae0afa..5c698757e116 100644
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -1758,7 +1758,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
+ struct seg_entry *se;
+ bool is_cp = false;
+
+- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
++ if (!is_valid_data_blkaddr(sbi, blkaddr))
+ return true;
+
+ mutex_lock(&sit_i->sentry_lock);
+@@ -2571,7 +2571,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
+ {
+ struct page *cpage;
+
+- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
++ if (!is_valid_data_blkaddr(sbi, blkaddr))
+ return;
+
+ cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
+@@ -3304,6 +3304,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
+ unsigned int old_valid_blocks;
+
+ start = le32_to_cpu(segno_in_journal(journal, i));
++ if (start >= MAIN_SEGS(sbi)) {
++ f2fs_msg(sbi->sb, KERN_ERR,
++ "Wrong journal entry on segno %u",
++ start);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ err = -EINVAL;
++ break;
++ }
++
+ se = &sit_i->sentries[start];
+ sit = sit_in_journal(journal, i);
+
+diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
+index 4dfb5080098f..47348d98165b 100644
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -53,13 +53,19 @@
+ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
+ (sbi)->segs_per_sec)) \
+
+-#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
+-#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
++#define MAIN_BLKADDR(sbi) \
++ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
++#define SEG0_BLKADDR(sbi) \
++ (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \
++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr))
+
+ #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
+ #define MAIN_SECS(sbi) ((sbi)->total_sections)
+
+-#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
++#define TOTAL_SEGS(sbi) \
++ (SM_I(sbi) ? SM_I(sbi)->segment_count : \
++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
+ #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
+
+ #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
+@@ -79,7 +85,7 @@
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
+
+ #define GET_SEGNO(sbi, blk_addr) \
+- ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
++ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
+ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
+ GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
+ #define BLKS_PER_SEC(sbi) \
+@@ -619,10 +625,14 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
+ f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
+ }
+
+-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
++static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
+ {
+- BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
+- || blk_addr >= MAX_BLKADDR(sbi));
++ struct f2fs_sb_info *sbi = fio->sbi;
++
++ if (__is_meta_io(fio))
++ verify_blkaddr(sbi, blk_addr, META_GENERIC);
++ else
++ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
+ }
+
+ /*
+diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
+index 7cda685296b2..de4de4ebe64c 100644
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1807,6 +1807,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
+ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
+ struct buffer_head *bh)
+ {
++ block_t segment_count, segs_per_sec, secs_per_zone;
++ block_t total_sections, blocks_per_seg;
+ struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
+ (bh->b_data + F2FS_SUPER_OFFSET);
+ struct super_block *sb = sbi->sb;
+@@ -1863,6 +1865,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
+ return 1;
+ }
+
++ segment_count = le32_to_cpu(raw_super->segment_count);
++ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
++ secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
++ total_sections = le32_to_cpu(raw_super->section_count);
++
++ /* blocks_per_seg should be 512, given the above check */
++ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
++
++ if (segment_count > F2FS_MAX_SEGMENT ||
++ segment_count < F2FS_MIN_SEGMENTS) {
++ f2fs_msg(sb, KERN_INFO,
++ "Invalid segment count (%u)",
++ segment_count);
++ return 1;
++ }
++
++ if (total_sections > segment_count ||
++ total_sections < F2FS_MIN_SEGMENTS ||
++ segs_per_sec > segment_count || !segs_per_sec) {
++ f2fs_msg(sb, KERN_INFO,
++ "Invalid segment/section count (%u, %u x %u)",
++ segment_count, total_sections, segs_per_sec);
++ return 1;
++ }
++
++ if ((segment_count / segs_per_sec) < total_sections) {
++ f2fs_msg(sb, KERN_INFO,
++ "Small segment_count (%u < %u * %u)",
++ segment_count, segs_per_sec, total_sections);
++ return 1;
++ }
++
++ if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
++ f2fs_msg(sb, KERN_INFO,
++ "Wrong segment_count / block_count (%u > %u)",
++ segment_count, le32_to_cpu(raw_super->block_count));
++ return 1;
++ }
++
++ if (secs_per_zone > total_sections || !secs_per_zone) {
++ f2fs_msg(sb, KERN_INFO,
++ "Wrong secs_per_zone / total_sections (%u, %u)",
++ secs_per_zone, total_sections);
++ return 1;
++ }
++ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) {
++ f2fs_msg(sb, KERN_INFO,
++ "Corrupted extension count (%u > %u)",
++ le32_to_cpu(raw_super->extension_count),
++ F2FS_MAX_EXTENSION);
++ return 1;
++ }
++
++ if (le32_to_cpu(raw_super->cp_payload) >
++ (blocks_per_seg - F2FS_CP_PACKS)) {
++ f2fs_msg(sb, KERN_INFO,
++ "Insane cp_payload (%u > %u)",
++ le32_to_cpu(raw_super->cp_payload),
++ blocks_per_seg - F2FS_CP_PACKS);
++ return 1;
++ }
++
+ /* check reserved ino info */
+ if (le32_to_cpu(raw_super->node_ino) != 1 ||
+ le32_to_cpu(raw_super->meta_ino) != 2 ||
+@@ -1875,13 +1939,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
+ return 1;
+ }
+
+- if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
+- f2fs_msg(sb, KERN_INFO,
+- "Invalid segment count (%u)",
+- le32_to_cpu(raw_super->segment_count));
+- return 1;
+- }
+-
+ /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
+ if (sanity_check_area_boundary(sbi, bh))
+ return 1;
+@@ -1899,6 +1956,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+ unsigned int sit_segs, nat_segs;
+ unsigned int sit_bitmap_size, nat_bitmap_size;
+ unsigned int log_blocks_per_seg;
++ unsigned int segment_count_main;
++ unsigned int cp_pack_start_sum, cp_payload;
++ block_t user_block_count;
+ int i;
+
+ total = le32_to_cpu(raw_super->segment_count);
+@@ -1923,6 +1983,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+ return 1;
+ }
+
++ user_block_count = le64_to_cpu(ckpt->user_block_count);
++ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
++ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
++ if (!user_block_count || user_block_count >=
++ segment_count_main << log_blocks_per_seg) {
++ f2fs_msg(sbi->sb, KERN_ERR,
++ "Wrong user_block_count: %u", user_block_count);
++ return 1;
++ }
++
+ main_segs = le32_to_cpu(raw_super->segment_count_main);
+ blocks_per_seg = sbi->blocks_per_seg;
+
+@@ -1939,7 +2009,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+
+ sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
+ nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
+- log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
+ nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
+@@ -1949,6 +2018,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+ return 1;
+ }
+
++ cp_pack_start_sum = __start_sum_addr(sbi);
++ cp_payload = __cp_payload(sbi);
++ if (cp_pack_start_sum < cp_payload + 1 ||
++ cp_pack_start_sum > blocks_per_seg - 1 -
++ NR_CURSEG_TYPE) {
++ f2fs_msg(sbi->sb, KERN_ERR,
++ "Wrong cp_pack_start_sum: %u",
++ cp_pack_start_sum);
++ return 1;
++ }
++
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
+ return 1;
+diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
+index 6249c92671de..ea66f04f46f7 100644
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -501,7 +501,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
+ if (args->flags & ATTR_CREATE)
+ return retval;
+ retval = xfs_attr_shortform_remove(args);
+- ASSERT(retval == 0);
++ if (retval)
++ return retval;
++ /*
++ * Since we have removed the old attr, clear ATTR_REPLACE so
++ * that the leaf format add routine won't trip over the attr
++ * not being around.
++ */
++ args->flags &= ~ATTR_REPLACE;
+ }
+
+ if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index a3333004fd2b..8458cc5fbce5 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -113,6 +113,7 @@ struct bpf_insn_aux_data {
+ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
+ };
+ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
++ int sanitize_stack_off; /* stack slot to be cleared */
+ bool seen; /* this insn was processed by the verifier */
+ };
+
+diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
+index e931da8424a4..6728c2ee0205 100644
+--- a/include/linux/ceph/auth.h
++++ b/include/linux/ceph/auth.h
+@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
+ /* ensure that an existing authorizer is up to date */
+ int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth);
++ int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
++ struct ceph_authorizer *a,
++ void *challenge_buf,
++ int challenge_buf_len);
+ int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a);
+ void (*invalidate_authorizer)(struct ceph_auth_client *ac,
+@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
+ extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *a);
++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
++ struct ceph_authorizer *a,
++ void *challenge_buf,
++ int challenge_buf_len);
+ extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a);
+ extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
+diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
+index 59042d5ac520..70f42eef813b 100644
+--- a/include/linux/ceph/ceph_features.h
++++ b/include/linux/ceph/ceph_features.h
+@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
+ DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
+ DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
+ DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
+-DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit*
++DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
++DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit*
+
+-DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down!
+ DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal
+ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
+
+@@ -209,7 +209,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
+ CEPH_FEATURE_SERVER_JEWEL | \
+ CEPH_FEATURE_MON_STATEFUL_SUB | \
+ CEPH_FEATURE_CRUSH_TUNABLES5 | \
+- CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
++ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
++ CEPH_FEATURE_CEPHX_V2)
+
+ #define CEPH_FEATURES_REQUIRED_DEFAULT \
+ (CEPH_FEATURE_NOSRCADDR | \
+diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
+index ead9d85f1c11..18fbe910ed55 100644
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -31,6 +31,9 @@ struct ceph_connection_operations {
+ struct ceph_auth_handshake *(*get_authorizer) (
+ struct ceph_connection *con,
+ int *proto, int force_new);
++ int (*add_authorizer_challenge)(struct ceph_connection *con,
++ void *challenge_buf,
++ int challenge_buf_len);
+ int (*verify_authorizer_reply) (struct ceph_connection *con);
+ int (*invalidate_authorizer)(struct ceph_connection *con);
+
+@@ -203,9 +206,8 @@ struct ceph_connection {
+ attempt for this connection, client */
+ u32 peer_global_seq; /* peer's global seq for this connection */
+
++ struct ceph_auth_handshake *auth;
+ int auth_retry; /* true if we need a newer authorizer */
+- void *auth_reply_buf; /* where to put the authorizer reply */
+- int auth_reply_buf_len;
+
+ struct mutex mutex;
+
+diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
+index 73ae2a926548..9e50aede46c8 100644
+--- a/include/linux/ceph/msgr.h
++++ b/include/linux/ceph/msgr.h
+@@ -91,7 +91,7 @@ struct ceph_entity_inst {
+ #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
+ #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */
+ #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
+-
++#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */
+
+ /*
+ * connection negotiation
+diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
+index 3b7675bcca64..cd0d2270998f 100644
+--- a/include/linux/jump_label.h
++++ b/include/linux/jump_label.h
+@@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
+ extern int jump_label_text_reserved(void *start, void *end);
+ extern void static_key_slow_inc(struct static_key *key);
+ extern void static_key_slow_dec(struct static_key *key);
++extern void static_key_slow_inc_cpuslocked(struct static_key *key);
++extern void static_key_slow_dec_cpuslocked(struct static_key *key);
+ extern void jump_label_apply_nops(struct module *mod);
+ extern int static_key_count(struct static_key *key);
+ extern void static_key_enable(struct static_key *key);
+@@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key)
+ atomic_dec(&key->enabled);
+ }
+
++#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key)
++#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key)
++
+ static inline int jump_label_text_reserved(void *start, void *end)
+ {
+ return 0;
+@@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void);
+
+ #define static_branch_inc(x) static_key_slow_inc(&(x)->key)
+ #define static_branch_dec(x) static_key_slow_dec(&(x)->key)
++#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key)
++#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key)
+
+ /*
+ * Normal usage; boolean enable/disable.
+diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
+index 919b2a0b0307..38342e88b3f3 100644
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -62,8 +62,8 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
+ #define PTRACE_MODE_READ 0x01
+ #define PTRACE_MODE_ATTACH 0x02
+ #define PTRACE_MODE_NOAUDIT 0x04
+-#define PTRACE_MODE_FSCREDS 0x08
+-#define PTRACE_MODE_REALCREDS 0x10
++#define PTRACE_MODE_FSCREDS 0x08
++#define PTRACE_MODE_REALCREDS 0x10
+
+ /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
+ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index e04919aa8201..866439c361a9 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1405,6 +1405,8 @@ static inline bool is_percpu_thread(void)
+ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
+ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
+ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
++#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
++#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
+
+ #define TASK_PFA_TEST(name, func) \
+ static inline bool task_##func(struct task_struct *p) \
+@@ -1436,6 +1438,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+ TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+ TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
++TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
++TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
++TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
++
++TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
++TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
++
+ static inline void
+ current_restore_flags(unsigned long orig_flags, unsigned long flags)
+ {
+diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
+new file mode 100644
+index 000000000000..59d3736c454c
+--- /dev/null
++++ b/include/linux/sched/smt.h
+@@ -0,0 +1,20 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_SCHED_SMT_H
++#define _LINUX_SCHED_SMT_H
++
++#include <linux/static_key.h>
++
++#ifdef CONFIG_SCHED_SMT
++extern struct static_key_false sched_smt_present;
++
++static __always_inline bool sched_smt_active(void)
++{
++ return static_branch_likely(&sched_smt_present);
++}
++#else
++static inline bool sched_smt_active(void) { return false; }
++#endif
++
++void arch_smt_update(void);
++
++#endif
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index f64e88444082..f6250555ce7d 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1288,6 +1288,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+ }
+ }
+
++static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
++{
++ skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
++ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
++}
++
++static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
++{
++ return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
++}
++
++static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
++{
++ return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
++}
++
+ /* Release a reference on a zerocopy structure */
+ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+ {
+@@ -1297,7 +1313,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+ if (uarg->callback == sock_zerocopy_callback) {
+ uarg->zerocopy = uarg->zerocopy && zerocopy;
+ sock_zerocopy_put(uarg);
+- } else {
++ } else if (!skb_zcopy_is_nouarg(skb)) {
+ uarg->callback(uarg, zerocopy);
+ }
+
+diff --git a/include/net/tls.h b/include/net/tls.h
+index 86ed3dd80fe7..604fd982da19 100644
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -89,6 +89,8 @@ struct tls_context {
+
+ void *priv_ctx;
+
++ u8 tx_conf:2;
++
+ u16 prepend_size;
+ u16 tag_size;
+ u16 overhead_size;
+@@ -104,7 +106,6 @@ struct tls_context {
+
+ u16 pending_open_record_frags;
+ int (*push_pending_record)(struct sock *sk, int flags);
+- void (*free_resources)(struct sock *sk);
+
+ void (*sk_write_space)(struct sock *sk);
+ void (*sk_proto_close)(struct sock *sk, long timeout);
+@@ -129,6 +130,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+ int tls_sw_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+ void tls_sw_close(struct sock *sk, long timeout);
++void tls_sw_free_tx_resources(struct sock *sk);
+
+ void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
+ void tls_icsk_clean_acked(struct sock *sk);
+diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
+index 7115838fbf2a..38ab0e06259a 100644
+--- a/include/uapi/linux/btrfs_tree.h
++++ b/include/uapi/linux/btrfs_tree.h
+@@ -734,6 +734,7 @@ struct btrfs_balance_item {
+ #define BTRFS_FILE_EXTENT_INLINE 0
+ #define BTRFS_FILE_EXTENT_REG 1
+ #define BTRFS_FILE_EXTENT_PREALLOC 2
++#define BTRFS_FILE_EXTENT_TYPES 2
+
+ struct btrfs_file_extent_item {
+ /*
+diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
+index 3027f943f4b3..214102fab940 100644
+--- a/include/uapi/linux/prctl.h
++++ b/include/uapi/linux/prctl.h
+@@ -203,6 +203,7 @@ struct prctl_mm_map {
+ #define PR_SET_SPECULATION_CTRL 53
+ /* Speculation control variants */
+ # define PR_SPEC_STORE_BYPASS 0
++# define PR_SPEC_INDIRECT_BRANCH 1
+ /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+ # define PR_SPEC_NOT_AFFECTED 0
+ # define PR_SPEC_PRCTL (1UL << 0)
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 013b0cd1958e..f6755fd5bae2 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -717,8 +717,9 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
+ /* check_stack_read/write functions track spill/fill of registers,
+ * stack boundary and alignment are checked in check_mem_access()
+ */
+-static int check_stack_write(struct bpf_verifier_state *state, int off,
+- int size, int value_regno)
++static int check_stack_write(struct bpf_verifier_env *env,
++ struct bpf_verifier_state *state, int off,
++ int size, int value_regno, int insn_idx)
+ {
+ int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
+ /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
+@@ -738,8 +739,32 @@ static int check_stack_write(struct bpf_verifier_state *state, int off,
+ state->spilled_regs[spi] = state->regs[value_regno];
+ state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;
+
+- for (i = 0; i < BPF_REG_SIZE; i++)
++ for (i = 0; i < BPF_REG_SIZE; i++) {
++ if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
++ !env->allow_ptr_leaks) {
++ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
++ int soff = (-spi - 1) * BPF_REG_SIZE;
++
++ /* detected reuse of integer stack slot with a pointer
++ * which means either llvm is reusing stack slot or
++ * an attacker is trying to exploit CVE-2018-3639
++ * (speculative store bypass)
++ * Have to sanitize that slot with preemptive
++ * store of zero.
++ */
++ if (*poff && *poff != soff) {
++ /* disallow programs where single insn stores
++ * into two different stack slots, since verifier
++ * cannot sanitize them
++ */
++ verbose("insn %d cannot access two stack slots fp%d and fp%d",
++ insn_idx, *poff, soff);
++ return -EINVAL;
++ }
++ *poff = soff;
++ }
+ state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
++ }
+ } else {
+ /* regular write of data into stack */
+ state->spilled_regs[spi] = (struct bpf_reg_state) {};
+@@ -1216,7 +1241,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
+ verbose("attempt to corrupt spilled pointer on stack\n");
+ return -EACCES;
+ }
+- err = check_stack_write(state, off, size, value_regno);
++ err = check_stack_write(env, state, off, size,
++ value_regno, insn_idx);
+ } else {
+ err = check_stack_read(state, off, size, value_regno);
+ }
+@@ -4270,6 +4296,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
+ else
+ continue;
+
++ if (type == BPF_WRITE &&
++ env->insn_aux_data[i + delta].sanitize_stack_off) {
++ struct bpf_insn patch[] = {
++ /* Sanitize suspicious stack slot with zero.
++ * There are no memory dependencies for this store,
++ * since it's only using frame pointer and immediate
++ * constant of zero
++ */
++ BPF_ST_MEM(BPF_DW, BPF_REG_FP,
++ env->insn_aux_data[i + delta].sanitize_stack_off,
++ 0),
++ /* the original STX instruction will immediately
++ * overwrite the same stack slot with appropriate value
++ */
++ *insn,
++ };
++
++ cnt = ARRAY_SIZE(patch);
++ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
++ if (!new_prog)
++ return -ENOMEM;
++
++ delta += cnt - 1;
++ env->prog = new_prog;
++ insn = new_prog->insnsi + i + delta;
++ continue;
++ }
++
+ if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
+ continue;
+
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index f3f389e33343..5c907d96e3dd 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -10,6 +10,7 @@
+ #include <linux/sched/signal.h>
+ #include <linux/sched/hotplug.h>
+ #include <linux/sched/task.h>
++#include <linux/sched/smt.h>
+ #include <linux/unistd.h>
+ #include <linux/cpu.h>
+ #include <linux/oom.h>
+@@ -347,6 +348,12 @@ void cpu_hotplug_enable(void)
+ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
+ #endif /* CONFIG_HOTPLUG_CPU */
+
++/*
++ * Architectures that need SMT-specific errata handling during SMT hotplug
++ * should override this.
++ */
++void __weak arch_smt_update(void) { }
++
+ #ifdef CONFIG_HOTPLUG_SMT
+ enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+ EXPORT_SYMBOL_GPL(cpu_smt_control);
+@@ -998,6 +1005,7 @@ out:
+ * concurrent CPU hotplug via cpu_add_remove_lock.
+ */
+ lockup_detector_cleanup();
++ arch_smt_update();
+ return ret;
+ }
+
+@@ -1126,6 +1134,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
+ ret = cpuhp_up_callbacks(cpu, st, target);
+ out:
+ cpus_write_unlock();
++ arch_smt_update();
+ return ret;
+ }
+
+@@ -2071,8 +2080,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+ */
+ cpuhp_offline_cpu_device(cpu);
+ }
+- if (!ret)
++ if (!ret) {
+ cpu_smt_control = ctrlval;
++ arch_smt_update();
++ }
+ cpu_maps_update_done();
+ return ret;
+ }
+@@ -2083,6 +2094,7 @@ static int cpuhp_smt_enable(void)
+
+ cpu_maps_update_begin();
+ cpu_smt_control = CPU_SMT_ENABLED;
++ arch_smt_update();
+ for_each_present_cpu(cpu) {
+ /* Skip online CPUs and CPUs on offline nodes */
+ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
+diff --git a/kernel/jump_label.c b/kernel/jump_label.c
+index 7c3774ac1d51..70be35a19be2 100644
+--- a/kernel/jump_label.c
++++ b/kernel/jump_label.c
+@@ -79,7 +79,7 @@ int static_key_count(struct static_key *key)
+ }
+ EXPORT_SYMBOL_GPL(static_key_count);
+
+-static void static_key_slow_inc_cpuslocked(struct static_key *key)
++void static_key_slow_inc_cpuslocked(struct static_key *key)
+ {
+ int v, v1;
+
+@@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key)
+ }
+ EXPORT_SYMBOL_GPL(static_key_disable);
+
+-static void static_key_slow_dec_cpuslocked(struct static_key *key,
++static void __static_key_slow_dec_cpuslocked(struct static_key *key,
+ unsigned long rate_limit,
+ struct delayed_work *work)
+ {
+@@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key,
+ struct delayed_work *work)
+ {
+ cpus_read_lock();
+- static_key_slow_dec_cpuslocked(key, rate_limit, work);
++ __static_key_slow_dec_cpuslocked(key, rate_limit, work);
+ cpus_read_unlock();
+ }
+
+@@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key)
+ }
+ EXPORT_SYMBOL_GPL(static_key_slow_dec);
+
++void static_key_slow_dec_cpuslocked(struct static_key *key)
++{
++ STATIC_KEY_CHECK_USE();
++ __static_key_slow_dec_cpuslocked(key, 0, NULL);
++}
++
+ void static_key_slow_dec_deferred(struct static_key_deferred *key)
+ {
+ STATIC_KEY_CHECK_USE();
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 3bc664662081..0552ddbb25e2 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -5617,15 +5617,10 @@ int sched_cpu_activate(unsigned int cpu)
+
+ #ifdef CONFIG_SCHED_SMT
+ /*
+- * The sched_smt_present static key needs to be evaluated on every
+- * hotplug event because at boot time SMT might be disabled when
+- * the number of booted CPUs is limited.
+- *
+- * If then later a sibling gets hotplugged, then the key would stay
+- * off and SMT scheduling would never be functional.
++ * When going up, increment the number of cores with SMT present.
+ */
+- if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
+- static_branch_enable_cpuslocked(&sched_smt_present);
++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
++ static_branch_inc_cpuslocked(&sched_smt_present);
+ #endif
+ set_cpu_active(cpu, true);
+
+@@ -5669,6 +5664,14 @@ int sched_cpu_deactivate(unsigned int cpu)
+ */
+ synchronize_rcu_mult(call_rcu, call_rcu_sched);
+
++#ifdef CONFIG_SCHED_SMT
++ /*
++ * When going down, decrement the number of cores with SMT present.
++ */
++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
++ static_branch_dec_cpuslocked(&sched_smt_present);
++#endif
++
+ if (!sched_smp_initialized)
+ return 0;
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 2d4d79420e36..7240bb4a4090 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -4040,12 +4040,12 @@ static inline bool cfs_bandwidth_used(void)
+
+ void cfs_bandwidth_usage_inc(void)
+ {
+- static_key_slow_inc(&__cfs_bandwidth_used);
++ static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
+ }
+
+ void cfs_bandwidth_usage_dec(void)
+ {
+- static_key_slow_dec(&__cfs_bandwidth_used);
++ static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
+ }
+ #else /* HAVE_JUMP_LABEL */
+ static bool cfs_bandwidth_used(void)
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 63d999dfec80..b3ba6e5e99f2 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -20,6 +20,7 @@
+ #include <linux/sched/task_stack.h>
+ #include <linux/sched/cputime.h>
+ #include <linux/sched/init.h>
++#include <linux/sched/smt.h>
+
+ #include <linux/u64_stats_sync.h>
+ #include <linux/kernel_stat.h>
+@@ -825,9 +826,6 @@ static inline int cpu_of(struct rq *rq)
+
+
+ #ifdef CONFIG_SCHED_SMT
+-
+-extern struct static_key_false sched_smt_present;
+-
+ extern void __update_idle_core(struct rq *rq);
+
+ static inline void update_idle_core(struct rq *rq)
+diff --git a/lib/test_kmod.c b/lib/test_kmod.c
+index 96c304fd656a..7abb59ce6613 100644
+--- a/lib/test_kmod.c
++++ b/lib/test_kmod.c
+@@ -1221,7 +1221,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev)
+
+ dev_info(test_dev->dev, "removing interface\n");
+ misc_deregister(&test_dev->misc_dev);
+- kfree(&test_dev->misc_dev.name);
+
+ mutex_unlock(&test_dev->config_mutex);
+ mutex_unlock(&test_dev->trigger_mutex);
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index adacfe66cf3d..930f2aa3bb4d 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2280,7 +2280,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ }
+ }
+
+-static void freeze_page(struct page *page)
++static void unmap_page(struct page *page)
+ {
+ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+ TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+@@ -2295,7 +2295,7 @@ static void freeze_page(struct page *page)
+ VM_BUG_ON_PAGE(!unmap_success, page);
+ }
+
+-static void unfreeze_page(struct page *page)
++static void remap_page(struct page *page)
+ {
+ int i;
+ if (PageTransHuge(page)) {
+@@ -2312,26 +2312,13 @@ static void __split_huge_page_tail(struct page *head, int tail,
+ struct page *page_tail = head + tail;
+
+ VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
+- VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
+
+ /*
+- * tail_page->_refcount is zero and not changing from under us. But
+- * get_page_unless_zero() may be running from under us on the
+- * tail_page. If we used atomic_set() below instead of atomic_inc() or
+- * atomic_add(), we would then run atomic_set() concurrently with
+- * get_page_unless_zero(), and atomic_set() is implemented in C not
+- * using locked ops. spin_unlock on x86 sometime uses locked ops
+- * because of PPro errata 66, 92, so unless somebody can guarantee
+- * atomic_set() here would be safe on all archs (and not only on x86),
+- * it's safer to use atomic_inc()/atomic_add().
++ * Clone page flags before unfreezing refcount.
++ *
++ * After successful get_page_unless_zero() might follow flags change,
++ * for example lock_page() which sets PG_waiters.
+ */
+- if (PageAnon(head) && !PageSwapCache(head)) {
+- page_ref_inc(page_tail);
+- } else {
+- /* Additional pin to radix tree */
+- page_ref_add(page_tail, 2);
+- }
+-
+ page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+ page_tail->flags |= (head->flags &
+ ((1L << PG_referenced) |
+@@ -2344,36 +2331,42 @@ static void __split_huge_page_tail(struct page *head, int tail,
+ (1L << PG_unevictable) |
+ (1L << PG_dirty)));
+
+- /*
+- * After clearing PageTail the gup refcount can be released.
+- * Page flags also must be visible before we make the page non-compound.
+- */
++ /* ->mapping in first tail page is compound_mapcount */
++ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
++ page_tail);
++ page_tail->mapping = head->mapping;
++ page_tail->index = head->index + tail;
++
++ /* Page flags must be visible before we make the page non-compound. */
+ smp_wmb();
+
++ /*
++ * Clear PageTail before unfreezing page refcount.
++ *
++ * After successful get_page_unless_zero() might follow put_page()
++ * which needs correct compound_head().
++ */
+ clear_compound_head(page_tail);
+
++ /* Finally unfreeze refcount. Additional reference from page cache. */
++ page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) ||
++ PageSwapCache(head)));
++
+ if (page_is_young(head))
+ set_page_young(page_tail);
+ if (page_is_idle(head))
+ set_page_idle(page_tail);
+
+- /* ->mapping in first tail page is compound_mapcount */
+- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+- page_tail);
+- page_tail->mapping = head->mapping;
+-
+- page_tail->index = head->index + tail;
+ page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
+ lru_add_page_tail(head, page_tail, lruvec, list);
+ }
+
+ static void __split_huge_page(struct page *page, struct list_head *list,
+- unsigned long flags)
++ pgoff_t end, unsigned long flags)
+ {
+ struct page *head = compound_head(page);
+ struct zone *zone = page_zone(head);
+ struct lruvec *lruvec;
+- pgoff_t end = -1;
+ int i;
+
+ lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
+@@ -2381,9 +2374,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
+ /* complete memcg works before add pages to LRU */
+ mem_cgroup_split_huge_fixup(head);
+
+- if (!PageAnon(page))
+- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
+-
+ for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
+ __split_huge_page_tail(head, i, lruvec, list);
+ /* Some pages can be beyond i_size: drop them from page cache */
+@@ -2412,7 +2402,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
+
+ spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+
+- unfreeze_page(head);
++ remap_page(head);
+
+ for (i = 0; i < HPAGE_PMD_NR; i++) {
+ struct page *subpage = head + i;
+@@ -2555,6 +2545,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ int count, mapcount, extra_pins, ret;
+ bool mlocked;
+ unsigned long flags;
++ pgoff_t end;
+
+ VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+@@ -2577,6 +2568,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ ret = -EBUSY;
+ goto out;
+ }
++ end = -1;
+ mapping = NULL;
+ anon_vma_lock_write(anon_vma);
+ } else {
+@@ -2590,10 +2582,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+
+ anon_vma = NULL;
+ i_mmap_lock_read(mapping);
++
++ /*
++ *__split_huge_page() may need to trim off pages beyond EOF:
++ * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
++ * which cannot be nested inside the page tree lock. So note
++ * end now: i_size itself may be changed at any moment, but
++ * head page lock is good enough to serialize the trimming.
++ */
++ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+ }
+
+ /*
+- * Racy check if we can split the page, before freeze_page() will
++ * Racy check if we can split the page, before unmap_page() will
+ * split PMDs
+ */
+ if (!can_split_huge_page(head, &extra_pins)) {
+@@ -2602,7 +2603,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ }
+
+ mlocked = PageMlocked(page);
+- freeze_page(head);
++ unmap_page(head);
+ VM_BUG_ON_PAGE(compound_mapcount(head), head);
+
+ /* Make sure the page is not on per-CPU pagevec as it takes pin */
+@@ -2639,7 +2640,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ if (mapping)
+ __dec_node_page_state(page, NR_SHMEM_THPS);
+ spin_unlock(&pgdata->split_queue_lock);
+- __split_huge_page(page, list, flags);
++ __split_huge_page(page, list, end, flags);
+ if (PageSwapCache(head)) {
+ swp_entry_t entry = { .val = page_private(head) };
+
+@@ -2659,7 +2660,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ fail: if (mapping)
+ spin_unlock(&mapping->tree_lock);
+ spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+- unfreeze_page(head);
++ remap_page(head);
+ ret = -EBUSY;
+ }
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 0a5bb3e8a8a3..d27a73737f1a 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1288,7 +1288,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+ *
+ * Basic scheme is simple, details are more complex:
+- * - allocate and freeze a new huge page;
++ * - allocate and lock a new huge page;
+ * - scan over radix tree replacing old pages the new one
+ * + swap in pages if necessary;
+ * + fill in gaps;
+@@ -1296,11 +1296,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ * - if replacing succeed:
+ * + copy data over;
+ * + free old pages;
+- * + unfreeze huge page;
++ * + unlock huge page;
+ * - if replacing failed;
+ * + put all pages back and unfreeze them;
+ * + restore gaps in the radix-tree;
+- * + free huge page;
++ * + unlock and free huge page;
+ */
+ static void collapse_shmem(struct mm_struct *mm,
+ struct address_space *mapping, pgoff_t start,
+@@ -1333,18 +1333,15 @@ static void collapse_shmem(struct mm_struct *mm,
+ goto out;
+ }
+
++ __SetPageLocked(new_page);
++ __SetPageSwapBacked(new_page);
+ new_page->index = start;
+ new_page->mapping = mapping;
+- __SetPageSwapBacked(new_page);
+- __SetPageLocked(new_page);
+- BUG_ON(!page_ref_freeze(new_page, 1));
+-
+
+ /*
+- * At this point the new_page is 'frozen' (page_count() is zero), locked
+- * and not up-to-date. It's safe to insert it into radix tree, because
+- * nobody would be able to map it or use it in other way until we
+- * unfreeze it.
++ * At this point the new_page is locked and not up-to-date.
++ * It's safe to insert it into the page cache, because nobody would
++ * be able to map it or use it in another way until we unlock it.
+ */
+
+ index = start;
+@@ -1352,19 +1349,29 @@ static void collapse_shmem(struct mm_struct *mm,
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+ int n = min(iter.index, end) - index;
+
++ /*
++ * Stop if extent has been hole-punched, and is now completely
++ * empty (the more obvious i_size_read() check would take an
++ * irq-unsafe seqlock on 32-bit).
++ */
++ if (n >= HPAGE_PMD_NR) {
++ result = SCAN_TRUNCATED;
++ goto tree_locked;
++ }
++
+ /*
+ * Handle holes in the radix tree: charge it from shmem and
+ * insert relevant subpage of new_page into the radix-tree.
+ */
+ if (n && !shmem_charge(mapping->host, n)) {
+ result = SCAN_FAIL;
+- break;
++ goto tree_locked;
+ }
+- nr_none += n;
+ for (; index < min(iter.index, end); index++) {
+ radix_tree_insert(&mapping->page_tree, index,
+ new_page + (index % HPAGE_PMD_NR));
+ }
++ nr_none += n;
+
+ /* We are done. */
+ if (index >= end)
+@@ -1380,12 +1387,12 @@ static void collapse_shmem(struct mm_struct *mm,
+ result = SCAN_FAIL;
+ goto tree_unlocked;
+ }
+- spin_lock_irq(&mapping->tree_lock);
+ } else if (trylock_page(page)) {
+ get_page(page);
++ spin_unlock_irq(&mapping->tree_lock);
+ } else {
+ result = SCAN_PAGE_LOCK;
+- break;
++ goto tree_locked;
+ }
+
+ /*
+@@ -1394,17 +1401,24 @@ static void collapse_shmem(struct mm_struct *mm,
+ */
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(!PageUptodate(page), page);
+- VM_BUG_ON_PAGE(PageTransCompound(page), page);
++
++ /*
++ * If file was truncated then extended, or hole-punched, before
++ * we locked the first page, then a THP might be there already.
++ */
++ if (PageTransCompound(page)) {
++ result = SCAN_PAGE_COMPOUND;
++ goto out_unlock;
++ }
+
+ if (page_mapping(page) != mapping) {
+ result = SCAN_TRUNCATED;
+ goto out_unlock;
+ }
+- spin_unlock_irq(&mapping->tree_lock);
+
+ if (isolate_lru_page(page)) {
+ result = SCAN_DEL_PAGE_LRU;
+- goto out_isolate_failed;
++ goto out_unlock;
+ }
+
+ if (page_mapped(page))
+@@ -1426,7 +1440,9 @@ static void collapse_shmem(struct mm_struct *mm,
+ */
+ if (!page_ref_freeze(page, 3)) {
+ result = SCAN_PAGE_COUNT;
+- goto out_lru;
++ spin_unlock_irq(&mapping->tree_lock);
++ putback_lru_page(page);
++ goto out_unlock;
+ }
+
+ /*
+@@ -1442,17 +1458,10 @@ static void collapse_shmem(struct mm_struct *mm,
+ slot = radix_tree_iter_resume(slot, &iter);
+ index++;
+ continue;
+-out_lru:
+- spin_unlock_irq(&mapping->tree_lock);
+- putback_lru_page(page);
+-out_isolate_failed:
+- unlock_page(page);
+- put_page(page);
+- goto tree_unlocked;
+ out_unlock:
+ unlock_page(page);
+ put_page(page);
+- break;
++ goto tree_unlocked;
+ }
+
+ /*
+@@ -1460,14 +1469,18 @@ out_unlock:
+ * This code only triggers if there's nothing in radix tree
+ * beyond 'end'.
+ */
+- if (result == SCAN_SUCCEED && index < end) {
++ if (index < end) {
+ int n = end - index;
+
++ /* Stop if extent has been truncated, and is now empty */
++ if (n >= HPAGE_PMD_NR) {
++ result = SCAN_TRUNCATED;
++ goto tree_locked;
++ }
+ if (!shmem_charge(mapping->host, n)) {
+ result = SCAN_FAIL;
+ goto tree_locked;
+ }
+-
+ for (; index < end; index++) {
+ radix_tree_insert(&mapping->page_tree, index,
+ new_page + (index % HPAGE_PMD_NR));
+@@ -1475,57 +1488,62 @@ out_unlock:
+ nr_none += n;
+ }
+
++ __inc_node_page_state(new_page, NR_SHMEM_THPS);
++ if (nr_none) {
++ struct zone *zone = page_zone(new_page);
++
++ __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
++ __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
++ }
++
+ tree_locked:
+ spin_unlock_irq(&mapping->tree_lock);
+ tree_unlocked:
+
+ if (result == SCAN_SUCCEED) {
+- unsigned long flags;
+- struct zone *zone = page_zone(new_page);
+-
+ /*
+ * Replacing old pages with new one has succeed, now we need to
+ * copy the content and free old pages.
+ */
++ index = start;
+ list_for_each_entry_safe(page, tmp, &pagelist, lru) {
++ while (index < page->index) {
++ clear_highpage(new_page + (index % HPAGE_PMD_NR));
++ index++;
++ }
+ copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
+ page);
+ list_del(&page->lru);
+- unlock_page(page);
+- page_ref_unfreeze(page, 1);
+ page->mapping = NULL;
++ page_ref_unfreeze(page, 1);
+ ClearPageActive(page);
+ ClearPageUnevictable(page);
++ unlock_page(page);
+ put_page(page);
++ index++;
+ }
+-
+- local_irq_save(flags);
+- __inc_node_page_state(new_page, NR_SHMEM_THPS);
+- if (nr_none) {
+- __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
++ while (index < end) {
++ clear_highpage(new_page + (index % HPAGE_PMD_NR));
++ index++;
+ }
+- local_irq_restore(flags);
+
+- /*
+- * Remove pte page tables, so we can re-faulti
+- * the page as huge.
+- */
+- retract_page_tables(mapping, start);
+-
+- /* Everything is ready, let's unfreeze the new_page */
+- set_page_dirty(new_page);
+ SetPageUptodate(new_page);
+- page_ref_unfreeze(new_page, HPAGE_PMD_NR);
++ page_ref_add(new_page, HPAGE_PMD_NR - 1);
++ set_page_dirty(new_page);
+ mem_cgroup_commit_charge(new_page, memcg, false, true);
+ lru_cache_add_anon(new_page);
+- unlock_page(new_page);
+
++ /*
++ * Remove pte page tables, so we can re-fault the page as huge.
++ */
++ retract_page_tables(mapping, start);
+ *hpage = NULL;
+ } else {
+ /* Something went wrong: rollback changes to the radix-tree */
+- shmem_uncharge(mapping->host, nr_none);
+ spin_lock_irq(&mapping->tree_lock);
++ mapping->nrpages -= nr_none;
++ shmem_uncharge(mapping->host, nr_none);
++
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
+ start) {
+ if (iter.index >= end)
+@@ -1551,19 +1569,18 @@ tree_unlocked:
+ slot, page);
+ slot = radix_tree_iter_resume(slot, &iter);
+ spin_unlock_irq(&mapping->tree_lock);
+- putback_lru_page(page);
+ unlock_page(page);
++ putback_lru_page(page);
+ spin_lock_irq(&mapping->tree_lock);
+ }
+ VM_BUG_ON(nr_none);
+ spin_unlock_irq(&mapping->tree_lock);
+
+- /* Unfreeze new_page, caller would take care about freeing it */
+- page_ref_unfreeze(new_page, 1);
+ mem_cgroup_cancel_charge(new_page, memcg, true);
+- unlock_page(new_page);
+ new_page->mapping = NULL;
+ }
++
++ unlock_page(new_page);
+ out:
+ VM_BUG_ON(!list_empty(&pagelist));
+ /* TODO: tracepoints */
+diff --git a/mm/shmem.c b/mm/shmem.c
+index fa08f56fd5e5..ab7ff0aeae2d 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -296,12 +296,14 @@ bool shmem_charge(struct inode *inode, long pages)
+ if (!shmem_inode_acct_block(inode, pages))
+ return false;
+
++ /* nrpages adjustment first, then shmem_recalc_inode() when balanced */
++ inode->i_mapping->nrpages += pages;
++
+ spin_lock_irqsave(&info->lock, flags);
+ info->alloced += pages;
+ inode->i_blocks += pages * BLOCKS_PER_PAGE;
+ shmem_recalc_inode(inode);
+ spin_unlock_irqrestore(&info->lock, flags);
+- inode->i_mapping->nrpages += pages;
+
+ return true;
+ }
+@@ -311,6 +313,8 @@ void shmem_uncharge(struct inode *inode, long pages)
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ unsigned long flags;
+
++ /* nrpages adjustment done by __delete_from_page_cache() or caller */
++
+ spin_lock_irqsave(&info->lock, flags);
+ info->alloced -= pages;
+ inode->i_blocks -= pages * BLOCKS_PER_PAGE;
+@@ -1528,11 +1532,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+ {
+ struct page *oldpage, *newpage;
+ struct address_space *swap_mapping;
++ swp_entry_t entry;
+ pgoff_t swap_index;
+ int error;
+
+ oldpage = *pagep;
+- swap_index = page_private(oldpage);
++ entry.val = page_private(oldpage);
++ swap_index = swp_offset(entry);
+ swap_mapping = page_mapping(oldpage);
+
+ /*
+@@ -1551,7 +1557,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+ __SetPageLocked(newpage);
+ __SetPageSwapBacked(newpage);
+ SetPageUptodate(newpage);
+- set_page_private(newpage, swap_index);
++ set_page_private(newpage, entry.val);
+ SetPageSwapCache(newpage);
+
+ /*
+diff --git a/net/ceph/auth.c b/net/ceph/auth.c
+index dbde2b3c3c15..fbeee068ea14 100644
+--- a/net/ceph/auth.c
++++ b/net/ceph/auth.c
+@@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+ }
+ EXPORT_SYMBOL(ceph_auth_update_authorizer);
+
++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
++ struct ceph_authorizer *a,
++ void *challenge_buf,
++ int challenge_buf_len)
++{
++ int ret = 0;
++
++ mutex_lock(&ac->mutex);
++ if (ac->ops && ac->ops->add_authorizer_challenge)
++ ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
++ challenge_buf_len);
++ mutex_unlock(&ac->mutex);
++ return ret;
++}
++EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
++
+ int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+ {
+diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
+index 2f4a1baf5f52..2bf9d9f7ddf3 100644
+--- a/net/ceph/auth_x.c
++++ b/net/ceph/auth_x.c
+@@ -9,6 +9,7 @@
+
+ #include <linux/ceph/decode.h>
+ #include <linux/ceph/auth.h>
++#include <linux/ceph/ceph_features.h>
+ #include <linux/ceph/libceph.h>
+ #include <linux/ceph/messenger.h>
+
+@@ -70,25 +71,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf,
+ return sizeof(u32) + ciphertext_len;
+ }
+
++static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p,
++ int ciphertext_len)
++{
++ struct ceph_x_encrypt_header *hdr = p;
++ int plaintext_len;
++ int ret;
++
++ ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len,
++ &plaintext_len);
++ if (ret)
++ return ret;
++
++ if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
++ pr_err("%s bad magic\n", __func__);
++ return -EINVAL;
++ }
++
++ return plaintext_len - sizeof(*hdr);
++}
++
+ static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
+ {
+- struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
+- int ciphertext_len, plaintext_len;
++ int ciphertext_len;
+ int ret;
+
+ ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
+ ceph_decode_need(p, end, ciphertext_len, e_inval);
+
+- ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
+- &plaintext_len);
+- if (ret)
++ ret = __ceph_x_decrypt(secret, *p, ciphertext_len);
++ if (ret < 0)
+ return ret;
+
+- if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
+- return -EPERM;
+-
+ *p += ciphertext_len;
+- return plaintext_len - sizeof(struct ceph_x_encrypt_header);
++ return ret;
+
+ e_inval:
+ return -EINVAL;
+@@ -275,6 +291,51 @@ bad:
+ return -EINVAL;
+ }
+
++/*
++ * Encode and encrypt the second part (ceph_x_authorize_b) of the
++ * authorizer. The first part (ceph_x_authorize_a) should already be
++ * encoded.
++ */
++static int encrypt_authorizer(struct ceph_x_authorizer *au,
++ u64 *server_challenge)
++{
++ struct ceph_x_authorize_a *msg_a;
++ struct ceph_x_authorize_b *msg_b;
++ void *p, *end;
++ int ret;
++
++ msg_a = au->buf->vec.iov_base;
++ WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id));
++ p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len);
++ end = au->buf->vec.iov_base + au->buf->vec.iov_len;
++
++ msg_b = p + ceph_x_encrypt_offset();
++ msg_b->struct_v = 2;
++ msg_b->nonce = cpu_to_le64(au->nonce);
++ if (server_challenge) {
++ msg_b->have_challenge = 1;
++ msg_b->server_challenge_plus_one =
++ cpu_to_le64(*server_challenge + 1);
++ } else {
++ msg_b->have_challenge = 0;
++ msg_b->server_challenge_plus_one = 0;
++ }
++
++ ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
++ if (ret < 0)
++ return ret;
++
++ p += ret;
++ if (server_challenge) {
++ WARN_ON(p != end);
++ } else {
++ WARN_ON(p > end);
++ au->buf->vec.iov_len = p - au->buf->vec.iov_base;
++ }
++
++ return 0;
++}
++
+ static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
+ {
+ ceph_crypto_key_destroy(&au->session_key);
+@@ -291,7 +352,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
+ int maxlen;
+ struct ceph_x_authorize_a *msg_a;
+ struct ceph_x_authorize_b *msg_b;
+- void *p, *end;
+ int ret;
+ int ticket_blob_len =
+ (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
+@@ -335,21 +395,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
+ dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
+ le64_to_cpu(msg_a->ticket_blob.secret_id));
+
+- p = msg_a + 1;
+- p += ticket_blob_len;
+- end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+-
+- msg_b = p + ceph_x_encrypt_offset();
+- msg_b->struct_v = 1;
+ get_random_bytes(&au->nonce, sizeof(au->nonce));
+- msg_b->nonce = cpu_to_le64(au->nonce);
+- ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
+- if (ret < 0)
++ ret = encrypt_authorizer(au, NULL);
++ if (ret) {
++ pr_err("failed to encrypt authorizer: %d", ret);
+ goto out_au;
++ }
+
+- p += ret;
+- WARN_ON(p > end);
+- au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+ dout(" built authorizer nonce %llx len %d\n", au->nonce,
+ (int)au->buf->vec.iov_len);
+ return 0;
+@@ -626,6 +678,54 @@ static int ceph_x_update_authorizer(
+ return 0;
+ }
+
++static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
++ void *challenge_buf,
++ int challenge_buf_len,
++ u64 *server_challenge)
++{
++ struct ceph_x_authorize_challenge *ch =
++ challenge_buf + sizeof(struct ceph_x_encrypt_header);
++ int ret;
++
++ /* no leading len */
++ ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
++ challenge_buf_len);
++ if (ret < 0)
++ return ret;
++ if (ret < sizeof(*ch)) {
++ pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
++ return -EINVAL;
++ }
++
++ *server_challenge = le64_to_cpu(ch->server_challenge);
++ return 0;
++}
++
++static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
++ struct ceph_authorizer *a,
++ void *challenge_buf,
++ int challenge_buf_len)
++{
++ struct ceph_x_authorizer *au = (void *)a;
++ u64 server_challenge;
++ int ret;
++
++ ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
++ &server_challenge);
++ if (ret) {
++ pr_err("failed to decrypt authorize challenge: %d", ret);
++ return ret;
++ }
++
++ ret = encrypt_authorizer(au, &server_challenge);
++ if (ret) {
++ pr_err("failed to encrypt authorizer w/ challenge: %d", ret);
++ return ret;
++ }
++
++ return 0;
++}
++
+ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+ {
+@@ -637,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
+ ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
+ if (ret < 0)
+ return ret;
+- if (ret != sizeof(*reply))
+- return -EPERM;
++ if (ret < sizeof(*reply)) {
++ pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
++ return -EINVAL;
++ }
+
+ if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
+ ret = -EPERM;
+@@ -704,26 +806,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
+ __le64 *psig)
+ {
+ void *enc_buf = au->enc_buf;
+- struct {
+- __le32 len;
+- __le32 header_crc;
+- __le32 front_crc;
+- __le32 middle_crc;
+- __le32 data_crc;
+- } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
+ int ret;
+
+- sigblock->len = cpu_to_le32(4*sizeof(u32));
+- sigblock->header_crc = msg->hdr.crc;
+- sigblock->front_crc = msg->footer.front_crc;
+- sigblock->middle_crc = msg->footer.middle_crc;
+- sigblock->data_crc = msg->footer.data_crc;
+- ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
+- sizeof(*sigblock));
+- if (ret < 0)
+- return ret;
++ if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) {
++ struct {
++ __le32 len;
++ __le32 header_crc;
++ __le32 front_crc;
++ __le32 middle_crc;
++ __le32 data_crc;
++ } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
++
++ sigblock->len = cpu_to_le32(4*sizeof(u32));
++ sigblock->header_crc = msg->hdr.crc;
++ sigblock->front_crc = msg->footer.front_crc;
++ sigblock->middle_crc = msg->footer.middle_crc;
++ sigblock->data_crc = msg->footer.data_crc;
++
++ ret = ceph_x_encrypt(&au->session_key, enc_buf,
++ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
++ if (ret < 0)
++ return ret;
++
++ *psig = *(__le64 *)(enc_buf + sizeof(u32));
++ } else {
++ struct {
++ __le32 header_crc;
++ __le32 front_crc;
++ __le32 front_len;
++ __le32 middle_crc;
++ __le32 middle_len;
++ __le32 data_crc;
++ __le32 data_len;
++ __le32 seq_lower_word;
++ } __packed *sigblock = enc_buf;
++ struct {
++ __le64 a, b, c, d;
++ } __packed *penc = enc_buf;
++ int ciphertext_len;
++
++ sigblock->header_crc = msg->hdr.crc;
++ sigblock->front_crc = msg->footer.front_crc;
++ sigblock->front_len = msg->hdr.front_len;
++ sigblock->middle_crc = msg->footer.middle_crc;
++ sigblock->middle_len = msg->hdr.middle_len;
++ sigblock->data_crc = msg->footer.data_crc;
++ sigblock->data_len = msg->hdr.data_len;
++ sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
++
++ /* no leading len, no ceph_x_encrypt_header */
++ ret = ceph_crypt(&au->session_key, true, enc_buf,
++ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
++ &ciphertext_len);
++ if (ret)
++ return ret;
++
++ *psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
++ }
+
+- *psig = *(__le64 *)(enc_buf + sizeof(u32));
+ return 0;
+ }
+
+@@ -778,6 +918,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
+ .handle_reply = ceph_x_handle_reply,
+ .create_authorizer = ceph_x_create_authorizer,
+ .update_authorizer = ceph_x_update_authorizer,
++ .add_authorizer_challenge = ceph_x_add_authorizer_challenge,
+ .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
+ .invalidate_authorizer = ceph_x_invalidate_authorizer,
+ .reset = ceph_x_reset,
+diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
+index 32c13d763b9a..24b0b74564d0 100644
+--- a/net/ceph/auth_x_protocol.h
++++ b/net/ceph/auth_x_protocol.h
+@@ -70,6 +70,13 @@ struct ceph_x_authorize_a {
+ struct ceph_x_authorize_b {
+ __u8 struct_v;
+ __le64 nonce;
++ __u8 have_challenge;
++ __le64 server_challenge_plus_one;
++} __attribute__ ((packed));
++
++struct ceph_x_authorize_challenge {
++ __u8 struct_v;
++ __le64 server_challenge;
+ } __attribute__ ((packed));
+
+ struct ceph_x_authorize_reply {
+diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
+index 5281da82371a..f864807284d4 100644
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1411,24 +1411,26 @@ static void prepare_write_keepalive(struct ceph_connection *con)
+ * Connection negotiation.
+ */
+
+-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
+- int *auth_proto)
++static int get_connect_authorizer(struct ceph_connection *con)
+ {
+ struct ceph_auth_handshake *auth;
++ int auth_proto;
+
+ if (!con->ops->get_authorizer) {
++ con->auth = NULL;
+ con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
+ con->out_connect.authorizer_len = 0;
+- return NULL;
++ return 0;
+ }
+
+- auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
++ auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
+ if (IS_ERR(auth))
+- return auth;
++ return PTR_ERR(auth);
+
+- con->auth_reply_buf = auth->authorizer_reply_buf;
+- con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
+- return auth;
++ con->auth = auth;
++ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
++ con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
++ return 0;
+ }
+
+ /*
+@@ -1444,12 +1446,22 @@ static void prepare_write_banner(struct ceph_connection *con)
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+ }
+
++static void __prepare_write_connect(struct ceph_connection *con)
++{
++ con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect);
++ if (con->auth)
++ con_out_kvec_add(con, con->auth->authorizer_buf_len,
++ con->auth->authorizer_buf);
++
++ con->out_more = 0;
++ con_flag_set(con, CON_FLAG_WRITE_PENDING);
++}
++
+ static int prepare_write_connect(struct ceph_connection *con)
+ {
+ unsigned int global_seq = get_global_seq(con->msgr, 0);
+ int proto;
+- int auth_proto;
+- struct ceph_auth_handshake *auth;
++ int ret;
+
+ switch (con->peer_name.type) {
+ case CEPH_ENTITY_TYPE_MON:
+@@ -1476,24 +1488,11 @@ static int prepare_write_connect(struct ceph_connection *con)
+ con->out_connect.protocol_version = cpu_to_le32(proto);
+ con->out_connect.flags = 0;
+
+- auth_proto = CEPH_AUTH_UNKNOWN;
+- auth = get_connect_authorizer(con, &auth_proto);
+- if (IS_ERR(auth))
+- return PTR_ERR(auth);
+-
+- con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+- con->out_connect.authorizer_len = auth ?
+- cpu_to_le32(auth->authorizer_buf_len) : 0;
+-
+- con_out_kvec_add(con, sizeof (con->out_connect),
+- &con->out_connect);
+- if (auth && auth->authorizer_buf_len)
+- con_out_kvec_add(con, auth->authorizer_buf_len,
+- auth->authorizer_buf);
+-
+- con->out_more = 0;
+- con_flag_set(con, CON_FLAG_WRITE_PENDING);
++ ret = get_connect_authorizer(con);
++ if (ret)
++ return ret;
+
++ __prepare_write_connect(con);
+ return 0;
+ }
+
+@@ -1753,11 +1752,21 @@ static int read_partial_connect(struct ceph_connection *con)
+ if (ret <= 0)
+ goto out;
+
+- size = le32_to_cpu(con->in_reply.authorizer_len);
+- end += size;
+- ret = read_partial(con, end, size, con->auth_reply_buf);
+- if (ret <= 0)
+- goto out;
++ if (con->auth) {
++ size = le32_to_cpu(con->in_reply.authorizer_len);
++ if (size > con->auth->authorizer_reply_buf_len) {
++ pr_err("authorizer reply too big: %d > %zu\n", size,
++ con->auth->authorizer_reply_buf_len);
++ ret = -EINVAL;
++ goto out;
++ }
++
++ end += size;
++ ret = read_partial(con, end, size,
++ con->auth->authorizer_reply_buf);
++ if (ret <= 0)
++ goto out;
++ }
+
+ dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
+ con, (int)con->in_reply.tag,
+@@ -1765,7 +1774,6 @@ static int read_partial_connect(struct ceph_connection *con)
+ le32_to_cpu(con->in_reply.global_seq));
+ out:
+ return ret;
+-
+ }
+
+ /*
+@@ -2048,12 +2056,27 @@ static int process_connect(struct ceph_connection *con)
+
+ dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
+
+- if (con->auth_reply_buf) {
++ if (con->auth) {
+ /*
+ * Any connection that defines ->get_authorizer()
+- * should also define ->verify_authorizer_reply().
++ * should also define ->add_authorizer_challenge() and
++ * ->verify_authorizer_reply().
++ *
+ * See get_connect_authorizer().
+ */
++ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
++ ret = con->ops->add_authorizer_challenge(
++ con, con->auth->authorizer_reply_buf,
++ le32_to_cpu(con->in_reply.authorizer_len));
++ if (ret < 0)
++ return ret;
++
++ con_out_kvec_reset(con);
++ __prepare_write_connect(con);
++ prepare_read_connect(con);
++ return 0;
++ }
++
+ ret = con->ops->verify_authorizer_reply(con);
+ if (ret < 0) {
+ con->error_msg = "bad authorize reply";
+diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
+index 2814dba5902d..53ea2d48896c 100644
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -5292,6 +5292,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+ return auth;
+ }
+
++static int add_authorizer_challenge(struct ceph_connection *con,
++ void *challenge_buf, int challenge_buf_len)
++{
++ struct ceph_osd *o = con->private;
++ struct ceph_osd_client *osdc = o->o_osdc;
++ struct ceph_auth_client *ac = osdc->client->monc.auth;
++
++ return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer,
++ challenge_buf, challenge_buf_len);
++}
+
+ static int verify_authorizer_reply(struct ceph_connection *con)
+ {
+@@ -5341,6 +5351,7 @@ static const struct ceph_connection_operations osd_con_ops = {
+ .put = put_osd_con,
+ .dispatch = dispatch,
+ .get_authorizer = get_authorizer,
++ .add_authorizer_challenge = add_authorizer_challenge,
+ .verify_authorizer_reply = verify_authorizer_reply,
+ .invalidate_authorizer = invalidate_authorizer,
+ .alloc_msg = alloc_msg,
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index c19a118f9f82..4067fa3fcbb2 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4882,6 +4882,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
+ nf_reset(skb);
+ nf_reset_trace(skb);
+
++#ifdef CONFIG_NET_SWITCHDEV
++ skb->offload_fwd_mark = 0;
++#endif
++
+ if (!xnet)
+ return;
+
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index 8d1a7c900393..88d5b2645bb0 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2433,7 +2433,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
+ void *ph;
+ __u32 ts;
+
+- ph = skb_shinfo(skb)->destructor_arg;
++ ph = skb_zcopy_get_nouarg(skb);
+ packet_dec_pending(&po->tx_ring);
+
+ ts = __packet_set_timestamp(po, ph, skb);
+@@ -2499,7 +2499,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
+ skb->priority = po->sk.sk_priority;
+ skb->mark = po->sk.sk_mark;
+ sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
+- skb_shinfo(skb)->destructor_arg = ph.raw;
++ skb_zcopy_set_nouarg(skb, ph.raw);
+
+ skb_reserve(skb, hlen);
+ skb_reset_network_header(skb);
+diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
+index 4f2971f528db..e903bdd39b9f 100644
+--- a/net/tls/tls_main.c
++++ b/net/tls/tls_main.c
+@@ -46,8 +46,28 @@ MODULE_DESCRIPTION("Transport Layer Security Support");
+ MODULE_LICENSE("Dual BSD/GPL");
+ MODULE_ALIAS_TCP_ULP("tls");
+
+-static struct proto tls_base_prot;
+-static struct proto tls_sw_prot;
++enum {
++ TLSV4,
++ TLSV6,
++ TLS_NUM_PROTS,
++};
++
++enum {
++ TLS_BASE_TX,
++ TLS_SW_TX,
++ TLS_NUM_CONFIG,
++};
++
++static struct proto *saved_tcpv6_prot;
++static DEFINE_MUTEX(tcpv6_prot_mutex);
++static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
++
++static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
++{
++ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
++
++ sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf];
++}
+
+ int wait_on_pending_writer(struct sock *sk, long *timeo)
+ {
+@@ -239,6 +259,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
+ void (*sk_proto_close)(struct sock *sk, long timeout);
+
+ lock_sock(sk);
++ sk_proto_close = ctx->sk_proto_close;
++
++ if (ctx->tx_conf == TLS_BASE_TX) {
++ tls_ctx_free(ctx);
++ goto skip_tx_cleanup;
++ }
+
+ if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
+ tls_handle_open_record(sk, 0);
+@@ -255,13 +281,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
+ sg++;
+ }
+ }
+- ctx->free_resources(sk);
++
+ kfree(ctx->rec_seq);
+ kfree(ctx->iv);
+
+- sk_proto_close = ctx->sk_proto_close;
+- tls_ctx_free(ctx);
++ if (ctx->tx_conf == TLS_SW_TX) {
++ tls_sw_free_tx_resources(sk);
++ tls_ctx_free(ctx);
++ }
+
++skip_tx_cleanup:
+ release_sock(sk);
+ sk_proto_close(sk, timeout);
+ }
+@@ -362,48 +391,43 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
+ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
+ unsigned int optlen)
+ {
+- struct tls_crypto_info *crypto_info, tmp_crypto_info;
++ struct tls_crypto_info *crypto_info;
+ struct tls_context *ctx = tls_get_ctx(sk);
+- struct proto *prot = NULL;
+ int rc = 0;
++ int tx_conf;
+
+ if (!optval || (optlen < sizeof(*crypto_info))) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+- rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
++ crypto_info = &ctx->crypto_send.info;
++ /* Currently we don't support set crypto info more than one time */
++ if (TLS_CRYPTO_INFO_READY(crypto_info)) {
++ rc = -EBUSY;
++ goto out;
++ }
++
++ rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
+ if (rc) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ /* check version */
+- if (tmp_crypto_info.version != TLS_1_2_VERSION) {
++ if (crypto_info->version != TLS_1_2_VERSION) {
+ rc = -ENOTSUPP;
+- goto out;
+- }
+-
+- /* get user crypto info */
+- crypto_info = &ctx->crypto_send.info;
+-
+- /* Currently we don't support set crypto info more than one time */
+- if (TLS_CRYPTO_INFO_READY(crypto_info)) {
+- rc = -EBUSY;
+- goto out;
++ goto err_crypto_info;
+ }
+
+- switch (tmp_crypto_info.cipher_type) {
++ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
+ rc = -EINVAL;
+ goto err_crypto_info;
+ }
+- rc = copy_from_user(
+- crypto_info,
+- optval,
+- sizeof(struct tls12_crypto_info_aes_gcm_128));
+-
++ rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
++ optlen - sizeof(*crypto_info));
+ if (rc) {
+ rc = -EFAULT;
+ goto err_crypto_info;
+@@ -415,18 +439,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
+ goto err_crypto_info;
+ }
+
+- ctx->sk_write_space = sk->sk_write_space;
+- sk->sk_write_space = tls_write_space;
+-
+- ctx->sk_proto_close = sk->sk_prot->close;
+-
+ /* currently SW is default, we will have ethtool in future */
+ rc = tls_set_sw_offload(sk, ctx);
+- prot = &tls_sw_prot;
++ tx_conf = TLS_SW_TX;
+ if (rc)
+ goto err_crypto_info;
+
+- sk->sk_prot = prot;
++ ctx->tx_conf = tx_conf;
++ update_sk_prot(sk, ctx);
++ ctx->sk_write_space = sk->sk_write_space;
++ sk->sk_write_space = tls_write_space;
+ goto out;
+
+ err_crypto_info:
+@@ -464,8 +486,21 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
+ return do_tls_setsockopt(sk, optname, optval, optlen);
+ }
+
++static void build_protos(struct proto *prot, struct proto *base)
++{
++ prot[TLS_BASE_TX] = *base;
++ prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
++ prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
++ prot[TLS_BASE_TX].close = tls_sk_proto_close;
++
++ prot[TLS_SW_TX] = prot[TLS_BASE_TX];
++ prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
++ prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
++}
++
+ static int tls_init(struct sock *sk)
+ {
++ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tls_context *ctx;
+ int rc = 0;
+@@ -488,7 +523,21 @@ static int tls_init(struct sock *sk)
+ icsk->icsk_ulp_data = ctx;
+ ctx->setsockopt = sk->sk_prot->setsockopt;
+ ctx->getsockopt = sk->sk_prot->getsockopt;
+- sk->sk_prot = &tls_base_prot;
++ ctx->sk_proto_close = sk->sk_prot->close;
++
++ /* Build IPv6 TLS whenever the address of tcpv6_prot changes */
++ if (ip_ver == TLSV6 &&
++ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
++ mutex_lock(&tcpv6_prot_mutex);
++ if (likely(sk->sk_prot != saved_tcpv6_prot)) {
++ build_protos(tls_prots[TLSV6], sk->sk_prot);
++ smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
++ }
++ mutex_unlock(&tcpv6_prot_mutex);
++ }
++
++ ctx->tx_conf = TLS_BASE_TX;
++ update_sk_prot(sk, ctx);
+ out:
+ return rc;
+ }
+@@ -501,14 +550,7 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
+
+ static int __init tls_register(void)
+ {
+- tls_base_prot = tcp_prot;
+- tls_base_prot.setsockopt = tls_setsockopt;
+- tls_base_prot.getsockopt = tls_getsockopt;
+-
+- tls_sw_prot = tls_base_prot;
+- tls_sw_prot.sendmsg = tls_sw_sendmsg;
+- tls_sw_prot.sendpage = tls_sw_sendpage;
+- tls_sw_prot.close = tls_sk_proto_close;
++ build_protos(tls_prots[TLSV4], &tcp_prot);
+
+ tcp_register_ulp(&tcp_tls_ulp_ops);
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 6ae9ca567d6c..d18d4a478e4f 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -388,7 +388,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+ {
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+- int ret = 0;
++ int ret;
+ int required_size;
+ long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ bool eor = !(msg->msg_flags & MSG_MORE);
+@@ -403,7 +403,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+
+ lock_sock(sk);
+
+- if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo))
++ ret = tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo);
++ if (ret)
+ goto send_end;
+
+ if (unlikely(msg->msg_controllen)) {
+@@ -539,7 +540,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
+ {
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+- int ret = 0;
++ int ret;
+ long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ bool eor;
+ size_t orig_size = size;
+@@ -559,7 +560,8 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
+
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+- if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
++ ret = tls_complete_pending_work(sk, tls_ctx, flags, &timeo);
++ if (ret)
+ goto sendpage_end;
+
+ /* Call the sk_stream functions to manage the sndbuf mem. */
+@@ -646,7 +648,7 @@ sendpage_end:
+ return ret;
+ }
+
+-static void tls_sw_free_resources(struct sock *sk)
++void tls_sw_free_tx_resources(struct sock *sk)
+ {
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+@@ -685,7 +687,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
+ }
+
+ ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
+- ctx->free_resources = tls_sw_free_resources;
+
+ crypto_info = &ctx->crypto_send.info;
+ switch (crypto_info->cipher_type) {
+diff --git a/scripts/Makefile.build b/scripts/Makefile.build
+index 7143da06d702..be9e5deb58ba 100644
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -272,10 +272,8 @@ else
+ objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
+ endif
+ ifdef CONFIG_RETPOLINE
+-ifneq ($(RETPOLINE_CFLAGS),)
+ objtool_args += --retpoline
+ endif
+-endif
+
+
+ ifdef CONFIG_MODVERSIONS
+diff --git a/sound/core/control.c b/sound/core/control.c
+index af7e6165e21e..36571cd49be3 100644
+--- a/sound/core/control.c
++++ b/sound/core/control.c
+@@ -347,6 +347,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
+ return 0;
+ }
+
++/* add a new kcontrol object; call with card->controls_rwsem locked */
++static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
++{
++ struct snd_ctl_elem_id id;
++ unsigned int idx;
++ unsigned int count;
++
++ id = kcontrol->id;
++ if (id.index > UINT_MAX - kcontrol->count)
++ return -EINVAL;
++
++ if (snd_ctl_find_id(card, &id)) {
++ dev_err(card->dev,
++ "control %i:%i:%i:%s:%i is already present\n",
++ id.iface, id.device, id.subdevice, id.name, id.index);
++ return -EBUSY;
++ }
++
++ if (snd_ctl_find_hole(card, kcontrol->count) < 0)
++ return -ENOMEM;
++
++ list_add_tail(&kcontrol->list, &card->controls);
++ card->controls_count += kcontrol->count;
++ kcontrol->id.numid = card->last_numid + 1;
++ card->last_numid += kcontrol->count;
++
++ id = kcontrol->id;
++ count = kcontrol->count;
++ for (idx = 0; idx < count; idx++, id.index++, id.numid++)
++ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
++
++ return 0;
++}
++
+ /**
+ * snd_ctl_add - add the control instance to the card
+ * @card: the card instance
+@@ -363,45 +397,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
+ */
+ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
+ {
+- struct snd_ctl_elem_id id;
+- unsigned int idx;
+- unsigned int count;
+ int err = -EINVAL;
+
+ if (! kcontrol)
+ return err;
+ if (snd_BUG_ON(!card || !kcontrol->info))
+ goto error;
+- id = kcontrol->id;
+- if (id.index > UINT_MAX - kcontrol->count)
+- goto error;
+
+ down_write(&card->controls_rwsem);
+- if (snd_ctl_find_id(card, &id)) {
+- up_write(&card->controls_rwsem);
+- dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n",
+- id.iface,
+- id.device,
+- id.subdevice,
+- id.name,
+- id.index);
+- err = -EBUSY;
+- goto error;
+- }
+- if (snd_ctl_find_hole(card, kcontrol->count) < 0) {
+- up_write(&card->controls_rwsem);
+- err = -ENOMEM;
+- goto error;
+- }
+- list_add_tail(&kcontrol->list, &card->controls);
+- card->controls_count += kcontrol->count;
+- kcontrol->id.numid = card->last_numid + 1;
+- card->last_numid += kcontrol->count;
+- id = kcontrol->id;
+- count = kcontrol->count;
++ err = __snd_ctl_add(card, kcontrol);
+ up_write(&card->controls_rwsem);
+- for (idx = 0; idx < count; idx++, id.index++, id.numid++)
+- snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
++ if (err < 0)
++ goto error;
+ return 0;
+
+ error:
+@@ -1360,9 +1367,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
+ kctl->tlv.c = snd_ctl_elem_user_tlv;
+
+ /* This function manage to free the instance on failure. */
+- err = snd_ctl_add(card, kctl);
+- if (err < 0)
+- return err;
++ down_write(&card->controls_rwsem);
++ err = __snd_ctl_add(card, kctl);
++ if (err < 0) {
++ snd_ctl_free_one(kctl);
++ goto unlock;
++ }
+ offset = snd_ctl_get_ioff(kctl, &info->id);
+ snd_ctl_build_ioff(&info->id, kctl, offset);
+ /*
+@@ -1373,10 +1383,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
+ * which locks the element.
+ */
+
+- down_write(&card->controls_rwsem);
+ card->user_ctl_count++;
+- up_write(&card->controls_rwsem);
+
++ unlock:
++ up_write(&card->controls_rwsem);
+ return 0;
+ }
+
+diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c
+index 8a852042a066..91cd305cabd7 100644
+--- a/sound/isa/wss/wss_lib.c
++++ b/sound/isa/wss/wss_lib.c
+@@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream)
+ if (err < 0) {
+ if (chip->release_dma)
+ chip->release_dma(chip, chip->dma_private_data, chip->dma1);
+- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
+ return err;
+ }
+ chip->playback_substream = substream;
+@@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream)
+ if (err < 0) {
+ if (chip->release_dma)
+ chip->release_dma(chip, chip->dma_private_data, chip->dma2);
+- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
+ return err;
+ }
+ chip->capture_substream = substream;
+diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
+index 1ef7cdf1d3e8..38f355ae1863 100644
+--- a/sound/pci/ac97/ac97_codec.c
++++ b/sound/pci/ac97/ac97_codec.c
+@@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_
+ {
+ struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol);
+ int reg = kcontrol->private_value & 0xff;
+- int shift = (kcontrol->private_value >> 8) & 0xff;
++ int shift = (kcontrol->private_value >> 8) & 0x0f;
+ int mask = (kcontrol->private_value >> 16) & 0xff;
+ // int invert = (kcontrol->private_value >> 24) & 0xff;
+ unsigned short value, old, new;
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index eb8807de3ebc..66b0a124beae 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -343,6 +343,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
+ case 0x10ec0285:
+ case 0x10ec0298:
+ case 0x10ec0289:
++ case 0x10ec0300:
+ alc_update_coef_idx(codec, 0x10, 1<<9, 0);
+ break;
+ case 0x10ec0275:
+@@ -2758,6 +2759,7 @@ enum {
+ ALC269_TYPE_ALC215,
+ ALC269_TYPE_ALC225,
+ ALC269_TYPE_ALC294,
++ ALC269_TYPE_ALC300,
+ ALC269_TYPE_ALC700,
+ };
+
+@@ -2792,6 +2794,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
+ case ALC269_TYPE_ALC215:
+ case ALC269_TYPE_ALC225:
+ case ALC269_TYPE_ALC294:
++ case ALC269_TYPE_ALC300:
+ case ALC269_TYPE_ALC700:
+ ssids = alc269_ssids;
+ break;
+@@ -6408,6 +6411,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8),
+ SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
++ SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
+ SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
+ SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
+@@ -7089,6 +7093,10 @@ static int patch_alc269(struct hda_codec *codec)
+ spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */
+ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */
+ break;
++ case 0x10ec0300:
++ spec->codec_variant = ALC269_TYPE_ALC300;
++ spec->gen.mixer_nid = 0; /* no loopback on ALC300 */
++ break;
+ case 0x10ec0700:
+ case 0x10ec0701:
+ case 0x10ec0703:
+@@ -8160,6 +8168,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
+ HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269),
++ HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269),
+ HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
+ HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
+ HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861),
+diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
+index e73c962590eb..079063d8038d 100644
+--- a/sound/sparc/cs4231.c
++++ b/sound/sparc/cs4231.c
+@@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream)
+ runtime->hw = snd_cs4231_playback;
+
+ err = snd_cs4231_open(chip, CS4231_MODE_PLAY);
+- if (err < 0) {
+- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
++ if (err < 0)
+ return err;
+- }
+ chip->playback_substream = substream;
+ chip->p_periods_sent = 0;
+ snd_pcm_set_sync(substream);
+@@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream)
+ runtime->hw = snd_cs4231_capture;
+
+ err = snd_cs4231_open(chip, CS4231_MODE_RECORD);
+- if (err < 0) {
+- snd_free_pages(runtime->dma_area, runtime->dma_bytes);
++ if (err < 0)
+ return err;
+- }
+ chip->capture_substream = substream;
+ chip->c_periods_sent = 0;
+ snd_pcm_set_sync(substream);