author | Mike Pagano <mpagano@gentoo.org> | 2020-02-11 10:35:16 -0500 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2020-02-11 10:35:16 -0500 |
commit | 017dcca4be57c6514a165e84650e101f9273aff9 (patch) | |
tree | 5f1b7ce67f3b93c26276ec6505154c18cf3e73e4 | |
parent | Linux patch 5.4.18 (diff) | |
download | linux-patches-017dcca4be57c6514a165e84650e101f9273aff9.tar.gz linux-patches-017dcca4be57c6514a165e84650e101f9273aff9.tar.bz2 linux-patches-017dcca4be57c6514a165e84650e101f9273aff9.zip |
Linux patch 5.4.19 (tag: 5.4-19)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 |
-rw-r--r-- | 1018_linux-5.4.19.patch | 15537 |
2 files changed, 15541 insertions, 0 deletions
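The 0000_README hunk in the diff below registers 1018_linux-5.4.19.patch alongside the existing 1017_linux-5.4.18.patch entry, and the Makefile hunk bumps SUBLEVEL from 18 to 19, so this is an incremental 5.4.18 to 5.4.19 patch. A minimal sketch of how the numbered genpatches listed in 0000_README are typically applied on top of a vanilla 5.4 tree is shown below; the directory names and glob are illustrative assumptions, not part of this commit.

```sh
# Illustrative sketch only: apply the incremental 10xx genpatches in order.
# "linux-5.4" and "../linux-patches" are assumed paths for this example;
# each 10xx patch is expected to take the tree from 5.4.(N-1) to 5.4.N.
cd linux-5.4
for p in ../linux-patches/10??_linux-5.4.*.patch; do
    patch -p1 < "$p"   # unified diffs use a/ and b/ prefixes, hence -p1
done
```

The gentoo-sources ebuild applies these patches automatically; the loop above only illustrates the ordering implied by the 0000_README entries.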
diff --git a/0000_README b/0000_README index a0fadf60..b15a5b37 100644 --- a/0000_README +++ b/0000_README @@ -115,6 +115,10 @@ Patch: 1017_linux-5.4.18.patch From: http://www.kernel.org Desc: Linux 5.4.18 +Patch: 1018_linux-5.4.19.patch +From: http://www.kernel.org +Desc: Linux 5.4.19 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1018_linux-5.4.19.patch b/1018_linux-5.4.19.patch new file mode 100644 index 00000000..a769bba6 --- /dev/null +++ b/1018_linux-5.4.19.patch @@ -0,0 +1,15537 @@ +diff --git a/MAINTAINERS b/MAINTAINERS +index 4f7ac27d8651..d1aeebb59e6a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -8704,8 +8704,10 @@ L: isdn4linux@listserv.isdn4linux.de (subscribers-only) + L: netdev@vger.kernel.org + W: http://www.isdn4linux.de + S: Maintained +-F: drivers/isdn/mISDN +-F: drivers/isdn/hardware ++F: drivers/isdn/mISDN/ ++F: drivers/isdn/hardware/ ++F: drivers/isdn/Kconfig ++F: drivers/isdn/Makefile + + ISDN/CAPI SUBSYSTEM + M: Karsten Keil <isdn@linux-pingi.de> +diff --git a/Makefile b/Makefile +index b6c151fd5227..2f55d377f0db 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 4 +-SUBLEVEL = 18 ++SUBLEVEL = 19 + EXTRAVERSION = + NAME = Kleptomaniac Octopus + +diff --git a/arch/Kconfig b/arch/Kconfig +index 5f8a5d84dbbe..43102756304c 100644 +--- a/arch/Kconfig ++++ b/arch/Kconfig +@@ -396,9 +396,6 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE + config HAVE_RCU_TABLE_FREE + bool + +-config HAVE_RCU_TABLE_NO_INVALIDATE +- bool +- + config HAVE_MMU_GATHER_PAGE_SIZE + bool + +diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h +index 40002416efec..8e995ec796c8 100644 +--- a/arch/arm/include/asm/kvm_emulate.h ++++ b/arch/arm/include/asm/kvm_emulate.h +@@ -14,13 +14,25 @@ + #include <asm/cputype.h> + + /* arm64 compatibility macros */ ++#define PSR_AA32_MODE_FIQ FIQ_MODE ++#define PSR_AA32_MODE_SVC SVC_MODE + #define PSR_AA32_MODE_ABT ABT_MODE + #define PSR_AA32_MODE_UND UND_MODE + #define PSR_AA32_T_BIT PSR_T_BIT ++#define PSR_AA32_F_BIT PSR_F_BIT + #define PSR_AA32_I_BIT PSR_I_BIT + #define PSR_AA32_A_BIT PSR_A_BIT + #define PSR_AA32_E_BIT PSR_E_BIT + #define PSR_AA32_IT_MASK PSR_IT_MASK ++#define PSR_AA32_GE_MASK 0x000f0000 ++#define PSR_AA32_DIT_BIT 0x00200000 ++#define PSR_AA32_PAN_BIT 0x00400000 ++#define PSR_AA32_SSBS_BIT 0x00800000 ++#define PSR_AA32_Q_BIT PSR_Q_BIT ++#define PSR_AA32_V_BIT PSR_V_BIT ++#define PSR_AA32_C_BIT PSR_C_BIT ++#define PSR_AA32_Z_BIT PSR_Z_BIT ++#define PSR_AA32_N_BIT PSR_N_BIT + + unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); + +@@ -41,6 +53,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) + *__vcpu_spsr(vcpu) = v; + } + ++static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) ++{ ++ return spsr; ++} ++ + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, + u8 reg_num) + { +@@ -177,6 +194,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) + return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; + } + ++static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) ++{ ++ return false; ++} ++ + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) + { + return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; +diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h +index 7c0eddb0adb2..32fbf82e3ebc 100644 +--- 
a/arch/arm/include/asm/kvm_mmio.h ++++ b/arch/arm/include/asm/kvm_mmio.h +@@ -14,6 +14,8 @@ + struct kvm_decode { + unsigned long rt; + bool sign_extend; ++ /* Not used on 32-bit arm */ ++ bool sixty_four; + }; + + void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S +index b408fa56eb89..6922dd8d3e2d 100644 +--- a/arch/arm/mach-tegra/sleep-tegra30.S ++++ b/arch/arm/mach-tegra/sleep-tegra30.S +@@ -370,6 +370,14 @@ _pll_m_c_x_done: + pll_locked r1, r0, CLK_RESET_PLLC_BASE + pll_locked r1, r0, CLK_RESET_PLLX_BASE + ++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 ++ cmp r1, #TEGRA30 ++ beq 1f ++ ldr r1, [r0, #CLK_RESET_PLLP_BASE] ++ bic r1, r1, #(1<<31) @ disable PllP bypass ++ str r1, [r0, #CLK_RESET_PLLP_BASE] ++1: ++ + mov32 r7, TEGRA_TMRUS_BASE + ldr r1, [r7] + add r1, r1, #LOCK_DELAY +@@ -630,7 +638,10 @@ tegra30_switch_cpu_to_clk32k: + str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] + + /* disable PLLP, PLLA, PLLC and PLLX */ ++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 ++ cmp r1, #TEGRA30 + ldr r0, [r5, #CLK_RESET_PLLP_BASE] ++ orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster + bic r0, r0, #(1 << 30) + str r0, [r5, #CLK_RESET_PLLP_BASE] + ldr r0, [r5, #CLK_RESET_PLLA_BASE] +diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c +index 7d042d5c43e3..27576c7b836e 100644 +--- a/arch/arm/mm/dma-mapping.c ++++ b/arch/arm/mm/dma-mapping.c +@@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); + + static int __dma_supported(struct device *dev, u64 mask, bool warn) + { +- unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); ++ unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); + + /* + * Translate the device's DMA mask to a PFN limit. 
This +diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +index 501a7330dbc8..522d3ef72df5 100644 +--- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi ++++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +@@ -73,6 +73,7 @@ + regulator-always-on; + regulator-boot-on; + regulator-name = "vdd_apc"; ++ regulator-initial-mode = <1>; + regulator-min-microvolt = <1048000>; + regulator-max-microvolt = <1384000>; + }; +diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c +index 70b1469783f9..24bc0a3f26e2 100644 +--- a/arch/arm64/crypto/ghash-ce-glue.c ++++ b/arch/arm64/crypto/ghash-ce-glue.c +@@ -261,7 +261,7 @@ static int ghash_setkey(struct crypto_shash *tfm, + static struct shash_alg ghash_alg[] = {{ + .base.cra_name = "ghash", + .base.cra_driver_name = "ghash-neon", +- .base.cra_priority = 100, ++ .base.cra_priority = 150, + .base.cra_blocksize = GHASH_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ghash_key), + .base.cra_module = THIS_MODULE, +diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h +index 063c964af705..48bfbf70dbb0 100644 +--- a/arch/arm64/include/asm/daifflags.h ++++ b/arch/arm64/include/asm/daifflags.h +@@ -36,7 +36,7 @@ static inline void local_daif_mask(void) + trace_hardirqs_off(); + } + +-static inline unsigned long local_daif_save(void) ++static inline unsigned long local_daif_save_flags(void) + { + unsigned long flags; + +@@ -48,6 +48,15 @@ static inline unsigned long local_daif_save(void) + flags |= PSR_I_BIT; + } + ++ return flags; ++} ++ ++static inline unsigned long local_daif_save(void) ++{ ++ unsigned long flags; ++ ++ flags = local_daif_save_flags(); ++ + local_daif_mask(); + + return flags; +diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h +index d69c1efc63e7..6ff84f1f3b4c 100644 +--- a/arch/arm64/include/asm/kvm_emulate.h ++++ b/arch/arm64/include/asm/kvm_emulate.h +@@ -204,6 +204,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) + vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; + } + ++/* ++ * The layout of SPSR for an AArch32 state is different when observed from an ++ * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 ++ * view given an AArch64 view. ++ * ++ * In ARM DDI 0487E.a see: ++ * ++ * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 ++ * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 ++ * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 ++ * ++ * Which show the following differences: ++ * ++ * | Bit | AA64 | AA32 | Notes | ++ * +-----+------+------+-----------------------------| ++ * | 24 | DIT | J | J is RES0 in ARMv8 | ++ * | 21 | SS | DIT | SS doesn't exist in AArch32 | ++ * ++ * ... and all other bits are (currently) common. 
++ */ ++static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) ++{ ++ const unsigned long overlap = BIT(24) | BIT(21); ++ unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); ++ ++ spsr &= ~overlap; ++ ++ spsr |= dit << 21; ++ ++ return spsr; ++} ++ + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) + { + u32 mode; +@@ -263,6 +295,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); + } + ++static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) ++{ ++ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); ++} ++ + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) + { + return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; +diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h +index 02b5c48fd467..b204501a0c39 100644 +--- a/arch/arm64/include/asm/kvm_mmio.h ++++ b/arch/arm64/include/asm/kvm_mmio.h +@@ -10,13 +10,11 @@ + #include <linux/kvm_host.h> + #include <asm/kvm_arm.h> + +-/* +- * This is annoying. The mmio code requires this, even if we don't +- * need any decoding. To be fixed. +- */ + struct kvm_decode { + unsigned long rt; + bool sign_extend; ++ /* Witdth of the register accessed by the faulting instruction is 64-bits */ ++ bool sixty_four; + }; + + void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h +index fbebb411ae20..bf57308fcd63 100644 +--- a/arch/arm64/include/asm/ptrace.h ++++ b/arch/arm64/include/asm/ptrace.h +@@ -62,6 +62,7 @@ + #define PSR_AA32_I_BIT 0x00000080 + #define PSR_AA32_A_BIT 0x00000100 + #define PSR_AA32_E_BIT 0x00000200 ++#define PSR_AA32_PAN_BIT 0x00400000 + #define PSR_AA32_SSBS_BIT 0x00800000 + #define PSR_AA32_DIT_BIT 0x01000000 + #define PSR_AA32_Q_BIT 0x08000000 +diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h +index 7ed9294e2004..d1bb5b69f1ce 100644 +--- a/arch/arm64/include/uapi/asm/ptrace.h ++++ b/arch/arm64/include/uapi/asm/ptrace.h +@@ -49,6 +49,7 @@ + #define PSR_SSBS_BIT 0x00001000 + #define PSR_PAN_BIT 0x00400000 + #define PSR_UAO_BIT 0x00800000 ++#define PSR_DIT_BIT 0x01000000 + #define PSR_V_BIT 0x10000000 + #define PSR_C_BIT 0x20000000 + #define PSR_Z_BIT 0x40000000 +diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c +index 3a58e9db5cfe..a100483b47c4 100644 +--- a/arch/arm64/kernel/acpi.c ++++ b/arch/arm64/kernel/acpi.c +@@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs) + if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) + return err; + +- current_flags = arch_local_save_flags(); ++ current_flags = local_daif_save_flags(); + + /* + * SEA can interrupt SError, mask it and describe this as an NMI so +diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c +index a9d25a305af5..a364a4ad5479 100644 +--- a/arch/arm64/kvm/inject_fault.c ++++ b/arch/arm64/kvm/inject_fault.c +@@ -14,9 +14,6 @@ + #include <asm/kvm_emulate.h> + #include <asm/esr.h> + +-#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ +- PSR_I_BIT | PSR_D_BIT) +- + #define CURRENT_EL_SP_EL0_VECTOR 0x0 + #define CURRENT_EL_SP_ELx_VECTOR 0x200 + #define LOWER_EL_AArch64_VECTOR 0x400 +@@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) + return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; + } + ++/* ++ * When an exception is taken, most PSTATE fields are left unchanged in the 
++ * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all ++ * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx ++ * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. ++ * ++ * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. ++ * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. ++ * ++ * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from ++ * MSB to LSB. ++ */ ++static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) ++{ ++ unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); ++ unsigned long old, new; ++ ++ old = *vcpu_cpsr(vcpu); ++ new = 0; ++ ++ new |= (old & PSR_N_BIT); ++ new |= (old & PSR_Z_BIT); ++ new |= (old & PSR_C_BIT); ++ new |= (old & PSR_V_BIT); ++ ++ // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) ++ ++ new |= (old & PSR_DIT_BIT); ++ ++ // PSTATE.UAO is set to zero upon any exception to AArch64 ++ // See ARM DDI 0487E.a, page D5-2579. ++ ++ // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 ++ // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented ++ // See ARM DDI 0487E.a, page D5-2578. ++ new |= (old & PSR_PAN_BIT); ++ if (!(sctlr & SCTLR_EL1_SPAN)) ++ new |= PSR_PAN_BIT; ++ ++ // PSTATE.SS is set to zero upon any exception to AArch64 ++ // See ARM DDI 0487E.a, page D2-2452. ++ ++ // PSTATE.IL is set to zero upon any exception to AArch64 ++ // See ARM DDI 0487E.a, page D1-2306. ++ ++ // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 ++ // See ARM DDI 0487E.a, page D13-3258 ++ if (sctlr & SCTLR_ELx_DSSBS) ++ new |= PSR_SSBS_BIT; ++ ++ // PSTATE.BTYPE is set to zero upon any exception to AArch64 ++ // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. 
++ ++ new |= PSR_D_BIT; ++ new |= PSR_A_BIT; ++ new |= PSR_I_BIT; ++ new |= PSR_F_BIT; ++ ++ new |= PSR_MODE_EL1h; ++ ++ return new; ++} ++ + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) + { + unsigned long cpsr = *vcpu_cpsr(vcpu); +@@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr + vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + +- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; ++ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); + vcpu_write_spsr(vcpu, cpsr); + + vcpu_write_sys_reg(vcpu, addr, FAR_EL1); +@@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) + vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + +- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; ++ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); + vcpu_write_spsr(vcpu, cpsr); + + /* +diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink +index 4eea4188cb20..13e0beb9eee3 100644 +--- a/arch/mips/Makefile.postlink ++++ b/arch/mips/Makefile.postlink +@@ -12,7 +12,7 @@ __archpost: + include scripts/Kbuild.include + + CMD_RELOCS = arch/mips/boot/tools/relocs +-quiet_cmd_relocs = RELOCS $@ ++quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $@ + + # `@true` prevents complaint when there is nothing to be done +diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile +index 528bd73d530a..4ed45ade32a1 100644 +--- a/arch/mips/boot/Makefile ++++ b/arch/mips/boot/Makefile +@@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS + targets += vmlinux.its + targets += vmlinux.gz.its + targets += vmlinux.bz2.its +-targets += vmlinux.lzmo.its ++targets += vmlinux.lzma.its + targets += vmlinux.lzo.its + + quiet_cmd_cpp_its_S = ITS $@ +diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile +index a3d4bec695c6..6efb2f6889a7 100644 +--- a/arch/mips/kernel/syscalls/Makefile ++++ b/arch/mips/kernel/syscalls/Makefile +@@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@ + '$(syshdr_pfx_$(basetarget))' \ + '$(syshdr_offset_$(basetarget))' + +-quiet_cmd_sysnr = SYSNR $@ ++quiet_cmd_sysnr = SYSNR $@ + cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \ + '$(sysnr_abis_$(basetarget))' \ + '$(sysnr_pfx_$(basetarget))' \ +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 3e56c9c2f16e..2b1033f13210 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -221,8 +221,7 @@ config PPC + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP +- select HAVE_RCU_TABLE_FREE if SMP +- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE ++ select HAVE_RCU_TABLE_FREE + select HAVE_MMU_GATHER_PAGE_SIZE + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN +@@ -237,6 +236,7 @@ config PPC + select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE + select NEED_SG_DMA_LENGTH + select OF ++ select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE + select OF_EARLY_FLATTREE + select OLD_SIGACTION if PPC32 + select OLD_SIGSUSPEND +diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c +index 1699e9531552..00c4d843a023 100644 +--- a/arch/powerpc/boot/4xx.c ++++ b/arch/powerpc/boot/4xx.c +@@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void) + dpath = 8; /* 64 bits */ + + /* 
get address pins (rows) */ +- val = SDRAM0_READ(DDR0_42); ++ val = SDRAM0_READ(DDR0_42); + + row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); + if (row > max_row) +diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h +index f9dc597b0b86..91c8f1d9bcee 100644 +--- a/arch/powerpc/include/asm/book3s/32/kup.h ++++ b/arch/powerpc/include/asm/book3s/32/kup.h +@@ -102,11 +102,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) + isync(); /* Context sync required after mtsrin() */ + } + +-static inline void allow_user_access(void __user *to, const void __user *from, u32 size) ++static __always_inline void allow_user_access(void __user *to, const void __user *from, ++ u32 size, unsigned long dir) + { + u32 addr, end; + +- if (__builtin_constant_p(to) && to == NULL) ++ BUILD_BUG_ON(!__builtin_constant_p(dir)); ++ if (!(dir & KUAP_WRITE)) + return; + + addr = (__force u32)to; +@@ -119,11 +121,16 @@ static inline void allow_user_access(void __user *to, const void __user *from, u + kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ + } + +-static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) ++static __always_inline void prevent_user_access(void __user *to, const void __user *from, ++ u32 size, unsigned long dir) + { + u32 addr = (__force u32)to; + u32 end = min(addr + size, TASK_SIZE); + ++ BUILD_BUG_ON(!__builtin_constant_p(dir)); ++ if (!(dir & KUAP_WRITE)) ++ return; ++ + if (!addr || addr >= TASK_SIZE || !size) + return; + +@@ -131,12 +138,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from, + kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ + } + +-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) ++static inline bool ++bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) + { ++ unsigned long begin = regs->kuap & 0xf0000000; ++ unsigned long end = regs->kuap << 28; ++ + if (!is_write) + return false; + +- return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); ++ return WARN(address < begin || address >= end, ++ "Bug: write fault blocked by segment registers !"); + } + + #endif /* CONFIG_PPC_KUAP */ +diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h +index 998317702630..dc5c039eb28e 100644 +--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h ++++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h +@@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) + + #define get_hugepd_cache_index(x) (x) + +-#ifdef CONFIG_SMP + static inline void pgtable_free_tlb(struct mmu_gather *tlb, + void *table, int shift) + { +@@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table) + + pgtable_free(table, shift); + } +-#else +-static inline void pgtable_free_tlb(struct mmu_gather *tlb, +- void *table, int shift) +-{ +- pgtable_free(table, shift); +-} +-#endif + + static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, + unsigned long address) +diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h +index f254de956d6a..c8d1076e0ebb 100644 +--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h ++++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h +@@ -77,25 +77,27 @@ static inline void set_kuap(unsigned long value) + isync(); + } + +-static inline void allow_user_access(void __user *to, const void __user *from, +- unsigned long size) 
++static __always_inline void allow_user_access(void __user *to, const void __user *from, ++ unsigned long size, unsigned long dir) + { + // This is written so we can resolve to a single case at build time +- if (__builtin_constant_p(to) && to == NULL) ++ BUILD_BUG_ON(!__builtin_constant_p(dir)); ++ if (dir == KUAP_READ) + set_kuap(AMR_KUAP_BLOCK_WRITE); +- else if (__builtin_constant_p(from) && from == NULL) ++ else if (dir == KUAP_WRITE) + set_kuap(AMR_KUAP_BLOCK_READ); + else + set_kuap(0); + } + + static inline void prevent_user_access(void __user *to, const void __user *from, +- unsigned long size) ++ unsigned long size, unsigned long dir) + { + set_kuap(AMR_KUAP_BLOCKED); + } + +-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) ++static inline bool ++bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) + { + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), +diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h +index d5a44912902f..cae9e814593a 100644 +--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h ++++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h +@@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list; + extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); + extern void pmd_fragment_free(unsigned long *); + extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); +-#ifdef CONFIG_SMP + extern void __tlb_remove_table(void *_table); +-#endif + void pte_frag_destroy(void *pte_frag); + + static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) +diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h +index eea28ca679db..bc7d9d06a6d9 100644 +--- a/arch/powerpc/include/asm/futex.h ++++ b/arch/powerpc/include/asm/futex.h +@@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + { + int oldval = 0, ret; + +- allow_write_to_user(uaddr, sizeof(*uaddr)); ++ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + pagefault_disable(); + + switch (op) { +@@ -62,7 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + + *oval = oldval; + +- prevent_write_to_user(uaddr, sizeof(*uaddr)); ++ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + return ret; + } + +@@ -76,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + +- allow_write_to_user(uaddr, sizeof(*uaddr)); ++ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); ++ + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER + "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ +@@ -97,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + : "cc", "memory"); + + *uval = prev; +- prevent_write_to_user(uaddr, sizeof(*uaddr)); ++ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); ++ + return ret; + } + +diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h +index 5b5e39643a27..94f24928916a 100644 +--- a/arch/powerpc/include/asm/kup.h ++++ b/arch/powerpc/include/asm/kup.h +@@ -2,6 +2,10 @@ + #ifndef _ASM_POWERPC_KUP_H_ + #define _ASM_POWERPC_KUP_H_ + ++#define KUAP_READ 1 ++#define KUAP_WRITE 2 ++#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) ++ + #ifdef CONFIG_PPC64 + #include <asm/book3s/64/kup-radix.h> + #endif +@@ -42,32 +46,48 @@ void setup_kuap(bool disabled); + #else + static inline void setup_kuap(bool 
disabled) { } + static inline void allow_user_access(void __user *to, const void __user *from, +- unsigned long size) { } ++ unsigned long size, unsigned long dir) { } + static inline void prevent_user_access(void __user *to, const void __user *from, +- unsigned long size) { } +-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } ++ unsigned long size, unsigned long dir) { } ++static inline bool ++bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) ++{ ++ return false; ++} + #endif /* CONFIG_PPC_KUAP */ + + static inline void allow_read_from_user(const void __user *from, unsigned long size) + { +- allow_user_access(NULL, from, size); ++ allow_user_access(NULL, from, size, KUAP_READ); + } + + static inline void allow_write_to_user(void __user *to, unsigned long size) + { +- allow_user_access(to, NULL, size); ++ allow_user_access(to, NULL, size, KUAP_WRITE); ++} ++ ++static inline void allow_read_write_user(void __user *to, const void __user *from, ++ unsigned long size) ++{ ++ allow_user_access(to, from, size, KUAP_READ_WRITE); + } + + static inline void prevent_read_from_user(const void __user *from, unsigned long size) + { +- prevent_user_access(NULL, from, size); ++ prevent_user_access(NULL, from, size, KUAP_READ); + } + + static inline void prevent_write_to_user(void __user *to, unsigned long size) + { +- prevent_user_access(to, NULL, size); ++ prevent_user_access(to, NULL, size, KUAP_WRITE); ++} ++ ++static inline void prevent_read_write_user(void __user *to, const void __user *from, ++ unsigned long size) ++{ ++ prevent_user_access(to, from, size, KUAP_READ_WRITE); + } + + #endif /* !__ASSEMBLY__ */ + +-#endif /* _ASM_POWERPC_KUP_H_ */ ++#endif /* _ASM_POWERPC_KUAP_H_ */ +diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h +index 1c3133b5f86a..6fe97465e350 100644 +--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h ++++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h +@@ -34,18 +34,19 @@ + #include <asm/reg.h> + + static inline void allow_user_access(void __user *to, const void __user *from, +- unsigned long size) ++ unsigned long size, unsigned long dir) + { + mtspr(SPRN_MD_AP, MD_APG_INIT); + } + + static inline void prevent_user_access(void __user *to, const void __user *from, +- unsigned long size) ++ unsigned long size, unsigned long dir) + { + mtspr(SPRN_MD_AP, MD_APG_KUAP); + } + +-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) ++static inline bool ++bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) + { + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), + "Bug: fault blocked by AP register !"); +diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h +index 332b13b4ecdb..29c43665a753 100644 +--- a/arch/powerpc/include/asm/nohash/pgalloc.h ++++ b/arch/powerpc/include/asm/nohash/pgalloc.h +@@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift) + + #define get_hugepd_cache_index(x) (x) + +-#ifdef CONFIG_SMP + static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) + { + unsigned long pgf = (unsigned long)table; +@@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table) + pgtable_free(table, shift); + } + +-#else +-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +-{ +- pgtable_free(table, shift); +-} +-#endif +- + static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t 
table, + unsigned long address) + { +diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h +index b2c0be93929d..7f3a8b902325 100644 +--- a/arch/powerpc/include/asm/tlb.h ++++ b/arch/powerpc/include/asm/tlb.h +@@ -26,6 +26,17 @@ + + #define tlb_flush tlb_flush + extern void tlb_flush(struct mmu_gather *tlb); ++/* ++ * book3s: ++ * Hash does not use the linux page-tables, so we can avoid ++ * the TLB invalidate for page-table freeing, Radix otoh does use the ++ * page-tables and needs the TLBI. ++ * ++ * nohash: ++ * We still do TLB invalidate in the __pte_free_tlb routine before we ++ * add the page table pages to mmu gather table batch. ++ */ ++#define tlb_needs_table_invalidate() radix_enabled() + + /* Get the generic bits... */ + #include <asm-generic/tlb.h> +diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h +index c92fe7fe9692..cafad1960e76 100644 +--- a/arch/powerpc/include/asm/uaccess.h ++++ b/arch/powerpc/include/asm/uaccess.h +@@ -313,9 +313,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) + unsigned long ret; + + barrier_nospec(); +- allow_user_access(to, from, n); ++ allow_read_write_user(to, from, n); + ret = __copy_tofrom_user(to, from, n); +- prevent_user_access(to, from, n); ++ prevent_read_write_user(to, from, n); + return ret; + } + #endif /* __powerpc64__ */ +diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S +index d60908ea37fb..59bb4f4ae316 100644 +--- a/arch/powerpc/kernel/entry_32.S ++++ b/arch/powerpc/kernel/entry_32.S +@@ -179,7 +179,7 @@ transfer_to_handler: + 2: /* if from kernel, check interrupted DOZE/NAP mode and + * check for stack overflow + */ +- kuap_save_and_lock r11, r12, r9, r2, r0 ++ kuap_save_and_lock r11, r12, r9, r2, r6 + addi r2, r12, -THREAD + lwz r9,KSP_LIMIT(r12) + cmplw r1,r9 /* if r1 <= ksp_limit */ +@@ -284,6 +284,7 @@ reenable_mmu: + rlwinm r9,r9,0,~MSR_EE + lwz r12,_LINK(r11) /* and return to address in LR */ + kuap_restore r11, r2, r3, r4, r5 ++ lwz r2, GPR2(r11) + b fast_exception_return + #endif + +diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c +index 709cf1fd4cf4..36abbe3c346d 100644 +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -2354,7 +2354,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, + mutex_unlock(&kvm->lock); + + if (!vcore) +- goto free_vcpu; ++ goto uninit_vcpu; + + spin_lock(&vcore->lock); + ++vcore->num_threads; +@@ -2371,6 +2371,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, + + return vcpu; + ++uninit_vcpu: ++ kvm_vcpu_uninit(vcpu); + free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); + out: +diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c +index cc65af8fe6f7..3f6ad3f58628 100644 +--- a/arch/powerpc/kvm/book3s_pr.c ++++ b/arch/powerpc/kvm/book3s_pr.c +@@ -1769,10 +1769,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, + + err = kvmppc_mmu_init(vcpu); + if (err < 0) +- goto uninit_vcpu; ++ goto free_shared_page; + + return vcpu; + ++free_shared_page: ++ free_page((unsigned long)vcpu->arch.shared); + uninit_vcpu: + kvm_vcpu_uninit(vcpu); + free_shadow_vcpu: +diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c +index 5a3373e06e60..235d57d6c205 100644 +--- a/arch/powerpc/kvm/book3s_xive_native.c ++++ b/arch/powerpc/kvm/book3s_xive_native.c +@@ -638,7 +638,7 @@ static int kvmppc_xive_native_set_queue_config(struct 
kvmppc_xive *xive, + srcu_idx = srcu_read_lock(&kvm->srcu); + gfn = gpa_to_gfn(kvm_eq.qaddr); + +- page_size = kvm_host_page_size(kvm, gfn); ++ page_size = kvm_host_page_size(vcpu, gfn); + if (1ull << kvm_eq.qshift > page_size) { + srcu_read_unlock(&kvm->srcu, srcu_idx); + pr_warn("Incompatible host page size %lx!\n", page_size); +diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c +index 75483b40fcb1..2bf7e1b4fd82 100644 +--- a/arch/powerpc/mm/book3s64/pgtable.c ++++ b/arch/powerpc/mm/book3s64/pgtable.c +@@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index) + } + } + +-#ifdef CONFIG_SMP + void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) + { + unsigned long pgf = (unsigned long)table; +@@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table) + + return pgtable_free(table, index); + } +-#else +-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) +-{ +- return pgtable_free(table, index); +-} +-#endif + + #ifdef CONFIG_PROC_FS + atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; +diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c +index 8432c281de92..9298905cfe74 100644 +--- a/arch/powerpc/mm/fault.c ++++ b/arch/powerpc/mm/fault.c +@@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, + + // Read/write fault in a valid region (the exception table search passed + // above), but blocked by KUAP is bad, it can never succeed. +- if (bad_kuap_fault(regs, is_write)) ++ if (bad_kuap_fault(regs, address, is_write)) + return true; + + // What's left? Kernel fault on user in well defined regions (extable +diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c +index 2f9ddc29c535..c73205172447 100644 +--- a/arch/powerpc/mm/ptdump/ptdump.c ++++ b/arch/powerpc/mm/ptdump/ptdump.c +@@ -173,10 +173,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) + + static void note_prot_wx(struct pg_state *st, unsigned long addr) + { ++ pte_t pte = __pte(st->current_flags); ++ + if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) + return; + +- if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) ++ if (!pte_write(pte) || !pte_exec(pte)) + return; + + WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", +diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c +index 8e700390f3d6..4c3af2e9eb8e 100644 +--- a/arch/powerpc/platforms/pseries/hotplug-memory.c ++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c +@@ -360,8 +360,10 @@ static bool lmb_is_removable(struct drmem_lmb *lmb) + + for (i = 0; i < scns_per_block; i++) { + pfn = PFN_DOWN(phys_addr); +- if (!pfn_present(pfn)) ++ if (!pfn_present(pfn)) { ++ phys_addr += MIN_MEMORY_BLOCK_SIZE; + continue; ++ } + + rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); + phys_addr += MIN_MEMORY_BLOCK_SIZE; +diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c +index d83364ebc5c5..8057aafd5f5e 100644 +--- a/arch/powerpc/xmon/xmon.c ++++ b/arch/powerpc/xmon/xmon.c +@@ -1894,15 +1894,14 @@ static void dump_300_sprs(void) + + printf("pidr = %.16lx tidr = %.16lx\n", + mfspr(SPRN_PID), mfspr(SPRN_TIDR)); +- printf("asdr = %.16lx psscr = %.16lx\n", +- mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR) +- : mfspr(SPRN_PSSCR_PR)); ++ printf("psscr = %.16lx\n", ++ hv ? 
mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); + + if (!hv) + return; + +- printf("ptcr = %.16lx\n", +- mfspr(SPRN_PTCR)); ++ printf("ptcr = %.16lx asdr = %.16lx\n", ++ mfspr(SPRN_PTCR), mfspr(SPRN_ASDR)); + #endif + } + +diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c +index 7fbf56aab661..e2279fed8f56 100644 +--- a/arch/riscv/net/bpf_jit_comp.c ++++ b/arch/riscv/net/bpf_jit_comp.c +@@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx) + return false; + } + ++static void mark_fp(struct rv_jit_context *ctx) ++{ ++ __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags); ++} ++ + static void mark_call(struct rv_jit_context *ctx) + { + __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); +@@ -596,7 +601,8 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) + + emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); + /* Set return value. */ +- emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); ++ if (reg == RV_REG_RA) ++ emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); + emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx); + } + +@@ -1426,6 +1432,10 @@ static void build_prologue(struct rv_jit_context *ctx) + { + int stack_adjust = 0, store_offset, bpf_stack_adjust; + ++ bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); ++ if (bpf_stack_adjust) ++ mark_fp(ctx); ++ + if (seen_reg(RV_REG_RA, ctx)) + stack_adjust += 8; + stack_adjust += 8; /* RV_REG_FP */ +@@ -1443,7 +1453,6 @@ static void build_prologue(struct rv_jit_context *ctx) + stack_adjust += 8; + + stack_adjust = round_up(stack_adjust, 16); +- bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + stack_adjust += bpf_stack_adjust; + + store_offset = stack_adjust - 8; +diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h +index 823578c6b9e2..3f5cb55cde35 100644 +--- a/arch/s390/include/asm/page.h ++++ b/arch/s390/include/asm/page.h +@@ -33,6 +33,8 @@ + #define ARCH_HAS_PREPARE_HUGEPAGE + #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH + ++#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA ++ + #include <asm/setup.h> + #ifndef __ASSEMBLY__ + +diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c +index d047e846e1b9..756c627f7e54 100644 +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -2863,9 +2863,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) + vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | + CR14_UNUSED_33 | + CR14_EXTERNAL_DAMAGE_SUBMASK; +- /* make sure the new fpc will be lazily loaded */ +- save_fpu_regs(); +- current->thread.fpu.fpc = 0; ++ vcpu->run->s.regs.fpc = 0; + vcpu->arch.sie_block->gbea = 1; + vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; +@@ -4354,7 +4352,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, + switch (ioctl) { + case KVM_S390_STORE_STATUS: + idx = srcu_read_lock(&vcpu->kvm->srcu); +- r = kvm_s390_vcpu_store_status(vcpu, arg); ++ r = kvm_s390_store_status_unloaded(vcpu, arg); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + break; + case KVM_S390_SET_INITIAL_PSW: { +diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c +index b0246c705a19..5674710a4841 100644 +--- a/arch/s390/mm/hugetlbpage.c ++++ b/arch/s390/mm/hugetlbpage.c +@@ -2,7 +2,7 @@ + /* + * IBM System z Huge TLB Page Support for Kernel. + * +- * Copyright IBM Corp. 2007,2016 ++ * Copyright IBM Corp. 
2007,2020 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +@@ -11,6 +11,9 @@ + + #include <linux/mm.h> + #include <linux/hugetlb.h> ++#include <linux/mman.h> ++#include <linux/sched/mm.h> ++#include <linux/security.h> + + /* + * If the bit selected by single-bit bitmask "a" is set within "x", move +@@ -267,3 +270,98 @@ static __init int setup_hugepagesz(char *opt) + return 1; + } + __setup("hugepagesz=", setup_hugepagesz); ++ ++static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, ++ unsigned long addr, unsigned long len, ++ unsigned long pgoff, unsigned long flags) ++{ ++ struct hstate *h = hstate_file(file); ++ struct vm_unmapped_area_info info; ++ ++ info.flags = 0; ++ info.length = len; ++ info.low_limit = current->mm->mmap_base; ++ info.high_limit = TASK_SIZE; ++ info.align_mask = PAGE_MASK & ~huge_page_mask(h); ++ info.align_offset = 0; ++ return vm_unmapped_area(&info); ++} ++ ++static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, ++ unsigned long addr0, unsigned long len, ++ unsigned long pgoff, unsigned long flags) ++{ ++ struct hstate *h = hstate_file(file); ++ struct vm_unmapped_area_info info; ++ unsigned long addr; ++ ++ info.flags = VM_UNMAPPED_AREA_TOPDOWN; ++ info.length = len; ++ info.low_limit = max(PAGE_SIZE, mmap_min_addr); ++ info.high_limit = current->mm->mmap_base; ++ info.align_mask = PAGE_MASK & ~huge_page_mask(h); ++ info.align_offset = 0; ++ addr = vm_unmapped_area(&info); ++ ++ /* ++ * A failed mmap() very likely causes application failure, ++ * so fall back to the bottom-up function here. This scenario ++ * can happen with large stack limits and large mmap() ++ * allocations. ++ */ ++ if (addr & ~PAGE_MASK) { ++ VM_BUG_ON(addr != -ENOMEM); ++ info.flags = 0; ++ info.low_limit = TASK_UNMAPPED_BASE; ++ info.high_limit = TASK_SIZE; ++ addr = vm_unmapped_area(&info); ++ } ++ ++ return addr; ++} ++ ++unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, ++ unsigned long len, unsigned long pgoff, unsigned long flags) ++{ ++ struct hstate *h = hstate_file(file); ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ int rc; ++ ++ if (len & ~huge_page_mask(h)) ++ return -EINVAL; ++ if (len > TASK_SIZE - mmap_min_addr) ++ return -ENOMEM; ++ ++ if (flags & MAP_FIXED) { ++ if (prepare_hugepage_range(file, addr, len)) ++ return -EINVAL; ++ goto check_asce_limit; ++ } ++ ++ if (addr) { ++ addr = ALIGN(addr, huge_page_size(h)); ++ vma = find_vma(mm, addr); ++ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && ++ (!vma || addr + len <= vm_start_gap(vma))) ++ goto check_asce_limit; ++ } ++ ++ if (mm->get_unmapped_area == arch_get_unmapped_area) ++ addr = hugetlb_get_unmapped_area_bottomup(file, addr, len, ++ pgoff, flags); ++ else ++ addr = hugetlb_get_unmapped_area_topdown(file, addr, len, ++ pgoff, flags); ++ if (addr & ~PAGE_MASK) ++ return addr; ++ ++check_asce_limit: ++ if (addr + len > current->mm->context.asce_limit && ++ addr + len <= TASK_SIZE) { ++ rc = crst_table_upgrade(mm, addr + len); ++ if (rc) ++ return (unsigned long) rc; ++ } ++ return addr; ++} +diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig +index eb24cb1afc11..18e9fb6fcf1b 100644 +--- a/arch/sparc/Kconfig ++++ b/arch/sparc/Kconfig +@@ -65,7 +65,6 @@ config SPARC64 + select HAVE_KRETPROBES + select HAVE_KPROBES + select HAVE_RCU_TABLE_FREE if SMP +- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select 
HAVE_DYNAMIC_FTRACE +diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h +index a2f3fa61ee36..8cb8f3833239 100644 +--- a/arch/sparc/include/asm/tlb_64.h ++++ b/arch/sparc/include/asm/tlb_64.h +@@ -28,6 +28,15 @@ void flush_tlb_pending(void); + #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) + #define tlb_flush(tlb) flush_tlb_pending() + ++/* ++ * SPARC64's hardware TLB fill does not use the Linux page-tables ++ * and therefore we don't need a TLBI when freeing page-table pages. ++ */ ++ ++#ifdef CONFIG_HAVE_RCU_TABLE_FREE ++#define tlb_needs_table_invalidate() (false) ++#endif ++ + #include <asm-generic/tlb.h> + + #endif /* _SPARC64_TLB_H */ +diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h +index 9d0d125500e2..084b8949ddff 100644 +--- a/arch/sparc/include/uapi/asm/ipcbuf.h ++++ b/arch/sparc/include/uapi/asm/ipcbuf.h +@@ -15,19 +15,19 @@ + + struct ipc64_perm + { +- __kernel_key_t key; +- __kernel_uid_t uid; +- __kernel_gid_t gid; +- __kernel_uid_t cuid; +- __kernel_gid_t cgid; ++ __kernel_key_t key; ++ __kernel_uid32_t uid; ++ __kernel_gid32_t gid; ++ __kernel_uid32_t cuid; ++ __kernel_gid32_t cgid; + #ifndef __arch64__ +- unsigned short __pad0; ++ unsigned short __pad0; + #endif +- __kernel_mode_t mode; +- unsigned short __pad1; +- unsigned short seq; +- unsigned long long __unused1; +- unsigned long long __unused2; ++ __kernel_mode_t mode; ++ unsigned short __pad1; ++ unsigned short seq; ++ unsigned long long __unused1; ++ unsigned long long __unused2; + }; + + #endif /* __SPARC_IPCBUF_H */ +diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h +index 2ebc17d9c72c..19e94af9cc5d 100644 +--- a/arch/x86/include/asm/apic.h ++++ b/arch/x86/include/asm/apic.h +@@ -140,6 +140,7 @@ extern void apic_soft_disable(void); + extern void lapic_shutdown(void); + extern void sync_Arb_IDs(void); + extern void init_bsp_APIC(void); ++extern void apic_intr_mode_select(void); + extern void apic_intr_mode_init(void); + extern void init_apic_mappings(void); + void register_lapic_address(unsigned long address); +@@ -188,6 +189,7 @@ static inline void disable_local_APIC(void) { } + # define setup_secondary_APIC_clock x86_init_noop + static inline void lapic_update_tsc_freq(void) { } + static inline void init_bsp_APIC(void) { } ++static inline void apic_intr_mode_select(void) { } + static inline void apic_intr_mode_init(void) { } + static inline void lapic_assign_system_vectors(void) { } + static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +@@ -452,6 +454,14 @@ static inline void ack_APIC_irq(void) + apic_eoi(); + } + ++ ++static inline bool lapic_vector_set_in_irr(unsigned int vector) ++{ ++ u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); ++ ++ return !!(irr & (1U << (vector % 32))); ++} ++ + static inline unsigned default_get_apic_id(unsigned long x) + { + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 4fc61483919a..c1ed054c103c 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -380,12 +380,12 @@ struct kvm_mmu { + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); + unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); +- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, ++ int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, + bool prefault); + void 
(*inject_page_fault)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); +- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, +- struct x86_exception *exception); ++ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, ++ u32 access, struct x86_exception *exception); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); + int (*sync_page)(struct kvm_vcpu *vcpu, +@@ -667,10 +667,10 @@ struct kvm_vcpu_arch { + bool pvclock_set_guest_stopped_request; + + struct { ++ u8 preempted; + u64 msr_val; + u64 last_steal; +- struct gfn_to_hva_cache stime; +- struct kvm_steal_time steal; ++ struct gfn_to_pfn_cache cache; + } st; + + u64 tsc_offset; +@@ -1128,6 +1128,7 @@ struct kvm_x86_ops { + bool (*xsaves_supported)(void); + bool (*umip_emulated)(void); + bool (*pt_supported)(void); ++ bool (*pku_supported)(void); + + int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + void (*request_immediate_exit)(struct kvm_vcpu *vcpu); +@@ -1450,7 +1451,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); + + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + +-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, ++int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, + void *insn, int insn_len); + void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); + void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); +diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h +index 19435858df5f..96d9cd208610 100644 +--- a/arch/x86/include/asm/x86_init.h ++++ b/arch/x86/include/asm/x86_init.h +@@ -51,12 +51,14 @@ struct x86_init_resources { + * are set up. + * @intr_init: interrupt init code + * @trap_init: platform specific trap setup ++ * @intr_mode_select: interrupt delivery mode selection + * @intr_mode_init: interrupt delivery mode setup + */ + struct x86_init_irqs { + void (*pre_vector_init)(void); + void (*intr_init)(void); + void (*trap_init)(void); ++ void (*intr_mode_select)(void); + void (*intr_mode_init)(void); + }; + +diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c +index 2b0faf86da1b..df891f874614 100644 +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -830,8 +830,17 @@ bool __init apic_needs_pit(void) + if (!tsc_khz || !cpu_khz) + return true; + +- /* Is there an APIC at all? */ +- if (!boot_cpu_has(X86_FEATURE_APIC)) ++ /* Is there an APIC at all or is it disabled? */ ++ if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic) ++ return true; ++ ++ /* ++ * If interrupt delivery mode is legacy PIC or virtual wire without ++ * configuration, the local APIC timer wont be set up. Make sure ++ * that the PIT is initialized. ++ */ ++ if (apic_intr_mode == APIC_PIC || ++ apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG) + return true; + + /* Virt guests may lack ARAT, but still have DEADLINE */ +@@ -1322,7 +1331,7 @@ void __init sync_Arb_IDs(void) + + enum apic_intr_mode_id apic_intr_mode __ro_after_init; + +-static int __init apic_intr_mode_select(void) ++static int __init __apic_intr_mode_select(void) + { + /* Check kernel option */ + if (disable_apic) { +@@ -1384,6 +1393,12 @@ static int __init apic_intr_mode_select(void) + return APIC_SYMMETRIC_IO; + } + ++/* Select the interrupt delivery mode for the BSP */ ++void __init apic_intr_mode_select(void) ++{ ++ apic_intr_mode = __apic_intr_mode_select(); ++} ++ + /* + * An initial setup of the virtual wire mode. 
+ */ +@@ -1440,8 +1455,6 @@ void __init apic_intr_mode_init(void) + { + bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); + +- apic_intr_mode = apic_intr_mode_select(); +- + switch (apic_intr_mode) { + case APIC_PIC: + pr_info("APIC: Keep in PIC mode(8259)\n"); +diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c +index 7f7533462474..159bd0cb8548 100644 +--- a/arch/x86/kernel/apic/msi.c ++++ b/arch/x86/kernel/apic/msi.c +@@ -23,10 +23,8 @@ + + static struct irq_domain *msi_default_domain; + +-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ++static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) + { +- struct irq_cfg *cfg = irqd_cfg(data); +- + msg->address_hi = MSI_ADDR_BASE_HI; + + if (x2apic_enabled()) +@@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) + MSI_DATA_VECTOR(cfg->vector); + } + ++static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ __irq_msi_compose_msg(irqd_cfg(data), msg); ++} ++ ++static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) ++{ ++ struct msi_msg msg[2] = { [1] = { }, }; ++ ++ __irq_msi_compose_msg(cfg, msg); ++ irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); ++} ++ ++static int ++msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) ++{ ++ struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd); ++ struct irq_data *parent = irqd->parent_data; ++ unsigned int cpu; ++ int ret; ++ ++ /* Save the current configuration */ ++ cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); ++ old_cfg = *cfg; ++ ++ /* Allocate a new target vector */ ++ ret = parent->chip->irq_set_affinity(parent, mask, force); ++ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) ++ return ret; ++ ++ /* ++ * For non-maskable and non-remapped MSI interrupts the migration ++ * to a different destination CPU and a different vector has to be ++ * done careful to handle the possible stray interrupt which can be ++ * caused by the non-atomic update of the address/data pair. ++ * ++ * Direct update is possible when: ++ * - The MSI is maskable (remapped MSI does not use this code path)). ++ * The quirk bit is not set in this case. ++ * - The new vector is the same as the old vector ++ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) ++ * - The new destination CPU is the same as the old destination CPU ++ */ ++ if (!irqd_msi_nomask_quirk(irqd) || ++ cfg->vector == old_cfg.vector || ++ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || ++ cfg->dest_apicid == old_cfg.dest_apicid) { ++ irq_msi_update_msg(irqd, cfg); ++ return ret; ++ } ++ ++ /* ++ * Paranoia: Validate that the interrupt target is the local ++ * CPU. ++ */ ++ if (WARN_ON_ONCE(cpu != smp_processor_id())) { ++ irq_msi_update_msg(irqd, cfg); ++ return ret; ++ } ++ ++ /* ++ * Redirect the interrupt to the new vector on the current CPU ++ * first. This might cause a spurious interrupt on this vector if ++ * the device raises an interrupt right between this update and the ++ * update to the final destination CPU. ++ * ++ * If the vector is in use then the installed device handler will ++ * denote it as spurious which is no harm as this is a rare event ++ * and interrupt handlers have to cope with spurious interrupts ++ * anyway. If the vector is unused, then it is marked so it won't ++ * trigger the 'No irq handler for vector' warning in do_IRQ(). 
++ * ++ * This requires to hold vector lock to prevent concurrent updates to ++ * the affected vector. ++ */ ++ lock_vector_lock(); ++ ++ /* ++ * Mark the new target vector on the local CPU if it is currently ++ * unused. Reuse the VECTOR_RETRIGGERED state which is also used in ++ * the CPU hotplug path for a similar purpose. This cannot be ++ * undone here as the current CPU has interrupts disabled and ++ * cannot handle the interrupt before the whole set_affinity() ++ * section is done. In the CPU unplug case, the current CPU is ++ * about to vanish and will not handle any interrupts anymore. The ++ * vector is cleaned up when the CPU comes online again. ++ */ ++ if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector]))) ++ this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED); ++ ++ /* Redirect it to the new vector on the local CPU temporarily */ ++ old_cfg.vector = cfg->vector; ++ irq_msi_update_msg(irqd, &old_cfg); ++ ++ /* Now transition it to the target CPU */ ++ irq_msi_update_msg(irqd, cfg); ++ ++ /* ++ * All interrupts after this point are now targeted at the new ++ * vector/CPU. ++ * ++ * Drop vector lock before testing whether the temporary assignment ++ * to the local CPU was hit by an interrupt raised in the device, ++ * because the retrigger function acquires vector lock again. ++ */ ++ unlock_vector_lock(); ++ ++ /* ++ * Check whether the transition raced with a device interrupt and ++ * is pending in the local APICs IRR. It is safe to do this outside ++ * of vector lock as the irq_desc::lock of this interrupt is still ++ * held and interrupts are disabled: The check is not accessing the ++ * underlying vector store. It's just checking the local APIC's ++ * IRR. ++ */ ++ if (lapic_vector_set_in_irr(cfg->vector)) ++ irq_data_get_irq_chip(irqd)->irq_retrigger(irqd); ++ ++ return ret; ++} ++ + /* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. +@@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = { + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, ++ .irq_set_affinity = msi_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, + }; + +@@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent) + } + if (!msi_default_domain) + pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); ++ else ++ msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; + } + + #ifdef CONFIG_IRQ_REMAP +diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c +index 3e20d322bc98..032509adf9de 100644 +--- a/arch/x86/kernel/cpu/tsx.c ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -115,11 +115,12 @@ void __init tsx_init(void) + tsx_disable(); + + /* +- * tsx_disable() will change the state of the +- * RTM CPUID bit. Clear it here since it is now +- * expected to be not set. ++ * tsx_disable() will change the state of the RTM and HLE CPUID ++ * bits. Clear them here since they are now expected to be not ++ * set. + */ + setup_clear_cpu_cap(X86_FEATURE_RTM); ++ setup_clear_cpu_cap(X86_FEATURE_HLE); + } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { + + /* +@@ -131,10 +132,10 @@ void __init tsx_init(void) + tsx_enable(); + + /* +- * tsx_enable() will change the state of the +- * RTM CPUID bit. Force it here since it is now +- * expected to be set. ++ * tsx_enable() will change the state of the RTM and HLE CPUID ++ * bits. Force them here since they are now expected to be set. 
+ */ + setup_force_cpu_cap(X86_FEATURE_RTM); ++ setup_force_cpu_cap(X86_FEATURE_HLE); + } + } +diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c +index 7ce29cee9f9e..d8673d8a779b 100644 +--- a/arch/x86/kernel/time.c ++++ b/arch/x86/kernel/time.c +@@ -91,10 +91,18 @@ void __init hpet_time_init(void) + + static __init void x86_late_time_init(void) + { ++ /* ++ * Before PIT/HPET init, select the interrupt mode. This is required ++ * to make the decision whether PIT should be initialized correct. ++ */ ++ x86_init.irqs.intr_mode_select(); ++ ++ /* Setup the legacy timers */ + x86_init.timers.timer_init(); ++ + /* +- * After PIT/HPET timers init, select and setup +- * the final interrupt mode for delivering IRQs. ++ * After PIT/HPET timers init, set up the final interrupt mode for ++ * delivering IRQs. + */ + x86_init.irqs.intr_mode_init(); + tsc_init(); +diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c +index 18a799c8fa28..1838b10a299c 100644 +--- a/arch/x86/kernel/x86_init.c ++++ b/arch/x86/kernel/x86_init.c +@@ -58,6 +58,7 @@ struct x86_init_ops x86_init __initdata = { + .pre_vector_init = init_ISA_irqs, + .intr_init = native_init_IRQ, + .trap_init = x86_init_noop, ++ .intr_mode_select = apic_intr_mode_select, + .intr_mode_init = apic_intr_mode_init + }, + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index b1d5a8c94a57..6fa946f983c9 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -352,6 +352,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) + unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; + unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; + unsigned f_la57; ++ unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0; + + /* cpuid 7.0.ebx */ + const u32 kvm_cpuid_7_0_ebx_x86_features = +@@ -363,7 +364,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) + + /* cpuid 7.0.ecx*/ + const u32 kvm_cpuid_7_0_ecx_x86_features = +- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | ++ F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) | + F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | + F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; +@@ -392,6 +393,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) + /* Set LA57 based on hardware capability. */ + entry->ecx |= f_la57; + entry->ecx |= f_umip; ++ entry->ecx |= f_pku; + /* PKU is not yet implemented for shadow paging. 
*/ + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) + entry->ecx &= ~F(PKU); +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 698efb8c3897..37aa9ce29b33 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -22,6 +22,7 @@ + #include "kvm_cache_regs.h" + #include <asm/kvm_emulate.h> + #include <linux/stringify.h> ++#include <asm/fpu/api.h> + #include <asm/debugreg.h> + #include <asm/nospec-branch.h> + +@@ -1075,8 +1076,23 @@ static void fetch_register_operand(struct operand *op) + } + } + ++static void emulator_get_fpu(void) ++{ ++ fpregs_lock(); ++ ++ fpregs_assert_state_consistent(); ++ if (test_thread_flag(TIF_NEED_FPU_LOAD)) ++ switch_fpu_return(); ++} ++ ++static void emulator_put_fpu(void) ++{ ++ fpregs_unlock(); ++} ++ + static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) + { ++ emulator_get_fpu(); + switch (reg) { + case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; +@@ -1098,11 +1114,13 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) + #endif + default: BUG(); + } ++ emulator_put_fpu(); + } + + static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, + int reg) + { ++ emulator_get_fpu(); + switch (reg) { + case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; +@@ -1124,10 +1142,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, + #endif + default: BUG(); + } ++ emulator_put_fpu(); + } + + static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) + { ++ emulator_get_fpu(); + switch (reg) { + case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; + case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; +@@ -1139,10 +1159,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) + case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; + default: BUG(); + } ++ emulator_put_fpu(); + } + + static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) + { ++ emulator_get_fpu(); + switch (reg) { + case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; + case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; +@@ -1154,6 +1176,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) + case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; + default: BUG(); + } ++ emulator_put_fpu(); + } + + static int em_fninit(struct x86_emulate_ctxt *ctxt) +@@ -1161,7 +1184,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + ++ emulator_get_fpu(); + asm volatile("fninit"); ++ emulator_put_fpu(); + return X86EMUL_CONTINUE; + } + +@@ -1172,7 +1197,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + ++ emulator_get_fpu(); + asm volatile("fnstcw %0": "+m"(fcw)); ++ emulator_put_fpu(); + + ctxt->dst.val = fcw; + +@@ -1186,7 +1213,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + ++ emulator_get_fpu(); + asm volatile("fnstsw %0": "+m"(fsw)); ++ emulator_put_fpu(); + + ctxt->dst.val = fsw; + +@@ -4094,8 +4123,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + ++ emulator_get_fpu(); ++ + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + 
++ emulator_put_fpu(); ++ + if (rc != X86EMUL_CONTINUE) + return rc; + +@@ -4138,6 +4171,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + ++ emulator_get_fpu(); ++ + if (size < __fxstate_size(16)) { + rc = fxregs_fixup(&fx_state, size); + if (rc != X86EMUL_CONTINUE) +@@ -4153,6 +4188,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) + rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); + + out: ++ emulator_put_fpu(); ++ + return rc; + } + +@@ -5212,16 +5249,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) + ctxt->ad_bytes = def_ad_bytes ^ 6; + break; + case 0x26: /* ES override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_ES; ++ break; + case 0x2e: /* CS override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_CS; ++ break; + case 0x36: /* SS override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_SS; ++ break; + case 0x3e: /* DS override */ + has_seg_override = true; +- ctxt->seg_override = (ctxt->b >> 3) & 3; ++ ctxt->seg_override = VCPU_SREG_DS; + break; + case 0x64: /* FS override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_FS; ++ break; + case 0x65: /* GS override */ + has_seg_override = true; +- ctxt->seg_override = ctxt->b & 7; ++ ctxt->seg_override = VCPU_SREG_GS; + break; + case 0x40 ... 0x4f: /* REX */ + if (mode != X86EMUL_MODE_PROT64) +@@ -5305,10 +5354,15 @@ done_prefixes: + } + break; + case Escape: +- if (ctxt->modrm > 0xbf) +- opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; +- else ++ if (ctxt->modrm > 0xbf) { ++ size_t size = ARRAY_SIZE(opcode.u.esc->high); ++ u32 index = array_index_nospec( ++ ctxt->modrm - 0xc0, size); ++ ++ opcode = opcode.u.esc->high[index]; ++ } else { + opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; ++ } + break; + case InstrDual: + if ((ctxt->modrm >> 6) == 3) +@@ -5450,7 +5504,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) + { + int rc; + ++ emulator_get_fpu(); + rc = asm_safe("fwait"); ++ emulator_put_fpu(); + + if (unlikely(rc != X86EMUL_CONTINUE)) + return emulate_exception(ctxt, MF_VECTOR, 0, false); +diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c +index 23ff65504d7e..26408434b9bc 100644 +--- a/arch/x86/kvm/hyperv.c ++++ b/arch/x86/kvm/hyperv.c +@@ -809,11 +809,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, + u32 index, u64 *pdata) + { + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; ++ size_t size = ARRAY_SIZE(hv->hv_crash_param); + +- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) ++ if (WARN_ON_ONCE(index >= size)) + return -EINVAL; + +- *pdata = hv->hv_crash_param[index]; ++ *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; + return 0; + } + +@@ -852,11 +853,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, + u32 index, u64 data) + { + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; ++ size_t size = ARRAY_SIZE(hv->hv_crash_param); + +- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) ++ if (WARN_ON_ONCE(index >= size)) + return -EINVAL; + +- hv->hv_crash_param[index] = data; ++ hv->hv_crash_param[array_index_nospec(index, size)] = data; + return 0; + } + +diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c +index 8b38bb4868a6..629a09ca9860 100644 +--- a/arch/x86/kvm/i8259.c ++++ b/arch/x86/kvm/i8259.c +@@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s, + switch (addr) { + case 0x20: + case 0x21: ++ pic_lock(s); ++ pic_ioport_write(&s->pics[0], 
addr, data); ++ pic_unlock(s); ++ break; + case 0xa0: + case 0xa1: + pic_lock(s); +- pic_ioport_write(&s->pics[addr >> 7], addr, data); ++ pic_ioport_write(&s->pics[1], addr, data); + pic_unlock(s); + break; + case 0x4d0: +diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c +index d859ae8890d0..24a6905d60ee 100644 +--- a/arch/x86/kvm/ioapic.c ++++ b/arch/x86/kvm/ioapic.c +@@ -36,6 +36,7 @@ + #include <linux/io.h> + #include <linux/slab.h> + #include <linux/export.h> ++#include <linux/nospec.h> + #include <asm/processor.h> + #include <asm/page.h> + #include <asm/current.h> +@@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, + default: + { + u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; +- u64 redir_content; ++ u64 redir_content = ~0ULL; + +- if (redir_index < IOAPIC_NUM_PINS) +- redir_content = +- ioapic->redirtbl[redir_index].bits; +- else +- redir_content = ~0ULL; ++ if (redir_index < IOAPIC_NUM_PINS) { ++ u32 index = array_index_nospec( ++ redir_index, IOAPIC_NUM_PINS); ++ ++ redir_content = ioapic->redirtbl[index].bits; ++ } + + result = (ioapic->ioregsel & 0x1) ? + (redir_content >> 32) & 0xffffffff : +@@ -291,6 +293,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) + + if (index >= IOAPIC_NUM_PINS) + return; ++ index = array_index_nospec(index, IOAPIC_NUM_PINS); + e = &ioapic->redirtbl[index]; + mask_before = e->fields.mask; + /* Preserve read-only fields */ +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index b29d00b661ff..15728971a430 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -1926,15 +1926,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT1: +- case APIC_LVTERR: ++ case APIC_LVTERR: { + /* TODO: Check vector */ ++ size_t size; ++ u32 index; ++ + if (!kvm_apic_sw_enabled(apic)) + val |= APIC_LVT_MASKED; +- +- val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; ++ size = ARRAY_SIZE(apic_lvt_mask); ++ index = array_index_nospec( ++ (reg - APIC_LVTT) >> 4, size); ++ val &= apic_lvt_mask[index]; + kvm_lapic_set_reg(apic, reg, val); +- + break; ++ } + + case APIC_LVTT: + if (!kvm_apic_sw_enabled(apic)) +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 2ce9da58611e..518100ea5ef4 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte) + * requires a full MMU zap). The flag is instead explicitly queried when + * checking for MMIO spte cache hits. 
+ */ +-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) ++#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) + + #define MMIO_SPTE_GEN_LOW_START 3 + #define MMIO_SPTE_GEN_LOW_END 11 + #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ + MMIO_SPTE_GEN_LOW_START) + +-#define MMIO_SPTE_GEN_HIGH_START 52 +-#define MMIO_SPTE_GEN_HIGH_END 61 ++#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT ++#define MMIO_SPTE_GEN_HIGH_END 62 + #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ + MMIO_SPTE_GEN_HIGH_START) ++ + static u64 generation_mmio_spte_mask(u64 gen) + { + u64 mask; + + WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); ++ BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); + + mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; + mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; +@@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte) + { + u64 gen; + +- spte &= ~shadow_mmio_mask; +- + gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; + gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; + return gen; +@@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + static u8 kvm_get_shadow_phys_bits(void) + { + /* +- * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected +- * in CPU detection code, but MKTME treats those reduced bits as +- * 'keyID' thus they are not reserved bits. Therefore for MKTME +- * we should still return physical address bits reported by CPUID. ++ * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected ++ * in CPU detection code, but the processor treats those reduced bits as ++ * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at ++ * the physical address bits reported by CPUID. + */ +- if (!boot_cpu_has(X86_FEATURE_TME) || +- WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) +- return boot_cpu_data.x86_phys_bits; ++ if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008)) ++ return cpuid_eax(0x80000008) & 0xff; + +- return cpuid_eax(0x80000008) & 0xff; ++ /* ++ * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with ++ * custom CPUID. Proceed with whatever the kernel found since these features ++ * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008). ++ */ ++ return boot_cpu_data.x86_phys_bits; + } + + static void kvm_mmu_reset_all_pte_masks(void) +@@ -1282,12 +1286,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, + return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); + } + +-static int host_mapping_level(struct kvm *kvm, gfn_t gfn) ++static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn) + { + unsigned long page_size; + int i, ret = 0; + +- page_size = kvm_host_page_size(kvm, gfn); ++ page_size = kvm_host_page_size(vcpu, gfn); + + for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + if (page_size >= KVM_HPAGE_SIZE(i)) +@@ -1337,7 +1341,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; + +- host_level = host_mapping_level(vcpu->kvm, large_gfn); ++ host_level = host_mapping_level(vcpu, large_gfn); + + if (host_level == PT_PAGE_TABLE_LEVEL) + return host_level; +@@ -3528,7 +3532,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) + * - true: let the vcpu to access on the same address again. + * - false: let the real page fault path to fix it. 
+ */ +-static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, ++static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level, + u32 error_code) + { + struct kvm_shadow_walk_iterator iterator; +@@ -3548,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, + do { + u64 new_spte; + +- for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) ++ for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) + if (!is_shadow_present_pte(spte) || + iterator.level < level) + break; +@@ -3626,7 +3630,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, + + } while (true); + +- trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, ++ trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, + spte, fault_handled); + walk_shadow_page_lockless_end(vcpu); + +@@ -3634,10 +3638,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, + } + + static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, +- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); ++ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, ++ bool *writable); + static int make_mmu_pages_available(struct kvm_vcpu *vcpu); + +-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, ++static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + gfn_t gfn, bool prefault) + { + int r; +@@ -3663,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); + } + +- if (fast_page_fault(vcpu, v, level, error_code)) ++ if (fast_page_fault(vcpu, gpa, level, error_code)) + return RET_PF_RETRY; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + smp_rmb(); + +- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) ++ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) + return RET_PF_RETRY; + +- if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) ++ if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r)) + return r; + + r = RET_PF_RETRY; +@@ -3683,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); +- r = __direct_map(vcpu, v, write, map_writable, level, pfn, ++ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, + prefault, false); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); +@@ -3981,7 +3986,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) + } + EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); + +-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, ++static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, + u32 access, struct x86_exception *exception) + { + if (exception) +@@ -3989,7 +3994,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, + return vaddr; + } + +-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, ++static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, + u32 access, + struct x86_exception *exception) + { +@@ -4149,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) + walk_shadow_page_lockless_end(vcpu); + } + +-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, ++static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, + u32 error_code, bool prefault) + { +- gfn_t gfn = gva >> PAGE_SHIFT; ++ gfn_t gfn = gpa >> PAGE_SHIFT; + int r; + +- 
pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); ++ /* Note, paging is disabled, ergo gva == gpa. */ ++ pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); + + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return RET_PF_EMULATE; +@@ -4167,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); + + +- return nonpaging_map(vcpu, gva & PAGE_MASK, ++ return nonpaging_map(vcpu, gpa & PAGE_MASK, + error_code, gfn, prefault); + } + +-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) ++static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ gfn_t gfn) + { + struct kvm_arch_async_pf arch; + +@@ -4180,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) + arch.direct_map = vcpu->arch.mmu->direct_map; + arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); + +- return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); ++ return kvm_setup_async_pf(vcpu, cr2_or_gpa, ++ kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + } + + static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, +- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) ++ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, ++ bool *writable) + { + struct kvm_memory_slot *slot; + bool async; +@@ -4204,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, + return false; /* *pfn has correct page already */ + + if (!prefault && kvm_can_do_async_pf(vcpu)) { +- trace_kvm_try_async_get_page(gva, gfn); ++ trace_kvm_try_async_get_page(cr2_or_gpa, gfn); + if (kvm_find_async_pf_gfn(vcpu, gfn)) { +- trace_kvm_async_pf_doublefault(gva, gfn); ++ trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); + kvm_make_request(KVM_REQ_APF_HALT, vcpu); + return true; +- } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) ++ } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn)) + return true; + } + +@@ -4222,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + { + int r = 1; + ++#ifndef CONFIG_X86_64 ++ /* A 64-bit CR2 should be impossible on 32-bit KVM. */ ++ if (WARN_ON_ONCE(fault_address >> 32)) ++ return -EFAULT; ++#endif ++ + vcpu->arch.l1tf_flush_l1d = true; + switch (vcpu->arch.apf.host_apf_reason) { + default: +@@ -4259,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) + return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); + } + +-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, ++static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + bool prefault) + { + kvm_pfn_t pfn; +@@ -5516,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) + return 0; + } + +-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, ++int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, + void *insn, int insn_len) + { + int r, emulation_type = 0; +@@ -5525,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, + /* With shadow page tables, fault_address contains a GVA or nGPA. 
*/ + if (vcpu->arch.mmu->direct_map) { + vcpu->arch.gpa_available = true; +- vcpu->arch.gpa_val = cr2; ++ vcpu->arch.gpa_val = cr2_or_gpa; + } + + r = RET_PF_INVALID; + if (unlikely(error_code & PFERR_RSVD_MASK)) { +- r = handle_mmio_page_fault(vcpu, cr2, direct); ++ r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); + if (r == RET_PF_EMULATE) + goto emulate; + } + + if (r == RET_PF_INVALID) { +- r = vcpu->arch.mmu->page_fault(vcpu, cr2, ++ r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, + lower_32_bits(error_code), + false); + WARN_ON(r == RET_PF_INVALID); +@@ -5556,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, + */ + if (vcpu->arch.mmu->direct_map && + (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { +- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); + return 1; + } + +@@ -5571,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, + * explicitly shadowing L1's page tables, i.e. unprotecting something + * for L1 isn't going to magically fix whatever issue cause L2 to fail. + */ +- if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) ++ if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) + emulation_type = EMULTYPE_ALLOW_RETRY; + emulate: + /* +@@ -5586,7 +5601,7 @@ emulate: + return 1; + } + +- return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, ++ return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, + insn_len); + } + EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); +@@ -6249,7 +6264,7 @@ static void kvm_set_mmio_spte_mask(void) + * If reserved bit is not supported, clear the present bit to disable + * mmio page fault. + */ +- if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) ++ if (shadow_phys_bits == 52) + mask &= ~1ull; + + kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); +diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h +index 7ca8831c7d1a..3c6522b84ff1 100644 +--- a/arch/x86/kvm/mmutrace.h ++++ b/arch/x86/kvm/mmutrace.h +@@ -249,13 +249,13 @@ TRACE_EVENT( + + TRACE_EVENT( + fast_page_fault, +- TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, ++ TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, + u64 *sptep, u64 old_spte, bool retry), +- TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), ++ TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), + + TP_STRUCT__entry( + __field(int, vcpu_id) +- __field(gva_t, gva) ++ __field(gpa_t, cr2_or_gpa) + __field(u32, error_code) + __field(u64 *, sptep) + __field(u64, old_spte) +@@ -265,7 +265,7 @@ TRACE_EVENT( + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; +- __entry->gva = gva; ++ __entry->cr2_or_gpa = cr2_or_gpa; + __entry->error_code = error_code; + __entry->sptep = sptep; + __entry->old_spte = old_spte; +@@ -273,9 +273,9 @@ TRACE_EVENT( + __entry->retry = retry; + ), + +- TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" ++ TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" + " new %llx spurious %d fixed %d", __entry->vcpu_id, +- __entry->gva, __print_flags(__entry->error_code, "|", ++ __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", + kvm_mmu_trace_pferr_flags), __entry->sptep, + __entry->old_spte, __entry->new_spte, + __spte_satisfied(old_spte), __spte_satisfied(new_spte) +diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c +index 25ce3edd1872..7f0059aa30e1 100644 +--- a/arch/x86/kvm/mtrr.c ++++ 
b/arch/x86/kvm/mtrr.c +@@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) + break; + case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: + *seg = 1; +- *unit = msr - MSR_MTRRfix16K_80000; ++ *unit = array_index_nospec( ++ msr - MSR_MTRRfix16K_80000, ++ MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); + break; + case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: + *seg = 2; +- *unit = msr - MSR_MTRRfix4K_C0000; ++ *unit = array_index_nospec( ++ msr - MSR_MTRRfix4K_C0000, ++ MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); + break; + default: + return false; +diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h +index 97b21e7fd013..c1d7b866a03f 100644 +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) + } + + /* +- * Fetch a guest pte for a guest virtual address ++ * Fetch a guest pte for a guest virtual address, or for an L2's GPA. + */ + static int FNAME(walk_addr_generic)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +- gva_t addr, u32 access) ++ gpa_t addr, u32 access) + { + int ret; + pt_element_t pte; +@@ -496,7 +496,7 @@ error: + } + + static int FNAME(walk_addr)(struct guest_walker *walker, +- struct kvm_vcpu *vcpu, gva_t addr, u32 access) ++ struct kvm_vcpu *vcpu, gpa_t addr, u32 access) + { + return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, + access); +@@ -611,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, + * If the guest tries to write a write-protected page, we need to + * emulate this operation, return 1 to indicate this case. + */ +-static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ++static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, + struct guest_walker *gw, + int write_fault, int hlevel, + kvm_pfn_t pfn, bool map_writable, bool prefault, +@@ -765,7 +765,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, + * Returns: 1 if we need to emulate the instruction, 0 otherwise, or + * a negative value on error. + */ +-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, ++static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, + bool prefault) + { + int write_fault = error_code & PFERR_WRITE_MASK; +@@ -945,18 +945,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) + spin_unlock(&vcpu->kvm->mmu_lock); + } + +-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, ++/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ ++static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access, + struct x86_exception *exception) + { + struct guest_walker walker; + gpa_t gpa = UNMAPPED_GVA; + int r; + +- r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); ++ r = FNAME(walk_addr)(&walker, vcpu, addr, access); + + if (r) { + gpa = gfn_to_gpa(walker.gfn); +- gpa |= vaddr & ~PAGE_MASK; ++ gpa |= addr & ~PAGE_MASK; + } else if (exception) + *exception = walker.fault; + +@@ -964,7 +965,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, + } + + #if PTTYPE != PTTYPE_EPT +-static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, ++/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. 
*/ ++static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr, + u32 access, + struct x86_exception *exception) + { +@@ -972,6 +974,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, + gpa_t gpa = UNMAPPED_GVA; + int r; + ++#ifndef CONFIG_X86_64 ++ /* A 64-bit GVA should be impossible on 32-bit KVM. */ ++ WARN_ON_ONCE(vaddr >> 32); ++#endif ++ + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); + + if (r) { +diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h +index 58265f761c3b..3fc98afd72a8 100644 +--- a/arch/x86/kvm/pmu.h ++++ b/arch/x86/kvm/pmu.h +@@ -2,6 +2,8 @@ + #ifndef __KVM_X86_PMU_H + #define __KVM_X86_PMU_H + ++#include <linux/nospec.h> ++ + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) + #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) + #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) +@@ -86,8 +88,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) + static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, + u32 base) + { +- if (msr >= base && msr < base + pmu->nr_arch_gp_counters) +- return &pmu->gp_counters[msr - base]; ++ if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { ++ u32 index = array_index_nospec(msr - base, ++ pmu->nr_arch_gp_counters); ++ ++ return &pmu->gp_counters[index]; ++ } + + return NULL; + } +@@ -97,8 +103,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) + { + int base = MSR_CORE_PERF_FIXED_CTR0; + +- if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) +- return &pmu->fixed_counters[msr - base]; ++ if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { ++ u32 index = array_index_nospec(msr - base, ++ pmu->nr_arch_fixed_counters); ++ ++ return &pmu->fixed_counters[index]; ++ } + + return NULL; + } +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index c5673bda4b66..8d1be7c61f10 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -5986,6 +5986,11 @@ static bool svm_has_wbinvd_exit(void) + return true; + } + ++static bool svm_pku_supported(void) ++{ ++ return false; ++} ++ + #define PRE_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_PRE_EXCEPT, } + #define POST_EX(exit) { .exit_code = (exit), \ +@@ -7278,6 +7283,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { + .xsaves_supported = svm_xsaves_supported, + .umip_emulated = svm_umip_emulated, + .pt_supported = svm_pt_supported, ++ .pku_supported = svm_pku_supported, + + .set_supported_cpuid = svm_set_supported_cpuid, + +diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h +index 7aa69716d516..283bdb7071af 100644 +--- a/arch/x86/kvm/vmx/capabilities.h ++++ b/arch/x86/kvm/vmx/capabilities.h +@@ -145,6 +145,11 @@ static inline bool vmx_umip_emulated(void) + SECONDARY_EXEC_DESC; + } + ++static inline bool vmx_pku_supported(void) ++{ ++ return boot_cpu_has(X86_FEATURE_PKU); ++} ++ + static inline bool cpu_has_vmx_rdtscp(void) + { + return vmcs_config.cpu_based_2nd_exec_ctrl & +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index d0523741fb03..931d3b5f3acd 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4663,8 +4663,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) + vmx_instruction_info, true, len, &gva)) + return 1; + /* _system ok, nested_vmx_check_permission has verified cpl=0 */ +- if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) ++ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) { + 
kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } + } + + return nested_vmx_succeed(vcpu); +diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c +index 3e9c059099e9..f8998a7bc7d5 100644 +--- a/arch/x86/kvm/vmx/pmu_intel.c ++++ b/arch/x86/kvm/vmx/pmu_intel.c +@@ -84,10 +84,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, + + static unsigned intel_find_fixed_event(int idx) + { +- if (idx >= ARRAY_SIZE(fixed_pmc_events)) ++ u32 event; ++ size_t size = ARRAY_SIZE(fixed_pmc_events); ++ ++ if (idx >= size) + return PERF_COUNT_HW_MAX; + +- return intel_arch_events[fixed_pmc_events[idx]].event_type; ++ event = fixed_pmc_events[array_index_nospec(idx, size)]; ++ return intel_arch_events[event].event_type; + } + + /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ +@@ -128,16 +132,20 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + bool fixed = idx & (1u << 30); + struct kvm_pmc *counters; ++ unsigned int num_counters; + + idx &= ~(3u << 30); +- if (!fixed && idx >= pmu->nr_arch_gp_counters) +- return NULL; +- if (fixed && idx >= pmu->nr_arch_fixed_counters) ++ if (fixed) { ++ counters = pmu->fixed_counters; ++ num_counters = pmu->nr_arch_fixed_counters; ++ } else { ++ counters = pmu->gp_counters; ++ num_counters = pmu->nr_arch_gp_counters; ++ } ++ if (idx >= num_counters) + return NULL; +- counters = fixed ? pmu->fixed_counters : pmu->gp_counters; + *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; +- +- return &counters[idx]; ++ return &counters[array_index_nospec(idx, num_counters)]; + } + + static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index f09a213fd5cb..dc7c166c4335 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -2140,6 +2140,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_num_address_ranges))) + return 1; ++ if (is_noncanonical_address(data, vcpu)) ++ return 1; + if (index % 2) + vmx->pt_desc.guest.addr_b[index / 2] = data; + else +@@ -7865,6 +7867,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { + .xsaves_supported = vmx_xsaves_supported, + .umip_emulated = vmx_umip_emulated, + .pt_supported = vmx_pt_supported, ++ .pku_supported = vmx_pku_supported, + + .request_immediate_exit = vmx_request_immediate_exit, + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 8d82ec0482fc..edde5ee8c6f5 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -92,6 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); + static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); + #endif + ++static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; ++ + #define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ + #define VCPU_STAT(x, ...) 
offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ + +@@ -886,9 +888,38 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) + } + EXPORT_SYMBOL_GPL(kvm_set_xcr); + ++static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) ++{ ++ u64 reserved_bits = CR4_RESERVED_BITS; ++ ++ if (!cpu_has(c, X86_FEATURE_XSAVE)) ++ reserved_bits |= X86_CR4_OSXSAVE; ++ ++ if (!cpu_has(c, X86_FEATURE_SMEP)) ++ reserved_bits |= X86_CR4_SMEP; ++ ++ if (!cpu_has(c, X86_FEATURE_SMAP)) ++ reserved_bits |= X86_CR4_SMAP; ++ ++ if (!cpu_has(c, X86_FEATURE_FSGSBASE)) ++ reserved_bits |= X86_CR4_FSGSBASE; ++ ++ if (!cpu_has(c, X86_FEATURE_PKU)) ++ reserved_bits |= X86_CR4_PKE; ++ ++ if (!cpu_has(c, X86_FEATURE_LA57) && ++ !(cpuid_ecx(0x7) & bit(X86_FEATURE_LA57))) ++ reserved_bits |= X86_CR4_LA57; ++ ++ if (!cpu_has(c, X86_FEATURE_UMIP) && !kvm_x86_ops->umip_emulated()) ++ reserved_bits |= X86_CR4_UMIP; ++ ++ return reserved_bits; ++} ++ + static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) + { +- if (cr4 & CR4_RESERVED_BITS) ++ if (cr4 & cr4_reserved_bits) + return -EINVAL; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) +@@ -1054,9 +1085,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) + + static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) + { ++ size_t size = ARRAY_SIZE(vcpu->arch.db); ++ + switch (dr) { + case 0 ... 3: +- vcpu->arch.db[dr] = val; ++ vcpu->arch.db[array_index_nospec(dr, size)] = val; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = val; + break; +@@ -1093,9 +1126,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); + + int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) + { ++ size_t size = ARRAY_SIZE(vcpu->arch.db); ++ + switch (dr) { + case 0 ... 3: +- *val = vcpu->arch.db[dr]; ++ *val = vcpu->arch.db[array_index_nospec(dr, size)]; + break; + case 4: + /* fall through */ +@@ -2490,7 +2525,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + default: + if (msr >= MSR_IA32_MC0_CTL && + msr < MSR_IA32_MCx_CTL(bank_num)) { +- u32 offset = msr - MSR_IA32_MC0_CTL; ++ u32 offset = array_index_nospec( ++ msr - MSR_IA32_MC0_CTL, ++ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); ++ + /* only 0 or all 1s can be written to IA32_MCi_CTL + * some Linux kernels though clear bit 10 in bank 4 to + * workaround a BIOS/GART TBL issue on AMD K8s, ignore +@@ -2586,45 +2624,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) + + static void record_steal_time(struct kvm_vcpu *vcpu) + { ++ struct kvm_host_map map; ++ struct kvm_steal_time *st; ++ + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + +- if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, +- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) ++ /* -EAGAIN is returned in atomic context so we can just return. */ ++ if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, ++ &map, &vcpu->arch.st.cache, false)) + return; + ++ st = map.hva + ++ offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); ++ + /* + * Doing a TLB flush here, on the guest's behalf, can avoid + * expensive IPIs. 
+ */ + trace_kvm_pv_tlb_flush(vcpu->vcpu_id, +- vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB); +- if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) ++ st->preempted & KVM_VCPU_FLUSH_TLB); ++ if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) + kvm_vcpu_flush_tlb(vcpu, false); + +- if (vcpu->arch.st.steal.version & 1) +- vcpu->arch.st.steal.version += 1; /* first time write, random junk */ ++ vcpu->arch.st.preempted = 0; + +- vcpu->arch.st.steal.version += 1; ++ if (st->version & 1) ++ st->version += 1; /* first time write, random junk */ + +- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, +- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++ st->version += 1; + + smp_wmb(); + +- vcpu->arch.st.steal.steal += current->sched_info.run_delay - ++ st->steal += current->sched_info.run_delay - + vcpu->arch.st.last_steal; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + +- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, +- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); +- + smp_wmb(); + +- vcpu->arch.st.steal.version += 1; ++ st->version += 1; + +- kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, +- &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++ kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); + } + + int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +@@ -2777,11 +2817,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + if (data & KVM_STEAL_RESERVED_MASK) + return 1; + +- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, +- data & KVM_STEAL_VALID_BITS, +- sizeof(struct kvm_steal_time))) +- return 1; +- + vcpu->arch.st.msr_val = data; + + if (!(data & KVM_MSR_ENABLED)) +@@ -2917,7 +2952,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) + default: + if (msr >= MSR_IA32_MC0_CTL && + msr < MSR_IA32_MCx_CTL(bank_num)) { +- u32 offset = msr - MSR_IA32_MC0_CTL; ++ u32 offset = array_index_nospec( ++ msr - MSR_IA32_MC0_CTL, ++ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); ++ + data = vcpu->arch.mce_banks[offset]; + break; + } +@@ -3443,10 +3481,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + + kvm_x86_ops->vcpu_load(vcpu, cpu); + +- fpregs_assert_state_consistent(); +- if (test_thread_flag(TIF_NEED_FPU_LOAD)) +- switch_fpu_return(); +- + /* Apply any externally detected TSC adjustments (due to suspend) */ + if (unlikely(vcpu->arch.tsc_offset_adjustment)) { + adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); +@@ -3486,15 +3520,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + + static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) + { ++ struct kvm_host_map map; ++ struct kvm_steal_time *st; ++ + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + +- vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; ++ if (vcpu->arch.st.preempted) ++ return; ++ ++ if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, ++ &vcpu->arch.st.cache, true)) ++ return; + +- kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, +- &vcpu->arch.st.steal.preempted, +- offsetof(struct kvm_steal_time, preempted), +- sizeof(vcpu->arch.st.steal.preempted)); ++ st = map.hva + ++ offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); ++ ++ st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; ++ ++ kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); + } + + void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +@@ -6365,11 +6409,11 @@ static int 
handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) + return 1; + } + +-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, ++static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + bool write_fault_to_shadow_pgtable, + int emulation_type) + { +- gpa_t gpa = cr2; ++ gpa_t gpa = cr2_or_gpa; + kvm_pfn_t pfn; + + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) +@@ -6383,7 +6427,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, + * Write permission should be allowed since only + * write access need to be emulated. + */ +- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); + + /* + * If the mapping is invalid in guest, let cpu retry +@@ -6440,10 +6484,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, + } + + static bool retry_instruction(struct x86_emulate_ctxt *ctxt, +- unsigned long cr2, int emulation_type) ++ gpa_t cr2_or_gpa, int emulation_type) + { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); +- unsigned long last_retry_eip, last_retry_addr, gpa = cr2; ++ unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; + + last_retry_eip = vcpu->arch.last_retry_eip; + last_retry_addr = vcpu->arch.last_retry_addr; +@@ -6472,14 +6516,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, + if (x86_page_table_writing_insn(ctxt)) + return false; + +- if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) ++ if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) + return false; + + vcpu->arch.last_retry_eip = ctxt->eip; +- vcpu->arch.last_retry_addr = cr2; ++ vcpu->arch.last_retry_addr = cr2_or_gpa; + + if (!vcpu->arch.mmu->direct_map) +- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); + + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + +@@ -6625,11 +6669,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) + return false; + } + +-int x86_emulate_instruction(struct kvm_vcpu *vcpu, +- unsigned long cr2, +- int emulation_type, +- void *insn, +- int insn_len) ++int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ int emulation_type, void *insn, int insn_len) + { + int r; + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; +@@ -6675,8 +6716,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } +- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, +- emulation_type)) ++ if (reexecute_instruction(vcpu, cr2_or_gpa, ++ write_fault_to_spt, ++ emulation_type)) + return 1; + if (ctxt->have_exception) { + /* +@@ -6710,7 +6752,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, + return 1; + } + +- if (retry_instruction(ctxt, cr2, emulation_type)) ++ if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) + return 1; + + /* this is needed for vmware backdoor interface to work since it +@@ -6722,7 +6764,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, + + restart: + /* Save the faulting GPA (cr2) in the address field */ +- ctxt->exception.address = cr2; ++ ctxt->exception.address = cr2_or_gpa; + + r = x86_emulate_insn(ctxt); + +@@ -6730,7 +6772,7 @@ restart: + return 1; + + if (r == EMULATION_FAILED) { +- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, ++ if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt, + emulation_type)) + return 1; + +@@ -8172,8 +8214,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) + 
trace_kvm_entry(vcpu->vcpu_id); + guest_enter_irqoff(); + +- /* The preempt notifier should have taken care of the FPU already. */ +- WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); ++ fpregs_assert_state_consistent(); ++ if (test_thread_flag(TIF_NEED_FPU_LOAD)) ++ switch_fpu_return(); + + if (unlikely(vcpu->arch.switch_db_regs)) { + set_debugreg(0, 7); +@@ -8445,12 +8488,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) + return 0; + } + ++static void kvm_save_current_fpu(struct fpu *fpu) ++{ ++ /* ++ * If the target FPU state is not resident in the CPU registers, just ++ * memcpy() from current, else save CPU state directly to the target. ++ */ ++ if (test_thread_flag(TIF_NEED_FPU_LOAD)) ++ memcpy(&fpu->state, &current->thread.fpu.state, ++ fpu_kernel_xstate_size); ++ else ++ copy_fpregs_to_fpstate(fpu); ++} ++ + /* Swap (qemu) user FPU context for the guest FPU context. */ + static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) + { + fpregs_lock(); + +- copy_fpregs_to_fpstate(vcpu->arch.user_fpu); ++ kvm_save_current_fpu(vcpu->arch.user_fpu); ++ + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, + ~XFEATURE_MASK_PKRU); +@@ -8466,7 +8523,8 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) + { + fpregs_lock(); + +- copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); ++ kvm_save_current_fpu(vcpu->arch.guest_fpu); ++ + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); + + fpregs_mark_activate(); +@@ -8688,6 +8746,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) + { + vcpu_load(vcpu); ++ if (kvm_mpx_supported()) ++ kvm_load_guest_fpu(vcpu); + + kvm_apic_accept_events(vcpu); + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && +@@ -8696,6 +8756,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + else + mp_state->mp_state = vcpu->arch.mp_state; + ++ if (kvm_mpx_supported()) ++ kvm_put_guest_fpu(vcpu); + vcpu_put(vcpu); + return 0; + } +@@ -9055,6 +9117,9 @@ static void fx_init(struct kvm_vcpu *vcpu) + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) + { + void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; ++ struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; ++ ++ kvm_release_pfn(cache->pfn, cache->dirty, cache); + + kvmclock_reset(vcpu); + +@@ -9125,7 +9190,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); + +- kvm_x86_ops->vcpu_free(vcpu); ++ kvm_arch_vcpu_free(vcpu); + } + + void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) +@@ -9317,6 +9382,8 @@ int kvm_arch_hardware_setup(void) + if (r != 0) + return r; + ++ cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); ++ + if (kvm_has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that +@@ -9719,11 +9786,18 @@ out_free: + + void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) + { ++ struct kvm_vcpu *vcpu; ++ int i; ++ + /* + * memslots->generation has been incremented. + * mmio generation may have reached its maximum value.
+ */ + kvm_mmu_invalidate_mmio_sptes(kvm, gen); ++ ++ /* Force re-initialization of steal_time cache */ ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_vcpu_kick(vcpu); + } + + int kvm_arch_prepare_memory_region(struct kvm *kvm, +@@ -9975,7 +10049,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) + work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) + return; + +- vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); ++ vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); + } + + static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) +@@ -10088,7 +10162,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + { + struct x86_exception fault; + +- trace_kvm_async_pf_not_present(work->arch.token, work->gva); ++ trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa); + kvm_add_async_pf_gfn(vcpu, work->arch.gfn); + + if (kvm_can_deliver_async_pf(vcpu) && +@@ -10123,7 +10197,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + work->arch.token = ~0; /* broadcast wakeup */ + else + kvm_del_async_pf_gfn(vcpu, work->arch.gfn); +- trace_kvm_async_pf_ready(work->arch.token, work->gva); ++ trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); + + if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && + !apf_get_user(vcpu, &val)) { +diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h +index dbf7442a822b..de6b55484876 100644 +--- a/arch/x86/kvm/x86.h ++++ b/arch/x86/kvm/x86.h +@@ -286,7 +286,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); + bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, + int page_num); + bool kvm_vector_hashing_enabled(void); +-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, ++int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + int emulation_type, void *insn, int insn_len); + + #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ +diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c +index 5bfea374a160..6ea215cdeada 100644 +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -1215,6 +1215,7 @@ asmlinkage __visible void __init xen_start_kernel(void) + x86_platform.get_nmi_reason = xen_get_nmi_reason; + + x86_init.resources.memory_setup = xen_memory_setup; ++ x86_init.irqs.intr_mode_select = x86_init_noop; + x86_init.irqs.intr_mode_init = x86_init_noop; + x86_init.oem.arch_setup = xen_arch_setup; + x86_init.oem.banner = xen_banner; +diff --git a/crypto/algapi.c b/crypto/algapi.c +index de30ddc952d8..bb8329e49956 100644 +--- a/crypto/algapi.c ++++ b/crypto/algapi.c +@@ -257,6 +257,7 @@ void crypto_alg_tested(const char *name, int err) + struct crypto_alg *alg; + struct crypto_alg *q; + LIST_HEAD(list); ++ bool best; + + down_write(&crypto_alg_sem); + list_for_each_entry(q, &crypto_alg_list, cra_list) { +@@ -280,6 +281,21 @@ found: + + alg->cra_flags |= CRYPTO_ALG_TESTED; + ++ /* Only satisfy larval waiters if we are the best. 
*/ ++ best = true; ++ list_for_each_entry(q, &crypto_alg_list, cra_list) { ++ if (crypto_is_moribund(q) || !crypto_is_larval(q)) ++ continue; ++ ++ if (strcmp(alg->cra_name, q->cra_name)) ++ continue; ++ ++ if (q->cra_priority > alg->cra_priority) { ++ best = false; ++ break; ++ } ++ } ++ + list_for_each_entry(q, &crypto_alg_list, cra_list) { + if (q == alg) + continue; +@@ -303,10 +319,12 @@ found: + continue; + if ((q->cra_flags ^ alg->cra_flags) & larval->mask) + continue; +- if (!crypto_mod_get(alg)) +- continue; + +- larval->adult = alg; ++ if (best && crypto_mod_get(alg)) ++ larval->adult = alg; ++ else ++ larval->adult = ERR_PTR(-EAGAIN); ++ + continue; + } + +@@ -669,11 +687,9 @@ EXPORT_SYMBOL_GPL(crypto_grab_spawn); + + void crypto_drop_spawn(struct crypto_spawn *spawn) + { +- if (!spawn->alg) +- return; +- + down_write(&crypto_alg_sem); +- list_del(&spawn->list); ++ if (spawn->alg) ++ list_del(&spawn->list); + up_write(&crypto_alg_sem); + } + EXPORT_SYMBOL_GPL(crypto_drop_spawn); +@@ -681,22 +697,16 @@ EXPORT_SYMBOL_GPL(crypto_drop_spawn); + static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn) + { + struct crypto_alg *alg; +- struct crypto_alg *alg2; + + down_read(&crypto_alg_sem); + alg = spawn->alg; +- alg2 = alg; +- if (alg2) +- alg2 = crypto_mod_get(alg2); +- up_read(&crypto_alg_sem); +- +- if (!alg2) { +- if (alg) +- crypto_shoot_alg(alg); +- return ERR_PTR(-EAGAIN); ++ if (alg && !crypto_mod_get(alg)) { ++ alg->cra_flags |= CRYPTO_ALG_DYING; ++ alg = NULL; + } ++ up_read(&crypto_alg_sem); + +- return alg; ++ return alg ?: ERR_PTR(-EAGAIN); + } + + struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, +diff --git a/crypto/api.c b/crypto/api.c +index d8ba54142620..eda0c56b8615 100644 +--- a/crypto/api.c ++++ b/crypto/api.c +@@ -97,7 +97,7 @@ static void crypto_larval_destroy(struct crypto_alg *alg) + struct crypto_larval *larval = (void *)alg; + + BUG_ON(!crypto_is_larval(alg)); +- if (larval->adult) ++ if (!IS_ERR_OR_NULL(larval->adult)) + crypto_mod_put(larval->adult); + kfree(larval); + } +@@ -178,6 +178,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) + alg = ERR_PTR(-ETIMEDOUT); + else if (!alg) + alg = ERR_PTR(-ENOENT); ++ else if (IS_ERR(alg)) ++ ; + else if (crypto_is_test_larval(larval) && + !(alg->cra_flags & CRYPTO_ALG_TESTED)) + alg = ERR_PTR(-EAGAIN); +@@ -344,13 +346,12 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) + return len; + } + +-void crypto_shoot_alg(struct crypto_alg *alg) ++static void crypto_shoot_alg(struct crypto_alg *alg) + { + down_write(&crypto_alg_sem); + alg->cra_flags |= CRYPTO_ALG_DYING; + up_write(&crypto_alg_sem); + } +-EXPORT_SYMBOL_GPL(crypto_shoot_alg); + + struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, + u32 mask) +diff --git a/crypto/internal.h b/crypto/internal.h +index 93df7bec844a..e506a57e2243 100644 +--- a/crypto/internal.h ++++ b/crypto/internal.h +@@ -68,7 +68,6 @@ void crypto_alg_tested(const char *name, int err); + void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, + struct crypto_alg *nalg); + void crypto_remove_final(struct list_head *list); +-void crypto_shoot_alg(struct crypto_alg *alg); + struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, + u32 mask); + void *crypto_create_tfm(struct crypto_alg *alg, +diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c +index 81bbea7f2ba6..a4f3b3f342c8 100644 +--- a/crypto/pcrypt.c ++++ b/crypto/pcrypt.c +@@ -24,6 +24,8 @@ 
static struct kset *pcrypt_kset; + + struct pcrypt_instance_ctx { + struct crypto_aead_spawn spawn; ++ struct padata_shell *psenc; ++ struct padata_shell *psdec; + atomic_t tfm_count; + }; + +@@ -32,6 +34,12 @@ struct pcrypt_aead_ctx { + unsigned int cb_cpu; + }; + ++static inline struct pcrypt_instance_ctx *pcrypt_tfm_ictx( ++ struct crypto_aead *tfm) ++{ ++ return aead_instance_ctx(aead_alg_instance(tfm)); ++} ++ + static int pcrypt_aead_setkey(struct crypto_aead *parent, + const u8 *key, unsigned int keylen) + { +@@ -63,7 +71,6 @@ static void pcrypt_aead_done(struct crypto_async_request *areq, int err) + struct padata_priv *padata = pcrypt_request_padata(preq); + + padata->info = err; +- req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + padata_do_serial(padata); + } +@@ -90,6 +97,9 @@ static int pcrypt_aead_encrypt(struct aead_request *req) + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead); + u32 flags = aead_request_flags(req); ++ struct pcrypt_instance_ctx *ictx; ++ ++ ictx = pcrypt_tfm_ictx(aead); + + memset(padata, 0, sizeof(struct padata_priv)); + +@@ -103,7 +113,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req) + req->cryptlen, req->iv); + aead_request_set_ad(creq, req->assoclen); + +- err = padata_do_parallel(pencrypt, padata, &ctx->cb_cpu); ++ err = padata_do_parallel(ictx->psenc, padata, &ctx->cb_cpu); + if (!err) + return -EINPROGRESS; + +@@ -132,6 +142,9 @@ static int pcrypt_aead_decrypt(struct aead_request *req) + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead); + u32 flags = aead_request_flags(req); ++ struct pcrypt_instance_ctx *ictx; ++ ++ ictx = pcrypt_tfm_ictx(aead); + + memset(padata, 0, sizeof(struct padata_priv)); + +@@ -145,7 +158,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req) + req->cryptlen, req->iv); + aead_request_set_ad(creq, req->assoclen); + +- err = padata_do_parallel(pdecrypt, padata, &ctx->cb_cpu); ++ err = padata_do_parallel(ictx->psdec, padata, &ctx->cb_cpu); + if (!err) + return -EINPROGRESS; + +@@ -192,6 +205,8 @@ static void pcrypt_free(struct aead_instance *inst) + struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); + + crypto_drop_aead(&ctx->spawn); ++ padata_free_shell(ctx->psdec); ++ padata_free_shell(ctx->psenc); + kfree(inst); + } + +@@ -233,12 +248,22 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, + if (!inst) + return -ENOMEM; + ++ err = -ENOMEM; ++ + ctx = aead_instance_ctx(inst); ++ ctx->psenc = padata_alloc_shell(pencrypt); ++ if (!ctx->psenc) ++ goto out_free_inst; ++ ++ ctx->psdec = padata_alloc_shell(pdecrypt); ++ if (!ctx->psdec) ++ goto out_free_psenc; ++ + crypto_set_aead_spawn(&ctx->spawn, aead_crypto_instance(inst)); + + err = crypto_grab_aead(&ctx->spawn, name, 0, 0); + if (err) +- goto out_free_inst; ++ goto out_free_psdec; + + alg = crypto_spawn_aead_alg(&ctx->spawn); + err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base); +@@ -271,6 +296,10 @@ out: + + out_drop_aead: + crypto_drop_aead(&ctx->spawn); ++out_free_psdec: ++ padata_free_shell(ctx->psdec); ++out_free_psenc: ++ padata_free_shell(ctx->psenc); + out_free_inst: + kfree(inst); + goto out; +diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c +index 558fedf8a7a1..254a7d98b9d4 100644 +--- a/drivers/acpi/battery.c ++++ b/drivers/acpi/battery.c +@@ -38,6 +38,8 @@ + #define PREFIX "ACPI: " + + #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF ++#define 
ACPI_BATTERY_CAPACITY_VALID(capacity) \ ++ ((capacity) != 0 && (capacity) != ACPI_BATTERY_VALUE_UNKNOWN) + + #define ACPI_BATTERY_DEVICE_NAME "Battery" + +@@ -192,7 +194,8 @@ static int acpi_battery_is_charged(struct acpi_battery *battery) + + static bool acpi_battery_is_degraded(struct acpi_battery *battery) + { +- return battery->full_charge_capacity && battery->design_capacity && ++ return ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) && ++ ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity) && + battery->full_charge_capacity < battery->design_capacity; + } + +@@ -214,7 +217,7 @@ static int acpi_battery_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) + { +- int ret = 0; ++ int full_capacity = ACPI_BATTERY_VALUE_UNKNOWN, ret = 0; + struct acpi_battery *battery = to_acpi_battery(psy); + + if (acpi_battery_present(battery)) { +@@ -263,14 +266,14 @@ static int acpi_battery_get_property(struct power_supply *psy, + break; + case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: + case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: +- if (battery->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN) ++ if (!ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) + ret = -ENODEV; + else + val->intval = battery->design_capacity * 1000; + break; + case POWER_SUPPLY_PROP_CHARGE_FULL: + case POWER_SUPPLY_PROP_ENERGY_FULL: +- if (battery->full_charge_capacity == ACPI_BATTERY_VALUE_UNKNOWN) ++ if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity)) + ret = -ENODEV; + else + val->intval = battery->full_charge_capacity * 1000; +@@ -283,11 +286,17 @@ static int acpi_battery_get_property(struct power_supply *psy, + val->intval = battery->capacity_now * 1000; + break; + case POWER_SUPPLY_PROP_CAPACITY: +- if (battery->capacity_now && battery->full_charge_capacity) +- val->intval = battery->capacity_now * 100/ +- battery->full_charge_capacity; ++ if (ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity)) ++ full_capacity = battery->full_charge_capacity; ++ else if (ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) ++ full_capacity = battery->design_capacity; ++ ++ if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN || ++ full_capacity == ACPI_BATTERY_VALUE_UNKNOWN) ++ ret = -ENODEV; + else +- val->intval = 0; ++ val->intval = battery->capacity_now * 100/ ++ full_capacity; + break; + case POWER_SUPPLY_PROP_CAPACITY_LEVEL: + if (battery->state & ACPI_BATTERY_STATE_CRITICAL) +@@ -333,6 +342,20 @@ static enum power_supply_property charge_battery_props[] = { + POWER_SUPPLY_PROP_SERIAL_NUMBER, + }; + ++static enum power_supply_property charge_battery_full_cap_broken_props[] = { ++ POWER_SUPPLY_PROP_STATUS, ++ POWER_SUPPLY_PROP_PRESENT, ++ POWER_SUPPLY_PROP_TECHNOLOGY, ++ POWER_SUPPLY_PROP_CYCLE_COUNT, ++ POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, ++ POWER_SUPPLY_PROP_VOLTAGE_NOW, ++ POWER_SUPPLY_PROP_CURRENT_NOW, ++ POWER_SUPPLY_PROP_CHARGE_NOW, ++ POWER_SUPPLY_PROP_MODEL_NAME, ++ POWER_SUPPLY_PROP_MANUFACTURER, ++ POWER_SUPPLY_PROP_SERIAL_NUMBER, ++}; ++ + static enum power_supply_property energy_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, +@@ -794,20 +817,34 @@ static void __exit battery_hook_exit(void) + static int sysfs_add_battery(struct acpi_battery *battery) + { + struct power_supply_config psy_cfg = { .drv_data = battery, }; ++ bool full_cap_broken = false; ++ ++ if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) && ++ !ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) ++ full_cap_broken = 
true; + + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { +- battery->bat_desc.properties = charge_battery_props; +- battery->bat_desc.num_properties = +- ARRAY_SIZE(charge_battery_props); +- } else if (battery->full_charge_capacity == 0) { +- battery->bat_desc.properties = +- energy_battery_full_cap_broken_props; +- battery->bat_desc.num_properties = +- ARRAY_SIZE(energy_battery_full_cap_broken_props); ++ if (full_cap_broken) { ++ battery->bat_desc.properties = ++ charge_battery_full_cap_broken_props; ++ battery->bat_desc.num_properties = ++ ARRAY_SIZE(charge_battery_full_cap_broken_props); ++ } else { ++ battery->bat_desc.properties = charge_battery_props; ++ battery->bat_desc.num_properties = ++ ARRAY_SIZE(charge_battery_props); ++ } + } else { +- battery->bat_desc.properties = energy_battery_props; +- battery->bat_desc.num_properties = +- ARRAY_SIZE(energy_battery_props); ++ if (full_cap_broken) { ++ battery->bat_desc.properties = ++ energy_battery_full_cap_broken_props; ++ battery->bat_desc.num_properties = ++ ARRAY_SIZE(energy_battery_full_cap_broken_props); ++ } else { ++ battery->bat_desc.properties = energy_battery_props; ++ battery->bat_desc.num_properties = ++ ARRAY_SIZE(energy_battery_props); ++ } + } + + battery->bat_desc.name = acpi_device_bid(battery->device); +diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c +index 31014c7d3793..e63fd7bfd3a5 100644 +--- a/drivers/acpi/video_detect.c ++++ b/drivers/acpi/video_detect.c +@@ -336,6 +336,11 @@ static const struct dmi_system_id video_detect_dmi_table[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), + }, + }, ++ ++ /* ++ * Desktops which falsely report a backlight and which our heuristics ++ * for this do not catch. ++ */ + { + .callback = video_detect_force_none, + .ident = "Dell OptiPlex 9020M", +@@ -344,6 +349,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 9020M"), + }, + }, ++ { ++ .callback = video_detect_force_none, ++ .ident = "MSI MS-7721", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "MSI"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "MS-7721"), ++ }, ++ }, + { }, + }; + +diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c +index 134a8af51511..0e99a760aebd 100644 +--- a/drivers/base/power/main.c ++++ b/drivers/base/power/main.c +@@ -273,10 +273,38 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) + device_links_read_unlock(idx); + } + +-static void dpm_wait_for_superior(struct device *dev, bool async) ++static bool dpm_wait_for_superior(struct device *dev, bool async) + { +- dpm_wait(dev->parent, async); ++ struct device *parent; ++ ++ /* ++ * If the device is resumed asynchronously and the parent's callback ++ * deletes both the device and the parent itself, the parent object may ++ * be freed while this function is running, so avoid that by reference ++ * counting the parent once more unless the device has been deleted ++ * already (in which case return right away). ++ */ ++ mutex_lock(&dpm_list_mtx); ++ ++ if (!device_pm_initialized(dev)) { ++ mutex_unlock(&dpm_list_mtx); ++ return false; ++ } ++ ++ parent = get_device(dev->parent); ++ ++ mutex_unlock(&dpm_list_mtx); ++ ++ dpm_wait(parent, async); ++ put_device(parent); ++ + dpm_wait_for_suppliers(dev, async); ++ ++ /* ++ * If the parent's callback has deleted the device, attempting to resume ++ * it would be invalid, so avoid doing that then. 
++ */ ++ return device_pm_initialized(dev); + } + + static void dpm_wait_for_consumers(struct device *dev, bool async) +@@ -621,7 +649,8 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn + if (!dev->power.is_noirq_suspended) + goto Out; + +- dpm_wait_for_superior(dev, async); ++ if (!dpm_wait_for_superior(dev, async)) ++ goto Out; + + skip_resume = dev_pm_may_skip_resume(dev); + +@@ -829,7 +858,8 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn + if (!dev->power.is_late_suspended) + goto Out; + +- dpm_wait_for_superior(dev, async); ++ if (!dpm_wait_for_superior(dev, async)) ++ goto Out; + + callback = dpm_subsys_resume_early_cb(dev, state, &info); + +@@ -944,7 +974,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) + goto Complete; + } + +- dpm_wait_for_superior(dev, async); ++ if (!dpm_wait_for_superior(dev, async)) ++ goto Complete; ++ + dpm_watchdog_set(&wd, dev); + device_lock(dev); + +diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c +index 4e7ef35f1c8f..9c3b063e1a1f 100644 +--- a/drivers/bluetooth/btusb.c ++++ b/drivers/bluetooth/btusb.c +@@ -2850,7 +2850,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) + err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); + if (err < 0) { + bt_dev_err(hdev, "Failed to send wmt rst (%d)", err); +- return err; ++ goto err_release_fw; + } + + /* Wait a few moments for firmware activation done */ +@@ -3819,6 +3819,10 @@ static int btusb_probe(struct usb_interface *intf, + * (DEVICE_REMOTE_WAKEUP) + */ + set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); ++ ++ err = usb_autopm_get_interface(intf); ++ if (err < 0) ++ goto out_free_dev; + } + + if (id->driver_info & BTUSB_AMP) { +diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c +index 1ed85f120a1b..49b9f2f85bad 100644 +--- a/drivers/clk/tegra/clk-tegra-periph.c ++++ b/drivers/clk/tegra/clk-tegra-periph.c +@@ -785,7 +785,11 @@ static struct tegra_periph_init_data gate_clks[] = { + GATE("ahbdma", "hclk", 33, 0, tegra_clk_ahbdma, 0), + GATE("apbdma", "pclk", 34, 0, tegra_clk_apbdma, 0), + GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0), +- GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0), ++ /* ++ * Critical for RAM re-repair operation, which must occur on resume ++ * from LP1 system suspend and as part of CCPLEX cluster switching. 
++ */ ++ GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL), + GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0), + GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0), + GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0), +diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c +index 8d8da763adc5..8910fd1ae3c6 100644 +--- a/drivers/cpufreq/cppc_cpufreq.c ++++ b/drivers/cpufreq/cppc_cpufreq.c +@@ -217,7 +217,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, + return ret; + } + +-static int cppc_verify_policy(struct cpufreq_policy *policy) ++static int cppc_verify_policy(struct cpufreq_policy_data *policy) + { + cpufreq_verify_within_cpu_limits(policy); + return 0; +diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c +index cd53272e2fa2..f7a7bcf6f52e 100644 +--- a/drivers/cpufreq/cpufreq-nforce2.c ++++ b/drivers/cpufreq/cpufreq-nforce2.c +@@ -291,7 +291,7 @@ static int nforce2_target(struct cpufreq_policy *policy, + * nforce2_verify - verifies a new CPUFreq policy + * @policy: new policy + */ +-static int nforce2_verify(struct cpufreq_policy *policy) ++static int nforce2_verify(struct cpufreq_policy_data *policy) + { + unsigned int fsb_pol_max; + +diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c +index a7db4f22a077..7679f8a91745 100644 +--- a/drivers/cpufreq/cpufreq.c ++++ b/drivers/cpufreq/cpufreq.c +@@ -74,6 +74,9 @@ static void cpufreq_exit_governor(struct cpufreq_policy *policy); + static int cpufreq_start_governor(struct cpufreq_policy *policy); + static void cpufreq_stop_governor(struct cpufreq_policy *policy); + static void cpufreq_governor_limits(struct cpufreq_policy *policy); ++static int cpufreq_set_policy(struct cpufreq_policy *policy, ++ struct cpufreq_governor *new_gov, ++ unsigned int new_pol); + + /** + * Two notifier lists: the "policy" list is involved in the +@@ -613,25 +616,22 @@ static struct cpufreq_governor *find_governor(const char *str_governor) + return NULL; + } + +-static int cpufreq_parse_policy(char *str_governor, +- struct cpufreq_policy *policy) ++static unsigned int cpufreq_parse_policy(char *str_governor) + { +- if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { +- policy->policy = CPUFREQ_POLICY_PERFORMANCE; +- return 0; +- } +- if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { +- policy->policy = CPUFREQ_POLICY_POWERSAVE; +- return 0; +- } +- return -EINVAL; ++ if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) ++ return CPUFREQ_POLICY_PERFORMANCE; ++ ++ if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) ++ return CPUFREQ_POLICY_POWERSAVE; ++ ++ return CPUFREQ_POLICY_UNKNOWN; + } + + /** + * cpufreq_parse_governor - parse a governor string only for has_target() ++ * @str_governor: Governor name. 
+ */ +-static int cpufreq_parse_governor(char *str_governor, +- struct cpufreq_policy *policy) ++static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) + { + struct cpufreq_governor *t; + +@@ -645,7 +645,7 @@ static int cpufreq_parse_governor(char *str_governor, + + ret = request_module("cpufreq_%s", str_governor); + if (ret) +- return -EINVAL; ++ return NULL; + + mutex_lock(&cpufreq_governor_mutex); + +@@ -656,12 +656,7 @@ static int cpufreq_parse_governor(char *str_governor, + + mutex_unlock(&cpufreq_governor_mutex); + +- if (t) { +- policy->governor = t; +- return 0; +- } +- +- return -EINVAL; ++ return t; + } + + /** +@@ -762,28 +757,33 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) + static ssize_t store_scaling_governor(struct cpufreq_policy *policy, + const char *buf, size_t count) + { ++ char str_governor[16]; + int ret; +- char str_governor[16]; +- struct cpufreq_policy new_policy; +- +- memcpy(&new_policy, policy, sizeof(*policy)); + + ret = sscanf(buf, "%15s", str_governor); + if (ret != 1) + return -EINVAL; + + if (cpufreq_driver->setpolicy) { +- if (cpufreq_parse_policy(str_governor, &new_policy)) ++ unsigned int new_pol; ++ ++ new_pol = cpufreq_parse_policy(str_governor); ++ if (!new_pol) + return -EINVAL; ++ ++ ret = cpufreq_set_policy(policy, NULL, new_pol); + } else { +- if (cpufreq_parse_governor(str_governor, &new_policy)) ++ struct cpufreq_governor *new_gov; ++ ++ new_gov = cpufreq_parse_governor(str_governor); ++ if (!new_gov) + return -EINVAL; +- } + +- ret = cpufreq_set_policy(policy, &new_policy); ++ ret = cpufreq_set_policy(policy, new_gov, ++ CPUFREQ_POLICY_UNKNOWN); + +- if (new_policy.governor) +- module_put(new_policy.governor->owner); ++ module_put(new_gov->owner); ++ } + + return ret ? ret : count; + } +@@ -1050,40 +1050,33 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void) + + static int cpufreq_init_policy(struct cpufreq_policy *policy) + { +- struct cpufreq_governor *gov = NULL, *def_gov = NULL; +- struct cpufreq_policy new_policy; +- +- memcpy(&new_policy, policy, sizeof(*policy)); +- +- def_gov = cpufreq_default_governor(); ++ struct cpufreq_governor *def_gov = cpufreq_default_governor(); ++ struct cpufreq_governor *gov = NULL; ++ unsigned int pol = CPUFREQ_POLICY_UNKNOWN; + + if (has_target()) { +- /* +- * Update governor of new_policy to the governor used before +- * hotplug +- */ ++ /* Update policy governor to the one used before hotplug. */ + gov = find_governor(policy->last_governor); + if (gov) { + pr_debug("Restoring governor %s for cpu %d\n", +- policy->governor->name, policy->cpu); +- } else { +- if (!def_gov) +- return -ENODATA; ++ policy->governor->name, policy->cpu); ++ } else if (def_gov) { + gov = def_gov; ++ } else { ++ return -ENODATA; + } +- new_policy.governor = gov; + } else { + /* Use the default policy if there is no last_policy. 
*/ + if (policy->last_policy) { +- new_policy.policy = policy->last_policy; ++ pol = policy->last_policy; ++ } else if (def_gov) { ++ pol = cpufreq_parse_policy(def_gov->name); + } else { +- if (!def_gov) +- return -ENODATA; +- cpufreq_parse_policy(def_gov->name, &new_policy); ++ return -ENODATA; + } + } + +- return cpufreq_set_policy(policy, &new_policy); ++ return cpufreq_set_policy(policy, gov, pol); + } + + static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +@@ -1111,13 +1104,10 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp + + void refresh_frequency_limits(struct cpufreq_policy *policy) + { +- struct cpufreq_policy new_policy; +- + if (!policy_is_inactive(policy)) { +- new_policy = *policy; + pr_debug("updating policy for CPU %u\n", policy->cpu); + +- cpufreq_set_policy(policy, &new_policy); ++ cpufreq_set_policy(policy, policy->governor, policy->policy); + } + } + EXPORT_SYMBOL(refresh_frequency_limits); +@@ -2361,43 +2351,46 @@ EXPORT_SYMBOL(cpufreq_get_policy); + /** + * cpufreq_set_policy - Modify cpufreq policy parameters. + * @policy: Policy object to modify. +- * @new_policy: New policy data. ++ * @new_gov: Policy governor pointer. ++ * @new_pol: Policy value (for drivers with built-in governors). + * +- * Pass @new_policy to the cpufreq driver's ->verify() callback. Next, copy the +- * min and max parameters of @new_policy to @policy and either invoke the +- * driver's ->setpolicy() callback (if present) or carry out a governor update +- * for @policy. That is, run the current governor's ->limits() callback (if the +- * governor field in @new_policy points to the same object as the one in +- * @policy) or replace the governor for @policy with the new one stored in +- * @new_policy. ++ * Invoke the cpufreq driver's ->verify() callback to sanity-check the frequency ++ * limits to be set for the policy, update @policy with the verified limits ++ * values and either invoke the driver's ->setpolicy() callback (if present) or ++ * carry out a governor update for @policy. That is, run the current governor's ++ * ->limits() callback (if @new_gov points to the same object as the one in ++ * @policy) or replace the governor for @policy with @new_gov. + * + * The cpuinfo part of @policy is not updated by this function. + */ +-int cpufreq_set_policy(struct cpufreq_policy *policy, +- struct cpufreq_policy *new_policy) ++static int cpufreq_set_policy(struct cpufreq_policy *policy, ++ struct cpufreq_governor *new_gov, ++ unsigned int new_pol) + { ++ struct cpufreq_policy_data new_data; + struct cpufreq_governor *old_gov; + int ret; + +- pr_debug("setting new policy for CPU %u: %u - %u kHz\n", +- new_policy->cpu, new_policy->min, new_policy->max); +- +- memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); +- ++ memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); ++ new_data.freq_table = policy->freq_table; ++ new_data.cpu = policy->cpu; + /* + * PM QoS framework collects all the requests from users and provide us + * the final aggregated value here. 
+ */ +- new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN); +- new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX); ++ new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN); ++ new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX); ++ ++ pr_debug("setting new policy for CPU %u: %u - %u kHz\n", ++ new_data.cpu, new_data.min, new_data.max); + + /* verify the cpu speed can be set within this limit */ +- ret = cpufreq_driver->verify(new_policy); ++ ret = cpufreq_driver->verify(&new_data); + if (ret) + return ret; + +- policy->min = new_policy->min; +- policy->max = new_policy->max; ++ policy->min = new_data.min; ++ policy->max = new_data.max; + trace_cpu_frequency_limits(policy); + + policy->cached_target_freq = UINT_MAX; +@@ -2406,12 +2399,12 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, + policy->min, policy->max); + + if (cpufreq_driver->setpolicy) { +- policy->policy = new_policy->policy; ++ policy->policy = new_pol; + pr_debug("setting range\n"); + return cpufreq_driver->setpolicy(policy); + } + +- if (new_policy->governor == policy->governor) { ++ if (new_gov == policy->governor) { + pr_debug("governor limits update\n"); + cpufreq_governor_limits(policy); + return 0; +@@ -2428,7 +2421,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, + } + + /* start new governor */ +- policy->governor = new_policy->governor; ++ policy->governor = new_gov; + ret = cpufreq_init_governor(policy); + if (!ret) { + ret = cpufreq_start_governor(policy); +diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c +index ded427e0a488..e117b0059123 100644 +--- a/drivers/cpufreq/freq_table.c ++++ b/drivers/cpufreq/freq_table.c +@@ -60,7 +60,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, + return 0; + } + +-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, ++int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, + struct cpufreq_frequency_table *table) + { + struct cpufreq_frequency_table *pos; +@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify); + * Generic routine to verify policy & frequency table, requires driver to set + * policy->freq_table prior to it. + */ +-int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) ++int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) + { + if (!policy->freq_table) + return -ENODEV; +diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c +index e97b5733aa24..75b3ef7ec679 100644 +--- a/drivers/cpufreq/gx-suspmod.c ++++ b/drivers/cpufreq/gx-suspmod.c +@@ -328,7 +328,7 @@ static void gx_set_cpuspeed(struct cpufreq_policy *policy, unsigned int khz) + * for the hardware supported by the driver. 
+ */ + +-static int cpufreq_gx_verify(struct cpufreq_policy *policy) ++static int cpufreq_gx_verify(struct cpufreq_policy_data *policy) + { + unsigned int tmp_freq = 0; + u8 tmp1, tmp2; +diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c +index 8ab31702cf6a..45499e0b9f2f 100644 +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -2036,8 +2036,9 @@ static int intel_pstate_get_max_freq(struct cpudata *cpu) + cpu->pstate.max_freq : cpu->pstate.turbo_freq; + } + +-static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, +- struct cpudata *cpu) ++static void intel_pstate_update_perf_limits(struct cpudata *cpu, ++ unsigned int policy_min, ++ unsigned int policy_max) + { + int max_freq = intel_pstate_get_max_freq(cpu); + int32_t max_policy_perf, min_policy_perf; +@@ -2056,18 +2057,17 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, + turbo_max = cpu->pstate.turbo_pstate; + } + +- max_policy_perf = max_state * policy->max / max_freq; +- if (policy->max == policy->min) { ++ max_policy_perf = max_state * policy_max / max_freq; ++ if (policy_max == policy_min) { + min_policy_perf = max_policy_perf; + } else { +- min_policy_perf = max_state * policy->min / max_freq; ++ min_policy_perf = max_state * policy_min / max_freq; + min_policy_perf = clamp_t(int32_t, min_policy_perf, + 0, max_policy_perf); + } + + pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n", +- policy->cpu, max_state, +- min_policy_perf, max_policy_perf); ++ cpu->cpu, max_state, min_policy_perf, max_policy_perf); + + /* Normalize user input to [min_perf, max_perf] */ + if (per_cpu_limits) { +@@ -2081,7 +2081,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, + global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); + global_min = clamp_t(int32_t, global_min, 0, global_max); + +- pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu, ++ pr_debug("cpu:%d global_min:%d global_max:%d\n", cpu->cpu, + global_min, global_max); + + cpu->min_perf_ratio = max(min_policy_perf, global_min); +@@ -2094,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, + cpu->max_perf_ratio); + + } +- pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu, ++ pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", cpu->cpu, + cpu->max_perf_ratio, + cpu->min_perf_ratio); + } +@@ -2114,7 +2114,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) + + mutex_lock(&intel_pstate_limits_lock); + +- intel_pstate_update_perf_limits(policy, cpu); ++ intel_pstate_update_perf_limits(cpu, policy->min, policy->max); + + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { + /* +@@ -2143,8 +2143,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) + return 0; + } + +-static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, +- struct cpudata *cpu) ++static void intel_pstate_adjust_policy_max(struct cpudata *cpu, ++ struct cpufreq_policy_data *policy) + { + if (!hwp_active && + cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && +@@ -2155,7 +2155,7 @@ static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, + } + } + +-static int intel_pstate_verify_policy(struct cpufreq_policy *policy) ++static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy) + { + struct cpudata *cpu = all_cpu_data[policy->cpu]; + +@@ -2163,11 +2163,7 @@ static int intel_pstate_verify_policy(struct cpufreq_policy 
*policy) + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + intel_pstate_get_max_freq(cpu)); + +- if (policy->policy != CPUFREQ_POLICY_POWERSAVE && +- policy->policy != CPUFREQ_POLICY_PERFORMANCE) +- return -EINVAL; +- +- intel_pstate_adjust_policy_max(policy, cpu); ++ intel_pstate_adjust_policy_max(cpu, policy); + + return 0; + } +@@ -2268,7 +2264,7 @@ static struct cpufreq_driver intel_pstate = { + .name = "intel_pstate", + }; + +-static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) ++static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy) + { + struct cpudata *cpu = all_cpu_data[policy->cpu]; + +@@ -2276,9 +2272,9 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + intel_pstate_get_max_freq(cpu)); + +- intel_pstate_adjust_policy_max(policy, cpu); ++ intel_pstate_adjust_policy_max(cpu, policy); + +- intel_pstate_update_perf_limits(policy, cpu); ++ intel_pstate_update_perf_limits(cpu, policy->min, policy->max); + + return 0; + } +diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c +index 64b8689f7a4a..0b08be8bff76 100644 +--- a/drivers/cpufreq/longrun.c ++++ b/drivers/cpufreq/longrun.c +@@ -122,7 +122,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy) + * Validates a new CPUFreq policy. This function has to be called with + * cpufreq_driver locked. + */ +-static int longrun_verify_policy(struct cpufreq_policy *policy) ++static int longrun_verify_policy(struct cpufreq_policy_data *policy) + { + if (!policy) + return -EINVAL; +@@ -130,10 +130,6 @@ static int longrun_verify_policy(struct cpufreq_policy *policy) + policy->cpu = 0; + cpufreq_verify_within_cpu_limits(policy); + +- if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && +- (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) +- return -EINVAL; +- + return 0; + } + +diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c +index fdc767fdbe6a..f90273006553 100644 +--- a/drivers/cpufreq/pcc-cpufreq.c ++++ b/drivers/cpufreq/pcc-cpufreq.c +@@ -109,7 +109,7 @@ struct pcc_cpu { + + static struct pcc_cpu __percpu *pcc_cpu_info; + +-static int pcc_cpufreq_verify(struct cpufreq_policy *policy) ++static int pcc_cpufreq_verify(struct cpufreq_policy_data *policy) + { + cpufreq_verify_within_cpu_limits(policy); + return 0; +diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c +index 5096c0ab781b..0ac265d47ef0 100644 +--- a/drivers/cpufreq/sh-cpufreq.c ++++ b/drivers/cpufreq/sh-cpufreq.c +@@ -87,7 +87,7 @@ static int sh_cpufreq_target(struct cpufreq_policy *policy, + return work_on_cpu(policy->cpu, __sh_cpufreq_target, &data); + } + +-static int sh_cpufreq_verify(struct cpufreq_policy *policy) ++static int sh_cpufreq_verify(struct cpufreq_policy_data *policy) + { + struct clk *cpuclk = &per_cpu(sh_cpuclk, policy->cpu); + struct cpufreq_frequency_table *freq_table; +diff --git a/drivers/cpufreq/unicore2-cpufreq.c b/drivers/cpufreq/unicore2-cpufreq.c +index 707dbc1b7ac8..98d392196df2 100644 +--- a/drivers/cpufreq/unicore2-cpufreq.c ++++ b/drivers/cpufreq/unicore2-cpufreq.c +@@ -22,7 +22,7 @@ static struct cpufreq_driver ucv2_driver; + /* make sure that only the "userspace" governor is run + * -- anything else wouldn't make sense on this platform, anyway. 
+ */ +-static int ucv2_verify_speed(struct cpufreq_policy *policy) ++static int ucv2_verify_speed(struct cpufreq_policy_data *policy) + { + if (policy->cpu) + return -EINVAL; +diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c +index db99cee1991c..89f79d763ab8 100644 +--- a/drivers/crypto/atmel-aes.c ++++ b/drivers/crypto/atmel-aes.c +@@ -88,7 +88,6 @@ + struct atmel_aes_caps { + bool has_dualbuff; + bool has_cfb64; +- bool has_ctr32; + bool has_gcm; + bool has_xts; + bool has_authenc; +@@ -1013,8 +1012,9 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) + struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); + struct scatterlist *src, *dst; +- u32 ctr, blocks; + size_t datalen; ++ u32 ctr; ++ u16 blocks, start, end; + bool use_dma, fragmented = false; + + /* Check for transfer completion. */ +@@ -1026,27 +1026,17 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) + datalen = req->nbytes - ctx->offset; + blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); + ctr = be32_to_cpu(ctx->iv[3]); +- if (dd->caps.has_ctr32) { +- /* Check 32bit counter overflow. */ +- u32 start = ctr; +- u32 end = start + blocks - 1; +- +- if (end < start) { +- ctr |= 0xffffffff; +- datalen = AES_BLOCK_SIZE * -start; +- fragmented = true; +- } +- } else { +- /* Check 16bit counter overflow. */ +- u16 start = ctr & 0xffff; +- u16 end = start + (u16)blocks - 1; +- +- if (blocks >> 16 || end < start) { +- ctr |= 0xffff; +- datalen = AES_BLOCK_SIZE * (0x10000-start); +- fragmented = true; +- } ++ ++ /* Check 16bit counter overflow. */ ++ start = ctr & 0xffff; ++ end = start + blocks - 1; ++ ++ if (blocks >> 16 || end < start) { ++ ctr |= 0xffff; ++ datalen = AES_BLOCK_SIZE * (0x10000 - start); ++ fragmented = true; + } ++ + use_dma = (datalen >= ATMEL_AES_DMA_THRESHOLD); + + /* Jump to offset. */ +@@ -2550,7 +2540,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) + { + dd->caps.has_dualbuff = 0; + dd->caps.has_cfb64 = 0; +- dd->caps.has_ctr32 = 0; + dd->caps.has_gcm = 0; + dd->caps.has_xts = 0; + dd->caps.has_authenc = 0; +@@ -2561,7 +2550,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) + case 0x500: + dd->caps.has_dualbuff = 1; + dd->caps.has_cfb64 = 1; +- dd->caps.has_ctr32 = 1; + dd->caps.has_gcm = 1; + dd->caps.has_xts = 1; + dd->caps.has_authenc = 1; +@@ -2570,7 +2558,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) + case 0x200: + dd->caps.has_dualbuff = 1; + dd->caps.has_cfb64 = 1; +- dd->caps.has_ctr32 = 1; + dd->caps.has_gcm = 1; + dd->caps.max_burst_size = 4; + break; +diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c +index 0186b3df4c87..0d5576f6ad21 100644 +--- a/drivers/crypto/ccp/ccp-dev-v3.c ++++ b/drivers/crypto/ccp/ccp-dev-v3.c +@@ -586,6 +586,7 @@ const struct ccp_vdata ccpv3_platform = { + .setup = NULL, + .perform = &ccp3_actions, + .offset = 0, ++ .rsamax = CCP_RSA_MAX_WIDTH, + }; + + const struct ccp_vdata ccpv3 = { +diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c +index d3e8faa03f15..3d7c8d9e54b9 100644 +--- a/drivers/crypto/ccree/cc_aead.c ++++ b/drivers/crypto/ccree/cc_aead.c +@@ -237,7 +237,7 @@ static void cc_aead_complete(struct device *dev, void *cc_req, int err) + * revealed the decrypted message --> zero its memory. 
+ */ + sg_zero_buffer(areq->dst, sg_nents(areq->dst), +- areq->cryptlen, 0); ++ areq->cryptlen, areq->assoclen); + err = -EBADMSG; + } + /*ENCRYPT*/ +diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c +index 254b48797799..cd9c60268bf8 100644 +--- a/drivers/crypto/ccree/cc_cipher.c ++++ b/drivers/crypto/ccree/cc_cipher.c +@@ -523,6 +523,7 @@ static void cc_setup_readiv_desc(struct crypto_tfm *tfm, + } + } + ++ + static void cc_setup_state_desc(struct crypto_tfm *tfm, + struct cipher_req_ctx *req_ctx, + unsigned int ivsize, unsigned int nbytes, +@@ -534,8 +535,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, + int cipher_mode = ctx_p->cipher_mode; + int flow_mode = ctx_p->flow_mode; + int direction = req_ctx->gen_ctx.op_type; +- dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr; +- unsigned int key_len = ctx_p->keylen; + dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; + unsigned int du_size = nbytes; + +@@ -570,6 +569,47 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, + break; + case DRV_CIPHER_XTS: + case DRV_CIPHER_ESSIV: ++ case DRV_CIPHER_BITLOCKER: ++ break; ++ default: ++ dev_err(dev, "Unsupported cipher mode (%d)\n", cipher_mode); ++ } ++} ++ ++ ++static void cc_setup_xex_state_desc(struct crypto_tfm *tfm, ++ struct cipher_req_ctx *req_ctx, ++ unsigned int ivsize, unsigned int nbytes, ++ struct cc_hw_desc desc[], ++ unsigned int *seq_size) ++{ ++ struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm); ++ struct device *dev = drvdata_to_dev(ctx_p->drvdata); ++ int cipher_mode = ctx_p->cipher_mode; ++ int flow_mode = ctx_p->flow_mode; ++ int direction = req_ctx->gen_ctx.op_type; ++ dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr; ++ unsigned int key_len = ctx_p->keylen; ++ dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; ++ unsigned int du_size = nbytes; ++ ++ struct cc_crypto_alg *cc_alg = ++ container_of(tfm->__crt_alg, struct cc_crypto_alg, ++ skcipher_alg.base); ++ ++ if (cc_alg->data_unit) ++ du_size = cc_alg->data_unit; ++ ++ switch (cipher_mode) { ++ case DRV_CIPHER_ECB: ++ break; ++ case DRV_CIPHER_CBC: ++ case DRV_CIPHER_CBC_CTS: ++ case DRV_CIPHER_CTR: ++ case DRV_CIPHER_OFB: ++ break; ++ case DRV_CIPHER_XTS: ++ case DRV_CIPHER_ESSIV: + case DRV_CIPHER_BITLOCKER: + /* load XEX key */ + hw_desc_init(&desc[*seq_size]); +@@ -881,12 +921,14 @@ static int cc_cipher_process(struct skcipher_request *req, + + /* STAT_PHASE_2: Create sequence */ + +- /* Setup IV and XEX key used */ ++ /* Setup state (IV) */ + cc_setup_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len); + /* Setup MLLI line, if needed */ + cc_setup_mlli_desc(tfm, req_ctx, dst, src, nbytes, req, desc, &seq_len); + /* Setup key */ + cc_setup_key_desc(tfm, req_ctx, nbytes, desc, &seq_len); ++ /* Setup state (IV and XEX key) */ ++ cc_setup_xex_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len); + /* Data processing */ + cc_setup_flow_desc(tfm, req_ctx, dst, src, nbytes, desc, &seq_len); + /* Read next IV */ +diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h +index ab31d4a68c80..7d2f7e2c0bb5 100644 +--- a/drivers/crypto/ccree/cc_driver.h ++++ b/drivers/crypto/ccree/cc_driver.h +@@ -161,6 +161,7 @@ struct cc_drvdata { + int std_bodies; + bool sec_disabled; + u32 comp_mask; ++ bool pm_on; + }; + + struct cc_crypto_alg { +diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c +index dbc508fb719b..452bd77a9ba0 100644 +--- a/drivers/crypto/ccree/cc_pm.c ++++ b/drivers/crypto/ccree/cc_pm.c +@@ -22,14 +22,8 
@@ const struct dev_pm_ops ccree_pm = { + int cc_pm_suspend(struct device *dev) + { + struct cc_drvdata *drvdata = dev_get_drvdata(dev); +- int rc; + + dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); +- rc = cc_suspend_req_queue(drvdata); +- if (rc) { +- dev_err(dev, "cc_suspend_req_queue (%x)\n", rc); +- return rc; +- } + fini_cc_regs(drvdata); + cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); + cc_clk_off(drvdata); +@@ -63,13 +57,6 @@ int cc_pm_resume(struct device *dev) + /* check if tee fips error occurred during power down */ + cc_tee_handle_fips_error(drvdata); + +- rc = cc_resume_req_queue(drvdata); +- if (rc) { +- dev_err(dev, "cc_resume_req_queue (%x)\n", rc); +- return rc; +- } +- +- /* must be after the queue resuming as it uses the HW queue*/ + cc_init_hash_sram(drvdata); + + return 0; +@@ -80,12 +67,10 @@ int cc_pm_get(struct device *dev) + int rc = 0; + struct cc_drvdata *drvdata = dev_get_drvdata(dev); + +- if (cc_req_queue_suspended(drvdata)) ++ if (drvdata->pm_on) + rc = pm_runtime_get_sync(dev); +- else +- pm_runtime_get_noresume(dev); + +- return rc; ++ return (rc == 1 ? 0 : rc); + } + + int cc_pm_put_suspend(struct device *dev) +@@ -93,14 +78,11 @@ int cc_pm_put_suspend(struct device *dev) + int rc = 0; + struct cc_drvdata *drvdata = dev_get_drvdata(dev); + +- if (!cc_req_queue_suspended(drvdata)) { ++ if (drvdata->pm_on) { + pm_runtime_mark_last_busy(dev); + rc = pm_runtime_put_autosuspend(dev); +- } else { +- /* Something wrong happens*/ +- dev_err(dev, "request to suspend already suspended queue"); +- rc = -EBUSY; + } ++ + return rc; + } + +@@ -117,7 +99,7 @@ int cc_pm_init(struct cc_drvdata *drvdata) + /* must be before the enabling to avoid resdundent suspending */ + pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(dev); +- /* activate the PM module */ ++ /* set us as active - note we won't do PM ops until cc_pm_go()! */ + return pm_runtime_set_active(dev); + } + +@@ -125,9 +107,11 @@ int cc_pm_init(struct cc_drvdata *drvdata) + void cc_pm_go(struct cc_drvdata *drvdata) + { + pm_runtime_enable(drvdata_to_dev(drvdata)); ++ drvdata->pm_on = true; + } + + void cc_pm_fini(struct cc_drvdata *drvdata) + { + pm_runtime_disable(drvdata_to_dev(drvdata)); ++ drvdata->pm_on = false; + } +diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c +index a947d5a2cf35..37e6fee37b13 100644 +--- a/drivers/crypto/ccree/cc_request_mgr.c ++++ b/drivers/crypto/ccree/cc_request_mgr.c +@@ -41,7 +41,6 @@ struct cc_req_mgr_handle { + #else + struct tasklet_struct comptask; + #endif +- bool is_runtime_suspended; + }; + + struct cc_bl_item { +@@ -404,6 +403,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata) + spin_lock(&mgr->bl_lock); + list_del(&bli->list); + --mgr->bl_len; ++ kfree(bli); + } + + spin_unlock(&mgr->bl_lock); +@@ -677,52 +677,3 @@ static void comp_handler(unsigned long devarg) + cc_proc_backlog(drvdata); + dev_dbg(dev, "Comp. handler done.\n"); + } +- +-/* +- * resume the queue configuration - no need to take the lock as this happens +- * inside the spin lock protection +- */ +-#if defined(CONFIG_PM) +-int cc_resume_req_queue(struct cc_drvdata *drvdata) +-{ +- struct cc_req_mgr_handle *request_mgr_handle = +- drvdata->request_mgr_handle; +- +- spin_lock_bh(&request_mgr_handle->hw_lock); +- request_mgr_handle->is_runtime_suspended = false; +- spin_unlock_bh(&request_mgr_handle->hw_lock); +- +- return 0; +-} +- +-/* +- * suspend the queue configuration. 
Since it is used for the runtime suspend +- * only verify that the queue can be suspended. +- */ +-int cc_suspend_req_queue(struct cc_drvdata *drvdata) +-{ +- struct cc_req_mgr_handle *request_mgr_handle = +- drvdata->request_mgr_handle; +- +- /* lock the send_request */ +- spin_lock_bh(&request_mgr_handle->hw_lock); +- if (request_mgr_handle->req_queue_head != +- request_mgr_handle->req_queue_tail) { +- spin_unlock_bh(&request_mgr_handle->hw_lock); +- return -EBUSY; +- } +- request_mgr_handle->is_runtime_suspended = true; +- spin_unlock_bh(&request_mgr_handle->hw_lock); +- +- return 0; +-} +- +-bool cc_req_queue_suspended(struct cc_drvdata *drvdata) +-{ +- struct cc_req_mgr_handle *request_mgr_handle = +- drvdata->request_mgr_handle; +- +- return request_mgr_handle->is_runtime_suspended; +-} +- +-#endif +diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h +index f46cf766fe4d..ff7746aaaf35 100644 +--- a/drivers/crypto/ccree/cc_request_mgr.h ++++ b/drivers/crypto/ccree/cc_request_mgr.h +@@ -40,12 +40,4 @@ void complete_request(struct cc_drvdata *drvdata); + + void cc_req_mgr_fini(struct cc_drvdata *drvdata); + +-#if defined(CONFIG_PM) +-int cc_resume_req_queue(struct cc_drvdata *drvdata); +- +-int cc_suspend_req_queue(struct cc_drvdata *drvdata); +- +-bool cc_req_queue_suspended(struct cc_drvdata *drvdata); +-#endif +- + #endif /*__REQUEST_MGR_H__*/ +diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig +index 504daff7687d..f7f0a1fb6895 100644 +--- a/drivers/crypto/hisilicon/Kconfig ++++ b/drivers/crypto/hisilicon/Kconfig +@@ -35,6 +35,5 @@ config CRYPTO_DEV_HISI_ZIP + depends on ARM64 && PCI && PCI_MSI + select CRYPTO_DEV_HISI_QM + select CRYPTO_HISI_SGL +- select SG_SPLIT + help + Support for HiSilicon ZIP Driver +diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h +index ffb00d987d02..99f21d848d4f 100644 +--- a/drivers/crypto/hisilicon/zip/zip.h ++++ b/drivers/crypto/hisilicon/zip/zip.h +@@ -12,6 +12,10 @@ + + /* hisi_zip_sqe dw3 */ + #define HZIP_BD_STATUS_M GENMASK(7, 0) ++/* hisi_zip_sqe dw7 */ ++#define HZIP_IN_SGE_DATA_OFFSET_M GENMASK(23, 0) ++/* hisi_zip_sqe dw8 */ ++#define HZIP_OUT_SGE_DATA_OFFSET_M GENMASK(23, 0) + /* hisi_zip_sqe dw9 */ + #define HZIP_REQ_TYPE_M GENMASK(7, 0) + #define HZIP_ALG_TYPE_ZLIB 0x02 +diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c +index 59023545a1c4..cf34bfdfb3e6 100644 +--- a/drivers/crypto/hisilicon/zip/zip_crypto.c ++++ b/drivers/crypto/hisilicon/zip/zip_crypto.c +@@ -45,10 +45,8 @@ enum hisi_zip_alg_type { + + struct hisi_zip_req { + struct acomp_req *req; +- struct scatterlist *src; +- struct scatterlist *dst; +- size_t slen; +- size_t dlen; ++ int sskip; ++ int dskip; + struct hisi_acc_hw_sgl *hw_src; + struct hisi_acc_hw_sgl *hw_dst; + dma_addr_t dma_src; +@@ -94,13 +92,15 @@ static void hisi_zip_config_tag(struct hisi_zip_sqe *sqe, u32 tag) + + static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type, + dma_addr_t s_addr, dma_addr_t d_addr, u32 slen, +- u32 dlen) ++ u32 dlen, int sskip, int dskip) + { + memset(sqe, 0, sizeof(struct hisi_zip_sqe)); + +- sqe->input_data_length = slen; ++ sqe->input_data_length = slen - sskip; ++ sqe->dw7 = FIELD_PREP(HZIP_IN_SGE_DATA_OFFSET_M, sskip); ++ sqe->dw8 = FIELD_PREP(HZIP_OUT_SGE_DATA_OFFSET_M, dskip); + sqe->dw9 = FIELD_PREP(HZIP_REQ_TYPE_M, req_type); +- sqe->dest_avail_out = dlen; ++ sqe->dest_avail_out = dlen - dskip; + sqe->source_addr_l 
= lower_32_bits(s_addr); + sqe->source_addr_h = upper_32_bits(s_addr); + sqe->dest_addr_l = lower_32_bits(d_addr); +@@ -301,11 +301,6 @@ static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx, + { + struct hisi_zip_req_q *req_q = &qp_ctx->req_q; + +- if (qp_ctx->qp->alg_type == HZIP_ALG_TYPE_COMP) +- kfree(req->dst); +- else +- kfree(req->src); +- + write_lock(&req_q->req_lock); + clear_bit(req->req_id, req_q->req_bitmap); + memset(req, 0, sizeof(struct hisi_zip_req)); +@@ -333,8 +328,8 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data) + } + dlen = sqe->produced; + +- hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src); +- hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst); ++ hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src); ++ hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst); + + head_size = (qp->alg_type == 0) ? TO_HEAD_SIZE(qp->req_type) : 0; + acomp_req->dlen = dlen + head_size; +@@ -428,20 +423,6 @@ static size_t get_comp_head_size(struct scatterlist *src, u8 req_type) + } + } + +-static int get_sg_skip_bytes(struct scatterlist *sgl, size_t bytes, +- size_t remains, struct scatterlist **out) +-{ +-#define SPLIT_NUM 2 +- size_t split_sizes[SPLIT_NUM]; +- int out_mapped_nents[SPLIT_NUM]; +- +- split_sizes[0] = bytes; +- split_sizes[1] = remains; +- +- return sg_split(sgl, 0, 0, SPLIT_NUM, split_sizes, out, +- out_mapped_nents, GFP_KERNEL); +-} +- + static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, + struct hisi_zip_qp_ctx *qp_ctx, + size_t head_size, bool is_comp) +@@ -449,31 +430,7 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, + struct hisi_zip_req_q *req_q = &qp_ctx->req_q; + struct hisi_zip_req *q = req_q->q; + struct hisi_zip_req *req_cache; +- struct scatterlist *out[2]; +- struct scatterlist *sgl; +- size_t len; +- int ret, req_id; +- +- /* +- * remove/add zlib/gzip head, as hardware operations do not include +- * comp head. so split req->src to get sgl without heads in acomp, or +- * add comp head to req->dst ahead of that hardware output compressed +- * data in sgl splited from req->dst without comp head. 
+- */ +- if (is_comp) { +- sgl = req->dst; +- len = req->dlen - head_size; +- } else { +- sgl = req->src; +- len = req->slen - head_size; +- } +- +- ret = get_sg_skip_bytes(sgl, head_size, len, out); +- if (ret) +- return ERR_PTR(ret); +- +- /* sgl for comp head is useless, so free it now */ +- kfree(out[0]); ++ int req_id; + + write_lock(&req_q->req_lock); + +@@ -481,7 +438,6 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, + if (req_id >= req_q->size) { + write_unlock(&req_q->req_lock); + dev_dbg(&qp_ctx->qp->qm->pdev->dev, "req cache is full!\n"); +- kfree(out[1]); + return ERR_PTR(-EBUSY); + } + set_bit(req_id, req_q->req_bitmap); +@@ -489,16 +445,13 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, + req_cache = q + req_id; + req_cache->req_id = req_id; + req_cache->req = req; ++ + if (is_comp) { +- req_cache->src = req->src; +- req_cache->dst = out[1]; +- req_cache->slen = req->slen; +- req_cache->dlen = req->dlen - head_size; ++ req_cache->sskip = 0; ++ req_cache->dskip = head_size; + } else { +- req_cache->src = out[1]; +- req_cache->dst = req->dst; +- req_cache->slen = req->slen - head_size; +- req_cache->dlen = req->dlen; ++ req_cache->sskip = head_size; ++ req_cache->dskip = 0; + } + + write_unlock(&req_q->req_lock); +@@ -510,6 +463,7 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, + struct hisi_zip_qp_ctx *qp_ctx) + { + struct hisi_zip_sqe *zip_sqe = &qp_ctx->zip_sqe; ++ struct acomp_req *a_req = req->req; + struct hisi_qp *qp = qp_ctx->qp; + struct device *dev = &qp->qm->pdev->dev; + struct hisi_acc_sgl_pool *pool = &qp_ctx->sgl_pool; +@@ -517,16 +471,16 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, + dma_addr_t output; + int ret; + +- if (!req->src || !req->slen || !req->dst || !req->dlen) ++ if (!a_req->src || !a_req->slen || !a_req->dst || !a_req->dlen) + return -EINVAL; + +- req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->src, pool, ++ req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->src, pool, + req->req_id << 1, &input); + if (IS_ERR(req->hw_src)) + return PTR_ERR(req->hw_src); + req->dma_src = input; + +- req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->dst, pool, ++ req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->dst, pool, + (req->req_id << 1) + 1, + &output); + if (IS_ERR(req->hw_dst)) { +@@ -535,8 +489,8 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, + } + req->dma_dst = output; + +- hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, req->slen, +- req->dlen); ++ hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, a_req->slen, ++ a_req->dlen, req->sskip, req->dskip); + hisi_zip_config_buf_type(zip_sqe, HZIP_SGL); + hisi_zip_config_tag(zip_sqe, req->req_id); + +@@ -548,9 +502,9 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, + return -EINPROGRESS; + + err_unmap_output: +- hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst); ++ hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst); + err_unmap_input: +- hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src); ++ hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src); + return ret; + } + +diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c +index 3cbefb41b099..2680e1525db5 100644 +--- a/drivers/crypto/picoxcell_crypto.c ++++ b/drivers/crypto/picoxcell_crypto.c +@@ -1613,6 +1613,11 @@ static const struct of_device_id spacc_of_id_table[] = { + MODULE_DEVICE_TABLE(of, spacc_of_id_table); + #endif /* CONFIG_OF */ + ++static void spacc_tasklet_kill(void *data) ++{ ++ tasklet_kill(data); 
++} ++ + static int spacc_probe(struct platform_device *pdev) + { + int i, err, ret; +@@ -1655,6 +1660,14 @@ static int spacc_probe(struct platform_device *pdev) + return -ENXIO; + } + ++ tasklet_init(&engine->complete, spacc_spacc_complete, ++ (unsigned long)engine); ++ ++ ret = devm_add_action(&pdev->dev, spacc_tasklet_kill, ++ &engine->complete); ++ if (ret) ++ return ret; ++ + if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, + engine->name, engine)) { + dev_err(engine->dev, "failed to request IRQ\n"); +@@ -1712,8 +1725,6 @@ static int spacc_probe(struct platform_device *pdev) + INIT_LIST_HEAD(&engine->completed); + INIT_LIST_HEAD(&engine->in_progress); + engine->in_flight = 0; +- tasklet_init(&engine->complete, spacc_spacc_complete, +- (unsigned long)engine); + + platform_set_drvdata(pdev, engine); + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +index ee1dc75f5ddc..1d733b57e60f 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +@@ -247,7 +247,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( + drm_dp_mst_reset_vcpi_slots(mst_mgr, mst_port); + } + +- ret = drm_dp_update_payload_part1(mst_mgr); ++ /* It's OK for this to fail */ ++ drm_dp_update_payload_part1(mst_mgr); + + /* mst_mgr->->payloads are VC payload notify MST branch using DPCD or + * AUX message. The sequence is slot 1-63 allocated sequence for each +@@ -256,9 +257,6 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( + + get_payload_table(aconnector, proposed_table); + +- if (ret) +- return false; +- + return true; + } + +@@ -316,7 +314,6 @@ bool dm_helpers_dp_mst_send_payload_allocation( + struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; + struct drm_dp_mst_port *mst_port; +- int ret; + + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + +@@ -330,10 +327,8 @@ bool dm_helpers_dp_mst_send_payload_allocation( + if (!mst_mgr->mst_state) + return false; + +- ret = drm_dp_update_payload_part2(mst_mgr); +- +- if (ret) +- return false; ++ /* It's OK for this to fail */ ++ drm_dp_update_payload_part2(mst_mgr); + + if (!enable) + drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); +diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +index f2e73e6d46b8..10985134ce0b 100644 +--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c ++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +@@ -73,7 +73,11 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) + unsigned long prate; + unsigned int mask = ATMEL_HLCDC_CLKDIV_MASK | ATMEL_HLCDC_CLKPOL; + unsigned int cfg = 0; +- int div; ++ int div, ret; ++ ++ ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk); ++ if (ret) ++ return; + + vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay; + vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end; +@@ -95,14 +99,14 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) + (adj->crtc_hdisplay - 1) | + ((adj->crtc_vdisplay - 1) << 16)); + ++ prate = clk_get_rate(crtc->dc->hlcdc->sys_clk); ++ mode_rate = adj->crtc_clock * 1000; + if (!crtc->dc->desc->fixed_clksrc) { ++ prate *= 2; + cfg |= ATMEL_HLCDC_CLKSEL; + mask |= ATMEL_HLCDC_CLKSEL; + } + +- prate = 2 * clk_get_rate(crtc->dc->hlcdc->sys_clk); +- mode_rate = adj->crtc_clock * 1000; +- + div = DIV_ROUND_UP(prate, mode_rate); + if (div < 2) { + div = 2; +@@ 
-117,8 +121,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) + int div_low = prate / mode_rate; + + if (div_low >= 2 && +- ((prate / div_low - mode_rate) < +- 10 * (mode_rate - prate / div))) ++ (10 * (prate / div_low - mode_rate) < ++ (mode_rate - prate / div))) + /* + * At least 10 times better when using a higher + * frequency than requested, instead of a lower. +@@ -147,6 +151,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) + ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO | + ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK, + cfg); ++ ++ clk_disable_unprepare(crtc->dc->hlcdc->sys_clk); + } + + static enum drm_mode_status +diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c +index a48a4c21b1b3..c5e9e2305fff 100644 +--- a/drivers/gpu/drm/drm_dp_mst_topology.c ++++ b/drivers/gpu/drm/drm_dp_mst_topology.c +@@ -2694,6 +2694,7 @@ static bool drm_dp_get_vc_payload_bw(int dp_link_bw, + int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state) + { + int ret = 0; ++ int i = 0; + struct drm_dp_mst_branch *mstb = NULL; + + mutex_lock(&mgr->lock); +@@ -2754,10 +2755,21 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms + /* this can fail if the device is gone */ + drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0); + ret = 0; ++ mutex_lock(&mgr->payload_lock); + memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload)); + mgr->payload_mask = 0; + set_bit(0, &mgr->payload_mask); ++ for (i = 0; i < mgr->max_payloads; i++) { ++ struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i]; ++ ++ if (vcpi) { ++ vcpi->vcpi = 0; ++ vcpi->num_slots = 0; ++ } ++ mgr->proposed_vcpis[i] = NULL; ++ } + mgr->vcpi_mask = 0; ++ mutex_unlock(&mgr->payload_lock); + } + + out_unlock: +diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c +index b8363aaa9032..818738e83d06 100644 +--- a/drivers/gpu/drm/drm_rect.c ++++ b/drivers/gpu/drm/drm_rect.c +@@ -54,7 +54,12 @@ EXPORT_SYMBOL(drm_rect_intersect); + + static u32 clip_scaled(u32 src, u32 dst, u32 clip) + { +- u64 tmp = mul_u32_u32(src, dst - clip); ++ u64 tmp; ++ ++ if (dst == 0) ++ return 0; ++ ++ tmp = mul_u32_u32(src, dst - clip); + + /* + * Round toward 1.0 when clipping so that we don't accidentally +diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c +index 772f0753ed38..aaf2f26f8505 100644 +--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c ++++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c +@@ -121,7 +121,7 @@ static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder) + if (mdp4_dsi_encoder->enabled) + return; + +- mdp4_crtc_set_config(encoder->crtc, ++ mdp4_crtc_set_config(encoder->crtc, + MDP4_DMA_CONFIG_PACK_ALIGN_MSB | + MDP4_DMA_CONFIG_DEFLKR_EN | + MDP4_DMA_CONFIG_DITHER_EN | +diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c +index 34bd73526afd..930674117533 100644 +--- a/drivers/hv/hv_balloon.c ++++ b/drivers/hv/hv_balloon.c +@@ -1213,10 +1213,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, + unsigned int i, j; + struct page *pg; + +- if (num_pages < alloc_unit) +- return 0; +- +- for (i = 0; (i * alloc_unit) < num_pages; i++) { ++ for (i = 0; i < num_pages / alloc_unit; i++) { + if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) > + PAGE_SIZE) + return i * alloc_unit; +@@ -1254,7 +1251,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, + + } + +- return 
num_pages; ++ return i * alloc_unit; + } + + static void balloon_up(struct work_struct *dummy) +@@ -1269,9 +1266,6 @@ static void balloon_up(struct work_struct *dummy) + long avail_pages; + unsigned long floor; + +- /* The host balloons pages in 2M granularity. */ +- WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); +- + /* + * We will attempt 2M allocations. However, if we fail to + * allocate 2M chunks, we will go back to 4k allocations. +@@ -1281,14 +1275,13 @@ static void balloon_up(struct work_struct *dummy) + avail_pages = si_mem_available(); + floor = compute_balloon_floor(); + +- /* Refuse to balloon below the floor, keep the 2M granularity. */ ++ /* Refuse to balloon below the floor. */ + if (avail_pages < num_pages || avail_pages - num_pages < floor) { + pr_warn("Balloon request will be partially fulfilled. %s\n", + avail_pages < num_pages ? "Not enough memory." : + "Balloon floor reached."); + + num_pages = avail_pages > floor ? (avail_pages - floor) : 0; +- num_pages -= num_pages % PAGES_IN_2M; + } + + while (!done) { +diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c +index 163ff7ba92b7..fedf6829cdec 100644 +--- a/drivers/infiniband/core/umem_odp.c ++++ b/drivers/infiniband/core/umem_odp.c +@@ -632,7 +632,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, + + while (bcnt > 0) { + const size_t gup_num_pages = min_t(size_t, +- (bcnt + BIT(page_shift) - 1) >> page_shift, ++ ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, + PAGE_SIZE / sizeof(struct page *)); + + down_read(&owning_mm->mmap_sem); +diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c +index 4950df3f71b6..5c73c0a790fa 100644 +--- a/drivers/infiniband/hw/mlx5/gsi.c ++++ b/drivers/infiniband/hw/mlx5/gsi.c +@@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, + ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); + if (ret) { + /* Undo the effect of adding the outstanding wr */ +- gsi->outstanding_pi = (gsi->outstanding_pi - 1) % +- gsi->cap.max_send_wr; ++ gsi->outstanding_pi--; + goto err; + } + spin_unlock_irqrestore(&gsi->lock, flags); +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index deb924e1d790..3d2b63585da9 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -329,6 +329,9 @@ struct cached_dev { + */ + atomic_t has_dirty; + ++#define BCH_CACHE_READA_ALL 0 ++#define BCH_CACHE_READA_META_ONLY 1 ++ unsigned int cache_readahead_policy; + struct bch_ratelimit writeback_rate; + struct delayed_work writeback_rate_update; + +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index 41adcd1546f1..4045ae748f17 100644 +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c +@@ -391,13 +391,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) + goto skip; + + /* +- * Flag for bypass if the IO is for read-ahead or background, +- * unless the read-ahead request is for metadata ++ * If the bio is for read-ahead or background IO, bypass it or ++ * not depends on the following situations, ++ * - If the IO is for meta data, always cache it and no bypass ++ * - If the IO is not meta data, check dc->cache_reada_policy, ++ * BCH_CACHE_READA_ALL: cache it and not bypass ++ * BCH_CACHE_READA_META_ONLY: not cache it and bypass ++ * That is, read-ahead request for metadata always get cached + * (eg, for gfs2 or xfs). 
+ */ +- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && +- !(bio->bi_opf & (REQ_META|REQ_PRIO))) +- goto skip; ++ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) { ++ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) && ++ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL)) ++ goto skip; ++ } + + if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || + bio_sectors(bio) & (c->sb.block_size - 1)) { +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index 627dcea0f5b6..7f0fb4b5755a 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = { + NULL + }; + ++static const char * const bch_reada_cache_policies[] = { ++ "all", ++ "meta-only", ++ NULL ++}; ++ + /* Default is 0 ("auto") */ + static const char * const bch_stop_on_failure_modes[] = { + "auto", +@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us); + rw_attribute(sequential_cutoff); + rw_attribute(data_csum); + rw_attribute(cache_mode); ++rw_attribute(readahead_cache_policy); + rw_attribute(stop_when_cache_set_failed); + rw_attribute(writeback_metadata); + rw_attribute(writeback_running); +@@ -167,6 +174,11 @@ SHOW(__bch_cached_dev) + bch_cache_modes, + BDEV_CACHE_MODE(&dc->sb)); + ++ if (attr == &sysfs_readahead_cache_policy) ++ return bch_snprint_string_list(buf, PAGE_SIZE, ++ bch_reada_cache_policies, ++ dc->cache_readahead_policy); ++ + if (attr == &sysfs_stop_when_cache_set_failed) + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_stop_on_failure_modes, +@@ -352,6 +364,15 @@ STORE(__cached_dev) + } + } + ++ if (attr == &sysfs_readahead_cache_policy) { ++ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf); ++ if (v < 0) ++ return v; ++ ++ if ((unsigned int) v != dc->cache_readahead_policy) ++ dc->cache_readahead_policy = v; ++ } ++ + if (attr == &sysfs_stop_when_cache_set_failed) { + v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); + if (v < 0) +@@ -466,6 +487,7 @@ static struct attribute *bch_cached_dev_files[] = { + &sysfs_data_csum, + #endif + &sysfs_cache_mode, ++ &sysfs_readahead_cache_policy, + &sysfs_stop_when_cache_set_failed, + &sysfs_writeback_metadata, + &sysfs_writeback_running, +diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c +index eb9782fc93fe..492bbe0584d9 100644 +--- a/drivers/md/dm-crypt.c ++++ b/drivers/md/dm-crypt.c +@@ -331,8 +331,14 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, + static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, + const char *opts) + { +- unsigned bs = crypto_skcipher_blocksize(any_tfm(cc)); +- int log = ilog2(bs); ++ unsigned bs; ++ int log; ++ ++ if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags)) ++ bs = crypto_aead_blocksize(any_tfm_aead(cc)); ++ else ++ bs = crypto_skcipher_blocksize(any_tfm(cc)); ++ log = ilog2(bs); + + /* we need to calculate how far we must shift the sector count + * to get the cipher block count, we use this shift in _gen */ +@@ -717,7 +723,7 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, + struct crypto_wait wait; + int err; + +- req = skcipher_request_alloc(any_tfm(cc), GFP_KERNEL | GFP_NOFS); ++ req = skcipher_request_alloc(any_tfm(cc), GFP_NOIO); + if (!req) + return -ENOMEM; + +diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c +index b88d6d701f5b..8bb723f1a569 100644 +--- a/drivers/md/dm-thin-metadata.c ++++ b/drivers/md/dm-thin-metadata.c +@@ -387,16 +387,15 @@ static int subtree_equal(void *context, const void *value1_le, 
const void *value + * Variant that is used for in-core only changes or code that + * shouldn't put the pool in service on its own (e.g. commit). + */ +-static inline void __pmd_write_lock(struct dm_pool_metadata *pmd) ++static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd) + __acquires(pmd->root_lock) + { + down_write(&pmd->root_lock); + } +-#define pmd_write_lock_in_core(pmd) __pmd_write_lock((pmd)) + + static inline void pmd_write_lock(struct dm_pool_metadata *pmd) + { +- __pmd_write_lock(pmd); ++ pmd_write_lock_in_core(pmd); + if (unlikely(!pmd->in_service)) + pmd->in_service = true; + } +@@ -831,6 +830,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) + * We need to know if the thin_disk_superblock exceeds a 512-byte sector. + */ + BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); ++ BUG_ON(!rwsem_is_locked(&pmd->root_lock)); + + if (unlikely(!pmd->in_service)) + return 0; +@@ -953,6 +953,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) + return -EBUSY; + } + ++ pmd_write_lock_in_core(pmd); + if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) { + r = __commit_transaction(pmd); + if (r < 0) +@@ -961,6 +962,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) + } + if (!pmd->fail_io) + __destroy_persistent_data_objects(pmd); ++ pmd_write_unlock(pmd); + + kfree(pmd); + return 0; +@@ -1841,7 +1843,7 @@ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) + * Care is taken to not have commit be what + * triggers putting the thin-pool in-service. + */ +- __pmd_write_lock(pmd); ++ pmd_write_lock_in_core(pmd); + if (pmd->fail_io) + goto out; + +diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c +index 43d1af1d8173..07c1b0334f57 100644 +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -442,7 +442,13 @@ static void writecache_notify_io(unsigned long error, void *context) + complete(&endio->c); + } + +-static void ssd_commit_flushed(struct dm_writecache *wc) ++static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) ++{ ++ wait_event(wc->bio_in_progress_wait[direction], ++ !atomic_read(&wc->bio_in_progress[direction])); ++} ++ ++static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) + { + struct dm_io_region region; + struct dm_io_request req; +@@ -488,17 +494,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc) + writecache_notify_io(0, &endio); + wait_for_completion_io(&endio.c); + ++ if (wait_for_ios) ++ writecache_wait_for_ios(wc, WRITE); ++ + writecache_disk_flush(wc, wc->ssd_dev); + + memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); + } + +-static void writecache_commit_flushed(struct dm_writecache *wc) ++static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) + { + if (WC_MODE_PMEM(wc)) + wmb(); + else +- ssd_commit_flushed(wc); ++ ssd_commit_flushed(wc, wait_for_ios); + } + + static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) +@@ -522,12 +531,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) + writecache_error(wc, r, "error flushing metadata: %d", r); + } + +-static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) +-{ +- wait_event(wc->bio_in_progress_wait[direction], +- !atomic_read(&wc->bio_in_progress[direction])); +-} +- + #define WFE_RETURN_FOLLOWING 1 + #define WFE_LOWEST_SEQ 2 + +@@ -724,15 +727,12 @@ static void writecache_flush(struct dm_writecache *wc) + e = e2; + cond_resched(); + } +- 
writecache_commit_flushed(wc); +- +- if (!WC_MODE_PMEM(wc)) +- writecache_wait_for_ios(wc, WRITE); ++ writecache_commit_flushed(wc, true); + + wc->seq_count++; + pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); + writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + + wc->overwrote_committed = false; + +@@ -756,7 +756,7 @@ static void writecache_flush(struct dm_writecache *wc) + } + + if (need_flush_after_free) +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + } + + static void writecache_flush_work(struct work_struct *work) +@@ -809,7 +809,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ + } + + if (discarded_something) +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + } + + static bool writecache_wait_for_writeback(struct dm_writecache *wc) +@@ -958,7 +958,7 @@ erase_this: + + if (need_flush) { + writecache_flush_all_metadata(wc); +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + } + + wc_unlock(wc); +@@ -1342,7 +1342,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head * + wc->writeback_size--; + n_walked++; + if (unlikely(n_walked >= ENDIO_LATENCY)) { +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + wc_unlock(wc); + wc_lock(wc); + n_walked = 0; +@@ -1423,7 +1423,7 @@ pop_from_list: + writecache_wait_for_ios(wc, READ); + } + +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + + wc_unlock(wc); + } +@@ -1766,10 +1766,10 @@ static int init_memory(struct dm_writecache *wc) + write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); + + writecache_flush_all_metadata(wc); +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); + writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); +- writecache_commit_flushed(wc); ++ writecache_commit_flushed(wc, false); + + return 0; + } +diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c +index ac1179ca80d9..5205cf9bbfd9 100644 +--- a/drivers/md/dm-zoned-metadata.c ++++ b/drivers/md/dm-zoned-metadata.c +@@ -134,6 +134,7 @@ struct dmz_metadata { + + sector_t zone_bitmap_size; + unsigned int zone_nr_bitmap_blocks; ++ unsigned int zone_bits_per_mblk; + + unsigned int nr_bitmap_blocks; + unsigned int nr_map_blocks; +@@ -1167,7 +1168,10 @@ static int dmz_init_zones(struct dmz_metadata *zmd) + + /* Init */ + zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; +- zmd->zone_nr_bitmap_blocks = zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT; ++ zmd->zone_nr_bitmap_blocks = ++ max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT); ++ zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks, ++ DMZ_BLOCK_SIZE_BITS); + + /* Allocate zone array */ + zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); +@@ -1991,7 +1995,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, + dmz_release_mblock(zmd, to_mblk); + dmz_release_mblock(zmd, from_mblk); + +- chunk_block += DMZ_BLOCK_SIZE_BITS; ++ chunk_block += zmd->zone_bits_per_mblk; + } + + to_zone->weight = from_zone->weight; +@@ -2052,7 +2056,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, + + /* Set bits */ + bit = chunk_block & DMZ_BLOCK_MASK_BITS; +- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); ++ 
nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); + + count = dmz_set_bits((unsigned long *)mblk->data, bit, nr_bits); + if (count) { +@@ -2131,7 +2135,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, + + /* Clear bits */ + bit = chunk_block & DMZ_BLOCK_MASK_BITS; +- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); ++ nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); + + count = dmz_clear_bits((unsigned long *)mblk->data, + bit, nr_bits); +@@ -2191,6 +2195,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, + { + struct dmz_mblock *mblk; + unsigned int bit, set_bit, nr_bits; ++ unsigned int zone_bits = zmd->zone_bits_per_mblk; + unsigned long *bitmap; + int n = 0; + +@@ -2205,15 +2210,15 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, + /* Get offset */ + bitmap = (unsigned long *) mblk->data; + bit = chunk_block & DMZ_BLOCK_MASK_BITS; +- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); ++ nr_bits = min(nr_blocks, zone_bits - bit); + if (set) +- set_bit = find_next_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); ++ set_bit = find_next_bit(bitmap, zone_bits, bit); + else +- set_bit = find_next_zero_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); ++ set_bit = find_next_zero_bit(bitmap, zone_bits, bit); + dmz_release_mblock(zmd, mblk); + + n += set_bit - bit; +- if (set_bit < DMZ_BLOCK_SIZE_BITS) ++ if (set_bit < zone_bits) + break; + + nr_blocks -= nr_bits; +@@ -2316,7 +2321,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone) + /* Count bits in this block */ + bitmap = mblk->data; + bit = chunk_block & DMZ_BLOCK_MASK_BITS; +- nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); ++ nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); + n += dmz_count_bits(bitmap, bit, nr_bits); + + dmz_release_mblock(zmd, mblk); +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 1a5e328c443a..6d3cc235f842 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -1880,6 +1880,7 @@ static void dm_init_normal_md_queue(struct mapped_device *md) + /* + * Initialize aspects of queue that aren't relevant for blk-mq + */ ++ md->queue->backing_dev_info->congested_data = md; + md->queue->backing_dev_info->congested_fn = dm_any_congested; + } + +@@ -1970,7 +1971,12 @@ static struct mapped_device *alloc_dev(int minor) + if (!md->queue) + goto bad; + md->queue->queuedata = md; +- md->queue->backing_dev_info->congested_data = md; ++ /* ++ * default to bio-based required ->make_request_fn until DM ++ * table is loaded and md->type established. If request-based ++ * table is loaded: blk-mq will override accordingly. 
++ */ ++ blk_queue_make_request(md->queue, dm_make_request); + + md->disk = alloc_disk_node(1, md->numa_node_id); + if (!md->disk) +@@ -2285,7 +2291,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) + case DM_TYPE_DAX_BIO_BASED: + case DM_TYPE_NVME_BIO_BASED: + dm_init_normal_md_queue(md); +- blk_queue_make_request(md->queue, dm_make_request); + break; + case DM_TYPE_NONE: + WARN_ON_ONCE(true); +diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c +index bd68f6fef694..d8b4125e338c 100644 +--- a/drivers/md/persistent-data/dm-space-map-common.c ++++ b/drivers/md/persistent-data/dm-space-map-common.c +@@ -380,6 +380,33 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, + return -ENOSPC; + } + ++int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, ++ dm_block_t begin, dm_block_t end, dm_block_t *b) ++{ ++ int r; ++ uint32_t count; ++ ++ do { ++ r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b); ++ if (r) ++ break; ++ ++ /* double check this block wasn't used in the old transaction */ ++ if (*b >= old_ll->nr_blocks) ++ count = 0; ++ else { ++ r = sm_ll_lookup(old_ll, *b, &count); ++ if (r) ++ break; ++ ++ if (count) ++ begin = *b + 1; ++ } ++ } while (count); ++ ++ return r; ++} ++ + static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, + int (*mutator)(void *context, uint32_t old, uint32_t *new), + void *context, enum allocation_event *ev) +diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h +index b3078d5eda0c..8de63ce39bdd 100644 +--- a/drivers/md/persistent-data/dm-space-map-common.h ++++ b/drivers/md/persistent-data/dm-space-map-common.h +@@ -109,6 +109,8 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result); + int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); + int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, + dm_block_t end, dm_block_t *result); ++int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, ++ dm_block_t begin, dm_block_t end, dm_block_t *result); + int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); + int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); + int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); +diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c +index 32adf6b4a9c7..bf4c5e2ccb6f 100644 +--- a/drivers/md/persistent-data/dm-space-map-disk.c ++++ b/drivers/md/persistent-data/dm-space-map-disk.c +@@ -167,8 +167,10 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) + enum allocation_event ev; + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + +- /* FIXME: we should loop round a couple of times */ +- r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); ++ /* ++ * Any block we allocate has to be free in both the old and current ll. 
++ */ ++ r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b); + if (r) + return r; + +diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c +index 25328582cc48..9e3c64ec2026 100644 +--- a/drivers/md/persistent-data/dm-space-map-metadata.c ++++ b/drivers/md/persistent-data/dm-space-map-metadata.c +@@ -448,7 +448,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) + enum allocation_event ev; + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + +- r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); ++ /* ++ * Any block we allocate has to be free in both the old and current ll. ++ */ ++ r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); + if (r) + return r; + +diff --git a/drivers/media/rc/iguanair.c b/drivers/media/rc/iguanair.c +index 872d6441e512..a7deca1fefb7 100644 +--- a/drivers/media/rc/iguanair.c ++++ b/drivers/media/rc/iguanair.c +@@ -413,7 +413,7 @@ static int iguanair_probe(struct usb_interface *intf, + int ret, pipein, pipeout; + struct usb_host_interface *idesc; + +- idesc = intf->altsetting; ++ idesc = intf->cur_altsetting; + if (idesc->desc.bNumEndpoints < 2) + return -ENODEV; + +diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c +index 7741151606ef..6f80c251f641 100644 +--- a/drivers/media/rc/rc-main.c ++++ b/drivers/media/rc/rc-main.c +@@ -1891,23 +1891,28 @@ int rc_register_device(struct rc_dev *dev) + + dev->registered = true; + +- if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { +- rc = rc_setup_rx_device(dev); +- if (rc) +- goto out_dev; +- } +- +- /* Ensure that the lirc kfifo is setup before we start the thread */ ++ /* ++ * once the the input device is registered in rc_setup_rx_device, ++ * userspace can open the input device and rc_open() will be called ++ * as a result. This results in driver code being allowed to submit ++ * keycodes with rc_keydown, so lirc must be registered first. 
++ */ + if (dev->allowed_protocols != RC_PROTO_BIT_CEC) { + rc = ir_lirc_register(dev); + if (rc < 0) +- goto out_rx; ++ goto out_dev; ++ } ++ ++ if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { ++ rc = rc_setup_rx_device(dev); ++ if (rc) ++ goto out_lirc; + } + + if (dev->driver_type == RC_DRIVER_IR_RAW) { + rc = ir_raw_event_register(dev); + if (rc < 0) +- goto out_lirc; ++ goto out_rx; + } + + dev_dbg(&dev->dev, "Registered rc%u (driver: %s)\n", dev->minor, +@@ -1915,11 +1920,11 @@ int rc_register_device(struct rc_dev *dev) + + return 0; + ++out_rx: ++ rc_free_rx_device(dev); + out_lirc: + if (dev->allowed_protocols != RC_PROTO_BIT_CEC) + ir_lirc_unregister(dev); +-out_rx: +- rc_free_rx_device(dev); + out_dev: + device_del(&dev->dev); + out_rx_free: +diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c +index 428235ca2635..2b688cc39bb8 100644 +--- a/drivers/media/usb/uvc/uvc_driver.c ++++ b/drivers/media/usb/uvc/uvc_driver.c +@@ -1493,6 +1493,11 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain, + break; + if (forward == prev) + continue; ++ if (forward->chain.next || forward->chain.prev) { ++ uvc_trace(UVC_TRACE_DESCR, "Found reference to " ++ "entity %d already in chain.\n", forward->id); ++ return -EINVAL; ++ } + + switch (UVC_ENTITY_TYPE(forward)) { + case UVC_VC_EXTENSION_UNIT: +@@ -1574,6 +1579,13 @@ static int uvc_scan_chain_backward(struct uvc_video_chain *chain, + return -1; + } + ++ if (term->chain.next || term->chain.prev) { ++ uvc_trace(UVC_TRACE_DESCR, "Found reference to " ++ "entity %d already in chain.\n", ++ term->id); ++ return -EINVAL; ++ } ++ + if (uvc_trace_param & UVC_TRACE_PROBE) + printk(KERN_CONT " %d", term->id); + +diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +index e1eaf1135c7f..7ad6db8dd9f6 100644 +--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c ++++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +@@ -1183,36 +1183,38 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + u32 aux_space; + int compatible_arg = 1; + long err = 0; ++ unsigned int ncmd; + + /* + * 1. When struct size is different, converts the command. 
+ */ + switch (cmd) { +- case VIDIOC_G_FMT32: cmd = VIDIOC_G_FMT; break; +- case VIDIOC_S_FMT32: cmd = VIDIOC_S_FMT; break; +- case VIDIOC_QUERYBUF32: cmd = VIDIOC_QUERYBUF; break; +- case VIDIOC_G_FBUF32: cmd = VIDIOC_G_FBUF; break; +- case VIDIOC_S_FBUF32: cmd = VIDIOC_S_FBUF; break; +- case VIDIOC_QBUF32: cmd = VIDIOC_QBUF; break; +- case VIDIOC_DQBUF32: cmd = VIDIOC_DQBUF; break; +- case VIDIOC_ENUMSTD32: cmd = VIDIOC_ENUMSTD; break; +- case VIDIOC_ENUMINPUT32: cmd = VIDIOC_ENUMINPUT; break; +- case VIDIOC_TRY_FMT32: cmd = VIDIOC_TRY_FMT; break; +- case VIDIOC_G_EXT_CTRLS32: cmd = VIDIOC_G_EXT_CTRLS; break; +- case VIDIOC_S_EXT_CTRLS32: cmd = VIDIOC_S_EXT_CTRLS; break; +- case VIDIOC_TRY_EXT_CTRLS32: cmd = VIDIOC_TRY_EXT_CTRLS; break; +- case VIDIOC_DQEVENT32: cmd = VIDIOC_DQEVENT; break; +- case VIDIOC_OVERLAY32: cmd = VIDIOC_OVERLAY; break; +- case VIDIOC_STREAMON32: cmd = VIDIOC_STREAMON; break; +- case VIDIOC_STREAMOFF32: cmd = VIDIOC_STREAMOFF; break; +- case VIDIOC_G_INPUT32: cmd = VIDIOC_G_INPUT; break; +- case VIDIOC_S_INPUT32: cmd = VIDIOC_S_INPUT; break; +- case VIDIOC_G_OUTPUT32: cmd = VIDIOC_G_OUTPUT; break; +- case VIDIOC_S_OUTPUT32: cmd = VIDIOC_S_OUTPUT; break; +- case VIDIOC_CREATE_BUFS32: cmd = VIDIOC_CREATE_BUFS; break; +- case VIDIOC_PREPARE_BUF32: cmd = VIDIOC_PREPARE_BUF; break; +- case VIDIOC_G_EDID32: cmd = VIDIOC_G_EDID; break; +- case VIDIOC_S_EDID32: cmd = VIDIOC_S_EDID; break; ++ case VIDIOC_G_FMT32: ncmd = VIDIOC_G_FMT; break; ++ case VIDIOC_S_FMT32: ncmd = VIDIOC_S_FMT; break; ++ case VIDIOC_QUERYBUF32: ncmd = VIDIOC_QUERYBUF; break; ++ case VIDIOC_G_FBUF32: ncmd = VIDIOC_G_FBUF; break; ++ case VIDIOC_S_FBUF32: ncmd = VIDIOC_S_FBUF; break; ++ case VIDIOC_QBUF32: ncmd = VIDIOC_QBUF; break; ++ case VIDIOC_DQBUF32: ncmd = VIDIOC_DQBUF; break; ++ case VIDIOC_ENUMSTD32: ncmd = VIDIOC_ENUMSTD; break; ++ case VIDIOC_ENUMINPUT32: ncmd = VIDIOC_ENUMINPUT; break; ++ case VIDIOC_TRY_FMT32: ncmd = VIDIOC_TRY_FMT; break; ++ case VIDIOC_G_EXT_CTRLS32: ncmd = VIDIOC_G_EXT_CTRLS; break; ++ case VIDIOC_S_EXT_CTRLS32: ncmd = VIDIOC_S_EXT_CTRLS; break; ++ case VIDIOC_TRY_EXT_CTRLS32: ncmd = VIDIOC_TRY_EXT_CTRLS; break; ++ case VIDIOC_DQEVENT32: ncmd = VIDIOC_DQEVENT; break; ++ case VIDIOC_OVERLAY32: ncmd = VIDIOC_OVERLAY; break; ++ case VIDIOC_STREAMON32: ncmd = VIDIOC_STREAMON; break; ++ case VIDIOC_STREAMOFF32: ncmd = VIDIOC_STREAMOFF; break; ++ case VIDIOC_G_INPUT32: ncmd = VIDIOC_G_INPUT; break; ++ case VIDIOC_S_INPUT32: ncmd = VIDIOC_S_INPUT; break; ++ case VIDIOC_G_OUTPUT32: ncmd = VIDIOC_G_OUTPUT; break; ++ case VIDIOC_S_OUTPUT32: ncmd = VIDIOC_S_OUTPUT; break; ++ case VIDIOC_CREATE_BUFS32: ncmd = VIDIOC_CREATE_BUFS; break; ++ case VIDIOC_PREPARE_BUF32: ncmd = VIDIOC_PREPARE_BUF; break; ++ case VIDIOC_G_EDID32: ncmd = VIDIOC_G_EDID; break; ++ case VIDIOC_S_EDID32: ncmd = VIDIOC_S_EDID; break; ++ default: ncmd = cmd; break; + } + + /* +@@ -1221,11 +1223,11 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + * argument into it. 
+ */ + switch (cmd) { +- case VIDIOC_OVERLAY: +- case VIDIOC_STREAMON: +- case VIDIOC_STREAMOFF: +- case VIDIOC_S_INPUT: +- case VIDIOC_S_OUTPUT: ++ case VIDIOC_OVERLAY32: ++ case VIDIOC_STREAMON32: ++ case VIDIOC_STREAMOFF32: ++ case VIDIOC_S_INPUT32: ++ case VIDIOC_S_OUTPUT32: + err = alloc_userspace(sizeof(unsigned int), 0, &new_p64); + if (!err && assign_in_user((unsigned int __user *)new_p64, + (compat_uint_t __user *)p32)) +@@ -1233,23 +1235,23 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + compatible_arg = 0; + break; + +- case VIDIOC_G_INPUT: +- case VIDIOC_G_OUTPUT: ++ case VIDIOC_G_INPUT32: ++ case VIDIOC_G_OUTPUT32: + err = alloc_userspace(sizeof(unsigned int), 0, &new_p64); + compatible_arg = 0; + break; + +- case VIDIOC_G_EDID: +- case VIDIOC_S_EDID: ++ case VIDIOC_G_EDID32: ++ case VIDIOC_S_EDID32: + err = alloc_userspace(sizeof(struct v4l2_edid), 0, &new_p64); + if (!err) + err = get_v4l2_edid32(new_p64, p32); + compatible_arg = 0; + break; + +- case VIDIOC_G_FMT: +- case VIDIOC_S_FMT: +- case VIDIOC_TRY_FMT: ++ case VIDIOC_G_FMT32: ++ case VIDIOC_S_FMT32: ++ case VIDIOC_TRY_FMT32: + err = bufsize_v4l2_format(p32, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_format), +@@ -1262,7 +1264,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + compatible_arg = 0; + break; + +- case VIDIOC_CREATE_BUFS: ++ case VIDIOC_CREATE_BUFS32: + err = bufsize_v4l2_create(p32, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_create_buffers), +@@ -1275,10 +1277,10 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + compatible_arg = 0; + break; + +- case VIDIOC_PREPARE_BUF: +- case VIDIOC_QUERYBUF: +- case VIDIOC_QBUF: +- case VIDIOC_DQBUF: ++ case VIDIOC_PREPARE_BUF32: ++ case VIDIOC_QUERYBUF32: ++ case VIDIOC_QBUF32: ++ case VIDIOC_DQBUF32: + err = bufsize_v4l2_buffer(p32, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_buffer), +@@ -1291,7 +1293,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + compatible_arg = 0; + break; + +- case VIDIOC_S_FBUF: ++ case VIDIOC_S_FBUF32: + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &new_p64); + if (!err) +@@ -1299,13 +1301,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + compatible_arg = 0; + break; + +- case VIDIOC_G_FBUF: ++ case VIDIOC_G_FBUF32: + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &new_p64); + compatible_arg = 0; + break; + +- case VIDIOC_ENUMSTD: ++ case VIDIOC_ENUMSTD32: + err = alloc_userspace(sizeof(struct v4l2_standard), 0, + &new_p64); + if (!err) +@@ -1313,16 +1315,16 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + compatible_arg = 0; + break; + +- case VIDIOC_ENUMINPUT: ++ case VIDIOC_ENUMINPUT32: + err = alloc_userspace(sizeof(struct v4l2_input), 0, &new_p64); + if (!err) + err = get_v4l2_input32(new_p64, p32); + compatible_arg = 0; + break; + +- case VIDIOC_G_EXT_CTRLS: +- case VIDIOC_S_EXT_CTRLS: +- case VIDIOC_TRY_EXT_CTRLS: ++ case VIDIOC_G_EXT_CTRLS32: ++ case VIDIOC_S_EXT_CTRLS32: ++ case VIDIOC_TRY_EXT_CTRLS32: + err = bufsize_v4l2_ext_controls(p32, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_ext_controls), +@@ -1334,7 +1336,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + } + compatible_arg = 0; + break; +- case VIDIOC_DQEVENT: ++ case VIDIOC_DQEVENT32: + 
err = alloc_userspace(sizeof(struct v4l2_event), 0, &new_p64); + compatible_arg = 0; + break; +@@ -1352,9 +1354,9 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + * Otherwise, it will pass the newly allocated @new_p64 argument. + */ + if (compatible_arg) +- err = native_ioctl(file, cmd, (unsigned long)p32); ++ err = native_ioctl(file, ncmd, (unsigned long)p32); + else +- err = native_ioctl(file, cmd, (unsigned long)new_p64); ++ err = native_ioctl(file, ncmd, (unsigned long)new_p64); + + if (err == -ENOTTY) + return err; +@@ -1370,13 +1372,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + * the blocks to maximum allowed value. + */ + switch (cmd) { +- case VIDIOC_G_EXT_CTRLS: +- case VIDIOC_S_EXT_CTRLS: +- case VIDIOC_TRY_EXT_CTRLS: ++ case VIDIOC_G_EXT_CTRLS32: ++ case VIDIOC_S_EXT_CTRLS32: ++ case VIDIOC_TRY_EXT_CTRLS32: + if (put_v4l2_ext_controls32(file, new_p64, p32)) + err = -EFAULT; + break; +- case VIDIOC_S_EDID: ++ case VIDIOC_S_EDID32: + if (put_v4l2_edid32(new_p64, p32)) + err = -EFAULT; + break; +@@ -1389,49 +1391,49 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar + * the original 32 bits structure. + */ + switch (cmd) { +- case VIDIOC_S_INPUT: +- case VIDIOC_S_OUTPUT: +- case VIDIOC_G_INPUT: +- case VIDIOC_G_OUTPUT: ++ case VIDIOC_S_INPUT32: ++ case VIDIOC_S_OUTPUT32: ++ case VIDIOC_G_INPUT32: ++ case VIDIOC_G_OUTPUT32: + if (assign_in_user((compat_uint_t __user *)p32, + ((unsigned int __user *)new_p64))) + err = -EFAULT; + break; + +- case VIDIOC_G_FBUF: ++ case VIDIOC_G_FBUF32: + err = put_v4l2_framebuffer32(new_p64, p32); + break; + +- case VIDIOC_DQEVENT: ++ case VIDIOC_DQEVENT32: + err = put_v4l2_event32(new_p64, p32); + break; + +- case VIDIOC_G_EDID: ++ case VIDIOC_G_EDID32: + err = put_v4l2_edid32(new_p64, p32); + break; + +- case VIDIOC_G_FMT: +- case VIDIOC_S_FMT: +- case VIDIOC_TRY_FMT: ++ case VIDIOC_G_FMT32: ++ case VIDIOC_S_FMT32: ++ case VIDIOC_TRY_FMT32: + err = put_v4l2_format32(new_p64, p32); + break; + +- case VIDIOC_CREATE_BUFS: ++ case VIDIOC_CREATE_BUFS32: + err = put_v4l2_create32(new_p64, p32); + break; + +- case VIDIOC_PREPARE_BUF: +- case VIDIOC_QUERYBUF: +- case VIDIOC_QBUF: +- case VIDIOC_DQBUF: ++ case VIDIOC_PREPARE_BUF32: ++ case VIDIOC_QUERYBUF32: ++ case VIDIOC_QBUF32: ++ case VIDIOC_DQBUF32: + err = put_v4l2_buffer32(new_p64, p32); + break; + +- case VIDIOC_ENUMSTD: ++ case VIDIOC_ENUMSTD32: + err = put_v4l2_standard32(new_p64, p32); + break; + +- case VIDIOC_ENUMINPUT: ++ case VIDIOC_ENUMINPUT32: + err = put_v4l2_input32(new_p64, p32); + break; + } +diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c +index 66a6c6c236a7..28262190c3ab 100644 +--- a/drivers/media/v4l2-core/videobuf-dma-sg.c ++++ b/drivers/media/v4l2-core/videobuf-dma-sg.c +@@ -349,8 +349,11 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) + BUG_ON(dma->sglen); + + if (dma->pages) { +- for (i = 0; i < dma->nr_pages; i++) ++ for (i = 0; i < dma->nr_pages; i++) { ++ if (dma->direction == DMA_FROM_DEVICE) ++ set_page_dirty_lock(dma->pages[i]); + put_page(dma->pages[i]); ++ } + kfree(dma->pages); + dma->pages = NULL; + } +diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c +index a4aaadaa0cb0..aa59496e4376 100644 +--- a/drivers/mfd/axp20x.c ++++ b/drivers/mfd/axp20x.c +@@ -126,7 +126,7 @@ static const struct regmap_range axp288_writeable_ranges[] = { + static const struct regmap_range axp288_volatile_ranges[] = { + 
regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP288_POWER_REASON), + regmap_reg_range(AXP288_BC_GLOBAL, AXP288_BC_GLOBAL), +- regmap_reg_range(AXP288_BC_DET_STAT, AXP288_BC_DET_STAT), ++ regmap_reg_range(AXP288_BC_DET_STAT, AXP20X_VBUS_IPSOUT_MGMT), + regmap_reg_range(AXP20X_CHRG_BAK_CTRL, AXP20X_CHRG_BAK_CTRL), + regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IPSOUT_V_HIGH_L), + regmap_reg_range(AXP20X_TIMER_CTRL, AXP20X_TIMER_CTRL), +diff --git a/drivers/mfd/da9062-core.c b/drivers/mfd/da9062-core.c +index e69626867c26..9143de7b77b8 100644 +--- a/drivers/mfd/da9062-core.c ++++ b/drivers/mfd/da9062-core.c +@@ -248,7 +248,7 @@ static const struct mfd_cell da9062_devs[] = { + .name = "da9062-watchdog", + .num_resources = ARRAY_SIZE(da9062_wdt_resources), + .resources = da9062_wdt_resources, +- .of_compatible = "dlg,da9062-wdt", ++ .of_compatible = "dlg,da9062-watchdog", + }, + { + .name = "da9062-thermal", +diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c +index 381593fbe50f..7841c11411d0 100644 +--- a/drivers/mfd/dln2.c ++++ b/drivers/mfd/dln2.c +@@ -722,6 +722,8 @@ static int dln2_probe(struct usb_interface *interface, + const struct usb_device_id *usb_id) + { + struct usb_host_interface *hostif = interface->cur_altsetting; ++ struct usb_endpoint_descriptor *epin; ++ struct usb_endpoint_descriptor *epout; + struct device *dev = &interface->dev; + struct dln2_dev *dln2; + int ret; +@@ -731,12 +733,19 @@ static int dln2_probe(struct usb_interface *interface, + hostif->desc.bNumEndpoints < 2) + return -ENODEV; + ++ epin = &hostif->endpoint[0].desc; ++ epout = &hostif->endpoint[1].desc; ++ if (!usb_endpoint_is_bulk_out(epout)) ++ return -ENODEV; ++ if (!usb_endpoint_is_bulk_in(epin)) ++ return -ENODEV; ++ + dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL); + if (!dln2) + return -ENOMEM; + +- dln2->ep_out = hostif->endpoint[0].desc.bEndpointAddress; +- dln2->ep_in = hostif->endpoint[1].desc.bEndpointAddress; ++ dln2->ep_out = epout->bEndpointAddress; ++ dln2->ep_in = epin->bEndpointAddress; + dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface)); + dln2->interface = interface; + usb_set_intfdata(interface, dln2); +diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c +index da5cd9c92a59..ead2e79036a9 100644 +--- a/drivers/mfd/rn5t618.c ++++ b/drivers/mfd/rn5t618.c +@@ -26,6 +26,7 @@ static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg) + case RN5T618_WATCHDOGCNT: + case RN5T618_DCIRQ: + case RN5T618_ILIMDATAH ... RN5T618_AIN0DATAL: ++ case RN5T618_ADCCNT3: + case RN5T618_IR_ADC1 ... RN5T618_IR_ADC3: + case RN5T618_IR_GPR: + case RN5T618_IR_GPF: +diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c +index 66e354d51ee9..7083d8ddd495 100644 +--- a/drivers/mmc/host/mmc_spi.c ++++ b/drivers/mmc/host/mmc_spi.c +@@ -1134,17 +1134,22 @@ static void mmc_spi_initsequence(struct mmc_spi_host *host) + * SPI protocol. Another is that when chipselect is released while + * the card returns BUSY status, the clock must issue several cycles + * with chipselect high before the card will stop driving its output. ++ * ++ * SPI_CS_HIGH means "asserted" here. In some cases like when using ++ * GPIOs for chip select, SPI_CS_HIGH is set but this will be logically ++ * inverted by gpiolib, so if we want to ascertain to drive it high ++ * we should toggle the default with an XOR as we do here. + */ +- host->spi->mode |= SPI_CS_HIGH; ++ host->spi->mode ^= SPI_CS_HIGH; + if (spi_setup(host->spi) != 0) { + /* Just warn; most cards work without it. 
*/ + dev_warn(&host->spi->dev, + "can't change chip-select polarity\n"); +- host->spi->mode &= ~SPI_CS_HIGH; ++ host->spi->mode ^= SPI_CS_HIGH; + } else { + mmc_spi_readbytes(host, 18); + +- host->spi->mode &= ~SPI_CS_HIGH; ++ host->spi->mode ^= SPI_CS_HIGH; + if (spi_setup(host->spi) != 0) { + /* Wot, we can't get the same setup we had before? */ + dev_err(&host->spi->dev, +diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c +index 0ae986c42bc8..9378d5dc86c8 100644 +--- a/drivers/mmc/host/sdhci-of-at91.c ++++ b/drivers/mmc/host/sdhci-of-at91.c +@@ -324,19 +324,22 @@ static int sdhci_at91_probe(struct platform_device *pdev) + priv->mainck = devm_clk_get(&pdev->dev, "baseclk"); + if (IS_ERR(priv->mainck)) { + dev_err(&pdev->dev, "failed to get baseclk\n"); +- return PTR_ERR(priv->mainck); ++ ret = PTR_ERR(priv->mainck); ++ goto sdhci_pltfm_free; + } + + priv->hclock = devm_clk_get(&pdev->dev, "hclock"); + if (IS_ERR(priv->hclock)) { + dev_err(&pdev->dev, "failed to get hclock\n"); +- return PTR_ERR(priv->hclock); ++ ret = PTR_ERR(priv->hclock); ++ goto sdhci_pltfm_free; + } + + priv->gck = devm_clk_get(&pdev->dev, "multclk"); + if (IS_ERR(priv->gck)) { + dev_err(&pdev->dev, "failed to get multclk\n"); +- return PTR_ERR(priv->gck); ++ ret = PTR_ERR(priv->gck); ++ goto sdhci_pltfm_free; + } + + ret = sdhci_at91_set_clks_presets(&pdev->dev); +diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c +index c9ea365c248c..5091e2c1c0e5 100644 +--- a/drivers/mmc/host/sdhci-pci-core.c ++++ b/drivers/mmc/host/sdhci-pci-core.c +@@ -1604,7 +1604,7 @@ static u32 sdhci_read_present_state(struct sdhci_host *host) + return sdhci_readl(host, SDHCI_PRESENT_STATE); + } + +-void amd_sdhci_reset(struct sdhci_host *host, u8 mask) ++static void amd_sdhci_reset(struct sdhci_host *host, u8 mask) + { + struct sdhci_pci_slot *slot = sdhci_priv(host); + struct pci_dev *pdev = slot->chip->pdev; +diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c +index 309c808351ac..f417fb680cd8 100644 +--- a/drivers/mtd/spi-nor/spi-nor.c ++++ b/drivers/mtd/spi-nor/spi-nor.c +@@ -2310,15 +2310,16 @@ static const struct flash_info spi_nor_ids[] = { + { "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "n25q256ax1", INFO(0x20bb19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_QUAD_READ) }, + { "n25q512ax3", INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) }, ++ { "mt25qu512a", INFO6(0x20bb20, 0x104400, 64 * 1024, 1024, ++ SECT_4K | USE_FSR | SPI_NOR_DUAL_READ | ++ SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, ++ { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | ++ SPI_NOR_QUAD_READ) }, + { "n25q00", INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, + { "n25q00a", INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, + { "mt25ql02g", INFO(0x20ba22, 0, 64 * 1024, 4096, + SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | + NO_CHIP_ERASE) }, +- { "mt25qu512a (n25q512a)", INFO(0x20bb20, 0, 64 * 1024, 1024, +- SECT_4K | USE_FSR | SPI_NOR_DUAL_READ | +- SPI_NOR_QUAD_READ | +- SPI_NOR_4B_OPCODES) }, + { "mt25qu02g", INFO(0x20bb22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, + + /* Micron */ +diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c +index 30621c67721a..604772fc4a96 100644 +--- a/drivers/mtd/ubi/fastmap.c ++++ b/drivers/mtd/ubi/fastmap.c +@@ -64,7 +64,7 @@ static int 
self_check_seen(struct ubi_device *ubi, unsigned long *seen) + return 0; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { +- if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { ++ if (!test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { + ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); + ret = -EINVAL; + } +@@ -1137,7 +1137,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, + struct rb_node *tmp_rb; + int ret, i, j, free_peb_count, used_peb_count, vol_count; + int scrub_peb_count, erase_peb_count; +- unsigned long *seen_pebs = NULL; ++ unsigned long *seen_pebs; + + fm_raw = ubi->fm_buf; + memset(ubi->fm_buf, 0, ubi->fm_size); +@@ -1151,7 +1151,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, + dvbuf = new_fm_vbuf(ubi, UBI_FM_DATA_VOLUME_ID); + if (!dvbuf) { + ret = -ENOMEM; +- goto out_kfree; ++ goto out_free_avbuf; + } + + avhdr = ubi_get_vid_hdr(avbuf); +@@ -1160,7 +1160,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, + seen_pebs = init_seen(ubi); + if (IS_ERR(seen_pebs)) { + ret = PTR_ERR(seen_pebs); +- goto out_kfree; ++ goto out_free_dvbuf; + } + + spin_lock(&ubi->volumes_lock); +@@ -1328,7 +1328,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, + ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avbuf); + if (ret) { + ubi_err(ubi, "unable to write vid_hdr to fastmap SB!"); +- goto out_kfree; ++ goto out_free_seen; + } + + for (i = 0; i < new_fm->used_blocks; i++) { +@@ -1350,7 +1350,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, + if (ret) { + ubi_err(ubi, "unable to write vid_hdr to PEB %i!", + new_fm->e[i]->pnum); +- goto out_kfree; ++ goto out_free_seen; + } + } + +@@ -1360,7 +1360,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, + if (ret) { + ubi_err(ubi, "unable to write fastmap to PEB %i!", + new_fm->e[i]->pnum); +- goto out_kfree; ++ goto out_free_seen; + } + } + +@@ -1370,10 +1370,13 @@ static int ubi_write_fastmap(struct ubi_device *ubi, + ret = self_check_seen(ubi, seen_pebs); + dbg_bld("fastmap written!"); + +-out_kfree: +- ubi_free_vid_buf(avbuf); +- ubi_free_vid_buf(dvbuf); ++out_free_seen: + free_seen(seen_pebs); ++out_free_dvbuf: ++ ubi_free_vid_buf(dvbuf); ++out_free_avbuf: ++ ubi_free_vid_buf(avbuf); ++ + out: + return ret; + } +diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c +index 4f2e6910c623..1cc2cd894f87 100644 +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -1383,26 +1383,31 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) + bool do_tx_balance = true; + u32 hash_index = 0; + const u8 *hash_start = NULL; +- struct ipv6hdr *ip6hdr; + + skb_reset_mac_header(skb); + eth_data = eth_hdr(skb); + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: { +- const struct iphdr *iph = ip_hdr(skb); ++ const struct iphdr *iph; + + if (is_broadcast_ether_addr(eth_data->h_dest) || +- iph->daddr == ip_bcast || +- iph->protocol == IPPROTO_IGMP) { ++ !pskb_network_may_pull(skb, sizeof(*iph))) { ++ do_tx_balance = false; ++ break; ++ } ++ iph = ip_hdr(skb); ++ if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { + do_tx_balance = false; + break; + } + hash_start = (char *)&(iph->daddr); + hash_size = sizeof(iph->daddr); +- } + break; +- case ETH_P_IPV6: ++ } ++ case ETH_P_IPV6: { ++ const struct ipv6hdr *ip6hdr; ++ + /* IPv6 doesn't really use broadcast mac address, but leave + * that here just in case. 
+ */ +@@ -1419,7 +1424,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) + break; + } + +- /* Additianally, DAD probes should not be tx-balanced as that ++ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) { ++ do_tx_balance = false; ++ break; ++ } ++ /* Additionally, DAD probes should not be tx-balanced as that + * will lead to false positives for duplicate addresses and + * prevent address configuration from working. + */ +@@ -1429,17 +1438,26 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) + break; + } + +- hash_start = (char *)&(ipv6_hdr(skb)->daddr); +- hash_size = sizeof(ipv6_hdr(skb)->daddr); ++ hash_start = (char *)&ip6hdr->daddr; ++ hash_size = sizeof(ip6hdr->daddr); + break; +- case ETH_P_IPX: +- if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { ++ } ++ case ETH_P_IPX: { ++ const struct ipxhdr *ipxhdr; ++ ++ if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { ++ do_tx_balance = false; ++ break; ++ } ++ ipxhdr = (struct ipxhdr *)skb_network_header(skb); ++ ++ if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { + /* something is wrong with this packet */ + do_tx_balance = false; + break; + } + +- if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { ++ if (ipxhdr->ipx_type != IPX_TYPE_NCP) { + /* The only protocol worth balancing in + * this family since it has an "ARP" like + * mechanism +@@ -1448,9 +1466,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) + break; + } + ++ eth_data = eth_hdr(skb); + hash_start = (char *)eth_data->h_dest; + hash_size = ETH_ALEN; + break; ++ } + case ETH_P_ARP: + do_tx_balance = false; + if (bond_info->rlb_enabled) +diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c +index a7132c1593c3..7ed667b304d1 100644 +--- a/drivers/net/dsa/b53/b53_common.c ++++ b/drivers/net/dsa/b53/b53_common.c +@@ -680,7 +680,7 @@ int b53_configure_vlan(struct dsa_switch *ds) + b53_do_vlan_op(dev, VTA_CMD_CLEAR); + } + +- b53_enable_vlan(dev, false, ds->vlan_filtering); ++ b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering); + + b53_for_each_port(dev, i) + b53_write16(dev, B53_VLAN_PAGE, +diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c +index 47b21096b577..fecd5e674e04 100644 +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -68,7 +68,9 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) + + /* Force link status for IMP port */ + reg = core_readl(priv, offset); +- reg |= (MII_SW_OR | LINK_STS | GMII_SPEED_UP_2G); ++ reg |= (MII_SW_OR | LINK_STS); ++ if (priv->type == BCM7278_DEVICE_ID) ++ reg |= GMII_SPEED_UP_2G; + core_writel(priv, reg, offset); + + /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ +diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c +index c5f64959a184..1142768969c2 100644 +--- a/drivers/net/dsa/microchip/ksz9477_spi.c ++++ b/drivers/net/dsa/microchip/ksz9477_spi.c +@@ -101,6 +101,12 @@ static struct spi_driver ksz9477_spi_driver = { + + module_spi_driver(ksz9477_spi_driver); + ++MODULE_ALIAS("spi:ksz9477"); ++MODULE_ALIAS("spi:ksz9897"); ++MODULE_ALIAS("spi:ksz9893"); ++MODULE_ALIAS("spi:ksz9563"); ++MODULE_ALIAS("spi:ksz8563"); ++MODULE_ALIAS("spi:ksz9567"); + MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>"); + MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch SPI access Driver"); + MODULE_LICENSE("GPL"); +diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c +index 
b4c664957266..4a27577e137b 100644 +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -2728,6 +2728,9 @@ static int __maybe_unused bcm_sysport_resume(struct device *d) + + umac_reset(priv); + ++ /* Disable the UniMAC RX/TX */ ++ umac_enable_set(priv, CMD_RX_EN | CMD_TX_EN, 0); ++ + /* We may have been suspended and never received a WOL event that + * would turn off MPD detection, take care of that now + */ +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index cf292f7c3d3c..41297533b4a8 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -7873,7 +7873,7 @@ static void bnxt_setup_msix(struct bnxt *bp) + int tcs, i; + + tcs = netdev_get_num_tc(dev); +- if (tcs > 1) { ++ if (tcs) { + int i, off, count; + + for (i = 0; i < tcs; i++) { +@@ -9273,10 +9273,6 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, + bnxt_debug_dev_exit(bp); + bnxt_disable_napi(bp); + del_timer_sync(&bp->timer); +- if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && +- pci_is_enabled(bp->pdev)) +- pci_disable_device(bp->pdev); +- + bnxt_free_skbs(bp); + + /* Save ring stats before shutdown */ +@@ -10052,8 +10048,15 @@ static void bnxt_fw_reset_close(struct bnxt *bp) + { + __bnxt_close_nic(bp, true, false); + bnxt_ulp_irq_stop(bp); ++ /* When firmware is fatal state, disable PCI device to prevent ++ * any potential bad DMAs before freeing kernel memory. ++ */ ++ if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) ++ pci_disable_device(bp->pdev); + bnxt_clear_int_mode(bp); + bnxt_hwrm_func_drv_unrgtr(bp); ++ if (pci_is_enabled(bp->pdev)) ++ pci_disable_device(bp->pdev); + bnxt_free_ctx_mem(bp); + kfree(bp->ctx); + bp->ctx = NULL; +@@ -11359,9 +11362,9 @@ static void bnxt_remove_one(struct pci_dev *pdev) + bnxt_sriov_disable(bp); + + bnxt_dl_fw_reporters_destroy(bp, true); +- bnxt_dl_unregister(bp); + pci_disable_pcie_error_reporting(pdev); + unregister_netdev(dev); ++ bnxt_dl_unregister(bp); + bnxt_shutdown_tc(bp); + bnxt_cancel_sp_work(bp); + bp->sp_event = 0; +@@ -11850,11 +11853,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + bnxt_init_tc(bp); + } + ++ bnxt_dl_register(bp); ++ + rc = register_netdev(dev); + if (rc) +- goto init_err_cleanup_tc; ++ goto init_err_cleanup; + +- bnxt_dl_register(bp); ++ if (BNXT_PF(bp)) ++ devlink_port_type_eth_set(&bp->dl_port, bp->dev); + bnxt_dl_fw_reporters_create(bp); + + netdev_info(dev, "%s found at mem %lx, node addr %pM\n", +@@ -11864,7 +11870,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + + return 0; + +-init_err_cleanup_tc: ++init_err_cleanup: ++ bnxt_dl_unregister(bp); + bnxt_shutdown_tc(bp); + bnxt_clear_int_mode(bp); + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +index 1e236e74ff2f..2d817ba0602c 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +@@ -482,7 +482,6 @@ int bnxt_dl_register(struct bnxt *bp) + netdev_err(bp->dev, "devlink_port_register failed"); + goto err_dl_param_unreg; + } +- devlink_port_type_eth_set(&bp->dl_port, bp->dev); + + rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, + ARRAY_SIZE(bnxt_dl_port_params)); +diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c +index f496b248bda3..95a94507cec1 100644 
+--- a/drivers/net/ethernet/cadence/macb_main.c ++++ b/drivers/net/ethernet/cadence/macb_main.c +@@ -73,7 +73,11 @@ struct sifive_fu540_macb_mgmt { + /* Max length of transmit frame must be a multiple of 8 bytes */ + #define MACB_TX_LEN_ALIGN 8 + #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) +-#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) ++/* Limit maximum TX length as per Cadence TSO errata. This is to avoid a ++ * false amba_error in TX path from the DMA assuming there is not enough ++ * space in the SRAM (16KB) even when there is. ++ */ ++#define GEM_MAX_TX_LEN (unsigned int)(0x3FC0) + + #define GEM_MTU_MIN_SIZE ETH_MIN_MTU + #define MACB_NETIF_LSO NETIF_F_TSO +@@ -1664,16 +1668,14 @@ static netdev_features_t macb_features_check(struct sk_buff *skb, + + /* Validate LSO compatibility */ + +- /* there is only one buffer */ +- if (!skb_is_nonlinear(skb)) ++ /* there is only one buffer or protocol is not UDP */ ++ if (!skb_is_nonlinear(skb) || (ip_hdr(skb)->protocol != IPPROTO_UDP)) + return features; + + /* length of header */ + hdrlen = skb_transport_offset(skb); +- if (ip_hdr(skb)->protocol == IPPROTO_TCP) +- hdrlen += tcp_hdrlen(skb); + +- /* For LSO: ++ /* For UFO only: + * When software supplies two or more payload buffers all payload buffers + * apart from the last must be a multiple of 8 bytes in size. + */ +diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c +index 0efdbd1a4a6f..32d470d4122a 100644 +--- a/drivers/net/ethernet/dec/tulip/dmfe.c ++++ b/drivers/net/ethernet/dec/tulip/dmfe.c +@@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void) + if (cr6set) + dmfe_cr6_user_set = cr6set; + +- switch(mode) { +- case DMFE_10MHF: ++ switch (mode) { ++ case DMFE_10MHF: + case DMFE_100MHF: + case DMFE_10MFD: + case DMFE_100MFD: + case DMFE_1M_HPNA: + dmfe_media_mode = mode; + break; +- default:dmfe_media_mode = DMFE_AUTO; ++ default: ++ dmfe_media_mode = DMFE_AUTO; + break; + } + +diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c +index b1f30b194300..117ffe08800d 100644 +--- a/drivers/net/ethernet/dec/tulip/uli526x.c ++++ b/drivers/net/ethernet/dec/tulip/uli526x.c +@@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void) + if (cr6set) + uli526x_cr6_user_set = cr6set; + +- switch (mode) { +- case ULI526X_10MHF: ++ switch (mode) { ++ case ULI526X_10MHF: + case ULI526X_100MHF: + case ULI526X_10MFD: + case ULI526X_100MFD: +diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +index fcbe01f61aa4..e130233b5085 100644 +--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c ++++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +@@ -2483,6 +2483,9 @@ static void dpaa_adjust_link(struct net_device *net_dev) + mac_dev->adjust_link(mac_dev); + } + ++/* The Aquantia PHYs are capable of performing rate adaptation */ ++#define PHY_VEND_AQUANTIA 0x03a1b400 ++ + static int dpaa_phy_init(struct net_device *net_dev) + { + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; +@@ -2501,9 +2504,14 @@ static int dpaa_phy_init(struct net_device *net_dev) + return -ENODEV; + } + +- /* Remove any features not supported by the controller */ +- ethtool_convert_legacy_u32_to_link_mode(mask, mac_dev->if_support); +- linkmode_and(phy_dev->supported, phy_dev->supported, mask); ++ /* Unless the PHY is capable of rate adaptation */ ++ 
if (mac_dev->phy_if != PHY_INTERFACE_MODE_XGMII || ++ ((phy_dev->drv->phy_id & GENMASK(31, 10)) != PHY_VEND_AQUANTIA)) { ++ /* remove any features not supported by the controller */ ++ ethtool_convert_legacy_u32_to_link_mode(mask, ++ mac_dev->if_support); ++ linkmode_and(phy_dev->supported, phy_dev->supported, mask); ++ } + + phy_support_asym_pause(phy_dev); + +diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c +index e49820675c8c..6b1a81df1465 100644 +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -388,6 +388,8 @@ struct mvneta_pcpu_stats { + struct u64_stats_sync syncp; + u64 rx_packets; + u64 rx_bytes; ++ u64 rx_dropped; ++ u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + }; +@@ -706,6 +708,8 @@ mvneta_get_stats64(struct net_device *dev, + struct mvneta_pcpu_stats *cpu_stats; + u64 rx_packets; + u64 rx_bytes; ++ u64 rx_dropped; ++ u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + +@@ -714,19 +718,20 @@ mvneta_get_stats64(struct net_device *dev, + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + rx_packets = cpu_stats->rx_packets; + rx_bytes = cpu_stats->rx_bytes; ++ rx_dropped = cpu_stats->rx_dropped; ++ rx_errors = cpu_stats->rx_errors; + tx_packets = cpu_stats->tx_packets; + tx_bytes = cpu_stats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; ++ stats->rx_dropped += rx_dropped; ++ stats->rx_errors += rx_errors; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + } + +- stats->rx_errors = dev->stats.rx_errors; +- stats->rx_dropped = dev->stats.rx_dropped; +- + stats->tx_dropped = dev->stats.tx_dropped; + } + +@@ -1703,8 +1708,14 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, + static void mvneta_rx_error(struct mvneta_port *pp, + struct mvneta_rx_desc *rx_desc) + { ++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + u32 status = rx_desc->status; + ++ /* update per-cpu counter */ ++ u64_stats_update_begin(&stats->syncp); ++ stats->rx_errors++; ++ u64_stats_update_end(&stats->syncp); ++ + switch (status & MVNETA_RXD_ERR_CODE_MASK) { + case MVNETA_RXD_ERR_CRC: + netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n", +@@ -1965,7 +1976,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, + /* Check errors only for FIRST descriptor */ + if (rx_status & MVNETA_RXD_ERR_SUMMARY) { + mvneta_rx_error(pp, rx_desc); +- dev->stats.rx_errors++; + /* leave the descriptor untouched */ + continue; + } +@@ -1976,11 +1986,17 @@ static int mvneta_rx_swbm(struct napi_struct *napi, + skb_size = max(rx_copybreak, rx_header_size); + rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size); + if (unlikely(!rxq->skb)) { ++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); ++ + netdev_err(dev, + "Can't allocate skb on queue %d\n", + rxq->id); +- dev->stats.rx_dropped++; ++ + rxq->skb_alloc_err++; ++ ++ u64_stats_update_begin(&stats->syncp); ++ stats->rx_dropped++; ++ u64_stats_update_end(&stats->syncp); + continue; + } + copy_size = min(skb_size, rx_bytes); +@@ -2137,7 +2153,6 @@ err_drop_frame_ret_pool: + mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, + rx_desc->buf_phys_addr); + err_drop_frame: +- dev->stats.rx_errors++; + mvneta_rx_error(pp, rx_desc); + /* leave the descriptor untouched */ + continue; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +index d787bc0a4155..e09bc3858d57 100644 +--- 
a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +@@ -45,7 +45,7 @@ void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); + + static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) + { +- if (!MLX5_CAP_GEN(mdev, tls)) ++ if (!MLX5_CAP_GEN(mdev, tls_tx)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +index 71384ad1a443..ef1ed15a53b4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +@@ -269,7 +269,7 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, + int datalen; + u32 skb_seq; + +- if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { ++ if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx)) { + skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); + goto out; + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +index c76da309506b..72232e570af7 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +@@ -850,6 +850,7 @@ void mlx5_fpga_ipsec_delete_sa_ctx(void *context) + mutex_lock(&fpga_xfrm->lock); + if (!--fpga_xfrm->num_rules) { + mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); ++ kfree(fpga_xfrm->sa_ctx); + fpga_xfrm->sa_ctx = NULL; + } + mutex_unlock(&fpga_xfrm->lock); +@@ -1478,7 +1479,7 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) + return 0; + +- if (!mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { ++ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); + return -EOPNOTSUPP; + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +index 791e14ac26f4..86e6bbb57482 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +@@ -1555,16 +1555,16 @@ struct match_list_head { + struct match_list first; + }; + +-static void free_match_list(struct match_list_head *head) ++static void free_match_list(struct match_list_head *head, bool ft_locked) + { + if (!list_empty(&head->list)) { + struct match_list *iter, *match_tmp; + + list_del(&head->first.list); +- tree_put_node(&head->first.g->node, false); ++ tree_put_node(&head->first.g->node, ft_locked); + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { +- tree_put_node(&iter->g->node, false); ++ tree_put_node(&iter->g->node, ft_locked); + list_del(&iter->list); + kfree(iter); + } +@@ -1573,7 +1573,8 @@ static void free_match_list(struct match_list_head *head) + + static int build_match_list(struct match_list_head *match_head, + struct mlx5_flow_table *ft, +- const struct mlx5_flow_spec *spec) ++ const struct mlx5_flow_spec *spec, ++ bool ft_locked) + { + struct rhlist_head *tmp, *list; + struct mlx5_flow_group *g; +@@ -1598,7 +1599,7 @@ static int build_match_list(struct match_list_head *match_head, + + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + if (!curr_match) { +- free_match_list(match_head); ++ free_match_list(match_head, ft_locked); + err = -ENOMEM; + goto out; + } +@@ -1778,7 +1779,7 @@ search_again_locked: + version = atomic_read(&ft->node.version); + 
+ /* Collect all fgs which has a matching match_criteria */ +- err = build_match_list(&match_head, ft, spec); ++ err = build_match_list(&match_head, ft, spec, take_write); + if (err) { + if (take_write) + up_write_ref_node(&ft->node, false); +@@ -1792,7 +1793,7 @@ search_again_locked: + + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); +- free_match_list(&match_head); ++ free_match_list(&match_head, take_write); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { + if (take_write) +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +index a19790dee7b2..13e86f0b42f5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +@@ -239,7 +239,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) + return err; + } + +- if (MLX5_CAP_GEN(dev, tls)) { ++ if (MLX5_CAP_GEN(dev, tls_tx)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); + if (err) + return err; +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h +index 5bfdda19f64d..d8745f87f065 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h ++++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h +@@ -862,7 +862,7 @@ struct ionic_rxq_comp { + #define IONIC_RXQ_COMP_CSUM_F_VLAN 0x40 + #define IONIC_RXQ_COMP_CSUM_F_CALC 0x80 + u8 pkt_type_color; +-#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x0f ++#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x7f + }; + + enum ionic_pkt_type { +diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c +index 0dacf2c18c09..3e613058e225 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c +@@ -44,8 +44,8 @@ + /* Add/subtract the Adjustment_Value when making a Drift adjustment */ + #define QED_DRIFT_CNTR_DIRECTION_SHIFT 31 + #define QED_TIMESTAMP_MASK BIT(16) +-/* Param mask for Hardware to detect/timestamp the unicast PTP packets */ +-#define QED_PTP_UCAST_PARAM_MASK 0xF ++/* Param mask for Hardware to detect/timestamp the L2/L4 unicast PTP packets */ ++#define QED_PTP_UCAST_PARAM_MASK 0x70F + + static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn) + { +diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c +index 8d88e4083456..7b65e79d6ae9 100644 +--- a/drivers/net/ethernet/smsc/smc911x.c ++++ b/drivers/net/ethernet/smsc/smc911x.c +@@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work) + if (lp->ctl_rspeed != 100) + my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); + +- if (!lp->ctl_rfduplx) ++ if (!lp->ctl_rfduplx) + my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); + + /* Update our Auto-Neg Advertisement Register */ +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +index 7ec895407d23..e0a5fe83d8e0 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +@@ -413,6 +413,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) + dll_lock = rgmii_readl(ethqos, SDC4_STATUS); + if (dll_lock & SDC4_STATUS_DLL_LOCK) + break; ++ retry--; + } while (retry > 0); + if (!retry) + dev_err(ðqos->pdev->dev, +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 
06dd65c419c4..582176d869c3 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -4763,6 +4763,7 @@ int stmmac_suspend(struct device *dev) + { + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); ++ u32 chan; + + if (!ndev || !netif_running(ndev)) + return 0; +@@ -4776,6 +4777,9 @@ int stmmac_suspend(struct device *dev) + + stmmac_disable_all_queues(priv); + ++ for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) ++ del_timer_sync(&priv->tx_queue[chan].txtimer); ++ + /* Stop TX/RX DMA */ + stmmac_stop_all_dma(priv); + +diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c +index 9b3ba98726d7..3a53d222bfcc 100644 +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -767,12 +767,12 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) + int i; + + gtp->addr_hash = kmalloc_array(hsize, sizeof(struct hlist_head), +- GFP_KERNEL); ++ GFP_KERNEL | __GFP_NOWARN); + if (gtp->addr_hash == NULL) + return -ENOMEM; + + gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head), +- GFP_KERNEL); ++ GFP_KERNEL | __GFP_NOWARN); + if (gtp->tid_hash == NULL) + goto err1; + +diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c +index 44c2d857a7fa..91b302f0192f 100644 +--- a/drivers/net/netdevsim/dev.c ++++ b/drivers/net/netdevsim/dev.c +@@ -73,7 +73,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = { + + static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) + { +- char dev_ddir_name[16]; ++ char dev_ddir_name[sizeof(DRV_NAME) + 10]; + + sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); + nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); +diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c +index a7b9cf3269bf..29a0917a81e6 100644 +--- a/drivers/net/ppp/ppp_async.c ++++ b/drivers/net/ppp/ppp_async.c +@@ -874,15 +874,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, + skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); + if (!skb) + goto nomem; +- ap->rpkt = skb; +- } +- if (skb->len == 0) { +- /* Try to get the payload 4-byte aligned. +- * This should match the +- * PPP_ALLSTATIONS/PPP_UI/compressed tests in +- * process_input_packet, but we do not have +- * enough chars here to test buf[1] and buf[2]. +- */ ++ ap->rpkt = skb; ++ } ++ if (skb->len == 0) { ++ /* Try to get the payload 4-byte aligned. ++ * This should match the ++ * PPP_ALLSTATIONS/PPP_UI/compressed tests in ++ * process_input_packet, but we do not have ++ * enough chars here to test buf[1] and buf[2]. 
++ */ + if (buf[0] != PPP_ALLSTATIONS) + skb_reserve(skb, 2 + (buf[0] & 1)); + } +diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +index 7cdfde9b3dea..575ed19e9195 100644 +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +@@ -430,6 +430,7 @@ fail: + usb_free_urb(req->urb); + list_del(q->next); + } ++ kfree(reqs); + return NULL; + + } +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +index b3768d5d852a..8ad2d889179c 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +@@ -3321,6 +3321,10 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, + igtk_cmd.sta_id = cpu_to_le32(sta_id); + + if (remove_key) { ++ /* This is a valid situation for IGTK */ ++ if (sta_id == IWL_MVM_INVALID_STA) ++ return 0; ++ + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID); + } else { + struct ieee80211_key_seq seq; +@@ -3575,9 +3579,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, + IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", + keyconf->keyidx, sta_id); + +- if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || +- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || +- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)) ++ if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || ++ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || ++ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) + return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); + + if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { +diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +index 6dd835f1efc2..fbfa0b15d0c8 100644 +--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c ++++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +@@ -232,6 +232,7 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv, + + if (country_ie_len > + (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) { ++ rcu_read_unlock(); + mwifiex_dbg(priv->adapter, ERROR, + "11D: country_ie_len overflow!, deauth AP\n"); + return -EINVAL; +diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c +index cda996f6954e..2b83156efe3f 100644 +--- a/drivers/nfc/pn544/pn544.c ++++ b/drivers/nfc/pn544/pn544.c +@@ -693,7 +693,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, + target->nfcid1_len != 10) + return -EOPNOTSUPP; + +- return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, ++ return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, + PN544_RF_READER_CMD_ACTIVATE_NEXT, + target->nfcid1, target->nfcid1_len, NULL); + } else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK | +diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c +index d16b55ffe79f..4e9004fe5c6f 100644 +--- a/drivers/nvme/target/fabrics-cmd.c ++++ b/drivers/nvme/target/fabrics-cmd.c +@@ -105,6 +105,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) + u16 qid = le16_to_cpu(c->qid); + u16 sqsize = le16_to_cpu(c->sqsize); + struct nvmet_ctrl *old; ++ u16 ret; + + old = cmpxchg(&req->sq->ctrl, NULL, ctrl); + if (old) { +@@ -115,7 +116,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) + if (!sqsize) { + pr_warn("queue size zero!\n"); + req->error_loc = offsetof(struct nvmf_connect_command, 
sqsize); +- return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; ++ ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; ++ goto err; + } + + /* note: convert queue size from 0's-based value to 1's-based value */ +@@ -128,16 +130,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) + } + + if (ctrl->ops->install_queue) { +- u16 ret = ctrl->ops->install_queue(req->sq); +- ++ ret = ctrl->ops->install_queue(req->sq); + if (ret) { + pr_err("failed to install queue %d cntlid %d ret %x\n", +- qid, ret, ctrl->cntlid); +- return ret; ++ qid, ctrl->cntlid, ret); ++ goto err; + } + } + + return 0; ++ ++err: ++ req->sq->ctrl = NULL; ++ return ret; + } + + static void nvmet_execute_admin_connect(struct nvmet_req *req) +diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c +index 057d1ff87d5d..960542dea5ad 100644 +--- a/drivers/nvmem/core.c ++++ b/drivers/nvmem/core.c +@@ -110,7 +110,7 @@ static void nvmem_cell_drop(struct nvmem_cell *cell) + list_del(&cell->node); + mutex_unlock(&nvmem_mutex); + of_node_put(cell->np); +- kfree(cell->name); ++ kfree_const(cell->name); + kfree(cell); + } + +@@ -137,7 +137,9 @@ static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem, + cell->nvmem = nvmem; + cell->offset = info->offset; + cell->bytes = info->bytes; +- cell->name = info->name; ++ cell->name = kstrdup_const(info->name, GFP_KERNEL); ++ if (!cell->name) ++ return -ENOMEM; + + cell->bit_offset = info->bit_offset; + cell->nbits = info->nbits; +@@ -327,7 +329,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem) + dev_err(dev, "cell %s unaligned to nvmem stride %d\n", + cell->name, nvmem->stride); + /* Cells already added will be freed later. */ +- kfree(cell->name); ++ kfree_const(cell->name); + kfree(cell); + return -EINVAL; + } +diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig +index 37c2ccbefecd..d91618641be6 100644 +--- a/drivers/of/Kconfig ++++ b/drivers/of/Kconfig +@@ -103,4 +103,8 @@ config OF_OVERLAY + config OF_NUMA + bool + ++config OF_DMA_DEFAULT_COHERENT ++ # arches should select this if DMA is coherent by default for OF devices ++ bool ++ + endif # OF +diff --git a/drivers/of/address.c b/drivers/of/address.c +index 978427a9d5e6..8f74c4626e0e 100644 +--- a/drivers/of/address.c ++++ b/drivers/of/address.c +@@ -998,12 +998,16 @@ EXPORT_SYMBOL_GPL(of_dma_get_range); + * @np: device node + * + * It returns true if "dma-coherent" property was found +- * for this device in DT. ++ * for this device in the DT, or if DMA is coherent by ++ * default for OF devices on the current platform. 
+ */ + bool of_dma_is_coherent(struct device_node *np) + { + struct device_node *node = of_node_get(np); + ++ if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) ++ return true; ++ + while (node) { + if (of_property_read_bool(node, "dma-coherent")) { + of_node_put(node); +diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c +index af677254a072..c8c702c494a2 100644 +--- a/drivers/pci/controller/dwc/pci-keystone.c ++++ b/drivers/pci/controller/dwc/pci-keystone.c +@@ -422,7 +422,7 @@ static void ks_pcie_setup_rc_app_regs(struct keystone_pcie *ks_pcie) + lower_32_bits(start) | OB_ENABLEN); + ks_pcie_app_writel(ks_pcie, OB_OFFSET_HI(i), + upper_32_bits(start)); +- start += OB_WIN_SIZE; ++ start += OB_WIN_SIZE * SZ_1M; + } + + val = ks_pcie_app_readl(ks_pcie, CMD_STATUS); +@@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci) + /* Disable Link training */ + val = ks_pcie_app_readl(ks_pcie, CMD_STATUS); + val &= ~LTSSM_EN_VAL; +- ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val); ++ ks_pcie_app_writel(ks_pcie, CMD_STATUS, val); + } + + static int ks_pcie_start_link(struct dw_pcie *pci) +@@ -1354,7 +1354,7 @@ static int __init ks_pcie_probe(struct platform_device *pdev) + ret = of_property_read_u32(np, "num-viewport", &num_viewport); + if (ret < 0) { + dev_err(dev, "unable to read *num-viewport* property\n"); +- return ret; ++ goto err_get_sync; + } + + /* +diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c +index 673a1725ef38..090b632965e2 100644 +--- a/drivers/pci/controller/pci-tegra.c ++++ b/drivers/pci/controller/pci-tegra.c +@@ -2798,7 +2798,7 @@ static int tegra_pcie_probe(struct platform_device *pdev) + + pm_runtime_enable(pcie->dev); + err = pm_runtime_get_sync(pcie->dev); +- if (err) { ++ if (err < 0) { + dev_err(dev, "fail to enable pcie controller: %d\n", err); + goto teardown_msi; + } +diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +index 42bc5150dd92..febe0aef68d4 100644 +--- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c ++++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +@@ -80,7 +80,7 @@ static int read_poll_timeout(void __iomem *addr, u32 mask) + if (readl_relaxed(addr) & mask) + return 0; + +- usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); ++ usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); + } while (!time_after(jiffies, timeout)); + + return (readl_relaxed(addr) & mask) ? 
0 : -ETIMEDOUT; +diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c +index cdab916fbf92..e330ec73c465 100644 +--- a/drivers/platform/x86/intel_scu_ipc.c ++++ b/drivers/platform/x86/intel_scu_ipc.c +@@ -67,26 +67,22 @@ + struct intel_scu_ipc_pdata_t { + u32 i2c_base; + u32 i2c_len; +- u8 irq_mode; + }; + + static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = { + .i2c_base = 0xff12b000, + .i2c_len = 0x10, +- .irq_mode = 0, + }; + + /* Penwell and Cloverview */ + static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = { + .i2c_base = 0xff12b000, + .i2c_len = 0x10, +- .irq_mode = 1, + }; + + static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = { + .i2c_base = 0xff00d000, + .i2c_len = 0x10, +- .irq_mode = 0, + }; + + struct intel_scu_ipc_dev { +@@ -99,6 +95,9 @@ struct intel_scu_ipc_dev { + + static struct intel_scu_ipc_dev ipcdev; /* Only one for now */ + ++#define IPC_STATUS 0x04 ++#define IPC_STATUS_IRQ BIT(2) ++ + /* + * IPC Read Buffer (Read Only): + * 16 byte buffer for receiving data from SCU, if IPC command +@@ -120,11 +119,8 @@ static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */ + */ + static inline void ipc_command(struct intel_scu_ipc_dev *scu, u32 cmd) + { +- if (scu->irq_mode) { +- reinit_completion(&scu->cmd_complete); +- writel(cmd | IPC_IOC, scu->ipc_base); +- } +- writel(cmd, scu->ipc_base); ++ reinit_completion(&scu->cmd_complete); ++ writel(cmd | IPC_IOC, scu->ipc_base); + } + + /* +@@ -610,9 +606,10 @@ EXPORT_SYMBOL(intel_scu_ipc_i2c_cntrl); + static irqreturn_t ioc(int irq, void *dev_id) + { + struct intel_scu_ipc_dev *scu = dev_id; ++ int status = ipc_read_status(scu); + +- if (scu->irq_mode) +- complete(&scu->cmd_complete); ++ writel(status | IPC_STATUS_IRQ, scu->ipc_base + IPC_STATUS); ++ complete(&scu->cmd_complete); + + return IRQ_HANDLED; + } +@@ -638,8 +635,6 @@ static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id) + if (!pdata) + return -ENODEV; + +- scu->irq_mode = pdata->irq_mode; +- + err = pcim_enable_device(pdev); + if (err) + return err; +diff --git a/drivers/power/supply/axp20x_ac_power.c b/drivers/power/supply/axp20x_ac_power.c +index 0d34a932b6d5..f74b0556bb6b 100644 +--- a/drivers/power/supply/axp20x_ac_power.c ++++ b/drivers/power/supply/axp20x_ac_power.c +@@ -23,6 +23,8 @@ + #define AXP20X_PWR_STATUS_ACIN_PRESENT BIT(7) + #define AXP20X_PWR_STATUS_ACIN_AVAIL BIT(6) + ++#define AXP813_ACIN_PATH_SEL BIT(7) ++ + #define AXP813_VHOLD_MASK GENMASK(5, 3) + #define AXP813_VHOLD_UV_TO_BIT(x) ((((x) / 100000) - 40) << 3) + #define AXP813_VHOLD_REG_TO_UV(x) \ +@@ -40,6 +42,7 @@ struct axp20x_ac_power { + struct power_supply *supply; + struct iio_channel *acin_v; + struct iio_channel *acin_i; ++ bool has_acin_path_sel; + }; + + static irqreturn_t axp20x_ac_power_irq(int irq, void *devid) +@@ -86,6 +89,17 @@ static int axp20x_ac_power_get_property(struct power_supply *psy, + return ret; + + val->intval = !!(reg & AXP20X_PWR_STATUS_ACIN_AVAIL); ++ ++ /* ACIN_PATH_SEL disables ACIN even if ACIN_AVAIL is set. 
*/ ++ if (val->intval && power->has_acin_path_sel) { ++ ret = regmap_read(power->regmap, AXP813_ACIN_PATH_CTRL, ++ ®); ++ if (ret) ++ return ret; ++ ++ val->intval = !!(reg & AXP813_ACIN_PATH_SEL); ++ } ++ + return 0; + + case POWER_SUPPLY_PROP_VOLTAGE_NOW: +@@ -224,21 +238,25 @@ static const struct power_supply_desc axp813_ac_power_desc = { + struct axp_data { + const struct power_supply_desc *power_desc; + bool acin_adc; ++ bool acin_path_sel; + }; + + static const struct axp_data axp20x_data = { +- .power_desc = &axp20x_ac_power_desc, +- .acin_adc = true, ++ .power_desc = &axp20x_ac_power_desc, ++ .acin_adc = true, ++ .acin_path_sel = false, + }; + + static const struct axp_data axp22x_data = { +- .power_desc = &axp22x_ac_power_desc, +- .acin_adc = false, ++ .power_desc = &axp22x_ac_power_desc, ++ .acin_adc = false, ++ .acin_path_sel = false, + }; + + static const struct axp_data axp813_data = { +- .power_desc = &axp813_ac_power_desc, +- .acin_adc = false, ++ .power_desc = &axp813_ac_power_desc, ++ .acin_adc = false, ++ .acin_path_sel = true, + }; + + static int axp20x_ac_power_probe(struct platform_device *pdev) +@@ -282,6 +300,7 @@ static int axp20x_ac_power_probe(struct platform_device *pdev) + } + + power->regmap = dev_get_regmap(pdev->dev.parent, NULL); ++ power->has_acin_path_sel = axp_data->acin_path_sel; + + platform_set_drvdata(pdev, power); + +diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c +index da49436176cd..30a9014b2f95 100644 +--- a/drivers/power/supply/ltc2941-battery-gauge.c ++++ b/drivers/power/supply/ltc2941-battery-gauge.c +@@ -449,7 +449,7 @@ static int ltc294x_i2c_remove(struct i2c_client *client) + { + struct ltc294x_info *info = i2c_get_clientdata(client); + +- cancel_delayed_work(&info->work); ++ cancel_delayed_work_sync(&info->work); + power_supply_unregister(info->supply); + return 0; + } +diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c +index ca3dc3f3bb29..bb16c465426e 100644 +--- a/drivers/regulator/helpers.c ++++ b/drivers/regulator/helpers.c +@@ -13,6 +13,8 @@ + #include <linux/regulator/driver.h> + #include <linux/module.h> + ++#include "internal.h" ++ + /** + * regulator_is_enabled_regmap - standard is_enabled() for regmap users + * +@@ -881,3 +883,15 @@ void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, + consumers[i].supply = supply_names[i]; + } + EXPORT_SYMBOL_GPL(regulator_bulk_set_supply_names); ++ ++/** ++ * regulator_is_equal - test whether two regulators are the same ++ * ++ * @reg1: first regulator to operate on ++ * @reg2: second regulator to operate on ++ */ ++bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2) ++{ ++ return reg1->rdev == reg2->rdev; ++} ++EXPORT_SYMBOL_GPL(regulator_is_equal); +diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c +index 469d0bc9f5fe..00cf33573136 100644 +--- a/drivers/scsi/csiostor/csio_scsi.c ++++ b/drivers/scsi/csiostor/csio_scsi.c +@@ -1383,7 +1383,7 @@ csio_device_reset(struct device *dev, + return -EINVAL; + + /* Delete NPIV lnodes */ +- csio_lnodes_exit(hw, 1); ++ csio_lnodes_exit(hw, 1); + + /* Block upper IOs */ + csio_lnodes_block_request(hw); +diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c +index 42cf38c1ea99..0cbe6740e0c9 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -4392,7 +4392,8 @@ dcmd_timeout_ocr_possible(struct 
megasas_instance *instance) { + if (instance->adapter_type == MFI_SERIES) + return KILL_ADAPTER; + else if (instance->unload || +- test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) ++ test_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, ++ &instance->reset_flags)) + return IGNORE_TIMEOUT; + else + return INITIATE_OCR; +diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c +index e301458bcbae..46bc062d873e 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -4847,6 +4847,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) + if (instance->requestorId && !instance->skip_heartbeat_timer_del) + del_timer_sync(&instance->sriov_heartbeat_timer); + set_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); ++ set_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); + atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_POLLING); + instance->instancet->disable_intr(instance); + megasas_sync_irqs((unsigned long)instance); +@@ -5046,7 +5047,7 @@ kill_hba: + instance->skip_heartbeat_timer_del = 1; + retval = FAILED; + out: +- clear_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); ++ clear_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); + mutex_unlock(&instance->reset_mutex); + return retval; + } +diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h +index c013c80fe4e6..dd2e37e40d6b 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h +@@ -89,6 +89,7 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE { + + #define MEGASAS_FP_CMD_LEN 16 + #define MEGASAS_FUSION_IN_RESET 0 ++#define MEGASAS_FUSION_OCR_NOT_POSSIBLE 1 + #define RAID_1_PEER_CMDS 2 + #define JBOD_MAPS_COUNT 2 + #define MEGASAS_REDUCE_QD_COUNT 64 +diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c +index 30afc59c1870..7bbff91f8883 100644 +--- a/drivers/scsi/qla2xxx/qla_dbg.c ++++ b/drivers/scsi/qla2xxx/qla_dbg.c +@@ -2519,12 +2519,6 @@ qla83xx_fw_dump_failed: + /* Driver Debug Functions. */ + /****************************************************************************/ + +-static inline int +-ql_mask_match(uint level) +-{ +- return (level & ql2xextended_error_logging) == level; +-} +- + /* + * This function is for formatting and logging debug information. + * It is to be used when vha is available. 
It formats the message +diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h +index bb01b680ce9f..433e95502808 100644 +--- a/drivers/scsi/qla2xxx/qla_dbg.h ++++ b/drivers/scsi/qla2xxx/qla_dbg.h +@@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *, + extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *, + struct qla_hw_data *); + extern int qla24xx_soft_reset(struct qla_hw_data *); ++ ++static inline int ++ql_mask_match(uint level) ++{ ++ return (level & ql2xextended_error_logging) == level; ++} +diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h +index 1eb3fe281cc3..c57b95a20688 100644 +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -2402,6 +2402,7 @@ typedef struct fc_port { + unsigned int scan_needed:1; + unsigned int n2n_flag:1; + unsigned int explicit_logout:1; ++ unsigned int prli_pend_timer:1; + + struct completion nvme_del_done; + uint32_t nvme_prli_service_param; +@@ -2428,6 +2429,7 @@ typedef struct fc_port { + struct work_struct free_work; + struct work_struct reg_work; + uint64_t jiffies_at_registration; ++ unsigned long prli_expired; + struct qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; + + uint16_t tgt_id; +@@ -4821,6 +4823,9 @@ struct sff_8247_a0 { + ha->current_topology == ISP_CFG_N || \ + !ha->current_topology) + ++#define PRLI_PHASE(_cls) \ ++ ((_cls == DSC_LS_PRLI_PEND) || (_cls == DSC_LS_PRLI_COMP)) ++ + #include "qla_target.h" + #include "qla_gbl.h" + #include "qla_dbg.h" +diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c +index 9ffaa920fc8f..ac4c47fc5f4c 100644 +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -686,7 +686,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, + port_id_t id; + u64 wwn; + u16 data[2]; +- u8 current_login_state; ++ u8 current_login_state, nvme_cls; + + fcport = ea->fcport; + ql_dbg(ql_dbg_disc, vha, 0xffff, +@@ -745,10 +745,17 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, + + loop_id = le16_to_cpu(e->nport_handle); + loop_id = (loop_id & 0x7fff); +- if (fcport->fc4f_nvme) +- current_login_state = e->current_login_state >> 4; +- else +- current_login_state = e->current_login_state & 0xf; ++ nvme_cls = e->current_login_state >> 4; ++ current_login_state = e->current_login_state & 0xf; ++ ++ if (PRLI_PHASE(nvme_cls)) { ++ current_login_state = nvme_cls; ++ fcport->fc4_type &= ~FS_FC4TYPE_FCP; ++ fcport->fc4_type |= FS_FC4TYPE_NVME; ++ } else if (PRLI_PHASE(current_login_state)) { ++ fcport->fc4_type |= FS_FC4TYPE_FCP; ++ fcport->fc4_type &= ~FS_FC4TYPE_NVME; ++ } + + + ql_dbg(ql_dbg_disc, vha, 0x20e2, +@@ -1219,12 +1226,19 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) + struct srb_iocb *lio; + int rval = QLA_FUNCTION_FAILED; + +- if (!vha->flags.online) ++ if (!vha->flags.online) { ++ ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n", ++ __func__, __LINE__, fcport->port_name); + return rval; ++ } + +- if (fcport->fw_login_state == DSC_LS_PLOGI_PEND || +- fcport->fw_login_state == DSC_LS_PRLI_PEND) ++ if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND || ++ fcport->fw_login_state == DSC_LS_PRLI_PEND) && ++ qla_dual_mode_enabled(vha)) { ++ ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n", ++ __func__, __LINE__, fcport->port_name); + return rval; ++ } + + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) +@@ -1602,6 +1616,10 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host 
*vha, fc_port_t *fcport) + break; + default: + if (fcport->login_pause) { ++ ql_dbg(ql_dbg_disc, vha, 0x20d8, ++ "%s %d %8phC exit\n", ++ __func__, __LINE__, ++ fcport->port_name); + fcport->last_rscn_gen = fcport->rscn_gen; + fcport->last_login_gen = fcport->login_gen; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); +diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c +index 7c5f2736ebee..3e9c5768815e 100644 +--- a/drivers/scsi/qla2xxx/qla_isr.c ++++ b/drivers/scsi/qla2xxx/qla_isr.c +@@ -1897,6 +1897,18 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, + inbuf = (uint32_t *)&sts->nvme_ersp_data; + outbuf = (uint32_t *)fd->rspaddr; + iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); ++ if (unlikely(iocb->u.nvme.rsp_pyld_len > ++ sizeof(struct nvme_fc_ersp_iu))) { ++ if (ql_mask_match(ql_dbg_io)) { ++ WARN_ONCE(1, "Unexpected response payload length %u.\n", ++ iocb->u.nvme.rsp_pyld_len); ++ ql_log(ql_log_warn, fcport->vha, 0x5100, ++ "Unexpected response payload length %u.\n", ++ iocb->u.nvme.rsp_pyld_len); ++ } ++ iocb->u.nvme.rsp_pyld_len = ++ sizeof(struct nvme_fc_ersp_iu); ++ } + iter = iocb->u.nvme.rsp_pyld_len >> 2; + for (; iter; iter--) + *outbuf++ = swab32(*inbuf++); +diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c +index eac76e934cbe..1ef8907314e5 100644 +--- a/drivers/scsi/qla2xxx/qla_mbx.c ++++ b/drivers/scsi/qla2xxx/qla_mbx.c +@@ -6151,9 +6151,8 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t addr, + mcp->mb[7] = LSW(MSD(req_dma)); + mcp->mb[8] = MSW(addr); + /* Setting RAM ID to valid */ +- mcp->mb[10] |= BIT_7; + /* For MCTP RAM ID is 0x40 */ +- mcp->mb[10] |= 0x40; ++ mcp->mb[10] = BIT_7 | 0x40; + + mcp->out_mb |= MBX_10|MBX_8|MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1| + MBX_0; +diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c +index 2b2028f2383e..c855d013ba8a 100644 +--- a/drivers/scsi/qla2xxx/qla_nx.c ++++ b/drivers/scsi/qla2xxx/qla_nx.c +@@ -1612,8 +1612,7 @@ qla82xx_get_bootld_offset(struct qla_hw_data *ha) + return (u8 *)&ha->hablob->fw->data[offset]; + } + +-static __le32 +-qla82xx_get_fw_size(struct qla_hw_data *ha) ++static u32 qla82xx_get_fw_size(struct qla_hw_data *ha) + { + struct qla82xx_uri_data_desc *uri_desc = NULL; + +@@ -1624,7 +1623,7 @@ qla82xx_get_fw_size(struct qla_hw_data *ha) + return cpu_to_le32(uri_desc->size); + } + +- return cpu_to_le32(*(u32 *)&ha->hablob->fw->data[FW_SIZE_OFFSET]); ++ return get_unaligned_le32(&ha->hablob->fw->data[FW_SIZE_OFFSET]); + } + + static u8 * +@@ -1816,7 +1815,7 @@ qla82xx_fw_load_from_blob(struct qla_hw_data *ha) + } + + flashaddr = FLASH_ADDR_START; +- size = (__force u32)qla82xx_get_fw_size(ha) / 8; ++ size = qla82xx_get_fw_size(ha) / 8; + ptr64 = (u64 *)qla82xx_get_fw_offs(ha); + + for (i = 0; i < size; i++) { +diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c +index 74a378a91b71..cb8a892e2d39 100644 +--- a/drivers/scsi/qla2xxx/qla_target.c ++++ b/drivers/scsi/qla2xxx/qla_target.c +@@ -1257,6 +1257,7 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; + spin_unlock_irqrestore(&sess->vha->work_lock, flags); + ++ sess->prli_pend_timer = 0; + sess->disc_state = DSC_DELETE_PEND; + + qla24xx_chk_fcp_state(sess); +diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c +index 2323432a0edb..5504ab11decc 100644 +--- a/drivers/scsi/qla4xxx/ql4_os.c ++++ 
b/drivers/scsi/qla4xxx/ql4_os.c +@@ -4145,7 +4145,7 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha) + dma_free_coherent(&ha->pdev->dev, ha->queues_len, ha->queues, + ha->queues_dma); + +- if (ha->fw_dump) ++ if (ha->fw_dump) + vfree(ha->fw_dump); + + ha->queues_len = 0; +diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c +index 1e38bb967871..0d41a7dc1d6b 100644 +--- a/drivers/scsi/ufs/ufshcd.c ++++ b/drivers/scsi/ufs/ufshcd.c +@@ -5023,6 +5023,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) + + hba->auto_bkops_enabled = false; + trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Disabled"); ++ hba->is_urgent_bkops_lvl_checked = false; + out: + return err; + } +@@ -5047,6 +5048,7 @@ static void ufshcd_force_reset_auto_bkops(struct ufs_hba *hba) + hba->ee_ctrl_mask &= ~MASK_EE_URGENT_BKOPS; + ufshcd_disable_auto_bkops(hba); + } ++ hba->is_urgent_bkops_lvl_checked = false; + } + + static inline int ufshcd_get_bkops_status(struct ufs_hba *hba, u32 *status) +@@ -5093,6 +5095,7 @@ static int ufshcd_bkops_ctrl(struct ufs_hba *hba, + err = ufshcd_enable_auto_bkops(hba); + else + err = ufshcd_disable_auto_bkops(hba); ++ hba->urgent_bkops_lvl = curr_status; + out: + return err; + } +diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h +index 1c8b349379af..77c4a9abe365 100644 +--- a/drivers/usb/dwc3/core.h ++++ b/drivers/usb/dwc3/core.h +@@ -688,7 +688,9 @@ struct dwc3_ep { + #define DWC3_EP_STALL BIT(1) + #define DWC3_EP_WEDGE BIT(2) + #define DWC3_EP_TRANSFER_STARTED BIT(3) ++#define DWC3_EP_END_TRANSFER_PENDING BIT(4) + #define DWC3_EP_PENDING_REQUEST BIT(5) ++#define DWC3_EP_DELAY_START BIT(6) + + /* This last one is specific to EP0 */ + #define DWC3_EP0_DIR_IN BIT(31) +diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c +index fd1b100d2927..6dee4dabc0a4 100644 +--- a/drivers/usb/dwc3/ep0.c ++++ b/drivers/usb/dwc3/ep0.c +@@ -1136,8 +1136,10 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc, + case DWC3_DEPEVT_EPCMDCMPLT: + cmd = DEPEVT_PARAMETER_CMD(event->parameters); + +- if (cmd == DWC3_DEPCMD_ENDTRANSFER) ++ if (cmd == DWC3_DEPCMD_ENDTRANSFER) { ++ dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; ++ } + break; + } + } +diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c +index 154f3f3e8cff..8b95be897078 100644 +--- a/drivers/usb/dwc3/gadget.c ++++ b/drivers/usb/dwc3/gadget.c +@@ -1447,6 +1447,12 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) + list_add_tail(&req->list, &dep->pending_list); + req->status = DWC3_REQUEST_STATUS_QUEUED; + ++ /* Start the transfer only after the END_TRANSFER is completed */ ++ if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) { ++ dep->flags |= DWC3_EP_DELAY_START; ++ return 0; ++ } ++ + /* + * NOTICE: Isochronous endpoints should NEVER be prestarted. 
We must + * wait for a XferNotReady event so we will know what's the current +@@ -2625,8 +2631,14 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, + cmd = DEPEVT_PARAMETER_CMD(event->parameters); + + if (cmd == DWC3_DEPCMD_ENDTRANSFER) { ++ dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + dwc3_gadget_ep_cleanup_cancelled_requests(dep); ++ if ((dep->flags & DWC3_EP_DELAY_START) && ++ !usb_endpoint_xfer_isoc(dep->endpoint.desc)) ++ __dwc3_gadget_kick_transfer(dep); ++ ++ dep->flags &= ~DWC3_EP_DELAY_START; + } + break; + case DWC3_DEPEVT_STREAMEVT: +@@ -2683,7 +2695,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, + u32 cmd; + int ret; + +- if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) ++ if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || ++ (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) + return; + + /* +@@ -2728,6 +2741,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, + + if (!interrupt) + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; ++ else ++ dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + + if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) + udelay(100); +diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c +index 460d5d7c984f..7f5cf488b2b1 100644 +--- a/drivers/usb/gadget/function/f_ecm.c ++++ b/drivers/usb/gadget/function/f_ecm.c +@@ -52,6 +52,7 @@ struct f_ecm { + struct usb_ep *notify; + struct usb_request *notify_req; + u8 notify_state; ++ atomic_t notify_count; + bool is_open; + + /* FIXME is_open needs some irq-ish locking +@@ -380,7 +381,7 @@ static void ecm_do_notify(struct f_ecm *ecm) + int status; + + /* notification already in flight? */ +- if (!req) ++ if (atomic_read(&ecm->notify_count)) + return; + + event = req->buf; +@@ -420,10 +421,10 @@ static void ecm_do_notify(struct f_ecm *ecm) + event->bmRequestType = 0xA1; + event->wIndex = cpu_to_le16(ecm->ctrl_id); + +- ecm->notify_req = NULL; ++ atomic_inc(&ecm->notify_count); + status = usb_ep_queue(ecm->notify, req, GFP_ATOMIC); + if (status < 0) { +- ecm->notify_req = req; ++ atomic_dec(&ecm->notify_count); + DBG(cdev, "notify --> %d\n", status); + } + } +@@ -448,17 +449,19 @@ static void ecm_notify_complete(struct usb_ep *ep, struct usb_request *req) + switch (req->status) { + case 0: + /* no fault */ ++ atomic_dec(&ecm->notify_count); + break; + case -ECONNRESET: + case -ESHUTDOWN: ++ atomic_set(&ecm->notify_count, 0); + ecm->notify_state = ECM_NOTIFY_NONE; + break; + default: + DBG(cdev, "event %02x --> %d\n", + event->bNotificationType, req->status); ++ atomic_dec(&ecm->notify_count); + break; + } +- ecm->notify_req = req; + ecm_do_notify(ecm); + } + +@@ -907,6 +910,11 @@ static void ecm_unbind(struct usb_configuration *c, struct usb_function *f) + + usb_free_all_descriptors(f); + ++ if (atomic_read(&ecm->notify_count)) { ++ usb_ep_dequeue(ecm->notify, ecm->notify_req); ++ atomic_set(&ecm->notify_count, 0); ++ } ++ + kfree(ecm->notify_req->buf); + usb_ep_free_request(ecm->notify, ecm->notify_req); + } +diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c +index 59d9d512dcda..ced2581cf99f 100644 +--- a/drivers/usb/gadget/function/f_fs.c ++++ b/drivers/usb/gadget/function/f_fs.c +@@ -1062,6 +1062,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) + req->num_sgs = io_data->sgt.nents; + } else { + req->buf = data; ++ req->num_sgs = 0; + } + req->length = data_len; + +@@ -1105,6 +1106,7 @@ static ssize_t ffs_epfile_io(struct 
file *file, struct ffs_io_data *io_data) + req->num_sgs = io_data->sgt.nents; + } else { + req->buf = data; ++ req->num_sgs = 0; + } + req->length = data_len; + +diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c +index 2d6e76e4cffa..1d900081b1f0 100644 +--- a/drivers/usb/gadget/function/f_ncm.c ++++ b/drivers/usb/gadget/function/f_ncm.c +@@ -53,6 +53,7 @@ struct f_ncm { + struct usb_ep *notify; + struct usb_request *notify_req; + u8 notify_state; ++ atomic_t notify_count; + bool is_open; + + const struct ndp_parser_opts *parser_opts; +@@ -547,7 +548,7 @@ static void ncm_do_notify(struct f_ncm *ncm) + int status; + + /* notification already in flight? */ +- if (!req) ++ if (atomic_read(&ncm->notify_count)) + return; + + event = req->buf; +@@ -587,7 +588,8 @@ static void ncm_do_notify(struct f_ncm *ncm) + event->bmRequestType = 0xA1; + event->wIndex = cpu_to_le16(ncm->ctrl_id); + +- ncm->notify_req = NULL; ++ atomic_inc(&ncm->notify_count); ++ + /* + * In double buffering if there is a space in FIFO, + * completion callback can be called right after the call, +@@ -597,7 +599,7 @@ static void ncm_do_notify(struct f_ncm *ncm) + status = usb_ep_queue(ncm->notify, req, GFP_ATOMIC); + spin_lock(&ncm->lock); + if (status < 0) { +- ncm->notify_req = req; ++ atomic_dec(&ncm->notify_count); + DBG(cdev, "notify --> %d\n", status); + } + } +@@ -632,17 +634,19 @@ static void ncm_notify_complete(struct usb_ep *ep, struct usb_request *req) + case 0: + VDBG(cdev, "Notification %02x sent\n", + event->bNotificationType); ++ atomic_dec(&ncm->notify_count); + break; + case -ECONNRESET: + case -ESHUTDOWN: ++ atomic_set(&ncm->notify_count, 0); + ncm->notify_state = NCM_NOTIFY_NONE; + break; + default: + DBG(cdev, "event %02x --> %d\n", + event->bNotificationType, req->status); ++ atomic_dec(&ncm->notify_count); + break; + } +- ncm->notify_req = req; + ncm_do_notify(ncm); + spin_unlock(&ncm->lock); + } +@@ -1649,6 +1653,11 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) + ncm_string_defs[0].id = 0; + usb_free_all_descriptors(f); + ++ if (atomic_read(&ncm->notify_count)) { ++ usb_ep_dequeue(ncm->notify, ncm->notify_req); ++ atomic_set(&ncm->notify_count, 0); ++ } ++ + kfree(ncm->notify_req->buf); + usb_ep_free_request(ncm->notify, ncm->notify_req); + } +diff --git a/drivers/usb/gadget/legacy/cdc2.c b/drivers/usb/gadget/legacy/cdc2.c +index da1c37933ca1..8d7a556ece30 100644 +--- a/drivers/usb/gadget/legacy/cdc2.c ++++ b/drivers/usb/gadget/legacy/cdc2.c +@@ -225,7 +225,7 @@ static struct usb_composite_driver cdc_driver = { + .name = "g_cdc", + .dev = &device_desc, + .strings = dev_strings, +- .max_speed = USB_SPEED_HIGH, ++ .max_speed = USB_SPEED_SUPER, + .bind = cdc_bind, + .unbind = cdc_unbind, + }; +diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c +index b640ed3fcf70..ae6d8f7092b8 100644 +--- a/drivers/usb/gadget/legacy/g_ffs.c ++++ b/drivers/usb/gadget/legacy/g_ffs.c +@@ -149,7 +149,7 @@ static struct usb_composite_driver gfs_driver = { + .name = DRIVER_NAME, + .dev = &gfs_dev_desc, + .strings = gfs_dev_strings, +- .max_speed = USB_SPEED_HIGH, ++ .max_speed = USB_SPEED_SUPER, + .bind = gfs_bind, + .unbind = gfs_unbind, + }; +diff --git a/drivers/usb/gadget/legacy/multi.c b/drivers/usb/gadget/legacy/multi.c +index 50515f9e1022..ec9749845660 100644 +--- a/drivers/usb/gadget/legacy/multi.c ++++ b/drivers/usb/gadget/legacy/multi.c +@@ -482,7 +482,7 @@ static struct usb_composite_driver multi_driver = { + .name = 
"g_multi", + .dev = &device_desc, + .strings = dev_strings, +- .max_speed = USB_SPEED_HIGH, ++ .max_speed = USB_SPEED_SUPER, + .bind = multi_bind, + .unbind = multi_unbind, + .needs_serial = 1, +diff --git a/drivers/usb/gadget/legacy/ncm.c b/drivers/usb/gadget/legacy/ncm.c +index 8465f081e921..c61e71ba7045 100644 +--- a/drivers/usb/gadget/legacy/ncm.c ++++ b/drivers/usb/gadget/legacy/ncm.c +@@ -197,7 +197,7 @@ static struct usb_composite_driver ncm_driver = { + .name = "g_ncm", + .dev = &device_desc, + .strings = dev_strings, +- .max_speed = USB_SPEED_HIGH, ++ .max_speed = USB_SPEED_SUPER, + .bind = gncm_bind, + .unbind = gncm_unbind, + }; +diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c +index 8b4ff9fff340..753645bb2527 100644 +--- a/drivers/usb/typec/tcpm/tcpci.c ++++ b/drivers/usb/typec/tcpm/tcpci.c +@@ -591,6 +591,12 @@ static int tcpci_probe(struct i2c_client *client, + static int tcpci_remove(struct i2c_client *client) + { + struct tcpci_chip *chip = i2c_get_clientdata(client); ++ int err; ++ ++ /* Disable chip interrupts before unregistering port */ ++ err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0); ++ if (err < 0) ++ return err; + + tcpci_unregister_port(chip->tcpci); + +diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c +index 9f4117766bb1..c962d9b370c6 100644 +--- a/drivers/virtio/virtio_balloon.c ++++ b/drivers/virtio/virtio_balloon.c +@@ -474,7 +474,9 @@ static int init_vqs(struct virtio_balloon *vb) + names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; + callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; + names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; ++ callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL; + names[VIRTIO_BALLOON_VQ_STATS] = NULL; ++ callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { +@@ -898,8 +900,7 @@ static int virtballoon_probe(struct virtio_device *vdev) + vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + if (IS_ERR(vb->vb_dev_info.inode)) { + err = PTR_ERR(vb->vb_dev_info.inode); +- kern_unmount(balloon_mnt); +- goto out_del_vqs; ++ goto out_kern_unmount; + } + vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; + #endif +@@ -910,13 +911,13 @@ static int virtballoon_probe(struct virtio_device *vdev) + */ + if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { + err = -ENOSPC; +- goto out_del_vqs; ++ goto out_iput; + } + vb->balloon_wq = alloc_workqueue("balloon-wq", + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); + if (!vb->balloon_wq) { + err = -ENOMEM; +- goto out_del_vqs; ++ goto out_iput; + } + INIT_WORK(&vb->report_free_page_work, report_free_page_func); + vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; +@@ -950,6 +951,12 @@ static int virtballoon_probe(struct virtio_device *vdev) + out_del_balloon_wq: + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + destroy_workqueue(vb->balloon_wq); ++out_iput: ++#ifdef CONFIG_BALLOON_COMPACTION ++ iput(vb->vb_dev_info.inode); ++out_kern_unmount: ++ kern_unmount(balloon_mnt); ++#endif + out_del_vqs: + vdev->config->del_vqs(vdev); + out_free_vb: +@@ -965,6 +972,10 @@ static void remove_common(struct virtio_balloon *vb) + leak_balloon(vb, vb->num_pages); + update_balloon_size(vb); + ++ /* There might be free pages that are being reported: release them. 
*/ ++ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) ++ return_free_pages_to_mm(vb, ULONG_MAX); ++ + /* Now we reset the device so we can clean up the queues. */ + vb->vdev->config->reset(vb->vdev); + +diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c +index f2862f66c2ac..222d630c41fc 100644 +--- a/drivers/virtio/virtio_pci_common.c ++++ b/drivers/virtio/virtio_pci_common.c +@@ -294,7 +294,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, + /* Best option: one for change interrupt, one per vq. */ + nvectors = 1; + for (i = 0; i < nvqs; ++i) +- if (callbacks[i]) ++ if (names[i] && callbacks[i]) + ++nvectors; + } else { + /* Second best: one for change, shared for all vqs. */ +diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c +index 21e8085b848b..861daf4f37b2 100644 +--- a/drivers/watchdog/watchdog_core.c ++++ b/drivers/watchdog/watchdog_core.c +@@ -147,6 +147,25 @@ int watchdog_init_timeout(struct watchdog_device *wdd, + } + EXPORT_SYMBOL_GPL(watchdog_init_timeout); + ++static int watchdog_reboot_notifier(struct notifier_block *nb, ++ unsigned long code, void *data) ++{ ++ struct watchdog_device *wdd; ++ ++ wdd = container_of(nb, struct watchdog_device, reboot_nb); ++ if (code == SYS_DOWN || code == SYS_HALT) { ++ if (watchdog_active(wdd)) { ++ int ret; ++ ++ ret = wdd->ops->stop(wdd); ++ if (ret) ++ return NOTIFY_BAD; ++ } ++ } ++ ++ return NOTIFY_DONE; ++} ++ + static int watchdog_restart_notifier(struct notifier_block *nb, + unsigned long action, void *data) + { +@@ -235,6 +254,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd) + } + } + ++ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { ++ wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; ++ ++ ret = register_reboot_notifier(&wdd->reboot_nb); ++ if (ret) { ++ pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", ++ wdd->id, ret); ++ watchdog_dev_unregister(wdd); ++ ida_simple_remove(&watchdog_ida, id); ++ return ret; ++ } ++ } ++ + if (wdd->ops->restart) { + wdd->restart_nb.notifier_call = watchdog_restart_notifier; + +@@ -289,6 +321,9 @@ static void __watchdog_unregister_device(struct watchdog_device *wdd) + if (wdd->ops->restart) + unregister_restart_handler(&wdd->restart_nb); + ++ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) ++ unregister_reboot_notifier(&wdd->reboot_nb); ++ + watchdog_dev_unregister(wdd); + ida_simple_remove(&watchdog_ida, wdd->id); + } +diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c +index 62483a99105c..ce04edc69e5f 100644 +--- a/drivers/watchdog/watchdog_dev.c ++++ b/drivers/watchdog/watchdog_dev.c +@@ -38,7 +38,6 @@ + #include <linux/miscdevice.h> /* For handling misc devices */ + #include <linux/module.h> /* For module stuff/... 
*/ + #include <linux/mutex.h> /* For mutexes */ +-#include <linux/reboot.h> /* For reboot notifier */ + #include <linux/slab.h> /* For memory functions */ + #include <linux/types.h> /* For standard types (like size_t) */ + #include <linux/watchdog.h> /* For watchdog specific items */ +@@ -1077,25 +1076,6 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd) + put_device(&wd_data->dev); + } + +-static int watchdog_reboot_notifier(struct notifier_block *nb, +- unsigned long code, void *data) +-{ +- struct watchdog_device *wdd; +- +- wdd = container_of(nb, struct watchdog_device, reboot_nb); +- if (code == SYS_DOWN || code == SYS_HALT) { +- if (watchdog_active(wdd)) { +- int ret; +- +- ret = wdd->ops->stop(wdd); +- if (ret) +- return NOTIFY_BAD; +- } +- } +- +- return NOTIFY_DONE; +-} +- + /* + * watchdog_dev_register: register a watchdog device + * @wdd: watchdog device +@@ -1114,22 +1094,8 @@ int watchdog_dev_register(struct watchdog_device *wdd) + return ret; + + ret = watchdog_register_pretimeout(wdd); +- if (ret) { ++ if (ret) + watchdog_cdev_unregister(wdd); +- return ret; +- } +- +- if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { +- wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; +- +- ret = devm_register_reboot_notifier(&wdd->wd_data->dev, +- &wdd->reboot_nb); +- if (ret) { +- pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", +- wdd->id, ret); +- watchdog_dev_unregister(wdd); +- } +- } + + return ret; + } +diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c +index 6d12fc368210..a8d24433c8e9 100644 +--- a/drivers/xen/xen-balloon.c ++++ b/drivers/xen/xen-balloon.c +@@ -94,7 +94,7 @@ static void watch_target(struct xenbus_watch *watch, + "%llu", &static_max) == 1)) + static_max >>= PAGE_SHIFT - 10; + else +- static_max = new_target; ++ static_max = balloon_stats.current_pages; + + target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 + : static_max - balloon_stats.target_pages; +diff --git a/fs/aio.c b/fs/aio.c +index 0d9a559d488c..4115d5ad6b90 100644 +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, + return 0; + } + ++static void aio_poll_put_work(struct work_struct *work) ++{ ++ struct poll_iocb *req = container_of(work, struct poll_iocb, work); ++ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); ++ ++ iocb_put(iocb); ++} ++ + static void aio_poll_complete_work(struct work_struct *work) + { + struct poll_iocb *req = container_of(work, struct poll_iocb, work); +@@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, + list_del_init(&req->wait.entry); + + if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { ++ struct kioctx *ctx = iocb->ki_ctx; ++ + /* + * Try to complete the iocb inline if we can. Use + * irqsave/irqrestore because not all filesystems (e.g. 
fuse) +@@ -1683,8 +1693,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, + list_del(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; +- spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); +- iocb_put(iocb); ++ if (iocb->ki_eventfd && eventfd_signal_count()) { ++ iocb = NULL; ++ INIT_WORK(&req->work, aio_poll_put_work); ++ schedule_work(&req->work); ++ } ++ spin_unlock_irqrestore(&ctx->ctx_lock, flags); ++ if (iocb) ++ iocb_put(iocb); + } else { + schedule_work(&req->work); + } +diff --git a/fs/attr.c b/fs/attr.c +index df28035aa23e..b4bbdbd4c8ca 100644 +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -183,18 +183,12 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; +- if (ia_valid & ATTR_ATIME) { +- inode->i_atime = timestamp_truncate(attr->ia_atime, +- inode); +- } +- if (ia_valid & ATTR_MTIME) { +- inode->i_mtime = timestamp_truncate(attr->ia_mtime, +- inode); +- } +- if (ia_valid & ATTR_CTIME) { +- inode->i_ctime = timestamp_truncate(attr->ia_ctime, +- inode); +- } ++ if (ia_valid & ATTR_ATIME) ++ inode->i_atime = attr->ia_atime; ++ if (ia_valid & ATTR_MTIME) ++ inode->i_mtime = attr->ia_mtime; ++ if (ia_valid & ATTR_CTIME) ++ inode->i_ctime = attr->ia_ctime; + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + +@@ -268,8 +262,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de + attr->ia_ctime = now; + if (!(ia_valid & ATTR_ATIME_SET)) + attr->ia_atime = now; ++ else ++ attr->ia_atime = timestamp_truncate(attr->ia_atime, inode); + if (!(ia_valid & ATTR_MTIME_SET)) + attr->ia_mtime = now; ++ else ++ attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode); ++ + if (ia_valid & ATTR_KILL_PRIV) { + error = security_inode_need_killpriv(dentry); + if (error < 0) +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index da9b0f060a9d..a989105d39c8 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -330,12 +330,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem) + { + write_lock(&fs_info->tree_mod_log_lock); +- spin_lock(&fs_info->tree_mod_seq_lock); + if (!elem->seq) { + elem->seq = btrfs_inc_tree_mod_seq(fs_info); + list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); + } +- spin_unlock(&fs_info->tree_mod_seq_lock); + write_unlock(&fs_info->tree_mod_log_lock); + + return elem->seq; +@@ -355,7 +353,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, + if (!seq_putting) + return; + +- spin_lock(&fs_info->tree_mod_seq_lock); ++ write_lock(&fs_info->tree_mod_log_lock); + list_del(&elem->list); + elem->seq = 0; + +@@ -366,19 +364,17 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, + * blocker with lower sequence number exists, we + * cannot remove anything from the log + */ +- spin_unlock(&fs_info->tree_mod_seq_lock); ++ write_unlock(&fs_info->tree_mod_log_lock); + return; + } + min_seq = cur_elem->seq; + } + } +- spin_unlock(&fs_info->tree_mod_seq_lock); + + /* + * anything that's lower than the lowest existing (read: blocked) + * sequence number can be removed from the tree. 
+ */ +- write_lock(&fs_info->tree_mod_log_lock); + tm_root = &fs_info->tree_mod_log; + for (node = rb_first(tm_root); node; node = next) { + next = rb_next(node); +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 5e9f80b28fcf..290ca193c6c0 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -671,14 +671,12 @@ struct btrfs_fs_info { + atomic_t nr_delayed_iputs; + wait_queue_head_t delayed_iputs_wait; + +- /* this protects tree_mod_seq_list */ +- spinlock_t tree_mod_seq_lock; + atomic64_t tree_mod_seq; +- struct list_head tree_mod_seq_list; + +- /* this protects tree_mod_log */ ++ /* this protects tree_mod_log and tree_mod_seq_list */ + rwlock_t tree_mod_log_lock; + struct rb_root tree_mod_log; ++ struct list_head tree_mod_seq_list; + + atomic_t async_delalloc_pages; + +diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c +index df3bd880061d..dfdb7d4f8406 100644 +--- a/fs/btrfs/delayed-ref.c ++++ b/fs/btrfs/delayed-ref.c +@@ -492,7 +492,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + if (head->is_data) + return; + +- spin_lock(&fs_info->tree_mod_seq_lock); ++ read_lock(&fs_info->tree_mod_log_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + struct seq_list *elem; + +@@ -500,7 +500,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct seq_list, list); + seq = elem->seq; + } +- spin_unlock(&fs_info->tree_mod_seq_lock); ++ read_unlock(&fs_info->tree_mod_log_lock); + + again: + for (node = rb_first_cached(&head->ref_tree); node; +@@ -518,7 +518,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq) + struct seq_list *elem; + int ret = 0; + +- spin_lock(&fs_info->tree_mod_seq_lock); ++ read_lock(&fs_info->tree_mod_log_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); +@@ -531,7 +531,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq) + } + } + +- spin_unlock(&fs_info->tree_mod_seq_lock); ++ read_unlock(&fs_info->tree_mod_log_lock); + return ret; + } + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index bae334212ee2..7becc5e96f92 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2016,7 +2016,7 @@ static void free_root_extent_buffers(struct btrfs_root *root) + } + + /* helper to cleanup tree roots */ +-static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) ++static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) + { + free_root_extent_buffers(info->tree_root); + +@@ -2025,7 +2025,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) + free_root_extent_buffers(info->csum_root); + free_root_extent_buffers(info->quota_root); + free_root_extent_buffers(info->uuid_root); +- if (chunk_root) ++ if (free_chunk_root) + free_root_extent_buffers(info->chunk_root); + free_root_extent_buffers(info->free_space_root); + } +@@ -2652,7 +2652,6 @@ int open_ctree(struct super_block *sb, + spin_lock_init(&fs_info->fs_roots_radix_lock); + spin_lock_init(&fs_info->delayed_iput_lock); + spin_lock_init(&fs_info->defrag_inodes_lock); +- spin_lock_init(&fs_info->tree_mod_seq_lock); + spin_lock_init(&fs_info->super_lock); + spin_lock_init(&fs_info->buffer_lock); + spin_lock_init(&fs_info->unused_bgs_lock); +@@ -3324,7 +3323,7 @@ fail_block_groups: + btrfs_put_block_group_cache(fs_info); + + fail_tree_roots: +- free_root_pointers(fs_info, 1); ++ free_root_pointers(fs_info, true); + 
invalidate_inode_pages2(fs_info->btree_inode->i_mapping); + + fail_sb_buffer: +@@ -3356,7 +3355,7 @@ recovery_tree_root: + if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) + goto fail_tree_roots; + +- free_root_pointers(fs_info, 0); ++ free_root_pointers(fs_info, false); + + /* don't use the log in recovery mode, it won't be valid */ + btrfs_set_super_log_root(disk_super, 0); +@@ -4047,10 +4046,17 @@ void close_ctree(struct btrfs_fs_info *fs_info) + invalidate_inode_pages2(fs_info->btree_inode->i_mapping); + btrfs_stop_all_workers(fs_info); + +- btrfs_free_block_groups(fs_info); +- + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); +- free_root_pointers(fs_info, 1); ++ free_root_pointers(fs_info, true); ++ ++ /* ++ * We must free the block groups after dropping the fs_roots as we could ++ * have had an IO error and have left over tree log blocks that aren't ++ * cleaned up until the fs roots are freed. This makes the block group ++ * accounting appear to be wrong because there's pending reserved bytes, ++ * so make sure we do the block group cleanup afterwards. ++ */ ++ btrfs_free_block_groups(fs_info); + + iput(fs_info->btree_inode); + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index 33c6b191ca59..284540cdbbd9 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1583,21 +1583,25 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, + /* Find first extent with bits cleared */ + while (1) { + node = __etree_search(tree, start, &next, &prev, NULL, NULL); +- if (!node) { ++ if (!node && !next && !prev) { ++ /* ++ * Tree is completely empty, send full range and let ++ * caller deal with it ++ */ ++ *start_ret = 0; ++ *end_ret = -1; ++ goto out; ++ } else if (!node && !next) { ++ /* ++ * We are past the last allocated chunk, set start at ++ * the end of the last extent. ++ */ ++ state = rb_entry(prev, struct extent_state, rb_node); ++ *start_ret = state->end + 1; ++ *end_ret = -1; ++ goto out; ++ } else if (!node) { + node = next; +- if (!node) { +- /* +- * We are past the last allocated chunk, +- * set start at the end of the last extent. The +- * device alloc tree should never be empty so +- * prev is always set. +- */ +- ASSERT(prev); +- state = rb_entry(prev, struct extent_state, rb_node); +- *start_ret = state->end + 1; +- *end_ret = -1; +- goto out; +- } + } + /* + * At this point 'node' either contains 'start' or start is +@@ -3938,6 +3942,11 @@ int btree_write_cache_pages(struct address_space *mapping, + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; ++ /* ++ * Start from the beginning does not need to cycle over the ++ * range, mark it as scanned. ++ */ ++ scanned = (index == 0); + } else { + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; +@@ -3955,7 +3964,6 @@ retry: + tag))) { + unsigned i; + +- scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + +@@ -4084,6 +4092,11 @@ static int extent_write_cache_pages(struct address_space *mapping, + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; ++ /* ++ * Start from the beginning does not need to cycle over the ++ * range, mark it as scanned. 
++ */ ++ scanned = (index == 0); + } else { + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; +@@ -4117,7 +4130,6 @@ retry: + &index, end, tag))) { + unsigned i; + +- scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + +@@ -4177,7 +4189,16 @@ retry: + */ + scanned = 1; + index = 0; +- goto retry; ++ ++ /* ++ * If we're looping we could run into a page that is locked by a ++ * writer and that writer could be waiting on writeback for a ++ * page in our current bio, and thus deadlock, so flush the ++ * write bio here. ++ */ ++ ret = flush_write_bio(epd); ++ if (!ret) ++ goto retry; + } + + if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole)) +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 8e86b2d700c4..d88b8d8897cc 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3244,6 +3244,7 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, + struct inode *dst, u64 dst_loff) + { ++ const u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + int ret; + + /* +@@ -3251,7 +3252,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, + * source range to serialize with relocation. + */ + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); +- ret = btrfs_clone(src, dst, loff, len, len, dst_loff, 1); ++ ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1); + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + + return ret; +diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c +index 99fe9bf3fdac..98f9684e7ffc 100644 +--- a/fs/btrfs/tests/btrfs-tests.c ++++ b/fs/btrfs/tests/btrfs-tests.c +@@ -121,7 +121,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) + spin_lock_init(&fs_info->qgroup_lock); + spin_lock_init(&fs_info->super_lock); + spin_lock_init(&fs_info->fs_roots_radix_lock); +- spin_lock_init(&fs_info->tree_mod_seq_lock); + mutex_init(&fs_info->qgroup_ioctl_lock); + mutex_init(&fs_info->qgroup_rescan_lock); + rwlock_init(&fs_info->tree_mod_log_lock); +diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c +index 123d9a614357..df7ce874a74b 100644 +--- a/fs/btrfs/tests/extent-io-tests.c ++++ b/fs/btrfs/tests/extent-io-tests.c +@@ -441,8 +441,17 @@ static int test_find_first_clear_extent_bit(void) + int ret = -EINVAL; + + test_msg("running find_first_clear_extent_bit test"); ++ + extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL); + ++ /* Test correct handling of empty tree */ ++ find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED); ++ if (start != 0 || end != -1) { ++ test_err( ++ "error getting a range from completely empty tree: start %llu end %llu", ++ start, end); ++ goto out; ++ } + /* + * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between + * 4M-32M +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index 8624bdee8c5b..ceffec752234 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -77,13 +77,14 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) + } + } + +-static noinline void switch_commit_roots(struct btrfs_transaction *trans) ++static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) + { ++ struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_root *root, *tmp; + + down_write(&fs_info->commit_root_sem); +- 
list_for_each_entry_safe(root, tmp, &trans->switch_commits, ++ list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits, + dirty_list) { + list_del_init(&root->dirty_list); + free_extent_buffer(root->commit_root); +@@ -95,16 +96,17 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans) + } + + /* We can free old roots now. */ +- spin_lock(&trans->dropped_roots_lock); +- while (!list_empty(&trans->dropped_roots)) { +- root = list_first_entry(&trans->dropped_roots, ++ spin_lock(&cur_trans->dropped_roots_lock); ++ while (!list_empty(&cur_trans->dropped_roots)) { ++ root = list_first_entry(&cur_trans->dropped_roots, + struct btrfs_root, root_list); + list_del_init(&root->root_list); +- spin_unlock(&trans->dropped_roots_lock); ++ spin_unlock(&cur_trans->dropped_roots_lock); ++ btrfs_free_log(trans, root); + btrfs_drop_and_free_fs_root(fs_info, root); +- spin_lock(&trans->dropped_roots_lock); ++ spin_lock(&cur_trans->dropped_roots_lock); + } +- spin_unlock(&trans->dropped_roots_lock); ++ spin_unlock(&cur_trans->dropped_roots_lock); + up_write(&fs_info->commit_root_sem); + } + +@@ -1359,7 +1361,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, + ret = commit_cowonly_roots(trans); + if (ret) + goto out; +- switch_commit_roots(trans->transaction); ++ switch_commit_roots(trans); + ret = btrfs_write_and_wait_transaction(trans); + if (ret) + btrfs_handle_fs_error(fs_info, ret, +@@ -1949,6 +1951,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) + struct btrfs_transaction *prev_trans = NULL; + int ret; + ++ /* ++ * Some places just start a transaction to commit it. We need to make ++ * sure that if this commit fails that the abort code actually marks the ++ * transaction as failed, so set trans->dirty to make the abort code do ++ * the right thing. 
++ */ ++ trans->dirty = true; ++ + /* Stop the commit early if ->aborted is set */ + if (unlikely(READ_ONCE(cur_trans->aborted))) { + ret = cur_trans->aborted; +@@ -2237,7 +2247,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) + list_add_tail(&fs_info->chunk_root->dirty_list, + &cur_trans->switch_commits); + +- switch_commit_roots(cur_trans); ++ switch_commit_roots(trans); + + ASSERT(list_empty(&cur_trans->dirty_bgs)); + ASSERT(list_empty(&cur_trans->io_bgs)); +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index ab27e6cd9b3e..6f2178618c22 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -3953,7 +3953,7 @@ static int log_csums(struct btrfs_trans_handle *trans, + static noinline int copy_items(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *dst_path, +- struct btrfs_path *src_path, u64 *last_extent, ++ struct btrfs_path *src_path, + int start_slot, int nr, int inode_only, + u64 logged_isize) + { +@@ -3964,7 +3964,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, + struct btrfs_file_extent_item *extent; + struct btrfs_inode_item *inode_item; + struct extent_buffer *src = src_path->nodes[0]; +- struct btrfs_key first_key, last_key, key; + int ret; + struct btrfs_key *ins_keys; + u32 *ins_sizes; +@@ -3972,9 +3971,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, + int i; + struct list_head ordered_sums; + int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; +- bool has_extents = false; +- bool need_find_last_extent = true; +- bool done = false; + + INIT_LIST_HEAD(&ordered_sums); + +@@ -3983,8 +3979,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, + if (!ins_data) + return -ENOMEM; + +- first_key.objectid = (u64)-1; +- + ins_sizes = (u32 *)ins_data; + ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); + +@@ -4005,9 +3999,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, + + src_offset = btrfs_item_ptr_offset(src, start_slot + i); + +- if (i == nr - 1) +- last_key = ins_keys[i]; +- + if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(dst_path->nodes[0], + dst_path->slots[0], +@@ -4021,20 +4012,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, + src_offset, ins_sizes[i]); + } + +- /* +- * We set need_find_last_extent here in case we know we were +- * processing other items and then walk into the first extent in +- * the inode. If we don't hit an extent then nothing changes, +- * we'll do the last search the next time around. +- */ +- if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { +- has_extents = true; +- if (first_key.objectid == (u64)-1) +- first_key = ins_keys[i]; +- } else { +- need_find_last_extent = false; +- } +- + /* take a reference on file data extents so that truncates + * or deletes of this inode don't have to relog the inode + * again +@@ -4100,167 +4077,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, + kfree(sums); + } + +- if (!has_extents) +- return ret; +- +- if (need_find_last_extent && *last_extent == first_key.offset) { +- /* +- * We don't have any leafs between our current one and the one +- * we processed before that can have file extent items for our +- * inode (and have a generation number smaller than our current +- * transaction id). 
+- */ +- need_find_last_extent = false; +- } +- +- /* +- * Because we use btrfs_search_forward we could skip leaves that were +- * not modified and then assume *last_extent is valid when it really +- * isn't. So back up to the previous leaf and read the end of the last +- * extent before we go and fill in holes. +- */ +- if (need_find_last_extent) { +- u64 len; +- +- ret = btrfs_prev_leaf(inode->root, src_path); +- if (ret < 0) +- return ret; +- if (ret) +- goto fill_holes; +- if (src_path->slots[0]) +- src_path->slots[0]--; +- src = src_path->nodes[0]; +- btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); +- if (key.objectid != btrfs_ino(inode) || +- key.type != BTRFS_EXTENT_DATA_KEY) +- goto fill_holes; +- extent = btrfs_item_ptr(src, src_path->slots[0], +- struct btrfs_file_extent_item); +- if (btrfs_file_extent_type(src, extent) == +- BTRFS_FILE_EXTENT_INLINE) { +- len = btrfs_file_extent_ram_bytes(src, extent); +- *last_extent = ALIGN(key.offset + len, +- fs_info->sectorsize); +- } else { +- len = btrfs_file_extent_num_bytes(src, extent); +- *last_extent = key.offset + len; +- } +- } +-fill_holes: +- /* So we did prev_leaf, now we need to move to the next leaf, but a few +- * things could have happened +- * +- * 1) A merge could have happened, so we could currently be on a leaf +- * that holds what we were copying in the first place. +- * 2) A split could have happened, and now not all of the items we want +- * are on the same leaf. +- * +- * So we need to adjust how we search for holes, we need to drop the +- * path and re-search for the first extent key we found, and then walk +- * forward until we hit the last one we copied. +- */ +- if (need_find_last_extent) { +- /* btrfs_prev_leaf could return 1 without releasing the path */ +- btrfs_release_path(src_path); +- ret = btrfs_search_slot(NULL, inode->root, &first_key, +- src_path, 0, 0); +- if (ret < 0) +- return ret; +- ASSERT(ret == 0); +- src = src_path->nodes[0]; +- i = src_path->slots[0]; +- } else { +- i = start_slot; +- } +- +- /* +- * Ok so here we need to go through and fill in any holes we may have +- * to make sure that holes are punched for those areas in case they had +- * extents previously. +- */ +- while (!done) { +- u64 offset, len; +- u64 extent_end; +- +- if (i >= btrfs_header_nritems(src_path->nodes[0])) { +- ret = btrfs_next_leaf(inode->root, src_path); +- if (ret < 0) +- return ret; +- ASSERT(ret == 0); +- src = src_path->nodes[0]; +- i = 0; +- need_find_last_extent = true; +- } +- +- btrfs_item_key_to_cpu(src, &key, i); +- if (!btrfs_comp_cpu_keys(&key, &last_key)) +- done = true; +- if (key.objectid != btrfs_ino(inode) || +- key.type != BTRFS_EXTENT_DATA_KEY) { +- i++; +- continue; +- } +- extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); +- if (btrfs_file_extent_type(src, extent) == +- BTRFS_FILE_EXTENT_INLINE) { +- len = btrfs_file_extent_ram_bytes(src, extent); +- extent_end = ALIGN(key.offset + len, +- fs_info->sectorsize); +- } else { +- len = btrfs_file_extent_num_bytes(src, extent); +- extent_end = key.offset + len; +- } +- i++; +- +- if (*last_extent == key.offset) { +- *last_extent = extent_end; +- continue; +- } +- offset = *last_extent; +- len = key.offset - *last_extent; +- ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), +- offset, 0, 0, len, 0, len, 0, 0, 0); +- if (ret) +- break; +- *last_extent = extent_end; +- } +- +- /* +- * Check if there is a hole between the last extent found in our leaf +- * and the first extent in the next leaf. 
If there is one, we need to +- * log an explicit hole so that at replay time we can punch the hole. +- */ +- if (ret == 0 && +- key.objectid == btrfs_ino(inode) && +- key.type == BTRFS_EXTENT_DATA_KEY && +- i == btrfs_header_nritems(src_path->nodes[0])) { +- ret = btrfs_next_leaf(inode->root, src_path); +- need_find_last_extent = true; +- if (ret > 0) { +- ret = 0; +- } else if (ret == 0) { +- btrfs_item_key_to_cpu(src_path->nodes[0], &key, +- src_path->slots[0]); +- if (key.objectid == btrfs_ino(inode) && +- key.type == BTRFS_EXTENT_DATA_KEY && +- *last_extent < key.offset) { +- const u64 len = key.offset - *last_extent; +- +- ret = btrfs_insert_file_extent(trans, log, +- btrfs_ino(inode), +- *last_extent, 0, +- 0, len, 0, len, +- 0, 0, 0); +- *last_extent += len; +- } +- } +- } +- /* +- * Need to let the callers know we dropped the path so they should +- * re-search. +- */ +- if (!ret && need_find_last_extent) +- ret = 1; + return ret; + } + +@@ -4425,7 +4241,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + const u64 i_size = i_size_read(&inode->vfs_inode); + const u64 ino = btrfs_ino(inode); + struct btrfs_path *dst_path = NULL; +- u64 last_extent = (u64)-1; ++ bool dropped_extents = false; + int ins_nr = 0; + int start_slot; + int ret; +@@ -4447,8 +4263,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + if (slot >= btrfs_header_nritems(leaf)) { + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, start_slot, +- ins_nr, 1, 0); ++ start_slot, ins_nr, 1, 0); + if (ret < 0) + goto out; + ins_nr = 0; +@@ -4472,8 +4287,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + path->slots[0]++; + continue; + } +- if (last_extent == (u64)-1) { +- last_extent = key.offset; ++ if (!dropped_extents) { + /* + * Avoid logging extent items logged in past fsync calls + * and leading to duplicate keys in the log tree. +@@ -4487,6 +4301,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + } while (ret == -EAGAIN); + if (ret) + goto out; ++ dropped_extents = true; + } + if (ins_nr == 0) + start_slot = slot; +@@ -4501,7 +4316,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + } + } + if (ins_nr > 0) { +- ret = copy_items(trans, inode, dst_path, path, &last_extent, ++ ret = copy_items(trans, inode, dst_path, path, + start_slot, ins_nr, 1, 0); + if (ret > 0) + ret = 0; +@@ -4688,13 +4503,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, + + if (slot >= nritems) { + if (ins_nr > 0) { +- u64 last_extent = 0; +- + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, start_slot, +- ins_nr, 1, 0); +- /* can't be 1, extent items aren't processed */ +- ASSERT(ret <= 0); ++ start_slot, ins_nr, 1, 0); + if (ret < 0) + return ret; + ins_nr = 0; +@@ -4718,13 +4528,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, + cond_resched(); + } + if (ins_nr > 0) { +- u64 last_extent = 0; +- + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, start_slot, +- ins_nr, 1, 0); +- /* can't be 1, extent items aren't processed */ +- ASSERT(ret <= 0); ++ start_slot, ins_nr, 1, 0); + if (ret < 0) + return ret; + } +@@ -4733,100 +4538,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, + } + + /* +- * If the no holes feature is enabled we need to make sure any hole between the +- * last extent and the i_size of our inode is explicitly marked in the log. 
This +- * is to make sure that doing something like: +- * +- * 1) create file with 128Kb of data +- * 2) truncate file to 64Kb +- * 3) truncate file to 256Kb +- * 4) fsync file +- * 5) <crash/power failure> +- * 6) mount fs and trigger log replay +- * +- * Will give us a file with a size of 256Kb, the first 64Kb of data match what +- * the file had in its first 64Kb of data at step 1 and the last 192Kb of the +- * file correspond to a hole. The presence of explicit holes in a log tree is +- * what guarantees that log replay will remove/adjust file extent items in the +- * fs/subvol tree. +- * +- * Here we do not need to care about holes between extents, that is already done +- * by copy_items(). We also only need to do this in the full sync path, where we +- * lookup for extents from the fs/subvol tree only. In the fast path case, we +- * lookup the list of modified extent maps and if any represents a hole, we +- * insert a corresponding extent representing a hole in the log tree. ++ * When using the NO_HOLES feature if we punched a hole that causes the ++ * deletion of entire leafs or all the extent items of the first leaf (the one ++ * that contains the inode item and references) we may end up not processing ++ * any extents, because there are no leafs with a generation matching the ++ * current transaction that have extent items for our inode. So we need to find ++ * if any holes exist and then log them. We also need to log holes after any ++ * truncate operation that changes the inode's size. + */ +-static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, +- struct btrfs_inode *inode, +- struct btrfs_path *path) ++static int btrfs_log_holes(struct btrfs_trans_handle *trans, ++ struct btrfs_root *root, ++ struct btrfs_inode *inode, ++ struct btrfs_path *path) + { + struct btrfs_fs_info *fs_info = root->fs_info; +- int ret; + struct btrfs_key key; +- u64 hole_start; +- u64 hole_size; +- struct extent_buffer *leaf; +- struct btrfs_root *log = root->log_root; + const u64 ino = btrfs_ino(inode); + const u64 i_size = i_size_read(&inode->vfs_inode); ++ u64 prev_extent_end = 0; ++ int ret; + +- if (!btrfs_fs_incompat(fs_info, NO_HOLES)) ++ if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; +- key.offset = (u64)-1; ++ key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +- ASSERT(ret != 0); + if (ret < 0) + return ret; + +- ASSERT(path->slots[0] > 0); +- path->slots[0]--; +- leaf = path->nodes[0]; +- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); +- +- if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { +- /* inode does not have any extents */ +- hole_start = 0; +- hole_size = i_size; +- } else { ++ while (true) { + struct btrfs_file_extent_item *extent; ++ struct extent_buffer *leaf = path->nodes[0]; + u64 len; + +- /* +- * If there's an extent beyond i_size, an explicit hole was +- * already inserted by copy_items(). +- */ +- if (key.offset >= i_size) +- return 0; ++ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ++ ret = btrfs_next_leaf(root, path); ++ if (ret < 0) ++ return ret; ++ if (ret > 0) { ++ ret = 0; ++ break; ++ } ++ leaf = path->nodes[0]; ++ } ++ ++ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); ++ if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) ++ break; ++ ++ /* We have a hole, log it. 
*/ ++ if (prev_extent_end < key.offset) { ++ const u64 hole_len = key.offset - prev_extent_end; ++ ++ /* ++ * Release the path to avoid deadlocks with other code ++ * paths that search the root while holding locks on ++ * leafs from the log root. ++ */ ++ btrfs_release_path(path); ++ ret = btrfs_insert_file_extent(trans, root->log_root, ++ ino, prev_extent_end, 0, ++ 0, hole_len, 0, hole_len, ++ 0, 0, 0); ++ if (ret < 0) ++ return ret; ++ ++ /* ++ * Search for the same key again in the root. Since it's ++ * an extent item and we are holding the inode lock, the ++ * key must still exist. If it doesn't just emit warning ++ * and return an error to fall back to a transaction ++ * commit. ++ */ ++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ++ if (ret < 0) ++ return ret; ++ if (WARN_ON(ret > 0)) ++ return -ENOENT; ++ leaf = path->nodes[0]; ++ } + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); +- + if (btrfs_file_extent_type(leaf, extent) == +- BTRFS_FILE_EXTENT_INLINE) +- return 0; ++ BTRFS_FILE_EXTENT_INLINE) { ++ len = btrfs_file_extent_ram_bytes(leaf, extent); ++ prev_extent_end = ALIGN(key.offset + len, ++ fs_info->sectorsize); ++ } else { ++ len = btrfs_file_extent_num_bytes(leaf, extent); ++ prev_extent_end = key.offset + len; ++ } + +- len = btrfs_file_extent_num_bytes(leaf, extent); +- /* Last extent goes beyond i_size, no need to log a hole. */ +- if (key.offset + len > i_size) +- return 0; +- hole_start = key.offset + len; +- hole_size = i_size - hole_start; ++ path->slots[0]++; ++ cond_resched(); + } +- btrfs_release_path(path); + +- /* Last extent ends at i_size. */ +- if (hole_size == 0) +- return 0; ++ if (prev_extent_end < i_size) { ++ u64 hole_len; + +- hole_size = ALIGN(hole_size, fs_info->sectorsize); +- ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, +- hole_size, 0, hole_size, 0, 0, 0); +- return ret; ++ btrfs_release_path(path); ++ hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); ++ ret = btrfs_insert_file_extent(trans, root->log_root, ++ ino, prev_extent_end, 0, 0, ++ hole_len, 0, hole_len, ++ 0, 0, 0); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; + } + + /* +@@ -5030,6 +4854,50 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, + } + continue; + } ++ /* ++ * If the inode was already logged skip it - otherwise we can ++ * hit an infinite loop. Example: ++ * ++ * From the commit root (previous transaction) we have the ++ * following inodes: ++ * ++ * inode 257 a directory ++ * inode 258 with references "zz" and "zz_link" on inode 257 ++ * inode 259 with reference "a" on inode 257 ++ * ++ * And in the current (uncommitted) transaction we have: ++ * ++ * inode 257 a directory, unchanged ++ * inode 258 with references "a" and "a2" on inode 257 ++ * inode 259 with reference "zz_link" on inode 257 ++ * inode 261 with reference "zz" on inode 257 ++ * ++ * When logging inode 261 the following infinite loop could ++ * happen if we don't skip already logged inodes: ++ * ++ * - we detect inode 258 as a conflicting inode, with inode 261 ++ * on reference "zz", and log it; ++ * ++ * - we detect inode 259 as a conflicting inode, with inode 258 ++ * on reference "a", and log it; ++ * ++ * - we detect inode 258 as a conflicting inode, with inode 259 ++ * on reference "zz_link", and log it - again! After this we ++ * repeat the above steps forever. ++ */ ++ spin_lock(&BTRFS_I(inode)->lock); ++ /* ++ * Check the inode's logged_trans only instead of ++ * btrfs_inode_in_log(). 
This is because the last_log_commit of ++ * the inode is not updated when we only log that it exists and ++ * and it has the full sync bit set (see btrfs_log_inode()). ++ */ ++ if (BTRFS_I(inode)->logged_trans == trans->transid) { ++ spin_unlock(&BTRFS_I(inode)->lock); ++ btrfs_add_delayed_iput(inode); ++ continue; ++ } ++ spin_unlock(&BTRFS_I(inode)->lock); + /* + * We are safe logging the other inode without acquiring its + * lock as long as we log with the LOG_INODE_EXISTS mode. We +@@ -5129,7 +4997,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; +- u64 last_extent = 0; + int err = 0; + int ret; + int nritems; +@@ -5307,7 +5174,7 @@ again: + ins_start_slot = path->slots[0]; + } + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, ins_start_slot, ++ ins_start_slot, + ins_nr, inode_only, + logged_isize); + if (ret < 0) { +@@ -5330,17 +5197,13 @@ again: + if (ins_nr == 0) + goto next_slot; + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, ins_start_slot, ++ ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; +- if (ret) { +- btrfs_release_path(path); +- continue; +- } + goto next_slot; + } + +@@ -5353,18 +5216,13 @@ again: + goto next_slot; + } + +- ret = copy_items(trans, inode, dst_path, path, &last_extent, ++ ret = copy_items(trans, inode, dst_path, path, + ins_start_slot, ins_nr, inode_only, + logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } +- if (ret) { +- ins_nr = 0; +- btrfs_release_path(path); +- continue; +- } + ins_nr = 1; + ins_start_slot = path->slots[0]; + next_slot: +@@ -5378,13 +5236,12 @@ next_slot: + } + if (ins_nr) { + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, ins_start_slot, ++ ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } +- ret = 0; + ins_nr = 0; + } + btrfs_release_path(path); +@@ -5399,14 +5256,13 @@ next_key: + } + } + if (ins_nr) { +- ret = copy_items(trans, inode, dst_path, path, &last_extent, ++ ret = copy_items(trans, inode, dst_path, path, + ins_start_slot, ins_nr, inode_only, + logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } +- ret = 0; + ins_nr = 0; + } + +@@ -5419,7 +5275,7 @@ next_key: + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { + btrfs_release_path(path); + btrfs_release_path(dst_path); +- err = btrfs_log_trailing_hole(trans, root, inode, path); ++ err = btrfs_log_holes(trans, root, inode, path); + if (err) + goto out_unlock; + } +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index 97f1ba7c18b2..f7d9fc1a6fc2 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -881,17 +881,28 @@ static struct btrfs_fs_devices *find_fsid_changed( + /* + * Handles the case where scanned device is part of an fs that had + * multiple successful changes of FSID but curently device didn't +- * observe it. Meaning our fsid will be different than theirs. ++ * observe it. Meaning our fsid will be different than theirs. We need ++ * to handle two subcases : ++ * 1 - The fs still continues to have different METADATA/FSID uuids. ++ * 2 - The fs is switched back to its original FSID (METADATA/FSID ++ * are equal). 
+ */ + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { ++ /* Changed UUIDs */ + if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, + BTRFS_FSID_SIZE) != 0 && + memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid, + BTRFS_FSID_SIZE) == 0 && + memcmp(fs_devices->fsid, disk_super->fsid, +- BTRFS_FSID_SIZE) != 0) { ++ BTRFS_FSID_SIZE) != 0) ++ return fs_devices; ++ ++ /* Unchanged UUIDs */ ++ if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, ++ BTRFS_FSID_SIZE) == 0 && ++ memcmp(fs_devices->fsid, disk_super->metadata_uuid, ++ BTRFS_FSID_SIZE) == 0) + return fs_devices; +- } + } + + return NULL; +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index e1cac715d19e..06d932ed097e 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) + } + + rc = cifs_negotiate_protocol(0, tcon->ses); +- if (!rc && tcon->ses->need_reconnect) ++ if (!rc && tcon->ses->need_reconnect) { + rc = cifs_setup_session(0, tcon->ses, nls_codepage); +- ++ if ((rc == -EACCES) && !tcon->retry) { ++ rc = -EHOSTDOWN; ++ mutex_unlock(&tcon->ses->session_mutex); ++ goto failed; ++ } ++ } + if (rc || !tcon->need_reconnect) { + mutex_unlock(&tcon->ses->session_mutex); + goto out; +@@ -397,6 +402,7 @@ out: + case SMB2_SET_INFO: + rc = -EAGAIN; + } ++failed: + unload_nls(nls_codepage); + return rc; + } +diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c +index 680aba9c00d5..fd0b5dd68f9e 100644 +--- a/fs/configfs/inode.c ++++ b/fs/configfs/inode.c +@@ -76,14 +76,11 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) + if (ia_valid & ATTR_GID) + sd_iattr->ia_gid = iattr->ia_gid; + if (ia_valid & ATTR_ATIME) +- sd_iattr->ia_atime = timestamp_truncate(iattr->ia_atime, +- inode); ++ sd_iattr->ia_atime = iattr->ia_atime; + if (ia_valid & ATTR_MTIME) +- sd_iattr->ia_mtime = timestamp_truncate(iattr->ia_mtime, +- inode); ++ sd_iattr->ia_mtime = iattr->ia_mtime; + if (ia_valid & ATTR_CTIME) +- sd_iattr->ia_ctime = timestamp_truncate(iattr->ia_ctime, +- inode); ++ sd_iattr->ia_ctime = iattr->ia_ctime; + if (ia_valid & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + +diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c +index c34fa7c61b43..4ee65b2b6247 100644 +--- a/fs/crypto/keyring.c ++++ b/fs/crypto/keyring.c +@@ -664,9 +664,6 @@ static int check_for_busy_inodes(struct super_block *sb, + struct list_head *pos; + size_t busy_count = 0; + unsigned long ino; +- struct dentry *dentry; +- char _path[256]; +- char *path = NULL; + + spin_lock(&mk->mk_decrypted_inodes_lock); + +@@ -685,22 +682,14 @@ static int check_for_busy_inodes(struct super_block *sb, + struct fscrypt_info, + ci_master_key_link)->ci_inode; + ino = inode->i_ino; +- dentry = d_find_alias(inode); + } + spin_unlock(&mk->mk_decrypted_inodes_lock); + +- if (dentry) { +- path = dentry_path(dentry, _path, sizeof(_path)); +- dput(dentry); +- } +- if (IS_ERR_OR_NULL(path)) +- path = "(unknown)"; +- + fscrypt_warn(NULL, +- "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)", ++ "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu", + sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), + master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, +- ino, path); ++ ino); + return -EBUSY; + } + +diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c +index 19f89f9fb10c..23b74b8e8f96 100644 +--- a/fs/erofs/decompressor.c ++++ b/fs/erofs/decompressor.c +@@ -306,24 +306,22 @@ static int 
z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq, + } + + src = kmap_atomic(*rq->in); +- if (!rq->out[0]) { +- dst = NULL; +- } else { ++ if (rq->out[0]) { + dst = kmap_atomic(rq->out[0]); + memcpy(dst + rq->pageofs_out, src, righthalf); ++ kunmap_atomic(dst); + } + +- if (rq->out[1] == *rq->in) { +- memmove(src, src + righthalf, rq->pageofs_out); +- } else if (nrpages_out == 2) { +- if (dst) +- kunmap_atomic(dst); ++ if (nrpages_out == 2) { + DBG_BUGON(!rq->out[1]); +- dst = kmap_atomic(rq->out[1]); +- memcpy(dst, src + righthalf, rq->pageofs_out); ++ if (rq->out[1] == *rq->in) { ++ memmove(src, src + righthalf, rq->pageofs_out); ++ } else { ++ dst = kmap_atomic(rq->out[1]); ++ memcpy(dst, src + righthalf, rq->pageofs_out); ++ kunmap_atomic(dst); ++ } + } +- if (dst) +- kunmap_atomic(dst); + kunmap_atomic(src); + return 0; + } +diff --git a/fs/eventfd.c b/fs/eventfd.c +index 8aa0ea8c55e8..78e41c7c3d05 100644 +--- a/fs/eventfd.c ++++ b/fs/eventfd.c +@@ -24,6 +24,8 @@ + #include <linux/seq_file.h> + #include <linux/idr.h> + ++DEFINE_PER_CPU(int, eventfd_wake_count); ++ + static DEFINE_IDA(eventfd_ida); + + struct eventfd_ctx { +@@ -60,12 +62,25 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) + { + unsigned long flags; + ++ /* ++ * Deadlock or stack overflow issues can happen if we recurse here ++ * through waitqueue wakeup handlers. If the caller users potentially ++ * nested waitqueues with custom wakeup handlers, then it should ++ * check eventfd_signal_count() before calling this function. If ++ * it returns true, the eventfd_signal() call should be deferred to a ++ * safe context. ++ */ ++ if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) ++ return 0; ++ + spin_lock_irqsave(&ctx->wqh.lock, flags); ++ this_cpu_inc(eventfd_wake_count); + if (ULLONG_MAX - ctx->count < n) + n = ULLONG_MAX - ctx->count; + ctx->count += n; + if (waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, EPOLLIN); ++ this_cpu_dec(eventfd_wake_count); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return n; +diff --git a/fs/ext2/super.c b/fs/ext2/super.c +index 30c630d73f0f..065cd2d1bdc6 100644 +--- a/fs/ext2/super.c ++++ b/fs/ext2/super.c +@@ -1082,9 +1082,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) + + if (EXT2_BLOCKS_PER_GROUP(sb) == 0) + goto cantfind_ext2; +- sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - +- le32_to_cpu(es->s_first_data_block) - 1) +- / EXT2_BLOCKS_PER_GROUP(sb)) + 1; ++ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - ++ le32_to_cpu(es->s_first_data_block) - 1) ++ / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / + EXT2_DESC_PER_BLOCK(sb); + sbi->s_group_desc = kmalloc_array (db_count, +diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c +index 6305d5ec25af..5ef8d7ae231b 100644 +--- a/fs/ext4/dir.c ++++ b/fs/ext4/dir.c +@@ -673,9 +673,11 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) + { + struct qstr qstr = {.name = str, .len = len }; +- struct inode *inode = dentry->d_parent->d_inode; ++ const struct dentry *parent = READ_ONCE(dentry->d_parent); ++ const struct inode *inode = READ_ONCE(parent->d_inode); + +- if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { ++ if (!inode || !IS_CASEFOLDED(inode) || ++ !EXT4_SB(inode->i_sb)->s_encoding) { + if (len != name->len) + return -1; + return memcmp(str, name->name, len); +@@ -688,10 +690,11 @@ static int ext4_d_hash(const 
struct dentry *dentry, struct qstr *str) + { + const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); + const struct unicode_map *um = sbi->s_encoding; ++ const struct inode *inode = READ_ONCE(dentry->d_inode); + unsigned char *norm; + int len, ret = 0; + +- if (!IS_CASEFOLDED(dentry->d_inode) || !um) ++ if (!inode || !IS_CASEFOLDED(inode) || !um) + return 0; + + norm = kmalloc(PATH_MAX, GFP_ATOMIC); +diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c +index 12ceadef32c5..2cc9f2168b9e 100644 +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -478,17 +478,26 @@ int ext4_bio_write_page(struct ext4_io_submit *io, + gfp_t gfp_flags = GFP_NOFS; + unsigned int enc_bytes = round_up(len, i_blocksize(inode)); + ++ /* ++ * Since bounce page allocation uses a mempool, we can only use ++ * a waiting mask (i.e. request guaranteed allocation) on the ++ * first page of the bio. Otherwise it can deadlock. ++ */ ++ if (io->io_bio) ++ gfp_flags = GFP_NOWAIT | __GFP_NOWARN; + retry_encrypt: + bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes, + 0, gfp_flags); + if (IS_ERR(bounce_page)) { + ret = PTR_ERR(bounce_page); +- if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { +- if (io->io_bio) { ++ if (ret == -ENOMEM && ++ (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { ++ gfp_flags = GFP_NOFS; ++ if (io->io_bio) + ext4_io_submit(io); +- congestion_wait(BLK_RW_ASYNC, HZ/50); +- } +- gfp_flags |= __GFP_NOFAIL; ++ else ++ gfp_flags |= __GFP_NOFAIL; ++ congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry_encrypt; + } + bounce_page = NULL; +diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c +index 4033778bcbbf..84280ad3786c 100644 +--- a/fs/f2fs/dir.c ++++ b/fs/f2fs/dir.c +@@ -1068,24 +1068,27 @@ static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) + { + struct qstr qstr = {.name = str, .len = len }; ++ const struct dentry *parent = READ_ONCE(dentry->d_parent); ++ const struct inode *inode = READ_ONCE(parent->d_inode); + +- if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { ++ if (!inode || !IS_CASEFOLDED(inode)) { + if (len != name->len) + return -1; +- return memcmp(str, name, len); ++ return memcmp(str, name->name, len); + } + +- return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); ++ return f2fs_ci_compare(inode, name, &qstr, false); + } + + static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) + { + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const struct unicode_map *um = sbi->s_encoding; ++ const struct inode *inode = READ_ONCE(dentry->d_inode); + unsigned char *norm; + int len, ret = 0; + +- if (!IS_CASEFOLDED(dentry->d_inode)) ++ if (!inode || !IS_CASEFOLDED(inode)) + return 0; + + norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); +diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c +index fae665691481..72f308790a8e 100644 +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -751,18 +751,12 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; +- if (ia_valid & ATTR_ATIME) { +- inode->i_atime = timestamp_truncate(attr->ia_atime, +- inode); +- } +- if (ia_valid & ATTR_MTIME) { +- inode->i_mtime = timestamp_truncate(attr->ia_mtime, +- inode); +- } +- if (ia_valid & ATTR_CTIME) { +- inode->i_ctime = timestamp_truncate(attr->ia_ctime, +- inode); +- } ++ if (ia_valid & ATTR_ATIME) ++ inode->i_atime = attr->ia_atime; ++ if (ia_valid & ATTR_MTIME) ++ inode->i_mtime = attr->ia_mtime; ++ if (ia_valid & 
ATTR_CTIME) ++ inode->i_ctime = attr->ia_ctime; + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + +diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c +index 1443cee15863..ea8dbf1458c9 100644 +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1213,9 +1213,11 @@ static int f2fs_statfs_project(struct super_block *sb, + return PTR_ERR(dquot); + spin_lock(&dquot->dq_dqb_lock); + +- limit = (dquot->dq_dqb.dqb_bsoftlimit ? +- dquot->dq_dqb.dqb_bsoftlimit : +- dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; ++ limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, ++ dquot->dq_dqb.dqb_bhardlimit); ++ if (limit) ++ limit >>= sb->s_blocksize_bits; ++ + if (limit && buf->f_blocks > limit) { + curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; + buf->f_blocks = limit; +@@ -1224,9 +1226,9 @@ static int f2fs_statfs_project(struct super_block *sb, + (buf->f_blocks - curblock) : 0; + } + +- limit = dquot->dq_dqb.dqb_isoftlimit ? +- dquot->dq_dqb.dqb_isoftlimit : +- dquot->dq_dqb.dqb_ihardlimit; ++ limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, ++ dquot->dq_dqb.dqb_ihardlimit); ++ + if (limit && buf->f_files > limit) { + buf->f_files = limit; + buf->f_ffree = +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 335607b8c5c0..76ac9c7d32ec 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -2063,7 +2063,7 @@ void wb_workfn(struct work_struct *work) + struct bdi_writeback, dwork); + long pages_written; + +- set_worker_desc("flush-%s", dev_name(wb->bdi->dev)); ++ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); + current->flags |= PF_SWAPWRITE; + + if (likely(!current_is_workqueue_rescuer() || +diff --git a/fs/fuse/file.c b/fs/fuse/file.c +index ce715380143c..695369f46f92 100644 +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -1465,6 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, + } + ia = NULL; + if (nres < 0) { ++ iov_iter_revert(iter, nbytes); + err = nres; + break; + } +@@ -1473,8 +1474,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, + count -= nres; + res += nres; + pos += nres; +- if (nres != nbytes) ++ if (nres != nbytes) { ++ iov_iter_revert(iter, nbytes - nres); + break; ++ } + if (count) { + max_pages = iov_iter_npages(iter, fc->max_pages); + ia = fuse_io_alloc(io, max_pages); +diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c +index 01ff37b76652..4a10b4e7092a 100644 +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -833,7 +833,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct gfs2_inode *ip = GFS2_I(inode); +- ssize_t written = 0, ret; ++ ssize_t ret; + + ret = gfs2_rsqa_alloc(ip); + if (ret) +@@ -853,68 +853,58 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) +- goto out; +- +- /* We can write back this queue in page reclaim */ +- current->backing_dev_info = inode_to_bdi(inode); ++ goto out_unlock; + + ret = file_remove_privs(file); + if (ret) +- goto out2; ++ goto out_unlock; + + ret = file_update_time(file); + if (ret) +- goto out2; ++ goto out_unlock; + + if (iocb->ki_flags & IOCB_DIRECT) { + struct address_space *mapping = file->f_mapping; +- loff_t pos, endbyte; +- ssize_t buffered; ++ ssize_t buffered, ret2; + +- written = gfs2_file_direct_write(iocb, from); +- if (written < 0 || !iov_iter_count(from)) +- goto out2; ++ ret = gfs2_file_direct_write(iocb, 
from); ++ if (ret < 0 || !iov_iter_count(from)) ++ goto out_unlock; + +- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); +- if (unlikely(ret < 0)) +- goto out2; +- buffered = ret; ++ iocb->ki_flags |= IOCB_DSYNC; ++ current->backing_dev_info = inode_to_bdi(inode); ++ buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); ++ current->backing_dev_info = NULL; ++ if (unlikely(buffered <= 0)) ++ goto out_unlock; + + /* + * We need to ensure that the page cache pages are written to + * disk and invalidated to preserve the expected O_DIRECT +- * semantics. ++ * semantics. If the writeback or invalidate fails, only report ++ * the direct I/O range as we don't know if the buffered pages ++ * made it to disk. + */ +- pos = iocb->ki_pos; +- endbyte = pos + buffered - 1; +- ret = filemap_write_and_wait_range(mapping, pos, endbyte); +- if (!ret) { +- iocb->ki_pos += buffered; +- written += buffered; +- invalidate_mapping_pages(mapping, +- pos >> PAGE_SHIFT, +- endbyte >> PAGE_SHIFT); +- } else { +- /* +- * We don't know how much we wrote, so just return +- * the number of bytes which were direct-written +- */ +- } ++ iocb->ki_pos += buffered; ++ ret2 = generic_write_sync(iocb, buffered); ++ invalidate_mapping_pages(mapping, ++ (iocb->ki_pos - buffered) >> PAGE_SHIFT, ++ (iocb->ki_pos - 1) >> PAGE_SHIFT); ++ if (!ret || ret2 > 0) ++ ret += ret2; + } else { ++ current->backing_dev_info = inode_to_bdi(inode); + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); +- if (likely(ret > 0)) ++ current->backing_dev_info = NULL; ++ if (likely(ret > 0)) { + iocb->ki_pos += ret; ++ ret = generic_write_sync(iocb, ret); ++ } + } + +-out2: +- current->backing_dev_info = NULL; +-out: ++out_unlock: + inode_unlock(inode); +- if (likely(ret > 0)) { +- /* Handle various SYNC-type writes */ +- ret = generic_write_sync(iocb, ret); +- } +- return written ? 
written : ret; ++ return ret; + } + + static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, +diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c +index e7b9d39955d4..7ca84be20cf6 100644 +--- a/fs/gfs2/lops.c ++++ b/fs/gfs2/lops.c +@@ -421,7 +421,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, + + for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { + if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { +- if (lh.lh_sequence > head->lh_sequence) ++ if (lh.lh_sequence >= head->lh_sequence) + *head = lh; + else { + ret = true; +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index 1c58859aa592..ef485f892d1b 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -981,6 +981,7 @@ static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) + + static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) + { ++ (*pos)++; + return NULL; + } + +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index e180033e35cf..05ed7be8a634 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -162,6 +162,17 @@ typedef struct { + bool eof; + } nfs_readdir_descriptor_t; + ++static ++void nfs_readdir_init_array(struct page *page) ++{ ++ struct nfs_cache_array *array; ++ ++ array = kmap_atomic(page); ++ memset(array, 0, sizeof(struct nfs_cache_array)); ++ array->eof_index = -1; ++ kunmap_atomic(array); ++} ++ + /* + * we are freeing strings created by nfs_add_to_readdir_array() + */ +@@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page) + array = kmap_atomic(page); + for (i = 0; i < array->size; i++) + kfree(array->array[i].string.name); ++ array->size = 0; + kunmap_atomic(array); + } + +@@ -610,6 +622,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, + int status = -ENOMEM; + unsigned int array_size = ARRAY_SIZE(pages); + ++ nfs_readdir_init_array(page); ++ + entry.prev_cookie = 0; + entry.cookie = desc->last_cookie; + entry.eof = 0; +@@ -626,8 +640,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, + } + + array = kmap(page); +- memset(array, 0, sizeof(struct nfs_cache_array)); +- array->eof_index = -1; + + status = nfs_readdir_alloc_pages(pages, array_size); + if (status < 0) +@@ -682,6 +694,7 @@ int nfs_readdir_filler(void *data, struct page* page) + unlock_page(page); + return 0; + error: ++ nfs_readdir_clear_array(page); + unlock_page(page); + return ret; + } +@@ -689,8 +702,6 @@ int nfs_readdir_filler(void *data, struct page* page) + static + void cache_page_release(nfs_readdir_descriptor_t *desc) + { +- if (!desc->page->mapping) +- nfs_readdir_clear_array(desc->page); + put_page(desc->page); + desc->page = NULL; + } +@@ -704,19 +715,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) + + /* + * Returns 0 if desc->dir_cookie was found on page desc->page_index ++ * and locks the page to prevent removal from the page cache. 
+ */ + static +-int find_cache_page(nfs_readdir_descriptor_t *desc) ++int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) + { + int res; + + desc->page = get_cache_page(desc); + if (IS_ERR(desc->page)) + return PTR_ERR(desc->page); +- +- res = nfs_readdir_search_array(desc); ++ res = lock_page_killable(desc->page); + if (res != 0) +- cache_page_release(desc); ++ goto error; ++ res = -EAGAIN; ++ if (desc->page->mapping != NULL) { ++ res = nfs_readdir_search_array(desc); ++ if (res == 0) ++ return 0; ++ } ++ unlock_page(desc->page); ++error: ++ cache_page_release(desc); + return res; + } + +@@ -731,7 +751,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) + desc->last_cookie = 0; + } + do { +- res = find_cache_page(desc); ++ res = find_and_lock_cache_page(desc); + } while (res == -EAGAIN); + return res; + } +@@ -770,7 +790,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) + desc->eof = true; + + kunmap(desc->page); +- cache_page_release(desc); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", + (unsigned long long)*desc->dir_cookie, res); + return res; +@@ -816,13 +835,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) + + status = nfs_do_filldir(desc); + ++ out_release: ++ nfs_readdir_clear_array(desc->page); ++ cache_page_release(desc); + out: + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", + __func__, status); + return status; +- out_release: +- cache_page_release(desc); +- goto out; + } + + /* The file offset position represents the dirent entry number. A +@@ -887,6 +906,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) + break; + + res = nfs_do_filldir(desc); ++ unlock_page(desc->page); ++ cache_page_release(desc); + if (res < 0) + break; + } while (!desc->eof); +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index ef55e9b1cd4e..3007b8945d38 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -791,6 +791,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file *nf, *new; + struct inode *inode; + unsigned int hashval; ++ bool retry = true; + + /* FIXME: skip this if fh_dentry is already set? */ + status = fh_verify(rqstp, fhp, S_IFREG, +@@ -826,6 +827,11 @@ wait_for_construction: + + /* Did construction of this file fail? 
*/ + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { ++ if (!retry) { ++ status = nfserr_jukebox; ++ goto out; ++ } ++ retry = false; + nfsd_file_put_noref(nf); + goto retry; + } +diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c +index 2681c70283ce..e12409eca7cc 100644 +--- a/fs/nfsd/nfs4layouts.c ++++ b/fs/nfsd/nfs4layouts.c +@@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) + + /* Client gets 2 lease periods to return it */ + cutoff = ktime_add_ns(task->tk_start, +- nn->nfsd4_lease * NSEC_PER_SEC * 2); ++ (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); + + if (ktime_before(now, cutoff)) { + rpc_delay(task, HZ/100); /* 10 mili-seconds */ +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 08f6eb2b73f8..1c82d7dd54df 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -6550,7 +6550,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + } + + if (fl_flags & FL_SLEEP) { +- nbl->nbl_time = jiffies; ++ nbl->nbl_time = get_seconds(); + spin_lock(&nn->blocked_locks_lock); + list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); + list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); +diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h +index 46f56afb6cb8..a080789b4d13 100644 +--- a/fs/nfsd/state.h ++++ b/fs/nfsd/state.h +@@ -605,7 +605,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) + struct nfsd4_blocked_lock { + struct list_head nbl_list; + struct list_head nbl_lru; +- unsigned long nbl_time; ++ time_t nbl_time; + struct file_lock nbl_lock; + struct knfsd_fh nbl_fh; + struct nfsd4_callback nbl_cb; +diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c +index cf423fea0c6f..fc38b9fe4549 100644 +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -975,6 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, + host_err = vfs_iter_write(file, &iter, &pos, flags); + if (host_err < 0) + goto out_nfserr; ++ *cnt = host_err; + nfsdstats.io_write += *cnt; + fsnotify_modify(file); + +diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c +index 6c7388430ad3..d4359a1df3d5 100644 +--- a/fs/ntfs/inode.c ++++ b/fs/ntfs/inode.c +@@ -2899,18 +2899,12 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) + ia_valid |= ATTR_MTIME | ATTR_CTIME; + } + } +- if (ia_valid & ATTR_ATIME) { +- vi->i_atime = timestamp_truncate(attr->ia_atime, +- vi); +- } +- if (ia_valid & ATTR_MTIME) { +- vi->i_mtime = timestamp_truncate(attr->ia_mtime, +- vi); +- } +- if (ia_valid & ATTR_CTIME) { +- vi->i_ctime = timestamp_truncate(attr->ia_ctime, +- vi); +- } ++ if (ia_valid & ATTR_ATIME) ++ vi->i_atime = attr->ia_atime; ++ if (ia_valid & ATTR_MTIME) ++ vi->i_mtime = attr->ia_mtime; ++ if (ia_valid & ATTR_CTIME) ++ vi->i_ctime = attr->ia_ctime; + mark_inode_dirty(vi); + out: + return err; +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index 9876db52913a..6cd5e4924e4d 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -2101,17 +2101,15 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) + static int ocfs2_inode_lock_for_extent_tree(struct inode *inode, + struct buffer_head **di_bh, + int meta_level, +- int overwrite_io, + int write_sem, + int wait) + { + int ret = 0; + + if (wait) +- ret = ocfs2_inode_lock(inode, NULL, meta_level); ++ ret = ocfs2_inode_lock(inode, di_bh, meta_level); + else +- ret = ocfs2_try_inode_lock(inode, +- overwrite_io ? 
NULL : di_bh, meta_level); ++ ret = ocfs2_try_inode_lock(inode, di_bh, meta_level); + if (ret < 0) + goto out; + +@@ -2136,6 +2134,7 @@ static int ocfs2_inode_lock_for_extent_tree(struct inode *inode, + + out_unlock: + brelse(*di_bh); ++ *di_bh = NULL; + ocfs2_inode_unlock(inode, meta_level); + out: + return ret; +@@ -2177,7 +2176,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file, + ret = ocfs2_inode_lock_for_extent_tree(inode, + &di_bh, + meta_level, +- overwrite_io, + write_sem, + wait); + if (ret < 0) { +@@ -2233,13 +2231,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file, + &di_bh, + meta_level, + write_sem); ++ meta_level = 1; ++ write_sem = 1; + ret = ocfs2_inode_lock_for_extent_tree(inode, + &di_bh, + meta_level, +- overwrite_io, +- 1, ++ write_sem, + wait); +- write_sem = 1; + if (ret < 0) { + if (ret != -EAGAIN) + mlog_errno(ret); +diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c +index e235a635d9ec..15e4fa288475 100644 +--- a/fs/overlayfs/file.c ++++ b/fs/overlayfs/file.c +@@ -146,7 +146,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) + struct inode *inode = file_inode(file); + struct fd real; + const struct cred *old_cred; +- ssize_t ret; ++ loff_t ret; + + /* + * The two special cases below do not need to involve real fs, +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 47a91c9733a5..7255e6a5838f 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -504,7 +504,13 @@ get: + if (err) + goto fail; + +- WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); ++ /* ++ * Directory inode is always on overlay st_dev. ++ * Non-dir with ovl_same_dev() could be on pseudo st_dev in case ++ * of xino bits overflow. ++ */ ++ WARN_ON_ONCE(S_ISDIR(stat.mode) && ++ dir->d_sb->s_dev != stat.dev); + ino = stat.ino; + } else if (xinobits && !OVL_TYPE_UPPER(type)) { + ino = ovl_remap_lower_ino(ino, xinobits, +diff --git a/fs/read_write.c b/fs/read_write.c +index 5bbf587f5bc1..7458fccc59e1 100644 +--- a/fs/read_write.c ++++ b/fs/read_write.c +@@ -1777,10 +1777,9 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len, + * else. Assume that the offsets have already been checked for block + * alignment. + * +- * For deduplication we always scale down to the previous block because we +- * can't meaningfully compare post-EOF contents. +- * +- * For clone we only link a partial EOF block above the destination file's EOF. ++ * For clone we only link a partial EOF block above or at the destination file's ++ * EOF. For deduplication we accept a partial EOF block only if it ends at the ++ * destination file's EOF (can not link it into the middle of a file). + * + * Shorten the request if possible. 
+ */ +@@ -1796,8 +1795,7 @@ static int generic_remap_check_len(struct inode *inode_in, + if ((*len & blkmask) == 0) + return 0; + +- if ((remap_flags & REMAP_FILE_DEDUP) || +- pos_out + *len < i_size_read(inode_out)) ++ if (pos_out + *len < i_size_read(inode_out)) + new_len &= ~blkmask; + + if (new_len == *len) +diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c +index 0b98e3c8b461..6c0e19f7a21f 100644 +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -228,6 +228,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, + if (nm.hash) { + ubifs_assert(c, fname_len(&nm) == 0); + ubifs_assert(c, fname_name(&nm) == NULL); ++ if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) ++ goto done; /* ENOENT */ + dent_key_init_hash(c, &key, dir->i_ino, nm.hash); + err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash); + } else { +diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c +index cd52585c8f4f..a771273fba7e 100644 +--- a/fs/ubifs/file.c ++++ b/fs/ubifs/file.c +@@ -786,7 +786,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, + + if (page_offset > end_index) + break; +- page = find_or_create_page(mapping, page_offset, ra_gfp_mask); ++ page = pagecache_get_page(mapping, page_offset, ++ FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, ++ ra_gfp_mask); + if (!page) + break; + if (!PageUptodate(page)) +@@ -1078,18 +1080,12 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; +- if (attr->ia_valid & ATTR_ATIME) { +- inode->i_atime = timestamp_truncate(attr->ia_atime, +- inode); +- } +- if (attr->ia_valid & ATTR_MTIME) { +- inode->i_mtime = timestamp_truncate(attr->ia_mtime, +- inode); +- } +- if (attr->ia_valid & ATTR_CTIME) { +- inode->i_ctime = timestamp_truncate(attr->ia_ctime, +- inode); +- } ++ if (attr->ia_valid & ATTR_ATIME) ++ inode->i_atime = attr->ia_atime; ++ if (attr->ia_valid & ATTR_MTIME) ++ inode->i_mtime = attr->ia_mtime; ++ if (attr->ia_valid & ATTR_CTIME) ++ inode->i_ctime = attr->ia_ctime; + if (attr->ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + +diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c +index 5dc5abca11c7..eeb1be259888 100644 +--- a/fs/ubifs/ioctl.c ++++ b/fs/ubifs/ioctl.c +@@ -113,7 +113,8 @@ static int setflags(struct inode *inode, int flags) + if (err) + goto out_unlock; + +- ui->flags = ioctl2ubifs(flags); ++ ui->flags &= ~ioctl2ubifs(UBIFS_SUPPORTED_IOCTL_FLAGS); ++ ui->flags |= ioctl2ubifs(flags); + ubifs_set_inode_flags(inode); + inode->i_ctime = current_time(inode); + release = ui->dirty; +diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c +index a551eb3e9b89..6681c18e52b8 100644 +--- a/fs/ubifs/sb.c ++++ b/fs/ubifs/sb.c +@@ -161,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c) + sup = kzalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_KERNEL); + mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); + idx_node_size = ubifs_idx_node_sz(c, 1); +- idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); ++ idx = kzalloc(ALIGN(idx_node_size, c->min_io_size), GFP_KERNEL); + ino = kzalloc(ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size), GFP_KERNEL); + cs = kzalloc(ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size), GFP_KERNEL); + +diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c +index 5e1e8ec0589e..7fc2f3f07c16 100644 +--- a/fs/ubifs/super.c ++++ b/fs/ubifs/super.c +@@ -1599,6 +1599,7 @@ out_free: + vfree(c->ileb_buf); + vfree(c->sbuf); + kfree(c->bottom_up_buf); ++ kfree(c->sup_node); + ubifs_debugging_exit(c); + return err; + } 
+@@ -1641,6 +1642,7 @@ static void ubifs_umount(struct ubifs_info *c) + vfree(c->ileb_buf); + vfree(c->sbuf); + kfree(c->bottom_up_buf); ++ kfree(c->sup_node); + ubifs_debugging_exit(c); + } + +diff --git a/fs/utimes.c b/fs/utimes.c +index 1ba3f7883870..090739322463 100644 +--- a/fs/utimes.c ++++ b/fs/utimes.c +@@ -36,14 +36,14 @@ static int utimes_common(const struct path *path, struct timespec64 *times) + if (times[0].tv_nsec == UTIME_OMIT) + newattrs.ia_valid &= ~ATTR_ATIME; + else if (times[0].tv_nsec != UTIME_NOW) { +- newattrs.ia_atime = timestamp_truncate(times[0], inode); ++ newattrs.ia_atime = times[0]; + newattrs.ia_valid |= ATTR_ATIME_SET; + } + + if (times[1].tv_nsec == UTIME_OMIT) + newattrs.ia_valid &= ~ATTR_MTIME; + else if (times[1].tv_nsec != UTIME_NOW) { +- newattrs.ia_mtime = timestamp_truncate(times[1], inode); ++ newattrs.ia_mtime = times[1]; + newattrs.ia_valid |= ATTR_MTIME_SET; + } + /* +diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h +index 04c0644006fd..c716ea81e653 100644 +--- a/include/asm-generic/tlb.h ++++ b/include/asm-generic/tlb.h +@@ -137,13 +137,6 @@ + * When used, an architecture is expected to provide __tlb_remove_table() + * which does the actual freeing of these pages. + * +- * HAVE_RCU_TABLE_NO_INVALIDATE +- * +- * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before +- * freeing the page-table pages. This can be avoided if you use +- * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux +- * page-tables natively. +- * + * MMU_GATHER_NO_RANGE + * + * Use this if your architecture lacks an efficient flush_tlb_range(). +@@ -189,8 +182,23 @@ struct mmu_table_batch { + + extern void tlb_remove_table(struct mmu_gather *tlb, void *table); + ++/* ++ * This allows an architecture that does not use the linux page-tables for ++ * hardware to skip the TLBI when freeing page tables. ++ */ ++#ifndef tlb_needs_table_invalidate ++#define tlb_needs_table_invalidate() (true) ++#endif ++ ++#else ++ ++#ifdef tlb_needs_table_invalidate ++#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE + #endif + ++#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ ++ ++ + #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER + /* + * If we can't allocate a page to make a big batch of page pointers +diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h +index 97967ce06de3..f88197c1ffc2 100644 +--- a/include/linux/backing-dev.h ++++ b/include/linux/backing-dev.h +@@ -13,6 +13,7 @@ + #include <linux/fs.h> + #include <linux/sched.h> + #include <linux/blkdev.h> ++#include <linux/device.h> + #include <linux/writeback.h> + #include <linux/blk-cgroup.h> + #include <linux/backing-dev-defs.h> +@@ -504,4 +505,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) + (1 << WB_async_congested)); + } + ++extern const char *bdi_unknown_name; ++ ++static inline const char *bdi_dev_name(struct backing_dev_info *bdi) ++{ ++ if (!bdi || !bdi->dev) ++ return bdi_unknown_name; ++ return dev_name(bdi->dev); ++} ++ + #endif /* _LINUX_BACKING_DEV_H */ +diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h +index 31b1b0e03df8..018dce868de6 100644 +--- a/include/linux/cpufreq.h ++++ b/include/linux/cpufreq.h +@@ -148,6 +148,20 @@ struct cpufreq_policy { + struct notifier_block nb_max; + }; + ++/* ++ * Used for passing new cpufreq policy data to the cpufreq driver's ->verify() ++ * callback for sanitization. 
That callback is only expected to modify the min ++ * and max values, if necessary, and specifically it must not update the ++ * frequency table. ++ */ ++struct cpufreq_policy_data { ++ struct cpufreq_cpuinfo cpuinfo; ++ struct cpufreq_frequency_table *freq_table; ++ unsigned int cpu; ++ unsigned int min; /* in kHz */ ++ unsigned int max; /* in kHz */ ++}; ++ + struct cpufreq_freqs { + struct cpufreq_policy *policy; + unsigned int old; +@@ -201,8 +215,6 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); + struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); + void cpufreq_cpu_release(struct cpufreq_policy *policy); + int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); +-int cpufreq_set_policy(struct cpufreq_policy *policy, +- struct cpufreq_policy *new_policy); + void refresh_frequency_limits(struct cpufreq_policy *policy); + void cpufreq_update_policy(unsigned int cpu); + void cpufreq_update_limits(unsigned int cpu); +@@ -284,7 +296,7 @@ struct cpufreq_driver { + + /* needed by all drivers */ + int (*init)(struct cpufreq_policy *policy); +- int (*verify)(struct cpufreq_policy *policy); ++ int (*verify)(struct cpufreq_policy_data *policy); + + /* define one out of two */ + int (*setpolicy)(struct cpufreq_policy *policy); +@@ -415,8 +427,9 @@ static inline int cpufreq_thermal_control_enabled(struct cpufreq_driver *drv) + (drv->flags & CPUFREQ_IS_COOLING_DEV); + } + +-static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, +- unsigned int min, unsigned int max) ++static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *policy, ++ unsigned int min, ++ unsigned int max) + { + if (policy->min < min) + policy->min = min; +@@ -432,10 +445,10 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, + } + + static inline void +-cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy) ++cpufreq_verify_within_cpu_limits(struct cpufreq_policy_data *policy) + { + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, +- policy->cpuinfo.max_freq); ++ policy->cpuinfo.max_freq); + } + + #ifdef CONFIG_CPU_FREQ +@@ -513,6 +526,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, + * CPUFREQ GOVERNORS * + *********************************************************************/ + ++#define CPUFREQ_POLICY_UNKNOWN (0) + /* + * If (cpufreq_driver->target) exists, the ->governor decides what frequency + * within the limits is used. 
If (cpufreq_driver->setpolicy> exists, these +@@ -684,9 +698,9 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, + int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table); + +-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, ++int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, + struct cpufreq_frequency_table *table); +-int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy); ++int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); + + int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, + unsigned int target_freq, +diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h +index ffcc7724ca21..dc4fd8a6644d 100644 +--- a/include/linux/eventfd.h ++++ b/include/linux/eventfd.h +@@ -12,6 +12,8 @@ + #include <linux/fcntl.h> + #include <linux/wait.h> + #include <linux/err.h> ++#include <linux/percpu-defs.h> ++#include <linux/percpu.h> + + /* + * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining +@@ -40,6 +42,13 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); + int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, + __u64 *cnt); + ++DECLARE_PER_CPU(int, eventfd_wake_count); ++ ++static inline bool eventfd_signal_count(void) ++{ ++ return this_cpu_read(eventfd_wake_count); ++} ++ + #else /* CONFIG_EVENTFD */ + + /* +@@ -68,6 +77,11 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, + return -ENOSYS; + } + ++static inline bool eventfd_signal_count(void) ++{ ++ return false; ++} ++ + #endif + + #endif /* _LINUX_EVENTFD_H */ +diff --git a/include/linux/irq.h b/include/linux/irq.h +index fb301cf29148..f8755e5fcd74 100644 +--- a/include/linux/irq.h ++++ b/include/linux/irq.h +@@ -209,6 +209,8 @@ struct irq_data { + * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target + * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set + * IRQD_CAN_RESERVE - Can use reservation mode ++ * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change ++ * required + */ + enum { + IRQD_TRIGGER_MASK = 0xf, +@@ -231,6 +233,7 @@ enum { + IRQD_SINGLE_TARGET = (1 << 24), + IRQD_DEFAULT_TRIGGER_SET = (1 << 25), + IRQD_CAN_RESERVE = (1 << 26), ++ IRQD_MSI_NOMASK_QUIRK = (1 << 27), + }; + + #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) +@@ -390,6 +393,21 @@ static inline bool irqd_can_reserve(struct irq_data *d) + return __irqd_to_state(d) & IRQD_CAN_RESERVE; + } + ++static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline bool irqd_msi_nomask_quirk(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; ++} ++ + #undef __irqd_to_state + + static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) +diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h +index 583e7abd07f9..aba5ada373d6 100644 +--- a/include/linux/irqdomain.h ++++ b/include/linux/irqdomain.h +@@ -205,6 +205,13 @@ enum { + /* Irq domain implements MSI remapping */ + IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + ++ /* ++ * Quirk to handle MSI implementations which do not provide ++ * masking. Currently known to affect x86, but partially ++ * handled in core code. 
++ */ ++ IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6), ++ + /* + * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved + * for implementation specific purposes and ignored by the +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index d41c521a39da..b81f0f1ded5f 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -204,7 +204,7 @@ struct kvm_async_pf { + struct list_head queue; + struct kvm_vcpu *vcpu; + struct mm_struct *mm; +- gva_t gva; ++ gpa_t cr2_or_gpa; + unsigned long addr; + struct kvm_arch_async_pf arch; + bool wakeup_all; +@@ -212,8 +212,8 @@ struct kvm_async_pf { + + void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); + void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); +-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, +- struct kvm_arch_async_pf *arch); ++int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ unsigned long hva, struct kvm_arch_async_pf *arch); + int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); + #endif + +@@ -728,6 +728,7 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn); + void kvm_set_pfn_accessed(kvm_pfn_t pfn); + void kvm_get_pfn(kvm_pfn_t pfn); + ++void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache); + int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, + int len); + int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, +@@ -750,7 +751,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); + int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); + struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); + bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); ++unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); + void mark_page_dirty(struct kvm *kvm, gfn_t gfn); + + struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); +@@ -758,8 +759,12 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn + kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); + kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); + int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); ++int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, ++ struct gfn_to_pfn_cache *cache, bool atomic); + struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); + void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); ++int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, ++ struct gfn_to_pfn_cache *cache, bool dirty, bool atomic); + unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); + unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); + int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, +diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h +index bde5374ae021..2382cb58969d 100644 +--- a/include/linux/kvm_types.h ++++ b/include/linux/kvm_types.h +@@ -18,7 +18,7 @@ struct kvm_memslots; + + enum kvm_mr_change; + +-#include <asm/types.h> ++#include <linux/types.h> + + /* + * Address types: +@@ -49,4 +49,11 @@ struct gfn_to_hva_cache { + struct kvm_memory_slot *memslot; + }; + ++struct gfn_to_pfn_cache { ++ u64 generation; ++ gfn_t gfn; ++ kvm_pfn_t pfn; ++ bool dirty; ++}; ++ + #endif /* __KVM_TYPES_H__ */ +diff --git a/include/linux/mfd/rohm-bd70528.h 
b/include/linux/mfd/rohm-bd70528.h +index 1013e60c5b25..b0109ee6dae2 100644 +--- a/include/linux/mfd/rohm-bd70528.h ++++ b/include/linux/mfd/rohm-bd70528.h +@@ -317,7 +317,7 @@ enum { + #define BD70528_MASK_RTC_MINUTE 0x7f + #define BD70528_MASK_RTC_HOUR_24H 0x80 + #define BD70528_MASK_RTC_HOUR_PM 0x20 +-#define BD70528_MASK_RTC_HOUR 0x1f ++#define BD70528_MASK_RTC_HOUR 0x3f + #define BD70528_MASK_RTC_DAY 0x3f + #define BD70528_MASK_RTC_WEEK 0x07 + #define BD70528_MASK_RTC_MONTH 0x1f +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index 0836fe232f97..0cdc8d12785a 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -1417,14 +1417,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { + + u8 reserved_at_440[0x20]; + +- u8 tls[0x1]; +- u8 reserved_at_461[0x2]; ++ u8 reserved_at_460[0x3]; + u8 log_max_uctx[0x5]; + u8 reserved_at_468[0x3]; + u8 log_max_umem[0x5]; + u8 max_num_eqs[0x10]; + +- u8 reserved_at_480[0x3]; ++ u8 reserved_at_480[0x1]; ++ u8 tls_tx[0x1]; ++ u8 reserved_at_482[0x1]; + u8 log_max_l2_table[0x5]; + u8 reserved_at_488[0x8]; + u8 log_uar_page_sz[0x10]; +diff --git a/include/linux/padata.h b/include/linux/padata.h +index 23717eeaad23..cccab7a59787 100644 +--- a/include/linux/padata.h ++++ b/include/linux/padata.h +@@ -9,6 +9,7 @@ + #ifndef PADATA_H + #define PADATA_H + ++#include <linux/compiler_types.h> + #include <linux/workqueue.h> + #include <linux/spinlock.h> + #include <linux/list.h> +@@ -98,7 +99,7 @@ struct padata_cpumask { + * struct parallel_data - Internal control structure, covers everything + * that depends on the cpumask in use. + * +- * @pinst: padata instance. ++ * @sh: padata_shell object. + * @pqueue: percpu padata queues used for parallelization. + * @squeue: percpu padata queues used for serialuzation. + * @reorder_objects: Number of objects waiting in the reorder queues. +@@ -111,7 +112,7 @@ struct padata_cpumask { + * @lock: Reorder lock. + */ + struct parallel_data { +- struct padata_instance *pinst; ++ struct padata_shell *ps; + struct padata_parallel_queue __percpu *pqueue; + struct padata_serial_queue __percpu *squeue; + atomic_t reorder_objects; +@@ -124,14 +125,33 @@ struct parallel_data { + spinlock_t lock ____cacheline_aligned; + }; + ++/** ++ * struct padata_shell - Wrapper around struct parallel_data, its ++ * purpose is to allow the underlying control structure to be replaced ++ * on the fly using RCU. ++ * ++ * @pinst: padat instance. ++ * @pd: Actual parallel_data structure which may be substituted on the fly. ++ * @opd: Pointer to old pd to be freed by padata_replace. ++ * @list: List entry in padata_instance list. ++ */ ++struct padata_shell { ++ struct padata_instance *pinst; ++ struct parallel_data __rcu *pd; ++ struct parallel_data *opd; ++ struct list_head list; ++}; ++ + /** + * struct padata_instance - The overall control structure. + * + * @cpu_notifier: cpu hotplug notifier. + * @parallel_wq: The workqueue used for parallel work. + * @serial_wq: The workqueue used for serial work. +- * @pd: The internal control structure. ++ * @pslist: List of padata_shell objects attached to this instance. + * @cpumask: User supplied cpumasks for parallel and serial works. ++ * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. ++ * @omask: Temporary storage used to compute the notification mask. + * @cpumask_change_notifier: Notifiers chain for user-defined notify + * callbacks that will be called when either @pcpu or @cbcpu + * or both cpumasks change. 
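The include/linux/padata.h hunk above introduces struct padata_shell, described in its kernel-doc as a wrapper that lets the underlying parallel_data be replaced on the fly under RCU. The prototypes added further down (padata_alloc_shell(), padata_free_shell(), and padata_do_parallel() now taking a shell) together with the refcnt changes in the kernel/padata.c hunks mean an old parallel_data is only freed once every in-flight object has drained. The snippet below is a plain C11 sketch of just that lifetime rule (one base reference owned by the shell, one per queued object, free on the last put); the names are hypothetical and this is not kernel code.

/*
 * Userspace analogue of the parallel_data lifetime rule in this patch:
 * the structure starts with one base reference, each queued object adds
 * one, and whichever side drops the count to zero frees it.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pd {
        atomic_int refcnt;
};

static struct pd *pd_alloc(void)
{
        struct pd *pd = malloc(sizeof(*pd));

        if (!pd)
                return NULL;
        atomic_init(&pd->refcnt, 1);      /* base reference held until replacement */
        return pd;
}

static void pd_put(struct pd *pd, int n)
{
        /* Free only when the last reference (base or in-flight) goes away. */
        if (atomic_fetch_sub(&pd->refcnt, n) == n) {
                free(pd);
                printf("old pd freed\n");
        }
}

int main(void)
{
        struct pd *pd = pd_alloc();

        if (!pd)
                return 1;
        atomic_fetch_add(&pd->refcnt, 3); /* three objects queued against this pd */
        pd_put(pd, 1);                    /* replacement drops the base reference */
        pd_put(pd, 3);                    /* serial worker completes the batch and frees it */
        return 0;
}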
+@@ -143,8 +163,10 @@ struct padata_instance { + struct hlist_node node; + struct workqueue_struct *parallel_wq; + struct workqueue_struct *serial_wq; +- struct parallel_data *pd; ++ struct list_head pslist; + struct padata_cpumask cpumask; ++ struct padata_cpumask rcpumask; ++ cpumask_var_t omask; + struct blocking_notifier_head cpumask_change_notifier; + struct kobject kobj; + struct mutex lock; +@@ -156,7 +178,9 @@ struct padata_instance { + + extern struct padata_instance *padata_alloc_possible(const char *name); + extern void padata_free(struct padata_instance *pinst); +-extern int padata_do_parallel(struct padata_instance *pinst, ++extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); ++extern void padata_free_shell(struct padata_shell *ps); ++extern int padata_do_parallel(struct padata_shell *ps, + struct padata_priv *padata, int *cb_cpu); + extern void padata_do_serial(struct padata_priv *padata); + extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, +diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h +index a6fabd865211..176bfbd52d97 100644 +--- a/include/linux/percpu-defs.h ++++ b/include/linux/percpu-defs.h +@@ -175,8 +175,7 @@ + * Declaration/definition used for per-CPU variables that should be accessed + * as decrypted when memory encryption is enabled in the guest. + */ +-#if defined(CONFIG_VIRTUALIZATION) && defined(CONFIG_AMD_MEM_ENCRYPT) +- ++#ifdef CONFIG_AMD_MEM_ENCRYPT + #define DECLARE_PER_CPU_DECRYPTED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..decrypted") + +diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h +index 337a46391527..6a92fd3105a3 100644 +--- a/include/linux/regulator/consumer.h ++++ b/include/linux/regulator/consumer.h +@@ -287,6 +287,8 @@ void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, + const char *const *supply_names, + unsigned int num_supplies); + ++bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2); ++ + #else + + /* +@@ -593,6 +595,11 @@ regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, + { + } + ++static inline bool ++regulator_is_equal(struct regulator *reg1, struct regulator *reg2) ++{ ++ return false; ++} + #endif + + static inline int regulator_set_voltage_triplet(struct regulator *regulator, +diff --git a/include/media/v4l2-rect.h b/include/media/v4l2-rect.h +index c86474dc7b55..8800a640c224 100644 +--- a/include/media/v4l2-rect.h ++++ b/include/media/v4l2-rect.h +@@ -63,10 +63,10 @@ static inline void v4l2_rect_map_inside(struct v4l2_rect *r, + r->left = boundary->left; + if (r->top < boundary->top) + r->top = boundary->top; +- if (r->left + r->width > boundary->width) +- r->left = boundary->width - r->width; +- if (r->top + r->height > boundary->height) +- r->top = boundary->height - r->height; ++ if (r->left + r->width > boundary->left + boundary->width) ++ r->left = boundary->left + boundary->width - r->width; ++ if (r->top + r->height > boundary->top + boundary->height) ++ r->top = boundary->top + boundary->height - r->height; + } + + /** +diff --git a/include/net/ipx.h b/include/net/ipx.h +index baf090390998..9d1342807b59 100644 +--- a/include/net/ipx.h ++++ b/include/net/ipx.h +@@ -47,11 +47,6 @@ struct ipxhdr { + /* From af_ipx.c */ + extern int sysctl_ipx_pprop_broadcasting; + +-static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb) +-{ +- return (struct ipxhdr *)skb_transport_header(skb); +-} +- + struct ipx_interface { + /* IPX address */ + 
__be32 if_netnum; +diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h +index e05b95e83d5a..fb9dce4c6928 100644 +--- a/include/sound/hdaudio.h ++++ b/include/sound/hdaudio.h +@@ -8,6 +8,7 @@ + + #include <linux/device.h> + #include <linux/interrupt.h> ++#include <linux/io.h> + #include <linux/pm_runtime.h> + #include <linux/timecounter.h> + #include <sound/core.h> +@@ -330,6 +331,7 @@ struct hdac_bus { + bool chip_init:1; /* h/w initialized */ + + /* behavior flags */ ++ bool aligned_mmio:1; /* aligned MMIO access */ + bool sync_write:1; /* sync after verb write */ + bool use_posbuf:1; /* use position buffer */ + bool snoop:1; /* enable snooping */ +@@ -405,34 +407,61 @@ void snd_hdac_bus_free_stream_pages(struct hdac_bus *bus); + unsigned int snd_hdac_aligned_read(void __iomem *addr, unsigned int mask); + void snd_hdac_aligned_write(unsigned int val, void __iomem *addr, + unsigned int mask); +-#define snd_hdac_reg_writeb(v, addr) snd_hdac_aligned_write(v, addr, 0xff) +-#define snd_hdac_reg_writew(v, addr) snd_hdac_aligned_write(v, addr, 0xffff) +-#define snd_hdac_reg_readb(addr) snd_hdac_aligned_read(addr, 0xff) +-#define snd_hdac_reg_readw(addr) snd_hdac_aligned_read(addr, 0xffff) +-#else /* CONFIG_SND_HDA_ALIGNED_MMIO */ +-#define snd_hdac_reg_writeb(val, addr) writeb(val, addr) +-#define snd_hdac_reg_writew(val, addr) writew(val, addr) +-#define snd_hdac_reg_readb(addr) readb(addr) +-#define snd_hdac_reg_readw(addr) readw(addr) +-#endif /* CONFIG_SND_HDA_ALIGNED_MMIO */ +-#define snd_hdac_reg_writel(val, addr) writel(val, addr) +-#define snd_hdac_reg_readl(addr) readl(addr) ++#define snd_hdac_aligned_mmio(bus) (bus)->aligned_mmio ++#else ++#define snd_hdac_aligned_mmio(bus) false ++#define snd_hdac_aligned_read(addr, mask) 0 ++#define snd_hdac_aligned_write(val, addr, mask) do {} while (0) ++#endif ++ ++static inline void snd_hdac_reg_writeb(struct hdac_bus *bus, void __iomem *addr, ++ u8 val) ++{ ++ if (snd_hdac_aligned_mmio(bus)) ++ snd_hdac_aligned_write(val, addr, 0xff); ++ else ++ writeb(val, addr); ++} ++ ++static inline void snd_hdac_reg_writew(struct hdac_bus *bus, void __iomem *addr, ++ u16 val) ++{ ++ if (snd_hdac_aligned_mmio(bus)) ++ snd_hdac_aligned_write(val, addr, 0xffff); ++ else ++ writew(val, addr); ++} ++ ++static inline u8 snd_hdac_reg_readb(struct hdac_bus *bus, void __iomem *addr) ++{ ++ return snd_hdac_aligned_mmio(bus) ? ++ snd_hdac_aligned_read(addr, 0xff) : readb(addr); ++} ++ ++static inline u16 snd_hdac_reg_readw(struct hdac_bus *bus, void __iomem *addr) ++{ ++ return snd_hdac_aligned_mmio(bus) ? 
++ snd_hdac_aligned_read(addr, 0xffff) : readw(addr); ++} ++ ++#define snd_hdac_reg_writel(bus, addr, val) writel(val, addr) ++#define snd_hdac_reg_readl(bus, addr) readl(addr) + + /* + * macros for easy use + */ + #define _snd_hdac_chip_writeb(chip, reg, value) \ +- snd_hdac_reg_writeb(value, (chip)->remap_addr + (reg)) ++ snd_hdac_reg_writeb(chip, (chip)->remap_addr + (reg), value) + #define _snd_hdac_chip_readb(chip, reg) \ +- snd_hdac_reg_readb((chip)->remap_addr + (reg)) ++ snd_hdac_reg_readb(chip, (chip)->remap_addr + (reg)) + #define _snd_hdac_chip_writew(chip, reg, value) \ +- snd_hdac_reg_writew(value, (chip)->remap_addr + (reg)) ++ snd_hdac_reg_writew(chip, (chip)->remap_addr + (reg), value) + #define _snd_hdac_chip_readw(chip, reg) \ +- snd_hdac_reg_readw((chip)->remap_addr + (reg)) ++ snd_hdac_reg_readw(chip, (chip)->remap_addr + (reg)) + #define _snd_hdac_chip_writel(chip, reg, value) \ +- snd_hdac_reg_writel(value, (chip)->remap_addr + (reg)) ++ snd_hdac_reg_writel(chip, (chip)->remap_addr + (reg), value) + #define _snd_hdac_chip_readl(chip, reg) \ +- snd_hdac_reg_readl((chip)->remap_addr + (reg)) ++ snd_hdac_reg_readl(chip, (chip)->remap_addr + (reg)) + + /* read/write a register, pass without AZX_REG_ prefix */ + #define snd_hdac_chip_writel(chip, reg, value) \ +@@ -540,17 +569,17 @@ int snd_hdac_get_stream_stripe_ctl(struct hdac_bus *bus, + */ + /* read/write a register, pass without AZX_REG_ prefix */ + #define snd_hdac_stream_writel(dev, reg, value) \ +- snd_hdac_reg_writel(value, (dev)->sd_addr + AZX_REG_ ## reg) ++ snd_hdac_reg_writel((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) + #define snd_hdac_stream_writew(dev, reg, value) \ +- snd_hdac_reg_writew(value, (dev)->sd_addr + AZX_REG_ ## reg) ++ snd_hdac_reg_writew((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) + #define snd_hdac_stream_writeb(dev, reg, value) \ +- snd_hdac_reg_writeb(value, (dev)->sd_addr + AZX_REG_ ## reg) ++ snd_hdac_reg_writeb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) + #define snd_hdac_stream_readl(dev, reg) \ +- snd_hdac_reg_readl((dev)->sd_addr + AZX_REG_ ## reg) ++ snd_hdac_reg_readl((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) + #define snd_hdac_stream_readw(dev, reg) \ +- snd_hdac_reg_readw((dev)->sd_addr + AZX_REG_ ## reg) ++ snd_hdac_reg_readw((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) + #define snd_hdac_stream_readb(dev, reg) \ +- snd_hdac_reg_readb((dev)->sd_addr + AZX_REG_ ## reg) ++ snd_hdac_reg_readb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) + + /* update a register, pass without AZX_REG_ prefix */ + #define snd_hdac_stream_updatel(dev, reg, mask, val) \ +diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h +index c2ce6480b4b1..66282552db20 100644 +--- a/include/trace/events/writeback.h ++++ b/include/trace/events/writeback.h +@@ -67,8 +67,8 @@ DECLARE_EVENT_CLASS(writeback_page_template, + + TP_fast_assign( + strscpy_pad(__entry->name, +- mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", +- 32); ++ bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : ++ NULL), 32); + __entry->ino = mapping ? mapping->host->i_ino : 0; + __entry->index = page->index; + ), +@@ -111,8 +111,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, + struct backing_dev_info *bdi = inode_to_bdi(inode); + + /* may be called for files on pseudo FSes w/ unregistered bdi */ +- strscpy_pad(__entry->name, +- bdi->dev ? 
dev_name(bdi->dev) : "(unknown)", 32); ++ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->flags = flags; +@@ -193,7 +192,7 @@ TRACE_EVENT(inode_foreign_history, + ), + + TP_fast_assign( +- strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32); ++ strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); + __entry->ino = inode->i_ino; + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); + __entry->history = history; +@@ -222,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs, + ), + + TP_fast_assign( +- strncpy(__entry->name, dev_name(old_wb->bdi->dev), 32); ++ strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32); + __entry->ino = inode->i_ino; + __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); + __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); +@@ -255,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty, + struct address_space *mapping = page_mapping(page); + struct inode *inode = mapping ? mapping->host : NULL; + +- strncpy(__entry->name, dev_name(wb->bdi->dev), 32); ++ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); + __entry->bdi_id = wb->bdi->id; + __entry->ino = inode ? inode->i_ino : 0; + __entry->memcg_id = wb->memcg_css->id; +@@ -288,7 +287,7 @@ TRACE_EVENT(flush_foreign, + ), + + TP_fast_assign( +- strncpy(__entry->name, dev_name(wb->bdi->dev), 32); ++ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); + __entry->frn_bdi_id = frn_bdi_id; + __entry->frn_memcg_id = frn_memcg_id; +@@ -318,7 +317,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, + + TP_fast_assign( + strscpy_pad(__entry->name, +- dev_name(inode_to_bdi(inode)->dev), 32); ++ bdi_dev_name(inode_to_bdi(inode)), 32); + __entry->ino = inode->i_ino; + __entry->sync_mode = wbc->sync_mode; + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); +@@ -361,9 +360,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, + __field(unsigned int, cgroup_ino) + ), + TP_fast_assign( +- strscpy_pad(__entry->name, +- wb->bdi->dev ? dev_name(wb->bdi->dev) : +- "(unknown)", 32); ++ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); + __entry->nr_pages = work->nr_pages; + __entry->sb_dev = work->sb ? work->sb->s_dev : 0; + __entry->sync_mode = work->sync_mode; +@@ -416,7 +413,7 @@ DECLARE_EVENT_CLASS(writeback_class, + __field(unsigned int, cgroup_ino) + ), + TP_fast_assign( +- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); + ), + TP_printk("bdi %s: cgroup_ino=%u", +@@ -438,7 +435,7 @@ TRACE_EVENT(writeback_bdi_register, + __array(char, name, 32) + ), + TP_fast_assign( +- strscpy_pad(__entry->name, dev_name(bdi->dev), 32); ++ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); + ), + TP_printk("bdi %s", + __entry->name +@@ -463,7 +460,7 @@ DECLARE_EVENT_CLASS(wbc_class, + ), + + TP_fast_assign( +- strscpy_pad(__entry->name, dev_name(bdi->dev), 32); ++ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->sync_mode = wbc->sync_mode; +@@ -514,7 +511,7 @@ TRACE_EVENT(writeback_queue_io, + ), + TP_fast_assign( + unsigned long *older_than_this = work->older_than_this; +- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); + __entry->older = older_than_this ? *older_than_this : 0; + __entry->age = older_than_this ? 
+ (jiffies - *older_than_this) * 1000 / HZ : -1; +@@ -600,7 +597,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, + ), + + TP_fast_assign( +- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); + __entry->write_bw = KBps(wb->write_bandwidth); + __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); + __entry->dirty_rate = KBps(dirty_rate); +@@ -665,7 +662,7 @@ TRACE_EVENT(balance_dirty_pages, + + TP_fast_assign( + unsigned long freerun = (thresh + bg_thresh) / 2; +- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); + + __entry->limit = global_wb_domain.dirty_limit; + __entry->setpoint = (global_wb_domain.dirty_limit + +@@ -726,7 +723,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue, + + TP_fast_assign( + strscpy_pad(__entry->name, +- dev_name(inode_to_bdi(inode)->dev), 32); ++ bdi_dev_name(inode_to_bdi(inode)), 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->dirtied_when = inode->dirtied_when; +@@ -800,7 +797,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, + + TP_fast_assign( + strscpy_pad(__entry->name, +- dev_name(inode_to_bdi(inode)->dev), 32); ++ bdi_dev_name(inode_to_bdi(inode)), 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->dirtied_when = inode->dirtied_when; +diff --git a/ipc/msg.c b/ipc/msg.c +index 8dec945fa030..767587ab45a3 100644 +--- a/ipc/msg.c ++++ b/ipc/msg.c +@@ -377,7 +377,7 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) + * NOTE: no locks must be held, the rwsem is taken inside this function. + */ + static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, +- struct msqid64_ds *msqid64) ++ struct ipc64_perm *perm, int msg_qbytes) + { + struct kern_ipc_perm *ipcp; + struct msg_queue *msq; +@@ -387,7 +387,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, + rcu_read_lock(); + + ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd, +- &msqid64->msg_perm, msqid64->msg_qbytes); ++ perm, msg_qbytes); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; +@@ -409,18 +409,18 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, + { + DEFINE_WAKE_Q(wake_q); + +- if (msqid64->msg_qbytes > ns->msg_ctlmnb && ++ if (msg_qbytes > ns->msg_ctlmnb && + !capable(CAP_SYS_RESOURCE)) { + err = -EPERM; + goto out_unlock1; + } + + ipc_lock_object(&msq->q_perm); +- err = ipc_update_perm(&msqid64->msg_perm, ipcp); ++ err = ipc_update_perm(perm, ipcp); + if (err) + goto out_unlock0; + +- msq->q_qbytes = msqid64->msg_qbytes; ++ msq->q_qbytes = msg_qbytes; + + msq->q_ctime = ktime_get_real_seconds(); + /* +@@ -601,9 +601,10 @@ static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int ver + case IPC_SET: + if (copy_msqid_from_user(&msqid64, buf, version)) + return -EFAULT; +- /* fallthru */ ++ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, ++ msqid64.msg_qbytes); + case IPC_RMID: +- return msgctl_down(ns, msqid, cmd, &msqid64); ++ return msgctl_down(ns, msqid, cmd, NULL, 0); + default: + return -EINVAL; + } +@@ -735,9 +736,9 @@ static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int versio + case IPC_SET: + if (copy_compat_msqid_from_user(&msqid64, uptr, version)) + return -EFAULT; +- /* fallthru */ ++ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes); + case IPC_RMID: +- return msgctl_down(ns, msqid, cmd, &msqid64); ++ return msgctl_down(ns, msqid, cmd, 
NULL, 0); + default: + return -EINVAL; + } +diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c +index 3d3d61b5985b..b4b6b77f309c 100644 +--- a/kernel/bpf/devmap.c ++++ b/kernel/bpf/devmap.c +@@ -293,7 +293,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) + struct hlist_head *head = dev_map_index_hash(dtab, key); + struct bpf_dtab_netdev *dev; + +- hlist_for_each_entry_rcu(dev, head, index_hlist) ++ hlist_for_each_entry_rcu(dev, head, index_hlist, ++ lockdep_is_held(&dtab->index_lock)) + if (dev->idx == key) + return dev; + +diff --git a/kernel/events/core.c b/kernel/events/core.c +index 6c829e22bad3..15b123bdcaf5 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -5823,7 +5823,15 @@ accounting: + */ + user_lock_limit *= num_online_cpus(); + +- user_locked = atomic_long_read(&user->locked_vm) + user_extra; ++ user_locked = atomic_long_read(&user->locked_vm); ++ ++ /* ++ * sysctl_perf_event_mlock may have changed, so that ++ * user->locked_vm > user_lock_limit ++ */ ++ if (user_locked > user_lock_limit) ++ user_locked = user_lock_limit; ++ user_locked += user_extra; + + if (user_locked <= user_lock_limit) { + /* charge all to locked_vm */ +diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c +index c1eccd4f6520..a949bd39e343 100644 +--- a/kernel/irq/debugfs.c ++++ b/kernel/irq/debugfs.c +@@ -114,6 +114,7 @@ static const struct irq_bit_descr irqdata_states[] = { + BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), + BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + BIT_MASK_DESCR(IRQD_CAN_RESERVE), ++ BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), + + BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index dd822fd8a7d5..480df3659720 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -1459,6 +1459,7 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) + if (rv) { + /* Restore the original irq_data. */ + *root_irq_data = *child_irq_data; ++ kfree(child_irq_data); + goto error; + } + +diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c +index ad26fbcfbfc8..eb95f6106a1e 100644 +--- a/kernel/irq/msi.c ++++ b/kernel/irq/msi.c +@@ -453,8 +453,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, + continue; + + irq_data = irq_domain_get_irq_data(domain, desc->irq); +- if (!can_reserve) ++ if (!can_reserve) { + irqd_clr_can_reserve(irq_data); ++ if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) ++ irqd_set_msi_nomask_quirk(irq_data); ++ } + ret = irq_domain_activate_irq(irq_data, can_reserve); + if (ret) + goto cleanup; +diff --git a/kernel/padata.c b/kernel/padata.c +index c3fec1413295..9c82ee4a9732 100644 +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -35,6 +35,8 @@ + + #define MAX_OBJ_NUM 1000 + ++static void padata_free_pd(struct parallel_data *pd); ++ + static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) + { + int cpu, target_cpu; +@@ -87,7 +89,7 @@ static void padata_parallel_worker(struct work_struct *parallel_work) + /** + * padata_do_parallel - padata parallelization function + * +- * @pinst: padata instance ++ * @ps: padatashell + * @padata: object to be parallelized + * @cb_cpu: pointer to the CPU that the serialization callback function should + * run on. If it's not in the serial cpumask of @pinst +@@ -98,16 +100,17 @@ static void padata_parallel_worker(struct work_struct *parallel_work) + * Note: Every object which is parallelized by padata_do_parallel + * must be seen by padata_do_serial. 
+ */ +-int padata_do_parallel(struct padata_instance *pinst, ++int padata_do_parallel(struct padata_shell *ps, + struct padata_priv *padata, int *cb_cpu) + { ++ struct padata_instance *pinst = ps->pinst; + int i, cpu, cpu_index, target_cpu, err; + struct padata_parallel_queue *queue; + struct parallel_data *pd; + + rcu_read_lock_bh(); + +- pd = rcu_dereference_bh(pinst->pd); ++ pd = rcu_dereference_bh(ps->pd); + + err = -EINVAL; + if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID) +@@ -210,10 +213,10 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd, + + static void padata_reorder(struct parallel_data *pd) + { ++ struct padata_instance *pinst = pd->ps->pinst; + int cb_cpu; + struct padata_priv *padata; + struct padata_serial_queue *squeue; +- struct padata_instance *pinst = pd->pinst; + struct padata_parallel_queue *next_queue; + + /* +@@ -283,6 +286,7 @@ static void padata_serial_worker(struct work_struct *serial_work) + struct padata_serial_queue *squeue; + struct parallel_data *pd; + LIST_HEAD(local_list); ++ int cnt; + + local_bh_disable(); + squeue = container_of(serial_work, struct padata_serial_queue, work); +@@ -292,6 +296,8 @@ static void padata_serial_worker(struct work_struct *serial_work) + list_replace_init(&squeue->serial.list, &local_list); + spin_unlock(&squeue->serial.lock); + ++ cnt = 0; ++ + while (!list_empty(&local_list)) { + struct padata_priv *padata; + +@@ -301,9 +307,12 @@ static void padata_serial_worker(struct work_struct *serial_work) + list_del_init(&padata->list); + + padata->serial(padata); +- atomic_dec(&pd->refcnt); ++ cnt++; + } + local_bh_enable(); ++ ++ if (atomic_sub_and_test(cnt, &pd->refcnt)) ++ padata_free_pd(pd); + } + + /** +@@ -341,36 +350,39 @@ void padata_do_serial(struct padata_priv *padata) + } + EXPORT_SYMBOL(padata_do_serial); + +-static int padata_setup_cpumasks(struct parallel_data *pd, +- const struct cpumask *pcpumask, +- const struct cpumask *cbcpumask) ++static int padata_setup_cpumasks(struct padata_instance *pinst) + { + struct workqueue_attrs *attrs; ++ int err; ++ ++ attrs = alloc_workqueue_attrs(); ++ if (!attrs) ++ return -ENOMEM; ++ ++ /* Restrict parallel_wq workers to pd->cpumask.pcpu. */ ++ cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu); ++ err = apply_workqueue_attrs(pinst->parallel_wq, attrs); ++ free_workqueue_attrs(attrs); ++ ++ return err; ++} ++ ++static int pd_setup_cpumasks(struct parallel_data *pd, ++ const struct cpumask *pcpumask, ++ const struct cpumask *cbcpumask) ++{ + int err = -ENOMEM; + + if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) + goto out; +- cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); +- + if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) + goto free_pcpu_mask; +- cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); +- +- attrs = alloc_workqueue_attrs(); +- if (!attrs) +- goto free_cbcpu_mask; + +- /* Restrict parallel_wq workers to pd->cpumask.pcpu. */ +- cpumask_copy(attrs->cpumask, pd->cpumask.pcpu); +- err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs); +- free_workqueue_attrs(attrs); +- if (err < 0) +- goto free_cbcpu_mask; ++ cpumask_copy(pd->cpumask.pcpu, pcpumask); ++ cpumask_copy(pd->cpumask.cbcpu, cbcpumask); + + return 0; + +-free_cbcpu_mask: +- free_cpumask_var(pd->cpumask.cbcpu); + free_pcpu_mask: + free_cpumask_var(pd->cpumask.pcpu); + out: +@@ -414,12 +426,16 @@ static void padata_init_pqueues(struct parallel_data *pd) + } + + /* Allocate and initialize the internal cpumask dependend resources. 
*/ +-static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, +- const struct cpumask *pcpumask, +- const struct cpumask *cbcpumask) ++static struct parallel_data *padata_alloc_pd(struct padata_shell *ps) + { ++ struct padata_instance *pinst = ps->pinst; ++ const struct cpumask *cbcpumask; ++ const struct cpumask *pcpumask; + struct parallel_data *pd; + ++ cbcpumask = pinst->rcpumask.cbcpu; ++ pcpumask = pinst->rcpumask.pcpu; ++ + pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); + if (!pd) + goto err; +@@ -432,15 +448,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, + if (!pd->squeue) + goto err_free_pqueue; + +- pd->pinst = pinst; +- if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0) ++ pd->ps = ps; ++ if (pd_setup_cpumasks(pd, pcpumask, cbcpumask)) + goto err_free_squeue; + + padata_init_pqueues(pd); + padata_init_squeues(pd); + atomic_set(&pd->seq_nr, -1); + atomic_set(&pd->reorder_objects, 0); +- atomic_set(&pd->refcnt, 0); ++ atomic_set(&pd->refcnt, 1); + spin_lock_init(&pd->lock); + pd->cpu = cpumask_first(pd->cpumask.pcpu); + INIT_WORK(&pd->reorder_work, invoke_padata_reorder); +@@ -466,29 +482,6 @@ static void padata_free_pd(struct parallel_data *pd) + kfree(pd); + } + +-/* Flush all objects out of the padata queues. */ +-static void padata_flush_queues(struct parallel_data *pd) +-{ +- int cpu; +- struct padata_parallel_queue *pqueue; +- struct padata_serial_queue *squeue; +- +- for_each_cpu(cpu, pd->cpumask.pcpu) { +- pqueue = per_cpu_ptr(pd->pqueue, cpu); +- flush_work(&pqueue->work); +- } +- +- if (atomic_read(&pd->reorder_objects)) +- padata_reorder(pd); +- +- for_each_cpu(cpu, pd->cpumask.cbcpu) { +- squeue = per_cpu_ptr(pd->squeue, cpu); +- flush_work(&squeue->work); +- } +- +- BUG_ON(atomic_read(&pd->refcnt) != 0); +-} +- + static void __padata_start(struct padata_instance *pinst) + { + pinst->flags |= PADATA_INIT; +@@ -502,39 +495,67 @@ static void __padata_stop(struct padata_instance *pinst) + pinst->flags &= ~PADATA_INIT; + + synchronize_rcu(); +- +- get_online_cpus(); +- padata_flush_queues(pinst->pd); +- put_online_cpus(); + } + + /* Replace the internal control structure with a new one. 
*/ +-static void padata_replace(struct padata_instance *pinst, +- struct parallel_data *pd_new) ++static int padata_replace_one(struct padata_shell *ps) + { +- struct parallel_data *pd_old = pinst->pd; +- int notification_mask = 0; ++ struct parallel_data *pd_new; + +- pinst->flags |= PADATA_RESET; ++ pd_new = padata_alloc_pd(ps); ++ if (!pd_new) ++ return -ENOMEM; + +- rcu_assign_pointer(pinst->pd, pd_new); ++ ps->opd = rcu_dereference_protected(ps->pd, 1); ++ rcu_assign_pointer(ps->pd, pd_new); + +- synchronize_rcu(); ++ return 0; ++} ++ ++static int padata_replace(struct padata_instance *pinst, int cpu) ++{ ++ int notification_mask = 0; ++ struct padata_shell *ps; ++ int err; ++ ++ pinst->flags |= PADATA_RESET; + +- if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu)) ++ cpumask_copy(pinst->omask, pinst->rcpumask.pcpu); ++ cpumask_and(pinst->rcpumask.pcpu, pinst->cpumask.pcpu, ++ cpu_online_mask); ++ if (cpu >= 0) ++ cpumask_clear_cpu(cpu, pinst->rcpumask.pcpu); ++ if (!cpumask_equal(pinst->omask, pinst->rcpumask.pcpu)) + notification_mask |= PADATA_CPU_PARALLEL; +- if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) ++ ++ cpumask_copy(pinst->omask, pinst->rcpumask.cbcpu); ++ cpumask_and(pinst->rcpumask.cbcpu, pinst->cpumask.cbcpu, ++ cpu_online_mask); ++ if (cpu >= 0) ++ cpumask_clear_cpu(cpu, pinst->rcpumask.cbcpu); ++ if (!cpumask_equal(pinst->omask, pinst->rcpumask.cbcpu)) + notification_mask |= PADATA_CPU_SERIAL; + +- padata_flush_queues(pd_old); +- padata_free_pd(pd_old); ++ list_for_each_entry(ps, &pinst->pslist, list) { ++ err = padata_replace_one(ps); ++ if (err) ++ break; ++ } ++ ++ synchronize_rcu(); ++ ++ list_for_each_entry_continue_reverse(ps, &pinst->pslist, list) ++ if (atomic_dec_and_test(&ps->opd->refcnt)) ++ padata_free_pd(ps->opd); + + if (notification_mask) + blocking_notifier_call_chain(&pinst->cpumask_change_notifier, + notification_mask, +- &pd_new->cpumask); ++ &pinst->cpumask); + + pinst->flags &= ~PADATA_RESET; ++ ++ return err; + } + + /** +@@ -587,7 +608,7 @@ static int __padata_set_cpumasks(struct padata_instance *pinst, + cpumask_var_t cbcpumask) + { + int valid; +- struct parallel_data *pd; ++ int err; + + valid = padata_validate_cpumask(pinst, pcpumask); + if (!valid) { +@@ -600,19 +621,15 @@ static int __padata_set_cpumasks(struct padata_instance *pinst, + __padata_stop(pinst); + + out_replace: +- pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); +- if (!pd) +- return -ENOMEM; +- + cpumask_copy(pinst->cpumask.pcpu, pcpumask); + cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); + +- padata_replace(pinst, pd); ++ err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst, -1); + + if (valid) + __padata_start(pinst); + +- return 0; ++ return err; + } + + /** +@@ -695,46 +712,32 @@ EXPORT_SYMBOL(padata_stop); + + static int __padata_add_cpu(struct padata_instance *pinst, int cpu) + { +- struct parallel_data *pd; ++ int err = 0; + + if (cpumask_test_cpu(cpu, cpu_online_mask)) { +- pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, +- pinst->cpumask.cbcpu); +- if (!pd) +- return -ENOMEM; +- +- padata_replace(pinst, pd); ++ err = padata_replace(pinst, -1); + + if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) && + padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) + __padata_start(pinst); + } + +- return 0; ++ return err; + } + + static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) + { +- struct parallel_data *pd = NULL; ++ int err = 0; + + if (cpumask_test_cpu(cpu, cpu_online_mask)) { +- + if 
(!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) || + !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) + __padata_stop(pinst); + +- pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, +- pinst->cpumask.cbcpu); +- if (!pd) +- return -ENOMEM; +- +- padata_replace(pinst, pd); +- +- cpumask_clear_cpu(cpu, pd->cpumask.cbcpu); +- cpumask_clear_cpu(cpu, pd->cpumask.pcpu); ++ err = padata_replace(pinst, cpu); + } + +- return 0; ++ return err; + } + + /** +@@ -817,8 +820,12 @@ static void __padata_free(struct padata_instance *pinst) + cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node); + #endif + ++ WARN_ON(!list_empty(&pinst->pslist)); ++ + padata_stop(pinst); +- padata_free_pd(pinst->pd); ++ free_cpumask_var(pinst->omask); ++ free_cpumask_var(pinst->rcpumask.cbcpu); ++ free_cpumask_var(pinst->rcpumask.pcpu); + free_cpumask_var(pinst->cpumask.pcpu); + free_cpumask_var(pinst->cpumask.cbcpu); + destroy_workqueue(pinst->serial_wq); +@@ -965,7 +972,6 @@ static struct padata_instance *padata_alloc(const char *name, + const struct cpumask *cbcpumask) + { + struct padata_instance *pinst; +- struct parallel_data *pd = NULL; + + pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); + if (!pinst) +@@ -993,14 +999,22 @@ static struct padata_instance *padata_alloc(const char *name, + !padata_validate_cpumask(pinst, cbcpumask)) + goto err_free_masks; + +- pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); +- if (!pd) ++ if (!alloc_cpumask_var(&pinst->rcpumask.pcpu, GFP_KERNEL)) + goto err_free_masks; ++ if (!alloc_cpumask_var(&pinst->rcpumask.cbcpu, GFP_KERNEL)) ++ goto err_free_rcpumask_pcpu; ++ if (!alloc_cpumask_var(&pinst->omask, GFP_KERNEL)) ++ goto err_free_rcpumask_cbcpu; + +- rcu_assign_pointer(pinst->pd, pd); ++ INIT_LIST_HEAD(&pinst->pslist); + + cpumask_copy(pinst->cpumask.pcpu, pcpumask); + cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); ++ cpumask_and(pinst->rcpumask.pcpu, pcpumask, cpu_online_mask); ++ cpumask_and(pinst->rcpumask.cbcpu, cbcpumask, cpu_online_mask); ++ ++ if (padata_setup_cpumasks(pinst)) ++ goto err_free_omask; + + pinst->flags = 0; + +@@ -1016,6 +1030,12 @@ static struct padata_instance *padata_alloc(const char *name, + + return pinst; + ++err_free_omask: ++ free_cpumask_var(pinst->omask); ++err_free_rcpumask_cbcpu: ++ free_cpumask_var(pinst->rcpumask.cbcpu); ++err_free_rcpumask_pcpu: ++ free_cpumask_var(pinst->rcpumask.pcpu); + err_free_masks: + free_cpumask_var(pinst->cpumask.pcpu); + free_cpumask_var(pinst->cpumask.cbcpu); +@@ -1054,6 +1074,61 @@ void padata_free(struct padata_instance *pinst) + } + EXPORT_SYMBOL(padata_free); + ++/** ++ * padata_alloc_shell - Allocate and initialize padata shell. ++ * ++ * @pinst: Parent padata_instance object. 
++ */ ++struct padata_shell *padata_alloc_shell(struct padata_instance *pinst) ++{ ++ struct parallel_data *pd; ++ struct padata_shell *ps; ++ ++ ps = kzalloc(sizeof(*ps), GFP_KERNEL); ++ if (!ps) ++ goto out; ++ ++ ps->pinst = pinst; ++ ++ get_online_cpus(); ++ pd = padata_alloc_pd(ps); ++ put_online_cpus(); ++ ++ if (!pd) ++ goto out_free_ps; ++ ++ mutex_lock(&pinst->lock); ++ RCU_INIT_POINTER(ps->pd, pd); ++ list_add(&ps->list, &pinst->pslist); ++ mutex_unlock(&pinst->lock); ++ ++ return ps; ++ ++out_free_ps: ++ kfree(ps); ++out: ++ return NULL; ++} ++EXPORT_SYMBOL(padata_alloc_shell); ++ ++/** ++ * padata_free_shell - free a padata shell ++ * ++ * @ps: padata shell to free ++ */ ++void padata_free_shell(struct padata_shell *ps) ++{ ++ struct padata_instance *pinst = ps->pinst; ++ ++ mutex_lock(&pinst->lock); ++ list_del(&ps->list); ++ padata_free_pd(rcu_dereference_protected(ps->pd, 1)); ++ mutex_unlock(&pinst->lock); ++ ++ kfree(ps); ++} ++EXPORT_SYMBOL(padata_free_shell); ++ + #ifdef CONFIG_HOTPLUG_CPU + + static __init int padata_driver_init(void) +diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c +index 5dffade2d7cd..21acdff3bd27 100644 +--- a/kernel/rcu/srcutree.c ++++ b/kernel/rcu/srcutree.c +@@ -530,7 +530,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) + idx = rcu_seq_state(ssp->srcu_gp_seq); + WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); + cbdelay = srcu_get_delay(ssp); +- ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); ++ WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); + rcu_seq_end(&ssp->srcu_gp_seq); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) +@@ -762,6 +762,7 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp) + unsigned long flags; + struct srcu_data *sdp; + unsigned long t; ++ unsigned long tlast; + + /* If the local srcu_data structure has callbacks, not idle. */ + local_irq_save(flags); +@@ -780,9 +781,9 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp) + + /* First, see if enough time has passed since the last GP. */ + t = ktime_get_mono_fast_ns(); ++ tlast = READ_ONCE(ssp->srcu_last_gp_end); + if (exp_holdoff == 0 || +- time_in_range_open(t, ssp->srcu_last_gp_end, +- ssp->srcu_last_gp_end + exp_holdoff)) ++ time_in_range_open(t, tlast, tlast + exp_holdoff)) + return false; /* Too soon after last GP. */ + + /* Next, check for probable idleness. */ +diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h +index d632cd019597..69c5aa64fcfd 100644 +--- a/kernel/rcu/tree_exp.h ++++ b/kernel/rcu/tree_exp.h +@@ -134,7 +134,7 @@ static void __maybe_unused sync_exp_reset_tree(void) + rcu_for_each_node_breadth_first(rnp) { + raw_spin_lock_irqsave_rcu_node(rnp, flags); + WARN_ON_ONCE(rnp->expmask); +- rnp->expmask = rnp->expmaskinit; ++ WRITE_ONCE(rnp->expmask, rnp->expmaskinit); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } + } +@@ -211,7 +211,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, + rnp = rnp->parent; + raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ + WARN_ON_ONCE(!(rnp->expmask & mask)); +- rnp->expmask &= ~mask; ++ WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); + } + } + +@@ -241,7 +241,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } +- rnp->expmask &= ~mask; ++ WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); + __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. 
*/ + } + +@@ -372,12 +372,10 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + + /* IPI the remaining CPUs for expedited quiescent state. */ +- for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { ++ for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) { + unsigned long mask = leaf_node_cpu_bit(rnp, cpu); + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + +- if (!(mask_ofl_ipi & mask)) +- continue; + retry_ipi: + if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) { + mask_ofl_test |= mask; +@@ -491,7 +489,7 @@ static void synchronize_sched_expedited_wait(void) + struct rcu_data *rdp; + + mask = leaf_node_cpu_bit(rnp, cpu); +- if (!(rnp->expmask & mask)) ++ if (!(READ_ONCE(rnp->expmask) & mask)) + continue; + ndetected++; + rdp = per_cpu_ptr(&rcu_data, cpu); +@@ -503,7 +501,8 @@ static void synchronize_sched_expedited_wait(void) + } + pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", + jiffies - jiffies_start, rcu_state.expedited_sequence, +- rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); ++ READ_ONCE(rnp_root->expmask), ++ ".T"[!!rnp_root->exp_tasks]); + if (ndetected) { + pr_err("blocking rcu_node structures:"); + rcu_for_each_node_breadth_first(rnp) { +@@ -513,7 +512,7 @@ static void synchronize_sched_expedited_wait(void) + continue; + pr_cont(" l=%u:%d-%d:%#lx/%c", + rnp->level, rnp->grplo, rnp->grphi, +- rnp->expmask, ++ READ_ONCE(rnp->expmask), + ".T"[!!rnp->exp_tasks]); + } + pr_cont("\n"); +@@ -521,7 +520,7 @@ static void synchronize_sched_expedited_wait(void) + rcu_for_each_leaf_node(rnp) { + for_each_leaf_node_possible_cpu(rnp, cpu) { + mask = leaf_node_cpu_bit(rnp, cpu); +- if (!(rnp->expmask & mask)) ++ if (!(READ_ONCE(rnp->expmask) & mask)) + continue; + dump_cpu_task(cpu); + } +diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h +index fa08d55f7040..f849e7429816 100644 +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -220,7 +220,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) + * blocked tasks. + */ + if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) { +- rnp->gp_tasks = &t->rcu_node_entry; ++ WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry); + WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq); + } + if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) +@@ -340,7 +340,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); + */ + static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) + { +- return rnp->gp_tasks != NULL; ++ return READ_ONCE(rnp->gp_tasks) != NULL; + } + + /* Bias and limit values for ->rcu_read_lock_nesting. */ +@@ -493,7 +493,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) + trace_rcu_unlock_preempted_task(TPS("rcu_preempt"), + rnp->gp_seq, t->pid); + if (&t->rcu_node_entry == rnp->gp_tasks) +- rnp->gp_tasks = np; ++ WRITE_ONCE(rnp->gp_tasks, np); + if (&t->rcu_node_entry == rnp->exp_tasks) + rnp->exp_tasks = np; + if (IS_ENABLED(CONFIG_RCU_BOOST)) { +@@ -612,7 +612,7 @@ static void rcu_read_unlock_special(struct task_struct *t) + + t->rcu_read_unlock_special.b.exp_hint = false; + exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) || +- (rdp->grpmask & rnp->expmask) || ++ (rdp->grpmask & READ_ONCE(rnp->expmask)) || + tick_nohz_full_cpu(rdp->cpu); + // Need to defer quiescent state until everything is enabled. 
+ if (irqs_were_disabled && use_softirq && +@@ -663,7 +663,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) + dump_blkd_tasks(rnp, 10); + if (rcu_preempt_has_tasks(rnp) && + (rnp->qsmaskinit || rnp->wait_blkd_tasks)) { +- rnp->gp_tasks = rnp->blkd_tasks.next; ++ WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next); + t = container_of(rnp->gp_tasks, struct task_struct, + rcu_node_entry); + trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"), +@@ -757,7 +757,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) + pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n", + __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext); + pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n", +- __func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks); ++ __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks, ++ rnp->exp_tasks); + pr_info("%s: ->blkd_tasks", __func__); + i = 0; + list_for_each(lhp, &rnp->blkd_tasks) { +diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c +index 451f9d05ccfe..4b11f0309eee 100644 +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -88,6 +88,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, + unsigned long flags; + struct rtc_device *rtc = to_rtc_device(dev); + struct wakeup_source *__ws; ++ int ret = 0; + + if (rtcdev) + return -EBUSY; +@@ -102,8 +103,8 @@ static int alarmtimer_rtc_add_device(struct device *dev, + spin_lock_irqsave(&rtcdev_lock, flags); + if (!rtcdev) { + if (!try_module_get(rtc->owner)) { +- spin_unlock_irqrestore(&rtcdev_lock, flags); +- return -1; ++ ret = -1; ++ goto unlock; + } + + rtcdev = rtc; +@@ -112,11 +113,12 @@ static int alarmtimer_rtc_add_device(struct device *dev, + ws = __ws; + __ws = NULL; + } ++unlock: + spin_unlock_irqrestore(&rtcdev_lock, flags); + + wakeup_source_unregister(__ws); + +- return 0; ++ return ret; + } + + static inline void alarmtimer_rtc_timer_init(void) +diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c +index fff5f64981c6..428beb69426a 100644 +--- a/kernel/time/clocksource.c ++++ b/kernel/time/clocksource.c +@@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused) + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); +- watchdog_timer.expires += WATCHDOG_INTERVAL; +- add_timer_on(&watchdog_timer, next_cpu); ++ ++ /* ++ * Arm timer if not already pending: could race with concurrent ++ * pair clocksource_stop_watchdog() clocksource_start_watchdog(). 
++ */ ++ if (!timer_pending(&watchdog_timer)) { ++ watchdog_timer.expires += WATCHDOG_INTERVAL; ++ add_timer_on(&watchdog_timer, next_cpu); ++ } + out: + spin_unlock(&watchdog_lock); + } +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 0708a41cfe2d..407d8bf4ed93 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -5102,8 +5102,8 @@ static const struct file_operations ftrace_notrace_fops = { + + static DEFINE_MUTEX(graph_lock); + +-struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; +-struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; ++struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; ++struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; + + enum graph_filter_type { + GRAPH_FILTER_NOTRACE = 0, +@@ -5378,8 +5378,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) + + mutex_unlock(&graph_lock); + +- /* Wait till all users are no longer using the old hash */ +- synchronize_rcu(); ++ /* ++ * We need to do a hard force of sched synchronization. ++ * This is because we use preempt_disable() to do RCU, but ++ * the function tracers can be called where RCU is not watching ++ * (like before user_exit()). We can not rely on the RCU ++ * infrastructure to do the synchronization, thus we must do it ++ * ourselves. ++ */ ++ schedule_on_each_cpu(ftrace_sync); + + free_ftrace_hash(old_hash); + } +diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h +index d685c61085c0..a3c29d5fcc61 100644 +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -932,22 +932,31 @@ extern void __trace_graph_return(struct trace_array *tr, + unsigned long flags, int pc); + + #ifdef CONFIG_DYNAMIC_FTRACE +-extern struct ftrace_hash *ftrace_graph_hash; +-extern struct ftrace_hash *ftrace_graph_notrace_hash; ++extern struct ftrace_hash __rcu *ftrace_graph_hash; ++extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; + + static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) + { + unsigned long addr = trace->func; + int ret = 0; ++ struct ftrace_hash *hash; + + preempt_disable_notrace(); + +- if (ftrace_hash_empty(ftrace_graph_hash)) { ++ /* ++ * Have to open code "rcu_dereference_sched()" because the ++ * function graph tracer can be called when RCU is not ++ * "watching". ++ * Protected with schedule_on_each_cpu(ftrace_sync) ++ */ ++ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); ++ ++ if (ftrace_hash_empty(hash)) { + ret = 1; + goto out; + } + +- if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { ++ if (ftrace_lookup_ip(hash, addr)) { + + /* + * This needs to be cleared on the return functions +@@ -983,10 +992,20 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) + static inline int ftrace_graph_notrace_addr(unsigned long addr) + { + int ret = 0; ++ struct ftrace_hash *notrace_hash; + + preempt_disable_notrace(); + +- if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) ++ /* ++ * Have to open code "rcu_dereference_sched()" because the ++ * function graph tracer can be called when RCU is not ++ * "watching". 
++ * Protected with schedule_on_each_cpu(ftrace_sync) ++ */ ++ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, ++ !preemptible()); ++ ++ if (ftrace_lookup_ip(notrace_hash, addr)) + ret = 1; + + preempt_enable_notrace(); +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index 205692181e7b..4be7fc84d6b6 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -470,11 +470,12 @@ struct action_data { + * When a histogram trigger is hit, the values of any + * references to variables, including variables being passed + * as parameters to synthetic events, are collected into a +- * var_ref_vals array. This var_ref_idx is the index of the +- * first param in the array to be passed to the synthetic +- * event invocation. ++ * var_ref_vals array. This var_ref_idx array is an array of ++ * indices into the var_ref_vals array, one for each synthetic ++ * event param, and is passed to the synthetic event ++ * invocation. + */ +- unsigned int var_ref_idx; ++ unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; + struct synth_event *synth_event; + bool use_trace_keyword; + char *synth_event_name; +@@ -875,14 +876,14 @@ static struct trace_event_functions synth_event_funcs = { + + static notrace void trace_event_raw_event_synth(void *__data, + u64 *var_ref_vals, +- unsigned int var_ref_idx) ++ unsigned int *var_ref_idx) + { + struct trace_event_file *trace_file = __data; + struct synth_trace_event *entry; + struct trace_event_buffer fbuffer; + struct ring_buffer *buffer; + struct synth_event *event; +- unsigned int i, n_u64; ++ unsigned int i, n_u64, val_idx; + int fields_size = 0; + + event = trace_file->event_call->data; +@@ -905,15 +906,16 @@ static notrace void trace_event_raw_event_synth(void *__data, + goto out; + + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { ++ val_idx = var_ref_idx[i]; + if (event->fields[i]->is_string) { +- char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; ++ char *str_val = (char *)(long)var_ref_vals[val_idx]; + char *str_field = (char *)&entry->fields[n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } else { + struct synth_field *field = event->fields[i]; +- u64 val = var_ref_vals[var_ref_idx + i]; ++ u64 val = var_ref_vals[val_idx]; + + switch (field->size) { + case 1: +@@ -1113,10 +1115,10 @@ static struct tracepoint *alloc_synth_tracepoint(char *name) + } + + typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, +- unsigned int var_ref_idx); ++ unsigned int *var_ref_idx); + + static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, +- unsigned int var_ref_idx) ++ unsigned int *var_ref_idx) + { + struct tracepoint *tp = event->tp; + +@@ -2655,6 +2657,22 @@ static int init_var_ref(struct hist_field *ref_field, + goto out; + } + ++static int find_var_ref_idx(struct hist_trigger_data *hist_data, ++ struct hist_field *var_field) ++{ ++ struct hist_field *ref_field; ++ int i; ++ ++ for (i = 0; i < hist_data->n_var_refs; i++) { ++ ref_field = hist_data->var_refs[i]; ++ if (ref_field->var.idx == var_field->var.idx && ++ ref_field->var.hist_data == var_field->hist_data) ++ return i; ++ } ++ ++ return -ENOENT; ++} ++ + /** + * create_var_ref - Create a variable reference and attach it to trigger + * @hist_data: The trigger that will be referencing the variable +@@ -4228,11 +4246,11 @@ static int trace_action_create(struct hist_trigger_data *hist_data, + struct trace_array *tr = 
hist_data->event_file->tr; + char *event_name, *param, *system = NULL; + struct hist_field *hist_field, *var_ref; +- unsigned int i, var_ref_idx; ++ unsigned int i; + unsigned int field_pos = 0; + struct synth_event *event; + char *synth_event_name; +- int ret = 0; ++ int var_ref_idx, ret = 0; + + lockdep_assert_held(&event_mutex); + +@@ -4249,8 +4267,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, + + event->ref++; + +- var_ref_idx = hist_data->n_var_refs; +- + for (i = 0; i < data->n_params; i++) { + char *p; + +@@ -4299,6 +4315,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, + goto err; + } + ++ var_ref_idx = find_var_ref_idx(hist_data, var_ref); ++ if (WARN_ON(var_ref_idx < 0)) { ++ ret = var_ref_idx; ++ goto err; ++ } ++ ++ data->var_ref_idx[i] = var_ref_idx; ++ + field_pos++; + kfree(p); + continue; +@@ -4317,7 +4341,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, + } + + data->synth_event = event; +- data->var_ref_idx = var_ref_idx; + out: + return ret; + err: +diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c +index 9ae87be422f2..ab8b6436d53f 100644 +--- a/kernel/trace/trace_probe.c ++++ b/kernel/trace/trace_probe.c +@@ -876,7 +876,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, + for (i = 0; i < tp->nr_args; i++) { + parg = tp->args + i; + if (parg->count) { +- if (strcmp(parg->type->name, "string") == 0) ++ if ((strcmp(parg->type->name, "string") == 0) || ++ (strcmp(parg->type->name, "ustring") == 0)) + fmt = ", __get_str(%s[%d])"; + else + fmt = ", REC->%s[%d]"; +@@ -884,7 +885,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, + pos += snprintf(buf + pos, LEN_OR_ZERO, + fmt, parg->name, j); + } else { +- if (strcmp(parg->type->name, "string") == 0) ++ if ((strcmp(parg->type->name, "string") == 0) || ++ (strcmp(parg->type->name, "ustring") == 0)) + fmt = ", __get_str(%s)"; + else + fmt = ", REC->%s"; +diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c +index e288168661e1..e304196d7c28 100644 +--- a/kernel/trace/trace_sched_switch.c ++++ b/kernel/trace/trace_sched_switch.c +@@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) + + static void tracing_start_sched_switch(int ops) + { +- bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); ++ bool sched_register; ++ + mutex_lock(&sched_register_mutex); ++ sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + + switch (ops) { + case RECORD_CMDLINE: +diff --git a/lib/test_kasan.c b/lib/test_kasan.c +index 49cc4d570a40..bd3d9ef7d39e 100644 +--- a/lib/test_kasan.c ++++ b/lib/test_kasan.c +@@ -157,6 +157,7 @@ static noinline void __init kmalloc_oob_krealloc_more(void) + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); ++ kfree(ptr2); + return; + } + +diff --git a/mm/backing-dev.c b/mm/backing-dev.c +index c360f6a6c844..62f05f605fb5 100644 +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { + EXPORT_SYMBOL_GPL(noop_backing_dev_info); + + static struct class *bdi_class; ++const char *bdi_unknown_name = "(unknown)"; + + /* + * bdi_lock protects bdi_tree and updates to bdi_list. 
bdi_list has RCU +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index ef4e9eb572a4..b5b4e310fe70 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -5465,14 +5465,6 @@ static int mem_cgroup_move_account(struct page *page, + __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); + } + +-#ifdef CONFIG_TRANSPARENT_HUGEPAGE +- if (compound && !list_empty(page_deferred_list(page))) { +- spin_lock(&from->deferred_split_queue.split_queue_lock); +- list_del_init(page_deferred_list(page)); +- from->deferred_split_queue.split_queue_len--; +- spin_unlock(&from->deferred_split_queue.split_queue_lock); +- } +-#endif + /* + * It is safe to change page->mem_cgroup here because the page + * is referenced, charged, and isolated - we can't race with +@@ -5482,16 +5474,6 @@ static int mem_cgroup_move_account(struct page *page, + /* caller should have done css_get */ + page->mem_cgroup = to; + +-#ifdef CONFIG_TRANSPARENT_HUGEPAGE +- if (compound && list_empty(page_deferred_list(page))) { +- spin_lock(&to->deferred_split_queue.split_queue_lock); +- list_add_tail(page_deferred_list(page), +- &to->deferred_split_queue.split_queue); +- to->deferred_split_queue.split_queue_len++; +- spin_unlock(&to->deferred_split_queue.split_queue_lock); +- } +-#endif +- + spin_unlock_irqrestore(&from->move_lock, flags); + + ret = 0; +diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c +index fab540685279..0aa154be3a52 100644 +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1738,8 +1738,6 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) + + BUG_ON(check_hotplug_memory_range(start, size)); + +- mem_hotplug_begin(); +- + /* + * All memory blocks must be offlined before removing memory. Check + * whether all memory blocks in question are offline and return error +@@ -1754,9 +1752,14 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) + memblock_free(start, size); + memblock_remove(start, size); + +- /* remove memory block devices before removing memory */ ++ /* ++ * Memory block device removal under the device_hotplug_lock is ++ * a barrier against racing online attempts. ++ */ + remove_memory_block_devices(start, size); + ++ mem_hotplug_begin(); ++ + arch_remove_memory(nid, start, size, NULL); + __release_memory_resource(start, size); + +diff --git a/mm/migrate.c b/mm/migrate.c +index 6956627ebf8b..c4c313e47f12 100644 +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -1631,8 +1631,19 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, + start = i; + } else if (node != current_node) { + err = do_move_pages_to_node(mm, &pagelist, current_node); +- if (err) ++ if (err) { ++ /* ++ * Positive err means the number of failed ++ * pages to migrate. Since we are going to ++ * abort and return the number of non-migrated ++ * pages, so need to incude the rest of the ++ * nr_pages that have not been attempted as ++ * well. 
++ */ ++ if (err > 0) ++ err += nr_pages - i - 1; + goto out; ++ } + err = store_status(status, start, current_node, i - start); + if (err) + goto out; +@@ -1663,8 +1674,11 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, + goto out_flush; + + err = do_move_pages_to_node(mm, &pagelist, current_node); +- if (err) ++ if (err) { ++ if (err > 0) ++ err += nr_pages - i - 1; + goto out; ++ } + if (i > start) { + err = store_status(status, start, current_node, i - start); + if (err) +@@ -1678,6 +1692,13 @@ out_flush: + + /* Make sure we do not overwrite the existing error */ + err1 = do_move_pages_to_node(mm, &pagelist, current_node); ++ /* ++ * Don't have to report non-attempted pages here since: ++ * - If the above loop is done gracefully all pages have been ++ * attempted. ++ * - If the above loop is aborted it means a fatal error ++ * happened, should return ret. ++ */ + if (!err1) + err1 = store_status(status, start, current_node, i - start); + if (err >= 0) +diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c +index 7d70e5c78f97..7c1b8f67af7b 100644 +--- a/mm/mmu_gather.c ++++ b/mm/mmu_gather.c +@@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ + */ + static inline void tlb_table_invalidate(struct mmu_gather *tlb) + { +-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE +- /* +- * Invalidate page-table caches used by hardware walkers. Then we still +- * need to RCU-sched wait while freeing the pages because software +- * walkers can still be in-flight. +- */ +- tlb_flush_mmu_tlbonly(tlb); +-#endif ++ if (tlb_needs_table_invalidate()) { ++ /* ++ * Invalidate page-table caches used by hardware walkers. Then ++ * we still need to RCU-sched wait while freeing the pages ++ * because software walkers can still be in-flight. ++ */ ++ tlb_flush_mmu_tlbonly(tlb); ++ } + } + + static void tlb_remove_table_smp_sync(void *arg) +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 45e39131a716..d387ca74cb5a 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -6933,7 +6933,8 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) + * This function also addresses a similar issue where struct pages are left + * uninitialized because the physical address range is not covered by + * memblock.memory or memblock.reserved. That could happen when memblock +- * layout is manually configured via memmap=. ++ * layout is manually configured via memmap=, or when the highest physical ++ * address (max_pfn) does not end on a section boundary. + */ + void __init zero_resv_unavail(void) + { +@@ -6951,7 +6952,16 @@ void __init zero_resv_unavail(void) + pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start)); + next = end; + } +- pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn); ++ ++ /* ++ * Early sections always have a fully populated memmap for the whole ++ * section - see pfn_valid(). If the last section has holes at the ++ * end and that section is marked "online", the memmap will be ++ * considered initialized. Make sure that memmap has a well defined ++ * state. ++ */ ++ pgcnt += zero_pfn_range(PFN_DOWN(next), ++ round_up(max_pfn, PAGES_PER_SECTION)); + + /* + * Struct pages that do not have backing memory. 
This could be because +diff --git a/mm/sparse.c b/mm/sparse.c +index 1100fdb9649c..69b41b6046a5 100644 +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -787,7 +787,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, + ms->usage = NULL; + } + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); +- ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr); ++ ms->section_mem_map = (unsigned long)NULL; + } + + if (section_is_early && memmap) +diff --git a/net/core/devlink.c b/net/core/devlink.c +index ae614965c8c2..61bc67047f56 100644 +--- a/net/core/devlink.c ++++ b/net/core/devlink.c +@@ -3863,6 +3863,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, + goto out_unlock; + } + ++ /* return 0 if there is no further data to read */ ++ if (start_offset >= region->size) { ++ err = 0; ++ goto out_unlock; ++ } ++ + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, + DEVLINK_CMD_REGION_READ); +diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c +index 536e032d95c8..246a258b1fac 100644 +--- a/net/core/drop_monitor.c ++++ b/net/core/drop_monitor.c +@@ -1004,8 +1004,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) + { + int cpu; + +- if (!monitor_hw) ++ if (!monitor_hw) { + NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); ++ return; ++ } + + monitor_hw = false; + +diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c +index ee561297d8a7..fbfd0db182b7 100644 +--- a/net/hsr/hsr_slave.c ++++ b/net/hsr/hsr_slave.c +@@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) + + rcu_read_lock(); /* hsr->node_db, hsr->ports */ + port = hsr_port_get_rcu(skb->dev); ++ if (!port) ++ goto finish_pass; + + if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { + /* Directly kill frames sent by ourselves */ +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 3640e8563a10..deb466fc3d1f 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2618,10 +2618,12 @@ int tcp_disconnect(struct sock *sk, int flags) + tp->snd_cwnd = TCP_INIT_CWND; + tp->snd_cwnd_cnt = 0; + tp->window_clamp = 0; ++ tp->delivered = 0; + tp->delivered_ce = 0; + tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + tcp_clear_retrans(tp); ++ tp->total_retrans = 0; + inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() +@@ -2633,10 +2635,14 @@ int tcp_disconnect(struct sock *sk, int flags) + sk->sk_rx_dst = NULL; + tcp_saved_syn_free(tp); + tp->compressed_ack = 0; ++ tp->segs_in = 0; ++ tp->segs_out = 0; + tp->bytes_sent = 0; + tp->bytes_acked = 0; + tp->bytes_received = 0; + tp->bytes_retrans = 0; ++ tp->data_segs_in = 0; ++ tp->data_segs_out = 0; + tp->duplicate_sack[0].start_seq = 0; + tp->duplicate_sack[0].end_seq = 0; + tp->dsack_dups = 0; +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index f9b5690e94fd..b11ccb53c7e0 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5719,6 +5719,9 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) + struct nlattr *tb[IFLA_INET6_MAX + 1]; + int err; + ++ if (!idev) ++ return -EAFNOSUPPORT; ++ + if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) + BUG(); + +diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c +index f82ea12bac37..425b95eb7e87 100644 +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -322,8 +322,13 @@ int 
l2tp_session_register(struct l2tp_session *session, + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + ++ /* IP encap expects session IDs to be globally unique, while ++ * UDP encap doesn't. ++ */ + hlist_for_each_entry(session_walk, g_head, global_hlist) +- if (session_walk->session_id == session->session_id) { ++ if (session_walk->session_id == session->session_id && ++ (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || ++ tunnel->encap == L2TP_ENCAPTYPE_IP)) { + err = -EEXIST; + goto err_tlock_pnlock; + } +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index d8143a8c034d..a9df9dac57b2 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1293,31 +1293,34 @@ ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { + }; + + static int +-dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ++ip_set_dump_start(struct netlink_callback *cb) + { + struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; + struct nlattr *attr = (void *)nlh + min_len; ++ struct sk_buff *skb = cb->skb; ++ struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); + u32 dump_type; +- ip_set_id_t index; + int ret; + + ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, + nlh->nlmsg_len - min_len, + ip_set_dump_policy, NULL); + if (ret) +- return ret; ++ goto error; + + cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); + if (cda[IPSET_ATTR_SETNAME]) { ++ ip_set_id_t index; + struct ip_set *set; + + set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), + &index); +- if (!set) +- return -ENOENT; +- ++ if (!set) { ++ ret = -ENOENT; ++ goto error; ++ } + dump_type = DUMP_ONE; + cb->args[IPSET_CB_INDEX] = index; + } else { +@@ -1333,10 +1336,17 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) + cb->args[IPSET_CB_DUMP] = dump_type; + + return 0; ++ ++error: ++ /* We have to create and send the error message manually :-( */ ++ if (nlh->nlmsg_flags & NLM_F_ACK) { ++ netlink_ack(cb->skb, nlh, ret, NULL); ++ } ++ return ret; + } + + static int +-ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ++ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb) + { + ip_set_id_t index = IPSET_INVALID_ID, max; + struct ip_set *set = NULL; +@@ -1347,18 +1357,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) + bool is_destroyed; + int ret = 0; + +- if (!cb->args[IPSET_CB_DUMP]) { +- ret = dump_init(cb, inst); +- if (ret < 0) { +- nlh = nlmsg_hdr(cb->skb); +- /* We have to create and send the error message +- * manually :-( +- */ +- if (nlh->nlmsg_flags & NLM_F_ACK) +- netlink_ack(cb->skb, nlh, ret, NULL); +- return ret; +- } +- } ++ if (!cb->args[IPSET_CB_DUMP]) ++ return -EINVAL; + + if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) + goto out; +@@ -1494,7 +1494,8 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, + + { + struct netlink_dump_control c = { +- .dump = ip_set_dump_start, ++ .start = ip_set_dump_start, ++ .dump = ip_set_dump_do, + .done = ip_set_dump_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); +diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c +index d72ddb67bb74..4a6ca9723a12 100644 +--- a/net/rxrpc/af_rxrpc.c ++++ b/net/rxrpc/af_rxrpc.c +@@ -194,6 +194,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) + service_in_use: + write_unlock(&local->services_lock); + rxrpc_unuse_local(local); 
++ rxrpc_put_local(local); + ret = -EADDRINUSE; + error_unlock: + release_sock(&rx->sk); +@@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk) + rxrpc_purge_queue(&sk->sk_receive_queue); + + rxrpc_unuse_local(rx->local); ++ rxrpc_put_local(rx->local); + rx->local = NULL; + key_put(rx->key); + rx->key = NULL; +diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h +index 5e99df80e80a..7d730c438404 100644 +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -490,6 +490,7 @@ enum rxrpc_call_flag { + RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ + RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ + RXRPC_CALL_IS_INTR, /* The call is interruptible */ ++ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ + }; + + /* +@@ -1021,6 +1022,16 @@ void rxrpc_unuse_local(struct rxrpc_local *); + void rxrpc_queue_local(struct rxrpc_local *); + void rxrpc_destroy_all_locals(struct rxrpc_net *); + ++static inline bool __rxrpc_unuse_local(struct rxrpc_local *local) ++{ ++ return atomic_dec_return(&local->active_users) == 0; ++} ++ ++static inline bool __rxrpc_use_local(struct rxrpc_local *local) ++{ ++ return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0; ++} ++ + /* + * misc.c + */ +diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c +index a31c18c09894..dbdbc4f18b5e 100644 +--- a/net/rxrpc/call_object.c ++++ b/net/rxrpc/call_object.c +@@ -493,7 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) + + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + +- if (conn) ++ if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + rxrpc_disconnect_call(call); + if (call->security) + call->security->free_call_crypto(call); +@@ -569,6 +569,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) + struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + struct rxrpc_net *rxnet = call->rxnet; + ++ rxrpc_put_connection(call->conn); + rxrpc_put_peer(call->peer); + kfree(call->rxtx_buffer); + kfree(call->rxtx_annotations); +@@ -590,7 +591,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); +- ASSERTCMP(call->conn, ==, NULL); + + rxrpc_cleanup_ring(call); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); +diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c +index 376370cd9285..ea7d4c21f889 100644 +--- a/net/rxrpc/conn_client.c ++++ b/net/rxrpc/conn_client.c +@@ -785,6 +785,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) + u32 cid; + + spin_lock(&conn->channel_lock); ++ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + + cid = call->cid; + if (cid) { +@@ -792,7 +793,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) + chan = &conn->channels[channel]; + } + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); +- call->conn = NULL; + + /* Calls that have never actually been assigned a channel can simply be + * discarded. 
If the conn didn't get used either, it will follow +@@ -908,7 +908,6 @@ out: + spin_unlock(&rxnet->client_conn_cache_lock); + out_2: + spin_unlock(&conn->channel_lock); +- rxrpc_put_connection(conn); + _leave(""); + return; + +diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c +index 808a4723f868..06fcff2ebbba 100644 +--- a/net/rxrpc/conn_event.c ++++ b/net/rxrpc/conn_event.c +@@ -438,16 +438,12 @@ again: + /* + * connection-level event processor + */ +-void rxrpc_process_connection(struct work_struct *work) ++static void rxrpc_do_process_connection(struct rxrpc_connection *conn) + { +- struct rxrpc_connection *conn = +- container_of(work, struct rxrpc_connection, processor); + struct sk_buff *skb; + u32 abort_code = RX_PROTOCOL_ERROR; + int ret; + +- rxrpc_see_connection(conn); +- + if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) + rxrpc_secure_connection(conn); + +@@ -475,18 +471,32 @@ void rxrpc_process_connection(struct work_struct *work) + } + } + +-out: +- rxrpc_put_connection(conn); +- _leave(""); + return; + + requeue_and_leave: + skb_queue_head(&conn->rx_queue, skb); +- goto out; ++ return; + + protocol_error: + if (rxrpc_abort_connection(conn, ret, abort_code) < 0) + goto requeue_and_leave; + rxrpc_free_skb(skb, rxrpc_skb_freed); +- goto out; ++ return; ++} ++ ++void rxrpc_process_connection(struct work_struct *work) ++{ ++ struct rxrpc_connection *conn = ++ container_of(work, struct rxrpc_connection, processor); ++ ++ rxrpc_see_connection(conn); ++ ++ if (__rxrpc_use_local(conn->params.local)) { ++ rxrpc_do_process_connection(conn); ++ rxrpc_unuse_local(conn->params.local); ++ } ++ ++ rxrpc_put_connection(conn); ++ _leave(""); ++ return; + } +diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c +index 38d718e90dc6..19e141eeed17 100644 +--- a/net/rxrpc/conn_object.c ++++ b/net/rxrpc/conn_object.c +@@ -223,9 +223,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) + __rxrpc_disconnect_call(conn, call); + spin_unlock(&conn->channel_lock); + +- call->conn = NULL; ++ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + conn->idle_timestamp = jiffies; +- rxrpc_put_connection(conn); + } + + /* +diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c +index 96d54e5bf7bc..ef10fbf71b15 100644 +--- a/net/rxrpc/input.c ++++ b/net/rxrpc/input.c +@@ -599,10 +599,8 @@ ack: + false, true, + rxrpc_propose_ack_input_data); + +- if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) { +- trace_rxrpc_notify_socket(call->debug_id, serial); +- rxrpc_notify_socket(call); +- } ++ trace_rxrpc_notify_socket(call->debug_id, serial); ++ rxrpc_notify_socket(call); + + unlock: + spin_unlock(&call->input_lock); +diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c +index 36587260cabd..a6c1349e965d 100644 +--- a/net/rxrpc/local_object.c ++++ b/net/rxrpc/local_object.c +@@ -364,11 +364,14 @@ void rxrpc_queue_local(struct rxrpc_local *local) + void rxrpc_put_local(struct rxrpc_local *local) + { + const void *here = __builtin_return_address(0); ++ unsigned int debug_id; + int n; + + if (local) { ++ debug_id = local->debug_id; ++ + n = atomic_dec_return(&local->usage); +- trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); ++ trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); + + if (n == 0) + call_rcu(&local->rcu, rxrpc_local_rcu); +@@ -380,14 +383,11 @@ void rxrpc_put_local(struct rxrpc_local *local) + */ + struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) + { +- unsigned int au; +- + local = rxrpc_get_local_maybe(local); + if (!local) + return NULL; 
+ +- au = atomic_fetch_add_unless(&local->active_users, 1, 0); +- if (au == 0) { ++ if (!__rxrpc_use_local(local)) { + rxrpc_put_local(local); + return NULL; + } +@@ -401,14 +401,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) + */ + void rxrpc_unuse_local(struct rxrpc_local *local) + { +- unsigned int au; +- + if (local) { +- au = atomic_dec_return(&local->active_users); +- if (au == 0) ++ if (__rxrpc_unuse_local(local)) { ++ rxrpc_get_local(local); + rxrpc_queue_local(local); +- else +- rxrpc_put_local(local); ++ } + } + } + +@@ -465,7 +462,7 @@ static void rxrpc_local_processor(struct work_struct *work) + + do { + again = false; +- if (atomic_read(&local->active_users) == 0) { ++ if (!__rxrpc_use_local(local)) { + rxrpc_local_destroyer(local); + break; + } +@@ -479,6 +476,8 @@ static void rxrpc_local_processor(struct work_struct *work) + rxrpc_process_local_events(local); + again = true; + } ++ ++ __rxrpc_unuse_local(local); + } while (again); + + rxrpc_put_local(local); +diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c +index 935bb60fff56..bad3d2420344 100644 +--- a/net/rxrpc/output.c ++++ b/net/rxrpc/output.c +@@ -129,7 +129,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, + int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + rxrpc_serial_t *_serial) + { +- struct rxrpc_connection *conn = NULL; ++ struct rxrpc_connection *conn; + struct rxrpc_ack_buffer *pkt; + struct msghdr msg; + struct kvec iov[2]; +@@ -139,18 +139,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + int ret; + u8 reason; + +- spin_lock_bh(&call->lock); +- if (call->conn) +- conn = rxrpc_get_connection_maybe(call->conn); +- spin_unlock_bh(&call->lock); +- if (!conn) ++ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + return -ECONNRESET; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); +- if (!pkt) { +- rxrpc_put_connection(conn); ++ if (!pkt) + return -ENOMEM; +- } ++ ++ conn = call->conn; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; +@@ -244,7 +240,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + } + + out: +- rxrpc_put_connection(conn); + kfree(pkt); + return ret; + } +@@ -254,7 +249,7 @@ out: + */ + int rxrpc_send_abort_packet(struct rxrpc_call *call) + { +- struct rxrpc_connection *conn = NULL; ++ struct rxrpc_connection *conn; + struct rxrpc_abort_buffer pkt; + struct msghdr msg; + struct kvec iov[1]; +@@ -271,13 +266,11 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) + test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + return 0; + +- spin_lock_bh(&call->lock); +- if (call->conn) +- conn = rxrpc_get_connection_maybe(call->conn); +- spin_unlock_bh(&call->lock); +- if (!conn) ++ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + return -ECONNRESET; + ++ conn = call->conn; ++ + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; +@@ -312,8 +305,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) + trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, + rxrpc_tx_point_call_abort); + rxrpc_tx_backoff(call, ret); +- +- rxrpc_put_connection(conn); + return ret; + } + +diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c +index 48f67a9b1037..923b263c401b 100644 +--- a/net/rxrpc/peer_event.c ++++ b/net/rxrpc/peer_event.c +@@ -364,27 +364,31 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, + if (!rxrpc_get_peer_maybe(peer)) + continue; + +- 
spin_unlock_bh(&rxnet->peer_hash_lock); +- +- keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; +- slot = keepalive_at - base; +- _debug("%02x peer %u t=%d {%pISp}", +- cursor, peer->debug_id, slot, &peer->srx.transport); ++ if (__rxrpc_use_local(peer->local)) { ++ spin_unlock_bh(&rxnet->peer_hash_lock); ++ ++ keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; ++ slot = keepalive_at - base; ++ _debug("%02x peer %u t=%d {%pISp}", ++ cursor, peer->debug_id, slot, &peer->srx.transport); ++ ++ if (keepalive_at <= base || ++ keepalive_at > base + RXRPC_KEEPALIVE_TIME) { ++ rxrpc_send_keepalive(peer); ++ slot = RXRPC_KEEPALIVE_TIME; ++ } + +- if (keepalive_at <= base || +- keepalive_at > base + RXRPC_KEEPALIVE_TIME) { +- rxrpc_send_keepalive(peer); +- slot = RXRPC_KEEPALIVE_TIME; ++ /* A transmission to this peer occurred since last we ++ * examined it so put it into the appropriate future ++ * bucket. ++ */ ++ slot += cursor; ++ slot &= mask; ++ spin_lock_bh(&rxnet->peer_hash_lock); ++ list_add_tail(&peer->keepalive_link, ++ &rxnet->peer_keepalive[slot & mask]); ++ rxrpc_unuse_local(peer->local); + } +- +- /* A transmission to this peer occurred since last we examined +- * it so put it into the appropriate future bucket. +- */ +- slot += cursor; +- slot &= mask; +- spin_lock_bh(&rxnet->peer_hash_lock); +- list_add_tail(&peer->keepalive_link, +- &rxnet->peer_keepalive[slot & mask]); + rxrpc_put_peer_locked(peer); + } + +diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h +index c22624131949..d36949d9382c 100644 +--- a/net/sched/cls_rsvp.h ++++ b/net/sched/cls_rsvp.h +@@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data) + + static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { + [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, +- [TCA_RSVP_DST] = { .type = NLA_BINARY, +- .len = RSVP_DST_LEN * sizeof(u32) }, +- [TCA_RSVP_SRC] = { .type = NLA_BINARY, +- .len = RSVP_DST_LEN * sizeof(u32) }, ++ [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, ++ [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, + }; + +diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c +index 3d4a1280352f..09b7dc5fe7e0 100644 +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + cp->fall_through = p->fall_through; + cp->tp = tp; + ++ if (tb[TCA_TCINDEX_HASH]) ++ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); ++ ++ if (tb[TCA_TCINDEX_MASK]) ++ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); ++ ++ if (tb[TCA_TCINDEX_SHIFT]) ++ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); ++ ++ if (!cp->hash) { ++ /* Hash not specified, use perfect hash if the upper limit ++ * of the hashing index is below the threshold. 
++ */ ++ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) ++ cp->hash = (cp->mask >> cp->shift) + 1; ++ else ++ cp->hash = DEFAULT_HASH_SIZE; ++ } ++ + if (p->perfect) { + int i; + + if (tcindex_alloc_perfect_hash(net, cp) < 0) + goto errout; +- for (i = 0; i < cp->hash; i++) ++ for (i = 0; i < min(cp->hash, p->hash); i++) + cp->perfect[i].res = p->perfect[i].res; + balloc = 1; + } +@@ -346,19 +365,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + + err = tcindex_filter_result_init(&new_filter_result, net); + if (err < 0) +- goto errout1; ++ goto errout_alloc; + if (old_r) + cr = r->res; + +- if (tb[TCA_TCINDEX_HASH]) +- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); +- +- if (tb[TCA_TCINDEX_MASK]) +- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); +- +- if (tb[TCA_TCINDEX_SHIFT]) +- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); +- + err = -EBUSY; + + /* Hash already allocated, make sure that we still meet the +@@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + if (tb[TCA_TCINDEX_FALL_THROUGH]) + cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); + +- if (!cp->hash) { +- /* Hash not specified, use perfect hash if the upper limit +- * of the hashing index is below the threshold. +- */ +- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) +- cp->hash = (cp->mask >> cp->shift) + 1; +- else +- cp->hash = DEFAULT_HASH_SIZE; +- } +- + if (!cp->perfect && !cp->h) + cp->alloc_hash = cp->hash; + +@@ -484,7 +484,6 @@ errout_alloc: + tcindex_free_perfect_hash(cp); + else if (balloc == 2) + kfree(cp->h); +-errout1: + tcf_exts_destroy(&new_filter_result.exts); + errout: + kfree(cp); +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index c609373c8661..660fc45ee40f 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(taprio_list_lock); + + #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) + #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) ++#define TAPRIO_FLAGS_INVALID U32_MAX + + struct sched_entry { + struct list_head list; +@@ -766,6 +767,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { + [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, ++ [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, + }; + + static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, +@@ -1367,6 +1369,33 @@ static int taprio_mqprio_cmp(const struct net_device *dev, + return 0; + } + ++/* The semantics of the 'flags' argument in relation to 'change()' ++ * requests, are interpreted following two rules (which are applied in ++ * this order): (1) an omitted 'flags' argument is interpreted as ++ * zero; (2) the 'flags' of a "running" taprio instance cannot be ++ * changed. 
++ */ ++static int taprio_new_flags(const struct nlattr *attr, u32 old, ++ struct netlink_ext_ack *extack) ++{ ++ u32 new = 0; ++ ++ if (attr) ++ new = nla_get_u32(attr); ++ ++ if (old != TAPRIO_FLAGS_INVALID && old != new) { ++ NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); ++ return -EOPNOTSUPP; ++ } ++ ++ if (!taprio_flags_valid(new)) { ++ NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); ++ return -EINVAL; ++ } ++ ++ return new; ++} ++ + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) + { +@@ -1375,7 +1404,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_mqprio_qopt *mqprio = NULL; +- u32 taprio_flags = 0; + unsigned long flags; + ktime_t start; + int i, err; +@@ -1388,21 +1416,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) + mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); + +- if (tb[TCA_TAPRIO_ATTR_FLAGS]) { +- taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]); +- +- if (q->flags != 0 && q->flags != taprio_flags) { +- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); +- return -EOPNOTSUPP; +- } else if (!taprio_flags_valid(taprio_flags)) { +- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); +- return -EINVAL; +- } ++ err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS], ++ q->flags, extack); ++ if (err < 0) ++ return err; + +- q->flags = taprio_flags; +- } ++ q->flags = err; + +- err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags); ++ err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); + if (err < 0) + return err; + +@@ -1444,7 +1465,20 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + + taprio_set_picos_per_byte(dev, q); + +- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) ++ if (mqprio) { ++ netdev_set_num_tc(dev, mqprio->num_tc); ++ for (i = 0; i < mqprio->num_tc; i++) ++ netdev_set_tc_queue(dev, i, ++ mqprio->count[i], ++ mqprio->offset[i]); ++ ++ /* Always use supplied priority mappings */ ++ for (i = 0; i <= TC_BITMASK; i++) ++ netdev_set_prio_tc_map(dev, i, ++ mqprio->prio_tc_map[i]); ++ } ++ ++ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) + err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); + else + err = taprio_disable_offload(dev, q, extack); +@@ -1464,27 +1498,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); + } + +- if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && +- !FULL_OFFLOAD_IS_ENABLED(taprio_flags) && ++ if (!TXTIME_ASSIST_IS_ENABLED(q->flags) && ++ !FULL_OFFLOAD_IS_ENABLED(q->flags) && + !hrtimer_active(&q->advance_timer)) { + hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); + q->advance_timer.function = advance_sched; + } + +- if (mqprio) { +- netdev_set_num_tc(dev, mqprio->num_tc); +- for (i = 0; i < mqprio->num_tc; i++) +- netdev_set_tc_queue(dev, i, +- mqprio->count[i], +- mqprio->offset[i]); +- +- /* Always use supplied priority mappings */ +- for (i = 0; i <= TC_BITMASK; i++) +- netdev_set_prio_tc_map(dev, i, +- mqprio->prio_tc_map[i]); +- } +- +- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { ++ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { + q->dequeue = taprio_dequeue_offload; + q->peek = taprio_peek_offload; + } else { +@@ -1501,9 +1522,9 @@ static int taprio_change(struct Qdisc *sch, struct 
nlattr *opt, + goto unlock; + } + +- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) { +- setup_txtime(q, new_admin, start); ++ setup_txtime(q, new_admin, start); + ++ if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { + if (!oper) { + rcu_assign_pointer(q->oper_sched, new_admin); + err = 0; +@@ -1528,7 +1549,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + + spin_unlock_irqrestore(&q->current_entry_lock, flags); + +- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) ++ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) + taprio_offload_config_changed(q); + } + +@@ -1567,7 +1588,7 @@ static void taprio_destroy(struct Qdisc *sch) + } + q->qdiscs = NULL; + +- netdev_set_num_tc(dev, 0); ++ netdev_reset_tc(dev); + + if (q->oper_sched) + call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); +@@ -1597,6 +1618,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, + * and get the valid one on taprio_change(). + */ + q->clockid = -1; ++ q->flags = TAPRIO_FLAGS_INVALID; + + spin_lock(&taprio_list_lock); + list_add(&q->taprio_list, &taprio_list); +diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c +index 908b60a72d95..ed20fa8a6f70 100644 +--- a/net/sunrpc/auth_gss/svcauth_gss.c ++++ b/net/sunrpc/auth_gss/svcauth_gss.c +@@ -1245,6 +1245,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, + dprintk("RPC: No creds found!\n"); + goto out; + } else { ++ struct timespec64 boot; + + /* steal creds */ + rsci.cred = ud->creds; +@@ -1265,6 +1266,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, + &expiry, GFP_KERNEL); + if (status) + goto out; ++ ++ getboottime64(&boot); ++ expiry -= boot.tv_sec; + } + + rsci.h.expiry_time = expiry; +diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile +index 42b571cde177..e7ad48c605e0 100644 +--- a/samples/bpf/Makefile ++++ b/samples/bpf/Makefile +@@ -236,7 +236,7 @@ all: + + clean: + $(MAKE) -C ../../ M=$(CURDIR) clean +- @rm -f *~ ++ @find $(CURDIR) -type f -name '*~' -delete + + $(LIBBPF): FORCE + # Fix up variables inherited from Kbuild that tools/ build system won't like +diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c +index 0da6e9e7132e..8b862a7a6c6a 100644 +--- a/samples/bpf/xdp_redirect_cpu_user.c ++++ b/samples/bpf/xdp_redirect_cpu_user.c +@@ -16,6 +16,10 @@ static const char *__doc__ = + #include <getopt.h> + #include <net/if.h> + #include <time.h> ++#include <linux/limits.h> ++ ++#define __must_check ++#include <linux/err.h> + + #include <arpa/inet.h> + #include <linux/if_link.h> +@@ -46,6 +50,10 @@ static int cpus_count_map_fd; + static int cpus_iterator_map_fd; + static int exception_cnt_map_fd; + ++#define NUM_TP 5 ++struct bpf_link *tp_links[NUM_TP] = { 0 }; ++static int tp_cnt = 0; ++ + /* Exit return codes */ + #define EXIT_OK 0 + #define EXIT_FAIL 1 +@@ -88,6 +96,10 @@ static void int_exit(int sig) + printf("program on interface changed, not removing\n"); + } + } ++ /* Detach tracepoints */ ++ while (tp_cnt) ++ bpf_link__destroy(tp_links[--tp_cnt]); ++ + exit(EXIT_OK); + } + +@@ -588,23 +600,61 @@ static void stats_poll(int interval, bool use_separators, char *prog_name, + free_stats_record(prev); + } + ++static struct bpf_link * attach_tp(struct bpf_object *obj, ++ const char *tp_category, ++ const char* tp_name) ++{ ++ struct bpf_program *prog; ++ struct bpf_link *link; ++ char sec_name[PATH_MAX]; ++ int len; ++ ++ len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s", ++ tp_category, tp_name); ++ if (len < 0) ++ exit(EXIT_FAIL); ++ ++ prog = 
bpf_object__find_program_by_title(obj, sec_name); ++ if (!prog) { ++ fprintf(stderr, "ERR: finding progsec: %s\n", sec_name); ++ exit(EXIT_FAIL_BPF); ++ } ++ ++ link = bpf_program__attach_tracepoint(prog, tp_category, tp_name); ++ if (IS_ERR(link)) ++ exit(EXIT_FAIL_BPF); ++ ++ return link; ++} ++ ++static void init_tracepoints(struct bpf_object *obj) { ++ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err"); ++ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err"); ++ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception"); ++ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue"); ++ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread"); ++} ++ + static int init_map_fds(struct bpf_object *obj) + { +- cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); +- rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); ++ /* Maps updated by tracepoints */ + redirect_err_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); ++ exception_cnt_map_fd = ++ bpf_object__find_map_fd_by_name(obj, "exception_cnt"); + cpumap_enqueue_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); + cpumap_kthread_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); ++ ++ /* Maps used by XDP */ ++ rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); ++ cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); + cpus_available_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_available"); + cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); + cpus_iterator_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); +- exception_cnt_map_fd = +- bpf_object__find_map_fd_by_name(obj, "exception_cnt"); + + if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || + redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || +@@ -662,6 +712,7 @@ int main(int argc, char **argv) + strerror(errno)); + return EXIT_FAIL; + } ++ init_tracepoints(obj); + if (init_map_fds(obj) < 0) { + fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); + return EXIT_FAIL; +diff --git a/scripts/find-unused-docs.sh b/scripts/find-unused-docs.sh +index 3f46f8977dc4..ee6a50e33aba 100755 +--- a/scripts/find-unused-docs.sh ++++ b/scripts/find-unused-docs.sh +@@ -54,7 +54,7 @@ for file in `find $1 -name '*.c'`; do + if [[ ${FILES_INCLUDED[$file]+_} ]]; then + continue; + fi +- str=$(scripts/kernel-doc -text -export "$file" 2>/dev/null) ++ str=$(scripts/kernel-doc -export "$file" 2>/dev/null) + if [[ -n "$str" ]]; then + echo "$file" + fi +diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c +index abeb09c30633..ad22066eba04 100644 +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -2832,42 +2832,39 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, + int addrlen) + { + int rc = 0; +-#if IS_ENABLED(CONFIG_IPV6) +- struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; +-#endif +-#ifdef SMACK_IPV6_SECMARK_LABELING +- struct smack_known *rsp; +- struct socket_smack *ssp; +-#endif + + if (sock->sk == NULL) + return 0; +- ++ if (sock->sk->sk_family != PF_INET && ++ (!IS_ENABLED(CONFIG_IPV6) || sock->sk->sk_family != PF_INET6)) ++ return 0; ++ if (addrlen < offsetofend(struct sockaddr, sa_family)) ++ return 0; ++ if (IS_ENABLED(CONFIG_IPV6) && sap->sa_family == AF_INET6) { ++ struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; + #ifdef SMACK_IPV6_SECMARK_LABELING +- ssp = sock->sk->sk_security; ++ struct smack_known *rsp; + 
#endif + +- switch (sock->sk->sk_family) { +- case PF_INET: +- if (addrlen < sizeof(struct sockaddr_in) || +- sap->sa_family != AF_INET) +- return -EINVAL; +- rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); +- break; +- case PF_INET6: +- if (addrlen < SIN6_LEN_RFC2133 || sap->sa_family != AF_INET6) +- return -EINVAL; ++ if (addrlen < SIN6_LEN_RFC2133) ++ return 0; + #ifdef SMACK_IPV6_SECMARK_LABELING + rsp = smack_ipv6host_label(sip); +- if (rsp != NULL) ++ if (rsp != NULL) { ++ struct socket_smack *ssp = sock->sk->sk_security; ++ + rc = smk_ipv6_check(ssp->smk_out, rsp, sip, +- SMK_CONNECTING); ++ SMK_CONNECTING); ++ } + #endif + #ifdef SMACK_IPV6_PORT_LABELING + rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); + #endif +- break; ++ return rc; + } ++ if (sap->sa_family != AF_INET || addrlen < sizeof(struct sockaddr_in)) ++ return 0; ++ rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); + return rc; + } + +diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c +index aee7c04d49e5..b61ba0321a72 100644 +--- a/sound/drivers/dummy.c ++++ b/sound/drivers/dummy.c +@@ -915,7 +915,7 @@ static void print_formats(struct snd_dummy *dummy, + { + int i; + +- for (i = 0; i < SNDRV_PCM_FORMAT_LAST; i++) { ++ for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { + if (dummy->pcm_hw.formats & (1ULL << i)) + snd_iprintf(buffer, " %s", snd_pcm_format_name(i)); + } +diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c +index f6cbb831b86a..85beb172d810 100644 +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -2156,6 +2156,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */ + SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ ++ SND_PCI_QUIRK(0x1558, 0x6504, "Clevo W65_67SB", 0), ++ /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ + SND_PCI_QUIRK(0x1028, 0x0497, "Dell Precision T3600", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ + /* Note the P55A-UD3 and Z87-D3HP share the subsys id for the HDA dev */ +@@ -2415,6 +2417,8 @@ static const struct pci_device_id azx_ids[] = { + /* Jasperlake */ + { PCI_DEVICE(0x8086, 0x38c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, ++ { PCI_DEVICE(0x8086, 0x4dc8), ++ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Tigerlake */ + { PCI_DEVICE(0x8086, 0xa0c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, +diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c +index 8350954b7986..e5191584638a 100644 +--- a/sound/pci/hda/hda_tegra.c ++++ b/sound/pci/hda/hda_tegra.c +@@ -398,6 +398,7 @@ static int hda_tegra_create(struct snd_card *card, + return err; + + chip->bus.needs_damn_long_delay = 1; ++ chip->bus.core.aligned_mmio = 1; + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) { +diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c +index 488c17c9f375..8ac805a634f4 100644 +--- a/sound/pci/hda/patch_hdmi.c ++++ b/sound/pci/hda/patch_hdmi.c +@@ -4153,6 +4153,7 @@ HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi), + HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), + HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), + HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), ++HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), + HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", 
patch_generic_hdmi), + HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), + HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), +diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c +index aa1f9637d895..e949b372cead 100644 +--- a/sound/soc/codecs/sgtl5000.c ++++ b/sound/soc/codecs/sgtl5000.c +@@ -1344,7 +1344,8 @@ static int sgtl5000_set_power_regs(struct snd_soc_component *component) + * if vddio == vdda the source of charge pump should be + * assigned manually to VDDIO + */ +- if (vddio == vdda) { ++ if (regulator_is_equal(sgtl5000->supplies[VDDA].consumer, ++ sgtl5000->supplies[VDDIO].consumer)) { + lreg_ctrl |= SGTL5000_VDDC_ASSN_OVRD; + lreg_ctrl |= SGTL5000_VDDC_MAN_ASSN_VDDIO << + SGTL5000_VDDC_MAN_ASSN_SHIFT; +diff --git a/sound/soc/intel/boards/skl_hda_dsp_common.c b/sound/soc/intel/boards/skl_hda_dsp_common.c +index 58409b6e476e..e3d405e57c5f 100644 +--- a/sound/soc/intel/boards/skl_hda_dsp_common.c ++++ b/sound/soc/intel/boards/skl_hda_dsp_common.c +@@ -38,16 +38,19 @@ int skl_hda_hdmi_add_pcm(struct snd_soc_card *card, int device) + return 0; + } + +-SND_SOC_DAILINK_DEFS(idisp1, +- DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin")), ++SND_SOC_DAILINK_DEF(idisp1_cpu, ++ DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin"))); ++SND_SOC_DAILINK_DEF(idisp1_codec, + DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi1"))); + +-SND_SOC_DAILINK_DEFS(idisp2, +- DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin")), ++SND_SOC_DAILINK_DEF(idisp2_cpu, ++ DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin"))); ++SND_SOC_DAILINK_DEF(idisp2_codec, + DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi2"))); + +-SND_SOC_DAILINK_DEFS(idisp3, +- DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin")), ++SND_SOC_DAILINK_DEF(idisp3_cpu, ++ DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin"))); ++SND_SOC_DAILINK_DEF(idisp3_codec, + DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi3"))); + + SND_SOC_DAILINK_DEF(analog_cpu, +@@ -80,21 +83,21 @@ struct snd_soc_dai_link skl_hda_be_dai_links[HDA_DSP_MAX_BE_DAI_LINKS] = { + .id = 1, + .dpcm_playback = 1, + .no_pcm = 1, +- SND_SOC_DAILINK_REG(idisp1), ++ SND_SOC_DAILINK_REG(idisp1_cpu, idisp1_codec, platform), + }, + { + .name = "iDisp2", + .id = 2, + .dpcm_playback = 1, + .no_pcm = 1, +- SND_SOC_DAILINK_REG(idisp2), ++ SND_SOC_DAILINK_REG(idisp2_cpu, idisp2_codec, platform), + }, + { + .name = "iDisp3", + .id = 3, + .dpcm_playback = 1, + .no_pcm = 1, +- SND_SOC_DAILINK_REG(idisp3), ++ SND_SOC_DAILINK_REG(idisp3_cpu, idisp3_codec, platform), + }, + { + .name = "Analog Playback and Capture", +diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c +index 5a3749938900..d286dff3171d 100644 +--- a/sound/soc/meson/axg-fifo.c ++++ b/sound/soc/meson/axg-fifo.c +@@ -108,10 +108,12 @@ static int axg_fifo_pcm_hw_params(struct snd_pcm_substream *ss, + { + struct snd_pcm_runtime *runtime = ss->runtime; + struct axg_fifo *fifo = axg_fifo_data(ss); ++ unsigned int burst_num, period, threshold; + dma_addr_t end_ptr; +- unsigned int burst_num; + int ret; + ++ period = params_period_bytes(params); ++ + ret = snd_pcm_lib_malloc_pages(ss, params_buffer_bytes(params)); + if (ret < 0) + return ret; +@@ -122,9 +124,25 @@ static int axg_fifo_pcm_hw_params(struct snd_pcm_substream *ss, + regmap_write(fifo->map, FIFO_FINISH_ADDR, end_ptr); + + /* Setup interrupt periodicity */ +- burst_num = params_period_bytes(params) / AXG_FIFO_BURST; ++ burst_num = period / AXG_FIFO_BURST; + regmap_write(fifo->map, FIFO_INT_ADDR, burst_num); + ++ /* ++ * Start 
the fifo request on the smallest of the following: ++ * - Half the fifo size ++ * - Half the period size ++ */ ++ threshold = min(period / 2, ++ (unsigned int)AXG_FIFO_MIN_DEPTH / 2); ++ ++ /* ++ * With the threshold in bytes, register value is: ++ * V = (threshold / burst) - 1 ++ */ ++ threshold /= AXG_FIFO_BURST; ++ regmap_field_write(fifo->field_threshold, ++ threshold ? threshold - 1 : 0); ++ + /* Enable block count irq */ + regmap_update_bits(fifo->map, FIFO_CTRL0, + CTRL0_INT_EN(FIFO_INT_COUNT_REPEAT), +@@ -360,6 +378,11 @@ int axg_fifo_probe(struct platform_device *pdev) + return fifo->irq; + } + ++ fifo->field_threshold = ++ devm_regmap_field_alloc(dev, fifo->map, data->field_threshold); ++ if (IS_ERR(fifo->field_threshold)) ++ return PTR_ERR(fifo->field_threshold); ++ + return devm_snd_soc_register_component(dev, data->component_drv, + data->dai_drv, 1); + } +diff --git a/sound/soc/meson/axg-fifo.h b/sound/soc/meson/axg-fifo.h +index bb1e2ce50256..ab546a3cf940 100644 +--- a/sound/soc/meson/axg-fifo.h ++++ b/sound/soc/meson/axg-fifo.h +@@ -9,7 +9,9 @@ + + struct clk; + struct platform_device; ++struct reg_field; + struct regmap; ++struct regmap_field; + struct reset_control; + + struct snd_soc_component_driver; +@@ -50,8 +52,6 @@ struct snd_soc_pcm_runtime; + #define CTRL1_STATUS2_SEL_MASK GENMASK(11, 8) + #define CTRL1_STATUS2_SEL(x) ((x) << 8) + #define STATUS2_SEL_DDR_READ 0 +-#define CTRL1_THRESHOLD_MASK GENMASK(23, 16) +-#define CTRL1_THRESHOLD(x) ((x) << 16) + #define CTRL1_FRDDR_DEPTH_MASK GENMASK(31, 24) + #define CTRL1_FRDDR_DEPTH(x) ((x) << 24) + #define FIFO_START_ADDR 0x08 +@@ -67,12 +67,14 @@ struct axg_fifo { + struct regmap *map; + struct clk *pclk; + struct reset_control *arb; ++ struct regmap_field *field_threshold; + int irq; + }; + + struct axg_fifo_match_data { + const struct snd_soc_component_driver *component_drv; + struct snd_soc_dai_driver *dai_drv; ++ struct reg_field field_threshold; + }; + + extern const struct snd_pcm_ops axg_fifo_pcm_ops; +diff --git a/sound/soc/meson/axg-frddr.c b/sound/soc/meson/axg-frddr.c +index 6ab111c31b28..09773a9ae964 100644 +--- a/sound/soc/meson/axg-frddr.c ++++ b/sound/soc/meson/axg-frddr.c +@@ -50,7 +50,7 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) + { + struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai); +- unsigned int fifo_depth, fifo_threshold; ++ unsigned int fifo_depth; + int ret; + + /* Enable pclk to access registers and clock the fifo ip */ +@@ -68,11 +68,8 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream, + * Depth and threshold are zero based. 
+ */ + fifo_depth = AXG_FIFO_MIN_CNT - 1; +- fifo_threshold = (AXG_FIFO_MIN_CNT / 2) - 1; +- regmap_update_bits(fifo->map, FIFO_CTRL1, +- CTRL1_FRDDR_DEPTH_MASK | CTRL1_THRESHOLD_MASK, +- CTRL1_FRDDR_DEPTH(fifo_depth) | +- CTRL1_THRESHOLD(fifo_threshold)); ++ regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH_MASK, ++ CTRL1_FRDDR_DEPTH(fifo_depth)); + + return 0; + } +@@ -153,8 +150,9 @@ static const struct snd_soc_component_driver axg_frddr_component_drv = { + }; + + static const struct axg_fifo_match_data axg_frddr_match_data = { +- .component_drv = &axg_frddr_component_drv, +- .dai_drv = &axg_frddr_dai_drv ++ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), ++ .component_drv = &axg_frddr_component_drv, ++ .dai_drv = &axg_frddr_dai_drv + }; + + static const struct snd_soc_dai_ops g12a_frddr_ops = { +@@ -271,8 +269,9 @@ static const struct snd_soc_component_driver g12a_frddr_component_drv = { + }; + + static const struct axg_fifo_match_data g12a_frddr_match_data = { +- .component_drv = &g12a_frddr_component_drv, +- .dai_drv = &g12a_frddr_dai_drv ++ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), ++ .component_drv = &g12a_frddr_component_drv, ++ .dai_drv = &g12a_frddr_dai_drv + }; + + /* On SM1, the output selection in on CTRL2 */ +@@ -335,8 +334,9 @@ static const struct snd_soc_component_driver sm1_frddr_component_drv = { + }; + + static const struct axg_fifo_match_data sm1_frddr_match_data = { +- .component_drv = &sm1_frddr_component_drv, +- .dai_drv = &g12a_frddr_dai_drv ++ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), ++ .component_drv = &sm1_frddr_component_drv, ++ .dai_drv = &g12a_frddr_dai_drv + }; + + static const struct of_device_id axg_frddr_of_match[] = { +diff --git a/sound/soc/meson/axg-toddr.c b/sound/soc/meson/axg-toddr.c +index c8ea2145f576..ecf41c7549a6 100644 +--- a/sound/soc/meson/axg-toddr.c ++++ b/sound/soc/meson/axg-toddr.c +@@ -89,7 +89,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) + { + struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai); +- unsigned int fifo_threshold; + int ret; + + /* Enable pclk to access registers and clock the fifo ip */ +@@ -107,11 +106,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream, + /* Apply single buffer mode to the interface */ + regmap_update_bits(fifo->map, FIFO_CTRL0, CTRL0_TODDR_PP_MODE, 0); + +- /* TODDR does not have a configurable fifo depth */ +- fifo_threshold = AXG_FIFO_MIN_CNT - 1; +- regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_THRESHOLD_MASK, +- CTRL1_THRESHOLD(fifo_threshold)); +- + return 0; + } + +@@ -185,8 +179,9 @@ static const struct snd_soc_component_driver axg_toddr_component_drv = { + }; + + static const struct axg_fifo_match_data axg_toddr_match_data = { +- .component_drv = &axg_toddr_component_drv, +- .dai_drv = &axg_toddr_dai_drv ++ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), ++ .component_drv = &axg_toddr_component_drv, ++ .dai_drv = &axg_toddr_dai_drv + }; + + static const struct snd_soc_dai_ops g12a_toddr_ops = { +@@ -218,8 +213,9 @@ static const struct snd_soc_component_driver g12a_toddr_component_drv = { + }; + + static const struct axg_fifo_match_data g12a_toddr_match_data = { +- .component_drv = &g12a_toddr_component_drv, +- .dai_drv = &g12a_toddr_dai_drv ++ .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), ++ .component_drv = &g12a_toddr_component_drv, ++ .dai_drv = &g12a_toddr_dai_drv + }; + + static const char * const sm1_toddr_sel_texts[] = { +@@ -282,8 +278,9 @@ static const struct 
snd_soc_component_driver sm1_toddr_component_drv = { + }; + + static const struct axg_fifo_match_data sm1_toddr_match_data = { +- .component_drv = &sm1_toddr_component_drv, +- .dai_drv = &g12a_toddr_dai_drv ++ .field_threshold = REG_FIELD(FIFO_CTRL1, 12, 23), ++ .component_drv = &sm1_toddr_component_drv, ++ .dai_drv = &g12a_toddr_dai_drv + }; + + static const struct of_device_id axg_toddr_of_match[] = { +diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c +index 81f28f7ff1a0..12aec140819a 100644 +--- a/sound/soc/sof/core.c ++++ b/sound/soc/sof/core.c +@@ -288,6 +288,46 @@ static int sof_machine_check(struct snd_sof_dev *sdev) + #endif + } + ++/* ++ * FW Boot State Transition Diagram ++ * ++ * +-----------------------------------------------------------------------+ ++ * | | ++ * ------------------ ------------------ | ++ * | | | | | ++ * | BOOT_FAILED | | READY_FAILED |-------------------------+ | ++ * | | | | | | ++ * ------------------ ------------------ | | ++ * ^ ^ | | ++ * | | | | ++ * (FW Boot Timeout) (FW_READY FAIL) | | ++ * | | | | ++ * | | | | ++ * ------------------ | ------------------ | | ++ * | | | | | | | ++ * | IN_PROGRESS |---------------+------------->| COMPLETE | | | ++ * | | (FW Boot OK) (FW_READY OK) | | | | ++ * ------------------ ------------------ | | ++ * ^ | | | ++ * | | | | ++ * (FW Loading OK) (System Suspend/Runtime Suspend) ++ * | | | | ++ * | | | | ++ * ------------------ ------------------ | | | ++ * | | | |<-----+ | | ++ * | PREPARE | | NOT_STARTED |<---------------------+ | ++ * | | | |<---------------------------+ ++ * ------------------ ------------------ ++ * | ^ | ^ ++ * | | | | ++ * | +-----------------------+ | ++ * | (DSP Probe OK) | ++ * | | ++ * | | ++ * +------------------------------------+ ++ * (System Suspend/Runtime Suspend) ++ */ ++ + static int sof_probe_continue(struct snd_sof_dev *sdev) + { + struct snd_sof_pdata *plat_data = sdev->pdata; +@@ -303,6 +343,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) + return ret; + } + ++ sdev->fw_state = SOF_FW_BOOT_PREPARE; ++ + /* check machine info */ + ret = sof_machine_check(sdev); + if (ret < 0) { +@@ -342,7 +384,12 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) + goto fw_load_err; + } + +- /* boot the firmware */ ++ sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS; ++ ++ /* ++ * Boot the firmware. The FW boot status will be modified ++ * in snd_sof_run_firmware() depending on the outcome. 
++ */ + ret = snd_sof_run_firmware(sdev); + if (ret < 0) { + dev_err(sdev->dev, "error: failed to boot DSP firmware %d\n", +@@ -368,7 +415,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) + if (ret < 0) { + dev_err(sdev->dev, + "error: failed to register DSP DAI driver %d\n", ret); +- goto fw_run_err; ++ goto fw_trace_err; + } + + drv_name = plat_data->machine->drv_name; +@@ -382,7 +429,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) + + if (IS_ERR(plat_data->pdev_mach)) { + ret = PTR_ERR(plat_data->pdev_mach); +- goto fw_run_err; ++ goto fw_trace_err; + } + + dev_dbg(sdev->dev, "created machine %s\n", +@@ -393,7 +440,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) + + return 0; + +-#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE) ++fw_trace_err: ++ snd_sof_free_trace(sdev); + fw_run_err: + snd_sof_fw_unload(sdev); + fw_load_err: +@@ -402,21 +450,10 @@ ipc_err: + snd_sof_free_debug(sdev); + dbg_err: + snd_sof_remove(sdev); +-#else +- +- /* +- * when the probe_continue is handled in a work queue, the +- * probe does not fail so we don't release resources here. +- * They will be released with an explicit call to +- * snd_sof_device_remove() when the PCI/ACPI device is removed +- */ + +-fw_run_err: +-fw_load_err: +-ipc_err: +-dbg_err: +- +-#endif ++ /* all resources freed, update state to match */ ++ sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; ++ sdev->first_boot = true; + + return ret; + } +@@ -447,6 +484,7 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data) + + sdev->pdata = plat_data; + sdev->first_boot = true; ++ sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; + dev_set_drvdata(dev, sdev); + + /* check all mandatory ops */ +@@ -494,10 +532,12 @@ int snd_sof_device_remove(struct device *dev) + if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) + cancel_work_sync(&sdev->probe_work); + +- snd_sof_fw_unload(sdev); +- snd_sof_ipc_free(sdev); +- snd_sof_free_debug(sdev); +- snd_sof_free_trace(sdev); ++ if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) { ++ snd_sof_fw_unload(sdev); ++ snd_sof_ipc_free(sdev); ++ snd_sof_free_debug(sdev); ++ snd_sof_free_trace(sdev); ++ } + + /* + * Unregister machine driver. This will unbind the snd_card which +@@ -513,7 +553,8 @@ int snd_sof_device_remove(struct device *dev) + * scheduled on, when they are unloaded. Therefore, the DSP must be + * removed only after the topology has been unloaded. 
+ */ +- snd_sof_remove(sdev); ++ if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) ++ snd_sof_remove(sdev); + + /* release firmware */ + release_firmware(pdata->fw); +diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c +index 65c2af3fcaab..356bb134ae93 100644 +--- a/sound/soc/sof/intel/hda-loader.c ++++ b/sound/soc/sof/intel/hda-loader.c +@@ -278,7 +278,6 @@ int hda_dsp_cl_boot_firmware(struct snd_sof_dev *sdev) + + /* init for booting wait */ + init_waitqueue_head(&sdev->boot_wait); +- sdev->boot_complete = false; + + /* prepare DMA for code loader stream */ + tag = cl_stream_prepare(sdev, 0x40, stripped_firmware.size, +diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c +index 5a5163eef2ef..3c4b604412f0 100644 +--- a/sound/soc/sof/intel/hda.c ++++ b/sound/soc/sof/intel/hda.c +@@ -166,7 +166,7 @@ void hda_dsp_dump_skl(struct snd_sof_dev *sdev, u32 flags) + panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, + HDA_ADSP_ERROR_CODE_SKL + 0x4); + +- if (sdev->boot_complete) { ++ if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) { + hda_dsp_get_registers(sdev, &xoops, &panic_info, stack, + HDA_DSP_STACK_DUMP_SIZE); + snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, +@@ -193,7 +193,7 @@ void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags) + HDA_DSP_SRAM_REG_FW_STATUS); + panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_SRAM_REG_FW_TRACEP); + +- if (sdev->boot_complete) { ++ if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) { + hda_dsp_get_registers(sdev, &xoops, &panic_info, stack, + HDA_DSP_STACK_DUMP_SIZE); + snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, +diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c +index 7b6d69783e16..8984d965037d 100644 +--- a/sound/soc/sof/ipc.c ++++ b/sound/soc/sof/ipc.c +@@ -348,19 +348,12 @@ void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev) + break; + case SOF_IPC_FW_READY: + /* check for FW boot completion */ +- if (!sdev->boot_complete) { ++ if (sdev->fw_state == SOF_FW_BOOT_IN_PROGRESS) { + err = sof_ops(sdev)->fw_ready(sdev, cmd); +- if (err < 0) { +- /* +- * this indicates a mismatch in ABI +- * between the driver and fw +- */ +- dev_err(sdev->dev, "error: ABI mismatch %d\n", +- err); +- } else { +- /* firmware boot completed OK */ +- sdev->boot_complete = true; +- } ++ if (err < 0) ++ sdev->fw_state = SOF_FW_BOOT_READY_FAILED; ++ else ++ sdev->fw_state = SOF_FW_BOOT_COMPLETE; + + /* wake up firmware loader */ + wake_up(&sdev->boot_wait); +diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c +index a041adf0669d..ce114df5e4fc 100644 +--- a/sound/soc/sof/loader.c ++++ b/sound/soc/sof/loader.c +@@ -511,7 +511,6 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev) + int init_core_mask; + + init_waitqueue_head(&sdev->boot_wait); +- sdev->boot_complete = false; + + /* create read-only fw_version debugfs to store boot version info */ + if (sdev->first_boot) { +@@ -543,19 +542,27 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev) + + init_core_mask = ret; + +- /* now wait for the DSP to boot */ +- ret = wait_event_timeout(sdev->boot_wait, sdev->boot_complete, ++ /* ++ * now wait for the DSP to boot. There are 3 possible outcomes: ++ * 1. Boot wait times out indicating FW boot failure. ++ * 2. FW boots successfully and fw_ready op succeeds. ++ * 3. FW boots but fw_ready op fails. 
++ */ ++ ret = wait_event_timeout(sdev->boot_wait, ++ sdev->fw_state > SOF_FW_BOOT_IN_PROGRESS, + msecs_to_jiffies(sdev->boot_timeout)); + if (ret == 0) { + dev_err(sdev->dev, "error: firmware boot failure\n"); + snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX | + SOF_DBG_TEXT | SOF_DBG_PCI); +- /* after this point FW_READY msg should be ignored */ +- sdev->boot_complete = true; ++ sdev->fw_state = SOF_FW_BOOT_FAILED; + return -EIO; + } + +- dev_info(sdev->dev, "firmware boot complete\n"); ++ if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) ++ dev_info(sdev->dev, "firmware boot complete\n"); ++ else ++ return -EIO; /* FW boots but fw_ready op failed */ + + /* perform post fw run operations */ + ret = snd_sof_dsp_post_fw_run(sdev); +diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c +index e23beaeefe00..195af259e78e 100644 +--- a/sound/soc/sof/pm.c ++++ b/sound/soc/sof/pm.c +@@ -269,6 +269,10 @@ static int sof_resume(struct device *dev, bool runtime_resume) + if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume) + return 0; + ++ /* DSP was never successfully started, nothing to resume */ ++ if (sdev->first_boot) ++ return 0; ++ + /* + * if the runtime_resume flag is set, call the runtime_resume routine + * or else call the system resume routine +@@ -283,6 +287,8 @@ static int sof_resume(struct device *dev, bool runtime_resume) + return ret; + } + ++ sdev->fw_state = SOF_FW_BOOT_PREPARE; ++ + /* load the firmware */ + ret = snd_sof_load_firmware(sdev); + if (ret < 0) { +@@ -292,7 +298,12 @@ static int sof_resume(struct device *dev, bool runtime_resume) + return ret; + } + +- /* boot the firmware */ ++ sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS; ++ ++ /* ++ * Boot the firmware. The FW boot status will be modified ++ * in snd_sof_run_firmware() depending on the outcome. ++ */ + ret = snd_sof_run_firmware(sdev); + if (ret < 0) { + dev_err(sdev->dev, +@@ -338,6 +349,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) + if (!sof_ops(sdev)->suspend) + return 0; + ++ if (sdev->fw_state != SOF_FW_BOOT_COMPLETE) ++ goto power_down; ++ + /* release trace */ + snd_sof_release_trace(sdev); + +@@ -375,6 +389,12 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) + ret); + } + ++power_down: ++ ++ /* return if the DSP was not probed successfully */ ++ if (sdev->fw_state == SOF_FW_BOOT_NOT_STARTED) ++ return 0; ++ + /* power down all DSP cores */ + if (runtime_suspend) + ret = snd_sof_dsp_runtime_suspend(sdev); +@@ -385,6 +405,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) + "error: failed to power down DSP during suspend %d\n", + ret); + ++ /* reset FW state */ ++ sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; ++ + return ret; + } + +diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h +index 730f3259dd02..7b329bd99674 100644 +--- a/sound/soc/sof/sof-priv.h ++++ b/sound/soc/sof/sof-priv.h +@@ -356,6 +356,15 @@ struct snd_sof_dai { + struct list_head list; /* list in sdev dai list */ + }; + ++enum snd_sof_fw_state { ++ SOF_FW_BOOT_NOT_STARTED = 0, ++ SOF_FW_BOOT_PREPARE, ++ SOF_FW_BOOT_IN_PROGRESS, ++ SOF_FW_BOOT_FAILED, ++ SOF_FW_BOOT_READY_FAILED, /* firmware booted but fw_ready op failed */ ++ SOF_FW_BOOT_COMPLETE, ++}; ++ + /* + * SOF Device Level. 
+ */ +@@ -372,7 +381,7 @@ struct snd_sof_dev { + + /* DSP firmware boot */ + wait_queue_head_t boot_wait; +- u32 boot_complete; ++ enum snd_sof_fw_state fw_state; + u32 first_boot; + + /* work queue in case the probe is implemented in two steps */ +diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c +index 94b903d95afa..74c00c905d24 100644 +--- a/sound/usb/mixer_scarlett_gen2.c ++++ b/sound/usb/mixer_scarlett_gen2.c +@@ -558,11 +558,11 @@ static const struct scarlett2_config + + /* proprietary request/response format */ + struct scarlett2_usb_packet { +- u32 cmd; +- u16 size; +- u16 seq; +- u32 error; +- u32 pad; ++ __le32 cmd; ++ __le16 size; ++ __le16 seq; ++ __le32 error; ++ __le32 pad; + u8 data[]; + }; + +@@ -664,11 +664,11 @@ static int scarlett2_usb( + "Scarlett Gen 2 USB invalid response; " + "cmd tx/rx %d/%d seq %d/%d size %d/%d " + "error %d pad %d\n", +- le16_to_cpu(req->cmd), le16_to_cpu(resp->cmd), ++ le32_to_cpu(req->cmd), le32_to_cpu(resp->cmd), + le16_to_cpu(req->seq), le16_to_cpu(resp->seq), + resp_size, le16_to_cpu(resp->size), +- le16_to_cpu(resp->error), +- le16_to_cpu(resp->pad)); ++ le32_to_cpu(resp->error), ++ le32_to_cpu(resp->pad)); + err = -EINVAL; + goto unlock; + } +@@ -687,7 +687,7 @@ error: + /* Send SCARLETT2_USB_DATA_CMD SCARLETT2_USB_CONFIG_SAVE */ + static void scarlett2_config_save(struct usb_mixer_interface *mixer) + { +- u32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); ++ __le32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); + + scarlett2_usb(mixer, SCARLETT2_USB_DATA_CMD, + &req, sizeof(u32), +@@ -713,11 +713,11 @@ static int scarlett2_usb_set_config( + const struct scarlett2_config config_item = + scarlett2_config_items[config_item_num]; + struct { +- u32 offset; +- u32 bytes; +- s32 value; ++ __le32 offset; ++ __le32 bytes; ++ __le32 value; + } __packed req; +- u32 req2; ++ __le32 req2; + int err; + struct scarlett2_mixer_data *private = mixer->private_data; + +@@ -753,8 +753,8 @@ static int scarlett2_usb_get( + int offset, void *buf, int size) + { + struct { +- u32 offset; +- u32 size; ++ __le32 offset; ++ __le32 size; + } __packed req; + + req.offset = cpu_to_le32(offset); +@@ -794,8 +794,8 @@ static int scarlett2_usb_set_mix(struct usb_mixer_interface *mixer, + const struct scarlett2_device_info *info = private->info; + + struct { +- u16 mix_num; +- u16 data[SCARLETT2_INPUT_MIX_MAX]; ++ __le16 mix_num; ++ __le16 data[SCARLETT2_INPUT_MIX_MAX]; + } __packed req; + + int i, j; +@@ -850,9 +850,9 @@ static int scarlett2_usb_set_mux(struct usb_mixer_interface *mixer) + }; + + struct { +- u16 pad; +- u16 num; +- u32 data[SCARLETT2_MUX_MAX]; ++ __le16 pad; ++ __le16 num; ++ __le32 data[SCARLETT2_MUX_MAX]; + } __packed req; + + req.pad = 0; +@@ -911,9 +911,9 @@ static int scarlett2_usb_get_meter_levels(struct usb_mixer_interface *mixer, + u16 *levels) + { + struct { +- u16 pad; +- u16 num_meters; +- u32 magic; ++ __le16 pad; ++ __le16 num_meters; ++ __le32 magic; + } __packed req; + u32 resp[SCARLETT2_NUM_METERS]; + int i, err; +diff --git a/sound/usb/validate.c b/sound/usb/validate.c +index 389e8657434a..5a3c4f7882b0 100644 +--- a/sound/usb/validate.c ++++ b/sound/usb/validate.c +@@ -110,7 +110,7 @@ static bool validate_processing_unit(const void *p, + default: + if (v->type == UAC1_EXTENSION_UNIT) + return true; /* OK */ +- switch (d->wProcessType) { ++ switch (le16_to_cpu(d->wProcessType)) { + case UAC_PROCESS_UP_DOWNMIX: + case UAC_PROCESS_DOLBY_PROLOGIC: + if (d->bLength < len + 1) /* bNrModes */ +@@ -125,7 +125,7 @@ static 
bool validate_processing_unit(const void *p, + case UAC_VERSION_2: + if (v->type == UAC2_EXTENSION_UNIT_V2) + return true; /* OK */ +- switch (d->wProcessType) { ++ switch (le16_to_cpu(d->wProcessType)) { + case UAC2_PROCESS_UP_DOWNMIX: + case UAC2_PROCESS_DOLBY_PROLOCIC: /* SiC! */ + if (d->bLength < len + 1) /* bNrModes */ +@@ -142,7 +142,7 @@ static bool validate_processing_unit(const void *p, + len += 2; /* wClusterDescrID */ + break; + } +- switch (d->wProcessType) { ++ switch (le16_to_cpu(d->wProcessType)) { + case UAC3_PROCESS_UP_DOWNMIX: + if (d->bLength < len + 1) /* bNrModes */ + return false; +diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat +index ad1b9e646c49..4cf93110c259 100755 +--- a/tools/kvm/kvm_stat/kvm_stat ++++ b/tools/kvm/kvm_stat/kvm_stat +@@ -270,6 +270,7 @@ class ArchX86(Arch): + def __init__(self, exit_reasons): + self.sc_perf_evt_open = 298 + self.ioctl_numbers = IOCTL_NUMBERS ++ self.exit_reason_field = 'exit_reason' + self.exit_reasons = exit_reasons + + def debugfs_is_child(self, field): +@@ -289,6 +290,7 @@ class ArchPPC(Arch): + # numbers depend on the wordsize. + char_ptr_size = ctypes.sizeof(ctypes.c_char_p) + self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 ++ self.exit_reason_field = 'exit_nr' + self.exit_reasons = {} + + def debugfs_is_child(self, field): +@@ -300,6 +302,7 @@ class ArchA64(Arch): + def __init__(self): + self.sc_perf_evt_open = 241 + self.ioctl_numbers = IOCTL_NUMBERS ++ self.exit_reason_field = 'esr_ec' + self.exit_reasons = AARCH64_EXIT_REASONS + + def debugfs_is_child(self, field): +@@ -311,6 +314,7 @@ class ArchS390(Arch): + def __init__(self): + self.sc_perf_evt_open = 331 + self.ioctl_numbers = IOCTL_NUMBERS ++ self.exit_reason_field = None + self.exit_reasons = None + + def debugfs_is_child(self, field): +@@ -541,8 +545,8 @@ class TracepointProvider(Provider): + """ + filters = {} + filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) +- if ARCH.exit_reasons: +- filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) ++ if ARCH.exit_reason_field and ARCH.exit_reasons: ++ filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons) + return filters + + def _get_available_fields(self): +diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c +index d98838c5820c..b6403712c2f4 100644 +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -2541,7 +2541,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, + if (strncmp(local_name, targ_name, local_essent_len) == 0) { + pr_debug("[%d] %s: found candidate [%d] %s\n", + local_type_id, local_name, i, targ_name); +- new_ids = realloc(cand_ids->data, cand_ids->len + 1); ++ new_ids = reallocarray(cand_ids->data, ++ cand_ids->len + 1, ++ sizeof(*cand_ids->data)); + if (!new_ids) { + err = -ENOMEM; + goto err_out; +diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh +index 0a832e265a50..c3ae1e8ae119 100755 +--- a/tools/objtool/sync-check.sh ++++ b/tools/objtool/sync-check.sh +@@ -47,5 +47,3 @@ check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[ + check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' + check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' + check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"' +- +-cd - +diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c +index 2f55d4d23446..6e04304560ca 100644 +--- 
a/tools/power/cpupower/lib/cpufreq.c ++++ b/tools/power/cpupower/lib/cpufreq.c +@@ -332,21 +332,74 @@ void cpufreq_put_available_governors(struct cpufreq_available_governors *any) + } + + +-struct cpufreq_frequencies +-*cpufreq_get_frequencies(const char *type, unsigned int cpu) ++struct cpufreq_available_frequencies ++*cpufreq_get_available_frequencies(unsigned int cpu) + { +- struct cpufreq_frequencies *first = NULL; +- struct cpufreq_frequencies *current = NULL; ++ struct cpufreq_available_frequencies *first = NULL; ++ struct cpufreq_available_frequencies *current = NULL; + char one_value[SYSFS_PATH_MAX]; + char linebuf[MAX_LINE_LEN]; +- char fname[MAX_LINE_LEN]; + unsigned int pos, i; + unsigned int len; + +- snprintf(fname, MAX_LINE_LEN, "scaling_%s_frequencies", type); ++ len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies", ++ linebuf, sizeof(linebuf)); ++ if (len == 0) ++ return NULL; + +- len = sysfs_cpufreq_read_file(cpu, fname, +- linebuf, sizeof(linebuf)); ++ pos = 0; ++ for (i = 0; i < len; i++) { ++ if (linebuf[i] == ' ' || linebuf[i] == '\n') { ++ if (i - pos < 2) ++ continue; ++ if (i - pos >= SYSFS_PATH_MAX) ++ goto error_out; ++ if (current) { ++ current->next = malloc(sizeof(*current)); ++ if (!current->next) ++ goto error_out; ++ current = current->next; ++ } else { ++ first = malloc(sizeof(*first)); ++ if (!first) ++ goto error_out; ++ current = first; ++ } ++ current->first = first; ++ current->next = NULL; ++ ++ memcpy(one_value, linebuf + pos, i - pos); ++ one_value[i - pos] = '\0'; ++ if (sscanf(one_value, "%lu", &current->frequency) != 1) ++ goto error_out; ++ ++ pos = i + 1; ++ } ++ } ++ ++ return first; ++ ++ error_out: ++ while (first) { ++ current = first->next; ++ free(first); ++ first = current; ++ } ++ return NULL; ++} ++ ++struct cpufreq_available_frequencies ++*cpufreq_get_boost_frequencies(unsigned int cpu) ++{ ++ struct cpufreq_available_frequencies *first = NULL; ++ struct cpufreq_available_frequencies *current = NULL; ++ char one_value[SYSFS_PATH_MAX]; ++ char linebuf[MAX_LINE_LEN]; ++ unsigned int pos, i; ++ unsigned int len; ++ ++ len = sysfs_cpufreq_read_file(cpu, "scaling_boost_frequencies", ++ linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + +@@ -391,9 +444,9 @@ struct cpufreq_frequencies + return NULL; + } + +-void cpufreq_put_frequencies(struct cpufreq_frequencies *any) ++void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies *any) + { +- struct cpufreq_frequencies *tmp, *next; + + if (!any) + return; +@@ -406,6 +459,11 @@ void cpufreq_put_frequencies(struct cpufreq_frequencies *any) + } + } + ++void cpufreq_put_boost_frequencies(struct cpufreq_available_frequencies *any) ++{ ++ cpufreq_put_available_frequencies(any); ++} ++ + static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu, + const char *file) + { +diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h +index a55f0d19215b..95f4fd9e2656 100644 +--- a/tools/power/cpupower/lib/cpufreq.h ++++ b/tools/power/cpupower/lib/cpufreq.h +@@ -20,10 +20,10 @@ struct cpufreq_available_governors { + struct cpufreq_available_governors *first; + }; + +-struct cpufreq_frequencies { ++struct cpufreq_available_frequencies { + unsigned long frequency; +- struct cpufreq_frequencies *next; +- struct cpufreq_frequencies *first; ++ struct cpufreq_available_frequencies *next; ++ struct cpufreq_available_frequencies *first; + }; + + +@@ -124,11 +124,17 @@ void
cpufreq_put_available_governors( + * cpufreq_put_frequencies after use. + */ + +-struct cpufreq_frequencies +-*cpufreq_get_frequencies(const char *type, unsigned int cpu); ++struct cpufreq_available_frequencies ++*cpufreq_get_available_frequencies(unsigned int cpu); + +-void cpufreq_put_frequencies( +- struct cpufreq_frequencies *first); ++void cpufreq_put_available_frequencies( ++ struct cpufreq_available_frequencies *first); ++ ++struct cpufreq_available_frequencies ++*cpufreq_get_boost_frequencies(unsigned int cpu); ++ ++void cpufreq_put_boost_frequencies( ++ struct cpufreq_available_frequencies *first); + + + /* determine affected CPUs +diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c +index e63cf55f81cf..6efc0f6b1b11 100644 +--- a/tools/power/cpupower/utils/cpufreq-info.c ++++ b/tools/power/cpupower/utils/cpufreq-info.c +@@ -244,14 +244,14 @@ static int get_boost_mode_x86(unsigned int cpu) + + static int get_boost_mode(unsigned int cpu) + { +- struct cpufreq_frequencies *freqs; ++ struct cpufreq_available_frequencies *freqs; + + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD || + cpupower_cpu_info.vendor == X86_VENDOR_HYGON || + cpupower_cpu_info.vendor == X86_VENDOR_INTEL) + return get_boost_mode_x86(cpu); + +- freqs = cpufreq_get_frequencies("boost", cpu); ++ freqs = cpufreq_get_boost_frequencies(cpu); + if (freqs) { + printf(_(" boost frequency steps: ")); + while (freqs->next) { +@@ -261,7 +261,7 @@ static int get_boost_mode(unsigned int cpu) + } + print_speed(freqs->frequency); + printf("\n"); +- cpufreq_put_frequencies(freqs); ++ cpufreq_put_available_frequencies(freqs); + } + + return 0; +@@ -475,7 +475,7 @@ static int get_latency(unsigned int cpu, unsigned int human) + + static void debug_output_one(unsigned int cpu) + { +- struct cpufreq_frequencies *freqs; ++ struct cpufreq_available_frequencies *freqs; + + get_driver(cpu); + get_related_cpus(cpu); +@@ -483,7 +483,7 @@ static void debug_output_one(unsigned int cpu) + get_latency(cpu, 1); + get_hardware_limits(cpu, 1); + +- freqs = cpufreq_get_frequencies("available", cpu); ++ freqs = cpufreq_get_available_frequencies(cpu); + if (freqs) { + printf(_(" available frequency steps: ")); + while (freqs->next) { +@@ -493,7 +493,7 @@ static void debug_output_one(unsigned int cpu) + } + print_speed(freqs->frequency); + printf("\n"); +- cpufreq_put_frequencies(freqs); ++ cpufreq_put_available_frequencies(freqs); + } + + get_available_governors(cpu); +diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c +index 5ecc267d98b0..fad615c22e4d 100644 +--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c ++++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c +@@ -2,7 +2,7 @@ + #include <test_progs.h> + + ssize_t get_base_addr() { +- size_t start; ++ size_t start, offset; + char buf[256]; + FILE *f; + +@@ -10,10 +10,11 @@ ssize_t get_base_addr() { + if (!f) + return -errno; + +- while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) { ++ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n", ++ &start, buf, &offset) == 3) { + if (strcmp(buf, "r-xp") == 0) { + fclose(f); +- return start; ++ return start - offset; + } + } + +diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +index 3003fddc0613..cf6c87936c69 100644 +--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c ++++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +@@ -4,6 +4,7 @@ + 
#include <sched.h> + #include <sys/socket.h> + #include <test_progs.h> ++#include "libbpf_internal.h" + + static void on_sample(void *ctx, int cpu, void *data, __u32 size) + { +@@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) + + void test_perf_buffer(void) + { +- int err, prog_fd, nr_cpus, i, duration = 0; ++ int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; + const char *prog_name = "kprobe/sys_nanosleep"; + const char *file = "./test_perf_buffer.o"; + struct perf_buffer_opts pb_opts = {}; +@@ -29,15 +30,27 @@ void test_perf_buffer(void) + struct bpf_object *obj; + struct perf_buffer *pb; + struct bpf_link *link; ++ bool *online; + + nr_cpus = libbpf_num_possible_cpus(); + if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus)) + return; + ++ err = parse_cpu_mask_file("/sys/devices/system/cpu/online", ++ &online, &on_len); ++ if (CHECK(err, "nr_on_cpus", "err %d\n", err)) ++ return; ++ ++ for (i = 0; i < on_len; i++) ++ if (online[i]) ++ nr_on_cpus++; ++ + /* load program */ + err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); +- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) +- return; ++ if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) { ++ obj = NULL; ++ goto out_close; ++ } + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) +@@ -64,6 +77,11 @@ void test_perf_buffer(void) + /* trigger kprobe on every CPU */ + CPU_ZERO(&cpu_seen); + for (i = 0; i < nr_cpus; i++) { ++ if (i >= on_len || !online[i]) { ++ printf("skipping offline CPU #%d\n", i); ++ continue; ++ } ++ + CPU_ZERO(&cpu_set); + CPU_SET(i, &cpu_set); + +@@ -81,8 +99,8 @@ void test_perf_buffer(void) + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto out_free_pb; + +- if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt", +- "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen))) ++ if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt", ++ "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) + goto out_free_pb; + + out_free_pb: +@@ -91,4 +109,5 @@ out_detach: + bpf_link__destroy(link); + out_close: + bpf_object__close(obj); ++ free(online); + } +diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +index f62aa0eb959b..1735faf17536 100644 +--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c ++++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +@@ -49,8 +49,12 @@ retry: + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); +- if (CHECK(pmu_fd < 0, "perf_event_open", +- "err %d errno %d. 
Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", ++ if (pmu_fd < 0 && errno == ENOENT) { ++ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); ++ test__skip(); ++ goto cleanup; ++ } ++ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto close_prog; + +diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +index ea7d84f01235..e6be383a003f 100644 +--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c ++++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +@@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md) + data_check.skb_ports[0] = th->source; + data_check.skb_ports[1] = th->dest; + ++ if (th->fin) ++ /* The connection is being torn down at the end of a ++ * test. It can't contain a cmd, so return early. ++ */ ++ return SK_PASS; ++ + if ((th->doff << 2) + sizeof(*cmd) > data_check.len) + GOTO_DONE(DROP_ERR_SKB_DATA); + if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy, +diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c +index 4a851513c842..779e11da979c 100644 +--- a/tools/testing/selftests/bpf/test_sockmap.c ++++ b/tools/testing/selftests/bpf/test_sockmap.c +@@ -331,7 +331,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, + FILE *file; + int i, fp; + +- file = fopen(".sendpage_tst.tmp", "w+"); ++ file = tmpfile(); + if (!file) { + perror("create file for sendpage"); + return 1; +@@ -340,13 +340,8 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, + fwrite(&k, sizeof(char), 1, file); + fflush(file); + fseek(file, 0, SEEK_SET); +- fclose(file); + +- fp = open(".sendpage_tst.tmp", O_RDONLY); +- if (fp < 0) { +- perror("reopen file for sendpage"); +- return 1; +- } ++ fp = fileno(file); + + clock_gettime(CLOCK_MONOTONIC, &s->start); + for (i = 0; i < cnt; i++) { +@@ -354,11 +349,11 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, + + if (!drop && sent < 0) { + perror("send loop error"); +- close(fp); ++ fclose(file); + return sent; + } else if (drop && sent >= 0) { + printf("sendpage loop error expected: %i\n", sent); +- close(fp); ++ fclose(file); + return -EIO; + } + +@@ -366,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, + s->bytes_sent += sent; + } + clock_gettime(CLOCK_MONOTONIC, &s->end); +- close(fp); ++ fclose(file); + return 0; + } + +diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +index e98c36750fae..d34fe06268d2 100644 +--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py ++++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +@@ -54,7 +54,7 @@ class SubPlugin(TdcPlugin): + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, +- env=ENVIR) ++ env=os.environ.copy()) + (rawout, serr) = proc.communicate() + + if proc.returncode != 0 and len(serr) > 0: +diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c +index c4c57ba99e90..631d397ac81b 100644 +--- a/virt/kvm/arm/aarch32.c ++++ b/virt/kvm/arm/aarch32.c +@@ -10,6 +10,7 @@ + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + ++#include <linux/bits.h> + #include <linux/kvm_host.h> + #include <asm/kvm_emulate.h> + #include <asm/kvm_hyp.h> +@@ -28,25 +29,115 @@ static const u8 return_offsets[8][2] = { + [7] = { 4, 4 }, /* FIQ, unused */ + }; + ++/* ++ * 
When an exception is taken, most CPSR fields are left unchanged in the ++ * handler. However, some are explicitly overridden (e.g. M[4:0]). ++ * ++ * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with ++ * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was ++ * obsoleted by the ARMv7 virtualization extensions and is RES0. ++ * ++ * For the SPSR layout seen from AArch32, see: ++ * - ARM DDI 0406C.d, page B1-1148 ++ * - ARM DDI 0487E.a, page G8-6264 ++ * ++ * For the SPSR_ELx layout for AArch32 seen from AArch64, see: ++ * - ARM DDI 0487E.a, page C5-426 ++ * ++ * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from ++ * MSB to LSB. ++ */ ++static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) ++{ ++ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); ++ unsigned long old, new; ++ ++ old = *vcpu_cpsr(vcpu); ++ new = 0; ++ ++ new |= (old & PSR_AA32_N_BIT); ++ new |= (old & PSR_AA32_Z_BIT); ++ new |= (old & PSR_AA32_C_BIT); ++ new |= (old & PSR_AA32_V_BIT); ++ new |= (old & PSR_AA32_Q_BIT); ++ ++ // CPSR.IT[7:0] are set to zero upon any exception ++ // See ARM DDI 0487E.a, section G1.12.3 ++ // See ARM DDI 0406C.d, section B1.8.3 ++ ++ new |= (old & PSR_AA32_DIT_BIT); ++ ++ // CPSR.SSBS is set to SCTLR.DSSBS upon any exception ++ // See ARM DDI 0487E.a, page G8-6244 ++ if (sctlr & BIT(31)) ++ new |= PSR_AA32_SSBS_BIT; ++ ++ // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 ++ // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented ++ // See ARM DDI 0487E.a, page G8-6246 ++ new |= (old & PSR_AA32_PAN_BIT); ++ if (!(sctlr & BIT(23))) ++ new |= PSR_AA32_PAN_BIT; ++ ++ // SS does not exist in AArch32, so ignore ++ ++ // CPSR.IL is set to zero upon any exception ++ // See ARM DDI 0487E.a, page G1-5527 ++ ++ new |= (old & PSR_AA32_GE_MASK); ++ ++ // CPSR.IT[7:0] are set to zero upon any exception ++ // See prior comment above ++ ++ // CPSR.E is set to SCTLR.EE upon any exception ++ // See ARM DDI 0487E.a, page G8-6245 ++ // See ARM DDI 0406C.d, page B4-1701 ++ if (sctlr & BIT(25)) ++ new |= PSR_AA32_E_BIT; ++ ++ // CPSR.A is unchanged upon an exception to Undefined, Supervisor ++ // CPSR.A is set upon an exception to other modes ++ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 ++ // See ARM DDI 0406C.d, page B1-1182 ++ new |= (old & PSR_AA32_A_BIT); ++ if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) ++ new |= PSR_AA32_A_BIT; ++ ++ // CPSR.I is set upon any exception ++ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 ++ // See ARM DDI 0406C.d, page B1-1182 ++ new |= PSR_AA32_I_BIT; ++ ++ // CPSR.F is set upon an exception to FIQ ++ // CPSR.F is unchanged upon an exception to other modes ++ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 ++ // See ARM DDI 0406C.d, page B1-1182 ++ new |= (old & PSR_AA32_F_BIT); ++ if (mode == PSR_AA32_MODE_FIQ) ++ new |= PSR_AA32_F_BIT; ++ ++ // CPSR.T is set to SCTLR.TE upon any exception ++ // See ARM DDI 0487E.a, page G8-5514 ++ // See ARM DDI 0406C.d, page B1-1181 ++ if (sctlr & BIT(30)) ++ new |= PSR_AA32_T_BIT; ++ ++ new |= mode; ++ ++ return new; ++} ++ + static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) + { +- unsigned long cpsr; +- unsigned long new_spsr_value = *vcpu_cpsr(vcpu); +- bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); ++ unsigned long spsr = *vcpu_cpsr(vcpu); ++ bool is_thumb = (spsr & PSR_AA32_T_BIT); + u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + +- cpsr = mode | 
PSR_AA32_I_BIT; +- +- if (sctlr & (1 << 30)) +- cpsr |= PSR_AA32_T_BIT; +- if (sctlr & (1 << 25)) +- cpsr |= PSR_AA32_E_BIT; +- +- *vcpu_cpsr(vcpu) = cpsr; ++ *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); + + /* Note: These now point to the banked copies */ +- vcpu_write_spsr(vcpu, new_spsr_value); ++ vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ +@@ -84,7 +175,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + +- prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset); ++ prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); + + *far = addr; + +diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c +index 6af5c91337f2..f274fabb4301 100644 +--- a/virt/kvm/arm/mmio.c ++++ b/virt/kvm/arm/mmio.c +@@ -105,6 +105,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) + data = (data ^ mask) - mask; + } + ++ if (!vcpu->arch.mmio_decode.sixty_four) ++ data = data & 0xffffffff; ++ + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + &data); + data = vcpu_data_host_to_guest(vcpu, data, len); +@@ -125,6 +128,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) + unsigned long rt; + int access_size; + bool sign_extend; ++ bool sixty_four; + + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + /* page table accesses IO mem: tell guest to fix its TTBR */ +@@ -138,11 +142,13 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) + + *is_write = kvm_vcpu_dabt_iswrite(vcpu); + sign_extend = kvm_vcpu_dabt_issext(vcpu); ++ sixty_four = kvm_vcpu_dabt_issf(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); + + *len = access_size; + vcpu->arch.mmio_decode.sign_extend = sign_extend; + vcpu->arch.mmio_decode.rt = rt; ++ vcpu->arch.mmio_decode.sixty_four = sixty_four; + + return 0; + } +diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c +index 35305d6e68cc..d8ef708a2ef6 100644 +--- a/virt/kvm/async_pf.c ++++ b/virt/kvm/async_pf.c +@@ -64,7 +64,7 @@ static void async_pf_execute(struct work_struct *work) + struct mm_struct *mm = apf->mm; + struct kvm_vcpu *vcpu = apf->vcpu; + unsigned long addr = apf->addr; +- gva_t gva = apf->gva; ++ gpa_t cr2_or_gpa = apf->cr2_or_gpa; + int locked = 1; + + might_sleep(); +@@ -92,7 +92,7 @@ static void async_pf_execute(struct work_struct *work) + * this point + */ + +- trace_kvm_async_pf_completed(addr, gva); ++ trace_kvm_async_pf_completed(addr, cr2_or_gpa); + + if (swq_has_sleeper(&vcpu->wq)) + swake_up_one(&vcpu->wq); +@@ -165,8 +165,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) + } + } + +-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, +- struct kvm_arch_async_pf *arch) ++int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ unsigned long hva, struct kvm_arch_async_pf *arch) + { + struct kvm_async_pf *work; + +@@ -185,7 +185,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, + + work->wakeup_all = false; + work->vcpu = vcpu; +- work->gva = gva; ++ work->cr2_or_gpa = cr2_or_gpa; + work->addr = hva; + work->arch = *arch; + work->mm = current->mm; +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 13efc291b1c7..b5ea1bafe513 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1394,14 +1394,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) + } + EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); + +-unsigned long kvm_host_page_size(struct kvm 
*kvm, gfn_t gfn) ++unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) + { + struct vm_area_struct *vma; + unsigned long addr, size; + + size = PAGE_SIZE; + +- addr = gfn_to_hva(kvm, gfn); ++ addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); + if (kvm_is_error_hva(addr)) + return PAGE_SIZE; + +@@ -1809,26 +1809,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) + } + EXPORT_SYMBOL_GPL(gfn_to_page); + +-static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, +- struct kvm_host_map *map) ++void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache) ++{ ++ if (pfn == 0) ++ return; ++ ++ if (cache) ++ cache->pfn = cache->gfn = 0; ++ ++ if (dirty) ++ kvm_release_pfn_dirty(pfn); ++ else ++ kvm_release_pfn_clean(pfn); ++} ++ ++static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn, ++ struct gfn_to_pfn_cache *cache, u64 gen) ++{ ++ kvm_release_pfn(cache->pfn, cache->dirty, cache); ++ ++ cache->pfn = gfn_to_pfn_memslot(slot, gfn); ++ cache->gfn = gfn; ++ cache->dirty = false; ++ cache->generation = gen; ++} ++ ++static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn, ++ struct kvm_host_map *map, ++ struct gfn_to_pfn_cache *cache, ++ bool atomic) + { + kvm_pfn_t pfn; + void *hva = NULL; + struct page *page = KVM_UNMAPPED_PAGE; ++ struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn); ++ u64 gen = slots->generation; + + if (!map) + return -EINVAL; + +- pfn = gfn_to_pfn_memslot(slot, gfn); ++ if (cache) { ++ if (!cache->pfn || cache->gfn != gfn || ++ cache->generation != gen) { ++ if (atomic) ++ return -EAGAIN; ++ kvm_cache_gfn_to_pfn(slot, gfn, cache, gen); ++ } ++ pfn = cache->pfn; ++ } else { ++ if (atomic) ++ return -EAGAIN; ++ pfn = gfn_to_pfn_memslot(slot, gfn); ++ } + if (is_error_noslot_pfn(pfn)) + return -EINVAL; + + if (pfn_valid(pfn)) { + page = pfn_to_page(pfn); +- hva = kmap(page); ++ if (atomic) ++ hva = kmap_atomic(page); ++ else ++ hva = kmap(page); + #ifdef CONFIG_HAS_IOMEM +- } else { ++ } else if (!atomic) { + hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); ++ } else { ++ return -EINVAL; + #endif + } + +@@ -1843,14 +1889,25 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, + return 0; + } + ++int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, ++ struct gfn_to_pfn_cache *cache, bool atomic) ++{ ++ return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map, ++ cache, atomic); ++} ++EXPORT_SYMBOL_GPL(kvm_map_gfn); ++ + int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) + { +- return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map); ++ return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map, ++ NULL, false); + } + EXPORT_SYMBOL_GPL(kvm_vcpu_map); + +-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, +- bool dirty) ++static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot, ++ struct kvm_host_map *map, ++ struct gfn_to_pfn_cache *cache, ++ bool dirty, bool atomic) + { + if (!map) + return; +@@ -1858,23 +1915,45 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + if (!map->hva) + return; + +- if (map->page != KVM_UNMAPPED_PAGE) +- kunmap(map->page); ++ if (map->page != KVM_UNMAPPED_PAGE) { ++ if (atomic) ++ kunmap_atomic(map->hva); ++ else ++ kunmap(map->page); ++ } + #ifdef CONFIG_HAS_IOMEM +- else ++ else if (!atomic) + memunmap(map->hva); ++ else ++ WARN_ONCE(1, "Unexpected unmapping in atomic context"); + #endif + +- if (dirty) { +- kvm_vcpu_mark_page_dirty(vcpu, map->gfn); 
+- kvm_release_pfn_dirty(map->pfn); +- } else { +- kvm_release_pfn_clean(map->pfn); +- } ++ if (dirty) ++ mark_page_dirty_in_slot(memslot, map->gfn); ++ ++ if (cache) ++ cache->dirty |= dirty; ++ else ++ kvm_release_pfn(map->pfn, dirty, NULL); + + map->hva = NULL; + map->page = NULL; + } ++ ++int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, ++ struct gfn_to_pfn_cache *cache, bool dirty, bool atomic) ++{ ++ __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map, ++ cache, dirty, atomic); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_unmap_gfn); ++ ++void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty) ++{ ++ __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL, ++ dirty, false); ++} + EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); + + struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)