author | Alice Ferrazzi <alicef@gentoo.org> | 2023-04-06 19:40:26 +0900
committer | Alice Ferrazzi <alicef@gentoo.org> | 2023-04-06 19:40:26 +0900
commit | 552322bbd8665a864a089b06ed41c97e413562b9 (patch)
tree | 83de7867139ee4b6c265adf3e964c280deaa686a
parent | Remove redundant patch (diff)
Linux patch 6.2.10
Signed-off-by: Alice Ferrazzi <alicef@gentoo.org>
-rw-r--r-- | 0000_README | 4 |
-rw-r--r-- | 1009_linux-6.2.10.patch | 10600 |
2 files changed, 10604 insertions, 0 deletions
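Note: the short sketch below is illustrative only and not part of the commit; it assumes a vanilla linux-6.2.9 source tree sitting next to a checkout of this repository, and shows how an incremental patch such as 1009_linux-6.2.10.patch might be applied by hand.

```sh
# Illustrative only -- assumed layout:
#   ./linux-6.2.9/        vanilla kernel source already patched up to 6.2.9
#   ./linux-patches/      checkout of this repository
cd linux-6.2.9
patch -p1 < ../linux-patches/1009_linux-6.2.10.patch   # bumps SUBLEVEL from 9 to 10
make kernelversion                                      # prints 6.2.10 if the patch applied cleanly
```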
diff --git a/0000_README b/0000_README index 47edd2da..93bcb21e 100644 --- a/0000_README +++ b/0000_README @@ -79,6 +79,10 @@ Patch: 1008_linux-6.2.9.patch From: https://www.kernel.org Desc: Linux 6.2.9 +Patch: 1009_linux-6.2.10.patch +From: https://www.kernel.org +Desc: Linux 6.2.10 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1009_linux-6.2.10.patch b/1009_linux-6.2.10.patch new file mode 100644 index 00000000..980ca4df --- /dev/null +++ b/1009_linux-6.2.10.patch @@ -0,0 +1,10600 @@ +diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml +index 3fe981b14e2cb..54736362378eb 100644 +--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml ++++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml +@@ -76,6 +76,13 @@ properties: + If "broken-flash-reset" is present then having this property does not + make any difference. + ++ spi-cpol: true ++ spi-cpha: true ++ ++dependencies: ++ spi-cpol: [ spi-cpha ] ++ spi-cpha: [ spi-cpol ] ++ + unevaluatedProperties: false + + examples: +diff --git a/Makefile b/Makefile +index 8732f7208d59b..6ec0ec452e465 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 6 + PATCHLEVEL = 2 +-SUBLEVEL = 9 ++SUBLEVEL = 10 + EXTRAVERSION = + NAME = Hurr durr I'ma ninja sloth + +diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c +index a3ee3b605c9b8..3c24178bd4935 100644 +--- a/arch/arm64/kvm/mmu.c ++++ b/arch/arm64/kvm/mmu.c +@@ -665,14 +665,33 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) + CONFIG_PGTABLE_LEVELS), + .mm_ops = &kvm_user_mm_ops, + }; ++ unsigned long flags; + kvm_pte_t pte = 0; /* Keep GCC quiet... */ + u32 level = ~0; + int ret; + ++ /* ++ * Disable IRQs so that we hazard against a concurrent ++ * teardown of the userspace page tables (which relies on ++ * IPI-ing threads). ++ */ ++ local_irq_save(flags); + ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level); +- VM_BUG_ON(ret); +- VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS); +- VM_BUG_ON(!(pte & PTE_VALID)); ++ local_irq_restore(flags); ++ ++ if (ret) ++ return ret; ++ ++ /* ++ * Not seeing an error, but not updating level? Something went ++ * deeply wrong... ++ */ ++ if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS)) ++ return -EFAULT; ++ ++ /* Oops, the userspace PTs are gone... Replay the fault */ ++ if (!kvm_pte_valid(pte)) ++ return -EAGAIN; + + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)); + } +@@ -1079,7 +1098,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, + * + * Returns the size of the mapping. + */ +-static unsigned long ++static long + transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long hva, kvm_pfn_t *pfnp, + phys_addr_t *ipap) +@@ -1091,8 +1110,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, + * sure that the HVA and IPA are sufficiently aligned and that the + * block map is contained within the memslot. + */ +- if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) && +- get_user_mapping_size(kvm, hva) >= PMD_SIZE) { ++ if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { ++ int sz = get_user_mapping_size(kvm, hva); ++ ++ if (sz < 0) ++ return sz; ++ ++ if (sz < PMD_SIZE) ++ return PAGE_SIZE; ++ + /* + * The address we faulted on is backed by a transparent huge + * page. 
However, because we map the compound huge page and +@@ -1192,7 +1218,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + { + int ret = 0; + bool write_fault, writable, force_pte = false; +- bool exec_fault; ++ bool exec_fault, mte_allowed; + bool device = false; + unsigned long mmu_seq; + struct kvm *kvm = vcpu->kvm; +@@ -1203,7 +1229,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + kvm_pfn_t pfn; + bool logging_active = memslot_is_logging(memslot); + unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); +- unsigned long vma_pagesize, fault_granule; ++ long vma_pagesize, fault_granule; + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; + struct kvm_pgtable *pgt; + +@@ -1217,6 +1243,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + return -EFAULT; + } + ++ /* ++ * Permission faults just need to update the existing leaf entry, ++ * and so normally don't require allocations from the memcache. The ++ * only exception to this is when dirty logging is enabled at runtime ++ * and a write fault needs to collapse a block entry into a table. ++ */ ++ if (fault_status != ESR_ELx_FSC_PERM || ++ (logging_active && write_fault)) { ++ ret = kvm_mmu_topup_memory_cache(memcache, ++ kvm_mmu_cache_min_pages(kvm)); ++ if (ret) ++ return ret; ++ } ++ + /* + * Let's check if we will get back a huge page backed by hugetlbfs, or + * get block mapping for device MMIO region. +@@ -1269,37 +1309,21 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + fault_ipa &= ~(vma_pagesize - 1); + + gfn = fault_ipa >> PAGE_SHIFT; +- mmap_read_unlock(current->mm); ++ mte_allowed = kvm_vma_mte_allowed(vma); + +- /* +- * Permission faults just need to update the existing leaf entry, +- * and so normally don't require allocations from the memcache. The +- * only exception to this is when dirty logging is enabled at runtime +- * and a write fault needs to collapse a block entry into a table. +- */ +- if (fault_status != ESR_ELx_FSC_PERM || +- (logging_active && write_fault)) { +- ret = kvm_mmu_topup_memory_cache(memcache, +- kvm_mmu_cache_min_pages(kvm)); +- if (ret) +- return ret; +- } ++ /* Don't use the VMA after the unlock -- it may have vanished */ ++ vma = NULL; + +- mmu_seq = vcpu->kvm->mmu_invalidate_seq; + /* +- * Ensure the read of mmu_invalidate_seq happens before we call +- * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk +- * the page we just got a reference to gets unmapped before we have a +- * chance to grab the mmu_lock, which ensure that if the page gets +- * unmapped afterwards, the call to kvm_unmap_gfn will take it away +- * from us again properly. This smp_rmb() interacts with the smp_wmb() +- * in kvm_mmu_notifier_invalidate_<page|range_end>. ++ * Read mmu_invalidate_seq so that KVM can detect if the results of ++ * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to ++ * acquiring kvm->mmu_lock. + * +- * Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is +- * used to avoid unnecessary overhead introduced to locate the memory +- * slot because it's always fixed even @gfn is adjusted for huge pages. ++ * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs ++ * with the smp_wmb() in kvm_mmu_invalidate_end(). 
+ */ +- smp_rmb(); ++ mmu_seq = vcpu->kvm->mmu_invalidate_seq; ++ mmap_read_unlock(current->mm); + + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, + write_fault, &writable, NULL); +@@ -1350,11 +1374,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + vma_pagesize = transparent_hugepage_adjust(kvm, memslot, + hva, &pfn, + &fault_ipa); ++ ++ if (vma_pagesize < 0) { ++ ret = vma_pagesize; ++ goto out_unlock; ++ } + } + + if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) { + /* Check the VMM hasn't introduced a new disallowed VMA */ +- if (kvm_vma_mte_allowed(vma)) { ++ if (mte_allowed) { + sanitise_mte_tags(kvm, pfn, vma_pagesize); + } else { + ret = -EFAULT; +diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c +index 24908400e1906..c243b10f3e150 100644 +--- a/arch/arm64/kvm/pmu-emul.c ++++ b/arch/arm64/kvm/pmu-emul.c +@@ -538,7 +538,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; + +- __vcpu_sys_reg(vcpu, PMCR_EL0) = val; ++ /* The reset bits don't indicate any state, and shouldn't be saved. */ ++ __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); + + if (val & ARMV8_PMU_PMCR_E) { + kvm_pmu_enable_counter_mask(vcpu, +diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c +index c6cbfe6b854b3..c48c053d61466 100644 +--- a/arch/arm64/kvm/sys_regs.c ++++ b/arch/arm64/kvm/sys_regs.c +@@ -765,6 +765,22 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) + return true; + } + ++static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, ++ u64 *val) ++{ ++ u64 idx; ++ ++ if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0) ++ /* PMCCNTR_EL0 */ ++ idx = ARMV8_PMU_CYCLE_IDX; ++ else ++ /* PMEVCNTRn_EL0 */ ++ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); ++ ++ *val = kvm_pmu_get_counter_value(vcpu, idx); ++ return 0; ++} ++ + static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +@@ -981,7 +997,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + /* Macro to expand the PMEVCNTRn_EL0 register */ + #define PMU_PMEVCNTR_EL0(n) \ + { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \ +- .reset = reset_pmevcntr, \ ++ .reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \ + .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } + + /* Macro to expand the PMEVTYPERn_EL0 register */ +@@ -1745,7 +1761,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { + { PMU_SYS_REG(SYS_PMCEID1_EL0), + .access = access_pmceid, .reset = NULL }, + { PMU_SYS_REG(SYS_PMCCNTR_EL0), +- .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 }, ++ .access = access_pmu_evcntr, .reset = reset_unknown, ++ .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr}, + { PMU_SYS_REG(SYS_PMXEVTYPER_EL0), + .access = access_pmu_evtyper, .reset = NULL }, + { PMU_SYS_REG(SYS_PMXEVCNTR_EL0), +diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c +index 33788668cbdbf..3779e7855bd75 100644 +--- a/arch/mips/bmips/dma.c ++++ b/arch/mips/bmips/dma.c +@@ -5,6 +5,8 @@ + #include <asm/bmips.h> + #include <asm/io.h> + ++bool bmips_rac_flush_disable; ++ + void arch_sync_dma_for_cpu_all(void) + { + void __iomem *cbr = BMIPS_GET_CBR(); +@@ -15,6 +17,9 @@ void arch_sync_dma_for_cpu_all(void) + boot_cpu_type() != CPU_BMIPS4380) + return; + ++ if (unlikely(bmips_rac_flush_disable)) ++ return; ++ + /* Flush stale data out of the readahead cache */ + 
cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG); + __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG); +diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c +index e95b3f78e7cd4..549a6392a3d2d 100644 +--- a/arch/mips/bmips/setup.c ++++ b/arch/mips/bmips/setup.c +@@ -35,6 +35,8 @@ + #define REG_BCM6328_OTP ((void __iomem *)CKSEG1ADDR(0x1000062c)) + #define BCM6328_TP1_DISABLED BIT(9) + ++extern bool bmips_rac_flush_disable; ++ + static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000; + + struct bmips_quirk { +@@ -104,6 +106,12 @@ static void bcm6358_quirks(void) + * disable SMP for now + */ + bmips_smp_enabled = 0; ++ ++ /* ++ * RAC flush causes kernel panics on BCM6358 when booting from TP1 ++ * because the bootloader is not initializing it properly. ++ */ ++ bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)); + } + + static void bcm6368_quirks(void) +diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h +index 2bbc0fcce04a3..5e26c7f2c25ab 100644 +--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h ++++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h +@@ -148,6 +148,11 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, + */ + } + ++static inline bool __pte_protnone(unsigned long pte) ++{ ++ return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE); ++} ++ + static inline bool __pte_flags_need_flush(unsigned long oldval, + unsigned long newval) + { +@@ -164,8 +169,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval, + /* + * We do not expect kernel mappings or non-PTEs or not-present PTEs. + */ +- VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED); +- VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED); ++ VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED); ++ VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE)); + VM_WARN_ON_ONCE(!(newval & _PAGE_PTE)); + VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT)); +diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c +index 2087a785f05f1..5fff0d04b23f7 100644 +--- a/arch/powerpc/kernel/ptrace/ptrace-view.c ++++ b/arch/powerpc/kernel/ptrace/ptrace-view.c +@@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, + static int ppr_get(struct task_struct *target, const struct user_regset *regset, + struct membuf to) + { ++ if (!target->thread.regs) ++ return -EINVAL; ++ + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); + } + +@@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) + { ++ if (!target->thread.regs) ++ return -EINVAL; ++ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->ppr, 0, sizeof(u64)); + } +diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c +index 4ad6e510d405f..94c023bb13e05 100644 +--- a/arch/powerpc/platforms/pseries/vas.c ++++ b/arch/powerpc/platforms/pseries/vas.c +@@ -857,6 +857,13 @@ int pseries_vas_dlpar_cpu(void) + { + int new_nr_creds, rc; + ++ /* ++ * NX-GZIP is not enabled. 
Nothing to do for DLPAR event ++ */ ++ if (!copypaste_feat) ++ return 0; ++ ++ + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, + (u64)virt_to_phys(&hv_cop_caps)); +@@ -1013,6 +1020,7 @@ static int __init pseries_vas_init(void) + * Linux supports user space COPY/PASTE only with Radix + */ + if (!radix_enabled()) { ++ copypaste_feat = false; + pr_err("API is supported only with radix page tables\n"); + return -ENOTSUPP; + } +diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c +index ad34519c8a13d..3ac2ff6a65dac 100644 +--- a/arch/riscv/kvm/vcpu_timer.c ++++ b/arch/riscv/kvm/vcpu_timer.c +@@ -147,10 +147,8 @@ static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu) + return; + + delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t); +- if (delta_ns) { +- hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); +- t->next_set = true; +- } ++ hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); ++ t->next_set = true; + } + + static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu) +diff --git a/arch/s390/Makefile b/arch/s390/Makefile +index b3235ab0ace83..ed646c583e4fe 100644 +--- a/arch/s390/Makefile ++++ b/arch/s390/Makefile +@@ -162,7 +162,7 @@ vdso_prepare: prepare0 + + ifdef CONFIG_EXPOLINE_EXTERN + modules_prepare: expoline_prepare +-expoline_prepare: ++expoline_prepare: scripts + $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o + endif + endif +diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c +index 720036fb19242..d44214072779e 100644 +--- a/arch/s390/lib/uaccess.c ++++ b/arch/s390/lib/uaccess.c +@@ -172,7 +172,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) + "4: slgr %0,%0\n" + "5:\n" + EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b) +- : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) ++ : "+&a" (size), "+&a" (to), "+a" (tmp1), "=&a" (tmp2) + : "a" (empty_zero_page), [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; +diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile +index 3c5b52fbe4a7f..a9ec8c9f5c5dd 100644 +--- a/arch/x86/xen/Makefile ++++ b/arch/x86/xen/Makefile +@@ -45,6 +45,6 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o + + obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o + +-obj-$(CONFIG_XEN_PV_DOM0) += vga.o ++obj-$(CONFIG_XEN_DOM0) += vga.o + + obj-$(CONFIG_XEN_EFI) += efi.o +diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c +index 5b13796628770..68f5f5d209dfa 100644 +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -1389,7 +1389,8 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) + + x86_platform.set_legacy_features = + xen_dom0_set_legacy_features; +- xen_init_vga(info, xen_start_info->console.dom0.info_size); ++ xen_init_vga(info, xen_start_info->console.dom0.info_size, ++ &boot_params.screen_info); + xen_start_info->console.domU.mfn = 0; + xen_start_info->console.domU.evtchn = 0; + +diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c +index bcae606bbc5cf..ada3868c02c23 100644 +--- a/arch/x86/xen/enlighten_pvh.c ++++ b/arch/x86/xen/enlighten_pvh.c +@@ -43,6 +43,19 @@ void __init xen_pvh_init(struct boot_params *boot_params) + x86_init.oem.banner = xen_banner; + + xen_efi_init(boot_params); ++ ++ if (xen_initial_domain()) { ++ struct xen_platform_op op = { ++ .cmd = XENPF_get_dom0_console, ++ }; ++ int ret = HYPERVISOR_platform_op(&op); ++ ++ if (ret > 0) ++ 
xen_init_vga(&op.u.dom0_console, ++ min(ret * sizeof(char), ++ sizeof(op.u.dom0_console)), ++ &boot_params->screen_info); ++ } + } + + void __init mem_map_via_hcall(struct boot_params *boot_params_p) +diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c +index 14ea32e734d59..d97adab8420f4 100644 +--- a/arch/x86/xen/vga.c ++++ b/arch/x86/xen/vga.c +@@ -9,10 +9,9 @@ + + #include "xen-ops.h" + +-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) ++void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size, ++ struct screen_info *screen_info) + { +- struct screen_info *screen_info = &boot_params.screen_info; +- + /* This is drawn from a dump from vgacon:startup in + * standard Linux. */ + screen_info->orig_video_mode = 3; +diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h +index 9a8bb972193d8..a10903785a338 100644 +--- a/arch/x86/xen/xen-ops.h ++++ b/arch/x86/xen/xen-ops.h +@@ -108,11 +108,12 @@ static inline void xen_uninit_lock_cpu(int cpu) + + struct dom0_vga_console_info; + +-#ifdef CONFIG_XEN_PV_DOM0 +-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); ++#ifdef CONFIG_XEN_DOM0 ++void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size, ++ struct screen_info *); + #else + static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, +- size_t size) ++ size_t size, struct screen_info *si) + { + } + #endif +diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c +index cd98366a9b238..f0a7d1c2641e0 100644 +--- a/arch/xtensa/kernel/traps.c ++++ b/arch/xtensa/kernel/traps.c +@@ -539,7 +539,7 @@ static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; + + void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) + { +- size_t len; ++ size_t len, off = 0; + + if (!sp) + sp = stack_pointer(task); +@@ -548,9 +548,17 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) + kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE); + + printk("%sStack:\n", loglvl); +- print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE, +- STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE, +- sp, len, false); ++ while (off < len) { ++ u8 line[STACK_DUMP_LINE_SIZE]; ++ size_t line_len = len - off > STACK_DUMP_LINE_SIZE ? ++ STACK_DUMP_LINE_SIZE : len - off; ++ ++ __memcpy(line, (u8 *)sp + off, line_len); ++ print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE, ++ STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE, ++ line, line_len, false); ++ off += STACK_DUMP_LINE_SIZE; ++ } + show_trace(task, sp, loglvl); + } + +diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c +index 0c05ccde1f7a6..7c16bc15e7a14 100644 +--- a/drivers/acpi/bus.c ++++ b/drivers/acpi/bus.c +@@ -459,85 +459,67 @@ out_free: + Notification Handling + -------------------------------------------------------------------------- */ + +-/* +- * acpi_bus_notify +- * --------------- +- * Callback for all 'system-level' device notifications (values 0x00-0x7F). ++/** ++ * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler ++ * @handle: Target ACPI object. ++ * @type: Notification type. ++ * @data: Ignored. ++ * ++ * This only handles notifications related to device hotplug. 
+ */ + static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) + { + struct acpi_device *adev; +- u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; +- bool hotplug_event = false; + + switch (type) { + case ACPI_NOTIFY_BUS_CHECK: + acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n"); +- hotplug_event = true; + break; + + case ACPI_NOTIFY_DEVICE_CHECK: + acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n"); +- hotplug_event = true; + break; + + case ACPI_NOTIFY_DEVICE_WAKE: + acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n"); +- break; ++ return; + + case ACPI_NOTIFY_EJECT_REQUEST: + acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n"); +- hotplug_event = true; + break; + + case ACPI_NOTIFY_DEVICE_CHECK_LIGHT: + acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n"); + /* TBD: Exactly what does 'light' mean? */ +- break; ++ return; + + case ACPI_NOTIFY_FREQUENCY_MISMATCH: + acpi_handle_err(handle, "Device cannot be configured due " + "to a frequency mismatch\n"); +- break; ++ return; + + case ACPI_NOTIFY_BUS_MODE_MISMATCH: + acpi_handle_err(handle, "Device cannot be configured due " + "to a bus mode mismatch\n"); +- break; ++ return; + + case ACPI_NOTIFY_POWER_FAULT: + acpi_handle_err(handle, "Device has suffered a power fault\n"); +- break; ++ return; + + default: + acpi_handle_debug(handle, "Unknown event type 0x%x\n", type); +- break; ++ return; + } + + adev = acpi_get_acpi_dev(handle); +- if (!adev) +- goto err; +- +- if (adev->dev.driver) { +- struct acpi_driver *driver = to_acpi_driver(adev->dev.driver); +- +- if (driver && driver->ops.notify && +- (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS)) +- driver->ops.notify(adev, type); +- } +- +- if (!hotplug_event) { +- acpi_put_acpi_dev(adev); +- return; +- } + +- if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) ++ if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) + return; + + acpi_put_acpi_dev(adev); + +- err: +- acpi_evaluate_ost(handle, type, ost_code, NULL); ++ acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); + } + + static void acpi_notify_device(acpi_handle handle, u32 event, void *data) +@@ -562,42 +544,51 @@ static u32 acpi_device_fixed_event(void *data) + return ACPI_INTERRUPT_HANDLED; + } + +-static int acpi_device_install_notify_handler(struct acpi_device *device) ++static int acpi_device_install_notify_handler(struct acpi_device *device, ++ struct acpi_driver *acpi_drv) + { + acpi_status status; + +- if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) ++ if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) { + status = + acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, + acpi_device_fixed_event, + device); +- else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) ++ } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) { + status = + acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, + acpi_device_fixed_event, + device); +- else +- status = acpi_install_notify_handler(device->handle, +- ACPI_DEVICE_NOTIFY, ++ } else { ++ u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? 
++ ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; ++ ++ status = acpi_install_notify_handler(device->handle, type, + acpi_notify_device, + device); ++ } + + if (ACPI_FAILURE(status)) + return -EINVAL; + return 0; + } + +-static void acpi_device_remove_notify_handler(struct acpi_device *device) ++static void acpi_device_remove_notify_handler(struct acpi_device *device, ++ struct acpi_driver *acpi_drv) + { +- if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) ++ if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) { + acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, + acpi_device_fixed_event); +- else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) ++ } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) { + acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, + acpi_device_fixed_event); +- else +- acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, ++ } else { ++ u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? ++ ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; ++ ++ acpi_remove_notify_handler(device->handle, type, + acpi_notify_device); ++ } + } + + /* Handle events targeting \_SB device (at present only graceful shutdown) */ +@@ -1039,7 +1030,7 @@ static int acpi_device_probe(struct device *dev) + acpi_drv->name, acpi_dev->pnp.bus_id); + + if (acpi_drv->ops.notify) { +- ret = acpi_device_install_notify_handler(acpi_dev); ++ ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv); + if (ret) { + if (acpi_drv->ops.remove) + acpi_drv->ops.remove(acpi_dev); +@@ -1062,7 +1053,7 @@ static void acpi_device_remove(struct device *dev) + struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); + + if (acpi_drv->ops.notify) +- acpi_device_remove_notify_handler(acpi_dev); ++ acpi_device_remove_notify_handler(acpi_dev, acpi_drv); + + if (acpi_drv->ops.remove) + acpi_drv->ops.remove(acpi_dev); +diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c +index 710ac640267dd..14d6d81e536fe 100644 +--- a/drivers/acpi/video_detect.c ++++ b/drivers/acpi/video_detect.c +@@ -716,6 +716,13 @@ static const struct dmi_system_id video_detect_dmi_table[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5515"), + }, + }, ++ { ++ .callback = video_detect_force_native, ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 15 3535"), ++ }, ++ }, + + /* + * Desktops which falsely report a backlight and which our heuristics +diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c +index e45285d4e62a4..da5727069d851 100644 +--- a/drivers/acpi/x86/utils.c ++++ b/drivers/acpi/x86/utils.c +@@ -251,6 +251,7 @@ bool force_storage_d3(void) + #define ACPI_QUIRK_UART1_TTY_UART2_SKIP BIT(1) + #define ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY BIT(2) + #define ACPI_QUIRK_USE_ACPI_AC_AND_BATTERY BIT(3) ++#define ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS BIT(4) + + static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { + /* +@@ -279,6 +280,16 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { + * need the x86-android-tablets module to properly work. 
+ */ + #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) ++ { ++ /* Acer Iconia One 7 B1-750 */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "VESPA2"), ++ }, ++ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | ++ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | ++ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), ++ }, + { + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), +@@ -286,7 +297,19 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_UART1_TTY_UART2_SKIP | +- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), ++ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | ++ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), ++ }, ++ { ++ /* Lenovo Yoga Book X90F/L */ ++ .matches = { ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), ++ }, ++ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | ++ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | ++ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), + }, + { + .matches = { +@@ -294,7 +317,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | +- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), ++ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | ++ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), + }, + { + /* Lenovo Yoga Tablet 2 1050F/L */ +@@ -336,7 +360,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | +- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), ++ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | ++ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), + }, + { + /* Whitelabel (sold as various brands) TM800A550L */ +@@ -413,6 +438,20 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s + return 0; + } + EXPORT_SYMBOL_GPL(acpi_quirk_skip_serdev_enumeration); ++ ++bool acpi_quirk_skip_gpio_event_handlers(void) ++{ ++ const struct dmi_system_id *dmi_id; ++ long quirks; ++ ++ dmi_id = dmi_first_match(acpi_quirk_skip_dmi_ids); ++ if (!dmi_id) ++ return false; ++ ++ quirks = (unsigned long)dmi_id->driver_data; ++ return (quirks & ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS); ++} ++EXPORT_SYMBOL_GPL(acpi_quirk_skip_gpio_event_handlers); + #endif + + /* Lists of PMIC ACPI HIDs with an (often better) native charger driver */ +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index eabbc3bdec221..4916fe78ab8fa 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -1010,9 +1010,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, + /* This is safe, since we have a reference from open(). */ + __module_get(THIS_MODULE); + +- /* suppress uevents while reconfiguring the device */ +- dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); +- + /* + * If we don't hold exclusive handle for the device, upgrade to it + * here to avoid changing device under exclusive owner. 
+@@ -1067,6 +1064,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, + } + } + ++ /* suppress uevents while reconfiguring the device */ ++ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); ++ + disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); + set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); + +@@ -1109,17 +1109,17 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, + if (partscan) + clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); + ++ /* enable and uncork uevent now that we are done */ ++ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); ++ + loop_global_unlock(lo, is_loop); + if (partscan) + loop_reread_partitions(lo); ++ + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); + +- error = 0; +-done: +- /* enable and uncork uevent now that we are done */ +- dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); +- return error; ++ return 0; + + out_unlock: + loop_global_unlock(lo, is_loop); +@@ -1130,7 +1130,7 @@ out_putf: + fput(file); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); +- goto done; ++ return error; + } + + static void __loop_clr_fd(struct loop_device *lo, bool release) +diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c +index 34ff048e70d0e..7c9175619a1dc 100644 +--- a/drivers/gpio/gpiolib-acpi.c ++++ b/drivers/gpio/gpiolib-acpi.c +@@ -536,6 +536,9 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) + if (ACPI_FAILURE(status)) + return; + ++ if (acpi_quirk_skip_gpio_event_handlers()) ++ return; ++ + acpi_walk_resources(handle, METHOD_NAME__AEI, + acpi_gpiochip_alloc_event, acpi_gpio); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +index f873692071032..00a92e935ff0f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +@@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) + */ + bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) + { +- if (adev->flags & AMD_IS_APU) ++ if ((adev->flags & AMD_IS_APU) && ++ adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */ ++ return false; ++ ++ if ((adev->flags & AMD_IS_APU) && ++ amdgpu_acpi_is_s3_active(adev)) + return false; + + if (amdgpu_sriov_vf(adev)) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +index 2b9d806e23afb..10a0a510910b6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +@@ -123,6 +123,8 @@ enum AMDGIM_FEATURE_FLAG { + AMDGIM_FEATURE_PP_ONE_VF = (1 << 4), + /* Indirect Reg Access enabled */ + AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), ++ /* AV1 Support MODE*/ ++ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), + }; + + enum AMDGIM_REG_ACCESS_FLAG { +@@ -321,6 +323,8 @@ static inline bool is_virtual_machine(void) + ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) + #define amdgpu_sriov_is_normal(adev) \ + ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) ++#define amdgpu_sriov_is_av1_support(adev) \ ++ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); + void amdgpu_virt_init_setting(struct amdgpu_device *adev); + void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +index 6c97148ca0ed3..24d42d24e6a01 100644 
+--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +@@ -93,7 +93,8 @@ union amd_sriov_msg_feature_flags { + uint32_t mm_bw_management : 1; + uint32_t pp_one_vf_mode : 1; + uint32_t reg_indirect_acc : 1; +- uint32_t reserved : 26; ++ uint32_t av1_support : 1; ++ uint32_t reserved : 25; + } flags; + uint32_t all; + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c +index 3d938b52178e3..9eedc1a1494c0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/soc21.c ++++ b/drivers/gpu/drm/amd/amdgpu/soc21.c +@@ -101,6 +101,59 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = + .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1, + }; + ++/* SRIOV SOC21, not const since data is controlled by host */ ++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = { ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, ++}; ++ ++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = { ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, ++}; ++ ++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = { ++ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0), ++ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0, ++}; ++ ++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = { ++ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1), ++ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1, ++}; ++ ++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = { ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, ++}; ++ ++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = { ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, ++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, ++}; ++ ++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = { ++ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0), ++ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0, ++}; ++ ++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 
= { ++ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1), ++ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1, ++}; ++ + static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, + const struct amdgpu_video_codecs **codecs) + { +@@ -111,16 +164,31 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, + case IP_VERSION(4, 0, 0): + case IP_VERSION(4, 0, 2): + case IP_VERSION(4, 0, 4): +- if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) { +- if (encode) +- *codecs = &vcn_4_0_0_video_codecs_encode_vcn1; +- else +- *codecs = &vcn_4_0_0_video_codecs_decode_vcn1; ++ if (amdgpu_sriov_vf(adev)) { ++ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) || ++ !amdgpu_sriov_is_av1_support(adev)) { ++ if (encode) ++ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1; ++ else ++ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1; ++ } else { ++ if (encode) ++ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0; ++ else ++ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0; ++ } + } else { +- if (encode) +- *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; +- else +- *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; ++ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) { ++ if (encode) ++ *codecs = &vcn_4_0_0_video_codecs_encode_vcn1; ++ else ++ *codecs = &vcn_4_0_0_video_codecs_decode_vcn1; ++ } else { ++ if (encode) ++ *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; ++ else ++ *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; ++ } + } + return 0; + default: +@@ -729,8 +797,23 @@ static int soc21_common_late_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +- if (amdgpu_sriov_vf(adev)) ++ if (amdgpu_sriov_vf(adev)) { + xgpu_nv_mailbox_get_irq(adev); ++ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) || ++ !amdgpu_sriov_is_av1_support(adev)) { ++ amdgpu_virt_update_sriov_video_codec(adev, ++ sriov_vcn_4_0_0_video_codecs_encode_array_vcn1, ++ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1), ++ sriov_vcn_4_0_0_video_codecs_decode_array_vcn1, ++ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1)); ++ } else { ++ amdgpu_virt_update_sriov_video_codec(adev, ++ sriov_vcn_4_0_0_video_codecs_encode_array_vcn0, ++ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0), ++ sriov_vcn_4_0_0_video_codecs_decode_array_vcn0, ++ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0)); ++ } ++ } + + return 0; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index f79b8e964140e..e191d38f3da62 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -1298,14 +1298,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + args->n_success = i+1; + } + +- mutex_unlock(&p->mutex); +- + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); + if (err) { + pr_debug("Sync memory failed, wait interrupted by user signal\n"); + goto sync_memory_failed; + } + ++ mutex_unlock(&p->mutex); ++ + /* Flush TLBs after waiting for the page table updates to complete */ + for (i = 0; i < args->n_devices; i++) { + peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); +@@ -1321,9 +1321,9 @@ get_process_device_data_failed: + bind_process_to_device_failed: + get_mem_obj_from_handle_failed: + map_memory_to_gpu_failed: ++sync_memory_failed: + mutex_unlock(&p->mutex); + copy_from_user_failed: +-sync_memory_failed: + kfree(devices_arr); + + 
return err; +@@ -1337,6 +1337,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + void *mem; + long err = 0; + uint32_t *devices_arr = NULL, i; ++ bool flush_tlb; + + if (!args->n_devices) { + pr_debug("Device IDs array empty\n"); +@@ -1389,16 +1390,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + } + args->n_success = i+1; + } +- mutex_unlock(&p->mutex); + +- if (kfd_flush_tlb_after_unmap(pdd->dev)) { ++ flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev); ++ if (flush_tlb) { + err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, + (struct kgd_mem *) mem, true); + if (err) { + pr_debug("Sync memory failed, wait interrupted by user signal\n"); + goto sync_memory_failed; + } ++ } ++ mutex_unlock(&p->mutex); + ++ if (flush_tlb) { + /* Flush TLBs after waiting for the page table updates to complete */ + for (i = 0; i < args->n_devices; i++) { + peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); +@@ -1414,9 +1418,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + bind_process_to_device_failed: + get_mem_obj_from_handle_failed: + unmap_memory_from_gpu_failed: ++sync_memory_failed: + mutex_unlock(&p->mutex); + copy_from_user_failed: +-sync_memory_failed: + kfree(devices_arr); + return err; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +index 10048ce16aea4..5c8506f180140 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +@@ -289,7 +289,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) + static int + svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, +- dma_addr_t *scratch) ++ dma_addr_t *scratch, uint64_t ttm_res_offset) + { + uint64_t npages = migrate->npages; + struct device *dev = adev->dev; +@@ -299,19 +299,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + uint64_t i, j; + int r; + +- pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, +- prange->last); ++ pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start, ++ prange->last, ttm_res_offset); + + src = scratch; + dst = (uint64_t *)(scratch + npages); + +- r = svm_range_vram_node_new(adev, prange, true); +- if (r) { +- dev_dbg(adev->dev, "fail %d to alloc vram\n", r); +- goto out; +- } +- +- amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT, ++ amdgpu_res_first(prange->ttm_res, ttm_res_offset, + npages << PAGE_SHIFT, &cursor); + for (i = j = 0; i < npages; i++) { + struct page *spage; +@@ -391,14 +385,14 @@ out_free_vram_pages: + migrate->dst[i + 3] = 0; + } + #endif +-out: ++ + return r; + } + + static long + svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct vm_area_struct *vma, uint64_t start, +- uint64_t end, uint32_t trigger) ++ uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) + { + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); + uint64_t npages = (end - start) >> PAGE_SHIFT; +@@ -451,7 +445,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + else + pr_debug("0x%lx pages migrated\n", cpages); + +- r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch); ++ r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset); + migrate_vma_pages(&migrate); + + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", +@@ 
-499,6 +493,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long addr, start, end; + struct vm_area_struct *vma; + struct amdgpu_device *adev; ++ uint64_t ttm_res_offset; + unsigned long cpages = 0; + long r = 0; + +@@ -520,6 +515,13 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + ++ r = svm_range_vram_node_new(adev, prange, true); ++ if (r) { ++ dev_dbg(adev->dev, "fail %ld to alloc vram\n", r); ++ return r; ++ } ++ ttm_res_offset = prange->offset << PAGE_SHIFT; ++ + for (addr = start; addr < end;) { + unsigned long next; + +@@ -528,18 +530,21 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + break; + + next = min(vma->vm_end, end); +- r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); ++ r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset); + if (r < 0) { + pr_debug("failed %ld to migrate\n", r); + break; + } else { + cpages += r; + } ++ ttm_res_offset += next - addr; + addr = next; + } + + if (cpages) + prange->actual_loc = best_loc; ++ else ++ svm_range_vram_node_free(prange); + + return r < 0 ? r : 0; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +index 09b966dc37681..aee2212e52f69 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +@@ -77,6 +77,7 @@ err_ioctl: + + static void kfd_exit(void) + { ++ kfd_cleanup_processes(); + kfd_debugfs_fini(); + kfd_process_destroy_wq(); + kfd_procfs_shutdown(); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 552c3ac85a132..7dc55919993c0 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -926,6 +926,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev); + + int kfd_process_create_wq(void); + void kfd_process_destroy_wq(void); ++void kfd_cleanup_processes(void); + struct kfd_process *kfd_create_process(struct file *filep); + struct kfd_process *kfd_get_process(const struct task_struct *task); + struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 51b1683ac5c1e..4d9f2d1c49b1d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) + kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); + } + ++static void kfd_process_notifier_release_internal(struct kfd_process *p) ++{ ++ cancel_delayed_work_sync(&p->eviction_work); ++ cancel_delayed_work_sync(&p->restore_work); ++ ++ /* Indicate to other users that MM is no longer valid */ ++ p->mm = NULL; ++ ++ mmu_notifier_put(&p->mmu_notifier); ++} ++ + static void kfd_process_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) + { +@@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, + return; + + mutex_lock(&kfd_processes_mutex); ++ /* ++ * Do early return if table is empty. ++ * ++ * This could potentially happen if this function is called concurrently ++ * by mmu_notifier and by kfd_cleanup_pocesses. 
++ * ++ */ ++ if (hash_empty(kfd_processes_table)) { ++ mutex_unlock(&kfd_processes_mutex); ++ return; ++ } + hash_del_rcu(&p->kfd_processes); + mutex_unlock(&kfd_processes_mutex); + synchronize_srcu(&kfd_processes_srcu); + +- cancel_delayed_work_sync(&p->eviction_work); +- cancel_delayed_work_sync(&p->restore_work); +- +- /* Indicate to other users that MM is no longer valid */ +- p->mm = NULL; +- +- mmu_notifier_put(&p->mmu_notifier); ++ kfd_process_notifier_release_internal(p); + } + + static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { +@@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { + .free_notifier = kfd_process_free_notifier, + }; + ++/* ++ * This code handles the case when driver is being unloaded before all ++ * mm_struct are released. We need to safely free the kfd_process and ++ * avoid race conditions with mmu_notifier that might try to free them. ++ * ++ */ ++void kfd_cleanup_processes(void) ++{ ++ struct kfd_process *p; ++ struct hlist_node *p_temp; ++ unsigned int temp; ++ HLIST_HEAD(cleanup_list); ++ ++ /* ++ * Move all remaining kfd_process from the process table to a ++ * temp list for processing. Once done, callback from mmu_notifier ++ * release will not see the kfd_process in the table and do early return, ++ * avoiding double free issues. ++ */ ++ mutex_lock(&kfd_processes_mutex); ++ hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { ++ hash_del_rcu(&p->kfd_processes); ++ synchronize_srcu(&kfd_processes_srcu); ++ hlist_add_head(&p->kfd_processes, &cleanup_list); ++ } ++ mutex_unlock(&kfd_processes_mutex); ++ ++ hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes) ++ kfd_process_notifier_release_internal(p); ++ ++ /* ++ * Ensures that all outstanding free_notifier get called, triggering ++ * the release of the kfd_process struct. 
++ */ ++ mmu_notifier_synchronize(); ++} ++ + static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) + { + unsigned long offset; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +index 5137476ec18e6..4236539d9f932 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +@@ -218,8 +218,8 @@ static int init_user_queue(struct process_queue_manager *pqm, + return 0; + + cleanup: +- if (dev->shared_resources.enable_mes) +- uninit_queue(*q); ++ uninit_queue(*q); ++ *q = NULL; + return retval; + } + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +index a7fd98f57f94c..dc62375a8e2c4 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +@@ -495,7 +495,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) + link->dp.mst_enabled = config->mst_enabled; + link->dp.usb4_enabled = config->usb4_enabled; + display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; +- link->adjust.auth_delay = 0; ++ link->adjust.auth_delay = 2; + link->adjust.hdcp1.disable = 0; + conn_state = aconnector->base.state; + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +index abdbd4352f6f3..60dd88666437d 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +@@ -208,6 +208,21 @@ bool needs_dsc_aux_workaround(struct dc_link *link) + return false; + } + ++bool is_synaptics_cascaded_panamera(struct dc_link *link, struct drm_dp_mst_port *port) ++{ ++ u8 branch_vendor_data[4] = { 0 }; // Vendor data 0x50C ~ 0x50F ++ ++ if (drm_dp_dpcd_read(port->mgr->aux, DP_BRANCH_VENDOR_SPECIFIC_START, &branch_vendor_data, 4) == 4) { ++ if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && ++ IS_SYNAPTICS_CASCADED_PANAMERA(link->dpcd_caps.branch_dev_name, branch_vendor_data)) { ++ DRM_INFO("Synaptics Cascaded MST hub\n"); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ + static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector) + { + struct dc_sink *dc_sink = aconnector->dc_sink; +@@ -231,6 +246,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto + needs_dsc_aux_workaround(aconnector->dc_link)) + aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux; + ++ /* synaptics cascaded MST hub case */ ++ if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port)) ++ aconnector->dsc_aux = port->mgr->aux; ++ + if (!aconnector->dsc_aux) + return false; + +@@ -627,12 +646,25 @@ struct dsc_mst_fairness_params { + struct amdgpu_dm_connector *aconnector; + }; + +-static int kbps_to_peak_pbn(int kbps) ++static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) ++{ ++ u8 link_coding_cap; ++ uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; ++ ++ link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); ++ if (link_coding_cap == DP_128b_132b_ENCODING) ++ fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; ++ ++ return fec_overhead_multiplier_x1000; ++} ++ ++static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) + { + u64 peak_kbps = 
kbps; + + peak_kbps *= 1006; +- peak_kbps = div_u64(peak_kbps, 1000); ++ peak_kbps *= fec_overhead_multiplier_x1000; ++ peak_kbps = div_u64(peak_kbps, 1000 * 1000); + return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); + } + +@@ -726,11 +758,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, + int link_timeslots_used; + int fair_pbn_alloc; + int ret = 0; ++ uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); + + for (i = 0; i < count; i++) { + if (vars[i + k].dsc_enabled) { + initial_slack[i] = +- kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; ++ kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; + bpp_increased[i] = false; + remaining_to_increase += 1; + } else { +@@ -826,6 +859,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, + int next_index; + int remaining_to_try = 0; + int ret; ++ uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); + + for (i = 0; i < count; i++) { + if (vars[i + k].dsc_enabled +@@ -855,7 +889,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, + if (next_index == -1) + break; + +- vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); ++ vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, +@@ -868,7 +902,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, + vars[next_index].dsc_enabled = false; + vars[next_index].bpp_x16 = 0; + } else { +- vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps); ++ vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps, fec_overhead_multiplier_x1000); + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, +@@ -897,6 +931,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + int count = 0; + int i, k, ret; + bool debugfs_overwrite = false; ++ uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); + + memset(params, 0, sizeof(params)); + +@@ -958,7 +993,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + /* Try no compression */ + for (i = 0; i < count; i++) { + vars[i + k].aconnector = params[i].aconnector; +- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); ++ vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); + vars[i + k].dsc_enabled = false; + vars[i + k].bpp_x16 = 0; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, +@@ -977,7 +1012,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + /* Try max compression */ + for (i = 0; i < count; i++) { + if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { +- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); ++ vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); + vars[i + k].dsc_enabled = true; + vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, +@@ -985,7 +1020,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + if (ret < 0) + return ret; + } else { +- vars[i + k].pbn = 
kbps_to_peak_pbn(params[i].bw_range.stream_kbps); ++ vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); + vars[i + k].dsc_enabled = false; + vars[i + k].bpp_x16 = 0; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +index 97fd70df531bf..1e4ede1e57abd 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +@@ -34,6 +34,21 @@ + #define SYNAPTICS_RC_OFFSET 0x4BC + #define SYNAPTICS_RC_DATA 0x4C0 + ++#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C ++ ++/** ++ * Panamera MST Hub detection ++ * Offset DPCD 050Eh == 0x5A indicates cascaded MST hub case ++ * Check from beginning of branch device vendor specific field (050Ch) ++ */ ++#define IS_SYNAPTICS_PANAMERA(branchDevName) (((int)branchDevName[4] & 0xF0) == 0x50 ? 1 : 0) ++#define BRANCH_HW_REVISION_PANAMERA_A2 0x10 ++#define SYNAPTICS_CASCADED_HUB_ID 0x5A ++#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) ++ ++#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 ++#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 ++ + struct amdgpu_display_manager; + struct amdgpu_dm_connector; + +diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +index 7031db145a77a..3524b5811682a 100644 +--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +@@ -91,7 +91,15 @@ static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj) + static int etnaviv_gem_prime_mmap_obj(struct etnaviv_gem_object *etnaviv_obj, + struct vm_area_struct *vma) + { +- return dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0); ++ int ret; ++ ++ ret = dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0); ++ if (!ret) { ++ /* Drop the reference acquired by drm_gem_mmap_obj(). */ ++ drm_gem_object_put(&etnaviv_obj->base); ++ } ++ ++ return ret; + } + + static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = { +diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c +index 250e83f1f5ac0..c3928d28cd443 100644 +--- a/drivers/gpu/drm/i915/display/intel_color.c ++++ b/drivers/gpu/drm/i915/display/intel_color.c +@@ -514,6 +514,22 @@ static void icl_color_commit_noarm(const struct intel_crtc_state *crtc_state) + icl_load_csc_matrix(crtc_state); + } + ++static void skl_color_commit_noarm(const struct intel_crtc_state *crtc_state) ++{ ++ /* ++ * Possibly related to display WA #1184, SKL CSC loses the latched ++ * CSC coeff/offset register values if the CSC registers are disarmed ++ * between DC5 exit and PSR exit. This will cause the plane(s) to ++ * output all black (until CSC_MODE is rearmed and properly latched). ++ * Once PSR exit (and proper register latching) has occurred the ++ * danger is over. Thus when PSR is enabled the CSC coeff/offset ++ * register programming will be peformed from skl_color_commit_arm() ++ * which is called after PSR exit. 
++ */ ++ if (!crtc_state->has_psr) ++ ilk_load_csc_matrix(crtc_state); ++} ++ + static void ilk_color_commit_noarm(const struct intel_crtc_state *crtc_state) + { + ilk_load_csc_matrix(crtc_state); +@@ -556,6 +572,9 @@ static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state) + enum pipe pipe = crtc->pipe; + u32 val = 0; + ++ if (crtc_state->has_psr) ++ ilk_load_csc_matrix(crtc_state); ++ + /* + * We don't (yet) allow userspace to control the pipe background color, + * so force it to black, but apply pipe gamma and CSC appropriately +@@ -574,6 +593,25 @@ static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state) + crtc_state->csc_mode); + } + ++static void icl_color_commit_arm(const struct intel_crtc_state *crtc_state) ++{ ++ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); ++ struct drm_i915_private *i915 = to_i915(crtc->base.dev); ++ enum pipe pipe = crtc->pipe; ++ ++ /* ++ * We don't (yet) allow userspace to control the pipe background color, ++ * so force it to black. ++ */ ++ intel_de_write(i915, SKL_BOTTOM_COLOR(pipe), 0); ++ ++ intel_de_write(i915, GAMMA_MODE(crtc->pipe), ++ crtc_state->gamma_mode); ++ ++ intel_de_write_fw(i915, PIPE_CSC_MODE(crtc->pipe), ++ crtc_state->csc_mode); ++} ++ + static struct drm_property_blob * + create_linear_lut(struct drm_i915_private *i915, int lut_size) + { +@@ -2287,14 +2325,14 @@ static const struct intel_color_funcs i9xx_color_funcs = { + static const struct intel_color_funcs icl_color_funcs = { + .color_check = icl_color_check, + .color_commit_noarm = icl_color_commit_noarm, +- .color_commit_arm = skl_color_commit_arm, ++ .color_commit_arm = icl_color_commit_arm, + .load_luts = icl_load_luts, + .read_luts = icl_read_luts, + }; + + static const struct intel_color_funcs glk_color_funcs = { + .color_check = glk_color_check, +- .color_commit_noarm = ilk_color_commit_noarm, ++ .color_commit_noarm = skl_color_commit_noarm, + .color_commit_arm = skl_color_commit_arm, + .load_luts = glk_load_luts, + .read_luts = glk_read_luts, +@@ -2302,7 +2340,7 @@ static const struct intel_color_funcs glk_color_funcs = { + + static const struct intel_color_funcs skl_color_funcs = { + .color_check = ivb_color_check, +- .color_commit_noarm = ilk_color_commit_noarm, ++ .color_commit_noarm = skl_color_commit_noarm, + .color_commit_arm = skl_color_commit_arm, + .load_luts = bdw_load_luts, + .read_luts = NULL, +diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c +index 8b6994853f6f8..f0aad2403109b 100644 +--- a/drivers/gpu/drm/i915/display/intel_display.c ++++ b/drivers/gpu/drm/i915/display/intel_display.c +@@ -7107,6 +7107,8 @@ static void intel_update_crtc(struct intel_atomic_state *state, + + intel_fbc_update(state, crtc); + ++ drm_WARN_ON(&i915->drm, !intel_display_power_is_enabled(i915, POWER_DOMAIN_DC_OFF)); ++ + if (!modeset && + intel_crtc_needs_color_update(new_crtc_state)) + intel_color_commit_noarm(new_crtc_state); +@@ -7480,8 +7482,28 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) + drm_atomic_helper_wait_for_dependencies(&state->base); + drm_dp_mst_atomic_wait_for_dependencies(&state->base); + +- if (state->modeset) +- wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); ++ /* ++ * During full modesets we write a lot of registers, wait ++ * for PLLs, etc. Doing that while DC states are enabled ++ * is not a good idea. 
++ * ++ * During fastsets and other updates we also need to ++ * disable DC states due to the following scenario: ++ * 1. DC5 exit and PSR exit happen ++ * 2. Some or all _noarm() registers are written ++ * 3. Due to some long delay PSR is re-entered ++ * 4. DC5 entry -> DMC saves the already written new ++ * _noarm() registers and the old not yet written ++ * _arm() registers ++ * 5. DC5 exit -> DMC restores a mixture of old and ++ * new register values and arms the update ++ * 6. PSR exit -> hardware latches a mixture of old and ++ * new register values -> corrupted frame, or worse ++ * 7. New _arm() registers are finally written ++ * 8. Hardware finally latches a complete set of new ++ * register values, and subsequent frames will be OK again ++ */ ++ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_DC_OFF); + + intel_atomic_prepare_plane_clear_colors(state); + +@@ -7625,8 +7647,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) + * the culprit. + */ + intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); +- intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref); + } ++ intel_display_power_put(dev_priv, POWER_DOMAIN_DC_OFF, wakeref); + intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); + + /* +diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c +index ad1a37b515fb1..2a9f40a2b3ed0 100644 +--- a/drivers/gpu/drm/i915/display/intel_dpt.c ++++ b/drivers/gpu/drm/i915/display/intel_dpt.c +@@ -301,6 +301,7 @@ intel_dpt_create(struct intel_framebuffer *fb) + vm->pte_encode = gen8_ggtt_pte_encode; + + dpt->obj = dpt_obj; ++ dpt->obj->is_dpt = true; + + return &dpt->vm; + } +@@ -309,5 +310,6 @@ void intel_dpt_destroy(struct i915_address_space *vm) + { + struct i915_dpt *dpt = i915_vm_to_dpt(vm); + ++ dpt->obj->is_dpt = false; + i915_vm_put(&dpt->vm); + } +diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c +index 70624b4b2d38c..c5d41fd51118f 100644 +--- a/drivers/gpu/drm/i915/display/intel_tc.c ++++ b/drivers/gpu/drm/i915/display/intel_tc.c +@@ -436,9 +436,9 @@ static bool icl_tc_phy_is_owned(struct intel_digital_port *dig_port) + PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia)); + if (val == 0xffffffff) { + drm_dbg_kms(&i915->drm, +- "Port %s: PHY in TCCOLD, assume safe mode\n", ++ "Port %s: PHY in TCCOLD, assume not owned\n", + dig_port->tc_port_name); +- return true; ++ return false; + } + + return val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +index 8949fb0a944f6..3198b64ad7dbc 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +@@ -127,7 +127,8 @@ i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, + + memcpy(map, data, size); + +- i915_gem_object_unpin_map(obj); ++ i915_gem_object_flush_map(obj); ++ __i915_gem_object_release_map(obj); + + return obj; + } +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h +index 3db53769864c2..2f53a68348217 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h ++++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h +@@ -319,7 +319,7 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj) + static inline bool + i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) + { +- return READ_ONCE(obj->frontbuffer); ++ return READ_ONCE(obj->frontbuffer) || obj->is_dpt; + } + 
+ static inline unsigned int +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +index ab4c2f90a5643..1d0d8ee9d707d 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h ++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +@@ -491,6 +491,9 @@ struct drm_i915_gem_object { + */ + unsigned int cache_dirty:1; + ++ /* @is_dpt: Object houses a display page table (DPT) */ ++ unsigned int is_dpt:1; ++ + /** + * @read_domains: Read memory domains. + * +diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c +index 9ad3bc7201cba..fc73cfe0e39bb 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rps.c ++++ b/drivers/gpu/drm/i915/gt/intel_rps.c +@@ -2074,16 +2074,6 @@ void intel_rps_sanitize(struct intel_rps *rps) + rps_disable_interrupts(rps); + } + +-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) +-{ +- struct drm_i915_private *i915 = rps_to_i915(rps); +- i915_reg_t rpstat; +- +- rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; +- +- return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); +-} +- + u32 intel_rps_read_rpstat(struct intel_rps *rps) + { + struct drm_i915_private *i915 = rps_to_i915(rps); +@@ -2094,7 +2084,7 @@ u32 intel_rps_read_rpstat(struct intel_rps *rps) + return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); + } + +-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) ++static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) + { + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 cagf; +@@ -2117,10 +2107,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) + return cagf; + } + +-static u32 read_cagf(struct intel_rps *rps) ++static u32 __read_cagf(struct intel_rps *rps, bool take_fw) + { + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); ++ i915_reg_t r = INVALID_MMIO_REG; + u32 freq; + + /* +@@ -2128,22 +2119,30 @@ static u32 read_cagf(struct intel_rps *rps) + * registers will return 0 freq when GT is in RC6 + */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { +- freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); ++ r = MTL_MIRROR_TARGET_WP1; + } else if (GRAPHICS_VER(i915) >= 12) { +- freq = intel_uncore_read(uncore, GEN12_RPSTAT1); ++ r = GEN12_RPSTAT1; + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + vlv_punit_get(i915); + freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(i915); + } else if (GRAPHICS_VER(i915) >= 6) { +- freq = intel_uncore_read(uncore, GEN6_RPSTAT1); ++ r = GEN6_RPSTAT1; + } else { +- freq = intel_uncore_read(uncore, MEMSTAT_ILK); ++ r = MEMSTAT_ILK; + } + ++ if (i915_mmio_reg_valid(r)) ++ freq = take_fw ? 
intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); ++ + return intel_rps_get_cagf(rps, freq); + } + ++static u32 read_cagf(struct intel_rps *rps) ++{ ++ return __read_cagf(rps, true); ++} ++ + u32 intel_rps_read_actual_frequency(struct intel_rps *rps) + { + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; +@@ -2156,7 +2155,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) + return freq; + } + +-u32 intel_rps_read_punit_req(struct intel_rps *rps) ++u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) ++{ ++ return intel_gpu_freq(rps, __read_cagf(rps, false)); ++} ++ ++static u32 intel_rps_read_punit_req(struct intel_rps *rps) + { + struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; +diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h +index 9e1cad9ba0e9c..d86ddfee095ed 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rps.h ++++ b/drivers/gpu/drm/i915/gt/intel_rps.h +@@ -34,8 +34,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); + + int intel_gpu_freq(struct intel_rps *rps, int val); + int intel_freq_opcode(struct intel_rps *rps, int val); +-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); + u32 intel_rps_read_actual_frequency(struct intel_rps *rps); ++u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); + u32 intel_rps_get_requested_frequency(struct intel_rps *rps); + u32 intel_rps_get_min_frequency(struct intel_rps *rps); + u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); +@@ -46,10 +46,8 @@ int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); + u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); + u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); + u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); +-u32 intel_rps_read_punit_req(struct intel_rps *rps); + u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); + u32 intel_rps_read_rpstat(struct intel_rps *rps); +-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); + void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); + void intel_rps_raise_unslice(struct intel_rps *rps); + void intel_rps_lower_unslice(struct intel_rps *rps); +diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c +index 125b6ca25a756..7d5e2c53c23a7 100644 +--- a/drivers/gpu/drm/i915/i915_perf.c ++++ b/drivers/gpu/drm/i915/i915_perf.c +@@ -1592,9 +1592,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) + /* + * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6. 
+ */ +- if (intel_uc_uses_guc_rc(>->uc) && +- (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) ++ if (stream->override_gucrc) + drm_WARN_ON(>->i915->drm, + intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc)); + +@@ -3293,8 +3291,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, + if (ret) { + drm_dbg(&stream->perf->i915->drm, + "Unable to override gucrc mode\n"); +- goto err_config; ++ goto err_gucrc; + } ++ ++ stream->override_gucrc = true; + } + + ret = alloc_oa_buffer(stream); +@@ -3333,11 +3333,15 @@ err_enable: + free_oa_buffer(stream); + + err_oa_buf_alloc: +- free_oa_configs(stream); ++ if (stream->override_gucrc) ++ intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc); + ++err_gucrc: + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_engine_pm_put(stream->engine); + ++ free_oa_configs(stream); ++ + err_config: + free_noa_wait(stream); + +diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h +index ca150b7af3f29..4d5d8c365d9e2 100644 +--- a/drivers/gpu/drm/i915/i915_perf_types.h ++++ b/drivers/gpu/drm/i915/i915_perf_types.h +@@ -316,6 +316,12 @@ struct i915_perf_stream { + * buffer should be checked for available data. + */ + u64 poll_oa_period; ++ ++ /** ++ * @override_gucrc: GuC RC has been overridden for the perf stream, ++ * and we need to restore the default configuration on release. ++ */ ++ bool override_gucrc; + }; + + /** +diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c +index 52531ab28c5f5..6d422b056f8a8 100644 +--- a/drivers/gpu/drm/i915/i915_pmu.c ++++ b/drivers/gpu/drm/i915/i915_pmu.c +@@ -393,14 +393,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) + * case we assume the system is running at the intended + * frequency. Fortunately, the read should rarely fail! + */ +- val = intel_rps_read_rpstat_fw(rps); +- if (val) +- val = intel_rps_get_cagf(rps, val); +- else +- val = rps->cur_freq; ++ val = intel_rps_read_actual_frequency_fw(rps); ++ if (!val) ++ val = intel_gpu_freq(rps, rps->cur_freq); + + add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], +- intel_gpu_freq(rps, val), period_ns / 1000); ++ val, period_ns / 1000); + } + + if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { +diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c +index 40409a29f5b69..91b5ecc575380 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c ++++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c +@@ -33,6 +33,7 @@ + #include <linux/apple-gmux.h> + #include <linux/backlight.h> + #include <linux/idr.h> ++#include <drm/drm_probe_helper.h> + + #include "nouveau_drv.h" + #include "nouveau_reg.h" +@@ -299,8 +300,12 @@ nv50_backlight_init(struct nouveau_backlight *bl, + struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); + struct nvif_object *device = &drm->client.device.object; + ++ /* ++ * Note when this runs the connectors have not been probed yet, ++ * so nv_conn->base.status is not set yet. 
++ */ + if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)) || +- nv_conn->base.status != connector_status_connected) ++ drm_helper_probe_detect(&nv_conn->base, NULL, false) != connector_status_connected) + return -ENODEV; + + if (nv_conn->type == DCB_CONNECTOR_eDP) { +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index f060ac7376e69..cfeb24d40d378 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -168,7 +168,13 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, + + raw_local_irq_enable(); + ret = __intel_idle(dev, drv, index); +- raw_local_irq_disable(); ++ ++ /* ++ * The lockdep hardirqs state may be changed to 'on' with timer ++ * tick interrupt followed by __do_softirq(). Use local_irq_disable() ++ * to keep the hardirqs state correct. ++ */ ++ local_irq_disable(); + + return ret; + } +diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c +index 2959d80f7fdb6..cd36cf7165423 100644 +--- a/drivers/input/joystick/xpad.c ++++ b/drivers/input/joystick/xpad.c +@@ -779,9 +779,6 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d + input_report_key(dev, BTN_C, data[8]); + input_report_key(dev, BTN_Z, data[9]); + +- /* Profile button has a value of 0-3, so it is reported as an axis */ +- if (xpad->mapping & MAP_PROFILE_BUTTON) +- input_report_abs(dev, ABS_PROFILE, data[34]); + + input_sync(dev); + } +@@ -1059,6 +1056,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char + (__u16) le16_to_cpup((__le16 *)(data + 8))); + } + ++ /* Profile button has a value of 0-3, so it is reported as an axis */ ++ if (xpad->mapping & MAP_PROFILE_BUTTON) ++ input_report_abs(dev, ABS_PROFILE, data[34]); ++ + /* paddle handling */ + /* based on SDL's SDL_hidapi_xboxone.c */ + if (xpad->mapping & MAP_PADDLES) { +diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c +index 989228b5a0a44..e2c11d9f3868f 100644 +--- a/drivers/input/mouse/alps.c ++++ b/drivers/input/mouse/alps.c +@@ -852,8 +852,8 @@ static void alps_process_packet_v6(struct psmouse *psmouse) + x = y = z = 0; + + /* Divide 4 since trackpoint's speed is too fast */ +- input_report_rel(dev2, REL_X, (char)x / 4); +- input_report_rel(dev2, REL_Y, -((char)y / 4)); ++ input_report_rel(dev2, REL_X, (s8)x / 4); ++ input_report_rel(dev2, REL_Y, -((s8)y / 4)); + + psmouse_report_standard_buttons(dev2, packet[3]); + +@@ -1104,8 +1104,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse) + ((packet[3] & 0x20) << 1); + z = (packet[5] & 0x3f) | ((packet[3] & 0x80) >> 1); + +- input_report_rel(dev2, REL_X, (char)x); +- input_report_rel(dev2, REL_Y, -((char)y)); ++ input_report_rel(dev2, REL_X, (s8)x); ++ input_report_rel(dev2, REL_Y, -((s8)y)); + input_report_abs(dev2, ABS_PRESSURE, z); + + psmouse_report_standard_buttons(dev2, packet[1]); +@@ -2294,20 +2294,20 @@ static int alps_get_v3_v7_resolution(struct psmouse *psmouse, int reg_pitch) + if (reg < 0) + return reg; + +- x_pitch = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */ ++ x_pitch = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */ + x_pitch = 50 + 2 * x_pitch; /* In 0.1 mm units */ + +- y_pitch = (char)reg >> 4; /* sign extend upper 4 bits */ ++ y_pitch = (s8)reg >> 4; /* sign extend upper 4 bits */ + y_pitch = 36 + 2 * y_pitch; /* In 0.1 mm units */ + + reg = alps_command_mode_read_reg(psmouse, reg_pitch + 1); + if (reg < 0) + return reg; + +- x_electrode = (char)(reg << 4) >> 4; /* sign 
extend lower 4 bits */ ++ x_electrode = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */ + x_electrode = 17 + x_electrode; + +- y_electrode = (char)reg >> 4; /* sign extend upper 4 bits */ ++ y_electrode = (s8)reg >> 4; /* sign extend upper 4 bits */ + y_electrode = 13 + y_electrode; + + x_phys = x_pitch * (x_electrode - 1); /* In 0.1 mm units */ +diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c +index 6fd5fff0cbfff..c74b99077d16a 100644 +--- a/drivers/input/mouse/focaltech.c ++++ b/drivers/input/mouse/focaltech.c +@@ -202,8 +202,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse, + state->pressed = packet[0] >> 7; + finger1 = ((packet[0] >> 4) & 0x7) - 1; + if (finger1 < FOC_MAX_FINGERS) { +- state->fingers[finger1].x += (char)packet[1]; +- state->fingers[finger1].y += (char)packet[2]; ++ state->fingers[finger1].x += (s8)packet[1]; ++ state->fingers[finger1].y += (s8)packet[2]; + } else { + psmouse_err(psmouse, "First finger in rel packet invalid: %d\n", + finger1); +@@ -218,8 +218,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse, + */ + finger2 = ((packet[3] >> 4) & 0x7) - 1; + if (finger2 < FOC_MAX_FINGERS) { +- state->fingers[finger2].x += (char)packet[4]; +- state->fingers[finger2].y += (char)packet[5]; ++ state->fingers[finger2].x += (s8)packet[4]; ++ state->fingers[finger2].y += (s8)packet[5]; + } + } + +diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h +index efc61736099b9..028e45bd050bf 100644 +--- a/drivers/input/serio/i8042-acpipnpio.h ++++ b/drivers/input/serio/i8042-acpipnpio.h +@@ -610,6 +610,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, ++ { ++ /* Fujitsu Lifebook A574/H */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"), ++ }, ++ .driver_data = (void *)(SERIO_QUIRK_NOMUX) ++ }, + { + /* Gigabyte M912 */ + .matches = { +@@ -1116,6 +1124,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, ++ { ++ /* ++ * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes ++ * the keyboard very laggy for ~5 seconds after boot and ++ * sometimes also after resume. ++ * However both are required for the keyboard to not fail ++ * completely sometimes after boot or resume. ++ */ ++ .matches = { ++ DMI_MATCH(DMI_BOARD_NAME, "N150CU"), ++ }, ++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | ++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) ++ }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), +@@ -1123,6 +1145,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, ++ { ++ /* ++ * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes ++ * the keyboard very laggy for ~5 seconds after boot and ++ * sometimes also after resume. ++ * However both are required for the keyboard to not fail ++ * completely sometimes after boot or resume. 
++ */ ++ .matches = { ++ DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"), ++ }, ++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | ++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) ++ }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), +diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c +index 8a0a8078de8f7..2e502fcb0d1fd 100644 +--- a/drivers/input/touchscreen/goodix.c ++++ b/drivers/input/touchscreen/goodix.c +@@ -124,10 +124,18 @@ static const unsigned long goodix_irq_flags[] = { + static const struct dmi_system_id nine_bytes_report[] = { + #if defined(CONFIG_DMI) && defined(CONFIG_X86) + { +- .ident = "Lenovo YogaBook", +- /* YB1-X91L/F and YB1-X90L/F */ ++ /* Lenovo Yoga Book X90F / X90L */ + .matches = { +- DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9") ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), ++ } ++ }, ++ { ++ /* Lenovo Yoga Book X91F / X91L */ ++ .matches = { ++ /* Non exact match to match F + L versions */ ++ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"), + } + }, + #endif +diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c +index b00a0ceb2d137..c80c524ad32d7 100644 +--- a/drivers/iommu/intel/dmar.c ++++ b/drivers/iommu/intel/dmar.c +@@ -1057,7 +1057,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) + } + + err = -EINVAL; +- if (cap_sagaw(iommu->cap) == 0) { ++ if (!cap_sagaw(iommu->cap) && ++ (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) { + pr_info("%s: No supported address widths. Not attempting DMA translation.\n", + iommu->name); + drhd->ignored = 1; +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index fc953013ea260..1b6c3c783a8eb 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -1509,6 +1509,8 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, + ret = 1; + break; + default: ++ if (len) ++ setup_split_accounting(ci, *len); + /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ + alloc_multiple_bios(&blist, ci, ti, num_bios); + while ((clone = bio_list_pop(&blist))) { +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 272cc5d14906f..beab84f0c585c 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -3131,6 +3131,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) + err = kstrtouint(buf, 10, (unsigned int *)&slot); + if (err < 0) + return err; ++ if (slot < 0) ++ /* overflow */ ++ return -ENOSPC; + } + if (rdev->mddev->pers && slot == -1) { + /* Setting 'slot' on an active array requires also +diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c +index 142d4c74017c0..d59ecf776715c 100644 +--- a/drivers/media/platform/qcom/venus/firmware.c ++++ b/drivers/media/platform/qcom/venus/firmware.c +@@ -38,8 +38,8 @@ static void venus_reset_cpu(struct venus_core *core) + writel(fw_size, wrapper_base + WRAPPER_FW_END_ADDR); + writel(0, wrapper_base + WRAPPER_CPA_START_ADDR); + writel(fw_size, wrapper_base + WRAPPER_CPA_END_ADDR); +- writel(0, wrapper_base + WRAPPER_NONPIX_START_ADDR); +- writel(0, wrapper_base + WRAPPER_NONPIX_END_ADDR); ++ writel(fw_size, wrapper_base + WRAPPER_NONPIX_START_ADDR); ++ writel(fw_size, wrapper_base + WRAPPER_NONPIX_END_ADDR); + + if (IS_V6(core)) { + /* Bring XTSS out of reset */ +diff --git a/drivers/mtd/nand/ecc-mxic.c b/drivers/mtd/nand/ecc-mxic.c +index 8afdca731b874..6b487ffe2f2dc 100644 +--- 
a/drivers/mtd/nand/ecc-mxic.c ++++ b/drivers/mtd/nand/ecc-mxic.c +@@ -429,6 +429,7 @@ static int mxic_ecc_data_xfer_wait_for_completion(struct mxic_ecc_engine *mxic) + mxic_ecc_enable_int(mxic); + ret = wait_for_completion_timeout(&mxic->complete, + msecs_to_jiffies(1000)); ++ ret = ret ? 0 : -ETIMEDOUT; + mxic_ecc_disable_int(mxic); + } else { + ret = readl_poll_timeout(mxic->regs + INTRPT_STS, val, +diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c +index 5ee01231ac4cd..a28574c009003 100644 +--- a/drivers/mtd/nand/raw/meson_nand.c ++++ b/drivers/mtd/nand/raw/meson_nand.c +@@ -176,6 +176,7 @@ struct meson_nfc { + + dma_addr_t daddr; + dma_addr_t iaddr; ++ u32 info_bytes; + + unsigned long assigned_cs; + }; +@@ -503,6 +504,7 @@ static int meson_nfc_dma_buffer_setup(struct nand_chip *nand, void *databuf, + nfc->daddr, datalen, dir); + return ret; + } ++ nfc->info_bytes = infolen; + cmd = GENCMDIADDRL(NFC_CMD_AIL, nfc->iaddr); + writel(cmd, nfc->reg_base + NFC_REG_CMD); + +@@ -520,8 +522,10 @@ static void meson_nfc_dma_buffer_release(struct nand_chip *nand, + struct meson_nfc *nfc = nand_get_controller_data(nand); + + dma_unmap_single(nfc->dev, nfc->daddr, datalen, dir); +- if (infolen) ++ if (infolen) { + dma_unmap_single(nfc->dev, nfc->iaddr, infolen, dir); ++ nfc->info_bytes = 0; ++ } + } + + static int meson_nfc_read_buf(struct nand_chip *nand, u8 *buf, int len) +@@ -710,6 +714,8 @@ static void meson_nfc_check_ecc_pages_valid(struct meson_nfc *nfc, + usleep_range(10, 15); + /* info is updated by nfc dma engine*/ + smp_rmb(); ++ dma_sync_single_for_cpu(nfc->dev, nfc->iaddr, nfc->info_bytes, ++ DMA_FROM_DEVICE); + ret = *info & ECC_COMPLETE; + } while (!ret); + } +@@ -991,7 +997,7 @@ static const struct mtd_ooblayout_ops meson_ooblayout_ops = { + + static int meson_nfc_clk_init(struct meson_nfc *nfc) + { +- struct clk_parent_data nfc_divider_parent_data[1]; ++ struct clk_parent_data nfc_divider_parent_data[1] = {0}; + struct clk_init_data init = {0}; + int ret; + +diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c +index 003b0ac2854c9..3fffd5da8d3b0 100644 +--- a/drivers/net/dsa/microchip/ksz8795.c ++++ b/drivers/net/dsa/microchip/ksz8795.c +@@ -958,15 +958,14 @@ int ksz8_fdb_dump(struct ksz_device *dev, int port, + u16 entries = 0; + u8 timestamp = 0; + u8 fid; +- u8 member; +- struct alu_struct alu; ++ u8 src_port; ++ u8 mac[ETH_ALEN]; + + do { +- alu.is_static = false; +- ret = ksz8_r_dyn_mac_table(dev, i, alu.mac, &fid, &member, ++ ret = ksz8_r_dyn_mac_table(dev, i, mac, &fid, &src_port, + ×tamp, &entries); +- if (!ret && (member & BIT(port))) { +- ret = cb(alu.mac, alu.fid, alu.is_static, data); ++ if (!ret && port == src_port) { ++ ret = cb(mac, fid, false, data); + if (ret) + break; + } +diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c +index 2f4623f3bd852..3698112138b78 100644 +--- a/drivers/net/dsa/microchip/ksz8863_smi.c ++++ b/drivers/net/dsa/microchip/ksz8863_smi.c +@@ -82,22 +82,16 @@ static const struct regmap_bus regmap_smi[] = { + { + .read = ksz8863_mdio_read, + .write = ksz8863_mdio_write, +- .max_raw_read = 1, +- .max_raw_write = 1, + }, + { + .read = ksz8863_mdio_read, + .write = ksz8863_mdio_write, + .val_format_endian_default = REGMAP_ENDIAN_BIG, +- .max_raw_read = 2, +- .max_raw_write = 2, + }, + { + .read = ksz8863_mdio_read, + .write = ksz8863_mdio_write, + .val_format_endian_default = REGMAP_ENDIAN_BIG, +- .max_raw_read = 4, +- .max_raw_write = 4, + } 
+ }; + +@@ -108,7 +102,6 @@ static const struct regmap_config ksz8863_regmap_config[] = { + .pad_bits = 24, + .val_bits = 8, + .cache_type = REGCACHE_NONE, +- .use_single_read = 1, + .lock = ksz_regmap_lock, + .unlock = ksz_regmap_unlock, + }, +@@ -118,7 +111,6 @@ static const struct regmap_config ksz8863_regmap_config[] = { + .pad_bits = 24, + .val_bits = 16, + .cache_type = REGCACHE_NONE, +- .use_single_read = 1, + .lock = ksz_regmap_lock, + .unlock = ksz_regmap_unlock, + }, +@@ -128,7 +120,6 @@ static const struct regmap_config ksz8863_regmap_config[] = { + .pad_bits = 24, + .val_bits = 32, + .cache_type = REGCACHE_NONE, +- .use_single_read = 1, + .lock = ksz_regmap_lock, + .unlock = ksz_regmap_unlock, + } +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index 19cd05762ab77..8601a9e4e4d2f 100644 +--- a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -395,13 +395,13 @@ static const u32 ksz8863_masks[] = { + [VLAN_TABLE_VALID] = BIT(19), + [STATIC_MAC_TABLE_VALID] = BIT(19), + [STATIC_MAC_TABLE_USE_FID] = BIT(21), +- [STATIC_MAC_TABLE_FID] = GENMASK(29, 26), ++ [STATIC_MAC_TABLE_FID] = GENMASK(25, 22), + [STATIC_MAC_TABLE_OVERRIDE] = BIT(20), + [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(18, 16), +- [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(5, 0), +- [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(7), ++ [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(1, 0), ++ [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(2), + [DYNAMIC_MAC_TABLE_NOT_READY] = BIT(7), +- [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 28), ++ [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 24), + [DYNAMIC_MAC_TABLE_FID] = GENMASK(19, 16), + [DYNAMIC_MAC_TABLE_SRC_PORT] = GENMASK(21, 20), + [DYNAMIC_MAC_TABLE_TIMESTAMP] = GENMASK(23, 22), +@@ -411,10 +411,10 @@ static u8 ksz8863_shifts[] = { + [VLAN_TABLE_MEMBERSHIP_S] = 16, + [STATIC_MAC_FWD_PORTS] = 16, + [STATIC_MAC_FID] = 22, +- [DYNAMIC_MAC_ENTRIES_H] = 3, ++ [DYNAMIC_MAC_ENTRIES_H] = 8, + [DYNAMIC_MAC_ENTRIES] = 24, + [DYNAMIC_MAC_FID] = 16, +- [DYNAMIC_MAC_TIMESTAMP] = 24, ++ [DYNAMIC_MAC_TIMESTAMP] = 22, + [DYNAMIC_MAC_SRC_PORT] = 20, + }; + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 89829e0ca8e8f..8211a4d373e81 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -3354,9 +3354,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) + * If this is the upstream port for this switch, enable + * forwarding of unknown unicasts and multicasts. + */ +- reg = MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP | +- MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP | ++ reg = MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP | + MV88E6XXX_PORT_CTL0_STATE_FORWARDING; ++ /* Forward any IPv4 IGMP or IPv6 MLD frames received ++ * by a USER port to the CPU port to allow snooping. 
++ */ ++ if (dsa_is_user_port(ds, port)) ++ reg |= MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP; ++ + err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL0, reg); + if (err) + return err; +diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c +index 3e54fac5f9027..5a8fe707ca25e 100644 +--- a/drivers/net/dsa/realtek/realtek-mdio.c ++++ b/drivers/net/dsa/realtek/realtek-mdio.c +@@ -21,6 +21,7 @@ + + #include <linux/module.h> + #include <linux/of_device.h> ++#include <linux/overflow.h> + #include <linux/regmap.h> + + #include "realtek.h" +@@ -152,7 +153,9 @@ static int realtek_mdio_probe(struct mdio_device *mdiodev) + if (!var) + return -EINVAL; + +- priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); ++ priv = devm_kzalloc(&mdiodev->dev, ++ size_add(sizeof(*priv), var->chip_data_sz), ++ GFP_KERNEL); + if (!priv) + return -ENOMEM; + +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +index 16c490692f422..12083b9679b54 100644 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -672,6 +672,18 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, + return 0; + } + ++static struct sk_buff * ++bnx2x_build_skb(const struct bnx2x_fastpath *fp, void *data) ++{ ++ struct sk_buff *skb; ++ ++ if (fp->rx_frag_size) ++ skb = build_skb(data, fp->rx_frag_size); ++ else ++ skb = slab_build_skb(data); ++ return skb; ++} ++ + static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data) + { + if (fp->rx_frag_size) +@@ -779,7 +791,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, + dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping), + fp->rx_buf_size, DMA_FROM_DEVICE); + if (likely(new_data)) +- skb = build_skb(data, fp->rx_frag_size); ++ skb = bnx2x_build_skb(fp, data); + + if (likely(skb)) { + #ifdef BNX2X_STOP_ON_ERROR +@@ -1046,7 +1058,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) + dma_unmap_addr(rx_buf, mapping), + fp->rx_buf_size, + DMA_FROM_DEVICE); +- skb = build_skb(data, fp->rx_frag_size); ++ skb = bnx2x_build_skb(fp, data); + if (unlikely(!skb)) { + bnx2x_frag_free(fp, data); + bnx2x_fp_qstats(bp, fp)-> +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index b44b2ec5e61a2..015b5848b9583 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -175,12 +175,12 @@ static const struct pci_device_id bnxt_pci_tbl[] = { + { PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 }, + { PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 }, + { PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 }, +- { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR }, ++ { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57502_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR }, +- { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR }, +- { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR }, ++ { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57508_NPAR }, ++ { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57502_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR }, +- { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR }, ++ { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 }, + { 
PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, + #ifdef CONFIG_BNXT_SRIOV +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +index 56355e64815e2..3056e5bb7d6fa 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -1225,6 +1225,7 @@ struct bnxt_link_info { + #define BNXT_LINK_SPEED_40GB PORT_PHY_QCFG_RESP_LINK_SPEED_40GB + #define BNXT_LINK_SPEED_50GB PORT_PHY_QCFG_RESP_LINK_SPEED_50GB + #define BNXT_LINK_SPEED_100GB PORT_PHY_QCFG_RESP_LINK_SPEED_100GB ++#define BNXT_LINK_SPEED_200GB PORT_PHY_QCFG_RESP_LINK_SPEED_200GB + u16 support_speeds; + u16 support_pam4_speeds; + u16 auto_link_speeds; /* fw adv setting */ +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +index ec573127b7076..6bd18eb5137f4 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +@@ -1714,6 +1714,8 @@ u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed) + return SPEED_50000; + case BNXT_LINK_SPEED_100GB: + return SPEED_100000; ++ case BNXT_LINK_SPEED_200GB: ++ return SPEED_200000; + default: + return SPEED_UNKNOWN; + } +@@ -3738,6 +3740,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, + bnxt_ulp_stop(bp); + rc = bnxt_close_nic(bp, true, false); + if (rc) { ++ etest->flags |= ETH_TEST_FL_FAILED; + bnxt_ulp_start(bp, rc); + return; + } +diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c +index ef4d3762bf371..ca229b0efeb65 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c +@@ -44,7 +44,7 @@ static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw, + return 0; + } + +-struct i40e_diag_reg_test_info i40e_reg_list[] = { ++const struct i40e_diag_reg_test_info i40e_reg_list[] = { + /* offset mask elements stride */ + {I40E_QTX_CTL(0), 0x0000FFBF, 1, + I40E_QTX_CTL(1) - I40E_QTX_CTL(0)}, +@@ -78,27 +78,28 @@ i40e_status i40e_diag_reg_test(struct i40e_hw *hw) + { + i40e_status ret_code = 0; + u32 reg, mask; ++ u32 elements; + u32 i, j; + + for (i = 0; i40e_reg_list[i].offset != 0 && + !ret_code; i++) { + ++ elements = i40e_reg_list[i].elements; + /* set actual reg range for dynamically allocated resources */ + if (i40e_reg_list[i].offset == I40E_QTX_CTL(0) && + hw->func_caps.num_tx_qp != 0) +- i40e_reg_list[i].elements = hw->func_caps.num_tx_qp; ++ elements = hw->func_caps.num_tx_qp; + if ((i40e_reg_list[i].offset == I40E_PFINT_ITRN(0, 0) || + i40e_reg_list[i].offset == I40E_PFINT_ITRN(1, 0) || + i40e_reg_list[i].offset == I40E_PFINT_ITRN(2, 0) || + i40e_reg_list[i].offset == I40E_QINT_TQCTL(0) || + i40e_reg_list[i].offset == I40E_QINT_RQCTL(0)) && + hw->func_caps.num_msix_vectors != 0) +- i40e_reg_list[i].elements = +- hw->func_caps.num_msix_vectors - 1; ++ elements = hw->func_caps.num_msix_vectors - 1; + + /* test register access */ + mask = i40e_reg_list[i].mask; +- for (j = 0; j < i40e_reg_list[i].elements && !ret_code; j++) { ++ for (j = 0; j < elements && !ret_code; j++) { + reg = i40e_reg_list[i].offset + + (j * i40e_reg_list[i].stride); + ret_code = i40e_diag_reg_pattern_test(hw, reg, mask); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h +index c3340f320a18c..1db7c6d572311 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h ++++ 
b/drivers/net/ethernet/intel/i40e/i40e_diag.h +@@ -20,7 +20,7 @@ struct i40e_diag_reg_test_info { + u32 stride; /* bytes between each element */ + }; + +-extern struct i40e_diag_reg_test_info i40e_reg_list[]; ++extern const struct i40e_diag_reg_test_info i40e_reg_list[]; + + i40e_status i40e_diag_reg_test(struct i40e_hw *hw); + i40e_status i40e_diag_eeprom_test(struct i40e_hw *hw); +diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c +index 6d08b397df2ad..8f0ea411dfba0 100644 +--- a/drivers/net/ethernet/intel/ice/ice_sched.c ++++ b/drivers/net/ethernet/intel/ice/ice_sched.c +@@ -2787,7 +2787,7 @@ static int + ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, + u16 vsi_handle, unsigned long *tc_bitmap) + { +- struct ice_sched_agg_vsi_info *agg_vsi_info, *old_agg_vsi_info = NULL; ++ struct ice_sched_agg_vsi_info *agg_vsi_info, *iter, *old_agg_vsi_info = NULL; + struct ice_sched_agg_info *agg_info, *old_agg_info; + struct ice_hw *hw = pi->hw; + int status = 0; +@@ -2805,11 +2805,13 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, + if (old_agg_info && old_agg_info != agg_info) { + struct ice_sched_agg_vsi_info *vtmp; + +- list_for_each_entry_safe(old_agg_vsi_info, vtmp, ++ list_for_each_entry_safe(iter, vtmp, + &old_agg_info->agg_vsi_list, + list_entry) +- if (old_agg_vsi_info->vsi_handle == vsi_handle) ++ if (iter->vsi_handle == vsi_handle) { ++ old_agg_vsi_info = iter; + break; ++ } + } + + /* check if entry already exist */ +diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c +index 61f844d225123..46b36851af460 100644 +--- a/drivers/net/ethernet/intel/ice/ice_switch.c ++++ b/drivers/net/ethernet/intel/ice/ice_switch.c +@@ -1780,18 +1780,36 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, + int + ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable) + { +- struct ice_vsi_ctx *ctx; ++ struct ice_vsi_ctx *ctx, *cached_ctx; ++ int status; ++ ++ cached_ctx = ice_get_vsi_ctx(hw, vsi_handle); ++ if (!cached_ctx) ++ return -ENOENT; + +- ctx = ice_get_vsi_ctx(hw, vsi_handle); ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) +- return -EIO; ++ return -ENOMEM; ++ ++ ctx->info.q_opt_rss = cached_ctx->info.q_opt_rss; ++ ctx->info.q_opt_tc = cached_ctx->info.q_opt_tc; ++ ctx->info.q_opt_flags = cached_ctx->info.q_opt_flags; ++ ++ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + + if (enable) + ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; + else + ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; + +- return ice_update_vsi(hw, vsi_handle, ctx, NULL); ++ status = ice_update_vsi(hw, vsi_handle, ctx, NULL); ++ if (!status) { ++ cached_ctx->info.q_opt_flags = ctx->info.q_opt_flags; ++ cached_ctx->info.valid_sections |= ctx->info.valid_sections; ++ } ++ ++ kfree(ctx); ++ return status; + } + + /** +diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +index c6a58343d81d8..a2645ff3100e4 100644 +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +@@ -541,6 +541,72 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf) + } + } + ++/** ++ * ice_vc_fdir_has_prof_conflict ++ * @vf: pointer to the VF structure ++ * @conf: FDIR configuration for each filter ++ * ++ * Check if @conf has conflicting profile with existing profiles ++ * ++ * Return: true on success, and 
false on error. ++ */ ++static bool ++ice_vc_fdir_has_prof_conflict(struct ice_vf *vf, ++ struct virtchnl_fdir_fltr_conf *conf) ++{ ++ struct ice_fdir_fltr *desc; ++ ++ list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) { ++ struct virtchnl_fdir_fltr_conf *existing_conf; ++ enum ice_fltr_ptype flow_type_a, flow_type_b; ++ struct ice_fdir_fltr *a, *b; ++ ++ existing_conf = to_fltr_conf_from_desc(desc); ++ a = &existing_conf->input; ++ b = &conf->input; ++ flow_type_a = a->flow_type; ++ flow_type_b = b->flow_type; ++ ++ /* No need to compare two rules with different tunnel types or ++ * with the same protocol type. ++ */ ++ if (existing_conf->ttype != conf->ttype || ++ flow_type_a == flow_type_b) ++ continue; ++ ++ switch (flow_type_a) { ++ case ICE_FLTR_PTYPE_NONF_IPV4_UDP: ++ case ICE_FLTR_PTYPE_NONF_IPV4_TCP: ++ case ICE_FLTR_PTYPE_NONF_IPV4_SCTP: ++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) ++ return true; ++ break; ++ case ICE_FLTR_PTYPE_NONF_IPV4_OTHER: ++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_UDP || ++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_TCP || ++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) ++ return true; ++ break; ++ case ICE_FLTR_PTYPE_NONF_IPV6_UDP: ++ case ICE_FLTR_PTYPE_NONF_IPV6_TCP: ++ case ICE_FLTR_PTYPE_NONF_IPV6_SCTP: ++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) ++ return true; ++ break; ++ case ICE_FLTR_PTYPE_NONF_IPV6_OTHER: ++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_UDP || ++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_TCP || ++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_SCTP) ++ return true; ++ break; ++ default: ++ break; ++ } ++ } ++ ++ return false; ++} ++ + /** + * ice_vc_fdir_write_flow_prof + * @vf: pointer to the VF structure +@@ -677,6 +743,13 @@ ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr, + enum ice_fltr_ptype flow; + int ret; + ++ ret = ice_vc_fdir_has_prof_conflict(vf, conf); ++ if (ret) { ++ dev_dbg(dev, "Found flow profile conflict for VF %d\n", ++ vf->vf_id); ++ return ret; ++ } ++ + flow = input->flow_type; + ret = ice_vc_fdir_alloc_prof(vf, flow); + if (ret) { +diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +index 41d935d1aaf6f..40aeaa7bd739f 100644 +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +@@ -62,35 +62,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { + MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG, + MVPP22_CLS_HEK_IP4_2T, + MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 | +- MVPP2_PRS_RI_L4_TCP, ++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG, + MVPP22_CLS_HEK_IP4_2T, + MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT | +- MVPP2_PRS_RI_L4_TCP, ++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG, + MVPP22_CLS_HEK_IP4_2T, + MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER | +- MVPP2_PRS_RI_L4_TCP, ++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), + + /* TCP over IPv4 flows, fragmented, with vlan tag */ + MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG, + MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, +- MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP, ++ MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE | ++ MVPP2_PRS_RI_L4_TCP, + 
MVPP2_PRS_IP_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG, + MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, +- MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP, ++ MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE | ++ MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_IP_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG, + MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, +- MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP, ++ MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE | ++ MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_IP_MASK), + + /* UDP over IPv4 flows, Not fragmented, no vlan tag */ +@@ -132,35 +135,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { + MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG, + MVPP22_CLS_HEK_IP4_2T, + MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 | +- MVPP2_PRS_RI_L4_UDP, ++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG, + MVPP22_CLS_HEK_IP4_2T, + MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT | +- MVPP2_PRS_RI_L4_UDP, ++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG, + MVPP22_CLS_HEK_IP4_2T, + MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER | +- MVPP2_PRS_RI_L4_UDP, ++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), + + /* UDP over IPv4 flows, fragmented, with vlan tag */ + MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG, + MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, +- MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP, ++ MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE | ++ MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_IP_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG, + MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, +- MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP, ++ MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE | ++ MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_IP_MASK), + + MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG, + MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, +- MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP, ++ MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE | ++ MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_IP_MASK), + + /* TCP over IPv6 flows, not fragmented, no vlan tag */ +diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +index 75ba57bd1d46d..9af22f497a40f 100644 +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +@@ -1539,8 +1539,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) + if (!priv->prs_double_vlans) + return -ENOMEM; + +- /* Double VLAN: 0x8100, 0x88A8 */ +- err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021Q, ETH_P_8021AD, ++ /* Double VLAN: 0x88A8, 0x8100 */ ++ err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021AD, ETH_P_8021Q, + MVPP2_PRS_PORT_MASK); + if (err) + return err; +@@ -1607,59 +1607,45 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) + static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) + { + struct mvpp2_prs_entry pe; +- int tid; +- +- /* IPv4 over PPPoE with options */ +- tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, +- MVPP2_PE_LAST_FREE_TID); +- if (tid < 0) +- return tid; +- +- memset(&pe, 0, sizeof(pe)); +- mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE); +- pe.index 
= tid; +- +- mvpp2_prs_match_etype(&pe, 0, PPP_IP); +- +- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); +- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT, +- MVPP2_PRS_RI_L3_PROTO_MASK); +- /* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */ +- mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + +- sizeof(struct iphdr) - 4, +- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); +- /* Set L3 offset */ +- mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, +- MVPP2_ETH_TYPE_LEN, +- MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); +- +- /* Update shadow table and hw entry */ +- mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); +- mvpp2_prs_hw_write(priv, &pe); ++ int tid, ihl; + +- /* IPv4 over PPPoE without options */ +- tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, +- MVPP2_PE_LAST_FREE_TID); +- if (tid < 0) +- return tid; ++ /* IPv4 over PPPoE with header length >= 5 */ ++ for (ihl = MVPP2_PRS_IPV4_IHL_MIN; ihl <= MVPP2_PRS_IPV4_IHL_MAX; ihl++) { ++ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, ++ MVPP2_PE_LAST_FREE_TID); ++ if (tid < 0) ++ return tid; + +- pe.index = tid; ++ memset(&pe, 0, sizeof(pe)); ++ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE); ++ pe.index = tid; + +- mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, +- MVPP2_PRS_IPV4_HEAD | +- MVPP2_PRS_IPV4_IHL_MIN, +- MVPP2_PRS_IPV4_HEAD_MASK | +- MVPP2_PRS_IPV4_IHL_MASK); ++ mvpp2_prs_match_etype(&pe, 0, PPP_IP); ++ mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, ++ MVPP2_PRS_IPV4_HEAD | ihl, ++ MVPP2_PRS_IPV4_HEAD_MASK | ++ MVPP2_PRS_IPV4_IHL_MASK); + +- /* Clear ri before updating */ +- pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0; +- pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; +- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, +- MVPP2_PRS_RI_L3_PROTO_MASK); ++ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); ++ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, ++ MVPP2_PRS_RI_L3_PROTO_MASK); ++ /* goto ipv4 dst-address (skip eth_type + IP-header-size - 4) */ ++ mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + ++ sizeof(struct iphdr) - 4, ++ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); ++ /* Set L3 offset */ ++ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, ++ MVPP2_ETH_TYPE_LEN, ++ MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); ++ /* Set L4 offset */ ++ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, ++ MVPP2_ETH_TYPE_LEN + (ihl * 4), ++ MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + +- /* Update shadow table and hw entry */ +- mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); +- mvpp2_prs_hw_write(priv, &pe); ++ /* Update shadow table and hw entry */ ++ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); ++ mvpp2_prs_hw_write(priv, &pe); ++ } + + /* IPv6 over PPPoE */ + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index 332329cb1ee00..bd7c18c839d42 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -713,8 +713,6 @@ static void mtk_mac_link_up(struct phylink_config *config, + break; + } + +- mtk_set_queue_speed(mac->hw, mac->id, speed); +- + /* Configure duplex */ + if (duplex == DUPLEX_FULL) + mcr |= MAC_MCR_FORCE_DPX; +@@ -2008,9 +2006,6 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, netdev); + +- if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) +- mtk_ppe_check_skb(eth->ppe[0], skb, hash); +- + 
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { + if (trxd.rxd3 & RX_DMA_VTAG_V2) { +@@ -2038,6 +2033,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan_tci); + } + ++ if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) ++ mtk_ppe_check_skb(eth->ppe[0], skb, hash); ++ + skb_record_rx_queue(skb, 0); + napi_gro_receive(napi, skb); + +diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c +index 1ff024f42444b..2ea539ccc0802 100644 +--- a/drivers/net/ethernet/mediatek/mtk_ppe.c ++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c +@@ -8,6 +8,7 @@ + #include <linux/platform_device.h> + #include <linux/if_ether.h> + #include <linux/if_vlan.h> ++#include <net/dst_metadata.h> + #include <net/dsa.h> + #include "mtk_eth_soc.h" + #include "mtk_ppe.h" +@@ -458,6 +459,7 @@ __mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) + hwe->ib1 &= ~MTK_FOE_IB1_STATE; + hwe->ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_INVALID); + dma_wmb(); ++ mtk_ppe_cache_clear(ppe); + } + entry->hash = 0xffff; + +@@ -699,7 +701,9 @@ void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash) + skb->dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK) + goto out; + +- tag += 4; ++ if (!skb_metadata_dst(skb)) ++ tag += 4; ++ + if (get_unaligned_be16(tag) != ETH_P_8021Q) + break; + +diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +index 81afd5ee3fbf1..161751bb36c9c 100644 +--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c ++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +@@ -576,6 +576,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + ++ flow_block_cb_incref(block_cb); + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &block_cb_list); + return 0; +@@ -584,7 +585,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) + if (!block_cb) + return -ENOENT; + +- if (flow_block_cb_decref(block_cb)) { ++ if (!flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 47d4b54d15634..1f4233b2842f7 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4117,13 +4117,17 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct xsk_buff_pool *xsk_pool = + mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; ++ int max_xdp_mtu; + + if (!xsk_pool) + continue; + + mlx5e_build_xsk_param(xsk_pool, &xsk); ++ max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); + +- if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { ++ /* Validate XSK params and XDP MTU in advance */ ++ if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || ++ new_params->sw_mtu > max_xdp_mtu) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + +@@ -4133,9 +4137,9 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); +- max_mtu = min(max_mtu_frame, max_mtu_page); ++ max_mtu = 
min3(max_mtu_frame, max_mtu_page, max_xdp_mtu); + +- netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n", ++ netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } +diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c +index 930496cd34ed0..b50f16786c246 100644 +--- a/drivers/net/ethernet/realtek/r8169_phy_config.c ++++ b/drivers/net/ethernet/realtek/r8169_phy_config.c +@@ -826,6 +826,9 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp, + /* disable phy pfm mode */ + phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0); + ++ /* disable 10m pll off */ ++ phy_modify_paged(phydev, 0x0a43, 0x10, BIT(0), 0); ++ + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); + } +diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c +index 7022fb2005a2f..d30459dbfe8f8 100644 +--- a/drivers/net/ethernet/sfc/ef10.c ++++ b/drivers/net/ethernet/sfc/ef10.c +@@ -1304,7 +1304,8 @@ static void efx_ef10_fini_nic(struct efx_nic *efx) + static int efx_ef10_init_nic(struct efx_nic *efx) + { + struct efx_ef10_nic_data *nic_data = efx->nic_data; +- netdev_features_t hw_enc_features = 0; ++ struct net_device *net_dev = efx->net_dev; ++ netdev_features_t tun_feats, tso_feats; + int rc; + + if (nic_data->must_check_datapath_caps) { +@@ -1349,20 +1350,30 @@ static int efx_ef10_init_nic(struct efx_nic *efx) + nic_data->must_restore_piobufs = false; + } + +- /* add encapsulated checksum offload features */ ++ /* encap features might change during reset if fw variant changed */ + if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx)) +- hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; +- /* add encapsulated TSO features */ +- if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) { +- netdev_features_t encap_tso_features; ++ net_dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; ++ else ++ net_dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + +- encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | +- NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM; ++ tun_feats = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | ++ NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM; ++ tso_feats = NETIF_F_TSO | NETIF_F_TSO6; + +- hw_enc_features |= encap_tso_features | NETIF_F_TSO; +- efx->net_dev->features |= encap_tso_features; ++ if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) { ++ /* If this is first nic_init, or if it is a reset and a new fw ++ * variant has added new features, enable them by default. ++ * If the features are not new, maintain their current value. 
++ */ ++ if (!(net_dev->hw_features & tun_feats)) ++ net_dev->features |= tun_feats; ++ net_dev->hw_enc_features |= tun_feats | tso_feats; ++ net_dev->hw_features |= tun_feats; ++ } else { ++ net_dev->hw_enc_features &= ~(tun_feats | tso_feats); ++ net_dev->hw_features &= ~tun_feats; ++ net_dev->features &= ~tun_feats; + } +- efx->net_dev->hw_enc_features = hw_enc_features; + + /* don't fail init if RSS setup doesn't work */ + rc = efx->type->rx_push_rss_config(efx, false, +@@ -4021,7 +4032,10 @@ static unsigned int efx_ef10_recycle_ring_size(const struct efx_nic *efx) + NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_IPV6_CSUM | \ + NETIF_F_RXHASH | \ +- NETIF_F_NTUPLE) ++ NETIF_F_NTUPLE | \ ++ NETIF_F_SG | \ ++ NETIF_F_RXCSUM | \ ++ NETIF_F_RXALL) + + const struct efx_nic_type efx_hunt_a0_vf_nic_type = { + .is_vf = true, +diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c +index 3a86f1213a051..6a1bff54bc6c3 100644 +--- a/drivers/net/ethernet/sfc/efx.c ++++ b/drivers/net/ethernet/sfc/efx.c +@@ -1001,21 +1001,18 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) + } + + /* Determine netdevice features */ +- net_dev->features |= (efx->type->offload_features | NETIF_F_SG | +- NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); +- if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) { +- net_dev->features |= NETIF_F_TSO6; +- if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) +- net_dev->hw_enc_features |= NETIF_F_TSO6; +- } +- /* Check whether device supports TSO */ +- if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) +- net_dev->features &= ~NETIF_F_ALL_TSO; ++ net_dev->features |= efx->type->offload_features; ++ ++ /* Add TSO features */ ++ if (efx->type->tso_versions && efx->type->tso_versions(efx)) ++ net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6; ++ + /* Mask for features that also apply to VLAN devices */ + net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | + NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | + NETIF_F_RXCSUM); + ++ /* Determine user configurable features */ + net_dev->hw_features |= net_dev->features & ~efx->fixed_features; + + /* Disable receiving frames with bad FCS, by default. 
*/ +diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c +index a2e511912e6a9..a690d139e1770 100644 +--- a/drivers/net/ethernet/smsc/smsc911x.c ++++ b/drivers/net/ethernet/smsc/smsc911x.c +@@ -1037,8 +1037,6 @@ static int smsc911x_mii_probe(struct net_device *dev) + return ret; + } + +- /* Indicate that the MAC is responsible for managing PHY PM */ +- phydev->mac_managed_pm = true; + phy_attached_info(phydev); + + phy_set_max_speed(phydev, SPEED_100); +@@ -1066,6 +1064,7 @@ static int smsc911x_mii_init(struct platform_device *pdev, + struct net_device *dev) + { + struct smsc911x_data *pdata = netdev_priv(dev); ++ struct phy_device *phydev; + int err = -ENXIO; + + pdata->mii_bus = mdiobus_alloc(); +@@ -1108,6 +1107,10 @@ static int smsc911x_mii_init(struct platform_device *pdev, + goto err_out_free_bus_2; + } + ++ phydev = phy_find_first(pdata->mii_bus); ++ if (phydev) ++ phydev->mac_managed_pm = true; ++ + return 0; + + err_out_free_bus_2: +diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h +index ec9c130276d89..54bb072aeb2d3 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/common.h ++++ b/drivers/net/ethernet/stmicro/stmmac/common.h +@@ -532,7 +532,6 @@ struct mac_device_info { + unsigned int xlgmac; + unsigned int num_vlan; + u32 vlan_filter[32]; +- unsigned int promisc; + bool vlan_fail_q_en; + u8 vlan_fail_q; + }; +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index 8c7a0b7c99520..36251ec2589c9 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -472,12 +472,6 @@ static int dwmac4_add_hw_vlan_rx_fltr(struct net_device *dev, + if (vid > 4095) + return -EINVAL; + +- if (hw->promisc) { +- netdev_err(dev, +- "Adding VLAN in promisc mode not supported\n"); +- return -EPERM; +- } +- + /* Single Rx VLAN Filter */ + if (hw->num_vlan == 1) { + /* For single VLAN filter, VID 0 means VLAN promiscuous */ +@@ -527,12 +521,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev, + { + int i, ret = 0; + +- if (hw->promisc) { +- netdev_err(dev, +- "Deleting VLAN in promisc mode not supported\n"); +- return -EPERM; +- } +- + /* Single Rx VLAN Filter */ + if (hw->num_vlan == 1) { + if ((hw->vlan_filter[0] & GMAC_VLAN_TAG_VID) == vid) { +@@ -557,39 +545,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev, + return ret; + } + +-static void dwmac4_vlan_promisc_enable(struct net_device *dev, +- struct mac_device_info *hw) +-{ +- void __iomem *ioaddr = hw->pcsr; +- u32 value; +- u32 hash; +- u32 val; +- int i; +- +- /* Single Rx VLAN Filter */ +- if (hw->num_vlan == 1) { +- dwmac4_write_single_vlan(dev, 0); +- return; +- } +- +- /* Extended Rx VLAN Filter Enable */ +- for (i = 0; i < hw->num_vlan; i++) { +- if (hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VEN) { +- val = hw->vlan_filter[i] & ~GMAC_VLAN_TAG_DATA_VEN; +- dwmac4_write_vlan_filter(dev, hw, i, val); +- } +- } +- +- hash = readl(ioaddr + GMAC_VLAN_HASH_TABLE); +- if (hash & GMAC_VLAN_VLHT) { +- value = readl(ioaddr + GMAC_VLAN_TAG); +- if (value & GMAC_VLAN_VTHM) { +- value &= ~GMAC_VLAN_VTHM; +- writel(value, ioaddr + GMAC_VLAN_TAG); +- } +- } +-} +- + static void dwmac4_restore_hw_vlan_rx_fltr(struct net_device *dev, + struct mac_device_info *hw) + { +@@ -709,22 +664,12 @@ static void dwmac4_set_filter(struct mac_device_info *hw, + } + + /* VLAN filtering */ +- if (dev->features & 
NETIF_F_HW_VLAN_CTAG_FILTER) ++ if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en) ++ value &= ~GMAC_PACKET_FILTER_VTFE; ++ else if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + value |= GMAC_PACKET_FILTER_VTFE; + + writel(value, ioaddr + GMAC_PACKET_FILTER); +- +- if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en) { +- if (!hw->promisc) { +- hw->promisc = 1; +- dwmac4_vlan_promisc_enable(dev, hw); +- } +- } else { +- if (hw->promisc) { +- hw->promisc = 0; +- dwmac4_restore_hw_vlan_rx_fltr(dev, hw); +- } +- } + } + + static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, +diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c +index 0b0c6c0764fe9..d0b5129439ed6 100644 +--- a/drivers/net/ieee802154/ca8210.c ++++ b/drivers/net/ieee802154/ca8210.c +@@ -1902,10 +1902,9 @@ static int ca8210_skb_tx( + struct ca8210_priv *priv + ) + { +- int status; + struct ieee802154_hdr header = { }; + struct secspec secspec; +- unsigned int mac_len; ++ int mac_len, status; + + dev_dbg(&priv->spi->dev, "%s called\n", __func__); + +diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c +index 0f52c068c46d6..ee6fb00b71eb6 100644 +--- a/drivers/net/ipa/gsi_trans.c ++++ b/drivers/net/ipa/gsi_trans.c +@@ -156,7 +156,7 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, + * gsi_trans_pool_exit_dma() can assume the total allocated + * size is exactly (count * size). + */ +- total_size = get_order(total_size) << PAGE_SHIFT; ++ total_size = PAGE_SIZE << get_order(total_size); + + virt = dma_alloc_coherent(dev, total_size, &addr, GFP_KERNEL); + if (!virt) +diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c +index 7a28e082436e4..d0c916a53d7ce 100644 +--- a/drivers/net/net_failover.c ++++ b/drivers/net/net_failover.c +@@ -130,14 +130,10 @@ static u16 net_failover_select_queue(struct net_device *dev, + txq = ops->ndo_select_queue(primary_dev, skb, sb_dev); + else + txq = netdev_pick_tx(primary_dev, skb, NULL); +- +- qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; +- +- return txq; ++ } else { ++ txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; + } + +- txq = skb_rx_queue_recorded(skb) ? 
skb_get_rx_queue(skb) : 0; +- + /* Save the original txq to restore before passing to the driver */ + qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; + +diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c +index b4ff9c5073a3c..9ab5eff502b71 100644 +--- a/drivers/net/phy/dp83869.c ++++ b/drivers/net/phy/dp83869.c +@@ -588,15 +588,13 @@ static int dp83869_of_init(struct phy_device *phydev) + &dp83869_internal_delay[0], + delay_size, true); + if (dp83869->rx_int_delay < 0) +- dp83869->rx_int_delay = +- dp83869_internal_delay[DP83869_CLK_DELAY_DEF]; ++ dp83869->rx_int_delay = DP83869_CLK_DELAY_DEF; + + dp83869->tx_int_delay = phy_get_internal_delay(phydev, dev, + &dp83869_internal_delay[0], + delay_size, false); + if (dp83869->tx_int_delay < 0) +- dp83869->tx_int_delay = +- dp83869_internal_delay[DP83869_CLK_DELAY_DEF]; ++ dp83869->tx_int_delay = DP83869_CLK_DELAY_DEF; + + return ret; + } +diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c +index 682987040ea82..da488cbb05428 100644 +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -1688,7 +1688,9 @@ not_lro: + if (unlikely(rcd->ts)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci); + +- if (adapter->netdev->features & NETIF_F_LRO) ++ /* Use GRO callback if UPT is enabled */ ++ if ((adapter->netdev->features & NETIF_F_LRO) && ++ !rq->shared->updateRxProd) + netif_receive_skb(skb); + else + napi_gro_receive(&rq->napi, skb); +diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c +index 1e6a479766429..c066b0040a3fe 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_imem.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c +@@ -587,6 +587,13 @@ static void ipc_imem_run_state_worker(struct work_struct *instance) + while (ctrl_chl_idx < IPC_MEM_MAX_CHANNELS) { + if (!ipc_chnl_cfg_get(&chnl_cfg_port, ctrl_chl_idx)) { + ipc_imem->ipc_port[ctrl_chl_idx] = NULL; ++ ++ if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7560_ID && ++ chnl_cfg_port.wwan_port_type == WWAN_PORT_XMMRPC) { ++ ctrl_chl_idx++; ++ continue; ++ } ++ + if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7360_ID && + chnl_cfg_port.wwan_port_type == WWAN_PORT_MBIM) { + ctrl_chl_idx++; +diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h +index 3dbfc8a6924ed..1fcbd83f7ff2e 100644 +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -166,7 +166,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ + struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; + grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; + +- struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; ++ struct gnttab_copy tx_copy_ops[2 * MAX_PENDING_REQS]; + struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; + struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; + /* passed to gnttab_[un]map_refs with pages under (un)mapping */ +diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c +index bf627af723bf9..5c266062c08f0 100644 +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -334,6 +334,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue, + struct xenvif_tx_cb { + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; ++ u32 split_mask; + }; + + #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +@@ -361,6 +362,8 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) + struct sk_buff *skb = + 
alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN, + GFP_ATOMIC | __GFP_NOWARN); ++ ++ BUILD_BUG_ON(sizeof(*XENVIF_TX_CB(skb)) > sizeof(skb->cb)); + if (unlikely(skb == NULL)) + return NULL; + +@@ -396,11 +399,13 @@ static void xenvif_get_requests(struct xenvif_queue *queue, + nr_slots = shinfo->nr_frags + 1; + + copy_count(skb) = 0; ++ XENVIF_TX_CB(skb)->split_mask = 0; + + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? txp->size : data_len; ++ bool split = false; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; +@@ -413,6 +418,13 @@ static void xenvif_get_requests(struct xenvif_queue *queue, + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + ++ /* Don't cross local page boundary! */ ++ if (cop->dest.offset + amount > XEN_PAGE_SIZE) { ++ amount = XEN_PAGE_SIZE - cop->dest.offset; ++ XENVIF_TX_CB(skb)->split_mask |= 1U << copy_count(skb); ++ split = true; ++ } ++ + cop->len = amount; + cop->flags = GNTCOPY_source_gref; + +@@ -420,7 +432,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; +- copy_count(skb)++; ++ if (!split) ++ copy_count(skb)++; + + cop++; + data_len -= amount; +@@ -441,7 +454,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, + nr_slots--; + } else { + /* The copy op partially covered the tx_request. +- * The remainder will be mapped. ++ * The remainder will be mapped or copied in the next ++ * iteration. + */ + txp->offset += amount; + txp->size -= amount; +@@ -539,6 +553,13 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; ++ ++ /* Split copies need to be handled together. 
*/ ++ if (XENVIF_TX_CB(skb)->split_mask & (1U << i)) { ++ (*gopp_copy)++; ++ if (!newerr) ++ newerr = (*gopp_copy)->status; ++ } + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 29c902b9aecbd..ea3f0806783a3 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -3126,6 +3126,7 @@ out_dev_unmap: + nvme_dev_unmap(dev); + out_uninit_ctrl: + nvme_uninit_ctrl(&dev->ctrl); ++ nvme_put_ctrl(&dev->ctrl); + return result; + } + +@@ -3490,6 +3491,8 @@ static const struct pci_device_id nvme_id_table[] = { + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, ++ { PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */ ++ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), +diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c +index 6d5d619ab2e94..346f67d2fdae2 100644 +--- a/drivers/pci/controller/dwc/pcie-designware.c ++++ b/drivers/pci/controller/dwc/pcie-designware.c +@@ -806,11 +806,6 @@ void dw_pcie_setup(struct dw_pcie *pci) + dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); + } + +- val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL); +- val &= ~PORT_LINK_FAST_LINK_MODE; +- val |= PORT_LINK_DLL_LINK_EN; +- dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); +- + if (dw_pcie_cap_is(pci, CDM_CHECK)) { + val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); + val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS | +@@ -818,6 +813,11 @@ void dw_pcie_setup(struct dw_pcie *pci) + dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); + } + ++ val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL); ++ val &= ~PORT_LINK_FAST_LINK_MODE; ++ val |= PORT_LINK_DLL_LINK_EN; ++ dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); ++ + if (!pci->num_lanes) { + dev_dbg(pci->dev, "Using h/w default number of lanes\n"); + return; +diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c +index 32c3edaf90385..5e7b82a2b13d0 100644 +--- a/drivers/pinctrl/pinctrl-amd.c ++++ b/drivers/pinctrl/pinctrl-amd.c +@@ -865,32 +865,34 @@ static const struct pinconf_ops amd_pinconf_ops = { + .pin_config_group_set = amd_pinconf_group_set, + }; + +-static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) ++static void amd_gpio_irq_init_pin(struct amd_gpio *gpio_dev, int pin) + { +- struct pinctrl_desc *desc = gpio_dev->pctrl->desc; ++ const struct pin_desc *pd; + unsigned long flags; + u32 pin_reg, mask; +- int i; + + mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | + BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | + BIT(WAKE_CNTRL_OFF_S4); + +- for (i = 0; i < desc->npins; i++) { +- int pin = desc->pins[i].number; +- const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); +- +- if (!pd) +- continue; ++ pd = pin_desc_get(gpio_dev->pctrl, pin); ++ if (!pd) ++ return; + +- raw_spin_lock_irqsave(&gpio_dev->lock, flags); ++ raw_spin_lock_irqsave(&gpio_dev->lock, flags); ++ pin_reg = readl(gpio_dev->base + pin * 4); ++ pin_reg &= ~mask; ++ writel(pin_reg, gpio_dev->base + pin * 4); ++ raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); ++} + +- pin_reg = readl(gpio_dev->base + i * 4); +- pin_reg &= ~mask; +- writel(pin_reg, gpio_dev->base + i * 4); ++static void 
amd_gpio_irq_init(struct amd_gpio *gpio_dev) ++{ ++ struct pinctrl_desc *desc = gpio_dev->pctrl->desc; ++ int i; + +- raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); +- } ++ for (i = 0; i < desc->npins; i++) ++ amd_gpio_irq_init_pin(gpio_dev, i); + } + + #ifdef CONFIG_PM_SLEEP +@@ -943,8 +945,10 @@ static int amd_gpio_resume(struct device *dev) + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + +- if (!amd_gpio_should_save(gpio_dev, pin)) ++ if (!amd_gpio_should_save(gpio_dev, pin)) { ++ amd_gpio_irq_init_pin(gpio_dev, pin); + continue; ++ } + + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + gpio_dev->saved_regs[i] |= readl(gpio_dev->base + pin * 4) & PIN_IRQ_PENDING; +diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c +index 373eed8bc4be9..c775d239444a6 100644 +--- a/drivers/pinctrl/pinctrl-at91-pio4.c ++++ b/drivers/pinctrl/pinctrl-at91-pio4.c +@@ -1206,7 +1206,6 @@ static int atmel_pinctrl_probe(struct platform_device *pdev) + dev_err(dev, "can't add the irq domain\n"); + return -ENODEV; + } +- atmel_pioctrl->irq_domain->name = "atmel gpio"; + + for (i = 0; i < atmel_pioctrl->npins; i++) { + int irq = irq_create_mapping(atmel_pioctrl->irq_domain, i); +diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c +index 29e4a6282a641..1dcbd0937ef5a 100644 +--- a/drivers/pinctrl/pinctrl-ocelot.c ++++ b/drivers/pinctrl/pinctrl-ocelot.c +@@ -1204,7 +1204,7 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, + regmap_update_bits(info->map, REG_ALT(0, info, pin->pin), + BIT(p), f << p); + regmap_update_bits(info->map, REG_ALT(1, info, pin->pin), +- BIT(p), f << (p - 1)); ++ BIT(p), (f >> 1) << p); + + return 0; + } +diff --git a/drivers/platform/surface/aggregator/bus.c b/drivers/platform/surface/aggregator/bus.c +index de539938896e2..b501a79f2a08a 100644 +--- a/drivers/platform/surface/aggregator/bus.c ++++ b/drivers/platform/surface/aggregator/bus.c +@@ -485,8 +485,10 @@ int __ssam_register_clients(struct device *parent, struct ssam_controller *ctrl, + * device, so ignore it and continue with the next one. + */ + status = ssam_add_client_device(parent, ctrl, child); +- if (status && status != -ENODEV) ++ if (status && status != -ENODEV) { ++ fwnode_handle_put(child); + goto err; ++ } + } + + return 0; +diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c +index 0eb5bfdd823a1..959ec3c5f376e 100644 +--- a/drivers/platform/x86/ideapad-laptop.c ++++ b/drivers/platform/x86/ideapad-laptop.c +@@ -1170,7 +1170,6 @@ static const struct key_entry ideapad_keymap[] = { + { KE_KEY, 65, { KEY_PROG4 } }, + { KE_KEY, 66, { KEY_TOUCHPAD_OFF } }, + { KE_KEY, 67, { KEY_TOUCHPAD_ON } }, +- { KE_KEY, 68, { KEY_TOUCHPAD_TOGGLE } }, + { KE_KEY, 128, { KEY_ESC } }, + + /* +@@ -1526,18 +1525,16 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ + if (priv->features.ctrl_ps2_aux_port) + i8042_command(¶m, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); + +- if (send_events) { +- /* +- * On older models the EC controls the touchpad and toggles it +- * on/off itself, in this case we report KEY_TOUCHPAD_ON/_OFF. +- * If the EC did not toggle, report KEY_TOUCHPAD_TOGGLE. +- */ +- if (value != priv->r_touchpad_val) { +- ideapad_input_report(priv, value ? 
67 : 66); +- sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); +- } else { +- ideapad_input_report(priv, 68); +- } ++ /* ++ * On older models the EC controls the touchpad and toggles it on/off ++ * itself, in this case we report KEY_TOUCHPAD_ON/_OFF. Some models do ++ * an acpi-notify with VPC bit 5 set on resume, so this function get ++ * called with send_events=true on every resume. Therefor if the EC did ++ * not toggle, do nothing to avoid sending spurious KEY_TOUCHPAD_TOGGLE. ++ */ ++ if (send_events && value != priv->r_touchpad_val) { ++ ideapad_input_report(priv, value ? 67 : 66); ++ sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); + } + + priv->r_touchpad_val = value; +diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c +index 3a15d32d7644c..b9591969e0fa1 100644 +--- a/drivers/platform/x86/intel/pmc/core.c ++++ b/drivers/platform/x86/intel/pmc/core.c +@@ -66,7 +66,18 @@ static inline void pmc_core_reg_write(struct pmc_dev *pmcdev, int reg_offset, + + static inline u64 pmc_core_adjust_slp_s0_step(struct pmc_dev *pmcdev, u32 value) + { +- return (u64)value * pmcdev->map->slp_s0_res_counter_step; ++ /* ++ * ADL PCH does not have the SLP_S0 counter and LPM Residency counters are ++ * used as a workaround which uses 30.5 usec tick. All other client ++ * programs have the legacy SLP_S0 residency counter that is using the 122 ++ * usec tick. ++ */ ++ const int lpm_adj_x2 = pmcdev->map->lpm_res_counter_step_x2; ++ ++ if (pmcdev->map == &adl_reg_map) ++ return (u64)value * GET_X2_COUNTER((u64)lpm_adj_x2); ++ else ++ return (u64)value * pmcdev->map->slp_s0_res_counter_step; + } + + static int set_etr3(struct pmc_dev *pmcdev) +diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c +index a01a92769c1a3..74af3e593b2ca 100644 +--- a/drivers/platform/x86/think-lmi.c ++++ b/drivers/platform/x86/think-lmi.c +@@ -941,12 +941,23 @@ static ssize_t possible_values_show(struct kobject *kobj, struct kobj_attribute + { + struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); + +- if (!tlmi_priv.can_get_bios_selections) +- return -EOPNOTSUPP; +- + return sysfs_emit(buf, "%s\n", setting->possible_values); + } + ++static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); ++ ++ if (setting->possible_values) { ++ /* Figure out what setting type is as BIOS does not return this */ ++ if (strchr(setting->possible_values, ';')) ++ return sysfs_emit(buf, "enumeration\n"); ++ } ++ /* Anything else is going to be a string */ ++ return sysfs_emit(buf, "string\n"); ++} ++ + static ssize_t current_value_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +@@ -1036,14 +1047,30 @@ static struct kobj_attribute attr_possible_values = __ATTR_RO(possible_values); + + static struct kobj_attribute attr_current_val = __ATTR_RW_MODE(current_value, 0600); + ++static struct kobj_attribute attr_type = __ATTR_RO(type); ++ ++static umode_t attr_is_visible(struct kobject *kobj, ++ struct attribute *attr, int n) ++{ ++ struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); ++ ++ /* We don't want to display possible_values attributes if not available */ ++ if ((attr == &attr_possible_values.attr) && (!setting->possible_values)) ++ return 0; ++ ++ return attr->mode; ++} ++ + static struct attribute *tlmi_attrs[] = { + &attr_displ_name.attr, + &attr_current_val.attr, + 
&attr_possible_values.attr, ++ &attr_type.attr, + NULL + }; + + static const struct attribute_group tlmi_attr_group = { ++ .is_visible = attr_is_visible, + .attrs = tlmi_attrs, + }; + +@@ -1423,7 +1450,34 @@ static int tlmi_analyze(void) + if (ret || !setting->possible_values) + pr_info("Error retrieving possible values for %d : %s\n", + i, setting->display_name); ++ } else { ++ /* ++ * Older Thinkstations don't support the bios_selections API. ++ * Instead they store this as a [Optional:Option1,Option2] section of the ++ * name string. ++ * Try and pull that out if it's available. ++ */ ++ char *item, *optstart, *optend; ++ ++ if (!tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID)) { ++ optstart = strstr(item, "[Optional:"); ++ if (optstart) { ++ optstart += strlen("[Optional:"); ++ optend = strstr(optstart, "]"); ++ if (optend) ++ setting->possible_values = ++ kstrndup(optstart, optend - optstart, ++ GFP_KERNEL); ++ } ++ } + } ++ /* ++ * firmware-attributes requires that possible_values are separated by ';' but ++ * Lenovo FW uses ','. Replace appropriately. ++ */ ++ if (setting->possible_values) ++ strreplace(setting->possible_values, ',', ';'); ++ + kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); + tlmi_priv.setting[i] = setting; + kfree(item); +diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c +index 08f4cf0ad9e3c..8fa9772acf79b 100644 +--- a/drivers/ptp/ptp_qoriq.c ++++ b/drivers/ptp/ptp_qoriq.c +@@ -601,7 +601,7 @@ static int ptp_qoriq_probe(struct platform_device *dev) + return 0; + + no_clock: +- iounmap(ptp_qoriq->base); ++ iounmap(base); + no_ioremap: + release_resource(ptp_qoriq->rsrc); + no_resource: +diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c +index 2a9867abba20c..e6724a229d237 100644 +--- a/drivers/regulator/fixed.c ++++ b/drivers/regulator/fixed.c +@@ -215,7 +215,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) + drvdata->enable_clock = devm_clk_get(dev, NULL); + if (IS_ERR(drvdata->enable_clock)) { + dev_err(dev, "Can't get enable-clock from devicetree\n"); +- return -ENOENT; ++ return PTR_ERR(drvdata->enable_clock); + } + } else if (drvtype && drvtype->has_performance_state) { + drvdata->desc.ops = &fixed_voltage_domain_ops; +diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c +index 997b524bdd2b5..a48c6938ae68f 100644 +--- a/drivers/s390/crypto/vfio_ap_drv.c ++++ b/drivers/s390/crypto/vfio_ap_drv.c +@@ -54,8 +54,9 @@ static struct ap_driver vfio_ap_drv = { + + static void vfio_ap_matrix_dev_release(struct device *dev) + { +- struct ap_matrix_dev *matrix_dev = dev_get_drvdata(dev); ++ struct ap_matrix_dev *matrix_dev; + ++ matrix_dev = container_of(dev, struct ap_matrix_dev, device); + kfree(matrix_dev); + } + +diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c +index fe70f8f114352..5f746b4a6b8da 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -4768,7 +4768,7 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd) + devhandle = megasas_get_tm_devhandle(scmd->device); + + if (devhandle == (u16)ULONG_MAX) { +- ret = SUCCESS; ++ ret = FAILED; + sdev_printk(KERN_INFO, scmd->device, + "task abort issued for invalid devhandle\n"); + mutex_unlock(&instance->reset_mutex); +@@ -4838,7 +4838,7 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd) + devhandle = megasas_get_tm_devhandle(scmd->device); + + if (devhandle == (u16)ULONG_MAX) { +- ret = 
SUCCESS; ++ ret = FAILED; + sdev_printk(KERN_INFO, scmd->device, + "target reset issued for invalid devhandle\n"); + mutex_unlock(&instance->reset_mutex); +diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c +index 2ee9ea57554d7..14ae0a9c5d3d8 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -6616,11 +6616,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) + else if (rc == -EAGAIN) + goto try_32bit_dma; + total_sz += sense_sz; +- ioc_info(ioc, +- "sense pool(0x%p)- dma(0x%llx): depth(%d)," +- "element_size(%d), pool_size(%d kB)\n", +- ioc->sense, (unsigned long long)ioc->sense_dma, ioc->scsiio_depth, +- SCSI_SENSE_BUFFERSIZE, sz / 1024); + /* reply pool, 4 byte align */ + sz = ioc->reply_free_queue_depth * ioc->reply_sz; + rc = _base_allocate_reply_pool(ioc, sz); +diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +index acc11ad569758..cb8980238e8fc 100644 +--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c ++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +@@ -181,7 +181,6 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp + cancel_delayed_work_sync(&pci_info->work); + proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); + proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0); +- thermal_zone_device_disable(tzd); + pci_info->stored_thres = 0; + return 0; + } +diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c +index ae28a03fa890b..1157b8869bcca 100644 +--- a/drivers/thunderbolt/quirks.c ++++ b/drivers/thunderbolt/quirks.c +@@ -26,6 +26,19 @@ static void quirk_clx_disable(struct tb_switch *sw) + tb_sw_dbg(sw, "disabling CL states\n"); + } + ++static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw) ++{ ++ struct tb_port *port; ++ ++ tb_switch_for_each_port(sw, port) { ++ if (!tb_port_is_usb3_down(port)) ++ continue; ++ port->max_bw = 16376; ++ tb_port_dbg(port, "USB3 maximum bandwidth limited to %u Mb/s\n", ++ port->max_bw); ++ } ++} ++ + struct tb_quirk { + u16 hw_vendor_id; + u16 hw_device_id; +@@ -43,6 +56,24 @@ static const struct tb_quirk tb_quirks[] = { + * DP buffers. + */ + { 0x8087, 0x0b26, 0x0000, 0x0000, quirk_dp_credit_allocation }, ++ /* ++ * Limit the maximum USB3 bandwidth for the following Intel USB4 ++ * host routers due to a hardware issue. ++ */ ++ { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI1, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI1, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_MTL_M_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI0, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, ++ { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI1, 0x0000, 0x0000, ++ quirk_usb3_maximum_bandwidth }, + /* + * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. 
+ */ +diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h +index e11d973a8f9b6..f034723b1b40e 100644 +--- a/drivers/thunderbolt/tb.h ++++ b/drivers/thunderbolt/tb.h +@@ -252,6 +252,8 @@ struct tb_switch { + * @ctl_credits: Buffers reserved for control path + * @dma_credits: Number of credits allocated for DMA tunneling for all + * DMA paths through this port. ++ * @max_bw: Maximum possible bandwidth through this adapter if set to ++ * non-zero. + * + * In USB4 terminology this structure represents an adapter (protocol or + * lane adapter). +@@ -277,6 +279,7 @@ struct tb_port { + unsigned int total_credits; + unsigned int ctl_credits; + unsigned int dma_credits; ++ unsigned int max_bw; + }; + + /** +diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c +index d5cd219ee9e6b..3a11b30b6c86a 100644 +--- a/drivers/thunderbolt/usb4.c ++++ b/drivers/thunderbolt/usb4.c +@@ -1882,6 +1882,15 @@ int usb4_port_retimer_nvm_read(struct tb_port *port, u8 index, + usb4_port_retimer_nvm_read_block, &info); + } + ++static inline unsigned int ++usb4_usb3_port_max_bandwidth(const struct tb_port *port, unsigned int bw) ++{ ++ /* Take the possible bandwidth limitation into account */ ++ if (port->max_bw) ++ return min(bw, port->max_bw); ++ return bw; ++} ++ + /** + * usb4_usb3_port_max_link_rate() - Maximum support USB3 link rate + * @port: USB3 adapter port +@@ -1903,7 +1912,9 @@ int usb4_usb3_port_max_link_rate(struct tb_port *port) + return ret; + + lr = (val & ADP_USB3_CS_4_MSLR_MASK) >> ADP_USB3_CS_4_MSLR_SHIFT; +- return lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; ++ ret = lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; ++ ++ return usb4_usb3_port_max_bandwidth(port, ret); + } + + /** +@@ -1930,7 +1941,9 @@ int usb4_usb3_port_actual_link_rate(struct tb_port *port) + return 0; + + lr = val & ADP_USB3_CS_4_ALR_MASK; +- return lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; ++ ret = lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; ++ ++ return usb4_usb3_port_max_bandwidth(port, ret); + } + + static int usb4_usb3_port_cm_request(struct tb_port *port, bool request) +diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c +index 8cbbb002fefe0..086b509689839 100644 +--- a/drivers/usb/typec/ucsi/ucsi.c ++++ b/drivers/usb/typec/ucsi/ucsi.c +@@ -1039,9 +1039,8 @@ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con) + return NULL; + } + +-static int ucsi_register_port(struct ucsi *ucsi, int index) ++static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con) + { +- struct ucsi_connector *con = &ucsi->connector[index]; + struct typec_capability *cap = &con->typec_cap; + enum typec_accessory *accessory = cap->accessory; + enum usb_role u_role = USB_ROLE_NONE; +@@ -1062,7 +1061,6 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) + init_completion(&con->complete); + mutex_init(&con->lock); + INIT_LIST_HEAD(&con->partner_tasks); +- con->num = index + 1; + con->ucsi = ucsi; + + cap->fwnode = ucsi_find_fwnode(con); +@@ -1204,7 +1202,7 @@ out_unlock: + */ + static int ucsi_init(struct ucsi *ucsi) + { +- struct ucsi_connector *con; ++ struct ucsi_connector *con, *connector; + u64 command, ntfy; + int ret; + int i; +@@ -1235,16 +1233,16 @@ static int ucsi_init(struct ucsi *ucsi) + } + + /* Allocate the connectors. 
Released in ucsi_unregister() */ +- ucsi->connector = kcalloc(ucsi->cap.num_connectors + 1, +- sizeof(*ucsi->connector), GFP_KERNEL); +- if (!ucsi->connector) { ++ connector = kcalloc(ucsi->cap.num_connectors + 1, sizeof(*connector), GFP_KERNEL); ++ if (!connector) { + ret = -ENOMEM; + goto err_reset; + } + + /* Register all connectors */ + for (i = 0; i < ucsi->cap.num_connectors; i++) { +- ret = ucsi_register_port(ucsi, i); ++ connector[i].num = i + 1; ++ ret = ucsi_register_port(ucsi, &connector[i]); + if (ret) + goto err_unregister; + } +@@ -1256,11 +1254,12 @@ static int ucsi_init(struct ucsi *ucsi) + if (ret < 0) + goto err_unregister; + ++ ucsi->connector = connector; + ucsi->ntfy = ntfy; + return 0; + + err_unregister: +- for (con = ucsi->connector; con->port; con++) { ++ for (con = connector; con->port; con++) { + ucsi_unregister_partner(con); + ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); + ucsi_unregister_port_psy(con); +@@ -1269,10 +1268,7 @@ err_unregister: + typec_unregister_port(con->port); + con->port = NULL; + } +- +- kfree(ucsi->connector); +- ucsi->connector = NULL; +- ++ kfree(connector); + err_reset: + memset(&ucsi->cap, 0, sizeof(ucsi->cap)); + ucsi_reset_ppm(ucsi); +diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c +index 81c3154544287..b6b22fa4a8a01 100644 +--- a/drivers/video/fbdev/au1200fb.c ++++ b/drivers/video/fbdev/au1200fb.c +@@ -1040,6 +1040,9 @@ static int au1200fb_fb_check_var(struct fb_var_screeninfo *var, + u32 pixclock; + int screen_size, plane; + ++ if (!var->pixclock) ++ return -EINVAL; ++ + plane = fbdev->plane; + + /* Make sure that the mode respect all LCD controller and +diff --git a/drivers/video/fbdev/geode/lxfb_core.c b/drivers/video/fbdev/geode/lxfb_core.c +index 8130e9eee2b4b..556d8b1a9e06a 100644 +--- a/drivers/video/fbdev/geode/lxfb_core.c ++++ b/drivers/video/fbdev/geode/lxfb_core.c +@@ -235,6 +235,9 @@ static void get_modedb(struct fb_videomode **modedb, unsigned int *size) + + static int lxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) + { ++ if (!var->pixclock) ++ return -EINVAL; ++ + if (var->xres > 1920 || var->yres > 1440) + return -EINVAL; + +diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c +index 0a9e5067b2010..a81095b2b1ea5 100644 +--- a/drivers/video/fbdev/intelfb/intelfbdrv.c ++++ b/drivers/video/fbdev/intelfb/intelfbdrv.c +@@ -1222,6 +1222,9 @@ static int intelfb_check_var(struct fb_var_screeninfo *var, + + dinfo = GET_DINFO(info); + ++ if (!var->pixclock) ++ return -EINVAL; ++ + /* update the pitch */ + if (intelfbhw_validate_mode(dinfo, var) != 0) + return -EINVAL; +diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c +index e60a276b4855d..ea4ba3dfb96bb 100644 +--- a/drivers/video/fbdev/nvidia/nvidia.c ++++ b/drivers/video/fbdev/nvidia/nvidia.c +@@ -764,6 +764,8 @@ static int nvidiafb_check_var(struct fb_var_screeninfo *var, + int pitch, err = 0; + + NVTRACE_ENTER(); ++ if (!var->pixclock) ++ return -EINVAL; + + var->transp.offset = 0; + var->transp.length = 0; +diff --git a/drivers/video/fbdev/tgafb.c b/drivers/video/fbdev/tgafb.c +index 14d37c49633c6..b44004880f0d1 100644 +--- a/drivers/video/fbdev/tgafb.c ++++ b/drivers/video/fbdev/tgafb.c +@@ -173,6 +173,9 @@ tgafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) + { + struct tga_par *par = (struct tga_par *)info->par; + ++ if (!var->pixclock) ++ return -EINVAL; ++ + if (par->tga_type == TGA_TYPE_8PLANE) { + if 
(var->bits_per_pixel != 8) + return -EINVAL; +diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c +index 46851511b661b..0d7ae20e39c9a 100644 +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -1895,8 +1895,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, + ULIST_ITER_INIT(&uiter); + ctx->use_path_cache = true; + while (1) { +- bool is_shared; +- bool cached; ++ const unsigned long prev_ref_count = ctx->refs.nnodes; + + walk_ctx.bytenr = bytenr; + ret = find_parent_nodes(&walk_ctx, &shared); +@@ -1914,21 +1913,36 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, + ret = 0; + + /* +- * If our data extent was not directly shared (without multiple +- * reference items), than it might have a single reference item +- * with a count > 1 for the same offset, which means there are 2 +- * (or more) file extent items that point to the data extent - +- * this happens when a file extent item needs to be split and +- * then one item gets moved to another leaf due to a b+tree leaf +- * split when inserting some item. In this case the file extent +- * items may be located in different leaves and therefore some +- * of the leaves may be referenced through shared subtrees while +- * others are not. Since our extent buffer cache only works for +- * a single path (by far the most common case and simpler to +- * deal with), we can not use it if we have multiple leaves +- * (which implies multiple paths). ++ * More than one extent buffer (bytenr) may have been added to ++ * the ctx->refs ulist, in which case we have to check multiple ++ * tree paths in case the first one is not shared, so we can not ++ * use the path cache which is made for a single path. Multiple ++ * extent buffers at the current level happen when: ++ * ++ * 1) level -1, the data extent: If our data extent was not ++ * directly shared (without multiple reference items), then ++ * it might have a single reference item with a count > 1 for ++ * the same offset, which means there are 2 (or more) file ++ * extent items that point to the data extent - this happens ++ * when a file extent item needs to be split and then one ++ * item gets moved to another leaf due to a b+tree leaf split ++ * when inserting some item. In this case the file extent ++ * items may be located in different leaves and therefore ++ * some of the leaves may be referenced through shared ++ * subtrees while others are not. Since our extent buffer ++ * cache only works for a single path (by far the most common ++ * case and simpler to deal with), we can not use it if we ++ * have multiple leaves (which implies multiple paths). ++ * ++ * 2) level >= 0, a tree node/leaf: We can have a mix of direct ++ * and indirect references on a b+tree node/leaf, so we have ++ * to check multiple paths, and the extent buffer (the ++ * current bytenr) may be shared or not. One example is ++ * during relocation as we may get a shared tree block ref ++ * (direct ref) and a non-shared tree block ref (indirect ++ * ref) for the same node/leaf. + */ +- if (level == -1 && ctx->refs.nnodes > 1) ++ if ((ctx->refs.nnodes - prev_ref_count) > 1) + ctx->use_path_cache = false; + + if (level >= 0) +@@ -1938,18 +1952,45 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, + if (!node) + break; + bytenr = node->val; +- level++; +- cached = lookup_backref_shared_cache(ctx, root, bytenr, level, +- &is_shared); +- if (cached) { +- ret = (is_shared ? 
1 : 0); +- break; ++ if (ctx->use_path_cache) { ++ bool is_shared; ++ bool cached; ++ ++ level++; ++ cached = lookup_backref_shared_cache(ctx, root, bytenr, ++ level, &is_shared); ++ if (cached) { ++ ret = (is_shared ? 1 : 0); ++ break; ++ } + } + shared.share_count = 0; + shared.have_delayed_delete_refs = false; + cond_resched(); + } + ++ /* ++ * If the path cache is disabled, then it means at some tree level we ++ * got multiple parents due to a mix of direct and indirect backrefs or ++ * multiple leaves with file extent items pointing to the same data ++ * extent. We have to invalidate the cache and cache only the sharedness ++ * result for the levels where we got only one node/reference. ++ */ ++ if (!ctx->use_path_cache) { ++ int i = 0; ++ ++ level--; ++ if (ret >= 0 && level >= 0) { ++ bytenr = ctx->path_cache_entries[level].bytenr; ++ ctx->use_path_cache = true; ++ store_backref_shared_cache(ctx, root, bytenr, level, ret); ++ i = level + 1; ++ } ++ ++ for ( ; i < BTRFS_MAX_LEVEL; i++) ++ ctx->path_cache_entries[i].bytenr = 0; ++ } ++ + /* + * Cache the sharedness result for the data extent if we know our inode + * has more than 1 file extent item that refers to the data extent. +diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c +index d628d545ffea7..c70a888bf8bf6 100644 +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -1036,14 +1036,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, + < block_group->zone_unusable); + WARN_ON(block_group->space_info->disk_total + < block_group->length * factor); +- WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, +- &block_group->runtime_flags) && +- block_group->space_info->active_total_bytes +- < block_group->length); + } + block_group->space_info->total_bytes -= block_group->length; +- if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) +- block_group->space_info->active_total_bytes -= block_group->length; + block_group->space_info->bytes_readonly -= + (block_group->length - block_group->zone_unusable); + block_group->space_info->bytes_zone_unusable -= +@@ -3342,13 +3336,15 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, + spin_unlock(&info->delalloc_root_lock); + + while (total) { +- bool reclaim; ++ struct btrfs_space_info *space_info; ++ bool reclaim = false; + + cache = btrfs_lookup_block_group(info, bytenr); + if (!cache) { + ret = -ENOENT; + break; + } ++ space_info = cache->space_info; + factor = btrfs_bg_type_to_factor(cache->flags); + + /* +@@ -3363,7 +3359,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, + byte_in_group = bytenr - cache->start; + WARN_ON(byte_in_group > cache->length); + +- spin_lock(&cache->space_info->lock); ++ spin_lock(&space_info->lock); + spin_lock(&cache->lock); + + if (btrfs_test_opt(info, SPACE_CACHE) && +@@ -3376,23 +3372,23 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, + old_val += num_bytes; + cache->used = old_val; + cache->reserved -= num_bytes; +- cache->space_info->bytes_reserved -= num_bytes; +- cache->space_info->bytes_used += num_bytes; +- cache->space_info->disk_used += num_bytes * factor; ++ space_info->bytes_reserved -= num_bytes; ++ space_info->bytes_used += num_bytes; ++ space_info->disk_used += num_bytes * factor; + spin_unlock(&cache->lock); +- spin_unlock(&cache->space_info->lock); ++ spin_unlock(&space_info->lock); + } else { + old_val -= num_bytes; + cache->used = old_val; + cache->pinned += num_bytes; +- btrfs_space_info_update_bytes_pinned(info, +- cache->space_info, 
num_bytes); +- cache->space_info->bytes_used -= num_bytes; +- cache->space_info->disk_used -= num_bytes * factor; ++ btrfs_space_info_update_bytes_pinned(info, space_info, ++ num_bytes); ++ space_info->bytes_used -= num_bytes; ++ space_info->disk_used -= num_bytes * factor; + + reclaim = should_reclaim_block_group(cache, num_bytes); + spin_unlock(&cache->lock); +- spin_unlock(&cache->space_info->lock); ++ spin_unlock(&space_info->lock); + + set_extent_dirty(&trans->transaction->pinned_extents, + bytenr, bytenr + num_bytes - 1, +diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c +index 0d250d052487c..d84cef89cdff5 100644 +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -2693,8 +2693,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, + bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); + + spin_lock(&ctl->tree_lock); ++ /* Count initial region as zone_unusable until it gets activated. */ + if (!used) + to_free = size; ++ else if (initial && ++ test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) && ++ (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))) ++ to_free = 0; + else if (initial) + to_free = block_group->zone_capacity; + else if (offset >= block_group->alloc_offset) +@@ -2722,7 +2727,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, + reclaimable_unusable = block_group->zone_unusable - + (block_group->length - block_group->zone_capacity); + /* All the region is now unusable. Mark it as unused and reclaim */ +- if (block_group->zone_unusable == block_group->length) { ++ if (block_group->zone_unusable == block_group->length && ++ block_group->alloc_offset) { + btrfs_mark_bg_unused(block_group); + } else if (bg_reclaim_threshold && + reclaimable_unusable >= +diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h +index 3d8156fc8523f..f180ca061aef4 100644 +--- a/fs/btrfs/fs.h ++++ b/fs/btrfs/fs.h +@@ -119,11 +119,8 @@ enum { + /* Indicate that we want to commit the transaction. */ + BTRFS_FS_NEED_TRANS_COMMIT, + +- /* +- * Indicate metadata over-commit is disabled. This is set when active +- * zone tracking is needed. +- */ +- BTRFS_FS_NO_OVERCOMMIT, ++ /* This is set when active zone tracking is needed. */ ++ BTRFS_FS_ACTIVE_ZONE_TRACKING, + + /* + * Indicate if we have some features changed, this is mostly for +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 7e348bd2ccdeb..c232636ecdfea 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3731,7 +3731,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) + } + + /* update qgroup status and info */ ++ mutex_lock(&fs_info->qgroup_ioctl_lock); + err = btrfs_run_qgroups(trans); ++ mutex_unlock(&fs_info->qgroup_ioctl_lock); + if (err < 0) + btrfs_handle_fs_error(fs_info, err, + "failed to update qgroup status and info"); +diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c +index af97413abcf43..abf2b7f143078 100644 +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -2828,13 +2828,22 @@ cleanup: + } + + /* +- * called from commit_transaction. Writes all changed qgroups to disk. ++ * Writes all changed qgroups to disk. ++ * Called by the transaction commit path and the qgroup assign ioctl. 
+ */ + int btrfs_run_qgroups(struct btrfs_trans_handle *trans) + { + struct btrfs_fs_info *fs_info = trans->fs_info; + int ret = 0; + ++ /* ++ * In case we are called from the qgroup assign ioctl, assert that we ++ * are holding the qgroup_ioctl_lock, otherwise we can race with a quota ++ * disable operation (ioctl) and access a freed quota root. ++ */ ++ if (trans->transaction->state != TRANS_STATE_COMMIT_DOING) ++ lockdep_assert_held(&fs_info->qgroup_ioctl_lock); ++ + if (!fs_info->quota_root) + return ret; + +diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c +index 69c09508afb50..3eecce86f63fc 100644 +--- a/fs/btrfs/space-info.c ++++ b/fs/btrfs/space-info.c +@@ -308,8 +308,6 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, + ASSERT(found); + spin_lock(&found->lock); + found->total_bytes += block_group->length; +- if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) +- found->active_total_bytes += block_group->length; + found->disk_total += block_group->length * factor; + found->bytes_used += block_group->used; + found->disk_used += block_group->used * factor; +@@ -379,22 +377,6 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, + return avail; + } + +-static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info, +- struct btrfs_space_info *space_info) +-{ +- /* +- * On regular filesystem, all total_bytes are always writable. On zoned +- * filesystem, there may be a limitation imposed by max_active_zones. +- * For metadata allocation, we cannot finish an existing active block +- * group to avoid a deadlock. Thus, we need to consider only the active +- * groups to be writable for metadata space. +- */ +- if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) +- return space_info->total_bytes; +- +- return space_info->active_total_bytes; +-} +- + int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, u64 bytes, + enum btrfs_reserve_flush_enum flush) +@@ -407,13 +389,13 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, + return 0; + + used = btrfs_space_info_used(space_info, true); +- if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && ++ if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) && + (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) + avail = 0; + else + avail = calc_available_free_space(fs_info, space_info, flush); + +- if (used + bytes < writable_total_bytes(fs_info, space_info) + avail) ++ if (used + bytes < space_info->total_bytes + avail) + return 1; + return 0; + } +@@ -449,7 +431,7 @@ again: + ticket = list_first_entry(head, struct reserve_ticket, list); + + /* Check and see if our ticket can be satisfied now. */ +- if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) || ++ if ((used + ticket->bytes <= space_info->total_bytes) || + btrfs_can_overcommit(fs_info, space_info, ticket->bytes, + flush)) { + btrfs_space_info_update_bytes_may_use(fs_info, +@@ -829,7 +811,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, + { + u64 used; + u64 avail; +- u64 total; + u64 to_reclaim = space_info->reclaim_size; + + lockdep_assert_held(&space_info->lock); +@@ -844,9 +825,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, + * space. If that's the case add in our overage so we make sure to put + * appropriate pressure on the flushing state machine. 
+ */ +- total = writable_total_bytes(fs_info, space_info); +- if (total + avail < used) +- to_reclaim += used - (total + avail); ++ if (space_info->total_bytes + avail < used) ++ to_reclaim += used - (space_info->total_bytes + avail); + + return to_reclaim; + } +@@ -856,11 +836,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, + { + u64 global_rsv_size = fs_info->global_block_rsv.reserved; + u64 ordered, delalloc; +- u64 total = writable_total_bytes(fs_info, space_info); + u64 thresh; + u64 used; + +- thresh = mult_perc(total, 90); ++ thresh = mult_perc(space_info->total_bytes, 90); + + lockdep_assert_held(&space_info->lock); + +@@ -923,8 +902,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, + BTRFS_RESERVE_FLUSH_ALL); + used = space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_readonly + global_rsv_size; +- if (used < total) +- thresh += total - used; ++ if (used < space_info->total_bytes) ++ thresh += space_info->total_bytes - used; + thresh >>= space_info->clamp; + + used = space_info->bytes_pinned; +@@ -1651,7 +1630,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, + * can_overcommit() to ensure we can overcommit to continue. + */ + if (!pending_tickets && +- ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) || ++ ((used + orig_bytes <= space_info->total_bytes) || + btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { + btrfs_space_info_update_bytes_may_use(fs_info, space_info, + orig_bytes); +@@ -1665,8 +1644,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, + */ + if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) { + used = btrfs_space_info_used(space_info, false); +- if (used + orig_bytes <= +- writable_total_bytes(fs_info, space_info)) { ++ if (used + orig_bytes <= space_info->total_bytes) { + btrfs_space_info_update_bytes_may_use(fs_info, space_info, + orig_bytes); + ret = 0; +diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h +index fc99ea2b0c34f..2033b71b18cec 100644 +--- a/fs/btrfs/space-info.h ++++ b/fs/btrfs/space-info.h +@@ -96,8 +96,6 @@ struct btrfs_space_info { + u64 bytes_may_use; /* number of bytes that may be used for + delalloc/allocations */ + u64 bytes_readonly; /* total bytes that are read only */ +- /* Total bytes in the space, but only accounts active block groups. */ +- u64 active_total_bytes; + u64 bytes_zone_unusable; /* total bytes that are unusable until + resetting the device zone */ + +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index 8f8d0fce6e4a3..9094e2402922c 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -2035,7 +2035,20 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) + + if (current->journal_info == trans) + current->journal_info = NULL; +- btrfs_scrub_cancel(fs_info); ++ ++ /* ++ * If relocation is running, we can't cancel scrub because that will ++ * result in a deadlock. Before relocating a block group, relocation ++ * pauses scrub, then starts and commits a transaction before unpausing ++ * scrub. If the transaction commit is being done by the relocation ++ * task or triggered by another task and the relocation task is waiting ++ * for the commit, and we end up here due to an error in the commit ++ * path, then calling btrfs_scrub_cancel() will deadlock, as we are ++ * asking for scrub to stop while having it asked to be paused higher ++ * above in relocation code. 
++ */ ++ if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) ++ btrfs_scrub_cancel(fs_info); + + kmem_cache_free(btrfs_trans_handle_cachep, trans); + } +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index df43093b7a46d..fe652f8fc697b 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -1366,8 +1366,17 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, + * So, we need to add a special mount option to scan for + * later supers, using BTRFS_SUPER_MIRROR_MAX instead + */ +- flags |= FMODE_EXCL; + ++ /* ++ * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may ++ * initiate the device scan which may race with the user's mount ++ * or mkfs command, resulting in failure. ++ * Since the device scan is solely for reading purposes, there is ++ * no need for FMODE_EXCL. Additionally, the devices are read again ++ * during the mount process. It is ok to get some inconsistent ++ * values temporarily, as the device paths of the fsid are the only ++ * required information for assembling the volume. ++ */ + bdev = blkdev_get_by_path(path, flags, holder); + if (IS_ERR(bdev)) + return ERR_CAST(bdev); +@@ -3266,8 +3275,15 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) + btrfs_scrub_pause(fs_info); + ret = btrfs_relocate_block_group(fs_info, chunk_offset); + btrfs_scrub_continue(fs_info); +- if (ret) ++ if (ret) { ++ /* ++ * If we had a transaction abort, stop all running scrubs. ++ * See transaction.c:cleanup_transaction() why we do it here. ++ */ ++ if (BTRFS_FS_ERROR(fs_info)) ++ btrfs_scrub_cancel(fs_info); + return ret; ++ } + + block_group = btrfs_lookup_block_group(fs_info, chunk_offset); + if (!block_group) +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index f3b7d8ae93a9f..ce5ebba7fdd9a 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -539,8 +539,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) + } + atomic_set(&zone_info->active_zones_left, + max_active_zones - nactive); +- /* Overcommit does not work well with active zone tacking. 
*/ +- set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); ++ set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); + } + + /* Validate superblock log */ +@@ -1577,9 +1576,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) + return; + + WARN_ON(cache->bytes_super != 0); +- unusable = (cache->alloc_offset - cache->used) + +- (cache->length - cache->zone_capacity); +- free = cache->zone_capacity - cache->alloc_offset; ++ ++ /* Check for block groups never get activated */ ++ if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) && ++ cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) && ++ !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) && ++ cache->alloc_offset == 0) { ++ unusable = cache->length; ++ free = 0; ++ } else { ++ unusable = (cache->alloc_offset - cache->used) + ++ (cache->length - cache->zone_capacity); ++ free = cache->zone_capacity - cache->alloc_offset; ++ } + + /* We only need ->free_space in ALLOC_SEQ block groups */ + cache->cached = BTRFS_CACHE_FINISHED; +@@ -1916,7 +1925,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) + + /* Successfully activated all the zones */ + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); +- space_info->active_total_bytes += block_group->length; ++ WARN_ON(block_group->alloc_offset != 0); ++ if (block_group->zone_unusable == block_group->length) { ++ block_group->zone_unusable = block_group->length - block_group->zone_capacity; ++ space_info->bytes_zone_unusable -= block_group->zone_capacity; ++ } + spin_unlock(&block_group->lock); + btrfs_try_granting_tickets(fs_info, space_info); + spin_unlock(&space_info->lock); +@@ -2280,7 +2293,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) + u64 avail; + + spin_lock(&block_group->lock); +- if (block_group->reserved || ++ if (block_group->reserved || block_group->alloc_offset == 0 || + (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) { + spin_unlock(&block_group->lock); + continue; +@@ -2317,10 +2330,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, + if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) + return 0; + +- /* No more block groups to activate */ +- if (space_info->active_total_bytes == space_info->total_bytes) +- return 0; +- + for (;;) { + int ret; + bool need_finish = false; +diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h +index 63a0ac2b93558..16a703c683b77 100644 +--- a/fs/cifs/cifsfs.h ++++ b/fs/cifs/cifsfs.h +@@ -118,7 +118,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops; + #ifdef CONFIG_CIFS_DFS_UPCALL + extern struct vfsmount *cifs_dfs_d_automount(struct path *path); + #else +-#define cifs_dfs_d_automount NULL ++static inline struct vfsmount *cifs_dfs_d_automount(struct path *path) ++{ ++ return ERR_PTR(-EREMOTE); ++} + #endif + + /* Functions related to symlinks */ +diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h +index e75184544ecb4..639df85dafd6c 100644 +--- a/fs/cifs/cifsproto.h ++++ b/fs/cifs/cifsproto.h +@@ -697,5 +697,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) + + struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); + void cifs_put_tcon_super(struct super_block *sb); ++int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); + + #endif /* _CIFSPROTO_H */ +diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c +index 566e6a26b897c..f71f8533c2f4d 100644 +--- a/fs/cifs/cifssmb.c ++++ b/fs/cifs/cifssmb.c +@@ -70,7 
+70,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) + struct cifs_ses *ses; + struct TCP_Server_Info *server; + struct nls_table *nls_codepage; +- int retries; + + /* + * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for +@@ -98,45 +97,9 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) + } + spin_unlock(&tcon->tc_lock); + +- retries = server->nr_targets; +- +- /* +- * Give demultiplex thread up to 10 seconds to each target available for +- * reconnect -- should be greater than cifs socket timeout which is 7 +- * seconds. +- */ +- while (server->tcpStatus == CifsNeedReconnect) { +- rc = wait_event_interruptible_timeout(server->response_q, +- (server->tcpStatus != CifsNeedReconnect), +- 10 * HZ); +- if (rc < 0) { +- cifs_dbg(FYI, "%s: aborting reconnect due to a received signal by the process\n", +- __func__); +- return -ERESTARTSYS; +- } +- +- /* are we still trying to reconnect? */ +- spin_lock(&server->srv_lock); +- if (server->tcpStatus != CifsNeedReconnect) { +- spin_unlock(&server->srv_lock); +- break; +- } +- spin_unlock(&server->srv_lock); +- +- if (retries && --retries) +- continue; +- +- /* +- * on "soft" mounts we wait once. Hard mounts keep +- * retrying until process is killed or server comes +- * back on-line +- */ +- if (!tcon->retry) { +- cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n"); +- return -EHOSTDOWN; +- } +- retries = server->nr_targets; +- } ++ rc = cifs_wait_for_server_reconnect(server, tcon->retry); ++ if (rc) ++ return rc; + + spin_lock(&ses->chan_lock); + if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) { +@@ -4414,8 +4377,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, + return -ENODEV; + + getDFSRetry: +- rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB, +- (void **) &pSMBr); ++ /* ++ * Use smb_init_no_reconnect() instead of smb_init() as ++ * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus ++ * causing an infinite recursion. 
++ */ ++ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, ++ (void **)&pSMB, (void **)&pSMBr); + if (rc) + return rc; + +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index 6da2af97b8bac..985e962cf0858 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -244,31 +244,42 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, + cifs_chan_update_iface(ses, server); + + spin_lock(&ses->chan_lock); +- if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) +- goto next_session; ++ if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) { ++ spin_unlock(&ses->chan_lock); ++ continue; ++ } + + if (mark_smb_session) + CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses); + else + cifs_chan_set_need_reconnect(ses, server); + ++ cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", ++ __func__, ses->chans_need_reconnect); ++ + /* If all channels need reconnect, then tcon needs reconnect */ +- if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) +- goto next_session; ++ if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { ++ spin_unlock(&ses->chan_lock); ++ continue; ++ } ++ spin_unlock(&ses->chan_lock); + ++ spin_lock(&ses->ses_lock); + ses->ses_status = SES_NEED_RECON; ++ spin_unlock(&ses->ses_lock); + + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + tcon->need_reconnect = true; ++ spin_lock(&tcon->tc_lock); + tcon->status = TID_NEED_RECON; ++ spin_unlock(&tcon->tc_lock); + } + if (ses->tcon_ipc) { + ses->tcon_ipc->need_reconnect = true; ++ spin_lock(&ses->tcon_ipc->tc_lock); + ses->tcon_ipc->status = TID_NEED_RECON; ++ spin_unlock(&ses->tcon_ipc->tc_lock); + } +- +-next_session: +- spin_unlock(&ses->chan_lock); + } + spin_unlock(&cifs_tcp_ses_lock); + } +@@ -3703,11 +3714,19 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, + + /* only send once per connect */ + spin_lock(&server->srv_lock); +- if (!server->ops->need_neg(server) || ++ if (server->tcpStatus != CifsGood && ++ server->tcpStatus != CifsNew && + server->tcpStatus != CifsNeedNegotiate) { ++ spin_unlock(&server->srv_lock); ++ return -EHOSTDOWN; ++ } ++ ++ if (!server->ops->need_neg(server) && ++ server->tcpStatus == CifsGood) { + spin_unlock(&server->srv_lock); + return 0; + } ++ + server->tcpStatus = CifsInNegotiate; + spin_unlock(&server->srv_lock); + +@@ -3735,39 +3754,48 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, + struct nls_table *nls_info) + { + int rc = -ENOSYS; +- struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; +- struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; ++ struct TCP_Server_Info *pserver = CIFS_SERVER_IS_CHAN(server) ? 
server->primary_server : server; ++ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&pserver->dstaddr; ++ struct sockaddr_in *addr = (struct sockaddr_in *)&pserver->dstaddr; + bool is_binding = false; + + spin_lock(&ses->ses_lock); +- if (server->dstaddr.ss_family == AF_INET6) +- scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr); +- else +- scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr); ++ cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", ++ __func__, ses->chans_need_reconnect); + + if (ses->ses_status != SES_GOOD && + ses->ses_status != SES_NEW && + ses->ses_status != SES_NEED_RECON) { + spin_unlock(&ses->ses_lock); +- return 0; ++ return -EHOSTDOWN; + } + + /* only send once per connect */ + spin_lock(&ses->chan_lock); +- if (CIFS_ALL_CHANS_GOOD(ses) || +- cifs_chan_in_reconnect(ses, server)) { ++ if (CIFS_ALL_CHANS_GOOD(ses)) { ++ if (ses->ses_status == SES_NEED_RECON) ++ ses->ses_status = SES_GOOD; + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); + return 0; + } +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); ++ + cifs_chan_set_in_reconnect(ses, server); ++ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + spin_unlock(&ses->chan_lock); + + if (!is_binding) + ses->ses_status = SES_IN_SETUP; + spin_unlock(&ses->ses_lock); + ++ /* update ses ip_addr only for primary chan */ ++ if (server == pserver) { ++ if (server->dstaddr.ss_family == AF_INET6) ++ scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr); ++ else ++ scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr); ++ } ++ + if (!is_binding) { + ses->capabilities = server->capabilities; + if (!linuxExtEnabled) +diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c +index 9f4486b705d5c..5542893ef03f7 100644 +--- a/fs/cifs/misc.c ++++ b/fs/cifs/misc.c +@@ -1376,3 +1376,47 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, + return 0; + } + #endif ++ ++int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry) ++{ ++ int timeout = 10; ++ int rc; ++ ++ spin_lock(&server->srv_lock); ++ if (server->tcpStatus != CifsNeedReconnect) { ++ spin_unlock(&server->srv_lock); ++ return 0; ++ } ++ timeout *= server->nr_targets; ++ spin_unlock(&server->srv_lock); ++ ++ /* ++ * Give demultiplex thread up to 10 seconds to each target available for ++ * reconnect -- should be greater than cifs socket timeout which is 7 ++ * seconds. ++ * ++ * On "soft" mounts we wait once. Hard mounts keep retrying until ++ * process is killed or server comes back on-line. ++ */ ++ do { ++ rc = wait_event_interruptible_timeout(server->response_q, ++ (server->tcpStatus != CifsNeedReconnect), ++ timeout * HZ); ++ if (rc < 0) { ++ cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n", ++ __func__); ++ return -ERESTARTSYS; ++ } ++ ++ /* are we still trying to reconnect? 
*/ ++ spin_lock(&server->srv_lock); ++ if (server->tcpStatus != CifsNeedReconnect) { ++ spin_unlock(&server->srv_lock); ++ return 0; ++ } ++ spin_unlock(&server->srv_lock); ++ } while (retry); ++ ++ cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__); ++ return -EHOSTDOWN; ++} +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index 6e6e44d8b4c79..b37379b62cc77 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -139,72 +139,12 @@ out: + return; + } + +-static int wait_for_server_reconnect(struct TCP_Server_Info *server, +- __le16 smb2_command, bool retry) +-{ +- int timeout = 10; +- int rc; +- +- spin_lock(&server->srv_lock); +- if (server->tcpStatus != CifsNeedReconnect) { +- spin_unlock(&server->srv_lock); +- return 0; +- } +- timeout *= server->nr_targets; +- spin_unlock(&server->srv_lock); +- +- /* +- * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE +- * here since they are implicitly done when session drops. +- */ +- switch (smb2_command) { +- /* +- * BB Should we keep oplock break and add flush to exceptions? +- */ +- case SMB2_TREE_DISCONNECT: +- case SMB2_CANCEL: +- case SMB2_CLOSE: +- case SMB2_OPLOCK_BREAK: +- return -EAGAIN; +- } +- +- /* +- * Give demultiplex thread up to 10 seconds to each target available for +- * reconnect -- should be greater than cifs socket timeout which is 7 +- * seconds. +- * +- * On "soft" mounts we wait once. Hard mounts keep retrying until +- * process is killed or server comes back on-line. +- */ +- do { +- rc = wait_event_interruptible_timeout(server->response_q, +- (server->tcpStatus != CifsNeedReconnect), +- timeout * HZ); +- if (rc < 0) { +- cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n", +- __func__); +- return -ERESTARTSYS; +- } +- +- /* are we still trying to reconnect? */ +- spin_lock(&server->srv_lock); +- if (server->tcpStatus != CifsNeedReconnect) { +- spin_unlock(&server->srv_lock); +- return 0; +- } +- spin_unlock(&server->srv_lock); +- } while (retry); +- +- cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__); +- return -EHOSTDOWN; +-} +- + static int + smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + struct TCP_Server_Info *server) + { + int rc = 0; +- struct nls_table *nls_codepage; ++ struct nls_table *nls_codepage = NULL; + struct cifs_ses *ses; + + /* +@@ -239,7 +179,28 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + (!tcon->ses->server) || !server) + return -EIO; + +- rc = wait_for_server_reconnect(server, smb2_command, tcon->retry); ++ spin_lock(&server->srv_lock); ++ if (server->tcpStatus == CifsNeedReconnect) { ++ /* ++ * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE ++ * here since they are implicitly done when session drops. ++ */ ++ switch (smb2_command) { ++ /* ++ * BB Should we keep oplock break and add flush to exceptions? ++ */ ++ case SMB2_TREE_DISCONNECT: ++ case SMB2_CANCEL: ++ case SMB2_CLOSE: ++ case SMB2_OPLOCK_BREAK: ++ spin_unlock(&server->srv_lock); ++ return -EAGAIN; ++ } ++ } ++ spin_unlock(&server->srv_lock); ++ ++again: ++ rc = cifs_wait_for_server_reconnect(server, tcon->retry); + if (rc) + return rc; + +@@ -255,8 +216,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + tcon->ses->chans_need_reconnect, + tcon->need_reconnect); + +- nls_codepage = load_nls_default(); +- ++ mutex_lock(&ses->session_mutex); + /* + * Recheck after acquire mutex. 
If another thread is negotiating + * and the server never sends an answer the socket will be closed +@@ -265,28 +225,38 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsNeedReconnect) { + spin_unlock(&server->srv_lock); ++ mutex_unlock(&ses->session_mutex); ++ ++ if (tcon->retry) ++ goto again; ++ + rc = -EHOSTDOWN; + goto out; + } + spin_unlock(&server->srv_lock); + ++ nls_codepage = load_nls_default(); ++ + /* + * need to prevent multiple threads trying to simultaneously + * reconnect the same SMB session + */ ++ spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); +- if (!cifs_chan_needs_reconnect(ses, server)) { ++ if (!cifs_chan_needs_reconnect(ses, server) && ++ ses->ses_status == SES_GOOD) { + spin_unlock(&ses->chan_lock); +- ++ spin_unlock(&ses->ses_lock); + /* this means that we only need to tree connect */ + if (tcon->need_reconnect) + goto skip_sess_setup; + ++ mutex_unlock(&ses->session_mutex); + goto out; + } + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + +- mutex_lock(&ses->session_mutex); + rc = cifs_negotiate_protocol(0, ses, server); + if (!rc) { + rc = cifs_setup_session(0, ses, server, nls_codepage); +@@ -302,10 +272,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + mutex_unlock(&ses->session_mutex); + goto out; + } +- mutex_unlock(&ses->session_mutex); + + skip_sess_setup: +- mutex_lock(&ses->session_mutex); + if (!tcon->need_reconnect) { + mutex_unlock(&ses->session_mutex); + goto out; +@@ -320,7 +288,7 @@ skip_sess_setup: + cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); + if (rc) { + /* If sess reconnected but tcon didn't, something strange ... */ +- pr_warn_once("reconnect tcon failed rc = %d\n", rc); ++ cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); + goto out; + } + +@@ -1292,9 +1260,9 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) + if (rc) + return rc; + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + if (is_binding) { + req->hdr.SessionId = cpu_to_le64(ses->Suid); +@@ -1452,9 +1420,9 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) + goto out_put_spnego_key; + } + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + /* keep session key if binding */ + if (!is_binding) { +@@ -1578,9 +1546,9 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) + + cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + /* keep existing ses id and flags if binding */ + if (!is_binding) { +@@ -1646,9 +1614,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) + + rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base; + +- spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); +- spin_unlock(&ses->chan_lock); ++ spin_lock(&ses->ses_lock); ++ is_binding = (ses->ses_status == SES_GOOD); ++ spin_unlock(&ses->ses_lock); + + /* keep existing ses id and flags if binding */ + if 
(!is_binding) { +diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c +index d827b7547ffad..790acf65a0926 100644 +--- a/fs/cifs/smb2transport.c ++++ b/fs/cifs/smb2transport.c +@@ -81,6 +81,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) + struct cifs_ses *ses = NULL; + int i; + int rc = 0; ++ bool is_binding = false; + + spin_lock(&cifs_tcp_ses_lock); + +@@ -97,9 +98,12 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) + goto out; + + found: ++ spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); +- if (cifs_chan_needs_reconnect(ses, server) && +- !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { ++ ++ is_binding = (cifs_chan_needs_reconnect(ses, server) && ++ ses->ses_status == SES_GOOD); ++ if (is_binding) { + /* + * If we are in the process of binding a new channel + * to an existing session, use the master connection +@@ -107,6 +111,7 @@ found: + */ + memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + goto out; + } + +@@ -119,10 +124,12 @@ found: + if (chan->server == server) { + memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE); + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + goto out; + } + } + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + + cifs_dbg(VFS, + "%s: Could not find channel signing key for session 0x%llx\n", +@@ -392,11 +399,15 @@ generate_smb3signingkey(struct cifs_ses *ses, + bool is_binding = false; + int chan_index = 0; + ++ spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); +- is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); ++ is_binding = (cifs_chan_needs_reconnect(ses, server) && ++ ses->ses_status == SES_GOOD); ++ + chan_index = cifs_ses_get_chan_index(ses, server); + /* TODO: introduce ref counting for channels when the can be freed */ + spin_unlock(&ses->chan_lock); ++ spin_unlock(&ses->ses_lock); + + /* + * All channels use the same encryption/decryption keys but +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 4214286e01450..4f4ef7aa2f4a0 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -1980,8 +1980,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) + if (!data->rpc_done) { + if (data->rpc_status) + return ERR_PTR(data->rpc_status); +- /* cached opens have already been processed */ +- goto update; ++ return nfs4_try_open_cached(data); + } + + ret = nfs_refresh_inode(inode, &data->f_attr); +@@ -1990,7 +1989,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) + + if (data->o_res.delegation_type != 0) + nfs4_opendata_check_deleg(data, state); +-update: ++ + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); +diff --git a/fs/verity/enable.c b/fs/verity/enable.c +index df6b499bf6a14..400c264bf8930 100644 +--- a/fs/verity/enable.c ++++ b/fs/verity/enable.c +@@ -390,25 +390,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) + goto out_drop_write; + + err = enable_verity(filp, &arg); +- if (err) +- goto out_allow_write_access; + + /* +- * Some pages of the file may have been evicted from pagecache after +- * being used in the Merkle tree construction, then read into pagecache +- * again by another process reading from the file concurrently. Since +- * these pages didn't undergo verification against the file digest which +- * fs-verity now claims to be enforcing, we have to wipe the pagecache +- * to ensure that all future reads are verified. 
++ * We no longer drop the inode's pagecache after enabling verity. This ++ * used to be done to try to avoid a race condition where pages could be ++ * evicted after being used in the Merkle tree construction, then ++ * re-instantiated by a concurrent read. Such pages are unverified, and ++ * the backing storage could have filled them with different content, so ++ * they shouldn't be used to fulfill reads once verity is enabled. ++ * ++ * But, dropping the pagecache has a big performance impact, and it ++ * doesn't fully solve the race condition anyway. So for those reasons, ++ * and also because this race condition isn't very important relatively ++ * speaking (especially for small-ish files, where the chance of a page ++ * being used, evicted, *and* re-instantiated all while enabling verity ++ * is quite small), we no longer drop the inode's pagecache. + */ +- filemap_write_and_wait(inode->i_mapping); +- invalidate_inode_pages2(inode->i_mapping); + + /* + * allow_write_access() is needed to pair with deny_write_access(). + * Regardless, the filesystem won't allow writing to verity files. + */ +-out_allow_write_access: + allow_write_access(filp); + out_drop_write: + mnt_drop_write_file(filp); +diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile +index 9fe54f5319f22..645f7229de4a0 100644 +--- a/fs/zonefs/Makefile ++++ b/fs/zonefs/Makefile +@@ -3,4 +3,4 @@ ccflags-y += -I$(src) + + obj-$(CONFIG_ZONEFS_FS) += zonefs.o + +-zonefs-y := super.o sysfs.o ++zonefs-y := super.o file.o sysfs.o +diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c +new file mode 100644 +index 0000000000000..63cd50840419c +--- /dev/null ++++ b/fs/zonefs/file.c +@@ -0,0 +1,902 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Simple file system for zoned block devices exposing zones as files. ++ * ++ * Copyright (C) 2022 Western Digital Corporation or its affiliates. ++ */ ++#include <linux/module.h> ++#include <linux/pagemap.h> ++#include <linux/iomap.h> ++#include <linux/init.h> ++#include <linux/slab.h> ++#include <linux/blkdev.h> ++#include <linux/statfs.h> ++#include <linux/writeback.h> ++#include <linux/quotaops.h> ++#include <linux/seq_file.h> ++#include <linux/parser.h> ++#include <linux/uio.h> ++#include <linux/mman.h> ++#include <linux/sched/mm.h> ++#include <linux/task_io_accounting_ops.h> ++ ++#include "zonefs.h" ++ ++#include "trace.h" ++ ++static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ++ /* ++ * All blocks are always mapped below EOF. If reading past EOF, ++ * act as if there is a hole up to the file maximum size. 
++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ isize = i_size_read(inode); ++ if (iomap->offset >= isize) { ++ iomap->type = IOMAP_HOLE; ++ iomap->addr = IOMAP_NULL_ADDR; ++ iomap->length = length; ++ } else { ++ iomap->type = IOMAP_MAPPED; ++ iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset; ++ iomap->length = isize - iomap->offset; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ trace_zonefs_iomap_begin(inode, iomap); ++ ++ return 0; ++} ++ ++static const struct iomap_ops zonefs_read_iomap_ops = { ++ .iomap_begin = zonefs_read_iomap_begin, ++}; ++ ++static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ++ /* All write I/Os should always be within the file maximum size */ ++ if (WARN_ON_ONCE(offset + length > z->z_capacity)) ++ return -EIO; ++ ++ /* ++ * Sequential zones can only accept direct writes. This is already ++ * checked when writes are issued, so warn if we see a page writeback ++ * operation. ++ */ ++ if (WARN_ON_ONCE(zonefs_zone_is_seq(z) && !(flags & IOMAP_DIRECT))) ++ return -EIO; ++ ++ /* ++ * For conventional zones, all blocks are always mapped. For sequential ++ * zones, all blocks after always mapped below the inode size (zone ++ * write pointer) and unwriten beyond. ++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset; ++ isize = i_size_read(inode); ++ if (iomap->offset >= isize) { ++ iomap->type = IOMAP_UNWRITTEN; ++ iomap->length = z->z_capacity - iomap->offset; ++ } else { ++ iomap->type = IOMAP_MAPPED; ++ iomap->length = isize - iomap->offset; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ trace_zonefs_iomap_begin(inode, iomap); ++ ++ return 0; ++} ++ ++static const struct iomap_ops zonefs_write_iomap_ops = { ++ .iomap_begin = zonefs_write_iomap_begin, ++}; ++ ++static int zonefs_read_folio(struct file *unused, struct folio *folio) ++{ ++ return iomap_read_folio(folio, &zonefs_read_iomap_ops); ++} ++ ++static void zonefs_readahead(struct readahead_control *rac) ++{ ++ iomap_readahead(rac, &zonefs_read_iomap_ops); ++} ++ ++/* ++ * Map blocks for page writeback. This is used only on conventional zone files, ++ * which implies that the page range can only be within the fixed inode size. 
++ */ ++static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, ++ struct inode *inode, loff_t offset) ++{ ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ ++ if (WARN_ON_ONCE(zonefs_zone_is_seq(z))) ++ return -EIO; ++ if (WARN_ON_ONCE(offset >= i_size_read(inode))) ++ return -EIO; ++ ++ /* If the mapping is already OK, nothing needs to be done */ ++ if (offset >= wpc->iomap.offset && ++ offset < wpc->iomap.offset + wpc->iomap.length) ++ return 0; ++ ++ return zonefs_write_iomap_begin(inode, offset, ++ z->z_capacity - offset, ++ IOMAP_WRITE, &wpc->iomap, NULL); ++} ++ ++static const struct iomap_writeback_ops zonefs_writeback_ops = { ++ .map_blocks = zonefs_write_map_blocks, ++}; ++ ++static int zonefs_writepages(struct address_space *mapping, ++ struct writeback_control *wbc) ++{ ++ struct iomap_writepage_ctx wpc = { }; ++ ++ return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); ++} ++ ++static int zonefs_swap_activate(struct swap_info_struct *sis, ++ struct file *swap_file, sector_t *span) ++{ ++ struct inode *inode = file_inode(swap_file); ++ ++ if (zonefs_inode_is_seq(inode)) { ++ zonefs_err(inode->i_sb, ++ "swap file: not a conventional zone file\n"); ++ return -EINVAL; ++ } ++ ++ return iomap_swapfile_activate(sis, swap_file, span, ++ &zonefs_read_iomap_ops); ++} ++ ++const struct address_space_operations zonefs_file_aops = { ++ .read_folio = zonefs_read_folio, ++ .readahead = zonefs_readahead, ++ .writepages = zonefs_writepages, ++ .dirty_folio = filemap_dirty_folio, ++ .release_folio = iomap_release_folio, ++ .invalidate_folio = iomap_invalidate_folio, ++ .migrate_folio = filemap_migrate_folio, ++ .is_partially_uptodate = iomap_is_partially_uptodate, ++ .error_remove_page = generic_error_remove_page, ++ .direct_IO = noop_direct_IO, ++ .swap_activate = zonefs_swap_activate, ++}; ++ ++int zonefs_file_truncate(struct inode *inode, loff_t isize) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ loff_t old_isize; ++ enum req_op op; ++ int ret = 0; ++ ++ /* ++ * Only sequential zone files can be truncated and truncation is allowed ++ * only down to a 0 size, which is equivalent to a zone reset, and to ++ * the maximum file size, which is equivalent to a zone finish. ++ */ ++ if (!zonefs_zone_is_seq(z)) ++ return -EPERM; ++ ++ if (!isize) ++ op = REQ_OP_ZONE_RESET; ++ else if (isize == z->z_capacity) ++ op = REQ_OP_ZONE_FINISH; ++ else ++ return -EPERM; ++ ++ inode_dio_wait(inode); ++ ++ /* Serialize against page faults */ ++ filemap_invalidate_lock(inode->i_mapping); ++ ++ /* Serialize against zonefs_iomap_begin() */ ++ mutex_lock(&zi->i_truncate_mutex); ++ ++ old_isize = i_size_read(inode); ++ if (isize == old_isize) ++ goto unlock; ++ ++ ret = zonefs_inode_zone_mgmt(inode, op); ++ if (ret) ++ goto unlock; ++ ++ /* ++ * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set, ++ * take care of open zones. ++ */ ++ if (z->z_flags & ZONEFS_ZONE_OPEN) { ++ /* ++ * Truncating a zone to EMPTY or FULL is the equivalent of ++ * closing the zone. For a truncation to 0, we need to ++ * re-open the zone to ensure new writes can be processed. ++ * For a truncation to the maximum file size, the zone is ++ * closed and writes cannot be accepted anymore, so clear ++ * the open flag. 
++ */ ++ if (!isize) ++ ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_OPEN); ++ else ++ z->z_flags &= ~ZONEFS_ZONE_OPEN; ++ } ++ ++ zonefs_update_stats(inode, isize); ++ truncate_setsize(inode, isize); ++ z->z_wpoffset = isize; ++ zonefs_inode_account_active(inode); ++ ++unlock: ++ mutex_unlock(&zi->i_truncate_mutex); ++ filemap_invalidate_unlock(inode->i_mapping); ++ ++ return ret; ++} ++ ++static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ struct inode *inode = file_inode(file); ++ int ret = 0; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ /* ++ * Since only direct writes are allowed in sequential files, page cache ++ * flush is needed only for conventional zone files. ++ */ ++ if (zonefs_inode_is_cnv(inode)) ++ ret = file_write_and_wait_range(file, start, end); ++ if (!ret) ++ ret = blkdev_issue_flush(inode->i_sb->s_bdev); ++ ++ if (ret) ++ zonefs_io_error(inode, true); ++ ++ return ret; ++} ++ ++static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) ++{ ++ struct inode *inode = file_inode(vmf->vma->vm_file); ++ vm_fault_t ret; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return VM_FAULT_SIGBUS; ++ ++ /* ++ * Sanity check: only conventional zone files can have shared ++ * writeable mappings. ++ */ ++ if (zonefs_inode_is_seq(inode)) ++ return VM_FAULT_NOPAGE; ++ ++ sb_start_pagefault(inode->i_sb); ++ file_update_time(vmf->vma->vm_file); ++ ++ /* Serialize against truncates */ ++ filemap_invalidate_lock_shared(inode->i_mapping); ++ ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); ++ filemap_invalidate_unlock_shared(inode->i_mapping); ++ ++ sb_end_pagefault(inode->i_sb); ++ return ret; ++} ++ ++static const struct vm_operations_struct zonefs_file_vm_ops = { ++ .fault = filemap_fault, ++ .map_pages = filemap_map_pages, ++ .page_mkwrite = zonefs_filemap_page_mkwrite, ++}; ++ ++static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ /* ++ * Conventional zones accept random writes, so their files can support ++ * shared writable mappings. For sequential zone files, only read ++ * mappings are possible since there are no guarantees for write ++ * ordering between msync() and page cache writeback. ++ */ ++ if (zonefs_inode_is_seq(file_inode(file)) && ++ (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) ++ return -EINVAL; ++ ++ file_accessed(file); ++ vma->vm_ops = &zonefs_file_vm_ops; ++ ++ return 0; ++} ++ ++static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) ++{ ++ loff_t isize = i_size_read(file_inode(file)); ++ ++ /* ++ * Seeks are limited to below the zone size for conventional zones ++ * and below the zone write pointer for sequential zones. In both ++ * cases, this limit is the inode size. ++ */ ++ return generic_file_llseek_size(file, offset, whence, isize, isize); ++} ++ ++static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, ++ int error, unsigned int flags) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ if (error) { ++ zonefs_io_error(inode, true); ++ return error; ++ } ++ ++ if (size && zonefs_inode_is_seq(inode)) { ++ /* ++ * Note that we may be seeing completions out of order, ++ * but that is not a problem since a write completed ++ * successfully necessarily means that all preceding writes ++ * were also successful. So we can safely increase the inode ++ * size to the write end location. 
++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ if (i_size_read(inode) < iocb->ki_pos + size) { ++ zonefs_update_stats(inode, iocb->ki_pos + size); ++ zonefs_i_size_write(inode, iocb->ki_pos + size); ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ } ++ ++ return 0; ++} ++ ++static const struct iomap_dio_ops zonefs_write_dio_ops = { ++ .end_io = zonefs_file_write_dio_end_io, ++}; ++ ++static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ struct block_device *bdev = inode->i_sb->s_bdev; ++ unsigned int max = bdev_max_zone_append_sectors(bdev); ++ pgoff_t start, end; ++ struct bio *bio; ++ ssize_t size; ++ int nr_pages; ++ ssize_t ret; ++ ++ max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); ++ iov_iter_truncate(from, max); ++ ++ /* ++ * If the inode block size (zone write granularity) is smaller than the ++ * page size, we may be appending data belonging to the last page of the ++ * inode straddling inode->i_size, with that page already cached due to ++ * a buffered read or readahead. So make sure to invalidate that page. ++ * This will always be a no-op for the case where the block size is ++ * equal to the page size. ++ */ ++ start = iocb->ki_pos >> PAGE_SHIFT; ++ end = (iocb->ki_pos + iov_iter_count(from) - 1) >> PAGE_SHIFT; ++ if (invalidate_inode_pages2_range(inode->i_mapping, start, end)) ++ return -EBUSY; ++ ++ nr_pages = iov_iter_npages(from, BIO_MAX_VECS); ++ if (!nr_pages) ++ return 0; ++ ++ bio = bio_alloc(bdev, nr_pages, ++ REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); ++ bio->bi_iter.bi_sector = z->z_sector; ++ bio->bi_ioprio = iocb->ki_ioprio; ++ if (iocb_is_dsync(iocb)) ++ bio->bi_opf |= REQ_FUA; ++ ++ ret = bio_iov_iter_get_pages(bio, from); ++ if (unlikely(ret)) ++ goto out_release; ++ ++ size = bio->bi_iter.bi_size; ++ task_io_account_write(size); ++ ++ if (iocb->ki_flags & IOCB_HIPRI) ++ bio_set_polled(bio, iocb); ++ ++ ret = submit_bio_wait(bio); ++ ++ /* ++ * If the file zone was written underneath the file system, the zone ++ * write pointer may not be where we expect it to be, but the zone ++ * append write can still succeed. So check manually that we wrote where ++ * we intended to, that is, at zi->i_wpoffset. ++ */ ++ if (!ret) { ++ sector_t wpsector = ++ z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT); ++ ++ if (bio->bi_iter.bi_sector != wpsector) { ++ zonefs_warn(inode->i_sb, ++ "Corrupted write pointer %llu for zone at %llu\n", ++ bio->bi_iter.bi_sector, z->z_sector); ++ ret = -EIO; ++ } ++ } ++ ++ zonefs_file_write_dio_end_io(iocb, size, ret, 0); ++ trace_zonefs_file_dio_append(inode, size, ret); ++ ++out_release: ++ bio_release_pages(bio, false); ++ bio_put(bio); ++ ++ if (ret >= 0) { ++ iocb->ki_pos += size; ++ return size; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Do not exceed the LFS limits nor the file zone size. If pos is under the ++ * limit it becomes a short access. If it exceeds the limit, return -EFBIG. 
++ */ ++static loff_t zonefs_write_check_limits(struct file *file, loff_t pos, ++ loff_t count) ++{ ++ struct inode *inode = file_inode(file); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ loff_t limit = rlimit(RLIMIT_FSIZE); ++ loff_t max_size = z->z_capacity; ++ ++ if (limit != RLIM_INFINITY) { ++ if (pos >= limit) { ++ send_sig(SIGXFSZ, current, 0); ++ return -EFBIG; ++ } ++ count = min(count, limit - pos); ++ } ++ ++ if (!(file->f_flags & O_LARGEFILE)) ++ max_size = min_t(loff_t, MAX_NON_LFS, max_size); ++ ++ if (unlikely(pos >= max_size)) ++ return -EFBIG; ++ ++ return min(count, max_size - pos); ++} ++ ++static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct file *file = iocb->ki_filp; ++ struct inode *inode = file_inode(file); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ loff_t count; ++ ++ if (IS_SWAPFILE(inode)) ++ return -ETXTBSY; ++ ++ if (!iov_iter_count(from)) ++ return 0; ++ ++ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) ++ return -EINVAL; ++ ++ if (iocb->ki_flags & IOCB_APPEND) { ++ if (zonefs_zone_is_cnv(z)) ++ return -EINVAL; ++ mutex_lock(&zi->i_truncate_mutex); ++ iocb->ki_pos = z->z_wpoffset; ++ mutex_unlock(&zi->i_truncate_mutex); ++ } ++ ++ count = zonefs_write_check_limits(file, iocb->ki_pos, ++ iov_iter_count(from)); ++ if (count < 0) ++ return count; ++ ++ iov_iter_truncate(from, count); ++ return iov_iter_count(from); ++} ++ ++/* ++ * Handle direct writes. For sequential zone files, this is the only possible ++ * write path. For these files, check that the user is issuing writes ++ * sequentially from the end of the file. This code assumes that the block layer ++ * delivers write requests to the device in sequential order. This is always the ++ * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE ++ * elevator feature is being used (e.g. mq-deadline). The block layer always ++ * automatically select such an elevator for zoned block devices during the ++ * device initialization. ++ */ ++static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ struct super_block *sb = inode->i_sb; ++ bool sync = is_sync_kiocb(iocb); ++ bool append = false; ++ ssize_t ret, count; ++ ++ /* ++ * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT ++ * as this can cause write reordering (e.g. the first aio gets EAGAIN ++ * on the inode lock but the second goes through but is now unaligned). 
++ */ ++ if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT)) ++ return -EOPNOTSUPP; ++ ++ if (iocb->ki_flags & IOCB_NOWAIT) { ++ if (!inode_trylock(inode)) ++ return -EAGAIN; ++ } else { ++ inode_lock(inode); ++ } ++ ++ count = zonefs_write_checks(iocb, from); ++ if (count <= 0) { ++ ret = count; ++ goto inode_unlock; ++ } ++ ++ if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { ++ ret = -EINVAL; ++ goto inode_unlock; ++ } ++ ++ /* Enforce sequential writes (append only) in sequential zones */ ++ if (zonefs_zone_is_seq(z)) { ++ mutex_lock(&zi->i_truncate_mutex); ++ if (iocb->ki_pos != z->z_wpoffset) { ++ mutex_unlock(&zi->i_truncate_mutex); ++ ret = -EINVAL; ++ goto inode_unlock; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ append = sync; ++ } ++ ++ if (append) { ++ ret = zonefs_file_dio_append(iocb, from); ++ } else { ++ /* ++ * iomap_dio_rw() may return ENOTBLK if there was an issue with ++ * page invalidation. Overwrite that error code with EBUSY to ++ * be consistent with zonefs_file_dio_append() return value for ++ * similar issues. ++ */ ++ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, ++ &zonefs_write_dio_ops, 0, NULL, 0); ++ if (ret == -ENOTBLK) ++ ret = -EBUSY; ++ } ++ ++ if (zonefs_zone_is_seq(z) && ++ (ret > 0 || ret == -EIOCBQUEUED)) { ++ if (ret > 0) ++ count = ret; ++ ++ /* ++ * Update the zone write pointer offset assuming the write ++ * operation succeeded. If it did not, the error recovery path ++ * will correct it. Also do active seq file accounting. ++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ z->z_wpoffset += count; ++ zonefs_inode_account_active(inode); ++ mutex_unlock(&zi->i_truncate_mutex); ++ } ++ ++inode_unlock: ++ inode_unlock(inode); ++ ++ return ret; ++} ++ ++static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, ++ struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ ssize_t ret; ++ ++ /* ++ * Direct IO writes are mandatory for sequential zone files so that the ++ * write IO issuing order is preserved. 
++ */ ++ if (zonefs_inode_is_seq(inode)) ++ return -EIO; ++ ++ if (iocb->ki_flags & IOCB_NOWAIT) { ++ if (!inode_trylock(inode)) ++ return -EAGAIN; ++ } else { ++ inode_lock(inode); ++ } ++ ++ ret = zonefs_write_checks(iocb, from); ++ if (ret <= 0) ++ goto inode_unlock; ++ ++ ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); ++ if (ret > 0) ++ iocb->ki_pos += ret; ++ else if (ret == -EIO) ++ zonefs_io_error(inode, true); ++ ++inode_unlock: ++ inode_unlock(inode); ++ if (ret > 0) ++ ret = generic_write_sync(iocb, ret); ++ ++ return ret; ++} ++ ++static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ if (sb_rdonly(inode->i_sb)) ++ return -EROFS; ++ ++ /* Write operations beyond the zone capacity are not allowed */ ++ if (iocb->ki_pos >= z->z_capacity) ++ return -EFBIG; ++ ++ if (iocb->ki_flags & IOCB_DIRECT) { ++ ssize_t ret = zonefs_file_dio_write(iocb, from); ++ ++ if (ret != -ENOTBLK) ++ return ret; ++ } ++ ++ return zonefs_file_buffered_write(iocb, from); ++} ++ ++static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size, ++ int error, unsigned int flags) ++{ ++ if (error) { ++ zonefs_io_error(file_inode(iocb->ki_filp), false); ++ return error; ++ } ++ ++ return 0; ++} ++ ++static const struct iomap_dio_ops zonefs_read_dio_ops = { ++ .end_io = zonefs_file_read_dio_end_io, ++}; ++ ++static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ++{ ++ struct inode *inode = file_inode(iocb->ki_filp); ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ssize_t ret; ++ ++ /* Offline zones cannot be read */ ++ if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) ++ return -EPERM; ++ ++ if (iocb->ki_pos >= z->z_capacity) ++ return 0; ++ ++ if (iocb->ki_flags & IOCB_NOWAIT) { ++ if (!inode_trylock_shared(inode)) ++ return -EAGAIN; ++ } else { ++ inode_lock_shared(inode); ++ } ++ ++ /* Limit read operations to written data */ ++ mutex_lock(&zi->i_truncate_mutex); ++ isize = i_size_read(inode); ++ if (iocb->ki_pos >= isize) { ++ mutex_unlock(&zi->i_truncate_mutex); ++ ret = 0; ++ goto inode_unlock; ++ } ++ iov_iter_truncate(to, isize - iocb->ki_pos); ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ if (iocb->ki_flags & IOCB_DIRECT) { ++ size_t count = iov_iter_count(to); ++ ++ if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { ++ ret = -EINVAL; ++ goto inode_unlock; ++ } ++ file_accessed(iocb->ki_filp); ++ ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, ++ &zonefs_read_dio_ops, 0, NULL, 0); ++ } else { ++ ret = generic_file_read_iter(iocb, to); ++ if (ret == -EIO) ++ zonefs_io_error(inode, false); ++ } ++ ++inode_unlock: ++ inode_unlock_shared(inode); ++ ++ return ret; ++} ++ ++/* ++ * Write open accounting is done only for sequential files. 
++ */ ++static inline bool zonefs_seq_file_need_wro(struct inode *inode, ++ struct file *file) ++{ ++ if (zonefs_inode_is_cnv(inode)) ++ return false; ++ ++ if (!(file->f_mode & FMODE_WRITE)) ++ return false; ++ ++ return true; ++} ++ ++static int zonefs_seq_file_write_open(struct inode *inode) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ int ret = 0; ++ ++ mutex_lock(&zi->i_truncate_mutex); ++ ++ if (!zi->i_wr_refcnt) { ++ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); ++ unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files); ++ ++ if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { ++ ++ if (sbi->s_max_wro_seq_files ++ && wro > sbi->s_max_wro_seq_files) { ++ atomic_dec(&sbi->s_wro_seq_files); ++ ret = -EBUSY; ++ goto unlock; ++ } ++ ++ if (i_size_read(inode) < z->z_capacity) { ++ ret = zonefs_inode_zone_mgmt(inode, ++ REQ_OP_ZONE_OPEN); ++ if (ret) { ++ atomic_dec(&sbi->s_wro_seq_files); ++ goto unlock; ++ } ++ z->z_flags |= ZONEFS_ZONE_OPEN; ++ zonefs_inode_account_active(inode); ++ } ++ } ++ } ++ ++ zi->i_wr_refcnt++; ++ ++unlock: ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ return ret; ++} ++ ++static int zonefs_file_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ ++ ret = generic_file_open(inode, file); ++ if (ret) ++ return ret; ++ ++ if (zonefs_seq_file_need_wro(inode, file)) ++ return zonefs_seq_file_write_open(inode); ++ ++ return 0; ++} ++ ++static void zonefs_seq_file_write_close(struct inode *inode) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ struct super_block *sb = inode->i_sb; ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ int ret = 0; ++ ++ mutex_lock(&zi->i_truncate_mutex); ++ ++ zi->i_wr_refcnt--; ++ if (zi->i_wr_refcnt) ++ goto unlock; ++ ++ /* ++ * The file zone may not be open anymore (e.g. the file was truncated to ++ * its maximum size or it was fully written). For this case, we only ++ * need to decrement the write open count. ++ */ ++ if (z->z_flags & ZONEFS_ZONE_OPEN) { ++ ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); ++ if (ret) { ++ __zonefs_io_error(inode, false); ++ /* ++ * Leaving zones explicitly open may lead to a state ++ * where most zones cannot be written (zone resources ++ * exhausted). So take preventive action by remounting ++ * read-only. ++ */ ++ if (z->z_flags & ZONEFS_ZONE_OPEN && ++ !(sb->s_flags & SB_RDONLY)) { ++ zonefs_warn(sb, ++ "closing zone at %llu failed %d\n", ++ z->z_sector, ret); ++ zonefs_warn(sb, ++ "remounting filesystem read-only\n"); ++ sb->s_flags |= SB_RDONLY; ++ } ++ goto unlock; ++ } ++ ++ z->z_flags &= ~ZONEFS_ZONE_OPEN; ++ zonefs_inode_account_active(inode); ++ } ++ ++ atomic_dec(&sbi->s_wro_seq_files); ++ ++unlock: ++ mutex_unlock(&zi->i_truncate_mutex); ++} ++ ++static int zonefs_file_release(struct inode *inode, struct file *file) ++{ ++ /* ++ * If we explicitly open a zone we must close it again as well, but the ++ * zone management operation can fail (either due to an IO error or as ++ * the zone has gone offline or read-only). Make sure we don't fail the ++ * close(2) for user-space. 
++ */ ++ if (zonefs_seq_file_need_wro(inode, file)) ++ zonefs_seq_file_write_close(inode); ++ ++ return 0; ++} ++ ++const struct file_operations zonefs_file_operations = { ++ .open = zonefs_file_open, ++ .release = zonefs_file_release, ++ .fsync = zonefs_file_fsync, ++ .mmap = zonefs_file_mmap, ++ .llseek = zonefs_file_llseek, ++ .read_iter = zonefs_file_read_iter, ++ .write_iter = zonefs_file_write_iter, ++ .splice_read = generic_file_splice_read, ++ .splice_write = iter_file_splice_write, ++ .iopoll = iocb_bio_iopoll, ++}; +diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c +index a9c5c3f720adf..270ded209dde5 100644 +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -28,33 +28,47 @@ + #include "trace.h" + + /* +- * Manage the active zone count. Called with zi->i_truncate_mutex held. ++ * Get the name of a zone group directory. + */ +-static void zonefs_account_active(struct inode *inode) ++static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) + { +- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ switch (ztype) { ++ case ZONEFS_ZTYPE_CNV: ++ return "cnv"; ++ case ZONEFS_ZTYPE_SEQ: ++ return "seq"; ++ default: ++ WARN_ON_ONCE(1); ++ return "???"; ++ } ++} + +- lockdep_assert_held(&zi->i_truncate_mutex); ++/* ++ * Manage the active zone count. ++ */ ++static void zonefs_account_active(struct super_block *sb, ++ struct zonefs_zone *z) ++{ ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) ++ if (zonefs_zone_is_cnv(z)) + return; + + /* + * For zones that transitioned to the offline or readonly condition, + * we only need to clear the active state. + */ +- if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) ++ if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) + goto out; + + /* + * If the zone is active, that is, if it is explicitly open or + * partially written, check if it was already accounted as active. + */ +- if ((zi->i_flags & ZONEFS_ZONE_OPEN) || +- (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) { +- if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) { +- zi->i_flags |= ZONEFS_ZONE_ACTIVE; ++ if ((z->z_flags & ZONEFS_ZONE_OPEN) || ++ (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { ++ if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { ++ z->z_flags |= ZONEFS_ZONE_ACTIVE; + atomic_inc(&sbi->s_active_seq_files); + } + return; +@@ -62,18 +76,29 @@ static void zonefs_account_active(struct inode *inode) + + out: + /* The zone is not active. If it was, update the active count */ +- if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { +- zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; ++ if (z->z_flags & ZONEFS_ZONE_ACTIVE) { ++ z->z_flags &= ~ZONEFS_ZONE_ACTIVE; + atomic_dec(&sbi->s_active_seq_files); + } + } + +-static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op) ++/* ++ * Manage the active zone count. Called with zi->i_truncate_mutex held. ++ */ ++void zonefs_inode_account_active(struct inode *inode) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- int ret; ++ lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); + +- lockdep_assert_held(&zi->i_truncate_mutex); ++ return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); ++} ++ ++/* ++ * Execute a zone management operation. 
++ */ ++static int zonefs_zone_mgmt(struct super_block *sb, ++ struct zonefs_zone *z, enum req_op op) ++{ ++ int ret; + + /* + * With ZNS drives, closing an explicitly open zone that has not been +@@ -83,201 +108,49 @@ static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op) + * are exceeded, make sure that the zone does not remain active by + * resetting it. + */ +- if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset) ++ if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) + op = REQ_OP_ZONE_RESET; + +- trace_zonefs_zone_mgmt(inode, op); +- ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, +- zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); ++ trace_zonefs_zone_mgmt(sb, z, op); ++ ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, ++ z->z_size >> SECTOR_SHIFT, GFP_NOFS); + if (ret) { +- zonefs_err(inode->i_sb, ++ zonefs_err(sb, + "Zone management operation %s at %llu failed %d\n", +- blk_op_str(op), zi->i_zsector, ret); ++ blk_op_str(op), z->z_sector, ret); + return ret; + } + + return 0; + } + +-static inline void zonefs_i_size_write(struct inode *inode, loff_t isize) ++int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); + +- i_size_write(inode, isize); +- /* +- * A full zone is no longer open/active and does not need +- * explicit closing. +- */ +- if (isize >= zi->i_max_size) { +- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); +- +- if (zi->i_flags & ZONEFS_ZONE_ACTIVE) +- atomic_dec(&sbi->s_active_seq_files); +- zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); +- } ++ return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); + } + +-static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, +- loff_t length, unsigned int flags, +- struct iomap *iomap, struct iomap *srcmap) ++void zonefs_i_size_write(struct inode *inode, loff_t isize) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- loff_t isize; ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + +- /* +- * All blocks are always mapped below EOF. If reading past EOF, +- * act as if there is a hole up to the file maximum size. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- iomap->bdev = inode->i_sb->s_bdev; +- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); +- isize = i_size_read(inode); +- if (iomap->offset >= isize) { +- iomap->type = IOMAP_HOLE; +- iomap->addr = IOMAP_NULL_ADDR; +- iomap->length = length; +- } else { +- iomap->type = IOMAP_MAPPED; +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; +- iomap->length = isize - iomap->offset; +- } +- mutex_unlock(&zi->i_truncate_mutex); +- +- trace_zonefs_iomap_begin(inode, iomap); +- +- return 0; +-} +- +-static const struct iomap_ops zonefs_read_iomap_ops = { +- .iomap_begin = zonefs_read_iomap_begin, +-}; +- +-static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, +- loff_t length, unsigned int flags, +- struct iomap *iomap, struct iomap *srcmap) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- loff_t isize; +- +- /* All write I/Os should always be within the file maximum size */ +- if (WARN_ON_ONCE(offset + length > zi->i_max_size)) +- return -EIO; +- +- /* +- * Sequential zones can only accept direct writes. This is already +- * checked when writes are issued, so warn if we see a page writeback +- * operation. 
+- */ +- if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && +- !(flags & IOMAP_DIRECT))) +- return -EIO; ++ i_size_write(inode, isize); + + /* +- * For conventional zones, all blocks are always mapped. For sequential +- * zones, all blocks after always mapped below the inode size (zone +- * write pointer) and unwriten beyond. ++ * A full zone is no longer open/active and does not need ++ * explicit closing. + */ +- mutex_lock(&zi->i_truncate_mutex); +- iomap->bdev = inode->i_sb->s_bdev; +- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; +- isize = i_size_read(inode); +- if (iomap->offset >= isize) { +- iomap->type = IOMAP_UNWRITTEN; +- iomap->length = zi->i_max_size - iomap->offset; +- } else { +- iomap->type = IOMAP_MAPPED; +- iomap->length = isize - iomap->offset; +- } +- mutex_unlock(&zi->i_truncate_mutex); +- +- trace_zonefs_iomap_begin(inode, iomap); +- +- return 0; +-} +- +-static const struct iomap_ops zonefs_write_iomap_ops = { +- .iomap_begin = zonefs_write_iomap_begin, +-}; +- +-static int zonefs_read_folio(struct file *unused, struct folio *folio) +-{ +- return iomap_read_folio(folio, &zonefs_read_iomap_ops); +-} +- +-static void zonefs_readahead(struct readahead_control *rac) +-{ +- iomap_readahead(rac, &zonefs_read_iomap_ops); +-} +- +-/* +- * Map blocks for page writeback. This is used only on conventional zone files, +- * which implies that the page range can only be within the fixed inode size. +- */ +-static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, +- struct inode *inode, loff_t offset) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) +- return -EIO; +- if (WARN_ON_ONCE(offset >= i_size_read(inode))) +- return -EIO; +- +- /* If the mapping is already OK, nothing needs to be done */ +- if (offset >= wpc->iomap.offset && +- offset < wpc->iomap.offset + wpc->iomap.length) +- return 0; +- +- return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, +- IOMAP_WRITE, &wpc->iomap, NULL); +-} +- +-static const struct iomap_writeback_ops zonefs_writeback_ops = { +- .map_blocks = zonefs_write_map_blocks, +-}; +- +-static int zonefs_writepages(struct address_space *mapping, +- struct writeback_control *wbc) +-{ +- struct iomap_writepage_ctx wpc = { }; +- +- return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); +-} +- +-static int zonefs_swap_activate(struct swap_info_struct *sis, +- struct file *swap_file, sector_t *span) +-{ +- struct inode *inode = file_inode(swap_file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ if (isize >= z->z_capacity) { ++ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); + +- if (zi->i_ztype != ZONEFS_ZTYPE_CNV) { +- zonefs_err(inode->i_sb, +- "swap file: not a conventional zone file\n"); +- return -EINVAL; ++ if (z->z_flags & ZONEFS_ZONE_ACTIVE) ++ atomic_dec(&sbi->s_active_seq_files); ++ z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); + } +- +- return iomap_swapfile_activate(sis, swap_file, span, +- &zonefs_read_iomap_ops); + } + +-static const struct address_space_operations zonefs_file_aops = { +- .read_folio = zonefs_read_folio, +- .readahead = zonefs_readahead, +- .writepages = zonefs_writepages, +- .dirty_folio = filemap_dirty_folio, +- .release_folio = iomap_release_folio, +- .invalidate_folio = iomap_invalidate_folio, +- .migrate_folio = filemap_migrate_folio, +- .is_partially_uptodate = iomap_is_partially_uptodate, +- .error_remove_page = 
generic_error_remove_page, +- .direct_IO = noop_direct_IO, +- .swap_activate = zonefs_swap_activate, +-}; +- +-static void zonefs_update_stats(struct inode *inode, loff_t new_isize) ++void zonefs_update_stats(struct inode *inode, loff_t new_isize) + { + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +@@ -310,63 +183,68 @@ static void zonefs_update_stats(struct inode *inode, loff_t new_isize) + } + + /* +- * Check a zone condition and adjust its file inode access permissions for +- * offline and readonly zones. Return the inode size corresponding to the +- * amount of readable data in the zone. ++ * Check a zone condition. Return the amount of written (and still readable) ++ * data in the zone. + */ +-static loff_t zonefs_check_zone_condition(struct inode *inode, +- struct blk_zone *zone, bool warn, +- bool mount) ++static loff_t zonefs_check_zone_condition(struct super_block *sb, ++ struct zonefs_zone *z, ++ struct blk_zone *zone) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- + switch (zone->cond) { + case BLK_ZONE_COND_OFFLINE: +- /* +- * Dead zone: make the inode immutable, disable all accesses +- * and set the file size to 0 (zone wp set to zone start). +- */ +- if (warn) +- zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", +- inode->i_ino); +- inode->i_flags |= S_IMMUTABLE; +- inode->i_mode &= ~0777; +- zone->wp = zone->start; +- zi->i_flags |= ZONEFS_ZONE_OFFLINE; ++ zonefs_warn(sb, "Zone %llu: offline zone\n", ++ z->z_sector); ++ z->z_flags |= ZONEFS_ZONE_OFFLINE; + return 0; + case BLK_ZONE_COND_READONLY: + /* +- * The write pointer of read-only zones is invalid. If such a +- * zone is found during mount, the file size cannot be retrieved +- * so we treat the zone as offline (mount == true case). +- * Otherwise, keep the file size as it was when last updated +- * so that the user can recover data. In both cases, writes are +- * always disabled for the zone. ++ * The write pointer of read-only zones is invalid, so we cannot ++ * determine the zone wpoffset (inode size). We thus keep the ++ * zone wpoffset as is, which leads to an empty file ++ * (wpoffset == 0) on mount. For a runtime error, this keeps ++ * the inode size as it was when last updated so that the user ++ * can recover data. + */ +- if (warn) +- zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", +- inode->i_ino); +- inode->i_flags |= S_IMMUTABLE; +- if (mount) { +- zone->cond = BLK_ZONE_COND_OFFLINE; +- inode->i_mode &= ~0777; +- zone->wp = zone->start; +- zi->i_flags |= ZONEFS_ZONE_OFFLINE; +- return 0; +- } +- zi->i_flags |= ZONEFS_ZONE_READONLY; +- inode->i_mode &= ~0222; +- return i_size_read(inode); ++ zonefs_warn(sb, "Zone %llu: read-only zone\n", ++ z->z_sector); ++ z->z_flags |= ZONEFS_ZONE_READONLY; ++ if (zonefs_zone_is_cnv(z)) ++ return z->z_capacity; ++ return z->z_wpoffset; + case BLK_ZONE_COND_FULL: + /* The write pointer of full zones is invalid. */ +- return zi->i_max_size; ++ return z->z_capacity; + default: +- if (zi->i_ztype == ZONEFS_ZTYPE_CNV) +- return zi->i_max_size; ++ if (zonefs_zone_is_cnv(z)) ++ return z->z_capacity; + return (zone->wp - zone->start) << SECTOR_SHIFT; + } + } + ++/* ++ * Check a zone condition and adjust its inode access permissions for ++ * offline and readonly zones. 
++ */ ++static void zonefs_inode_update_mode(struct inode *inode) ++{ ++ struct zonefs_zone *z = zonefs_inode_zone(inode); ++ ++ if (z->z_flags & ZONEFS_ZONE_OFFLINE) { ++ /* Offline zones cannot be read nor written */ ++ inode->i_flags |= S_IMMUTABLE; ++ inode->i_mode &= ~0777; ++ } else if (z->z_flags & ZONEFS_ZONE_READONLY) { ++ /* Readonly zones cannot be written */ ++ inode->i_flags |= S_IMMUTABLE; ++ if (z->z_flags & ZONEFS_ZONE_INIT_MODE) ++ inode->i_mode &= ~0777; ++ else ++ inode->i_mode &= ~0222; ++ } ++ ++ z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; ++} ++ + struct zonefs_ioerr_data { + struct inode *inode; + bool write; +@@ -377,7 +255,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + { + struct zonefs_ioerr_data *err = data; + struct inode *inode = err->inode; +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t isize, data_size; +@@ -388,10 +266,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * as there is no inconsistency between the inode size and the amount of + * data writen in the zone (data_size). + */ +- data_size = zonefs_check_zone_condition(inode, zone, true, false); ++ data_size = zonefs_check_zone_condition(sb, z, zone); + isize = i_size_read(inode); +- if (zone->cond != BLK_ZONE_COND_OFFLINE && +- zone->cond != BLK_ZONE_COND_READONLY && ++ if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && + !err->write && isize == data_size) + return 0; + +@@ -414,8 +291,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. + */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size) +- zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", ++ if (zonefs_zone_is_seq(z) && isize != data_size) ++ zonefs_warn(sb, ++ "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); + + /* +@@ -424,24 +302,22 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * zone condition to read-only and offline respectively, as if the + * condition was signaled by the hardware. 
+ */ +- if (zone->cond == BLK_ZONE_COND_OFFLINE || +- sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) { ++ if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || ++ (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { + zonefs_warn(sb, "inode %lu: read/write access disabled\n", + inode->i_ino); +- if (zone->cond != BLK_ZONE_COND_OFFLINE) { +- zone->cond = BLK_ZONE_COND_OFFLINE; +- data_size = zonefs_check_zone_condition(inode, zone, +- false, false); +- } +- } else if (zone->cond == BLK_ZONE_COND_READONLY || +- sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) { ++ if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) ++ z->z_flags |= ZONEFS_ZONE_OFFLINE; ++ zonefs_inode_update_mode(inode); ++ data_size = 0; ++ } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || ++ (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { + zonefs_warn(sb, "inode %lu: write access disabled\n", + inode->i_ino); +- if (zone->cond != BLK_ZONE_COND_READONLY) { +- zone->cond = BLK_ZONE_COND_READONLY; +- data_size = zonefs_check_zone_condition(inode, zone, +- false, false); +- } ++ if (!(z->z_flags & ZONEFS_ZONE_READONLY)) ++ z->z_flags |= ZONEFS_ZONE_READONLY; ++ zonefs_inode_update_mode(inode); ++ data_size = isize; + } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && + data_size > isize) { + /* Do not expose garbage data */ +@@ -455,9 +331,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * close of the zone when the inode file is closed. + */ + if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && +- (zone->cond == BLK_ZONE_COND_OFFLINE || +- zone->cond == BLK_ZONE_COND_READONLY)) +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; ++ (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) ++ z->z_flags &= ~ZONEFS_ZONE_OPEN; + + /* + * If error=remount-ro was specified, any error result in remounting +@@ -474,8 +349,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + */ + zonefs_update_stats(inode, data_size); + zonefs_i_size_write(inode, data_size); +- zi->i_wpoffset = data_size; +- zonefs_account_active(inode); ++ z->z_wpoffset = data_size; ++ zonefs_inode_account_active(inode); + + return 0; + } +@@ -487,9 +362,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + * eventually correct the file size and zonefs inode write pointer offset + * (which can be out of sync with the drive due to partial write failures). + */ +-static void __zonefs_io_error(struct inode *inode, bool write) ++void __zonefs_io_error(struct inode *inode, bool write) + { +- struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; +@@ -505,8 +380,8 @@ static void __zonefs_io_error(struct inode *inode, bool write) + * files with aggregated conventional zones, for which the inode zone + * size is always larger than the device zone size. + */ +- if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) +- nr_zones = zi->i_zone_size >> ++ if (z->z_size > bdev_zone_sectors(sb->s_bdev)) ++ nr_zones = z->z_size >> + (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + + /* +@@ -518,7 +393,7 @@ static void __zonefs_io_error(struct inode *inode, bool write) + * the GFP_NOIO context avoids both problems. 
+ */ + noio_flag = memalloc_noio_save(); +- ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones, ++ ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, + zonefs_io_error_cb, &err); + if (ret != nr_zones) + zonefs_err(sb, "Get inode %lu zone information failed %d\n", +@@ -526,749 +401,6 @@ static void __zonefs_io_error(struct inode *inode, bool write) + memalloc_noio_restore(noio_flag); + } + +-static void zonefs_io_error(struct inode *inode, bool write) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- mutex_lock(&zi->i_truncate_mutex); +- __zonefs_io_error(inode, write); +- mutex_unlock(&zi->i_truncate_mutex); +-} +- +-static int zonefs_file_truncate(struct inode *inode, loff_t isize) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- loff_t old_isize; +- enum req_op op; +- int ret = 0; +- +- /* +- * Only sequential zone files can be truncated and truncation is allowed +- * only down to a 0 size, which is equivalent to a zone reset, and to +- * the maximum file size, which is equivalent to a zone finish. +- */ +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) +- return -EPERM; +- +- if (!isize) +- op = REQ_OP_ZONE_RESET; +- else if (isize == zi->i_max_size) +- op = REQ_OP_ZONE_FINISH; +- else +- return -EPERM; +- +- inode_dio_wait(inode); +- +- /* Serialize against page faults */ +- filemap_invalidate_lock(inode->i_mapping); +- +- /* Serialize against zonefs_iomap_begin() */ +- mutex_lock(&zi->i_truncate_mutex); +- +- old_isize = i_size_read(inode); +- if (isize == old_isize) +- goto unlock; +- +- ret = zonefs_zone_mgmt(inode, op); +- if (ret) +- goto unlock; +- +- /* +- * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set, +- * take care of open zones. +- */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN) { +- /* +- * Truncating a zone to EMPTY or FULL is the equivalent of +- * closing the zone. For a truncation to 0, we need to +- * re-open the zone to ensure new writes can be processed. +- * For a truncation to the maximum file size, the zone is +- * closed and writes cannot be accepted anymore, so clear +- * the open flag. +- */ +- if (!isize) +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); +- else +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; +- } +- +- zonefs_update_stats(inode, isize); +- truncate_setsize(inode, isize); +- zi->i_wpoffset = isize; +- zonefs_account_active(inode); +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +- filemap_invalidate_unlock(inode->i_mapping); +- +- return ret; +-} +- +-static int zonefs_inode_setattr(struct user_namespace *mnt_userns, +- struct dentry *dentry, struct iattr *iattr) +-{ +- struct inode *inode = d_inode(dentry); +- int ret; +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return -EPERM; +- +- ret = setattr_prepare(&init_user_ns, dentry, iattr); +- if (ret) +- return ret; +- +- /* +- * Since files and directories cannot be created nor deleted, do not +- * allow setting any write attributes on the sub-directories grouping +- * files by zone type. 
+- */ +- if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && +- (iattr->ia_mode & 0222)) +- return -EPERM; +- +- if (((iattr->ia_valid & ATTR_UID) && +- !uid_eq(iattr->ia_uid, inode->i_uid)) || +- ((iattr->ia_valid & ATTR_GID) && +- !gid_eq(iattr->ia_gid, inode->i_gid))) { +- ret = dquot_transfer(mnt_userns, inode, iattr); +- if (ret) +- return ret; +- } +- +- if (iattr->ia_valid & ATTR_SIZE) { +- ret = zonefs_file_truncate(inode, iattr->ia_size); +- if (ret) +- return ret; +- } +- +- setattr_copy(&init_user_ns, inode, iattr); +- +- return 0; +-} +- +-static const struct inode_operations zonefs_file_inode_operations = { +- .setattr = zonefs_inode_setattr, +-}; +- +-static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, +- int datasync) +-{ +- struct inode *inode = file_inode(file); +- int ret = 0; +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return -EPERM; +- +- /* +- * Since only direct writes are allowed in sequential files, page cache +- * flush is needed only for conventional zone files. +- */ +- if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) +- ret = file_write_and_wait_range(file, start, end); +- if (!ret) +- ret = blkdev_issue_flush(inode->i_sb->s_bdev); +- +- if (ret) +- zonefs_io_error(inode, true); +- +- return ret; +-} +- +-static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) +-{ +- struct inode *inode = file_inode(vmf->vma->vm_file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- vm_fault_t ret; +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return VM_FAULT_SIGBUS; +- +- /* +- * Sanity check: only conventional zone files can have shared +- * writeable mappings. +- */ +- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) +- return VM_FAULT_NOPAGE; +- +- sb_start_pagefault(inode->i_sb); +- file_update_time(vmf->vma->vm_file); +- +- /* Serialize against truncates */ +- filemap_invalidate_lock_shared(inode->i_mapping); +- ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); +- filemap_invalidate_unlock_shared(inode->i_mapping); +- +- sb_end_pagefault(inode->i_sb); +- return ret; +-} +- +-static const struct vm_operations_struct zonefs_file_vm_ops = { +- .fault = filemap_fault, +- .map_pages = filemap_map_pages, +- .page_mkwrite = zonefs_filemap_page_mkwrite, +-}; +- +-static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) +-{ +- /* +- * Conventional zones accept random writes, so their files can support +- * shared writable mappings. For sequential zone files, only read +- * mappings are possible since there are no guarantees for write +- * ordering between msync() and page cache writeback. +- */ +- if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ && +- (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) +- return -EINVAL; +- +- file_accessed(file); +- vma->vm_ops = &zonefs_file_vm_ops; +- +- return 0; +-} +- +-static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) +-{ +- loff_t isize = i_size_read(file_inode(file)); +- +- /* +- * Seeks are limited to below the zone size for conventional zones +- * and below the zone write pointer for sequential zones. In both +- * cases, this limit is the inode size. 
+- */ +- return generic_file_llseek_size(file, offset, whence, isize, isize); +-} +- +-static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, +- int error, unsigned int flags) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (error) { +- zonefs_io_error(inode, true); +- return error; +- } +- +- if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) { +- /* +- * Note that we may be seeing completions out of order, +- * but that is not a problem since a write completed +- * successfully necessarily means that all preceding writes +- * were also successful. So we can safely increase the inode +- * size to the write end location. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- if (i_size_read(inode) < iocb->ki_pos + size) { +- zonefs_update_stats(inode, iocb->ki_pos + size); +- zonefs_i_size_write(inode, iocb->ki_pos + size); +- } +- mutex_unlock(&zi->i_truncate_mutex); +- } +- +- return 0; +-} +- +-static const struct iomap_dio_ops zonefs_write_dio_ops = { +- .end_io = zonefs_file_write_dio_end_io, +-}; +- +-static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct block_device *bdev = inode->i_sb->s_bdev; +- unsigned int max = bdev_max_zone_append_sectors(bdev); +- struct bio *bio; +- ssize_t size; +- int nr_pages; +- ssize_t ret; +- +- max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); +- iov_iter_truncate(from, max); +- +- nr_pages = iov_iter_npages(from, BIO_MAX_VECS); +- if (!nr_pages) +- return 0; +- +- bio = bio_alloc(bdev, nr_pages, +- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); +- bio->bi_iter.bi_sector = zi->i_zsector; +- bio->bi_ioprio = iocb->ki_ioprio; +- if (iocb_is_dsync(iocb)) +- bio->bi_opf |= REQ_FUA; +- +- ret = bio_iov_iter_get_pages(bio, from); +- if (unlikely(ret)) +- goto out_release; +- +- size = bio->bi_iter.bi_size; +- task_io_account_write(size); +- +- if (iocb->ki_flags & IOCB_HIPRI) +- bio_set_polled(bio, iocb); +- +- ret = submit_bio_wait(bio); +- +- /* +- * If the file zone was written underneath the file system, the zone +- * write pointer may not be where we expect it to be, but the zone +- * append write can still succeed. So check manually that we wrote where +- * we intended to, that is, at zi->i_wpoffset. +- */ +- if (!ret) { +- sector_t wpsector = +- zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); +- +- if (bio->bi_iter.bi_sector != wpsector) { +- zonefs_warn(inode->i_sb, +- "Corrupted write pointer %llu for zone at %llu\n", +- wpsector, zi->i_zsector); +- ret = -EIO; +- } +- } +- +- zonefs_file_write_dio_end_io(iocb, size, ret, 0); +- trace_zonefs_file_dio_append(inode, size, ret); +- +-out_release: +- bio_release_pages(bio, false); +- bio_put(bio); +- +- if (ret >= 0) { +- iocb->ki_pos += size; +- return size; +- } +- +- return ret; +-} +- +-/* +- * Do not exceed the LFS limits nor the file zone size. If pos is under the +- * limit it becomes a short access. If it exceeds the limit, return -EFBIG. 
+- */ +-static loff_t zonefs_write_check_limits(struct file *file, loff_t pos, +- loff_t count) +-{ +- struct inode *inode = file_inode(file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- loff_t limit = rlimit(RLIMIT_FSIZE); +- loff_t max_size = zi->i_max_size; +- +- if (limit != RLIM_INFINITY) { +- if (pos >= limit) { +- send_sig(SIGXFSZ, current, 0); +- return -EFBIG; +- } +- count = min(count, limit - pos); +- } +- +- if (!(file->f_flags & O_LARGEFILE)) +- max_size = min_t(loff_t, MAX_NON_LFS, max_size); +- +- if (unlikely(pos >= max_size)) +- return -EFBIG; +- +- return min(count, max_size - pos); +-} +- +-static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct file *file = iocb->ki_filp; +- struct inode *inode = file_inode(file); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- loff_t count; +- +- if (IS_SWAPFILE(inode)) +- return -ETXTBSY; +- +- if (!iov_iter_count(from)) +- return 0; +- +- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) +- return -EINVAL; +- +- if (iocb->ki_flags & IOCB_APPEND) { +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) +- return -EINVAL; +- mutex_lock(&zi->i_truncate_mutex); +- iocb->ki_pos = zi->i_wpoffset; +- mutex_unlock(&zi->i_truncate_mutex); +- } +- +- count = zonefs_write_check_limits(file, iocb->ki_pos, +- iov_iter_count(from)); +- if (count < 0) +- return count; +- +- iov_iter_truncate(from, count); +- return iov_iter_count(from); +-} +- +-/* +- * Handle direct writes. For sequential zone files, this is the only possible +- * write path. For these files, check that the user is issuing writes +- * sequentially from the end of the file. This code assumes that the block layer +- * delivers write requests to the device in sequential order. This is always the +- * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE +- * elevator feature is being used (e.g. mq-deadline). The block layer always +- * automatically select such an elevator for zoned block devices during the +- * device initialization. +- */ +-static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- bool sync = is_sync_kiocb(iocb); +- bool append = false; +- ssize_t ret, count; +- +- /* +- * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT +- * as this can cause write reordering (e.g. the first aio gets EAGAIN +- * on the inode lock but the second goes through but is now unaligned). 
+- */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync && +- (iocb->ki_flags & IOCB_NOWAIT)) +- return -EOPNOTSUPP; +- +- if (iocb->ki_flags & IOCB_NOWAIT) { +- if (!inode_trylock(inode)) +- return -EAGAIN; +- } else { +- inode_lock(inode); +- } +- +- count = zonefs_write_checks(iocb, from); +- if (count <= 0) { +- ret = count; +- goto inode_unlock; +- } +- +- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { +- ret = -EINVAL; +- goto inode_unlock; +- } +- +- /* Enforce sequential writes (append only) in sequential zones */ +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) { +- mutex_lock(&zi->i_truncate_mutex); +- if (iocb->ki_pos != zi->i_wpoffset) { +- mutex_unlock(&zi->i_truncate_mutex); +- ret = -EINVAL; +- goto inode_unlock; +- } +- mutex_unlock(&zi->i_truncate_mutex); +- append = sync; +- } +- +- if (append) +- ret = zonefs_file_dio_append(iocb, from); +- else +- ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, +- &zonefs_write_dio_ops, 0, NULL, 0); +- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && +- (ret > 0 || ret == -EIOCBQUEUED)) { +- if (ret > 0) +- count = ret; +- +- /* +- * Update the zone write pointer offset assuming the write +- * operation succeeded. If it did not, the error recovery path +- * will correct it. Also do active seq file accounting. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- zi->i_wpoffset += count; +- zonefs_account_active(inode); +- mutex_unlock(&zi->i_truncate_mutex); +- } +- +-inode_unlock: +- inode_unlock(inode); +- +- return ret; +-} +- +-static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, +- struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- ssize_t ret; +- +- /* +- * Direct IO writes are mandatory for sequential zone files so that the +- * write IO issuing order is preserved. 
+- */ +- if (zi->i_ztype != ZONEFS_ZTYPE_CNV) +- return -EIO; +- +- if (iocb->ki_flags & IOCB_NOWAIT) { +- if (!inode_trylock(inode)) +- return -EAGAIN; +- } else { +- inode_lock(inode); +- } +- +- ret = zonefs_write_checks(iocb, from); +- if (ret <= 0) +- goto inode_unlock; +- +- ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); +- if (ret > 0) +- iocb->ki_pos += ret; +- else if (ret == -EIO) +- zonefs_io_error(inode, true); +- +-inode_unlock: +- inode_unlock(inode); +- if (ret > 0) +- ret = generic_write_sync(iocb, ret); +- +- return ret; +-} +- +-static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- +- if (unlikely(IS_IMMUTABLE(inode))) +- return -EPERM; +- +- if (sb_rdonly(inode->i_sb)) +- return -EROFS; +- +- /* Write operations beyond the zone size are not allowed */ +- if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) +- return -EFBIG; +- +- if (iocb->ki_flags & IOCB_DIRECT) { +- ssize_t ret = zonefs_file_dio_write(iocb, from); +- if (ret != -ENOTBLK) +- return ret; +- } +- +- return zonefs_file_buffered_write(iocb, from); +-} +- +-static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size, +- int error, unsigned int flags) +-{ +- if (error) { +- zonefs_io_error(file_inode(iocb->ki_filp), false); +- return error; +- } +- +- return 0; +-} +- +-static const struct iomap_dio_ops zonefs_read_dio_ops = { +- .end_io = zonefs_file_read_dio_end_io, +-}; +- +-static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +-{ +- struct inode *inode = file_inode(iocb->ki_filp); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- loff_t isize; +- ssize_t ret; +- +- /* Offline zones cannot be read */ +- if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) +- return -EPERM; +- +- if (iocb->ki_pos >= zi->i_max_size) +- return 0; +- +- if (iocb->ki_flags & IOCB_NOWAIT) { +- if (!inode_trylock_shared(inode)) +- return -EAGAIN; +- } else { +- inode_lock_shared(inode); +- } +- +- /* Limit read operations to written data */ +- mutex_lock(&zi->i_truncate_mutex); +- isize = i_size_read(inode); +- if (iocb->ki_pos >= isize) { +- mutex_unlock(&zi->i_truncate_mutex); +- ret = 0; +- goto inode_unlock; +- } +- iov_iter_truncate(to, isize - iocb->ki_pos); +- mutex_unlock(&zi->i_truncate_mutex); +- +- if (iocb->ki_flags & IOCB_DIRECT) { +- size_t count = iov_iter_count(to); +- +- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { +- ret = -EINVAL; +- goto inode_unlock; +- } +- file_accessed(iocb->ki_filp); +- ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, +- &zonefs_read_dio_ops, 0, NULL, 0); +- } else { +- ret = generic_file_read_iter(iocb, to); +- if (ret == -EIO) +- zonefs_io_error(inode, false); +- } +- +-inode_unlock: +- inode_unlock_shared(inode); +- +- return ret; +-} +- +-/* +- * Write open accounting is done only for sequential files. 
+- */ +-static inline bool zonefs_seq_file_need_wro(struct inode *inode, +- struct file *file) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- +- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) +- return false; +- +- if (!(file->f_mode & FMODE_WRITE)) +- return false; +- +- return true; +-} +- +-static int zonefs_seq_file_write_open(struct inode *inode) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- int ret = 0; +- +- mutex_lock(&zi->i_truncate_mutex); +- +- if (!zi->i_wr_refcnt) { +- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); +- unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files); +- +- if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { +- +- if (sbi->s_max_wro_seq_files +- && wro > sbi->s_max_wro_seq_files) { +- atomic_dec(&sbi->s_wro_seq_files); +- ret = -EBUSY; +- goto unlock; +- } +- +- if (i_size_read(inode) < zi->i_max_size) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); +- if (ret) { +- atomic_dec(&sbi->s_wro_seq_files); +- goto unlock; +- } +- zi->i_flags |= ZONEFS_ZONE_OPEN; +- zonefs_account_active(inode); +- } +- } +- } +- +- zi->i_wr_refcnt++; +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +- +- return ret; +-} +- +-static int zonefs_file_open(struct inode *inode, struct file *file) +-{ +- int ret; +- +- ret = generic_file_open(inode, file); +- if (ret) +- return ret; +- +- if (zonefs_seq_file_need_wro(inode, file)) +- return zonefs_seq_file_write_open(inode); +- +- return 0; +-} +- +-static void zonefs_seq_file_write_close(struct inode *inode) +-{ +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- struct super_block *sb = inode->i_sb; +- struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +- int ret = 0; +- +- mutex_lock(&zi->i_truncate_mutex); +- +- zi->i_wr_refcnt--; +- if (zi->i_wr_refcnt) +- goto unlock; +- +- /* +- * The file zone may not be open anymore (e.g. the file was truncated to +- * its maximum size or it was fully written). For this case, we only +- * need to decrement the write open count. +- */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); +- if (ret) { +- __zonefs_io_error(inode, false); +- /* +- * Leaving zones explicitly open may lead to a state +- * where most zones cannot be written (zone resources +- * exhausted). So take preventive action by remounting +- * read-only. +- */ +- if (zi->i_flags & ZONEFS_ZONE_OPEN && +- !(sb->s_flags & SB_RDONLY)) { +- zonefs_warn(sb, +- "closing zone at %llu failed %d\n", +- zi->i_zsector, ret); +- zonefs_warn(sb, +- "remounting filesystem read-only\n"); +- sb->s_flags |= SB_RDONLY; +- } +- goto unlock; +- } +- +- zi->i_flags &= ~ZONEFS_ZONE_OPEN; +- zonefs_account_active(inode); +- } +- +- atomic_dec(&sbi->s_wro_seq_files); +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +-} +- +-static int zonefs_file_release(struct inode *inode, struct file *file) +-{ +- /* +- * If we explicitly open a zone we must close it again as well, but the +- * zone management operation can fail (either due to an IO error or as +- * the zone has gone offline or read-only). Make sure we don't fail the +- * close(2) for user-space. 
+- */ +- if (zonefs_seq_file_need_wro(inode, file)) +- zonefs_seq_file_write_close(inode); +- +- return 0; +-} +- +-static const struct file_operations zonefs_file_operations = { +- .open = zonefs_file_open, +- .release = zonefs_file_release, +- .fsync = zonefs_file_fsync, +- .mmap = zonefs_file_mmap, +- .llseek = zonefs_file_llseek, +- .read_iter = zonefs_file_read_iter, +- .write_iter = zonefs_file_write_iter, +- .splice_read = generic_file_splice_read, +- .splice_write = iter_file_splice_write, +- .iopoll = iocb_bio_iopoll, +-}; +- + static struct kmem_cache *zonefs_inode_cachep; + + static struct inode *zonefs_alloc_inode(struct super_block *sb) +@@ -1282,7 +414,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) + inode_init_once(&zi->i_vnode); + mutex_init(&zi->i_truncate_mutex); + zi->i_wr_refcnt = 0; +- zi->i_flags = 0; + + return &zi->i_vnode; + } +@@ -1315,8 +446,8 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) + buf->f_bavail = buf->f_bfree; + + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { +- if (sbi->s_nr_files[t]) +- buf->f_files += sbi->s_nr_files[t] + 1; ++ if (sbi->s_zgroup[t].g_nr_zones) ++ buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; + } + buf->f_ffree = 0; + +@@ -1382,51 +513,85 @@ static int zonefs_parse_options(struct super_block *sb, char *options) + } + } + +- return 0; +-} ++ return 0; ++} ++ ++static int zonefs_show_options(struct seq_file *seq, struct dentry *root) ++{ ++ struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); ++ ++ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) ++ seq_puts(seq, ",errors=remount-ro"); ++ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) ++ seq_puts(seq, ",errors=zone-ro"); ++ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) ++ seq_puts(seq, ",errors=zone-offline"); ++ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) ++ seq_puts(seq, ",errors=repair"); ++ ++ return 0; ++} ++ ++static int zonefs_remount(struct super_block *sb, int *flags, char *data) ++{ ++ sync_filesystem(sb); ++ ++ return zonefs_parse_options(sb, data); ++} ++ ++static int zonefs_inode_setattr(struct user_namespace *mnt_userns, ++ struct dentry *dentry, struct iattr *iattr) ++{ ++ struct inode *inode = d_inode(dentry); ++ int ret; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ ret = setattr_prepare(&init_user_ns, dentry, iattr); ++ if (ret) ++ return ret; ++ ++ /* ++ * Since files and directories cannot be created nor deleted, do not ++ * allow setting any write attributes on the sub-directories grouping ++ * files by zone type. 
++ */ ++ if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && ++ (iattr->ia_mode & 0222)) ++ return -EPERM; ++ ++ if (((iattr->ia_valid & ATTR_UID) && ++ !uid_eq(iattr->ia_uid, inode->i_uid)) || ++ ((iattr->ia_valid & ATTR_GID) && ++ !gid_eq(iattr->ia_gid, inode->i_gid))) { ++ ret = dquot_transfer(mnt_userns, inode, iattr); ++ if (ret) ++ return ret; ++ } + +-static int zonefs_show_options(struct seq_file *seq, struct dentry *root) +-{ +- struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); ++ if (iattr->ia_valid & ATTR_SIZE) { ++ ret = zonefs_file_truncate(inode, iattr->ia_size); ++ if (ret) ++ return ret; ++ } + +- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) +- seq_puts(seq, ",errors=remount-ro"); +- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) +- seq_puts(seq, ",errors=zone-ro"); +- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) +- seq_puts(seq, ",errors=zone-offline"); +- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) +- seq_puts(seq, ",errors=repair"); ++ setattr_copy(&init_user_ns, inode, iattr); + + return 0; + } + +-static int zonefs_remount(struct super_block *sb, int *flags, char *data) +-{ +- sync_filesystem(sb); +- +- return zonefs_parse_options(sb, data); +-} +- +-static const struct super_operations zonefs_sops = { +- .alloc_inode = zonefs_alloc_inode, +- .free_inode = zonefs_free_inode, +- .statfs = zonefs_statfs, +- .remount_fs = zonefs_remount, +- .show_options = zonefs_show_options, +-}; +- + static const struct inode_operations zonefs_dir_inode_operations = { + .lookup = simple_lookup, + .setattr = zonefs_inode_setattr, + }; + + static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, +- enum zonefs_ztype type) ++ enum zonefs_ztype ztype) + { + struct super_block *sb = parent->i_sb; + +- inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1; ++ inode->i_ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; + inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; +@@ -1434,73 +599,38 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, + inc_nlink(parent); + } + +-static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, +- enum zonefs_ztype type) ++static const struct inode_operations zonefs_file_inode_operations = { ++ .setattr = zonefs_inode_setattr, ++}; ++ ++static void zonefs_init_file_inode(struct inode *inode, ++ struct zonefs_zone *z) + { + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +- struct zonefs_inode_info *zi = ZONEFS_I(inode); +- int ret = 0; +- +- inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; +- inode->i_mode = S_IFREG | sbi->s_perm; +- +- zi->i_ztype = type; +- zi->i_zsector = zone->start; +- zi->i_zone_size = zone->len << SECTOR_SHIFT; +- if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && +- !(sbi->s_features & ZONEFS_F_AGGRCNV)) { +- zonefs_err(sb, +- "zone size %llu doesn't match device's zone sectors %llu\n", +- zi->i_zone_size, +- bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); +- return -EINVAL; +- } + +- zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, +- zone->capacity << SECTOR_SHIFT); +- zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true); ++ inode->i_private = z; + ++ inode->i_ino = z->z_sector >> sbi->s_zone_sectors_shift; ++ inode->i_mode = S_IFREG | sbi->s_perm; + inode->i_uid = sbi->s_uid; + inode->i_gid = sbi->s_gid; +- inode->i_size = zi->i_wpoffset; +- inode->i_blocks = 
zi->i_max_size >> SECTOR_SHIFT; ++ inode->i_size = z->z_wpoffset; ++ inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; + + inode->i_op = &zonefs_file_inode_operations; + inode->i_fop = &zonefs_file_operations; + inode->i_mapping->a_ops = &zonefs_file_aops; + +- sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); +- sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; +- sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; +- +- mutex_lock(&zi->i_truncate_mutex); +- +- /* +- * For sequential zones, make sure that any open zone is closed first +- * to ensure that the initial number of open zones is 0, in sync with +- * the open zone accounting done when the mount option +- * ZONEFS_MNTOPT_EXPLICIT_OPEN is used. +- */ +- if (type == ZONEFS_ZTYPE_SEQ && +- (zone->cond == BLK_ZONE_COND_IMP_OPEN || +- zone->cond == BLK_ZONE_COND_EXP_OPEN)) { +- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); +- if (ret) +- goto unlock; +- } +- +- zonefs_account_active(inode); +- +-unlock: +- mutex_unlock(&zi->i_truncate_mutex); +- +- return ret; ++ /* Update the inode access rights depending on the zone condition */ ++ z->z_flags |= ZONEFS_ZONE_INIT_MODE; ++ zonefs_inode_update_mode(inode); + } + + static struct dentry *zonefs_create_inode(struct dentry *parent, +- const char *name, struct blk_zone *zone, +- enum zonefs_ztype type) ++ const char *name, ++ struct zonefs_zone *z, ++ enum zonefs_ztype ztype) + { + struct inode *dir = d_inode(parent); + struct dentry *dentry; +@@ -1516,15 +646,10 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, + goto dput; + + inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; +- if (zone) { +- ret = zonefs_init_file_inode(inode, zone, type); +- if (ret) { +- iput(inode); +- goto dput; +- } +- } else { +- zonefs_init_dir_inode(dir, inode, type); +- } ++ if (z) ++ zonefs_init_file_inode(inode, z); ++ else ++ zonefs_init_dir_inode(dir, inode, ztype); + + d_add(dentry, inode); + dir->i_size++; +@@ -1540,100 +665,51 @@ dput: + struct zonefs_zone_data { + struct super_block *sb; + unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; ++ sector_t cnv_zone_start; + struct blk_zone *zones; + }; + + /* +- * Create a zone group and populate it with zone files. ++ * Create the inodes for a zone group. + */ +-static int zonefs_create_zgroup(struct zonefs_zone_data *zd, +- enum zonefs_ztype type) ++static int zonefs_create_zgroup_inodes(struct super_block *sb, ++ enum zonefs_ztype ztype) + { +- struct super_block *sb = zd->sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +- struct blk_zone *zone, *next, *end; +- const char *zgroup_name; +- char *file_name; ++ struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; + struct dentry *dir, *dent; +- unsigned int n = 0; +- int ret; ++ char *file_name; ++ int i, ret = 0; ++ ++ if (!zgroup) ++ return -ENOMEM; + + /* If the group is empty, there is nothing to do */ +- if (!zd->nr_zones[type]) ++ if (!zgroup->g_nr_zones) + return 0; + + file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); + if (!file_name) + return -ENOMEM; + +- if (type == ZONEFS_ZTYPE_CNV) +- zgroup_name = "cnv"; +- else +- zgroup_name = "seq"; +- +- dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); ++ dir = zonefs_create_inode(sb->s_root, zonefs_zgroup_name(ztype), ++ NULL, ztype); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + goto free; + } + +- /* +- * The first zone contains the super block: skip it. 
+- */ +- end = zd->zones + bdev_nr_zones(sb->s_bdev); +- for (zone = &zd->zones[1]; zone < end; zone = next) { +- +- next = zone + 1; +- if (zonefs_zone_type(zone) != type) +- continue; +- +- /* +- * For conventional zones, contiguous zones can be aggregated +- * together to form larger files. Note that this overwrites the +- * length of the first zone of the set of contiguous zones +- * aggregated together. If one offline or read-only zone is +- * found, assume that all zones aggregated have the same +- * condition. +- */ +- if (type == ZONEFS_ZTYPE_CNV && +- (sbi->s_features & ZONEFS_F_AGGRCNV)) { +- for (; next < end; next++) { +- if (zonefs_zone_type(next) != type) +- break; +- zone->len += next->len; +- zone->capacity += next->capacity; +- if (next->cond == BLK_ZONE_COND_READONLY && +- zone->cond != BLK_ZONE_COND_OFFLINE) +- zone->cond = BLK_ZONE_COND_READONLY; +- else if (next->cond == BLK_ZONE_COND_OFFLINE) +- zone->cond = BLK_ZONE_COND_OFFLINE; +- } +- if (zone->capacity != zone->len) { +- zonefs_err(sb, "Invalid conventional zone capacity\n"); +- ret = -EINVAL; +- goto free; +- } +- } +- +- /* +- * Use the file number within its group as file name. +- */ +- snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); +- dent = zonefs_create_inode(dir, file_name, zone, type); ++ for (i = 0; i < zgroup->g_nr_zones; i++) { ++ /* Use the zone number within its group as the file name */ ++ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", i); ++ dent = zonefs_create_inode(dir, file_name, ++ &zgroup->g_zones[i], ztype); + if (IS_ERR(dent)) { + ret = PTR_ERR(dent); +- goto free; ++ break; + } +- +- n++; + } + +- zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", +- zgroup_name, n, n > 1 ? "s" : ""); +- +- sbi->s_nr_files[type] = n; +- ret = 0; +- + free: + kfree(file_name); + +@@ -1644,21 +720,38 @@ static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, + void *data) + { + struct zonefs_zone_data *zd = data; ++ struct super_block *sb = zd->sb; ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ ++ /* ++ * We do not care about the first zone: it contains the super block ++ * and not exposed as a file. ++ */ ++ if (!idx) ++ return 0; + + /* +- * Count the number of usable zones: the first zone at index 0 contains +- * the super block and is ignored. ++ * Count the number of zones that will be exposed as files. ++ * For sequential zones, we always have as many files as zones. ++ * FOr conventional zones, the number of files depends on if we have ++ * conventional zones aggregation enabled. 
+ */ + switch (zone->type) { + case BLK_ZONE_TYPE_CONVENTIONAL: +- zone->wp = zone->start + zone->len; +- if (idx) +- zd->nr_zones[ZONEFS_ZTYPE_CNV]++; ++ if (sbi->s_features & ZONEFS_F_AGGRCNV) { ++ /* One file per set of contiguous conventional zones */ ++ if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || ++ zone->start != zd->cnv_zone_start) ++ sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; ++ zd->cnv_zone_start = zone->start + zone->len; ++ } else { ++ /* One file per zone */ ++ sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; ++ } + break; + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: +- if (idx) +- zd->nr_zones[ZONEFS_ZTYPE_SEQ]++; ++ sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; + break; + default: + zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", +@@ -1698,11 +791,173 @@ static int zonefs_get_zone_info(struct zonefs_zone_data *zd) + return 0; + } + +-static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd) ++static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) + { + kvfree(zd->zones); + } + ++/* ++ * Create a zone group and populate it with zone files. ++ */ ++static int zonefs_init_zgroup(struct super_block *sb, ++ struct zonefs_zone_data *zd, ++ enum zonefs_ztype ztype) ++{ ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; ++ struct blk_zone *zone, *next, *end; ++ struct zonefs_zone *z; ++ unsigned int n = 0; ++ int ret; ++ ++ /* Allocate the zone group. If it is empty, we have nothing to do. */ ++ if (!zgroup->g_nr_zones) ++ return 0; ++ ++ zgroup->g_zones = kvcalloc(zgroup->g_nr_zones, ++ sizeof(struct zonefs_zone), GFP_KERNEL); ++ if (!zgroup->g_zones) ++ return -ENOMEM; ++ ++ /* ++ * Initialize the zone groups using the device zone information. ++ * We always skip the first zone as it contains the super block ++ * and is not use to back a file. ++ */ ++ end = zd->zones + bdev_nr_zones(sb->s_bdev); ++ for (zone = &zd->zones[1]; zone < end; zone = next) { ++ ++ next = zone + 1; ++ if (zonefs_zone_type(zone) != ztype) ++ continue; ++ ++ if (WARN_ON_ONCE(n >= zgroup->g_nr_zones)) ++ return -EINVAL; ++ ++ /* ++ * For conventional zones, contiguous zones can be aggregated ++ * together to form larger files. Note that this overwrites the ++ * length of the first zone of the set of contiguous zones ++ * aggregated together. If one offline or read-only zone is ++ * found, assume that all zones aggregated have the same ++ * condition. 
++ */ ++ if (ztype == ZONEFS_ZTYPE_CNV && ++ (sbi->s_features & ZONEFS_F_AGGRCNV)) { ++ for (; next < end; next++) { ++ if (zonefs_zone_type(next) != ztype) ++ break; ++ zone->len += next->len; ++ zone->capacity += next->capacity; ++ if (next->cond == BLK_ZONE_COND_READONLY && ++ zone->cond != BLK_ZONE_COND_OFFLINE) ++ zone->cond = BLK_ZONE_COND_READONLY; ++ else if (next->cond == BLK_ZONE_COND_OFFLINE) ++ zone->cond = BLK_ZONE_COND_OFFLINE; ++ } ++ } ++ ++ z = &zgroup->g_zones[n]; ++ if (ztype == ZONEFS_ZTYPE_CNV) ++ z->z_flags |= ZONEFS_ZONE_CNV; ++ z->z_sector = zone->start; ++ z->z_size = zone->len << SECTOR_SHIFT; ++ if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && ++ !(sbi->s_features & ZONEFS_F_AGGRCNV)) { ++ zonefs_err(sb, ++ "Invalid zone size %llu (device zone sectors %llu)\n", ++ z->z_size, ++ bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); ++ return -EINVAL; ++ } ++ ++ z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE, ++ zone->capacity << SECTOR_SHIFT); ++ z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone); ++ ++ sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes); ++ sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits; ++ sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits; ++ ++ /* ++ * For sequential zones, make sure that any open zone is closed ++ * first to ensure that the initial number of open zones is 0, ++ * in sync with the open zone accounting done when the mount ++ * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used. ++ */ ++ if (ztype == ZONEFS_ZTYPE_SEQ && ++ (zone->cond == BLK_ZONE_COND_IMP_OPEN || ++ zone->cond == BLK_ZONE_COND_EXP_OPEN)) { ++ ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE); ++ if (ret) ++ return ret; ++ } ++ ++ zonefs_account_active(sb, z); ++ ++ n++; ++ } ++ ++ if (WARN_ON_ONCE(n != zgroup->g_nr_zones)) ++ return -EINVAL; ++ ++ zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", ++ zonefs_zgroup_name(ztype), ++ zgroup->g_nr_zones, ++ zgroup->g_nr_zones > 1 ? "s" : ""); ++ ++ return 0; ++} ++ ++static void zonefs_free_zgroups(struct super_block *sb) ++{ ++ struct zonefs_sb_info *sbi = ZONEFS_SB(sb); ++ enum zonefs_ztype ztype; ++ ++ if (!sbi) ++ return; ++ ++ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { ++ kvfree(sbi->s_zgroup[ztype].g_zones); ++ sbi->s_zgroup[ztype].g_zones = NULL; ++ } ++} ++ ++/* ++ * Create a zone group and populate it with zone files. ++ */ ++static int zonefs_init_zgroups(struct super_block *sb) ++{ ++ struct zonefs_zone_data zd; ++ enum zonefs_ztype ztype; ++ int ret; ++ ++ /* First get the device zone information */ ++ memset(&zd, 0, sizeof(struct zonefs_zone_data)); ++ zd.sb = sb; ++ ret = zonefs_get_zone_info(&zd); ++ if (ret) ++ goto cleanup; ++ ++ /* Allocate and initialize the zone groups */ ++ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { ++ ret = zonefs_init_zgroup(sb, &zd, ztype); ++ if (ret) { ++ zonefs_info(sb, ++ "Zone group \"%s\" initialization failed\n", ++ zonefs_zgroup_name(ztype)); ++ break; ++ } ++ } ++ ++cleanup: ++ zonefs_free_zone_info(&zd); ++ if (ret) ++ zonefs_free_zgroups(sb); ++ ++ return ret; ++} ++ + /* + * Read super block information from the device. + */ +@@ -1785,6 +1040,14 @@ free_page: + return ret; + } + ++static const struct super_operations zonefs_sops = { ++ .alloc_inode = zonefs_alloc_inode, ++ .free_inode = zonefs_free_inode, ++ .statfs = zonefs_statfs, ++ .remount_fs = zonefs_remount, ++ .show_options = zonefs_show_options, ++}; ++ + /* + * Check that the device is zoned. 
If it is, get the list of zones and create + * sub-directories and files according to the device zone configuration and +@@ -1792,7 +1055,6 @@ free_page: + */ + static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + { +- struct zonefs_zone_data zd; + struct zonefs_sb_info *sbi; + struct inode *inode; + enum zonefs_ztype t; +@@ -1845,16 +1107,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + if (ret) + return ret; + +- memset(&zd, 0, sizeof(struct zonefs_zone_data)); +- zd.sb = sb; +- ret = zonefs_get_zone_info(&zd); +- if (ret) +- goto cleanup; +- +- ret = zonefs_sysfs_register(sb); +- if (ret) +- goto cleanup; +- + zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); + + if (!sbi->s_max_wro_seq_files && +@@ -1865,6 +1117,11 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; + } + ++ /* Initialize the zone groups */ ++ ret = zonefs_init_zgroups(sb); ++ if (ret) ++ goto cleanup; ++ + /* Create root directory inode */ + ret = -ENOMEM; + inode = new_inode(sb); +@@ -1884,13 +1141,19 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + + /* Create and populate files in zone groups directories */ + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { +- ret = zonefs_create_zgroup(&zd, t); ++ ret = zonefs_create_zgroup_inodes(sb, t); + if (ret) +- break; ++ goto cleanup; + } + ++ ret = zonefs_sysfs_register(sb); ++ if (ret) ++ goto cleanup; ++ ++ return 0; ++ + cleanup: +- zonefs_cleanup_zone_info(&zd); ++ zonefs_free_zgroups(sb); + + return ret; + } +@@ -1909,6 +1172,7 @@ static void zonefs_kill_super(struct super_block *sb) + d_genocide(sb->s_root); + + zonefs_sysfs_unregister(sb); ++ zonefs_free_zgroups(sb); + kill_block_super(sb); + kfree(sbi); + } +diff --git a/fs/zonefs/trace.h b/fs/zonefs/trace.h +index 42edcfd393ed2..9969db3a9c7dc 100644 +--- a/fs/zonefs/trace.h ++++ b/fs/zonefs/trace.h +@@ -20,8 +20,9 @@ + #define show_dev(dev) MAJOR(dev), MINOR(dev) + + TRACE_EVENT(zonefs_zone_mgmt, +- TP_PROTO(struct inode *inode, enum req_op op), +- TP_ARGS(inode, op), ++ TP_PROTO(struct super_block *sb, struct zonefs_zone *z, ++ enum req_op op), ++ TP_ARGS(sb, z, op), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) +@@ -30,12 +31,12 @@ TRACE_EVENT(zonefs_zone_mgmt, + __field(sector_t, nr_sectors) + ), + TP_fast_assign( +- __entry->dev = inode->i_sb->s_dev; +- __entry->ino = inode->i_ino; ++ __entry->dev = sb->s_dev; ++ __entry->ino = ++ z->z_sector >> ZONEFS_SB(sb)->s_zone_sectors_shift; + __entry->op = op; +- __entry->sector = ZONEFS_I(inode)->i_zsector; +- __entry->nr_sectors = +- ZONEFS_I(inode)->i_zone_size >> SECTOR_SHIFT; ++ __entry->sector = z->z_sector; ++ __entry->nr_sectors = z->z_size >> SECTOR_SHIFT; + ), + TP_printk("bdev=(%d,%d), ino=%lu op=%s, sector=%llu, nr_sectors=%llu", + show_dev(__entry->dev), (unsigned long)__entry->ino, +@@ -58,9 +59,10 @@ TRACE_EVENT(zonefs_file_dio_append, + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; +- __entry->sector = ZONEFS_I(inode)->i_zsector; ++ __entry->sector = zonefs_inode_zone(inode)->z_sector; + __entry->size = size; +- __entry->wpoffset = ZONEFS_I(inode)->i_wpoffset; ++ __entry->wpoffset = ++ zonefs_inode_zone(inode)->z_wpoffset; + __entry->ret = ret; + ), + TP_printk("bdev=(%d, %d), ino=%lu, sector=%llu, size=%zu, wpoffset=%llu, ret=%zu", +diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h +index 
1dbe78119ff16..2d626e18b1411 100644 +--- a/fs/zonefs/zonefs.h ++++ b/fs/zonefs/zonefs.h +@@ -39,31 +39,47 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) + return ZONEFS_ZTYPE_SEQ; + } + +-#define ZONEFS_ZONE_OPEN (1U << 0) +-#define ZONEFS_ZONE_ACTIVE (1U << 1) +-#define ZONEFS_ZONE_OFFLINE (1U << 2) +-#define ZONEFS_ZONE_READONLY (1U << 3) ++#define ZONEFS_ZONE_INIT_MODE (1U << 0) ++#define ZONEFS_ZONE_OPEN (1U << 1) ++#define ZONEFS_ZONE_ACTIVE (1U << 2) ++#define ZONEFS_ZONE_OFFLINE (1U << 3) ++#define ZONEFS_ZONE_READONLY (1U << 4) ++#define ZONEFS_ZONE_CNV (1U << 31) + + /* +- * In-memory inode data. ++ * In-memory per-file inode zone data. + */ +-struct zonefs_inode_info { +- struct inode i_vnode; ++struct zonefs_zone { ++ /* Zone state flags */ ++ unsigned int z_flags; + +- /* File zone type */ +- enum zonefs_ztype i_ztype; ++ /* Zone start sector (512B unit) */ ++ sector_t z_sector; + +- /* File zone start sector (512B unit) */ +- sector_t i_zsector; ++ /* Zone size (bytes) */ ++ loff_t z_size; + +- /* File zone write pointer position (sequential zones only) */ +- loff_t i_wpoffset; ++ /* Zone capacity (file maximum size, bytes) */ ++ loff_t z_capacity; + +- /* File maximum size */ +- loff_t i_max_size; ++ /* Write pointer offset in the zone (sequential zones only, bytes) */ ++ loff_t z_wpoffset; ++}; + +- /* File zone size */ +- loff_t i_zone_size; ++/* ++ * In memory zone group information: all zones of a group are exposed ++ * as files, one file per zone. ++ */ ++struct zonefs_zone_group { ++ unsigned int g_nr_zones; ++ struct zonefs_zone *g_zones; ++}; ++ ++/* ++ * In-memory inode data. ++ */ ++struct zonefs_inode_info { ++ struct inode i_vnode; + + /* + * To serialise fully against both syscall and mmap based IO and +@@ -82,7 +98,6 @@ struct zonefs_inode_info { + + /* guarded by i_truncate_mutex */ + unsigned int i_wr_refcnt; +- unsigned int i_flags; + }; + + static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) +@@ -90,6 +105,31 @@ static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) + return container_of(inode, struct zonefs_inode_info, i_vnode); + } + ++static inline bool zonefs_zone_is_cnv(struct zonefs_zone *z) ++{ ++ return z->z_flags & ZONEFS_ZONE_CNV; ++} ++ ++static inline bool zonefs_zone_is_seq(struct zonefs_zone *z) ++{ ++ return !zonefs_zone_is_cnv(z); ++} ++ ++static inline struct zonefs_zone *zonefs_inode_zone(struct inode *inode) ++{ ++ return inode->i_private; ++} ++ ++static inline bool zonefs_inode_is_cnv(struct inode *inode) ++{ ++ return zonefs_zone_is_cnv(zonefs_inode_zone(inode)); ++} ++ ++static inline bool zonefs_inode_is_seq(struct inode *inode) ++{ ++ return zonefs_zone_is_seq(zonefs_inode_zone(inode)); ++} ++ + /* + * On-disk super block (block 0). + */ +@@ -181,7 +221,7 @@ struct zonefs_sb_info { + uuid_t s_uuid; + unsigned int s_zone_sectors_shift; + +- unsigned int s_nr_files[ZONEFS_ZTYPE_MAX]; ++ struct zonefs_zone_group s_zgroup[ZONEFS_ZTYPE_MAX]; + + loff_t s_blocks; + loff_t s_used_blocks; +@@ -209,6 +249,28 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) + #define zonefs_warn(sb, format, args...) 
\ + pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) + ++/* In super.c */ ++void zonefs_inode_account_active(struct inode *inode); ++int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op); ++void zonefs_i_size_write(struct inode *inode, loff_t isize); ++void zonefs_update_stats(struct inode *inode, loff_t new_isize); ++void __zonefs_io_error(struct inode *inode, bool write); ++ ++static inline void zonefs_io_error(struct inode *inode, bool write) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ ++ mutex_lock(&zi->i_truncate_mutex); ++ __zonefs_io_error(inode, write); ++ mutex_unlock(&zi->i_truncate_mutex); ++} ++ ++/* In file.c */ ++extern const struct address_space_operations zonefs_file_aops; ++extern const struct file_operations zonefs_file_operations; ++int zonefs_file_truncate(struct inode *inode, loff_t isize); ++ ++/* In sysfs.c */ + int zonefs_sysfs_register(struct super_block *sb); + void zonefs_sysfs_unregister(struct super_block *sb); + int zonefs_sysfs_init(void); +diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h +index 0584e9f6e3397..57acb895c0381 100644 +--- a/include/acpi/acpi_bus.h ++++ b/include/acpi/acpi_bus.h +@@ -657,6 +657,7 @@ static inline bool acpi_quirk_skip_acpi_ac_and_battery(void) + #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) + bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev); + int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip); ++bool acpi_quirk_skip_gpio_event_handlers(void); + #else + static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev) + { +@@ -668,6 +669,10 @@ acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip) + *skip = false; + return 0; + } ++static inline bool acpi_quirk_skip_gpio_event_handlers(void) ++{ ++ return false; ++} + #endif + + #ifdef CONFIG_PM +diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h +index 90b2fb0292cb1..012fa0d171b27 100644 +--- a/include/trace/events/rcu.h ++++ b/include/trace/events/rcu.h +@@ -768,7 +768,7 @@ TRACE_EVENT_RCU(rcu_torture_read, + TP_ARGS(rcutorturename, rhp, secs, c_old, c), + + TP_STRUCT__entry( +- __field(char, rcutorturename[RCUTORTURENAME_LEN]) ++ __array(char, rcutorturename, RCUTORTURENAME_LEN) + __field(struct rcu_head *, rhp) + __field(unsigned long, secs) + __field(unsigned long, c_old) +diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h +index 655d92e803e14..79a443c65ea93 100644 +--- a/include/xen/interface/platform.h ++++ b/include/xen/interface/platform.h +@@ -483,6 +483,8 @@ struct xenpf_symdata { + }; + DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata); + ++#define XENPF_get_dom0_console 64 ++ + struct xen_platform_op { + uint32_t cmd; + uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ +@@ -506,6 +508,7 @@ struct xen_platform_op { + struct xenpf_mem_hotadd mem_add; + struct xenpf_core_parking core_parking; + struct xenpf_symdata symdata; ++ struct dom0_vga_console_info dom0_console; + uint8_t pad[128]; + } u; + }; +diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h +index 729793ae97127..c2cde88aeed53 100644 +--- a/io_uring/alloc_cache.h ++++ b/io_uring/alloc_cache.h +@@ -27,6 +27,7 @@ static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *c + struct hlist_node *node = cache->list.first; + + hlist_del(node); ++ cache->nr_cached--; + return container_of(node, struct io_cache_entry, node); + } + +diff --git a/io_uring/poll.c b/io_uring/poll.c +index 
fea739eef56f4..666666ab2e73d 100644 +--- a/io_uring/poll.c ++++ b/io_uring/poll.c +@@ -724,6 +724,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) + apoll = io_req_alloc_apoll(req, issue_flags); + if (!apoll) + return IO_APOLL_ABORTED; ++ req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL); + req->flags |= REQ_F_POLLED; + ipt.pt._qproc = io_async_queue_proc; + +diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h +index 2b8743645efcb..f27f4975217d9 100644 +--- a/io_uring/rsrc.h ++++ b/io_uring/rsrc.h +@@ -144,15 +144,13 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req, + unsigned int issue_flags) + { + if (!req->rsrc_node) { +- req->rsrc_node = ctx->rsrc_node; ++ io_ring_submit_lock(ctx, issue_flags); + +- if (!(issue_flags & IO_URING_F_UNLOCKED)) { +- lockdep_assert_held(&ctx->uring_lock); ++ lockdep_assert_held(&ctx->uring_lock); + +- io_charge_rsrc_node(ctx); +- } else { +- percpu_ref_get(&req->rsrc_node->refs); +- } ++ req->rsrc_node = ctx->rsrc_node; ++ io_charge_rsrc_node(ctx); ++ io_ring_submit_unlock(ctx, issue_flags); + } + } + +diff --git a/kernel/compat.c b/kernel/compat.c +index 55551989d9da5..fb50f29d9b361 100644 +--- a/kernel/compat.c ++++ b/kernel/compat.c +@@ -152,7 +152,7 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len, + if (len & (sizeof(compat_ulong_t)-1)) + return -EINVAL; + +- if (!alloc_cpumask_var(&mask, GFP_KERNEL)) ++ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + ret = sched_getaffinity(pid, mask); +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 9a0698353d60f..57d84b534cdea 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8404,14 +8404,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + if (len & (sizeof(unsigned long)-1)) + return -EINVAL; + +- if (!alloc_cpumask_var(&mask, GFP_KERNEL)) ++ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + ret = sched_getaffinity(pid, mask); + if (ret == 0) { + unsigned int retlen = min(len, cpumask_size()); + +- if (copy_to_user(user_mask_ptr, mask, retlen)) ++ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen)) + ret = -EFAULT; + else + ret = retlen; +diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c +index c736487fc0e48..e0c420eb0b2b4 100644 +--- a/kernel/trace/kprobe_event_gen_test.c ++++ b/kernel/trace/kprobe_event_gen_test.c +@@ -146,7 +146,7 @@ static int __init test_gen_kprobe_cmd(void) + if (trace_event_file_is_valid(gen_kprobe_test)) + gen_kprobe_test = NULL; + /* We got an error after creating the event, delete it */ +- ret = kprobe_event_delete("gen_kprobe_test"); ++ kprobe_event_delete("gen_kprobe_test"); + goto out; + } + +@@ -211,7 +211,7 @@ static int __init test_gen_kretprobe_cmd(void) + if (trace_event_file_is_valid(gen_kretprobe_test)) + gen_kretprobe_test = NULL; + /* We got an error after creating the event, delete it */ +- ret = kprobe_event_delete("gen_kretprobe_test"); ++ kprobe_event_delete("gen_kretprobe_test"); + goto out; + } + +diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h +index 7a5bf44839c9c..f06df065dec01 100644 +--- a/lib/zstd/common/zstd_deps.h ++++ b/lib/zstd/common/zstd_deps.h +@@ -84,7 +84,7 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) { + + #include <linux/kernel.h> + +-#define assert(x) WARN_ON((x)) ++#define assert(x) WARN_ON(!(x)) + + #endif /* ZSTD_DEPS_ASSERT */ + #endif /* ZSTD_DEPS_NEED_ASSERT */ +diff --git 
a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c +index b9b935a9f5c0d..6b3177c947114 100644 +--- a/lib/zstd/decompress/zstd_decompress.c ++++ b/lib/zstd/decompress/zstd_decompress.c +@@ -798,7 +798,7 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } +- ZSTD_memcpy(dst, src, srcSize); ++ ZSTD_memmove(dst, src, srcSize); + return srcSize; + } + +@@ -858,6 +858,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + + /* Loop on each block */ + while (1) { ++ BYTE* oBlockEnd = oend; + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); +@@ -867,16 +868,34 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + ++ if (ip >= op && ip < oBlockEnd) { ++ /* We are decompressing in-place. Limit the output pointer so that we ++ * don't overwrite the block that we are currently reading. This will ++ * fail decompression if the input & output pointers aren't spaced ++ * far enough apart. ++ * ++ * This is important to set, even when the pointers are far enough ++ * apart, because ZSTD_decompressBlock_internal() can decide to store ++ * literals in the output buffer, after the block it is decompressing. ++ * Since we don't want anything to overwrite our input, we have to tell ++ * ZSTD_decompressBlock_internal to never write past ip. ++ * ++ * See ZSTD_allocateLiteralsBuffer() for reference. ++ */ ++ oBlockEnd = op + (ip - op); ++ } ++ + switch(blockProperties.blockType) + { + case bt_compressed: +- decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming); ++ decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); + break; + case bt_raw : ++ /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. 
*/ + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : +- decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); ++ decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: +diff --git a/net/can/bcm.c b/net/can/bcm.c +index 27706f6ace34a..a962ec2b8ba5b 100644 +--- a/net/can/bcm.c ++++ b/net/can/bcm.c +@@ -941,6 +941,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, + + cf = op->frames + op->cfsiz * i; + err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); ++ if (err < 0) ++ goto free_op; + + if (op->flags & CAN_FD_FRAME) { + if (cf->len > 64) +@@ -950,12 +952,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, + err = -EINVAL; + } + +- if (err < 0) { +- if (op->frames != &op->sframe) +- kfree(op->frames); +- kfree(op); +- return err; +- } ++ if (err < 0) ++ goto free_op; + + if (msg_head->flags & TX_CP_CAN_ID) { + /* copy can_id into frame */ +@@ -1026,6 +1024,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, + bcm_tx_start_timer(op); + + return msg_head->nframes * op->cfsiz + MHSIZ; ++ ++free_op: ++ if (op->frames != &op->sframe) ++ kfree(op->frames); ++ kfree(op); ++ return err; + } + + /* +diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c +index fce9b9ebf13f6..fb92c3609e172 100644 +--- a/net/can/j1939/transport.c ++++ b/net/can/j1939/transport.c +@@ -1124,8 +1124,6 @@ static void __j1939_session_cancel(struct j1939_session *session, + + if (session->sk) + j1939_sk_send_loop_abort(session->sk, session->err); +- else +- j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); + } + + static void j1939_session_cancel(struct j1939_session *session, +@@ -1140,6 +1138,9 @@ static void j1939_session_cancel(struct j1939_session *session, + } + + j1939_session_list_unlock(session->priv); ++ ++ if (!session->sk) ++ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); + } + + static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) +@@ -1253,6 +1254,9 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) + __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); + } + j1939_session_list_unlock(session->priv); ++ ++ if (!session->sk) ++ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); + } + + j1939_session_put(session); +diff --git a/net/dsa/slave.c b/net/dsa/slave.c +index 6711ddc0a3c7d..df8b16c741a40 100644 +--- a/net/dsa/slave.c ++++ b/net/dsa/slave.c +@@ -57,6 +57,12 @@ struct dsa_standalone_event_work { + u16 vid; + }; + ++struct dsa_host_vlan_rx_filtering_ctx { ++ struct net_device *dev; ++ const unsigned char *addr; ++ enum dsa_standalone_event event; ++}; ++ + static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) + { + return ds->ops->port_fdb_add && ds->ops->port_fdb_del && +@@ -155,18 +161,37 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev, + return 0; + } + ++static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid, ++ void *arg) ++{ ++ struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; ++ ++ return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event, ++ ctx->addr, vid); ++} ++ + static int dsa_slave_sync_uc(struct net_device *dev, + const unsigned char *addr) + { + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); ++ struct dsa_host_vlan_rx_filtering_ctx ctx = { ++ .dev = dev, ++ .addr = addr, 
++ .event = DSA_UC_ADD, ++ }; ++ int err; + + dev_uc_add(master, addr); + + if (!dsa_switch_supports_uc_filtering(dp->ds)) + return 0; + +- return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); ++ err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); ++ if (err) ++ return err; ++ ++ return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + } + + static int dsa_slave_unsync_uc(struct net_device *dev, +@@ -174,13 +199,23 @@ static int dsa_slave_unsync_uc(struct net_device *dev, + { + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); ++ struct dsa_host_vlan_rx_filtering_ctx ctx = { ++ .dev = dev, ++ .addr = addr, ++ .event = DSA_UC_DEL, ++ }; ++ int err; + + dev_uc_del(master, addr); + + if (!dsa_switch_supports_uc_filtering(dp->ds)) + return 0; + +- return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); ++ err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); ++ if (err) ++ return err; ++ ++ return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + } + + static int dsa_slave_sync_mc(struct net_device *dev, +@@ -188,13 +223,23 @@ static int dsa_slave_sync_mc(struct net_device *dev, + { + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); ++ struct dsa_host_vlan_rx_filtering_ctx ctx = { ++ .dev = dev, ++ .addr = addr, ++ .event = DSA_MC_ADD, ++ }; ++ int err; + + dev_mc_add(master, addr); + + if (!dsa_switch_supports_mc_filtering(dp->ds)) + return 0; + +- return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); ++ err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); ++ if (err) ++ return err; ++ ++ return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + } + + static int dsa_slave_unsync_mc(struct net_device *dev, +@@ -202,13 +247,23 @@ static int dsa_slave_unsync_mc(struct net_device *dev, + { + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); ++ struct dsa_host_vlan_rx_filtering_ctx ctx = { ++ .dev = dev, ++ .addr = addr, ++ .event = DSA_MC_DEL, ++ }; ++ int err; + + dev_mc_del(master, addr); + + if (!dsa_switch_supports_mc_filtering(dp->ds)) + return 0; + +- return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); ++ err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); ++ if (err) ++ return err; ++ ++ return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + } + + void dsa_slave_sync_ha(struct net_device *dev) +@@ -1668,6 +1723,8 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, + .flags = 0, + }; + struct netlink_ext_ack extack = {0}; ++ struct dsa_switch *ds = dp->ds; ++ struct netdev_hw_addr *ha; + int ret; + + /* User port... 
*/ +@@ -1687,6 +1744,30 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, + return ret; + } + ++ if (!dsa_switch_supports_uc_filtering(ds) && ++ !dsa_switch_supports_mc_filtering(ds)) ++ return 0; ++ ++ netif_addr_lock_bh(dev); ++ ++ if (dsa_switch_supports_mc_filtering(ds)) { ++ netdev_for_each_synced_mc_addr(ha, dev) { ++ dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, ++ ha->addr, vid); ++ } ++ } ++ ++ if (dsa_switch_supports_uc_filtering(ds)) { ++ netdev_for_each_synced_uc_addr(ha, dev) { ++ dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, ++ ha->addr, vid); ++ } ++ } ++ ++ netif_addr_unlock_bh(dev); ++ ++ dsa_flush_workqueue(); ++ + return 0; + } + +@@ -1699,13 +1780,43 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, + /* This API only allows programming tagged, non-PVID VIDs */ + .flags = 0, + }; ++ struct dsa_switch *ds = dp->ds; ++ struct netdev_hw_addr *ha; + int err; + + err = dsa_port_vlan_del(dp, &vlan); + if (err) + return err; + +- return dsa_port_host_vlan_del(dp, &vlan); ++ err = dsa_port_host_vlan_del(dp, &vlan); ++ if (err) ++ return err; ++ ++ if (!dsa_switch_supports_uc_filtering(ds) && ++ !dsa_switch_supports_mc_filtering(ds)) ++ return 0; ++ ++ netif_addr_lock_bh(dev); ++ ++ if (dsa_switch_supports_mc_filtering(ds)) { ++ netdev_for_each_synced_mc_addr(ha, dev) { ++ dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, ++ ha->addr, vid); ++ } ++ } ++ ++ if (dsa_switch_supports_uc_filtering(ds)) { ++ netdev_for_each_synced_uc_addr(ha, dev) { ++ dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, ++ ha->addr, vid); ++ } ++ } ++ ++ netif_addr_unlock_bh(dev); ++ ++ dsa_flush_workqueue(); ++ ++ return 0; + } + + static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg) +diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c +index 00db74d96583d..b77f1189d19d1 100644 +--- a/net/hsr/hsr_framereg.c ++++ b/net/hsr/hsr_framereg.c +@@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, + node_dst = find_node_by_addr_A(&port->hsr->node_db, + eth_hdr(skb)->h_dest); + if (!node_dst) { +- if (net_ratelimit()) ++ if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) + netdev_err(skb->dev, "%s: Unknown node\n", __func__); + return; + } +diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c +index d611e15301839..e24d2d5b04ad0 100644 +--- a/net/mac80211/cfg.c ++++ b/net/mac80211/cfg.c +@@ -2576,6 +2576,17 @@ static int ieee80211_change_bss(struct wiphy *wiphy, + if (!sband) + return -EINVAL; + ++ if (params->basic_rates) { ++ if (!ieee80211_parse_bitrates(link->conf->chandef.width, ++ wiphy->bands[sband->band], ++ params->basic_rates, ++ params->basic_rates_len, ++ &link->conf->basic_rates)) ++ return -EINVAL; ++ changed |= BSS_CHANGED_BASIC_RATES; ++ ieee80211_check_rate_mask(link); ++ } ++ + if (params->use_cts_prot >= 0) { + link->conf->use_cts_prot = params->use_cts_prot; + changed |= BSS_CHANGED_ERP_CTS_PROT; +@@ -2597,16 +2608,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy, + changed |= BSS_CHANGED_ERP_SLOT; + } + +- if (params->basic_rates) { +- ieee80211_parse_bitrates(link->conf->chandef.width, +- wiphy->bands[sband->band], +- params->basic_rates, +- params->basic_rates_len, +- &link->conf->basic_rates); +- changed |= BSS_CHANGED_BASIC_RATES; +- ieee80211_check_rate_mask(link); +- } +- + if (params->ap_isolate >= 0) { + if (params->ap_isolate) + sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; +diff --git a/net/sunrpc/xprtsock.c 
b/net/sunrpc/xprtsock.c +index aaa5b2741b79d..1b9465b43997c 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -2155,6 +2155,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) + switch (skst) { + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: ++ case TCP_LAST_ACK: + break; + case TCP_ESTABLISHED: + case TCP_CLOSE_WAIT: +diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c +index cf5172d4ce68c..103af2b3e986f 100644 +--- a/net/xfrm/xfrm_user.c ++++ b/net/xfrm/xfrm_user.c +@@ -1012,7 +1012,9 @@ static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb) + return -EMSGSIZE; + + ap = nla_data(nla); +- memcpy(ap, aead, sizeof(*aead)); ++ strscpy_pad(ap->alg_name, aead->alg_name, sizeof(ap->alg_name)); ++ ap->alg_key_len = aead->alg_key_len; ++ ap->alg_icv_len = aead->alg_icv_len; + + if (redact_secret && aead->alg_key_len) + memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8); +@@ -1032,7 +1034,8 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) + return -EMSGSIZE; + + ap = nla_data(nla); +- memcpy(ap, ealg, sizeof(*ealg)); ++ strscpy_pad(ap->alg_name, ealg->alg_name, sizeof(ap->alg_name)); ++ ap->alg_key_len = ealg->alg_key_len; + + if (redact_secret && ealg->alg_key_len) + memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8); +@@ -1043,6 +1046,40 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) + return 0; + } + ++static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb) ++{ ++ struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg)); ++ struct xfrm_algo *ap; ++ ++ if (!nla) ++ return -EMSGSIZE; ++ ++ ap = nla_data(nla); ++ strscpy_pad(ap->alg_name, calg->alg_name, sizeof(ap->alg_name)); ++ ap->alg_key_len = 0; ++ ++ return 0; ++} ++ ++static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb) ++{ ++ struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep)); ++ struct xfrm_encap_tmpl *uep; ++ ++ if (!nla) ++ return -EMSGSIZE; ++ ++ uep = nla_data(nla); ++ memset(uep, 0, sizeof(*uep)); ++ ++ uep->encap_type = ep->encap_type; ++ uep->encap_sport = ep->encap_sport; ++ uep->encap_dport = ep->encap_dport; ++ uep->encap_oa = ep->encap_oa; ++ ++ return 0; ++} ++ + static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m) + { + int ret = 0; +@@ -1098,12 +1135,12 @@ static int copy_to_user_state_extra(struct xfrm_state *x, + goto out; + } + if (x->calg) { +- ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); ++ ret = copy_to_user_calg(x->calg, skb); + if (ret) + goto out; + } + if (x->encap) { +- ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); ++ ret = copy_to_user_encap(x->encap, skb); + if (ret) + goto out; + } +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index efff8078e3958..9466b6a2abae4 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -1733,7 +1733,7 @@ static void extract_crcs_for_object(const char *object, struct module *mod) + if (!isdigit(*p)) + continue; /* skip this line */ + +- crc = strtol(p, &p, 0); ++ crc = strtoul(p, &p, 0); + if (*p != '\n') + continue; /* skip this line */ + +diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c +index 8b6aeb8a78f7d..02fd65993e7e5 100644 +--- a/sound/core/pcm_lib.c ++++ b/sound/core/pcm_lib.c +@@ -2155,6 +2155,8 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream, + ret = substream->ops->ack(substream); + if (ret < 0) { + runtime->control->appl_ptr = old_appl_ptr; ++ if (ret == -EPIPE) ++ __snd_pcm_xrun(substream); 
+ return ret; + } + } +diff --git a/sound/pci/asihpi/hpi6205.c b/sound/pci/asihpi/hpi6205.c +index 27e11b5f70b97..c7d7eff86727f 100644 +--- a/sound/pci/asihpi/hpi6205.c ++++ b/sound/pci/asihpi/hpi6205.c +@@ -430,7 +430,7 @@ void HPI_6205(struct hpi_message *phm, struct hpi_response *phr) + pao = hpi_find_adapter(phm->adapter_index); + } else { + /* subsys messages don't address an adapter */ +- _HPI_6205(NULL, phm, phr); ++ phr->error = HPI_ERROR_INVALID_OBJ_INDEX; + return; + } + +diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c +index acde4cd58785e..099722ebaed83 100644 +--- a/sound/pci/hda/patch_ca0132.c ++++ b/sound/pci/hda/patch_ca0132.c +@@ -4228,8 +4228,10 @@ static int tuning_ctl_set(struct hda_codec *codec, hda_nid_t nid, + + for (i = 0; i < TUNING_CTLS_COUNT; i++) + if (nid == ca0132_tuning_ctls[i].nid) +- break; ++ goto found; + ++ return -EINVAL; ++found: + snd_hda_power_up(codec); + dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20, + ca0132_tuning_ctls[i].req, +diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c +index 75e1d00074b9f..a889cccdd607c 100644 +--- a/sound/pci/hda/patch_conexant.c ++++ b/sound/pci/hda/patch_conexant.c +@@ -980,7 +980,10 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { + SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), +- SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_PINCFG_LENOVO_NOTEBOOK), ++ /* NOTE: we'd need to extend the quirk for 17aa:3977 as the same ++ * PCI SSID is used on multiple Lenovo models ++ */ ++ SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI), +@@ -1003,6 +1006,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { + { .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" }, + { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, + { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, ++ { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, + {} + }; + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 28ac6c159b2a2..070150bbd3559 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -2631,6 +2631,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { + SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), ++ SND_PCI_QUIRK(0x1558, 0x66a2, "Clevo PE60RNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), +@@ -2651,6 +2652,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { + SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo 
P970RC-M", ALC1220_FIXUP_CLEVO_P950), ++ SND_PCI_QUIRK(0x1558, 0xd502, "Clevo PD50SNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), + SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), + SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), +@@ -9574,6 +9576,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), ++ SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), +@@ -9608,6 +9611,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL5[03]RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), ++ SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), +@@ -9708,6 +9712,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), ++ SND_PCI_QUIRK(0x17aa, 0x9e56, "Lenovo ZhaoYang CF4620Z", ALC286_FIXUP_SONY_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK), + SND_PCI_QUIRK(0x1849, 0xa233, "Positivo Master C6300", ALC269_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), +diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c +index 1e198e4d57b8d..82d4e0fda91be 100644 +--- a/sound/pci/ymfpci/ymfpci.c ++++ b/sound/pci/ymfpci/ymfpci.c +@@ -170,7 +170,7 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci, + return -ENOENT; + } + +- err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE, ++ err = snd_devm_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE, + sizeof(*chip), &card); + if (err < 0) + return err; +diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c +index c80114c0ad7bf..b492c32ce0704 100644 +--- a/sound/pci/ymfpci/ymfpci_main.c ++++ b/sound/pci/ymfpci/ymfpci_main.c +@@ -2165,7 +2165,7 @@ static int snd_ymfpci_memalloc(struct snd_ymfpci *chip) + chip->work_base = ptr; + chip->work_base_addr = ptr_addr; + +- snd_BUG_ON(ptr + chip->work_size != ++ snd_BUG_ON(ptr + PAGE_ALIGN(chip->work_size) != + chip->work_ptr->area + chip->work_ptr->bytes); + + snd_ymfpci_writel(chip, YDSXGR_PLAYCTRLBASE, chip->bank_base_playback_addr); +diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c +index 74cbbe16f9aec..a22f2ec95901f 100644 
+--- a/sound/soc/codecs/hdmi-codec.c ++++ b/sound/soc/codecs/hdmi-codec.c +@@ -428,8 +428,13 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream, + { + struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); + bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; ++ bool has_capture = !hcp->hcd.no_i2s_capture; ++ bool has_playback = !hcp->hcd.no_i2s_playback; + int ret = 0; + ++ if (!((has_playback && tx) || (has_capture && !tx))) ++ return 0; ++ + mutex_lock(&hcp->lock); + if (hcp->busy) { + dev_err(dai->dev, "Only one simultaneous stream supported!\n"); +@@ -468,6 +473,12 @@ static void hdmi_codec_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) + { + struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); ++ bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; ++ bool has_capture = !hcp->hcd.no_i2s_capture; ++ bool has_playback = !hcp->hcd.no_i2s_playback; ++ ++ if (!((has_playback && tx) || (has_capture && !tx))) ++ return; + + hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN; + hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data); +diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c +index 2449a2df66df0..8facdb922f076 100644 +--- a/sound/soc/codecs/lpass-tx-macro.c ++++ b/sound/soc/codecs/lpass-tx-macro.c +@@ -242,7 +242,7 @@ enum { + + struct tx_mute_work { + struct tx_macro *tx; +- u32 decimator; ++ u8 decimator; + struct delayed_work dwork; + }; + +@@ -635,7 +635,7 @@ exit: + return 0; + } + +-static bool is_amic_enabled(struct snd_soc_component *component, int decimator) ++static bool is_amic_enabled(struct snd_soc_component *component, u8 decimator) + { + u16 adc_mux_reg, adc_reg, adc_n; + +@@ -849,7 +849,7 @@ static int tx_macro_enable_dec(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) + { + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); +- unsigned int decimator; ++ u8 decimator; + u16 tx_vol_ctl_reg, dec_cfg_reg, hpf_gate_reg, tx_gain_ctl_reg; + u8 hpf_cut_off_freq; + int hpf_delay = TX_MACRO_DMIC_HPF_DELAY_MS; +@@ -1064,7 +1064,8 @@ static int tx_macro_hw_params(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) + { + struct snd_soc_component *component = dai->component; +- u32 decimator, sample_rate; ++ u32 sample_rate; ++ u8 decimator; + int tx_fs_rate; + struct tx_macro *tx = snd_soc_component_get_drvdata(component); + +@@ -1128,7 +1129,7 @@ static int tx_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream) + { + struct snd_soc_component *component = dai->component; + struct tx_macro *tx = snd_soc_component_get_drvdata(component); +- u16 decimator; ++ u8 decimator; + + /* active decimator not set yet */ + if (tx->active_decimator[dai->id] == -1) +diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c +index acd43b6108e99..1a1d572cc1d02 100644 +--- a/sound/soc/intel/avs/boards/da7219.c ++++ b/sound/soc/intel/avs/boards/da7219.c +@@ -117,6 +117,26 @@ static void avs_da7219_codec_exit(struct snd_soc_pcm_runtime *rtd) + snd_soc_component_set_jack(asoc_rtd_to_codec(rtd, 0)->component, NULL, NULL); + } + ++static int ++avs_da7219_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params) ++{ ++ struct snd_interval *rate, *channels; ++ struct snd_mask *fmt; ++ ++ rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); ++ channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); ++ fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); ++ ++ 
/* The ADSP will convert the FE rate to 48k, stereo */ ++ rate->min = rate->max = 48000; ++ channels->min = channels->max = 2; ++ ++ /* set SSP0 to 24 bit */ ++ snd_mask_none(fmt); ++ snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE); ++ return 0; ++} ++ + static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port, + struct snd_soc_dai_link **dai_link) + { +@@ -148,6 +168,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in + dl->num_platforms = 1; + dl->id = 0; + dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS; ++ dl->be_hw_params_fixup = avs_da7219_be_fixup; + dl->init = avs_da7219_codec_init; + dl->exit = avs_da7219_codec_exit; + dl->nonatomic = 1; +diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c +index 921f42caf7e09..183123d08c5a3 100644 +--- a/sound/soc/intel/avs/boards/max98357a.c ++++ b/sound/soc/intel/avs/boards/max98357a.c +@@ -8,6 +8,7 @@ + + #include <linux/module.h> + #include <linux/platform_device.h> ++#include <sound/pcm_params.h> + #include <sound/soc.h> + #include <sound/soc-acpi.h> + #include <sound/soc-dapm.h> +@@ -24,6 +25,26 @@ static const struct snd_soc_dapm_route card_base_routes[] = { + { "Spk", NULL, "Speaker" }, + }; + ++static int ++avs_max98357a_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params) ++{ ++ struct snd_interval *rate, *channels; ++ struct snd_mask *fmt; ++ ++ rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); ++ channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); ++ fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); ++ ++ /* The ADSP will convert the FE rate to 48k, stereo */ ++ rate->min = rate->max = 48000; ++ channels->min = channels->max = 2; ++ ++ /* set SSP0 to 16 bit */ ++ snd_mask_none(fmt); ++ snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S16_LE); ++ return 0; ++} ++ + static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port, + struct snd_soc_dai_link **dai_link) + { +@@ -55,6 +76,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in + dl->num_platforms = 1; + dl->id = 0; + dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS; ++ dl->be_hw_params_fixup = avs_max98357a_be_fixup; + dl->nonatomic = 1; + dl->no_pcm = 1; + dl->dpcm_playback = 1; +diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c +index 6731d8a490767..49438a67a77c6 100644 +--- a/sound/soc/intel/avs/boards/nau8825.c ++++ b/sound/soc/intel/avs/boards/nau8825.c +@@ -33,15 +33,15 @@ avs_nau8825_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *co + return -EINVAL; + } + +- if (!SND_SOC_DAPM_EVENT_ON(event)) { ++ if (SND_SOC_DAPM_EVENT_ON(event)) ++ ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000, ++ SND_SOC_CLOCK_IN); ++ else + ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN); +- if (ret < 0) { +- dev_err(card->dev, "set sysclk err = %d\n", ret); +- return ret; +- } +- } ++ if (ret < 0) ++ dev_err(card->dev, "Set sysclk failed: %d\n", ret); + +- return 0; ++ return ret; + } + + static const struct snd_kcontrol_new card_controls[] = { +diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c +index 473e9fe5d0bf7..b2c2ba93dcb56 100644 +--- a/sound/soc/intel/avs/boards/rt5682.c ++++ b/sound/soc/intel/avs/boards/rt5682.c +@@ -169,6 +169,27 @@ static const struct 
snd_soc_ops avs_rt5682_ops = { + .hw_params = avs_rt5682_hw_params, + }; + ++static int ++avs_rt5682_be_fixup(struct snd_soc_pcm_runtime *runtime, struct snd_pcm_hw_params *params) ++{ ++ struct snd_interval *rate, *channels; ++ struct snd_mask *fmt; ++ ++ rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); ++ channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); ++ fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); ++ ++ /* The ADSP will convert the FE rate to 48k, stereo */ ++ rate->min = rate->max = 48000; ++ channels->min = channels->max = 2; ++ ++ /* set SSPN to 24 bit */ ++ snd_mask_none(fmt); ++ snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE); ++ ++ return 0; ++} ++ + static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port, + struct snd_soc_dai_link **dai_link) + { +@@ -201,6 +222,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in + dl->id = 0; + dl->init = avs_rt5682_codec_init; + dl->exit = avs_rt5682_codec_exit; ++ dl->be_hw_params_fixup = avs_rt5682_be_fixup; + dl->ops = &avs_rt5682_ops; + dl->nonatomic = 1; + dl->no_pcm = 1; +diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c +index c5db696127624..2b7f5ad92aca7 100644 +--- a/sound/soc/intel/avs/boards/ssm4567.c ++++ b/sound/soc/intel/avs/boards/ssm4567.c +@@ -15,7 +15,6 @@ + #include <sound/soc-acpi.h> + #include "../../../codecs/nau8825.h" + +-#define SKL_NUVOTON_CODEC_DAI "nau8825-hifi" + #define SKL_SSM_CODEC_DAI "ssm4567-hifi" + + static struct snd_soc_codec_conf card_codec_conf[] = { +@@ -34,41 +33,11 @@ static const struct snd_kcontrol_new card_controls[] = { + SOC_DAPM_PIN_SWITCH("Right Speaker"), + }; + +-static int +-platform_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *control, int event) +-{ +- struct snd_soc_dapm_context *dapm = w->dapm; +- struct snd_soc_card *card = dapm->card; +- struct snd_soc_dai *codec_dai; +- int ret; +- +- codec_dai = snd_soc_card_get_codec_dai(card, SKL_NUVOTON_CODEC_DAI); +- if (!codec_dai) { +- dev_err(card->dev, "Codec dai not found\n"); +- return -EINVAL; +- } +- +- if (SND_SOC_DAPM_EVENT_ON(event)) { +- ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000, +- SND_SOC_CLOCK_IN); +- if (ret < 0) +- dev_err(card->dev, "set sysclk err = %d\n", ret); +- } else { +- ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN); +- if (ret < 0) +- dev_err(card->dev, "set sysclk err = %d\n", ret); +- } +- +- return ret; +-} +- + static const struct snd_soc_dapm_widget card_widgets[] = { + SND_SOC_DAPM_SPK("Left Speaker", NULL), + SND_SOC_DAPM_SPK("Right Speaker", NULL), + SND_SOC_DAPM_SPK("DP1", NULL), + SND_SOC_DAPM_SPK("DP2", NULL), +- SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, platform_clock_control, +- SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + }; + + static const struct snd_soc_dapm_route card_base_routes[] = { +diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c +index 3aea36c077c9d..f3bdeba284122 100644 +--- a/sound/soc/sof/intel/hda-ctrl.c ++++ b/sound/soc/sof/intel/hda-ctrl.c +@@ -196,12 +196,15 @@ int hda_dsp_ctrl_init_chip(struct snd_sof_dev *sdev) + goto err; + } + ++ usleep_range(500, 1000); ++ + /* exit HDA controller reset */ + ret = hda_dsp_ctrl_link_reset(sdev, false); + if (ret < 0) { + dev_err(sdev->dev, "error: failed to exit HDA controller reset\n"); + goto err; + } ++ usleep_range(1000, 1200); + + hda_codec_detect_mask(sdev); + +diff --git 
a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c +index b4eacae8564c8..6b2094f74c9c0 100644 +--- a/sound/soc/sof/intel/hda-dsp.c ++++ b/sound/soc/sof/intel/hda-dsp.c +@@ -399,6 +399,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value) + snd_sof_dsp_update8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset, + SOF_HDA_VS_D0I3C_I3, value); + ++ /* ++ * The value written to the D0I3C::I3 bit may not be taken into account immediately. ++ * A delay is recommended before checking if D0I3C::CIP is cleared ++ */ ++ usleep_range(30, 40); ++ + /* Wait for cmd in progress to be cleared before exiting the function */ + ret = hda_dsp_wait_d0i3c_done(sdev); + if (ret < 0) { +@@ -407,6 +413,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value) + } + + reg = snd_sof_dsp_read8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset); ++ /* Confirm d0i3 state changed with paranoia check */ ++ if ((reg ^ value) & SOF_HDA_VS_D0I3C_I3) { ++ dev_err(sdev->dev, "failed to update D0I3C!\n"); ++ return -EIO; ++ } ++ + trace_sof_intel_D0I3C_updated(sdev, reg); + + return 0; +diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c +index 5b2b409752c58..8c22a00266c06 100644 +--- a/sound/soc/sof/intel/pci-tng.c ++++ b/sound/soc/sof/intel/pci-tng.c +@@ -75,11 +75,7 @@ static int tangier_pci_probe(struct snd_sof_dev *sdev) + + /* LPE base */ + base = pci_resource_start(pci, desc->resindex_lpe_base) - IRAM_OFFSET; +- size = pci_resource_len(pci, desc->resindex_lpe_base); +- if (size < PCI_BAR_SIZE) { +- dev_err(sdev->dev, "error: I/O region is too small.\n"); +- return -ENODEV; +- } ++ size = PCI_BAR_SIZE; + + dev_dbg(sdev->dev, "LPE PHY base at 0x%x size 0x%x", base, size); + sdev->bar[DSP_BAR] = devm_ioremap(sdev->dev, base, size); +diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c +index 1fef4dcc09368..fde8af5a1f485 100644 +--- a/sound/soc/sof/ipc3.c ++++ b/sound/soc/sof/ipc3.c +@@ -970,8 +970,9 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev) + return; + } + +- if (hdr.size < sizeof(hdr)) { +- dev_err(sdev->dev, "The received message size is invalid\n"); ++ if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) { ++ dev_err(sdev->dev, "The received message size is invalid: %u\n", ++ hdr.size); + return; + } + +diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c +index 0d5a578c34962..7442ec1c5a4d4 100644 +--- a/sound/soc/sof/ipc4-control.c ++++ b/sound/soc/sof/ipc4-control.c +@@ -84,7 +84,8 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge + } + + /* set curve type and duration from topology */ +- data.curve_duration = gain->data.curve_duration; ++ data.curve_duration_l = gain->data.curve_duration_l; ++ data.curve_duration_h = gain->data.curve_duration_h; + data.curve_type = gain->data.curve_type; + + msg->data_ptr = &data; +diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c +index 59f4d42f9011e..6da6137fa2cbc 100644 +--- a/sound/soc/sof/ipc4-topology.c ++++ b/sound/soc/sof/ipc4-topology.c +@@ -107,7 +107,7 @@ static const struct sof_topology_token gain_tokens[] = { + get_token_u32, offsetof(struct sof_ipc4_gain_data, curve_type)}, + {SOF_TKN_GAIN_RAMP_DURATION, + SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32, +- offsetof(struct sof_ipc4_gain_data, curve_duration)}, ++ offsetof(struct sof_ipc4_gain_data, curve_duration_l)}, + {SOF_TKN_GAIN_VAL, SND_SOC_TPLG_TUPLE_TYPE_WORD, + get_token_u32, offsetof(struct sof_ipc4_gain_data, init_val)}, + }; +@@ -155,7 
+155,7 @@ static void sof_ipc4_dbg_audio_format(struct device *dev, + for (i = 0; i < num_format; i++, ptr = (u8 *)ptr + object_size) { + fmt = ptr; + dev_dbg(dev, +- " #%d: %uKHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n", ++ " #%d: %uHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n", + i, fmt->sampling_frequency, fmt->bit_depth, fmt->ch_map, + fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg); + } +@@ -670,7 +670,7 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget) + + dev_dbg(scomp->dev, + "pga widget %s: ramp type: %d, ramp duration %d, initial gain value: %#x, cpc %d\n", +- swidget->widget->name, gain->data.curve_type, gain->data.curve_duration, ++ swidget->widget->name, gain->data.curve_type, gain->data.curve_duration_l, + gain->data.init_val, gain->base_config.cpc); + + ret = sof_ipc4_widget_setup_msg(swidget, &gain->msg); +diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h +index 2363a7cc0b57d..cf9d278524572 100644 +--- a/sound/soc/sof/ipc4-topology.h ++++ b/sound/soc/sof/ipc4-topology.h +@@ -217,14 +217,16 @@ struct sof_ipc4_control_data { + * @init_val: Initial value + * @curve_type: Curve type + * @reserved: reserved for future use +- * @curve_duration: Curve duration ++ * @curve_duration_l: Curve duration low part ++ * @curve_duration_h: Curve duration high part + */ + struct sof_ipc4_gain_data { + uint32_t channels; + uint32_t init_val; + uint32_t curve_type; + uint32_t reserved; +- uint32_t curve_duration; ++ uint32_t curve_duration_l; ++ uint32_t curve_duration_h; + } __aligned(8); + + /** +diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c +index 419302e2057e8..647fa054d8b1d 100644 +--- a/sound/usb/endpoint.c ++++ b/sound/usb/endpoint.c +@@ -455,8 +455,8 @@ static void push_back_to_ready_list(struct snd_usb_endpoint *ep, + * This function is used both for implicit feedback endpoints and in low- + * latency playback mode. 
+ */ +-void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, +- bool in_stream_lock) ++int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, ++ bool in_stream_lock) + { + bool implicit_fb = snd_usb_endpoint_implicit_feedback_sink(ep); + +@@ -480,7 +480,7 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, + spin_unlock_irqrestore(&ep->lock, flags); + + if (ctx == NULL) +- return; ++ break; + + /* copy over the length information */ + if (implicit_fb) { +@@ -495,11 +495,14 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, + break; + if (err < 0) { + /* push back to ready list again for -EAGAIN */ +- if (err == -EAGAIN) ++ if (err == -EAGAIN) { + push_back_to_ready_list(ep, ctx); +- else ++ break; ++ } ++ ++ if (!in_stream_lock) + notify_xrun(ep); +- return; ++ return -EPIPE; + } + + err = usb_submit_urb(ctx->urb, GFP_ATOMIC); +@@ -507,13 +510,16 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, + usb_audio_err(ep->chip, + "Unable to submit urb #%d: %d at %s\n", + ctx->index, err, __func__); +- notify_xrun(ep); +- return; ++ if (!in_stream_lock) ++ notify_xrun(ep); ++ return -EPIPE; + } + + set_bit(ctx->index, &ep->active_mask); + atomic_inc(&ep->submitted_urbs); + } ++ ++ return 0; + } + + /* +diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h +index 924f4351588ce..c09f68ce08b18 100644 +--- a/sound/usb/endpoint.h ++++ b/sound/usb/endpoint.h +@@ -52,7 +52,7 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep); + int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, + struct snd_urb_ctx *ctx, int idx, + unsigned int avail); +-void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, +- bool in_stream_lock); ++int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, ++ bool in_stream_lock); + + #endif /* __USBAUDIO_ENDPOINT_H */ +diff --git a/sound/usb/format.c b/sound/usb/format.c +index 405dc0bf6678c..4b1c5ba121f39 100644 +--- a/sound/usb/format.c ++++ b/sound/usb/format.c +@@ -39,8 +39,12 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, + case UAC_VERSION_1: + default: { + struct uac_format_type_i_discrete_descriptor *fmt = _fmt; +- if (format >= 64) +- return 0; /* invalid format */ ++ if (format >= 64) { ++ usb_audio_info(chip, ++ "%u:%d: invalid format type 0x%llx is detected, processed as PCM\n", ++ fp->iface, fp->altsetting, format); ++ format = UAC_FORMAT_TYPE_I_PCM; ++ } + sample_width = fmt->bBitResolution; + sample_bytes = fmt->bSubframeSize; + format = 1ULL << format; +diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c +index d959da7a1afba..eec5232f9fb29 100644 +--- a/sound/usb/pcm.c ++++ b/sound/usb/pcm.c +@@ -1639,7 +1639,7 @@ static int snd_usb_pcm_playback_ack(struct snd_pcm_substream *substream) + * outputs here + */ + if (!ep->active_mask) +- snd_usb_queue_pending_output_urbs(ep, true); ++ return snd_usb_queue_pending_output_urbs(ep, true); + return 0; + } + +diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c +index 69e80ee5f70e2..cfbec31e115cc 100644 +--- a/tools/lib/bpf/btf_dump.c ++++ b/tools/lib/bpf/btf_dump.c +@@ -833,14 +833,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, + const struct btf_type *t) + { + const struct btf_member *m; +- int align, i, bit_sz; ++ int max_align = 1, align, i, bit_sz; + __u16 vlen; + +- align = btf__align_of(btf, id); +- /* size of a non-packed struct has to be a multiple of its alignment*/ +- if (align && t->size % align) +- return true; +- 
+ m = btf_members(t); + vlen = btf_vlen(t); + /* all non-bitfield fields have to be naturally aligned */ +@@ -849,8 +844,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, + bit_sz = btf_member_bitfield_size(t, i); + if (align && bit_sz == 0 && m->offset % (8 * align) != 0) + return true; ++ max_align = max(align, max_align); + } +- ++ /* size of a non-packed struct has to be a multiple of its alignment */ ++ if (t->size % max_align != 0) ++ return true; + /* + * if original struct was marked as packed, but its layout is + * naturally aligned, we'll detect that it's not packed +@@ -858,44 +856,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, + return false; + } + +-static int chip_away_bits(int total, int at_most) +-{ +- return total % at_most ? : at_most; +-} +- + static void btf_dump_emit_bit_padding(const struct btf_dump *d, +- int cur_off, int m_off, int m_bit_sz, +- int align, int lvl) ++ int cur_off, int next_off, int next_align, ++ bool in_bitfield, int lvl) + { +- int off_diff = m_off - cur_off; +- int ptr_bits = d->ptr_sz * 8; ++ const struct { ++ const char *name; ++ int bits; ++ } pads[] = { ++ {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8} ++ }; ++ int new_off, pad_bits, bits, i; ++ const char *pad_type; ++ ++ if (cur_off >= next_off) ++ return; /* no gap */ ++ ++ /* For filling out padding we want to take advantage of ++ * natural alignment rules to minimize unnecessary explicit ++ * padding. First, we find the largest type (among long, int, ++ * short, or char) that can be used to force naturally aligned ++ * boundary. Once determined, we'll use such type to fill in ++ * the remaining padding gap. In some cases we can rely on ++ * compiler filling some gaps, but sometimes we need to force ++ * alignment to close natural alignment with markers like ++ * `long: 0` (this is always the case for bitfields). Note ++ * that even if struct itself has, let's say 4-byte alignment ++ * (i.e., it only uses up to int-aligned types), using `long: ++ * X;` explicit padding doesn't actually change struct's ++ * overall alignment requirements, but compiler does take into ++ * account that type's (long, in this example) natural ++ * alignment requirements when adding implicit padding. We use ++ * this fact heavily and don't worry about ruining correct ++ * struct alignment requirement. 
++ */ ++ for (i = 0; i < ARRAY_SIZE(pads); i++) { ++ pad_bits = pads[i].bits; ++ pad_type = pads[i].name; + +- if (off_diff <= 0) +- /* no gap */ +- return; +- if (m_bit_sz == 0 && off_diff < align * 8) +- /* natural padding will take care of a gap */ +- return; ++ new_off = roundup(cur_off, pad_bits); ++ if (new_off <= next_off) ++ break; ++ } + +- while (off_diff > 0) { +- const char *pad_type; +- int pad_bits; +- +- if (ptr_bits > 32 && off_diff > 32) { +- pad_type = "long"; +- pad_bits = chip_away_bits(off_diff, ptr_bits); +- } else if (off_diff > 16) { +- pad_type = "int"; +- pad_bits = chip_away_bits(off_diff, 32); +- } else if (off_diff > 8) { +- pad_type = "short"; +- pad_bits = chip_away_bits(off_diff, 16); +- } else { +- pad_type = "char"; +- pad_bits = chip_away_bits(off_diff, 8); ++ if (new_off > cur_off && new_off <= next_off) { ++ /* We need explicit `<type>: 0` aligning mark if next ++ * field is right on alignment offset and its ++ * alignment requirement is less strict than <type>'s ++ * alignment (so compiler won't naturally align to the ++ * offset we expect), or if subsequent `<type>: X`, ++ * will actually completely fit in the remaining hole, ++ * making compiler basically ignore `<type>: X` ++ * completely. ++ */ ++ if (in_bitfield || ++ (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) || ++ (new_off != next_off && next_off - new_off <= new_off - cur_off)) ++ /* but for bitfields we'll emit explicit bit count */ ++ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, ++ in_bitfield ? new_off - cur_off : 0); ++ cur_off = new_off; ++ } ++ ++ /* Now we know we start at naturally aligned offset for a chosen ++ * padding type (long, int, short, or char), and so the rest is just ++ * a straightforward filling of remaining padding gap with full ++ * `<type>: sizeof(<type>);` markers, except for the last one, which ++ * might need smaller than sizeof(<type>) padding. ++ */ ++ while (cur_off != next_off) { ++ bits = min(next_off - cur_off, pad_bits); ++ if (bits == pad_bits) { ++ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); ++ cur_off += bits; ++ continue; ++ } ++ /* For the remainder padding that doesn't cover entire ++ * pad_type bit length, we pick the smallest necessary type. ++ * This is pure aesthetics, we could have just used `long`, ++ * but having smallest necessary one communicates better the ++ * scale of the padding gap. ++ */ ++ for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) { ++ pad_type = pads[i].name; ++ pad_bits = pads[i].bits; ++ if (pad_bits < bits) ++ continue; ++ ++ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits); ++ cur_off += bits; ++ break; + } +- btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); +- off_diff -= pad_bits; + } + } + +@@ -915,9 +966,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, + { + const struct btf_member *m = btf_members(t); + bool is_struct = btf_is_struct(t); +- int align, i, packed, off = 0; ++ bool packed, prev_bitfield = false; ++ int align, i, off = 0; + __u16 vlen = btf_vlen(t); + ++ align = btf__align_of(d->btf, id); + packed = is_struct ? 
btf_is_struct_packed(d->btf, id, t) : 0; + + btf_dump_printf(d, "%s%s%s {", +@@ -927,33 +980,36 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, + + for (i = 0; i < vlen; i++, m++) { + const char *fname; +- int m_off, m_sz; ++ int m_off, m_sz, m_align; ++ bool in_bitfield; + + fname = btf_name_of(d, m->name_off); + m_sz = btf_member_bitfield_size(t, i); + m_off = btf_member_bit_offset(t, i); +- align = packed ? 1 : btf__align_of(d->btf, m->type); ++ m_align = packed ? 1 : btf__align_of(d->btf, m->type); + +- btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); ++ in_bitfield = prev_bitfield && m_sz != 0; ++ ++ btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1); + btf_dump_printf(d, "\n%s", pfx(lvl + 1)); + btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); + + if (m_sz) { + btf_dump_printf(d, ": %d", m_sz); + off = m_off + m_sz; ++ prev_bitfield = true; + } else { + m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); + off = m_off + m_sz * 8; ++ prev_bitfield = false; + } ++ + btf_dump_printf(d, ";"); + } + + /* pad at the end, if necessary */ +- if (is_struct) { +- align = packed ? 1 : btf__align_of(d->btf, id); +- btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, +- lvl + 1); +- } ++ if (is_struct) ++ btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1); + + /* + * Keep `struct empty {}` on a single line, +diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c +index 52aa0351533c3..388c9e3ad0407 100644 +--- a/tools/power/acpi/tools/pfrut/pfrut.c ++++ b/tools/power/acpi/tools/pfrut/pfrut.c +@@ -97,7 +97,7 @@ static struct option long_options[] = { + static void parse_options(int argc, char **argv) + { + int option_index = 0; +- char *pathname; ++ char *pathname, *endptr; + int opt; + + pathname = strdup(argv[0]); +@@ -125,11 +125,23 @@ static void parse_options(int argc, char **argv) + log_getinfo = 1; + break; + case 'T': +- log_type = atoi(optarg); ++ log_type = strtol(optarg, &endptr, 0); ++ if (*endptr || (log_type != 0 && log_type != 1)) { ++ printf("Number expected: type(0:execution, 1:history) - Quit.\n"); ++ exit(1); ++ } ++ + set_log_type = 1; + break; + case 'L': +- log_level = atoi(optarg); ++ log_level = strtol(optarg, &endptr, 0); ++ if (*endptr || ++ (log_level != 0 && log_level != 1 && ++ log_level != 2 && log_level != 4)) { ++ printf("Number expected: level(0, 1, 2, 4) - Quit.\n"); ++ exit(1); ++ } ++ + set_log_level = 1; + break; + case 'R': +diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 +index c7b26a3603afe..3e1a4c4be001a 100644 +--- a/tools/power/x86/turbostat/turbostat.8 ++++ b/tools/power/x86/turbostat/turbostat.8 +@@ -344,6 +344,8 @@ Alternatively, non-root users can be enabled to run turbostat this way: + + # chmod +r /dev/cpu/*/msr + ++# chmod +r /dev/cpu_dma_latency ++ + .B "turbostat " + reads hardware counters, but doesn't write them. + So it will not interfere with the OS or other programs, including +diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c +index aba460410dbd1..c61c6c704fbe6 100644 +--- a/tools/power/x86/turbostat/turbostat.c ++++ b/tools/power/x86/turbostat/turbostat.c +@@ -4426,7 +4426,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) + + fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", +- cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? 
"" : "No-"); ++ cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); + + return 0; + } +@@ -5482,7 +5482,7 @@ void print_dev_latency(void) + + retval = read(fd, (void *)&value, sizeof(int)); + if (retval != sizeof(int)) { +- warn("read %s\n", path); ++ warn("read failed %s\n", path); + close(fd); + return; + } +diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +index e5560a6560309..e01690618e1ee 100644 +--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c ++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +@@ -53,7 +53,7 @@ struct bitfields_only_mixed_types { + */ + /* ------ END-EXPECTED-OUTPUT ------ */ + struct bitfield_mixed_with_others { +- long: 4; /* char is enough as a backing field */ ++ char: 4; /* char is enough as a backing field */ + int a: 4; + /* 8-bit implicit padding */ + short b; /* combined with previous bitfield */ +diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +index e304b6204bd9d..7998f27df7ddd 100644 +--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c ++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +@@ -58,7 +58,81 @@ union jump_code_union { + } __attribute__((packed)); + }; + +-/*------ END-EXPECTED-OUTPUT ------ */ ++/* ----- START-EXPECTED-OUTPUT ----- */ ++/* ++ *struct nested_packed_but_aligned_struct { ++ * int x1; ++ * int x2; ++ *}; ++ * ++ *struct outer_implicitly_packed_struct { ++ * char y1; ++ * struct nested_packed_but_aligned_struct y2; ++ *} __attribute__((packed)); ++ * ++ */ ++/* ------ END-EXPECTED-OUTPUT ------ */ ++ ++struct nested_packed_but_aligned_struct { ++ int x1; ++ int x2; ++} __attribute__((packed)); ++ ++struct outer_implicitly_packed_struct { ++ char y1; ++ struct nested_packed_but_aligned_struct y2; ++}; ++/* ----- START-EXPECTED-OUTPUT ----- */ ++/* ++ *struct usb_ss_ep_comp_descriptor { ++ * char: 8; ++ * char bDescriptorType; ++ * char bMaxBurst; ++ * short wBytesPerInterval; ++ *}; ++ * ++ *struct usb_host_endpoint { ++ * long: 64; ++ * char: 8; ++ * struct usb_ss_ep_comp_descriptor ss_ep_comp; ++ * long: 0; ++ *} __attribute__((packed)); ++ * ++ */ ++/* ------ END-EXPECTED-OUTPUT ------ */ ++ ++struct usb_ss_ep_comp_descriptor { ++ char: 8; ++ char bDescriptorType; ++ char bMaxBurst; ++ int: 0; ++ short wBytesPerInterval; ++} __attribute__((packed)); ++ ++struct usb_host_endpoint { ++ long: 64; ++ char: 8; ++ struct usb_ss_ep_comp_descriptor ss_ep_comp; ++ long: 0; ++}; ++ ++/* ----- START-EXPECTED-OUTPUT ----- */ ++struct nested_packed_struct { ++ int a; ++ char b; ++} __attribute__((packed)); ++ ++struct outer_nonpacked_struct { ++ short a; ++ struct nested_packed_struct b; ++}; ++ ++struct outer_packed_struct { ++ short a; ++ struct nested_packed_struct b; ++} __attribute__((packed)); ++ ++/* ------ END-EXPECTED-OUTPUT ------ */ + + int f(struct { + struct packed_trailing_space _1; +@@ -69,6 +143,10 @@ int f(struct { + union union_is_never_packed _6; + union union_does_not_need_packing _7; + union jump_code_union _8; ++ struct outer_implicitly_packed_struct _9; ++ struct usb_host_endpoint _10; ++ struct outer_nonpacked_struct _11; ++ struct outer_packed_struct _12; + } *_) + { + return 0; +diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +index 
7cb522d22a664..79276fbe454a8 100644 +--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c ++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +@@ -19,7 +19,7 @@ struct padded_implicitly { + /* + *struct padded_explicitly { + * int a; +- * int: 32; ++ * long: 0; + * int b; + *}; + * +@@ -28,41 +28,28 @@ struct padded_implicitly { + + struct padded_explicitly { + int a; +- int: 1; /* algo will explicitly pad with full 32 bits here */ ++ int: 1; /* algo will emit aligning `long: 0;` here */ + int b; + }; + + /* ----- START-EXPECTED-OUTPUT ----- */ +-/* +- *struct padded_a_lot { +- * int a; +- * long: 32; +- * long: 64; +- * long: 64; +- * int b; +- *}; +- * +- */ +-/* ------ END-EXPECTED-OUTPUT ------ */ +- + struct padded_a_lot { + int a; +- /* 32 bit of implicit padding here, which algo will make explicit */ + long: 64; + long: 64; + int b; + }; + ++/* ------ END-EXPECTED-OUTPUT ------ */ ++ + /* ----- START-EXPECTED-OUTPUT ----- */ + /* + *struct padded_cache_line { + * int a; +- * long: 32; + * long: 64; + * long: 64; + * long: 64; + * int b; +- * long: 32; + * long: 64; + * long: 64; + * long: 64; +@@ -85,7 +72,7 @@ struct padded_cache_line { + *struct zone { + * int a; + * short b; +- * short: 16; ++ * long: 0; + * struct zone_padding __pad__; + *}; + * +@@ -108,6 +95,131 @@ struct padding_wo_named_members { + long: 64; + }; + ++struct padding_weird_1 { ++ int a; ++ long: 64; ++ short: 16; ++ short b; ++}; ++ ++/* ------ END-EXPECTED-OUTPUT ------ */ ++ ++/* ----- START-EXPECTED-OUTPUT ----- */ ++/* ++ *struct padding_weird_2 { ++ * long: 56; ++ * char a; ++ * long: 56; ++ * char b; ++ * char: 8; ++ *}; ++ * ++ */ ++/* ------ END-EXPECTED-OUTPUT ------ */ ++struct padding_weird_2 { ++ int: 32; /* these paddings will be collapsed into `long: 56;` */ ++ short: 16; ++ char: 8; ++ char a; ++ int: 32; /* these paddings will be collapsed into `long: 56;` */ ++ short: 16; ++ char: 8; ++ char b; ++ char: 8; ++}; ++ ++/* ----- START-EXPECTED-OUTPUT ----- */ ++struct exact_1byte { ++ char x; ++}; ++ ++struct padded_1byte { ++ char: 8; ++}; ++ ++struct exact_2bytes { ++ short x; ++}; ++ ++struct padded_2bytes { ++ short: 16; ++}; ++ ++struct exact_4bytes { ++ int x; ++}; ++ ++struct padded_4bytes { ++ int: 32; ++}; ++ ++struct exact_8bytes { ++ long x; ++}; ++ ++struct padded_8bytes { ++ long: 64; ++}; ++ ++struct ff_periodic_effect { ++ int: 32; ++ short magnitude; ++ long: 0; ++ short phase; ++ long: 0; ++ int: 32; ++ int custom_len; ++ short *custom_data; ++}; ++ ++struct ib_wc { ++ long: 64; ++ long: 64; ++ int: 32; ++ int byte_len; ++ void *qp; ++ union {} ex; ++ long: 64; ++ int slid; ++ int wc_flags; ++ long: 64; ++ char smac[6]; ++ long: 0; ++ char network_hdr_type; ++}; ++ ++struct acpi_object_method { ++ long: 64; ++ char: 8; ++ char type; ++ short reference_count; ++ char flags; ++ short: 0; ++ char: 8; ++ char sync_level; ++ long: 64; ++ void *node; ++ void *aml_start; ++ union {} dispatch; ++ long: 64; ++ int aml_length; ++}; ++ ++struct nested_unpacked { ++ int x; ++}; ++ ++struct nested_packed { ++ struct nested_unpacked a; ++ char c; ++} __attribute__((packed)); ++ ++struct outer_mixed_but_unpacked { ++ struct nested_packed b1; ++ short a1; ++ struct nested_packed b2; ++}; ++ + /* ------ END-EXPECTED-OUTPUT ------ */ + + int f(struct { +@@ -117,6 +229,20 @@ int f(struct { + struct padded_cache_line _4; + struct zone _5; + struct padding_wo_named_members _6; ++ struct padding_weird_1 _7; ++ struct padding_weird_2 _8; ++ struct exact_1byte _100; ++ 
struct padded_1byte _101; ++ struct exact_2bytes _102; ++ struct padded_2bytes _103; ++ struct exact_4bytes _104; ++ struct padded_4bytes _105; ++ struct exact_8bytes _106; ++ struct padded_8bytes _107; ++ struct ff_periodic_effect _200; ++ struct ib_wc _201; ++ struct acpi_object_method _202; ++ struct outer_mixed_but_unpacked _203; + } *_) + { + return 0; |
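
The btf_dump padding rework and the expected-output comments in the test cases above lean on how GCC and Clang lay out anonymous bit-fields on Linux targets: a sized anonymous bit-field such as char: 8; or short: 16; only skips that many bits, while a zero-width bit-field such as long: 0; pushes the next member to the next alignment boundary of the named type. A minimal stand-alone sketch, not part of the patch, that checks one such layout with offsetof (the struct and field names are illustrative):

/*
 * Illustrative only: mirrors the `struct padded_explicitly` expected
 * output above.  On x86-64 Linux, `long: 0;` after `int a;` pushes
 * `int b;` to offset 8 while the unnamed bit-field leaves the struct
 * 4-byte aligned (size 12); other ABIs may align the struct differently.
 */
#include <stddef.h>
#include <stdio.h>

struct padded_explicitly_demo {
	int a;
	long: 0;	/* align the next member to the next long boundary */
	int b;
};

int main(void)
{
	printf("b at offset %zu, struct size %zu\n",
	       offsetof(struct padded_explicitly_demo, b),
	       sizeof(struct padded_explicitly_demo));
	return 0;
}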
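
The pfrut.c hunk above replaces atoi() with strtol() plus an end-pointer check, so arguments with trailing garbage or unsupported values are rejected instead of being silently parsed as 0. The same pattern in isolation, as a sketch under the assumption of a simple command-line tool (parse_log_type and its messages are illustrative, not the tool's actual code):

/*
 * Sketch of the strtol()-plus-endptr validation pattern used in the
 * pfrut.c hunk above; the helper and its wording are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

static int parse_log_type(const char *arg, long *out)
{
	char *endptr;

	*out = strtol(arg, &endptr, 0);
	/* reject trailing garbage and values other than 0 or 1 */
	if (*endptr || (*out != 0 && *out != 1))
		return -1;
	return 0;
}

int main(int argc, char **argv)
{
	long log_type;

	if (argc < 2 || parse_log_type(argv[1], &log_type)) {
		printf("Number expected: type(0:execution, 1:history) - Quit.\n");
		return 1;
	}
	printf("log type: %ld\n", log_type);
	return 0;
}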