author | Mike Pagano <mpagano@gentoo.org> | 2024-06-21 10:06:47 -0400 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2024-06-21 10:06:47 -0400 |
commit | db92f9e09ad91a4aee86ae156867b587718c98d9 (patch) | |
tree | b42f704bc736592c3b0a2047aba0646bff6090e5 | |
parent | Linux patch 6.6.34 (diff) | |
download | linux-patches-db92f9e09ad91a4aee86ae156867b587718c98d9.tar.gz linux-patches-db92f9e09ad91a4aee86ae156867b587718c98d9.tar.bz2 linux-patches-db92f9e09ad91a4aee86ae156867b587718c98d9.zip |
Linux patch 6.6.35 (tag: 6.6-42)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 |
-rw-r--r-- | 1034_linux-6.6.35.patch | 11925 |
2 files changed, 11929 insertions, 0 deletions
diff --git a/0000_README b/0000_README index 1591fcc7..5fa01a61 100644 --- a/0000_README +++ b/0000_README @@ -179,6 +179,10 @@ Patch: 1033_linux-6.6.34.patch From: https://www.kernel.org Desc: Linux 6.6.34 +Patch: 1034_linux-6.6.35.patch +From: https://www.kernel.org +Desc: Linux 6.6.35 + Patch: 1510_fs-enable-link-security-restrictions-by-default.patch From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/ Desc: Enable link security restrictions by default. diff --git a/1034_linux-6.6.35.patch b/1034_linux-6.6.35.patch new file mode 100644 index 00000000..931ef63e --- /dev/null +++ b/1034_linux-6.6.35.patch @@ -0,0 +1,11925 @@ +diff --git a/Makefile b/Makefile +index bc62304cf0b26..3faa2679d9a71 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 6 + PATCHLEVEL = 6 +-SUBLEVEL = 34 ++SUBLEVEL = 35 + EXTRAVERSION = + NAME = Hurr durr I'ma ninja sloth + +diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h +index ba4c05bc24d69..8394718870e1a 100644 +--- a/arch/parisc/include/asm/cacheflush.h ++++ b/arch/parisc/include/asm/cacheflush.h +@@ -31,18 +31,17 @@ void flush_cache_all_local(void); + void flush_cache_all(void); + void flush_cache_mm(struct mm_struct *mm); + +-void flush_kernel_dcache_page_addr(const void *addr); +- + #define flush_kernel_dcache_range(start,size) \ + flush_kernel_dcache_range_asm((start), (start)+(size)); + ++/* The only way to flush a vmap range is to flush whole cache */ + #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 + void flush_kernel_vmap_range(void *vaddr, int size); + void invalidate_kernel_vmap_range(void *vaddr, int size); + +-#define flush_cache_vmap(start, end) flush_cache_all() ++void flush_cache_vmap(unsigned long start, unsigned long end); + #define flush_cache_vmap_early(start, end) do { } while (0) +-#define flush_cache_vunmap(start, end) flush_cache_all() ++void flush_cache_vunmap(unsigned long start, unsigned long end); + + void flush_dcache_folio(struct folio *folio); + #define flush_dcache_folio flush_dcache_folio +@@ -77,17 +76,11 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, + void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); + +-/* defined in pacache.S exported in cache.c used by flush_anon_page */ +-void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); +- + #define ARCH_HAS_FLUSH_ANON_PAGE + void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr); + + #define ARCH_HAS_FLUSH_ON_KUNMAP +-static inline void kunmap_flush_on_unmap(const void *addr) +-{ +- flush_kernel_dcache_page_addr(addr); +-} ++void kunmap_flush_on_unmap(const void *addr); + + #endif /* _PARISC_CACHEFLUSH_H */ + +diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h +index 974accac05cd3..babf65751e818 100644 +--- a/arch/parisc/include/asm/pgtable.h ++++ b/arch/parisc/include/asm/pgtable.h +@@ -448,14 +448,17 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) + return pte; + } + ++static inline pte_t ptep_get(pte_t *ptep) ++{ ++ return READ_ONCE(*ptep); ++} ++#define ptep_get ptep_get ++ + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) + { + pte_t pte; + +- if (!pte_young(*ptep)) +- return 0; +- +- pte = *ptep; ++ pte = ptep_get(ptep); + if (!pte_young(pte)) { + return 0; + } +@@ -463,17 +466,10 
@@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned + return 1; + } + +-struct mm_struct; +-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +-{ +- pte_t old_pte; +- +- old_pte = *ptep; +- set_pte(ptep, __pte(0)); +- +- return old_pte; +-} ++int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); ++pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); + ++struct mm_struct; + static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + set_pte(ptep, pte_wrprotect(*ptep)); +@@ -511,7 +507,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR ++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH ++#define __HAVE_ARCH_PTEP_CLEAR_FLUSH + #define __HAVE_ARCH_PTEP_SET_WRPROTECT + #define __HAVE_ARCH_PTE_SAME + +diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c +index 393822f167270..f7953b0391cf6 100644 +--- a/arch/parisc/kernel/cache.c ++++ b/arch/parisc/kernel/cache.c +@@ -20,6 +20,7 @@ + #include <linux/sched.h> + #include <linux/sched/mm.h> + #include <linux/syscalls.h> ++#include <linux/vmalloc.h> + #include <asm/pdc.h> + #include <asm/cache.h> + #include <asm/cacheflush.h> +@@ -31,20 +32,31 @@ + #include <asm/mmu_context.h> + #include <asm/cachectl.h> + ++#define PTR_PAGE_ALIGN_DOWN(addr) PTR_ALIGN_DOWN(addr, PAGE_SIZE) ++ ++/* ++ * When nonzero, use _PAGE_ACCESSED bit to try to reduce the number ++ * of page flushes done flush_cache_page_if_present. There are some ++ * pros and cons in using this option. It may increase the risk of ++ * random segmentation faults. ++ */ ++#define CONFIG_FLUSH_PAGE_ACCESSED 0 ++ + int split_tlb __ro_after_init; + int dcache_stride __ro_after_init; + int icache_stride __ro_after_init; + EXPORT_SYMBOL(dcache_stride); + ++/* Internal implementation in arch/parisc/kernel/pacache.S */ + void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); + EXPORT_SYMBOL(flush_dcache_page_asm); + void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); + void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); +- +-/* Internal implementation in arch/parisc/kernel/pacache.S */ + void flush_data_cache_local(void *); /* flushes local data-cache only */ + void flush_instruction_cache_local(void); /* flushes local code-cache only */ + ++static void flush_kernel_dcache_page_addr(const void *addr); ++ + /* On some machines (i.e., ones with the Merced bus), there can be + * only a single PxTLB broadcast at a time; this must be guaranteed + * by software. We need a spinlock around all TLB flushes to ensure +@@ -317,6 +329,18 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, + { + if (!static_branch_likely(&parisc_has_cache)) + return; ++ ++ /* ++ * The TLB is the engine of coherence on parisc. The CPU is ++ * entitled to speculate any page with a TLB mapping, so here ++ * we kill the mapping then flush the page along a special flush ++ * only alias mapping. This guarantees that the page is no-longer ++ * in the cache for any process and nor may it be speculatively ++ * read in (until the user or kernel specifically accesses it, ++ * of course). 
++ */ ++ flush_tlb_page(vma, vmaddr); ++ + preempt_disable(); + flush_dcache_page_asm(physaddr, vmaddr); + if (vma->vm_flags & VM_EXEC) +@@ -324,46 +348,44 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, + preempt_enable(); + } + +-static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr) ++static void flush_kernel_dcache_page_addr(const void *addr) + { +- unsigned long flags, space, pgd, prot; +-#ifdef CONFIG_TLB_PTLOCK +- unsigned long pgd_lock; +-#endif ++ unsigned long vaddr = (unsigned long)addr; ++ unsigned long flags; + +- vmaddr &= PAGE_MASK; ++ /* Purge TLB entry to remove translation on all CPUs */ ++ purge_tlb_start(flags); ++ pdtlb(SR_KERNEL, addr); ++ purge_tlb_end(flags); + ++ /* Use tmpalias flush to prevent data cache move-in */ + preempt_disable(); ++ flush_dcache_page_asm(__pa(vaddr), vaddr); ++ preempt_enable(); ++} + +- /* Set context for flush */ +- local_irq_save(flags); +- prot = mfctl(8); +- space = mfsp(SR_USER); +- pgd = mfctl(25); +-#ifdef CONFIG_TLB_PTLOCK +- pgd_lock = mfctl(28); +-#endif +- switch_mm_irqs_off(NULL, vma->vm_mm, NULL); +- local_irq_restore(flags); +- +- flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE); +- if (vma->vm_flags & VM_EXEC) +- flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE); +- flush_tlb_page(vma, vmaddr); ++static void flush_kernel_icache_page_addr(const void *addr) ++{ ++ unsigned long vaddr = (unsigned long)addr; ++ unsigned long flags; + +- /* Restore previous context */ +- local_irq_save(flags); +-#ifdef CONFIG_TLB_PTLOCK +- mtctl(pgd_lock, 28); +-#endif +- mtctl(pgd, 25); +- mtsp(space, SR_USER); +- mtctl(prot, 8); +- local_irq_restore(flags); ++ /* Purge TLB entry to remove translation on all CPUs */ ++ purge_tlb_start(flags); ++ pdtlb(SR_KERNEL, addr); ++ purge_tlb_end(flags); + ++ /* Use tmpalias flush to prevent instruction cache move-in */ ++ preempt_disable(); ++ flush_icache_page_asm(__pa(vaddr), vaddr); + preempt_enable(); + } + ++void kunmap_flush_on_unmap(const void *addr) ++{ ++ flush_kernel_dcache_page_addr(addr); ++} ++EXPORT_SYMBOL(kunmap_flush_on_unmap); ++ + void flush_icache_pages(struct vm_area_struct *vma, struct page *page, + unsigned int nr) + { +@@ -371,13 +393,16 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page, + + for (;;) { + flush_kernel_dcache_page_addr(kaddr); +- flush_kernel_icache_page(kaddr); ++ flush_kernel_icache_page_addr(kaddr); + if (--nr == 0) + break; + kaddr += PAGE_SIZE; + } + } + ++/* ++ * Walk page directory for MM to find PTEP pointer for address ADDR. ++ */ + static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr) + { + pte_t *ptep = NULL; +@@ -406,6 +431,41 @@ static inline bool pte_needs_flush(pte_t pte) + == (_PAGE_PRESENT | _PAGE_ACCESSED); + } + ++/* ++ * Return user physical address. Returns 0 if page is not present. 
++ */ ++static inline unsigned long get_upa(struct mm_struct *mm, unsigned long addr) ++{ ++ unsigned long flags, space, pgd, prot, pa; ++#ifdef CONFIG_TLB_PTLOCK ++ unsigned long pgd_lock; ++#endif ++ ++ /* Save context */ ++ local_irq_save(flags); ++ prot = mfctl(8); ++ space = mfsp(SR_USER); ++ pgd = mfctl(25); ++#ifdef CONFIG_TLB_PTLOCK ++ pgd_lock = mfctl(28); ++#endif ++ ++ /* Set context for lpa_user */ ++ switch_mm_irqs_off(NULL, mm, NULL); ++ pa = lpa_user(addr); ++ ++ /* Restore previous context */ ++#ifdef CONFIG_TLB_PTLOCK ++ mtctl(pgd_lock, 28); ++#endif ++ mtctl(pgd, 25); ++ mtsp(space, SR_USER); ++ mtctl(prot, 8); ++ local_irq_restore(flags); ++ ++ return pa; ++} ++ + void flush_dcache_folio(struct folio *folio) + { + struct address_space *mapping = folio_flush_mapping(folio); +@@ -454,50 +514,23 @@ void flush_dcache_folio(struct folio *folio) + if (addr + nr * PAGE_SIZE > vma->vm_end) + nr = (vma->vm_end - addr) / PAGE_SIZE; + +- if (parisc_requires_coherency()) { +- for (i = 0; i < nr; i++) { +- pte_t *ptep = get_ptep(vma->vm_mm, +- addr + i * PAGE_SIZE); +- if (!ptep) +- continue; +- if (pte_needs_flush(*ptep)) +- flush_user_cache_page(vma, +- addr + i * PAGE_SIZE); +- /* Optimise accesses to the same table? */ +- pte_unmap(ptep); +- } +- } else { ++ if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) ++ != (addr & (SHM_COLOUR - 1))) { ++ for (i = 0; i < nr; i++) ++ __flush_cache_page(vma, ++ addr + i * PAGE_SIZE, ++ (pfn + i) * PAGE_SIZE); + /* +- * The TLB is the engine of coherence on parisc: +- * The CPU is entitled to speculate any page +- * with a TLB mapping, so here we kill the +- * mapping then flush the page along a special +- * flush only alias mapping. This guarantees that +- * the page is no-longer in the cache for any +- * process and nor may it be speculatively read +- * in (until the user or kernel specifically +- * accesses it, of course) ++ * Software is allowed to have any number ++ * of private mappings to a page. + */ +- for (i = 0; i < nr; i++) +- flush_tlb_page(vma, addr + i * PAGE_SIZE); +- if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) +- != (addr & (SHM_COLOUR - 1))) { +- for (i = 0; i < nr; i++) +- __flush_cache_page(vma, +- addr + i * PAGE_SIZE, +- (pfn + i) * PAGE_SIZE); +- /* +- * Software is allowed to have any number +- * of private mappings to a page. 
+- */ +- if (!(vma->vm_flags & VM_SHARED)) +- continue; +- if (old_addr) +- pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", +- old_addr, addr, vma->vm_file); +- if (nr == folio_nr_pages(folio)) +- old_addr = addr; +- } ++ if (!(vma->vm_flags & VM_SHARED)) ++ continue; ++ if (old_addr) ++ pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", ++ old_addr, addr, vma->vm_file); ++ if (nr == folio_nr_pages(folio)) ++ old_addr = addr; + } + WARN_ON(++count == 4096); + } +@@ -587,35 +620,28 @@ extern void purge_kernel_dcache_page_asm(unsigned long); + extern void clear_user_page_asm(void *, unsigned long); + extern void copy_user_page_asm(void *, void *, unsigned long); + +-void flush_kernel_dcache_page_addr(const void *addr) +-{ +- unsigned long flags; +- +- flush_kernel_dcache_page_asm(addr); +- purge_tlb_start(flags); +- pdtlb(SR_KERNEL, addr); +- purge_tlb_end(flags); +-} +-EXPORT_SYMBOL(flush_kernel_dcache_page_addr); +- + static void flush_cache_page_if_present(struct vm_area_struct *vma, +- unsigned long vmaddr, unsigned long pfn) ++ unsigned long vmaddr) + { ++#if CONFIG_FLUSH_PAGE_ACCESSED + bool needs_flush = false; +- pte_t *ptep; ++ pte_t *ptep, pte; + +- /* +- * The pte check is racy and sometimes the flush will trigger +- * a non-access TLB miss. Hopefully, the page has already been +- * flushed. +- */ + ptep = get_ptep(vma->vm_mm, vmaddr); + if (ptep) { +- needs_flush = pte_needs_flush(*ptep); ++ pte = ptep_get(ptep); ++ needs_flush = pte_needs_flush(pte); + pte_unmap(ptep); + } + if (needs_flush) +- flush_cache_page(vma, vmaddr, pfn); ++ __flush_cache_page(vma, vmaddr, PFN_PHYS(pte_pfn(pte))); ++#else ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long physaddr = get_upa(mm, vmaddr); ++ ++ if (physaddr) ++ __flush_cache_page(vma, vmaddr, PAGE_ALIGN_DOWN(physaddr)); ++#endif + } + + void copy_user_highpage(struct page *to, struct page *from, +@@ -625,7 +651,7 @@ void copy_user_highpage(struct page *to, struct page *from, + + kfrom = kmap_local_page(from); + kto = kmap_local_page(to); +- flush_cache_page_if_present(vma, vaddr, page_to_pfn(from)); ++ __flush_cache_page(vma, vaddr, PFN_PHYS(page_to_pfn(from))); + copy_page_asm(kto, kfrom); + kunmap_local(kto); + kunmap_local(kfrom); +@@ -634,16 +660,17 @@ void copy_user_highpage(struct page *to, struct page *from, + void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len) + { +- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); ++ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); + memcpy(dst, src, len); +- flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); ++ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(dst)); + } + + void copy_from_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long user_vaddr, void *dst, void *src, int len) + { +- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); ++ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); + memcpy(dst, src, len); ++ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(src)); + } + + /* __flush_tlb_range() +@@ -677,32 +704,10 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, + + static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) + { +- unsigned long addr, pfn; +- pte_t *ptep; +- +- for (addr = start; addr < end; addr += PAGE_SIZE) { +- bool needs_flush = false; +- /* +- * The vma can contain pages that aren't present. 
Although +- * the pte search is expensive, we need the pte to find the +- * page pfn and to check whether the page should be flushed. +- */ +- ptep = get_ptep(vma->vm_mm, addr); +- if (ptep) { +- needs_flush = pte_needs_flush(*ptep); +- pfn = pte_pfn(*ptep); +- pte_unmap(ptep); +- } +- if (needs_flush) { +- if (parisc_requires_coherency()) { +- flush_user_cache_page(vma, addr); +- } else { +- if (WARN_ON(!pfn_valid(pfn))) +- return; +- __flush_cache_page(vma, addr, PFN_PHYS(pfn)); +- } +- } +- } ++ unsigned long addr; ++ ++ for (addr = start; addr < end; addr += PAGE_SIZE) ++ flush_cache_page_if_present(vma, addr); + } + + static inline unsigned long mm_total_size(struct mm_struct *mm) +@@ -753,21 +758,19 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) + return; + flush_tlb_range(vma, start, end); +- flush_cache_all(); ++ if (vma->vm_flags & VM_EXEC) ++ flush_cache_all(); ++ else ++ flush_data_cache(); + return; + } + +- flush_cache_pages(vma, start, end); ++ flush_cache_pages(vma, start & PAGE_MASK, end); + } + + void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) + { +- if (WARN_ON(!pfn_valid(pfn))) +- return; +- if (parisc_requires_coherency()) +- flush_user_cache_page(vma, vmaddr); +- else +- __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); ++ __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + } + + void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) +@@ -775,34 +778,133 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon + if (!PageAnon(page)) + return; + +- if (parisc_requires_coherency()) { +- if (vma->vm_flags & VM_SHARED) +- flush_data_cache(); +- else +- flush_user_cache_page(vma, vmaddr); ++ __flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page))); ++} ++ ++int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, ++ pte_t *ptep) ++{ ++ pte_t pte = ptep_get(ptep); ++ ++ if (!pte_young(pte)) ++ return 0; ++ set_pte(ptep, pte_mkold(pte)); ++#if CONFIG_FLUSH_PAGE_ACCESSED ++ __flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte))); ++#endif ++ return 1; ++} ++ ++/* ++ * After a PTE is cleared, we have no way to flush the cache for ++ * the physical page. On PA8800 and PA8900 processors, these lines ++ * can cause random cache corruption. Thus, we must flush the cache ++ * as well as the TLB when clearing a PTE that's valid. ++ */ ++pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, ++ pte_t *ptep) ++{ ++ struct mm_struct *mm = (vma)->vm_mm; ++ pte_t pte = ptep_get_and_clear(mm, addr, ptep); ++ unsigned long pfn = pte_pfn(pte); ++ ++ if (pfn_valid(pfn)) ++ __flush_cache_page(vma, addr, PFN_PHYS(pfn)); ++ else if (pte_accessible(mm, pte)) ++ flush_tlb_page(vma, addr); ++ ++ return pte; ++} ++ ++/* ++ * The physical address for pages in the ioremap case can be obtained ++ * from the vm_struct struct. I wasn't able to successfully handle the ++ * vmalloc and vmap cases. We have an array of struct page pointers in ++ * the uninitialized vmalloc case but the flush failed using page_to_pfn. 
++ */ ++void flush_cache_vmap(unsigned long start, unsigned long end) ++{ ++ unsigned long addr, physaddr; ++ struct vm_struct *vm; ++ ++ /* Prevent cache move-in */ ++ flush_tlb_kernel_range(start, end); ++ ++ if (end - start >= parisc_cache_flush_threshold) { ++ flush_cache_all(); + return; + } + +- flush_tlb_page(vma, vmaddr); +- preempt_disable(); +- flush_dcache_page_asm(page_to_phys(page), vmaddr); +- preempt_enable(); ++ if (WARN_ON_ONCE(!is_vmalloc_addr((void *)start))) { ++ flush_cache_all(); ++ return; ++ } ++ ++ vm = find_vm_area((void *)start); ++ if (WARN_ON_ONCE(!vm)) { ++ flush_cache_all(); ++ return; ++ } ++ ++ /* The physical addresses of IOREMAP regions are contiguous */ ++ if (vm->flags & VM_IOREMAP) { ++ physaddr = vm->phys_addr; ++ for (addr = start; addr < end; addr += PAGE_SIZE) { ++ preempt_disable(); ++ flush_dcache_page_asm(physaddr, start); ++ flush_icache_page_asm(physaddr, start); ++ preempt_enable(); ++ physaddr += PAGE_SIZE; ++ } ++ return; ++ } ++ ++ flush_cache_all(); + } ++EXPORT_SYMBOL(flush_cache_vmap); + ++/* ++ * The vm_struct has been retired and the page table is set up. The ++ * last page in the range is a guard page. Its physical address can't ++ * be determined using lpa, so there is no way to flush the range ++ * using flush_dcache_page_asm. ++ */ ++void flush_cache_vunmap(unsigned long start, unsigned long end) ++{ ++ /* Prevent cache move-in */ ++ flush_tlb_kernel_range(start, end); ++ flush_data_cache(); ++} ++EXPORT_SYMBOL(flush_cache_vunmap); ++ ++/* ++ * On systems with PA8800/PA8900 processors, there is no way to flush ++ * a vmap range other than using the architected loop to flush the ++ * entire cache. The page directory is not set up, so we can't use ++ * fdc, etc. FDCE/FICE don't work to flush a portion of the cache. ++ * L2 is physically indexed but FDCE/FICE instructions in virtual ++ * mode output their virtual address on the core bus, not their ++ * real address. As a result, the L2 cache index formed from the ++ * virtual address will most likely not be the same as the L2 index ++ * formed from the real address. 
++ */ + void flush_kernel_vmap_range(void *vaddr, int size) + { + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + +- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +- (unsigned long)size >= parisc_cache_flush_threshold) { +- flush_tlb_kernel_range(start, end); +- flush_data_cache(); ++ flush_tlb_kernel_range(start, end); ++ ++ if (!static_branch_likely(&parisc_has_dcache)) ++ return; ++ ++ /* If interrupts are disabled, we can only do local flush */ ++ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { ++ flush_data_cache_local(NULL); + return; + } + +- flush_kernel_dcache_range_asm(start, end); +- flush_tlb_kernel_range(start, end); ++ flush_data_cache(); + } + EXPORT_SYMBOL(flush_kernel_vmap_range); + +@@ -814,15 +916,18 @@ void invalidate_kernel_vmap_range(void *vaddr, int size) + /* Ensure DMA is complete */ + asm_syncdma(); + +- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +- (unsigned long)size >= parisc_cache_flush_threshold) { +- flush_tlb_kernel_range(start, end); +- flush_data_cache(); ++ flush_tlb_kernel_range(start, end); ++ ++ if (!static_branch_likely(&parisc_has_dcache)) ++ return; ++ ++ /* If interrupts are disabled, we can only do local flush */ ++ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { ++ flush_data_cache_local(NULL); + return; + } + +- purge_kernel_dcache_range_asm(start, end); +- flush_tlb_kernel_range(start, end); ++ flush_data_cache(); + } + EXPORT_SYMBOL(invalidate_kernel_vmap_range); + +diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h +index 4c96de9cd1e99..ccc91bf9b0342 100644 +--- a/arch/powerpc/include/asm/uaccess.h ++++ b/arch/powerpc/include/asm/uaccess.h +@@ -92,9 +92,25 @@ __pu_failed: \ + : label) + #endif + ++#ifdef CONFIG_CC_IS_CLANG ++#define DS_FORM_CONSTRAINT "Z<>" ++#else ++#define DS_FORM_CONSTRAINT "YZ<>" ++#endif ++ + #ifdef __powerpc64__ ++#ifdef CONFIG_PPC_KERNEL_PREFIXED + #define __put_user_asm2_goto(x, ptr, label) \ + __put_user_asm_goto(x, ptr, label, "std") ++#else ++#define __put_user_asm2_goto(x, addr, label) \ ++ asm goto ("1: std%U1%X1 %0,%1 # put_user\n" \ ++ EX_TABLE(1b, %l2) \ ++ : \ ++ : "r" (x), DS_FORM_CONSTRAINT (*addr) \ ++ : \ ++ : label) ++#endif // CONFIG_PPC_KERNEL_PREFIXED + #else /* __powerpc64__ */ + #define __put_user_asm2_goto(x, addr, label) \ + asm goto( \ +diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c +index 0eb689351b7d0..5cd407c6a8e4f 100644 +--- a/arch/riscv/kvm/aia_device.c ++++ b/arch/riscv/kvm/aia_device.c +@@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr) + + static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr) + { +- u32 hart, group = 0; ++ u32 hart = 0, group = 0; + +- hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & +- GENMASK_ULL(aia->nr_hart_bits - 1, 0); ++ if (aia->nr_hart_bits) ++ hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & ++ GENMASK_ULL(aia->nr_hart_bits - 1, 0); + if (aia->nr_group_bits) + group = (addr >> aia->nr_group_shift) & + GENMASK_ULL(aia->nr_group_bits - 1, 0); +diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c +index b7e0e03c69b1e..d520b25d85616 100644 +--- a/arch/riscv/kvm/vcpu_onereg.c ++++ b/arch/riscv/kvm/vcpu_onereg.c +@@ -614,9 +614,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu, + switch (reg_subtype) { + case KVM_REG_RISCV_ISA_SINGLE: + return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val); +- case 
KVM_REG_RISCV_SBI_MULTI_EN: ++ case KVM_REG_RISCV_ISA_MULTI_EN: + return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true); +- case KVM_REG_RISCV_SBI_MULTI_DIS: ++ case KVM_REG_RISCV_ISA_MULTI_DIS: + return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false); + default: + return -ENOENT; +diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c +index ec02ea86aa39f..83ac1eb8e7e68 100644 +--- a/arch/riscv/mm/init.c ++++ b/arch/riscv/mm/init.c +@@ -234,18 +234,19 @@ static void __init setup_bootmem(void) + kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; + + /* +- * memblock allocator is not aware of the fact that last 4K bytes of +- * the addressable memory can not be mapped because of IS_ERR_VALUE +- * macro. Make sure that last 4k bytes are not usable by memblock +- * if end of dram is equal to maximum addressable memory. For 64-bit +- * kernel, this problem can't happen here as the end of the virtual +- * address space is occupied by the kernel mapping then this check must +- * be done as soon as the kernel mapping base address is determined. ++ * Reserve physical address space that would be mapped to virtual ++ * addresses greater than (void *)(-PAGE_SIZE) because: ++ * - This memory would overlap with ERR_PTR ++ * - This memory belongs to high memory, which is not supported ++ * ++ * This is not applicable to 64-bit kernel, because virtual addresses ++ * after (void *)(-PAGE_SIZE) are not linearly mapped: they are ++ * occupied by kernel mapping. Also it is unrealistic for high memory ++ * to exist on 64-bit platforms. + */ + if (!IS_ENABLED(CONFIG_64BIT)) { +- max_mapped_addr = __pa(~(ulong)0); +- if (max_mapped_addr == (phys_ram_end - 1)) +- memblock_set_current_limit(max_mapped_addr - 4096); ++ max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE); ++ memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr); + } + + min_low_pfn = PFN_UP(phys_ram_base); +diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c +index 01398fee5cf82..f61b2f8291e35 100644 +--- a/arch/riscv/mm/pageattr.c ++++ b/arch/riscv/mm/pageattr.c +@@ -387,17 +387,33 @@ int set_direct_map_default_noflush(struct page *page) + } + + #ifdef CONFIG_DEBUG_PAGEALLOC ++static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data) ++{ ++ int enable = *(int *)data; ++ ++ unsigned long val = pte_val(ptep_get(pte)); ++ ++ if (enable) ++ val |= _PAGE_PRESENT; ++ else ++ val &= ~_PAGE_PRESENT; ++ ++ set_pte(pte, __pte(val)); ++ ++ return 0; ++} ++ + void __kernel_map_pages(struct page *page, int numpages, int enable) + { + if (!debug_pagealloc_enabled()) + return; + +- if (enable) +- __set_memory((unsigned long)page_address(page), numpages, +- __pgprot(_PAGE_PRESENT), __pgprot(0)); +- else +- __set_memory((unsigned long)page_address(page), numpages, +- __pgprot(0), __pgprot(_PAGE_PRESENT)); ++ unsigned long start = (unsigned long)page_address(page); ++ unsigned long size = PAGE_SIZE * numpages; ++ ++ apply_to_existing_page_range(&init_mm, start, size, debug_pagealloc_set_page, &enable); ++ ++ flush_tlb_kernel_range(start, start + size); + } + #endif + +diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile +index 583c11664c63b..658e9ec065c47 100644 +--- a/arch/x86/boot/compressed/Makefile ++++ b/arch/x86/boot/compressed/Makefile +@@ -116,9 +116,9 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o + + vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o + vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o +-vmlinux-objs-$(CONFIG_EFI_STUB) += 
$(objtree)/drivers/firmware/efi/libstub/lib.a ++vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a + +-$(obj)/vmlinux: $(vmlinux-objs-y) FORCE ++$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE + $(call if_changed,ld) + + OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c +index c4ea5258ab558..9049f390d8347 100644 +--- a/arch/x86/boot/main.c ++++ b/arch/x86/boot/main.c +@@ -119,8 +119,8 @@ static void init_heap(void) + char *stack_end; + + if (boot_params.hdr.loadflags & CAN_USE_HEAP) { +- asm("leal %P1(%%esp),%0" +- : "=r" (stack_end) : "i" (-STACK_SIZE)); ++ asm("leal %n1(%%esp),%0" ++ : "=r" (stack_end) : "i" (STACK_SIZE)); + + heap_end = (char *) + ((size_t)boot_params.hdr.heap_end_ptr + 0x200); +diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h +index 65f79092c9d9e..cb9ce0f9e78e0 100644 +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -288,10 +288,10 @@ static inline int alternatives_text_reserved(void *start, void *end) + * Otherwise, if CPU has feature1, newinstr1 is used. + * Otherwise, oldinstr is used. + */ +-#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ +- ft_flags2, input...) \ +- asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ +- newinstr2, ft_flags2) \ ++#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ ++ ft_flags2, input...) \ ++ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ ++ newinstr2, ft_flags2) \ + : : "i" (0), ## input) + + /* Like alternative_input, but with a single output argument */ +@@ -301,7 +301,7 @@ static inline int alternatives_text_reserved(void *start, void *end) + + /* Like alternative_io, but for replacing a direct call with another one. */ + #define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ +- asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \ ++ asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ + : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + + /* +@@ -310,12 +310,12 @@ static inline int alternatives_text_reserved(void *start, void *end) + * Otherwise, if CPU has feature1, function1 is used. + * Otherwise, old function is used. + */ +-#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ +- output, input...) \ +- asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\ +- "call %P[new2]", ft_flags2) \ +- : output, ASM_CALL_CONSTRAINT \ +- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ ++#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ ++ output, input...) \ ++ asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ ++ "call %c[new2]", ft_flags2) \ ++ : output, ASM_CALL_CONSTRAINT \ ++ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2), ## input) + + /* +diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h +index 3486d91b8595f..d510405e4e1de 100644 +--- a/arch/x86/include/asm/atomic64_32.h ++++ b/arch/x86/include/asm/atomic64_32.h +@@ -24,7 +24,7 @@ typedef struct { + + #ifdef CONFIG_X86_CMPXCHG64 + #define __alternative_atomic64(f, g, out, in...) 
\ +- asm volatile("call %P[func]" \ ++ asm volatile("call %c[func]" \ + : out : [func] "i" (atomic64_##g##_cx8), ## in) + + #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index 686e92d2663ee..3508f3fc928d4 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); + static __always_inline bool _static_cpu_has(u16 bit) + { + asm goto( +- ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ++ ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") + ".pushsection .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" +diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h +index 798183867d789..b71ad173f8776 100644 +--- a/arch/x86/include/asm/irq_stack.h ++++ b/arch/x86/include/asm/irq_stack.h +@@ -100,7 +100,7 @@ + } + + #define ASM_CALL_ARG0 \ +- "call %P[__func] \n" \ ++ "call %c[__func] \n" \ + ASM_REACHABLE + + #define ASM_CALL_ARG1 \ +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +index 237dc8cdd12b9..3a7755c1a4410 100644 +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -78,10 +78,10 @@ extern int __get_user_bad(void); + int __ret_gu; \ + register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ + __chk_user_ptr(ptr); \ +- asm volatile("call __" #fn "_%P4" \ ++ asm volatile("call __" #fn "_%c[size]" \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ +- : "0" (ptr), "i" (sizeof(*(ptr)))); \ ++ : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \ + instrument_get_user(__val_gu); \ + (x) = (__force __typeof__(*(ptr))) __val_gu; \ + __builtin_expect(__ret_gu, 0); \ +@@ -177,7 +177,7 @@ extern void __put_user_nocheck_8(void); + __chk_user_ptr(__ptr); \ + __ptr_pu = __ptr; \ + __val_pu = __x; \ +- asm volatile("call __" #fn "_%P[size]" \ ++ asm volatile("call __" #fn "_%c[size]" \ + : "=c" (__ret_pu), \ + ASM_CALL_CONSTRAINT \ + : "0" (__ptr_pu), \ +diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c +index cab4d8b1535d6..6f1cc7f1b202a 100644 +--- a/arch/x86/kernel/amd_nb.c ++++ b/arch/x86/kernel/amd_nb.c +@@ -209,7 +209,14 @@ static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write) + + int amd_smn_read(u16 node, u32 address, u32 *value) + { +- return __amd_smn_rw(node, address, value, false); ++ int err = __amd_smn_rw(node, address, value, false); ++ ++ if (PCI_POSSIBLE_ERROR(*value)) { ++ err = -ENODEV; ++ *value = 0; ++ } ++ ++ return err; + } + EXPORT_SYMBOL_GPL(amd_smn_read); + +diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c +index 1a3e2c05a8a5b..d287fe290c9ab 100644 +--- a/arch/x86/kernel/machine_kexec_64.c ++++ b/arch/x86/kernel/machine_kexec_64.c +@@ -298,8 +298,15 @@ void machine_kexec_cleanup(struct kimage *image) + void machine_kexec(struct kimage *image) + { + unsigned long page_list[PAGES_NR]; +- void *control_page; ++ unsigned int host_mem_enc_active; + int save_ftrace_enabled; ++ void *control_page; ++ ++ /* ++ * This must be done before load_segments() since if call depth tracking ++ * is used then GS must be valid to make any function calls. 
++ */ ++ host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT); + + #ifdef CONFIG_KEXEC_JUMP + if (image->preserve_context) +@@ -361,7 +368,7 @@ void machine_kexec(struct kimage *image) + (unsigned long)page_list, + image->start, + image->preserve_context, +- cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)); ++ host_mem_enc_active); + + #ifdef CONFIG_KEXEC_JUMP + if (image->preserve_context) +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c +index c5845f31c34dc..99e72b8a96ac0 100644 +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -664,6 +664,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, + return ret; + + vcpu->arch.guest_state_protected = true; ++ ++ /* ++ * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it ++ * only after setting guest_state_protected because KVM_SET_MSRS allows ++ * dynamic toggling of LBRV (for performance reason) on write access to ++ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set. ++ */ ++ svm_enable_lbrv(vcpu); + return 0; + } + +@@ -2264,6 +2272,12 @@ void __init sev_hardware_setup(void) + if (!boot_cpu_has(X86_FEATURE_SEV_ES)) + goto out; + ++ if (!lbrv) { ++ WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV), ++ "LBRV must be present for SEV-ES support"); ++ goto out; ++ } ++ + /* Has the system been allocated ASIDs for SEV-ES? */ + if (min_sev_asid == 1) + goto out; +@@ -2988,6 +3002,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) + + set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); + } ++ ++ /* ++ * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if ++ * the host/guest supports its use. ++ * ++ * guest_can_use() checks a number of requirements on the host/guest to ++ * ensure that MSR_IA32_XSS is available, but it might report true even ++ * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host ++ * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better ++ * to further check that the guest CPUID actually supports ++ * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved ++ * guests will still get intercepted and caught in the normal ++ * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. 
++ */ ++ if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) ++ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); ++ else ++ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0); + } + + void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) +@@ -3010,7 +3043,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) + struct kvm_vcpu *vcpu = &svm->vcpu; + + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; +- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; + + /* + * An SEV-ES guest requires a VMSA area that is a separate from the +@@ -3062,10 +3094,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) + /* Clear intercepts on selected MSRs */ + set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1); +- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); +- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); +- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); +- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); + } + + void sev_init_vmcb(struct vcpu_svm *svm) +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index 1efbe8b33f6a1..e3c2acc1adc73 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -99,10 +99,12 @@ static const struct svm_direct_access_msrs { + { .index = MSR_IA32_SPEC_CTRL, .always = false }, + { .index = MSR_IA32_PRED_CMD, .always = false }, + { .index = MSR_IA32_FLUSH_CMD, .always = false }, ++ { .index = MSR_IA32_DEBUGCTLMSR, .always = false }, + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, + { .index = MSR_IA32_LASTINTFROMIP, .always = false }, + { .index = MSR_IA32_LASTINTTOIP, .always = false }, ++ { .index = MSR_IA32_XSS, .always = false }, + { .index = MSR_EFER, .always = false }, + { .index = MSR_IA32_CR_PAT, .always = false }, + { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, +@@ -214,7 +216,7 @@ int vgif = true; + module_param(vgif, int, 0444); + + /* enable/disable LBR virtualization */ +-static int lbrv = true; ++int lbrv = true; + module_param(lbrv, int, 0444); + + static int tsc_scaling = true; +@@ -1007,7 +1009,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) + vmcb_mark_dirty(to_vmcb, VMCB_LBR); + } + +-static void svm_enable_lbrv(struct kvm_vcpu *vcpu) ++void svm_enable_lbrv(struct kvm_vcpu *vcpu) + { + struct vcpu_svm *svm = to_svm(vcpu); + +@@ -1017,6 +1019,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu) + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); + ++ if (sev_es_guest(vcpu->kvm)) ++ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1); ++ + /* Move the LBR msrs to the vmcb02 so that the guest can see them. 
*/ + if (is_guest_mode(vcpu)) + svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); +@@ -1026,6 +1031,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu) + { + struct vcpu_svm *svm = to_svm(vcpu); + ++ KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); ++ + svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); +@@ -5248,6 +5255,12 @@ static __init int svm_hardware_setup(void) + + nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS); + ++ if (lbrv) { ++ if (!boot_cpu_has(X86_FEATURE_LBRV)) ++ lbrv = false; ++ else ++ pr_info("LBR virtualization supported\n"); ++ } + /* + * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which + * may be modified by svm_adjust_mmio_mask()), as well as nrips. +@@ -5301,14 +5314,6 @@ static __init int svm_hardware_setup(void) + svm_x86_ops.set_vnmi_pending = NULL; + } + +- +- if (lbrv) { +- if (!boot_cpu_has(X86_FEATURE_LBRV)) +- lbrv = false; +- else +- pr_info("LBR virtualization supported\n"); +- } +- + if (!enable_pmu) + pr_info("PMU virtualization is disabled\n"); + +diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h +index be67ab7fdd104..37ada9808d9b5 100644 +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -30,7 +30,7 @@ + #define IOPM_SIZE PAGE_SIZE * 3 + #define MSRPM_SIZE PAGE_SIZE * 2 + +-#define MAX_DIRECT_ACCESS_MSRS 46 ++#define MAX_DIRECT_ACCESS_MSRS 48 + #define MSRPM_OFFSETS 32 + extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; + extern bool npt_enabled; +@@ -39,6 +39,7 @@ extern int vgif; + extern bool intercept_smi; + extern bool x2avic_enabled; + extern bool vnmi; ++extern int lbrv; + + /* + * Clean bits in VMCB. +@@ -541,6 +542,7 @@ u32 *svm_vcpu_alloc_msrpm(void); + void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm); + void svm_vcpu_free_msrpm(u32 *msrpm); + void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb); ++void svm_enable_lbrv(struct kvm_vcpu *vcpu); + void svm_update_lbrv(struct kvm_vcpu *vcpu); + + int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); +diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S +index f6aad480febd3..6913fbce6544f 100644 +--- a/arch/x86/lib/getuser.S ++++ b/arch/x86/lib/getuser.S +@@ -44,7 +44,11 @@ + or %rdx, %rax + .else + cmp $TASK_SIZE_MAX-\size+1, %eax ++.if \size != 8 + jae .Lbad_get_user ++.else ++ jae .Lbad_get_user_8 ++.endif + sbb %edx, %edx /* array_index_mask_nospec() */ + and %edx, %eax + .endif +@@ -154,7 +158,7 @@ SYM_CODE_END(__get_user_handle_exception) + #ifdef CONFIG_X86_32 + SYM_CODE_START_LOCAL(__get_user_8_handle_exception) + ASM_CLAC +-bad_get_user_8: ++.Lbad_get_user_8: + xor %edx,%edx + xor %ecx,%ecx + mov $(-EFAULT),%_ASM_AX +diff --git a/block/blk-flush.c b/block/blk-flush.c +index e73dc22d05c1d..313f0ffcce42e 100644 +--- a/block/blk-flush.c ++++ b/block/blk-flush.c +@@ -183,7 +183,7 @@ static void blk_flush_complete_seq(struct request *rq, + /* queue for flush */ + if (list_empty(pending)) + fq->flush_pending_since = jiffies; +- list_move_tail(&rq->queuelist, pending); ++ list_add_tail(&rq->queuelist, pending); + break; + + case REQ_FSEQ_DATA: +@@ -261,6 +261,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq, + unsigned int seq = blk_flush_cur_seq(rq); + + BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); ++ list_del_init(&rq->queuelist); + blk_flush_complete_seq(rq, fq, seq, error); + } + +diff --git a/block/sed-opal.c b/block/sed-opal.c 
+index e27109be77690..1a1cb35bf4b79 100644 +--- a/block/sed-opal.c ++++ b/block/sed-opal.c +@@ -313,7 +313,7 @@ static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen) + &key_type_user, key_name, true); + + if (IS_ERR(kref)) +- ret = PTR_ERR(kref); ++ return PTR_ERR(kref); + + key = key_ref_to_ptr(kref); + down_read(&key->sem); +diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c +index c708524576df4..ac05e2557435e 100644 +--- a/drivers/acpi/x86/utils.c ++++ b/drivers/acpi/x86/utils.c +@@ -198,16 +198,16 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s + } + + /* +- * AMD systems from Renoir and Lucienne *require* that the NVME controller ++ * AMD systems from Renoir onwards *require* that the NVME controller + * is put into D3 over a Modern Standby / suspend-to-idle cycle. + * + * This is "typically" accomplished using the `StorageD3Enable` + * property in the _DSD that is checked via the `acpi_storage_d3` function +- * but this property was introduced after many of these systems launched +- * and most OEM systems don't have it in their BIOS. ++ * but some OEM systems still don't have it in their BIOS. + * + * The Microsoft documentation for StorageD3Enable mentioned that Windows has +- * a hardcoded allowlist for D3 support, which was used for these platforms. ++ * a hardcoded allowlist for D3 support as well as a registry key to override ++ * the BIOS, which has been used for these cases. + * + * This allows quirking on Linux in a similar fashion. + * +@@ -220,19 +220,15 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s + * https://bugzilla.kernel.org/show_bug.cgi?id=216773 + * https://bugzilla.kernel.org/show_bug.cgi?id=217003 + * 2) On at least one HP system StorageD3Enable is missing on the second NVME +- disk in the system. ++ * disk in the system. ++ * 3) On at least one HP Rembrandt system StorageD3Enable is missing on the only ++ * NVME device. 
+ */ +-static const struct x86_cpu_id storage_d3_cpu_ids[] = { +- X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 24, NULL), /* Picasso */ +- X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */ +- X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */ +- X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL), /* Cezanne */ +- {} +-}; +- + bool force_storage_d3(void) + { +- return x86_match_cpu(storage_d3_cpu_ids); ++ if (!cpu_feature_enabled(X86_FEATURE_ZEN)) ++ return false; ++ return acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0; + } + + /* +diff --git a/drivers/base/core.c b/drivers/base/core.c +index 0214288765c8c..aeb4644817d57 100644 +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -2664,8 +2664,11 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, + if (!env) + return -ENOMEM; + ++ /* Synchronize with really_probe() */ ++ device_lock(dev); + /* let the kset specific function add its keys */ + retval = kset->uevent_ops->uevent(&dev->kobj, env); ++ device_unlock(dev); + if (retval) + goto out; + +diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c +index 55c5b48bc276f..36bd191998375 100644 +--- a/drivers/block/null_blk/zoned.c ++++ b/drivers/block/null_blk/zoned.c +@@ -112,7 +112,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) + if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) { + dev->zone_max_open = dev->zone_max_active; + pr_info("changed the maximum number of open zones to %u\n", +- dev->nr_zones); ++ dev->zone_max_open); + } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) { + dev->zone_max_open = 0; + pr_info("zone_max_open limit disabled, limit >= zone count\n"); +diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c +index ee37d0be6877d..9cd80522ca2d7 100644 +--- a/drivers/clk/clkdev.c ++++ b/drivers/clk/clkdev.c +@@ -144,7 +144,7 @@ void clkdev_add_table(struct clk_lookup *cl, size_t num) + mutex_unlock(&clocks_mutex); + } + +-#define MAX_DEV_ID 20 ++#define MAX_DEV_ID 24 + #define MAX_CON_ID 16 + + struct clk_lookup_alloc { +diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c +index af81eb835bc23..b1be6a2d24aa9 100644 +--- a/drivers/clk/sifive/sifive-prci.c ++++ b/drivers/clk/sifive/sifive-prci.c +@@ -4,7 +4,6 @@ + * Copyright (C) 2020 Zong Li + */ + +-#include <linux/clkdev.h> + #include <linux/delay.h> + #include <linux/io.h> + #include <linux/of.h> +@@ -536,13 +535,6 @@ static int __prci_register_clocks(struct device *dev, struct __prci_data *pd, + return r; + } + +- r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev)); +- if (r) { +- dev_warn(dev, "Failed to register clkdev for %s: %d\n", +- init.name, r); +- return r; +- } +- + pd->hw_clks.hws[i] = &pic->hw; + } + +diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c +index c65ab42546238..7a646fed17211 100644 +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -2186,15 +2186,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, + struct device *dev; + int rc; + +- switch (mode) { +- case CXL_DECODER_RAM: +- case CXL_DECODER_PMEM: +- break; +- default: +- dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); +- return ERR_PTR(-EINVAL); +- } +- + cxlr = cxl_region_alloc(cxlrd, id); + if (IS_ERR(cxlr)) + return cxlr; +@@ -2245,6 +2236,15 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, + { + int rc; + ++ switch (mode) { ++ case CXL_DECODER_RAM: ++ case 
CXL_DECODER_PMEM: ++ break; ++ default: ++ dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); ++ return ERR_PTR(-EINVAL); ++ } ++ + rc = memregion_alloc(GFP_KERNEL); + if (rc < 0) + return ERR_PTR(rc); +diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c +index b7c6f7ea9e0c8..6a1bfcd0cc210 100644 +--- a/drivers/dma-buf/st-dma-fence.c ++++ b/drivers/dma-buf/st-dma-fence.c +@@ -540,6 +540,12 @@ static int race_signal_callback(void *arg) + t[i].before = pass; + t[i].task = kthread_run(thread_signal_callback, &t[i], + "dma-fence:%d", i); ++ if (IS_ERR(t[i].task)) { ++ ret = PTR_ERR(t[i].task); ++ while (--i >= 0) ++ kthread_stop_put(t[i].task); ++ return ret; ++ } + get_task_struct(t[i].task); + } + +diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c +index fc7cdad371616..4f426be286884 100644 +--- a/drivers/dma/dma-axi-dmac.c ++++ b/drivers/dma/dma-axi-dmac.c +@@ -1033,8 +1033,8 @@ static int axi_dmac_remove(struct platform_device *pdev) + { + struct axi_dmac *dmac = platform_get_drvdata(pdev); + +- of_dma_controller_free(pdev->dev.of_node); + free_irq(dmac->irq, dmac); ++ of_dma_controller_free(pdev->dev.of_node); + tasklet_kill(&dmac->chan.vchan.task); + dma_async_device_unregister(&dmac->dma_dev); + clk_disable_unprepare(dmac->clk); +diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c +index ff7c155239e31..7af59985f1c1f 100644 +--- a/drivers/firmware/qcom_scm.c ++++ b/drivers/firmware/qcom_scm.c +@@ -498,13 +498,14 @@ int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, + + ret = qcom_scm_bw_enable(); + if (ret) +- return ret; ++ goto disable_clk; + + desc.args[1] = mdata_phys; + + ret = qcom_scm_call(__scm->dev, &desc, &res); +- + qcom_scm_bw_disable(); ++ ++disable_clk: + qcom_scm_clk_disable(); + + out: +@@ -566,10 +567,12 @@ int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size) + + ret = qcom_scm_bw_enable(); + if (ret) +- return ret; ++ goto disable_clk; + + ret = qcom_scm_call(__scm->dev, &desc, &res); + qcom_scm_bw_disable(); ++ ++disable_clk: + qcom_scm_clk_disable(); + + return ret ? : res.result[0]; +@@ -601,10 +604,12 @@ int qcom_scm_pas_auth_and_reset(u32 peripheral) + + ret = qcom_scm_bw_enable(); + if (ret) +- return ret; ++ goto disable_clk; + + ret = qcom_scm_call(__scm->dev, &desc, &res); + qcom_scm_bw_disable(); ++ ++disable_clk: + qcom_scm_clk_disable(); + + return ret ? : res.result[0]; +@@ -635,11 +640,12 @@ int qcom_scm_pas_shutdown(u32 peripheral) + + ret = qcom_scm_bw_enable(); + if (ret) +- return ret; ++ goto disable_clk; + + ret = qcom_scm_call(__scm->dev, &desc, &res); +- + qcom_scm_bw_disable(); ++ ++disable_clk: + qcom_scm_clk_disable(); + + return ret ? : res.result[0]; +diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig +index d56b835359d3b..ebd4e113dc265 100644 +--- a/drivers/gpio/Kconfig ++++ b/drivers/gpio/Kconfig +@@ -1507,7 +1507,7 @@ config GPIO_TPS68470 + are "output only" GPIOs. 
+ + config GPIO_TQMX86 +- tristate "TQ-Systems QTMX86 GPIO" ++ tristate "TQ-Systems TQMx86 GPIO" + depends on MFD_TQMX86 || COMPILE_TEST + depends on HAS_IOPORT_MAP + select GPIOLIB_IRQCHIP +diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c +index 3a28c1f273c39..f2e7e8754d95d 100644 +--- a/drivers/gpio/gpio-tqmx86.c ++++ b/drivers/gpio/gpio-tqmx86.c +@@ -6,6 +6,7 @@ + * Vadim V.Vlasov <vvlasov@dev.rtsoft.ru> + */ + ++#include <linux/bitmap.h> + #include <linux/bitops.h> + #include <linux/errno.h> + #include <linux/gpio/driver.h> +@@ -28,16 +29,25 @@ + #define TQMX86_GPIIC 3 /* GPI Interrupt Configuration Register */ + #define TQMX86_GPIIS 4 /* GPI Interrupt Status Register */ + ++#define TQMX86_GPII_NONE 0 + #define TQMX86_GPII_FALLING BIT(0) + #define TQMX86_GPII_RISING BIT(1) ++/* Stored in irq_type as a trigger type, but not actually valid as a register ++ * value, so the name doesn't use "GPII" ++ */ ++#define TQMX86_INT_BOTH (BIT(0) | BIT(1)) + #define TQMX86_GPII_MASK (BIT(0) | BIT(1)) + #define TQMX86_GPII_BITS 2 ++/* Stored in irq_type with GPII bits */ ++#define TQMX86_INT_UNMASKED BIT(2) + + struct tqmx86_gpio_data { + struct gpio_chip chip; + void __iomem *io_base; + int irq; ++ /* Lock must be held for accessing output and irq_type fields */ + raw_spinlock_t spinlock; ++ DECLARE_BITMAP(output, TQMX86_NGPIO); + u8 irq_type[TQMX86_NGPI]; + }; + +@@ -64,15 +74,10 @@ static void tqmx86_gpio_set(struct gpio_chip *chip, unsigned int offset, + { + struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip); + unsigned long flags; +- u8 val; + + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- val = tqmx86_gpio_read(gpio, TQMX86_GPIOD); +- if (value) +- val |= BIT(offset); +- else +- val &= ~BIT(offset); +- tqmx86_gpio_write(gpio, val, TQMX86_GPIOD); ++ __assign_bit(offset, gpio->output, value); ++ tqmx86_gpio_write(gpio, bitmap_get_value8(gpio->output, 0), TQMX86_GPIOD); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + } + +@@ -107,21 +112,38 @@ static int tqmx86_gpio_get_direction(struct gpio_chip *chip, + return GPIO_LINE_DIRECTION_OUT; + } + ++static void tqmx86_gpio_irq_config(struct tqmx86_gpio_data *gpio, int offset) ++ __must_hold(&gpio->spinlock) ++{ ++ u8 type = TQMX86_GPII_NONE, gpiic; ++ ++ if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED) { ++ type = gpio->irq_type[offset] & TQMX86_GPII_MASK; ++ ++ if (type == TQMX86_INT_BOTH) ++ type = tqmx86_gpio_get(&gpio->chip, offset + TQMX86_NGPO) ++ ? 
TQMX86_GPII_FALLING ++ : TQMX86_GPII_RISING; ++ } ++ ++ gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); ++ gpiic &= ~(TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS)); ++ gpiic |= type << (offset * TQMX86_GPII_BITS); ++ tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++} ++ + static void tqmx86_gpio_irq_mask(struct irq_data *data) + { + unsigned int offset = (data->hwirq - TQMX86_NGPO); + struct tqmx86_gpio_data *gpio = gpiochip_get_data( + irq_data_get_irq_chip_data(data)); + unsigned long flags; +- u8 gpiic, mask; +- +- mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS); + + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); +- gpiic &= ~mask; +- tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++ gpio->irq_type[offset] &= ~TQMX86_INT_UNMASKED; ++ tqmx86_gpio_irq_config(gpio, offset); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); ++ + gpiochip_disable_irq(&gpio->chip, irqd_to_hwirq(data)); + } + +@@ -131,16 +153,12 @@ static void tqmx86_gpio_irq_unmask(struct irq_data *data) + struct tqmx86_gpio_data *gpio = gpiochip_get_data( + irq_data_get_irq_chip_data(data)); + unsigned long flags; +- u8 gpiic, mask; +- +- mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS); + + gpiochip_enable_irq(&gpio->chip, irqd_to_hwirq(data)); ++ + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); +- gpiic &= ~mask; +- gpiic |= gpio->irq_type[offset] << (offset * TQMX86_GPII_BITS); +- tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++ gpio->irq_type[offset] |= TQMX86_INT_UNMASKED; ++ tqmx86_gpio_irq_config(gpio, offset); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + } + +@@ -151,7 +169,7 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type) + unsigned int offset = (data->hwirq - TQMX86_NGPO); + unsigned int edge_type = type & IRQF_TRIGGER_MASK; + unsigned long flags; +- u8 new_type, gpiic; ++ u8 new_type; + + switch (edge_type) { + case IRQ_TYPE_EDGE_RISING: +@@ -161,19 +179,16 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type) + new_type = TQMX86_GPII_FALLING; + break; + case IRQ_TYPE_EDGE_BOTH: +- new_type = TQMX86_GPII_FALLING | TQMX86_GPII_RISING; ++ new_type = TQMX86_INT_BOTH; + break; + default: + return -EINVAL; /* not supported */ + } + +- gpio->irq_type[offset] = new_type; +- + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); +- gpiic &= ~((TQMX86_GPII_MASK) << (offset * TQMX86_GPII_BITS)); +- gpiic |= new_type << (offset * TQMX86_GPII_BITS); +- tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++ gpio->irq_type[offset] &= ~TQMX86_GPII_MASK; ++ gpio->irq_type[offset] |= new_type; ++ tqmx86_gpio_irq_config(gpio, offset); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + + return 0; +@@ -184,8 +199,8 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc) + struct gpio_chip *chip = irq_desc_get_handler_data(desc); + struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip); + struct irq_chip *irq_chip = irq_desc_get_chip(desc); +- unsigned long irq_bits; +- int i = 0; ++ unsigned long irq_bits, flags; ++ int i; + u8 irq_status; + + chained_irq_enter(irq_chip, desc); +@@ -194,6 +209,34 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc) + tqmx86_gpio_write(gpio, irq_status, TQMX86_GPIIS); + + irq_bits = irq_status; ++ ++ raw_spin_lock_irqsave(&gpio->spinlock, flags); ++ for_each_set_bit(i, &irq_bits, TQMX86_NGPI) { ++ /* ++ * Edge-both triggers are implemented by flipping the edge 
++ * trigger after each interrupt, as the controller only supports ++ * either rising or falling edge triggers, but not both. ++ * ++ * Internally, the TQMx86 GPIO controller has separate status ++ * registers for rising and falling edge interrupts. GPIIC ++ * configures which bits from which register are visible in the ++ * interrupt status register GPIIS and defines what triggers the ++ * parent IRQ line. Writing to GPIIS always clears both rising ++ * and falling interrupt flags internally, regardless of the ++ * currently configured trigger. ++ * ++ * In consequence, we can cleanly implement the edge-both ++ * trigger in software by first clearing the interrupt and then ++ * setting the new trigger based on the current GPIO input in ++ * tqmx86_gpio_irq_config() - even if an edge arrives between ++ * reading the input and setting the trigger, we will have a new ++ * interrupt pending. ++ */ ++ if ((gpio->irq_type[i] & TQMX86_GPII_MASK) == TQMX86_INT_BOTH) ++ tqmx86_gpio_irq_config(gpio, i); ++ } ++ raw_spin_unlock_irqrestore(&gpio->spinlock, flags); ++ + for_each_set_bit(i, &irq_bits, TQMX86_NGPI) + generic_handle_domain_irq(gpio->chip.irq.domain, + i + TQMX86_NGPO); +@@ -277,6 +320,13 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) + + tqmx86_gpio_write(gpio, (u8)~TQMX86_DIR_INPUT_MASK, TQMX86_GPIODD); + ++ /* ++ * Reading the previous output state is not possible with TQMx86 hardware. ++ * Initialize all outputs to 0 to have a defined state that matches the ++ * shadow register. ++ */ ++ tqmx86_gpio_write(gpio, 0, TQMX86_GPIOD); ++ + chip = &gpio->chip; + chip->label = "gpio-tqmx86"; + chip->owner = THIS_MODULE; +diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +index f3e744172673c..f4e76b46ca327 100644 +--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c ++++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +@@ -259,7 +259,7 @@ komeda_component_get_avail_scaler(struct komeda_component *c, + u32 avail_scalers; + + pipe_st = komeda_pipeline_get_state(c->pipeline, state); +- if (!pipe_st) ++ if (IS_ERR_OR_NULL(pipe_st)) + return NULL; + + avail_scalers = (pipe_st->active_comps & KOMEDA_PIPELINE_SCALERS) ^ +diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c +index 9316384b44745..a1dd2ead8dcc4 100644 +--- a/drivers/gpu/drm/bridge/panel.c ++++ b/drivers/gpu/drm/bridge/panel.c +@@ -360,9 +360,12 @@ EXPORT_SYMBOL(drm_panel_bridge_set_orientation); + + static void devm_drm_panel_bridge_release(struct device *dev, void *res) + { +- struct drm_bridge **bridge = res; ++ struct drm_bridge *bridge = *(struct drm_bridge **)res; + +- drm_panel_bridge_remove(*bridge); ++ if (!bridge) ++ return; ++ ++ drm_bridge_remove(bridge); + } + + /** +diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c +index e435f986cd135..1ff0678be7c75 100644 +--- a/drivers/gpu/drm/drm_gem_shmem_helper.c ++++ b/drivers/gpu/drm/drm_gem_shmem_helper.c +@@ -610,6 +610,9 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct + return ret; + } + ++ if (is_cow_mapping(vma->vm_flags)) ++ return -EINVAL; ++ + dma_resv_lock(shmem->base.resv, NULL); + ret = drm_gem_shmem_get_pages(shmem); + dma_resv_unlock(shmem->base.resv); +diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c +index fb941a8c99f0f..e17f9c5c9c90e 100644 +--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c ++++ 
b/drivers/gpu/drm/exynos/exynos_drm_vidi.c +@@ -309,6 +309,7 @@ static int vidi_get_modes(struct drm_connector *connector) + struct vidi_context *ctx = ctx_from_connector(connector); + struct edid *edid; + int edid_len; ++ int count; + + /* + * the edid data comes from user side and it would be set +@@ -328,7 +329,11 @@ static int vidi_get_modes(struct drm_connector *connector) + + drm_connector_update_edid_property(connector, edid); + +- return drm_add_edid_modes(connector, edid); ++ count = drm_add_edid_modes(connector, edid); ++ ++ kfree(edid); ++ ++ return count; + } + + static const struct drm_connector_helper_funcs vidi_connector_helper_funcs = { +diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c +index eff51bfc46440..906133331a442 100644 +--- a/drivers/gpu/drm/exynos/exynos_hdmi.c ++++ b/drivers/gpu/drm/exynos/exynos_hdmi.c +@@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector) + int ret; + + if (!hdata->ddc_adpt) +- return 0; ++ goto no_edid; + + edid = drm_get_edid(connector, hdata->ddc_adpt); + if (!edid) +- return 0; ++ goto no_edid; + + hdata->dvi_mode = !connector->display_info.is_hdmi; + DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n", +@@ -906,6 +906,9 @@ static int hdmi_get_modes(struct drm_connector *connector) + kfree(edid); + + return ret; ++ ++no_edid: ++ return drm_add_modes_noedid(connector, 640, 480); + } + + static int hdmi_find_phy_conf(struct hdmi_context *hdata, u32 pixel_clock) +diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c +index 8b8d1d806566e..1cf1674897e9f 100644 +--- a/drivers/gpu/drm/i915/display/intel_audio.c ++++ b/drivers/gpu/drm/i915/display/intel_audio.c +@@ -1251,17 +1251,6 @@ static const struct component_ops i915_audio_component_bind_ops = { + static void i915_audio_component_init(struct drm_i915_private *i915) + { + u32 aud_freq, aud_freq_init; +- int ret; +- +- ret = component_add_typed(i915->drm.dev, +- &i915_audio_component_bind_ops, +- I915_COMPONENT_AUDIO); +- if (ret < 0) { +- drm_err(&i915->drm, +- "failed to add audio component (%d)\n", ret); +- /* continue with reduced functionality */ +- return; +- } + + if (DISPLAY_VER(i915) >= 9) { + aud_freq_init = intel_de_read(i915, AUD_FREQ_CNTRL); +@@ -1284,6 +1273,21 @@ static void i915_audio_component_init(struct drm_i915_private *i915) + + /* init with current cdclk */ + intel_audio_cdclk_change_post(i915); ++} ++ ++static void i915_audio_component_register(struct drm_i915_private *i915) ++{ ++ int ret; ++ ++ ret = component_add_typed(i915->drm.dev, ++ &i915_audio_component_bind_ops, ++ I915_COMPONENT_AUDIO); ++ if (ret < 0) { ++ drm_err(&i915->drm, ++ "failed to add audio component (%d)\n", ret); ++ /* continue with reduced functionality */ ++ return; ++ } + + i915->display.audio.component_registered = true; + } +@@ -1316,6 +1320,12 @@ void intel_audio_init(struct drm_i915_private *i915) + i915_audio_component_init(i915); + } + ++void intel_audio_register(struct drm_i915_private *i915) ++{ ++ if (!i915->display.audio.lpe.platdev) ++ i915_audio_component_register(i915); ++} ++ + /** + * intel_audio_deinit() - deinitialize the audio driver + * @i915: the i915 drm device private data +diff --git a/drivers/gpu/drm/i915/display/intel_audio.h b/drivers/gpu/drm/i915/display/intel_audio.h +index 07d034a981e90..9779343a37106 100644 +--- a/drivers/gpu/drm/i915/display/intel_audio.h ++++ b/drivers/gpu/drm/i915/display/intel_audio.h +@@ -28,6 +28,7 @@ void 
intel_audio_codec_get_config(struct intel_encoder *encoder, + void intel_audio_cdclk_change_pre(struct drm_i915_private *dev_priv); + void intel_audio_cdclk_change_post(struct drm_i915_private *dev_priv); + void intel_audio_init(struct drm_i915_private *dev_priv); ++void intel_audio_register(struct drm_i915_private *i915); + void intel_audio_deinit(struct drm_i915_private *dev_priv); + void intel_audio_sdp_split_update(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c +index 8f144d4d3c398..26514f931af7a 100644 +--- a/drivers/gpu/drm/i915/display/intel_display_driver.c ++++ b/drivers/gpu/drm/i915/display/intel_display_driver.c +@@ -386,6 +386,8 @@ void intel_display_driver_register(struct drm_i915_private *i915) + + intel_audio_init(i915); + ++ intel_audio_register(i915); ++ + intel_display_debugfs_register(i915); + + /* +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h +index f607b87890ddd..c096fcdb2f1ed 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h ++++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h +@@ -285,7 +285,9 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); + static inline bool + i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) + { +- return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); ++ /* TODO: make DPT shrinkable when it has no bound vmas */ ++ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE) && ++ !obj->is_dpt; + } + + static inline bool +diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +index ecc990ec1b952..f2973cd1a8aae 100644 +--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c ++++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +@@ -258,8 +258,13 @@ static void signal_irq_work(struct irq_work *work) + i915_request_put(rq); + } + ++ /* Lazy irq enabling after HW submission */ + if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers)) + intel_breadcrumbs_arm_irq(b); ++ ++ /* And confirm that we still want irqs enabled before we yield */ ++ if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) ++ intel_breadcrumbs_disarm_irq(b); + } + + struct intel_breadcrumbs * +@@ -310,13 +315,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) + return; + + /* Kick the work once more to drain the signalers, and disarm the irq */ +- irq_work_sync(&b->irq_work); +- while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { +- local_irq_disable(); +- signal_irq_work(&b->irq_work); +- local_irq_enable(); +- cond_resched(); +- } ++ irq_work_queue(&b->irq_work); + } + + void intel_breadcrumbs_free(struct kref *kref) +@@ -399,7 +398,7 @@ static void insert_breadcrumb(struct i915_request *rq) + * the request as it may have completed and raised the interrupt as + * we were attaching it into the lists. 
+ */ +- if (!b->irq_armed || __i915_request_is_complete(rq)) ++ if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq)) + irq_work_queue(&b->irq_work); + } + +diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +index e8f385b9c6182..28bfc48a91272 100644 +--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c ++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +@@ -643,7 +643,9 @@ static int st7789v_probe(struct spi_device *spi) + if (ret) + return dev_err_probe(dev, ret, "Failed to get backlight\n"); + +- of_drm_get_panel_orientation(spi->dev.of_node, &ctx->orientation); ++ ret = of_drm_get_panel_orientation(spi->dev.of_node, &ctx->orientation); ++ if (ret) ++ return dev_err_probe(&spi->dev, ret, "Failed to get orientation\n"); + + drm_panel_add(&ctx->panel); + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +index 58fb40c93100a..bea576434e475 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +@@ -956,13 +956,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) + vmw_read(dev_priv, + SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB); + +- /* +- * Workaround for low memory 2D VMs to compensate for the +- * allocation taken by fbdev +- */ +- if (!(dev_priv->capabilities & SVGA_CAP_3D)) +- mem_size *= 3; +- + dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; + dev_priv->max_primary_mem = + vmw_read(dev_priv, SVGA_REG_MAX_PRIMARY_MEM); +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +index 6acc7ad0e9eb8..13423c7b0cbdb 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +@@ -1067,9 +1067,6 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, + int vmw_kms_write_svga(struct vmw_private *vmw_priv, + unsigned width, unsigned height, unsigned pitch, + unsigned bpp, unsigned depth); +-bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, +- uint32_t pitch, +- uint32_t height); + int vmw_kms_present(struct vmw_private *dev_priv, + struct drm_file *file_priv, + struct vmw_framebuffer *vfb, +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +index a884072851322..08f2470edab27 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +@@ -35,6 +35,7 @@ + #include <drm/drm_fourcc.h> + #include <drm/drm_rect.h> + #include <drm/drm_sysfs.h> ++#include <drm/drm_edid.h> + + void vmw_du_cleanup(struct vmw_display_unit *du) + { +@@ -215,7 +216,7 @@ static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps, + new_image = vmw_du_cursor_plane_acquire_image(new_vps); + + changed = false; +- if (old_image && new_image) ++ if (old_image && new_image && old_image != new_image) + changed = memcmp(old_image, new_image, size) != 0; + + return changed; +@@ -2150,13 +2151,12 @@ int vmw_kms_write_svga(struct vmw_private *vmw_priv, + return 0; + } + ++static + bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, +- uint32_t pitch, +- uint32_t height) ++ u64 pitch, ++ u64 height) + { +- return ((u64) pitch * (u64) height) < (u64) +- ((dev_priv->active_display_unit == vmw_du_screen_target) ? 
+- dev_priv->max_primary_mem : dev_priv->vram_size); ++ return (pitch * height) < (u64)dev_priv->vram_size; + } + + /** +@@ -2279,107 +2279,6 @@ vmw_du_connector_detect(struct drm_connector *connector, bool force) + connector_status_connected : connector_status_disconnected); + } + +-static struct drm_display_mode vmw_kms_connector_builtin[] = { +- /* 640x480@60Hz */ +- { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656, +- 752, 800, 0, 480, 489, 492, 525, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 800x600@60Hz */ +- { DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840, +- 968, 1056, 0, 600, 601, 605, 628, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1024x768@60Hz */ +- { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048, +- 1184, 1344, 0, 768, 771, 777, 806, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 1152x864@75Hz */ +- { DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216, +- 1344, 1600, 0, 864, 865, 868, 900, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x720@60Hz */ +- { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74500, 1280, 1344, +- 1472, 1664, 0, 720, 723, 728, 748, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x768@60Hz */ +- { DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344, +- 1472, 1664, 0, 768, 771, 778, 798, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x800@60Hz */ +- { DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352, +- 1480, 1680, 0, 800, 803, 809, 831, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 1280x960@60Hz */ +- { DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376, +- 1488, 1800, 0, 960, 961, 964, 1000, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x1024@60Hz */ +- { DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328, +- 1440, 1688, 0, 1024, 1025, 1028, 1066, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1360x768@60Hz */ +- { DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424, +- 1536, 1792, 0, 768, 771, 777, 795, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1440x1050@60Hz */ +- { DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488, +- 1632, 1864, 0, 1050, 1053, 1057, 1089, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1440x900@60Hz */ +- { DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520, +- 1672, 1904, 0, 900, 903, 909, 934, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1600x1200@60Hz */ +- { DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664, +- 1856, 2160, 0, 1200, 1201, 1204, 1250, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1680x1050@60Hz */ +- { DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784, +- 1960, 2240, 0, 1050, 1053, 1059, 1089, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1792x1344@60Hz */ +- { DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920, +- 2120, 2448, 0, 1344, 1345, 1348, 1394, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1853x1392@60Hz */ +- { DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952, +- 2176, 2528, 0, 1392, 1393, 1396, 1439, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1920x1080@60Hz */ +- { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 173000, 1920, 2048, +- 2248, 2576, 0, 1080, 1083, 1088, 1120, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1920x1200@60Hz */ +- { DRM_MODE("1920x1200", 
DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056, +- 2256, 2592, 0, 1200, 1203, 1209, 1245, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1920x1440@60Hz */ +- { DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048, +- 2256, 2600, 0, 1440, 1441, 1444, 1500, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 2560x1440@60Hz */ +- { DRM_MODE("2560x1440", DRM_MODE_TYPE_DRIVER, 241500, 2560, 2608, +- 2640, 2720, 0, 1440, 1443, 1448, 1481, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 2560x1600@60Hz */ +- { DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752, +- 3032, 3504, 0, 1600, 1603, 1609, 1658, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 2880x1800@60Hz */ +- { DRM_MODE("2880x1800", DRM_MODE_TYPE_DRIVER, 337500, 2880, 2928, +- 2960, 3040, 0, 1800, 1803, 1809, 1852, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 3840x2160@60Hz */ +- { DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 533000, 3840, 3888, +- 3920, 4000, 0, 2160, 2163, 2168, 2222, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 3840x2400@60Hz */ +- { DRM_MODE("3840x2400", DRM_MODE_TYPE_DRIVER, 592250, 3840, 3888, +- 3920, 4000, 0, 2400, 2403, 2409, 2469, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* Terminate */ +- { DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) }, +-}; +- + /** + * vmw_guess_mode_timing - Provide fake timings for a + * 60Hz vrefresh mode. +@@ -2401,88 +2300,6 @@ void vmw_guess_mode_timing(struct drm_display_mode *mode) + } + + +-int vmw_du_connector_fill_modes(struct drm_connector *connector, +- uint32_t max_width, uint32_t max_height) +-{ +- struct vmw_display_unit *du = vmw_connector_to_du(connector); +- struct drm_device *dev = connector->dev; +- struct vmw_private *dev_priv = vmw_priv(dev); +- struct drm_display_mode *mode = NULL; +- struct drm_display_mode *bmode; +- struct drm_display_mode prefmode = { DRM_MODE("preferred", +- DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, +- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) +- }; +- int i; +- u32 assumed_bpp = 4; +- +- if (dev_priv->assume_16bpp) +- assumed_bpp = 2; +- +- max_width = min(max_width, dev_priv->texture_max_width); +- max_height = min(max_height, dev_priv->texture_max_height); +- +- /* +- * For STDU extra limit for a mode on SVGA_REG_SCREENTARGET_MAX_WIDTH/ +- * HEIGHT registers. 
+- */ +- if (dev_priv->active_display_unit == vmw_du_screen_target) { +- max_width = min(max_width, dev_priv->stdu_max_width); +- max_height = min(max_height, dev_priv->stdu_max_height); +- } +- +- /* Add preferred mode */ +- mode = drm_mode_duplicate(dev, &prefmode); +- if (!mode) +- return 0; +- mode->hdisplay = du->pref_width; +- mode->vdisplay = du->pref_height; +- vmw_guess_mode_timing(mode); +- drm_mode_set_name(mode); +- +- if (vmw_kms_validate_mode_vram(dev_priv, +- mode->hdisplay * assumed_bpp, +- mode->vdisplay)) { +- drm_mode_probed_add(connector, mode); +- } else { +- drm_mode_destroy(dev, mode); +- mode = NULL; +- } +- +- if (du->pref_mode) { +- list_del_init(&du->pref_mode->head); +- drm_mode_destroy(dev, du->pref_mode); +- } +- +- /* mode might be null here, this is intended */ +- du->pref_mode = mode; +- +- for (i = 0; vmw_kms_connector_builtin[i].type != 0; i++) { +- bmode = &vmw_kms_connector_builtin[i]; +- if (bmode->hdisplay > max_width || +- bmode->vdisplay > max_height) +- continue; +- +- if (!vmw_kms_validate_mode_vram(dev_priv, +- bmode->hdisplay * assumed_bpp, +- bmode->vdisplay)) +- continue; +- +- mode = drm_mode_duplicate(dev, bmode); +- if (!mode) +- return 0; +- +- drm_mode_probed_add(connector, mode); +- } +- +- drm_connector_list_update(connector); +- /* Move the prefered mode first, help apps pick the right mode. */ +- drm_mode_sort(&connector->modes); +- +- return 1; +-} +- + /** + * vmw_kms_update_layout_ioctl - Handler for DRM_VMW_UPDATE_LAYOUT ioctl + * @dev: drm device for the ioctl +@@ -3023,3 +2840,84 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) + vmw_validation_unref_lists(&val_ctx); + return ret; + } ++ ++/** ++ * vmw_connector_mode_valid - implements drm_connector_helper_funcs.mode_valid callback ++ * ++ * @connector: the drm connector, part of a DU container ++ * @mode: drm mode to check ++ * ++ * Returns MODE_OK on success, or a drm_mode_status error code. ++ */ ++enum drm_mode_status vmw_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) ++{ ++ enum drm_mode_status ret; ++ struct drm_device *dev = connector->dev; ++ struct vmw_private *dev_priv = vmw_priv(dev); ++ u32 assumed_cpp = 4; ++ ++ if (dev_priv->assume_16bpp) ++ assumed_cpp = 2; ++ ++ ret = drm_mode_validate_size(mode, dev_priv->texture_max_width, ++ dev_priv->texture_max_height); ++ if (ret != MODE_OK) ++ return ret; ++ ++ if (!vmw_kms_validate_mode_vram(dev_priv, ++ mode->hdisplay * assumed_cpp, ++ mode->vdisplay)) ++ return MODE_MEM; ++ ++ return MODE_OK; ++} ++ ++/** ++ * vmw_connector_get_modes - implements drm_connector_helper_funcs.get_modes callback ++ * ++ * @connector: the drm connector, part of a DU container ++ * ++ * Returns the number of added modes. 
++ */ ++int vmw_connector_get_modes(struct drm_connector *connector) ++{ ++ struct vmw_display_unit *du = vmw_connector_to_du(connector); ++ struct drm_device *dev = connector->dev; ++ struct vmw_private *dev_priv = vmw_priv(dev); ++ struct drm_display_mode *mode = NULL; ++ struct drm_display_mode prefmode = { DRM_MODE("preferred", ++ DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) ++ }; ++ u32 max_width; ++ u32 max_height; ++ u32 num_modes; ++ ++ /* Add preferred mode */ ++ mode = drm_mode_duplicate(dev, &prefmode); ++ if (!mode) ++ return 0; ++ ++ mode->hdisplay = du->pref_width; ++ mode->vdisplay = du->pref_height; ++ vmw_guess_mode_timing(mode); ++ drm_mode_set_name(mode); ++ ++ drm_mode_probed_add(connector, mode); ++ drm_dbg_kms(dev, "preferred mode " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode)); ++ ++ /* Probe connector for all modes not exceeding our geom limits */ ++ max_width = dev_priv->texture_max_width; ++ max_height = dev_priv->texture_max_height; ++ ++ if (dev_priv->active_display_unit == vmw_du_screen_target) { ++ max_width = min(dev_priv->stdu_max_width, max_width); ++ max_height = min(dev_priv->stdu_max_height, max_height); ++ } ++ ++ num_modes = 1 + drm_add_modes_noedid(connector, max_width, max_height); ++ ++ return num_modes; ++} +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +index 9fda4f4ec7a97..19a843da87b78 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +@@ -378,7 +378,6 @@ struct vmw_display_unit { + unsigned pref_width; + unsigned pref_height; + bool pref_active; +- struct drm_display_mode *pref_mode; + + /* + * Gui positioning +@@ -428,8 +427,6 @@ void vmw_du_connector_save(struct drm_connector *connector); + void vmw_du_connector_restore(struct drm_connector *connector); + enum drm_connector_status + vmw_du_connector_detect(struct drm_connector *connector, bool force); +-int vmw_du_connector_fill_modes(struct drm_connector *connector, +- uint32_t max_width, uint32_t max_height); + int vmw_kms_helper_dirty(struct vmw_private *dev_priv, + struct vmw_framebuffer *framebuffer, + const struct drm_clip_rect *clips, +@@ -438,6 +435,9 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, + int num_clips, + int increment, + struct vmw_kms_dirty *dirty); ++enum drm_mode_status vmw_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode); ++int vmw_connector_get_modes(struct drm_connector *connector); + + void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, + struct drm_file *file_priv, +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +index a82fa97003705..c4db4aecca6c3 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +@@ -304,7 +304,7 @@ static void vmw_ldu_connector_destroy(struct drm_connector *connector) + static const struct drm_connector_funcs vmw_legacy_connector_funcs = { + .dpms = vmw_du_connector_dpms, + .detect = vmw_du_connector_detect, +- .fill_modes = vmw_du_connector_fill_modes, ++ .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = vmw_ldu_connector_destroy, + .reset = vmw_du_connector_reset, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, +@@ -313,6 +313,8 @@ static const struct drm_connector_funcs vmw_legacy_connector_funcs = { + + static const struct + drm_connector_helper_funcs vmw_ldu_connector_helper_funcs = { ++ .get_modes = 
vmw_connector_get_modes, ++ .mode_valid = vmw_connector_mode_valid + }; + + static int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, +@@ -449,7 +451,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) + ldu->base.pref_active = (unit == 0); + ldu->base.pref_width = dev_priv->initial_width; + ldu->base.pref_height = dev_priv->initial_height; +- ldu->base.pref_mode = NULL; + + /* + * Remove this after enabling atomic because property values can +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +index 556a403b7eb56..30c3ad27b6629 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +@@ -347,7 +347,7 @@ static void vmw_sou_connector_destroy(struct drm_connector *connector) + static const struct drm_connector_funcs vmw_sou_connector_funcs = { + .dpms = vmw_du_connector_dpms, + .detect = vmw_du_connector_detect, +- .fill_modes = vmw_du_connector_fill_modes, ++ .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = vmw_sou_connector_destroy, + .reset = vmw_du_connector_reset, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, +@@ -357,6 +357,8 @@ static const struct drm_connector_funcs vmw_sou_connector_funcs = { + + static const struct + drm_connector_helper_funcs vmw_sou_connector_helper_funcs = { ++ .get_modes = vmw_connector_get_modes, ++ .mode_valid = vmw_connector_mode_valid + }; + + +@@ -826,7 +828,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) + sou->base.pref_active = (unit == 0); + sou->base.pref_width = dev_priv->initial_width; + sou->base.pref_height = dev_priv->initial_height; +- sou->base.pref_mode = NULL; + + /* + * Remove this after enabling atomic because property values can +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +index ba0c0e12cfe9d..4ccab07faff08 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +@@ -41,7 +41,14 @@ + #define vmw_connector_to_stdu(x) \ + container_of(x, struct vmw_screen_target_display_unit, base.connector) + +- ++/* ++ * Some renderers such as llvmpipe will align the width and height of their ++ * buffers to match their tile size. We need to keep this in mind when exposing ++ * modes to userspace so that this possible over-allocation will not exceed ++ * graphics memory. 64x64 pixels seems to be a reasonable upper bound for the ++ * tile size of current renderers. ++ */ ++#define GPU_TILE_SIZE 64 + + enum stdu_content_type { + SAME_AS_DISPLAY = 0, +@@ -825,12 +832,46 @@ static void vmw_stdu_connector_destroy(struct drm_connector *connector) + vmw_stdu_destroy(vmw_connector_to_stdu(connector)); + } + ++static enum drm_mode_status ++vmw_stdu_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) ++{ ++ enum drm_mode_status ret; ++ struct drm_device *dev = connector->dev; ++ struct vmw_private *dev_priv = vmw_priv(dev); ++ u64 assumed_cpp = dev_priv->assume_16bpp ? 
2 : 4; ++ /* Align width and height to account for GPU tile over-alignment */ ++ u64 required_mem = ALIGN(mode->hdisplay, GPU_TILE_SIZE) * ++ ALIGN(mode->vdisplay, GPU_TILE_SIZE) * ++ assumed_cpp; ++ required_mem = ALIGN(required_mem, PAGE_SIZE); ++ ++ ret = drm_mode_validate_size(mode, dev_priv->stdu_max_width, ++ dev_priv->stdu_max_height); ++ if (ret != MODE_OK) ++ return ret; + ++ ret = drm_mode_validate_size(mode, dev_priv->texture_max_width, ++ dev_priv->texture_max_height); ++ if (ret != MODE_OK) ++ return ret; ++ ++ if (required_mem > dev_priv->max_primary_mem) ++ return MODE_MEM; ++ ++ if (required_mem > dev_priv->max_mob_pages * PAGE_SIZE) ++ return MODE_MEM; ++ ++ if (required_mem > dev_priv->max_mob_size) ++ return MODE_MEM; ++ ++ return MODE_OK; ++} + + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { + .dpms = vmw_du_connector_dpms, + .detect = vmw_du_connector_detect, +- .fill_modes = vmw_du_connector_fill_modes, ++ .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = vmw_stdu_connector_destroy, + .reset = vmw_du_connector_reset, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, +@@ -840,6 +881,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { + + static const struct + drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { ++ .get_modes = vmw_connector_get_modes, ++ .mode_valid = vmw_stdu_connector_mode_valid + }; + + +diff --git a/drivers/greybus/interface.c b/drivers/greybus/interface.c +index 9ec949a438ef6..52ef6be9d4499 100644 +--- a/drivers/greybus/interface.c ++++ b/drivers/greybus/interface.c +@@ -694,6 +694,7 @@ static void gb_interface_release(struct device *dev) + + trace_gb_interface_release(intf); + ++ cancel_work_sync(&intf->mode_switch_work); + kfree(intf); + } + +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index e0181218ad857..85ddeb13a3fae 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -1448,7 +1448,6 @@ static void implement(const struct hid_device *hid, u8 *report, + hid_warn(hid, + "%s() called with too large value %d (n: %d)! 
(%s)\n", + __func__, value, n, current->comm); +- WARN_ON(1); + value &= m; + } + } +diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c +index 3c3c497b6b911..37958edec55f5 100644 +--- a/drivers/hid/hid-logitech-dj.c ++++ b/drivers/hid/hid-logitech-dj.c +@@ -1284,8 +1284,10 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev, + */ + msleep(50); + +- if (retval) ++ if (retval) { ++ kfree(dj_report); + return retval; ++ } + } + + /* +diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c +index edd0b0f1193bd..97dfa3694ff04 100644 +--- a/drivers/hid/hid-nvidia-shield.c ++++ b/drivers/hid/hid-nvidia-shield.c +@@ -283,7 +283,9 @@ static struct input_dev *shield_haptics_create( + return haptics; + + input_set_capability(haptics, EV_FF, FF_RUMBLE); +- input_ff_create_memless(haptics, NULL, play_effect); ++ ret = input_ff_create_memless(haptics, NULL, play_effect); ++ if (ret) ++ goto err; + + ret = input_register_device(haptics); + if (ret) +diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c +index 648893f9e4b67..8dad239aba2ce 100644 +--- a/drivers/hwtracing/intel_th/pci.c ++++ b/drivers/hwtracing/intel_th/pci.c +@@ -294,6 +294,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xae24), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, ++ { ++ /* Meteor Lake-S */ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7f26), ++ .driver_data = (kernel_ulong_t)&intel_th_2x, ++ }, + { + /* Raptor Lake-S */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26), +@@ -304,6 +309,26 @@ static const struct pci_device_id intel_th_pci_id_table[] = { + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa76f), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, ++ { ++ /* Granite Rapids */ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0963), ++ .driver_data = (kernel_ulong_t)&intel_th_2x, ++ }, ++ { ++ /* Granite Rapids SOC */ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3256), ++ .driver_data = (kernel_ulong_t)&intel_th_2x, ++ }, ++ { ++ /* Sapphire Rapids SOC */ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3456), ++ .driver_data = (kernel_ulong_t)&intel_th_2x, ++ }, ++ { ++ /* Lunar Lake */ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa824), ++ .driver_data = (kernel_ulong_t)&intel_th_2x, ++ }, + { + /* Alder Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), +diff --git a/drivers/i2c/busses/i2c-at91-slave.c b/drivers/i2c/busses/i2c-at91-slave.c +index d6eeea5166c04..131a67d9d4a68 100644 +--- a/drivers/i2c/busses/i2c-at91-slave.c ++++ b/drivers/i2c/busses/i2c-at91-slave.c +@@ -106,8 +106,7 @@ static int at91_unreg_slave(struct i2c_client *slave) + + static u32 at91_twi_func(struct i2c_adapter *adapter) + { +- return I2C_FUNC_SLAVE | I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL +- | I2C_FUNC_SMBUS_READ_BLOCK_DATA; ++ return I2C_FUNC_SLAVE; + } + + static const struct i2c_algorithm at91_twi_algorithm_slave = { +diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c +index 2e079cf20bb5b..78e2c47e3d7da 100644 +--- a/drivers/i2c/busses/i2c-designware-slave.c ++++ b/drivers/i2c/busses/i2c-designware-slave.c +@@ -220,7 +220,7 @@ static const struct i2c_algorithm i2c_dw_algo = { + + void i2c_dw_configure_slave(struct dw_i2c_dev *dev) + { +- dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY; ++ dev->functionality = I2C_FUNC_SLAVE; + + dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL | + DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED; +diff --git a/drivers/iio/adc/ad9467.c 
b/drivers/iio/adc/ad9467.c +index 863dca5db161e..4c08f8a04f962 100644 +--- a/drivers/iio/adc/ad9467.c ++++ b/drivers/iio/adc/ad9467.c +@@ -225,11 +225,11 @@ static void __ad9467_get_scale(struct ad9467_state *st, int index, + } + + static const struct iio_chan_spec ad9434_channels[] = { +- AD9467_CHAN(0, 0, 12, 'S'), ++ AD9467_CHAN(0, 0, 12, 's'), + }; + + static const struct iio_chan_spec ad9467_channels[] = { +- AD9467_CHAN(0, 0, 16, 'S'), ++ AD9467_CHAN(0, 0, 16, 's'), + }; + + static const struct ad9467_chip_info ad9467_chip_tbl = { +diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c +index a543b91124b07..e3b2158829416 100644 +--- a/drivers/iio/adc/adi-axi-adc.c ++++ b/drivers/iio/adc/adi-axi-adc.c +@@ -175,6 +175,7 @@ static int adi_axi_adc_probe(struct platform_device *pdev) + struct adi_axi_adc_state *st; + void __iomem *base; + unsigned int ver; ++ struct clk *clk; + int ret; + + st = devm_kzalloc(&pdev->dev, sizeof(*st), GFP_KERNEL); +@@ -195,6 +196,10 @@ static int adi_axi_adc_probe(struct platform_device *pdev) + if (!expected_ver) + return -ENODEV; + ++ clk = devm_clk_get_enabled(&pdev->dev, NULL); ++ if (IS_ERR(clk)) ++ return PTR_ERR(clk); ++ + /* + * Force disable the core. Up to the frontend to enable us. And we can + * still read/write registers... +diff --git a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c +index 03823ee57f598..7b19c94ef87d9 100644 +--- a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c ++++ b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c +@@ -60,11 +60,15 @@ EXPORT_SYMBOL_NS_GPL(inv_sensors_timestamp_init, IIO_INV_SENSORS_TIMESTAMP); + int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts, + uint32_t period, bool fifo) + { ++ uint32_t mult; ++ + /* when FIFO is on, prevent odr change if one is already pending */ + if (fifo && ts->new_mult != 0) + return -EAGAIN; + +- ts->new_mult = period / ts->chip.clock_period; ++ mult = period / ts->chip.clock_period; ++ if (mult != ts->mult) ++ ts->new_mult = mult; + + return 0; + } +@@ -101,6 +105,9 @@ static bool inv_update_chip_period(struct inv_sensors_timestamp *ts, + + static void inv_align_timestamp_it(struct inv_sensors_timestamp *ts) + { ++ const int64_t period_min = ts->min_period * ts->mult; ++ const int64_t period_max = ts->max_period * ts->mult; ++ int64_t add_max, sub_max; + int64_t delta, jitter; + int64_t adjust; + +@@ -108,11 +115,13 @@ static void inv_align_timestamp_it(struct inv_sensors_timestamp *ts) + delta = ts->it.lo - ts->timestamp; + + /* adjust timestamp while respecting jitter */ ++ add_max = period_max - (int64_t)ts->period; ++ sub_max = period_min - (int64_t)ts->period; + jitter = INV_SENSORS_TIMESTAMP_JITTER((int64_t)ts->period, ts->chip.jitter); + if (delta > jitter) +- adjust = jitter; ++ adjust = add_max; + else if (delta < -jitter) +- adjust = -jitter; ++ adjust = sub_max; + else + adjust = 0; + +diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c +index 076bc9ecfb499..4763402dbcd66 100644 +--- a/drivers/iio/dac/ad5592r-base.c ++++ b/drivers/iio/dac/ad5592r-base.c +@@ -415,7 +415,7 @@ static int ad5592r_read_raw(struct iio_dev *iio_dev, + s64 tmp = *val * (3767897513LL / 25LL); + *val = div_s64_rem(tmp, 1000000000LL, val2); + +- return IIO_VAL_INT_PLUS_MICRO; ++ return IIO_VAL_INT_PLUS_NANO; + } + + mutex_lock(&st->lock); +diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c 
+index b1e4fde27d256..72e9541381027 100644 +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +@@ -129,10 +129,6 @@ static int inv_icm42600_accel_update_scan_mode(struct iio_dev *indio_dev, + /* update data FIFO write */ + inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); + ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); +- if (ret) +- goto out_unlock; +- +- ret = inv_icm42600_buffer_update_watermark(st); + + out_unlock: + mutex_unlock(&st->lock); +diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +index 3bf946e56e1df..f1629f77d6063 100644 +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +@@ -129,10 +129,6 @@ static int inv_icm42600_gyro_update_scan_mode(struct iio_dev *indio_dev, + /* update data FIFO write */ + inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); + ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); +- if (ret) +- goto out_unlock; +- +- ret = inv_icm42600_buffer_update_watermark(st); + + out_unlock: + mutex_unlock(&st->lock); +diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c +index a2ad2dbd04d92..ef3fae113dd64 100644 +--- a/drivers/iommu/amd/init.c ++++ b/drivers/iommu/amd/init.c +@@ -1692,8 +1692,17 @@ static void __init free_pci_segments(void) + } + } + ++static void __init free_sysfs(struct amd_iommu *iommu) ++{ ++ if (iommu->iommu.dev) { ++ iommu_device_unregister(&iommu->iommu); ++ iommu_device_sysfs_remove(&iommu->iommu); ++ } ++} ++ + static void __init free_iommu_one(struct amd_iommu *iommu) + { ++ free_sysfs(iommu); + free_cwwb_sem(iommu); + free_command_buffer(iommu); + free_event_buffer(iommu); +diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c +index fc0528c513ad9..c7d6e6987166f 100644 +--- a/drivers/irqchip/irq-gic-v3-its.c ++++ b/drivers/irqchip/irq-gic-v3-its.c +@@ -1840,28 +1840,22 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info) + { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); +- int ret = 0; + + if (!info->map) + return -EINVAL; + +- raw_spin_lock(&its_dev->event_map.vlpi_lock); +- + if (!its_dev->event_map.vm) { + struct its_vlpi_map *maps; + + maps = kcalloc(its_dev->event_map.nr_lpis, sizeof(*maps), + GFP_ATOMIC); +- if (!maps) { +- ret = -ENOMEM; +- goto out; +- } ++ if (!maps) ++ return -ENOMEM; + + its_dev->event_map.vm = info->map->vm; + its_dev->event_map.vlpi_maps = maps; + } else if (its_dev->event_map.vm != info->map->vm) { +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + + /* Get our private copy of the mapping information */ +@@ -1893,46 +1887,32 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info) + its_dev->event_map.nr_vlpis++; + } + +-out: +- raw_spin_unlock(&its_dev->event_map.vlpi_lock); +- return ret; ++ return 0; + } + + static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info) + { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_vlpi_map *map; +- int ret = 0; +- +- raw_spin_lock(&its_dev->event_map.vlpi_lock); + + map = get_vlpi_map(d); + +- if (!its_dev->event_map.vm || !map) { +- ret = -EINVAL; +- goto out; +- } ++ if (!its_dev->event_map.vm || !map) ++ return -EINVAL; + + /* Copy our mapping information to the incoming request */ + *info->map = *map; + +-out: +- raw_spin_unlock(&its_dev->event_map.vlpi_lock); +- return ret; ++ return 
0; + } + + static int its_vlpi_unmap(struct irq_data *d) + { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); +- int ret = 0; +- +- raw_spin_lock(&its_dev->event_map.vlpi_lock); + +- if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) { +- ret = -EINVAL; +- goto out; +- } ++ if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) ++ return -EINVAL; + + /* Drop the virtual mapping */ + its_send_discard(its_dev, event); +@@ -1956,9 +1936,7 @@ static int its_vlpi_unmap(struct irq_data *d) + kfree(its_dev->event_map.vlpi_maps); + } + +-out: +- raw_spin_unlock(&its_dev->event_map.vlpi_lock); +- return ret; ++ return 0; + } + + static int its_vlpi_prop_update(struct irq_data *d, struct its_cmd_info *info) +@@ -1986,6 +1964,8 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) + if (!is_v4(its_dev->its)) + return -EINVAL; + ++ guard(raw_spinlock_irq)(&its_dev->event_map.vlpi_lock); ++ + /* Unmap request? */ + if (!info) + return its_vlpi_unmap(d); +diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c +index e8d01b14ccdde..627beae9649a2 100644 +--- a/drivers/irqchip/irq-riscv-intc.c ++++ b/drivers/irqchip/irq-riscv-intc.c +@@ -17,17 +17,19 @@ + #include <linux/module.h> + #include <linux/of.h> + #include <linux/smp.h> ++#include <linux/soc/andes/irq.h> + + static struct irq_domain *intc_domain; ++static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG; ++static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG; ++static unsigned int riscv_intc_custom_nr_irqs __ro_after_init; + + static asmlinkage void riscv_intc_irq(struct pt_regs *regs) + { + unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; + +- if (unlikely(cause >= BITS_PER_LONG)) +- panic("unexpected interrupt cause"); +- +- generic_handle_domain_irq(intc_domain, cause); ++ if (generic_handle_domain_irq(intc_domain, cause)) ++ pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause); + } + + /* +@@ -47,6 +49,31 @@ static void riscv_intc_irq_unmask(struct irq_data *d) + csr_set(CSR_IE, BIT(d->hwirq)); + } + ++static void andes_intc_irq_mask(struct irq_data *d) ++{ ++ /* ++ * Andes specific S-mode local interrupt causes (hwirq) ++ * are defined as (256 + n) and controlled by n-th bit ++ * of SLIE. 
++ */ ++ unsigned int mask = BIT(d->hwirq % BITS_PER_LONG); ++ ++ if (d->hwirq < ANDES_SLI_CAUSE_BASE) ++ csr_clear(CSR_IE, mask); ++ else ++ csr_clear(ANDES_CSR_SLIE, mask); ++} ++ ++static void andes_intc_irq_unmask(struct irq_data *d) ++{ ++ unsigned int mask = BIT(d->hwirq % BITS_PER_LONG); ++ ++ if (d->hwirq < ANDES_SLI_CAUSE_BASE) ++ csr_set(CSR_IE, mask); ++ else ++ csr_set(ANDES_CSR_SLIE, mask); ++} ++ + static void riscv_intc_irq_eoi(struct irq_data *d) + { + /* +@@ -70,12 +97,21 @@ static struct irq_chip riscv_intc_chip = { + .irq_eoi = riscv_intc_irq_eoi, + }; + ++static struct irq_chip andes_intc_chip = { ++ .name = "RISC-V INTC", ++ .irq_mask = andes_intc_irq_mask, ++ .irq_unmask = andes_intc_irq_unmask, ++ .irq_eoi = riscv_intc_irq_eoi, ++}; ++ + static int riscv_intc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) + { ++ struct irq_chip *chip = d->host_data; ++ + irq_set_percpu_devid(irq); +- irq_domain_set_info(d, irq, hwirq, &riscv_intc_chip, d->host_data, +- handle_percpu_devid_irq, NULL, NULL); ++ irq_domain_set_info(d, irq, hwirq, chip, NULL, handle_percpu_devid_irq, ++ NULL, NULL); + + return 0; + } +@@ -93,6 +129,14 @@ static int riscv_intc_domain_alloc(struct irq_domain *domain, + if (ret) + return ret; + ++ /* ++ * Only allow hwirq for which we have corresponding standard or ++ * custom interrupt enable register. ++ */ ++ if ((hwirq >= riscv_intc_nr_irqs && hwirq < riscv_intc_custom_base) || ++ (hwirq >= riscv_intc_custom_base + riscv_intc_custom_nr_irqs)) ++ return -EINVAL; ++ + for (i = 0; i < nr_irqs; i++) { + ret = riscv_intc_domain_map(domain, virq + i, hwirq + i); + if (ret) +@@ -113,12 +157,12 @@ static struct fwnode_handle *riscv_intc_hwnode(void) + return intc_domain->fwnode; + } + +-static int __init riscv_intc_init_common(struct fwnode_handle *fn) ++static int __init riscv_intc_init_common(struct fwnode_handle *fn, ++ struct irq_chip *chip) + { + int rc; + +- intc_domain = irq_domain_create_linear(fn, BITS_PER_LONG, +- &riscv_intc_domain_ops, NULL); ++ intc_domain = irq_domain_create_tree(fn, &riscv_intc_domain_ops, chip); + if (!intc_domain) { + pr_err("unable to add IRQ domain\n"); + return -ENXIO; +@@ -132,7 +176,11 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn) + + riscv_set_intc_hwnode_fn(riscv_intc_hwnode); + +- pr_info("%d local interrupts mapped\n", BITS_PER_LONG); ++ pr_info("%d local interrupts mapped\n", riscv_intc_nr_irqs); ++ if (riscv_intc_custom_nr_irqs) { ++ pr_info("%d custom local interrupts mapped\n", ++ riscv_intc_custom_nr_irqs); ++ } + + return 0; + } +@@ -140,8 +188,9 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn) + static int __init riscv_intc_init(struct device_node *node, + struct device_node *parent) + { +- int rc; ++ struct irq_chip *chip = &riscv_intc_chip; + unsigned long hartid; ++ int rc; + + rc = riscv_of_parent_hartid(node, &hartid); + if (rc < 0) { +@@ -166,18 +215,26 @@ static int __init riscv_intc_init(struct device_node *node, + return 0; + } + +- return riscv_intc_init_common(of_node_to_fwnode(node)); ++ if (of_device_is_compatible(node, "andestech,cpu-intc")) { ++ riscv_intc_custom_base = ANDES_SLI_CAUSE_BASE; ++ riscv_intc_custom_nr_irqs = ANDES_RV_IRQ_LAST; ++ chip = &andes_intc_chip; ++ } ++ ++ return riscv_intc_init_common(of_node_to_fwnode(node), chip); + } + + IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init); ++IRQCHIP_DECLARE(andes, "andestech,cpu-intc", riscv_intc_init); + + #ifdef CONFIG_ACPI + + static int __init 
riscv_intc_acpi_init(union acpi_subtable_headers *header, + const unsigned long end) + { +- struct fwnode_handle *fn; + struct acpi_madt_rintc *rintc; ++ struct fwnode_handle *fn; ++ int rc; + + rintc = (struct acpi_madt_rintc *)header; + +@@ -196,7 +253,11 @@ static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header, + return -ENOMEM; + } + +- return riscv_intc_init_common(fn); ++ rc = riscv_intc_init_common(fn, &riscv_intc_chip); ++ if (rc) ++ irq_domain_free_fwnode(fn); ++ ++ return rc; + } + + IRQCHIP_ACPI_DECLARE(riscv_intc, ACPI_MADT_TYPE_RINTC, NULL, +diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c +index 32af2b14ff344..34c9be437432a 100644 +--- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c ++++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c +@@ -69,8 +69,10 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id + + aux_bus->aux_device_wrapper[1] = kzalloc(sizeof(*aux_bus->aux_device_wrapper[1]), + GFP_KERNEL); +- if (!aux_bus->aux_device_wrapper[1]) +- return -ENOMEM; ++ if (!aux_bus->aux_device_wrapper[1]) { ++ retval = -ENOMEM; ++ goto err_aux_dev_add_0; ++ } + + retval = ida_alloc(&gp_client_ida, GFP_KERNEL); + if (retval < 0) +@@ -111,6 +113,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id + + err_aux_dev_add_1: + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev); ++ goto err_aux_dev_add_0; + + err_aux_dev_init_1: + ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[1]->aux_dev.id); +@@ -120,6 +123,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id + + err_aux_dev_add_0: + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev); ++ goto err_ret; + + err_aux_dev_init_0: + ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[0]->aux_dev.id); +@@ -127,6 +131,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id + err_ida_alloc_0: + kfree(aux_bus->aux_device_wrapper[0]); + ++err_ret: + return retval; + } + +diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c +index 3c2c28c8ba30a..6c4f5e9fe834d 100644 +--- a/drivers/misc/mei/pci-me.c ++++ b/drivers/misc/mei/pci-me.c +@@ -400,8 +400,10 @@ static int mei_me_pci_resume(struct device *device) + } + + err = mei_restart(dev); +- if (err) ++ if (err) { ++ free_irq(pdev->irq, dev); + return err; ++ } + + /* Start timer if stopped in suspend */ + schedule_delayed_work(&dev->timer_work, HZ); +diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c +index 5d7ac07623c27..9a41ab65378de 100644 +--- a/drivers/misc/vmw_vmci/vmci_event.c ++++ b/drivers/misc/vmw_vmci/vmci_event.c +@@ -9,6 +9,7 @@ + #include <linux/vmw_vmci_api.h> + #include <linux/list.h> + #include <linux/module.h> ++#include <linux/nospec.h> + #include <linux/sched.h> + #include <linux/slab.h> + #include <linux/rculist.h> +@@ -86,9 +87,12 @@ static void event_deliver(struct vmci_event_msg *event_msg) + { + struct vmci_subscription *cur; + struct list_head *subscriber_list; ++ u32 sanitized_event, max_vmci_event; + + rcu_read_lock(); +- subscriber_list = &subscriber_array[event_msg->event_data.event]; ++ max_vmci_event = ARRAY_SIZE(subscriber_array); ++ sanitized_event = array_index_nospec(event_msg->event_data.event, max_vmci_event); ++ subscriber_list = &subscriber_array[sanitized_event]; + list_for_each_entry_rcu(cur, subscriber_list, node) { + cur->callback(cur->id, &event_msg->event_data, + 
cur->callback_data); +diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c +index e8c16e76e34bb..77a79c2494022 100644 +--- a/drivers/net/dsa/qca/qca8k-leds.c ++++ b/drivers/net/dsa/qca/qca8k-leds.c +@@ -431,8 +431,11 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p + init_data.devname_mandatory = true; + init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", ds->slave_mii_bus->id, + port_num); +- if (!init_data.devicename) ++ if (!init_data.devicename) { ++ fwnode_handle_put(led); ++ fwnode_handle_put(leds); + return -ENOMEM; ++ } + + ret = devm_led_classdev_register_ext(priv->dev, &port_led->cdev, &init_data); + if (ret) +@@ -441,6 +444,7 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p + kfree(init_data.devicename); + } + ++ fwnode_handle_put(leds); + return 0; + } + +@@ -471,9 +475,13 @@ qca8k_setup_led_ctrl(struct qca8k_priv *priv) + * the correct port for LED setup. + */ + ret = qca8k_parse_port_leds(priv, port, qca8k_port_to_phy(port_num)); +- if (ret) ++ if (ret) { ++ fwnode_handle_put(port); ++ fwnode_handle_put(ports); + return ret; ++ } + } + ++ fwnode_handle_put(ports); + return 0; + } +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +index 132442f16fe67..7a4e08b5a8c1b 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +@@ -678,7 +678,7 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) + req_type); + else if (rc && rc != HWRM_ERR_CODE_PF_UNAVAILABLE) + hwrm_err(bp, ctx, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n", +- req_type, token->seq_id, rc); ++ req_type, le16_to_cpu(ctx->req->seq_id), rc); + rc = __hwrm_to_stderr(rc); + exit: + if (token) +diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +index 600de587d7a98..e70b9ccca380e 100644 +--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c ++++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +@@ -272,13 +272,12 @@ lio_vf_rep_copy_packet(struct octeon_device *oct, + pg_info->page_offset; + memcpy(skb->data, va, MIN_SKB_SIZE); + skb_put(skb, MIN_SKB_SIZE); ++ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, ++ pg_info->page, ++ pg_info->page_offset + MIN_SKB_SIZE, ++ len - MIN_SKB_SIZE, ++ LIO_RXBUFFER_SZ); + } +- +- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, +- pg_info->page, +- pg_info->page_offset + MIN_SKB_SIZE, +- len - MIN_SKB_SIZE, +- LIO_RXBUFFER_SZ); + } else { + struct octeon_skb_page_info *pg_info = + ((struct octeon_skb_page_info *)(skb->cb)); +diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c +index f281e42a7ef96..3d60ea25711fc 100644 +--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c ++++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c +@@ -506,11 +506,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb, + skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); + } + +-static void gve_rx_free_skb(struct gve_rx_ring *rx) ++static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) + { + if (!rx->ctx.skb_head) + return; + ++ if (rx->ctx.skb_head == napi->skb) ++ napi->skb = NULL; + dev_kfree_skb_any(rx->ctx.skb_head); + rx->ctx.skb_head = NULL; + rx->ctx.skb_tail = NULL; +@@ -783,7 +785,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) + + err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num); + if (err < 0) { 
+- gve_rx_free_skb(rx); ++ gve_rx_free_skb(napi, rx); + u64_stats_update_begin(&rx->statss); + if (err == -ENOMEM) + rx->rx_skb_alloc_fail++; +@@ -826,7 +828,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) + + /* gve_rx_complete_skb() will consume skb if successful */ + if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) { +- gve_rx_free_skb(rx); ++ gve_rx_free_skb(napi, rx); + u64_stats_update_begin(&rx->statss); + rx->rx_desc_err_dropped_pkt++; + u64_stats_update_end(&rx->statss); +diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c +index 1e19b834a6130..5a44354bbdfdf 100644 +--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c ++++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c +@@ -501,28 +501,18 @@ static int gve_prep_tso(struct sk_buff *skb) + if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) + return -1; + ++ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) ++ return -EINVAL; ++ + /* Needed because we will modify header. */ + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + tcp = tcp_hdr(skb); +- +- /* Remove payload length from checksum. */ + paylen = skb->len - skb_transport_offset(skb); +- +- switch (skb_shinfo(skb)->gso_type) { +- case SKB_GSO_TCPV4: +- case SKB_GSO_TCPV6: +- csum_replace_by_diff(&tcp->check, +- (__force __wsum)htonl(paylen)); +- +- /* Compute length of segmentation header. */ +- header_len = skb_tcp_all_headers(skb); +- break; +- default: +- return -EINVAL; +- } ++ csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); ++ header_len = skb_tcp_all_headers(skb); + + if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) + return -EINVAL; +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index 677cfaa5fe08c..db9574e9fb7bc 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -3539,6 +3539,9 @@ static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring) + ret = hns3_alloc_and_attach_buffer(ring, i); + if (ret) + goto out_buffer_fail; ++ ++ if (!(i % HNS3_RESCHED_BD_NUM)) ++ cond_resched(); + } + + return 0; +@@ -5112,6 +5115,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) + } + + u64_stats_init(&priv->ring[i].syncp); ++ cond_resched(); + } + + return 0; +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +index acd756b0c7c9a..d36c4ed16d8dd 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +@@ -214,6 +214,8 @@ enum hns3_nic_state { + #define HNS3_CQ_MODE_EQE 1U + #define HNS3_CQ_MODE_CQE 0U + ++#define HNS3_RESCHED_BD_NUM 1024 ++ + enum hns3_pkt_l2t_type { + HNS3_L2_TYPE_UNICAST, + HNS3_L2_TYPE_MULTICAST, +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index 14713454e0d82..c8059d96f64be 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -3031,9 +3031,7 @@ static void hclge_push_link_status(struct hclge_dev *hdev) + + static void hclge_update_link_status(struct hclge_dev *hdev) + { +- struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_handle *handle = &hdev->vport[0].nic; +- struct hnae3_client *rclient = hdev->roce_client; + struct hnae3_client *client = hdev->nic_client; + int state; + int 
ret; +@@ -3057,8 +3055,15 @@ static void hclge_update_link_status(struct hclge_dev *hdev) + + client->ops->link_status_change(handle, state); + hclge_config_mac_tnl_int(hdev, state); +- if (rclient && rclient->ops->link_status_change) +- rclient->ops->link_status_change(rhandle, state); ++ ++ if (test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state)) { ++ struct hnae3_handle *rhandle = &hdev->vport[0].roce; ++ struct hnae3_client *rclient = hdev->roce_client; ++ ++ if (rclient && rclient->ops->link_status_change) ++ rclient->ops->link_status_change(rhandle, ++ state); ++ } + + hclge_push_link_status(hdev); + } +@@ -11233,6 +11238,12 @@ static int hclge_init_client_instance(struct hnae3_client *client, + return ret; + } + ++static bool hclge_uninit_need_wait(struct hclge_dev *hdev) ++{ ++ return test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || ++ test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); ++} ++ + static void hclge_uninit_client_instance(struct hnae3_client *client, + struct hnae3_ae_dev *ae_dev) + { +@@ -11241,7 +11252,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, + + if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); +- while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) ++ while (hclge_uninit_need_wait(hdev)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); +diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h +index 5022b036ca4f9..c7962f322db2d 100644 +--- a/drivers/net/ethernet/intel/ice/ice.h ++++ b/drivers/net/ethernet/intel/ice/ice.h +@@ -407,7 +407,6 @@ struct ice_vsi { + struct ice_tc_cfg tc_cfg; + struct bpf_prog *xdp_prog; + struct ice_tx_ring **xdp_rings; /* XDP ring array */ +- unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */ + u16 num_xdp_txq; /* Used XDP queues */ + u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + +@@ -714,6 +713,25 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) + ring->flags |= ICE_TX_FLAGS_RING_XDP; + } + ++/** ++ * ice_get_xp_from_qid - get ZC XSK buffer pool bound to a queue ID ++ * @vsi: pointer to VSI ++ * @qid: index of a queue to look at XSK buff pool presence ++ * ++ * Return: A pointer to xsk_buff_pool structure if there is a buffer pool ++ * attached and configured as zero-copy, NULL otherwise. ++ */ ++static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi, ++ u16 qid) ++{ ++ struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid); ++ ++ if (!ice_is_xdp_ena_vsi(vsi)) ++ return NULL; ++ ++ return (pool && pool->dev) ? 
pool : NULL; ++} ++ + /** + * ice_xsk_pool - get XSK buffer pool bound to a ring + * @ring: Rx ring to use +@@ -726,10 +744,7 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) + struct ice_vsi *vsi = ring->vsi; + u16 qid = ring->q_index; + +- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) +- return NULL; +- +- return xsk_get_pool_from_qid(vsi->netdev, qid); ++ return ice_get_xp_from_qid(vsi, qid); + } + + /** +@@ -754,12 +769,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) + if (!ring) + return; + +- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) { +- ring->xsk_pool = NULL; +- return; +- } +- +- ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid); ++ ring->xsk_pool = ice_get_xp_from_qid(vsi, qid); + } + + /** +@@ -882,9 +892,16 @@ int ice_down(struct ice_vsi *vsi); + int ice_down_up(struct ice_vsi *vsi); + int ice_vsi_cfg_lan(struct ice_vsi *vsi); + struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); ++ ++enum ice_xdp_cfg { ++ ICE_XDP_CFG_FULL, /* Fully apply new config in .ndo_bpf() */ ++ ICE_XDP_CFG_PART, /* Save/use part of config in VSI rebuild */ ++}; ++ + int ice_vsi_determine_xdp_res(struct ice_vsi *vsi); +-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog); +-int ice_destroy_xdp_rings(struct ice_vsi *vsi); ++int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, ++ enum ice_xdp_cfg cfg_type); ++int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type); + int + ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); +diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c +index 2004120a58acd..13ca3342a0cea 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_lib.c +@@ -117,14 +117,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) + if (!vsi->q_vectors) + goto err_vectors; + +- vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL); +- if (!vsi->af_xdp_zc_qps) +- goto err_zc_qps; +- + return 0; + +-err_zc_qps: +- devm_kfree(dev, vsi->q_vectors); + err_vectors: + devm_kfree(dev, vsi->rxq_map); + err_rxq_map: +@@ -321,8 +315,6 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) + + dev = ice_pf_to_dev(pf); + +- bitmap_free(vsi->af_xdp_zc_qps); +- vsi->af_xdp_zc_qps = NULL; + /* free the ring and vector containers */ + devm_kfree(dev, vsi->q_vectors); + vsi->q_vectors = NULL; +@@ -2470,7 +2462,8 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) + ret = ice_vsi_determine_xdp_res(vsi); + if (ret) + goto unroll_vector_base; +- ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); ++ ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog, ++ ICE_XDP_CFG_PART); + if (ret) + goto unroll_vector_base; + } +@@ -2621,7 +2614,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi) + /* return value check can be skipped here, it always returns + * 0 if reset is in progress + */ +- ice_destroy_xdp_rings(vsi); ++ ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_PART); + + ice_vsi_clear_rings(vsi); + ice_vsi_free_q_vectors(vsi); +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index 8ebb6517f6b96..5d71febdcd4dd 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -2657,10 +2657,12 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) + * 
ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP + * @vsi: VSI to bring up Tx rings used by XDP + * @prog: bpf program that will be assigned to VSI ++ * @cfg_type: create from scratch or restore the existing configuration + * + * Return 0 on success and negative value on error + */ +-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) ++int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, ++ enum ice_xdp_cfg cfg_type) + { + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + int xdp_rings_rem = vsi->num_xdp_txq; +@@ -2736,7 +2738,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) + * taken into account at the end of ice_vsi_rebuild, where + * ice_cfg_vsi_lan is being called + */ +- if (ice_is_reset_in_progress(pf->state)) ++ if (cfg_type == ICE_XDP_CFG_PART) + return 0; + + /* tell the Tx scheduler that right now we have +@@ -2788,22 +2790,21 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) + /** + * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings + * @vsi: VSI to remove XDP rings ++ * @cfg_type: disable XDP permanently or allow it to be restored later + * + * Detach XDP rings from irq vectors, clean up the PF bitmap and free + * resources + */ +-int ice_destroy_xdp_rings(struct ice_vsi *vsi) ++int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) + { + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_pf *pf = vsi->back; + int i, v_idx; + + /* q_vectors are freed in reset path so there's no point in detaching +- * rings; in case of rebuild being triggered not from reset bits +- * in pf->state won't be set, so additionally check first q_vector +- * against NULL ++ * rings + */ +- if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) ++ if (cfg_type == ICE_XDP_CFG_PART) + goto free_qmap; + + ice_for_each_q_vector(vsi, v_idx) { +@@ -2844,7 +2845,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) + if (static_key_enabled(&ice_xdp_locking_key)) + static_branch_dec(&ice_xdp_locking_key); + +- if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) ++ if (cfg_type == ICE_XDP_CFG_PART) + return 0; + + ice_vsi_assign_bpf_prog(vsi, NULL); +@@ -2955,7 +2956,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, + if (xdp_ring_err) { + NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); + } else { +- xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); ++ xdp_ring_err = ice_prepare_xdp_rings(vsi, prog, ++ ICE_XDP_CFG_FULL); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + } +@@ -2966,7 +2968,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, + NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); + } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { + xdp_features_clear_redirect_target(vsi->netdev); +- xdp_ring_err = ice_destroy_xdp_rings(vsi); ++ xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); + /* reallocate Rx queues that were used for zero-copy */ +diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c +index f6f52a2480662..2fb43cded572c 100644 +--- a/drivers/net/ethernet/intel/ice/ice_nvm.c ++++ b/drivers/net/ethernet/intel/ice/ice_nvm.c +@@ -441,8 +441,7 @@ int + ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + u16 module_type) + { +- u16 pfa_len, pfa_ptr; +- u16 
next_tlv; ++ u16 pfa_len, pfa_ptr, next_tlv, max_tlv; + int status; + + status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr); +@@ -455,11 +454,23 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n"); + return status; + } ++ ++ /* The Preserved Fields Area contains a sequence of Type-Length-Value ++ * structures which define its contents. The PFA length includes all ++ * of the TLVs, plus the initial length word itself, *and* one final ++ * word at the end after all of the TLVs. ++ */ ++ if (check_add_overflow(pfa_ptr, pfa_len - 1, &max_tlv)) { ++ dev_warn(ice_hw_to_dev(hw), "PFA starts at offset %u. PFA length of %u caused 16-bit arithmetic overflow.\n", ++ pfa_ptr, pfa_len); ++ return -EINVAL; ++ } ++ + /* Starting with first TLV after PFA length, iterate through the list + * of TLVs to find the requested one. + */ + next_tlv = pfa_ptr + 1; +- while (next_tlv < pfa_ptr + pfa_len) { ++ while (next_tlv < max_tlv) { + u16 tlv_sub_module_type; + u16 tlv_len; + +@@ -483,10 +494,13 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + } + return -EINVAL; + } +- /* Check next TLV, i.e. current TLV pointer + length + 2 words +- * (for current TLV's type and length) +- */ +- next_tlv = next_tlv + tlv_len + 2; ++ ++ if (check_add_overflow(next_tlv, 2, &next_tlv) || ++ check_add_overflow(next_tlv, tlv_len, &next_tlv)) { ++ dev_warn(ice_hw_to_dev(hw), "TLV of type %u and length 0x%04x caused 16-bit arithmetic overflow. The PFA starts at 0x%04x and has length of 0x%04x\n", ++ tlv_sub_module_type, tlv_len, pfa_ptr, pfa_len); ++ return -EINVAL; ++ } + } + /* Module does not exist */ + return -ENOENT; +diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c +index 7bd71660011e4..f53566cb6bfbd 100644 +--- a/drivers/net/ethernet/intel/ice/ice_xsk.c ++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c +@@ -289,7 +289,6 @@ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid) + if (!pool) + return -EINVAL; + +- clear_bit(qid, vsi->af_xdp_zc_qps); + xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR); + + return 0; +@@ -320,8 +319,6 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) + if (err) + return err; + +- set_bit(qid, vsi->af_xdp_zc_qps); +- + return 0; + } + +@@ -369,11 +366,13 @@ ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present) + int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc) + { + struct ice_rx_ring *rx_ring; +- unsigned long q; ++ uint i; ++ ++ ice_for_each_rxq(vsi, i) { ++ rx_ring = vsi->rx_rings[i]; ++ if (!rx_ring->xsk_pool) ++ continue; + +- for_each_set_bit(q, vsi->af_xdp_zc_qps, +- max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) { +- rx_ring = vsi->rx_rings[q]; + if (ice_realloc_rx_xdp_bufs(rx_ring, zc)) + return -ENOMEM; + } +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +index 91a4ea529d077..00ef6d201b973 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +@@ -2506,7 +2506,17 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, + * - when available free entries are less. + * Lower priority ones out of avaialble free entries are always + * chosen when 'high vs low' question arises. ++ * ++ * For a VF base MCAM match rule is set by its PF. 
And all the ++ * further MCAM rules installed by VF on its own are ++ * concatenated with the base rule set by its PF. Hence PF entries ++ * should be at lower priority compared to VF entries. Otherwise ++ * base rule is hit always and rules installed by VF will be of ++ * no use. Hence if the request is from PF then allocate low ++ * priority entries. + */ ++ if (!(pcifunc & RVU_PFVF_FUNC_MASK)) ++ goto lprio_alloc; + + /* Get the search range for priority allocation request */ + if (req->priority) { +@@ -2515,17 +2525,6 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, + goto alloc; + } + +- /* For a VF base MCAM match rule is set by its PF. And all the +- * further MCAM rules installed by VF on its own are +- * concatenated with the base rule set by its PF. Hence PF entries +- * should be at lower priority compared to VF entries. Otherwise +- * base rule is hit always and rules installed by VF will be of +- * no use. Hence if the request is from PF and NOT a priority +- * allocation request then allocate low priority entries. +- */ +- if (!(pcifunc & RVU_PFVF_FUNC_MASK)) +- goto lprio_alloc; +- + /* Find out the search range for non-priority allocation request + * + * Get MCAM free entry count in middle zone. +@@ -2555,6 +2554,18 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, + reverse = true; + start = 0; + end = mcam->bmap_entries; ++ /* Ensure PF requests are always at bottom and if PF requests ++ * for higher/lower priority entry wrt reference entry then ++ * honour that criteria and start search for entries from bottom ++ * and not in mid zone. ++ */ ++ if (!(pcifunc & RVU_PFVF_FUNC_MASK) && ++ req->priority == NPC_MCAM_HIGHER_PRIO) ++ end = req->ref_entry; ++ ++ if (!(pcifunc & RVU_PFVF_FUNC_MASK) && ++ req->priority == NPC_MCAM_LOWER_PRIO) ++ start = req->ref_entry; + } + + alloc: +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 455907b1167a0..e87a776ea2bfd 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4704,7 +4704,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + + /* Verify if UDP port is being offloaded by HW */ + if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) +- return features; ++ return vxlan_features_check(skb, features); + + #if IS_ENABLED(CONFIG_GENEVE) + /* Support Geneve offload for default UDP port */ +@@ -4730,7 +4730,6 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct mlx5e_priv *priv = netdev_priv(netdev); + + features = vlan_features_check(skb, features); +- features = vxlan_features_check(skb, features); + + /* Validate if the tunneled packet is being offloaded by HW */ + if (skb->encapsulation && +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +index 58f4c0d0fafa2..70898f0a9866c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +@@ -373,6 +373,10 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev) + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; ++ if (pci_channel_offline(dev->pdev)) { ++ mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n"); ++ return -EACCES; ++ } + + cond_resched(); + } while (!time_after(jiffies, end)); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c 
+index 2fb2598b775ef..d798834c4e755 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c +@@ -248,6 +248,10 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev) + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; ++ if (pci_channel_offline(dev->pdev)) { ++ mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n"); ++ goto unlock; ++ } + + msleep(20); + } while (!time_after(jiffies, end)); +@@ -317,6 +321,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) + mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n"); + return -ENODEV; + } ++ if (pci_channel_offline(dev->pdev)) { ++ mlx5_core_err(dev, "PCI channel offline, stop waiting for PCI\n"); ++ return -EACCES; ++ } + msleep(100); + } + return 0; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +index 7d9bbb494d95b..005661248c7e9 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +@@ -88,9 +88,13 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, + &dest, 1); + if (IS_ERR(lag_definer->rules[idx])) { + err = PTR_ERR(lag_definer->rules[idx]); +- while (i--) +- while (j--) ++ do { ++ while (j--) { ++ idx = i * ldev->buckets + j; + mlx5_del_flow_rules(lag_definer->rules[idx]); ++ } ++ j = ldev->buckets; ++ } while (i--); + goto destroy_fg; + } + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c +index 6b774e0c27665..d0b595ba61101 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c +@@ -74,6 +74,10 @@ int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) + ret = -EBUSY; + goto pci_unlock; + } ++ if (pci_channel_offline(dev->pdev)) { ++ ret = -EACCES; ++ goto pci_unlock; ++ } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 9710ddac1f1a8..2237b3d01e0e5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1287,6 +1287,9 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) + + if (!err) + mlx5_function_disable(dev, boot); ++ else ++ mlx5_stop_health_poll(dev, boot); ++ + return err; + } + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +index 4f05cddc65cb4..7e6e1bed525af 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +@@ -296,10 +296,8 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) + if (ret) + return ret; + +- if (qcq->napi.poll) +- napi_enable(&qcq->napi); +- + if (qcq->flags & IONIC_QCQ_F_INTR) { ++ napi_enable(&qcq->napi); + irq_set_affinity_hint(qcq->intr.vector, + &qcq->intr.affinity_mask); + ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +index 31631e3f89d0a..51ff53120307a 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +@@ -93,6 +93,7 @@ struct ethqos_emac_driver_data { + bool has_emac_ge_3; + 
const char *link_clk_name; + bool has_integrated_pcs; ++ u32 dma_addr_width; + struct dwmac4_addrs dwmac4_addrs; + }; + +@@ -272,6 +273,7 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = { + .has_emac_ge_3 = true, + .link_clk_name = "phyaux", + .has_integrated_pcs = true, ++ .dma_addr_width = 36, + .dwmac4_addrs = { + .dma_chan = 0x00008100, + .dma_chan_offset = 0x1000, +@@ -816,6 +818,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) + plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI; + if (data->has_integrated_pcs) + plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS; ++ if (data->dma_addr_width) ++ plat_dat->host_dma_width = data->dma_addr_width; + + if (ethqos->serdes_phy) { + plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +index 6ad3e0a119366..2467598f9d92f 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +@@ -343,10 +343,11 @@ static int tc_setup_cbs(struct stmmac_priv *priv, + struct tc_cbs_qopt_offload *qopt) + { + u32 tx_queues_count = priv->plat->tx_queues_to_use; ++ s64 port_transmit_rate_kbps; + u32 queue = qopt->queue; +- u32 ptr, speed_div; + u32 mode_to_use; + u64 value; ++ u32 ptr; + int ret; + + /* Queue 0 is not AVB capable */ +@@ -355,30 +356,26 @@ static int tc_setup_cbs(struct stmmac_priv *priv, + if (!priv->dma_cap.av) + return -EOPNOTSUPP; + ++ port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope; ++ + /* Port Transmit Rate and Speed Divider */ +- switch (priv->speed) { ++ switch (div_s64(port_transmit_rate_kbps, 1000)) { + case SPEED_10000: +- ptr = 32; +- speed_div = 10000000; +- break; + case SPEED_5000: + ptr = 32; +- speed_div = 5000000; + break; + case SPEED_2500: +- ptr = 8; +- speed_div = 2500000; +- break; + case SPEED_1000: + ptr = 8; +- speed_div = 1000000; + break; + case SPEED_100: + ptr = 4; +- speed_div = 100000; + break; + default: +- return -EOPNOTSUPP; ++ netdev_err(priv->dev, ++ "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", ++ port_transmit_rate_kbps); ++ return -EINVAL; + } + + mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; +@@ -398,10 +395,10 @@ static int tc_setup_cbs(struct stmmac_priv *priv, + } + + /* Final adjustments for HW */ +- value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div); ++ value = div_s64(qopt->idleslope * 1024ll * ptr, port_transmit_rate_kbps); + priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0); + +- value = div_s64(-qopt->sendslope * 1024ll * ptr, speed_div); ++ value = div_s64(-qopt->sendslope * 1024ll * ptr, port_transmit_rate_kbps); + priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0); + + value = qopt->hicredit * 1024ll * 8; +diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c +index 0a18b67d0d669..8333a5620deff 100644 +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -915,6 +915,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct geneve_dev *geneve, + const struct ip_tunnel_info *info) + { ++ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; + bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); + struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); + const struct ip_tunnel_key *key = &info->key; +@@ -926,7 +927,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, + __be16 sport; + int err; + +- if (!skb_vlan_inet_prepare(skb)) ++ if 
(!skb_vlan_inet_prepare(skb, inner_proto_inherit)) + return -EINVAL; + + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); +@@ -999,7 +1000,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, + } + + err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), +- geneve->cfg.inner_proto_inherit); ++ inner_proto_inherit); + if (unlikely(err)) + return err; + +@@ -1015,6 +1016,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct geneve_dev *geneve, + const struct ip_tunnel_info *info) + { ++ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; + bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); + struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); + const struct ip_tunnel_key *key = &info->key; +@@ -1024,7 +1026,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + __be16 sport; + int err; + +- if (!skb_vlan_inet_prepare(skb)) ++ if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) + return -EINVAL; + + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); +@@ -1079,7 +1081,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + ttl = ttl ? : ip6_dst_hoplimit(dst); + } + err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), +- geneve->cfg.inner_proto_inherit); ++ inner_proto_inherit); + if (unlikely(err)) + return err; + +diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c +index fc31fcfb0cdb4..366ae22534373 100644 +--- a/drivers/net/phy/micrel.c ++++ b/drivers/net/phy/micrel.c +@@ -770,6 +770,17 @@ static int ksz8061_config_init(struct phy_device *phydev) + { + int ret; + ++ /* Chip can be powered down by the bootstrap code. */ ++ ret = phy_read(phydev, MII_BMCR); ++ if (ret < 0) ++ return ret; ++ if (ret & BMCR_PDOWN) { ++ ret = phy_write(phydev, MII_BMCR, ret & ~BMCR_PDOWN); ++ if (ret < 0) ++ return ret; ++ usleep_range(1000, 2000); ++ } ++ + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); + if (ret) + return ret; +@@ -1821,7 +1832,7 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = { + {0x1c, 0x20, 0xeeee}, + }; + +-static int ksz9477_config_init(struct phy_device *phydev) ++static int ksz9477_phy_errata(struct phy_device *phydev) + { + int err; + int i; +@@ -1849,16 +1860,30 @@ static int ksz9477_config_init(struct phy_device *phydev) + return err; + } + ++ err = genphy_restart_aneg(phydev); ++ if (err) ++ return err; ++ ++ return err; ++} ++ ++static int ksz9477_config_init(struct phy_device *phydev) ++{ ++ int err; ++ ++ /* Only KSZ9897 family of switches needs this fix. */ ++ if ((phydev->phy_id & 0xf) == 1) { ++ err = ksz9477_phy_errata(phydev); ++ if (err) ++ return err; ++ } ++ + /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes + * in this switch shall be regarded as broken. + */ + if (phydev->dev_flags & MICREL_NO_EEE) + phydev->eee_broken_modes = -1; + +- err = genphy_restart_aneg(phydev); +- if (err) +- return err; +- + return kszphy_config_init(phydev); + } + +@@ -1967,6 +1992,71 @@ static int kszphy_resume(struct phy_device *phydev) + return 0; + } + ++static int ksz9477_resume(struct phy_device *phydev) ++{ ++ int ret; ++ ++ /* No need to initialize registers if not powered down. 
*/ ++ ret = phy_read(phydev, MII_BMCR); ++ if (ret < 0) ++ return ret; ++ if (!(ret & BMCR_PDOWN)) ++ return 0; ++ ++ genphy_resume(phydev); ++ ++ /* After switching from power-down to normal mode, an internal global ++ * reset is automatically generated. Wait a minimum of 1 ms before ++ * read/write access to the PHY registers. ++ */ ++ usleep_range(1000, 2000); ++ ++ /* Only KSZ9897 family of switches needs this fix. */ ++ if ((phydev->phy_id & 0xf) == 1) { ++ ret = ksz9477_phy_errata(phydev); ++ if (ret) ++ return ret; ++ } ++ ++ /* Enable PHY Interrupts */ ++ if (phy_interrupt_is_valid(phydev)) { ++ phydev->interrupts = PHY_INTERRUPT_ENABLED; ++ if (phydev->drv->config_intr) ++ phydev->drv->config_intr(phydev); ++ } ++ ++ return 0; ++} ++ ++static int ksz8061_resume(struct phy_device *phydev) ++{ ++ int ret; ++ ++ /* This function can be called twice when the Ethernet device is on. */ ++ ret = phy_read(phydev, MII_BMCR); ++ if (ret < 0) ++ return ret; ++ if (!(ret & BMCR_PDOWN)) ++ return 0; ++ ++ genphy_resume(phydev); ++ usleep_range(1000, 2000); ++ ++ /* Re-program the value after chip is reset. */ ++ ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); ++ if (ret) ++ return ret; ++ ++ /* Enable PHY Interrupts */ ++ if (phy_interrupt_is_valid(phydev)) { ++ phydev->interrupts = PHY_INTERRUPT_ENABLED; ++ if (phydev->drv->config_intr) ++ phydev->drv->config_intr(phydev); ++ } ++ ++ return 0; ++} ++ + static int kszphy_probe(struct phy_device *phydev) + { + const struct kszphy_type *type = phydev->drv->driver_data; +@@ -4762,7 +4852,7 @@ static struct phy_driver ksphy_driver[] = { + .config_intr = kszphy_config_intr, + .handle_interrupt = kszphy_handle_interrupt, + .suspend = kszphy_suspend, +- .resume = kszphy_resume, ++ .resume = ksz8061_resume, + }, { + .phy_id = PHY_ID_KSZ9021, + .phy_id_mask = 0x000ffffe, +@@ -4916,7 +5006,7 @@ static struct phy_driver ksphy_driver[] = { + .config_intr = kszphy_config_intr, + .handle_interrupt = kszphy_handle_interrupt, + .suspend = genphy_suspend, +- .resume = genphy_resume, ++ .resume = ksz9477_resume, + .get_features = ksz9477_get_features, + } }; + +diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c +index 3679a43f4eb02..8152e14250f2d 100644 +--- a/drivers/net/phy/sfp.c ++++ b/drivers/net/phy/sfp.c +@@ -2394,8 +2394,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) + + /* Handle remove event globally, it resets this state machine */ + if (event == SFP_E_REMOVE) { +- if (sfp->sm_mod_state > SFP_MOD_PROBE) +- sfp_sm_mod_remove(sfp); ++ sfp_sm_mod_remove(sfp); + sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); + return; + } +diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c +index 0578864792b60..beebe09eb88ff 100644 +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -2034,8 +2034,8 @@ vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter) + rq->data_ring.base, + rq->data_ring.basePA); + rq->data_ring.base = NULL; +- rq->data_ring.desc_size = 0; + } ++ rq->data_ring.desc_size = 0; + } + } + +diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c +index c24ff08abe0da..8268fa331826e 100644 +--- a/drivers/net/vxlan/vxlan_core.c ++++ b/drivers/net/vxlan/vxlan_core.c +@@ -1446,6 +1446,10 @@ static bool vxlan_snoop(struct net_device *dev, + struct vxlan_fdb *f; + u32 ifindex = 0; + ++ /* Ignore packets from invalid src-address */ ++ if (!is_valid_ether_addr(src_mac)) ++ return true; ++ + #if IS_ENABLED(CONFIG_IPV6) + if 
(src_ip->sa.sa_family == AF_INET6 && + (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)) +diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +index 8faf4e7872bb9..a56593b6135f6 100644 +--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +@@ -1824,8 +1824,8 @@ struct iwl_drv *iwl_drv_start(struct iwl_trans *trans) + err_fw: + #ifdef CONFIG_IWLWIFI_DEBUGFS + debugfs_remove_recursive(drv->dbgfs_drv); +- iwl_dbg_tlv_free(drv->trans); + #endif ++ iwl_dbg_tlv_free(drv->trans); + kfree(drv); + err: + return ERR_PTR(ret); +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +index 1d5ee4330f29f..51f396287dc69 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +@@ -92,20 +92,10 @@ void iwl_mvm_mfu_assert_dump_notif(struct iwl_mvm *mvm, + { + struct iwl_rx_packet *pkt = rxb_addr(rxb); + struct iwl_mfu_assert_dump_notif *mfu_dump_notif = (void *)pkt->data; +- __le32 *dump_data = mfu_dump_notif->data; +- int n_words = le32_to_cpu(mfu_dump_notif->data_size) / sizeof(__le32); +- int i; + + if (mfu_dump_notif->index_num == 0) + IWL_INFO(mvm, "MFUART assert id 0x%x occurred\n", + le32_to_cpu(mfu_dump_notif->assert_id)); +- +- for (i = 0; i < n_words; i++) +- IWL_DEBUG_INFO(mvm, +- "MFUART assert dump, dword %u: 0x%08x\n", +- le16_to_cpu(mfu_dump_notif->index_num) * +- n_words + i, +- le32_to_cpu(dump_data[i])); + } + + static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +index aef8824469e1e..4d9a872818a52 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +@@ -73,8 +73,6 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw, + goto out_free_bf; + + iwl_mvm_tcm_add_vif(mvm, vif); +- INIT_DELAYED_WORK(&mvmvif->csa_work, +- iwl_mvm_channel_switch_disconnect_wk); + + if (vif->type == NL80211_IFTYPE_MONITOR) { + mvm->monitor_on = true; +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h +index 1ca375a5cf6b5..639cecc7a6e60 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h +@@ -122,13 +122,8 @@ enum { + + #define LINK_QUAL_AGG_FRAME_LIMIT_DEF (63) + #define LINK_QUAL_AGG_FRAME_LIMIT_MAX (63) +-/* +- * FIXME - various places in firmware API still use u8, +- * e.g. LQ command and SCD config command. +- * This should be 256 instead. +- */ +-#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF (255) +-#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX (255) ++#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF (64) ++#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX (64) + #define LINK_QUAL_AGG_FRAME_LIMIT_MIN (0) + + #define LQ_SIZE 2 /* 2 mode tables: "Active" and "Search" */ +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +index e9360b555ac93..8cff24d5f5f40 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +@@ -2730,8 +2730,11 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi, + * + * We mark it as mac header, for upper layers to know where + * all radio tap header ends. 
++ * ++ * Since data doesn't move data while putting data on skb and that is ++ * the only way we use, data + len is the next place that hdr would be put + */ +- skb_reset_mac_header(skb); ++ skb_set_mac_header(skb, skb->len); + + /* + * Override the nss from the rx_vec since the rate_n_flags has +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +index 03ec900a33433..0841f1d6dc475 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +@@ -1304,7 +1304,7 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, + if (IWL_MVM_ADWELL_MAX_BUDGET) + cmd->v7.adwell_max_budget = + cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET); +- else if (params->ssids && params->ssids[0].ssid_len) ++ else if (params->n_ssids && params->ssids[0].ssid_len) + cmd->v7.adwell_max_budget = + cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN); + else +@@ -1406,7 +1406,7 @@ iwl_mvm_scan_umac_dwell_v11(struct iwl_mvm *mvm, + if (IWL_MVM_ADWELL_MAX_BUDGET) + general_params->adwell_max_budget = + cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET); +- else if (params->ssids && params->ssids[0].ssid_len) ++ else if (params->n_ssids && params->ssids[0].ssid_len) + general_params->adwell_max_budget = + cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN); + else +diff --git a/drivers/net/wwan/iosm/iosm_ipc_devlink.c b/drivers/net/wwan/iosm/iosm_ipc_devlink.c +index 2fe724d623c06..33c5a46f1b922 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_devlink.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_devlink.c +@@ -210,7 +210,7 @@ static int ipc_devlink_create_region(struct iosm_devlink *devlink) + rc = PTR_ERR(devlink->cd_regions[i]); + dev_err(devlink->dev, "Devlink region fail,err %d", rc); + /* Delete previously created regions */ +- for ( ; i >= 0; i--) ++ for (i--; i >= 0; i--) + devlink_region_destroy(devlink->cd_regions[i]); + goto region_create_fail; + } +diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c +index 391b1465ebfd5..803efc97fd1ea 100644 +--- a/drivers/nvme/host/pr.c ++++ b/drivers/nvme/host/pr.c +@@ -77,7 +77,7 @@ static int nvme_sc_to_pr_err(int nvme_sc) + if (nvme_is_path_error(nvme_sc)) + return PR_STS_PATH_FAILED; + +- switch (nvme_sc) { ++ switch (nvme_sc & 0x7ff) { + case NVME_SC_SUCCESS: + return PR_STS_SUCCESS; + case NVME_SC_RESERVATION_CONFLICT: +diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c +index 9fe07d7efa96c..d4a61645d61a5 100644 +--- a/drivers/nvme/target/passthru.c ++++ b/drivers/nvme/target/passthru.c +@@ -226,13 +226,13 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) + req->cmd->common.opcode == nvme_admin_identify) { + switch (req->cmd->identify.cns) { + case NVME_ID_CNS_CTRL: +- nvmet_passthru_override_id_ctrl(req); ++ status = nvmet_passthru_override_id_ctrl(req); + break; + case NVME_ID_CNS_NS: +- nvmet_passthru_override_id_ns(req); ++ status = nvmet_passthru_override_id_ns(req); + break; + case NVME_ID_CNS_NS_DESC_LIST: +- nvmet_passthru_override_id_descs(req); ++ status = nvmet_passthru_override_id_descs(req); + break; + } + } else if (status < 0) +diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c +index 0af0e965fb57e..1e3c3192d122c 100644 +--- a/drivers/pci/controller/pcie-rockchip-ep.c ++++ b/drivers/pci/controller/pcie-rockchip-ep.c +@@ -98,10 +98,8 @@ static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn, + + /* All functions share the same vendor ID with 
function 0 */ + if (fn == 0) { +- u32 vid_regs = (hdr->vendorid & GENMASK(15, 0)) | +- (hdr->subsys_vendor_id & GENMASK(31, 16)) << 16; +- +- rockchip_pcie_write(rockchip, vid_regs, ++ rockchip_pcie_write(rockchip, ++ hdr->vendorid | hdr->subsys_vendor_id << 16, + PCIE_CORE_CONFIG_VENDOR); + } + +diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c +index e61bfaf8b5c48..86b95206cb1bd 100644 +--- a/drivers/platform/x86/dell/dell-smbios-base.c ++++ b/drivers/platform/x86/dell/dell-smbios-base.c +@@ -11,6 +11,7 @@ + */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + ++#include <linux/container_of.h> + #include <linux/kernel.h> + #include <linux/module.h> + #include <linux/capability.h> +@@ -25,11 +26,16 @@ static u32 da_supported_commands; + static int da_num_tokens; + static struct platform_device *platform_device; + static struct calling_interface_token *da_tokens; +-static struct device_attribute *token_location_attrs; +-static struct device_attribute *token_value_attrs; ++static struct token_sysfs_data *token_entries; + static struct attribute **token_attrs; + static DEFINE_MUTEX(smbios_mutex); + ++struct token_sysfs_data { ++ struct device_attribute location_attr; ++ struct device_attribute value_attr; ++ struct calling_interface_token *token; ++}; ++ + struct smbios_device { + struct list_head list; + struct device *device; +@@ -416,47 +422,26 @@ static void __init find_tokens(const struct dmi_header *dm, void *dummy) + } + } + +-static int match_attribute(struct device *dev, +- struct device_attribute *attr) +-{ +- int i; +- +- for (i = 0; i < da_num_tokens * 2; i++) { +- if (!token_attrs[i]) +- continue; +- if (strcmp(token_attrs[i]->name, attr->attr.name) == 0) +- return i/2; +- } +- dev_dbg(dev, "couldn't match: %s\n", attr->attr.name); +- return -EINVAL; +-} +- + static ssize_t location_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- int i; ++ struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, location_attr); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +- i = match_attribute(dev, attr); +- if (i > 0) +- return sysfs_emit(buf, "%08x", da_tokens[i].location); +- return 0; ++ return sysfs_emit(buf, "%08x", data->token->location); + } + + static ssize_t value_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- int i; ++ struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, value_attr); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +- i = match_attribute(dev, attr); +- if (i > 0) +- return sysfs_emit(buf, "%08x", da_tokens[i].value); +- return 0; ++ return sysfs_emit(buf, "%08x", data->token->value); + } + + static struct attribute_group smbios_attribute_group = { +@@ -473,22 +458,15 @@ static int build_tokens_sysfs(struct platform_device *dev) + { + char *location_name; + char *value_name; +- size_t size; + int ret; + int i, j; + +- /* (number of tokens + 1 for null terminated */ +- size = sizeof(struct device_attribute) * (da_num_tokens + 1); +- token_location_attrs = kzalloc(size, GFP_KERNEL); +- if (!token_location_attrs) ++ token_entries = kcalloc(da_num_tokens, sizeof(*token_entries), GFP_KERNEL); ++ if (!token_entries) + return -ENOMEM; +- token_value_attrs = kzalloc(size, GFP_KERNEL); +- if (!token_value_attrs) +- goto out_allocate_value; + + /* need to store both location and value + terminator*/ +- size = sizeof(struct attribute *) * ((2 * da_num_tokens) + 1); +- token_attrs = kzalloc(size, GFP_KERNEL); ++ 
token_attrs = kcalloc((2 * da_num_tokens) + 1, sizeof(*token_attrs), GFP_KERNEL); + if (!token_attrs) + goto out_allocate_attrs; + +@@ -496,27 +474,32 @@ static int build_tokens_sysfs(struct platform_device *dev) + /* skip empty */ + if (da_tokens[i].tokenID == 0) + continue; ++ ++ token_entries[i].token = &da_tokens[i]; ++ + /* add location */ + location_name = kasprintf(GFP_KERNEL, "%04x_location", + da_tokens[i].tokenID); + if (location_name == NULL) + goto out_unwind_strings; +- sysfs_attr_init(&token_location_attrs[i].attr); +- token_location_attrs[i].attr.name = location_name; +- token_location_attrs[i].attr.mode = 0444; +- token_location_attrs[i].show = location_show; +- token_attrs[j++] = &token_location_attrs[i].attr; ++ ++ sysfs_attr_init(&token_entries[i].location_attr.attr); ++ token_entries[i].location_attr.attr.name = location_name; ++ token_entries[i].location_attr.attr.mode = 0444; ++ token_entries[i].location_attr.show = location_show; ++ token_attrs[j++] = &token_entries[i].location_attr.attr; + + /* add value */ + value_name = kasprintf(GFP_KERNEL, "%04x_value", + da_tokens[i].tokenID); + if (value_name == NULL) + goto loop_fail_create_value; +- sysfs_attr_init(&token_value_attrs[i].attr); +- token_value_attrs[i].attr.name = value_name; +- token_value_attrs[i].attr.mode = 0444; +- token_value_attrs[i].show = value_show; +- token_attrs[j++] = &token_value_attrs[i].attr; ++ ++ sysfs_attr_init(&token_entries[i].value_attr.attr); ++ token_entries[i].value_attr.attr.name = value_name; ++ token_entries[i].value_attr.attr.mode = 0444; ++ token_entries[i].value_attr.show = value_show; ++ token_attrs[j++] = &token_entries[i].value_attr.attr; + continue; + + loop_fail_create_value: +@@ -532,14 +515,12 @@ static int build_tokens_sysfs(struct platform_device *dev) + + out_unwind_strings: + while (i--) { +- kfree(token_location_attrs[i].attr.name); +- kfree(token_value_attrs[i].attr.name); ++ kfree(token_entries[i].location_attr.attr.name); ++ kfree(token_entries[i].value_attr.attr.name); + } + kfree(token_attrs); + out_allocate_attrs: +- kfree(token_value_attrs); +-out_allocate_value: +- kfree(token_location_attrs); ++ kfree(token_entries); + + return -ENOMEM; + } +@@ -551,12 +532,11 @@ static void free_group(struct platform_device *pdev) + sysfs_remove_group(&pdev->dev.kobj, + &smbios_attribute_group); + for (i = 0; i < da_num_tokens; i++) { +- kfree(token_location_attrs[i].attr.name); +- kfree(token_value_attrs[i].attr.name); ++ kfree(token_entries[i].location_attr.attr.name); ++ kfree(token_entries[i].value_attr.attr.name); + } + kfree(token_attrs); +- kfree(token_value_attrs); +- kfree(token_location_attrs); ++ kfree(token_entries); + } + + static int __init dell_smbios_init(void) +diff --git a/drivers/pmdomain/ti/ti_sci_pm_domains.c b/drivers/pmdomain/ti/ti_sci_pm_domains.c +index 34645104fe45d..f520228e1b6ae 100644 +--- a/drivers/pmdomain/ti/ti_sci_pm_domains.c ++++ b/drivers/pmdomain/ti/ti_sci_pm_domains.c +@@ -114,6 +114,18 @@ static const struct of_device_id ti_sci_pm_domain_matches[] = { + }; + MODULE_DEVICE_TABLE(of, ti_sci_pm_domain_matches); + ++static bool ti_sci_pm_idx_exists(struct ti_sci_genpd_provider *pd_provider, u32 idx) ++{ ++ struct ti_sci_pm_domain *pd; ++ ++ list_for_each_entry(pd, &pd_provider->pd_list, node) { ++ if (pd->idx == idx) ++ return true; ++ } ++ ++ return false; ++} ++ + static int ti_sci_pm_domain_probe(struct platform_device *pdev) + { + struct device *dev = &pdev->dev; +@@ -149,8 +161,14 @@ static int ti_sci_pm_domain_probe(struct 
platform_device *pdev) + break; + + if (args.args_count >= 1 && args.np == dev->of_node) { +- if (args.args[0] > max_id) ++ if (args.args[0] > max_id) { + max_id = args.args[0]; ++ } else { ++ if (ti_sci_pm_idx_exists(pd_provider, args.args[0])) { ++ index++; ++ continue; ++ } ++ } + + pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL); + if (!pd) +diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c +index 5a3a4cc0bec82..91cc6ffa0095e 100644 +--- a/drivers/ptp/ptp_chardev.c ++++ b/drivers/ptp/ptp_chardev.c +@@ -84,7 +84,8 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, + } + + if (info->verify(info, pin, func, chan)) { +- pr_err("driver cannot use function %u on pin %u\n", func, chan); ++ pr_err("driver cannot use function %u and channel %u on pin %u\n", ++ func, chan, pin); + return -EOPNOTSUPP; + } + +diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c +index ad3415a3851b2..50e486bcfa103 100644 +--- a/drivers/remoteproc/ti_k3_r5_remoteproc.c ++++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c +@@ -103,12 +103,14 @@ struct k3_r5_soc_data { + * @dev: cached device pointer + * @mode: Mode to configure the Cluster - Split or LockStep + * @cores: list of R5 cores within the cluster ++ * @core_transition: wait queue to sync core state changes + * @soc_data: SoC-specific feature data for a R5FSS + */ + struct k3_r5_cluster { + struct device *dev; + enum cluster_mode mode; + struct list_head cores; ++ wait_queue_head_t core_transition; + const struct k3_r5_soc_data *soc_data; + }; + +@@ -128,6 +130,7 @@ struct k3_r5_cluster { + * @atcm_enable: flag to control ATCM enablement + * @btcm_enable: flag to control BTCM enablement + * @loczrama: flag to dictate which TCM is at device address 0x0 ++ * @released_from_reset: flag to signal when core is out of reset + */ + struct k3_r5_core { + struct list_head elem; +@@ -144,6 +147,7 @@ struct k3_r5_core { + u32 atcm_enable; + u32 btcm_enable; + u32 loczrama; ++ bool released_from_reset; + }; + + /** +@@ -460,6 +464,8 @@ static int k3_r5_rproc_prepare(struct rproc *rproc) + ret); + return ret; + } ++ core->released_from_reset = true; ++ wake_up_interruptible(&cluster->core_transition); + + /* + * Newer IP revisions like on J7200 SoCs support h/w auto-initialization +@@ -542,7 +548,7 @@ static int k3_r5_rproc_start(struct rproc *rproc) + struct k3_r5_rproc *kproc = rproc->priv; + struct k3_r5_cluster *cluster = kproc->cluster; + struct device *dev = kproc->dev; +- struct k3_r5_core *core; ++ struct k3_r5_core *core0, *core; + u32 boot_addr; + int ret; + +@@ -568,6 +574,16 @@ static int k3_r5_rproc_start(struct rproc *rproc) + goto unroll_core_run; + } + } else { ++ /* do not allow core 1 to start before core 0 */ ++ core0 = list_first_entry(&cluster->cores, struct k3_r5_core, ++ elem); ++ if (core != core0 && core0->rproc->state == RPROC_OFFLINE) { ++ dev_err(dev, "%s: can not start core 1 before core 0\n", ++ __func__); ++ ret = -EPERM; ++ goto put_mbox; ++ } ++ + ret = k3_r5_core_run(core); + if (ret) + goto put_mbox; +@@ -613,7 +629,8 @@ static int k3_r5_rproc_stop(struct rproc *rproc) + { + struct k3_r5_rproc *kproc = rproc->priv; + struct k3_r5_cluster *cluster = kproc->cluster; +- struct k3_r5_core *core = kproc->core; ++ struct device *dev = kproc->dev; ++ struct k3_r5_core *core1, *core = kproc->core; + int ret; + + /* halt all applicable cores */ +@@ -626,6 +643,16 @@ static int k3_r5_rproc_stop(struct rproc *rproc) + } + } + } else { ++ /* do not allow core 0 to stop before core 
1 */ ++ core1 = list_last_entry(&cluster->cores, struct k3_r5_core, ++ elem); ++ if (core != core1 && core1->rproc->state != RPROC_OFFLINE) { ++ dev_err(dev, "%s: can not stop core 0 before core 1\n", ++ __func__); ++ ret = -EPERM; ++ goto out; ++ } ++ + ret = k3_r5_core_halt(core); + if (ret) + goto out; +@@ -1140,6 +1167,12 @@ static int k3_r5_rproc_configure_mode(struct k3_r5_rproc *kproc) + return ret; + } + ++ /* ++ * Skip the waiting mechanism for sequential power-on of cores if the ++ * core has already been booted by another entity. ++ */ ++ core->released_from_reset = c_state; ++ + ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl, + &stat); + if (ret < 0) { +@@ -1280,6 +1313,26 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev) + cluster->mode == CLUSTER_MODE_SINGLECPU || + cluster->mode == CLUSTER_MODE_SINGLECORE) + break; ++ ++ /* ++ * R5 cores require to be powered on sequentially, core0 ++ * should be in higher power state than core1 in a cluster ++ * So, wait for current core to power up before proceeding ++ * to next core and put timeout of 2sec for each core. ++ * ++ * This waiting mechanism is necessary because ++ * rproc_auto_boot_callback() for core1 can be called before ++ * core0 due to thread execution order. ++ */ ++ ret = wait_event_interruptible_timeout(cluster->core_transition, ++ core->released_from_reset, ++ msecs_to_jiffies(2000)); ++ if (ret <= 0) { ++ dev_err(dev, ++ "Timed out waiting for %s core to power up!\n", ++ rproc->name); ++ return ret; ++ } + } + + return 0; +@@ -1709,6 +1762,7 @@ static int k3_r5_probe(struct platform_device *pdev) + cluster->dev = dev; + cluster->soc_data = data; + INIT_LIST_HEAD(&cluster->cores); ++ init_waitqueue_head(&cluster->core_transition); + + ret = of_property_read_u32(np, "ti,cluster-mode", &cluster->mode); + if (ret < 0 && ret != -EINVAL) { +diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c +index aa5b535e6662b..8e9e278d04495 100644 +--- a/drivers/scsi/mpi3mr/mpi3mr_app.c ++++ b/drivers/scsi/mpi3mr/mpi3mr_app.c +@@ -1854,10 +1854,72 @@ persistent_id_show(struct device *dev, struct device_attribute *attr, + } + static DEVICE_ATTR_RO(persistent_id); + ++/** ++ * sas_ncq_prio_supported_show - Indicate if device supports NCQ priority ++ * @dev: pointer to embedded device ++ * @attr: sas_ncq_prio_supported attribute descriptor ++ * @buf: the buffer returned ++ * ++ * A sysfs 'read-only' sdev attribute, only works with SATA devices ++ */ ++static ssize_t ++sas_ncq_prio_supported_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct scsi_device *sdev = to_scsi_device(dev); ++ ++ return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); ++} ++static DEVICE_ATTR_RO(sas_ncq_prio_supported); ++ ++/** ++ * sas_ncq_prio_enable_show - send prioritized io commands to device ++ * @dev: pointer to embedded device ++ * @attr: sas_ncq_prio_enable attribute descriptor ++ * @buf: the buffer returned ++ * ++ * A sysfs 'read/write' sdev attribute, only works with SATA devices ++ */ ++static ssize_t ++sas_ncq_prio_enable_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct scsi_device *sdev = to_scsi_device(dev); ++ struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; ++ ++ if (!sdev_priv_data) ++ return 0; ++ ++ return sysfs_emit(buf, "%d\n", sdev_priv_data->ncq_prio_enable); ++} ++ ++static ssize_t ++sas_ncq_prio_enable_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, 
size_t count) ++{ ++ struct scsi_device *sdev = to_scsi_device(dev); ++ struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; ++ bool ncq_prio_enable = 0; ++ ++ if (kstrtobool(buf, &ncq_prio_enable)) ++ return -EINVAL; ++ ++ if (!sas_ata_ncq_prio_supported(sdev)) ++ return -EINVAL; ++ ++ sdev_priv_data->ncq_prio_enable = ncq_prio_enable; ++ ++ return strlen(buf); ++} ++static DEVICE_ATTR_RW(sas_ncq_prio_enable); ++ + static struct attribute *mpi3mr_dev_attrs[] = { + &dev_attr_sas_address.attr, + &dev_attr_device_handle.attr, + &dev_attr_persistent_id.attr, ++ &dev_attr_sas_ncq_prio_supported.attr, ++ &dev_attr_sas_ncq_prio_enable.attr, + NULL, + }; + +diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c +index aa29e250cf15f..04116e02ffe8c 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -8486,6 +8486,12 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) + ioc->pd_handles_sz = (ioc->facts.MaxDevHandle / 8); + if (ioc->facts.MaxDevHandle % 8) + ioc->pd_handles_sz++; ++ /* ++ * pd_handles_sz should have, at least, the minimal room for ++ * set_bit()/test_bit(), otherwise out-of-memory touch may occur. ++ */ ++ ioc->pd_handles_sz = ALIGN(ioc->pd_handles_sz, sizeof(unsigned long)); ++ + ioc->pd_handles = kzalloc(ioc->pd_handles_sz, + GFP_KERNEL); + if (!ioc->pd_handles) { +@@ -8503,6 +8509,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) + ioc->pend_os_device_add_sz = (ioc->facts.MaxDevHandle / 8); + if (ioc->facts.MaxDevHandle % 8) + ioc->pend_os_device_add_sz++; ++ ++ /* ++ * pend_os_device_add_sz should have, at least, the minimal room for ++ * set_bit()/test_bit(), otherwise out-of-memory may occur. ++ */ ++ ioc->pend_os_device_add_sz = ALIGN(ioc->pend_os_device_add_sz, ++ sizeof(unsigned long)); + ioc->pend_os_device_add = kzalloc(ioc->pend_os_device_add_sz, + GFP_KERNEL); + if (!ioc->pend_os_device_add) { +@@ -8794,6 +8807,12 @@ _base_check_ioc_facts_changes(struct MPT3SAS_ADAPTER *ioc) + if (ioc->facts.MaxDevHandle % 8) + pd_handles_sz++; + ++ /* ++ * pd_handles should have, at least, the minimal room for ++ * set_bit()/test_bit(), otherwise out-of-memory touch may ++ * occur. 
++ */ ++ pd_handles_sz = ALIGN(pd_handles_sz, sizeof(unsigned long)); + pd_handles = krealloc(ioc->pd_handles, pd_handles_sz, + GFP_KERNEL); + if (!pd_handles) { +diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h +index 1be0850ca17aa..ae21cc064acf5 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_base.h ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h +@@ -2045,9 +2045,6 @@ void + mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, + struct _raid_device *raid_device, Mpi25SCSIIORequest_t *mpi_request); + +-/* NCQ Prio Handling Check */ +-bool scsih_ncq_prio_supp(struct scsi_device *sdev); +- + void mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc); + void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc); + void mpt3sas_init_debugfs(void); +diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +index efdb8178db324..e289f18fc7643 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +@@ -4034,7 +4034,7 @@ sas_ncq_prio_supported_show(struct device *dev, + { + struct scsi_device *sdev = to_scsi_device(dev); + +- return sysfs_emit(buf, "%d\n", scsih_ncq_prio_supp(sdev)); ++ return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); + } + static DEVICE_ATTR_RO(sas_ncq_prio_supported); + +@@ -4069,7 +4069,7 @@ sas_ncq_prio_enable_store(struct device *dev, + if (kstrtobool(buf, &ncq_prio_enable)) + return -EINVAL; + +- if (!scsih_ncq_prio_supp(sdev)) ++ if (!sas_ata_ncq_prio_supported(sdev)) + return -EINVAL; + + sas_device_priv_data->ncq_prio_enable = ncq_prio_enable; +diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +index 605013d3ee83a..f270b0d829f6e 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -12590,29 +12590,6 @@ scsih_pci_mmio_enabled(struct pci_dev *pdev) + return PCI_ERS_RESULT_RECOVERED; + } + +-/** +- * scsih_ncq_prio_supp - Check for NCQ command priority support +- * @sdev: scsi device struct +- * +- * This is called when a user indicates they would like to enable +- * ncq command priorities. This works only on SATA devices. +- */ +-bool scsih_ncq_prio_supp(struct scsi_device *sdev) +-{ +- struct scsi_vpd *vpd; +- bool ncq_prio_supp = false; +- +- rcu_read_lock(); +- vpd = rcu_dereference(sdev->vpd_pg89); +- if (!vpd || vpd->len < 214) +- goto out; +- +- ncq_prio_supp = (vpd->data[213] >> 4) & 1; +-out: +- rcu_read_unlock(); +- +- return ncq_prio_supp; +-} + /* + * The pci device ids are defined in mpi/mpi2_cnfg.h. + */ +diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c +index 7fd81c65804af..22bdce0bc3279 100644 +--- a/drivers/scsi/scsi.c ++++ b/drivers/scsi/scsi.c +@@ -671,6 +671,13 @@ void scsi_cdl_check(struct scsi_device *sdev) + sdev->use_10_for_rw = 0; + + sdev->cdl_supported = 1; ++ ++ /* ++ * If the device supports CDL, make sure that the current drive ++ * feature status is consistent with the user controlled ++ * cdl_enable state. 
++ */ ++ scsi_cdl_enable(sdev, sdev->cdl_enable); + } else { + sdev->cdl_supported = 0; + } +diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c +index d704c484a251c..7fdd2b61fe855 100644 +--- a/drivers/scsi/scsi_transport_sas.c ++++ b/drivers/scsi/scsi_transport_sas.c +@@ -416,6 +416,29 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *sdev) + } + EXPORT_SYMBOL_GPL(sas_is_tlr_enabled); + ++/** ++ * sas_ata_ncq_prio_supported - Check for ATA NCQ command priority support ++ * @sdev: SCSI device ++ * ++ * Check if an ATA device supports NCQ priority using VPD page 89h (ATA ++ * Information). Since this VPD page is implemented only for ATA devices, ++ * this function always returns false for SCSI devices. ++ */ ++bool sas_ata_ncq_prio_supported(struct scsi_device *sdev) ++{ ++ struct scsi_vpd *vpd; ++ bool ncq_prio_supported = false; ++ ++ rcu_read_lock(); ++ vpd = rcu_dereference(sdev->vpd_pg89); ++ if (vpd && vpd->len >= 214) ++ ncq_prio_supported = (vpd->data[213] >> 4) & 1; ++ rcu_read_unlock(); ++ ++ return ncq_prio_supported; ++} ++EXPORT_SYMBOL_GPL(sas_ata_ncq_prio_supported); ++ + /* + * SAS Phy attributes + */ +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index c62f677084b4c..6dd43fff07ad5 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3406,16 +3406,23 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, + + static void sd_read_block_zero(struct scsi_disk *sdkp) + { +- unsigned int buf_len = sdkp->device->sector_size; +- char *buffer, cmd[10] = { }; ++ struct scsi_device *sdev = sdkp->device; ++ unsigned int buf_len = sdev->sector_size; ++ u8 *buffer, cmd[16] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + +- cmd[0] = READ_10; +- put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ +- put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ ++ if (sdev->use_16_for_rw) { ++ cmd[0] = READ_16; ++ put_unaligned_be64(0, &cmd[2]); /* Logical block address 0 */ ++ put_unaligned_be32(1, &cmd[10]);/* Transfer 1 logical block */ ++ } else { ++ cmd[0] = READ_10; ++ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ ++ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ ++ } + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); +diff --git a/drivers/spmi/hisi-spmi-controller.c b/drivers/spmi/hisi-spmi-controller.c +index 9cbd473487cb0..6eea83ee779dd 100644 +--- a/drivers/spmi/hisi-spmi-controller.c ++++ b/drivers/spmi/hisi-spmi-controller.c +@@ -303,7 +303,6 @@ static int spmi_controller_probe(struct platform_device *pdev) + + spin_lock_init(&spmi_controller->lock); + +- ctrl->nr = spmi_controller->channel; + ctrl->dev.parent = pdev->dev.parent; + ctrl->dev.of_node = of_node_get(pdev->dev.of_node); + +diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c +index e324cd8997193..0754fe76edde4 100644 +--- a/drivers/thunderbolt/debugfs.c ++++ b/drivers/thunderbolt/debugfs.c +@@ -943,8 +943,9 @@ static void margining_port_init(struct tb_port *port) + debugfs_create_file("run", 0600, dir, port, &margining_run_fops); + debugfs_create_file("results", 0600, dir, port, &margining_results_fops); + debugfs_create_file("test", 0600, dir, port, &margining_test_fops); +- if (independent_voltage_margins(usb4) || +- (supports_time(usb4) && independent_time_margins(usb4))) ++ if (independent_voltage_margins(usb4) == USB4_MARGIN_CAP_0_VOLTAGE_HL || ++ (supports_time(usb4) && ++ 
independent_time_margins(usb4) == USB4_MARGIN_CAP_1_TIME_LR)) + debugfs_create_file("margin", 0600, dir, port, &margining_margin_fops); + } + +diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c +index 6c9a408d67cd6..e05341b85c599 100644 +--- a/drivers/tty/n_tty.c ++++ b/drivers/tty/n_tty.c +@@ -1624,15 +1624,25 @@ static void __receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, + else if (ldata->raw || (L_EXTPROC(tty) && !preops)) + n_tty_receive_buf_raw(tty, cp, fp, count); + else if (tty->closing && !L_EXTPROC(tty)) { +- if (la_count > 0) ++ if (la_count > 0) { + n_tty_receive_buf_closing(tty, cp, fp, la_count, true); +- if (count > la_count) +- n_tty_receive_buf_closing(tty, cp, fp, count - la_count, false); ++ cp += la_count; ++ if (fp) ++ fp += la_count; ++ count -= la_count; ++ } ++ if (count > 0) ++ n_tty_receive_buf_closing(tty, cp, fp, count, false); + } else { +- if (la_count > 0) ++ if (la_count > 0) { + n_tty_receive_buf_standard(tty, cp, fp, la_count, true); +- if (count > la_count) +- n_tty_receive_buf_standard(tty, cp, fp, count - la_count, false); ++ cp += la_count; ++ if (fp) ++ fp += la_count; ++ count -= la_count; ++ } ++ if (count > 0) ++ n_tty_receive_buf_standard(tty, cp, fp, count, false); + + flush_echoes(tty); + if (tty->ops->flush_chars) +diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c +index a1f2259cc9a98..777bea835b114 100644 +--- a/drivers/tty/serial/8250/8250_dw.c ++++ b/drivers/tty/serial/8250/8250_dw.c +@@ -9,7 +9,6 @@ + * LCR is written whilst busy. If it is, then a busy detect interrupt is + * raised, the LCR needs to be rewritten and the uart status register read. + */ +-#include <linux/acpi.h> + #include <linux/clk.h> + #include <linux/delay.h> + #include <linux/device.h> +@@ -17,7 +16,6 @@ + #include <linux/mod_devicetable.h> + #include <linux/module.h> + #include <linux/notifier.h> +-#include <linux/of.h> + #include <linux/platform_device.h> + #include <linux/pm_runtime.h> + #include <linux/property.h> +@@ -56,6 +54,8 @@ + #define DW_UART_QUIRK_ARMADA_38X BIT(1) + #define DW_UART_QUIRK_SKIP_SET_RATE BIT(2) + #define DW_UART_QUIRK_IS_DMA_FC BIT(3) ++#define DW_UART_QUIRK_APMC0D08 BIT(4) ++#define DW_UART_QUIRK_CPR_VALUE BIT(5) + + static inline struct dw8250_data *clk_to_dw8250_data(struct notifier_block *nb) + { +@@ -445,44 +445,33 @@ static void dw8250_prepare_rx_dma(struct uart_8250_port *p) + + static void dw8250_quirks(struct uart_port *p, struct dw8250_data *data) + { +- struct device_node *np = p->dev->of_node; ++ unsigned int quirks = data->pdata ? data->pdata->quirks : 0; ++ u32 cpr_value = data->pdata ? 
data->pdata->cpr_value : 0; + +- if (np) { +- unsigned int quirks = data->pdata->quirks; +- int id; ++ if (quirks & DW_UART_QUIRK_CPR_VALUE) ++ data->data.cpr_value = cpr_value; + +- /* get index of serial line, if found in DT aliases */ +- id = of_alias_get_id(np, "serial"); +- if (id >= 0) +- p->line = id; + #ifdef CONFIG_64BIT +- if (quirks & DW_UART_QUIRK_OCTEON) { +- p->serial_in = dw8250_serial_inq; +- p->serial_out = dw8250_serial_outq; +- p->flags = UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE; +- p->type = PORT_OCTEON; +- data->skip_autocfg = true; +- } ++ if (quirks & DW_UART_QUIRK_OCTEON) { ++ p->serial_in = dw8250_serial_inq; ++ p->serial_out = dw8250_serial_outq; ++ p->flags = UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE; ++ p->type = PORT_OCTEON; ++ data->skip_autocfg = true; ++ } + #endif + +- if (of_device_is_big_endian(np)) { +- p->iotype = UPIO_MEM32BE; +- p->serial_in = dw8250_serial_in32be; +- p->serial_out = dw8250_serial_out32be; +- } +- +- if (quirks & DW_UART_QUIRK_ARMADA_38X) +- p->serial_out = dw8250_serial_out38x; +- if (quirks & DW_UART_QUIRK_SKIP_SET_RATE) +- p->set_termios = dw8250_do_set_termios; +- if (quirks & DW_UART_QUIRK_IS_DMA_FC) { +- data->data.dma.txconf.device_fc = 1; +- data->data.dma.rxconf.device_fc = 1; +- data->data.dma.prepare_tx_dma = dw8250_prepare_tx_dma; +- data->data.dma.prepare_rx_dma = dw8250_prepare_rx_dma; +- } +- +- } else if (acpi_dev_present("APMC0D08", NULL, -1)) { ++ if (quirks & DW_UART_QUIRK_ARMADA_38X) ++ p->serial_out = dw8250_serial_out38x; ++ if (quirks & DW_UART_QUIRK_SKIP_SET_RATE) ++ p->set_termios = dw8250_do_set_termios; ++ if (quirks & DW_UART_QUIRK_IS_DMA_FC) { ++ data->data.dma.txconf.device_fc = 1; ++ data->data.dma.rxconf.device_fc = 1; ++ data->data.dma.prepare_tx_dma = dw8250_prepare_tx_dma; ++ data->data.dma.prepare_rx_dma = dw8250_prepare_rx_dma; ++ } ++ if (quirks & DW_UART_QUIRK_APMC0D08) { + p->iotype = UPIO_MEM32; + p->regshift = 2; + p->serial_in = dw8250_serial_in32; +@@ -515,39 +504,21 @@ static int dw8250_probe(struct platform_device *pdev) + struct device *dev = &pdev->dev; + struct dw8250_data *data; + struct resource *regs; +- int irq; + int err; +- u32 val; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) + return dev_err_probe(dev, -EINVAL, "no registers defined\n"); + +- irq = platform_get_irq_optional(pdev, 0); +- /* no interrupt -> fall back to polling */ +- if (irq == -ENXIO) +- irq = 0; +- if (irq < 0) +- return irq; +- + spin_lock_init(&p->lock); +- p->mapbase = regs->start; +- p->irq = irq; + p->handle_irq = dw8250_handle_irq; + p->pm = dw8250_do_pm; + p->type = PORT_8250; +- p->flags = UPF_SHARE_IRQ | UPF_FIXED_PORT; ++ p->flags = UPF_FIXED_PORT; + p->dev = dev; +- p->iotype = UPIO_MEM; +- p->serial_in = dw8250_serial_in; +- p->serial_out = dw8250_serial_out; + p->set_ldisc = dw8250_set_ldisc; + p->set_termios = dw8250_set_termios; + +- p->membase = devm_ioremap(dev, regs->start, resource_size(regs)); +- if (!p->membase) +- return -ENOMEM; +- + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; +@@ -559,15 +530,35 @@ static int dw8250_probe(struct platform_device *pdev) + data->uart_16550_compatible = device_property_read_bool(dev, + "snps,uart-16550-compatible"); + +- err = device_property_read_u32(dev, "reg-shift", &val); +- if (!err) +- p->regshift = val; ++ p->mapbase = regs->start; ++ p->mapsize = resource_size(regs); + +- err = device_property_read_u32(dev, "reg-io-width", &val); +- if (!err && val == 4) { +- p->iotype = 
UPIO_MEM32; ++ p->membase = devm_ioremap(dev, p->mapbase, p->mapsize); ++ if (!p->membase) ++ return -ENOMEM; ++ ++ err = uart_read_port_properties(p); ++ /* no interrupt -> fall back to polling */ ++ if (err == -ENXIO) ++ err = 0; ++ if (err) ++ return err; ++ ++ switch (p->iotype) { ++ case UPIO_MEM: ++ p->serial_in = dw8250_serial_in; ++ p->serial_out = dw8250_serial_out; ++ break; ++ case UPIO_MEM32: + p->serial_in = dw8250_serial_in32; + p->serial_out = dw8250_serial_out32; ++ break; ++ case UPIO_MEM32BE: ++ p->serial_in = dw8250_serial_in32be; ++ p->serial_out = dw8250_serial_out32be; ++ break; ++ default: ++ return -ENODEV; + } + + if (device_property_read_bool(dev, "dcd-override")) { +@@ -594,9 +585,6 @@ static int dw8250_probe(struct platform_device *pdev) + data->msr_mask_off |= UART_MSR_TERI; + } + +- /* Always ask for fixed clock rate from a property. */ +- device_property_read_u32(dev, "clock-frequency", &p->uartclk); +- + /* If there is separate baudclk, get the rate from it. */ + data->clk = devm_clk_get_optional(dev, "baudclk"); + if (data->clk == NULL) +@@ -766,8 +754,8 @@ static const struct dw8250_platform_data dw8250_armada_38x_data = { + + static const struct dw8250_platform_data dw8250_renesas_rzn1_data = { + .usr_reg = DW_UART_USR, +- .cpr_val = 0x00012f32, +- .quirks = DW_UART_QUIRK_IS_DMA_FC, ++ .cpr_value = 0x00012f32, ++ .quirks = DW_UART_QUIRK_CPR_VALUE | DW_UART_QUIRK_IS_DMA_FC, + }; + + static const struct dw8250_platform_data dw8250_starfive_jh7100_data = { +@@ -785,13 +773,18 @@ static const struct of_device_id dw8250_of_match[] = { + }; + MODULE_DEVICE_TABLE(of, dw8250_of_match); + ++static const struct dw8250_platform_data dw8250_apmc0d08 = { ++ .usr_reg = DW_UART_USR, ++ .quirks = DW_UART_QUIRK_APMC0D08, ++}; ++ + static const struct acpi_device_id dw8250_acpi_match[] = { + { "80860F0A", (kernel_ulong_t)&dw8250_dw_apb }, + { "8086228A", (kernel_ulong_t)&dw8250_dw_apb }, + { "AMD0020", (kernel_ulong_t)&dw8250_dw_apb }, + { "AMDI0020", (kernel_ulong_t)&dw8250_dw_apb }, + { "AMDI0022", (kernel_ulong_t)&dw8250_dw_apb }, +- { "APMC0D08", (kernel_ulong_t)&dw8250_dw_apb}, ++ { "APMC0D08", (kernel_ulong_t)&dw8250_apmc0d08 }, + { "BRCM2032", (kernel_ulong_t)&dw8250_dw_apb }, + { "HISI0031", (kernel_ulong_t)&dw8250_dw_apb }, + { "INT33C4", (kernel_ulong_t)&dw8250_dw_apb }, +diff --git a/drivers/tty/serial/8250/8250_dwlib.c b/drivers/tty/serial/8250/8250_dwlib.c +index 84843e204a5e8..8fc8b6753148b 100644 +--- a/drivers/tty/serial/8250/8250_dwlib.c ++++ b/drivers/tty/serial/8250/8250_dwlib.c +@@ -242,7 +242,6 @@ static const struct serial_rs485 dw8250_rs485_supported = { + void dw8250_setup_port(struct uart_port *p) + { + struct dw8250_port_data *pd = p->private_data; +- struct dw8250_data *data = to_dw8250_data(pd); + struct uart_8250_port *up = up_to_u8250p(p); + u32 reg, old_dlf; + +@@ -284,7 +283,7 @@ void dw8250_setup_port(struct uart_port *p) + + reg = dw8250_readl_ext(p, DW_UART_CPR); + if (!reg) { +- reg = data->pdata->cpr_val; ++ reg = pd->cpr_value; + dev_dbg(p->dev, "CPR is not available, using 0x%08x instead\n", reg); + } + if (!reg) +diff --git a/drivers/tty/serial/8250/8250_dwlib.h b/drivers/tty/serial/8250/8250_dwlib.h +index f13e91f2cace9..794a9014cdac1 100644 +--- a/drivers/tty/serial/8250/8250_dwlib.h ++++ b/drivers/tty/serial/8250/8250_dwlib.h +@@ -19,6 +19,7 @@ struct dw8250_port_data { + struct uart_8250_dma dma; + + /* Hardware configuration */ ++ u32 cpr_value; + u8 dlf_size; + + /* RS485 variables */ +@@ -27,7 +28,7 @@ struct dw8250_port_data 
{ + + struct dw8250_platform_data { + u8 usr_reg; +- u32 cpr_val; ++ u32 cpr_value; + unsigned int quirks; + }; + +diff --git a/drivers/tty/serial/8250/8250_pxa.c b/drivers/tty/serial/8250/8250_pxa.c +index a5b3ea27fc902..2cbaf68d28119 100644 +--- a/drivers/tty/serial/8250/8250_pxa.c ++++ b/drivers/tty/serial/8250/8250_pxa.c +@@ -124,6 +124,7 @@ static int serial_pxa_probe(struct platform_device *pdev) + uart.port.regshift = 2; + uart.port.irq = irq; + uart.port.fifosize = 64; ++ uart.tx_loadsz = 32; + uart.port.flags = UPF_IOREMAP | UPF_SKIP_TEST | UPF_FIXED_TYPE; + uart.port.dev = &pdev->dev; + uart.port.uartclk = clk_get_rate(data->clk); +diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c +index d622a9297f651..469ad26cde487 100644 +--- a/drivers/tty/serial/serial_port.c ++++ b/drivers/tty/serial/serial_port.c +@@ -8,7 +8,10 @@ + + #include <linux/device.h> + #include <linux/module.h> ++#include <linux/of.h> ++#include <linux/platform_device.h> + #include <linux/pm_runtime.h> ++#include <linux/property.h> + #include <linux/serial_core.h> + #include <linux/spinlock.h> + +@@ -60,6 +63,13 @@ static int serial_port_runtime_suspend(struct device *dev) + if (port->flags & UPF_DEAD) + return 0; + ++ /* ++ * Nothing to do on pm_runtime_force_suspend(), see ++ * DEFINE_RUNTIME_DEV_PM_OPS. ++ */ ++ if (!pm_runtime_enabled(dev)) ++ return 0; ++ + uart_port_lock_irqsave(port, &flags); + if (!port_dev->tx_enabled) { + uart_port_unlock_irqrestore(port, flags); +@@ -139,6 +149,148 @@ void uart_remove_one_port(struct uart_driver *drv, struct uart_port *port) + } + EXPORT_SYMBOL(uart_remove_one_port); + ++/** ++ * __uart_read_properties - read firmware properties of the given UART port ++ * @port: corresponding port ++ * @use_defaults: apply defaults (when %true) or validate the values (when %false) ++ * ++ * The following device properties are supported: ++ * - clock-frequency (optional) ++ * - fifo-size (optional) ++ * - no-loopback-test (optional) ++ * - reg-shift (defaults may apply) ++ * - reg-offset (value may be validated) ++ * - reg-io-width (defaults may apply or value may be validated) ++ * - interrupts (OF only) ++ * - serial [alias ID] (OF only) ++ * ++ * If the port->dev is of struct platform_device type the interrupt line ++ * will be retrieved via platform_get_irq() call against that device. ++ * Otherwise it will be assigned by fwnode_irq_get() call. In both cases ++ * the index 0 of the resource is used. ++ * ++ * The caller is responsible to initialize the following fields of the @port ++ * ->dev (must be valid) ++ * ->flags ++ * ->mapbase ++ * ->mapsize ++ * ->regshift (if @use_defaults is false) ++ * before calling this function. Alternatively the above mentioned fields ++ * may be zeroed, in such case the only ones, that have associated properties ++ * found, will be set to the respective values. ++ * ++ * If no error happened, the ->irq, ->mapbase, ->mapsize will be altered. ++ * The ->iotype is always altered. ++ * ++ * When @use_defaults is true and the respective property is not found ++ * the following values will be applied: ++ * ->regshift = 0 ++ * In this case IRQ must be provided, otherwise an error will be returned. 
++ * ++ * When @use_defaults is false and the respective property is found ++ * the following values will be validated: ++ * - reg-io-width (->iotype) ++ * - reg-offset (->mapsize against ->mapbase) ++ * ++ * Returns: 0 on success or negative errno on failure ++ */ ++static int __uart_read_properties(struct uart_port *port, bool use_defaults) ++{ ++ struct device *dev = port->dev; ++ u32 value; ++ int ret; ++ ++ /* Read optional UART functional clock frequency */ ++ device_property_read_u32(dev, "clock-frequency", &port->uartclk); ++ ++ /* Read the registers alignment (default: 8-bit) */ ++ ret = device_property_read_u32(dev, "reg-shift", &value); ++ if (ret) ++ port->regshift = use_defaults ? 0 : port->regshift; ++ else ++ port->regshift = value; ++ ++ /* Read the registers I/O access type (default: MMIO 8-bit) */ ++ ret = device_property_read_u32(dev, "reg-io-width", &value); ++ if (ret) { ++ port->iotype = UPIO_MEM; ++ } else { ++ switch (value) { ++ case 1: ++ port->iotype = UPIO_MEM; ++ break; ++ case 2: ++ port->iotype = UPIO_MEM16; ++ break; ++ case 4: ++ port->iotype = device_is_big_endian(dev) ? UPIO_MEM32BE : UPIO_MEM32; ++ break; ++ default: ++ if (!use_defaults) { ++ dev_err(dev, "Unsupported reg-io-width (%u)\n", value); ++ return -EINVAL; ++ } ++ port->iotype = UPIO_UNKNOWN; ++ break; ++ } ++ } ++ ++ /* Read the address mapping base offset (default: no offset) */ ++ ret = device_property_read_u32(dev, "reg-offset", &value); ++ if (ret) ++ value = 0; ++ ++ /* Check for shifted address mapping overflow */ ++ if (!use_defaults && port->mapsize < value) { ++ dev_err(dev, "reg-offset %u exceeds region size %pa\n", value, &port->mapsize); ++ return -EINVAL; ++ } ++ ++ port->mapbase += value; ++ port->mapsize -= value; ++ ++ /* Read optional FIFO size */ ++ device_property_read_u32(dev, "fifo-size", &port->fifosize); ++ ++ if (device_property_read_bool(dev, "no-loopback-test")) ++ port->flags |= UPF_SKIP_TEST; ++ ++ /* Get index of serial line, if found in DT aliases */ ++ ret = of_alias_get_id(dev_of_node(dev), "serial"); ++ if (ret >= 0) ++ port->line = ret; ++ ++ if (dev_is_platform(dev)) ++ ret = platform_get_irq(to_platform_device(dev), 0); ++ else ++ ret = fwnode_irq_get(dev_fwnode(dev), 0); ++ if (ret == -EPROBE_DEFER) ++ return ret; ++ if (ret > 0) ++ port->irq = ret; ++ else if (use_defaults) ++ /* By default IRQ support is mandatory */ ++ return ret; ++ else ++ port->irq = 0; ++ ++ port->flags |= UPF_SHARE_IRQ; ++ ++ return 0; ++} ++ ++int uart_read_port_properties(struct uart_port *port) ++{ ++ return __uart_read_properties(port, true); ++} ++EXPORT_SYMBOL_GPL(uart_read_port_properties); ++ ++int uart_read_and_validate_port_properties(struct uart_port *port) ++{ ++ return __uart_read_properties(port, false); ++} ++EXPORT_SYMBOL_GPL(uart_read_and_validate_port_properties); ++ + static struct device_driver serial_port_driver = { + .name = "port", + .suppress_bind_attrs = true, +diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c +index 7ae3096814282..4e84ee6564d4b 100644 +--- a/drivers/ufs/core/ufs-mcq.c ++++ b/drivers/ufs/core/ufs-mcq.c +@@ -630,20 +630,20 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) + struct ufshcd_lrb *lrbp = &hba->lrb[tag]; + struct ufs_hw_queue *hwq; + unsigned long flags; +- int err = FAILED; ++ int err; + + if (!ufshcd_cmd_inflight(lrbp->cmd)) { + dev_err(hba->dev, + "%s: skip abort. 
cmd at tag %d already completed.\n", + __func__, tag); +- goto out; ++ return FAILED; + } + + /* Skip task abort in case previous aborts failed and report failure */ + if (lrbp->req_abort_skip) { + dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n", + __func__, tag); +- goto out; ++ return FAILED; + } + + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); +@@ -655,7 +655,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) + */ + dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n", + __func__, hwq->id, tag); +- goto out; ++ return FAILED; + } + + /* +@@ -663,18 +663,17 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) + * in the completion queue either. Query the device to see if + * the command is being processed in the device. + */ +- if (ufshcd_try_to_abort_task(hba, tag)) { ++ err = ufshcd_try_to_abort_task(hba, tag); ++ if (err) { + dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err); + lrbp->req_abort_skip = true; +- goto out; ++ return FAILED; + } + +- err = SUCCESS; + spin_lock_irqsave(&hwq->cq_lock, flags); + if (ufshcd_cmd_inflight(lrbp->cmd)) + ufshcd_release_scsi_cmd(hba, lrbp); + spin_unlock_irqrestore(&hwq->cq_lock, flags); + +-out: +- return err; ++ return SUCCESS; + } +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index 589c90f4d4021..40689757a2690 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -1267,7 +1267,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) + * make sure that there are no outstanding requests when + * clock scaling is in progress + */ +- ufshcd_scsi_block_requests(hba); ++ blk_mq_quiesce_tagset(&hba->host->tag_set); + mutex_lock(&hba->wb_mutex); + down_write(&hba->clk_scaling_lock); + +@@ -1276,7 +1276,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) + ret = -EBUSY; + up_write(&hba->clk_scaling_lock); + mutex_unlock(&hba->wb_mutex); +- ufshcd_scsi_unblock_requests(hba); ++ blk_mq_unquiesce_tagset(&hba->host->tag_set); + goto out; + } + +@@ -1297,7 +1297,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc + + mutex_unlock(&hba->wb_mutex); + +- ufshcd_scsi_unblock_requests(hba); ++ blk_mq_unquiesce_tagset(&hba->host->tag_set); + ufshcd_release(hba); + } + +diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile +index 3a9a0dd4be706..949eca0adebea 100644 +--- a/drivers/usb/Makefile ++++ b/drivers/usb/Makefile +@@ -35,6 +35,7 @@ obj-$(CONFIG_USB_R8A66597_HCD) += host/ + obj-$(CONFIG_USB_FSL_USB2) += host/ + obj-$(CONFIG_USB_FOTG210_HCD) += host/ + obj-$(CONFIG_USB_MAX3421_HCD) += host/ ++obj-$(CONFIG_USB_XEN_HCD) += host/ + + obj-$(CONFIG_USB_C67X00_HCD) += c67x00/ + +diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c +index c553decb54610..6830be4419e20 100644 +--- a/drivers/usb/class/cdc-wdm.c ++++ b/drivers/usb/class/cdc-wdm.c +@@ -266,14 +266,14 @@ static void wdm_int_callback(struct urb *urb) + dev_err(&desc->intf->dev, "Stall on int endpoint\n"); + goto sw; /* halt is cleared in work */ + default: +- dev_err(&desc->intf->dev, ++ dev_err_ratelimited(&desc->intf->dev, + "nonzero urb status received: %d\n", status); + break; + } + } + + if (urb->actual_length < sizeof(struct usb_cdc_notification)) { +- dev_err(&desc->intf->dev, "wdm_int_callback - %d bytes\n", ++ dev_err_ratelimited(&desc->intf->dev, "wdm_int_callback - %d bytes\n", + urb->actual_length); + goto exit; + } +diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c +index 
d6fc08e5db8fb..6cee705568c27 100644 +--- a/drivers/usb/host/xhci-pci.c ++++ b/drivers/usb/host/xhci-pci.c +@@ -36,6 +36,7 @@ + + #define PCI_VENDOR_ID_ETRON 0x1b6f + #define PCI_DEVICE_ID_EJ168 0x7023 ++#define PCI_DEVICE_ID_EJ188 0x7052 + + #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 + #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 +@@ -461,6 +462,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + xhci->quirks |= XHCI_BROKEN_STREAMS; + } ++ if (pdev->vendor == PCI_VENDOR_ID_ETRON && ++ pdev->device == PCI_DEVICE_ID_EJ188) { ++ xhci->quirks |= XHCI_RESET_ON_RESUME; ++ xhci->quirks |= XHCI_BROKEN_STREAMS; ++ } ++ + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && + pdev->device == 0x0014) { + xhci->quirks |= XHCI_TRUST_TX_LENGTH; +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index c959d9144baa5..8dd85221cd927 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -1027,13 +1027,27 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) + break; + case TD_DIRTY: /* TD is cached, clear it */ + case TD_HALTED: ++ case TD_CLEARING_CACHE_DEFERRED: ++ if (cached_td) { ++ if (cached_td->urb->stream_id != td->urb->stream_id) { ++ /* Multiple streams case, defer move dq */ ++ xhci_dbg(xhci, ++ "Move dq deferred: stream %u URB %p\n", ++ td->urb->stream_id, td->urb); ++ td->cancel_status = TD_CLEARING_CACHE_DEFERRED; ++ break; ++ } ++ ++ /* Should never happen, but clear the TD if it does */ ++ xhci_warn(xhci, ++ "Found multiple active URBs %p and %p in stream %u?\n", ++ td->urb, cached_td->urb, ++ td->urb->stream_id); ++ td_to_noop(xhci, ring, cached_td, false); ++ cached_td->cancel_status = TD_CLEARED; ++ } ++ + td->cancel_status = TD_CLEARING_CACHE; +- if (cached_td) +- /* FIXME stream case, several stopped rings */ +- xhci_dbg(xhci, +- "Move dq past stream %u URB %p instead of stream %u URB %p\n", +- td->urb->stream_id, td->urb, +- cached_td->urb->stream_id, cached_td->urb); + cached_td = td; + break; + } +@@ -1053,10 +1067,16 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) + if (err) { + /* Failed to move past cached td, just set cached TDs to no-op */ + list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) { +- if (td->cancel_status != TD_CLEARING_CACHE) ++ /* ++ * Deferred TDs need to have the deq pointer set after the above command ++ * completes, so if that failed we just give up on all of them (and ++ * complain loudly since this could cause issues due to caching). 
++ */ ++ if (td->cancel_status != TD_CLEARING_CACHE && ++ td->cancel_status != TD_CLEARING_CACHE_DEFERRED) + continue; +- xhci_dbg(xhci, "Failed to clear cancelled cached URB %p, mark clear anyway\n", +- td->urb); ++ xhci_warn(xhci, "Failed to clear cancelled cached URB %p, mark clear anyway\n", ++ td->urb); + td_to_noop(xhci, ring, td, false); + td->cancel_status = TD_CLEARED; + } +@@ -1334,6 +1354,7 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, + struct xhci_ep_ctx *ep_ctx; + struct xhci_slot_ctx *slot_ctx; + struct xhci_td *td, *tmp_td; ++ bool deferred = false; + + ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3])); + stream_id = TRB_TO_STREAM_ID(le32_to_cpu(trb->generic.field[2])); +@@ -1420,6 +1441,8 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, + xhci_dbg(ep->xhci, "%s: Giveback cancelled URB %p TD\n", + __func__, td->urb); + xhci_td_cleanup(ep->xhci, td, ep_ring, td->status); ++ } else if (td->cancel_status == TD_CLEARING_CACHE_DEFERRED) { ++ deferred = true; + } else { + xhci_dbg(ep->xhci, "%s: Keep cancelled URB %p TD as cancel_status is %d\n", + __func__, td->urb, td->cancel_status); +@@ -1429,8 +1452,17 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, + ep->ep_state &= ~SET_DEQ_PENDING; + ep->queued_deq_seg = NULL; + ep->queued_deq_ptr = NULL; +- /* Restart any rings with pending URBs */ +- ring_doorbell_for_active_rings(xhci, slot_id, ep_index); ++ ++ if (deferred) { ++ /* We have more streams to clear */ ++ xhci_dbg(ep->xhci, "%s: Pending TDs to clear, continuing with invalidation\n", ++ __func__); ++ xhci_invalidate_cancelled_tds(ep); ++ } else { ++ /* Restart any rings with pending URBs */ ++ xhci_dbg(ep->xhci, "%s: All TDs cleared, ring doorbell\n", __func__); ++ ring_doorbell_for_active_rings(xhci, slot_id, ep_index); ++ } + } + + static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id, +@@ -2525,9 +2557,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + goto finish_td; + case COMP_STOPPED_LENGTH_INVALID: + /* stopped on ep trb with invalid length, exclude it */ +- ep_trb_len = 0; +- remaining = 0; +- break; ++ td->urb->actual_length = sum_trb_lengths(xhci, ep_ring, ep_trb); ++ goto finish_td; + case COMP_USB_TRANSACTION_ERROR: + if (xhci->quirks & XHCI_NO_SOFT_RETRY || + (ep->err_count++ > MAX_SOFT_RETRY) || +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index be480d6ac8586..b29fe4716f34e 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -1559,6 +1559,7 @@ enum xhci_cancelled_td_status { + TD_DIRTY = 0, + TD_HALTED, + TD_CLEARING_CACHE, ++ TD_CLEARING_CACHE_DEFERRED, + TD_CLEARED, + }; + +diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c +index 115f05a6201a1..40d34cc28344a 100644 +--- a/drivers/usb/storage/alauda.c ++++ b/drivers/usb/storage/alauda.c +@@ -105,6 +105,8 @@ struct alauda_info { + unsigned char sense_key; + unsigned long sense_asc; /* additional sense code */ + unsigned long sense_ascq; /* additional sense code qualifier */ ++ ++ bool media_initialized; + }; + + #define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) +@@ -476,11 +478,12 @@ static int alauda_check_media(struct us_data *us) + } + + /* Check for media change */ +- if (status[0] & 0x08) { ++ if (status[0] & 0x08 || !info->media_initialized) { + usb_stor_dbg(us, "Media change detected\n"); + alauda_free_maps(&MEDIA_INFO(us)); +- alauda_init_media(us); +- ++ rc = 
alauda_init_media(us); ++ if (rc == USB_STOR_TRANSPORT_GOOD) ++ info->media_initialized = true; + info->sense_key = UNIT_ATTENTION; + info->sense_asc = 0x28; + info->sense_ascq = 0x00; +diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c +index 08efd4a6bd1dd..f14505c690f96 100644 +--- a/drivers/usb/typec/tcpm/tcpm.c ++++ b/drivers/usb/typec/tcpm/tcpm.c +@@ -2436,8 +2436,10 @@ static int tcpm_register_sink_caps(struct tcpm_port *port) + memcpy(caps.pdo, port->sink_caps, sizeof(u32) * port->nr_sink_caps); + caps.role = TYPEC_SINK; + +- if (cap) ++ if (cap) { + usb_power_delivery_unregister_capabilities(cap); ++ port->partner_source_caps = NULL; ++ } + + cap = usb_power_delivery_register_capabilities(port->partner_pd, &caps); + if (IS_ERR(cap)) +@@ -5413,6 +5415,7 @@ static void _tcpm_pd_hard_reset(struct tcpm_port *port) + port->tcpc->set_bist_data(port->tcpc, false); + + switch (port->state) { ++ case TOGGLING: + case ERROR_RECOVERY: + case PORT_RESET: + case PORT_RESET_WAIT_OFF: +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index 41a8cdce5d9f7..2784f6cb44822 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -1282,21 +1282,175 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache, + return ret; + } + ++struct zone_info { ++ u64 physical; ++ u64 capacity; ++ u64 alloc_offset; ++}; ++ ++static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, ++ struct zone_info *info, unsigned long *active, ++ struct map_lookup *map) ++{ ++ struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; ++ struct btrfs_device *device; ++ int dev_replace_is_ongoing = 0; ++ unsigned int nofs_flag; ++ struct blk_zone zone; ++ int ret; ++ ++ info->physical = map->stripes[zone_idx].physical; ++ ++ down_read(&dev_replace->rwsem); ++ device = map->stripes[zone_idx].dev; ++ ++ if (!device->bdev) { ++ up_read(&dev_replace->rwsem); ++ info->alloc_offset = WP_MISSING_DEV; ++ return 0; ++ } ++ ++ /* Consider a zone as active if we can allow any number of active zones. */ ++ if (!device->zone_info->max_active_zones) ++ __set_bit(zone_idx, active); ++ ++ if (!btrfs_dev_is_sequential(device, info->physical)) { ++ up_read(&dev_replace->rwsem); ++ info->alloc_offset = WP_CONVENTIONAL; ++ return 0; ++ } ++ ++ /* This zone will be used for allocation, so mark this zone non-empty. */ ++ btrfs_dev_clear_zone_empty(device, info->physical); ++ ++ dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); ++ if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) ++ btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); ++ ++ /* ++ * The group is mapped to a sequential zone. Get the zone write pointer ++ * to determine the allocation offset within the zone. 
++ */ ++ WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); ++ nofs_flag = memalloc_nofs_save(); ++ ret = btrfs_get_dev_zone(device, info->physical, &zone); ++ memalloc_nofs_restore(nofs_flag); ++ if (ret) { ++ up_read(&dev_replace->rwsem); ++ if (ret != -EIO && ret != -EOPNOTSUPP) ++ return ret; ++ info->alloc_offset = WP_MISSING_DEV; ++ return 0; ++ } ++ ++ if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { ++ btrfs_err_in_rcu(fs_info, ++ "zoned: unexpected conventional zone %llu on device %s (devid %llu)", ++ zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), ++ device->devid); ++ up_read(&dev_replace->rwsem); ++ return -EIO; ++ } ++ ++ info->capacity = (zone.capacity << SECTOR_SHIFT); ++ ++ switch (zone.cond) { ++ case BLK_ZONE_COND_OFFLINE: ++ case BLK_ZONE_COND_READONLY: ++ btrfs_err(fs_info, ++ "zoned: offline/readonly zone %llu on device %s (devid %llu)", ++ (info->physical >> device->zone_info->zone_size_shift), ++ rcu_str_deref(device->name), device->devid); ++ info->alloc_offset = WP_MISSING_DEV; ++ break; ++ case BLK_ZONE_COND_EMPTY: ++ info->alloc_offset = 0; ++ break; ++ case BLK_ZONE_COND_FULL: ++ info->alloc_offset = info->capacity; ++ break; ++ default: ++ /* Partially used zone. */ ++ info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); ++ __set_bit(zone_idx, active); ++ break; ++ } ++ ++ up_read(&dev_replace->rwsem); ++ ++ return 0; ++} ++ ++static int btrfs_load_block_group_single(struct btrfs_block_group *bg, ++ struct zone_info *info, ++ unsigned long *active) ++{ ++ if (info->alloc_offset == WP_MISSING_DEV) { ++ btrfs_err(bg->fs_info, ++ "zoned: cannot recover write pointer for zone %llu", ++ info->physical); ++ return -EIO; ++ } ++ ++ bg->alloc_offset = info->alloc_offset; ++ bg->zone_capacity = info->capacity; ++ if (test_bit(0, active)) ++ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); ++ return 0; ++} ++ ++static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, ++ struct map_lookup *map, ++ struct zone_info *zone_info, ++ unsigned long *active) ++{ ++ if (map->type & BTRFS_BLOCK_GROUP_DATA) { ++ btrfs_err(bg->fs_info, ++ "zoned: profile DUP not yet supported on data bg"); ++ return -EINVAL; ++ } ++ ++ if (zone_info[0].alloc_offset == WP_MISSING_DEV) { ++ btrfs_err(bg->fs_info, ++ "zoned: cannot recover write pointer for zone %llu", ++ zone_info[0].physical); ++ return -EIO; ++ } ++ if (zone_info[1].alloc_offset == WP_MISSING_DEV) { ++ btrfs_err(bg->fs_info, ++ "zoned: cannot recover write pointer for zone %llu", ++ zone_info[1].physical); ++ return -EIO; ++ } ++ if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { ++ btrfs_err(bg->fs_info, ++ "zoned: write pointer offset mismatch of zones in DUP profile"); ++ return -EIO; ++ } ++ ++ if (test_bit(0, active) != test_bit(1, active)) { ++ if (!btrfs_zone_activate(bg)) ++ return -EIO; ++ } else if (test_bit(0, active)) { ++ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); ++ } ++ ++ bg->alloc_offset = zone_info[0].alloc_offset; ++ bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); ++ return 0; ++} ++ + int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + { + struct btrfs_fs_info *fs_info = cache->fs_info; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; + struct extent_map *em; + struct map_lookup *map; +- struct btrfs_device *device; + u64 logical = cache->start; + u64 length = cache->length; ++ struct zone_info *zone_info = NULL; + int ret; + int i; +- unsigned int nofs_flag; +- u64 
*alloc_offsets = NULL; +- u64 *caps = NULL; +- u64 *physical = NULL; + unsigned long *active = NULL; + u64 last_alloc = 0; + u32 num_sequential = 0, num_conventional = 0; +@@ -1328,20 +1482,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + goto out; + } + +- alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS); +- if (!alloc_offsets) { +- ret = -ENOMEM; +- goto out; +- } +- +- caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS); +- if (!caps) { +- ret = -ENOMEM; +- goto out; +- } +- +- physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS); +- if (!physical) { ++ zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); ++ if (!zone_info) { + ret = -ENOMEM; + goto out; + } +@@ -1353,98 +1495,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + } + + for (i = 0; i < map->num_stripes; i++) { +- bool is_sequential; +- struct blk_zone zone; +- struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; +- int dev_replace_is_ongoing = 0; +- +- device = map->stripes[i].dev; +- physical[i] = map->stripes[i].physical; +- +- if (device->bdev == NULL) { +- alloc_offsets[i] = WP_MISSING_DEV; +- continue; +- } +- +- is_sequential = btrfs_dev_is_sequential(device, physical[i]); +- if (is_sequential) +- num_sequential++; +- else +- num_conventional++; +- +- /* +- * Consider a zone as active if we can allow any number of +- * active zones. +- */ +- if (!device->zone_info->max_active_zones) +- __set_bit(i, active); +- +- if (!is_sequential) { +- alloc_offsets[i] = WP_CONVENTIONAL; +- continue; +- } +- +- /* +- * This zone will be used for allocation, so mark this zone +- * non-empty. +- */ +- btrfs_dev_clear_zone_empty(device, physical[i]); +- +- down_read(&dev_replace->rwsem); +- dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); +- if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) +- btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]); +- up_read(&dev_replace->rwsem); +- +- /* +- * The group is mapped to a sequential zone. Get the zone write +- * pointer to determine the allocation offset within the zone. 
+- */ +- WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size)); +- nofs_flag = memalloc_nofs_save(); +- ret = btrfs_get_dev_zone(device, physical[i], &zone); +- memalloc_nofs_restore(nofs_flag); +- if (ret == -EIO || ret == -EOPNOTSUPP) { +- ret = 0; +- alloc_offsets[i] = WP_MISSING_DEV; +- continue; +- } else if (ret) { +- goto out; +- } +- +- if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { +- btrfs_err_in_rcu(fs_info, +- "zoned: unexpected conventional zone %llu on device %s (devid %llu)", +- zone.start << SECTOR_SHIFT, +- rcu_str_deref(device->name), device->devid); +- ret = -EIO; ++ ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); ++ if (ret) + goto out; +- } +- +- caps[i] = (zone.capacity << SECTOR_SHIFT); + +- switch (zone.cond) { +- case BLK_ZONE_COND_OFFLINE: +- case BLK_ZONE_COND_READONLY: +- btrfs_err(fs_info, +- "zoned: offline/readonly zone %llu on device %s (devid %llu)", +- physical[i] >> device->zone_info->zone_size_shift, +- rcu_str_deref(device->name), device->devid); +- alloc_offsets[i] = WP_MISSING_DEV; +- break; +- case BLK_ZONE_COND_EMPTY: +- alloc_offsets[i] = 0; +- break; +- case BLK_ZONE_COND_FULL: +- alloc_offsets[i] = caps[i]; +- break; +- default: +- /* Partially used zone */ +- alloc_offsets[i] = +- ((zone.wp - zone.start) << SECTOR_SHIFT); +- __set_bit(i, active); +- break; +- } ++ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) ++ num_conventional++; ++ else ++ num_sequential++; + } + + if (num_sequential > 0) +@@ -1468,56 +1526,10 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + + switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* single */ +- if (alloc_offsets[0] == WP_MISSING_DEV) { +- btrfs_err(fs_info, +- "zoned: cannot recover write pointer for zone %llu", +- physical[0]); +- ret = -EIO; +- goto out; +- } +- cache->alloc_offset = alloc_offsets[0]; +- cache->zone_capacity = caps[0]; +- if (test_bit(0, active)) +- set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); ++ ret = btrfs_load_block_group_single(cache, &zone_info[0], active); + break; + case BTRFS_BLOCK_GROUP_DUP: +- if (map->type & BTRFS_BLOCK_GROUP_DATA) { +- btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg"); +- ret = -EINVAL; +- goto out; +- } +- if (alloc_offsets[0] == WP_MISSING_DEV) { +- btrfs_err(fs_info, +- "zoned: cannot recover write pointer for zone %llu", +- physical[0]); +- ret = -EIO; +- goto out; +- } +- if (alloc_offsets[1] == WP_MISSING_DEV) { +- btrfs_err(fs_info, +- "zoned: cannot recover write pointer for zone %llu", +- physical[1]); +- ret = -EIO; +- goto out; +- } +- if (alloc_offsets[0] != alloc_offsets[1]) { +- btrfs_err(fs_info, +- "zoned: write pointer offset mismatch of zones in DUP profile"); +- ret = -EIO; +- goto out; +- } +- if (test_bit(0, active) != test_bit(1, active)) { +- if (!btrfs_zone_activate(cache)) { +- ret = -EIO; +- goto out; +- } +- } else { +- if (test_bit(0, active)) +- set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, +- &cache->runtime_flags); +- } +- cache->alloc_offset = alloc_offsets[0]; +- cache->zone_capacity = min(caps[0], caps[1]); ++ ret = btrfs_load_block_group_dup(cache, map, zone_info, active); + break; + case BTRFS_BLOCK_GROUP_RAID1: + case BTRFS_BLOCK_GROUP_RAID0: +@@ -1570,9 +1582,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + cache->physical_map = NULL; + } + bitmap_free(active); +- kfree(physical); +- kfree(caps); +- kfree(alloc_offsets); ++ kfree(zone_info); + free_extent_map(em); + + return ret; 
+diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c +index 5f4df9588620f..b9945e4f697be 100644 +--- a/fs/cachefiles/daemon.c ++++ b/fs/cachefiles/daemon.c +@@ -77,6 +77,7 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { + { "tag", cachefiles_daemon_tag }, + #ifdef CONFIG_CACHEFILES_ONDEMAND + { "copen", cachefiles_ondemand_copen }, ++ { "restore", cachefiles_ondemand_restore }, + #endif + { "", NULL } + }; +@@ -132,7 +133,7 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) + return 0; + } + +-static void cachefiles_flush_reqs(struct cachefiles_cache *cache) ++void cachefiles_flush_reqs(struct cachefiles_cache *cache) + { + struct xarray *xa = &cache->reqs; + struct cachefiles_req *req; +@@ -158,6 +159,7 @@ static void cachefiles_flush_reqs(struct cachefiles_cache *cache) + xa_for_each(xa, index, req) { + req->error = -EIO; + complete(&req->done); ++ __xa_erase(xa, index); + } + xa_unlock(xa); + +diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c +index 40052bdb33655..35ba2117a6f65 100644 +--- a/fs/cachefiles/interface.c ++++ b/fs/cachefiles/interface.c +@@ -31,6 +31,11 @@ struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie) + if (!object) + return NULL; + ++ if (cachefiles_ondemand_init_obj_info(object, volume)) { ++ kmem_cache_free(cachefiles_object_jar, object); ++ return NULL; ++ } ++ + refcount_set(&object->ref, 1); + + spin_lock_init(&object->lock); +@@ -88,7 +93,7 @@ void cachefiles_put_object(struct cachefiles_object *object, + ASSERTCMP(object->file, ==, NULL); + + kfree(object->d_name); +- ++ cachefiles_ondemand_deinit_obj_info(object); + cache = object->volume->cache->cache; + fscache_put_cookie(object->cookie, fscache_cookie_put_object); + object->cookie = NULL; +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index 2ad58c4652084..3eea52462fc87 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -44,6 +44,20 @@ struct cachefiles_volume { + struct dentry *fanout[256]; /* Fanout subdirs */ + }; + ++enum cachefiles_object_state { ++ CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ ++ CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ ++ CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */ ++}; ++ ++struct cachefiles_ondemand_info { ++ struct work_struct ondemand_work; ++ int ondemand_id; ++ enum cachefiles_object_state state; ++ struct cachefiles_object *object; ++ spinlock_t lock; ++}; ++ + /* + * Backing file state. 
+ */ +@@ -61,7 +75,7 @@ struct cachefiles_object { + unsigned long flags; + #define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ + #ifdef CONFIG_CACHEFILES_ONDEMAND +- int ondemand_id; ++ struct cachefiles_ondemand_info *ondemand; + #endif + }; + +@@ -125,6 +139,7 @@ static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) + struct cachefiles_req { + struct cachefiles_object *object; + struct completion done; ++ refcount_t ref; + int error; + struct cachefiles_msg msg; + }; +@@ -173,6 +188,7 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, + * daemon.c + */ + extern const struct file_operations cachefiles_daemon_fops; ++extern void cachefiles_flush_reqs(struct cachefiles_cache *cache); + extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); + extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); + +@@ -290,12 +306,35 @@ extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, + char *args); + ++extern int cachefiles_ondemand_restore(struct cachefiles_cache *cache, ++ char *args); ++ + extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); + extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); + + extern int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + ++extern int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, ++ struct cachefiles_volume *volume); ++extern void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj); ++ ++#define CACHEFILES_OBJECT_STATE_FUNCS(_state, _STATE) \ ++static inline bool \ ++cachefiles_ondemand_object_is_##_state(const struct cachefiles_object *object) \ ++{ \ ++ return object->ondemand->state == CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ ++} \ ++ \ ++static inline void \ ++cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ ++{ \ ++ object->ondemand->state = CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ ++} ++ ++CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); ++CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); ++CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); + #else + static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +@@ -317,6 +356,15 @@ static inline int cachefiles_ondemand_read(struct cachefiles_object *object, + { + return -EOPNOTSUPP; + } ++ ++static inline int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, ++ struct cachefiles_volume *volume) ++{ ++ return 0; ++} ++static inline void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj) ++{ ++} + #endif + + /* +@@ -367,6 +415,8 @@ do { \ + pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ + fscache_io_error((___cache)->cache); \ + set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ ++ if (cachefiles_in_ondemand_mode(___cache)) \ ++ cachefiles_flush_reqs(___cache); \ + } while (0) + + #define cachefiles_io_error_obj(object, FMT, ...) 
\ +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 0254ed39f68ce..4b39f0422e590 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -4,26 +4,45 @@ + #include <linux/uio.h> + #include "internal.h" + ++struct ondemand_anon_file { ++ struct file *file; ++ int fd; ++}; ++ ++static inline void cachefiles_req_put(struct cachefiles_req *req) ++{ ++ if (refcount_dec_and_test(&req->ref)) ++ kfree(req); ++} ++ + static int cachefiles_ondemand_fd_release(struct inode *inode, + struct file *file) + { + struct cachefiles_object *object = file->private_data; +- struct cachefiles_cache *cache = object->volume->cache; +- int object_id = object->ondemand_id; ++ struct cachefiles_cache *cache; ++ struct cachefiles_ondemand_info *info; ++ int object_id; + struct cachefiles_req *req; +- XA_STATE(xas, &cache->reqs, 0); ++ XA_STATE(xas, NULL, 0); + +- xa_lock(&cache->reqs); +- object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; ++ if (!object) ++ return 0; + +- /* +- * Flush all pending READ requests since their completion depends on +- * anon_fd. +- */ +- xas_for_each(&xas, req, ULONG_MAX) { ++ info = object->ondemand; ++ cache = object->volume->cache; ++ xas.xa = &cache->reqs; ++ ++ xa_lock(&cache->reqs); ++ spin_lock(&info->lock); ++ object_id = info->ondemand_id; ++ info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; ++ cachefiles_ondemand_set_object_close(object); ++ spin_unlock(&info->lock); ++ ++ /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ ++ xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { + if (req->msg.object_id == object_id && +- req->msg.opcode == CACHEFILES_OP_READ) { +- req->error = -EIO; ++ req->msg.opcode == CACHEFILES_OP_CLOSE) { + complete(&req->done); + xas_store(&xas, NULL); + } +@@ -118,6 +137,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + { + struct cachefiles_req *req; + struct fscache_cookie *cookie; ++ struct cachefiles_ondemand_info *info; + char *pid, *psize; + unsigned long id; + long size; +@@ -168,6 +188,33 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + goto out; + } + ++ info = req->object->ondemand; ++ spin_lock(&info->lock); ++ /* ++ * The anonymous fd was closed before copen ? Fail the request. ++ * ++ * t1 | t2 ++ * --------------------------------------------------------- ++ * cachefiles_ondemand_copen ++ * req = xa_erase(&cache->reqs, id) ++ * // Anon fd is maliciously closed. ++ * cachefiles_ondemand_fd_release ++ * xa_lock(&cache->reqs) ++ * cachefiles_ondemand_set_object_close(object) ++ * xa_unlock(&cache->reqs) ++ * cachefiles_ondemand_set_object_open ++ * // No one will ever close it again. ++ * cachefiles_ondemand_daemon_read ++ * cachefiles_ondemand_select_req ++ * ++ * Get a read req but its fd is already closed. The daemon can't ++ * issue a cread ioctl with an closed fd, then hung. 
++ */ ++ if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) { ++ spin_unlock(&info->lock); ++ req->error = -EBADFD; ++ goto out; ++ } + cookie = req->object->cookie; + cookie->object_size = size; + if (size) +@@ -176,19 +223,46 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); + trace_cachefiles_ondemand_copen(req->object, id, size); + ++ cachefiles_ondemand_set_object_open(req->object); ++ spin_unlock(&info->lock); ++ wake_up_all(&cache->daemon_pollwq); ++ + out: + complete(&req->done); + return ret; + } + +-static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) ++int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args) ++{ ++ struct cachefiles_req *req; ++ ++ XA_STATE(xas, &cache->reqs, 0); ++ ++ if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) ++ return -EOPNOTSUPP; ++ ++ /* ++ * Reset the requests to CACHEFILES_REQ_NEW state, so that the ++ * requests have been processed halfway before the crash of the ++ * user daemon could be reprocessed after the recovery. ++ */ ++ xas_lock(&xas); ++ xas_for_each(&xas, req, ULONG_MAX) ++ xas_set_mark(&xas, CACHEFILES_REQ_NEW); ++ xas_unlock(&xas); ++ ++ wake_up_all(&cache->daemon_pollwq); ++ return 0; ++} ++ ++static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, ++ struct ondemand_anon_file *anon_file) + { + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct cachefiles_open *load; +- struct file *file; + u32 object_id; +- int ret, fd; ++ int ret; + + object = cachefiles_grab_object(req->object, + cachefiles_obj_get_ondemand_fd); +@@ -200,60 +274,114 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + if (ret < 0) + goto err; + +- fd = get_unused_fd_flags(O_WRONLY); +- if (fd < 0) { +- ret = fd; ++ anon_file->fd = get_unused_fd_flags(O_WRONLY); ++ if (anon_file->fd < 0) { ++ ret = anon_file->fd; + goto err_free_id; + } + +- file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, +- object, O_WRONLY); +- if (IS_ERR(file)) { +- ret = PTR_ERR(file); ++ anon_file->file = anon_inode_getfile("[cachefiles]", ++ &cachefiles_ondemand_fd_fops, object, O_WRONLY); ++ if (IS_ERR(anon_file->file)) { ++ ret = PTR_ERR(anon_file->file); + goto err_put_fd; + } + +- file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; +- fd_install(fd, file); ++ spin_lock(&object->ondemand->lock); ++ if (object->ondemand->ondemand_id > 0) { ++ spin_unlock(&object->ondemand->lock); ++ /* Pair with check in cachefiles_ondemand_fd_release(). */ ++ anon_file->file->private_data = NULL; ++ ret = -EEXIST; ++ goto err_put_file; ++ } ++ ++ anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; + + load = (void *)req->msg.data; +- load->fd = fd; +- req->msg.object_id = object_id; +- object->ondemand_id = object_id; ++ load->fd = anon_file->fd; ++ object->ondemand->ondemand_id = object_id; ++ spin_unlock(&object->ondemand->lock); + + cachefiles_get_unbind_pincount(cache); + trace_cachefiles_ondemand_open(object, &req->msg, load); + return 0; + ++err_put_file: ++ fput(anon_file->file); ++ anon_file->file = NULL; + err_put_fd: +- put_unused_fd(fd); ++ put_unused_fd(anon_file->fd); ++ anon_file->fd = ret; + err_free_id: + xa_erase(&cache->ondemand_ids, object_id); + err: ++ spin_lock(&object->ondemand->lock); ++ /* Avoid marking an opened object as closed. 
*/ ++ if (object->ondemand->ondemand_id <= 0) ++ cachefiles_ondemand_set_object_close(object); ++ spin_unlock(&object->ondemand->lock); + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + return ret; + } + ++static void ondemand_object_worker(struct work_struct *work) ++{ ++ struct cachefiles_ondemand_info *info = ++ container_of(work, struct cachefiles_ondemand_info, ondemand_work); ++ ++ cachefiles_ondemand_init_object(info->object); ++} ++ ++/* ++ * If there are any inflight or subsequent READ requests on the ++ * closed object, reopen it. ++ * Skip read requests whose related object is reopening. ++ */ ++static struct cachefiles_req *cachefiles_ondemand_select_req(struct xa_state *xas, ++ unsigned long xa_max) ++{ ++ struct cachefiles_req *req; ++ struct cachefiles_object *object; ++ struct cachefiles_ondemand_info *info; ++ ++ xas_for_each_marked(xas, req, xa_max, CACHEFILES_REQ_NEW) { ++ if (req->msg.opcode != CACHEFILES_OP_READ) ++ return req; ++ object = req->object; ++ info = object->ondemand; ++ if (cachefiles_ondemand_object_is_close(object)) { ++ cachefiles_ondemand_set_object_reopening(object); ++ queue_work(fscache_wq, &info->ondemand_work); ++ continue; ++ } ++ if (cachefiles_ondemand_object_is_reopening(object)) ++ continue; ++ return req; ++ } ++ return NULL; ++} ++ + ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) + { + struct cachefiles_req *req; + struct cachefiles_msg *msg; +- unsigned long id = 0; + size_t n; + int ret = 0; ++ struct ondemand_anon_file anon_file; + XA_STATE(xas, &cache->reqs, cache->req_id_next); + ++ xa_lock(&cache->reqs); + /* + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. 
+ */ +- xa_lock(&cache->reqs); +- req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); ++ req = cachefiles_ondemand_select_req(&xas, ULONG_MAX); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); +- req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW); ++ req = cachefiles_ondemand_select_req(&xas, cache->req_id_next - 1); + } + if (!req) { + xa_unlock(&cache->reqs); +@@ -270,38 +398,45 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + + xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; ++ refcount_inc(&req->ref); ++ cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); + xa_unlock(&cache->reqs); + +- id = xas.xa_index; +- msg->msg_id = id; +- + if (msg->opcode == CACHEFILES_OP_OPEN) { +- ret = cachefiles_ondemand_get_fd(req); ++ ret = cachefiles_ondemand_get_fd(req, &anon_file); + if (ret) +- goto error; ++ goto out; + } + +- if (copy_to_user(_buffer, msg, n) != 0) { ++ msg->msg_id = xas.xa_index; ++ msg->object_id = req->object->ondemand->ondemand_id; ++ ++ if (copy_to_user(_buffer, msg, n) != 0) + ret = -EFAULT; +- goto err_put_fd; +- } + +- /* CLOSE request has no reply */ +- if (msg->opcode == CACHEFILES_OP_CLOSE) { +- xa_erase(&cache->reqs, id); +- complete(&req->done); ++ if (msg->opcode == CACHEFILES_OP_OPEN) { ++ if (ret < 0) { ++ fput(anon_file.file); ++ put_unused_fd(anon_file.fd); ++ goto out; ++ } ++ fd_install(anon_file.fd, anon_file.file); + } +- +- return n; +- +-err_put_fd: +- if (msg->opcode == CACHEFILES_OP_OPEN) +- close_fd(((struct cachefiles_open *)msg->data)->fd); +-error: +- xa_erase(&cache->reqs, id); +- req->error = ret; +- complete(&req->done); +- return ret; ++out: ++ cachefiles_put_object(req->object, cachefiles_obj_put_read_req); ++ /* Remove error request and CLOSE request has no reply */ ++ if (ret || msg->opcode == CACHEFILES_OP_CLOSE) { ++ xas_reset(&xas); ++ xas_lock(&xas); ++ if (xas_load(&xas) == req) { ++ req->error = ret; ++ complete(&req->done); ++ xas_store(&xas, NULL); ++ } ++ xas_unlock(&xas); ++ } ++ cachefiles_req_put(req); ++ return ret ? 
ret : n; + } + + typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); +@@ -313,20 +448,25 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + void *private) + { + struct cachefiles_cache *cache = object->volume->cache; +- struct cachefiles_req *req; ++ struct cachefiles_req *req = NULL; + XA_STATE(xas, &cache->reqs, 0); + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return 0; + +- if (test_bit(CACHEFILES_DEAD, &cache->flags)) +- return -EIO; ++ if (test_bit(CACHEFILES_DEAD, &cache->flags)) { ++ ret = -EIO; ++ goto out; ++ } + + req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL); +- if (!req) +- return -ENOMEM; ++ if (!req) { ++ ret = -ENOMEM; ++ goto out; ++ } + ++ refcount_set(&req->ref, 1); + req->object = object; + init_completion(&req->done); + req->msg.opcode = opcode; +@@ -363,8 +503,9 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + /* coupled with the barrier in cachefiles_flush_reqs() */ + smp_mb(); + +- if (opcode != CACHEFILES_OP_OPEN && object->ondemand_id <= 0) { +- WARN_ON_ONCE(object->ondemand_id == 0); ++ if (opcode == CACHEFILES_OP_CLOSE && ++ !cachefiles_ondemand_object_is_open(object)) { ++ WARN_ON_ONCE(object->ondemand->ondemand_id == 0); + xas_unlock(&xas); + ret = -EIO; + goto out; +@@ -387,7 +528,15 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + wake_up_all(&cache->daemon_pollwq); + wait_for_completion(&req->done); + ret = req->error; ++ cachefiles_req_put(req); ++ return ret; + out: ++ /* Reset the object to close state in error handling path. ++ * If error occurs after creating the anonymous fd, ++ * cachefiles_ondemand_fd_release() will set object to close. ++ */ ++ if (opcode == CACHEFILES_OP_OPEN) ++ cachefiles_ondemand_set_object_close(object); + kfree(req); + return ret; + } +@@ -430,18 +579,10 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, + void *private) + { + struct cachefiles_object *object = req->object; +- int object_id = object->ondemand_id; + +- /* +- * It's possible that object id is still 0 if the cookie looking up +- * phase failed before OPEN request has ever been sent. Also avoid +- * sending CLOSE request for CACHEFILES_ONDEMAND_ID_CLOSED, which means +- * anon_fd has already been closed. +- */ +- if (object_id <= 0) ++ if (!cachefiles_ondemand_object_is_open(object)) + return -ENOENT; + +- req->msg.object_id = object_id; + trace_cachefiles_ondemand_close(object, &req->msg); + return 0; + } +@@ -457,16 +598,7 @@ static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, + struct cachefiles_object *object = req->object; + struct cachefiles_read *load = (void *)req->msg.data; + struct cachefiles_read_ctx *read_ctx = private; +- int object_id = object->ondemand_id; +- +- /* Stop enqueuing requests when daemon has closed anon_fd. */ +- if (object_id <= 0) { +- WARN_ON_ONCE(object_id == 0); +- pr_info_once("READ: anonymous fd closed prematurely.\n"); +- return -EIO; +- } + +- req->msg.object_id = object_id; + load->off = read_ctx->off; + load->len = read_ctx->len; + trace_cachefiles_ondemand_read(object, &req->msg, load); +@@ -479,13 +611,16 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) + struct fscache_volume *volume = object->volume->vcookie; + size_t volume_key_size, cookie_key_size, data_len; + ++ if (!object->ondemand) ++ return 0; ++ + /* + * CacheFiles will firstly check the cache file under the root cache + * directory. 
If the coherency check failed, it will fallback to + * creating a new tmpfile as the cache file. Reuse the previously + * allocated object ID if any. + */ +- if (object->ondemand_id > 0) ++ if (cachefiles_ondemand_object_is_open(object)) + return 0; + + volume_key_size = volume->key[0] + 1; +@@ -503,6 +638,29 @@ void cachefiles_ondemand_clean_object(struct cachefiles_object *object) + cachefiles_ondemand_init_close_req, NULL); + } + ++int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, ++ struct cachefiles_volume *volume) ++{ ++ if (!cachefiles_in_ondemand_mode(volume->cache)) ++ return 0; ++ ++ object->ondemand = kzalloc(sizeof(struct cachefiles_ondemand_info), ++ GFP_KERNEL); ++ if (!object->ondemand) ++ return -ENOMEM; ++ ++ object->ondemand->object = object; ++ spin_lock_init(&object->ondemand->lock); ++ INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker); ++ return 0; ++} ++ ++void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object) ++{ ++ kfree(object->ondemand); ++ object->ondemand = NULL; ++} ++ + int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) + { +diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c +index 8577ad494e056..941d30bde39fa 100644 +--- a/fs/jfs/xattr.c ++++ b/fs/jfs/xattr.c +@@ -557,9 +557,11 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) + + size_check: + if (EALIST_SIZE(ea_buf->xattr) != ea_size) { ++ int size = min_t(int, EALIST_SIZE(ea_buf->xattr), ea_size); ++ + printk(KERN_ERR "ea_get: invalid extended attribute\n"); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, +- ea_buf->xattr, ea_size, 1); ++ ea_buf->xattr, size, 1); + ea_release(inode, ea_buf); + rc = -EIO; + goto clean_up; +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index 9fc5061d51b2f..2a0f069d5a096 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -1802,9 +1802,10 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, + if (parent != READ_ONCE(dentry->d_parent)) + return -ECHILD; + } else { +- /* Wait for unlink to complete */ ++ /* Wait for unlink to complete - see unblock_revalidate() */ + wait_var_event(&dentry->d_fsdata, +- dentry->d_fsdata != NFS_FSDATA_BLOCKED); ++ smp_load_acquire(&dentry->d_fsdata) ++ != NFS_FSDATA_BLOCKED); + parent = dget_parent(dentry); + ret = reval(d_inode(parent), dentry, flags); + dput(parent); +@@ -1817,6 +1818,29 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); + } + ++static void block_revalidate(struct dentry *dentry) ++{ ++ /* old devname - just in case */ ++ kfree(dentry->d_fsdata); ++ ++ /* Any new reference that could lead to an open ++ * will take ->d_lock in lookup_open() -> d_lookup(). ++ * Holding this lock ensures we cannot race with ++ * __nfs_lookup_revalidate() and removes and need ++ * for further barriers. ++ */ ++ lockdep_assert_held(&dentry->d_lock); ++ ++ dentry->d_fsdata = NFS_FSDATA_BLOCKED; ++} ++ ++static void unblock_revalidate(struct dentry *dentry) ++{ ++ /* store_release ensures wait_var_event() sees the update */ ++ smp_store_release(&dentry->d_fsdata, NULL); ++ wake_up_var(&dentry->d_fsdata); ++} ++ + /* + * A weaker form of d_revalidate for revalidating just the d_inode(dentry) + * when we don't really care about the dentry name. 
This is called when a +@@ -2499,15 +2523,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) + spin_unlock(&dentry->d_lock); + goto out; + } +- /* old devname */ +- kfree(dentry->d_fsdata); +- dentry->d_fsdata = NFS_FSDATA_BLOCKED; ++ block_revalidate(dentry); + + spin_unlock(&dentry->d_lock); + error = nfs_safe_remove(dentry); + nfs_dentry_remove_handle_error(dir, dentry, error); +- dentry->d_fsdata = NULL; +- wake_up_var(&dentry->d_fsdata); ++ unblock_revalidate(dentry); + out: + trace_nfs_unlink_exit(dir, dentry, error); + return error; +@@ -2619,8 +2640,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) + { + struct dentry *new_dentry = data->new_dentry; + +- new_dentry->d_fsdata = NULL; +- wake_up_var(&new_dentry->d_fsdata); ++ unblock_revalidate(new_dentry); + } + + /* +@@ -2682,11 +2702,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, + if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || + WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) + goto out; +- if (new_dentry->d_fsdata) { +- /* old devname */ +- kfree(new_dentry->d_fsdata); +- new_dentry->d_fsdata = NULL; +- } + + spin_lock(&new_dentry->d_lock); + if (d_count(new_dentry) > 2) { +@@ -2708,7 +2723,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, + new_dentry = dentry; + new_inode = NULL; + } else { +- new_dentry->d_fsdata = NFS_FSDATA_BLOCKED; ++ block_revalidate(new_dentry); + must_unblock = true; + spin_unlock(&new_dentry->d_lock); + } +@@ -2720,6 +2735,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, + must_unblock ? nfs_unblock_rename : NULL); + if (IS_ERR(task)) { ++ if (must_unblock) ++ unblock_revalidate(new_dentry); + error = PTR_ERR(task); + goto out; + } +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 41b7eafbd9287..f0953200acd08 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -4003,6 +4003,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, + } + } + ++static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1, ++ struct nfs4_pathname *path2) ++{ ++ int i; ++ ++ if (path1->ncomponents != path2->ncomponents) ++ return false; ++ for (i = 0; i < path1->ncomponents; i++) { ++ if (path1->components[i].len != path2->components[i].len) ++ return false; ++ if (memcmp(path1->components[i].data, path2->components[i].data, ++ path1->components[i].len)) ++ return false; ++ } ++ return true; ++} ++ + static int _nfs4_discover_trunking(struct nfs_server *server, + struct nfs_fh *fhandle) + { +@@ -4036,9 +4053,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server, + if (status) + goto out_free_3; + +- for (i = 0; i < locations->nlocations; i++) ++ for (i = 0; i < locations->nlocations; i++) { ++ if (!_is_same_nfs4_pathname(&locations->fs_path, ++ &locations->locations[i].rootpath)) ++ continue; + test_fs_location_for_trunking(&locations->locations[i], clp, + server); ++ } + out_free_3: + kfree(locations->fattr); + out_free_2: +diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c +index 355bf0db3235b..937be276bb6b4 100644 +--- a/fs/nfsd/nfsfh.c ++++ b/fs/nfsd/nfsfh.c +@@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, + _fh_update(fhp, exp, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { + fh_put(fhp); +- return nfserr_opnotsupp; ++ return nfserr_stale; + } + + return 0; +@@ -598,7 +598,7 @@ fh_update(struct svc_fh *fhp) + + 
_fh_update(fhp, fhp->fh_export, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) +- return nfserr_opnotsupp; ++ return nfserr_stale; + return 0; + out_bad: + printk(KERN_ERR "fh_update: fh not verified!\n"); +diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c +index 929edc0b101a0..23a8357f127bc 100644 +--- a/fs/nilfs2/dir.c ++++ b/fs/nilfs2/dir.c +@@ -186,19 +186,24 @@ static bool nilfs_check_page(struct page *page) + return false; + } + +-static struct page *nilfs_get_page(struct inode *dir, unsigned long n) ++static void *nilfs_get_page(struct inode *dir, unsigned long n, ++ struct page **pagep) + { + struct address_space *mapping = dir->i_mapping; + struct page *page = read_mapping_page(mapping, n, NULL); ++ void *kaddr; + +- if (!IS_ERR(page)) { +- kmap(page); +- if (unlikely(!PageChecked(page))) { +- if (!nilfs_check_page(page)) +- goto fail; +- } ++ if (IS_ERR(page)) ++ return page; ++ ++ kaddr = kmap(page); ++ if (unlikely(!PageChecked(page))) { ++ if (!nilfs_check_page(page)) ++ goto fail; + } +- return page; ++ ++ *pagep = page; ++ return kaddr; + + fail: + nilfs_put_page(page); +@@ -275,14 +280,14 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) + for ( ; n < npages; n++, offset = 0) { + char *kaddr, *limit; + struct nilfs_dir_entry *de; +- struct page *page = nilfs_get_page(inode, n); ++ struct page *page; + +- if (IS_ERR(page)) { ++ kaddr = nilfs_get_page(inode, n, &page); ++ if (IS_ERR(kaddr)) { + nilfs_error(sb, "bad page in #%lu", inode->i_ino); + ctx->pos += PAGE_SIZE - offset; + return -EIO; + } +- kaddr = page_address(page); + de = (struct nilfs_dir_entry *)(kaddr + offset); + limit = kaddr + nilfs_last_byte(inode, n) - + NILFS_DIR_REC_LEN(1); +@@ -345,11 +350,9 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, + start = 0; + n = start; + do { +- char *kaddr; ++ char *kaddr = nilfs_get_page(dir, n, &page); + +- page = nilfs_get_page(dir, n); +- if (!IS_ERR(page)) { +- kaddr = page_address(page); ++ if (!IS_ERR(kaddr)) { + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *) de <= kaddr) { +@@ -387,15 +390,11 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, + + struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) + { +- struct page *page = nilfs_get_page(dir, 0); +- struct nilfs_dir_entry *de = NULL; ++ struct nilfs_dir_entry *de = nilfs_get_page(dir, 0, p); + +- if (!IS_ERR(page)) { +- de = nilfs_next_entry( +- (struct nilfs_dir_entry *)page_address(page)); +- *p = page; +- } +- return de; ++ if (IS_ERR(de)) ++ return NULL; ++ return nilfs_next_entry(de); + } + + ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +@@ -459,12 +458,11 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) + for (n = 0; n <= npages; n++) { + char *dir_end; + +- page = nilfs_get_page(dir, n); +- err = PTR_ERR(page); +- if (IS_ERR(page)) ++ kaddr = nilfs_get_page(dir, n, &page); ++ err = PTR_ERR(kaddr); ++ if (IS_ERR(kaddr)) + goto out; + lock_page(page); +- kaddr = page_address(page); + dir_end = kaddr + nilfs_last_byte(dir, n); + de = (struct nilfs_dir_entry *)kaddr; + kaddr += PAGE_SIZE - reclen; +@@ -627,11 +625,10 @@ int nilfs_empty_dir(struct inode *inode) + char *kaddr; + struct nilfs_dir_entry *de; + +- page = nilfs_get_page(inode, i); +- if (IS_ERR(page)) +- continue; ++ kaddr = nilfs_get_page(inode, i, &page); ++ if (IS_ERR(kaddr)) ++ return 0; + +- kaddr = page_address(page); + de = (struct nilfs_dir_entry *)kaddr; + kaddr 
+= nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); + +diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c +index 2d74fb2297990..5783efafbabda 100644 +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -1694,6 +1694,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) + if (bh->b_page != bd_page) { + if (bd_page) { + lock_page(bd_page); ++ wait_on_page_writeback(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); +@@ -1707,6 +1708,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) + if (bh == segbuf->sb_super_root) { + if (bh->b_page != bd_page) { + lock_page(bd_page); ++ wait_on_page_writeback(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); +@@ -1723,6 +1725,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) + } + if (bd_page) { + lock_page(bd_page); ++ wait_on_page_writeback(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index c45596c25c665..f861b8c345e86 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -1934,6 +1934,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, + + inode_lock(inode); + ++ /* Wait all existing dio workers, newcomers will block on i_rwsem */ ++ inode_dio_wait(inode); + /* + * This prevents concurrent writes on other nodes + */ +diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c +index 5cd6d7771cea1..8e648073bf712 100644 +--- a/fs/ocfs2/namei.c ++++ b/fs/ocfs2/namei.c +@@ -566,7 +566,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, + fe->i_last_eb_blk = 0; + strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); + fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); +- ktime_get_real_ts64(&ts); ++ ktime_get_coarse_real_ts64(&ts); + fe->i_atime = fe->i_ctime = fe->i_mtime = + cpu_to_le64(ts.tv_sec); + fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = +diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c +index 1fb213f379a5b..d06607a1f137a 100644 +--- a/fs/proc/vmcore.c ++++ b/fs/proc/vmcore.c +@@ -383,6 +383,8 @@ static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) + /* leave now if filled buffer already */ + if (!iov_iter_count(iter)) + return acc; ++ ++ cond_resched(); + } + + list_for_each_entry(m, &vmcore_list, list) { +diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c +index 7d17a14378e33..a8f52c4ebbdad 100644 +--- a/fs/smb/server/oplock.c ++++ b/fs/smb/server/oplock.c +@@ -207,9 +207,9 @@ static void opinfo_add(struct oplock_info *opinfo) + { + struct ksmbd_inode *ci = opinfo->o_fp->f_ci; + +- write_lock(&ci->m_lock); ++ down_write(&ci->m_lock); + list_add_rcu(&opinfo->op_entry, &ci->m_op_list); +- write_unlock(&ci->m_lock); ++ up_write(&ci->m_lock); + } + + static void opinfo_del(struct oplock_info *opinfo) +@@ -221,9 +221,9 @@ static void opinfo_del(struct oplock_info *opinfo) + lease_del_list(opinfo); + write_unlock(&lease_list_lock); + } +- write_lock(&ci->m_lock); ++ down_write(&ci->m_lock); + list_del_rcu(&opinfo->op_entry); +- write_unlock(&ci->m_lock); ++ up_write(&ci->m_lock); + } + + static unsigned long opinfo_count(struct ksmbd_file *fp) +@@ -526,21 +526,18 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, + * Compare lease key and client_guid to know request from same owner + * of same client + */ +- read_lock(&ci->m_lock); ++ down_read(&ci->m_lock); + list_for_each_entry(opinfo, &ci->m_op_list, 
op_entry) { + if (!opinfo->is_lease || !opinfo->conn) + continue; +- read_unlock(&ci->m_lock); + lease = opinfo->o_lease; + + ret = compare_guid_key(opinfo, client_guid, lctx->lease_key); + if (ret) { + m_opinfo = opinfo; + /* skip upgrading lease about breaking lease */ +- if (atomic_read(&opinfo->breaking_cnt)) { +- read_lock(&ci->m_lock); ++ if (atomic_read(&opinfo->breaking_cnt)) + continue; +- } + + /* upgrading lease */ + if ((atomic_read(&ci->op_count) + +@@ -570,9 +567,8 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, + lease_none_upgrade(opinfo, lctx->req_state); + } + } +- read_lock(&ci->m_lock); + } +- read_unlock(&ci->m_lock); ++ up_read(&ci->m_lock); + + return m_opinfo; + } +@@ -1119,7 +1115,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + if (!p_ci) + return; + +- read_lock(&p_ci->m_lock); ++ down_read(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (opinfo->conn == NULL || !opinfo->is_lease) + continue; +@@ -1137,13 +1133,11 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + continue; + } + +- read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); +- read_lock(&p_ci->m_lock); + } + } +- read_unlock(&p_ci->m_lock); ++ up_read(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); + } +@@ -1164,7 +1158,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) + if (!p_ci) + return; + +- read_lock(&p_ci->m_lock); ++ down_read(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (opinfo->conn == NULL || !opinfo->is_lease) + continue; +@@ -1178,13 +1172,11 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) + atomic_dec(&opinfo->conn->r_count); + continue; + } +- read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); +- read_lock(&p_ci->m_lock); + } + } +- read_unlock(&p_ci->m_lock); ++ up_read(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); + } +diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c +index 6a15c5d64f415..6397f77b6750c 100644 +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -630,6 +630,12 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) + return name; + } + ++ if (*name == '\\') { ++ pr_err("not allow directory name included leading slash\n"); ++ kfree(name); ++ return ERR_PTR(-EINVAL); ++ } ++ + ksmbd_conv_path_to_unix(name); + ksmbd_strip_last_slash(name); + return name; +@@ -2361,7 +2367,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, + if (rc > 0) { + rc = ksmbd_vfs_remove_xattr(idmap, + path, +- attr_name); ++ attr_name, ++ get_write); + + if (rc < 0) { + ksmbd_debug(SMB, +@@ -2376,7 +2383,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, + } else { + rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, + le16_to_cpu(eabuf->EaValueLength), +- 0, true); ++ 0, get_write); + if (rc < 0) { + ksmbd_debug(SMB, + "ksmbd_vfs_setxattr is failed(%d)\n", +@@ -2468,7 +2475,7 @@ static int smb2_remove_smb_xattrs(const struct path *path) + !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, + STREAM_PREFIX_LEN)) { + err = ksmbd_vfs_remove_xattr(idmap, path, +- name); ++ name, true); + if (err) + ksmbd_debug(SMB, "remove xattr failed : %s\n", + name); +@@ -2842,20 +2849,11 @@ int smb2_open(struct ksmbd_work *work) + } + + if (req->NameLength) { +- if ((req->CreateOptions & FILE_DIRECTORY_FILE_LE) && +- *(char *)req->Buffer == '\\') { 
+- pr_err("not allow directory name included leading slash\n"); +- rc = -EINVAL; +- goto err_out2; +- } +- + name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset), + le16_to_cpu(req->NameLength), + work->conn->local_nls); + if (IS_ERR(name)) { + rc = PTR_ERR(name); +- if (rc != -ENOMEM) +- rc = -ENOENT; + name = NULL; + goto err_out2; + } +@@ -3376,9 +3374,9 @@ int smb2_open(struct ksmbd_work *work) + * after daccess, saccess, attrib_only, and stream are + * initialized. + */ +- write_lock(&fp->f_ci->m_lock); ++ down_write(&fp->f_ci->m_lock); + list_add(&fp->node, &fp->f_ci->m_fp_list); +- write_unlock(&fp->f_ci->m_lock); ++ up_write(&fp->f_ci->m_lock); + + /* Check delete pending among previous fp before oplock break */ + if (ksmbd_inode_pending_delete(fp)) { +diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c +index fcaf373cc0080..474dadf6b7b8b 100644 +--- a/fs/smb/server/smb_common.c ++++ b/fs/smb/server/smb_common.c +@@ -646,7 +646,7 @@ int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp) + * Lookup fp in master fp list, and check desired access and + * shared mode between previous open and current open. + */ +- read_lock(&curr_fp->f_ci->m_lock); ++ down_read(&curr_fp->f_ci->m_lock); + list_for_each_entry(prev_fp, &curr_fp->f_ci->m_fp_list, node) { + if (file_inode(filp) != file_inode(prev_fp->filp)) + continue; +@@ -722,7 +722,7 @@ int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp) + break; + } + } +- read_unlock(&curr_fp->f_ci->m_lock); ++ up_read(&curr_fp->f_ci->m_lock); + + return rc; + } +diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c +index 2558119969359..dceb4bc76a66a 100644 +--- a/fs/smb/server/vfs.c ++++ b/fs/smb/server/vfs.c +@@ -1053,16 +1053,21 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, + } + + int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, +- const struct path *path, char *attr_name) ++ const struct path *path, char *attr_name, ++ bool get_write) + { + int err; + +- err = mnt_want_write(path->mnt); +- if (err) +- return err; ++ if (get_write == true) { ++ err = mnt_want_write(path->mnt); ++ if (err) ++ return err; ++ } + + err = vfs_removexattr(idmap, path->dentry, attr_name); +- mnt_drop_write(path->mnt); ++ ++ if (get_write == true) ++ mnt_drop_write(path->mnt); + + return err; + } +@@ -1375,7 +1380,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) + ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); + + if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { +- err = ksmbd_vfs_remove_xattr(idmap, path, name); ++ err = ksmbd_vfs_remove_xattr(idmap, path, name, true); + if (err) + ksmbd_debug(SMB, "remove xattr failed : %s\n", name); + } +diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h +index cfe1c8092f230..cb76f4b5bafe8 100644 +--- a/fs/smb/server/vfs.h ++++ b/fs/smb/server/vfs.h +@@ -114,7 +114,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, + int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, + size_t *xattr_stream_name_size, int s_type); + int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, +- const struct path *path, char *attr_name); ++ const struct path *path, char *attr_name, ++ bool get_write); + int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, + unsigned int flags, struct path *parent_path, + struct path *path, bool caseless); +diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c +index 030f70700036c..8b2e37c8716ed 100644 
+--- a/fs/smb/server/vfs_cache.c ++++ b/fs/smb/server/vfs_cache.c +@@ -165,7 +165,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp) + ci->m_fattr = 0; + INIT_LIST_HEAD(&ci->m_fp_list); + INIT_LIST_HEAD(&ci->m_op_list); +- rwlock_init(&ci->m_lock); ++ init_rwsem(&ci->m_lock); + ci->m_de = fp->filp->f_path.dentry; + return 0; + } +@@ -254,21 +254,22 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) + ci->m_flags &= ~S_DEL_ON_CLS_STREAM; + err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), + &filp->f_path, +- fp->stream.name); ++ fp->stream.name, ++ true); + if (err) + pr_err("remove xattr failed : %s\n", + fp->stream.name); + } + + if (atomic_dec_and_test(&ci->m_count)) { +- write_lock(&ci->m_lock); ++ down_write(&ci->m_lock); + if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) { + ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING); +- write_unlock(&ci->m_lock); ++ up_write(&ci->m_lock); + ksmbd_vfs_unlink(filp); +- write_lock(&ci->m_lock); ++ down_write(&ci->m_lock); + } +- write_unlock(&ci->m_lock); ++ up_write(&ci->m_lock); + + ksmbd_inode_free(ci); + } +@@ -289,9 +290,9 @@ static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp + if (!has_file_id(fp->volatile_id)) + return; + +- write_lock(&fp->f_ci->m_lock); ++ down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); +- write_unlock(&fp->f_ci->m_lock); ++ up_write(&fp->f_ci->m_lock); + + write_lock(&ft->lock); + idr_remove(ft->idr, fp->volatile_id); +@@ -523,17 +524,17 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry) + if (!ci) + return NULL; + +- read_lock(&ci->m_lock); ++ down_read(&ci->m_lock); + list_for_each_entry(lfp, &ci->m_fp_list, node) { + if (inode == file_inode(lfp->filp)) { + atomic_dec(&ci->m_count); + lfp = ksmbd_fp_get(lfp); +- read_unlock(&ci->m_lock); ++ up_read(&ci->m_lock); + return lfp; + } + } + atomic_dec(&ci->m_count); +- read_unlock(&ci->m_lock); ++ up_read(&ci->m_lock); + return NULL; + } + +@@ -705,13 +706,13 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, + + conn = fp->conn; + ci = fp->f_ci; +- write_lock(&ci->m_lock); ++ down_write(&ci->m_lock); + list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { + if (op->conn != conn) + continue; + op->conn = NULL; + } +- write_unlock(&ci->m_lock); ++ up_write(&ci->m_lock); + + fp->conn = NULL; + fp->tcon = NULL; +@@ -801,13 +802,13 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) + fp->tcon = work->tcon; + + ci = fp->f_ci; +- write_lock(&ci->m_lock); ++ down_write(&ci->m_lock); + list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { + if (op->conn) + continue; + op->conn = fp->conn; + } +- write_unlock(&ci->m_lock); ++ up_write(&ci->m_lock); + + __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); + if (!has_file_id(fp->volatile_id)) { +diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h +index ed44fb4e18e79..5a225e7055f19 100644 +--- a/fs/smb/server/vfs_cache.h ++++ b/fs/smb/server/vfs_cache.h +@@ -47,7 +47,7 @@ struct stream { + }; + + struct ksmbd_inode { +- rwlock_t m_lock; ++ struct rw_semaphore m_lock; + atomic_t m_count; + atomic_t op_count; + /* opinfo count for streams */ +diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c +index b521e904a7ce9..b406bb3430f3d 100644 +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -305,33 +305,60 @@ static const struct file_operations eventfs_file_operations = { + .llseek = generic_file_llseek, + }; + +-/* +- * On a remount of tracefs, if 
UID or GID options are set, then +- * the mount point inode permissions should be used. +- * Reset the saved permission flags appropriately. +- */ +-void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) ++static void eventfs_set_attrs(struct eventfs_inode *ei, bool update_uid, kuid_t uid, ++ bool update_gid, kgid_t gid, int level) + { +- struct eventfs_inode *ei = ti->private; ++ struct eventfs_inode *ei_child; + +- if (!ei) ++ /* Update events/<system>/<event> */ ++ if (WARN_ON_ONCE(level > 3)) + return; + +- if (update_uid) ++ if (update_uid) { + ei->attr.mode &= ~EVENTFS_SAVE_UID; ++ ei->attr.uid = uid; ++ } + +- if (update_gid) ++ if (update_gid) { + ei->attr.mode &= ~EVENTFS_SAVE_GID; ++ ei->attr.gid = gid; ++ } ++ ++ list_for_each_entry(ei_child, &ei->children, list) { ++ eventfs_set_attrs(ei_child, update_uid, uid, update_gid, gid, level + 1); ++ } + + if (!ei->entry_attrs) + return; + + for (int i = 0; i < ei->nr_entries; i++) { +- if (update_uid) ++ if (update_uid) { + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; +- if (update_gid) ++ ei->entry_attrs[i].uid = uid; ++ } ++ if (update_gid) { + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; ++ ei->entry_attrs[i].gid = gid; ++ } + } ++ ++} ++ ++/* ++ * On a remount of tracefs, if UID or GID options are set, then ++ * the mount point inode permissions should be used. ++ * Reset the saved permission flags appropriately. ++ */ ++void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) ++{ ++ struct eventfs_inode *ei = ti->private; ++ ++ /* Only the events directory does the updates */ ++ if (!ei || !ei->is_events || ei->is_freed) ++ return; ++ ++ eventfs_set_attrs(ei, update_uid, ti->vfs_inode.i_uid, ++ update_gid, ti->vfs_inode.i_gid, 0); + } + + /* Return the evenfs_inode of the "events" directory */ +diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c +index 18d9bb2ebe8e8..1531bd0ee359c 100644 +--- a/fs/xfs/libxfs/xfs_ag.c ++++ b/fs/xfs/libxfs/xfs_ag.c +@@ -979,14 +979,23 @@ xfs_ag_shrink_space( + + if (error) { + /* +- * if extent allocation fails, need to roll the transaction to ++ * If extent allocation fails, need to roll the transaction to + * ensure that the AGFL fixup has been committed anyway. ++ * ++ * We need to hold the AGF across the roll to ensure nothing can ++ * access the AG for allocation until the shrink is fully ++ * cleaned up. And due to the resetting of the AG block ++ * reservation space needing to lock the AGI, we also have to ++ * hold that so we don't get AGI/AGF lock order inversions in ++ * the error handling path. + */ + xfs_trans_bhold(*tpp, agfbp); ++ xfs_trans_bhold(*tpp, agibp); + err2 = xfs_trans_roll(tpp); + if (err2) + return err2; + xfs_trans_bjoin(*tpp, agfbp); ++ xfs_trans_bjoin(*tpp, agibp); + goto resv_init_out; + } + +diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c +index 571bb2a770ac3..59c4804e4d790 100644 +--- a/fs/xfs/libxfs/xfs_sb.c ++++ b/fs/xfs/libxfs/xfs_sb.c +@@ -530,7 +530,8 @@ xfs_validate_sb_common( + } + + if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), +- XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) ++ XFS_FSB_TO_B(mp, sbp->sb_width), 0, ++ xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) + return -EFSCORRUPTED; + + /* +@@ -1319,8 +1320,10 @@ xfs_sb_get_secondary( + } + + /* +- * sunit, swidth, sectorsize(optional with 0) should be all in bytes, +- * so users won't be confused by values in error messages. 
++ * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users ++ * won't be confused by values in error messages. This function returns false ++ * if the stripe geometry is invalid and the caller is unable to repair the ++ * stripe configuration later in the mount process. + */ + bool + xfs_validate_stripe_geometry( +@@ -1328,20 +1331,21 @@ xfs_validate_stripe_geometry( + __s64 sunit, + __s64 swidth, + int sectorsize, ++ bool may_repair, + bool silent) + { + if (swidth > INT_MAX) { + if (!silent) + xfs_notice(mp, + "stripe width (%lld) is too large", swidth); +- return false; ++ goto check_override; + } + + if (sunit > swidth) { + if (!silent) + xfs_notice(mp, + "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); +- return false; ++ goto check_override; + } + + if (sectorsize && (int)sunit % sectorsize) { +@@ -1349,21 +1353,21 @@ xfs_validate_stripe_geometry( + xfs_notice(mp, + "stripe unit (%lld) must be a multiple of the sector size (%d)", + sunit, sectorsize); +- return false; ++ goto check_override; + } + + if (sunit && !swidth) { + if (!silent) + xfs_notice(mp, + "invalid stripe unit (%lld) and stripe width of 0", sunit); +- return false; ++ goto check_override; + } + + if (!sunit && swidth) { + if (!silent) + xfs_notice(mp, + "invalid stripe width (%lld) and stripe unit of 0", swidth); +- return false; ++ goto check_override; + } + + if (sunit && (int)swidth % (int)sunit) { +@@ -1371,9 +1375,27 @@ xfs_validate_stripe_geometry( + xfs_notice(mp, + "stripe width (%lld) must be a multiple of the stripe unit (%lld)", + swidth, sunit); +- return false; ++ goto check_override; + } + return true; ++ ++check_override: ++ if (!may_repair) ++ return false; ++ /* ++ * During mount, mp->m_dalign will not be set unless the sunit mount ++ * option was set. If it was set, ignore the bad stripe alignment values ++ * and allow the validation and overwrite later in the mount process to ++ * attempt to overwrite the bad stripe alignment values with the values ++ * supplied by mount options. ++ */ ++ if (!mp->m_dalign) ++ return false; ++ if (!silent) ++ xfs_notice(mp, ++"Will try to correct with specified mount options sunit (%d) and swidth (%d)", ++ BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); ++ return true; + } + + /* +diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h +index 2e8e8d63d4eb2..37b1ed1bc2095 100644 +--- a/fs/xfs/libxfs/xfs_sb.h ++++ b/fs/xfs/libxfs/xfs_sb.h +@@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, + struct xfs_trans *tp, xfs_agnumber_t agno, + struct xfs_buf **bpp); + +-extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, +- __s64 sunit, __s64 swidth, int sectorsize, bool silent); ++bool xfs_validate_stripe_geometry(struct xfs_mount *mp, ++ __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, ++ bool silent); + + uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + +diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c +index 1935b9ce1885c..c3a9f33e5a8d1 100644 +--- a/fs/xfs/scrub/btree.c ++++ b/fs/xfs/scrub/btree.c +@@ -385,7 +385,12 @@ xchk_btree_check_block_owner( + agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); + agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); + +- init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; ++ /* ++ * If the btree being examined is not itself a per-AG btree, initialize ++ * sc->sa so that we can check for the presence of an ownership record ++ * in the rmap btree for the AG containing the block. 
++ */ ++ init_sa = bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE; + if (init_sa) { + error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa); + if (!xchk_btree_xref_process_error(bs->sc, bs->cur, +diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c +index 23944fcc1a6ca..08e2924852688 100644 +--- a/fs/xfs/scrub/common.c ++++ b/fs/xfs/scrub/common.c +@@ -978,9 +978,7 @@ xchk_irele( + struct xfs_scrub *sc, + struct xfs_inode *ip) + { +- if (current->journal_info != NULL) { +- ASSERT(current->journal_info == sc->tp); +- ++ if (sc->tp) { + /* + * If we are in a transaction, we /cannot/ drop the inode + * ourselves, because the VFS will trigger writeback, which +diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c +index cd91db4a55489..82499270e20b9 100644 +--- a/fs/xfs/scrub/stats.c ++++ b/fs/xfs/scrub/stats.c +@@ -329,9 +329,9 @@ xchk_stats_register( + if (!cs->cs_debugfs) + return; + +- debugfs_create_file("stats", 0644, cs->cs_debugfs, cs, ++ debugfs_create_file("stats", 0444, cs->cs_debugfs, cs, + &scrub_stats_fops); +- debugfs_create_file("clear_stats", 0400, cs->cs_debugfs, cs, ++ debugfs_create_file("clear_stats", 0200, cs->cs_debugfs, cs, + &clear_scrub_stats_fops); + } + +diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c +index 465d7630bb218..e74097e58097b 100644 +--- a/fs/xfs/xfs_aops.c ++++ b/fs/xfs/xfs_aops.c +@@ -502,13 +502,6 @@ xfs_vm_writepages( + { + struct xfs_writepage_ctx wpc = { }; + +- /* +- * Writing back data in a transaction context can result in recursive +- * transactions. This is bad, so issue a warning and get out of here. +- */ +- if (WARN_ON_ONCE(current->journal_info)) +- return 0; +- + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); + return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); + } +diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c +index 3c210ac837136..db88f41c94c6b 100644 +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -2031,8 +2031,10 @@ xfs_inodegc_want_queue_work( + * - Memory shrinkers queued the inactivation worker and it hasn't finished. + * - The queue depth exceeds the maximum allowable percpu backlog. + * +- * Note: If the current thread is running a transaction, we don't ever want to +- * wait for other transactions because that could introduce a deadlock. ++ * Note: If we are in a NOFS context here (e.g. current thread is running a ++ * transaction) the we don't want to block here as inodegc progress may require ++ * filesystem resources we hold to make progress and that could result in a ++ * deadlock. Hence we skip out of here if we are in a scoped NOFS context. + */ + static inline bool + xfs_inodegc_want_flush_work( +@@ -2040,7 +2042,7 @@ xfs_inodegc_want_flush_work( + unsigned int items, + unsigned int shrinker_hits) + { +- if (current->journal_info) ++ if (current->flags & PF_MEMALLOC_NOFS) + return false; + + if (shrinker_hits > 0) +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index f9d29acd72b9e..efb6b8f356174 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -1239,8 +1239,19 @@ xfs_link( + */ + if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) && + tdp->i_projid != sip->i_projid)) { +- error = -EXDEV; +- goto error_return; ++ /* ++ * Project quota setup skips special files which can ++ * leave inodes in a PROJINHERIT directory without a ++ * project ID set. We need to allow links to be made ++ * to these "project-less" inodes because userspace ++ * expects them to succeed after project ID setup, ++ * but everything else should be rejected. 
++ */ ++ if (!special_file(VFS_I(sip)->i_mode) || ++ sip->i_projid != 0) { ++ error = -EXDEV; ++ goto error_return; ++ } + } + + if (!resblks) { +diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c +index 18c8f168b1532..055cdec2e9ad6 100644 +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -1323,7 +1323,7 @@ xfs_seek_iomap_begin( + if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) { + if (data_fsb < cow_fsb + cmap.br_blockcount) + end_fsb = min(end_fsb, data_fsb); +- xfs_trim_extent(&cmap, offset_fsb, end_fsb); ++ xfs_trim_extent(&cmap, offset_fsb, end_fsb - offset_fsb); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, + IOMAP_F_SHARED, seq); +@@ -1348,7 +1348,7 @@ xfs_seek_iomap_begin( + imap.br_state = XFS_EXT_NORM; + done: + seq = xfs_iomap_inode_sequence(ip, 0); +- xfs_trim_extent(&imap, offset_fsb, end_fsb); ++ xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); + out_unlock: + xfs_iunlock(ip, lockmode); +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c +index cc14cd1c2282f..57f366c3d3554 100644 +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -3203,11 +3203,28 @@ xlog_do_recovery_pass( + kmem_free(hbp); + + /* +- * Submit buffers that have been added from the last record processed, +- * regardless of error status. ++ * Submit buffers that have been dirtied by the last record recovered. + */ +- if (!list_empty(&buffer_list)) ++ if (!list_empty(&buffer_list)) { ++ if (error) { ++ /* ++ * If there has been an item recovery error then we ++ * cannot allow partial checkpoint writeback to ++ * occur. We might have multiple checkpoints with the ++ * same start LSN in this buffer list, and partial ++ * writeback of a checkpoint in this situation can ++ * prevent future recovery of all the changes in the ++ * checkpoints at this start LSN. ++ * ++ * Note: Shutting down the filesystem will result in the ++ * delwri submission marking all the buffers stale, ++ * completing them and cleaning up _XBF_LOGRECOVERY ++ * state without doing any IO. 
++ */ ++ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); ++ } + error2 = xfs_buf_delwri_submit(&buffer_list); ++ } + + if (error && first_bad) + *first_bad = rhead_blk; +diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h +index 4e38357237c31..ead65f5f8dc32 100644 +--- a/fs/xfs/xfs_trans.h ++++ b/fs/xfs/xfs_trans.h +@@ -277,19 +277,14 @@ static inline void + xfs_trans_set_context( + struct xfs_trans *tp) + { +- ASSERT(current->journal_info == NULL); + tp->t_pflags = memalloc_nofs_save(); +- current->journal_info = tp; + } + + static inline void + xfs_trans_clear_context( + struct xfs_trans *tp) + { +- if (current->journal_info == tp) { +- memalloc_nofs_restore(tp->t_pflags); +- current->journal_info = NULL; +- } ++ memalloc_nofs_restore(tp->t_pflags); + } + + static inline void +@@ -297,10 +292,8 @@ xfs_trans_switch_context( + struct xfs_trans *old_tp, + struct xfs_trans *new_tp) + { +- ASSERT(current->journal_info == old_tp); + new_tp->t_pflags = old_tp->t_pflags; + old_tp->t_pflags = 0; +- current->journal_info = new_tp; + } + + #endif /* __XFS_TRANS_H__ */ +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index 2ebb5d4d43dc6..e4cd28c38b825 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -296,6 +296,8 @@ struct bpf_map { + bool bypass_spec_v1; + bool frozen; /* write-once; write-protected by freeze_mutex */ + bool free_after_mult_rcu_gp; ++ bool free_after_rcu_gp; ++ atomic64_t sleepable_refcnt; + s64 __percpu *elem_count; + }; + +diff --git a/include/linux/iommu.h b/include/linux/iommu.h +index 0225cf7445de2..b6ef263e85c06 100644 +--- a/include/linux/iommu.h ++++ b/include/linux/iommu.h +@@ -1199,7 +1199,7 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle); + static inline struct iommu_sva * + iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) + { +- return NULL; ++ return ERR_PTR(-ENODEV); + } + + static inline void iommu_sva_unbind_device(struct iommu_sva *handle) +diff --git a/include/linux/property.h b/include/linux/property.h +index 8c3c6685a2ae3..1684fca930f72 100644 +--- a/include/linux/property.h ++++ b/include/linux/property.h +@@ -79,12 +79,38 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode, + + bool fwnode_device_is_available(const struct fwnode_handle *fwnode); + ++static inline bool fwnode_device_is_big_endian(const struct fwnode_handle *fwnode) ++{ ++ if (fwnode_property_present(fwnode, "big-endian")) ++ return true; ++ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && ++ fwnode_property_present(fwnode, "native-endian")) ++ return true; ++ return false; ++} ++ + static inline + bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char *compat) + { + return fwnode_property_match_string(fwnode, "compatible", compat) >= 0; + } + ++/** ++ * device_is_big_endian - check if a device has BE registers ++ * @dev: Pointer to the struct device ++ * ++ * Returns: true if the device has a "big-endian" property, or if the kernel ++ * was compiled for BE *and* the device has a "native-endian" property. ++ * Returns false otherwise. ++ * ++ * Callers would nominally use ioread32be/iowrite32be if ++ * device_is_big_endian() == true, or readl/writel otherwise. 
++ */ ++static inline bool device_is_big_endian(const struct device *dev) ++{ ++ return fwnode_device_is_big_endian(dev_fwnode(dev)); ++} ++ + /** + * device_is_compatible - match 'compatible' property of the device with a given string + * @dev: Pointer to the struct device +diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h +index fb724c65c77bc..5ce0cd76956e0 100644 +--- a/include/linux/pse-pd/pse.h ++++ b/include/linux/pse-pd/pse.h +@@ -114,14 +114,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec, + struct netlink_ext_ack *extack, + struct pse_control_status *status) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + + static inline int pse_ethtool_set_config(struct pse_control *psec, + struct netlink_ext_ack *extack, + const struct pse_control_config *config) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + + #endif +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index a7d5fa892be26..5da5eb719f614 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -470,6 +470,7 @@ struct uart_port { + unsigned char iotype; /* io access style */ + unsigned char quirks; /* internal quirks */ + ++#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ + #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ + #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ + #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ +@@ -960,6 +961,8 @@ int uart_register_driver(struct uart_driver *uart); + void uart_unregister_driver(struct uart_driver *uart); + int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); + void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); ++int uart_read_port_properties(struct uart_port *port); ++int uart_read_and_validate_port_properties(struct uart_port *port); + bool uart_match_port(const struct uart_port *port1, + const struct uart_port *port2); + +diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h +new file mode 100644 +index 0000000000000..edc3182d6e661 +--- /dev/null ++++ b/include/linux/soc/andes/irq.h +@@ -0,0 +1,18 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2023 Andes Technology Corporation ++ */ ++#ifndef __ANDES_IRQ_H ++#define __ANDES_IRQ_H ++ ++/* Andes PMU irq number */ ++#define ANDES_RV_IRQ_PMOVI 18 ++#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI ++#define ANDES_SLI_CAUSE_BASE 256 ++ ++/* Andes PMU related registers */ ++#define ANDES_CSR_SLIE 0x9c4 ++#define ANDES_CSR_SLIP 0x9c5 ++#define ANDES_CSR_SCOUNTEROF 0x9d4 ++ ++#endif /* __ANDES_IRQ_H */ +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index f786d2d62fa5e..f89d6d43ba8f1 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -2071,18 +2071,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, + { + u16 max_latency; + +- if (min > max || min < 6 || max > 3200) ++ if (min > max) { ++ BT_WARN("min %d > max %d", min, max); + return -EINVAL; ++ } ++ ++ if (min < 6) { ++ BT_WARN("min %d < 6", min); ++ return -EINVAL; ++ } ++ ++ if (max > 3200) { ++ BT_WARN("max %d > 3200", max); ++ return -EINVAL; ++ } ++ ++ if (to_multiplier < 10) { ++ BT_WARN("to_multiplier %d < 10", to_multiplier); ++ return -EINVAL; ++ } + +- if (to_multiplier < 10 || to_multiplier > 3200) ++ if (to_multiplier > 3200) { ++ BT_WARN("to_multiplier %d > 3200", to_multiplier); + return -EINVAL; ++ } + +- if (max >= to_multiplier * 8) ++ if (max >= to_multiplier 
* 8) { ++ BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier); + return -EINVAL; ++ } + + max_latency = (to_multiplier * 4 / max) - 1; +- if (latency > 499 || latency > max_latency) ++ if (latency > 499) { ++ BT_WARN("latency %d > 499", latency); + return -EINVAL; ++ } ++ ++ if (latency > max_latency) { ++ BT_WARN("latency %d > max_latency %d", latency, max_latency); ++ return -EINVAL; ++ } + + return 0; + } +diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h +index 822f0fad39623..4e69f52a51177 100644 +--- a/include/net/ip_tunnels.h ++++ b/include/net/ip_tunnels.h +@@ -362,9 +362,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) + + /* Variant of pskb_inet_may_pull(). + */ +-static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) ++static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, ++ bool inner_proto_inherit) + { +- int nhlen = 0, maclen = ETH_HLEN; ++ int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) +diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h +index 0e75b9277c8c6..e3b6ce3cbf883 100644 +--- a/include/scsi/scsi_transport_sas.h ++++ b/include/scsi/scsi_transport_sas.h +@@ -200,6 +200,8 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *); + void sas_disable_tlr(struct scsi_device *); + void sas_enable_tlr(struct scsi_device *); + ++bool sas_ata_ncq_prio_supported(struct scsi_device *sdev); ++ + extern struct sas_rphy *sas_end_device_alloc(struct sas_port *); + extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type); + void sas_rphy_free(struct sas_rphy *); +diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h +index cf4b98b9a9edc..7d931db02b934 100644 +--- a/include/trace/events/cachefiles.h ++++ b/include/trace/events/cachefiles.h +@@ -33,6 +33,8 @@ enum cachefiles_obj_ref_trace { + cachefiles_obj_see_withdrawal, + cachefiles_obj_get_ondemand_fd, + cachefiles_obj_put_ondemand_fd, ++ cachefiles_obj_get_read_req, ++ cachefiles_obj_put_read_req, + }; + + enum fscache_why_object_killed { +@@ -127,7 +129,11 @@ enum cachefiles_error_trace { + EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ + EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ + EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ +- E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") ++ EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ ++ EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ ++ EM(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") \ ++ EM(cachefiles_obj_get_read_req, "GET read_req") \ ++ E_(cachefiles_obj_put_read_req, "PUT read_req") + + #define cachefiles_coherency_traces \ + EM(cachefiles_coherency_check_aux, "BAD aux ") \ +diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c +index 318ed067dbf64..8a99aabcac2c3 100644 +--- a/io_uring/io-wq.c ++++ b/io_uring/io-wq.c +@@ -25,10 +25,10 @@ + #define WORKER_IDLE_TIMEOUT (5 * HZ) + + enum { +- IO_WORKER_F_UP = 1, /* up and active */ +- IO_WORKER_F_RUNNING = 2, /* account as running */ +- IO_WORKER_F_FREE = 4, /* worker on free list */ +- IO_WORKER_F_BOUND = 8, /* is doing bounded work */ ++ IO_WORKER_F_UP = 0, /* up and active */ ++ IO_WORKER_F_RUNNING = 1, /* account as running */ ++ IO_WORKER_F_FREE = 2, /* worker on free list */ ++ IO_WORKER_F_BOUND = 3, /* is doing bounded work */ + }; + + enum { +@@ -44,7 +44,8 @@ enum { + */ + struct io_worker { + refcount_t ref; +- unsigned 
flags; ++ int create_index; ++ unsigned long flags; + struct hlist_nulls_node nulls_node; + struct list_head all_list; + struct task_struct *task; +@@ -58,7 +59,6 @@ struct io_worker { + + unsigned long create_state; + struct callback_head create_work; +- int create_index; + + union { + struct rcu_head rcu; +@@ -165,7 +165,7 @@ static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq, + + static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker) + { +- return io_get_acct(worker->wq, worker->flags & IO_WORKER_F_BOUND); ++ return io_get_acct(worker->wq, test_bit(IO_WORKER_F_BOUND, &worker->flags)); + } + + static void io_worker_ref_put(struct io_wq *wq) +@@ -225,7 +225,7 @@ static void io_worker_exit(struct io_worker *worker) + wait_for_completion(&worker->ref_done); + + raw_spin_lock(&wq->lock); +- if (worker->flags & IO_WORKER_F_FREE) ++ if (test_bit(IO_WORKER_F_FREE, &worker->flags)) + hlist_nulls_del_rcu(&worker->nulls_node); + list_del_rcu(&worker->all_list); + raw_spin_unlock(&wq->lock); +@@ -410,7 +410,7 @@ static void io_wq_dec_running(struct io_worker *worker) + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; + +- if (!(worker->flags & IO_WORKER_F_UP)) ++ if (!test_bit(IO_WORKER_F_UP, &worker->flags)) + return; + + if (!atomic_dec_and_test(&acct->nr_running)) +@@ -430,8 +430,8 @@ static void io_wq_dec_running(struct io_worker *worker) + */ + static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) + { +- if (worker->flags & IO_WORKER_F_FREE) { +- worker->flags &= ~IO_WORKER_F_FREE; ++ if (test_bit(IO_WORKER_F_FREE, &worker->flags)) { ++ clear_bit(IO_WORKER_F_FREE, &worker->flags); + raw_spin_lock(&wq->lock); + hlist_nulls_del_init_rcu(&worker->nulls_node); + raw_spin_unlock(&wq->lock); +@@ -444,8 +444,8 @@ static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) + static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker) + __must_hold(wq->lock) + { +- if (!(worker->flags & IO_WORKER_F_FREE)) { +- worker->flags |= IO_WORKER_F_FREE; ++ if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) { ++ set_bit(IO_WORKER_F_FREE, &worker->flags); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); + } + } +@@ -634,7 +634,8 @@ static int io_wq_worker(void *data) + bool exit_mask = false, last_timeout = false; + char buf[TASK_COMM_LEN]; + +- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); ++ set_mask_bits(&worker->flags, 0, ++ BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING)); + + snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); + set_task_comm(current, buf); +@@ -698,11 +699,11 @@ void io_wq_worker_running(struct task_struct *tsk) + + if (!worker) + return; +- if (!(worker->flags & IO_WORKER_F_UP)) ++ if (!test_bit(IO_WORKER_F_UP, &worker->flags)) + return; +- if (worker->flags & IO_WORKER_F_RUNNING) ++ if (test_bit(IO_WORKER_F_RUNNING, &worker->flags)) + return; +- worker->flags |= IO_WORKER_F_RUNNING; ++ set_bit(IO_WORKER_F_RUNNING, &worker->flags); + io_wq_inc_running(worker); + } + +@@ -716,12 +717,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk) + + if (!worker) + return; +- if (!(worker->flags & IO_WORKER_F_UP)) ++ if (!test_bit(IO_WORKER_F_UP, &worker->flags)) + return; +- if (!(worker->flags & IO_WORKER_F_RUNNING)) ++ if (!test_bit(IO_WORKER_F_RUNNING, &worker->flags)) + return; + +- worker->flags &= ~IO_WORKER_F_RUNNING; ++ clear_bit(IO_WORKER_F_RUNNING, &worker->flags); + io_wq_dec_running(worker); + } + +@@ -735,7 +736,7 @@ static void 
io_init_new_worker(struct io_wq *wq, struct io_worker *worker, + raw_spin_lock(&wq->lock); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); + list_add_tail_rcu(&worker->all_list, &wq->all_list); +- worker->flags |= IO_WORKER_F_FREE; ++ set_bit(IO_WORKER_F_FREE, &worker->flags); + raw_spin_unlock(&wq->lock); + wake_up_new_task(tsk); + } +@@ -841,7 +842,7 @@ static bool create_io_worker(struct io_wq *wq, int index) + init_completion(&worker->ref_done); + + if (index == IO_WQ_ACCT_BOUND) +- worker->flags |= IO_WORKER_F_BOUND; ++ set_bit(IO_WORKER_F_BOUND, &worker->flags); + + tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE); + if (!IS_ERR(tsk)) { +@@ -927,8 +928,12 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data) + void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) + { + struct io_wq_acct *acct = io_work_get_acct(wq, work); +- struct io_cb_cancel_data match; +- unsigned work_flags = work->flags; ++ unsigned long work_flags = work->flags; ++ struct io_cb_cancel_data match = { ++ .fn = io_wq_work_match_item, ++ .data = work, ++ .cancel_all = false, ++ }; + bool do_create; + + /* +@@ -966,10 +971,6 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) + raw_spin_unlock(&wq->lock); + + /* fatal condition, failed to create the first worker */ +- match.fn = io_wq_work_match_item, +- match.data = work, +- match.cancel_all = false, +- + io_acct_cancel_pending_work(wq, acct, &match); + } + } +diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c +index 26a00920042c4..702c08c26cd4f 100644 +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -168,7 +168,8 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, + req->buf_list = bl; + req->buf_index = buf->bid; + +- if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) { ++ if (issue_flags & IO_URING_F_UNLOCKED || ++ (req->file && !file_can_poll(req->file))) { + /* + * If we came in unlocked, we have no choice but to consume the + * buffer here, otherwise nothing ensures that the buffer won't +diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c +index 95230921b16d0..2e88b6658e4e0 100644 +--- a/io_uring/rsrc.c ++++ b/io_uring/rsrc.c +@@ -250,6 +250,7 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, + + ret = io_run_task_work_sig(ctx); + if (ret < 0) { ++ __set_current_state(TASK_RUNNING); + mutex_lock(&ctx->uring_lock); + if (list_empty(&ctx->rsrc_ref_list)) + ret = 0; +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 1333273a71ded..05445a4d55181 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -2673,12 +2673,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, + struct bpf_map **used_maps, u32 len) + { + struct bpf_map *map; ++ bool sleepable; + u32 i; + ++ sleepable = aux->sleepable; + for (i = 0; i < len; i++) { + map = used_maps[i]; + if (map->ops->map_poke_untrack) + map->ops->map_poke_untrack(map, aux); ++ if (sleepable) ++ atomic64_dec(&map->sleepable_refcnt); + bpf_map_put(map); + } + } +diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c +index 3248ff5d81617..8ef269e66ba50 100644 +--- a/kernel/bpf/map_in_map.c ++++ b/kernel/bpf/map_in_map.c +@@ -131,12 +131,16 @@ void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) + { + struct bpf_map *inner_map = ptr; + +- /* The inner map may still be used by both non-sleepable and sleepable +- * bpf program, so free it after one RCU grace period and one tasks +- * trace RCU grace period. 
++ /* Defer the freeing of inner map according to the sleepable attribute ++ * of bpf program which owns the outer map, so unnecessary waiting for ++ * RCU tasks trace grace period can be avoided. + */ +- if (need_defer) +- WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); ++ if (need_defer) { ++ if (atomic64_read(&map->sleepable_refcnt)) ++ WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); ++ else ++ WRITE_ONCE(inner_map->free_after_rcu_gp, true); ++ } + bpf_map_put(inner_map); + } + +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index e886157a9efbb..65df92f5b1922 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -753,8 +753,11 @@ void bpf_map_put(struct bpf_map *map) + /* bpf_map_free_id() must be called first */ + bpf_map_free_id(map); + ++ WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt)); + if (READ_ONCE(map->free_after_mult_rcu_gp)) + call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); ++ else if (READ_ONCE(map->free_after_rcu_gp)) ++ call_rcu(&map->rcu, bpf_map_free_rcu_gp); + else + bpf_map_free_in_work(map); + } +@@ -2827,6 +2830,7 @@ static int bpf_obj_get(const union bpf_attr *attr) + void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, struct bpf_prog *prog) + { ++ WARN_ON(ops->dealloc && ops->dealloc_deferred); + atomic64_set(&link->refcnt, 1); + link->type = type; + link->id = 0; +@@ -2885,16 +2889,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) + /* bpf_link_free is guaranteed to be called from process context */ + static void bpf_link_free(struct bpf_link *link) + { ++ const struct bpf_link_ops *ops = link->ops; + bool sleepable = false; + + bpf_link_free_id(link->id); + if (link->prog) { + sleepable = link->prog->aux->sleepable; + /* detach BPF program, clean up used resources */ +- link->ops->release(link); ++ ops->release(link); + bpf_prog_put(link->prog); + } +- if (link->ops->dealloc_deferred) { ++ if (ops->dealloc_deferred) { + /* schedule BPF link deallocation; if underlying BPF program + * is sleepable, we need to first wait for RCU tasks trace + * sync, then go through "classic" RCU grace period +@@ -2903,9 +2908,8 @@ static void bpf_link_free(struct bpf_link *link) + call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + else + call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); +- } +- if (link->ops->dealloc) +- link->ops->dealloc(link); ++ } else if (ops->dealloc) ++ ops->dealloc(link); + } + + static void bpf_link_put_deferred(struct work_struct *work) +@@ -5358,6 +5362,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr) + goto out_unlock; + } + ++ /* The bpf program will not access the bpf map, but for the sake of ++ * simplicity, increase sleepable_refcnt for sleepable program as well. ++ */ ++ if (prog->aux->sleepable) ++ atomic64_inc(&map->sleepable_refcnt); + memcpy(used_maps_new, used_maps_old, + sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); + used_maps_new[prog->aux->used_map_cnt] = map; +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 24d7a32f1710e..ec0464c075bb4 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -17732,10 +17732,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) + return -E2BIG; + } + ++ if (env->prog->aux->sleepable) ++ atomic64_inc(&map->sleepable_refcnt); + /* hold the map. 
If the program is rejected by verifier, + * the map will be released by release_maps() or it + * will be used by the valid program until it's unloaded +- * and all maps are released in free_used_maps() ++ * and all maps are released in bpf_free_used_maps() + */ + bpf_map_inc(map); + +diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c +index a7d5fb473b324..e7c3fbd0737ec 100644 +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -69,11 +69,14 @@ + * @alloc_size: Size of the allocated buffer. + * @list: The free list describing the number of free entries available + * from each index. ++ * @pad_slots: Number of preceding padding slots. Valid only in the first ++ * allocated non-padding slot. + */ + struct io_tlb_slot { + phys_addr_t orig_addr; + size_t alloc_size; +- unsigned int list; ++ unsigned short list; ++ unsigned short pad_slots; + }; + + static bool swiotlb_force_bounce; +@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, + mem->nslabs - i); + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; + mem->slots[i].alloc_size = 0; ++ mem->slots[i].pad_slots = 0; + } + + memset(vaddr, 0, bytes); +@@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev) + #endif + } + +-/* +- * Return the offset into a iotlb slot required to keep the device happy. ++/** ++ * swiotlb_align_offset() - Get required offset into an IO TLB allocation. ++ * @dev: Owning device. ++ * @align_mask: Allocation alignment mask. ++ * @addr: DMA address. ++ * ++ * Return the minimum offset from the start of an IO TLB allocation which is ++ * required for a given buffer address and allocation alignment to keep the ++ * device happy. ++ * ++ * First, the address bits covered by min_align_mask must be identical in the ++ * original address and the bounce buffer address. High bits are preserved by ++ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra ++ * padding bytes before the bounce buffer. ++ * ++ * Second, @align_mask specifies which bits of the first allocated slot must ++ * be zero. This may require allocating additional padding slots, and then the ++ * offset (in bytes) from the first such padding slot is returned. + */ +-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) ++static unsigned int swiotlb_align_offset(struct device *dev, ++ unsigned int align_mask, u64 addr) + { +- return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); ++ return addr & dma_get_min_align_mask(dev) & ++ (align_mask | (IO_TLB_SIZE - 1)); + } + + /* +@@ -847,7 +869,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size + return; + + tlb_offset = tlb_addr & (IO_TLB_SIZE - 1); +- orig_addr_offset = swiotlb_align_offset(dev, orig_addr); ++ orig_addr_offset = swiotlb_align_offset(dev, 0, orig_addr); + if (tlb_offset < orig_addr_offset) { + dev_WARN_ONCE(dev, 1, + "Access before mapping start detected. 
orig offset %u, requested offset %u.\n", +@@ -983,7 +1005,7 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, + unsigned long max_slots = get_max_slots(boundary_mask); + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); + unsigned int nslots = nr_slots(alloc_size), stride; +- unsigned int offset = swiotlb_align_offset(dev, orig_addr); ++ unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); + unsigned int index, slots_checked, count = 0, i; + unsigned long flags; + unsigned int slot_base; +@@ -992,6 +1014,17 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, + BUG_ON(!nslots); + BUG_ON(area_index >= pool->nareas); + ++ /* ++ * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be ++ * page-aligned in the absence of any other alignment requirements. ++ * 'alloc_align_mask' was later introduced to specify the alignment ++ * explicitly, however this is passed as zero for streaming mappings ++ * and so we preserve the old behaviour there in case any drivers are ++ * relying on it. ++ */ ++ if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) ++ alloc_align_mask = PAGE_SIZE - 1; ++ + /* + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when +@@ -1006,13 +1039,6 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, + */ + stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); + +- /* +- * For allocations of PAGE_SIZE or larger only look for page aligned +- * allocations. +- */ +- if (alloc_size >= PAGE_SIZE) +- stride = umax(stride, PAGE_SHIFT - IO_TLB_SHIFT + 1); +- + spin_lock_irqsave(&area->lock, flags); + if (unlikely(nslots > pool->area_nslabs - area->used)) + goto not_found; +@@ -1278,11 +1304,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, + unsigned long attrs) + { + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; +- unsigned int offset = swiotlb_align_offset(dev, orig_addr); ++ unsigned int offset; + struct io_tlb_pool *pool; + unsigned int i; + int index; + phys_addr_t tlb_addr; ++ unsigned short pad_slots; + + if (!mem || !mem->nslabs) { + dev_warn_ratelimited(dev, +@@ -1299,6 +1326,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, + return (phys_addr_t)DMA_MAPPING_ERROR; + } + ++ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); + index = swiotlb_find_slots(dev, orig_addr, + alloc_size + offset, alloc_align_mask, &pool); + if (index == -1) { +@@ -1314,6 +1342,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, + * This is needed when we sync the memory. Then we sync the buffer if + * needed. 
+ */ ++ pad_slots = offset >> IO_TLB_SHIFT; ++ offset &= (IO_TLB_SIZE - 1); ++ index += pad_slots; ++ pool->slots[index].pad_slots = pad_slots; + for (i = 0; i < nr_slots(alloc_size + offset); i++) + pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); + tlb_addr = slot_addr(pool->start, index) + offset; +@@ -1332,13 +1364,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) + { + struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); + unsigned long flags; +- unsigned int offset = swiotlb_align_offset(dev, tlb_addr); +- int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; +- int nslots = nr_slots(mem->slots[index].alloc_size + offset); +- int aindex = index / mem->area_nslabs; +- struct io_tlb_area *area = &mem->areas[aindex]; ++ unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); ++ int index, nslots, aindex; ++ struct io_tlb_area *area; + int count, i; + ++ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; ++ index -= mem->slots[index].pad_slots; ++ nslots = nr_slots(mem->slots[index].alloc_size + offset); ++ aindex = index / mem->area_nslabs; ++ area = &mem->areas[aindex]; ++ + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contiguous entries available. +@@ -1361,6 +1397,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) + mem->slots[i].list = ++count; + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; + mem->slots[i].alloc_size = 0; ++ mem->slots[i].pad_slots = 0; + } + + /* +@@ -1627,6 +1664,12 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) + return NULL; + + tlb_addr = slot_addr(pool->start, index); ++ if (unlikely(!PAGE_ALIGNED(tlb_addr))) { ++ dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", ++ &tlb_addr); ++ swiotlb_release_slots(dev, tlb_addr); ++ return NULL; ++ } + + return pfn_to_page(PFN_DOWN(tlb_addr)); + } +diff --git a/kernel/events/core.c b/kernel/events/core.c +index fe543e7898f5a..3e0db5b5a1835 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -5353,6 +5353,7 @@ int perf_event_release_kernel(struct perf_event *event) + again: + mutex_lock(&event->child_mutex); + list_for_each_entry(child, &event->child_list, child_list) { ++ void *var = NULL; + + /* + * Cannot change, child events are not migrated, see the +@@ -5393,11 +5394,23 @@ int perf_event_release_kernel(struct perf_event *event) + * this can't be the last reference. + */ + put_event(event); ++ } else { ++ var = &ctx->refcount; + } + + mutex_unlock(&event->child_mutex); + mutex_unlock(&ctx->mutex); + put_ctx(ctx); ++ ++ if (var) { ++ /* ++ * If perf_event_free_task() has deleted all events from the ++ * ctx while the child_mutex got released above, make sure to ++ * notify about the preceding put_ctx(). ++ */ ++ smp_mb(); /* pairs with wait_var_event() */ ++ wake_up_var(var); ++ } + goto again; + } + mutex_unlock(&event->child_mutex); +diff --git a/kernel/fork.c b/kernel/fork.c +index 2eab916b504bf..177ce7438db6b 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -727,15 +727,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + } else if (anon_vma_fork(tmp, mpnt)) + goto fail_nomem_anon_vma_fork; + vm_flags_clear(tmp, VM_LOCKED_MASK); +- /* +- * Copy/update hugetlb private vma information. 
+- */ +- if (is_vm_hugetlb_page(tmp)) +- hugetlb_dup_vma_private(tmp); +- +- if (tmp->vm_ops && tmp->vm_ops->open) +- tmp->vm_ops->open(tmp); +- + file = tmp->vm_file; + if (file) { + struct address_space *mapping = file->f_mapping; +@@ -752,6 +743,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + i_mmap_unlock_write(mapping); + } + ++ /* ++ * Copy/update hugetlb private vma information. ++ */ ++ if (is_vm_hugetlb_page(tmp)) ++ hugetlb_dup_vma_private(tmp); ++ + /* Link the vma into the MT */ + if (vma_iter_bulk_store(&vmi, tmp)) + goto fail_nomem_vmi_store; +@@ -760,6 +757,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + if (!(tmp->vm_flags & VM_WIPEONFORK)) + retval = copy_page_range(tmp, mpnt); + ++ if (tmp->vm_ops && tmp->vm_ops->open) ++ tmp->vm_ops->open(tmp); ++ + if (retval) + goto loop_out; + } +diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh +index 4ba5fd3d73ae2..383fd43ac6122 100755 +--- a/kernel/gen_kheaders.sh ++++ b/kernel/gen_kheaders.sh +@@ -89,7 +89,7 @@ find $cpio_dir -type f -print0 | + + # Create archive and try to normalize metadata for reproducibility. + tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ +- --owner=0 --group=0 --sort=name --numeric-owner \ ++ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ + -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null + + echo $headers_md5 > kernel/kheaders.md5 +diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c +index 619972c78774f..e9b2bb260ee6c 100644 +--- a/kernel/pid_namespace.c ++++ b/kernel/pid_namespace.c +@@ -217,6 +217,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) + */ + do { + clear_thread_flag(TIF_SIGPENDING); ++ clear_thread_flag(TIF_NOTIFY_SIGNAL); + rc = kernel_wait4(-1, NULL, __WALL, NULL); + } while (rc != -ECHILD); + +diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c +index e9138cd7a0f52..7f2b17fc8ce40 100644 +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -179,26 +179,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) + } + } + +-#ifdef CONFIG_NO_HZ_FULL +-static void giveup_do_timer(void *info) +-{ +- int cpu = *(unsigned int *)info; +- +- WARN_ON(tick_do_timer_cpu != smp_processor_id()); +- +- tick_do_timer_cpu = cpu; +-} +- +-static void tick_take_do_timer_from_boot(void) +-{ +- int cpu = smp_processor_id(); +- int from = tick_do_timer_boot_cpu; +- +- if (from >= 0 && from != cpu) +- smp_call_function_single(from, giveup_do_timer, &cpu, 1); +-} +-#endif +- + /* + * Setup the tick device + */ +@@ -222,19 +202,25 @@ static void tick_setup_device(struct tick_device *td, + tick_next_period = ktime_get(); + #ifdef CONFIG_NO_HZ_FULL + /* +- * The boot CPU may be nohz_full, in which case set +- * tick_do_timer_boot_cpu so the first housekeeping +- * secondary that comes up will take do_timer from +- * us. ++ * The boot CPU may be nohz_full, in which case the ++ * first housekeeping secondary will take do_timer() ++ * from it. 
+ */ + if (tick_nohz_full_cpu(cpu)) + tick_do_timer_boot_cpu = cpu; + +- } else if (tick_do_timer_boot_cpu != -1 && +- !tick_nohz_full_cpu(cpu)) { +- tick_take_do_timer_from_boot(); ++ } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { + tick_do_timer_boot_cpu = -1; +- WARN_ON(tick_do_timer_cpu != cpu); ++ /* ++ * The boot CPU will stay in periodic (NOHZ disabled) ++ * mode until clocksource_done_booting() called after ++ * smp_init() selects a high resolution clocksource and ++ * timekeeping_notify() kicks the NOHZ stuff alive. ++ * ++ * So this WRITE_ONCE can only race with the READ_ONCE ++ * check in tick_periodic() but this race is harmless. ++ */ ++ WRITE_ONCE(tick_do_timer_cpu, cpu); + #endif + } + +diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c +index 1e79084a9d9d2..cc29bf49f7159 100644 +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -3030,6 +3030,7 @@ struct bpf_uprobe_multi_link; + struct bpf_uprobe { + struct bpf_uprobe_multi_link *link; + loff_t offset; ++ unsigned long ref_ctr_offset; + u64 cookie; + struct uprobe_consumer consumer; + }; +@@ -3098,7 +3099,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, + struct bpf_run_ctx *old_run_ctx; + int err = 0; + +- if (link->task && current != link->task) ++ if (link->task && current->mm != link->task->mm) + return 0; + + if (sleepable) +@@ -3169,7 +3170,6 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr + { + struct bpf_uprobe_multi_link *link = NULL; + unsigned long __user *uref_ctr_offsets; +- unsigned long *ref_ctr_offsets = NULL; + struct bpf_link_primer link_primer; + struct bpf_uprobe *uprobes = NULL; + struct task_struct *task = NULL; +@@ -3200,8 +3200,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr + upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); + uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); + cnt = attr->link_create.uprobe_multi.cnt; ++ pid = attr->link_create.uprobe_multi.pid; + +- if (!upath || !uoffsets || !cnt) ++ if (!upath || !uoffsets || !cnt || pid < 0) + return -EINVAL; + if (cnt > MAX_UPROBE_MULTI_CNT) + return -E2BIG; +@@ -3225,10 +3226,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr + goto error_path_put; + } + +- pid = attr->link_create.uprobe_multi.pid; + if (pid) { + rcu_read_lock(); +- task = get_pid_task(find_vpid(pid), PIDTYPE_PID); ++ task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); + rcu_read_unlock(); + if (!task) { + err = -ESRCH; +@@ -3244,18 +3244,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr + if (!uprobes || !link) + goto error_free; + +- if (uref_ctr_offsets) { +- ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL); +- if (!ref_ctr_offsets) +- goto error_free; +- } +- + for (i = 0; i < cnt; i++) { + if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { + err = -EFAULT; + goto error_free; + } +- if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) { ++ if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) { + err = -EFAULT; + goto error_free; + } +@@ -3286,7 +3280,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr + for (i = 0; i < cnt; i++) { + err = uprobe_register_refctr(d_real_inode(link->path.dentry), + uprobes[i].offset, +- ref_ctr_offsets ? 
ref_ctr_offsets[i] : 0, ++ uprobes[i].ref_ctr_offset, + &uprobes[i].consumer); + if (err) { + bpf_uprobe_unregister(&path, uprobes, i); +@@ -3298,11 +3292,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr + if (err) + goto error_free; + +- kvfree(ref_ctr_offsets); + return bpf_link_settle(&link_primer); + + error_free: +- kvfree(ref_ctr_offsets); + kvfree(uprobes); + kfree(link); + if (task) +diff --git a/mm/memory-failure.c b/mm/memory-failure.c +index 5378edad9df8f..f21b4fb1e84ca 100644 +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -1193,26 +1193,26 @@ static int me_swapcache_clean(struct page_state *ps, struct page *p) + */ + static int me_huge_page(struct page_state *ps, struct page *p) + { ++ struct folio *folio = page_folio(p); + int res; +- struct page *hpage = compound_head(p); + struct address_space *mapping; + bool extra_pins = false; + +- mapping = page_mapping(hpage); ++ mapping = folio_mapping(folio); + if (mapping) { +- res = truncate_error_page(hpage, page_to_pfn(p), mapping); ++ res = truncate_error_page(&folio->page, page_to_pfn(p), mapping); + /* The page is kept in page cache. */ + extra_pins = true; +- unlock_page(hpage); ++ folio_unlock(folio); + } else { +- unlock_page(hpage); ++ folio_unlock(folio); + /* + * migration entry prevents later access on error hugepage, + * so we can free and dissolve it into buddy to save healthy + * subpages. + */ +- put_page(hpage); +- if (__page_handle_poison(p) >= 0) { ++ folio_put(folio); ++ if (__page_handle_poison(p) > 0) { + page_ref_inc(p); + res = MF_RECOVERED; + } else { +@@ -2082,7 +2082,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb + */ + if (res == 0) { + folio_unlock(folio); +- if (__page_handle_poison(p) >= 0) { ++ if (__page_handle_poison(p) > 0) { + page_ref_inc(p); + res = MF_RECOVERED; + } else { +@@ -2535,6 +2535,13 @@ int unpoison_memory(unsigned long pfn) + goto unlock_mutex; + } + ++ if (is_huge_zero_page(&folio->page)) { ++ unpoison_pr_info("Unpoison: huge zero page is not supported %#lx\n", ++ pfn, &unpoison_rs); ++ ret = -EOPNOTSUPP; ++ goto unlock_mutex; ++ } ++ + if (!PageHWPoison(p)) { + unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n", + pfn, &unpoison_rs); +diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c +index 9d11d26e46c0e..26a3095bec462 100644 +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -1378,8 +1378,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, + { + struct sk_buff *skb; + struct sock *newsk; ++ ax25_dev *ax25_dev; + DEFINE_WAIT(wait); + struct sock *sk; ++ ax25_cb *ax25; + int err = 0; + + if (sock->state != SS_UNCONNECTED) +@@ -1434,6 +1436,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, + kfree_skb(skb); + sk_acceptq_removed(sk); + newsock->state = SS_CONNECTED; ++ ax25 = sk_to_ax25(newsk); ++ ax25_dev = ax25->ax25_dev; ++ netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC); ++ ax25_dev_hold(ax25_dev); + + out: + release_sock(sk); +diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c +index c9d55b99a7a57..67ae6b8c52989 100644 +--- a/net/ax25/ax25_dev.c ++++ b/net/ax25/ax25_dev.c +@@ -193,7 +193,7 @@ void __exit ax25_dev_free(void) + list_for_each_entry_safe(s, n, &ax25_dev_list, list) { + netdev_put(s->dev, &s->dev_tracker); + list_del(&s->list); +- kfree(s); ++ ax25_dev_put(s); + } + spin_unlock_bh(&ax25_dev_lock); + } +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 
37210567fbfbe..bf31c5bae218f 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -4009,8 +4009,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, + status = L2CAP_CS_AUTHOR_PEND; + chan->ops->defer(chan); + } else { +- l2cap_state_change(chan, BT_CONNECT2); +- result = L2CAP_CR_PEND; ++ l2cap_state_change(chan, BT_CONFIG); ++ result = L2CAP_CR_SUCCESS; + status = L2CAP_CS_NO_INFO; + } + } else { +@@ -4645,13 +4645,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, + + memset(&rsp, 0, sizeof(rsp)); + +- if (max > hcon->le_conn_max_interval) { +- BT_DBG("requested connection interval exceeds current bounds."); +- err = -EINVAL; +- } else { +- err = hci_check_conn_params(min, max, latency, to_multiplier); +- } +- ++ err = hci_check_conn_params(min, max, latency, to_multiplier); + if (err) + rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); + else +diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c +index 478ee7aba85f3..12a2934b28ffb 100644 +--- a/net/bpf/test_run.c ++++ b/net/bpf/test_run.c +@@ -707,10 +707,16 @@ static void + __bpf_prog_test_run_raw_tp(void *data) + { + struct bpf_raw_tp_test_run_info *info = data; ++ struct bpf_trace_run_ctx run_ctx = {}; ++ struct bpf_run_ctx *old_run_ctx; ++ ++ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + + rcu_read_lock(); + info->retval = bpf_prog_run(info->prog, info->ctx); + rcu_read_unlock(); ++ ++ bpf_reset_run_ctx(old_run_ctx); + } + + int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, +diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c +index 3c66141d34d62..1820f09ff59ce 100644 +--- a/net/bridge/br_mst.c ++++ b/net/bridge/br_mst.c +@@ -73,11 +73,10 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state) + } + EXPORT_SYMBOL_GPL(br_mst_get_state); + +-static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v, ++static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, ++ struct net_bridge_vlan *v, + u8 state) + { +- struct net_bridge_vlan_group *vg = nbp_vlan_group(p); +- + if (br_vlan_get_state(v) == state) + return; + +@@ -103,7 +102,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, + int err = 0; + + rcu_read_lock(); +- vg = nbp_vlan_group(p); ++ vg = nbp_vlan_group_rcu(p); + if (!vg) + goto out; + +@@ -121,7 +120,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, + if (v->brvlan->msti != msti) + continue; + +- br_mst_vlan_set_state(p, v, state); ++ br_mst_vlan_set_state(vg, v, state); + } + + out: +@@ -140,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) + * it. + */ + if (v != pv && v->brvlan->msti == msti) { +- br_mst_vlan_set_state(pv->port, pv, v->state); ++ br_mst_vlan_set_state(vg, pv, v->state); + return; + } + } + + /* Otherwise, start out in a new MSTI with all ports disabled. 
*/ +- return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED); ++ return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED); + } + + int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti) +diff --git a/net/core/sock_map.c b/net/core/sock_map.c +index 8598466a38057..01be07b485fad 100644 +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -1639,19 +1639,23 @@ void sock_map_close(struct sock *sk, long timeout) + + lock_sock(sk); + rcu_read_lock(); +- psock = sk_psock_get(sk); +- if (unlikely(!psock)) { +- rcu_read_unlock(); +- release_sock(sk); +- saved_close = READ_ONCE(sk->sk_prot)->close; +- } else { ++ psock = sk_psock(sk); ++ if (likely(psock)) { + saved_close = psock->saved_close; + sock_map_remove_links(sk, psock); ++ psock = sk_psock_get(sk); ++ if (unlikely(!psock)) ++ goto no_psock; + rcu_read_unlock(); + sk_psock_stop(psock); + release_sock(sk); + cancel_delayed_work_sync(&psock->work); + sk_psock_put(sk, psock); ++ } else { ++ saved_close = READ_ONCE(sk->sk_prot)->close; ++no_psock: ++ rcu_read_unlock(); ++ release_sock(sk); + } + + /* Make sure we do not recurse. This is a bug. +diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c +index 0b0ce4f81c017..7cb23bcf8ef7a 100644 +--- a/net/ethtool/ioctl.c ++++ b/net/ethtool/ioctl.c +@@ -2134,7 +2134,7 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev, + const struct ethtool_ops *ops = dev->ethtool_ops; + int n_stats, ret; + +- if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats) ++ if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats) + return -EOPNOTSUPP; + + n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 7bf774bdb9386..2df05ea2e00fe 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1158,6 +1158,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) + + process_backlog++; + ++#ifdef CONFIG_SKB_DECRYPTED ++ skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); ++#endif + tcp_skb_entail(sk, skb); + copy = size_goal; + +@@ -2637,6 +2640,10 @@ void tcp_set_state(struct sock *sk, int state) + if (oldstate != TCP_ESTABLISHED) + TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); + break; ++ case TCP_CLOSE_WAIT: ++ if (oldstate == TCP_SYN_RECV) ++ TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); ++ break; + + case TCP_CLOSE: + if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) +@@ -2648,7 +2655,7 @@ void tcp_set_state(struct sock *sk, int state) + inet_put_port(sk); + fallthrough; + default: +- if (oldstate == TCP_ESTABLISHED) ++ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT) + TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); + } + +diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c +index f6f5b83dd954d..a5cfc5b0b206b 100644 +--- a/net/ipv6/ioam6_iptunnel.c ++++ b/net/ipv6/ioam6_iptunnel.c +@@ -351,9 +351,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) + goto drop; + + if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { +- preempt_disable(); ++ local_bh_disable(); + dst = dst_cache_get(&ilwt->cache); +- preempt_enable(); ++ local_bh_enable(); + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); +@@ -373,9 +373,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) + goto drop; + } + +- preempt_disable(); ++ local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); +- preempt_enable(); ++ local_bh_enable(); + } + + skb_dst_drop(skb); +diff --git 
a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c +index 8184076a3924e..4356806b52bd5 100644 +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -961,6 +961,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, + if (!fib6_nh->rt6i_pcpu) + return; + ++ rcu_read_lock(); + /* release the reference to this fib entry from + * all of its cached pcpu routes + */ +@@ -969,7 +970,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); +- pcpu_rt = *ppcpu_rt; ++ ++ /* Paired with xchg() in rt6_get_pcpu_route() */ ++ pcpu_rt = READ_ONCE(*ppcpu_rt); + + /* only dropping the 'from' reference if the cached route + * is using 'match'. The cached pcpu_rt->from only changes +@@ -983,6 +986,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, + fib6_info_release(from); + } + } ++ rcu_read_unlock(); + } + + struct fib6_nh_pcpu_arg { +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index c48eaa7c23401..29fa2ca07b46a 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1399,6 +1399,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) + struct rt6_info *prev, **p; + + p = this_cpu_ptr(res->nh->rt6i_pcpu); ++ /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */ + prev = xchg(p, NULL); + if (prev) { + dst_dev_put(&prev->dst); +@@ -6331,12 +6332,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, + if (!write) + return -EINVAL; + +- net = (struct net *)ctl->extra1; +- delay = net->ipv6.sysctl.flush_delay; + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + if (ret) + return ret; + ++ net = (struct net *)ctl->extra1; ++ delay = net->ipv6.sysctl.flush_delay; + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); + return 0; + } +diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c +index a75df2ec8db0d..098632adc9b5a 100644 +--- a/net/ipv6/seg6_iptunnel.c ++++ b/net/ipv6/seg6_iptunnel.c +@@ -464,23 +464,21 @@ static int seg6_input_core(struct net *net, struct sock *sk, + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +- preempt_disable(); ++ local_bh_disable(); + dst = dst_cache_get(&slwt->cache); +- preempt_enable(); + + if (!dst) { + ip6_route_input(skb); + dst = skb_dst(skb); + if (!dst->error) { +- preempt_disable(); + dst_cache_set_ip6(&slwt->cache, dst, + &ipv6_hdr(skb)->saddr); +- preempt_enable(); + } + } else { + skb_dst_drop(skb); + skb_dst_set(skb, dst); + } ++ local_bh_enable(); + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) +@@ -536,9 +534,9 @@ static int seg6_output_core(struct net *net, struct sock *sk, + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +- preempt_disable(); ++ local_bh_disable(); + dst = dst_cache_get(&slwt->cache); +- preempt_enable(); ++ local_bh_enable(); + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); +@@ -558,9 +556,9 @@ static int seg6_output_core(struct net *net, struct sock *sk, + goto drop; + } + +- preempt_disable(); ++ local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); +- preempt_enable(); ++ local_bh_enable(); + } + + skb_dst_drop(skb); +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 3783334ef2332..07bcb690932e1 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1287,7 +1287,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + */ + + newsk->sk_gso_type = SKB_GSO_TCPV6; +- ip6_dst_store(newsk, dst, NULL, NULL); + 
inet6_sk_rx_dst_set(newsk, skb); + + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); +@@ -1298,6 +1297,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + ++ ip6_dst_store(newsk, dst, NULL, NULL); ++ + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; +diff --git a/net/mac80211/he.c b/net/mac80211/he.c +index 9f5ffdc9db284..ecbb042dd0433 100644 +--- a/net/mac80211/he.c ++++ b/net/mac80211/he.c +@@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, + + if (!he_spr_ie_elem) + return; ++ ++ he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control; + data = he_spr_ie_elem->optional; + + if (he_spr_ie_elem->he_sr_control & + IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) +- data++; ++ he_obss_pd->non_srg_max_offset = *data++; ++ + if (he_spr_ie_elem->he_sr_control & + IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) { +- he_obss_pd->max_offset = *data++; + he_obss_pd->min_offset = *data++; ++ he_obss_pd->max_offset = *data++; ++ memcpy(he_obss_pd->bss_color_bitmap, data, 8); ++ data += 8; ++ memcpy(he_obss_pd->partial_bssid_bitmap, data, 8); + he_obss_pd->enable = true; + } + } +diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c +index 59f7264194ce3..530581ba812b4 100644 +--- a/net/mac80211/mesh_pathtbl.c ++++ b/net/mac80211/mesh_pathtbl.c +@@ -1011,10 +1011,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + */ + void mesh_path_flush_pending(struct mesh_path *mpath) + { ++ struct ieee80211_sub_if_data *sdata = mpath->sdata; ++ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; ++ struct mesh_preq_queue *preq, *tmp; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) + mesh_path_discard_frame(mpath->sdata, skb); ++ ++ spin_lock_bh(&ifmsh->mesh_preq_queue_lock); ++ list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) { ++ if (ether_addr_equal(mpath->dst, preq->dst)) { ++ list_del(&preq->list); ++ kfree(preq); ++ --ifmsh->preq_queue_len; ++ } ++ } ++ spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); + } + + /** +diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c +index c61eb867bb4a7..984f8f67492fd 100644 +--- a/net/mac80211/sta_info.c ++++ b/net/mac80211/sta_info.c +@@ -1709,7 +1709,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) + skb_queue_head_init(&pending); + + /* sync with ieee80211_tx_h_unicast_ps_buf */ +- spin_lock(&sta->ps_lock); ++ spin_lock_bh(&sta->ps_lock); + /* Send all buffered frames to the station */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + int count = skb_queue_len(&pending), tmp; +@@ -1738,7 +1738,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) + */ + clear_sta_flag(sta, WLAN_STA_PSPOLL); + clear_sta_flag(sta, WLAN_STA_UAPSD); +- spin_unlock(&sta->ps_lock); ++ spin_unlock_bh(&sta->ps_lock); + + atomic_dec(&ps->num_sta_ps); + +diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c +index 4dd47a1fb9aa6..f58bf77d76b81 100644 +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -685,6 +685,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) + unsigned int add_addr_accept_max; + struct mptcp_addr_info remote; + unsigned int subflows_max; ++ bool sf_created = false; + int i, nr; + + add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); +@@ -712,15 +713,18 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) + if (nr == 0) 
+ return; + +- msk->pm.add_addr_accepted++; +- if (msk->pm.add_addr_accepted >= add_addr_accept_max || +- msk->pm.subflows >= subflows_max) +- WRITE_ONCE(msk->pm.accept_addr, false); +- + spin_unlock_bh(&msk->pm.lock); + for (i = 0; i < nr; i++) +- __mptcp_subflow_connect(sk, &addrs[i], &remote); ++ if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) ++ sf_created = true; + spin_lock_bh(&msk->pm.lock); ++ ++ if (sf_created) { ++ msk->pm.add_addr_accepted++; ++ if (msk->pm.add_addr_accepted >= add_addr_accept_max || ++ msk->pm.subflows >= subflows_max) ++ WRITE_ONCE(msk->pm.accept_addr, false); ++ } + } + + void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) +@@ -822,10 +826,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, + spin_lock_bh(&msk->pm.lock); + + removed = true; +- __MPTCP_INC_STATS(sock_net(sk), rm_type); ++ if (rm_type == MPTCP_MIB_RMSUBFLOW) ++ __MPTCP_INC_STATS(sock_net(sk), rm_type); + } + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); ++ else if (rm_type == MPTCP_MIB_RMADDR) ++ __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (!removed) + continue; + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 618d80112d1e2..fbf2b26760731 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2873,9 +2873,14 @@ void mptcp_set_state(struct sock *sk, int state) + if (oldstate != TCP_ESTABLISHED) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + break; +- ++ case TCP_CLOSE_WAIT: ++ /* Unlike TCP, MPTCP sk would not have the TCP_SYN_RECV state: ++ * MPTCP "accepted" sockets will be created later on. So no ++ * transition from TCP_SYN_RECV to TCP_CLOSE_WAIT. ++ */ ++ break; + default: +- if (oldstate == TCP_ESTABLISHED) ++ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT) + MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + } + +@@ -3701,6 +3706,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); ++ WRITE_ONCE(msk->snd_una, subflow->idsn); + if (likely(!__mptcp_check_fallback(msk))) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); + +diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h +index 374412ed780b6..ef0f8f73826f5 100644 +--- a/net/ncsi/internal.h ++++ b/net/ncsi/internal.h +@@ -325,6 +325,7 @@ struct ncsi_dev_priv { + spinlock_t lock; /* Protect the NCSI device */ + unsigned int package_probe_id;/* Current ID during probe */ + unsigned int package_num; /* Number of packages */ ++ unsigned int channel_probe_id;/* Current cahnnel ID during probe */ + struct list_head packages; /* List of packages */ + struct ncsi_channel *hot_channel; /* Channel was ever active */ + struct ncsi_request requests[256]; /* Request table */ +@@ -343,6 +344,7 @@ struct ncsi_dev_priv { + bool multi_package; /* Enable multiple packages */ + bool mlx_multi_host; /* Enable multi host Mellanox */ + u32 package_whitelist; /* Packages to configure */ ++ unsigned char channel_count; /* Num of channels to probe */ + }; + + struct ncsi_cmd_arg { +diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c +index d9da942ad53dd..90c6cf676221a 100644 +--- a/net/ncsi/ncsi-manage.c ++++ b/net/ncsi/ncsi-manage.c +@@ -510,17 +510,19 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) + + break; + case ncsi_dev_state_suspend_gls: +- ndp->pending_req_num = np->channel_num; ++ ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_GLS; + 
nca.package = np->id; ++ nca.channel = ndp->channel_probe_id; ++ ret = ncsi_xmit_cmd(&nca); ++ if (ret) ++ goto error; ++ ndp->channel_probe_id++; + +- nd->state = ncsi_dev_state_suspend_dcnt; +- NCSI_FOR_EACH_CHANNEL(np, nc) { +- nca.channel = nc->id; +- ret = ncsi_xmit_cmd(&nca); +- if (ret) +- goto error; ++ if (ndp->channel_probe_id == ndp->channel_count) { ++ ndp->channel_probe_id = 0; ++ nd->state = ncsi_dev_state_suspend_dcnt; + } + + break; +@@ -689,8 +691,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, + return 0; + } + +-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) +- + static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) + { + unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; +@@ -716,10 +716,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) + return ret; + } + +-#endif +- +-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) +- + /* NCSI OEM Command APIs */ + static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) + { +@@ -856,8 +852,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) + return nch->handler(nca); + } + +-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ +- + /* Determine if a given channel from the channel_queue should be used for Tx */ + static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, + struct ncsi_channel *nc) +@@ -1039,20 +1033,18 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) + goto error; + } + +- nd->state = ncsi_dev_state_config_oem_gma; ++ nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) ++ ? ncsi_dev_state_config_oem_gma ++ : ncsi_dev_state_config_clear_vids; + break; + case ncsi_dev_state_config_oem_gma: + nd->state = ncsi_dev_state_config_clear_vids; +- ret = -1; + +-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + nca.type = NCSI_PKT_CMD_OEM; + nca.package = np->id; + nca.channel = nc->id; + ndp->pending_req_num = 1; + ret = ncsi_gma_handler(&nca, nc->version.mf_id); +-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ +- + if (ret < 0) + schedule_work(&ndp->work); + +@@ -1350,7 +1342,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) + { + struct ncsi_dev *nd = &ndp->ndev; + struct ncsi_package *np; +- struct ncsi_channel *nc; + struct ncsi_cmd_arg nca; + unsigned char index; + int ret; +@@ -1404,7 +1395,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) + + schedule_work(&ndp->work); + break; +-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + case ncsi_dev_state_probe_mlx_gma: + ndp->pending_req_num = 1; + +@@ -1429,25 +1419,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) + + nd->state = ncsi_dev_state_probe_cis; + break; +-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ +- case ncsi_dev_state_probe_cis: +- ndp->pending_req_num = NCSI_RESERVED_CHANNEL; +- +- /* Clear initial state */ +- nca.type = NCSI_PKT_CMD_CIS; +- nca.package = ndp->active_package->id; +- for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) { +- nca.channel = index; +- ret = ncsi_xmit_cmd(&nca); +- if (ret) +- goto error; +- } +- +- nd->state = ncsi_dev_state_probe_gvi; +- if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)) +- nd->state = ncsi_dev_state_probe_keep_phy; +- break; +-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) + case ncsi_dev_state_probe_keep_phy: + ndp->pending_req_num = 1; + +@@ -1460,15 +1431,17 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) + + nd->state = ncsi_dev_state_probe_gvi; + break; +-#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */ ++ case ncsi_dev_state_probe_cis: + case ncsi_dev_state_probe_gvi: + case 
ncsi_dev_state_probe_gc: + case ncsi_dev_state_probe_gls: + np = ndp->active_package; +- ndp->pending_req_num = np->channel_num; ++ ndp->pending_req_num = 1; + +- /* Retrieve version, capability or link status */ +- if (nd->state == ncsi_dev_state_probe_gvi) ++ /* Clear initial state Retrieve version, capability or link status */ ++ if (nd->state == ncsi_dev_state_probe_cis) ++ nca.type = NCSI_PKT_CMD_CIS; ++ else if (nd->state == ncsi_dev_state_probe_gvi) + nca.type = NCSI_PKT_CMD_GVI; + else if (nd->state == ncsi_dev_state_probe_gc) + nca.type = NCSI_PKT_CMD_GC; +@@ -1476,19 +1449,29 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) + nca.type = NCSI_PKT_CMD_GLS; + + nca.package = np->id; +- NCSI_FOR_EACH_CHANNEL(np, nc) { +- nca.channel = nc->id; +- ret = ncsi_xmit_cmd(&nca); +- if (ret) +- goto error; +- } ++ nca.channel = ndp->channel_probe_id; ++ ++ ret = ncsi_xmit_cmd(&nca); ++ if (ret) ++ goto error; + +- if (nd->state == ncsi_dev_state_probe_gvi) ++ if (nd->state == ncsi_dev_state_probe_cis) { ++ nd->state = ncsi_dev_state_probe_gvi; ++ if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0) ++ nd->state = ncsi_dev_state_probe_keep_phy; ++ } else if (nd->state == ncsi_dev_state_probe_gvi) { + nd->state = ncsi_dev_state_probe_gc; +- else if (nd->state == ncsi_dev_state_probe_gc) ++ } else if (nd->state == ncsi_dev_state_probe_gc) { + nd->state = ncsi_dev_state_probe_gls; +- else ++ } else { ++ nd->state = ncsi_dev_state_probe_cis; ++ ndp->channel_probe_id++; ++ } ++ ++ if (ndp->channel_probe_id == ndp->channel_count) { ++ ndp->channel_probe_id = 0; + nd->state = ncsi_dev_state_probe_dp; ++ } + break; + case ncsi_dev_state_probe_dp: + ndp->pending_req_num = 1; +@@ -1789,6 +1772,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + ndp->requests[i].ndp = ndp; + timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0); + } ++ ndp->channel_count = NCSI_RESERVED_CHANNEL; + + spin_lock_irqsave(&ncsi_dev_lock, flags); + list_add_tail_rcu(&ndp->node, &ncsi_dev_list); +@@ -1822,6 +1806,7 @@ int ncsi_start_dev(struct ncsi_dev *nd) + + if (!(ndp->flags & NCSI_DEV_PROBED)) { + ndp->package_probe_id = 0; ++ ndp->channel_probe_id = 0; + nd->state = ncsi_dev_state_probe; + schedule_work(&ndp->work); + return 0; +diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c +index 480e80e3c2836..f22d67cb04d37 100644 +--- a/net/ncsi/ncsi-rsp.c ++++ b/net/ncsi/ncsi-rsp.c +@@ -795,12 +795,13 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) + struct ncsi_rsp_gc_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; ++ struct ncsi_package *np; + size_t size; + + /* Find the channel */ + rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, +- NULL, &nc); ++ &np, &nc); + if (!nc) + return -ENODEV; + +@@ -835,6 +836,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) + */ + nc->vlan_filter.bitmap = U64_MAX; + nc->vlan_filter.n_vids = rsp->vlan_cnt; ++ np->ndp->channel_count = rsp->channel_cnt; + + return 0; + } +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index 3184cc6be4c9d..c7ae4d9bf3d24 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1172,23 +1172,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { + .len = IPSET_MAXNAMELEN - 1 }, + }; + ++/* In order to return quickly when destroying a single set, it is split ++ * into two stages: ++ * - Cancel garbage collector ++ 
* - Destroy the set itself via call_rcu() ++ */ ++ + static void +-ip_set_destroy_set(struct ip_set *set) ++ip_set_destroy_set_rcu(struct rcu_head *head) + { +- pr_debug("set: %s\n", set->name); ++ struct ip_set *set = container_of(head, struct ip_set, rcu); + +- /* Must call it without holding any lock */ + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); + } + + static void +-ip_set_destroy_set_rcu(struct rcu_head *head) ++_destroy_all_sets(struct ip_set_net *inst) + { +- struct ip_set *set = container_of(head, struct ip_set, rcu); ++ struct ip_set *set; ++ ip_set_id_t i; ++ bool need_wait = false; + +- ip_set_destroy_set(set); ++ /* First cancel gc's: set:list sets are flushed as well */ ++ for (i = 0; i < inst->ip_set_max; i++) { ++ set = ip_set(inst, i); ++ if (set) { ++ set->variant->cancel_gc(set); ++ if (set->type->features & IPSET_TYPE_NAME) ++ need_wait = true; ++ } ++ } ++ /* Must wait for flush to be really finished */ ++ if (need_wait) ++ rcu_barrier(); ++ for (i = 0; i < inst->ip_set_max; i++) { ++ set = ip_set(inst, i); ++ if (set) { ++ ip_set(inst, i) = NULL; ++ set->variant->destroy(set); ++ module_put(set->type->me); ++ kfree(set); ++ } ++ } + } + + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, +@@ -1202,11 +1229,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + if (unlikely(protocol_min_failed(attr))) + return -IPSET_ERR_PROTOCOL; + +- + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. + * External systems (i.e. xt_set) must call +- * ip_set_put|get_nfnl_* functions, that way we ++ * ip_set_nfnl_get_* functions, that way we + * can safely check references here. + * + * list:set timer can only decrement the reference +@@ -1214,8 +1240,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + * without holding the lock. 
+ */ + if (!attr[IPSET_ATTR_SETNAME]) { +- /* Must wait for flush to be really finished in list:set */ +- rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); + for (i = 0; i < inst->ip_set_max; i++) { + s = ip_set(inst, i); +@@ -1226,15 +1250,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + } + inst->is_destroyed = true; + read_unlock_bh(&ip_set_ref_lock); +- for (i = 0; i < inst->ip_set_max; i++) { +- s = ip_set(inst, i); +- if (s) { +- ip_set(inst, i) = NULL; +- /* Must cancel garbage collectors */ +- s->variant->cancel_gc(s); +- ip_set_destroy_set(s); +- } +- } ++ _destroy_all_sets(inst); + /* Modified by ip_set_destroy() only, which is serialized */ + inst->is_destroyed = false; + } else { +@@ -1255,12 +1271,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + features = s->type->features; + ip_set(inst, i) = NULL; + read_unlock_bh(&ip_set_ref_lock); ++ /* Must cancel garbage collectors */ ++ s->variant->cancel_gc(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } +- /* Must cancel garbage collectors */ +- s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); + } + return 0; +@@ -2365,30 +2381,25 @@ ip_set_net_init(struct net *net) + } + + static void __net_exit +-ip_set_net_exit(struct net *net) ++ip_set_net_pre_exit(struct net *net) + { + struct ip_set_net *inst = ip_set_pernet(net); + +- struct ip_set *set = NULL; +- ip_set_id_t i; +- + inst->is_deleted = true; /* flag for ip_set_nfnl_put */ ++} + +- nfnl_lock(NFNL_SUBSYS_IPSET); +- for (i = 0; i < inst->ip_set_max; i++) { +- set = ip_set(inst, i); +- if (set) { +- ip_set(inst, i) = NULL; +- set->variant->cancel_gc(set); +- ip_set_destroy_set(set); +- } +- } +- nfnl_unlock(NFNL_SUBSYS_IPSET); ++static void __net_exit ++ip_set_net_exit(struct net *net) ++{ ++ struct ip_set_net *inst = ip_set_pernet(net); ++ ++ _destroy_all_sets(inst); + kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); + } + + static struct pernet_operations ip_set_net_ops = { + .init = ip_set_net_init, ++ .pre_exit = ip_set_net_pre_exit, + .exit = ip_set_net_exit, + .id = &ip_set_net_id, + .size = sizeof(struct ip_set_net), +diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c +index 54e2a1dd7f5f5..bfae7066936bb 100644 +--- a/net/netfilter/ipset/ip_set_list_set.c ++++ b/net/netfilter/ipset/ip_set_list_set.c +@@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, + struct set_elem *e; + int ret; + +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, + struct set_elem *e; + int ret; + +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct list_set *map = set->data; + struct set_adt_elem *d = value; + struct set_elem *e, *next, *prev = NULL; +- int ret; ++ int ret = 0; + +- list_for_each_entry(e, &map->members, list) { ++ rcu_read_lock(); ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -201,6 +202,7 @@ 
list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, + + if (d->before == 0) { + ret = 1; ++ goto out; + } else if (d->before > 0) { + next = list_next_entry(e, list); + ret = !list_is_last(&e->list, &map->members) && +@@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, + } else { + ret = prev && prev->id == d->refid; + } +- return ret; ++ goto out; + } +- return 0; ++out: ++ rcu_read_unlock(); ++ return ret; + } + + static void +@@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, + + /* Find where to add the new entry */ + n = prev = next = NULL; +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, + { + struct list_set *map = set->data; + struct set_adt_elem *d = value; +- struct set_elem *e, *next, *prev = NULL; ++ struct set_elem *e, *n, *next, *prev = NULL; + +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_safe(e, n, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -424,14 +428,8 @@ static void + list_set_destroy(struct ip_set *set) + { + struct list_set *map = set->data; +- struct set_elem *e, *n; + +- list_for_each_entry_safe(e, n, &map->members, list) { +- list_del(&e->list); +- ip_set_put_byindex(map->net, e->id); +- ip_set_ext_destroy(set, e); +- kfree(e); +- } ++ WARN_ON_ONCE(!list_empty(&map->members)); + kfree(map); + + set->data = NULL; +diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c +index ba0d3683a45d3..9139ce38ea7b9 100644 +--- a/net/netfilter/nft_meta.c ++++ b/net/netfilter/nft_meta.c +@@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx, + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + ++ if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG]) ++ return -EINVAL; ++ + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_PROTOCOL: +diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c +index 0c43d748e23ae..50429cbd42da4 100644 +--- a/net/netfilter/nft_payload.c ++++ b/net/netfilter/nft_payload.c +@@ -650,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx, + struct nft_payload *priv = nft_expr_priv(expr); + u32 base; + ++ if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] || ++ !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG]) ++ return -EINVAL; ++ + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (base) { + case NFT_PAYLOAD_TUN_HEADER: +diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c +index 75c9c860182b4..0d6649d937c9f 100644 +--- a/net/sched/sch_multiq.c ++++ b/net/sched/sch_multiq.c +@@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, + + qopt->bands = qdisc_dev(sch)->real_num_tx_queues; + +- removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands), ++ removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands), + GFP_KERNEL); + if (!removed) + return -ENOMEM; +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index a315748a5e531..418d4a846d04a 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -1186,16 +1186,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, + { + bool allow_overlapping_txqs 
= TXTIME_ASSIST_IS_ENABLED(taprio_flags); + +- if (!qopt && !dev->num_tc) { +- NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); +- return -EINVAL; +- } +- +- /* If num_tc is already set, it means that the user already +- * configured the mqprio part +- */ +- if (dev->num_tc) ++ if (!qopt) { ++ if (!dev->num_tc) { ++ NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); ++ return -EINVAL; ++ } + return 0; ++ } + + /* taprio imposes that traffic classes map 1:n to tx queues */ + if (qopt->num_tc > dev->num_tx_queues) { +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index ef5b5d498ef3e..3158b94fd347a 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -460,29 +460,11 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, + static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk, + unsigned long mask) + { +- struct net *nnet = sock_net(nsk); +- + nsk->sk_userlocks = osk->sk_userlocks; +- if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) { ++ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) + nsk->sk_sndbuf = osk->sk_sndbuf; +- } else { +- if (mask == SK_FLAGS_SMC_TO_CLC) +- WRITE_ONCE(nsk->sk_sndbuf, +- READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1])); +- else +- WRITE_ONCE(nsk->sk_sndbuf, +- 2 * READ_ONCE(nnet->smc.sysctl_wmem)); +- } +- if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) { ++ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) + nsk->sk_rcvbuf = osk->sk_rcvbuf; +- } else { +- if (mask == SK_FLAGS_SMC_TO_CLC) +- WRITE_ONCE(nsk->sk_rcvbuf, +- READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1])); +- else +- WRITE_ONCE(nsk->sk_rcvbuf, +- 2 * READ_ONCE(nnet->smc.sysctl_rmem)); +- } + } + + static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, +diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c +index 1af71fbb0d805..00753bc5f1b14 100644 +--- a/net/sunrpc/auth_gss/auth_gss.c ++++ b/net/sunrpc/auth_gss/auth_gss.c +@@ -1875,8 +1875,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; + maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); + /* slack space should prevent this ever happening: */ +- if (unlikely(snd_buf->len > snd_buf->buflen)) ++ if (unlikely(snd_buf->len > snd_buf->buflen)) { ++ status = -EIO; + goto wrap_failed; ++ } + /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was + * done anyway, so it's safe to put the request on the wire: */ + if (maj_stat == GSS_S_CONTEXT_EXPIRED) +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index d01314dc86ecb..5a26e785ce70d 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -222,15 +222,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk) + return unix_peer(osk) == NULL || unix_our_peer(sk, osk); + } + +-static inline int unix_recvq_full(const struct sock *sk) +-{ +- return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; +-} +- + static inline int unix_recvq_full_lockless(const struct sock *sk) + { +- return skb_queue_len_lockless(&sk->sk_receive_queue) > +- READ_ONCE(sk->sk_max_ack_backlog); ++ return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; + } + + struct sock *unix_peer_get(struct sock *s) +@@ -531,10 +525,10 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) + return 0; + } + +-static int unix_writable(const struct sock *sk) ++static int unix_writable(const struct sock *sk, unsigned char state) + { +- return sk->sk_state != TCP_LISTEN && +- (refcount_read(&sk->sk_wmem_alloc) 
<< 2) <= sk->sk_sndbuf; ++ return state != TCP_LISTEN && ++ (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf); + } + + static void unix_write_space(struct sock *sk) +@@ -542,7 +536,7 @@ static void unix_write_space(struct sock *sk) + struct socket_wq *wq; + + rcu_read_lock(); +- if (unix_writable(sk)) { ++ if (unix_writable(sk, READ_ONCE(sk->sk_state))) { + wq = rcu_dereference(sk->sk_wq); + if (skwq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, +@@ -571,7 +565,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) + sk_error_report(other); + } + } +- other->sk_state = TCP_CLOSE; + } + + static void unix_sock_destructor(struct sock *sk) +@@ -618,7 +611,7 @@ static void unix_release_sock(struct sock *sk, int embrion) + u->path.dentry = NULL; + u->path.mnt = NULL; + state = sk->sk_state; +- sk->sk_state = TCP_CLOSE; ++ WRITE_ONCE(sk->sk_state, TCP_CLOSE); + + skpair = unix_peer(sk); + unix_peer(sk) = NULL; +@@ -639,7 +632,7 @@ static void unix_release_sock(struct sock *sk, int embrion) + unix_state_lock(skpair); + /* No more writes */ + WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); +- if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) + WRITE_ONCE(skpair->sk_err, ECONNRESET); + unix_state_unlock(skpair); + skpair->sk_state_change(skpair); +@@ -740,7 +733,8 @@ static int unix_listen(struct socket *sock, int backlog) + if (backlog > sk->sk_max_ack_backlog) + wake_up_interruptible_all(&u->peer_wait); + sk->sk_max_ack_backlog = backlog; +- sk->sk_state = TCP_LISTEN; ++ WRITE_ONCE(sk->sk_state, TCP_LISTEN); ++ + /* set credentials so connect can copy them */ + init_peercred(sk); + err = 0; +@@ -990,7 +984,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, + sk->sk_hash = unix_unbound_hash(sk); + sk->sk_allocation = GFP_KERNEL_ACCOUNT; + sk->sk_write_space = unix_write_space; +- sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; ++ sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); + sk->sk_destruct = unix_sock_destructor; + u = unix_sk(sk); + u->inflight = 0; +@@ -1412,7 +1406,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, + if (err) + goto out_unlock; + +- sk->sk_state = other->sk_state = TCP_ESTABLISHED; ++ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); ++ WRITE_ONCE(other->sk_state, TCP_ESTABLISHED); + } else { + /* + * 1003.1g breaking connected state with AF_UNSPEC +@@ -1429,13 +1424,20 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, + + unix_peer(sk) = other; + if (!other) +- sk->sk_state = TCP_CLOSE; ++ WRITE_ONCE(sk->sk_state, TCP_CLOSE); + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + + unix_state_double_unlock(sk, other); + +- if (other != old_peer) ++ if (other != old_peer) { + unix_dgram_disconnected(sk, old_peer); ++ ++ unix_state_lock(old_peer); ++ if (!unix_peer(old_peer)) ++ WRITE_ONCE(old_peer->sk_state, TCP_CLOSE); ++ unix_state_unlock(old_peer); ++ } ++ + sock_put(old_peer); + } else { + unix_peer(sk) = other; +@@ -1483,7 +1485,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + struct sk_buff *skb = NULL; + long timeo; + int err; +- int st; + + err = unix_validate_addr(sunaddr, addr_len); + if (err) +@@ -1544,7 +1545,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + if (other->sk_shutdown & RCV_SHUTDOWN) + goto out_unlock; + +- if (unix_recvq_full(other)) { ++ 
if (unix_recvq_full_lockless(other)) { + err = -EAGAIN; + if (!timeo) + goto out_unlock; +@@ -1569,9 +1570,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + + Well, and we have to recheck the state after socket locked. + */ +- st = sk->sk_state; +- +- switch (st) { ++ switch (READ_ONCE(sk->sk_state)) { + case TCP_CLOSE: + /* This is ok... continue with connect */ + break; +@@ -1586,7 +1585,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + + unix_state_lock_nested(sk, U_LOCK_SECOND); + +- if (sk->sk_state != st) { ++ if (sk->sk_state != TCP_CLOSE) { + unix_state_unlock(sk); + unix_state_unlock(other); + sock_put(other); +@@ -1638,7 +1637,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + copy_peercred(sk, other); + + sock->state = SS_CONNECTED; +- sk->sk_state = TCP_ESTABLISHED; ++ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); + sock_hold(newsk); + + smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ +@@ -1711,7 +1710,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + goto out; + + err = -EINVAL; +- if (sk->sk_state != TCP_LISTEN) ++ if (READ_ONCE(sk->sk_state) != TCP_LISTEN) + goto out; + + /* If socket state is TCP_LISTEN it cannot change (for now...), +@@ -1939,7 +1938,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + } + + err = -EMSGSIZE; +- if (len > sk->sk_sndbuf - 32) ++ if (len > READ_ONCE(sk->sk_sndbuf) - 32) + goto out; + + if (len > SKB_MAX_ALLOC) { +@@ -2021,7 +2020,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + +- sk->sk_state = TCP_CLOSE; ++ WRITE_ONCE(sk->sk_state, TCP_CLOSE); + unix_state_unlock(sk); + + unix_dgram_disconnected(sk, other); +@@ -2197,7 +2196,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + } + + if (msg->msg_namelen) { +- err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; ++ err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; + goto out_err; + } else { + err = -ENOTCONN; +@@ -2218,7 +2217,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + &err, 0); + } else { + /* Keep two messages in the pipe so it schedules better */ +- size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); ++ size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64); + + /* allow fallback to order-0 allocations */ + size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); +@@ -2311,7 +2310,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, + if (err) + return err; + +- if (sk->sk_state != TCP_ESTABLISHED) ++ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) + return -ENOTCONN; + + if (msg->msg_namelen) +@@ -2325,7 +2324,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, + { + struct sock *sk = sock->sk; + +- if (sk->sk_state != TCP_ESTABLISHED) ++ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) + return -ENOTCONN; + + return unix_dgram_recvmsg(sock, msg, size, flags); +@@ -2597,18 +2596,18 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + if (skb == u->oob_skb) { + if (copied) { + skb = NULL; +- } else if (sock_flag(sk, SOCK_URGINLINE)) { +- if (!(flags & MSG_PEEK)) { ++ } else if (!(flags & MSG_PEEK)) { ++ if (sock_flag(sk, SOCK_URGINLINE)) { + WRITE_ONCE(u->oob_skb, NULL); + consume_skb(skb); ++ } else { ++ __skb_unlink(skb, &sk->sk_receive_queue); ++ WRITE_ONCE(u->oob_skb, NULL); ++ unlinked_skb = skb; ++ skb = skb_peek(&sk->sk_receive_queue); + } +- } else if (flags & MSG_PEEK) { +- skb = NULL; +- } else { +- __skb_unlink(skb, &sk->sk_receive_queue); +- WRITE_ONCE(u->oob_skb, NULL); +- unlinked_skb = skb; +- skb = skb_peek(&sk->sk_receive_queue); ++ } else if (!sock_flag(sk, SOCK_URGINLINE)) { ++ skb = skb_peek_next(skb, &sk->sk_receive_queue); + } + } + +@@ -2625,7 +2624,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + + static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) + { +- if (unlikely(sk->sk_state != TCP_ESTABLISHED)) ++ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) + return -ENOTCONN; + + return unix_read_skb(sk, recv_actor); +@@ -2649,7 +2648,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, + size_t size = state->size; + unsigned int last_len; + +- if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { ++ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { + err = -EINVAL; + goto out; + } +@@ -2975,7 +2974,7 @@ long unix_inq_len(struct sock *sk) + struct sk_buff *skb; + long amount = 0; + +- if (sk->sk_state == TCP_LISTEN) ++ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) + return -EINVAL; + + spin_lock(&sk->sk_receive_queue.lock); +@@ -3087,12 +3086,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon + static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) + { + struct sock *sk = sock->sk; ++ unsigned char state; + __poll_t mask; + u8 shutdown; + + sock_poll_wait(file, sock, wait); + mask = 0; + shutdown = READ_ONCE(sk->sk_shutdown); ++ state = READ_ONCE(sk->sk_state); + + /* exceptional events? 
*/ + if (READ_ONCE(sk->sk_err)) +@@ -3114,14 +3115,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa + + /* Connection-based need to check for termination and startup */ + if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && +- sk->sk_state == TCP_CLOSE) ++ state == TCP_CLOSE) + mask |= EPOLLHUP; + + /* + * we set writable also when the other side has shut down the + * connection. This prevents stuck sockets. + */ +- if (unix_writable(sk)) ++ if (unix_writable(sk, state)) + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; + + return mask; +@@ -3132,12 +3133,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, + { + struct sock *sk = sock->sk, *other; + unsigned int writable; ++ unsigned char state; + __poll_t mask; + u8 shutdown; + + sock_poll_wait(file, sock, wait); + mask = 0; + shutdown = READ_ONCE(sk->sk_shutdown); ++ state = READ_ONCE(sk->sk_state); + + /* exceptional events? */ + if (READ_ONCE(sk->sk_err) || +@@ -3157,19 +3160,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, + mask |= EPOLLIN | EPOLLRDNORM; + + /* Connection-based need to check for termination and startup */ +- if (sk->sk_type == SOCK_SEQPACKET) { +- if (sk->sk_state == TCP_CLOSE) +- mask |= EPOLLHUP; +- /* connection hasn't started yet? */ +- if (sk->sk_state == TCP_SYN_SENT) +- return mask; +- } ++ if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE) ++ mask |= EPOLLHUP; + + /* No write status requested, avoid expensive OUT tests. */ + if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) + return mask; + +- writable = unix_writable(sk); ++ writable = unix_writable(sk, state); + if (writable) { + unix_state_lock(sk); + +diff --git a/net/unix/diag.c b/net/unix/diag.c +index 3438b7af09af5..1de7500b41b61 100644 +--- a/net/unix/diag.c ++++ b/net/unix/diag.c +@@ -65,7 +65,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) + u32 *buf; + int i; + +- if (sk->sk_state == TCP_LISTEN) { ++ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) { + spin_lock(&sk->sk_receive_queue.lock); + + attr = nla_reserve(nlskb, UNIX_DIAG_ICONS, +@@ -103,8 +103,8 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) + { + struct unix_diag_rqlen rql; + +- if (sk->sk_state == TCP_LISTEN) { +- rql.udiag_rqueue = sk->sk_receive_queue.qlen; ++ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) { ++ rql.udiag_rqueue = skb_queue_len_lockless(&sk->sk_receive_queue); + rql.udiag_wqueue = sk->sk_max_ack_backlog; + } else { + rql.udiag_rqueue = (u32) unix_inq_len(sk); +@@ -136,7 +136,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r + rep = nlmsg_data(nlh); + rep->udiag_family = AF_UNIX; + rep->udiag_type = sk->sk_type; +- rep->udiag_state = sk->sk_state; ++ rep->udiag_state = READ_ONCE(sk->sk_state); + rep->pad = 0; + rep->udiag_ino = sk_ino; + sock_diag_save_cookie(sk, rep->udiag_cookie); +@@ -165,7 +165,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r + sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) + goto out_nlmsg_trim; + +- if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) ++ if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, READ_ONCE(sk->sk_shutdown))) + goto out_nlmsg_trim; + + if ((req->udiag_show & UDIAG_SHOW_UID) && +@@ -215,7 +215,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) + sk_for_each(sk, &net->unx.table.buckets[slot]) { + if (num < s_num) + goto next; +- if (!(req->udiag_states 
& (1 << sk->sk_state))) ++ if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state)))) + goto next; + if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), + NETLINK_CB(cb->skb).portid, +diff --git a/net/wireless/core.c b/net/wireless/core.c +index ff743e1f2e2cb..68aa8f0d70140 100644 +--- a/net/wireless/core.c ++++ b/net/wireless/core.c +@@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work) + if (wk) { + list_del_init(&wk->entry); + if (!list_empty(&rdev->wiphy_work_list)) +- schedule_work(work); ++ queue_work(system_unbound_wq, work); + spin_unlock_irq(&rdev->wiphy_work_lock); + + wk->func(&rdev->wiphy, wk); +diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c +index 9611aa0bd0513..841a4516793b1 100644 +--- a/net/wireless/pmsr.c ++++ b/net/wireless/pmsr.c +@@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, + out->ftm.burst_period = 0; + if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]) + out->ftm.burst_period = +- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); ++ nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); + + out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP]; + if (out->ftm.asap && !capa->ftm.asap) { +@@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, + out->ftm.num_bursts_exp = 0; + if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]) + out->ftm.num_bursts_exp = +- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); ++ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); + + if (capa->ftm.max_bursts_exponent >= 0 && + out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) { +@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, + out->ftm.burst_duration = 15; + if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]) + out->ftm.burst_duration = +- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); ++ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); + + out->ftm.ftms_per_burst = 0; + if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) +@@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, + out->ftm.ftmr_retries = 3; + if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]) + out->ftm.ftmr_retries = +- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); ++ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); + + out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI]; + if (out->ftm.request_lci && !capa->ftm.request_lci) { +diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c +index 565511a3f461e..62f26618f6747 100644 +--- a/net/wireless/sysfs.c ++++ b/net/wireless/sysfs.c +@@ -5,7 +5,7 @@ + * + * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz> + * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> +- * Copyright (C) 2020-2021, 2023 Intel Corporation ++ * Copyright (C) 2020-2021, 2023-2024 Intel Corporation + */ + + #include <linux/device.h> +@@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev) + if (rdev->wiphy.registered && rdev->ops->resume) + ret = rdev_resume(rdev); + rdev->suspended = false; +- schedule_work(&rdev->wiphy_work); ++ queue_work(system_unbound_wq, &rdev->wiphy_work); + wiphy_unlock(&rdev->wiphy); + + if (ret) +diff --git a/net/wireless/util.c b/net/wireless/util.c +index 9aa7bdce20b26..57ea6d5b092d4 100644 +--- a/net/wireless/util.c ++++ b/net/wireless/util.c +@@ -2399,6 +2399,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, + { + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; ++ int 
ret; + + wdev = dev->ieee80211_ptr; + if (!wdev) +@@ -2410,7 +2411,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, + + memset(sinfo, 0, sizeof(*sinfo)); + +- return rdev_get_station(rdev, dev, mac_addr, sinfo); ++ wiphy_lock(&rdev->wiphy); ++ ret = rdev_get_station(rdev, dev, mac_addr, sinfo); ++ wiphy_unlock(&rdev->wiphy); ++ ++ return ret; + } + EXPORT_SYMBOL(cfg80211_get_station); + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 269bd79bcd9ad..828d5cc367169 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -1684,10 +1684,11 @@ static void read_symbols(const char *modname) + namespace = get_next_modinfo(&info, "import_ns", + namespace); + } ++ ++ if (extra_warn && !get_modinfo(&info, "description")) ++ warn("missing MODULE_DESCRIPTION() in %s\n", modname); + } + +- if (extra_warn && !get_modinfo(&info, "description")) +- warn("missing MODULE_DESCRIPTION() in %s\n", modname); + for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { + symname = remove_dot(info.strtab + sym->st_name); + +diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c +index 597ea0c4d72f7..44b8161746fec 100644 +--- a/security/integrity/ima/ima_api.c ++++ b/security/integrity/ima/ima_api.c +@@ -244,8 +244,8 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, + const char *audit_cause = "failed"; + struct inode *inode = file_inode(file); + struct inode *real_inode = d_real_inode(file_dentry(file)); +- const char *filename = file->f_path.dentry->d_name.name; + struct ima_max_digest_data hash; ++ struct name_snapshot filename; + struct kstat stat; + int result = 0; + int length; +@@ -316,9 +316,13 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, + if (file->f_flags & O_DIRECT) + audit_cause = "failed(directio)"; + ++ take_dentry_name_snapshot(&filename, file->f_path.dentry); ++ + integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, +- filename, "collect_data", audit_cause, +- result, 0); ++ filename.name.name, "collect_data", ++ audit_cause, result, 0); ++ ++ release_dentry_name_snapshot(&filename); + } + return result; + } +@@ -431,6 +435,7 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, + */ + const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) + { ++ struct name_snapshot filename; + char *pathname = NULL; + + *pathbuf = __getname(); +@@ -444,7 +449,10 @@ const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) + } + + if (!pathname) { +- strscpy(namebuf, path->dentry->d_name.name, NAME_MAX); ++ take_dentry_name_snapshot(&filename, path->dentry); ++ strscpy(namebuf, filename.name.name, NAME_MAX); ++ release_dentry_name_snapshot(&filename); ++ + pathname = namebuf; + } + +diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c +index 6cd0add524cdc..3b2cb8f1002e6 100644 +--- a/security/integrity/ima/ima_template_lib.c ++++ b/security/integrity/ima/ima_template_lib.c +@@ -483,7 +483,10 @@ static int ima_eventname_init_common(struct ima_event_data *event_data, + bool size_limit) + { + const char *cur_filename = NULL; ++ struct name_snapshot filename; + u32 cur_filename_len = 0; ++ bool snapshot = false; ++ int ret; + + BUG_ON(event_data->filename == NULL && event_data->file == NULL); + +@@ -496,7 +499,10 @@ static int ima_eventname_init_common(struct ima_event_data *event_data, + } + + if (event_data->file) { +- cur_filename = event_data->file->f_path.dentry->d_name.name; ++ 
take_dentry_name_snapshot(&filename, ++ event_data->file->f_path.dentry); ++ snapshot = true; ++ cur_filename = filename.name.name; + cur_filename_len = strlen(cur_filename); + } else + /* +@@ -505,8 +511,13 @@ static int ima_eventname_init_common(struct ima_event_data *event_data, + */ + cur_filename_len = IMA_EVENT_NAME_LEN_MAX; + out: +- return ima_write_template_field_data(cur_filename, cur_filename_len, +- DATA_FMT_STRING, field_data); ++ ret = ima_write_template_field_data(cur_filename, cur_filename_len, ++ DATA_FMT_STRING, field_data); ++ ++ if (snapshot) ++ release_dentry_name_snapshot(&filename); ++ ++ return ret; + } + + /* +diff --git a/security/landlock/fs.c b/security/landlock/fs.c +index febc4a51137fa..1bdd049e3d636 100644 +--- a/security/landlock/fs.c ++++ b/security/landlock/fs.c +@@ -820,6 +820,7 @@ static int current_check_refer_path(struct dentry *const old_dentry, + bool allow_parent1, allow_parent2; + access_mask_t access_request_parent1, access_request_parent2; + struct path mnt_dir; ++ struct dentry *old_parent; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; + +@@ -867,9 +868,17 @@ static int current_check_refer_path(struct dentry *const old_dentry, + mnt_dir.mnt = new_dir->mnt; + mnt_dir.dentry = new_dir->mnt->mnt_root; + ++ /* ++ * old_dentry may be the root of the common mount point and ++ * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and ++ * OPEN_TREE_CLONE). We do not need to call dget(old_parent) because ++ * we keep a reference to old_dentry. ++ */ ++ old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry : ++ old_dentry->d_parent; ++ + /* new_dir->dentry is equal to new_dentry->d_parent */ +- allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, +- old_dentry->d_parent, ++ allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, old_parent, + &layer_masks_parent1); + allow_parent2 = collect_domain_accesses( + dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); +diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c +index a0368202a746a..c51829fdef23b 100644 +--- a/tools/perf/util/auxtrace.c ++++ b/tools/perf/util/auxtrace.c +@@ -1466,6 +1466,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, + char *endptr; + bool period_type_set = false; + bool period_set = false; ++ bool iy = false; + + synth_opts->set = true; + +@@ -1484,6 +1485,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, + switch (*p++) { + case 'i': + case 'y': ++ iy = true; + if (p[-1] == 'y') + synth_opts->cycles = true; + else +@@ -1646,7 +1648,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, + } + } + out: +- if (synth_opts->instructions || synth_opts->cycles) { ++ if (iy) { + if (!period_type_set) + synth_opts->period_type = + PERF_ITRACE_DEFAULT_PERIOD_TYPE; +diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c +index 68118c37f0b56..0ed100617d993 100644 +--- a/tools/testing/cxl/test/mem.c ++++ b/tools/testing/cxl/test/mem.c +@@ -3,6 +3,7 @@ + + #include <linux/platform_device.h> + #include <linux/mod_devicetable.h> ++#include <linux/vmalloc.h> + #include <linux/module.h> + #include <linux/delay.h> + #include <linux/sizes.h> +diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc +index d3a79da215c8b..5f72abe6fa79b 100644 +--- 
a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc ++++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc +@@ -1,7 +1,7 @@ + #!/bin/sh + # SPDX-License-Identifier: GPL-2.0 + # description: Generic dynamic event - check if duplicate events are caught +-# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README ++# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README events/syscalls/sys_enter_openat + + echo 0 > events/enable + +diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +index 3f74c09c56b62..118247b8dd84d 100644 +--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc ++++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +@@ -10,7 +10,6 @@ fail() { #msg + } + + sample_events() { +- echo > trace + echo 1 > events/kmem/kmem_cache_free/enable + echo 1 > tracing_on + ls > /dev/null +@@ -22,6 +21,7 @@ echo 0 > tracing_on + echo 0 > events/enable + + echo "Get the most frequently calling function" ++echo > trace + sample_events + + target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` +@@ -32,7 +32,16 @@ echo > trace + + echo "Test event filter function name" + echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter ++ ++sample_events ++max_retry=10 ++while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do + sample_events ++max_retry=$((max_retry - 1)) ++if [ $max_retry -eq 0 ]; then ++ exit_fail ++fi ++done + + hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` + misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` +@@ -49,7 +58,16 @@ address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1` + + echo "Test event filter function address" + echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter ++echo > trace ++sample_events ++max_retry=10 ++while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do + sample_events ++max_retry=$((max_retry - 1)) ++if [ $max_retry -eq 0 ]; then ++ exit_fail ++fi ++done + + hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` + misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` +diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc +index 1f6981ef7afa0..ba19b81cef39a 100644 +--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc ++++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc +@@ -30,7 +30,8 @@ find_dot_func() { + fi + + grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do +- if grep -s $f available_filter_functions; then ++ cnt=`grep -s $f available_filter_functions | wc -l`; ++ if [ $cnt -eq 1 ]; then + echo $f + break + fi +diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c +index 55dec92e1e58c..309b3750e57e1 100644 +--- a/tools/testing/selftests/mm/compaction_test.c ++++ b/tools/testing/selftests/mm/compaction_test.c +@@ -33,7 +33,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) + FILE *cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { +- perror("Failed to read meminfo\n"); ++ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); 
+ return -1; + } + +@@ -44,7 +44,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) + cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { +- perror("Failed to read meminfo\n"); ++ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); + return -1; + } + +@@ -62,14 +62,14 @@ int prereq(void) + fd = open("/proc/sys/vm/compact_unevictable_allowed", + O_RDONLY | O_NONBLOCK); + if (fd < 0) { +- perror("Failed to open\n" +- "/proc/sys/vm/compact_unevictable_allowed\n"); ++ ksft_print_msg("Failed to open /proc/sys/vm/compact_unevictable_allowed: %s\n", ++ strerror(errno)); + return -1; + } + + if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { +- perror("Failed to read from\n" +- "/proc/sys/vm/compact_unevictable_allowed\n"); ++ ksft_print_msg("Failed to read from /proc/sys/vm/compact_unevictable_allowed: %s\n", ++ strerror(errno)); + close(fd); + return -1; + } +@@ -78,15 +78,17 @@ int prereq(void) + if (allowed == '1') + return 0; + ++ ksft_print_msg("Compaction isn't allowed\n"); + return -1; + } + +-int check_compaction(unsigned long mem_free, unsigned int hugepage_size) ++int check_compaction(unsigned long mem_free, unsigned long hugepage_size) + { +- int fd; ++ unsigned long nr_hugepages_ul; ++ int fd, ret = -1; + int compaction_index = 0; +- char initial_nr_hugepages[10] = {0}; +- char nr_hugepages[10] = {0}; ++ char initial_nr_hugepages[20] = {0}; ++ char nr_hugepages[20] = {0}; + + /* We want to test with 80% of available memory. Else, OOM killer comes + in to play */ +@@ -94,12 +96,15 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) + + fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); + if (fd < 0) { +- perror("Failed to open /proc/sys/vm/nr_hugepages"); +- return -1; ++ ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n", ++ strerror(errno)); ++ ret = -1; ++ goto out; + } + + if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { +- perror("Failed to read from /proc/sys/vm/nr_hugepages"); ++ ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n", ++ strerror(errno)); + goto close_fd; + } + +@@ -107,7 +112,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) + + /* Start with the initial condition of 0 huge pages*/ + if (write(fd, "0", sizeof(char)) != sizeof(char)) { +- perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n"); ++ ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n", ++ strerror(errno)); + goto close_fd; + } + +@@ -116,82 +122,82 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) + /* Request a large number of huge pages. 
The Kernel will allocate + as much as it can */ + if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { +- perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n"); ++ ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", ++ strerror(errno)); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { +- perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n"); ++ ksft_print_msg("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n", ++ strerror(errno)); + goto close_fd; + } + + /* We should have been able to request at least 1/3 rd of the memory in + huge pages */ +- compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); +- +- if (compaction_index > 3) { +- printf("No of huge pages allocated = %d\n", +- (atoi(nr_hugepages))); +- fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" +- "as huge pages\n", compaction_index); ++ nr_hugepages_ul = strtoul(nr_hugepages, NULL, 10); ++ if (!nr_hugepages_ul) { ++ ksft_print_msg("ERROR: No memory is available as huge pages\n"); + goto close_fd; + } +- +- printf("No of huge pages allocated = %d\n", +- (atoi(nr_hugepages))); ++ compaction_index = mem_free/(nr_hugepages_ul * hugepage_size); + + lseek(fd, 0, SEEK_SET); + + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) + != strlen(initial_nr_hugepages)) { +- perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); ++ ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n", ++ strerror(errno)); + goto close_fd; + } + +- close(fd); +- return 0; ++ ksft_print_msg("Number of huge pages allocated = %lu\n", ++ nr_hugepages_ul); ++ ++ if (compaction_index > 3) { ++ ksft_print_msg("ERROR: Less than 1/%d of memory is available\n" ++ "as huge pages\n", compaction_index); ++ goto close_fd; ++ } ++ ++ ret = 0; + + close_fd: + close(fd); +- printf("Not OK. 
Compaction test failed."); +- return -1; ++ out: ++ ksft_test_result(ret == 0, "check_compaction\n"); ++ return ret; + } + + + int main(int argc, char **argv) + { + struct rlimit lim; +- struct map_list *list, *entry; ++ struct map_list *list = NULL, *entry; + size_t page_size, i; + void *map = NULL; + unsigned long mem_free = 0; + unsigned long hugepage_size = 0; + long mem_fragmentable_MB = 0; + +- if (prereq() != 0) { +- printf("Either the sysctl compact_unevictable_allowed is not\n" +- "set to 1 or couldn't read the proc file.\n" +- "Skipping the test\n"); +- return KSFT_SKIP; +- } ++ ksft_print_header(); ++ ++ if (prereq() != 0) ++ return ksft_exit_pass(); ++ ++ ksft_set_plan(1); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; +- if (setrlimit(RLIMIT_MEMLOCK, &lim)) { +- perror("Failed to set rlimit:\n"); +- return -1; +- } ++ if (setrlimit(RLIMIT_MEMLOCK, &lim)) ++ ksft_exit_fail_msg("Failed to set rlimit: %s\n", strerror(errno)); + + page_size = getpagesize(); + +- list = NULL; +- +- if (read_memory_info(&mem_free, &hugepage_size) != 0) { +- printf("ERROR: Cannot read meminfo\n"); +- return -1; +- } ++ if (read_memory_info(&mem_free, &hugepage_size) != 0) ++ ksft_exit_fail_msg("Failed to get meminfo\n"); + + mem_fragmentable_MB = mem_free * 0.8 / 1024; + +@@ -227,7 +233,7 @@ int main(int argc, char **argv) + } + + if (check_compaction(mem_free, hugepage_size) == 0) +- return 0; ++ return ksft_exit_pass(); + +- return -1; ++ return ksft_exit_fail(); + } +diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile +index 3412b29b32e70..d417de1051233 100644 +--- a/tools/testing/selftests/net/Makefile ++++ b/tools/testing/selftests/net/Makefile +@@ -92,7 +92,7 @@ TEST_PROGS += test_vxlan_nolocalbypass.sh + TEST_PROGS += test_bridge_backup_port.sh + + TEST_FILES := settings +-TEST_FILES += in_netns.sh net_helper.sh setup_loopback.sh setup_veth.sh ++TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh + + include ../lib.mk + +diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh +index e37a15eda6c24..97e7675da04fe 100755 +--- a/tools/testing/selftests/net/forwarding/lib.sh ++++ b/tools/testing/selftests/net/forwarding/lib.sh +@@ -4,9 +4,6 @@ + ############################################################################## + # Defines + +-# Kselftest framework requirement - SKIP code is 4. +-ksft_skip=4 +- + # Can be overridden by the configuration file. + PING=${PING:=ping} + PING6=${PING6:=ping6} +@@ -41,6 +38,32 @@ if [[ -f $relative_path/forwarding.config ]]; then + source "$relative_path/forwarding.config" + fi + ++# Kselftest framework requirement - SKIP code is 4. ++ksft_skip=4 ++ ++busywait() ++{ ++ local timeout=$1; shift ++ ++ local start_time="$(date -u +%s%3N)" ++ while true ++ do ++ local out ++ out=$("$@") ++ local ret=$? ++ if ((!ret)); then ++ echo -n "$out" ++ return 0 ++ fi ++ ++ local current_time="$(date -u +%s%3N)" ++ if ((current_time - start_time > timeout)); then ++ echo -n "$out" ++ return 1 ++ fi ++ done ++} ++ + ############################################################################## + # Sanity checks + +@@ -395,29 +418,6 @@ log_info() + echo "INFO: $msg" + } + +-busywait() +-{ +- local timeout=$1; shift +- +- local start_time="$(date -u +%s%3N)" +- while true +- do +- local out +- out=$("$@") +- local ret=$? 
+- if ((!ret)); then +- echo -n "$out" +- return 0 +- fi +- +- local current_time="$(date -u +%s%3N)" +- if ((current_time - start_time > timeout)); then +- echo -n "$out" +- return 1 +- fi +- done +-} +- + not() + { + "$@" +diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh +new file mode 100644 +index 0000000000000..a186490edb4ab +--- /dev/null ++++ b/tools/testing/selftests/net/lib.sh +@@ -0,0 +1,97 @@ ++#!/bin/bash ++# SPDX-License-Identifier: GPL-2.0 ++ ++############################################################################## ++# Defines ++ ++WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} ++BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms ++ ++# Kselftest framework requirement - SKIP code is 4. ++ksft_skip=4 ++# namespace list created by setup_ns ++NS_LIST=() ++ ++############################################################################## ++# Helpers ++busywait() ++{ ++ local timeout=$1; shift ++ ++ local start_time="$(date -u +%s%3N)" ++ while true ++ do ++ local out ++ if out=$("$@"); then ++ echo -n "$out" ++ return 0 ++ fi ++ ++ local current_time="$(date -u +%s%3N)" ++ if ((current_time - start_time > timeout)); then ++ echo -n "$out" ++ return 1 ++ fi ++ done ++} ++ ++cleanup_ns() ++{ ++ local ns="" ++ local errexit=0 ++ local ret=0 ++ ++ # disable errexit temporary ++ if [[ $- =~ "e" ]]; then ++ errexit=1 ++ set +e ++ fi ++ ++ for ns in "$@"; do ++ [ -z "${ns}" ] && continue ++ ip netns delete "${ns}" &> /dev/null ++ if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then ++ echo "Warn: Failed to remove namespace $ns" ++ ret=1 ++ fi ++ done ++ ++ [ $errexit -eq 1 ] && set -e ++ return $ret ++} ++ ++cleanup_all_ns() ++{ ++ cleanup_ns "${NS_LIST[@]}" ++} ++ ++# setup netns with given names as prefix. e.g ++# setup_ns local remote ++setup_ns() ++{ ++ local ns="" ++ local ns_name="" ++ local ns_list=() ++ local ns_exist= ++ for ns_name in "$@"; do ++ # Some test may setup/remove same netns multi times ++ if unset ${ns_name} 2> /dev/null; then ++ ns="${ns_name,,}-$(mktemp -u XXXXXX)" ++ eval readonly ${ns_name}="$ns" ++ ns_exist=false ++ else ++ eval ns='$'${ns_name} ++ cleanup_ns "$ns" ++ ns_exist=true ++ fi ++ ++ if ! ip netns add "$ns"; then ++ echo "Failed to create namespace $ns_name" ++ cleanup_ns "${ns_list[@]}" ++ return $ksft_skip ++ fi ++ ip -n "$ns" link set lo up ++ ! 
$ns_exist && ns_list+=("$ns") ++ done ++ NS_LIST+=("${ns_list[@]}") ++} +diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh +index 6e684a9a3c616..231a95a8de9ee 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -2393,9 +2393,10 @@ remove_tests() + if reset "remove invalid addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal ++ # broadcast IP: no packet for this address will be received on ns1 ++ pm_nl_add_endpoint $ns1 224.0.0.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal +- pm_nl_add_endpoint $ns1 10.0.14.1 flags signal +- pm_nl_set_limits $ns2 3 3 ++ pm_nl_set_limits $ns2 2 2 + addr_nr_ns1=-3 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 +diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c +index 7093fd5333beb..7bd80ee2a5b48 100644 +--- a/tools/tracing/rtla/src/timerlat_aa.c ++++ b/tools/tracing/rtla/src/timerlat_aa.c +@@ -16,6 +16,9 @@ enum timelat_state { + TIMERLAT_WAITING_THREAD, + }; + ++/* Used to fill spaces in the output */ ++static const char *spaces = " "; ++ + #define MAX_COMM 24 + + /* +@@ -274,14 +277,17 @@ static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *recor + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); +- trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n", +- "nmi", ns_to_usf(duration)); ++ trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n", ++ "nmi", ++ 24, spaces, ++ ns_to_usf(duration)); + return 0; + } + + taa_data->thread_nmi_sum += duration; +- trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n", +- "nmi", ns_to_usf(duration)); ++ trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n", ++ "nmi", ++ 24, spaces, ns_to_usf(duration)); + + return 0; + } +@@ -323,8 +329,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); +- trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n", +- desc, vector, ns_to_usf(duration)); ++ trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", ++ desc, vector, ++ 15, spaces, ++ ns_to_usf(duration)); + return 0; + } + +@@ -372,8 +380,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor + * IRQ interference. 
+ */ + taa_data->thread_irq_sum += duration; +- trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n", +- desc, vector, ns_to_usf(duration)); ++ trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", ++ desc, vector, ++ 24, spaces, ++ ns_to_usf(duration)); + + return 0; + } +@@ -408,8 +418,10 @@ static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *r + + taa_data->thread_softirq_sum += duration; + +- trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n", +- softirq_name[vector], vector, ns_to_usf(duration)); ++ trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n", ++ softirq_name[vector], vector, ++ 24, spaces, ++ ns_to_usf(duration)); + return 0; + } + +@@ -452,8 +464,10 @@ static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *re + } else { + taa_data->thread_thread_sum += duration; + +- trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n", +- comm, pid, ns_to_usf(duration)); ++ trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n", ++ comm, pid, ++ 15, spaces, ++ ns_to_usf(duration)); + } + + return 0; +@@ -482,7 +496,8 @@ static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *rec + function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); + if (!function) + break; +- trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function); ++ trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n", ++ 14, spaces, function); + } + } + return 0; +@@ -568,23 +583,24 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; + if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { + if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) +- printf(" Previous IRQ interference: \t\t up to %9.2f us\n", +- ns_to_usf(taa_data->prev_irq_duration)); ++ printf(" Previous IRQ interference: %.*s up to %9.2f us\n", ++ 16, spaces, ++ ns_to_usf(taa_data->prev_irq_duration)); + } + + /* + * The delay that the IRQ suffered before starting. + */ +- printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n", +- (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", +- ns_to_usf(taa_data->timer_irq_start_delay), +- ns_to_per(total, taa_data->timer_irq_start_delay)); ++ printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces, ++ (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", ++ ns_to_usf(taa_data->timer_irq_start_delay), ++ ns_to_per(total, taa_data->timer_irq_start_delay)); + + /* + * Timerlat IRQ. + */ +- printf(" IRQ latency: \t\t\t\t %9.2f us\n", +- ns_to_usf(taa_data->tlat_irq_latency)); ++ printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces, ++ ns_to_usf(taa_data->tlat_irq_latency)); + + if (irq) { + /* +@@ -595,15 +611,16 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + * so it will be displayed, it is the key. + */ + printf(" Blocking thread:\n"); +- printf(" %24s:%-9llu\n", +- taa_data->run_thread_comm, taa_data->run_thread_pid); ++ printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm, ++ taa_data->run_thread_pid); + } else { + /* + * The duration of the IRQ handler that handled the timerlat IRQ. 
+ */ +- printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n", +- ns_to_usf(taa_data->timer_irq_duration), +- ns_to_per(total, taa_data->timer_irq_duration)); ++ printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n", ++ 30, spaces, ++ ns_to_usf(taa_data->timer_irq_duration), ++ ns_to_per(total, taa_data->timer_irq_duration)); + + /* + * The amount of time that the current thread postponed the scheduler. +@@ -611,13 +628,13 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + * Recalling that it is net from NMI/IRQ/Softirq interference, so there + * is no need to compute values here. + */ +- printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n", +- ns_to_usf(taa_data->thread_blocking_duration), +- ns_to_per(total, taa_data->thread_blocking_duration)); ++ printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces, ++ ns_to_usf(taa_data->thread_blocking_duration), ++ ns_to_per(total, taa_data->thread_blocking_duration)); + +- printf(" %24s:%-9llu %9.2f us\n", +- taa_data->run_thread_comm, taa_data->run_thread_pid, +- ns_to_usf(taa_data->thread_blocking_duration)); ++ printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces, ++ taa_data->run_thread_comm, taa_data->run_thread_pid, ++ 12, spaces, ns_to_usf(taa_data->thread_blocking_duration)); + } + + /* +@@ -629,9 +646,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + * NMIs can happen during the IRQ, so they are always possible. + */ + if (taa_data->thread_nmi_sum) +- printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n", +- ns_to_usf(taa_data->thread_nmi_sum), +- ns_to_per(total, taa_data->thread_nmi_sum)); ++ printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, ++ ns_to_usf(taa_data->thread_nmi_sum), ++ ns_to_per(total, taa_data->thread_nmi_sum)); + + /* + * If it is an IRQ latency, the other factors can be skipped. +@@ -643,9 +660,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + * Prints the interference caused by IRQs to the thread latency. + */ + if (taa_data->thread_irq_sum) { +- printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n", +- ns_to_usf(taa_data->thread_irq_sum), +- ns_to_per(total, taa_data->thread_irq_sum)); ++ printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, ++ ns_to_usf(taa_data->thread_irq_sum), ++ ns_to_per(total, taa_data->thread_irq_sum)); + + trace_seq_do_printf(taa_data->irqs_seq); + } +@@ -654,9 +671,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + * Prints the interference caused by Softirqs to the thread latency. + */ + if (taa_data->thread_softirq_sum) { +- printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n", +- ns_to_usf(taa_data->thread_softirq_sum), +- ns_to_per(total, taa_data->thread_softirq_sum)); ++ printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces, ++ ns_to_usf(taa_data->thread_softirq_sum), ++ ns_to_per(total, taa_data->thread_softirq_sum)); + + trace_seq_do_printf(taa_data->softirqs_seq); + } +@@ -670,9 +687,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + * timer handling latency. 
+ */ + if (taa_data->thread_thread_sum) { +- printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n", +- ns_to_usf(taa_data->thread_thread_sum), +- ns_to_per(total, taa_data->thread_thread_sum)); ++ printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces, ++ ns_to_usf(taa_data->thread_thread_sum), ++ ns_to_per(total, taa_data->thread_thread_sum)); + + trace_seq_do_printf(taa_data->threads_seq); + } +@@ -682,8 +699,8 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + */ + print_total: + printf("------------------------------------------------------------------------\n"); +- printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread", +- ns_to_usf(total)); ++ printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread", ++ 37, spaces, ns_to_usf(total)); + } + + static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) +diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c +index 3e9af2c386888..38d6965183d9f 100644 +--- a/tools/tracing/rtla/src/timerlat_top.c ++++ b/tools/tracing/rtla/src/timerlat_top.c +@@ -211,6 +211,8 @@ static void timerlat_top_header(struct osnoise_tool *top) + trace_seq_printf(s, "\n"); + } + ++static const char *no_value = " -"; ++ + /* + * timerlat_top_print - prints the output of a given CPU + */ +@@ -238,10 +240,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) + trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + + if (!cpu_data->irq_count) { +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " - |"); ++ trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); +@@ -250,10 +249,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) + } + + if (!cpu_data->thread_count) { +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " -\n"); ++ trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); +@@ -270,10 +266,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) + trace_seq_printf(s, " |"); + + if (!cpu_data->user_count) { +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " - "); +- trace_seq_printf(s, " -\n"); ++ trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor); |