summary refs log tree commit diff
diff options
context:
space:
mode:
author Mike Pagano <mpagano@gentoo.org> 2016-08-17 07:48:43 -0400
committer Mike Pagano <mpagano@gentoo.org> 2016-08-17 07:48:43 -0400
commit 77768bcf60d97675cdc9d83e0a8ee4a34564aede (patch)
tree d7e5c0f9438ea264f6b36c101657144b1b903810
parent Linux patch 4.4.17 (diff)
download linux-patches-77768bcf60d97675cdc9d83e0a8ee4a34564aede.tar.gz
linux-patches-77768bcf60d97675cdc9d83e0a8ee4a34564aede.tar.bz2
linux-patches-77768bcf60d97675cdc9d83e0a8ee4a34564aede.zip
Linux patch 4.4.18 4.4-20
-rw-r--r-- 0000_README 4
-rw-r--r-- 1017_linux-4.4.18.patch 2072
2 files changed, 2076 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index 87c8060c..5aeb754b 100644
--- a/0000_README
+++ b/0000_README
@@ -111,6 +111,10 @@ Patch: 1016_linux-4.4.17.patch
From: http://www.kernel.org
Desc: Linux 4.4.17
+Patch: 1017_linux-4.4.18.patch
+From: http://www.kernel.org
+Desc: Linux 4.4.18
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1017_linux-4.4.18.patch b/1017_linux-4.4.18.patch
new file mode 100644
index 00000000..4306189d
--- /dev/null
+++ b/1017_linux-4.4.18.patch
@@ -0,0 +1,2072 @@
+diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
+index 54944c71b819..2a4ee6302122 100644
+--- a/Documentation/x86/pat.txt
++++ b/Documentation/x86/pat.txt
+@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
+ "debugpat" boot parameter. With this parameter, various debug messages are
+ printed to dmesg log.
+
++PAT Initialization
++------------------
++
++The following table describes how PAT is initialized under various
++configurations. The PAT MSR must be updated by Linux in order to support WC
++and WT attributes. Otherwise, the PAT MSR has the value programmed in it
++by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
++
++ MTRR PAT Call Sequence PAT State PAT MSR
++ =========================================================
++ E E MTRR -> PAT init Enabled OS
++ E D MTRR -> PAT init Disabled -
++ D E MTRR -> PAT disable Disabled BIOS
++ D D MTRR -> PAT disable Disabled -
++ - np/E PAT -> PAT disable Disabled BIOS
++ - np/D PAT -> PAT disable Disabled -
++ E !P/E MTRR -> PAT init Disabled BIOS
++ D !P/E MTRR -> PAT disable Disabled BIOS
++ !M !P/E MTRR stub -> PAT disable Disabled BIOS
++
++ Legend
++ ------------------------------------------------
++ E Feature enabled in CPU
++ D Feature disabled/unsupported in CPU
++ np "nopat" boot option specified
++ !P CONFIG_X86_PAT option unset
++ !M CONFIG_MTRR option unset
++ Enabled PAT state set to enabled
++ Disabled PAT state set to disabled
++ OS PAT initializes PAT MSR with OS setting
++ BIOS PAT keeps PAT MSR with BIOS setting
++
+diff --git a/Makefile b/Makefile
+index 76d34f763a41..eaedea88a8a7 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 17
++SUBLEVEL = 18
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+
+diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
+index 087acb569b63..5f221acd21ae 100644
+--- a/arch/arm/kernel/sys_oabi-compat.c
++++ b/arch/arm/kernel/sys_oabi-compat.c
+@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
+ mm_segment_t fs;
+ long ret, err, i;
+
+- if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
++ if (maxevents <= 0 ||
++ maxevents > (INT_MAX/sizeof(*kbuf)) ||
++ maxevents > (INT_MAX/sizeof(*events)))
+ return -EINVAL;
++ if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
++ return -EFAULT;
+ kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
+
+ if (nsops < 1 || nsops > SEMOPM)
+ return -EINVAL;
++ if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
++ return -EFAULT;
+ sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
+ if (!sops)
+ return -ENOMEM;
+diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
+index 5a69eb48d0a8..ee93d5fe61d7 100644
+--- a/arch/mips/kernel/scall64-n32.S
++++ b/arch/mips/kernel/scall64-n32.S
+@@ -344,7 +344,7 @@ EXPORT(sysn32_call_table)
+ PTR sys_ni_syscall /* available, was setaltroot */
+ PTR sys_add_key
+ PTR sys_request_key
+- PTR sys_keyctl /* 6245 */
++ PTR compat_sys_keyctl /* 6245 */
+ PTR sys_set_thread_area
+ PTR sys_inotify_init
+ PTR sys_inotify_add_watch
+diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
+index e4b6d7c97822..b77052ec6fb2 100644
+--- a/arch/mips/kernel/scall64-o32.S
++++ b/arch/mips/kernel/scall64-o32.S
+@@ -500,7 +500,7 @@ EXPORT(sys32_call_table)
+ PTR sys_ni_syscall /* available, was setaltroot */
+ PTR sys_add_key /* 4280 */
+ PTR sys_request_key
+- PTR sys_keyctl
++ PTR compat_sys_keyctl
+ PTR sys_set_thread_area
+ PTR sys_inotify_init
+ PTR sys_inotify_add_watch /* 4285 */
+diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
+index b1f0a90f933b..42570d8fb265 100644
+--- a/arch/s390/kernel/ipl.c
++++ b/arch/s390/kernel/ipl.c
+@@ -2070,13 +2070,6 @@ void s390_reset_system(void (*fn_pre)(void),
+ S390_lowcore.program_new_psw.addr =
+ PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+
+- /*
+- * Clear subchannel ID and number to signal new kernel that no CCW or
+- * SCSI IPL has been done (for kexec and kdump)
+- */
+- S390_lowcore.subchannel_id = 0;
+- S390_lowcore.subchannel_nr = 0;
+-
+ /* Store status at absolute zero */
+ store_status();
+
+diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
+index f17705e1332c..e62f4401e792 100644
+--- a/arch/x86/entry/syscalls/syscall_32.tbl
++++ b/arch/x86/entry/syscalls/syscall_32.tbl
+@@ -294,7 +294,7 @@
+ # 285 sys_setaltroot
+ 286 i386 add_key sys_add_key
+ 287 i386 request_key sys_request_key
+-288 i386 keyctl sys_keyctl
++288 i386 keyctl sys_keyctl compat_sys_keyctl
+ 289 i386 ioprio_set sys_ioprio_set
+ 290 i386 ioprio_get sys_ioprio_get
+ 291 i386 inotify_init sys_inotify_init
+diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
+index b94f6f64e23d..dbff1456d215 100644
+--- a/arch/x86/include/asm/mtrr.h
++++ b/arch/x86/include/asm/mtrr.h
+@@ -24,6 +24,7 @@
+ #define _ASM_X86_MTRR_H
+
+ #include <uapi/asm/mtrr.h>
++#include <asm/pat.h>
+
+
+ /*
+@@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
+ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
+ {
+ }
++static inline void mtrr_bp_init(void)
++{
++ pat_disable("MTRRs disabled, skipping PAT initialization too.");
++}
+
+ #define mtrr_ap_init() do {} while (0)
+-#define mtrr_bp_init() do {} while (0)
+ #define set_mtrr_aps_delayed_init() do {} while (0)
+ #define mtrr_aps_init() do {} while (0)
+ #define mtrr_bp_restore() do {} while (0)
+diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
+index ca6c228d5e62..0b1ff4c1c14e 100644
+--- a/arch/x86/include/asm/pat.h
++++ b/arch/x86/include/asm/pat.h
+@@ -5,8 +5,8 @@
+ #include <asm/pgtable_types.h>
+
+ bool pat_enabled(void);
++void pat_disable(const char *reason);
+ extern void pat_init(void);
+-void pat_init_cache_modes(u64);
+
+ extern int reserve_memtype(u64 start, u64 end,
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
+index 3b533cf37c74..b5624fafa44a 100644
+--- a/arch/x86/kernel/cpu/mtrr/generic.c
++++ b/arch/x86/kernel/cpu/mtrr/generic.c
+@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
+ pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
+ }
+
++/* PAT setup for BP. We need to go through sync steps here */
++void __init mtrr_bp_pat_init(void)
++{
++ unsigned long flags;
++
++ local_irq_save(flags);
++ prepare_set();
++
++ pat_init();
++
++ post_set();
++ local_irq_restore(flags);
++}
++
+ /* Grab all of the MTRR state for this CPU into *state */
+ bool __init get_mtrr_state(void)
+ {
+ struct mtrr_var_range *vrs;
+- unsigned long flags;
+ unsigned lo, dummy;
+ unsigned int i;
+
+@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
+
+ mtrr_state_set = 1;
+
+- /* PAT setup for BP. We need to go through sync steps here */
+- local_irq_save(flags);
+- prepare_set();
+-
+- pat_init();
+-
+- post_set();
+- local_irq_restore(flags);
+-
+ return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
+ }
+
+diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
+index f891b4750f04..fa77ac8291f0 100644
+--- a/arch/x86/kernel/cpu/mtrr/main.c
++++ b/arch/x86/kernel/cpu/mtrr/main.c
+@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
+ /* BIOS may override */
+ __mtrr_enabled = get_mtrr_state();
+
++ if (mtrr_enabled())
++ mtrr_bp_pat_init();
++
+ if (mtrr_cleanup(phys_addr)) {
+ changed_by_mtrr_cleanup = 1;
+ mtrr_if->set_all();
+@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
+ }
+ }
+
+- if (!mtrr_enabled())
++ if (!mtrr_enabled()) {
+ pr_info("MTRR: Disabled\n");
++
++ /*
++ * PAT initialization relies on MTRR's rendezvous handler.
++ * Skip PAT init until the handler can initialize both
++ * features independently.
++ */
++ pat_disable("MTRRs disabled, skipping PAT initialization too.");
++ }
+ }
+
+ void mtrr_ap_init(void)
+diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
+index 951884dcc433..6c7ced07d16d 100644
+--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
++++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
+@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
+ void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
+ bool get_mtrr_state(void);
++void mtrr_bp_pat_init(void);
+
+ extern void set_mtrr_ops(const struct mtrr_ops *ops);
+
+diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
+index 844b06d67df4..307f60ecfc6d 100644
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
+ }
+
+ /*
+- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
+- * does, but not when emulating X86_32
+- */
+-static unsigned long mmap_legacy_base(unsigned long rnd)
+-{
+- if (mmap_is_ia32())
+- return TASK_UNMAPPED_BASE;
+- else
+- return TASK_UNMAPPED_BASE + rnd;
+-}
+-
+-/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+- mm->mmap_legacy_base = mmap_legacy_base(random_factor);
++ mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
+
+ if (mmap_is_legacy()) {
+ mm->mmap_base = mm->mmap_legacy_base;
+diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
+index 188e3e07eeeb..6ad687d104ca 100644
+--- a/arch/x86/mm/pat.c
++++ b/arch/x86/mm/pat.c
+@@ -39,11 +39,22 @@
+ static bool boot_cpu_done;
+
+ static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
++static void init_cache_modes(void);
+
+-static inline void pat_disable(const char *reason)
++void pat_disable(const char *reason)
+ {
++ if (!__pat_enabled)
++ return;
++
++ if (boot_cpu_done) {
++ WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
++ return;
++ }
++
+ __pat_enabled = 0;
+ pr_info("x86/PAT: %s\n", reason);
++
++ init_cache_modes();
+ }
+
+ static int __init nopat(char *str)
+@@ -180,7 +191,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
+ * configuration.
+ * Using lower indices is preferred, so we start with highest index.
+ */
+-void pat_init_cache_modes(u64 pat)
++static void __init_cache_modes(u64 pat)
+ {
+ enum page_cache_mode cache;
+ char pat_msg[33];
+@@ -201,14 +212,11 @@ static void pat_bsp_init(u64 pat)
+ {
+ u64 tmp_pat;
+
+- if (!cpu_has_pat) {
++ if (!boot_cpu_has(X86_FEATURE_PAT)) {
+ pat_disable("PAT not supported by CPU.");
+ return;
+ }
+
+- if (!pat_enabled())
+- goto done;
+-
+ rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
+ if (!tmp_pat) {
+ pat_disable("PAT MSR is 0, disabled.");
+@@ -217,16 +225,12 @@ static void pat_bsp_init(u64 pat)
+
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+
+-done:
+- pat_init_cache_modes(pat);
++ __init_cache_modes(pat);
+ }
+
+ static void pat_ap_init(u64 pat)
+ {
+- if (!pat_enabled())
+- return;
+-
+- if (!cpu_has_pat) {
++ if (!boot_cpu_has(X86_FEATURE_PAT)) {
+ /*
+ * If this happens we are on a secondary CPU, but switched to
+ * PAT on the boot CPU. We have no way to undo PAT.
+@@ -237,18 +241,32 @@ static void pat_ap_init(u64 pat)
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+ }
+
+-void pat_init(void)
++static void init_cache_modes(void)
+ {
+- u64 pat;
+- struct cpuinfo_x86 *c = &boot_cpu_data;
++ u64 pat = 0;
++ static int init_cm_done;
+
+- if (!pat_enabled()) {
++ if (init_cm_done)
++ return;
++
++ if (boot_cpu_has(X86_FEATURE_PAT)) {
++ /*
++ * CPU supports PAT. Set PAT table to be consistent with
++ * PAT MSR. This case supports "nopat" boot option, and
++ * virtual machine environments which support PAT without
++ * MTRRs. In specific, Xen has unique setup to PAT MSR.
++ *
++ * If PAT MSR returns 0, it is considered invalid and emulates
++ * as No PAT.
++ */
++ rdmsrl(MSR_IA32_CR_PAT, pat);
++ }
++
++ if (!pat) {
+ /*
+ * No PAT. Emulate the PAT table that corresponds to the two
+- * cache bits, PWT (Write Through) and PCD (Cache Disable). This
+- * setup is the same as the BIOS default setup when the system
+- * has PAT but the "nopat" boot option has been specified. This
+- * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
++ * cache bits, PWT (Write Through) and PCD (Cache Disable).
++ * This setup is also the same as the BIOS default setup.
+ *
+ * PTE encoding:
+ *
+@@ -265,10 +283,36 @@ void pat_init(void)
+ */
+ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
++ }
++
++ __init_cache_modes(pat);
++
++ init_cm_done = 1;
++}
++
++/**
++ * pat_init - Initialize PAT MSR and PAT table
++ *
++ * This function initializes PAT MSR and PAT table with an OS-defined value
++ * to enable additional cache attributes, WC and WT.
++ *
++ * This function must be called on all CPUs using the specific sequence of
++ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
++ * procedure for PAT.
++ */
++void pat_init(void)
++{
++ u64 pat;
++ struct cpuinfo_x86 *c = &boot_cpu_data;
++
++ if (!pat_enabled()) {
++ init_cache_modes();
++ return;
++ }
+
+- } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
++ if ((c->x86_vendor == X86_VENDOR_INTEL) &&
++ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
++ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ /*
+ * PAT support with the lower four entries. Intel Pentium 2,
+ * 3, M, and 4 are affected by PAT errata, which makes the
+@@ -733,25 +777,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+ if (file->f_flags & O_DSYNC)
+ pcm = _PAGE_CACHE_MODE_UC_MINUS;
+
+-#ifdef CONFIG_X86_32
+- /*
+- * On the PPro and successors, the MTRRs are used to set
+- * memory types for physical addresses outside main memory,
+- * so blindly setting UC or PWT on those pages is wrong.
+- * For Pentiums and earlier, the surround logic should disable
+- * caching for the high addresses through the KEN pin, but
+- * we maintain the tradition of paranoia in this code.
+- */
+- if (!pat_enabled() &&
+- !(boot_cpu_has(X86_FEATURE_MTRR) ||
+- boot_cpu_has(X86_FEATURE_K6_MTRR) ||
+- boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
+- boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
+- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
+- pcm = _PAGE_CACHE_MODE_UC;
+- }
+-#endif
+-
+ *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
+ cachemode2protval(pcm));
+ return 1;
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index beab8c706ac9..ffa41591bff9 100644
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -74,7 +74,6 @@
+ #include <asm/mach_traps.h>
+ #include <asm/mwait.h>
+ #include <asm/pci_x86.h>
+-#include <asm/pat.h>
+ #include <asm/cpu.h>
+
+ #ifdef CONFIG_ACPI
+@@ -1519,7 +1518,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
+ {
+ struct physdev_set_iopl set_iopl;
+ unsigned long initrd_start = 0;
+- u64 pat;
+ int rc;
+
+ if (!xen_start_info)
+@@ -1627,13 +1625,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
+ xen_start_info->nr_pages);
+ xen_reserve_special_pages();
+
+- /*
+- * Modify the cache mode translation tables to match Xen's PAT
+- * configuration.
+- */
+- rdmsrl(MSR_IA32_CR_PAT, pat);
+- pat_init_cache_modes(pat);
+-
+ /* keep using Xen gdt for now; no urgent need to change it */
+
+ #ifdef CONFIG_X86_32
+diff --git a/block/genhd.c b/block/genhd.c
+index e5cafa51567c..d2a1d43bf9fa 100644
+--- a/block/genhd.c
++++ b/block/genhd.c
+@@ -831,6 +831,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v)
+ if (iter) {
+ class_dev_iter_exit(iter);
+ kfree(iter);
++ seqf->private = NULL;
+ }
+ }
+
+diff --git a/crypto/gcm.c b/crypto/gcm.c
+index bec329b3de8d..d9ea5f9c0574 100644
+--- a/crypto/gcm.c
++++ b/crypto/gcm.c
+@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
+
+ ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
+ CRYPTO_ALG_TYPE_HASH,
+- CRYPTO_ALG_TYPE_AHASH_MASK);
++ CRYPTO_ALG_TYPE_AHASH_MASK |
++ crypto_requires_sync(algt->type,
++ algt->mask));
+ if (IS_ERR(ghash_alg))
+ return PTR_ERR(ghash_alg);
+
+diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
+index ea5815c5e128..bc769c448d4a 100644
+--- a/crypto/scatterwalk.c
++++ b/crypto/scatterwalk.c
+@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
+
+ void scatterwalk_done(struct scatter_walk *walk, int out, int more)
+ {
+- if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
++ if (!more || walk->offset >= walk->sg->offset + walk->sg->length ||
++ !(walk->offset & (PAGE_SIZE - 1)))
+ scatterwalk_pagedone(walk, out, more);
+ }
+ EXPORT_SYMBOL_GPL(scatterwalk_done);
+diff --git a/drivers/char/random.c b/drivers/char/random.c
+index d0da5d852d41..0227b0465b40 100644
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -722,15 +722,18 @@ retry:
+ }
+ }
+
+-static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
++static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
+ {
+ const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));
+
++ if (nbits < 0)
++ return -EINVAL;
++
+ /* Cap the value to avoid overflows */
+ nbits = min(nbits, nbits_max);
+- nbits = max(nbits, -nbits_max);
+
+ credit_entropy_bits(r, nbits);
++ return 0;
+ }
+
+ /*********************************************************************
+@@ -1542,8 +1545,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ return -EPERM;
+ if (get_user(ent_count, p))
+ return -EFAULT;
+- credit_entropy_bits_safe(&input_pool, ent_count);
+- return 0;
++ return credit_entropy_bits_safe(&input_pool, ent_count);
+ case RNDADDENTROPY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+@@ -1557,8 +1559,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ size);
+ if (retval < 0)
+ return retval;
+- credit_entropy_bits_safe(&input_pool, ent_count);
+- return 0;
++ return credit_entropy_bits_safe(&input_pool, ent_count);
+ case RNDZAPENTCNT:
+ case RNDCLEARPOOL:
+ /*
+diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
+index 62284e45d531..eb434881ddbc 100644
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -1789,16 +1789,20 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
+ const struct intel_plane_state *pstate,
+ uint32_t mem_value)
+ {
+- int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
++ /*
++ * We treat the cursor plane as always-on for the purposes of watermark
++ * calculation. Until we have two-stage watermark programming merged,
++ * this is necessary to avoid flickering.
++ */
++ int cpp = 4;
++ int width = pstate->visible ? pstate->base.crtc_w : 64;
+
+- if (!cstate->base.active || !pstate->visible)
++ if (!cstate->base.active)
+ return 0;
+
+ return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
+ cstate->base.adjusted_mode.crtc_htotal,
+- drm_rect_width(&pstate->dst),
+- bpp,
+- mem_value);
++ width, cpp, mem_value);
+ }
+
+ /* Only for WM_LP. */
+diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
+index 774cd2210566..21febbb0d84e 100644
+--- a/drivers/hid/hid-sony.c
++++ b/drivers/hid/hid-sony.c
+@@ -1418,8 +1418,10 @@ static int sixaxis_set_operational_usb(struct hid_device *hdev)
+ }
+
+ ret = hid_hw_output_report(hdev, buf, 1);
+- if (ret < 0)
+- hid_err(hdev, "can't set operational mode: step 3\n");
++ if (ret < 0) {
++ hid_info(hdev, "can't set operational mode: step 3, ignoring\n");
++ ret = 0;
++ }
+
+ out:
+ kfree(buf);
+diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
+index 27fa0cb09538..85f39cc3e276 100644
+--- a/drivers/i2c/busses/i2c-i801.c
++++ b/drivers/i2c/busses/i2c-i801.c
+@@ -244,6 +244,13 @@ struct i801_priv {
+ struct platform_device *mux_pdev;
+ #endif
+ struct platform_device *tco_pdev;
++
++ /*
++ * If set to true the host controller registers are reserved for
++ * ACPI AML use. Protected by acpi_lock.
++ */
++ bool acpi_reserved;
++ struct mutex acpi_lock;
+ };
+
+ #define FEATURE_SMBUS_PEC (1 << 0)
+@@ -714,9 +721,15 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ {
+ int hwpec;
+ int block = 0;
+- int ret, xact = 0;
++ int ret = 0, xact = 0;
+ struct i801_priv *priv = i2c_get_adapdata(adap);
+
++ mutex_lock(&priv->acpi_lock);
++ if (priv->acpi_reserved) {
++ mutex_unlock(&priv->acpi_lock);
++ return -EBUSY;
++ }
++
+ hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
+ && size != I2C_SMBUS_QUICK
+ && size != I2C_SMBUS_I2C_BLOCK_DATA;
+@@ -773,7 +786,8 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ default:
+ dev_err(&priv->pci_dev->dev, "Unsupported transaction %d\n",
+ size);
+- return -EOPNOTSUPP;
++ ret = -EOPNOTSUPP;
++ goto out;
+ }
+
+ if (hwpec) /* enable/disable hardware PEC */
+@@ -796,11 +810,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));
+
+ if (block)
+- return ret;
++ goto out;
+ if (ret)
+- return ret;
++ goto out;
+ if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
+- return 0;
++ goto out;
+
+ switch (xact & 0x7f) {
+ case I801_BYTE: /* Result put in SMBHSTDAT0 */
+@@ -812,7 +826,10 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ (inb_p(SMBHSTDAT1(priv)) << 8);
+ break;
+ }
+- return 0;
++
++out:
++ mutex_unlock(&priv->acpi_lock);
++ return ret;
+ }
+
+
+@@ -1249,6 +1266,72 @@ static void i801_add_tco(struct i801_priv *priv)
+ priv->tco_pdev = pdev;
+ }
+
++#ifdef CONFIG_ACPI
++static acpi_status
++i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
++ u64 *value, void *handler_context, void *region_context)
++{
++ struct i801_priv *priv = handler_context;
++ struct pci_dev *pdev = priv->pci_dev;
++ acpi_status status;
++
++ /*
++ * Once BIOS AML code touches the OpRegion we warn and inhibit any
++ * further access from the driver itself. This device is now owned
++ * by the system firmware.
++ */
++ mutex_lock(&priv->acpi_lock);
++
++ if (!priv->acpi_reserved) {
++ priv->acpi_reserved = true;
++
++ dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
++ dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
++ }
++
++ if ((function & ACPI_IO_MASK) == ACPI_READ)
++ status = acpi_os_read_port(address, (u32 *)value, bits);
++ else
++ status = acpi_os_write_port(address, (u32)*value, bits);
++
++ mutex_unlock(&priv->acpi_lock);
++
++ return status;
++}
++
++static int i801_acpi_probe(struct i801_priv *priv)
++{
++ struct acpi_device *adev;
++ acpi_status status;
++
++ adev = ACPI_COMPANION(&priv->pci_dev->dev);
++ if (adev) {
++ status = acpi_install_address_space_handler(adev->handle,
++ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
++ NULL, priv);
++ if (ACPI_SUCCESS(status))
++ return 0;
++ }
++
++ return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
++}
++
++static void i801_acpi_remove(struct i801_priv *priv)
++{
++ struct acpi_device *adev;
++
++ adev = ACPI_COMPANION(&priv->pci_dev->dev);
++ if (!adev)
++ return;
++
++ acpi_remove_address_space_handler(adev->handle,
++ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
++}
++#else
++static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
++static inline void i801_acpi_remove(struct i801_priv *priv) { }
++#endif
++
+ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ {
+ unsigned char temp;
+@@ -1266,6 +1349,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ priv->adapter.dev.parent = &dev->dev;
+ ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
+ priv->adapter.retries = 3;
++ mutex_init(&priv->acpi_lock);
+
+ priv->pci_dev = dev;
+ switch (dev->device) {
+@@ -1328,10 +1412,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ return -ENODEV;
+ }
+
+- err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
+- if (err) {
++ if (i801_acpi_probe(priv))
+ return -ENODEV;
+- }
+
+ err = pcim_iomap_regions(dev, 1 << SMBBAR,
+ dev_driver_string(&dev->dev));
+@@ -1340,6 +1422,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ "Failed to request SMBus region 0x%lx-0x%Lx\n",
+ priv->smba,
+ (unsigned long long)pci_resource_end(dev, SMBBAR));
++ i801_acpi_remove(priv);
+ return err;
+ }
+
+@@ -1404,6 +1487,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ err = i2c_add_adapter(&priv->adapter);
+ if (err) {
+ dev_err(&dev->dev, "Failed to add SMBus adapter\n");
++ i801_acpi_remove(priv);
+ return err;
+ }
+
+@@ -1422,6 +1506,7 @@ static void i801_remove(struct pci_dev *dev)
+
+ i801_del_mux(priv);
+ i2c_del_adapter(&priv->adapter);
++ i801_acpi_remove(priv);
+ pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
+
+ platform_device_unregister(priv->tco_pdev);
+diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
+index db760e84119f..b8df0f5e8c25 100644
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev,
+ if (err < 0)
+ return err;
+
+- return register_netdevice(bond_dev);
++ err = register_netdevice(bond_dev);
++
++ netif_carrier_off(bond_dev);
++
++ return err;
+ }
+
+ static size_t bond_get_size(const struct net_device *bond_dev)
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
+index 28f7610b03fe..c32f5d32f811 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -219,7 +219,7 @@ err_dma:
+ dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+ DMA_TO_DEVICE);
+
+- while (i > 0) {
++ while (i-- > 0) {
+ int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+ struct bgmac_slot_info *slot = &ring->slots[index];
+ u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+index 3dd548ab8df1..40365cb1abe6 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+@@ -794,13 +794,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
+ * in a bitmap and increasing the chain consumer only
+ * for the first successive completed entries.
+ */
+- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
++ __set_bit(pos, p_spq->p_comp_bitmap);
+
+ while (test_bit(p_spq->comp_bitmap_idx,
+ p_spq->p_comp_bitmap)) {
+- bitmap_clear(p_spq->p_comp_bitmap,
+- p_spq->comp_bitmap_idx,
+- SPQ_RING_SIZE);
++ __clear_bit(p_spq->comp_bitmap_idx,
++ p_spq->p_comp_bitmap);
+ p_spq->comp_bitmap_idx++;
+ qed_chain_return_produced(&p_spq->chain);
+ }
+diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
+index a790d5f90b83..e0e94b855bbe 100644
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -952,8 +952,6 @@ EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting);
+
+ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
+ {
+- int ret;
+-
+ /* MBIM backwards compatible function? */
+ if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
+ return -ENODEV;
+@@ -962,16 +960,7 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
+ * Additionally, generic NCM devices are assumed to accept arbitrarily
+ * placed NDP.
+ */
+- ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
+-
+- /*
+- * We should get an event when network connection is "connected" or
+- * "disconnected". Set network connection in "disconnected" state
+- * (carrier is OFF) during attach, so the IP network stack does not
+- * start IPv6 negotiation and more.
+- */
+- usbnet_link_change(dev, 0, 0);
+- return ret;
++ return cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
+ }
+
+ static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max)
+@@ -1554,7 +1543,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
+
+ static const struct driver_info cdc_ncm_info = {
+ .description = "CDC NCM",
+- .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
++ .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
++ | FLAG_LINK_INTR,
+ .bind = cdc_ncm_bind,
+ .unbind = cdc_ncm_unbind,
+ .manage_power = usbnet_manage_power,
+@@ -1567,7 +1557,7 @@ static const struct driver_info cdc_ncm_info = {
+ static const struct driver_info wwan_info = {
+ .description = "Mobile Broadband Network Device",
+ .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+- | FLAG_WWAN,
++ | FLAG_LINK_INTR | FLAG_WWAN,
+ .bind = cdc_ncm_bind,
+ .unbind = cdc_ncm_unbind,
+ .manage_power = usbnet_manage_power,
+@@ -1580,7 +1570,7 @@ static const struct driver_info wwan_info = {
+ static const struct driver_info wwan_noarp_info = {
+ .description = "Mobile Broadband Network Device (NO ARP)",
+ .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+- | FLAG_WWAN | FLAG_NOARP,
++ | FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP,
+ .bind = cdc_ncm_bind,
+ .unbind = cdc_ncm_unbind,
+ .manage_power = usbnet_manage_power,
+diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
+index 943c1cb9566c..d28e3ab9479c 100644
+--- a/drivers/pnp/quirks.c
++++ b/drivers/pnp/quirks.c
+@@ -342,7 +342,9 @@ static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
+ /* Device IDs of parts that have 32KB MCH space */
+ static const unsigned int mch_quirk_devices[] = {
+ 0x0154, /* Ivy Bridge */
++ 0x0a04, /* Haswell-ULT */
+ 0x0c00, /* Haswell */
++ 0x1604, /* Broadwell */
+ };
+
+ static struct pci_dev *get_intel_host(void)
+diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
+index f7ae898833dd..7232d43e2207 100644
+--- a/drivers/scsi/scsi_sysfs.c
++++ b/drivers/scsi/scsi_sysfs.c
+@@ -1058,11 +1058,12 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
+ }
+
+ error = scsi_dh_add_device(sdev);
+- if (error) {
++ if (error)
++ /*
++ * device_handler is optional, so any error can be ignored
++ */
+ sdev_printk(KERN_INFO, sdev,
+ "failed to add device handler: %d\n", error);
+- return error;
+- }
+
+ device_enable_async_suspend(&sdev->sdev_dev);
+ error = device_add(&sdev->sdev_dev);
+diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
+index 13c3cd11ab92..05d30f433b19 100644
+--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
++++ b/drivers/staging/rdma/ipath/ipath_file_ops.c
+@@ -45,6 +45,8 @@
+ #include <linux/uio.h>
+ #include <asm/pgtable.h>
+
++#include <rdma/ib.h>
++
+ #include "ipath_kernel.h"
+ #include "ipath_common.h"
+ #include "ipath_user_sdma.h"
+@@ -2243,6 +2245,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
+ ssize_t ret = 0;
+ void *dest;
+
++ if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
++ return -EACCES;
++
+ if (count < sizeof(cmd.type)) {
+ ret = -EINVAL;
+ goto bail;
+diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
+index 7865228f664f..807d80145686 100644
+--- a/drivers/tty/pty.c
++++ b/drivers/tty/pty.c
+@@ -679,14 +679,14 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
+ /* this is called once with whichever end is closed last */
+ static void pty_unix98_shutdown(struct tty_struct *tty)
+ {
+- struct inode *ptmx_inode;
++ struct pts_fs_info *fsi;
+
+ if (tty->driver->subtype == PTY_TYPE_MASTER)
+- ptmx_inode = tty->driver_data;
++ fsi = tty->driver_data;
+ else
+- ptmx_inode = tty->link->driver_data;
+- devpts_kill_index(ptmx_inode, tty->index);
+- devpts_del_ref(ptmx_inode);
++ fsi = tty->link->driver_data;
++ devpts_kill_index(fsi, tty->index);
++ devpts_put_ref(fsi);
+ }
+
+ static const struct tty_operations ptm_unix98_ops = {
+@@ -738,6 +738,7 @@ static const struct tty_operations pty_unix98_ops = {
+
+ static int ptmx_open(struct inode *inode, struct file *filp)
+ {
++ struct pts_fs_info *fsi;
+ struct tty_struct *tty;
+ struct inode *slave_inode;
+ int retval;
+@@ -752,47 +753,41 @@ static int ptmx_open(struct inode *inode, struct file *filp)
+ if (retval)
+ return retval;
+
++ fsi = devpts_get_ref(inode, filp);
++ retval = -ENODEV;
++ if (!fsi)
++ goto out_free_file;
++
+ /* find a device that is not in use. */
+ mutex_lock(&devpts_mutex);
+- index = devpts_new_index(inode);
+- if (index < 0) {
+- retval = index;
+- mutex_unlock(&devpts_mutex);
+- goto err_file;
+- }
+-
++ index = devpts_new_index(fsi);
+ mutex_unlock(&devpts_mutex);
+
+- mutex_lock(&tty_mutex);
+- tty = tty_init_dev(ptm_driver, index);
++ retval = index;
++ if (index < 0)
++ goto out_put_ref;
+
+- if (IS_ERR(tty)) {
+- retval = PTR_ERR(tty);
+- goto out;
+- }
+
++ mutex_lock(&tty_mutex);
++ tty = tty_init_dev(ptm_driver, index);
+ /* The tty returned here is locked so we can safely
+ drop the mutex */
+ mutex_unlock(&tty_mutex);
+
+- set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+- tty->driver_data = inode;
++ retval = PTR_ERR(tty);
++ if (IS_ERR(tty))
++ goto out;
+
+ /*
+- * In the case where all references to ptmx inode are dropped and we
+- * still have /dev/tty opened pointing to the master/slave pair (ptmx
+- * is closed/released before /dev/tty), we must make sure that the inode
+- * is still valid when we call the final pty_unix98_shutdown, thus we
+- * hold an additional reference to the ptmx inode. For the same /dev/tty
+- * last close case, we also need to make sure the super_block isn't
+- * destroyed (devpts instance unmounted), before /dev/tty is closed and
+- * on its release devpts_kill_index is called.
++ * From here on out, the tty is "live", and the index and
++ * fsi will be killed/put by the tty_release()
+ */
+- devpts_add_ref(inode);
++ set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
++ tty->driver_data = fsi;
+
+ tty_add_file(tty, filp);
+
+- slave_inode = devpts_pty_new(inode,
++ slave_inode = devpts_pty_new(fsi,
+ MKDEV(UNIX98_PTY_SLAVE_MAJOR, index), index,
+ tty->link);
+ if (IS_ERR(slave_inode)) {
+@@ -811,12 +806,14 @@ static int ptmx_open(struct inode *inode, struct file *filp)
+ return 0;
+ err_release:
+ tty_unlock(tty);
++ // This will also put-ref the fsi
+ tty_release(inode, filp);
+ return retval;
+ out:
+- mutex_unlock(&tty_mutex);
+- devpts_kill_index(inode, index);
+-err_file:
++ devpts_kill_index(fsi, index);
++out_put_ref:
++ devpts_put_ref(fsi);
++out_free_file:
+ tty_free_file(filp);
+ return retval;
+ }
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 108d7d810be3..71b6056ad35d 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -578,7 +578,6 @@ static struct dentry *dentry_kill(struct dentry *dentry)
+
+ failed:
+ spin_unlock(&dentry->d_lock);
+- cpu_relax();
+ return dentry; /* try again with same dentry */
+ }
+
+@@ -752,6 +751,8 @@ void dput(struct dentry *dentry)
+ return;
+
+ repeat:
++ might_sleep();
++
+ rcu_read_lock();
+ if (likely(fast_dput(dentry))) {
+ rcu_read_unlock();
+@@ -783,8 +784,10 @@ repeat:
+
+ kill_it:
+ dentry = dentry_kill(dentry);
+- if (dentry)
++ if (dentry) {
++ cond_resched();
+ goto repeat;
++ }
+ }
+ EXPORT_SYMBOL(dput);
+
+diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
+index 706de324f2a6..c82edb049117 100644
+--- a/fs/devpts/inode.c
++++ b/fs/devpts/inode.c
+@@ -128,6 +128,7 @@ static const match_table_t tokens = {
+ struct pts_fs_info {
+ struct ida allocated_ptys;
+ struct pts_mount_opts mount_opts;
++ struct super_block *sb;
+ struct dentry *ptmx_dentry;
+ };
+
+@@ -358,7 +359,7 @@ static const struct super_operations devpts_sops = {
+ .show_options = devpts_show_options,
+ };
+
+-static void *new_pts_fs_info(void)
++static void *new_pts_fs_info(struct super_block *sb)
+ {
+ struct pts_fs_info *fsi;
+
+@@ -369,6 +370,7 @@ static void *new_pts_fs_info(void)
+ ida_init(&fsi->allocated_ptys);
+ fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
+ fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
++ fsi->sb = sb;
+
+ return fsi;
+ }
+@@ -384,7 +386,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
+ s->s_op = &devpts_sops;
+ s->s_time_gran = 1;
+
+- s->s_fs_info = new_pts_fs_info();
++ s->s_fs_info = new_pts_fs_info(s);
+ if (!s->s_fs_info)
+ goto fail;
+
+@@ -524,17 +526,14 @@ static struct file_system_type devpts_fs_type = {
+ * to the System V naming convention
+ */
+
+-int devpts_new_index(struct inode *ptmx_inode)
++int devpts_new_index(struct pts_fs_info *fsi)
+ {
+- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+- struct pts_fs_info *fsi;
+ int index;
+ int ida_ret;
+
+- if (!sb)
++ if (!fsi)
+ return -ENODEV;
+
+- fsi = DEVPTS_SB(sb);
+ retry:
+ if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
+ return -ENOMEM;
+@@ -564,11 +563,8 @@ retry:
+ return index;
+ }
+
+-void devpts_kill_index(struct inode *ptmx_inode, int idx)
++void devpts_kill_index(struct pts_fs_info *fsi, int idx)
+ {
+- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+- struct pts_fs_info *fsi = DEVPTS_SB(sb);
+-
+ mutex_lock(&allocated_ptys_lock);
+ ida_remove(&fsi->allocated_ptys, idx);
+ pty_count--;
+@@ -578,21 +574,25 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
+ /*
+ * pty code needs to hold extra references in case of last /dev/tty close
+ */
+-
+-void devpts_add_ref(struct inode *ptmx_inode)
++struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
+ {
+- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
++ struct super_block *sb;
++ struct pts_fs_info *fsi;
++
++ sb = pts_sb_from_inode(ptmx_inode);
++ if (!sb)
++ return NULL;
++ fsi = DEVPTS_SB(sb);
++ if (!fsi)
++ return NULL;
+
+ atomic_inc(&sb->s_active);
+- ihold(ptmx_inode);
++ return fsi;
+ }
+
+-void devpts_del_ref(struct inode *ptmx_inode)
++void devpts_put_ref(struct pts_fs_info *fsi)
+ {
+- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+-
+- iput(ptmx_inode);
+- deactivate_super(sb);
++ deactivate_super(fsi->sb);
+ }
+
+ /**
+@@ -604,22 +604,21 @@ void devpts_del_ref(struct inode *ptmx_inode)
+ *
+ * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
+ */
+-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
++struct inode *devpts_pty_new(struct pts_fs_info *fsi, dev_t device, int index,
+ void *priv)
+ {
+ struct dentry *dentry;
+- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
++ struct super_block *sb;
+ struct inode *inode;
+ struct dentry *root;
+- struct pts_fs_info *fsi;
+ struct pts_mount_opts *opts;
+ char s[12];
+
+- if (!sb)
++ if (!fsi)
+ return ERR_PTR(-ENODEV);
+
++ sb = fsi->sb;
+ root = sb->s_root;
+- fsi = DEVPTS_SB(sb);
+ opts = &fsi->mount_opts;
+
+ inode = new_inode(sb);
+diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
+index fe1f50fe764f..f97110461c19 100644
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb,
+ memset(bh->b_data, 0, sb->s_blocksize);
+
+ bit_max = ext4_num_base_meta_clusters(sb, block_group);
++ if ((bit_max >> 3) >= bh->b_size)
++ return -EFSCORRUPTED;
++
+ for (bit = 0; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 62880586ed85..8eac7d586997 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
+ ext4_fsblk_t block = ext4_ext_pblock(ext);
+ int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+- ext4_lblk_t last = lblock + len - 1;
+
+- if (len == 0 || lblock > last)
++ /*
++ * We allow neither:
++ * - zero length
++ * - overflow/wrap-around
++ */
++ if (lblock + len <= lblock)
+ return 0;
+ return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ }
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index e31d762eedce..9a5ad0f0d3ed 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inode)
+ * Note that directories do not have this problem because they
+ * don't use page cache.
+ */
+- if (ext4_should_journal_data(inode) &&
+- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+- inode->i_ino != EXT4_JOURNAL_INO) {
++ if (inode->i_ino != EXT4_JOURNAL_INO &&
++ ext4_should_journal_data(inode) &&
++ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+ tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
+
+@@ -2589,13 +2589,36 @@ retry:
+ done = true;
+ }
+ }
+- ext4_journal_stop(handle);
++ /*
++ * Caution: If the handle is synchronous,
++ * ext4_journal_stop() can wait for transaction commit
++ * to finish which may depend on writeback of pages to
++ * complete or on page lock to be released. In that
++ * case, we have to wait until after we have
++ * submitted all the IO, released page locks we hold,
++ * and dropped io_end reference (for extent conversion
++ * to be able to complete) before stopping the handle.
++ */
++ if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
++ ext4_journal_stop(handle);
++ handle = NULL;
++ }
+ /* Submit prepared bio */
+ ext4_io_submit(&mpd.io_submit);
+ /* Unlock pages we didn't use */
+ mpage_release_unused_pages(&mpd, give_up_on_write);
+- /* Drop our io_end reference we got from init */
+- ext4_put_io_end(mpd.io_submit.io_end);
++ /*
++ * Drop our io_end reference we got from init. We have
++ * to be careful and use deferred io_end finishing if
++ * we are still holding the transaction as we can
++ * release the last reference to io_end which may end
++ * up doing unwritten extent conversion.
++ */
++ if (handle) {
++ ext4_put_io_end_defer(mpd.io_submit.io_end);
++ ext4_journal_stop(handle);
++ } else
++ ext4_put_io_end(mpd.io_submit.io_end);
+
+ if (ret == -ENOSPC && sbi->s_journal) {
+ /*
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index cf734170daa9..c4dcac8a018d 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2932,7 +2932,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
+ "fs metadata", block, block+len);
+ /* File system mounted not to panic on error
+- * Fix the bitmap and repeat the block allocation
++ * Fix the bitmap and return EFSCORRUPTED
+ * We leak some of the blocks here.
+ */
+ ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+@@ -2941,7 +2941,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+ if (!err)
+- err = -EAGAIN;
++ err = -EFSCORRUPTED;
+ goto out_err;
+ }
+
+@@ -4506,18 +4506,7 @@ repeat:
+ }
+ if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
+- if (*errp == -EAGAIN) {
+- /*
+- * drop the reference that we took
+- * in ext4_mb_use_best_found
+- */
+- ext4_mb_release_context(ac);
+- ac->ac_b_ex.fe_group = 0;
+- ac->ac_b_ex.fe_start = 0;
+- ac->ac_b_ex.fe_len = 0;
+- ac->ac_status = AC_STATUS_CONTINUE;
+- goto repeat;
+- } else if (*errp) {
++ if (*errp) {
+ ext4_discard_allocated_blocks(ac);
+ goto errout;
+ } else {
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 852c26806af2..c542ebcf7a92 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2240,6 +2240,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
+ while (es->s_last_orphan) {
+ struct inode *inode;
+
++ /*
++ * We may have encountered an error during cleanup; if
++ * so, skip the rest.
++ */
++ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
++ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
++ es->s_last_orphan = 0;
++ break;
++ }
++
+ inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
+ if (IS_ERR(inode)) {
+ es->s_last_orphan = 0;
+@@ -3372,6 +3382,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ goto failed_mount;
+ }
+
++ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
++ ext4_msg(sb, KERN_ERR,
++ "Number of reserved GDT blocks insanely large: %d",
++ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
++ goto failed_mount;
++ }
++
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (blocksize != PAGE_SIZE) {
+ ext4_msg(sb, KERN_ERR,
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index c2e340d6ec6e..d58d4c0af0ce 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
+ fuse_sync_writes(inode);
+ mutex_unlock(&inode->i_mutex);
+
++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++ err = -ENOSPC;
++ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++ err = -EIO;
++ if (err)
++ return err;
++
+ req = fuse_get_req_nofail_nopages(fc, file);
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+@@ -462,6 +471,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
+ goto out;
+
+ fuse_sync_writes(inode);
++
++ /*
++ * Due to the implementation of fuse writeback,
++ * filemap_write_and_wait_range() does not catch errors.
++ * We have to do this directly after fuse_sync_writes()
++ */
++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++ err = -ENOSPC;
++ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++ err = -EIO;
++ if (err)
++ goto out;
++
+ err = sync_inode_metadata(inode, 1);
+ if (err)
+ goto out;
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index 2913db2a5b99..0d5e8e59b390 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
+ arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+ FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
+- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
++ FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+ FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
+ req->in.h.opcode = FUSE_INIT;
+diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
+index e0ee0b3000b2..358a4db72a27 100644
+--- a/include/linux/devpts_fs.h
++++ b/include/linux/devpts_fs.h
+@@ -15,38 +15,24 @@
+
+ #include <linux/errno.h>
+
++struct pts_fs_info;
++
+ #ifdef CONFIG_UNIX98_PTYS
+
+-int devpts_new_index(struct inode *ptmx_inode);
+-void devpts_kill_index(struct inode *ptmx_inode, int idx);
+-void devpts_add_ref(struct inode *ptmx_inode);
+-void devpts_del_ref(struct inode *ptmx_inode);
++/* Look up a pts fs info and get a ref to it */
++struct pts_fs_info *devpts_get_ref(struct inode *, struct file *);
++void devpts_put_ref(struct pts_fs_info *);
++
++int devpts_new_index(struct pts_fs_info *);
++void devpts_kill_index(struct pts_fs_info *, int);
++
+ /* mknod in devpts */
+-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+- void *priv);
++struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *);
+ /* get private structure */
+ void *devpts_get_priv(struct inode *pts_inode);
+ /* unlink */
+ void devpts_pty_kill(struct inode *inode);
+
+-#else
+-
+-/* Dummy stubs in the no-pty case */
+-static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
+-static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
+-static inline void devpts_add_ref(struct inode *ptmx_inode) { }
+-static inline void devpts_del_ref(struct inode *ptmx_inode) { }
+-static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
+- dev_t device, int index, void *priv)
+-{
+- return ERR_PTR(-EINVAL);
+-}
+-static inline void *devpts_get_priv(struct inode *pts_inode)
+-{
+- return NULL;
+-}
+-static inline void devpts_pty_kill(struct inode *inode) { }
+-
+ #endif
+
+
+diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
+index cd0e2413c358..435fd8426b8a 100644
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -174,6 +174,11 @@ struct mem_cgroup_thresholds {
+ struct mem_cgroup_threshold_ary *spare;
+ };
+
++struct mem_cgroup_id {
++ int id;
++ atomic_t ref;
++};
++
+ /*
+ * The memory controller data structure. The memory controller controls both
+ * page cache and RSS per cgroup. We would eventually like to provide
+@@ -183,6 +188,9 @@ struct mem_cgroup_thresholds {
+ struct mem_cgroup {
+ struct cgroup_subsys_state css;
+
++ /* Private memcg ID. Used to ID objects that outlive the cgroup */
++ struct mem_cgroup_id id;
++
+ /* Accounted resources */
+ struct page_counter memory;
+ struct page_counter memsw;
+diff --git a/ipc/msg.c b/ipc/msg.c
+index 1471db9a7e61..c6521c205cb4 100644
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
+ rcu_read_lock();
+ ipc_lock_object(&msq->q_perm);
+
+- ipc_rcu_putref(msq, ipc_rcu_free);
++ ipc_rcu_putref(msq, msg_rcu_free);
+ /* raced with RMID? */
+ if (!ipc_valid_object(&msq->q_perm)) {
+ err = -EIDRM;
+diff --git a/ipc/sem.c b/ipc/sem.c
+index b471e5a3863d..20d07008ad5e 100644
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -442,7 +442,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
+ static inline void sem_lock_and_putref(struct sem_array *sma)
+ {
+ sem_lock(sma, NULL, -1);
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ }
+
+ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+@@ -1385,7 +1385,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ rcu_read_unlock();
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if (sem_io == NULL) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return -ENOMEM;
+ }
+
+@@ -1419,20 +1419,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ if (nsems > SEMMSL_FAST) {
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if (sem_io == NULL) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return -ENOMEM;
+ }
+ }
+
+ if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ err = -EFAULT;
+ goto out_free;
+ }
+
+ for (i = 0; i < nsems; i++) {
+ if (sem_io[i] > SEMVMX) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ err = -ERANGE;
+ goto out_free;
+ }
+@@ -1722,7 +1722,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
+ /* step 2: allocate new undo structure */
+ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+ if (!new) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return ERR_PTR(-ENOMEM);
+ }
+
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 67648e6b2ac8..6b90d184e9c0 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -272,21 +272,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
+
+ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+ {
+- return memcg->css.id;
+-}
+-
+-/*
+- * A helper function to get mem_cgroup from ID. must be called under
+- * rcu_read_lock(). The caller is responsible for calling
+- * css_tryget_online() if the mem_cgroup is used for charging. (dropping
+- * refcnt from swap can be called against removed memcg.)
+- */
+-static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+-{
+- struct cgroup_subsys_state *css;
+-
+- css = css_from_id(id, &memory_cgrp_subsys);
+- return mem_cgroup_from_css(css);
++ return memcg->id.id;
+ }
+
+ /* Writing them here to avoid exposing memcg's inner layout */
+@@ -4124,6 +4110,88 @@ static struct cftype mem_cgroup_legacy_files[] = {
+ { }, /* terminate */
+ };
+
++/*
++ * Private memory cgroup IDR
++ *
++ * Swap-out records and page cache shadow entries need to store memcg
++ * references in constrained space, so we maintain an ID space that is
++ * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of
++ * memory-controlled cgroups to 64k.
++ *
++ * However, there usually are many references to the offline CSS after
++ * the cgroup has been destroyed, such as page cache or reclaimable
++ * slab objects, that don't need to hang on to the ID. We want to keep
++ * those dead CSS from occupying IDs, or we might quickly exhaust the
++ * relatively small ID space and prevent the creation of new cgroups
++ * even when there are much fewer than 64k cgroups - possibly none.
++ *
++ * Maintain a private 16-bit ID space for memcg, and allow the ID to
++ * be freed and recycled when it's no longer needed, which is usually
++ * when the CSS is offlined.
++ *
++ * The only exceptions to that are records of swapped out tmpfs/shmem
++ * pages that need to be attributed to live ancestors on swapin. But
++ * those references are manageable from userspace.
++ */
++
++static DEFINE_IDR(mem_cgroup_idr);
++
++static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
++{
++ atomic_add(n, &memcg->id.ref);
++}
++
++static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
++{
++ while (!atomic_inc_not_zero(&memcg->id.ref)) {
++ /*
++ * The root cgroup cannot be destroyed, so its refcount must
++ * always be >= 1.
++ */
++ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
++ VM_BUG_ON(1);
++ break;
++ }
++ memcg = parent_mem_cgroup(memcg);
++ if (!memcg)
++ memcg = root_mem_cgroup;
++ }
++ return memcg;
++}
++
++static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
++{
++ if (atomic_sub_and_test(n, &memcg->id.ref)) {
++ idr_remove(&mem_cgroup_idr, memcg->id.id);
++ memcg->id.id = 0;
++
++ /* Memcg ID pins CSS */
++ css_put(&memcg->css);
++ }
++}
++
++static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
++{
++ mem_cgroup_id_get_many(memcg, 1);
++}
++
++static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
++{
++ mem_cgroup_id_put_many(memcg, 1);
++}
++
++/**
++ * mem_cgroup_from_id - look up a memcg from a memcg id
++ * @id: the memcg id to look up
++ *
++ * Caller must hold rcu_read_lock().
++ */
++struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
++{
++ WARN_ON_ONCE(!rcu_read_lock_held());
++ return idr_find(&mem_cgroup_idr, id);
++}
++
+ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+ {
+ struct mem_cgroup_per_node *pn;
+@@ -4178,6 +4246,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
+ if (memcg_wb_domain_init(memcg, GFP_KERNEL))
+ goto out_free_stat;
+
++ memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
++ 1, MEM_CGROUP_ID_MAX,
++ GFP_KERNEL);
++ if (memcg->id.id < 0)
++ goto out_free_stat;
++
+ return memcg;
+
+ out_free_stat:
+@@ -4263,9 +4337,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+ #ifdef CONFIG_CGROUP_WRITEBACK
+ INIT_LIST_HEAD(&memcg->cgwb_list);
+ #endif
++ idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+ return &memcg->css;
+
+ free_out:
++ idr_remove(&mem_cgroup_idr, memcg->id.id);
+ __mem_cgroup_free(memcg);
+ return ERR_PTR(error);
+ }
+@@ -4277,8 +4353,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
+ struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
+ int ret;
+
+- if (css->id > MEM_CGROUP_ID_MAX)
+- return -ENOSPC;
++ /* Online state pins memcg ID, memcg ID pins CSS */
++ mem_cgroup_id_get(mem_cgroup_from_css(css));
++ css_get(css);
+
+ if (!parent)
+ return 0;
+@@ -4352,6 +4429,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
+ memcg_deactivate_kmem(memcg);
+
+ wb_memcg_offline(memcg);
++
++ mem_cgroup_id_put(memcg);
+ }
+
+ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
+@@ -4785,6 +4864,8 @@ static void __mem_cgroup_clear_mc(void)
+ if (!mem_cgroup_is_root(mc.from))
+ page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
+
++ mem_cgroup_id_put_many(mc.from, mc.moved_swap);
++
+ /*
+ * we charged both to->memory and to->memsw, so we
+ * should uncharge to->memory.
+@@ -4792,9 +4873,9 @@ static void __mem_cgroup_clear_mc(void)
+ if (!mem_cgroup_is_root(mc.to))
+ page_counter_uncharge(&mc.to->memory, mc.moved_swap);
+
+- css_put_many(&mc.from->css, mc.moved_swap);
++ mem_cgroup_id_get_many(mc.to, mc.moved_swap);
++ css_put_many(&mc.to->css, mc.moved_swap);
+
+- /* we've already done css_get(mc.to) */
+ mc.moved_swap = 0;
+ }
+ memcg_oom_recover(from);
+@@ -5670,7 +5751,7 @@ subsys_initcall(mem_cgroup_init);
+ */
+ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ {
+- struct mem_cgroup *memcg;
++ struct mem_cgroup *memcg, *swap_memcg;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+@@ -5685,15 +5766,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ if (!memcg)
+ return;
+
+- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
++ /*
++ * In case the memcg owning these pages has been offlined and doesn't
++ * have an ID allocated to it anymore, charge the closest online
++ * ancestor for the swap instead and transfer the memory+swap charge.
++ */
++ swap_memcg = mem_cgroup_id_get_online(memcg);
++ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+- mem_cgroup_swap_statistics(memcg, true);
++ mem_cgroup_swap_statistics(swap_memcg, true);
+
+ page->mem_cgroup = NULL;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
++ if (memcg != swap_memcg) {
++ if (!mem_cgroup_is_root(swap_memcg))
++ page_counter_charge(&swap_memcg->memsw, 1);
++ page_counter_uncharge(&memcg->memsw, 1);
++ }
++
+ /*
+ * Interrupts should be disabled here because the caller holds the
+ * mapping->tree_lock lock which is taken with interrupts-off. It is
+@@ -5703,6 +5796,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ VM_BUG_ON(!irqs_disabled());
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
++
++ if (!mem_cgroup_is_root(memcg))
++ css_put(&memcg->css);
+ }
+
+ /**
+@@ -5726,7 +5822,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memsw, 1);
+ mem_cgroup_swap_statistics(memcg, false);
+- css_put(&memcg->css);
++ mem_cgroup_id_put(memcg);
+ }
+ rcu_read_unlock();
+ }
+diff --git a/mm/slab_common.c b/mm/slab_common.c
+index 3c6a86b4ec25..bec2fce9fafc 100644
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -521,8 +521,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
+ goto out_unlock;
+
+ cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
+- cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
+- css->id, memcg_name_buf);
++ cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
++ css->serial_nr, memcg_name_buf);
+ if (!cache_name)
+ goto out_unlock;
+
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index 2b68418c7198..ffe95d954007 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+ if (!rtnh_ok(rtnh, remaining))
+ return -EINVAL;
+
++ if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
++ return -EINVAL;
++
+ nexthop_nh->nh_flags =
+ (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+ nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
+@@ -1003,6 +1006,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
+ if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
+ goto err_inval;
+
++ if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
++ goto err_inval;
++
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (cfg->fc_mp) {
+ nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index d4c51158470f..12b98e257c5f 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
+ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
+
+ /* rfc5961 challenge ack rate limiting */
+-int sysctl_tcp_challenge_ack_limit = 100;
++int sysctl_tcp_challenge_ack_limit = 1000;
+
+ int sysctl_tcp_stdurg __read_mostly;
+ int sysctl_tcp_rfc1337 __read_mostly;
+@@ -3390,6 +3390,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
+ return flag;
+ }
+
++static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
++ u32 *last_oow_ack_time)
++{
++ if (*last_oow_ack_time) {
++ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
++
++ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
++ NET_INC_STATS_BH(net, mib_idx);
++ return true; /* rate-limited: don't send yet! */
++ }
++ }
++
++ *last_oow_ack_time = tcp_time_stamp;
++
++ return false; /* not rate-limited: go ahead, send dupack now! */
++}
++
+ /* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+@@ -3403,21 +3420,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+ /* Data packets without SYNs are not likely part of an ACK loop. */
+ if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+ !tcp_hdr(skb)->syn)
+- goto not_rate_limited;
+-
+- if (*last_oow_ack_time) {
+- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+-
+- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+- NET_INC_STATS_BH(net, mib_idx);
+- return true; /* rate-limited: don't send yet! */
+- }
+- }
+-
+- *last_oow_ack_time = tcp_time_stamp;
++ return false;
+
+-not_rate_limited:
+- return false; /* not rate-limited: go ahead, send dupack now! */
++ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
+ }
+
+ /* RFC 5961 7 [ACK Throttling] */
+@@ -3427,21 +3432,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
+ static u32 challenge_timestamp;
+ static unsigned int challenge_count;
+ struct tcp_sock *tp = tcp_sk(sk);
+- u32 now;
++ u32 count, now;
+
+ /* First check our per-socket dupack rate limit. */
+- if (tcp_oow_rate_limited(sock_net(sk), skb,
+- LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+- &tp->last_oow_ack_time))
++ if (__tcp_oow_rate_limited(sock_net(sk),
++ LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
++ &tp->last_oow_ack_time))
+ return;
+
+- /* Then check the check host-wide RFC 5961 rate limit. */
++ /* Then check host-wide RFC 5961 rate limit. */
+ now = jiffies / HZ;
+ if (now != challenge_timestamp) {
++ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
++
+ challenge_timestamp = now;
+- challenge_count = 0;
++ WRITE_ONCE(challenge_count, half +
++ prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+ }
+- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
++ count = READ_ONCE(challenge_count);
++ if (count > 0) {
++ WRITE_ONCE(challenge_count, count - 1);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ tcp_send_ack(sk);
+ }
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 7c9883ab56e5..660c967ba84a 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -239,7 +239,8 @@ void tcp_select_initial_window(int __space, __u32 mss,
+ /* Set window scaling on max possible window
+ * See RFC1323 for an explanation of the limit to 14
+ */
+- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
++ space = max_t(u32, space, sysctl_tcp_rmem[2]);
++ space = max_t(u32, space, sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
+ while (space > 65535 && (*rcv_wscale) < 14) {
+ space >>= 1;
+diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
+index 923abd6b3064..8d2f7c9b491d 100644
+--- a/net/irda/af_irda.c
++++ b/net/irda/af_irda.c
+@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
+ }
+
+ /* Check if we have opened a local TSAP */
+- if (!self->tsap)
+- irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (!self->tsap) {
++ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (err)
++ goto out;
++ }
+
+ /* Move to connecting socket, start sending Connect Requests */
+ sock->state = SS_CONNECTING;
+diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
+index ad4fa49ad1db..9068369f8a1b 100644
+--- a/security/apparmor/apparmorfs.c
++++ b/security/apparmor/apparmorfs.c
+@@ -331,6 +331,7 @@ static int aa_fs_seq_hash_show(struct seq_file *seq, void *v)
+ seq_printf(seq, "%.2x", profile->hash[i]);
+ seq_puts(seq, "\n");
+ }
++ aa_put_profile(profile);
+
+ return 0;
+ }