author    Alice Ferrazzi <alicef@gentoo.org>  2021-01-30 22:33:01 +0900
committer Alice Ferrazzi <alicef@gentoo.org>  2021-01-30 22:33:38 +0900
commit    b59ee32ab3065145d58b65bb7895cca32a510ec5 (patch)
tree      76d5f03eb34a1411952f004800372048eb42c847
parent    Linux patch 4.19.171 (diff)
download  linux-patches-b59ee32ab3065145d58b65bb7895cca32a510ec5.tar.gz
          linux-patches-b59ee32ab3065145d58b65bb7895cca32a510ec5.tar.bz2
          linux-patches-b59ee32ab3065145d58b65bb7895cca32a510ec5.zip

Linux patch 4.19.172

Signed-off-by: Alice Ferrazzi <alicef@gentoo.org>

Diffstat:
 -rw-r--r--  0000_README               |    4 +
 -rw-r--r--  1171_linux-4.19.172.patch | 1606 ++++++++++++++++++++++++++++++++
 2 files changed, 1610 insertions(+), 0 deletions(-)
diff --git a/0000_README b/0000_README
index 1c1a3723..a202ba15 100644
--- a/0000_README
+++ b/0000_README
@@ -723,6 +723,10 @@ Patch: 1170_linux-4.19.171.patch
From: https://www.kernel.org
Desc: Linux 4.19.171
+Patch: 1171_linux-4.19.172.patch
+From: https://www.kernel.org
+Desc: Linux 4.19.172
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1171_linux-4.19.172.patch b/1171_linux-4.19.172.patch
new file mode 100644
index 00000000..fc24ced3
--- /dev/null
+++ b/1171_linux-4.19.172.patch
@@ -0,0 +1,1606 @@
+diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt
+index 297251b0d2d57..bf6af2ade0a67 100644
+--- a/Documentation/device-mapper/dm-integrity.txt
++++ b/Documentation/device-mapper/dm-integrity.txt
+@@ -146,6 +146,13 @@ block_size:number
+ Supported values are 512, 1024, 2048 and 4096 bytes. If not
+ specified the default block size is 512 bytes.
+
++legacy_recalculate
++ Allow recalculating of volumes with HMAC keys. This is disabled by
++ default for security reasons - an attacker could modify the volume,
++ set recalc_sector to zero, and the kernel would not detect the
++ modification.
++
++
+ The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
+ be changed when reloading the target (load an inactive table and swap the
+ tables with suspend and resume). The other arguments should not be changed
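For context, feature arguments such as legacy_recalculate go at the end of the dm-integrity target line, after the optional-argument count. A hypothetical table line for a keyed (HMAC) volume that opts back in to recalculation; the device, sizes and key below are placeholders, not taken from this patch:

    0 2097152 integrity /dev/sdb 0 32 J 3 internal_hash:hmac(sha256):0123456789abcdef recalculate legacy_recalculate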
+diff --git a/Makefile b/Makefile
+index 335b015c5c9ba..7da0ddd650521 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 4
+ PATCHLEVEL = 19
+-SUBLEVEL = 171
++SUBLEVEL = 172
+ EXTRAVERSION =
+ NAME = "People's Front"
+
+diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
+index 3b78dcda47364..874caed723905 100644
+--- a/drivers/gpio/gpio-mvebu.c
++++ b/drivers/gpio/gpio-mvebu.c
+@@ -650,9 +650,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
+
+ spin_lock_irqsave(&mvpwm->lock, flags);
+
+- val = (unsigned long long)
+- readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
+- val *= NSEC_PER_SEC;
++ u = readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
++ val = (unsigned long long) u * NSEC_PER_SEC;
+ do_div(val, mvpwm->clk_rate);
+ if (val > UINT_MAX)
+ state->duty_cycle = UINT_MAX;
+@@ -661,21 +660,17 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
+ else
+ state->duty_cycle = 1;
+
+- val = (unsigned long long)
+- readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
++ val = (unsigned long long) u; /* on duration */
++ /* period = on + off duration */
++ val += readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
+ val *= NSEC_PER_SEC;
+ do_div(val, mvpwm->clk_rate);
+- if (val < state->duty_cycle) {
++ if (val > UINT_MAX)
++ state->period = UINT_MAX;
++ else if (val)
++ state->period = val;
++ else
+ state->period = 1;
+- } else {
+- val -= state->duty_cycle;
+- if (val > UINT_MAX)
+- state->period = UINT_MAX;
+- else if (val)
+- state->period = val;
+- else
+- state->period = 1;
+- }
+
+ regmap_read(mvchip->regs, GPIO_BLINK_EN_OFF + mvchip->offset, &u);
+ if (u)
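The hunk above fixes two things in mvebu_pwm_get_state(): the period must be computed as on-duration plus off-duration (the removed code derived it from the off counter alone and then subtracted the duty cycle), and every tick-to-nanosecond conversion has to be done in 64-bit math before the divide. A standalone sketch of the corrected arithmetic, with the register reads replaced by hypothetical counter values:

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* ticks -> nanoseconds, clamped to [1, UINT_MAX] the way the driver clamps */
    static uint32_t ticks_to_ns(uint64_t ticks, uint64_t clk_rate)
    {
        uint64_t ns = ticks * NSEC_PER_SEC / clk_rate; /* widen before dividing */

        if (ns > UINT_MAX)
            return UINT_MAX;
        return ns ? (uint32_t)ns : 1;
    }

    int main(void)
    {
        uint32_t on = 12500, off = 12500; /* hypothetical counter values */
        uint64_t clk_rate = 25000000;     /* hypothetical 25 MHz PWM clock */

        printf("duty_cycle = %u ns\n", ticks_to_ns(on, clk_rate));
        /* period = on + off, not derived from the off counter alone */
        printf("period     = %u ns\n", ticks_to_ns((uint64_t)on + off, clk_rate));
        return 0;
    }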
+diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
+index 523014f2c0eb2..8006732b8f424 100644
+--- a/drivers/hid/wacom_sys.c
++++ b/drivers/hid/wacom_sys.c
+@@ -150,9 +150,9 @@ static int wacom_wac_pen_serial_enforce(struct hid_device *hdev,
+ }
+
+ if (flush)
+- wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo);
++ wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo);
+ else if (insert)
+- wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo,
++ wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo,
+ raw_data, report_size);
+
+ return insert && !flush;
+@@ -1251,7 +1251,7 @@ static void wacom_devm_kfifo_release(struct device *dev, void *res)
+ static int wacom_devm_kfifo_alloc(struct wacom *wacom)
+ {
+ struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+- struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo;
++ struct kfifo_rec_ptr_2 *pen_fifo;
+ int error;
+
+ pen_fifo = devres_alloc(wacom_devm_kfifo_release,
+@@ -1268,6 +1268,7 @@ static int wacom_devm_kfifo_alloc(struct wacom *wacom)
+ }
+
+ devres_add(&wacom->hdev->dev, pen_fifo);
++ wacom_wac->pen_fifo = pen_fifo;
+
+ return 0;
+ }
+diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
+index f67d871841c0c..46da97162ef43 100644
+--- a/drivers/hid/wacom_wac.h
++++ b/drivers/hid/wacom_wac.h
+@@ -344,7 +344,7 @@ struct wacom_wac {
+ struct input_dev *pen_input;
+ struct input_dev *touch_input;
+ struct input_dev *pad_input;
+- struct kfifo_rec_ptr_2 pen_fifo;
++ struct kfifo_rec_ptr_2 *pen_fifo;
+ int pid;
+ int num_contacts_left;
+ u8 bt_features;
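The two wacom hunks are one logical change: struct wacom_wac now stores a pointer to a kfifo whose backing storage is allocated and owned by devres, instead of embedding the kfifo in the struct. Consolidated from the hunks above, the allocation pattern looks roughly like this; the middle that the hunks elide (the kfifo_alloc call and its failure path) is reconstructed here as an assumption and may differ in detail from the driver:

    /* sketch of the devres-managed kfifo pattern used by wacom_devm_kfifo_alloc() */
    static void wacom_devm_kfifo_release(struct device *dev, void *res)
    {
        struct kfifo_rec_ptr_2 *pen_fifo = res;

        kfifo_free(pen_fifo);          /* frees the fifo's ring storage */
    }

    static int wacom_devm_kfifo_alloc(struct wacom *wacom)
    {
        struct kfifo_rec_ptr_2 *pen_fifo;
        int error;

        /* the kfifo head itself lives in a devres-owned allocation */
        pen_fifo = devres_alloc(wacom_devm_kfifo_release,
                                sizeof(*pen_fifo), GFP_KERNEL);
        if (!pen_fifo)
            return -ENOMEM;

        error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL);
        if (error) {
            devres_free(pen_fifo);     /* nothing registered yet */
            return error;
        }

        devres_add(&wacom->hdev->dev, pen_fifo);
        wacom->wacom_wac.pen_fifo = pen_fifo;  /* struct now holds a pointer */
        return 0;
    }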
+diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
+index 1917051b512f5..cffd423172726 100644
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -240,6 +240,7 @@ struct dm_integrity_c {
+
+ bool journal_uptodate;
+ bool just_formatted;
++ bool legacy_recalculate;
+
+ struct alg_spec internal_hash_alg;
+ struct alg_spec journal_crypt_alg;
+@@ -345,6 +346,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic)
+ return READ_ONCE(ic->failed);
+ }
+
++static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
++{
++ if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) &&
++ !ic->legacy_recalculate)
++ return true;
++ return false;
++}
++
+ static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
+ unsigned j, unsigned char seq)
+ {
+@@ -2503,6 +2512,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
+ arg_count += !!ic->internal_hash_alg.alg_string;
+ arg_count += !!ic->journal_crypt_alg.alg_string;
+ arg_count += !!ic->journal_mac_alg.alg_string;
++ arg_count += ic->legacy_recalculate;
+ DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
+ ic->tag_size, ic->mode, arg_count);
+ if (ic->meta_dev)
+@@ -2516,6 +2526,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
+ DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
+ DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
+ DMEMIT(" commit_time:%u", ic->autocommit_msec);
++ if (ic->legacy_recalculate)
++ DMEMIT(" legacy_recalculate");
+
+ #define EMIT_ALG(a, n) \
+ do { \
+@@ -3118,7 +3130,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
+ unsigned extra_args;
+ struct dm_arg_set as;
+ static const struct dm_arg _args[] = {
+- {0, 15, "Invalid number of feature args"},
++ {0, 12, "Invalid number of feature args"},
+ };
+ unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
+ bool recalculate;
+@@ -3248,6 +3260,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
+ goto bad;
+ } else if (!strcmp(opt_string, "recalculate")) {
+ recalculate = true;
++ } else if (!strcmp(opt_string, "legacy_recalculate")) {
++ ic->legacy_recalculate = true;
+ } else {
+ r = -EINVAL;
+ ti->error = "Invalid argument";
+@@ -3523,6 +3537,14 @@ try_smaller_buffer:
+ }
+ }
+
++ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
++ le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
++ dm_integrity_disable_recalculate(ic)) {
++ ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
++ r = -EOPNOTSUPP;
++ goto bad;
++ }
++
+ ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
+ 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL);
+ if (IS_ERR(ic->bufio)) {
+diff --git a/fs/exec.c b/fs/exec.c
+index 52788644c4af2..6eea921a7e72f 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1011,7 +1011,7 @@ static int exec_mmap(struct mm_struct *mm)
+ /* Notify parent that we're no longer interested in the old VM */
+ tsk = current;
+ old_mm = current->mm;
+- mm_release(tsk, old_mm);
++ exec_mm_release(tsk, old_mm);
+
+ if (old_mm) {
+ sync_mm_rss(old_mm);
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index b2a9c746f8ce4..edeb837081c80 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5209,7 +5209,7 @@ static int other_inode_match(struct inode * inode, unsigned long ino,
+ (inode->i_state & I_DIRTY_TIME)) {
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
++ inode->i_state &= ~I_DIRTY_TIME;
+ spin_unlock(&inode->i_lock);
+
+ spin_lock(&ei->i_raw_lock);
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 15216b440880a..f2d0c4acb3cbb 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -1157,7 +1157,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
+ */
+ static int move_expired_inodes(struct list_head *delaying_queue,
+ struct list_head *dispatch_queue,
+- int flags, unsigned long dirtied_before)
++ unsigned long dirtied_before)
+ {
+ LIST_HEAD(tmp);
+ struct list_head *pos, *node;
+@@ -1173,8 +1173,6 @@ static int move_expired_inodes(struct list_head *delaying_queue,
+ list_move(&inode->i_io_list, &tmp);
+ moved++;
+ spin_lock(&inode->i_lock);
+- if (flags & EXPIRE_DIRTY_ATIME)
+- inode->i_state |= I_DIRTY_TIME_EXPIRED;
+ inode->i_state |= I_SYNC_QUEUED;
+ spin_unlock(&inode->i_lock);
+ if (sb_is_blkdev_sb(inode->i_sb))
+@@ -1222,11 +1220,11 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
+
+ assert_spin_locked(&wb->list_lock);
+ list_splice_init(&wb->b_more_io, &wb->b_io);
+- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before);
++ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
+ if (!work->for_sync)
+ time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
+ moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
+- EXPIRE_DIRTY_ATIME, time_expire_jif);
++ time_expire_jif);
+ if (moved)
+ wb_io_lists_populated(wb);
+ trace_writeback_queue_io(wb, work, dirtied_before, moved);
+@@ -1394,26 +1392,26 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
+ ret = err;
+ }
+
++ /*
++ * If the inode has dirty timestamps and we need to write them, call
++ * mark_inode_dirty_sync() to notify the filesystem about it and to
++ * change I_DIRTY_TIME into I_DIRTY_SYNC.
++ */
++ if ((inode->i_state & I_DIRTY_TIME) &&
++ (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
++ time_after(jiffies, inode->dirtied_time_when +
++ dirtytime_expire_interval * HZ))) {
++ trace_writeback_lazytime(inode);
++ mark_inode_dirty_sync(inode);
++ }
++
+ /*
+ * Some filesystems may redirty the inode during the writeback
+ * due to delalloc, clear dirty metadata flags right before
+ * write_inode()
+ */
+ spin_lock(&inode->i_lock);
+-
+ dirty = inode->i_state & I_DIRTY;
+- if (inode->i_state & I_DIRTY_TIME) {
+- if ((dirty & I_DIRTY_INODE) ||
+- wbc->sync_mode == WB_SYNC_ALL ||
+- unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
+- unlikely(time_after(jiffies,
+- (inode->dirtied_time_when +
+- dirtytime_expire_interval * HZ)))) {
+- dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+- trace_writeback_lazytime(inode);
+- }
+- } else
+- inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
+ inode->i_state &= ~dirty;
+
+ /*
+@@ -1434,8 +1432,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
+
+ spin_unlock(&inode->i_lock);
+
+- if (dirty & I_DIRTY_TIME)
+- mark_inode_dirty_sync(inode);
+ /* Don't write the inode if only I_DIRTY_PAGES was set */
+ if (dirty & ~I_DIRTY_PAGES) {
+ int err = write_inode(inode, wbc);
+diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
+index ae453dd236a69..6fcdf7e449fe7 100644
+--- a/fs/xfs/xfs_trans_inode.c
++++ b/fs/xfs/xfs_trans_inode.c
+@@ -99,9 +99,9 @@ xfs_trans_log_inode(
+ * to log the timestamps, or will clear already cleared fields in the
+ * worst case.
+ */
+- if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
++ if (inode->i_state & I_DIRTY_TIME) {
+ spin_lock(&inode->i_lock);
+- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
++ inode->i_state &= ~I_DIRTY_TIME;
+ spin_unlock(&inode->i_lock);
+ }
+
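The ext4, fs-writeback and XFS hunks are one cleanup: I_DIRTY_TIME_EXPIRED disappears, and whether lazytime timestamps must be written is decided on the spot in __writeback_single_inode() rather than latched into a flag earlier. Pulled out of the writeback hunk above, the new expiry test in isolation is:

    /* sketch: the condition under which dirty timestamps are forced out */
    static bool lazytime_expired(struct inode *inode,
                                 struct writeback_control *wbc)
    {
        return (inode->i_state & I_DIRTY_TIME) &&
               (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
                time_after(jiffies, inode->dirtied_time_when +
                                    dirtytime_expire_interval * HZ));
    }
    /* when true, mark_inode_dirty_sync() turns I_DIRTY_TIME into I_DIRTY_SYNC */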
+diff --git a/include/linux/compat.h b/include/linux/compat.h
+index de0c13bdcd2c7..189d0e111d57d 100644
+--- a/include/linux/compat.h
++++ b/include/linux/compat.h
+@@ -445,8 +445,6 @@ struct compat_kexec_segment;
+ struct compat_mq_attr;
+ struct compat_msgbuf;
+
+-extern void compat_exit_robust_list(struct task_struct *curr);
+-
+ #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
+
+ #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 876bfb6df06a9..b6a955ba6173a 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2071,7 +2071,6 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+ #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP)
+ #define I_LINKABLE (1 << 10)
+ #define I_DIRTY_TIME (1 << 11)
+-#define I_DIRTY_TIME_EXPIRED (1 << 12)
+ #define I_WB_SWITCH (1 << 13)
+ #define I_OVL_INUSE (1 << 14)
+ #define I_CREATING (1 << 15)
+diff --git a/include/linux/futex.h b/include/linux/futex.h
+index a61bf436dcf36..b70df27d7e85c 100644
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -2,7 +2,9 @@
+ #ifndef _LINUX_FUTEX_H
+ #define _LINUX_FUTEX_H
+
++#include <linux/sched.h>
+ #include <linux/ktime.h>
++
+ #include <uapi/linux/futex.h>
+
+ struct inode;
+@@ -51,15 +53,35 @@ union futex_key {
+ #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
+
+ #ifdef CONFIG_FUTEX
+-extern void exit_robust_list(struct task_struct *curr);
++enum {
++ FUTEX_STATE_OK,
++ FUTEX_STATE_EXITING,
++ FUTEX_STATE_DEAD,
++};
+
+-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+- u32 __user *uaddr2, u32 val2, u32 val3);
+-#else
+-static inline void exit_robust_list(struct task_struct *curr)
++static inline void futex_init_task(struct task_struct *tsk)
+ {
++ tsk->robust_list = NULL;
++#ifdef CONFIG_COMPAT
++ tsk->compat_robust_list = NULL;
++#endif
++ INIT_LIST_HEAD(&tsk->pi_state_list);
++ tsk->pi_state_cache = NULL;
++ tsk->futex_state = FUTEX_STATE_OK;
++ mutex_init(&tsk->futex_exit_mutex);
+ }
+
++void futex_exit_recursive(struct task_struct *tsk);
++void futex_exit_release(struct task_struct *tsk);
++void futex_exec_release(struct task_struct *tsk);
++
++long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
++ u32 __user *uaddr2, u32 val2, u32 val3);
++#else
++static inline void futex_init_task(struct task_struct *tsk) { }
++static inline void futex_exit_recursive(struct task_struct *tsk) { }
++static inline void futex_exit_release(struct task_struct *tsk) { }
++static inline void futex_exec_release(struct task_struct *tsk) { }
+ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
+ ktime_t *timeout, u32 __user *uaddr2,
+ u32 val2, u32 val3)
+@@ -68,12 +90,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
+ }
+ #endif
+
+-#ifdef CONFIG_FUTEX_PI
+-extern void exit_pi_state_list(struct task_struct *curr);
+-#else
+-static inline void exit_pi_state_list(struct task_struct *curr)
+-{
+-}
+-#endif
+-
+ #endif
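This header now carries the whole futex exit protocol: a task is FUTEX_STATE_OK while alive, FUTEX_STATE_EXITING while futex_cleanup_begin()/futex_cleanup_end() run under futex_exit_mutex, and FUTEX_STATE_DEAD afterwards (exec resets it to OK). A purely illustrative userspace model of the handshake, not the kernel code:

    #include <pthread.h>
    #include <stdio.h>

    enum { STATE_OK, STATE_EXITING, STATE_DEAD };

    static int futex_state = STATE_OK;
    static pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void exiting_task(void)
    {
        pthread_mutex_lock(&exit_mutex);
        futex_state = STATE_EXITING;   /* waiters now see "busy", not "dead" */
        /* ... robust-list and pi_state cleanup happens here ... */
        futex_state = STATE_DEAD;
        pthread_mutex_unlock(&exit_mutex);
    }

    static int waiter(void)
    {
        if (futex_state != STATE_DEAD) {
            /* -EBUSY path: block until cleanup finished, then retry */
            pthread_mutex_lock(&exit_mutex);
            pthread_mutex_unlock(&exit_mutex);
            return -1;  /* caller loops: one more round through the maze */
        }
        return 0;       /* owner fully dead: waiter may take over */
    }

    int main(void)
    {
        exiting_task();
        printf("waiter -> %d\n", waiter());
        return 0;
    }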
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index c69f308f3a53c..5524cd5c6abe6 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -996,6 +996,8 @@ struct task_struct {
+ #endif
+ struct list_head pi_state_list;
+ struct futex_pi_state *pi_state_cache;
++ struct mutex futex_exit_mutex;
++ unsigned int futex_state;
+ #endif
+ #ifdef CONFIG_PERF_EVENTS
+ struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
+@@ -1377,7 +1379,6 @@ extern struct pid *cad_pid;
+ */
+ #define PF_IDLE 0x00000002 /* I am an IDLE thread */
+ #define PF_EXITING 0x00000004 /* Getting shut down */
+-#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */
+ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
+ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
+ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
+diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
+index 766bbe8138615..8d3b7e731b742 100644
+--- a/include/linux/sched/mm.h
++++ b/include/linux/sched/mm.h
+@@ -119,8 +119,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
+ * succeeds.
+ */
+ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
+-/* Remove the current tasks stale references to the old mm_struct */
+-extern void mm_release(struct task_struct *, struct mm_struct *);
++/* Remove the current tasks stale references to the old mm_struct on exit() */
++extern void exit_mm_release(struct task_struct *, struct mm_struct *);
++/* Remove the current tasks stale references to the old mm_struct on exec() */
++extern void exec_mm_release(struct task_struct *, struct mm_struct *);
+
+ #ifdef CONFIG_MEMCG
+ extern void mm_update_next_owner(struct mm_struct *mm);
+diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
+index 29d09755e5cfc..146e7b3faa856 100644
+--- a/include/trace/events/writeback.h
++++ b/include/trace/events/writeback.h
+@@ -20,7 +20,6 @@
+ {I_CLEAR, "I_CLEAR"}, \
+ {I_SYNC, "I_SYNC"}, \
+ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \
+- {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \
+ {I_REFERENCED, "I_REFERENCED"} \
+ )
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 65133ebddfada..908e7a33e1fcb 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -498,7 +498,7 @@ static void exit_mm(void)
+ struct mm_struct *mm = current->mm;
+ struct core_state *core_state;
+
+- mm_release(current, mm);
++ exit_mm_release(current, mm);
+ if (!mm)
+ return;
+ sync_mm_rss(mm);
+@@ -818,32 +818,12 @@ void __noreturn do_exit(long code)
+ */
+ if (unlikely(tsk->flags & PF_EXITING)) {
+ pr_alert("Fixing recursive fault but reboot is needed!\n");
+- /*
+- * We can do this unlocked here. The futex code uses
+- * this flag just to verify whether the pi state
+- * cleanup has been done or not. In the worst case it
+- * loops once more. We pretend that the cleanup was
+- * done as there is no way to return. Either the
+- * OWNER_DIED bit is set by now or we push the blocked
+- * task into the wait for ever nirwana as well.
+- */
+- tsk->flags |= PF_EXITPIDONE;
++ futex_exit_recursive(tsk);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule();
+ }
+
+ exit_signals(tsk); /* sets PF_EXITING */
+- /*
+- * Ensure that all new tsk->pi_lock acquisitions must observe
+- * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
+- */
+- smp_mb();
+- /*
+- * Ensure that we must observe the pi_state in exit_mm() ->
+- * mm_release() -> exit_pi_state_list().
+- */
+- raw_spin_lock_irq(&tsk->pi_lock);
+- raw_spin_unlock_irq(&tsk->pi_lock);
+
+ /* sync mm's RSS info before statistics gathering */
+ if (tsk->mm)
+@@ -918,12 +898,6 @@ void __noreturn do_exit(long code)
+ * Make sure we are holding no locks:
+ */
+ debug_check_no_locks_held();
+- /*
+- * We can do this unlocked here. The futex code uses this flag
+- * just to verify whether the pi state cleanup has been done
+- * or not. In the worst case it loops once more.
+- */
+- tsk->flags |= PF_EXITPIDONE;
+
+ if (tsk->io_context)
+ exit_io_context(tsk);
+diff --git a/kernel/fork.c b/kernel/fork.c
+index f2c92c1001949..cf535b9d5db75 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1217,24 +1217,8 @@ static int wait_for_vfork_done(struct task_struct *child,
+ * restoring the old one. . .
+ * Eric Biederman 10 January 1998
+ */
+-void mm_release(struct task_struct *tsk, struct mm_struct *mm)
++static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ {
+- /* Get rid of any futexes when releasing the mm */
+-#ifdef CONFIG_FUTEX
+- if (unlikely(tsk->robust_list)) {
+- exit_robust_list(tsk);
+- tsk->robust_list = NULL;
+- }
+-#ifdef CONFIG_COMPAT
+- if (unlikely(tsk->compat_robust_list)) {
+- compat_exit_robust_list(tsk);
+- tsk->compat_robust_list = NULL;
+- }
+-#endif
+- if (unlikely(!list_empty(&tsk->pi_state_list)))
+- exit_pi_state_list(tsk);
+-#endif
+-
+ uprobe_free_utask(tsk);
+
+ /* Get rid of any cached register state */
+@@ -1267,6 +1251,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ complete_vfork_done(tsk);
+ }
+
++void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
++{
++ futex_exit_release(tsk);
++ mm_release(tsk, mm);
++}
++
++void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
++{
++ futex_exec_release(tsk);
++ mm_release(tsk, mm);
++}
++
+ /*
+ * Allocate a new mm structure and copy contents from the
+ * mm structure of the passed in task structure.
+@@ -1937,14 +1933,8 @@ static __latent_entropy struct task_struct *copy_process(
+ #ifdef CONFIG_BLOCK
+ p->plug = NULL;
+ #endif
+-#ifdef CONFIG_FUTEX
+- p->robust_list = NULL;
+-#ifdef CONFIG_COMPAT
+- p->compat_robust_list = NULL;
+-#endif
+- INIT_LIST_HEAD(&p->pi_state_list);
+- p->pi_state_cache = NULL;
+-#endif
++ futex_init_task(p);
++
+ /*
+ * sigaltstack should be cleared when sharing the same VM
+ */
+diff --git a/kernel/futex.c b/kernel/futex.c
+index 334dc4cae780e..224adcdac6c19 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -341,6 +341,12 @@ static inline bool should_fail_futex(bool fshared)
+ }
+ #endif /* CONFIG_FAIL_FUTEX */
+
++#ifdef CONFIG_COMPAT
++static void compat_exit_robust_list(struct task_struct *curr);
++#else
++static inline void compat_exit_robust_list(struct task_struct *curr) { }
++#endif
++
+ static inline void futex_get_mm(union futex_key *key)
+ {
+ mmgrab(key->private.mm);
+@@ -833,6 +839,29 @@ static struct futex_pi_state *alloc_pi_state(void)
+ return pi_state;
+ }
+
++static void pi_state_update_owner(struct futex_pi_state *pi_state,
++ struct task_struct *new_owner)
++{
++ struct task_struct *old_owner = pi_state->owner;
++
++ lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
++
++ if (old_owner) {
++ raw_spin_lock(&old_owner->pi_lock);
++ WARN_ON(list_empty(&pi_state->list));
++ list_del_init(&pi_state->list);
++ raw_spin_unlock(&old_owner->pi_lock);
++ }
++
++ if (new_owner) {
++ raw_spin_lock(&new_owner->pi_lock);
++ WARN_ON(!list_empty(&pi_state->list));
++ list_add(&pi_state->list, &new_owner->pi_state_list);
++ pi_state->owner = new_owner;
++ raw_spin_unlock(&new_owner->pi_lock);
++ }
++}
++
+ static void get_pi_state(struct futex_pi_state *pi_state)
+ {
+ WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
+@@ -855,17 +884,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
+ * and has cleaned up the pi_state already
+ */
+ if (pi_state->owner) {
+- struct task_struct *owner;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
+- owner = pi_state->owner;
+- if (owner) {
+- raw_spin_lock(&owner->pi_lock);
+- list_del_init(&pi_state->list);
+- raw_spin_unlock(&owner->pi_lock);
+- }
+- rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
++ pi_state_update_owner(pi_state, NULL);
++ rt_mutex_proxy_unlock(&pi_state->pi_mutex);
+ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
+ }
+
+@@ -890,7 +913,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
+ * Kernel cleans up PI-state, but userspace is likely hosed.
+ * (Robust-futex cleanup is separate and might save the day for userspace.)
+ */
+-void exit_pi_state_list(struct task_struct *curr)
++static void exit_pi_state_list(struct task_struct *curr)
+ {
+ struct list_head *next, *head = &curr->pi_state_list;
+ struct futex_pi_state *pi_state;
+@@ -960,7 +983,8 @@ void exit_pi_state_list(struct task_struct *curr)
+ }
+ raw_spin_unlock_irq(&curr->pi_lock);
+ }
+-
++#else
++static inline void exit_pi_state_list(struct task_struct *curr) { }
+ #endif
+
+ /*
+@@ -1010,7 +1034,8 @@ void exit_pi_state_list(struct task_struct *curr)
+ * FUTEX_OWNER_DIED bit. See [4]
+ *
+ * [10] There is no transient state which leaves owner and user space
+- * TID out of sync.
++ * TID out of sync. Except one error case where the kernel is denied
++ * write access to the user address, see fixup_pi_state_owner().
+ *
+ *
+ * Serialization and lifetime rules:
+@@ -1169,16 +1194,47 @@ out_error:
+ return ret;
+ }
+
++/**
++ * wait_for_owner_exiting - Block until the owner has exited
++ * @exiting: Pointer to the exiting task
++ *
++ * Caller must hold a refcount on @exiting.
++ */
++static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
++{
++ if (ret != -EBUSY) {
++ WARN_ON_ONCE(exiting);
++ return;
++ }
++
++ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
++ return;
++
++ mutex_lock(&exiting->futex_exit_mutex);
++ /*
++ * No point in doing state checking here. If the waiter got here
++ * while the task was in exec()->exec_futex_release() then it can
++ * have any FUTEX_STATE_* value when the waiter has acquired the
++ * mutex. OK, if running, EXITING or DEAD if it reached exit()
++ * already. Highly unlikely and not a problem. Just one more round
++ * through the futex maze.
++ */
++ mutex_unlock(&exiting->futex_exit_mutex);
++
++ put_task_struct(exiting);
++}
++
+ static int handle_exit_race(u32 __user *uaddr, u32 uval,
+ struct task_struct *tsk)
+ {
+ u32 uval2;
+
+ /*
+- * If PF_EXITPIDONE is not yet set, then try again.
++ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
++ * caller that the alleged owner is busy.
+ */
+- if (tsk && !(tsk->flags & PF_EXITPIDONE))
+- return -EAGAIN;
++ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
++ return -EBUSY;
+
+ /*
+ * Reread the user space value to handle the following situation:
+@@ -1196,8 +1252,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
+ * *uaddr = 0xC0000000; tsk = get_task(PID);
+ * } if (!tsk->flags & PF_EXITING) {
+ * ... attach();
+- * tsk->flags |= PF_EXITPIDONE; } else {
+- * if (!(tsk->flags & PF_EXITPIDONE))
++ * tsk->futex_state = } else {
++ * FUTEX_STATE_DEAD; if (tsk->futex_state !=
++ * FUTEX_STATE_DEAD)
+ * return -EAGAIN;
+ * return -ESRCH; <--- FAIL
+ * }
+@@ -1228,7 +1285,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
+ * it after doing proper sanity checks.
+ */
+ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
+- struct futex_pi_state **ps)
++ struct futex_pi_state **ps,
++ struct task_struct **exiting)
+ {
+ pid_t pid = uval & FUTEX_TID_MASK;
+ struct futex_pi_state *pi_state;
+@@ -1253,22 +1311,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
+ }
+
+ /*
+- * We need to look at the task state flags to figure out,
+- * whether the task is exiting. To protect against the do_exit
+- * change of the task flags, we do this protected by
+- * p->pi_lock:
++ * We need to look at the task state to figure out, whether the
++ * task is exiting. To protect against the change of the task state
++ * in futex_exit_release(), we do this protected by p->pi_lock:
+ */
+ raw_spin_lock_irq(&p->pi_lock);
+- if (unlikely(p->flags & PF_EXITING)) {
++ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
+ /*
+- * The task is on the way out. When PF_EXITPIDONE is
+- * set, we know that the task has finished the
+- * cleanup:
++ * The task is on the way out. When the futex state is
++ * FUTEX_STATE_DEAD, we know that the task has finished
++ * the cleanup:
+ */
+ int ret = handle_exit_race(uaddr, uval, p);
+
+ raw_spin_unlock_irq(&p->pi_lock);
+- put_task_struct(p);
++ /*
++ * If the owner task is between FUTEX_STATE_EXITING and
++ * FUTEX_STATE_DEAD then store the task pointer and keep
++ * the reference on the task struct. The calling code will
++ * drop all locks, wait for the task to reach
++ * FUTEX_STATE_DEAD and then drop the refcount. This is
++ * required to prevent a live lock when the current task
++ * preempted the exiting task between the two states.
++ */
++ if (ret == -EBUSY)
++ *exiting = p;
++ else
++ put_task_struct(p);
+ return ret;
+ }
+
+@@ -1307,7 +1376,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
+
+ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
+ struct futex_hash_bucket *hb,
+- union futex_key *key, struct futex_pi_state **ps)
++ union futex_key *key, struct futex_pi_state **ps,
++ struct task_struct **exiting)
+ {
+ struct futex_q *top_waiter = futex_top_waiter(hb, key);
+
+@@ -1322,7 +1392,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
+ * We are the first waiter - try to look up the owner based on
+ * @uval and attach to it.
+ */
+- return attach_to_pi_owner(uaddr, uval, key, ps);
++ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
+ }
+
+ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+@@ -1350,6 +1420,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+ * lookup
+ * @task: the task to perform the atomic lock work for. This will
+ * be "current" except in the case of requeue pi.
++ * @exiting: Pointer to store the task pointer of the owner task
++ * which is in the middle of exiting
+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Return:
+@@ -1358,11 +1430,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+ * - <0 - error
+ *
+ * The hb->lock and futex_key refs shall be held by the caller.
++ *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
+ */
+ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+ union futex_key *key,
+ struct futex_pi_state **ps,
+- struct task_struct *task, int set_waiters)
++ struct task_struct *task,
++ struct task_struct **exiting,
++ int set_waiters)
+ {
+ u32 uval, newval, vpid = task_pid_vnr(task);
+ struct futex_q *top_waiter;
+@@ -1432,7 +1510,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+ * attach to the owner. If that fails, no harm done, we only
+ * set the FUTEX_WAITERS bit in the user space variable.
+ */
+- return attach_to_pi_owner(uaddr, newval, key, ps);
++ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
+ }
+
+ /**
+@@ -1537,26 +1615,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
+ ret = -EINVAL;
+ }
+
+- if (ret)
+- goto out_unlock;
+-
+- /*
+- * This is a point of no return; once we modify the uval there is no
+- * going back and subsequent operations must not fail.
+- */
+-
+- raw_spin_lock(&pi_state->owner->pi_lock);
+- WARN_ON(list_empty(&pi_state->list));
+- list_del_init(&pi_state->list);
+- raw_spin_unlock(&pi_state->owner->pi_lock);
+-
+- raw_spin_lock(&new_owner->pi_lock);
+- WARN_ON(!list_empty(&pi_state->list));
+- list_add(&pi_state->list, &new_owner->pi_state_list);
+- pi_state->owner = new_owner;
+- raw_spin_unlock(&new_owner->pi_lock);
+-
+- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
++ if (!ret) {
++ /*
++ * This is a point of no return; once we modified the uval
++ * there is no going back and subsequent operations must
++ * not fail.
++ */
++ pi_state_update_owner(pi_state, new_owner);
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
++ }
+
+ out_unlock:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+@@ -1853,6 +1920,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+ * @key1: the from futex key
+ * @key2: the to futex key
+ * @ps: address to store the pi_state pointer
++ * @exiting: Pointer to store the task pointer of the owner task
++ * which is in the middle of exiting
+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
+@@ -1860,16 +1929,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
+ * hb1 and hb2 must be held by the caller.
+ *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
++ *
+ * Return:
+ * - 0 - failed to acquire the lock atomically;
+ * - >0 - acquired the lock, return value is vpid of the top_waiter
+ * - <0 - error
+ */
+-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+- struct futex_hash_bucket *hb1,
+- struct futex_hash_bucket *hb2,
+- union futex_key *key1, union futex_key *key2,
+- struct futex_pi_state **ps, int set_waiters)
++static int
++futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
++ struct futex_hash_bucket *hb2, union futex_key *key1,
++ union futex_key *key2, struct futex_pi_state **ps,
++ struct task_struct **exiting, int set_waiters)
+ {
+ struct futex_q *top_waiter = NULL;
+ u32 curval;
+@@ -1906,7 +1979,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+ */
+ vpid = task_pid_vnr(top_waiter->task);
+ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+- set_waiters);
++ exiting, set_waiters);
+ if (ret == 1) {
+ requeue_pi_wake_futex(top_waiter, key2, hb2);
+ return vpid;
+@@ -2035,6 +2108,8 @@ retry_private:
+ }
+
+ if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
++ struct task_struct *exiting = NULL;
++
+ /*
+ * Attempt to acquire uaddr2 and wake the top waiter. If we
+ * intend to requeue waiters, force setting the FUTEX_WAITERS
+@@ -2042,7 +2117,8 @@ retry_private:
+ * faults rather in the requeue loop below.
+ */
+ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+- &key2, &pi_state, nr_requeue);
++ &key2, &pi_state,
++ &exiting, nr_requeue);
+
+ /*
+ * At this point the top_waiter has either taken uaddr2 or is
+@@ -2069,7 +2145,8 @@ retry_private:
+ * If that call succeeds then we have pi_state and an
+ * initial refcount on it.
+ */
+- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
++ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
++ &pi_state, &exiting);
+ }
+
+ switch (ret) {
+@@ -2087,17 +2164,24 @@ retry_private:
+ if (!ret)
+ goto retry;
+ goto out;
++ case -EBUSY:
+ case -EAGAIN:
+ /*
+ * Two reasons for this:
+- * - Owner is exiting and we just wait for the
++ * - EBUSY: Owner is exiting and we just wait for the
+ * exit to complete.
+- * - The user space value changed.
++ * - EAGAIN: The user space value changed.
+ */
+ double_unlock_hb(hb1, hb2);
+ hb_waiters_dec(hb2);
+ put_futex_key(&key2);
+ put_futex_key(&key1);
++ /*
++ * Handle the case where the owner is in the middle of
++ * exiting. Wait for the exit to complete otherwise
++ * this task might loop forever, aka. live lock.
++ */
++ wait_for_owner_exiting(ret, exiting);
+ cond_resched();
+ goto retry;
+ default:
+@@ -2362,18 +2446,13 @@ static void unqueue_me_pi(struct futex_q *q)
+ spin_unlock(q->lock_ptr);
+ }
+
+-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+- struct task_struct *argowner)
++static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
++ struct task_struct *argowner)
+ {
++ u32 uval, uninitialized_var(curval), newval, newtid;
+ struct futex_pi_state *pi_state = q->pi_state;
+- u32 uval, uninitialized_var(curval), newval;
+ struct task_struct *oldowner, *newowner;
+- u32 newtid;
+- int ret, err = 0;
+-
+- lockdep_assert_held(q->lock_ptr);
+-
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++ int err = 0;
+
+ oldowner = pi_state->owner;
+
+@@ -2407,14 +2486,12 @@ retry:
+ * We raced against a concurrent self; things are
+ * already fixed up. Nothing to do.
+ */
+- ret = 0;
+- goto out_unlock;
++ return 0;
+ }
+
+ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
+- /* We got the lock after all, nothing to fix. */
+- ret = 0;
+- goto out_unlock;
++ /* We got the lock. pi_state is correct. Tell caller. */
++ return 1;
+ }
+
+ /*
+@@ -2441,8 +2518,7 @@ retry:
+ * We raced against a concurrent self; things are
+ * already fixed up. Nothing to do.
+ */
+- ret = 0;
+- goto out_unlock;
++ return 1;
+ }
+ newowner = argowner;
+ }
+@@ -2472,22 +2548,9 @@ retry:
+ * We fixed up user space. Now we need to fix the pi_state
+ * itself.
+ */
+- if (pi_state->owner != NULL) {
+- raw_spin_lock(&pi_state->owner->pi_lock);
+- WARN_ON(list_empty(&pi_state->list));
+- list_del_init(&pi_state->list);
+- raw_spin_unlock(&pi_state->owner->pi_lock);
+- }
++ pi_state_update_owner(pi_state, newowner);
+
+- pi_state->owner = newowner;
+-
+- raw_spin_lock(&newowner->pi_lock);
+- WARN_ON(!list_empty(&pi_state->list));
+- list_add(&pi_state->list, &newowner->pi_state_list);
+- raw_spin_unlock(&newowner->pi_lock);
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+-
+- return 0;
++ return argowner == current;
+
+ /*
+ * In order to reschedule or handle a page fault, we need to drop the
+@@ -2508,17 +2571,16 @@ handle_err:
+
+ switch (err) {
+ case -EFAULT:
+- ret = fault_in_user_writeable(uaddr);
++ err = fault_in_user_writeable(uaddr);
+ break;
+
+ case -EAGAIN:
+ cond_resched();
+- ret = 0;
++ err = 0;
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+- ret = err;
+ break;
+ }
+
+@@ -2528,17 +2590,44 @@ handle_err:
+ /*
+ * Check if someone else fixed it for us:
+ */
+- if (pi_state->owner != oldowner) {
+- ret = 0;
+- goto out_unlock;
+- }
++ if (pi_state->owner != oldowner)
++ return argowner == current;
+
+- if (ret)
+- goto out_unlock;
++ /* Retry if err was -EAGAIN or the fault in succeeded */
++ if (!err)
++ goto retry;
+
+- goto retry;
++ /*
++ * fault_in_user_writeable() failed so user state is immutable. At
++ * best we can make the kernel state consistent but user state will
++ * be most likely hosed and any subsequent unlock operation will be
++ * rejected due to PI futex rule [10].
++ *
++ * Ensure that the rtmutex owner is also the pi_state owner despite
++ * the user space value claiming something different. There is no
++ * point in unlocking the rtmutex if current is the owner as it
++ * would need to wait until the next waiter has taken the rtmutex
++ * to guarantee consistent state. Keep it simple. Userspace asked
++ * for this wreckaged state.
++ *
++ * The rtmutex has an owner - either current or some other
++ * task. See the EAGAIN loop above.
++ */
++ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
+
+-out_unlock:
++ return err;
++}
++
++static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
++ struct task_struct *argowner)
++{
++ struct futex_pi_state *pi_state = q->pi_state;
++ int ret;
++
++ lockdep_assert_held(q->lock_ptr);
++
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++ ret = __fixup_pi_state_owner(uaddr, q, argowner);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ return ret;
+ }
+@@ -2562,8 +2651,6 @@ static long futex_wait_restart(struct restart_block *restart);
+ */
+ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
+ {
+- int ret = 0;
+-
+ if (locked) {
+ /*
+ * Got the lock. We might not be the anticipated owner if we
+@@ -2574,8 +2661,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
+ * stable state, anything else needs more attention.
+ */
+ if (q->pi_state->owner != current)
+- ret = fixup_pi_state_owner(uaddr, q, current);
+- goto out;
++ return fixup_pi_state_owner(uaddr, q, current);
++ return 1;
+ }
+
+ /*
+@@ -2586,24 +2673,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
+ * Another speculative read; pi_state->owner == current is unstable
+ * but needs our attention.
+ */
+- if (q->pi_state->owner == current) {
+- ret = fixup_pi_state_owner(uaddr, q, NULL);
+- goto out;
+- }
++ if (q->pi_state->owner == current)
++ return fixup_pi_state_owner(uaddr, q, NULL);
+
+ /*
+ * Paranoia check. If we did not take the lock, then we should not be
+- * the owner of the rt_mutex.
++ * the owner of the rt_mutex. Warn and establish consistent state.
+ */
+- if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
+- printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
+- "pi-state %p\n", ret,
+- q->pi_state->pi_mutex.owner,
+- q->pi_state->owner);
+- }
++ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
++ return fixup_pi_state_owner(uaddr, q, current);
+
+-out:
+- return ret ? ret : locked;
++ return 0;
+ }
+
+ /**
+@@ -2824,7 +2904,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+ ktime_t *time, int trylock)
+ {
+ struct hrtimer_sleeper timeout, *to = NULL;
+- struct futex_pi_state *pi_state = NULL;
++ struct task_struct *exiting = NULL;
+ struct rt_mutex_waiter rt_waiter;
+ struct futex_hash_bucket *hb;
+ struct futex_q q = futex_q_init;
+@@ -2852,7 +2932,8 @@ retry:
+ retry_private:
+ hb = queue_lock(&q);
+
+- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
++ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
++ &exiting, 0);
+ if (unlikely(ret)) {
+ /*
+ * Atomic work succeeded and we got the lock,
+@@ -2865,15 +2946,22 @@ retry_private:
+ goto out_unlock_put_key;
+ case -EFAULT:
+ goto uaddr_faulted;
++ case -EBUSY:
+ case -EAGAIN:
+ /*
+ * Two reasons for this:
+- * - Task is exiting and we just wait for the
++ * - EBUSY: Task is exiting and we just wait for the
+ * exit to complete.
+- * - The user space value changed.
++ * - EAGAIN: The user space value changed.
+ */
+ queue_unlock(hb);
+ put_futex_key(&q.key);
++ /*
++ * Handle the case where the owner is in the middle of
++ * exiting. Wait for the exit to complete otherwise
++ * this task might loop forever, aka. live lock.
++ */
++ wait_for_owner_exiting(ret, exiting);
+ cond_resched();
+ goto retry;
+ default:
+@@ -2958,23 +3046,9 @@ no_block:
+ if (res)
+ ret = (res < 0) ? res : 0;
+
+- /*
+- * If fixup_owner() faulted and was unable to handle the fault, unlock
+- * it and return the fault to userspace.
+- */
+- if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
+- pi_state = q.pi_state;
+- get_pi_state(pi_state);
+- }
+-
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q);
+
+- if (pi_state) {
+- rt_mutex_futex_unlock(&pi_state->pi_mutex);
+- put_pi_state(pi_state);
+- }
+-
+ goto out_put_key;
+
+ out_unlock_put_key:
+@@ -3240,7 +3314,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ u32 __user *uaddr2)
+ {
+ struct hrtimer_sleeper timeout, *to = NULL;
+- struct futex_pi_state *pi_state = NULL;
+ struct rt_mutex_waiter rt_waiter;
+ struct futex_hash_bucket *hb;
+ union futex_key key2 = FUTEX_KEY_INIT;
+@@ -3325,16 +3398,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ if (q.pi_state && (q.pi_state->owner != current)) {
+ spin_lock(q.lock_ptr);
+ ret = fixup_pi_state_owner(uaddr2, &q, current);
+- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
+- pi_state = q.pi_state;
+- get_pi_state(pi_state);
+- }
+ /*
+ * Drop the reference to the pi state which
+ * the requeue_pi() code acquired for us.
+ */
+ put_pi_state(q.pi_state);
+ spin_unlock(q.lock_ptr);
++ /*
++ * Adjust the return value. It's either -EFAULT or
++ * success (1) but the caller expects 0 for success.
++ */
++ ret = ret < 0 ? ret : 0;
+ }
+ } else {
+ struct rt_mutex *pi_mutex;
+@@ -3365,25 +3439,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ if (res)
+ ret = (res < 0) ? res : 0;
+
+- /*
+- * If fixup_pi_state_owner() faulted and was unable to handle
+- * the fault, unlock the rt_mutex and return the fault to
+- * userspace.
+- */
+- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
+- pi_state = q.pi_state;
+- get_pi_state(pi_state);
+- }
+-
+ /* Unqueue and drop the lock. */
+ unqueue_me_pi(&q);
+ }
+
+- if (pi_state) {
+- rt_mutex_futex_unlock(&pi_state->pi_mutex);
+- put_pi_state(pi_state);
+- }
+-
+ if (ret == -EINTR) {
+ /*
+ * We've already been requeued, but cannot restart by calling
+@@ -3625,7 +3684,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+-void exit_robust_list(struct task_struct *curr)
++static void exit_robust_list(struct task_struct *curr)
+ {
+ struct robust_list_head __user *head = curr->robust_list;
+ struct robust_list __user *entry, *next_entry, *pending;
+@@ -3690,6 +3749,114 @@ void exit_robust_list(struct task_struct *curr)
+ }
+ }
+
++static void futex_cleanup(struct task_struct *tsk)
++{
++ if (unlikely(tsk->robust_list)) {
++ exit_robust_list(tsk);
++ tsk->robust_list = NULL;
++ }
++
++#ifdef CONFIG_COMPAT
++ if (unlikely(tsk->compat_robust_list)) {
++ compat_exit_robust_list(tsk);
++ tsk->compat_robust_list = NULL;
++ }
++#endif
++
++ if (unlikely(!list_empty(&tsk->pi_state_list)))
++ exit_pi_state_list(tsk);
++}
++
++/**
++ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
++ * @tsk: task to set the state on
++ *
++ * Set the futex exit state of the task lockless. The futex waiter code
++ * observes that state when a task is exiting and loops until the task has
++ * actually finished the futex cleanup. The worst case for this is that the
++ * waiter runs through the wait loop until the state becomes visible.
++ *
++ * This is called from the recursive fault handling path in do_exit().
++ *
++ * This is best effort. Either the futex exit code has run already or
++ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
++ * take it over. If not, the problem is pushed back to user space. If the
++ * futex exit code did not run yet, then an already queued waiter might
++ * block forever, but there is nothing which can be done about that.
++ */
++void futex_exit_recursive(struct task_struct *tsk)
++{
++ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
++ if (tsk->futex_state == FUTEX_STATE_EXITING)
++ mutex_unlock(&tsk->futex_exit_mutex);
++ tsk->futex_state = FUTEX_STATE_DEAD;
++}
++
++static void futex_cleanup_begin(struct task_struct *tsk)
++{
++ /*
++ * Prevent various race issues against a concurrent incoming waiter
++ * including live locks by forcing the waiter to block on
++ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
++ * attach_to_pi_owner().
++ */
++ mutex_lock(&tsk->futex_exit_mutex);
++
++ /*
++ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
++ *
++ * This ensures that all subsequent checks of tsk->futex_state in
++ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
++ * tsk->pi_lock held.
++ *
++ * It guarantees also that a pi_state which was queued right before
++ * the state change under tsk->pi_lock by a concurrent waiter must
++ * be observed in exit_pi_state_list().
++ */
++ raw_spin_lock_irq(&tsk->pi_lock);
++ tsk->futex_state = FUTEX_STATE_EXITING;
++ raw_spin_unlock_irq(&tsk->pi_lock);
++}
++
++static void futex_cleanup_end(struct task_struct *tsk, int state)
++{
++ /*
++ * Lockless store. The only side effect is that an observer might
++ * take another loop until it becomes visible.
++ */
++ tsk->futex_state = state;
++ /*
++ * Drop the exit protection. This unblocks waiters which observed
++ * FUTEX_STATE_EXITING to reevaluate the state.
++ */
++ mutex_unlock(&tsk->futex_exit_mutex);
++}
++
++void futex_exec_release(struct task_struct *tsk)
++{
++ /*
++ * The state handling is done for consistency, but in the case of
++ * exec() there is no way to prevent futher damage as the PID stays
++ * the same. But for the unlikely and arguably buggy case that a
++ * futex is held on exec(), this provides at least as much state
++ * consistency protection which is possible.
++ */
++ futex_cleanup_begin(tsk);
++ futex_cleanup(tsk);
++ /*
++ * Reset the state to FUTEX_STATE_OK. The task is alive and about
++ * exec a new binary.
++ */
++ futex_cleanup_end(tsk, FUTEX_STATE_OK);
++}
++
++void futex_exit_release(struct task_struct *tsk)
++{
++ futex_cleanup_begin(tsk);
++ futex_cleanup(tsk);
++ futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
++}
++
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+ u32 __user *uaddr2, u32 val2, u32 val3)
+ {
+@@ -3817,7 +3984,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+-void compat_exit_robust_list(struct task_struct *curr)
++static void compat_exit_robust_list(struct task_struct *curr)
+ {
+ struct compat_robust_list_head __user *head = curr->compat_robust_list;
+ struct robust_list __user *entry, *next_entry, *pending;
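Taken together, the futex.c changes split the old single -EAGAIN into two outcomes: -EAGAIN still means "the user-space value changed, just retry", while -EBUSY means "the owner is exiting; park on its futex_exit_mutex before retrying, or live-lock". The lock path's handling, reduced to its skeleton (kernel C, control flow only, not standalone):

    /* control-flow sketch of the futex_lock_pi() retry path above */
    retry:
        ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state,
                                   current, &exiting, 0);
        switch (ret) {
        case -EBUSY:    /* owner exiting: holds a ref on it in 'exiting' */
        case -EAGAIN:   /* user-space value changed */
            queue_unlock(hb);                     /* drop all locks first */
            put_futex_key(&q.key);
            wait_for_owner_exiting(ret, exiting); /* no-op for -EAGAIN */
            cond_resched();
            goto retry;
        }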
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 9562aaa2afdce..a5ec4f68527e5 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1719,8 +1719,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ * possible because it belongs to the pi_state which is about to be freed
+ * and it is not longer visible to other tasks.
+ */
+-void rt_mutex_proxy_unlock(struct rt_mutex *lock,
+- struct task_struct *proxy_owner)
++void rt_mutex_proxy_unlock(struct rt_mutex *lock)
+ {
+ debug_rt_mutex_proxy_unlock(lock);
+ rt_mutex_set_owner(lock, NULL);
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index d1d62f942be22..ca6fb489007b6 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
+ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner);
+-extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
+- struct task_struct *proxy_owner);
++extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
+ extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+ extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index 87ce9736043da..360129e475407 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -4393,6 +4393,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return;
++ /* prevent another thread from changing buffer sizes */
++ mutex_lock(&buffer->mutex);
+
+ atomic_inc(&buffer->resize_disabled);
+ atomic_inc(&cpu_buffer->record_disabled);
+@@ -4416,6 +4418,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&buffer->resize_disabled);
++
++ mutex_unlock(&buffer->mutex);
+ }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
+
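The ring-buffer hunk closes a race between resetting and resizing a per-cpu buffer: ring_buffer_resize() already serializes page-list changes on buffer->mutex, so ring_buffer_reset_cpu() must take the same mutex. As a generic pattern (illustrative pthreads model, not the kernel code):

    #include <pthread.h>

    static pthread_mutex_t buffer_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void reset(void)   /* mirrors the patched ring_buffer_reset_cpu() */
    {
        pthread_mutex_lock(&buffer_mutex);  /* exclude a concurrent resize */
        /* ... disable recording and rewind the per-cpu pages ... */
        pthread_mutex_unlock(&buffer_mutex);
    }

    static void resize(void)  /* ring_buffer_resize() already took this lock */
    {
        pthread_mutex_lock(&buffer_mutex);
        /* ... add or remove buffer pages ... */
        pthread_mutex_unlock(&buffer_mutex);
    }

    int main(void) { reset(); resize(); return 0; }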
+diff --git a/mm/slub.c b/mm/slub.c
+index 02295fa61583c..eac80b0516fe8 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -5766,10 +5766,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
+
+ s->kobj.kset = kset;
+ err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
+- if (err) {
+- kobject_put(&s->kobj);
++ if (err)
+ goto out;
+- }
+
+ err = sysfs_create_group(&s->kobj, &slab_attr_group);
+ if (err)
+diff --git a/tools/build/Makefile b/tools/build/Makefile
+index 727050c40f096..8a55378e8b7ce 100644
+--- a/tools/build/Makefile
++++ b/tools/build/Makefile
+@@ -15,10 +15,6 @@ endef
+ $(call allow-override,CC,$(CROSS_COMPILE)gcc)
+ $(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+-HOSTCC ?= gcc
+-HOSTLD ?= ld
+-HOSTAR ?= ar
+-
+ export HOSTCC HOSTLD HOSTAR
+
+ ifeq ($(V),1)
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index baa92279c137e..15f32f67cf340 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -7,15 +7,6 @@ ARCH := x86
+ endif
+
+ # always use the host compiler
+-ifneq ($(LLVM),)
+-HOSTAR ?= llvm-ar
+-HOSTCC ?= clang
+-HOSTLD ?= ld.lld
+-else
+-HOSTAR ?= ar
+-HOSTCC ?= gcc
+-HOSTLD ?= ld
+-endif
+ AR = $(HOSTAR)
+ CC = $(HOSTCC)
+ LD = $(HOSTLD)
+diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
+index 0be4116953790..678aa7feb84d0 100644
+--- a/tools/perf/Makefile.perf
++++ b/tools/perf/Makefile.perf
+@@ -148,10 +148,6 @@ endef
+
+ LD += $(EXTRA_LDFLAGS)
+
+-HOSTCC ?= gcc
+-HOSTLD ?= ld
+-HOSTAR ?= ar
+-
+ PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+ LLVM_CONFIG ?= llvm-config
+
+diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
+index fc116c060b98d..32ff7baf39df4 100644
+--- a/tools/power/acpi/Makefile.config
++++ b/tools/power/acpi/Makefile.config
+@@ -57,7 +57,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
+ CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
+ CROSS_COMPILE ?= $(CROSS)
+ LD = $(CC)
+-HOSTCC = gcc
+
+ # check if compiler option is supported
+ cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
+index 8fc6b1ca47dca..42dbe05b18077 100644
+--- a/tools/scripts/Makefile.include
++++ b/tools/scripts/Makefile.include
+@@ -60,6 +60,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
+ $(call allow-override,CXX,$(CROSS_COMPILE)g++)
+ $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+
++ifneq ($(LLVM),)
++HOSTAR ?= llvm-ar
++HOSTCC ?= clang
++HOSTLD ?= ld.lld
++else
++HOSTAR ?= ar
++HOSTCC ?= gcc
++HOSTLD ?= ld
++endif
++
+ ifeq ($(CC_NO_CLANG), 1)
+ EXTRA_WARNINGS += -Wstrict-aliasing=3
+ endif
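Finally, the tools/ hunks are one refactor: the HOSTCC/HOSTLD/HOSTAR fallbacks scattered across tools/build, objtool, perf and the ACPI tools collapse into tools/scripts/Makefile.include, which those Makefiles already include, and objtool's LLVM switch moves with them. Building any of these tools with LLVM=1 on the make command line should therefore pick clang, ld.lld and llvm-ar consistently, rather than for objtool alone.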