author    | Mike Pagano <mpagano@gentoo.org> | 2021-08-10 12:21:29 -0400
committer | Mike Pagano <mpagano@gentoo.org> | 2021-08-10 12:21:29 -0400
commit    | 78ff2030a414af3cdccfd6b634571a3006ce13c0 (patch)
tree      | d1690a5f4466c90c17c8da82b0e0564c3c27163c
parent    | Linux patch 4.4.279 (diff)
download  | linux-patches-78ff2030a414af3cdccfd6b634571a3006ce13c0.tar.gz linux-patches-78ff2030a414af3cdccfd6b634571a3006ce13c0.tar.bz2 linux-patches-78ff2030a414af3cdccfd6b634571a3006ce13c0.zip
Linux patch 4.4.280
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README              |    4
-rw-r--r-- | 1279_linux-4.4.280.patch | 1100
2 files changed, 1104 insertions, 0 deletions
diff --git a/0000_README b/0000_README index 5eea747d..878287ee 100644 --- a/0000_README +++ b/0000_README @@ -1159,6 +1159,10 @@ Patch: 1278_linux-4.4.279.patch From: http://www.kernel.org Desc: Linux 4.4.279 +Patch: 1279_linux-4.4.280.patch +From: http://www.kernel.org +Desc: Linux 4.4.280 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1279_linux-4.4.280.patch b/1279_linux-4.4.280.patch new file mode 100644 index 00000000..b08ce93d --- /dev/null +++ b/1279_linux-4.4.280.patch @@ -0,0 +1,1100 @@ +diff --git a/Makefile b/Makefile +index 7dc479e9a6655..870bd763830e6 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 4 + PATCHLEVEL = 4 +-SUBLEVEL = 279 ++SUBLEVEL = 280 + EXTRAVERSION = + NAME = Blurry Fish Butt + +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 0a93e9d1708e2..3072e9c93ae6b 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -880,9 +880,7 @@ static __always_inline void rcu_read_lock(void) + * Unfortunately, this function acquires the scheduler's runqueue and + * priority-inheritance spinlocks. This means that deadlock could result + * if the caller of rcu_read_unlock() already holds one of these locks or +- * any lock that is ever acquired while holding them; or any lock which +- * can be taken from interrupt context because rcu_boost()->rt_mutex_lock() +- * does not disable irqs while taking ->wait_lock. ++ * any lock that is ever acquired while holding them. + * + * That said, RCU readers are never priority boosted unless they were + * preempted. Therefore, one way to avoid deadlock is to make sure +diff --git a/kernel/futex.c b/kernel/futex.c +index ff5499b0c5b34..6d47b7dc1cfbe 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -825,7 +825,7 @@ static int refill_pi_state_cache(void) + return 0; + } + +-static struct futex_pi_state * alloc_pi_state(void) ++static struct futex_pi_state *alloc_pi_state(void) + { + struct futex_pi_state *pi_state = current->pi_state_cache; + +@@ -858,10 +858,18 @@ static void pi_state_update_owner(struct futex_pi_state *pi_state, + } + } + ++static void get_pi_state(struct futex_pi_state *pi_state) ++{ ++ WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount)); ++} ++ + /* ++ * Drops a reference to the pi_state object and frees or caches it ++ * when the last reference is gone. ++ * + * Must be called with the hb lock held. + */ +-static void free_pi_state(struct futex_pi_state *pi_state) ++static void put_pi_state(struct futex_pi_state *pi_state) + { + if (!pi_state) + return; +@@ -898,7 +906,7 @@ static void free_pi_state(struct futex_pi_state *pi_state) + * Look up the task based on what TID userspace gave us. + * We dont trust it. 
+ */ +-static struct task_struct * futex_find_get_task(pid_t pid) ++static struct task_struct *futex_find_get_task(pid_t pid) + { + struct task_struct *p; + +@@ -958,10 +966,12 @@ static void exit_pi_state_list(struct task_struct *curr) + pi_state->owner = NULL; + raw_spin_unlock_irq(&curr->pi_lock); + +- rt_mutex_futex_unlock(&pi_state->pi_mutex); +- ++ get_pi_state(pi_state); + spin_unlock(&hb->lock); + ++ rt_mutex_futex_unlock(&pi_state->pi_mutex); ++ put_pi_state(pi_state); ++ + raw_spin_lock_irq(&curr->pi_lock); + } + raw_spin_unlock_irq(&curr->pi_lock); +@@ -1075,6 +1085,11 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval, + * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), + * which in turn means that futex_lock_pi() still has a reference on + * our pi_state. ++ * ++ * The waiter holding a reference on @pi_state also protects against ++ * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() ++ * and futex_wait_requeue_pi() as it cannot go to 0 and consequently ++ * free pi_state before we can take a reference ourselves. + */ + WARN_ON(!atomic_read(&pi_state->refcount)); + +@@ -1146,7 +1161,7 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval, + goto out_einval; + + out_attach: +- atomic_inc(&pi_state->refcount); ++ get_pi_state(pi_state); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + *ps = pi_state; + return 0; +@@ -1529,48 +1544,35 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) + q->lock_ptr = NULL; + } + +-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, +- struct futex_hash_bucket *hb) ++/* ++ * Caller must hold a reference on @pi_state. ++ */ ++static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) + { +- struct task_struct *new_owner; +- struct futex_pi_state *pi_state = this->pi_state; + u32 uninitialized_var(curval), newval; ++ struct task_struct *new_owner; ++ bool deboost = false; + WAKE_Q(wake_q); +- bool deboost; + int ret = 0; + +- if (!pi_state) +- return -EINVAL; +- +- /* +- * If current does not own the pi_state then the futex is +- * inconsistent and user space fiddled with the futex value. +- */ +- if (pi_state->owner != current) +- return -EINVAL; +- +- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); +- +- /* +- * When we interleave with futex_lock_pi() where it does +- * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter, +- * but the rt_mutex's wait_list can be empty (either still, or again, +- * depending on which side we land). +- * +- * When this happens, give up our locks and try again, giving the +- * futex_lock_pi() instance time to complete, either by waiting on the +- * rtmutex or removing itself from the futex queue. +- */ +- if (!new_owner) { +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- return -EAGAIN; ++ if (WARN_ON_ONCE(!new_owner)) { ++ /* ++ * As per the comment in futex_unlock_pi() this should not happen. ++ * ++ * When this happens, give up our locks and try again, giving ++ * the futex_lock_pi() instance time to complete, either by ++ * waiting on the rtmutex or removing itself from the futex ++ * queue. ++ */ ++ ret = -EAGAIN; ++ goto out_unlock; + } + + /* +- * We pass it to the next owner. The WAITERS bit is always +- * kept enabled while there is PI state around. We cleanup the +- * owner died bit, because we are the owner. ++ * We pass it to the next owner. 
The WAITERS bit is always kept ++ * enabled while there is PI state around. We cleanup the owner ++ * died bit, because we are the owner. + */ + newval = FUTEX_WAITERS | task_pid_vnr(new_owner); + +@@ -1603,15 +1605,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, + deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + } + ++out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- spin_unlock(&hb->lock); + + if (deboost) { + wake_up_q(&wake_q); + rt_mutex_adjust_prio(current); + } + +- return 0; ++ return ret; + } + + /* +@@ -2121,7 +2123,7 @@ retry_private: + case 0: + break; + case -EFAULT: +- free_pi_state(pi_state); ++ put_pi_state(pi_state); + pi_state = NULL; + double_unlock_hb(hb1, hb2); + hb_waiters_dec(hb2); +@@ -2139,7 +2141,7 @@ retry_private: + * exit to complete. + * - EAGAIN: The user space value changed. + */ +- free_pi_state(pi_state); ++ put_pi_state(pi_state); + pi_state = NULL; + double_unlock_hb(hb1, hb2); + hb_waiters_dec(hb2); +@@ -2201,7 +2203,7 @@ retry_private: + */ + if (requeue_pi) { + /* Prepare the waiter to take the rt_mutex. */ +- atomic_inc(&pi_state->refcount); ++ get_pi_state(pi_state); + this->pi_state = pi_state; + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, + this->rt_waiter, +@@ -2214,7 +2216,7 @@ retry_private: + } else if (ret) { + /* -EDEADLK */ + this->pi_state = NULL; +- free_pi_state(pi_state); ++ put_pi_state(pi_state); + goto out_unlock; + } + } +@@ -2223,7 +2225,7 @@ retry_private: + } + + out_unlock: +- free_pi_state(pi_state); ++ put_pi_state(pi_state); + double_unlock_hb(hb1, hb2); + wake_up_q(&wake_q); + hb_waiters_dec(hb2); +@@ -2277,20 +2279,7 @@ queue_unlock(struct futex_hash_bucket *hb) + hb_waiters_dec(hb); + } + +-/** +- * queue_me() - Enqueue the futex_q on the futex_hash_bucket +- * @q: The futex_q to enqueue +- * @hb: The destination hash bucket +- * +- * The hb->lock must be held by the caller, and is released here. A call to +- * queue_me() is typically paired with exactly one call to unqueue_me(). The +- * exceptions involve the PI related operations, which may use unqueue_me_pi() +- * or nothing if the unqueue is done as part of the wake process and the unqueue +- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for +- * an example). +- */ +-static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) +- __releases(&hb->lock) ++static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) + { + int prio; + +@@ -2307,6 +2296,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) + plist_node_init(&q->list, prio); + plist_add(&q->list, &hb->chain); + q->task = current; ++} ++ ++/** ++ * queue_me() - Enqueue the futex_q on the futex_hash_bucket ++ * @q: The futex_q to enqueue ++ * @hb: The destination hash bucket ++ * ++ * The hb->lock must be held by the caller, and is released here. A call to ++ * queue_me() is typically paired with exactly one call to unqueue_me(). The ++ * exceptions involve the PI related operations, which may use unqueue_me_pi() ++ * or nothing if the unqueue is done as part of the wake process and the unqueue ++ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for ++ * an example). 
++ */ ++static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) ++ __releases(&hb->lock) ++{ ++ __queue_me(q, hb); + spin_unlock(&hb->lock); + } + +@@ -2376,7 +2383,7 @@ static void unqueue_me_pi(struct futex_q *q) + __unqueue_futex(q); + + BUG_ON(!q->pi_state); +- free_pi_state(q->pi_state); ++ put_pi_state(q->pi_state); + q->pi_state = NULL; + + spin_unlock(q->lock_ptr); +@@ -2430,10 +2437,22 @@ retry: + } + + /* +- * Since we just failed the trylock; there must be an owner. ++ * The trylock just failed, so either there is an owner or ++ * there is a higher priority waiter than this one. + */ + newowner = rt_mutex_owner(&pi_state->pi_mutex); +- BUG_ON(!newowner); ++ /* ++ * If the higher priority waiter has not yet taken over the ++ * rtmutex then newowner is NULL. We can't return here with ++ * that state because it's inconsistent vs. the user space ++ * state. So drop the locks and try again. It's a valid ++ * situation and not any different from the other retry ++ * conditions. ++ */ ++ if (unlikely(!newowner)) { ++ err = -EAGAIN; ++ goto handle_fault; ++ } + } else { + WARN_ON_ONCE(argowner != current); + if (oldowner == current) { +@@ -2454,7 +2473,7 @@ retry: + if (get_futex_value_locked(&uval, uaddr)) + goto handle_fault; + +- while (1) { ++ for (;;) { + newval = (uval & FUTEX_OWNER_DIED) | newtid; + + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) +@@ -2812,6 +2831,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, + { + struct hrtimer_sleeper timeout, *to = NULL; + struct task_struct *exiting = NULL; ++ struct rt_mutex_waiter rt_waiter; + struct futex_hash_bucket *hb; + struct futex_q q = futex_q_init; + int res, ret; +@@ -2872,24 +2892,51 @@ retry_private: + } + } + ++ WARN_ON(!q.pi_state); ++ + /* + * Only actually queue now that the atomic ops are done: + */ +- queue_me(&q, hb); ++ __queue_me(&q, hb); + +- WARN_ON(!q.pi_state); +- /* +- * Block on the PI mutex: +- */ +- if (!trylock) { +- ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); +- } else { ++ if (trylock) { + ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); + /* Fixup the trylock return value: */ + ret = ret ? 0 : -EWOULDBLOCK; ++ goto no_block; ++ } ++ ++ /* ++ * We must add ourselves to the rt_mutex waitlist while holding hb->lock ++ * such that the hb and rt_mutex wait lists match. ++ */ ++ rt_mutex_init_waiter(&rt_waiter); ++ ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); ++ if (ret) { ++ if (ret == 1) ++ ret = 0; ++ ++ goto no_block; + } + ++ spin_unlock(q.lock_ptr); ++ ++ if (unlikely(to)) ++ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ++ ++ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); ++ + spin_lock(q.lock_ptr); ++ /* ++ * If we failed to acquire the lock (signal/timeout), we must ++ * first acquire the hb->lock before removing the lock from the ++ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex ++ * wait lists consistent. ++ */ ++ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) ++ ret = 0; ++ ++no_block: + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. +@@ -2913,8 +2960,10 @@ out_unlock_put_key: + out_put_key: + put_futex_key(&q.key); + out: +- if (to) ++ if (to) { ++ hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); ++ } + return ret != -EINTR ? 
ret : -ERESTARTNOINTR; + + uaddr_faulted: +@@ -2967,10 +3016,39 @@ retry: + */ + match = futex_top_waiter(hb, &key); + if (match) { +- ret = wake_futex_pi(uaddr, uval, match, hb); ++ struct futex_pi_state *pi_state = match->pi_state; ++ ++ ret = -EINVAL; ++ if (!pi_state) ++ goto out_unlock; ++ ++ /* ++ * If current does not own the pi_state then the futex is ++ * inconsistent and user space fiddled with the futex value. ++ */ ++ if (pi_state->owner != current) ++ goto out_unlock; ++ ++ get_pi_state(pi_state); ++ /* ++ * Since modifying the wait_list is done while holding both ++ * hb->lock and wait_lock, holding either is sufficient to ++ * observe it. ++ * ++ * By taking wait_lock while still holding hb->lock, we ensure ++ * there is no point where we hold neither; and therefore ++ * wake_futex_pi() must observe a state consistent with what we ++ * observed. ++ */ ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ spin_unlock(&hb->lock); ++ ++ ret = wake_futex_pi(uaddr, uval, pi_state); ++ ++ put_pi_state(pi_state); ++ + /* +- * In case of success wake_futex_pi dropped the hash +- * bucket lock. ++ * Success, we're done! No tricky corner cases. + */ + if (!ret) + goto out_putkey; +@@ -2985,7 +3063,6 @@ retry: + * setting the FUTEX_WAITERS bit. Try again. + */ + if (ret == -EAGAIN) { +- spin_unlock(&hb->lock); + put_futex_key(&key); + goto retry; + } +@@ -2993,7 +3070,7 @@ retry: + * wake_futex_pi has detected invalid state. Tell user + * space. + */ +- goto out_unlock; ++ goto out_putkey; + } + + /* +@@ -3003,8 +3080,10 @@ retry: + * preserve the WAITERS bit not the OWNER_DIED one. We are the + * owner. + */ +- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) ++ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { ++ spin_unlock(&hb->lock); + goto pi_faulted; ++ } + + /* + * If uval has changed, let user space handle it. +@@ -3018,7 +3097,6 @@ out_putkey: + return ret; + + pi_faulted: +- spin_unlock(&hb->lock); + put_futex_key(&key); + + ret = fault_in_user_writeable(uaddr); +@@ -3148,10 +3226,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. + */ +- debug_rt_mutex_init_waiter(&rt_waiter); +- RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); +- RB_CLEAR_NODE(&rt_waiter.tree_entry); +- rt_waiter.task = NULL; ++ rt_mutex_init_waiter(&rt_waiter); + + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); + if (unlikely(ret != 0)) +@@ -3210,7 +3285,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + * Drop the reference to the pi state which + * the requeue_pi() code acquired for us. + */ +- free_pi_state(q.pi_state); ++ put_pi_state(q.pi_state); + spin_unlock(q.lock_ptr); + /* + * Adjust the return value. 
It's either -EFAULT or +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 1c0cb5c3c6ad6..532986d82179b 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -163,13 +163,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) + * 2) Drop lock->wait_lock + * 3) Try to unlock the lock with cmpxchg + */ +-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) ++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, ++ unsigned long flags) + __releases(lock->wait_lock) + { + struct task_struct *owner = rt_mutex_owner(lock); + + clear_rt_mutex_waiters(lock); +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + /* + * If a new waiter comes in between the unlock and the cmpxchg + * we have two situations: +@@ -211,11 +212,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) + /* + * Simple slow path only version: lock->owner is protected by lock->wait_lock. + */ +-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) ++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, ++ unsigned long flags) + __releases(lock->wait_lock) + { + lock->owner = NULL; +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + return true; + } + #endif +@@ -497,7 +499,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + int ret = 0, depth = 0; + struct rt_mutex *lock; + bool detect_deadlock; +- unsigned long flags; + bool requeue = true; + + detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); +@@ -540,7 +541,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + /* + * [1] Task cannot go away as we did a get_task() before ! + */ +- raw_spin_lock_irqsave(&task->pi_lock, flags); ++ raw_spin_lock_irq(&task->pi_lock); + + /* + * [2] Get the waiter on which @task is blocked on. +@@ -624,7 +625,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + * operations. + */ + if (!raw_spin_trylock(&lock->wait_lock)) { +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ raw_spin_unlock_irq(&task->pi_lock); + cpu_relax(); + goto retry; + } +@@ -655,7 +656,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + /* + * No requeue[7] here. Just release @task [8] + */ +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ raw_spin_unlock(&task->pi_lock); + put_task_struct(task); + + /* +@@ -663,14 +664,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + * If there is no owner of the lock, end of chain. + */ + if (!rt_mutex_owner(lock)) { +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + return 0; + } + + /* [10] Grab the next task, i.e. owner of @lock */ + task = rt_mutex_owner(lock); + get_task_struct(task); +- raw_spin_lock_irqsave(&task->pi_lock, flags); ++ raw_spin_lock(&task->pi_lock); + + /* + * No requeue [11] here. We just do deadlock detection. +@@ -685,8 +686,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + top_waiter = rt_mutex_top_waiter(lock); + + /* [13] Drop locks */ +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock(&task->pi_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + + /* If owner is not blocked, end of chain. 
*/ + if (!next_lock) +@@ -707,7 +708,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + rt_mutex_enqueue(lock, waiter); + + /* [8] Release the task */ +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ raw_spin_unlock(&task->pi_lock); + put_task_struct(task); + + /* +@@ -725,14 +726,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + */ + if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) + wake_up_process(rt_mutex_top_waiter(lock)->task); +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + return 0; + } + + /* [10] Grab the next task, i.e. the owner of @lock */ + task = rt_mutex_owner(lock); + get_task_struct(task); +- raw_spin_lock_irqsave(&task->pi_lock, flags); ++ raw_spin_lock(&task->pi_lock); + + /* [11] requeue the pi waiters if necessary */ + if (waiter == rt_mutex_top_waiter(lock)) { +@@ -786,8 +787,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + top_waiter = rt_mutex_top_waiter(lock); + + /* [13] Drop the locks */ +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock(&task->pi_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + + /* + * Make the actual exit decisions [12], based on the stored +@@ -810,7 +811,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + goto again; + + out_unlock_pi: +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ raw_spin_unlock_irq(&task->pi_lock); + out_put_task: + put_task_struct(task); + +@@ -820,7 +821,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + /* + * Try to take an rt-mutex + * +- * Must be called with lock->wait_lock held. ++ * Must be called with lock->wait_lock held and interrupts disabled + * + * @lock: The lock to be acquired. + * @task: The task which wants to acquire the lock +@@ -830,8 +831,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + struct rt_mutex_waiter *waiter) + { +- unsigned long flags; +- + /* + * Before testing whether we can acquire @lock, we set the + * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all +@@ -916,7 +915,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + * case, but conditionals are more expensive than a redundant + * store. + */ +- raw_spin_lock_irqsave(&task->pi_lock, flags); ++ raw_spin_lock(&task->pi_lock); + task->pi_blocked_on = NULL; + /* + * Finish the lock acquisition. @task is the new owner. If +@@ -925,7 +924,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + */ + if (rt_mutex_has_waiters(lock)) + rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ raw_spin_unlock(&task->pi_lock); + + takeit: + /* We got the lock. */ +@@ -945,7 +944,7 @@ takeit: + * + * Prepare waiter and propagate pi chain + * +- * This must be called with lock->wait_lock held. ++ * This must be called with lock->wait_lock held and interrupts disabled + */ + static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, +@@ -956,7 +955,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + struct rt_mutex_waiter *top_waiter = waiter; + struct rt_mutex *next_lock; + int chain_walk = 0, res; +- unsigned long flags; + + /* + * Early deadlock detection. 
We really don't want the task to +@@ -970,7 +968,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + if (owner == task) + return -EDEADLK; + +- raw_spin_lock_irqsave(&task->pi_lock, flags); ++ raw_spin_lock(&task->pi_lock); + __rt_mutex_adjust_prio(task); + waiter->task = task; + waiter->lock = lock; +@@ -983,12 +981,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + + task->pi_blocked_on = waiter; + +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ raw_spin_unlock(&task->pi_lock); + + if (!owner) + return 0; + +- raw_spin_lock_irqsave(&owner->pi_lock, flags); ++ raw_spin_lock(&owner->pi_lock); + if (waiter == rt_mutex_top_waiter(lock)) { + rt_mutex_dequeue_pi(owner, top_waiter); + rt_mutex_enqueue_pi(owner, waiter); +@@ -1003,7 +1001,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + /* Store the lock on which owner is blocked or NULL */ + next_lock = task_blocked_on_lock(owner); + +- raw_spin_unlock_irqrestore(&owner->pi_lock, flags); ++ raw_spin_unlock(&owner->pi_lock); + /* + * Even if full deadlock detection is on, if the owner is not + * blocked itself, we can avoid finding this out in the chain +@@ -1019,12 +1017,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + */ + get_task_struct(owner); + +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + + res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, + next_lock, waiter, task); + +- raw_spin_lock(&lock->wait_lock); ++ raw_spin_lock_irq(&lock->wait_lock); + + return res; + } +@@ -1033,15 +1031,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + * Remove the top waiter from the current tasks pi waiter tree and + * queue it up. + * +- * Called with lock->wait_lock held. ++ * Called with lock->wait_lock held and interrupts disabled. + */ + static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, + struct rt_mutex *lock) + { + struct rt_mutex_waiter *waiter; +- unsigned long flags; + +- raw_spin_lock_irqsave(¤t->pi_lock, flags); ++ raw_spin_lock(¤t->pi_lock); + + waiter = rt_mutex_top_waiter(lock); + +@@ -1063,7 +1060,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, + */ + lock->owner = (void *) RT_MUTEX_HAS_WAITERS; + +- raw_spin_unlock_irqrestore(¤t->pi_lock, flags); ++ raw_spin_unlock(¤t->pi_lock); + + wake_q_add(wake_q, waiter->task); + } +@@ -1071,7 +1068,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, + /* + * Remove a waiter from a lock and give up + * +- * Must be called with lock->wait_lock held and ++ * Must be called with lock->wait_lock held and interrupts disabled. I must + * have just failed to try_to_take_rt_mutex(). 
+ */ + static void remove_waiter(struct rt_mutex *lock, +@@ -1080,12 +1077,11 @@ static void remove_waiter(struct rt_mutex *lock, + bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); + struct task_struct *owner = rt_mutex_owner(lock); + struct rt_mutex *next_lock; +- unsigned long flags; + +- raw_spin_lock_irqsave(¤t->pi_lock, flags); ++ raw_spin_lock(¤t->pi_lock); + rt_mutex_dequeue(lock, waiter); + current->pi_blocked_on = NULL; +- raw_spin_unlock_irqrestore(¤t->pi_lock, flags); ++ raw_spin_unlock(¤t->pi_lock); + + /* + * Only update priority if the waiter was the highest priority +@@ -1094,7 +1090,7 @@ static void remove_waiter(struct rt_mutex *lock, + if (!owner || !is_top_waiter) + return; + +- raw_spin_lock_irqsave(&owner->pi_lock, flags); ++ raw_spin_lock(&owner->pi_lock); + + rt_mutex_dequeue_pi(owner, waiter); + +@@ -1106,7 +1102,7 @@ static void remove_waiter(struct rt_mutex *lock, + /* Store the lock on which owner is blocked or NULL */ + next_lock = task_blocked_on_lock(owner); + +- raw_spin_unlock_irqrestore(&owner->pi_lock, flags); ++ raw_spin_unlock(&owner->pi_lock); + + /* + * Don't walk the chain, if the owner task is not blocked +@@ -1118,12 +1114,12 @@ static void remove_waiter(struct rt_mutex *lock, + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(owner); + +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + + rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, + next_lock, NULL, current); + +- raw_spin_lock(&lock->wait_lock); ++ raw_spin_lock_irq(&lock->wait_lock); + } + + /* +@@ -1155,15 +1151,23 @@ void rt_mutex_adjust_pi(struct task_struct *task) + next_lock, NULL, task); + } + ++void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) ++{ ++ debug_rt_mutex_init_waiter(waiter); ++ RB_CLEAR_NODE(&waiter->pi_tree_entry); ++ RB_CLEAR_NODE(&waiter->tree_entry); ++ waiter->task = NULL; ++} ++ + /** + * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop + * @lock: the rt_mutex to take + * @state: the state the task should block in (TASK_INTERRUPTIBLE +- * or TASK_UNINTERRUPTIBLE) ++ * or TASK_UNINTERRUPTIBLE) + * @timeout: the pre-initialized and started timer, or NULL for none + * @waiter: the pre-initialized rt_mutex_waiter + * +- * lock->wait_lock must be held by the caller. ++ * Must be called with lock->wait_lock held and interrupts disabled + */ + static int __sched + __rt_mutex_slowlock(struct rt_mutex *lock, int state, +@@ -1191,13 +1195,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, + break; + } + +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + + debug_rt_mutex_print_deadlock(waiter); + + schedule(); + +- raw_spin_lock(&lock->wait_lock); ++ raw_spin_lock_irq(&lock->wait_lock); + set_current_state(state); + } + +@@ -1234,17 +1238,24 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, + enum rtmutex_chainwalk chwalk) + { + struct rt_mutex_waiter waiter; ++ unsigned long flags; + int ret = 0; + +- debug_rt_mutex_init_waiter(&waiter); +- RB_CLEAR_NODE(&waiter.pi_tree_entry); +- RB_CLEAR_NODE(&waiter.tree_entry); ++ rt_mutex_init_waiter(&waiter); + +- raw_spin_lock(&lock->wait_lock); ++ /* ++ * Technically we could use raw_spin_[un]lock_irq() here, but this can ++ * be called in early boot if the cmpxchg() fast path is disabled ++ * (debug, no architecture support). In this case we will acquire the ++ * rtmutex with lock->wait_lock held. But we cannot unconditionally ++ * enable interrupts in that early boot case. 
So we need to use the ++ * irqsave/restore variants. ++ */ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); + + /* Try to acquire the lock again: */ + if (try_to_take_rt_mutex(lock, current, NULL)) { +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + return 0; + } + +@@ -1273,7 +1284,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, + */ + fixup_rt_mutex_waiters(lock); + +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + /* Remove pending timer: */ + if (unlikely(timeout)) +@@ -1302,6 +1313,7 @@ static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) + */ + static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) + { ++ unsigned long flags; + int ret; + + /* +@@ -1313,14 +1325,14 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) + return 0; + + /* +- * The mutex has currently no owner. Lock the wait lock and +- * try to acquire the lock. ++ * The mutex has currently no owner. Lock the wait lock and try to ++ * acquire the lock. We use irqsave here to support early boot calls. + */ +- raw_spin_lock(&lock->wait_lock); ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); + + ret = __rt_mutex_slowtrylock(lock); + +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + return ret; + } +@@ -1332,7 +1344,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) + static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, + struct wake_q_head *wake_q) + { +- raw_spin_lock(&lock->wait_lock); ++ unsigned long flags; ++ ++ /* irqsave required to support early boot calls */ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); + + debug_rt_mutex_unlock(lock); + +@@ -1369,10 +1384,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, + */ + while (!rt_mutex_has_waiters(lock)) { + /* Drops lock->wait_lock ! */ +- if (unlock_rt_mutex_safe(lock) == true) ++ if (unlock_rt_mutex_safe(lock, flags) == true) + return false; + /* Relock the rtmutex and try again */ +- raw_spin_lock(&lock->wait_lock); ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); + } + + /* +@@ -1383,7 +1398,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, + */ + mark_wakeup_next_waiter(wake_q, lock); + +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + /* check PI boosting */ + return true; +@@ -1482,19 +1497,6 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) + } + EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); + +-/* +- * Futex variant with full deadlock detection. +- * Futex variants must not use the fast-path, see __rt_mutex_futex_unlock(). +- */ +-int __sched rt_mutex_timed_futex_lock(struct rt_mutex *lock, +- struct hrtimer_sleeper *timeout) +-{ +- might_sleep(); +- +- return rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, +- timeout, RT_MUTEX_FULL_CHAINWALK); +-} +- + /* + * Futex variant, must not use fastpath. 
+ */ +@@ -1687,10 +1689,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + { + int ret; + +- raw_spin_lock(&lock->wait_lock); ++ raw_spin_lock_irq(&lock->wait_lock); + + if (try_to_take_rt_mutex(lock, task, NULL)) { +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + return 1; + } + +@@ -1711,7 +1713,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + if (unlikely(ret)) + remove_waiter(lock, waiter); + +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + + debug_rt_mutex_print_deadlock(waiter); + +@@ -1761,20 +1763,16 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, + { + int ret; + +- raw_spin_lock(&lock->wait_lock); +- +- set_current_state(TASK_INTERRUPTIBLE); +- ++ raw_spin_lock_irq(&lock->wait_lock); + /* sleep on the mutex */ ++ set_current_state(TASK_INTERRUPTIBLE); + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); +- + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); +- +- raw_spin_unlock(&lock->wait_lock); ++ raw_spin_unlock_irq(&lock->wait_lock); + + return ret; + } +@@ -1804,15 +1802,32 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + bool cleanup = false; + + raw_spin_lock_irq(&lock->wait_lock); ++ /* ++ * Do an unconditional try-lock, this deals with the lock stealing ++ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() ++ * sets a NULL owner. ++ * ++ * We're not interested in the return value, because the subsequent ++ * test on rt_mutex_owner() will infer that. If the trylock succeeded, ++ * we will own the lock and it will have removed the waiter. If we ++ * failed the trylock, we're still not owner and we need to remove ++ * ourselves. ++ */ ++ try_to_take_rt_mutex(lock, current, waiter); + /* + * Unless we're the owner; we're still enqueued on the wait_list. + * So check if we became owner, if not, take us off the wait_list. + */ + if (rt_mutex_owner(lock) != current) { + remove_waiter(lock, waiter); +- fixup_rt_mutex_waiters(lock); + cleanup = true; + } ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might ++ * have to fix that up. ++ */ ++ fixup_rt_mutex_waiters(lock); ++ + raw_spin_unlock_irq(&lock->wait_lock); + + return cleanup; +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index 4584db96265d4..97c048c494f00 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -102,6 +102,7 @@ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); + extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, + struct task_struct *proxy_owner); + extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); ++extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); + extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task); +@@ -110,7 +111,6 @@ extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter); + extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter); +-extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); + extern int rt_mutex_futex_trylock(struct rt_mutex *l); + extern int __rt_mutex_futex_trylock(struct rt_mutex *l); + |