author     2024-05-25 11:16:20 -0400
committer  2024-05-25 11:16:20 -0400
commit     174de16ebc3197a705f61aa2262db6be4f421fd8
tree       f783ed846133d77f97898838a70cce20e148a6b5
parent     Linux patch 6.1.91
Linux patch 6.1.92 (6.1-101)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r--  0000_README             |    4
-rw-r--r--  1091_linux-6.1.92.patch | 2589
2 files changed, 2593 insertions, 0 deletions
diff --git a/0000_README b/0000_README index b8e8658e..0d537557 100644 --- a/0000_README +++ b/0000_README @@ -407,6 +407,10 @@ Patch: 1090_linux-6.1.91.patch From: https://www.kernel.org Desc: Linux 6.1.91 +Patch: 1091_linux-6.1.92.patch +From: https://www.kernel.org +Desc: Linux 6.1.92 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1091_linux-6.1.92.patch b/1091_linux-6.1.92.patch new file mode 100644 index 00000000..fc3ed66e --- /dev/null +++ b/1091_linux-6.1.92.patch @@ -0,0 +1,2589 @@ +diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst +index cf1eeefdfc32f..a92e10ec402e7 100644 +--- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst ++++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst +@@ -67,8 +67,8 @@ arg4: + will be performed for all tasks in the task group of ``pid``. + + arg5: +- userspace pointer to an unsigned long for storing the cookie returned by +- ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. ++ userspace pointer to an unsigned long long for storing the cookie returned ++ by ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. + + In order for a process to push a cookie to, or pull a cookie from a process, it + is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the +diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py +index abe7680883771..6387624423363 100755 +--- a/Documentation/sphinx/kernel_include.py ++++ b/Documentation/sphinx/kernel_include.py +@@ -97,7 +97,6 @@ class KernelInclude(Include): + # HINT: this is the only line I had to change / commented out: + #path = utils.relative_path(None, path) + +- path = nodes.reprunicode(path) + encoding = self.options.get( + 'encoding', self.state.document.settings.input_encoding) + e_handler=self.state.document.settings.input_encoding_error_handler +diff --git a/Makefile b/Makefile +index a7d90996e4125..0be668057cb2a 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 6 + PATCHLEVEL = 1 +-SUBLEVEL = 91 ++SUBLEVEL = 92 + EXTRAVERSION = + NAME = Curry Ramen + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index c15f71501c6c2..044b98a62f7bb 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -1752,7 +1752,6 @@ config ARM64_LSE_ATOMICS + + config ARM64_USE_LSE_ATOMICS + bool "Atomic instructions" +- depends on JUMP_LABEL + default y + help + As part of the Large System Extensions, ARMv8.1 introduces new +diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h +index c503db8e73b01..f99d74826a7ef 100644 +--- a/arch/arm64/include/asm/lse.h ++++ b/arch/arm64/include/asm/lse.h +@@ -10,7 +10,6 @@ + + #include <linux/compiler_types.h> + #include <linux/export.h> +-#include <linux/jump_label.h> + #include <linux/stringify.h> + #include <asm/alternative.h> + #include <asm/alternative-macros.h> +diff --git a/drivers/android/binder.c b/drivers/android/binder.c +index 8c2b7c074eca1..46111f8c12e61 100644 +--- a/drivers/android/binder.c ++++ b/drivers/android/binder.c +@@ -5350,7 +5350,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + goto err; + break; + case BINDER_SET_MAX_THREADS: { +- int max_threads; ++ u32 max_threads; + + if (copy_from_user(&max_threads, ubuf, + sizeof(max_threads))) { +diff --git 
a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h +index abe19d88c6ecc..c2c1bb3c1e60b 100644 +--- a/drivers/android/binder_internal.h ++++ b/drivers/android/binder_internal.h +@@ -420,7 +420,7 @@ struct binder_proc { + struct list_head todo; + struct binder_stats stats; + struct list_head delivered_death; +- int max_threads; ++ u32 max_threads; + int requested_threads; + int requested_threads_started; + int tmp_ref; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +index 9fe2eae88ec17..ee83d282b49a8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +@@ -974,6 +974,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, + if (!obj) + return -EINVAL; + ++ if (!info || info->head.block == AMDGPU_RAS_BLOCK_COUNT) ++ return -EINVAL; ++ + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { + amdgpu_ras_get_ecc_info(adev, &err_data); + } else { +diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +index d52cbc0e9b679..5f57bdd597c27 100644 +--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c ++++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +@@ -924,7 +924,12 @@ static bool setup_dsc_config( + if (!is_dsc_possible) + goto done; + +- dsc_cfg->num_slices_v = pic_height/slice_height; ++ if (slice_height > 0) { ++ dsc_cfg->num_slices_v = pic_height / slice_height; ++ } else { ++ is_dsc_possible = false; ++ goto done; ++ } + + if (target_bandwidth_kbps > 0) { + is_dsc_possible = decide_dsc_target_bpp_x16( +diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c +index eb3da558c3fbd..ee0469d5d4354 100644 +--- a/drivers/mfd/stpmic1.c ++++ b/drivers/mfd/stpmic1.c +@@ -108,8 +108,9 @@ static const struct regmap_irq stpmic1_irqs[] = { + static const struct regmap_irq_chip stpmic1_regmap_irq_chip = { + .name = "pmic_irq", + .status_base = INT_PENDING_R1, +- .mask_base = INT_CLEAR_MASK_R1, +- .unmask_base = INT_SET_MASK_R1, ++ .mask_base = INT_SET_MASK_R1, ++ .unmask_base = INT_CLEAR_MASK_R1, ++ .mask_unmask_non_inverted = true, + .ack_base = INT_CLEAR_R1, + .num_regs = STPMIC1_PMIC_NUM_IRQ_REGS, + .irqs = stpmic1_irqs, +diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c +index 3a927452a6501..7e39017e440fb 100644 +--- a/drivers/mmc/core/mmc.c ++++ b/drivers/mmc/core/mmc.c +@@ -1819,8 +1819,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, + + if (err) + goto free_card; +- +- } else if (!mmc_card_hs400es(card)) { ++ } else if (mmc_card_hs400es(card)) { ++ if (host->ops->execute_hs400_tuning) { ++ err = host->ops->execute_hs400_tuning(host, card); ++ if (err) ++ goto free_card; ++ } ++ } else { + /* Select the desired bus width optionally */ + err = mmc_select_bus_width(card); + if (err > 0 && mmc_card_hs(card)) { +diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c +index e64bef490a174..42d8e5e771b7e 100644 +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c +@@ -544,17 +544,15 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) + + /** + * ice_vc_isvalid_q_id +- * @vf: pointer to the VF info +- * @vsi_id: VSI ID ++ * @vsi: VSI to check queue ID against + * @qid: VSI relative queue ID + * + * check for the valid queue ID + */ +-static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid) ++static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u8 qid) + { +- struct ice_vsi *vsi = 
ice_find_vsi(vf->pf, vsi_id); + /* allocated Tx and Rx queues should be always equal for VF VSI */ +- return (vsi && (qid < vsi->alloc_txq)); ++ return qid < vsi->alloc_txq; + } + + /** +@@ -1254,7 +1252,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) + */ + q_map = vqs->rx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { +- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { ++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } +@@ -1276,7 +1274,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) + + q_map = vqs->tx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { +- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { ++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } +@@ -1381,7 +1379,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) + q_map = vqs->tx_queues; + + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { +- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { ++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } +@@ -1407,7 +1405,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) + bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); + } else if (q_map) { + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { +- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { ++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } +@@ -1463,7 +1461,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, + for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { + vsi_q_id = vsi_q_id_idx; + +- if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) ++ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) + return VIRTCHNL_STATUS_ERR_PARAM; + + q_vector->num_ring_rx++; +@@ -1477,7 +1475,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, + for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { + vsi_q_id = vsi_q_id_idx; + +- if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) ++ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) + return VIRTCHNL_STATUS_ERR_PARAM; + + q_vector->num_ring_tx++; +@@ -1611,7 +1609,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) + qpi->txq.headwb_enabled || + !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || + !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || +- !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) { ++ !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) { + goto error_param; + } + +diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +index 7f72604079723..fb8e856933097 100644 +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +@@ -107,9 +107,6 @@ ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id) + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)) + return -EINVAL; + +- if (vsi_id != vf->lan_vsi_num) +- return -EINVAL; +- + if (!ice_vc_isvalid_vsi_id(vf, vsi_id)) + return -EINVAL; + +diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c +index 502518cdb4618..6453c92f0fa7c 100644 +--- a/drivers/net/ethernet/micrel/ks8851_common.c ++++ b/drivers/net/ethernet/micrel/ks8851_common.c +@@ -328,7 +328,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) + { + struct ks8851_net *ks = _ks; + struct sk_buff_head rxq; +- 
unsigned handled = 0; + unsigned long flags; + unsigned int status; + struct sk_buff *skb; +@@ -336,24 +335,17 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) + ks8851_lock(ks, &flags); + + status = ks8851_rdreg16(ks, KS_ISR); ++ ks8851_wrreg16(ks, KS_ISR, status); + + netif_dbg(ks, intr, ks->netdev, + "%s: status 0x%04x\n", __func__, status); + +- if (status & IRQ_LCI) +- handled |= IRQ_LCI; +- + if (status & IRQ_LDI) { + u16 pmecr = ks8851_rdreg16(ks, KS_PMECR); + pmecr &= ~PMECR_WKEVT_MASK; + ks8851_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK); +- +- handled |= IRQ_LDI; + } + +- if (status & IRQ_RXPSI) +- handled |= IRQ_RXPSI; +- + if (status & IRQ_TXI) { + unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR); + +@@ -365,20 +357,12 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) + if (netif_queue_stopped(ks->netdev)) + netif_wake_queue(ks->netdev); + spin_unlock(&ks->statelock); +- +- handled |= IRQ_TXI; + } + +- if (status & IRQ_RXI) +- handled |= IRQ_RXI; +- + if (status & IRQ_SPIBEI) { + netdev_err(ks->netdev, "%s: spi bus error\n", __func__); +- handled |= IRQ_SPIBEI; + } + +- ks8851_wrreg16(ks, KS_ISR, handled); +- + if (status & IRQ_RXI) { + /* the datasheet says to disable the rx interrupt during + * packet read-out, however we're masking the interrupt +diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c +index 21b6c4d94a632..6d31061818e93 100644 +--- a/drivers/net/usb/ax88179_178a.c ++++ b/drivers/net/usb/ax88179_178a.c +@@ -174,6 +174,7 @@ struct ax88179_data { + u32 wol_supported; + u32 wolopts; + u8 disconnecting; ++ u8 initialized; + }; + + struct ax88179_int_data { +@@ -1673,6 +1674,18 @@ static int ax88179_reset(struct usbnet *dev) + return 0; + } + ++static int ax88179_net_reset(struct usbnet *dev) ++{ ++ struct ax88179_data *ax179_data = dev->driver_priv; ++ ++ if (ax179_data->initialized) ++ ax88179_reset(dev); ++ else ++ ax179_data->initialized = 1; ++ ++ return 0; ++} ++ + static int ax88179_stop(struct usbnet *dev) + { + u16 tmp16; +@@ -1692,6 +1705,7 @@ static const struct driver_info ax88179_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1704,6 +1718,7 @@ static const struct driver_info ax88178a_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1716,7 +1731,7 @@ static const struct driver_info cypress_GX3_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1729,7 +1744,7 @@ static const struct driver_info dlink_dub1312_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1742,7 +1757,7 @@ static const struct driver_info sitecom_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + 
.rx_fixup = ax88179_rx_fixup, +@@ -1755,7 +1770,7 @@ static const struct driver_info samsung_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1768,7 +1783,7 @@ static const struct driver_info lenovo_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1781,7 +1796,7 @@ static const struct driver_info belkin_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1794,7 +1809,7 @@ static const struct driver_info toshiba_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1807,7 +1822,7 @@ static const struct driver_info mct_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1820,7 +1835,7 @@ static const struct driver_info at_umc2000_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1833,7 +1848,7 @@ static const struct driver_info at_umc200_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +@@ -1846,7 +1861,7 @@ static const struct driver_info at_umc2000sp_info = { + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, +- .reset = ax88179_reset, ++ .reset = ax88179_net_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, +diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c +index 1ef36a0a7dd20..223482584f54f 100644 +--- a/drivers/pinctrl/core.c ++++ b/drivers/pinctrl/core.c +@@ -205,6 +205,7 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, + const struct pinctrl_pin_desc *pin) + { + struct pin_desc *pindesc; ++ int error; + + pindesc = pin_desc_get(pctldev, pin->number); + if (pindesc) { +@@ -226,18 +227,25 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, + } else { + pindesc->name = kasprintf(GFP_KERNEL, "PIN%u", pin->number); + if (!pindesc->name) { +- kfree(pindesc); +- return -ENOMEM; ++ error = -ENOMEM; ++ goto failed; + } + pindesc->dynamic_name = true; + } + + pindesc->drv_data = pin->drv_data; + +- radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); ++ error = radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); ++ if (error) ++ goto failed; ++ + pr_debug("registered pin %d (%s) on %s\n", + 
pin->number, pindesc->name, pctldev->desc->name); + return 0; ++ ++failed: ++ kfree(pindesc); ++ return error; + } + + static int pinctrl_register_pins(struct pinctrl_dev *pctldev, +diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c +index d421a2ccaa1ea..ffec5299b5c1d 100644 +--- a/drivers/remoteproc/mtk_scp.c ++++ b/drivers/remoteproc/mtk_scp.c +@@ -126,7 +126,7 @@ static int scp_elf_read_ipi_buf_addr(struct mtk_scp *scp, + static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) + { + int ret; +- size_t offset; ++ size_t buf_sz, offset; + + /* read the ipi buf addr from FW itself first */ + ret = scp_elf_read_ipi_buf_addr(scp, fw, &offset); +@@ -138,6 +138,14 @@ static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) + } + dev_info(scp->dev, "IPI buf addr %#010zx\n", offset); + ++ /* Make sure IPI buffer fits in the L2TCM range assigned to this core */ ++ buf_sz = sizeof(*scp->recv_buf) + sizeof(*scp->send_buf); ++ ++ if (scp->sram_size < buf_sz + offset) { ++ dev_err(scp->dev, "IPI buffer does not fit in SRAM.\n"); ++ return -EOVERFLOW; ++ } ++ + scp->recv_buf = (struct mtk_share_obj __iomem *) + (scp->sram_base + offset); + scp->send_buf = (struct mtk_share_obj __iomem *) +diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c +index 7aa37be3216a5..86433e3c3409a 100644 +--- a/drivers/tty/serial/kgdboc.c ++++ b/drivers/tty/serial/kgdboc.c +@@ -19,6 +19,7 @@ + #include <linux/console.h> + #include <linux/vt_kern.h> + #include <linux/input.h> ++#include <linux/irq_work.h> + #include <linux/module.h> + #include <linux/platform_device.h> + #include <linux/serial_core.h> +@@ -48,6 +49,25 @@ static struct kgdb_io kgdboc_earlycon_io_ops; + static int (*earlycon_orig_exit)(struct console *con); + #endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */ + ++/* ++ * When we leave the debug trap handler we need to reset the keyboard status ++ * (since the original keyboard state gets partially clobbered by kdb use of ++ * the keyboard). ++ * ++ * The path to deliver the reset is somewhat circuitous. ++ * ++ * To deliver the reset we register an input handler, reset the keyboard and ++ * then deregister the input handler. However, to get this done right, we do ++ * have to carefully manage the calling context because we can only register ++ * input handlers from task context. ++ * ++ * In particular we need to trigger the action from the debug trap handler with ++ * all its NMI and/or NMI-like oddities. To solve this the kgdboc trap exit code ++ * (the "post_exception" callback) uses irq_work_queue(), which is NMI-safe, to ++ * schedule a callback from a hardirq context. From there we have to defer the ++ * work again, this time using schedule_work(), to get a callback using the ++ * system workqueue, which runs in task context. 
++ */ + #ifdef CONFIG_KDB_KEYBOARD + static int kgdboc_reset_connect(struct input_handler *handler, + struct input_dev *dev, +@@ -99,10 +119,17 @@ static void kgdboc_restore_input_helper(struct work_struct *dummy) + + static DECLARE_WORK(kgdboc_restore_input_work, kgdboc_restore_input_helper); + ++static void kgdboc_queue_restore_input_helper(struct irq_work *unused) ++{ ++ schedule_work(&kgdboc_restore_input_work); ++} ++ ++static DEFINE_IRQ_WORK(kgdboc_restore_input_irq_work, kgdboc_queue_restore_input_helper); ++ + static void kgdboc_restore_input(void) + { + if (likely(system_state == SYSTEM_RUNNING)) +- schedule_work(&kgdboc_restore_input_work); ++ irq_work_queue(&kgdboc_restore_input_irq_work); + } + + static int kgdboc_register_kbd(char **cptr) +@@ -133,6 +160,7 @@ static void kgdboc_unregister_kbd(void) + i--; + } + } ++ irq_work_sync(&kgdboc_restore_input_irq_work); + flush_work(&kgdboc_restore_input_work); + } + #else /* ! CONFIG_KDB_KEYBOARD */ +diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c +index 2d7ac92ce9b84..c72c6f8ec2c88 100644 +--- a/drivers/usb/dwc3/gadget.c ++++ b/drivers/usb/dwc3/gadget.c +@@ -1708,7 +1708,6 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc) + */ + static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) + { +- struct dwc3 *dwc = dep->dwc; + struct dwc3_gadget_ep_cmd_params params; + u32 cmd; + int ret; +@@ -1733,8 +1732,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int + dep->resource_index = 0; + + if (!interrupt) { +- if (!DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC3, 310A)) +- mdelay(1); ++ mdelay(1); + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + } else if (!ret) { + dep->flags |= DWC3_EP_END_TRANSFER_PENDING; +diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c +index 195c9c16f817f..e804db927d5cf 100644 +--- a/drivers/usb/typec/tipd/core.c ++++ b/drivers/usb/typec/tipd/core.c +@@ -24,6 +24,7 @@ + #define TPS_REG_MODE 0x03 + #define TPS_REG_CMD1 0x08 + #define TPS_REG_DATA1 0x09 ++#define TPS_REG_VERSION 0x0F + #define TPS_REG_INT_EVENT1 0x14 + #define TPS_REG_INT_EVENT2 0x15 + #define TPS_REG_INT_MASK1 0x16 +@@ -518,49 +519,67 @@ static irqreturn_t cd321x_interrupt(int irq, void *data) + + static irqreturn_t tps6598x_interrupt(int irq, void *data) + { ++ int intev_len = TPS_65981_2_6_INTEVENT_LEN; + struct tps6598x *tps = data; +- u64 event1 = 0; +- u64 event2 = 0; ++ u64 event1[2] = { }; ++ u64 event2[2] = { }; ++ u32 version; + u32 status; + int ret; + + mutex_lock(&tps->lock); + +- ret = tps6598x_read64(tps, TPS_REG_INT_EVENT1, &event1); +- ret |= tps6598x_read64(tps, TPS_REG_INT_EVENT2, &event2); ++ ret = tps6598x_read32(tps, TPS_REG_VERSION, &version); ++ if (ret) ++ dev_warn(tps->dev, "%s: failed to read version (%d)\n", ++ __func__, ret); ++ ++ if (TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DH || ++ TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DK) ++ intev_len = TPS_65987_8_INTEVENT_LEN; ++ ++ ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, event1, intev_len); ++ ++ ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, event1, intev_len); + if (ret) { +- dev_err(tps->dev, "%s: failed to read events\n", __func__); ++ dev_err(tps->dev, "%s: failed to read event1\n", __func__); + goto err_unlock; + } +- trace_tps6598x_irq(event1, event2); ++ ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT2, event2, intev_len); ++ if (ret) { ++ dev_err(tps->dev, "%s: failed to read event2\n", __func__); ++ goto err_unlock; 
++ } ++ trace_tps6598x_irq(event1[0], event2[0]); + +- if (!(event1 | event2)) ++ if (!(event1[0] | event1[1] | event2[0] | event2[1])) + goto err_unlock; + + if (!tps6598x_read_status(tps, &status)) + goto err_clear_ints; + +- if ((event1 | event2) & TPS_REG_INT_POWER_STATUS_UPDATE) ++ if ((event1[0] | event2[0]) & TPS_REG_INT_POWER_STATUS_UPDATE) + if (!tps6598x_read_power_status(tps)) + goto err_clear_ints; + +- if ((event1 | event2) & TPS_REG_INT_DATA_STATUS_UPDATE) ++ if ((event1[0] | event2[0]) & TPS_REG_INT_DATA_STATUS_UPDATE) + if (!tps6598x_read_data_status(tps)) + goto err_clear_ints; + + /* Handle plug insert or removal */ +- if ((event1 | event2) & TPS_REG_INT_PLUG_EVENT) ++ if ((event1[0] | event2[0]) & TPS_REG_INT_PLUG_EVENT) + tps6598x_handle_plug_event(tps, status); + + err_clear_ints: +- tps6598x_write64(tps, TPS_REG_INT_CLEAR1, event1); +- tps6598x_write64(tps, TPS_REG_INT_CLEAR2, event2); ++ tps6598x_block_write(tps, TPS_REG_INT_CLEAR1, event1, intev_len); ++ tps6598x_block_write(tps, TPS_REG_INT_CLEAR2, event2, intev_len); + + err_unlock: + mutex_unlock(&tps->lock); + +- if (event1 | event2) ++ if (event1[0] | event1[1] | event2[0] | event2[1]) + return IRQ_HANDLED; ++ + return IRQ_NONE; + } + +diff --git a/drivers/usb/typec/tipd/tps6598x.h b/drivers/usb/typec/tipd/tps6598x.h +index 527857549d699..1fc3cc8ad199a 100644 +--- a/drivers/usb/typec/tipd/tps6598x.h ++++ b/drivers/usb/typec/tipd/tps6598x.h +@@ -199,4 +199,15 @@ + #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_A BIT(2) + #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_B (BIT(2) | BIT(1)) + ++/* Version Register */ ++#define TPS_VERSION_HW_VERSION_MASK GENMASK(31, 24) ++#define TPS_VERSION_HW_VERSION(x) TPS_FIELD_GET(TPS_VERSION_HW_VERSION_MASK, (x)) ++#define TPS_VERSION_HW_65981_2_6 0x00 ++#define TPS_VERSION_HW_65987_8_DH 0xF7 ++#define TPS_VERSION_HW_65987_8_DK 0xF9 ++ ++/* Int Event Register length */ ++#define TPS_65981_2_6_INTEVENT_LEN 8 ++#define TPS_65987_8_INTEVENT_LEN 11 ++ + #endif /* __TPS6598X_H__ */ +diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c +index 73cd5bf350472..2431febc46151 100644 +--- a/drivers/usb/typec/ucsi/displayport.c ++++ b/drivers/usb/typec/ucsi/displayport.c +@@ -275,8 +275,6 @@ static void ucsi_displayport_work(struct work_struct *work) + struct ucsi_dp *dp = container_of(work, struct ucsi_dp, work); + int ret; + +- mutex_lock(&dp->con->lock); +- + ret = typec_altmode_vdm(dp->alt, dp->header, + dp->vdo_data, dp->vdo_size); + if (ret) +@@ -285,8 +283,6 @@ static void ucsi_displayport_work(struct work_struct *work) + dp->vdo_data = NULL; + dp->vdo_size = 0; + dp->header = 0; +- +- mutex_unlock(&dp->con->lock); + } + + void ucsi_displayport_remove_partner(struct typec_altmode *alt) +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index a0a4d8de82cad..dac1a5c110c0e 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -579,7 +579,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, + return iomap_read_inline_data(iter, folio); + } + +-static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, ++static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, + size_t len, struct folio **foliop) + { + const struct iomap_page_ops *page_ops = iter->iomap.page_ops; +@@ -613,6 +613,27 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, + status = (iter->flags & IOMAP_NOWAIT) ? 
-EAGAIN : -ENOMEM; + goto out_no_page; + } ++ ++ /* ++ * Now we have a locked folio, before we do anything with it we need to ++ * check that the iomap we have cached is not stale. The inode extent ++ * mapping can change due to concurrent IO in flight (e.g. ++ * IOMAP_UNWRITTEN state can change and memory reclaim could have ++ * reclaimed a previously partially written page at this index after IO ++ * completion before this write reaches this file offset) and hence we ++ * could do the wrong thing here (zero a page range incorrectly or fail ++ * to zero) and corrupt data. ++ */ ++ if (page_ops && page_ops->iomap_valid) { ++ bool iomap_valid = page_ops->iomap_valid(iter->inode, ++ &iter->iomap); ++ if (!iomap_valid) { ++ iter->iomap.flags |= IOMAP_F_STALE; ++ status = 0; ++ goto out_unlock; ++ } ++ } ++ + if (pos + len > folio_pos(folio) + folio_size(folio)) + len = folio_pos(folio) + folio_size(folio) - pos; + +@@ -768,6 +789,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) + status = iomap_write_begin(iter, pos, bytes, &folio); + if (unlikely(status)) + break; ++ if (iter->iomap.flags & IOMAP_F_STALE) ++ break; + + page = folio_file_page(folio, pos >> PAGE_SHIFT); + if (mapping_writably_mapped(mapping)) +@@ -827,6 +850,231 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, + } + EXPORT_SYMBOL_GPL(iomap_file_buffered_write); + ++/* ++ * Scan the data range passed to us for dirty page cache folios. If we find a ++ * dirty folio, punch out the preceeding range and update the offset from which ++ * the next punch will start from. ++ * ++ * We can punch out storage reservations under clean pages because they either ++ * contain data that has been written back - in which case the delalloc punch ++ * over that range is a no-op - or they have been read faults in which case they ++ * contain zeroes and we can remove the delalloc backing range and any new ++ * writes to those pages will do the normal hole filling operation... ++ * ++ * This makes the logic simple: we only need to keep the delalloc extents only ++ * over the dirty ranges of the page cache. ++ * ++ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to ++ * simplify range iterations. ++ */ ++static int iomap_write_delalloc_scan(struct inode *inode, ++ loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, ++ int (*punch)(struct inode *inode, loff_t offset, loff_t length)) ++{ ++ while (start_byte < end_byte) { ++ struct folio *folio; ++ ++ /* grab locked page */ ++ folio = filemap_lock_folio(inode->i_mapping, ++ start_byte >> PAGE_SHIFT); ++ if (!folio) { ++ start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + ++ PAGE_SIZE; ++ continue; ++ } ++ ++ /* if dirty, punch up to offset */ ++ if (folio_test_dirty(folio)) { ++ if (start_byte > *punch_start_byte) { ++ int error; ++ ++ error = punch(inode, *punch_start_byte, ++ start_byte - *punch_start_byte); ++ if (error) { ++ folio_unlock(folio); ++ folio_put(folio); ++ return error; ++ } ++ } ++ ++ /* ++ * Make sure the next punch start is correctly bound to ++ * the end of this data range, not the end of the folio. 
++ */ ++ *punch_start_byte = min_t(loff_t, end_byte, ++ folio_next_index(folio) << PAGE_SHIFT); ++ } ++ ++ /* move offset to start of next folio in range */ ++ start_byte = folio_next_index(folio) << PAGE_SHIFT; ++ folio_unlock(folio); ++ folio_put(folio); ++ } ++ return 0; ++} ++ ++/* ++ * Punch out all the delalloc blocks in the range given except for those that ++ * have dirty data still pending in the page cache - those are going to be ++ * written and so must still retain the delalloc backing for writeback. ++ * ++ * As we are scanning the page cache for data, we don't need to reimplement the ++ * wheel - mapping_seek_hole_data() does exactly what we need to identify the ++ * start and end of data ranges correctly even for sub-folio block sizes. This ++ * byte range based iteration is especially convenient because it means we ++ * don't have to care about variable size folios, nor where the start or end of ++ * the data range lies within a folio, if they lie within the same folio or even ++ * if there are multiple discontiguous data ranges within the folio. ++ * ++ * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so ++ * can return data ranges that exist in the cache beyond EOF. e.g. a page fault ++ * spanning EOF will initialise the post-EOF data to zeroes and mark it up to ++ * date. A write page fault can then mark it dirty. If we then fail a write() ++ * beyond EOF into that up to date cached range, we allocate a delalloc block ++ * beyond EOF and then have to punch it out. Because the range is up to date, ++ * mapping_seek_hole_data() will return it, and we will skip the punch because ++ * the folio is dirty. THis is incorrect - we always need to punch out delalloc ++ * beyond EOF in this case as writeback will never write back and covert that ++ * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, ++ * resulting in always punching out the range from the EOF to the end of the ++ * range the iomap spans. ++ * ++ * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it ++ * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA ++ * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) ++ * returns the end of the data range (data_end). Using closed intervals would ++ * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose ++ * the code to subtle off-by-one bugs.... ++ */ ++static int iomap_write_delalloc_release(struct inode *inode, ++ loff_t start_byte, loff_t end_byte, ++ int (*punch)(struct inode *inode, loff_t pos, loff_t length)) ++{ ++ loff_t punch_start_byte = start_byte; ++ loff_t scan_end_byte = min(i_size_read(inode), end_byte); ++ int error = 0; ++ ++ /* ++ * Lock the mapping to avoid races with page faults re-instantiating ++ * folios and dirtying them via ->page_mkwrite whilst we walk the ++ * cache and perform delalloc extent removal. Failing to do this can ++ * leave dirty pages with no space reservation in the cache. ++ */ ++ filemap_invalidate_lock(inode->i_mapping); ++ while (start_byte < scan_end_byte) { ++ loff_t data_end; ++ ++ start_byte = mapping_seek_hole_data(inode->i_mapping, ++ start_byte, scan_end_byte, SEEK_DATA); ++ /* ++ * If there is no more data to scan, all that is left is to ++ * punch out the remaining range. 
++ */ ++ if (start_byte == -ENXIO || start_byte == scan_end_byte) ++ break; ++ if (start_byte < 0) { ++ error = start_byte; ++ goto out_unlock; ++ } ++ WARN_ON_ONCE(start_byte < punch_start_byte); ++ WARN_ON_ONCE(start_byte > scan_end_byte); ++ ++ /* ++ * We find the end of this contiguous cached data range by ++ * seeking from start_byte to the beginning of the next hole. ++ */ ++ data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, ++ scan_end_byte, SEEK_HOLE); ++ if (data_end < 0) { ++ error = data_end; ++ goto out_unlock; ++ } ++ WARN_ON_ONCE(data_end <= start_byte); ++ WARN_ON_ONCE(data_end > scan_end_byte); ++ ++ error = iomap_write_delalloc_scan(inode, &punch_start_byte, ++ start_byte, data_end, punch); ++ if (error) ++ goto out_unlock; ++ ++ /* The next data search starts at the end of this one. */ ++ start_byte = data_end; ++ } ++ ++ if (punch_start_byte < end_byte) ++ error = punch(inode, punch_start_byte, ++ end_byte - punch_start_byte); ++out_unlock: ++ filemap_invalidate_unlock(inode->i_mapping); ++ return error; ++} ++ ++/* ++ * When a short write occurs, the filesystem may need to remove reserved space ++ * that was allocated in ->iomap_begin from it's ->iomap_end method. For ++ * filesystems that use delayed allocation, we need to punch out delalloc ++ * extents from the range that are not dirty in the page cache. As the write can ++ * race with page faults, there can be dirty pages over the delalloc extent ++ * outside the range of a short write but still within the delalloc extent ++ * allocated for this iomap. ++ * ++ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to ++ * simplify range iterations. ++ * ++ * The punch() callback *must* only punch delalloc extents in the range passed ++ * to it. It must skip over all other types of extents in the range and leave ++ * them completely unchanged. It must do this punch atomically with respect to ++ * other extent modifications. ++ * ++ * The punch() callback may be called with a folio locked to prevent writeback ++ * extent allocation racing at the edge of the range we are currently punching. ++ * The locked folio may or may not cover the range being punched, so it is not ++ * safe for the punch() callback to lock folios itself. ++ * ++ * Lock order is: ++ * ++ * inode->i_rwsem (shared or exclusive) ++ * inode->i_mapping->invalidate_lock (exclusive) ++ * folio_lock() ++ * ->punch ++ * internal filesystem allocation lock ++ */ ++int iomap_file_buffered_write_punch_delalloc(struct inode *inode, ++ struct iomap *iomap, loff_t pos, loff_t length, ++ ssize_t written, ++ int (*punch)(struct inode *inode, loff_t pos, loff_t length)) ++{ ++ loff_t start_byte; ++ loff_t end_byte; ++ int blocksize = i_blocksize(inode); ++ ++ if (iomap->type != IOMAP_DELALLOC) ++ return 0; ++ ++ /* If we didn't reserve the blocks, we're not allowed to punch them. */ ++ if (!(iomap->flags & IOMAP_F_NEW)) ++ return 0; ++ ++ /* ++ * start_byte refers to the first unused block after a short write. If ++ * nothing was written, round offset down to point at the first block in ++ * the range. 
++ */ ++ if (unlikely(!written)) ++ start_byte = round_down(pos, blocksize); ++ else ++ start_byte = round_up(pos + written, blocksize); ++ end_byte = round_up(pos + length, blocksize); ++ ++ /* Nothing to do if we've written the entire delalloc extent */ ++ if (start_byte >= end_byte) ++ return 0; ++ ++ return iomap_write_delalloc_release(inode, start_byte, end_byte, ++ punch); ++} ++EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc); ++ + static loff_t iomap_unshare_iter(struct iomap_iter *iter) + { + struct iomap *iomap = &iter->iomap; +@@ -851,6 +1099,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) + status = iomap_write_begin(iter, pos, bytes, &folio); + if (unlikely(status)) + return status; ++ if (iter->iomap.flags & IOMAP_F_STALE) ++ break; + + status = iomap_write_end(iter, pos, bytes, bytes, folio); + if (WARN_ON_ONCE(status == 0)) +@@ -906,6 +1156,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) + status = iomap_write_begin(iter, pos, bytes, &folio); + if (status) + return status; ++ if (iter->iomap.flags & IOMAP_F_STALE) ++ break; + + offset = offset_in_folio(folio, pos); + if (bytes > folio_size(folio) - offset) +diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c +index a1c7592d2aded..79a0614eaab77 100644 +--- a/fs/iomap/iter.c ++++ b/fs/iomap/iter.c +@@ -7,12 +7,28 @@ + #include <linux/iomap.h> + #include "trace.h" + ++/* ++ * Advance to the next range we need to map. ++ * ++ * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully ++ * processed - it was aborted because the extent the iomap spanned may have been ++ * changed during the operation. In this case, the iteration behaviour is to ++ * remap the unprocessed range of the iter, and that means we may need to remap ++ * even when we've made no progress (i.e. iter->processed = 0). Hence the ++ * "finished iterating" case needs to distinguish between ++ * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we ++ * need to remap the entire remaining range. 
++ */ + static inline int iomap_iter_advance(struct iomap_iter *iter) + { ++ bool stale = iter->iomap.flags & IOMAP_F_STALE; ++ + /* handle the previous iteration (if any) */ + if (iter->iomap.length) { +- if (iter->processed <= 0) ++ if (iter->processed < 0) + return iter->processed; ++ if (!iter->processed && !stale) ++ return 0; + if (WARN_ON_ONCE(iter->processed > iomap_length(iter))) + return -EIO; + iter->pos += iter->processed; +@@ -33,6 +49,7 @@ static inline void iomap_iter_done(struct iomap_iter *iter) + WARN_ON_ONCE(iter->iomap.offset > iter->pos); + WARN_ON_ONCE(iter->iomap.length == 0); + WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos); ++ WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE); + + trace_iomap_iter_dstmap(iter->inode, &iter->iomap); + if (iter->srcmap.type != IOMAP_HOLE) +diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c +index 456af7d230cf1..46a0a2d6962e1 100644 +--- a/fs/nfs/callback.c ++++ b/fs/nfs/callback.c +@@ -80,9 +80,6 @@ nfs4_callback_svc(void *vrqstp) + set_freezable(); + + while (!kthread_freezable_should_stop(NULL)) { +- +- if (signal_pending(current)) +- flush_signals(current); + /* + * Listen for a request on the socket + */ +@@ -112,11 +109,7 @@ nfs41_callback_svc(void *vrqstp) + set_freezable(); + + while (!kthread_freezable_should_stop(NULL)) { +- +- if (signal_pending(current)) +- flush_signals(current); +- +- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); ++ prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE); + spin_lock_bh(&serv->sv_cb_lock); + if (!list_empty(&serv->sv_cb_list)) { + req = list_first_entry(&serv->sv_cb_list, +diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c +index ba53cd89ec62c..b6d768bd5ccca 100644 +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -1313,12 +1313,11 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, + /* found a match */ + if (ni->nsui_busy) { + /* wait - and try again */ +- prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, +- TASK_INTERRUPTIBLE); ++ prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE); + spin_unlock(&nn->nfsd_ssc_lock); + + /* allow 20secs for mount/unmount for now - revisit */ +- if (signal_pending(current) || ++ if (kthread_should_stop() || + (schedule_timeout(20*HZ) == 0)) { + finish_wait(&nn->nfsd_ssc_waitq, &wait); + kfree(work); +diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c +index 0c75636054a54..a8190caf77f17 100644 +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -952,15 +952,6 @@ nfsd(void *vrqstp) + + current->fs->umask = 0; + +- /* +- * thread is spawned with all signals set to SIG_IGN, re-enable +- * the ones that will bring down the thread +- */ +- allow_signal(SIGKILL); +- allow_signal(SIGHUP); +- allow_signal(SIGINT); +- allow_signal(SIGQUIT); +- + atomic_inc(&nfsdstats.th_cnt); + + set_freezable(); +@@ -985,9 +976,6 @@ nfsd(void *vrqstp) + validate_process_creds(); + } + +- /* Clear signals before calling svc_exit_thread() */ +- flush_signals(current); +- + atomic_dec(&nfsdstats.th_cnt); + + out: +diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c +index 49d0d4ea63fcd..0d56a8d862e80 100644 +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata( + * the busy list. 
+ */ + bma->datatype = XFS_ALLOC_NOBUSY; +- if (whichfork == XFS_DATA_FORK) { ++ if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { + bma->datatype |= XFS_ALLOC_USERDATA; + if (bma->offset == 0) + bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; +@@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc( + * the extent. Just return the real extent at this offset. + */ + if (!isnullstartblock(bma.got.br_startblock)) { +- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); ++ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, ++ xfs_iomap_inode_sequence(ip, flags)); + *seq = READ_ONCE(ifp->if_seq); + goto out_trans_cancel; + } +@@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc( + XFS_STATS_INC(mp, xs_xstrat_quick); + + ASSERT(!isnullstartblock(bma.got.br_startblock)); +- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); ++ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, ++ xfs_iomap_inode_sequence(ip, flags)); + *seq = READ_ONCE(ifp->if_seq); + + if (whichfork == XFS_COW_FORK) +diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h +index 5362908164b0b..580ccbd5aadc2 100644 +--- a/fs/xfs/libxfs/xfs_errortag.h ++++ b/fs/xfs/libxfs/xfs_errortag.h +@@ -40,13 +40,12 @@ + #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 + #define XFS_ERRTAG_BMAP_FINISH_ONE 26 + #define XFS_ERRTAG_AG_RESV_CRITICAL 27 ++ + /* +- * DEBUG mode instrumentation to test and/or trigger delayed allocation +- * block killing in the event of failed writes. When enabled, all +- * buffered writes are silenty dropped and handled as if they failed. +- * All delalloc blocks in the range of the write (including pre-existing +- * delalloc blocks!) are tossed as part of the write failure error +- * handling sequence. ++ * Drop-writes support removed because write error handling cannot trash ++ * pre-existing delalloc extents in any useful way anymore. We retain the ++ * definition so that we can reject it as an invalid value in ++ * xfs_errortag_valid(). + */ + #define XFS_ERRTAG_DROP_WRITES 28 + #define XFS_ERRTAG_LOG_BAD_CRC 29 +@@ -95,7 +94,6 @@ + #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 + #define XFS_RANDOM_BMAP_FINISH_ONE 1 + #define XFS_RANDOM_AG_RESV_CRITICAL 4 +-#define XFS_RANDOM_DROP_WRITES 1 + #define XFS_RANDOM_LOG_BAD_CRC 1 + #define XFS_RANDOM_LOG_ITEM_PIN 1 + #define XFS_RANDOM_BUF_LRU_REF 2 +diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c +index 3f34bafe18dd1..6f7ed9288fe40 100644 +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -815,11 +815,136 @@ xfs_refcount_find_right_extents( + /* Is this extent valid? 
*/ + static inline bool + xfs_refc_valid( +- struct xfs_refcount_irec *rc) ++ const struct xfs_refcount_irec *rc) + { + return rc->rc_startblock != NULLAGBLOCK; + } + ++static inline xfs_nlink_t ++xfs_refc_merge_refcount( ++ const struct xfs_refcount_irec *irec, ++ enum xfs_refc_adjust_op adjust) ++{ ++ /* Once a record hits MAXREFCOUNT, it is pinned there forever */ ++ if (irec->rc_refcount == MAXREFCOUNT) ++ return MAXREFCOUNT; ++ return irec->rc_refcount + adjust; ++} ++ ++static inline bool ++xfs_refc_want_merge_center( ++ const struct xfs_refcount_irec *left, ++ const struct xfs_refcount_irec *cleft, ++ const struct xfs_refcount_irec *cright, ++ const struct xfs_refcount_irec *right, ++ bool cleft_is_cright, ++ enum xfs_refc_adjust_op adjust, ++ unsigned long long *ulenp) ++{ ++ unsigned long long ulen = left->rc_blockcount; ++ xfs_nlink_t new_refcount; ++ ++ /* ++ * To merge with a center record, both shoulder records must be ++ * adjacent to the record we want to adjust. This is only true if ++ * find_left and find_right made all four records valid. ++ */ ++ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || ++ !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) ++ return false; ++ ++ /* There must only be one record for the entire range. */ ++ if (!cleft_is_cright) ++ return false; ++ ++ /* The shoulder record refcounts must match the new refcount. */ ++ new_refcount = xfs_refc_merge_refcount(cleft, adjust); ++ if (left->rc_refcount != new_refcount) ++ return false; ++ if (right->rc_refcount != new_refcount) ++ return false; ++ ++ /* ++ * The new record cannot exceed the max length. ulen is a ULL as the ++ * individual record block counts can be up to (u32 - 1) in length ++ * hence we need to catch u32 addition overflows here. ++ */ ++ ulen += cleft->rc_blockcount + right->rc_blockcount; ++ if (ulen >= MAXREFCEXTLEN) ++ return false; ++ ++ *ulenp = ulen; ++ return true; ++} ++ ++static inline bool ++xfs_refc_want_merge_left( ++ const struct xfs_refcount_irec *left, ++ const struct xfs_refcount_irec *cleft, ++ enum xfs_refc_adjust_op adjust) ++{ ++ unsigned long long ulen = left->rc_blockcount; ++ xfs_nlink_t new_refcount; ++ ++ /* ++ * For a left merge, the left shoulder record must be adjacent to the ++ * start of the range. If this is true, find_left made left and cleft ++ * contain valid contents. ++ */ ++ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) ++ return false; ++ ++ /* Left shoulder record refcount must match the new refcount. */ ++ new_refcount = xfs_refc_merge_refcount(cleft, adjust); ++ if (left->rc_refcount != new_refcount) ++ return false; ++ ++ /* ++ * The new record cannot exceed the max length. ulen is a ULL as the ++ * individual record block counts can be up to (u32 - 1) in length ++ * hence we need to catch u32 addition overflows here. ++ */ ++ ulen += cleft->rc_blockcount; ++ if (ulen >= MAXREFCEXTLEN) ++ return false; ++ ++ return true; ++} ++ ++static inline bool ++xfs_refc_want_merge_right( ++ const struct xfs_refcount_irec *cright, ++ const struct xfs_refcount_irec *right, ++ enum xfs_refc_adjust_op adjust) ++{ ++ unsigned long long ulen = right->rc_blockcount; ++ xfs_nlink_t new_refcount; ++ ++ /* ++ * For a right merge, the right shoulder record must be adjacent to the ++ * end of the range. If this is true, find_right made cright and right ++ * contain valid contents. ++ */ ++ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) ++ return false; ++ ++ /* Right shoulder record refcount must match the new refcount. 
*/ ++ new_refcount = xfs_refc_merge_refcount(cright, adjust); ++ if (right->rc_refcount != new_refcount) ++ return false; ++ ++ /* ++ * The new record cannot exceed the max length. ulen is a ULL as the ++ * individual record block counts can be up to (u32 - 1) in length ++ * hence we need to catch u32 addition overflows here. ++ */ ++ ulen += cright->rc_blockcount; ++ if (ulen >= MAXREFCEXTLEN) ++ return false; ++ ++ return true; ++} ++ + /* + * Try to merge with any extents on the boundaries of the adjustment range. + */ +@@ -861,23 +986,15 @@ xfs_refcount_merge_extents( + (cleft.rc_blockcount == cright.rc_blockcount); + + /* Try to merge left, cleft, and right. cleft must == cright. */ +- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + +- right.rc_blockcount; +- if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && +- xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && +- left.rc_refcount == cleft.rc_refcount + adjust && +- right.rc_refcount == cleft.rc_refcount + adjust && +- ulen < MAXREFCEXTLEN) { ++ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, ++ adjust, &ulen)) { + *shape_changed = true; + return xfs_refcount_merge_center_extents(cur, &left, &cleft, + &right, ulen, aglen); + } + + /* Try to merge left and cleft. */ +- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; +- if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && +- left.rc_refcount == cleft.rc_refcount + adjust && +- ulen < MAXREFCEXTLEN) { ++ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { + *shape_changed = true; + error = xfs_refcount_merge_left_extent(cur, &left, &cleft, + agbno, aglen); +@@ -893,10 +1010,7 @@ xfs_refcount_merge_extents( + } + + /* Try to merge cright and right. */ +- ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; +- if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && +- right.rc_refcount == cright.rc_refcount + adjust && +- ulen < MAXREFCEXTLEN) { ++ if (xfs_refc_want_merge_right(&cright, &right, adjust)) { + *shape_changed = true; + return xfs_refcount_merge_right_extent(cur, &right, &cright, + aglen); +diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c +index b6a584e044be0..bf2cca78304eb 100644 +--- a/fs/xfs/libxfs/xfs_sb.c ++++ b/fs/xfs/libxfs/xfs_sb.c +@@ -266,7 +266,8 @@ xfs_validate_sb_write( + return -EFSCORRUPTED; + } + +- if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { ++ if (!xfs_is_readonly(mp) && ++ xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + xfs_alert(mp, + "Corruption detected in superblock read-only compatible features (0x%x)!", + (sbp->sb_features_ro_compat & +@@ -973,7 +974,9 @@ xfs_log_sb( + */ + if (xfs_has_lazysbcount(mp)) { + mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); +- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); ++ mp->m_sb.sb_ifree = min_t(uint64_t, ++ percpu_counter_sum(&mp->m_ifree), ++ mp->m_sb.sb_icount); + mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + } + +diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c +index 5d1a995b15f83..21c241e96d483 100644 +--- a/fs/xfs/xfs_aops.c ++++ b/fs/xfs/xfs_aops.c +@@ -114,9 +114,8 @@ xfs_end_ioend( + if (unlikely(error)) { + if (ioend->io_flags & IOMAP_F_SHARED) { + xfs_reflink_cancel_cow_range(ip, offset, size, true); +- xfs_bmap_punch_delalloc_range(ip, +- XFS_B_TO_FSBT(mp, offset), +- XFS_B_TO_FSB(mp, size)); ++ xfs_bmap_punch_delalloc_range(ip, offset, ++ offset + size); + } + goto done; + } +@@ -373,7 +372,7 @@ 
xfs_map_blocks( + isnullstartblock(imap.br_startblock)) + goto allocate_blocks; + +- xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0); ++ xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); + trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); + return 0; + allocate_blocks: +@@ -440,27 +439,25 @@ xfs_prepare_ioend( + } + + /* +- * If the page has delalloc blocks on it, we need to punch them out before we +- * invalidate the page. If we don't, we leave a stale delalloc mapping on the +- * inode that can trip up a later direct I/O read operation on the same region. ++ * If the folio has delalloc blocks on it, the caller is asking us to punch them ++ * out. If we don't, we can leave a stale delalloc mapping covered by a clean ++ * page that needs to be dirtied again before the delalloc mapping can be ++ * converted. This stale delalloc mapping can trip up a later direct I/O read ++ * operation on the same region. + * +- * We prevent this by truncating away the delalloc regions on the page. Because ++ * We prevent this by truncating away the delalloc regions on the folio. Because + * they are delalloc, we can do this without needing a transaction. Indeed - if + * we get ENOSPC errors, we have to be able to do this truncation without a +- * transaction as there is no space left for block reservation (typically why we +- * see a ENOSPC in writeback). ++ * transaction as there is no space left for block reservation (typically why ++ * we see a ENOSPC in writeback). + */ + static void + xfs_discard_folio( + struct folio *folio, + loff_t pos) + { +- struct inode *inode = folio->mapping->host; +- struct xfs_inode *ip = XFS_I(inode); ++ struct xfs_inode *ip = XFS_I(folio->mapping->host); + struct xfs_mount *mp = ip->i_mount; +- size_t offset = offset_in_folio(folio, pos); +- xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos); +- xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, offset); + int error; + + if (xfs_is_shutdown(mp)) +@@ -470,8 +467,14 @@ xfs_discard_folio( + "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", + folio, ip->i_ino, pos); + +- error = xfs_bmap_punch_delalloc_range(ip, start_fsb, +- i_blocks_per_folio(inode, folio) - pageoff_fsb); ++ /* ++ * The end of the punch range is always the offset of the the first ++ * byte of the next folio. Hence the end offset is only dependent on the ++ * folio itself and not the start offset that is passed in. 
++ */ ++ error = xfs_bmap_punch_delalloc_range(ip, pos, ++ folio_pos(folio) + folio_size(folio)); ++ + if (error && !xfs_is_shutdown(mp)) + xfs_alert(mp, "page discard unable to remove delalloc mapping."); + } +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c +index 04d0c2bff67c4..867645b74d889 100644 +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -590,11 +590,13 @@ xfs_getbmap( + int + xfs_bmap_punch_delalloc_range( + struct xfs_inode *ip, +- xfs_fileoff_t start_fsb, +- xfs_fileoff_t length) ++ xfs_off_t start_byte, ++ xfs_off_t end_byte) + { ++ struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = &ip->i_df; +- xfs_fileoff_t end_fsb = start_fsb + length; ++ xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); ++ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); + struct xfs_bmbt_irec got, del; + struct xfs_iext_cursor icur; + int error = 0; +@@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range( + + while (got.br_startoff + got.br_blockcount > start_fsb) { + del = got; +- xfs_trim_extent(&del, start_fsb, length); ++ xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb); + + /* + * A delete can push the cursor forward. Step back to the +diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h +index 24b37d211f1dc..6888078f5c31e 100644 +--- a/fs/xfs/xfs_bmap_util.h ++++ b/fs/xfs/xfs_bmap_util.h +@@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) + #endif /* CONFIG_XFS_RT */ + + int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, +- xfs_fileoff_t start_fsb, xfs_fileoff_t length); ++ xfs_off_t start_byte, xfs_off_t end_byte); + + struct kgetbmap { + __s64 bmv_offset; /* file offset of segment in blocks */ +diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c +index dde346450952a..54c774af6e1c6 100644 +--- a/fs/xfs/xfs_buf.c ++++ b/fs/xfs/xfs_buf.c +@@ -1945,6 +1945,7 @@ xfs_free_buftarg( + list_lru_destroy(&btp->bt_lru); + + blkdev_issue_flush(btp->bt_bdev); ++ invalidate_bdev(btp->bt_bdev); + fs_put_dax(btp->bt_daxdev, btp->bt_mount); + + kmem_free(btp); +diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c +index 522d450a94b18..df7322ed73fa9 100644 +--- a/fs/xfs/xfs_buf_item.c ++++ b/fs/xfs/xfs_buf_item.c +@@ -1018,6 +1018,8 @@ xfs_buf_item_relse( + trace_xfs_buf_item_relse(bp, _RET_IP_); + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + ++ if (atomic_read(&bip->bli_refcount)) ++ return; + bp->b_log_item = NULL; + xfs_buf_rele(bp); + xfs_buf_item_free(bip); +diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c +index c6b2aabd6f187..dea3c0649d2f7 100644 +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = { + XFS_RANDOM_REFCOUNT_FINISH_ONE, + XFS_RANDOM_BMAP_FINISH_ONE, + XFS_RANDOM_AG_RESV_CRITICAL, +- XFS_RANDOM_DROP_WRITES, ++ 0, /* XFS_RANDOM_DROP_WRITES has been removed */ + XFS_RANDOM_LOG_BAD_CRC, + XFS_RANDOM_LOG_ITEM_PIN, + XFS_RANDOM_BUF_LRU_REF, +@@ -162,7 +162,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA + XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE); + XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE); + XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL); +-XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); + XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); + XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); + XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); +@@ -206,7 +205,6 @@ static struct 
attribute *xfs_errortag_attrs[] = { + XFS_ERRORTAG_ATTR_LIST(refcount_finish_one), + XFS_ERRORTAG_ATTR_LIST(bmap_finish_one), + XFS_ERRORTAG_ATTR_LIST(ag_resv_critical), +- XFS_ERRORTAG_ATTR_LIST(drop_writes), + XFS_ERRORTAG_ATTR_LIST(log_bad_crc), + XFS_ERRORTAG_ATTR_LIST(log_item_pin), + XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), +@@ -256,6 +254,19 @@ xfs_errortag_del( + kmem_free(mp->m_errortag); + } + ++static bool ++xfs_errortag_valid( ++ unsigned int error_tag) ++{ ++ if (error_tag >= XFS_ERRTAG_MAX) ++ return false; ++ ++ /* Error out removed injection types */ ++ if (error_tag == XFS_ERRTAG_DROP_WRITES) ++ return false; ++ return true; ++} ++ + bool + xfs_errortag_test( + struct xfs_mount *mp, +@@ -277,7 +288,9 @@ xfs_errortag_test( + if (!mp->m_errortag) + return false; + +- ASSERT(error_tag < XFS_ERRTAG_MAX); ++ if (!xfs_errortag_valid(error_tag)) ++ return false; ++ + randfactor = mp->m_errortag[error_tag]; + if (!randfactor || prandom_u32_max(randfactor)) + return false; +@@ -293,7 +306,7 @@ xfs_errortag_get( + struct xfs_mount *mp, + unsigned int error_tag) + { +- if (error_tag >= XFS_ERRTAG_MAX) ++ if (!xfs_errortag_valid(error_tag)) + return -EINVAL; + + return mp->m_errortag[error_tag]; +@@ -305,7 +318,7 @@ xfs_errortag_set( + unsigned int error_tag, + unsigned int tag_value) + { +- if (error_tag >= XFS_ERRTAG_MAX) ++ if (!xfs_errortag_valid(error_tag)) + return -EINVAL; + + mp->m_errortag[error_tag] = tag_value; +@@ -319,7 +332,7 @@ xfs_errortag_add( + { + BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); + +- if (error_tag >= XFS_ERRTAG_MAX) ++ if (!xfs_errortag_valid(error_tag)) + return -EINVAL; + + return xfs_errortag_set(mp, error_tag, +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index e462d39c840e6..595a5bcf46b94 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -1325,7 +1325,7 @@ __xfs_filemap_fault( + if (write_fault) { + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + ret = iomap_page_mkwrite(vmf, +- &xfs_buffered_write_iomap_ops); ++ &xfs_page_mkwrite_iomap_ops); + xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + } else { + ret = filemap_fault(vmf); +diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c +index 13851c0d640bc..332da0d7b85cf 100644 +--- a/fs/xfs/xfs_fsops.c ++++ b/fs/xfs/xfs_fsops.c +@@ -129,6 +129,10 @@ xfs_growfs_data_private( + if (delta < 0 && nagcount < 2) + return -EINVAL; + ++ /* No work to do */ ++ if (delta == 0) ++ return 0; ++ + oagcount = mp->m_sb.sb_agcount; + /* allocate the new per-ag structures */ + if (nagcount > oagcount) { +diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c +index d884cba1d7072..dd5a664c294f5 100644 +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -342,6 +342,9 @@ xfs_iget_recycle( + + trace_xfs_iget_recycle(ip); + ++ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) ++ return -EAGAIN; ++ + /* + * We need to make it look like the inode is being reclaimed to prevent + * the actual reclaim workers from stomping over us while we recycle +@@ -355,6 +358,7 @@ xfs_iget_recycle( + + ASSERT(!rwsem_is_locked(&inode->i_rwsem)); + error = xfs_reinit_inode(mp, inode); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) { + /* + * Re-initializing the inode failed, and we are in deep +@@ -523,6 +527,8 @@ xfs_iget_cache_hit( + if (ip->i_flags & XFS_IRECLAIMABLE) { + /* Drops i_flags_lock and RCU read lock. 
*/ + error = xfs_iget_recycle(pag, ip); ++ if (error == -EAGAIN) ++ goto out_skip; + if (error) + return error; + } else { +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index aa303be11576f..54b707787f907 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -1652,8 +1652,11 @@ xfs_inode_needs_inactive( + if (VFS_I(ip)->i_mode == 0) + return false; + +- /* If this is a read-only mount, don't do this (would generate I/O) */ +- if (xfs_is_readonly(mp)) ++ /* ++ * If this is a read-only mount, don't do this (would generate I/O) ++ * unless we're in log recovery and cleaning the iunlinked list. ++ */ ++ if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) + return false; + + /* If the log isn't running, push inodes straight to reclaim. */ +@@ -1713,8 +1716,11 @@ xfs_inactive( + mp = ip->i_mount; + ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); + +- /* If this is a read-only mount, don't do this (would generate I/O) */ +- if (xfs_is_readonly(mp)) ++ /* ++ * If this is a read-only mount, don't do this (would generate I/O) ++ * unless we're in log recovery and cleaning the iunlinked list. ++ */ ++ if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) + goto out; + + /* Metadata inodes require explicit resource cleanup. */ +@@ -2479,7 +2485,7 @@ xfs_remove( + error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, + tp->t_mountp->m_sb.sb_rootino, 0); + if (error) +- return error; ++ goto out_trans_cancel; + } + } else { + /* +diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index 1f783e9796296..85fbb3b71d1c6 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -754,7 +754,7 @@ xfs_bulkstat_fmt( + static int + xfs_bulk_ireq_setup( + struct xfs_mount *mp, +- struct xfs_bulk_ireq *hdr, ++ const struct xfs_bulk_ireq *hdr, + struct xfs_ibulk *breq, + void __user *ubuffer) + { +@@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( + + switch (hdr->ino) { + case XFS_BULK_IREQ_SPECIAL_ROOT: +- hdr->ino = mp->m_sb.sb_rootino; ++ breq->startino = mp->m_sb.sb_rootino; + break; + default: + return -EINVAL; +diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c +index 07da03976ec12..ab5512c0bcf7a 100644 +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -48,13 +48,45 @@ xfs_alert_fsblock_zero( + return -EFSCORRUPTED; + } + ++u64 ++xfs_iomap_inode_sequence( ++ struct xfs_inode *ip, ++ u16 iomap_flags) ++{ ++ u64 cookie = 0; ++ ++ if (iomap_flags & IOMAP_F_XATTR) ++ return READ_ONCE(ip->i_af.if_seq); ++ if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp) ++ cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32; ++ return cookie | READ_ONCE(ip->i_df.if_seq); ++} ++ ++/* ++ * Check that the iomap passed to us is still valid for the given offset and ++ * length. 
++ */ ++static bool ++xfs_iomap_valid( ++ struct inode *inode, ++ const struct iomap *iomap) ++{ ++ return iomap->validity_cookie == ++ xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags); ++} ++ ++const struct iomap_page_ops xfs_iomap_page_ops = { ++ .iomap_valid = xfs_iomap_valid, ++}; ++ + int + xfs_bmbt_to_iomap( + struct xfs_inode *ip, + struct iomap *iomap, + struct xfs_bmbt_irec *imap, + unsigned int mapping_flags, +- u16 iomap_flags) ++ u16 iomap_flags, ++ u64 sequence_cookie) + { + struct xfs_mount *mp = ip->i_mount; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); +@@ -91,6 +123,9 @@ xfs_bmbt_to_iomap( + if (xfs_ipincount(ip) && + (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) + iomap->flags |= IOMAP_F_DIRTY; ++ ++ iomap->validity_cookie = sequence_cookie; ++ iomap->page_ops = &xfs_iomap_page_ops; + return 0; + } + +@@ -195,7 +230,8 @@ xfs_iomap_write_direct( + xfs_fileoff_t offset_fsb, + xfs_fileoff_t count_fsb, + unsigned int flags, +- struct xfs_bmbt_irec *imap) ++ struct xfs_bmbt_irec *imap, ++ u64 *seq) + { + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; +@@ -285,6 +321,7 @@ xfs_iomap_write_direct( + error = xfs_alert_fsblock_zero(ip, imap); + + out_unlock: ++ *seq = xfs_iomap_inode_sequence(ip, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + +@@ -743,6 +780,7 @@ xfs_direct_write_iomap_begin( + bool shared = false; + u16 iomap_flags = 0; + unsigned int lockmode = XFS_ILOCK_SHARED; ++ u64 seq; + + ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); + +@@ -811,9 +849,10 @@ xfs_direct_write_iomap_begin( + goto out_unlock; + } + ++ seq = xfs_iomap_inode_sequence(ip, iomap_flags); + xfs_iunlock(ip, lockmode); + trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); +- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags); ++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); + + allocate_blocks: + error = -EAGAIN; +@@ -839,24 +878,26 @@ xfs_direct_write_iomap_begin( + xfs_iunlock(ip, lockmode); + + error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, +- flags, &imap); ++ flags, &imap, &seq); + if (error) + return error; + + trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, +- iomap_flags | IOMAP_F_NEW); ++ iomap_flags | IOMAP_F_NEW, seq); + + out_found_cow: +- xfs_iunlock(ip, lockmode); + length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); + trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); + if (imap.br_startblock != HOLESTARTBLOCK) { +- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); ++ seq = xfs_iomap_inode_sequence(ip, 0); ++ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); + if (error) +- return error; ++ goto out_unlock; + } +- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED); ++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); ++ xfs_iunlock(ip, lockmode); ++ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); + + out_unlock: + if (lockmode) +@@ -915,6 +956,7 @@ xfs_buffered_write_iomap_begin( + int allocfork = XFS_DATA_FORK; + int error = 0; + unsigned int lockmode = XFS_ILOCK_EXCL; ++ u64 seq; + + if (xfs_is_shutdown(mp)) + return -EIO; +@@ -926,6 +968,10 @@ xfs_buffered_write_iomap_begin( + + ASSERT(!XFS_IS_REALTIME_INODE(ip)); + ++ error = xfs_qm_dqattach(ip); ++ if (error) ++ return error; ++ + error = xfs_ilock_for_iomap(ip, flags, &lockmode); + if (error) + return error; +@@ -1029,10 +1075,6 @@ 
xfs_buffered_write_iomap_begin( + allocfork = XFS_COW_FORK; + } + +- error = xfs_qm_dqattach_locked(ip, false); +- if (error) +- goto out_unlock; +- + if (eof && offset + count > XFS_ISIZE(ip)) { + /* + * Determine the initial size of the preallocation. +@@ -1094,32 +1136,47 @@ xfs_buffered_write_iomap_begin( + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ ++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); +- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW); ++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); + + found_imap: ++ seq = xfs_iomap_inode_sequence(ip, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); ++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); + + found_cow: +- xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ seq = xfs_iomap_inode_sequence(ip, 0); + if (imap.br_startoff <= offset_fsb) { +- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); ++ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); + if (error) +- return error; ++ goto out_unlock; ++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, +- IOMAP_F_SHARED); ++ IOMAP_F_SHARED, seq); + } + + xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); +- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + } + ++static int ++xfs_buffered_write_delalloc_punch( ++ struct inode *inode, ++ loff_t offset, ++ loff_t length) ++{ ++ return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, ++ offset + length); ++} ++ + static int + xfs_buffered_write_iomap_end( + struct inode *inode, +@@ -1129,56 +1186,17 @@ xfs_buffered_write_iomap_end( + unsigned flags, + struct iomap *iomap) + { +- struct xfs_inode *ip = XFS_I(inode); +- struct xfs_mount *mp = ip->i_mount; +- xfs_fileoff_t start_fsb; +- xfs_fileoff_t end_fsb; +- int error = 0; +- +- if (iomap->type != IOMAP_DELALLOC) +- return 0; +- +- /* +- * Behave as if the write failed if drop writes is enabled. Set the NEW +- * flag to force delalloc cleanup. +- */ +- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { +- iomap->flags |= IOMAP_F_NEW; +- written = 0; +- } + +- /* +- * start_fsb refers to the first unused block after a short write. If +- * nothing was written, round offset down to point at the first block in +- * the range. +- */ +- if (unlikely(!written)) +- start_fsb = XFS_B_TO_FSBT(mp, offset); +- else +- start_fsb = XFS_B_TO_FSB(mp, offset + written); +- end_fsb = XFS_B_TO_FSB(mp, offset + length); ++ struct xfs_mount *mp = XFS_M(inode->i_sb); ++ int error; + +- /* +- * Trim delalloc blocks if they were allocated by this write and we +- * didn't manage to write the whole range. +- * +- * We don't need to care about racing delalloc as we hold i_mutex +- * across the reserve/allocate/unreserve calls. If there are delalloc +- * blocks in the range, they are ours. 
+- */ +- if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { +- truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), +- XFS_FSB_TO_B(mp, end_fsb) - 1); +- +- error = xfs_bmap_punch_delalloc_range(ip, start_fsb, +- end_fsb - start_fsb); +- if (error && !xfs_is_shutdown(mp)) { +- xfs_alert(mp, "%s: unable to clean up ino %lld", +- __func__, ip->i_ino); +- return error; +- } ++ error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, ++ length, written, &xfs_buffered_write_delalloc_punch); ++ if (error && !xfs_is_shutdown(mp)) { ++ xfs_alert(mp, "%s: unable to clean up ino 0x%llx", ++ __func__, XFS_I(inode)->i_ino); ++ return error; + } +- + return 0; + } + +@@ -1187,6 +1205,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = { + .iomap_end = xfs_buffered_write_iomap_end, + }; + ++/* ++ * iomap_page_mkwrite() will never fail in a way that requires delalloc extents ++ * that it allocated to be revoked. Hence we do not need an .iomap_end method ++ * for this operation. ++ */ ++const struct iomap_ops xfs_page_mkwrite_iomap_ops = { ++ .iomap_begin = xfs_buffered_write_iomap_begin, ++}; ++ + static int + xfs_read_iomap_begin( + struct inode *inode, +@@ -1204,6 +1231,7 @@ xfs_read_iomap_begin( + int nimaps = 1, error = 0; + bool shared = false; + unsigned int lockmode = XFS_ILOCK_SHARED; ++ u64 seq; + + ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); + +@@ -1217,13 +1245,14 @@ xfs_read_iomap_begin( + &nimaps, 0); + if (!error && (flags & IOMAP_REPORT)) + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); ++ seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0); + xfs_iunlock(ip, lockmode); + + if (error) + return error; + trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, +- shared ? IOMAP_F_SHARED : 0); ++ shared ? 
IOMAP_F_SHARED : 0, seq); + } + + const struct iomap_ops xfs_read_iomap_ops = { +@@ -1248,6 +1277,7 @@ xfs_seek_iomap_begin( + struct xfs_bmbt_irec imap, cmap; + int error = 0; + unsigned lockmode; ++ u64 seq; + + if (xfs_is_shutdown(mp)) + return -EIO; +@@ -1282,8 +1312,9 @@ xfs_seek_iomap_begin( + if (data_fsb < cow_fsb + cmap.br_blockcount) + end_fsb = min(end_fsb, data_fsb); + xfs_trim_extent(&cmap, offset_fsb, end_fsb); ++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, +- IOMAP_F_SHARED); ++ IOMAP_F_SHARED, seq); + /* + * This is a COW extent, so we must probe the page cache + * because there could be dirty page cache being backed +@@ -1304,8 +1335,9 @@ xfs_seek_iomap_begin( + imap.br_startblock = HOLESTARTBLOCK; + imap.br_state = XFS_EXT_NORM; + done: ++ seq = xfs_iomap_inode_sequence(ip, 0); + xfs_trim_extent(&imap, offset_fsb, end_fsb); +- error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); ++ error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); + out_unlock: + xfs_iunlock(ip, lockmode); + return error; +@@ -1331,6 +1363,7 @@ xfs_xattr_iomap_begin( + struct xfs_bmbt_irec imap; + int nimaps = 1, error = 0; + unsigned lockmode; ++ int seq; + + if (xfs_is_shutdown(mp)) + return -EIO; +@@ -1347,12 +1380,14 @@ xfs_xattr_iomap_begin( + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, XFS_BMAPI_ATTRFORK); + out_unlock: ++ ++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR); + xfs_iunlock(ip, lockmode); + + if (error) + return error; + ASSERT(nimaps); +- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); ++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq); + } + + const struct iomap_ops xfs_xattr_iomap_ops = { +diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h +index c782e8c0479c0..4da13440bae9b 100644 +--- a/fs/xfs/xfs_iomap.h ++++ b/fs/xfs/xfs_iomap.h +@@ -13,14 +13,15 @@ struct xfs_bmbt_irec; + + int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb, + xfs_fileoff_t count_fsb, unsigned int flags, +- struct xfs_bmbt_irec *imap); ++ struct xfs_bmbt_irec *imap, u64 *sequence); + int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); + xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip, + xfs_fileoff_t end_fsb); + ++u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags); + int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap, + struct xfs_bmbt_irec *imap, unsigned int mapping_flags, +- u16 iomap_flags); ++ u16 iomap_flags, u64 sequence_cookie); + + int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len, + bool *did_zero); +@@ -47,6 +48,7 @@ xfs_aligned_fsb_count( + } + + extern const struct iomap_ops xfs_buffered_write_iomap_ops; ++extern const struct iomap_ops xfs_page_mkwrite_iomap_ops; + extern const struct iomap_ops xfs_direct_write_iomap_ops; + extern const struct iomap_ops xfs_read_iomap_ops; + extern const struct iomap_ops xfs_seek_iomap_ops; +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c +index f02a0dd522b3d..d9aa5eab02c3f 100644 +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -730,15 +730,7 @@ xfs_log_mount( + * just worked. + */ + if (!xfs_has_norecovery(mp)) { +- /* +- * log recovery ignores readonly state and so we need to clear +- * mount-based read only state so it can write to disk. 
+- */ +- bool readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, +- &mp->m_opstate); + error = xlog_recover(log); +- if (readonly) +- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + if (error) { + xfs_warn(mp, "log mount/recovery failed: error %d", + error); +@@ -787,7 +779,6 @@ xfs_log_mount_finish( + struct xfs_mount *mp) + { + struct xlog *log = mp->m_log; +- bool readonly; + int error = 0; + + if (xfs_has_norecovery(mp)) { +@@ -795,12 +786,6 @@ xfs_log_mount_finish( + return 0; + } + +- /* +- * log recovery ignores readonly state and so we need to clear +- * mount-based read only state so it can write to disk. +- */ +- readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); +- + /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. +@@ -850,8 +835,6 @@ xfs_log_mount_finish( + xfs_buftarg_drain(mp->m_ddev_targp); + + clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); +- if (readonly) +- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + + /* Make sure the log is dead if we're returning failure. */ + ASSERT(!error || xlog_is_shutdown(log)); +@@ -886,6 +869,23 @@ xlog_force_iclog( + return xlog_state_release_iclog(iclog->ic_log, iclog, NULL); + } + ++/* ++ * Cycle all the iclogbuf locks to make sure all log IO completion ++ * is done before we tear down these buffers. ++ */ ++static void ++xlog_wait_iclog_completion(struct xlog *log) ++{ ++ int i; ++ struct xlog_in_core *iclog = log->l_iclog; ++ ++ for (i = 0; i < log->l_iclog_bufs; i++) { ++ down(&iclog->ic_sema); ++ up(&iclog->ic_sema); ++ iclog = iclog->ic_next; ++ } ++} ++ + /* + * Wait for the iclog and all prior iclogs to be written disk as required by the + * log force state machine. Waiting on ic_force_wait ensures iclog completions +@@ -1111,6 +1111,14 @@ xfs_log_unmount( + { + xfs_log_clean(mp); + ++ /* ++ * If shutdown has come from iclog IO context, the log ++ * cleaning will have been skipped and so we need to wait ++ * for the iclog to complete shutdown processing before we ++ * tear anything down. ++ */ ++ xlog_wait_iclog_completion(mp->m_log); ++ + xfs_buftarg_drain(mp->m_ddev_targp); + + xfs_trans_ail_destroy(mp); +@@ -2113,17 +2121,6 @@ xlog_dealloc_log( + xlog_in_core_t *iclog, *next_iclog; + int i; + +- /* +- * Cycle all the iclogbuf locks to make sure all log IO completion +- * is done before we tear down these buffers. +- */ +- iclog = log->l_iclog; +- for (i = 0; i < log->l_iclog_bufs; i++) { +- down(&iclog->ic_sema); +- up(&iclog->ic_sema); +- iclog = iclog->ic_next; +- } +- + /* + * Destroy the CIL after waiting for iclog IO completion because an + * iclog EIO error will try to shut down the log, which accesses the +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c +index e8bb3c2e847e1..fb87ffb48f7fe 100644 +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -538,6 +538,20 @@ xfs_check_summary_counts( + return 0; + } + ++static void ++xfs_unmount_check( ++ struct xfs_mount *mp) ++{ ++ if (xfs_is_shutdown(mp)) ++ return; ++ ++ if (percpu_counter_sum(&mp->m_ifree) > ++ percpu_counter_sum(&mp->m_icount)) { ++ xfs_alert(mp, "ifree/icount mismatch at unmount"); ++ xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); ++ } ++} ++ + /* + * Flush and reclaim dirty inodes in preparation for unmount. Inodes and + * internal inode structures can be sitting in the CIL and AIL at this point, +@@ -1077,6 +1091,7 @@ xfs_unmountfs( + if (error) + xfs_warn(mp, "Unable to free reserved block pool. 
" + "Freespace may not be correct on next mount."); ++ xfs_unmount_check(mp); + + xfs_log_unmount(mp); + xfs_da_unmount(mp); +diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c +index 37a24f0f7cd40..38d23f0e703a8 100644 +--- a/fs/xfs/xfs_pnfs.c ++++ b/fs/xfs/xfs_pnfs.c +@@ -125,6 +125,7 @@ xfs_fs_map_blocks( + int nimaps = 1; + uint lock_flags; + int error = 0; ++ u64 seq; + + if (xfs_is_shutdown(mp)) + return -EIO; +@@ -176,6 +177,7 @@ xfs_fs_map_blocks( + lock_flags = xfs_ilock_data_map_shared(ip); + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, + &imap, &nimaps, bmapi_flags); ++ seq = xfs_iomap_inode_sequence(ip, 0); + + ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); + +@@ -189,7 +191,7 @@ xfs_fs_map_blocks( + xfs_iunlock(ip, lock_flags); + + error = xfs_iomap_write_direct(ip, offset_fsb, +- end_fsb - offset_fsb, 0, &imap); ++ end_fsb - offset_fsb, 0, &imap, &seq); + if (error) + goto out_unlock; + +@@ -209,7 +211,7 @@ xfs_fs_map_blocks( + } + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + +- error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0); ++ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq); + *device_generation = mp->m_generation; + return error; + out_unlock: +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 238a03087e17e..0983dfc9a203c 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -49,26 +49,35 @@ struct vm_fault; + * + * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of + * buffer heads for this mapping. ++ * ++ * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent ++ * rather than a file data extent. + */ +-#define IOMAP_F_NEW 0x01 +-#define IOMAP_F_DIRTY 0x02 +-#define IOMAP_F_SHARED 0x04 +-#define IOMAP_F_MERGED 0x08 +-#define IOMAP_F_BUFFER_HEAD 0x10 +-#define IOMAP_F_ZONE_APPEND 0x20 ++#define IOMAP_F_NEW (1U << 0) ++#define IOMAP_F_DIRTY (1U << 1) ++#define IOMAP_F_SHARED (1U << 2) ++#define IOMAP_F_MERGED (1U << 3) ++#define IOMAP_F_BUFFER_HEAD (1U << 4) ++#define IOMAP_F_ZONE_APPEND (1U << 5) ++#define IOMAP_F_XATTR (1U << 6) + + /* + * Flags set by the core iomap code during operations: + * + * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size + * has changed as the result of this write operation. ++ * ++ * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file ++ * range it covers needs to be remapped by the high level before the operation ++ * can proceed. + */ +-#define IOMAP_F_SIZE_CHANGED 0x100 ++#define IOMAP_F_SIZE_CHANGED (1U << 8) ++#define IOMAP_F_STALE (1U << 9) + + /* + * Flags from 0x1000 up are for file system specific usage: + */ +-#define IOMAP_F_PRIVATE 0x1000 ++#define IOMAP_F_PRIVATE (1U << 12) + + + /* +@@ -89,6 +98,7 @@ struct iomap { + void *inline_data; + void *private; /* filesystem private */ + const struct iomap_page_ops *page_ops; ++ u64 validity_cookie; /* used with .iomap_valid() */ + }; + + static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) +@@ -128,6 +138,23 @@ struct iomap_page_ops { + int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); + void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, + struct page *page); ++ ++ /* ++ * Check that the cached iomap still maps correctly to the filesystem's ++ * internal extent map. FS internal extent maps can change while iomap ++ * is iterating a cached iomap, so this hook allows iomap to detect that ++ * the iomap needs to be refreshed during a long running write ++ * operation. 
++ * ++ * The filesystem can store internal state (e.g. a sequence number) in ++ * iomap->validity_cookie when the iomap is first mapped to be able to ++ * detect changes between mapping time and whenever .iomap_valid() is ++ * called. ++ * ++ * This is called with the folio over the specified file position held ++ * locked by the iomap code. ++ */ ++ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); + }; + + /* +@@ -226,6 +253,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) + + ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, + const struct iomap_ops *ops); ++int iomap_file_buffered_write_punch_delalloc(struct inode *inode, ++ struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, ++ int (*punch)(struct inode *inode, loff_t pos, loff_t length)); ++ + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); + void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); + bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); +diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c +index 8117d0e08d5a2..1393eefbf2187 100644 +--- a/net/sunrpc/svc_xprt.c ++++ b/net/sunrpc/svc_xprt.c +@@ -696,8 +696,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) + /* Made progress, don't sleep yet */ + continue; + +- set_current_state(TASK_INTERRUPTIBLE); +- if (signalled() || kthread_should_stop()) { ++ set_current_state(TASK_IDLE); ++ if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + return -EINTR; + } +@@ -733,7 +733,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) + return false; + + /* are we shutting down? */ +- if (signalled() || kthread_should_stop()) ++ if (kthread_should_stop()) + return false; + + /* are we freezing? */ +@@ -755,11 +755,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) + if (rqstp->rq_xprt) + goto out_found; + +- /* +- * We have to be able to interrupt this wait +- * to bring down the daemons ... 
+- */ +- set_current_state(TASK_INTERRUPTIBLE); ++ set_current_state(TASK_IDLE); + smp_mb__before_atomic(); + clear_bit(SP_CONGESTED, &pool->sp_flags); + clear_bit(RQ_BUSY, &rqstp->rq_flags); +@@ -781,7 +777,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) + if (!time_left) + atomic_long_inc(&pool->sp_stats.threads_timedout); + +- if (signalled() || kthread_should_stop()) ++ if (kthread_should_stop()) + return ERR_PTR(-EINTR); + return ERR_PTR(-EAGAIN); + out_found: +@@ -879,7 +875,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) + try_to_freeze(); + cond_resched(); + err = -EINTR; +- if (signalled() || kthread_should_stop()) ++ if (kthread_should_stop()) + goto out; + + xprt = svc_get_next_xprt(rqstp, timeout); +diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c +index bc700f85f80be..ea277c55a38db 100644 +--- a/security/keys/trusted-keys/trusted_tpm2.c ++++ b/security/keys/trusted-keys/trusted_tpm2.c +@@ -38,6 +38,7 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, + u8 *end_work = scratch + SCRATCH_SIZE; + u8 *priv, *pub; + u16 priv_len, pub_len; ++ int ret; + + priv_len = get_unaligned_be16(src) + 2; + priv = src; +@@ -57,8 +58,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, + unsigned char bool[3], *w = bool; + /* tag 0 is emptyAuth */ + w = asn1_encode_boolean(w, w + sizeof(bool), true); +- if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) +- return PTR_ERR(w); ++ if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) { ++ ret = PTR_ERR(w); ++ goto err; ++ } + work = asn1_encode_tag(work, end_work, 0, bool, w - bool); + } + +@@ -69,8 +72,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, + * trigger, so if it does there's something nefarious going on + */ + if (WARN(work - scratch + pub_len + priv_len + 14 > SCRATCH_SIZE, +- "BUG: scratch buffer is too small")) +- return -EINVAL; ++ "BUG: scratch buffer is too small")) { ++ ret = -EINVAL; ++ goto err; ++ } + + work = asn1_encode_integer(work, end_work, options->keyhandle); + work = asn1_encode_octet_string(work, end_work, pub, pub_len); +@@ -79,10 +84,18 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, + work1 = payload->blob; + work1 = asn1_encode_sequence(work1, work1 + sizeof(payload->blob), + scratch, work - scratch); +- if (WARN(IS_ERR(work1), "BUG: ASN.1 encoder failed")) +- return PTR_ERR(work1); ++ if (IS_ERR(work1)) { ++ ret = PTR_ERR(work1); ++ pr_err("BUG: ASN.1 encoder failed with %d\n", ret); ++ goto err; ++ } + ++ kfree(scratch); + return work1 - payload->blob; ++ ++err: ++ kfree(scratch); ++ return ret; + } + + struct tpm2_key_context { |