author     Mike Pagano <mpagano@gentoo.org>  2024-05-25 11:16:20 -0400
committer  Mike Pagano <mpagano@gentoo.org>  2024-05-25 11:16:20 -0400
commit     174de16ebc3197a705f61aa2262db6be4f421fd8 (patch)
tree       f783ed846133d77f97898838a70cce20e148a6b5
parent     Linux patch 6.1.91 (diff)
download   linux-patches-174de16ebc3197a705f61aa2262db6be4f421fd8.tar.gz
           linux-patches-174de16ebc3197a705f61aa2262db6be4f421fd8.tar.bz2
           linux-patches-174de16ebc3197a705f61aa2262db6be4f421fd8.zip
Linux patch 6.1.92 (6.1-101)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r--  0000_README                 4
-rw-r--r--  1091_linux-6.1.92.patch  2589
2 files changed, 2593 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index b8e8658e..0d537557 100644
--- a/0000_README
+++ b/0000_README
@@ -407,6 +407,10 @@ Patch: 1090_linux-6.1.91.patch
From: https://www.kernel.org
Desc: Linux 6.1.91
+Patch: 1091_linux-6.1.92.patch
+From: https://www.kernel.org
+Desc: Linux 6.1.92
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1091_linux-6.1.92.patch b/1091_linux-6.1.92.patch
new file mode 100644
index 00000000..fc3ed66e
--- /dev/null
+++ b/1091_linux-6.1.92.patch
@@ -0,0 +1,2589 @@
+diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
+index cf1eeefdfc32f..a92e10ec402e7 100644
+--- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst
++++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
+@@ -67,8 +67,8 @@ arg4:
+ will be performed for all tasks in the task group of ``pid``.
+
+ arg5:
+- userspace pointer to an unsigned long for storing the cookie returned by
+- ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands.
++ userspace pointer to an unsigned long long for storing the cookie returned
++ by ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands.
+
+ In order for a process to push a cookie to, or pull a cookie from a process, it
+ is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the
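The documentation fix above matters for ABI correctness: the cookie written back by ``PR_SCHED_CORE_GET`` is 64 bits wide even on 32-bit userspace, so the destination must be an unsigned long long. For reference, a minimal userspace sketch of the corrected calling convention (illustrative only, not part of the patch; pid 0 means the current task):

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>	/* PR_SCHED_CORE* (recent kernel headers) */

int main(void)
{
	unsigned long long cookie = 0;	/* 64-bit, per the corrected docs */

	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, 0,
		  PR_SCHED_CORE_SCOPE_THREAD, &cookie))
		perror("PR_SCHED_CORE_GET");
	else
		printf("core scheduling cookie: %llu\n", cookie);
	return 0;
}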
+diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py
+index abe7680883771..6387624423363 100755
+--- a/Documentation/sphinx/kernel_include.py
++++ b/Documentation/sphinx/kernel_include.py
+@@ -97,7 +97,6 @@ class KernelInclude(Include):
+ # HINT: this is the only line I had to change / commented out:
+ #path = utils.relative_path(None, path)
+
+- path = nodes.reprunicode(path)
+ encoding = self.options.get(
+ 'encoding', self.state.document.settings.input_encoding)
+ e_handler=self.state.document.settings.input_encoding_error_handler
+diff --git a/Makefile b/Makefile
+index a7d90996e4125..0be668057cb2a 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 6
+ PATCHLEVEL = 1
+-SUBLEVEL = 91
++SUBLEVEL = 92
+ EXTRAVERSION =
+ NAME = Curry Ramen
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index c15f71501c6c2..044b98a62f7bb 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -1752,7 +1752,6 @@ config ARM64_LSE_ATOMICS
+
+ config ARM64_USE_LSE_ATOMICS
+ bool "Atomic instructions"
+- depends on JUMP_LABEL
+ default y
+ help
+ As part of the Large System Extensions, ARMv8.1 introduces new
+diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
+index c503db8e73b01..f99d74826a7ef 100644
+--- a/arch/arm64/include/asm/lse.h
++++ b/arch/arm64/include/asm/lse.h
+@@ -10,7 +10,6 @@
+
+ #include <linux/compiler_types.h>
+ #include <linux/export.h>
+-#include <linux/jump_label.h>
+ #include <linux/stringify.h>
+ #include <asm/alternative.h>
+ #include <asm/alternative-macros.h>
+diff --git a/drivers/android/binder.c b/drivers/android/binder.c
+index 8c2b7c074eca1..46111f8c12e61 100644
+--- a/drivers/android/binder.c
++++ b/drivers/android/binder.c
+@@ -5350,7 +5350,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+ goto err;
+ break;
+ case BINDER_SET_MAX_THREADS: {
+- int max_threads;
++ u32 max_threads;
+
+ if (copy_from_user(&max_threads, ubuf,
+ sizeof(max_threads))) {
+diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
+index abe19d88c6ecc..c2c1bb3c1e60b 100644
+--- a/drivers/android/binder_internal.h
++++ b/drivers/android/binder_internal.h
+@@ -420,7 +420,7 @@ struct binder_proc {
+ struct list_head todo;
+ struct binder_stats stats;
+ struct list_head delivered_death;
+- int max_threads;
++ u32 max_threads;
+ int requested_threads;
+ int requested_threads_started;
+ int tmp_ref;
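The int-to-u32 change above matches the UAPI, where BINDER_SET_MAX_THREADS takes a __u32: storing that value in a signed int lets anything above INT_MAX wrap negative and defeat the thread-accounting comparisons. A toy sketch of the signed/unsigned hazard (plain C, illustrative only, not binder code):

#include <stdio.h>

int main(void)
{
	int started = 3;
	int max_signed = (int)0xFFFFFFFFu;	/* u32 stored in an int: -1 */
	unsigned int max_unsigned = 0xFFFFFFFFu;	/* same bits, unsigned */

	/* signed compare: 3 < -1 is false, so the limit looks exhausted */
	printf("signed:   may spawn = %d\n", started < max_signed);
	/* unsigned compare: 3 < 4294967295 is true, the intended meaning */
	printf("unsigned: may spawn = %d\n",
	       (unsigned int)started < max_unsigned);
	return 0;
}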
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index 9fe2eae88ec17..ee83d282b49a8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -974,6 +974,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
+ if (!obj)
+ return -EINVAL;
+
++ if (!info || info->head.block == AMDGPU_RAS_BLOCK_COUNT)
++ return -EINVAL;
++
+ if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
+ amdgpu_ras_get_ecc_info(adev, &err_data);
+ } else {
+diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+index d52cbc0e9b679..5f57bdd597c27 100644
+--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
++++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+@@ -924,7 +924,12 @@ static bool setup_dsc_config(
+ if (!is_dsc_possible)
+ goto done;
+
+- dsc_cfg->num_slices_v = pic_height/slice_height;
++ if (slice_height > 0) {
++ dsc_cfg->num_slices_v = pic_height / slice_height;
++ } else {
++ is_dsc_possible = false;
++ goto done;
++ }
+
+ if (target_bandwidth_kbps > 0) {
+ is_dsc_possible = decide_dsc_target_bpp_x16(
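The DSC hunk above replaces an unconditional division with a guard, failing the configuration instead of dividing when slice_height is zero. A minimal sketch of the pattern (hypothetical helper, not the DC interface):

#include <stdbool.h>

static bool compute_num_slices_v(int pic_height, int slice_height,
				 int *num_slices_v)
{
	if (slice_height <= 0)
		return false;	/* reject rather than divide by zero */
	*num_slices_v = pic_height / slice_height;
	return true;
}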
+diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c
+index eb3da558c3fbd..ee0469d5d4354 100644
+--- a/drivers/mfd/stpmic1.c
++++ b/drivers/mfd/stpmic1.c
+@@ -108,8 +108,9 @@ static const struct regmap_irq stpmic1_irqs[] = {
+ static const struct regmap_irq_chip stpmic1_regmap_irq_chip = {
+ .name = "pmic_irq",
+ .status_base = INT_PENDING_R1,
+- .mask_base = INT_CLEAR_MASK_R1,
+- .unmask_base = INT_SET_MASK_R1,
++ .mask_base = INT_SET_MASK_R1,
++ .unmask_base = INT_CLEAR_MASK_R1,
++ .mask_unmask_non_inverted = true,
+ .ack_base = INT_CLEAR_R1,
+ .num_regs = STPMIC1_PMIC_NUM_IRQ_REGS,
+ .irqs = stpmic1_irqs,
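The stpmic1 fix above is about register polarity: on this PMIC, writing 1 to INT_SET_MASK_Rx masks an interrupt and writing 1 to INT_CLEAR_MASK_Rx unmasks it, so SET is the mask base and CLEAR the unmask base, with mask_unmask_non_inverted telling regmap-irq that mask bits are written as 1s rather than inverted. Restated as a standalone sketch with the polarity spelled out (kernel context and the driver's register names assumed):

#include <linux/regmap.h>

static const struct regmap_irq_chip stpmic1_like_irq_chip = {
	.name        = "pmic_irq",
	.status_base = INT_PENDING_R1,
	.mask_base   = INT_SET_MASK_R1,		/* write 1 here to mask   */
	.unmask_base = INT_CLEAR_MASK_R1,	/* write 1 here to unmask */
	.mask_unmask_non_inverted = true,	/* 1 = masked, not inverted */
	.ack_base    = INT_CLEAR_R1,
	.num_regs    = STPMIC1_PMIC_NUM_IRQ_REGS,
	.irqs        = stpmic1_irqs,
};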
+diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
+index 3a927452a6501..7e39017e440fb 100644
+--- a/drivers/mmc/core/mmc.c
++++ b/drivers/mmc/core/mmc.c
+@@ -1819,8 +1819,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
+
+ if (err)
+ goto free_card;
+-
+- } else if (!mmc_card_hs400es(card)) {
++ } else if (mmc_card_hs400es(card)) {
++ if (host->ops->execute_hs400_tuning) {
++ err = host->ops->execute_hs400_tuning(host, card);
++ if (err)
++ goto free_card;
++ }
++ } else {
+ /* Select the desired bus width optionally */
+ err = mmc_select_bus_width(card);
+ if (err > 0 && mmc_card_hs(card)) {
+diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+index e64bef490a174..42d8e5e771b7e 100644
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+@@ -544,17 +544,15 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+
+ /**
+ * ice_vc_isvalid_q_id
+- * @vf: pointer to the VF info
+- * @vsi_id: VSI ID
++ * @vsi: VSI to check queue ID against
+ * @qid: VSI relative queue ID
+ *
+ * check for the valid queue ID
+ */
+-static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
++static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u8 qid)
+ {
+- struct ice_vsi *vsi = ice_find_vsi(vf->pf, vsi_id);
+ /* allocated Tx and Rx queues should be always equal for VF VSI */
+- return (vsi && (qid < vsi->alloc_txq));
++ return qid < vsi->alloc_txq;
+ }
+
+ /**
+@@ -1254,7 +1252,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
+ */
+ q_map = vqs->rx_queues;
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+@@ -1276,7 +1274,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
+
+ q_map = vqs->tx_queues;
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+@@ -1381,7 +1379,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+ q_map = vqs->tx_queues;
+
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+@@ -1407,7 +1405,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+ bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+ } else if (q_map) {
+ for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+- if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
++ if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+@@ -1463,7 +1461,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
+ for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+ vsi_q_id = vsi_q_id_idx;
+
+- if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
++ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
+ return VIRTCHNL_STATUS_ERR_PARAM;
+
+ q_vector->num_ring_rx++;
+@@ -1477,7 +1475,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
+ for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+ vsi_q_id = vsi_q_id_idx;
+
+- if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
++ if (!ice_vc_isvalid_q_id(vsi, vsi_q_id))
+ return VIRTCHNL_STATUS_ERR_PARAM;
+
+ q_vector->num_ring_tx++;
+@@ -1611,7 +1609,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
+ qpi->txq.headwb_enabled ||
+ !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+ !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
+- !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
++ !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) {
+ goto error_param;
+ }
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+index 7f72604079723..fb8e856933097 100644
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+@@ -107,9 +107,6 @@ ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id)
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF))
+ return -EINVAL;
+
+- if (vsi_id != vf->lan_vsi_num)
+- return -EINVAL;
+-
+ if (!ice_vc_isvalid_vsi_id(vf, vsi_id))
+ return -EINVAL;
+
+diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
+index 502518cdb4618..6453c92f0fa7c 100644
+--- a/drivers/net/ethernet/micrel/ks8851_common.c
++++ b/drivers/net/ethernet/micrel/ks8851_common.c
+@@ -328,7 +328,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
+ {
+ struct ks8851_net *ks = _ks;
+ struct sk_buff_head rxq;
+- unsigned handled = 0;
+ unsigned long flags;
+ unsigned int status;
+ struct sk_buff *skb;
+@@ -336,24 +335,17 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
+ ks8851_lock(ks, &flags);
+
+ status = ks8851_rdreg16(ks, KS_ISR);
++ ks8851_wrreg16(ks, KS_ISR, status);
+
+ netif_dbg(ks, intr, ks->netdev,
+ "%s: status 0x%04x\n", __func__, status);
+
+- if (status & IRQ_LCI)
+- handled |= IRQ_LCI;
+-
+ if (status & IRQ_LDI) {
+ u16 pmecr = ks8851_rdreg16(ks, KS_PMECR);
+ pmecr &= ~PMECR_WKEVT_MASK;
+ ks8851_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK);
+-
+- handled |= IRQ_LDI;
+ }
+
+- if (status & IRQ_RXPSI)
+- handled |= IRQ_RXPSI;
+-
+ if (status & IRQ_TXI) {
+ unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR);
+
+@@ -365,20 +357,12 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
+ if (netif_queue_stopped(ks->netdev))
+ netif_wake_queue(ks->netdev);
+ spin_unlock(&ks->statelock);
+-
+- handled |= IRQ_TXI;
+ }
+
+- if (status & IRQ_RXI)
+- handled |= IRQ_RXI;
+-
+ if (status & IRQ_SPIBEI) {
+ netdev_err(ks->netdev, "%s: spi bus error\n", __func__);
+- handled |= IRQ_SPIBEI;
+ }
+
+- ks8851_wrreg16(ks, KS_ISR, handled);
+-
+ if (status & IRQ_RXI) {
+ /* the datasheet says to disable the rx interrupt during
+ * packet read-out, however we're masking the interrupt
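The ks8851 change above acknowledges the full ISR snapshot immediately after reading it, instead of accumulating a "handled" mask and writing it back at the end; a late ack could clear a cause (e.g. TXI) that fired between the read and the write-back, stalling the TX queue. Schematic of the ordering (kernel types assumed; read_isr(), write_isr() and handle_*() are hypothetical names):

u16 status = read_isr(dev);	/* snapshot the pending causes      */
write_isr(dev, status);		/* ack exactly that snapshot, early */

/*
 * A cause that fires from here on sets a bit we have not acked, so the
 * interrupt line re-asserts and nothing is lost while we do the work.
 */
if (status & IRQ_TXI)
	handle_tx_space(dev);
if (status & IRQ_RXI)
	handle_rx(dev);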
+diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
+index 21b6c4d94a632..6d31061818e93 100644
+--- a/drivers/net/usb/ax88179_178a.c
++++ b/drivers/net/usb/ax88179_178a.c
+@@ -174,6 +174,7 @@ struct ax88179_data {
+ u32 wol_supported;
+ u32 wolopts;
+ u8 disconnecting;
++ u8 initialized;
+ };
+
+ struct ax88179_int_data {
+@@ -1673,6 +1674,18 @@ static int ax88179_reset(struct usbnet *dev)
+ return 0;
+ }
+
++static int ax88179_net_reset(struct usbnet *dev)
++{
++ struct ax88179_data *ax179_data = dev->driver_priv;
++
++ if (ax179_data->initialized)
++ ax88179_reset(dev);
++ else
++ ax179_data->initialized = 1;
++
++ return 0;
++}
++
+ static int ax88179_stop(struct usbnet *dev)
+ {
+ u16 tmp16;
+@@ -1692,6 +1705,7 @@ static const struct driver_info ax88179_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1704,6 +1718,7 @@ static const struct driver_info ax88178a_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1716,7 +1731,7 @@ static const struct driver_info cypress_GX3_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1729,7 +1744,7 @@ static const struct driver_info dlink_dub1312_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1742,7 +1757,7 @@ static const struct driver_info sitecom_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1755,7 +1770,7 @@ static const struct driver_info samsung_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1768,7 +1783,7 @@ static const struct driver_info lenovo_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1781,7 +1796,7 @@ static const struct driver_info belkin_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1794,7 +1809,7 @@ static const struct driver_info toshiba_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1807,7 +1822,7 @@ static const struct driver_info mct_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1820,7 +1835,7 @@ static const struct driver_info at_umc2000_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1833,7 +1848,7 @@ static const struct driver_info at_umc200_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+@@ -1846,7 +1861,7 @@ static const struct driver_info at_umc2000sp_info = {
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+- .reset = ax88179_reset,
++ .reset = ax88179_net_reset,
+ .stop = ax88179_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
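All the ax88179 driver_info entries above now point .reset at a wrapper that skips its very first invocation: bind has just fully programmed the chip, so only subsequent resets (e.g. on open or resume) need the heavyweight ax88179_reset(). The run-once latch, restated from the hunk with its intent commented:

static int ax88179_net_reset(struct usbnet *dev)
{
	struct ax88179_data *priv = dev->driver_priv;

	if (priv->initialized)
		ax88179_reset(dev);	/* every call after the first */
	else
		priv->initialized = 1;	/* first call: bind already reset */

	return 0;
}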
+diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
+index 1ef36a0a7dd20..223482584f54f 100644
+--- a/drivers/pinctrl/core.c
++++ b/drivers/pinctrl/core.c
+@@ -205,6 +205,7 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev,
+ const struct pinctrl_pin_desc *pin)
+ {
+ struct pin_desc *pindesc;
++ int error;
+
+ pindesc = pin_desc_get(pctldev, pin->number);
+ if (pindesc) {
+@@ -226,18 +227,25 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev,
+ } else {
+ pindesc->name = kasprintf(GFP_KERNEL, "PIN%u", pin->number);
+ if (!pindesc->name) {
+- kfree(pindesc);
+- return -ENOMEM;
++ error = -ENOMEM;
++ goto failed;
+ }
+ pindesc->dynamic_name = true;
+ }
+
+ pindesc->drv_data = pin->drv_data;
+
+- radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc);
++ error = radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc);
++ if (error)
++ goto failed;
++
+ pr_debug("registered pin %d (%s) on %s\n",
+ pin->number, pindesc->name, pctldev->desc->name);
+ return 0;
++
++failed:
++ kfree(pindesc);
++ return error;
+ }
+
+ static int pinctrl_register_pins(struct pinctrl_dev *pctldev,
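The pinctrl hunk above does two things: it starts checking radix_tree_insert(), which can fail with -ENOMEM, and it funnels both failure paths through one cleanup label so the descriptor is freed exactly once. A generic sketch of that centralized-unwind idiom (hypothetical types and names, kernel context assumed):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/radix-tree.h>

struct thing { char *name; };

static int register_thing(struct radix_tree_root *tree, unsigned int id)
{
	struct thing *t;
	int error;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return -ENOMEM;

	t->name = kasprintf(GFP_KERNEL, "THING%u", id);
	if (!t->name) {
		error = -ENOMEM;
		goto failed;
	}

	error = radix_tree_insert(tree, id, t);	/* may fail: check it */
	if (error)
		goto failed;
	return 0;

failed:
	kfree(t->name);		/* kfree(NULL) is a no-op */
	kfree(t);
	return error;
}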
+diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c
+index d421a2ccaa1ea..ffec5299b5c1d 100644
+--- a/drivers/remoteproc/mtk_scp.c
++++ b/drivers/remoteproc/mtk_scp.c
+@@ -126,7 +126,7 @@ static int scp_elf_read_ipi_buf_addr(struct mtk_scp *scp,
+ static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw)
+ {
+ int ret;
+- size_t offset;
++ size_t buf_sz, offset;
+
+ /* read the ipi buf addr from FW itself first */
+ ret = scp_elf_read_ipi_buf_addr(scp, fw, &offset);
+@@ -138,6 +138,14 @@ static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw)
+ }
+ dev_info(scp->dev, "IPI buf addr %#010zx\n", offset);
+
++ /* Make sure IPI buffer fits in the L2TCM range assigned to this core */
++ buf_sz = sizeof(*scp->recv_buf) + sizeof(*scp->send_buf);
++
++ if (scp->sram_size < buf_sz + offset) {
++ dev_err(scp->dev, "IPI buffer does not fit in SRAM.\n");
++ return -EOVERFLOW;
++ }
++
+ scp->recv_buf = (struct mtk_share_obj __iomem *)
+ (scp->sram_base + offset);
+ scp->send_buf = (struct mtk_share_obj __iomem *)
+diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
+index 7aa37be3216a5..86433e3c3409a 100644
+--- a/drivers/tty/serial/kgdboc.c
++++ b/drivers/tty/serial/kgdboc.c
+@@ -19,6 +19,7 @@
+ #include <linux/console.h>
+ #include <linux/vt_kern.h>
+ #include <linux/input.h>
++#include <linux/irq_work.h>
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/serial_core.h>
+@@ -48,6 +49,25 @@ static struct kgdb_io kgdboc_earlycon_io_ops;
+ static int (*earlycon_orig_exit)(struct console *con);
+ #endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */
+
++/*
++ * When we leave the debug trap handler we need to reset the keyboard status
++ * (since the original keyboard state gets partially clobbered by kdb use of
++ * the keyboard).
++ *
++ * The path to deliver the reset is somewhat circuitous.
++ *
++ * To deliver the reset we register an input handler, reset the keyboard and
++ * then deregister the input handler. However, to get this done right, we do
++ * have to carefully manage the calling context because we can only register
++ * input handlers from task context.
++ *
++ * In particular we need to trigger the action from the debug trap handler with
++ * all its NMI and/or NMI-like oddities. To solve this the kgdboc trap exit code
++ * (the "post_exception" callback) uses irq_work_queue(), which is NMI-safe, to
++ * schedule a callback from a hardirq context. From there we have to defer the
++ * work again, this time using schedule_work(), to get a callback using the
++ * system workqueue, which runs in task context.
++ */
+ #ifdef CONFIG_KDB_KEYBOARD
+ static int kgdboc_reset_connect(struct input_handler *handler,
+ struct input_dev *dev,
+@@ -99,10 +119,17 @@ static void kgdboc_restore_input_helper(struct work_struct *dummy)
+
+ static DECLARE_WORK(kgdboc_restore_input_work, kgdboc_restore_input_helper);
+
++static void kgdboc_queue_restore_input_helper(struct irq_work *unused)
++{
++ schedule_work(&kgdboc_restore_input_work);
++}
++
++static DEFINE_IRQ_WORK(kgdboc_restore_input_irq_work, kgdboc_queue_restore_input_helper);
++
+ static void kgdboc_restore_input(void)
+ {
+ if (likely(system_state == SYSTEM_RUNNING))
+- schedule_work(&kgdboc_restore_input_work);
++ irq_work_queue(&kgdboc_restore_input_irq_work);
+ }
+
+ static int kgdboc_register_kbd(char **cptr)
+@@ -133,6 +160,7 @@ static void kgdboc_unregister_kbd(void)
+ i--;
+ }
+ }
++ irq_work_sync(&kgdboc_restore_input_irq_work);
+ flush_work(&kgdboc_restore_input_work);
+ }
+ #else /* ! CONFIG_KDB_KEYBOARD */
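The long comment added above describes a two-hop deferral: the debug-trap exit runs in an NMI-like context where only irq_work_queue() is safe; the irq_work callback then runs in hardirq context, where only schedule_work() is safe; the work item finally runs in task context, where input handlers may be registered. A minimal standalone sketch of that chain (generic names, kernel context assumed):

#include <linux/irq_work.h>
#include <linux/workqueue.h>

static void restore_in_task_context(struct work_struct *work)
{
	/* task context: may register/unregister input handlers, sleep, etc. */
}
static DECLARE_WORK(restore_work, restore_in_task_context);

static void bounce_from_hardirq(struct irq_work *unused)
{
	schedule_work(&restore_work);	/* hop 2: hardirq -> workqueue */
}
static DEFINE_IRQ_WORK(restore_irq_work, bounce_from_hardirq);

static void on_debug_trap_exit(void)
{
	irq_work_queue(&restore_irq_work);	/* hop 1: NMI-safe */
}

Tear-down mirrors the patch: irq_work_sync() runs before flush_work(), so a queued irq_work cannot re-arm the work item after it has been flushed.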
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 2d7ac92ce9b84..c72c6f8ec2c88 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -1708,7 +1708,6 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc)
+ */
+ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt)
+ {
+- struct dwc3 *dwc = dep->dwc;
+ struct dwc3_gadget_ep_cmd_params params;
+ u32 cmd;
+ int ret;
+@@ -1733,8 +1732,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int
+ dep->resource_index = 0;
+
+ if (!interrupt) {
+- if (!DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC3, 310A))
+- mdelay(1);
++ mdelay(1);
+ dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
+ } else if (!ret) {
+ dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
+diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
+index 195c9c16f817f..e804db927d5cf 100644
+--- a/drivers/usb/typec/tipd/core.c
++++ b/drivers/usb/typec/tipd/core.c
+@@ -24,6 +24,7 @@
+ #define TPS_REG_MODE 0x03
+ #define TPS_REG_CMD1 0x08
+ #define TPS_REG_DATA1 0x09
++#define TPS_REG_VERSION 0x0F
+ #define TPS_REG_INT_EVENT1 0x14
+ #define TPS_REG_INT_EVENT2 0x15
+ #define TPS_REG_INT_MASK1 0x16
+@@ -518,49 +519,67 @@ static irqreturn_t cd321x_interrupt(int irq, void *data)
+
+ static irqreturn_t tps6598x_interrupt(int irq, void *data)
+ {
++ int intev_len = TPS_65981_2_6_INTEVENT_LEN;
+ struct tps6598x *tps = data;
+- u64 event1 = 0;
+- u64 event2 = 0;
++ u64 event1[2] = { };
++ u64 event2[2] = { };
++ u32 version;
+ u32 status;
+ int ret;
+
+ mutex_lock(&tps->lock);
+
+- ret = tps6598x_read64(tps, TPS_REG_INT_EVENT1, &event1);
+- ret |= tps6598x_read64(tps, TPS_REG_INT_EVENT2, &event2);
++ ret = tps6598x_read32(tps, TPS_REG_VERSION, &version);
++ if (ret)
++ dev_warn(tps->dev, "%s: failed to read version (%d)\n",
++ __func__, ret);
++
++ if (TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DH ||
++ TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DK)
++ intev_len = TPS_65987_8_INTEVENT_LEN;
++
++ ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, event1, intev_len);
++
++ ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, event1, intev_len);
+ if (ret) {
+- dev_err(tps->dev, "%s: failed to read events\n", __func__);
++ dev_err(tps->dev, "%s: failed to read event1\n", __func__);
+ goto err_unlock;
+ }
+- trace_tps6598x_irq(event1, event2);
++ ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT2, event2, intev_len);
++ if (ret) {
++ dev_err(tps->dev, "%s: failed to read event2\n", __func__);
++ goto err_unlock;
++ }
++ trace_tps6598x_irq(event1[0], event2[0]);
+
+- if (!(event1 | event2))
++ if (!(event1[0] | event1[1] | event2[0] | event2[1]))
+ goto err_unlock;
+
+ if (!tps6598x_read_status(tps, &status))
+ goto err_clear_ints;
+
+- if ((event1 | event2) & TPS_REG_INT_POWER_STATUS_UPDATE)
++ if ((event1[0] | event2[0]) & TPS_REG_INT_POWER_STATUS_UPDATE)
+ if (!tps6598x_read_power_status(tps))
+ goto err_clear_ints;
+
+- if ((event1 | event2) & TPS_REG_INT_DATA_STATUS_UPDATE)
++ if ((event1[0] | event2[0]) & TPS_REG_INT_DATA_STATUS_UPDATE)
+ if (!tps6598x_read_data_status(tps))
+ goto err_clear_ints;
+
+ /* Handle plug insert or removal */
+- if ((event1 | event2) & TPS_REG_INT_PLUG_EVENT)
++ if ((event1[0] | event2[0]) & TPS_REG_INT_PLUG_EVENT)
+ tps6598x_handle_plug_event(tps, status);
+
+ err_clear_ints:
+- tps6598x_write64(tps, TPS_REG_INT_CLEAR1, event1);
+- tps6598x_write64(tps, TPS_REG_INT_CLEAR2, event2);
++ tps6598x_block_write(tps, TPS_REG_INT_CLEAR1, event1, intev_len);
++ tps6598x_block_write(tps, TPS_REG_INT_CLEAR2, event2, intev_len);
+
+ err_unlock:
+ mutex_unlock(&tps->lock);
+
+- if (event1 | event2)
++ if (event1[0] | event1[1] | event2[0] | event2[1])
+ return IRQ_HANDLED;
++
+ return IRQ_NONE;
+ }
+
+diff --git a/drivers/usb/typec/tipd/tps6598x.h b/drivers/usb/typec/tipd/tps6598x.h
+index 527857549d699..1fc3cc8ad199a 100644
+--- a/drivers/usb/typec/tipd/tps6598x.h
++++ b/drivers/usb/typec/tipd/tps6598x.h
+@@ -199,4 +199,15 @@
+ #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_A BIT(2)
+ #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_B (BIT(2) | BIT(1))
+
++/* Version Register */
++#define TPS_VERSION_HW_VERSION_MASK GENMASK(31, 24)
++#define TPS_VERSION_HW_VERSION(x) TPS_FIELD_GET(TPS_VERSION_HW_VERSION_MASK, (x))
++#define TPS_VERSION_HW_65981_2_6 0x00
++#define TPS_VERSION_HW_65987_8_DH 0xF7
++#define TPS_VERSION_HW_65987_8_DK 0xF9
++
++/* Int Event Register length */
++#define TPS_65981_2_6_INTEVENT_LEN 8
++#define TPS_65987_8_INTEVENT_LEN 11
++
+ #endif /* __TPS6598X_H__ */
+diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c
+index 73cd5bf350472..2431febc46151 100644
+--- a/drivers/usb/typec/ucsi/displayport.c
++++ b/drivers/usb/typec/ucsi/displayport.c
+@@ -275,8 +275,6 @@ static void ucsi_displayport_work(struct work_struct *work)
+ struct ucsi_dp *dp = container_of(work, struct ucsi_dp, work);
+ int ret;
+
+- mutex_lock(&dp->con->lock);
+-
+ ret = typec_altmode_vdm(dp->alt, dp->header,
+ dp->vdo_data, dp->vdo_size);
+ if (ret)
+@@ -285,8 +283,6 @@ static void ucsi_displayport_work(struct work_struct *work)
+ dp->vdo_data = NULL;
+ dp->vdo_size = 0;
+ dp->header = 0;
+-
+- mutex_unlock(&dp->con->lock);
+ }
+
+ void ucsi_displayport_remove_partner(struct typec_altmode *alt)
+diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
+index a0a4d8de82cad..dac1a5c110c0e 100644
+--- a/fs/iomap/buffered-io.c
++++ b/fs/iomap/buffered-io.c
+@@ -579,7 +579,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
+ return iomap_read_inline_data(iter, folio);
+ }
+
+-static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
++static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
+ size_t len, struct folio **foliop)
+ {
+ const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
+@@ -613,6 +613,27 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
+ status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
+ goto out_no_page;
+ }
++
++ /*
++ * Now we have a locked folio, before we do anything with it we need to
++ * check that the iomap we have cached is not stale. The inode extent
++ * mapping can change due to concurrent IO in flight (e.g.
++ * IOMAP_UNWRITTEN state can change and memory reclaim could have
++ * reclaimed a previously partially written page at this index after IO
++ * completion before this write reaches this file offset) and hence we
++ * could do the wrong thing here (zero a page range incorrectly or fail
++ * to zero) and corrupt data.
++ */
++ if (page_ops && page_ops->iomap_valid) {
++ bool iomap_valid = page_ops->iomap_valid(iter->inode,
++ &iter->iomap);
++ if (!iomap_valid) {
++ iter->iomap.flags |= IOMAP_F_STALE;
++ status = 0;
++ goto out_unlock;
++ }
++ }
++
+ if (pos + len > folio_pos(folio) + folio_size(folio))
+ len = folio_pos(folio) + folio_size(folio) - pos;
+
+@@ -768,6 +789,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
+ status = iomap_write_begin(iter, pos, bytes, &folio);
+ if (unlikely(status))
+ break;
++ if (iter->iomap.flags & IOMAP_F_STALE)
++ break;
+
+ page = folio_file_page(folio, pos >> PAGE_SHIFT);
+ if (mapping_writably_mapped(mapping))
+@@ -827,6 +850,231 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
+ }
+ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
+
++/*
++ * Scan the data range passed to us for dirty page cache folios. If we find a
++ * dirty folio, punch out the preceding range and update the offset from which
++ * the next punch will start.
++ *
++ * We can punch out storage reservations under clean pages because they either
++ * contain data that has been written back - in which case the delalloc punch
++ * over that range is a no-op - or they have been read faults in which case they
++ * contain zeroes and we can remove the delalloc backing range and any new
++ * writes to those pages will do the normal hole filling operation...
++ *
++ * This makes the logic simple: we only need to keep the delalloc extents
++ * over the dirty ranges of the page cache.
++ *
++ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
++ * simplify range iterations.
++ */
++static int iomap_write_delalloc_scan(struct inode *inode,
++ loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
++ int (*punch)(struct inode *inode, loff_t offset, loff_t length))
++{
++ while (start_byte < end_byte) {
++ struct folio *folio;
++
++ /* grab locked page */
++ folio = filemap_lock_folio(inode->i_mapping,
++ start_byte >> PAGE_SHIFT);
++ if (!folio) {
++ start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) +
++ PAGE_SIZE;
++ continue;
++ }
++
++ /* if dirty, punch up to offset */
++ if (folio_test_dirty(folio)) {
++ if (start_byte > *punch_start_byte) {
++ int error;
++
++ error = punch(inode, *punch_start_byte,
++ start_byte - *punch_start_byte);
++ if (error) {
++ folio_unlock(folio);
++ folio_put(folio);
++ return error;
++ }
++ }
++
++ /*
++ * Make sure the next punch start is correctly bound to
++ * the end of this data range, not the end of the folio.
++ */
++ *punch_start_byte = min_t(loff_t, end_byte,
++ folio_next_index(folio) << PAGE_SHIFT);
++ }
++
++ /* move offset to start of next folio in range */
++ start_byte = folio_next_index(folio) << PAGE_SHIFT;
++ folio_unlock(folio);
++ folio_put(folio);
++ }
++ return 0;
++}
++
++/*
++ * Punch out all the delalloc blocks in the range given except for those that
++ * have dirty data still pending in the page cache - those are going to be
++ * written and so must still retain the delalloc backing for writeback.
++ *
++ * As we are scanning the page cache for data, we don't need to reimplement the
++ * wheel - mapping_seek_hole_data() does exactly what we need to identify the
++ * start and end of data ranges correctly even for sub-folio block sizes. This
++ * byte range based iteration is especially convenient because it means we
++ * don't have to care about variable size folios, nor where the start or end of
++ * the data range lies within a folio, if they lie within the same folio or even
++ * if there are multiple discontiguous data ranges within the folio.
++ *
++ * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so
++ * can return data ranges that exist in the cache beyond EOF. e.g. a page fault
++ * spanning EOF will initialise the post-EOF data to zeroes and mark it up to
++ * date. A write page fault can then mark it dirty. If we then fail a write()
++ * beyond EOF into that up to date cached range, we allocate a delalloc block
++ * beyond EOF and then have to punch it out. Because the range is up to date,
++ * mapping_seek_hole_data() will return it, and we will skip the punch because
++ * the folio is dirty. This is incorrect - we always need to punch out delalloc
++ * beyond EOF in this case as writeback will never write back and convert that
++ * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF,
++ * resulting in always punching out the range from the EOF to the end of the
++ * range the iomap spans.
++ *
++ * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it
++ * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA
++ * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte)
++ * returns the end of the data range (data_end). Using closed intervals would
++ * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
++ * the code to subtle off-by-one bugs....
++ */
++static int iomap_write_delalloc_release(struct inode *inode,
++ loff_t start_byte, loff_t end_byte,
++ int (*punch)(struct inode *inode, loff_t pos, loff_t length))
++{
++ loff_t punch_start_byte = start_byte;
++ loff_t scan_end_byte = min(i_size_read(inode), end_byte);
++ int error = 0;
++
++ /*
++ * Lock the mapping to avoid races with page faults re-instantiating
++ * folios and dirtying them via ->page_mkwrite whilst we walk the
++ * cache and perform delalloc extent removal. Failing to do this can
++ * leave dirty pages with no space reservation in the cache.
++ */
++ filemap_invalidate_lock(inode->i_mapping);
++ while (start_byte < scan_end_byte) {
++ loff_t data_end;
++
++ start_byte = mapping_seek_hole_data(inode->i_mapping,
++ start_byte, scan_end_byte, SEEK_DATA);
++ /*
++ * If there is no more data to scan, all that is left is to
++ * punch out the remaining range.
++ */
++ if (start_byte == -ENXIO || start_byte == scan_end_byte)
++ break;
++ if (start_byte < 0) {
++ error = start_byte;
++ goto out_unlock;
++ }
++ WARN_ON_ONCE(start_byte < punch_start_byte);
++ WARN_ON_ONCE(start_byte > scan_end_byte);
++
++ /*
++ * We find the end of this contiguous cached data range by
++ * seeking from start_byte to the beginning of the next hole.
++ */
++ data_end = mapping_seek_hole_data(inode->i_mapping, start_byte,
++ scan_end_byte, SEEK_HOLE);
++ if (data_end < 0) {
++ error = data_end;
++ goto out_unlock;
++ }
++ WARN_ON_ONCE(data_end <= start_byte);
++ WARN_ON_ONCE(data_end > scan_end_byte);
++
++ error = iomap_write_delalloc_scan(inode, &punch_start_byte,
++ start_byte, data_end, punch);
++ if (error)
++ goto out_unlock;
++
++ /* The next data search starts at the end of this one. */
++ start_byte = data_end;
++ }
++
++ if (punch_start_byte < end_byte)
++ error = punch(inode, punch_start_byte,
++ end_byte - punch_start_byte);
++out_unlock:
++ filemap_invalidate_unlock(inode->i_mapping);
++ return error;
++}
++
++/*
++ * When a short write occurs, the filesystem may need to remove reserved space
++ * that was allocated in ->iomap_begin from its ->iomap_end method. For
++ * filesystems that use delayed allocation, we need to punch out delalloc
++ * extents from the range that are not dirty in the page cache. As the write can
++ * race with page faults, there can be dirty pages over the delalloc extent
++ * outside the range of a short write but still within the delalloc extent
++ * allocated for this iomap.
++ *
++ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
++ * simplify range iterations.
++ *
++ * The punch() callback *must* only punch delalloc extents in the range passed
++ * to it. It must skip over all other types of extents in the range and leave
++ * them completely unchanged. It must do this punch atomically with respect to
++ * other extent modifications.
++ *
++ * The punch() callback may be called with a folio locked to prevent writeback
++ * extent allocation racing at the edge of the range we are currently punching.
++ * The locked folio may or may not cover the range being punched, so it is not
++ * safe for the punch() callback to lock folios itself.
++ *
++ * Lock order is:
++ *
++ * inode->i_rwsem (shared or exclusive)
++ * inode->i_mapping->invalidate_lock (exclusive)
++ * folio_lock()
++ * ->punch
++ * internal filesystem allocation lock
++ */
++int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
++ struct iomap *iomap, loff_t pos, loff_t length,
++ ssize_t written,
++ int (*punch)(struct inode *inode, loff_t pos, loff_t length))
++{
++ loff_t start_byte;
++ loff_t end_byte;
++ int blocksize = i_blocksize(inode);
++
++ if (iomap->type != IOMAP_DELALLOC)
++ return 0;
++
++ /* If we didn't reserve the blocks, we're not allowed to punch them. */
++ if (!(iomap->flags & IOMAP_F_NEW))
++ return 0;
++
++ /*
++ * start_byte refers to the first unused block after a short write. If
++ * nothing was written, round offset down to point at the first block in
++ * the range.
++ */
++ if (unlikely(!written))
++ start_byte = round_down(pos, blocksize);
++ else
++ start_byte = round_up(pos + written, blocksize);
++ end_byte = round_up(pos + length, blocksize);
++
++ /* Nothing to do if we've written the entire delalloc extent */
++ if (start_byte >= end_byte)
++ return 0;
++
++ return iomap_write_delalloc_release(inode, start_byte, end_byte,
++ punch);
++}
++EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
++
+ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
+ {
+ struct iomap *iomap = &iter->iomap;
+@@ -851,6 +1099,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
+ status = iomap_write_begin(iter, pos, bytes, &folio);
+ if (unlikely(status))
+ return status;
++ if (iter->iomap.flags & IOMAP_F_STALE)
++ break;
+
+ status = iomap_write_end(iter, pos, bytes, bytes, folio);
+ if (WARN_ON_ONCE(status == 0))
+@@ -906,6 +1156,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
+ status = iomap_write_begin(iter, pos, bytes, &folio);
+ if (status)
+ return status;
++ if (iter->iomap.flags & IOMAP_F_STALE)
++ break;
+
+ offset = offset_in_folio(folio, pos);
+ if (bytes > folio_size(folio) - offset)
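The new iomap helper above is driven by a filesystem-supplied punch callback; per the contract documented in the hunk, it must remove only delalloc extents inside the given byte range and leave every other extent type untouched. A sketch of what the filesystem side might look like (hypothetical fs names; XFS converts its own punch helper to byte ranges in later hunks of this patch):

static int myfs_punch_delalloc(struct inode *inode, loff_t pos, loff_t length)
{
	/* drop only delalloc mappings in [pos, pos + length) */
	return myfs_drop_delalloc_blocks(inode, pos, length); /* hypothetical */
}

/* called from the short-write error path, e.g. ->iomap_end: */
error = iomap_file_buffered_write_punch_delalloc(inode, iomap, pos,
						 length, written,
						 myfs_punch_delalloc);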
+diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
+index a1c7592d2aded..79a0614eaab77 100644
+--- a/fs/iomap/iter.c
++++ b/fs/iomap/iter.c
+@@ -7,12 +7,28 @@
+ #include <linux/iomap.h>
+ #include "trace.h"
+
++/*
++ * Advance to the next range we need to map.
++ *
++ * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully
++ * processed - it was aborted because the extent the iomap spanned may have been
++ * changed during the operation. In this case, the iteration behaviour is to
++ * remap the unprocessed range of the iter, and that means we may need to remap
++ * even when we've made no progress (i.e. iter->processed = 0). Hence the
++ * "finished iterating" case needs to distinguish between
++ * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we
++ * need to remap the entire remaining range.
++ */
+ static inline int iomap_iter_advance(struct iomap_iter *iter)
+ {
++ bool stale = iter->iomap.flags & IOMAP_F_STALE;
++
+ /* handle the previous iteration (if any) */
+ if (iter->iomap.length) {
+- if (iter->processed <= 0)
++ if (iter->processed < 0)
+ return iter->processed;
++ if (!iter->processed && !stale)
++ return 0;
+ if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
+ return -EIO;
+ iter->pos += iter->processed;
+@@ -33,6 +49,7 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
+ WARN_ON_ONCE(iter->iomap.offset > iter->pos);
+ WARN_ON_ONCE(iter->iomap.length == 0);
+ WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
++ WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE);
+
+ trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
+ if (iter->srcmap.type != IOMAP_HOLE)
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 456af7d230cf1..46a0a2d6962e1 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -80,9 +80,6 @@ nfs4_callback_svc(void *vrqstp)
+ set_freezable();
+
+ while (!kthread_freezable_should_stop(NULL)) {
+-
+- if (signal_pending(current))
+- flush_signals(current);
+ /*
+ * Listen for a request on the socket
+ */
+@@ -112,11 +109,7 @@ nfs41_callback_svc(void *vrqstp)
+ set_freezable();
+
+ while (!kthread_freezable_should_stop(NULL)) {
+-
+- if (signal_pending(current))
+- flush_signals(current);
+-
+- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
++ prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
+ spin_lock_bh(&serv->sv_cb_lock);
+ if (!list_empty(&serv->sv_cb_list)) {
+ req = list_first_entry(&serv->sv_cb_list,
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index ba53cd89ec62c..b6d768bd5ccca 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1313,12 +1313,11 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ /* found a match */
+ if (ni->nsui_busy) {
+ /* wait - and try again */
+- prepare_to_wait(&nn->nfsd_ssc_waitq, &wait,
+- TASK_INTERRUPTIBLE);
++ prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE);
+ spin_unlock(&nn->nfsd_ssc_lock);
+
+ /* allow 20secs for mount/unmount for now - revisit */
+- if (signal_pending(current) ||
++ if (kthread_should_stop() ||
+ (schedule_timeout(20*HZ) == 0)) {
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
+ kfree(work);
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 0c75636054a54..a8190caf77f17 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -952,15 +952,6 @@ nfsd(void *vrqstp)
+
+ current->fs->umask = 0;
+
+- /*
+- * thread is spawned with all signals set to SIG_IGN, re-enable
+- * the ones that will bring down the thread
+- */
+- allow_signal(SIGKILL);
+- allow_signal(SIGHUP);
+- allow_signal(SIGINT);
+- allow_signal(SIGQUIT);
+-
+ atomic_inc(&nfsdstats.th_cnt);
+
+ set_freezable();
+@@ -985,9 +976,6 @@ nfsd(void *vrqstp)
+ validate_process_creds();
+ }
+
+- /* Clear signals before calling svc_exit_thread() */
+- flush_signals(current);
+-
+ atomic_dec(&nfsdstats.th_cnt);
+
+ out:
+diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
+index 49d0d4ea63fcd..0d56a8d862e80 100644
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata(
+ * the busy list.
+ */
+ bma->datatype = XFS_ALLOC_NOBUSY;
+- if (whichfork == XFS_DATA_FORK) {
++ if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
+ bma->datatype |= XFS_ALLOC_USERDATA;
+ if (bma->offset == 0)
+ bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+@@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc(
+ * the extent. Just return the real extent at this offset.
+ */
+ if (!isnullstartblock(bma.got.br_startblock)) {
+- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
++ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
++ xfs_iomap_inode_sequence(ip, flags));
+ *seq = READ_ONCE(ifp->if_seq);
+ goto out_trans_cancel;
+ }
+@@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc(
+ XFS_STATS_INC(mp, xs_xstrat_quick);
+
+ ASSERT(!isnullstartblock(bma.got.br_startblock));
+- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
++ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
++ xfs_iomap_inode_sequence(ip, flags));
+ *seq = READ_ONCE(ifp->if_seq);
+
+ if (whichfork == XFS_COW_FORK)
+diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
+index 5362908164b0b..580ccbd5aadc2 100644
+--- a/fs/xfs/libxfs/xfs_errortag.h
++++ b/fs/xfs/libxfs/xfs_errortag.h
+@@ -40,13 +40,12 @@
+ #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25
+ #define XFS_ERRTAG_BMAP_FINISH_ONE 26
+ #define XFS_ERRTAG_AG_RESV_CRITICAL 27
++
+ /*
+- * DEBUG mode instrumentation to test and/or trigger delayed allocation
+- * block killing in the event of failed writes. When enabled, all
+- * buffered writes are silenty dropped and handled as if they failed.
+- * All delalloc blocks in the range of the write (including pre-existing
+- * delalloc blocks!) are tossed as part of the write failure error
+- * handling sequence.
++ * Drop-writes support removed because write error handling cannot trash
++ * pre-existing delalloc extents in any useful way anymore. We retain the
++ * definition so that we can reject it as an invalid value in
++ * xfs_errortag_valid().
+ */
+ #define XFS_ERRTAG_DROP_WRITES 28
+ #define XFS_ERRTAG_LOG_BAD_CRC 29
+@@ -95,7 +94,6 @@
+ #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
+ #define XFS_RANDOM_BMAP_FINISH_ONE 1
+ #define XFS_RANDOM_AG_RESV_CRITICAL 4
+-#define XFS_RANDOM_DROP_WRITES 1
+ #define XFS_RANDOM_LOG_BAD_CRC 1
+ #define XFS_RANDOM_LOG_ITEM_PIN 1
+ #define XFS_RANDOM_BUF_LRU_REF 2
+diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
+index 3f34bafe18dd1..6f7ed9288fe40 100644
+--- a/fs/xfs/libxfs/xfs_refcount.c
++++ b/fs/xfs/libxfs/xfs_refcount.c
+@@ -815,11 +815,136 @@ xfs_refcount_find_right_extents(
+ /* Is this extent valid? */
+ static inline bool
+ xfs_refc_valid(
+- struct xfs_refcount_irec *rc)
++ const struct xfs_refcount_irec *rc)
+ {
+ return rc->rc_startblock != NULLAGBLOCK;
+ }
+
++static inline xfs_nlink_t
++xfs_refc_merge_refcount(
++ const struct xfs_refcount_irec *irec,
++ enum xfs_refc_adjust_op adjust)
++{
++ /* Once a record hits MAXREFCOUNT, it is pinned there forever */
++ if (irec->rc_refcount == MAXREFCOUNT)
++ return MAXREFCOUNT;
++ return irec->rc_refcount + adjust;
++}
++
++static inline bool
++xfs_refc_want_merge_center(
++ const struct xfs_refcount_irec *left,
++ const struct xfs_refcount_irec *cleft,
++ const struct xfs_refcount_irec *cright,
++ const struct xfs_refcount_irec *right,
++ bool cleft_is_cright,
++ enum xfs_refc_adjust_op adjust,
++ unsigned long long *ulenp)
++{
++ unsigned long long ulen = left->rc_blockcount;
++ xfs_nlink_t new_refcount;
++
++ /*
++ * To merge with a center record, both shoulder records must be
++ * adjacent to the record we want to adjust. This is only true if
++ * find_left and find_right made all four records valid.
++ */
++ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
++ !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
++ return false;
++
++ /* There must only be one record for the entire range. */
++ if (!cleft_is_cright)
++ return false;
++
++ /* The shoulder record refcounts must match the new refcount. */
++ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
++ if (left->rc_refcount != new_refcount)
++ return false;
++ if (right->rc_refcount != new_refcount)
++ return false;
++
++ /*
++ * The new record cannot exceed the max length. ulen is a ULL as the
++ * individual record block counts can be up to (u32 - 1) in length
++ * hence we need to catch u32 addition overflows here.
++ */
++ ulen += cleft->rc_blockcount + right->rc_blockcount;
++ if (ulen >= MAXREFCEXTLEN)
++ return false;
++
++ *ulenp = ulen;
++ return true;
++}
++
++static inline bool
++xfs_refc_want_merge_left(
++ const struct xfs_refcount_irec *left,
++ const struct xfs_refcount_irec *cleft,
++ enum xfs_refc_adjust_op adjust)
++{
++ unsigned long long ulen = left->rc_blockcount;
++ xfs_nlink_t new_refcount;
++
++ /*
++ * For a left merge, the left shoulder record must be adjacent to the
++ * start of the range. If this is true, find_left made left and cleft
++ * contain valid contents.
++ */
++ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
++ return false;
++
++ /* Left shoulder record refcount must match the new refcount. */
++ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
++ if (left->rc_refcount != new_refcount)
++ return false;
++
++ /*
++ * The new record cannot exceed the max length. ulen is a ULL as the
++ * individual record block counts can be up to (u32 - 1) in length
++ * hence we need to catch u32 addition overflows here.
++ */
++ ulen += cleft->rc_blockcount;
++ if (ulen >= MAXREFCEXTLEN)
++ return false;
++
++ return true;
++}
++
++static inline bool
++xfs_refc_want_merge_right(
++ const struct xfs_refcount_irec *cright,
++ const struct xfs_refcount_irec *right,
++ enum xfs_refc_adjust_op adjust)
++{
++ unsigned long long ulen = right->rc_blockcount;
++ xfs_nlink_t new_refcount;
++
++ /*
++ * For a right merge, the right shoulder record must be adjacent to the
++ * end of the range. If this is true, find_right made cright and right
++ * contain valid contents.
++ */
++ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
++ return false;
++
++ /* Right shoulder record refcount must match the new refcount. */
++ new_refcount = xfs_refc_merge_refcount(cright, adjust);
++ if (right->rc_refcount != new_refcount)
++ return false;
++
++ /*
++ * The new record cannot exceed the max length. ulen is a ULL as the
++ * individual record block counts can be up to (u32 - 1) in length
++ * hence we need to catch u32 addition overflows here.
++ */
++ ulen += cright->rc_blockcount;
++ if (ulen >= MAXREFCEXTLEN)
++ return false;
++
++ return true;
++}
++
+ /*
+ * Try to merge with any extents on the boundaries of the adjustment range.
+ */
+@@ -861,23 +986,15 @@ xfs_refcount_merge_extents(
+ (cleft.rc_blockcount == cright.rc_blockcount);
+
+ /* Try to merge left, cleft, and right. cleft must == cright. */
+- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
+- right.rc_blockcount;
+- if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
+- xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
+- left.rc_refcount == cleft.rc_refcount + adjust &&
+- right.rc_refcount == cleft.rc_refcount + adjust &&
+- ulen < MAXREFCEXTLEN) {
++ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
++ adjust, &ulen)) {
+ *shape_changed = true;
+ return xfs_refcount_merge_center_extents(cur, &left, &cleft,
+ &right, ulen, aglen);
+ }
+
+ /* Try to merge left and cleft. */
+- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
+- if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
+- left.rc_refcount == cleft.rc_refcount + adjust &&
+- ulen < MAXREFCEXTLEN) {
++ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
+ *shape_changed = true;
+ error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
+ agbno, aglen);
+@@ -893,10 +1010,7 @@ xfs_refcount_merge_extents(
+ }
+
+ /* Try to merge cright and right. */
+- ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
+- if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
+- right.rc_refcount == cright.rc_refcount + adjust &&
+- ulen < MAXREFCEXTLEN) {
++ if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
+ *shape_changed = true;
+ return xfs_refcount_merge_right_extent(cur, &right, &cright,
+ aglen);
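Each of the new want-merge helpers above repeats the same overflow discipline: rc_blockcount values are u32s that can individually approach the u32 maximum, so candidate merge lengths are accumulated in an unsigned long long before the MAXREFCEXTLEN comparison. A toy demonstration of why the wide accumulator matters (plain C, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t a = 0xFFFFFFF0u, b = 0x20u;	/* two large block counts */
	uint32_t narrow = a + b;		/* wraps around to 0x10 */
	unsigned long long wide = (unsigned long long)a + b;

	printf("32-bit sum: 0x%x (wrapped, would pass a length check)\n",
	       narrow);
	printf("64-bit sum: 0x%llx (correct, properly rejected)\n", wide);
	return 0;
}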
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index b6a584e044be0..bf2cca78304eb 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -266,7 +266,8 @@ xfs_validate_sb_write(
+ return -EFSCORRUPTED;
+ }
+
+- if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
++ if (!xfs_is_readonly(mp) &&
++ xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+ xfs_alert(mp,
+ "Corruption detected in superblock read-only compatible features (0x%x)!",
+ (sbp->sb_features_ro_compat &
+@@ -973,7 +974,9 @@ xfs_log_sb(
+ */
+ if (xfs_has_lazysbcount(mp)) {
+ mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
++ mp->m_sb.sb_ifree = min_t(uint64_t,
++ percpu_counter_sum(&mp->m_ifree),
++ mp->m_sb.sb_icount);
+ mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ }
+
+diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
+index 5d1a995b15f83..21c241e96d483 100644
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -114,9 +114,8 @@ xfs_end_ioend(
+ if (unlikely(error)) {
+ if (ioend->io_flags & IOMAP_F_SHARED) {
+ xfs_reflink_cancel_cow_range(ip, offset, size, true);
+- xfs_bmap_punch_delalloc_range(ip,
+- XFS_B_TO_FSBT(mp, offset),
+- XFS_B_TO_FSB(mp, size));
++ xfs_bmap_punch_delalloc_range(ip, offset,
++ offset + size);
+ }
+ goto done;
+ }
+@@ -373,7 +372,7 @@ xfs_map_blocks(
+ isnullstartblock(imap.br_startblock))
+ goto allocate_blocks;
+
+- xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
++ xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
+ trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
+ return 0;
+ allocate_blocks:
+@@ -440,27 +439,25 @@ xfs_prepare_ioend(
+ }
+
+ /*
+- * If the page has delalloc blocks on it, we need to punch them out before we
+- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+- * inode that can trip up a later direct I/O read operation on the same region.
++ * If the folio has delalloc blocks on it, the caller is asking us to punch them
++ * out. If we don't, we can leave a stale delalloc mapping covered by a clean
++ * page that needs to be dirtied again before the delalloc mapping can be
++ * converted. This stale delalloc mapping can trip up a later direct I/O read
++ * operation on the same region.
+ *
+- * We prevent this by truncating away the delalloc regions on the page. Because
++ * We prevent this by truncating away the delalloc regions on the folio. Because
+ * they are delalloc, we can do this without needing a transaction. Indeed - if
+ * we get ENOSPC errors, we have to be able to do this truncation without a
+- * transaction as there is no space left for block reservation (typically why we
+- * see a ENOSPC in writeback).
++ * transaction as there is no space left for block reservation (typically why
++ * we see an ENOSPC in writeback).
+ */
+ static void
+ xfs_discard_folio(
+ struct folio *folio,
+ loff_t pos)
+ {
+- struct inode *inode = folio->mapping->host;
+- struct xfs_inode *ip = XFS_I(inode);
++ struct xfs_inode *ip = XFS_I(folio->mapping->host);
+ struct xfs_mount *mp = ip->i_mount;
+- size_t offset = offset_in_folio(folio, pos);
+- xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos);
+- xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, offset);
+ int error;
+
+ if (xfs_is_shutdown(mp))
+@@ -470,8 +467,14 @@ xfs_discard_folio(
+ "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
+ folio, ip->i_ino, pos);
+
+- error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+- i_blocks_per_folio(inode, folio) - pageoff_fsb);
++ /*
++ * The end of the punch range is always the offset of the first
++ * byte of the next folio. Hence the end offset is only dependent on the
++ * folio itself and not the start offset that is passed in.
++ */
++ error = xfs_bmap_punch_delalloc_range(ip, pos,
++ folio_pos(folio) + folio_size(folio));
++
+ if (error && !xfs_is_shutdown(mp))
+ xfs_alert(mp, "page discard unable to remove delalloc mapping.");
+ }
+diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
+index 04d0c2bff67c4..867645b74d889 100644
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -590,11 +590,13 @@ xfs_getbmap(
+ int
+ xfs_bmap_punch_delalloc_range(
+ struct xfs_inode *ip,
+- xfs_fileoff_t start_fsb,
+- xfs_fileoff_t length)
++ xfs_off_t start_byte,
++ xfs_off_t end_byte)
+ {
++ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = &ip->i_df;
+- xfs_fileoff_t end_fsb = start_fsb + length;
++ xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
++ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
+ struct xfs_bmbt_irec got, del;
+ struct xfs_iext_cursor icur;
+ int error = 0;
+@@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range(
+
+ while (got.br_startoff + got.br_blockcount > start_fsb) {
+ del = got;
+- xfs_trim_extent(&del, start_fsb, length);
++ xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb);
+
+ /*
+ * A delete can push the cursor forward. Step back to the
+diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
+index 24b37d211f1dc..6888078f5c31e 100644
+--- a/fs/xfs/xfs_bmap_util.h
++++ b/fs/xfs/xfs_bmap_util.h
+@@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
+ #endif /* CONFIG_XFS_RT */
+
+ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+- xfs_fileoff_t start_fsb, xfs_fileoff_t length);
++ xfs_off_t start_byte, xfs_off_t end_byte);
+
+ struct kgetbmap {
+ __s64 bmv_offset; /* file offset of segment in blocks */
+diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
+index dde346450952a..54c774af6e1c6 100644
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1945,6 +1945,7 @@ xfs_free_buftarg(
+ list_lru_destroy(&btp->bt_lru);
+
+ blkdev_issue_flush(btp->bt_bdev);
++ invalidate_bdev(btp->bt_bdev);
+ fs_put_dax(btp->bt_daxdev, btp->bt_mount);
+
+ kmem_free(btp);
+diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
+index 522d450a94b18..df7322ed73fa9 100644
+--- a/fs/xfs/xfs_buf_item.c
++++ b/fs/xfs/xfs_buf_item.c
+@@ -1018,6 +1018,8 @@ xfs_buf_item_relse(
+ trace_xfs_buf_item_relse(bp, _RET_IP_);
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
+
++ if (atomic_read(&bip->bli_refcount))
++ return;
+ bp->b_log_item = NULL;
+ xfs_buf_rele(bp);
+ xfs_buf_item_free(bip);
+diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
+index c6b2aabd6f187..dea3c0649d2f7 100644
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = {
+ XFS_RANDOM_REFCOUNT_FINISH_ONE,
+ XFS_RANDOM_BMAP_FINISH_ONE,
+ XFS_RANDOM_AG_RESV_CRITICAL,
+- XFS_RANDOM_DROP_WRITES,
++ 0, /* XFS_RANDOM_DROP_WRITES has been removed */
+ XFS_RANDOM_LOG_BAD_CRC,
+ XFS_RANDOM_LOG_ITEM_PIN,
+ XFS_RANDOM_BUF_LRU_REF,
+@@ -162,7 +162,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA
+ XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE);
+ XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
+ XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
+-XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
+ XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
+ XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
+ XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
+@@ -206,7 +205,6 @@ static struct attribute *xfs_errortag_attrs[] = {
+ XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
+- XFS_ERRORTAG_ATTR_LIST(drop_writes),
+ XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
+ XFS_ERRORTAG_ATTR_LIST(log_item_pin),
+ XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
+@@ -256,6 +254,19 @@ xfs_errortag_del(
+ kmem_free(mp->m_errortag);
+ }
+
++static bool
++xfs_errortag_valid(
++ unsigned int error_tag)
++{
++ if (error_tag >= XFS_ERRTAG_MAX)
++ return false;
++
++ /* Error out removed injection types */
++ if (error_tag == XFS_ERRTAG_DROP_WRITES)
++ return false;
++ return true;
++}
++
+ bool
+ xfs_errortag_test(
+ struct xfs_mount *mp,
+@@ -277,7 +288,9 @@ xfs_errortag_test(
+ if (!mp->m_errortag)
+ return false;
+
+- ASSERT(error_tag < XFS_ERRTAG_MAX);
++ if (!xfs_errortag_valid(error_tag))
++ return false;
++
+ randfactor = mp->m_errortag[error_tag];
+ if (!randfactor || prandom_u32_max(randfactor))
+ return false;
+@@ -293,7 +306,7 @@ xfs_errortag_get(
+ struct xfs_mount *mp,
+ unsigned int error_tag)
+ {
+- if (error_tag >= XFS_ERRTAG_MAX)
++ if (!xfs_errortag_valid(error_tag))
+ return -EINVAL;
+
+ return mp->m_errortag[error_tag];
+@@ -305,7 +318,7 @@ xfs_errortag_set(
+ unsigned int error_tag,
+ unsigned int tag_value)
+ {
+- if (error_tag >= XFS_ERRTAG_MAX)
++ if (!xfs_errortag_valid(error_tag))
+ return -EINVAL;
+
+ mp->m_errortag[error_tag] = tag_value;
+@@ -319,7 +332,7 @@ xfs_errortag_add(
+ {
+ BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX);
+
+- if (error_tag >= XFS_ERRTAG_MAX)
++ if (!xfs_errortag_valid(error_tag))
+ return -EINVAL;
+
+ return xfs_errortag_set(mp, error_tag,
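xfs_errortag_valid() above keeps the XFS_ERRTAG_DROP_WRITES slot in place so the numbers of later tags remain part of the stable ABI, while rejecting the retired tag at every point where a tag number crosses the API boundary. A small runnable model of that pattern; the tag numbers here are made up:

    #include <stdbool.h>
    #include <stdio.h>

    /* stand-in tag numbers; real values live in fs/xfs/libxfs/xfs_errortag.h */
    enum { TAG_A = 0, TAG_DROP_WRITES = 1, TAG_B = 2, TAG_MAX = 3 };

    static bool errortag_valid(unsigned int tag)
    {
        if (tag >= TAG_MAX)
            return false;
        /* slot kept so later tag numbers don't shift; reject it explicitly */
        if (tag == TAG_DROP_WRITES)
            return false;
        return true;
    }

    int main(void)
    {
        printf("%d %d %d\n", errortag_valid(TAG_A),
               errortag_valid(TAG_DROP_WRITES), errortag_valid(TAG_MAX));
        return 0;
    }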
+diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
+index e462d39c840e6..595a5bcf46b94 100644
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1325,7 +1325,7 @@ __xfs_filemap_fault(
+ if (write_fault) {
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ ret = iomap_page_mkwrite(vmf,
+- &xfs_buffered_write_iomap_ops);
++ &xfs_page_mkwrite_iomap_ops);
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ } else {
+ ret = filemap_fault(vmf);
+diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
+index 13851c0d640bc..332da0d7b85cf 100644
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -129,6 +129,10 @@ xfs_growfs_data_private(
+ if (delta < 0 && nagcount < 2)
+ return -EINVAL;
+
++ /* No work to do */
++ if (delta == 0)
++ return 0;
++
+ oagcount = mp->m_sb.sb_agcount;
+ /* allocate the new per-ag structures */
+ if (nagcount > oagcount) {
+diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
+index d884cba1d7072..dd5a664c294f5 100644
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -342,6 +342,9 @@ xfs_iget_recycle(
+
+ trace_xfs_iget_recycle(ip);
+
++ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
++ return -EAGAIN;
++
+ /*
+ * We need to make it look like the inode is being reclaimed to prevent
+ * the actual reclaim workers from stomping over us while we recycle
+@@ -355,6 +358,7 @@ xfs_iget_recycle(
+
+ ASSERT(!rwsem_is_locked(&inode->i_rwsem));
+ error = xfs_reinit_inode(mp, inode);
++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (error) {
+ /*
+ * Re-initializing the inode failed, and we are in deep
+@@ -523,6 +527,8 @@ xfs_iget_cache_hit(
+ if (ip->i_flags & XFS_IRECLAIMABLE) {
+ /* Drops i_flags_lock and RCU read lock. */
+ error = xfs_iget_recycle(pag, ip);
++ if (error == -EAGAIN)
++ goto out_skip;
+ if (error)
+ return error;
+ } else {
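The recycle path above now takes the inode lock with xfs_ilock_nowait() and backs off with -EAGAIN instead of blocking, and the cache-hit caller turns that into out_skip and retries the lookup. A runnable userspace model of the trylock-and-retry shape, with a pthread mutex standing in for the XFS_ILOCK:

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;

    /* models xfs_iget_recycle(): back off with -EAGAIN instead of blocking */
    static int iget_recycle(void)
    {
        if (pthread_mutex_trylock(&ilock) != 0)
            return -EAGAIN;          /* lock held: caller skips and retries */
        /* ...reinitialize the inode under the lock... */
        pthread_mutex_unlock(&ilock);
        return 0;
    }

    int main(void)
    {
        pthread_mutex_lock(&ilock);               /* someone holds the ILOCK */
        printf("recycle: %d\n", iget_recycle());  /* -> -EAGAIN, no blocking */
        pthread_mutex_unlock(&ilock);
        printf("recycle: %d\n", iget_recycle());  /* -> 0 */
        return 0;
    }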
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index aa303be11576f..54b707787f907 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1652,8 +1652,11 @@ xfs_inode_needs_inactive(
+ if (VFS_I(ip)->i_mode == 0)
+ return false;
+
+- /* If this is a read-only mount, don't do this (would generate I/O) */
+- if (xfs_is_readonly(mp))
++ /*
++ * If this is a read-only mount, don't do this (would generate I/O)
++ * unless we're in log recovery and cleaning the iunlinked list.
++ */
++ if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log))
+ return false;
+
+ /* If the log isn't running, push inodes straight to reclaim. */
+@@ -1713,8 +1716,11 @@ xfs_inactive(
+ mp = ip->i_mount;
+ ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
+
+- /* If this is a read-only mount, don't do this (would generate I/O) */
+- if (xfs_is_readonly(mp))
++ /*
++ * If this is a read-only mount, don't do this (would generate I/O)
++ * unless we're in log recovery and cleaning the iunlinked list.
++ */
++ if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log))
+ goto out;
+
+ /* Metadata inodes require explicit resource cleanup. */
+@@ -2479,7 +2485,7 @@ xfs_remove(
+ error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ tp->t_mountp->m_sb.sb_rootino, 0);
+ if (error)
+- return error;
++ goto out_trans_cancel;
+ }
+ } else {
+ /*
+diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
+index 1f783e9796296..85fbb3b71d1c6 100644
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -754,7 +754,7 @@ xfs_bulkstat_fmt(
+ static int
+ xfs_bulk_ireq_setup(
+ struct xfs_mount *mp,
+- struct xfs_bulk_ireq *hdr,
++ const struct xfs_bulk_ireq *hdr,
+ struct xfs_ibulk *breq,
+ void __user *ubuffer)
+ {
+@@ -780,7 +780,7 @@ xfs_bulk_ireq_setup(
+
+ switch (hdr->ino) {
+ case XFS_BULK_IREQ_SPECIAL_ROOT:
+- hdr->ino = mp->m_sb.sb_rootino;
++ breq->startino = mp->m_sb.sb_rootino;
+ break;
+ default:
+ return -EINVAL;
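The hunk above stops xfs_bulk_ireq_setup() from writing the resolved root inode back into the request header: the header becomes const and the resolution lands in breq->startino, so the user-visible request stays intact. A runnable userspace model of the corrected shape; the struct layouts and special value are stand-ins, and the real function handles more cases than shown:

    #include <stdint.h>
    #include <stdio.h>

    #define SPECIAL_ROOT UINT64_MAX  /* stand-in for XFS_BULK_IREQ_SPECIAL_ROOT */

    struct bulk_ireq { uint64_t ino; };       /* copied in from userspace */
    struct ibulk     { uint64_t startino; };  /* kernel-side iteration state */

    /*
     * Taking the header as const makes the original bug impossible to
     * reintroduce: the special value is resolved into kernel state only.
     */
    static int ireq_setup(const struct bulk_ireq *hdr, struct ibulk *breq,
                          uint64_t rootino)
    {
        if (hdr->ino == SPECIAL_ROOT)
            breq->startino = rootino;
        else
            breq->startino = hdr->ino;
        return 0;
    }

    int main(void)
    {
        struct bulk_ireq hdr = { .ino = SPECIAL_ROOT };
        struct ibulk breq;
        ireq_setup(&hdr, &breq, 128);
        printf("startino=%llu, header untouched=%d\n",
               (unsigned long long)breq.startino, hdr.ino == SPECIAL_ROOT);
        return 0;
    }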
+diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
+index 07da03976ec12..ab5512c0bcf7a 100644
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -48,13 +48,45 @@ xfs_alert_fsblock_zero(
+ return -EFSCORRUPTED;
+ }
+
++u64
++xfs_iomap_inode_sequence(
++ struct xfs_inode *ip,
++ u16 iomap_flags)
++{
++ u64 cookie = 0;
++
++ if (iomap_flags & IOMAP_F_XATTR)
++ return READ_ONCE(ip->i_af.if_seq);
++ if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp)
++ cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32;
++ return cookie | READ_ONCE(ip->i_df.if_seq);
++}
++
++/*
++ * Check that the iomap passed to us is still valid for the given offset and
++ * length.
++ */
++static bool
++xfs_iomap_valid(
++ struct inode *inode,
++ const struct iomap *iomap)
++{
++ return iomap->validity_cookie ==
++ xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags);
++}
++
++const struct iomap_page_ops xfs_iomap_page_ops = {
++ .iomap_valid = xfs_iomap_valid,
++};
++
+ int
+ xfs_bmbt_to_iomap(
+ struct xfs_inode *ip,
+ struct iomap *iomap,
+ struct xfs_bmbt_irec *imap,
+ unsigned int mapping_flags,
+- u16 iomap_flags)
++ u16 iomap_flags,
++ u64 sequence_cookie)
+ {
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+@@ -91,6 +123,9 @@ xfs_bmbt_to_iomap(
+ if (xfs_ipincount(ip) &&
+ (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
+ iomap->flags |= IOMAP_F_DIRTY;
++
++ iomap->validity_cookie = sequence_cookie;
++ iomap->page_ops = &xfs_iomap_page_ops;
+ return 0;
+ }
+
+@@ -195,7 +230,8 @@ xfs_iomap_write_direct(
+ xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t count_fsb,
+ unsigned int flags,
+- struct xfs_bmbt_irec *imap)
++ struct xfs_bmbt_irec *imap,
++ u64 *seq)
+ {
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+@@ -285,6 +321,7 @@ xfs_iomap_write_direct(
+ error = xfs_alert_fsblock_zero(ip, imap);
+
+ out_unlock:
++ *seq = xfs_iomap_inode_sequence(ip, 0);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+
+@@ -743,6 +780,7 @@ xfs_direct_write_iomap_begin(
+ bool shared = false;
+ u16 iomap_flags = 0;
+ unsigned int lockmode = XFS_ILOCK_SHARED;
++ u64 seq;
+
+ ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
+
+@@ -811,9 +849,10 @@ xfs_direct_write_iomap_begin(
+ goto out_unlock;
+ }
+
++ seq = xfs_iomap_inode_sequence(ip, iomap_flags);
+ xfs_iunlock(ip, lockmode);
+ trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
+- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
+
+ allocate_blocks:
+ error = -EAGAIN;
+@@ -839,24 +878,26 @@ xfs_direct_write_iomap_begin(
+ xfs_iunlock(ip, lockmode);
+
+ error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
+- flags, &imap);
++ flags, &imap, &seq);
+ if (error)
+ return error;
+
+ trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+- iomap_flags | IOMAP_F_NEW);
++ iomap_flags | IOMAP_F_NEW, seq);
+
+ out_found_cow:
+- xfs_iunlock(ip, lockmode);
+ length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
+ trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
+ if (imap.br_startblock != HOLESTARTBLOCK) {
+- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
++ seq = xfs_iomap_inode_sequence(ip, 0);
++ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
+ if (error)
+- return error;
++ goto out_unlock;
+ }
+- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
++ xfs_iunlock(ip, lockmode);
++ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
+
+ out_unlock:
+ if (lockmode)
+@@ -915,6 +956,7 @@ xfs_buffered_write_iomap_begin(
+ int allocfork = XFS_DATA_FORK;
+ int error = 0;
+ unsigned int lockmode = XFS_ILOCK_EXCL;
++ u64 seq;
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+@@ -926,6 +968,10 @@ xfs_buffered_write_iomap_begin(
+
+ ASSERT(!XFS_IS_REALTIME_INODE(ip));
+
++ error = xfs_qm_dqattach(ip);
++ if (error)
++ return error;
++
+ error = xfs_ilock_for_iomap(ip, flags, &lockmode);
+ if (error)
+ return error;
+@@ -1029,10 +1075,6 @@ xfs_buffered_write_iomap_begin(
+ allocfork = XFS_COW_FORK;
+ }
+
+- error = xfs_qm_dqattach_locked(ip, false);
+- if (error)
+- goto out_unlock;
+-
+ if (eof && offset + count > XFS_ISIZE(ip)) {
+ /*
+ * Determine the initial size of the preallocation.
+@@ -1094,32 +1136,47 @@ xfs_buffered_write_iomap_begin(
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
+- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
+
+ found_imap:
++ seq = xfs_iomap_inode_sequence(ip, 0);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
+
+ found_cow:
+- xfs_iunlock(ip, XFS_ILOCK_EXCL);
++ seq = xfs_iomap_inode_sequence(ip, 0);
+ if (imap.br_startoff <= offset_fsb) {
+- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
++ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
+ if (error)
+- return error;
++ goto out_unlock;
++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+- IOMAP_F_SHARED);
++ IOMAP_F_SHARED, seq);
+ }
+
+ xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
+- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
++ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
+
+ out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+ }
+
++static int
++xfs_buffered_write_delalloc_punch(
++ struct inode *inode,
++ loff_t offset,
++ loff_t length)
++{
++ return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset,
++ offset + length);
++}
++
+ static int
+ xfs_buffered_write_iomap_end(
+ struct inode *inode,
+@@ -1129,56 +1186,17 @@ xfs_buffered_write_iomap_end(
+ unsigned flags,
+ struct iomap *iomap)
+ {
+- struct xfs_inode *ip = XFS_I(inode);
+- struct xfs_mount *mp = ip->i_mount;
+- xfs_fileoff_t start_fsb;
+- xfs_fileoff_t end_fsb;
+- int error = 0;
+-
+- if (iomap->type != IOMAP_DELALLOC)
+- return 0;
+-
+- /*
+- * Behave as if the write failed if drop writes is enabled. Set the NEW
+- * flag to force delalloc cleanup.
+- */
+- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
+- iomap->flags |= IOMAP_F_NEW;
+- written = 0;
+- }
+
+- /*
+- * start_fsb refers to the first unused block after a short write. If
+- * nothing was written, round offset down to point at the first block in
+- * the range.
+- */
+- if (unlikely(!written))
+- start_fsb = XFS_B_TO_FSBT(mp, offset);
+- else
+- start_fsb = XFS_B_TO_FSB(mp, offset + written);
+- end_fsb = XFS_B_TO_FSB(mp, offset + length);
++ struct xfs_mount *mp = XFS_M(inode->i_sb);
++ int error;
+
+- /*
+- * Trim delalloc blocks if they were allocated by this write and we
+- * didn't manage to write the whole range.
+- *
+- * We don't need to care about racing delalloc as we hold i_mutex
+- * across the reserve/allocate/unreserve calls. If there are delalloc
+- * blocks in the range, they are ours.
+- */
+- if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
+- truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
+- XFS_FSB_TO_B(mp, end_fsb) - 1);
+-
+- error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+- end_fsb - start_fsb);
+- if (error && !xfs_is_shutdown(mp)) {
+- xfs_alert(mp, "%s: unable to clean up ino %lld",
+- __func__, ip->i_ino);
+- return error;
+- }
++ error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
++ length, written, &xfs_buffered_write_delalloc_punch);
++ if (error && !xfs_is_shutdown(mp)) {
++ xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
++ __func__, XFS_I(inode)->i_ino);
++ return error;
+ }
+-
+ return 0;
+ }
+
+@@ -1187,6 +1205,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = {
+ .iomap_end = xfs_buffered_write_iomap_end,
+ };
+
++/*
++ * iomap_page_mkwrite() will never fail in a way that requires delalloc extents
++ * that it allocated to be revoked. Hence we do not need an .iomap_end method
++ * for this operation.
++ */
++const struct iomap_ops xfs_page_mkwrite_iomap_ops = {
++ .iomap_begin = xfs_buffered_write_iomap_begin,
++};
++
+ static int
+ xfs_read_iomap_begin(
+ struct inode *inode,
+@@ -1204,6 +1231,7 @@ xfs_read_iomap_begin(
+ int nimaps = 1, error = 0;
+ bool shared = false;
+ unsigned int lockmode = XFS_ILOCK_SHARED;
++ u64 seq;
+
+ ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));
+
+@@ -1217,13 +1245,14 @@ xfs_read_iomap_begin(
+ &nimaps, 0);
+ if (!error && (flags & IOMAP_REPORT))
+ error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
++ seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0);
+ xfs_iunlock(ip, lockmode);
+
+ if (error)
+ return error;
+ trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+- shared ? IOMAP_F_SHARED : 0);
++ shared ? IOMAP_F_SHARED : 0, seq);
+ }
+
+ const struct iomap_ops xfs_read_iomap_ops = {
+@@ -1248,6 +1277,7 @@ xfs_seek_iomap_begin(
+ struct xfs_bmbt_irec imap, cmap;
+ int error = 0;
+ unsigned lockmode;
++ u64 seq;
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+@@ -1282,8 +1312,9 @@ xfs_seek_iomap_begin(
+ if (data_fsb < cow_fsb + cmap.br_blockcount)
+ end_fsb = min(end_fsb, data_fsb);
+ xfs_trim_extent(&cmap, offset_fsb, end_fsb);
++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+ error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+- IOMAP_F_SHARED);
++ IOMAP_F_SHARED, seq);
+ /*
+ * This is a COW extent, so we must probe the page cache
+ * because there could be dirty page cache being backed
+@@ -1304,8 +1335,9 @@ xfs_seek_iomap_begin(
+ imap.br_startblock = HOLESTARTBLOCK;
+ imap.br_state = XFS_EXT_NORM;
+ done:
++ seq = xfs_iomap_inode_sequence(ip, 0);
+ xfs_trim_extent(&imap, offset_fsb, end_fsb);
+- error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
++ error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
+ out_unlock:
+ xfs_iunlock(ip, lockmode);
+ return error;
+@@ -1331,6 +1363,7 @@ xfs_xattr_iomap_begin(
+ struct xfs_bmbt_irec imap;
+ int nimaps = 1, error = 0;
+ unsigned lockmode;
++ int seq;
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+@@ -1347,12 +1380,14 @@ xfs_xattr_iomap_begin(
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+ &nimaps, XFS_BMAPI_ATTRFORK);
+ out_unlock:
++
++ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR);
+ xfs_iunlock(ip, lockmode);
+
+ if (error)
+ return error;
+ ASSERT(nimaps);
+- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
++ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq);
+ }
+
+ const struct iomap_ops xfs_xattr_iomap_ops = {
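xfs_iomap_inode_sequence() above packs per-fork sequence counters into the 64-bit validity cookie: the attr fork counter alone for IOMAP_F_XATTR mappings, otherwise the COW fork counter in the upper 32 bits over the data fork counter in the lower 32. A runnable userspace model of the packing and the staleness check; the struct and flag values are stand-ins:

    #include <stdint.h>
    #include <stdio.h>

    /* stand-in fork sequence counters; bumped on any extent-map change */
    struct inode_seqs { uint32_t data_seq, cow_seq, attr_seq; int has_cow; };

    #define F_SHARED (1u << 2)
    #define F_XATTR  (1u << 6)

    static uint64_t iomap_inode_sequence(const struct inode_seqs *s,
                                         uint16_t flags)
    {
        uint64_t cookie = 0;

        if (flags & F_XATTR)
            return s->attr_seq;
        if ((flags & F_SHARED) && s->has_cow)
            cookie = (uint64_t)s->cow_seq << 32;
        return cookie | s->data_seq;
    }

    int main(void)
    {
        struct inode_seqs s = { .data_seq = 7, .cow_seq = 3, .has_cow = 1 };
        uint64_t c = iomap_inode_sequence(&s, F_SHARED);

        s.cow_seq++;   /* any COW fork change invalidates the cookie */
        printf("stale=%d\n", c != iomap_inode_sequence(&s, F_SHARED));
        return 0;
    }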
+diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
+index c782e8c0479c0..4da13440bae9b 100644
+--- a/fs/xfs/xfs_iomap.h
++++ b/fs/xfs/xfs_iomap.h
+@@ -13,14 +13,15 @@ struct xfs_bmbt_irec;
+
+ int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t count_fsb, unsigned int flags,
+- struct xfs_bmbt_irec *imap);
++ struct xfs_bmbt_irec *imap, u64 *sequence);
+ int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
+ xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
+ xfs_fileoff_t end_fsb);
+
++u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags);
+ int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
+ struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
+- u16 iomap_flags);
++ u16 iomap_flags, u64 sequence_cookie);
+
+ int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
+ bool *did_zero);
+@@ -47,6 +48,7 @@ xfs_aligned_fsb_count(
+ }
+
+ extern const struct iomap_ops xfs_buffered_write_iomap_ops;
++extern const struct iomap_ops xfs_page_mkwrite_iomap_ops;
+ extern const struct iomap_ops xfs_direct_write_iomap_ops;
+ extern const struct iomap_ops xfs_read_iomap_ops;
+ extern const struct iomap_ops xfs_seek_iomap_ops;
+diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
+index f02a0dd522b3d..d9aa5eab02c3f 100644
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -730,15 +730,7 @@ xfs_log_mount(
+ * just worked.
+ */
+ if (!xfs_has_norecovery(mp)) {
+- /*
+- * log recovery ignores readonly state and so we need to clear
+- * mount-based read only state so it can write to disk.
+- */
+- bool readonly = test_and_clear_bit(XFS_OPSTATE_READONLY,
+- &mp->m_opstate);
+ error = xlog_recover(log);
+- if (readonly)
+- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+ if (error) {
+ xfs_warn(mp, "log mount/recovery failed: error %d",
+ error);
+@@ -787,7 +779,6 @@ xfs_log_mount_finish(
+ struct xfs_mount *mp)
+ {
+ struct xlog *log = mp->m_log;
+- bool readonly;
+ int error = 0;
+
+ if (xfs_has_norecovery(mp)) {
+@@ -795,12 +786,6 @@ xfs_log_mount_finish(
+ return 0;
+ }
+
+- /*
+- * log recovery ignores readonly state and so we need to clear
+- * mount-based read only state so it can write to disk.
+- */
+- readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+-
+ /*
+ * During the second phase of log recovery, we need iget and
+ * iput to behave like they do for an active filesystem.
+@@ -850,8 +835,6 @@ xfs_log_mount_finish(
+ xfs_buftarg_drain(mp->m_ddev_targp);
+
+ clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate);
+- if (readonly)
+- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+
+ /* Make sure the log is dead if we're returning failure. */
+ ASSERT(!error || xlog_is_shutdown(log));
+@@ -886,6 +869,23 @@ xlog_force_iclog(
+ return xlog_state_release_iclog(iclog->ic_log, iclog, NULL);
+ }
+
++/*
++ * Cycle all the iclogbuf locks to make sure all log IO completion
++ * is done before we tear down these buffers.
++ */
++static void
++xlog_wait_iclog_completion(struct xlog *log)
++{
++ int i;
++ struct xlog_in_core *iclog = log->l_iclog;
++
++ for (i = 0; i < log->l_iclog_bufs; i++) {
++ down(&iclog->ic_sema);
++ up(&iclog->ic_sema);
++ iclog = iclog->ic_next;
++ }
++}
++
+ /*
+ * Wait for the iclog and all prior iclogs to be written to disk as required by
+ * log force state machine. Waiting on ic_force_wait ensures iclog completions
+@@ -1111,6 +1111,14 @@ xfs_log_unmount(
+ {
+ xfs_log_clean(mp);
+
++ /*
++ * If shutdown has come from iclog IO context, the log
++ * cleaning will have been skipped and so we need to wait
++ * for the iclog to complete shutdown processing before we
++ * tear anything down.
++ */
++ xlog_wait_iclog_completion(mp->m_log);
++
+ xfs_buftarg_drain(mp->m_ddev_targp);
+
+ xfs_trans_ail_destroy(mp);
+@@ -2113,17 +2121,6 @@ xlog_dealloc_log(
+ xlog_in_core_t *iclog, *next_iclog;
+ int i;
+
+- /*
+- * Cycle all the iclogbuf locks to make sure all log IO completion
+- * is done before we tear down these buffers.
+- */
+- iclog = log->l_iclog;
+- for (i = 0; i < log->l_iclog_bufs; i++) {
+- down(&iclog->ic_sema);
+- up(&iclog->ic_sema);
+- iclog = iclog->ic_next;
+- }
+-
+ /*
+ * Destroy the CIL after waiting for iclog IO completion because an
+ * iclog EIO error will try to shut down the log, which accesses the
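xlog_wait_iclog_completion() above uses the down()/up() pair purely as a barrier: each ic_sema is held while an iclog has IO in flight, so cycling it guarantees the completion handler has finished before teardown. A runnable userspace model of that idiom with one POSIX semaphore (compile with -lpthread; all names are stand-ins):

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdio.h>
    #include <unistd.h>

    static sem_t io_sema;

    /* models an in-flight IO completion that holds the semaphore */
    static void *io_completion(void *arg)
    {
        (void)arg;
        sleep(1);            /* pretend the completion is still running */
        puts("completion done");
        sem_post(&io_sema);  /* release, as the IO completion path does */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        sem_init(&io_sema, 0, 0);   /* 0 = "IO in flight, sema held" */
        pthread_create(&t, NULL, io_completion, NULL);

        /* cycle the semaphore: blocks until the completion releases it */
        sem_wait(&io_sema);
        sem_post(&io_sema);
        puts("safe to tear down");

        pthread_join(t, NULL);
        sem_destroy(&io_sema);
        return 0;
    }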
+diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
+index e8bb3c2e847e1..fb87ffb48f7fe 100644
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -538,6 +538,20 @@ xfs_check_summary_counts(
+ return 0;
+ }
+
++static void
++xfs_unmount_check(
++ struct xfs_mount *mp)
++{
++ if (xfs_is_shutdown(mp))
++ return;
++
++ if (percpu_counter_sum(&mp->m_ifree) >
++ percpu_counter_sum(&mp->m_icount)) {
++ xfs_alert(mp, "ifree/icount mismatch at unmount");
++ xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
++ }
++}
++
+ /*
+ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
+ * internal inode structures can be sitting in the CIL and AIL at this point,
+@@ -1077,6 +1091,7 @@ xfs_unmountfs(
+ if (error)
+ xfs_warn(mp, "Unable to free reserved block pool. "
+ "Freespace may not be correct on next mount.");
++ xfs_unmount_check(mp);
+
+ xfs_log_unmount(mp);
+ xfs_da_unmount(mp);
+diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
+index 37a24f0f7cd40..38d23f0e703a8 100644
+--- a/fs/xfs/xfs_pnfs.c
++++ b/fs/xfs/xfs_pnfs.c
+@@ -125,6 +125,7 @@ xfs_fs_map_blocks(
+ int nimaps = 1;
+ uint lock_flags;
+ int error = 0;
++ u64 seq;
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+@@ -176,6 +177,7 @@ xfs_fs_map_blocks(
+ lock_flags = xfs_ilock_data_map_shared(ip);
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+ &imap, &nimaps, bmapi_flags);
++ seq = xfs_iomap_inode_sequence(ip, 0);
+
+ ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK);
+
+@@ -189,7 +191,7 @@ xfs_fs_map_blocks(
+ xfs_iunlock(ip, lock_flags);
+
+ error = xfs_iomap_write_direct(ip, offset_fsb,
+- end_fsb - offset_fsb, 0, &imap);
++ end_fsb - offset_fsb, 0, &imap, &seq);
+ if (error)
+ goto out_unlock;
+
+@@ -209,7 +211,7 @@ xfs_fs_map_blocks(
+ }
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+
+- error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
++ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq);
+ *device_generation = mp->m_generation;
+ return error;
+ out_unlock:
+diff --git a/include/linux/iomap.h b/include/linux/iomap.h
+index 238a03087e17e..0983dfc9a203c 100644
+--- a/include/linux/iomap.h
++++ b/include/linux/iomap.h
+@@ -49,26 +49,35 @@ struct vm_fault;
+ *
+ * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
+ * buffer heads for this mapping.
++ *
++ * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent
++ * rather than a file data extent.
+ */
+-#define IOMAP_F_NEW 0x01
+-#define IOMAP_F_DIRTY 0x02
+-#define IOMAP_F_SHARED 0x04
+-#define IOMAP_F_MERGED 0x08
+-#define IOMAP_F_BUFFER_HEAD 0x10
+-#define IOMAP_F_ZONE_APPEND 0x20
++#define IOMAP_F_NEW (1U << 0)
++#define IOMAP_F_DIRTY (1U << 1)
++#define IOMAP_F_SHARED (1U << 2)
++#define IOMAP_F_MERGED (1U << 3)
++#define IOMAP_F_BUFFER_HEAD (1U << 4)
++#define IOMAP_F_ZONE_APPEND (1U << 5)
++#define IOMAP_F_XATTR (1U << 6)
+
+ /*
+ * Flags set by the core iomap code during operations:
+ *
+ * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
+ * has changed as the result of this write operation.
++ *
++ * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file
++ * range it covers needs to be remapped by the high level before the operation
++ * can proceed.
+ */
+-#define IOMAP_F_SIZE_CHANGED 0x100
++#define IOMAP_F_SIZE_CHANGED (1U << 8)
++#define IOMAP_F_STALE (1U << 9)
+
+ /*
+ * Flags from 0x1000 up are for file system specific usage:
+ */
+-#define IOMAP_F_PRIVATE 0x1000
++#define IOMAP_F_PRIVATE (1U << 12)
+
+
+ /*
+@@ -89,6 +98,7 @@ struct iomap {
+ void *inline_data;
+ void *private; /* filesystem private */
+ const struct iomap_page_ops *page_ops;
++ u64 validity_cookie; /* used with .iomap_valid() */
+ };
+
+ static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
+@@ -128,6 +138,23 @@ struct iomap_page_ops {
+ int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
+ void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+ struct page *page);
++
++ /*
++ * Check that the cached iomap still maps correctly to the filesystem's
++ * internal extent map. FS internal extent maps can change while iomap
++ * is iterating a cached iomap, so this hook allows iomap to detect that
++ *	the iomap needs to be refreshed during a long-running write
++ * operation.
++ *
++ * The filesystem can store internal state (e.g. a sequence number) in
++ * iomap->validity_cookie when the iomap is first mapped to be able to
++ * detect changes between mapping time and whenever .iomap_valid() is
++ * called.
++ *
++ * This is called with the folio over the specified file position held
++ * locked by the iomap code.
++ */
++ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
+ };
+
+ /*
+@@ -226,6 +253,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
+
+ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
+ const struct iomap_ops *ops);
++int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
++ struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
++ int (*punch)(struct inode *inode, loff_t pos, loff_t length));
++
+ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
+ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
+ bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
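The new ->iomap_valid hook is consumed by the core buffered-write path roughly as follows: once the folio for the current position is locked, the cached mapping is revalidated, and a failed check marks the iomap IOMAP_F_STALE so the caller remaps and retries. A compilable userspace model of that control flow; fs_current_cookie and the helpers are stand-ins, and the real core-side plumbing is not part of this patch:

    #include <stdbool.h>
    #include <stdio.h>

    #define IOMAP_F_STALE (1u << 9)

    struct iomap { unsigned flags; unsigned long long validity_cookie; };

    /* stand-in for the filesystem's ->iomap_valid implementation */
    static unsigned long long fs_current_cookie = 1;
    static bool fs_iomap_valid(const struct iomap *m)
    {
        return m->validity_cookie == fs_current_cookie;
    }

    static int write_iter_step(struct iomap *m)
    {
        /* ...folio is looked up and locked here... */
        if (!fs_iomap_valid(m)) {
            /* mapping changed underneath us: mark stale, caller remaps */
            m->flags |= IOMAP_F_STALE;
            return -1;
        }
        return 0; /* proceed to copy data into the locked folio */
    }

    int main(void)
    {
        struct iomap m = { .flags = 0, .validity_cookie = 1 };

        fs_current_cookie = 2;  /* e.g. writeback converted delalloc */
        if (write_iter_step(&m) < 0 && (m.flags & IOMAP_F_STALE))
            puts("iomap stale: re-run ->iomap_begin and retry");
        return 0;
    }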
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 8117d0e08d5a2..1393eefbf2187 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -696,8 +696,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
+ /* Made progress, don't sleep yet */
+ continue;
+
+- set_current_state(TASK_INTERRUPTIBLE);
+- if (signalled() || kthread_should_stop()) {
++ set_current_state(TASK_IDLE);
++ if (kthread_should_stop()) {
+ set_current_state(TASK_RUNNING);
+ return -EINTR;
+ }
+@@ -733,7 +733,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
+ return false;
+
+ /* are we shutting down? */
+- if (signalled() || kthread_should_stop())
++ if (kthread_should_stop())
+ return false;
+
+ /* are we freezing? */
+@@ -755,11 +755,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+ if (rqstp->rq_xprt)
+ goto out_found;
+
+- /*
+- * We have to be able to interrupt this wait
+- * to bring down the daemons ...
+- */
+- set_current_state(TASK_INTERRUPTIBLE);
++ set_current_state(TASK_IDLE);
+ smp_mb__before_atomic();
+ clear_bit(SP_CONGESTED, &pool->sp_flags);
+ clear_bit(RQ_BUSY, &rqstp->rq_flags);
+@@ -781,7 +777,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+ if (!time_left)
+ atomic_long_inc(&pool->sp_stats.threads_timedout);
+
+- if (signalled() || kthread_should_stop())
++ if (kthread_should_stop())
+ return ERR_PTR(-EINTR);
+ return ERR_PTR(-EAGAIN);
+ out_found:
+@@ -879,7 +875,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
+ try_to_freeze();
+ cond_resched();
+ err = -EINTR;
+- if (signalled() || kthread_should_stop())
++ if (kthread_should_stop())
+ goto out;
+
+ xprt = svc_get_next_xprt(rqstp, timeout);
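With signalled() gone, the svc threads above sleep in TASK_IDLE, so signals can no longer wake them and the sleeps stop counting toward load average; the only shutdown path left is kthread_should_stop(). A runnable userspace model of that single-wakeup-source loop, using a condition variable plus an explicit stop flag as the kthread_should_stop() analogue (compile with -lpthread):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
    static bool should_stop;     /* stands in for kthread_should_stop() */
    static bool have_work;

    static void *svc_thread(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!should_stop) {
            if (have_work) {
                have_work = false;
                puts("handled work");
                continue;
            }
            /* no signal-interruption exit path; conditions are re-checked
             * on every wakeup, mirroring the TASK_IDLE loop above */
            pthread_cond_wait(&wake, &lock);
        }
        pthread_mutex_unlock(&lock);
        puts("stopping");
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, svc_thread, NULL);

        pthread_mutex_lock(&lock);
        have_work = true;               /* hand the thread one work item */
        pthread_cond_signal(&wake);
        pthread_mutex_unlock(&lock);

        pthread_mutex_lock(&lock);
        should_stop = true;             /* the kthread_stop() analogue */
        pthread_cond_signal(&wake);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        return 0;
    }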
+diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
+index bc700f85f80be..ea277c55a38db 100644
+--- a/security/keys/trusted-keys/trusted_tpm2.c
++++ b/security/keys/trusted-keys/trusted_tpm2.c
+@@ -38,6 +38,7 @@ static int tpm2_key_encode(struct trusted_key_payload *payload,
+ u8 *end_work = scratch + SCRATCH_SIZE;
+ u8 *priv, *pub;
+ u16 priv_len, pub_len;
++ int ret;
+
+ priv_len = get_unaligned_be16(src) + 2;
+ priv = src;
+@@ -57,8 +58,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload,
+ unsigned char bool[3], *w = bool;
+ /* tag 0 is emptyAuth */
+ w = asn1_encode_boolean(w, w + sizeof(bool), true);
+- if (WARN(IS_ERR(w), "BUG: Boolean failed to encode"))
+- return PTR_ERR(w);
++ if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) {
++ ret = PTR_ERR(w);
++ goto err;
++ }
+ work = asn1_encode_tag(work, end_work, 0, bool, w - bool);
+ }
+
+@@ -69,8 +72,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload,
+ * trigger, so if it does there's something nefarious going on
+ */
+ if (WARN(work - scratch + pub_len + priv_len + 14 > SCRATCH_SIZE,
+- "BUG: scratch buffer is too small"))
+- return -EINVAL;
++ "BUG: scratch buffer is too small")) {
++ ret = -EINVAL;
++ goto err;
++ }
+
+ work = asn1_encode_integer(work, end_work, options->keyhandle);
+ work = asn1_encode_octet_string(work, end_work, pub, pub_len);
+@@ -79,10 +84,18 @@ static int tpm2_key_encode(struct trusted_key_payload *payload,
+ work1 = payload->blob;
+ work1 = asn1_encode_sequence(work1, work1 + sizeof(payload->blob),
+ scratch, work - scratch);
+- if (WARN(IS_ERR(work1), "BUG: ASN.1 encoder failed"))
+- return PTR_ERR(work1);
++ if (IS_ERR(work1)) {
++ ret = PTR_ERR(work1);
++ pr_err("BUG: ASN.1 encoder failed with %d\n", ret);
++ goto err;
++ }
+
++ kfree(scratch);
+ return work1 - payload->blob;
++
++err:
++ kfree(scratch);
++ return ret;
+ }
+
+ struct tpm2_key_context {
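The tpm2_key_encode() hunks above fix a memory leak: the early returns skipped freeing the scratch buffer, so every failure path is now routed through a common exit that frees it before returning. A runnable userspace model of that centralized-cleanup shape; buffer sizes and error numbers are stand-ins:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* models the fix: every failure path frees the scratch buffer */
    static int encode_blob(char *dst, size_t dstlen, const char *src)
    {
        int ret;
        char *scratch = malloc(64);

        if (!scratch)
            return -12;  /* -ENOMEM: nothing allocated yet, plain return */

        if (strlen(src) + 1 > 64) {
            ret = -22;   /* -EINVAL: was an early return that leaked */
            goto err;
        }
        strcpy(scratch, src);

        if (strlen(scratch) + 1 > dstlen) {
            ret = -22;
            goto err;
        }
        strcpy(dst, scratch);

        free(scratch);
        return (int)strlen(dst);

    err:
        free(scratch);   /* single exit owns the cleanup */
        return ret;
    }

    int main(void)
    {
        char blob[16];

        printf("%d\n", encode_blob(blob, sizeof(blob), "hello"));
        printf("%d\n", encode_blob(blob, sizeof(blob),
                                   "this string is far too long for blob"));
        return 0;
    }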