summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomáš Mózes <hydrapolic@gmail.com>2023-10-18 20:30:08 +0200
committerTomáš Mózes <hydrapolic@gmail.com>2023-10-18 20:30:08 +0200
commitffe00bc5becaed2dbaed9fdcadb6eea0bd4f9dd4 (patch)
tree52368f6790930150973266710b54e6142adf91bd
parentXen 4.17.1-pre-patchset-0 (diff)
downloadxen-upstream-patches-ffe00bc5becaed2dbaed9fdcadb6eea0bd4f9dd4.tar.gz
xen-upstream-patches-ffe00bc5becaed2dbaed9fdcadb6eea0bd4f9dd4.tar.bz2
xen-upstream-patches-ffe00bc5becaed2dbaed9fdcadb6eea0bd4f9dd4.zip
Xen 4.17.3-pre-patchset-0 4.17.3-pre-patchset-0
Signed-off-by: Tomáš Mózes <hydrapolic@gmail.com>
-rw-r--r--0001-update-Xen-version-to-4.17.1-pre.patch136
-rw-r--r--0001-update-Xen-version-to-4.17.3-pre.patch25
-rw-r--r--0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch84
-rw-r--r--0002-x86-irq-do-not-release-irq-until-all-cleanup-is-done.patch90
-rw-r--r--0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch45
-rw-r--r--0003-x86-pvh-do-not-forward-MADT-Local-APIC-NMI-structure.patch103
-rw-r--r--0004-build-define-ARCH-and-SRCARCH-later.patch67
-rw-r--r--0004-x86-HVM-don-t-mark-external-IRQs-as-pending-when-vLA.patch71
-rw-r--r--0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch50
-rw-r--r--0005-x86-Viridian-don-t-mark-IRQ-vectors-as-pending-when-.patch60
-rw-r--r--0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch123
-rw-r--r--0006-x86-HVM-don-t-mark-evtchn-upcall-vector-as-pending-w.patch70
-rw-r--r--0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch58
-rw-r--r--0007-ioreq_broadcast-accept-partial-broadcast-success.patch34
-rw-r--r--0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch50
-rw-r--r--0008-EFI-relocate-the-ESRT-when-booting-via-multiboot2.patch195
-rw-r--r--0009-x86-time-prevent-overflow-with-high-frequency-TSCs.patch34
-rw-r--r--0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch245
-rw-r--r--0010-rombios-Work-around-GCC-issue-99578.patch43
-rw-r--r--0010-tools-oxenstored-Fix-incorrect-scope-after-an-if-sta.patch52
-rw-r--r--0011-rombios-Avoid-using-K-R-function-syntax.patch74
-rw-r--r--0011-tools-ocaml-evtchn-OCaml-5-support-fix-potential-res.patch68
-rw-r--r--0012-rombios-Remove-the-use-of-egrep.patch34
-rw-r--r--0012-tools-ocaml-evtchn-Add-binding-for-xenevtchn_fdopen.patch81
-rw-r--r--0013-CI-Resync-FreeBSD-config-with-staging.patch62
-rw-r--r--0013-tools-ocaml-evtchn-Extend-the-init-binding-with-a-cl.patch90
-rw-r--r--0014-tools-oxenstored-Style-fixes-to-Domain.patch64
-rw-r--r--0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch43
-rw-r--r--0015-tools-oxenstored-Bind-the-DOM_EXC-VIRQ-in-in-Event.i.patch82
-rw-r--r--0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch143
-rw-r--r--0016-tools-oxenstored-Rename-some-port-variables-to-remot.patch144
-rw-r--r--0016-x86-head-check-base-address-alignment.patch85
-rw-r--r--0017-tools-oxenstored-Implement-Domain.rebind_evtchn.patch67
-rw-r--r--0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch275
-rw-r--r--0018-tools-oxenstored-Rework-Domain-evtchn-handling-to-us.patch209
-rw-r--r--0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch113
-rw-r--r--0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch367
-rw-r--r--0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch147
-rw-r--r--0020-tools-oxenstored-Log-live-update-issues-at-warning-l.patch42
-rw-r--r--0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch180
-rw-r--r--0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch64
-rw-r--r--0021-tools-oxenstored-Set-uncaught-exception-handler.patch83
-rw-r--r--0022-tools-oxenstored-syslog-Avoid-potential-NULL-derefer.patch55
-rw-r--r--0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch462
-rw-r--r--0023-build-correct-gas-noexecstack-check.patch34
-rw-r--r--0023-tools-oxenstored-Render-backtraces-more-nicely-in-Sy.patch83
-rw-r--r--0024-Revert-tools-xenstore-simplify-loop-handling-connect.patch136
-rw-r--r--0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch38
-rw-r--r--0025-tboot-Disable-CET-at-shutdown.patch53
-rw-r--r--0025-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch36
-rw-r--r--0026-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch50
-rw-r--r--0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch29
-rw-r--r--0027-include-compat-produce-stubs-for-headers-not-otherwi.patch74
-rw-r--r--0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch100
-rw-r--r--0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch41
-rw-r--r--0028-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch342
-rw-r--r--0029-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch83
-rw-r--r--0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch111
-rw-r--r--0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch48
-rw-r--r--0030-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch39
-rw-r--r--0031-build-fix-building-flask-headers-before-descending-i.patch50
-rw-r--r--0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch74
-rw-r--r--0032-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch34
-rw-r--r--0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch85
-rw-r--r--0033-libxl-fix-guest-kexec-skip-cpuid-policy.patch72
-rw-r--r--0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch83
-rw-r--r--0034-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch71
-rw-r--r--0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch106
-rw-r--r--0035-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch41
-rw-r--r--0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch74
-rw-r--r--0036-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch75
-rw-r--r--0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch109
-rw-r--r--0037-tools-ocaml-xb-Drop-Xs_ring.write.patch62
-rw-r--r--0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch89
-rw-r--r--0038-tools-oxenstored-validate-config-file-before-live-up.patch131
-rw-r--r--0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch91
-rw-r--r--0039-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch61
-rw-r--r--0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch228
-rw-r--r--0040-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch80
-rw-r--r--0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch455
-rw-r--r--0041-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch213
-rw-r--r--0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch64
-rw-r--r--0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch186
-rw-r--r--0042-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch70
-rw-r--r--0043-libfsimage-xfs-Remove-dead-code.patch71
-rw-r--r--0043-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch76
-rw-r--r--0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch33
-rw-r--r--0044-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch61
-rw-r--r--0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch137
-rw-r--r--0045-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch120
-rw-r--r--0046-automation-Remove-clang-8-from-Debian-unstable-conta.patch84
-rw-r--r--0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch62
-rw-r--r--0047-libs-util-Fix-parallel-build-between-flex-bison-and-.patch50
-rw-r--r--0047-tools-pygrub-Remove-unnecessary-hypercall.patch60
-rw-r--r--0048-tools-pygrub-Small-refactors.patch65
-rw-r--r--0048-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch126
-rw-r--r--0049-tools-pygrub-Open-the-output-files-earlier.patch105
-rw-r--r--0049-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch195
-rw-r--r--0050-credit2-respect-credit2_runqueue-all-when-arranging-.patch69
-rw-r--r--0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch126
-rw-r--r--0051-build-make-FILE-symbol-paths-consistent.patch42
-rw-r--r--0051-tools-pygrub-Deprivilege-pygrub.patch307
-rw-r--r--0052-libxl-add-support-for-running-bootloader-in-restrict.patch251
-rw-r--r--0052-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch154
-rw-r--r--0053-libxl-limit-bootloader-execution-in-restricted-mode.patch158
-rw-r--r--0053-x86-perform-mem_sharing-teardown-before-paging-teard.patch111
-rw-r--r--0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch104
-rw-r--r--0054-xen-Work-around-Clang-IAS-macro-expansion-bug.patch109
-rw-r--r--0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch86
-rw-r--r--0055-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch83
-rw-r--r--0056-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch91
-rw-r--r--0057-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch65
-rw-r--r--0058-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch56
-rw-r--r--0059-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch90
-rw-r--r--0060-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch92
-rw-r--r--0061-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch50
-rw-r--r--0062-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch126
-rw-r--r--0063-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch56
-rw-r--r--0064-x86-vmx-implement-VMExit-based-guest-Bus-Lock-detect.patch175
-rw-r--r--0065-x86-vmx-introduce-helper-to-set-VMX_INTR_SHADOW_NMI.patch102
-rw-r--r--0066-x86-vmx-implement-Notify-VM-Exit.patch243
-rw-r--r--0067-tools-python-change-s-size-type-for-Python-3.10.patch72
-rw-r--r--0068-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch54
-rw-r--r--0069-x86-spec-ctrl-Add-BHI-controls-to-userspace-componen.patch51
-rw-r--r--0070-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch95
-rw-r--r--0071-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch129
-rw-r--r--0072-VT-d-constrain-IGD-check.patch44
-rw-r--r--0073-bunzip-work-around-gcc13-warning.patch42
-rw-r--r--0074-libacpi-fix-PCI-hotplug-AML.patch57
-rw-r--r--0075-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch42
-rw-r--r--0076-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch44
-rw-r--r--0077-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch47
-rw-r--r--0078-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch101
-rw-r--r--0079-x86-shadow-Fix-build-with-no-PG_log_dirty.patch56
-rw-r--r--0080-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch51
-rw-r--r--0081-x86-ucode-Fix-error-paths-control_thread_fn.patch56
-rw-r--r--0082-include-don-t-mention-stub-headers-more-than-once-in.patch37
-rw-r--r--0083-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch540
-rw-r--r--0084-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch59
-rw-r--r--0085-CI-Drop-automation-configs.patch87
-rw-r--r--0086-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch87
-rw-r--r--0087-automation-Remove-CentOS-7.2-containers-and-builds.patch145
-rw-r--r--0088-automation-Remove-non-debug-x86_32-build-jobs.patch67
-rw-r--r--0089-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch103
-rw-r--r--info.txt6
145 files changed, 6138 insertions, 8495 deletions
diff --git a/0001-update-Xen-version-to-4.17.1-pre.patch b/0001-update-Xen-version-to-4.17.1-pre.patch
deleted file mode 100644
index 1d1bb53..0000000
--- a/0001-update-Xen-version-to-4.17.1-pre.patch
+++ /dev/null
@@ -1,136 +0,0 @@
-From 0b999fa2eadaeff840a8331b87f1f73abf3b14eb Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 20 Dec 2022 13:40:38 +0100
-Subject: [PATCH 01/89] update Xen version to 4.17.1-pre
-
----
- MAINTAINERS | 92 +++++-----------------------------------------------
- xen/Makefile | 2 +-
- 2 files changed, 10 insertions(+), 84 deletions(-)
-
-diff --git a/MAINTAINERS b/MAINTAINERS
-index 175f10f33f..ebb908cc37 100644
---- a/MAINTAINERS
-+++ b/MAINTAINERS
-@@ -54,6 +54,15 @@ list. Remember to copy the appropriate stable branch maintainer who
- will be listed in this section of the MAINTAINERS file in the
- appropriate branch.
-
-+The maintainer for this branch is:
-+
-+ Jan Beulich <jbeulich@suse.com>
-+
-+Tools backport requests should also be copied to:
-+
-+       Anthony Perard <anthony.perard@citrix.com>
-+
-+
- Unstable Subsystem Maintainers
- ==============================
-
-@@ -104,89 +113,6 @@ Descriptions of section entries:
- xen-maintainers-<version format number of this file>
-
-
-- Check-in policy
-- ===============
--
--In order for a patch to be checked in, in general, several conditions
--must be met:
--
--1. In order to get a change to a given file committed, it must have
-- the approval of at least one maintainer of that file.
--
-- A patch of course needs Acks from the maintainers of each file that
-- it changes; so a patch which changes xen/arch/x86/traps.c,
-- xen/arch/x86/mm/p2m.c, and xen/arch/x86/mm/shadow/multi.c would
-- require an Ack from each of the three sets of maintainers.
--
-- See below for rules on nested maintainership.
--
--2. It must have appropriate approval from someone other than the
-- submitter. This can be either:
--
-- a. An Acked-by from a maintainer of the code being touched (a
-- co-maintainer if available, or a more general level maintainer if
-- not available; see the secton on nested maintainership)
--
-- b. A Reviewed-by by anyone of suitable stature in the community
--
--3. Sufficient time must have been given for anyone to respond. This
-- depends in large part upon the urgency and nature of the patch.
-- For a straightforward uncontroversial patch, a day or two may be
-- sufficient; for a controversial patch, a week or two may be better.
--
--4. There must be no "open" objections.
--
--In a case where one person submits a patch and a maintainer gives an
--Ack, the Ack stands in for both the approval requirement (#1) and the
--Acked-by-non-submitter requirement (#2).
--
--In a case where a maintainer themselves submits a patch, the
--Signed-off-by meets the approval requirement (#1); so a Review
--from anyone in the community suffices for requirement #2.
--
--Before a maintainer checks in their own patch with another community
--member's R-b but no co-maintainer Ack, it is especially important to
--give their co-maintainer opportunity to give feedback, perhaps
--declaring their intention to check it in without their co-maintainers
--ack a day before doing so.
--
--Maintainers may choose to override non-maintainer objections in the
--case that consensus can't be reached.
--
--As always, no policy can cover all possible situations. In
--exceptional circumstances, committers may commit a patch in absence of
--one or more of the above requirements, if they are reasonably
--confident that the other maintainers will approve of their decision in
--retrospect.
--
-- The meaning of nesting
-- ======================
--
--Many maintainership areas are "nested": for example, there are entries
--for xen/arch/x86 as well as xen/arch/x86/mm, and even
--xen/arch/x86/mm/shadow; and there is a section at the end called "THE
--REST" which lists all committers. The meaning of nesting is that:
--
--1. Under normal circumstances, the Ack of the most specific maintainer
--is both necessary and sufficient to get a change to a given file
--committed. So a change to xen/arch/x86/mm/shadow/multi.c requires the
--the Ack of the xen/arch/x86/mm/shadow maintainer for that part of the
--patch, but would not require the Ack of the xen/arch/x86 maintainer or
--the xen/arch/x86/mm maintainer.
--
--2. In unusual circumstances, a more general maintainer's Ack can stand
--in for or even overrule a specific maintainer's Ack. Unusual
--circumstances might include:
-- - The patch is fixing a high-priority issue causing immediate pain,
-- and the more specific maintainer is not available.
-- - The more specific maintainer has not responded either to the
-- original patch, nor to "pings", within a reasonable amount of time.
-- - The more general maintainer wants to overrule the more specific
-- maintainer on some issue. (This should be exceptional.)
-- - In the case of a disagreement between maintainers, THE REST can
-- settle the matter by majority vote. (This should be very exceptional
-- indeed.)
--
-
- Maintainers List (try to look for most precise areas first)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index d7102a3b47..dcedfbc38e 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -6,7 +6,7 @@ this-makefile := $(call lastword,$(MAKEFILE_LIST))
- # All other places this is stored (eg. compile.h) should be autogenerated.
- export XEN_VERSION = 4
- export XEN_SUBVERSION = 17
--export XEN_EXTRAVERSION ?= .0$(XEN_VENDORVERSION)
-+export XEN_EXTRAVERSION ?= .1-pre$(XEN_VENDORVERSION)
- export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
- -include xen-version
-
---
-2.40.0
-
diff --git a/0001-update-Xen-version-to-4.17.3-pre.patch b/0001-update-Xen-version-to-4.17.3-pre.patch
new file mode 100644
index 0000000..1be1cd1
--- /dev/null
+++ b/0001-update-Xen-version-to-4.17.3-pre.patch
@@ -0,0 +1,25 @@
+From 2f337a04bfc2dda794ae0fc108577ec72932f83b Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 21 Aug 2023 15:52:13 +0200
+Subject: [PATCH 01/55] update Xen version to 4.17.3-pre
+
+---
+ xen/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index fbada570b8..f6005bd536 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -6,7 +6,7 @@ this-makefile := $(call lastword,$(MAKEFILE_LIST))
+ # All other places this is stored (eg. compile.h) should be autogenerated.
+ export XEN_VERSION = 4
+ export XEN_SUBVERSION = 17
+-export XEN_EXTRAVERSION ?= .2$(XEN_VENDORVERSION)
++export XEN_EXTRAVERSION ?= .3-pre$(XEN_VENDORVERSION)
+ export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
+ -include xen-version
+
+--
+2.42.0
+
diff --git a/0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch b/0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch
new file mode 100644
index 0000000..1b62572
--- /dev/null
+++ b/0002-x86-fix-build-with-old-gcc-after-CPU-policy-changes.patch
@@ -0,0 +1,84 @@
+From 7d8897984927a51495e9a1b827aa4bce1d779b87 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 21 Aug 2023 15:53:17 +0200
+Subject: [PATCH 02/55] x86: fix build with old gcc after CPU policy changes
+
+Old gcc won't cope with initializers involving unnamed struct/union
+fields.
+
+Fixes: 441b1b2a50ea ("x86/emul: Switch x86_emulate_ctxt to cpu_policy")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 768846690d64bc730c1a1123e8de3af731bb2eb3
+master date: 2023-04-19 11:02:47 +0200
+---
+ tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 4 +++-
+ xen/arch/x86/pv/emul-priv-op.c | 4 +++-
+ xen/arch/x86/pv/ro-page-fault.c | 4 +++-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+index 4885a68210..eeeb6931f4 100644
+--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
++++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+@@ -893,12 +893,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
+ struct x86_emulate_ctxt ctxt = {
+ .data = &state,
+ .regs = &input.regs,
+- .cpu_policy = &cp,
+ .addr_size = 8 * sizeof(void *),
+ .sp_size = 8 * sizeof(void *),
+ };
+ int rc;
+
++ /* Not part of the initializer, for old gcc to cope. */
++ ctxt.cpu_policy = &cp;
++
+ /* Reset all global state variables */
+ memset(&input, 0, sizeof(input));
+
+diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
+index 04416f1979..2c94beb10e 100644
+--- a/xen/arch/x86/pv/emul-priv-op.c
++++ b/xen/arch/x86/pv/emul-priv-op.c
+@@ -1327,12 +1327,14 @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs)
+ struct domain *currd = curr->domain;
+ struct priv_op_ctxt ctxt = {
+ .ctxt.regs = regs,
+- .ctxt.cpu_policy = currd->arch.cpu_policy,
+ .ctxt.lma = !is_pv_32bit_domain(currd),
+ };
+ int rc;
+ unsigned int eflags, ar;
+
++ /* Not part of the initializer, for old gcc to cope. */
++ ctxt.ctxt.cpu_policy = currd->arch.cpu_policy;
++
+ if ( !pv_emul_read_descriptor(regs->cs, curr, &ctxt.cs.base,
+ &ctxt.cs.limit, &ar, 1) ||
+ !(ar & _SEGMENT_S) ||
+diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
+index 0d02c7d2ab..f23ad5d184 100644
+--- a/xen/arch/x86/pv/ro-page-fault.c
++++ b/xen/arch/x86/pv/ro-page-fault.c
+@@ -356,7 +356,6 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+ unsigned int addr_size = is_pv_32bit_domain(currd) ? 32 : BITS_PER_LONG;
+ struct x86_emulate_ctxt ctxt = {
+ .regs = regs,
+- .cpu_policy = currd->arch.cpu_policy,
+ .addr_size = addr_size,
+ .sp_size = addr_size,
+ .lma = addr_size > 32,
+@@ -364,6 +363,9 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+ int rc;
+ bool mmio_ro;
+
++ /* Not part of the initializer, for old gcc to cope. */
++ ctxt.cpu_policy = currd->arch.cpu_policy;
++
+ /* Attempt to read the PTE that maps the VA being accessed. */
+ pte = guest_get_eff_kern_l1e(addr);
+
+--
+2.42.0
+
diff --git a/0002-x86-irq-do-not-release-irq-until-all-cleanup-is-done.patch b/0002-x86-irq-do-not-release-irq-until-all-cleanup-is-done.patch
deleted file mode 100644
index 1c7a13d..0000000
--- a/0002-x86-irq-do-not-release-irq-until-all-cleanup-is-done.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From 9cbc04a95f8a7f7cc27901211cbe19a42850c4ed Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 20 Dec 2022 13:43:04 +0100
-Subject: [PATCH 02/89] x86/irq: do not release irq until all cleanup is done
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Current code in _clear_irq_vector() will mark the irq as unused before
-doing the cleanup required when move_in_progress is true.
-
-This can lead to races in create_irq() if the function picks an irq
-desc that's been marked as unused but has move_in_progress set, as the
-call to assign_irq_vector() in that function can then fail with
--EAGAIN.
-
-Prevent that by only marking irq descs as unused when all the cleanup
-has been done. While there also use write_atomic() when setting
-IRQ_UNUSED in _clear_irq_vector() and add a barrier in order to
-prevent the setting of IRQ_UNUSED getting reordered by the compiler.
-
-The check for move_in_progress cannot be removed from
-_assign_irq_vector(), as other users (io_apic_set_pci_routing() and
-ioapic_guest_write()) can still pass active irq descs to
-assign_irq_vector().
-
-Note the trace point is not moved and is now set before the irq is
-marked as unused. This is done so that the CPU mask provided in the
-trace point is the one belonging to the current vector, not the old
-one.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: e267d11969a40f0aec33dbf966f5a6490b205f43
-master date: 2022-12-02 10:32:21 +0100
----
- xen/arch/x86/irq.c | 31 ++++++++++++++++---------------
- 1 file changed, 16 insertions(+), 15 deletions(-)
-
-diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
-index cd0c8a30a8..20150b1c7f 100644
---- a/xen/arch/x86/irq.c
-+++ b/xen/arch/x86/irq.c
-@@ -220,27 +220,28 @@ static void _clear_irq_vector(struct irq_desc *desc)
- clear_bit(vector, desc->arch.used_vectors);
- }
-
-- desc->arch.used = IRQ_UNUSED;
--
- trace_irq_mask(TRC_HW_IRQ_CLEAR_VECTOR, irq, vector, tmp_mask);
-
-- if ( likely(!desc->arch.move_in_progress) )
-- return;
-+ if ( unlikely(desc->arch.move_in_progress) )
-+ {
-+ /* If we were in motion, also clear desc->arch.old_vector */
-+ old_vector = desc->arch.old_vector;
-+ cpumask_and(tmp_mask, desc->arch.old_cpu_mask, &cpu_online_map);
-
-- /* If we were in motion, also clear desc->arch.old_vector */
-- old_vector = desc->arch.old_vector;
-- cpumask_and(tmp_mask, desc->arch.old_cpu_mask, &cpu_online_map);
-+ for_each_cpu(cpu, tmp_mask)
-+ {
-+ ASSERT(per_cpu(vector_irq, cpu)[old_vector] == irq);
-+ TRACE_3D(TRC_HW_IRQ_MOVE_FINISH, irq, old_vector, cpu);
-+ per_cpu(vector_irq, cpu)[old_vector] = ~irq;
-+ }
-
-- for_each_cpu(cpu, tmp_mask)
-- {
-- ASSERT(per_cpu(vector_irq, cpu)[old_vector] == irq);
-- TRACE_3D(TRC_HW_IRQ_MOVE_FINISH, irq, old_vector, cpu);
-- per_cpu(vector_irq, cpu)[old_vector] = ~irq;
-- }
-+ release_old_vec(desc);
-
-- release_old_vec(desc);
-+ desc->arch.move_in_progress = 0;
-+ }
-
-- desc->arch.move_in_progress = 0;
-+ smp_wmb();
-+ write_atomic(&desc->arch.used, IRQ_UNUSED);
- }
-
- void __init clear_irq_vector(int irq)
---
-2.40.0
-
diff --git a/0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch b/0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch
new file mode 100644
index 0000000..a395d7a
--- /dev/null
+++ b/0003-libxl-Use-XEN_LIB_DIR-to-store-bootloader-from-pygru.patch
@@ -0,0 +1,45 @@
+From 8d84be5b557b27e9cc53e48285aebad28a48468c Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Mon, 21 Aug 2023 15:53:47 +0200
+Subject: [PATCH 03/55] libxl: Use XEN_LIB_DIR to store bootloader from pygrub
+
+In osstest, the jobs using pygrub on arm64 on the branch linux-linus
+started to fail with:
+ [Errno 28] No space left on device
+ Error writing temporary copy of ramdisk
+
+This is because /var/run is small: when dom0 has only 512MB to work
+with, /var/run is only 40MB. The size of both kernel and ramdisk in
+these jobs is now about 42MB, so there is not enough space in /var/run.
+
+So, to avoid writing a big binary in ramfs, we will use /var/lib
+instead, like we already do when saving the device model state on
+migration.
+
+Reported-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
+master commit: ad89640ad766d3cb6c92fc8b6406ca6bbab44136
+master date: 2023-08-08 09:45:20 +0200
+---
+ tools/libs/light/libxl_bootloader.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
+index 1bc6e51827..108329b4a5 100644
+--- a/tools/libs/light/libxl_bootloader.c
++++ b/tools/libs/light/libxl_bootloader.c
+@@ -245,8 +245,8 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
+ static void bootloader_setpaths(libxl__gc *gc, libxl__bootloader_state *bl)
+ {
+ uint32_t domid = bl->domid;
+- bl->outputdir = GCSPRINTF(XEN_RUN_DIR "/bootloader.%"PRIu32".d", domid);
+- bl->outputpath = GCSPRINTF(XEN_RUN_DIR "/bootloader.%"PRIu32".out", domid);
++ bl->outputdir = GCSPRINTF(XEN_LIB_DIR "/bootloader.%"PRIu32".d", domid);
++ bl->outputpath = GCSPRINTF(XEN_LIB_DIR "/bootloader.%"PRIu32".out", domid);
+ }
+
+ /* Callbacks */
+--
+2.42.0
+
diff --git a/0003-x86-pvh-do-not-forward-MADT-Local-APIC-NMI-structure.patch b/0003-x86-pvh-do-not-forward-MADT-Local-APIC-NMI-structure.patch
deleted file mode 100644
index 47d6997..0000000
--- a/0003-x86-pvh-do-not-forward-MADT-Local-APIC-NMI-structure.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From b7b34bd66ac77326bb49b10130013b4a9f83e4a2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 20 Dec 2022 13:43:37 +0100
-Subject: [PATCH 03/89] x86/pvh: do not forward MADT Local APIC NMI structures
- to dom0
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Currently Xen will pass through any Local APIC NMI Structure found in
-the native ACPI MADT table to a PVH dom0. This is wrong because PVH
-doesn't have access to the physical local APIC, and instead gets an
-emulated local APIC by Xen, that doesn't have the LINT0 or LINT1
-pins wired to anything. Furthermore the ACPI Processor UIDs used in
-the APIC NMI Structures are likely to not match the ones generated by
-Xen for the Local x2APIC Structures, creating confusion to dom0.
-
-Fix this by removing the logic to pass through the Local APIC NMI
-Structure for PVH dom0.
-
-Fixes: 1d74282c45 ('x86: setup PVHv2 Dom0 ACPI tables')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: b39e6385250ccef9509af0eab9003ad5c1478842
-master date: 2022-12-02 10:33:40 +0100
----
- xen/arch/x86/hvm/dom0_build.c | 34 +---------------------------------
- 1 file changed, 1 insertion(+), 33 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
-index 1864d048a1..3ac6b7b423 100644
---- a/xen/arch/x86/hvm/dom0_build.c
-+++ b/xen/arch/x86/hvm/dom0_build.c
-@@ -58,9 +58,6 @@
- static unsigned int __initdata acpi_intr_overrides;
- static struct acpi_madt_interrupt_override __initdata *intsrcovr;
-
--static unsigned int __initdata acpi_nmi_sources;
--static struct acpi_madt_nmi_source __initdata *nmisrc;
--
- static unsigned int __initdata order_stats[MAX_ORDER + 1];
-
- static void __init print_order_stats(const struct domain *d)
-@@ -763,25 +760,6 @@ static int __init cf_check acpi_set_intr_ovr(
- return 0;
- }
-
--static int __init cf_check acpi_count_nmi_src(
-- struct acpi_subtable_header *header, const unsigned long end)
--{
-- acpi_nmi_sources++;
-- return 0;
--}
--
--static int __init cf_check acpi_set_nmi_src(
-- struct acpi_subtable_header *header, const unsigned long end)
--{
-- const struct acpi_madt_nmi_source *src =
-- container_of(header, struct acpi_madt_nmi_source, header);
--
-- *nmisrc = *src;
-- nmisrc++;
--
-- return 0;
--}
--
- static int __init pvh_setup_acpi_madt(struct domain *d, paddr_t *addr)
- {
- struct acpi_table_madt *madt;
-@@ -797,16 +775,11 @@ static int __init pvh_setup_acpi_madt(struct domain *d, paddr_t *addr)
- acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
- acpi_count_intr_ovr, UINT_MAX);
-
-- /* Count number of NMI sources in the MADT. */
-- acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_count_nmi_src,
-- UINT_MAX);
--
- max_vcpus = dom0_max_vcpus();
- /* Calculate the size of the crafted MADT. */
- size = sizeof(*madt);
- size += sizeof(*io_apic) * nr_ioapics;
- size += sizeof(*intsrcovr) * acpi_intr_overrides;
-- size += sizeof(*nmisrc) * acpi_nmi_sources;
- size += sizeof(*x2apic) * max_vcpus;
-
- madt = xzalloc_bytes(size);
-@@ -862,12 +835,7 @@ static int __init pvh_setup_acpi_madt(struct domain *d, paddr_t *addr)
- acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_set_intr_ovr,
- acpi_intr_overrides);
-
-- /* Setup NMI sources. */
-- nmisrc = (void *)intsrcovr;
-- acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_set_nmi_src,
-- acpi_nmi_sources);
--
-- ASSERT(((void *)nmisrc - (void *)madt) == size);
-+ ASSERT(((void *)intsrcovr - (void *)madt) == size);
- madt->header.length = size;
- /*
- * Calling acpi_tb_checksum here is a layering violation, but
---
-2.40.0
-
diff --git a/0004-build-define-ARCH-and-SRCARCH-later.patch b/0004-build-define-ARCH-and-SRCARCH-later.patch
new file mode 100644
index 0000000..aebcbb7
--- /dev/null
+++ b/0004-build-define-ARCH-and-SRCARCH-later.patch
@@ -0,0 +1,67 @@
+From 1c3927f8f6743538a35aa45a91a2d4adbde9f277 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Wed, 5 Jul 2023 08:25:03 +0200
+Subject: [PATCH 04/55] build: define ARCH and SRCARCH later
+
+Defining ARCH and SRCARCH later in xen/Makefile allows switching to
+an immediately evaluated variable type.
+
+ARCH and SRCARCH depend on value defined in Config.mk and aren't used
+for e.g. TARGET_SUBARCH or TARGET_ARCH, and not before they're needed in
+a sub-make or a rule.
+
+This will help reduce the number of times the shell rune is
+run.
+
+With GNU make 4.4, the number of executions of the commands present in
+these $(shell ) increased greatly. This is probably because as of make
+4.4, exported variables are also added to the environment of $(shell )
+construct.
+
+Also, `make -d` shows a lot of these:
+ Makefile:39: not recursively expanding SRCARCH to export to shell function
+ Makefile:38: not recursively expanding ARCH to export to shell function
+
+Reported-by: Jason Andryuk <jandryuk@gmail.com>
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Tested-by: Jason Andryuk <jandryuk@gmail.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 58e0a3f3b2c430f8640ef9df67ac857b0008ebc8)
+---
+ xen/Makefile | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index f6005bd536..7ecfa6e8e9 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -35,12 +35,6 @@ MAKEFLAGS += -rR
+
+ EFI_MOUNTPOINT ?= $(BOOT_DIR)/efi
+
+-ARCH=$(XEN_TARGET_ARCH)
+-SRCARCH=$(shell echo $(ARCH) | \
+- sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
+- -e s'/riscv.*/riscv/g')
+-export ARCH SRCARCH
+-
+ # Allow someone to change their config file
+ export KCONFIG_CONFIG ?= .config
+
+@@ -241,6 +235,13 @@ include scripts/Kbuild.include
+ include $(XEN_ROOT)/Config.mk
+
+ # Set ARCH/SUBARCH appropriately.
++
++ARCH := $(XEN_TARGET_ARCH)
++SRCARCH := $(shell echo $(ARCH) | \
++ sed -e 's/x86.*/x86/' -e 's/arm\(32\|64\)/arm/g' \
++ -e 's/riscv.*/riscv/g')
++export ARCH SRCARCH
++
+ export TARGET_SUBARCH := $(XEN_TARGET_ARCH)
+ export TARGET_ARCH := $(shell echo $(XEN_TARGET_ARCH) | \
+ sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
+--
+2.42.0
+
diff --git a/0004-x86-HVM-don-t-mark-external-IRQs-as-pending-when-vLA.patch b/0004-x86-HVM-don-t-mark-external-IRQs-as-pending-when-vLA.patch
deleted file mode 100644
index 01dcba8..0000000
--- a/0004-x86-HVM-don-t-mark-external-IRQs-as-pending-when-vLA.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 54bb56e12868100c5ce06e33b4f57b6b2b8f37b9 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 20 Dec 2022 13:44:07 +0100
-Subject: [PATCH 04/89] x86/HVM: don't mark external IRQs as pending when
- vLAPIC is disabled
-
-In software-disabled state an LAPIC does not accept any interrupt
-requests and hence no IRR bit would newly become set while in this
-state. As a result it is also wrong for us to mark IO-APIC or MSI
-originating vectors as having a pending request when the vLAPIC is in
-this state. Such interrupts are simply lost.
-
-Introduce (IO-APIC) or re-use (MSI) a local variable to help
-readability.
-
-Fixes: 4fe21ad3712e ("This patch add virtual IOAPIC support for VMX guest")
-Fixes: 85715f4bc7c9 ("MSI 5/6: add MSI support to passthrough HVM domain")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: f1d7aac1e3c3cd164e17d41791a575a5c3e87121
-master date: 2022-12-02 10:35:01 +0100
----
- xen/arch/x86/hvm/vioapic.c | 9 +++++++--
- xen/arch/x86/hvm/vmsi.c | 10 ++++++----
- 2 files changed, 13 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
-index cb7f440160..41e3c4d5e4 100644
---- a/xen/arch/x86/hvm/vioapic.c
-+++ b/xen/arch/x86/hvm/vioapic.c
-@@ -460,9 +460,14 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
-
- case dest_Fixed:
- for_each_vcpu ( d, v )
-- if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
-- ioapic_inj_irq(vioapic, vcpu_vlapic(v), vector, trig_mode,
-+ {
-+ struct vlapic *vlapic = vcpu_vlapic(v);
-+
-+ if ( vlapic_enabled(vlapic) &&
-+ vlapic_match_dest(vlapic, NULL, 0, dest, dest_mode) )
-+ ioapic_inj_irq(vioapic, vlapic, vector, trig_mode,
- delivery_mode);
-+ }
- break;
-
- case dest_NMI:
-diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
-index 75f92885dc..3cd4923060 100644
---- a/xen/arch/x86/hvm/vmsi.c
-+++ b/xen/arch/x86/hvm/vmsi.c
-@@ -87,10 +87,12 @@ int vmsi_deliver(
-
- case dest_Fixed:
- for_each_vcpu ( d, v )
-- if ( vlapic_match_dest(vcpu_vlapic(v), NULL,
-- 0, dest, dest_mode) )
-- vmsi_inj_irq(vcpu_vlapic(v), vector,
-- trig_mode, delivery_mode);
-+ {
-+ target = vcpu_vlapic(v);
-+ if ( vlapic_enabled(target) &&
-+ vlapic_match_dest(target, NULL, 0, dest, dest_mode) )
-+ vmsi_inj_irq(target, vector, trig_mode, delivery_mode);
-+ }
- break;
-
- default:
---
-2.40.0
-
diff --git a/0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch b/0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch
new file mode 100644
index 0000000..4f31614
--- /dev/null
+++ b/0005-build-remove-TARGET_SUBARCH-a-duplicate-of-ARCH.patch
@@ -0,0 +1,50 @@
+From 56076ef445073458c39c481f9b70c3b4ff848839 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Wed, 5 Jul 2023 08:27:51 +0200
+Subject: [PATCH 05/55] build: remove TARGET_SUBARCH, a duplicate of ARCH
+
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit a6ab7dd061338c33faef629cbe52ed1608571d84)
+---
+ xen/Makefile | 3 +--
+ xen/build.mk | 2 +-
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index 7ecfa6e8e9..6e89bcf348 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -234,7 +234,7 @@ include scripts/Kbuild.include
+ # we need XEN_TARGET_ARCH to generate the proper config
+ include $(XEN_ROOT)/Config.mk
+
+-# Set ARCH/SUBARCH appropriately.
++# Set ARCH/SRCARCH appropriately.
+
+ ARCH := $(XEN_TARGET_ARCH)
+ SRCARCH := $(shell echo $(ARCH) | \
+@@ -242,7 +242,6 @@ SRCARCH := $(shell echo $(ARCH) | \
+ -e 's/riscv.*/riscv/g')
+ export ARCH SRCARCH
+
+-export TARGET_SUBARCH := $(XEN_TARGET_ARCH)
+ export TARGET_ARCH := $(shell echo $(XEN_TARGET_ARCH) | \
+ sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
+ -e s'/riscv.*/riscv/g')
+diff --git a/xen/build.mk b/xen/build.mk
+index 758590c68e..d049d3a53a 100644
+--- a/xen/build.mk
++++ b/xen/build.mk
+@@ -41,7 +41,7 @@ include/xen/compile.h: include/xen/compile.h.in .banner FORCE
+ targets += include/xen/compile.h
+
+ -include $(wildcard .asm-offsets.s.d)
+-asm-offsets.s: arch/$(TARGET_ARCH)/$(TARGET_SUBARCH)/asm-offsets.c
++asm-offsets.s: arch/$(TARGET_ARCH)/$(ARCH)/asm-offsets.c
+ $(CC) $(call cpp_flags,$(c_flags)) -S -g0 -o $@.new -MQ $@ $<
+ $(call move-if-changed,$@.new,$@)
+
+--
+2.42.0
+
diff --git a/0005-x86-Viridian-don-t-mark-IRQ-vectors-as-pending-when-.patch b/0005-x86-Viridian-don-t-mark-IRQ-vectors-as-pending-when-.patch
deleted file mode 100644
index 3086285..0000000
--- a/0005-x86-Viridian-don-t-mark-IRQ-vectors-as-pending-when-.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From 5810edc049cd5828c2628a377ca8443610e54f82 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 20 Dec 2022 13:44:38 +0100
-Subject: [PATCH 05/89] x86/Viridian: don't mark IRQ vectors as pending when
- vLAPIC is disabled
-
-In software-disabled state an LAPIC does not accept any interrupt
-requests and hence no IRR bit would newly become set while in this
-state. As a result it is also wrong for us to mark Viridian IPI or timer
-vectors as having a pending request when the vLAPIC is in this state.
-Such interrupts are simply lost.
-
-Introduce a local variable in send_ipi() to help readability.
-
-Fixes: fda96b7382ea ("viridian: add implementation of the HvSendSyntheticClusterIpi hypercall")
-Fixes: 26fba3c85571 ("viridian: add implementation of synthetic timers")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-master commit: 831419f82913417dee4e5b0f80769c5db590540b
-master date: 2022-12-02 10:35:32 +0100
----
- xen/arch/x86/hvm/viridian/synic.c | 2 +-
- xen/arch/x86/hvm/viridian/viridian.c | 7 ++++++-
- 2 files changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/viridian/synic.c b/xen/arch/x86/hvm/viridian/synic.c
-index e18538c60a..856bb898b8 100644
---- a/xen/arch/x86/hvm/viridian/synic.c
-+++ b/xen/arch/x86/hvm/viridian/synic.c
-@@ -359,7 +359,7 @@ bool viridian_synic_deliver_timer_msg(struct vcpu *v, unsigned int sintx,
- BUILD_BUG_ON(sizeof(payload) > sizeof(msg->u.payload));
- memcpy(msg->u.payload, &payload, sizeof(payload));
-
-- if ( !vs->masked )
-+ if ( !vs->masked && vlapic_enabled(vcpu_vlapic(v)) )
- vlapic_set_irq(vcpu_vlapic(v), vs->vector, 0);
-
- return true;
-diff --git a/xen/arch/x86/hvm/viridian/viridian.c b/xen/arch/x86/hvm/viridian/viridian.c
-index 25dca93e8b..2937ddd3a8 100644
---- a/xen/arch/x86/hvm/viridian/viridian.c
-+++ b/xen/arch/x86/hvm/viridian/viridian.c
-@@ -811,7 +811,12 @@ static void send_ipi(struct hypercall_vpmask *vpmask, uint8_t vector)
- cpu_raise_softirq_batch_begin();
-
- for_each_vp ( vpmask, vp )
-- vlapic_set_irq(vcpu_vlapic(currd->vcpu[vp]), vector, 0);
-+ {
-+ struct vlapic *vlapic = vcpu_vlapic(currd->vcpu[vp]);
-+
-+ if ( vlapic_enabled(vlapic) )
-+ vlapic_set_irq(vlapic, vector, 0);
-+ }
-
- if ( nr > 1 )
- cpu_raise_softirq_batch_finish();
---
-2.40.0
-
diff --git a/0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch b/0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch
new file mode 100644
index 0000000..9eef37a
--- /dev/null
+++ b/0006-build-remove-TARGET_ARCH-a-duplicate-of-SRCARCH.patch
@@ -0,0 +1,123 @@
+From 36e84ea02e1e8dce8f3a4e9351ab1c72dec3c11e Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Wed, 5 Jul 2023 08:29:49 +0200
+Subject: [PATCH 06/55] build: remove TARGET_ARCH, a duplicate of SRCARCH
+
+The same command is used to generate the value of both $(TARGET_ARCH)
+and $(SRCARCH), as $(ARCH) is an alias for $(XEN_TARGET_ARCH).
+
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit ac27b3beb9b7b423d5563768de890c7594c21b4e)
+---
+ xen/Makefile | 20 ++++++++------------
+ xen/Rules.mk | 2 +-
+ xen/build.mk | 6 +++---
+ 3 files changed, 12 insertions(+), 16 deletions(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index 6e89bcf348..1a3b9a081f 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -242,10 +242,6 @@ SRCARCH := $(shell echo $(ARCH) | \
+ -e 's/riscv.*/riscv/g')
+ export ARCH SRCARCH
+
+-export TARGET_ARCH := $(shell echo $(XEN_TARGET_ARCH) | \
+- sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
+- -e s'/riscv.*/riscv/g')
+-
+ export CONFIG_SHELL := $(SHELL)
+ export CC CXX LD NM OBJCOPY OBJDUMP ADDR2LINE
+ export YACC = $(if $(BISON),$(BISON),bison)
+@@ -262,7 +258,7 @@ export XEN_TREEWIDE_CFLAGS := $(CFLAGS)
+ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+ CLANG_FLAGS :=
+
+-ifeq ($(TARGET_ARCH),x86)
++ifeq ($(SRCARCH),x86)
+ # The tests to select whether the integrated assembler is usable need to happen
+ # before testing any assembler features, or else the result of the tests would
+ # be stale if the integrated assembler is not used.
+@@ -430,22 +426,22 @@ endif
+
+ ifdef building_out_of_srctree
+ CFLAGS += -I$(objtree)/include
+- CFLAGS += -I$(objtree)/arch/$(TARGET_ARCH)/include
++ CFLAGS += -I$(objtree)/arch/$(SRCARCH)/include
+ endif
+ CFLAGS += -I$(srctree)/include
+-CFLAGS += -I$(srctree)/arch/$(TARGET_ARCH)/include
++CFLAGS += -I$(srctree)/arch/$(SRCARCH)/include
+
+ # Note that link order matters!
+ ALL_OBJS-y := common/built_in.o
+ ALL_OBJS-y += drivers/built_in.o
+ ALL_OBJS-y += lib/built_in.o
+ ALL_OBJS-y += xsm/built_in.o
+-ALL_OBJS-y += arch/$(TARGET_ARCH)/built_in.o
++ALL_OBJS-y += arch/$(SRCARCH)/built_in.o
+ ALL_OBJS-$(CONFIG_CRYPTO) += crypto/built_in.o
+
+ ALL_LIBS-y := lib/lib.a
+
+-include $(srctree)/arch/$(TARGET_ARCH)/arch.mk
++include $(srctree)/arch/$(SRCARCH)/arch.mk
+
+ # define new variables to avoid the ones defined in Config.mk
+ export XEN_CFLAGS := $(CFLAGS)
+@@ -587,11 +583,11 @@ $(TARGET): outputmakefile FORCE
+ $(Q)$(MAKE) $(build)=tools
+ $(Q)$(MAKE) $(build)=. include/xen/compile.h
+ $(Q)$(MAKE) $(build)=include all
+- $(Q)$(MAKE) $(build)=arch/$(TARGET_ARCH) include
+- $(Q)$(MAKE) $(build)=. arch/$(TARGET_ARCH)/include/asm/asm-offsets.h
++ $(Q)$(MAKE) $(build)=arch/$(SRCARCH) include
++ $(Q)$(MAKE) $(build)=. arch/$(SRCARCH)/include/asm/asm-offsets.h
+ $(Q)$(MAKE) $(build)=. MKRELOC=$(MKRELOC) 'ALL_OBJS=$(ALL_OBJS-y)' 'ALL_LIBS=$(ALL_LIBS-y)' $@
+
+-SUBDIRS = xsm arch/$(TARGET_ARCH) common drivers lib test
++SUBDIRS = xsm arch/$(SRCARCH) common drivers lib test
+ define all_sources
+ ( find include -type f -name '*.h' -print; \
+ find $(SUBDIRS) -type f -name '*.[chS]' -print )
+diff --git a/xen/Rules.mk b/xen/Rules.mk
+index 59072ae8df..8af3dd7277 100644
+--- a/xen/Rules.mk
++++ b/xen/Rules.mk
+@@ -180,7 +180,7 @@ cpp_flags = $(filter-out -Wa$(comma)% -flto,$(1))
+ c_flags = -MMD -MP -MF $(depfile) $(XEN_CFLAGS)
+ a_flags = -MMD -MP -MF $(depfile) $(XEN_AFLAGS)
+
+-include $(srctree)/arch/$(TARGET_ARCH)/Rules.mk
++include $(srctree)/arch/$(SRCARCH)/Rules.mk
+
+ c_flags += $(_c_flags)
+ a_flags += $(_c_flags)
+diff --git a/xen/build.mk b/xen/build.mk
+index d049d3a53a..9ecb104f1e 100644
+--- a/xen/build.mk
++++ b/xen/build.mk
+@@ -41,11 +41,11 @@ include/xen/compile.h: include/xen/compile.h.in .banner FORCE
+ targets += include/xen/compile.h
+
+ -include $(wildcard .asm-offsets.s.d)
+-asm-offsets.s: arch/$(TARGET_ARCH)/$(ARCH)/asm-offsets.c
++asm-offsets.s: arch/$(SRCARCH)/$(ARCH)/asm-offsets.c
+ $(CC) $(call cpp_flags,$(c_flags)) -S -g0 -o $@.new -MQ $@ $<
+ $(call move-if-changed,$@.new,$@)
+
+-arch/$(TARGET_ARCH)/include/asm/asm-offsets.h: asm-offsets.s
++arch/$(SRCARCH)/include/asm/asm-offsets.h: asm-offsets.s
+ @(set -e; \
+ echo "/*"; \
+ echo " * DO NOT MODIFY."; \
+@@ -87,4 +87,4 @@ endif
+ targets += prelink.o
+
+ $(TARGET): prelink.o FORCE
+- $(Q)$(MAKE) $(build)=arch/$(TARGET_ARCH) $@
++ $(Q)$(MAKE) $(build)=arch/$(SRCARCH) $@
+--
+2.42.0
+
diff --git a/0006-x86-HVM-don-t-mark-evtchn-upcall-vector-as-pending-w.patch b/0006-x86-HVM-don-t-mark-evtchn-upcall-vector-as-pending-w.patch
deleted file mode 100644
index 2577f20..0000000
--- a/0006-x86-HVM-don-t-mark-evtchn-upcall-vector-as-pending-w.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 26f39b3d705b667aa21f368c252abffb0b4d3e5d Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 20 Dec 2022 13:45:07 +0100
-Subject: [PATCH 06/89] x86/HVM: don't mark evtchn upcall vector as pending
- when vLAPIC is disabled
-
-Linux'es relatively new use of HVMOP_set_evtchn_upcall_vector has
-exposed a problem with the marking of the respective vector as
-pending: For quite some time Linux has been checking whether any stale
-ISR or IRR bits would still be set while preparing the LAPIC for use.
-This check is now triggering on the upcall vector, as the registration,
-at least for APs, happens before the LAPIC is actually enabled.
-
-In software-disabled state an LAPIC would not accept any interrupt
-requests and hence no IRR bit would newly become set while in this
-state. As a result it is also wrong for us to mark the upcall vector as
-having a pending request when the vLAPIC is in this state.
-
-To compensate for the "enabled" check added to the assertion logic, add
-logic to (conditionally) mark the upcall vector as having a request
-pending at the time the LAPIC is being software-enabled by the guest.
-Note however that, like for the pt_may_unmask_irq() we already have
-there, long term we may need to find a different solution. This will be
-especially relevant in case yet better LAPIC acceleration would
-eliminate notifications of guest writes to this and other registers.
-
-Fixes: 7b5b8ca7dffd ("x86/upcall: inject a spurious event after setting upcall vector")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-master commit: f5d0279839b58cb622f0995dbf9cff056f03082e
-master date: 2022-12-06 13:51:49 +0100
----
- xen/arch/x86/hvm/irq.c | 5 +++--
- xen/arch/x86/hvm/vlapic.c | 3 +++
- 2 files changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
-index 858ab5b248..d93ffe4546 100644
---- a/xen/arch/x86/hvm/irq.c
-+++ b/xen/arch/x86/hvm/irq.c
-@@ -321,9 +321,10 @@ void hvm_assert_evtchn_irq(struct vcpu *v)
-
- if ( v->arch.hvm.evtchn_upcall_vector != 0 )
- {
-- uint8_t vector = v->arch.hvm.evtchn_upcall_vector;
-+ struct vlapic *vlapic = vcpu_vlapic(v);
-
-- vlapic_set_irq(vcpu_vlapic(v), vector, 0);
-+ if ( vlapic_enabled(vlapic) )
-+ vlapic_set_irq(vlapic, v->arch.hvm.evtchn_upcall_vector, 0);
- }
- else if ( is_hvm_pv_evtchn_domain(v->domain) )
- vcpu_kick(v);
-diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
-index 257d3b6851..eb32f12e2d 100644
---- a/xen/arch/x86/hvm/vlapic.c
-+++ b/xen/arch/x86/hvm/vlapic.c
-@@ -829,6 +829,9 @@ void vlapic_reg_write(struct vcpu *v, unsigned int reg, uint32_t val)
- {
- vlapic->hw.disabled &= ~VLAPIC_SW_DISABLED;
- pt_may_unmask_irq(vlapic_domain(vlapic), &vlapic->pt);
-+ if ( v->arch.hvm.evtchn_upcall_vector &&
-+ vcpu_info(v, evtchn_upcall_pending) )
-+ vlapic_set_irq(vlapic, v->arch.hvm.evtchn_upcall_vector, 0);
- }
- break;
-
---
-2.40.0
-
diff --git a/0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch b/0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch
new file mode 100644
index 0000000..81e5ca4
--- /dev/null
+++ b/0007-build-evaluate-XEN_BUILD_-and-XEN_DOMAIN-immediately.patch
@@ -0,0 +1,58 @@
+From a1f68fb56710c507f9c1ec8e8d784f5b1e4088f1 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Mon, 31 Jul 2023 15:02:18 +0200
+Subject: [PATCH 07/55] build: evaluate XEN_BUILD_* and XEN_DOMAIN immediately
+
+With GNU make 4.4, the number of executions of the commands present in
+these $(shell ) increased greatly. This is probably because as of make
+4.4, exported variables are also added to the environment of $(shell )
+construct.
+
+Also, `make -d` shows a lot of these:
+ Makefile:15: not recursively expanding XEN_BUILD_DATE to export to shell function
+ Makefile:16: not recursively expanding XEN_BUILD_TIME to export to shell function
+ Makefile:17: not recursively expanding XEN_BUILD_HOST to export to shell function
+ Makefile:14: not recursively expanding XEN_DOMAIN to export to shell function
+
+So to avoid having these commands run more than necessary, we
+will replace ?= by an equivalent but with immediate expansion.
+
+Reported-by: Jason Andryuk <jandryuk@gmail.com>
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Tested-by: Jason Andryuk <jandryuk@gmail.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 0c594c1b57ee2ecec5f70826c53a2cf02a9c2acb)
+---
+ xen/Makefile | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index 1a3b9a081f..7bb9de7bdc 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -11,10 +11,18 @@ export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
+ -include xen-version
+
+ export XEN_WHOAMI ?= $(USER)
+-export XEN_DOMAIN ?= $(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))
+-export XEN_BUILD_DATE ?= $(shell LC_ALL=C date)
+-export XEN_BUILD_TIME ?= $(shell LC_ALL=C date +%T)
+-export XEN_BUILD_HOST ?= $(shell hostname)
++ifeq ($(origin XEN_DOMAIN), undefined)
++export XEN_DOMAIN := $(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))
++endif
++ifeq ($(origin XEN_BUILD_DATE), undefined)
++export XEN_BUILD_DATE := $(shell LC_ALL=C date)
++endif
++ifeq ($(origin XEN_BUILD_TIME), undefined)
++export XEN_BUILD_TIME := $(shell LC_ALL=C date +%T)
++endif
++ifeq ($(origin XEN_BUILD_HOST), undefined)
++export XEN_BUILD_HOST := $(shell hostname)
++endif
+
+ # Best effort attempt to find a python interpreter, defaulting to Python 3 if
+ # available. Fall back to just `python` if `which` is nowhere to be found.
+--
+2.42.0
+
diff --git a/0007-ioreq_broadcast-accept-partial-broadcast-success.patch b/0007-ioreq_broadcast-accept-partial-broadcast-success.patch
deleted file mode 100644
index 654990b..0000000
--- a/0007-ioreq_broadcast-accept-partial-broadcast-success.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From c3e37c60fbf8f8cd71db0f0846c9c7aeadf02963 Mon Sep 17 00:00:00 2001
-From: Per Bilse <per.bilse@citrix.com>
-Date: Tue, 20 Dec 2022 13:45:38 +0100
-Subject: [PATCH 07/89] ioreq_broadcast(): accept partial broadcast success
-
-Avoid incorrectly triggering an error when a broadcast buffered ioreq
-is not handled by all registered clients, as long as the failure is
-strictly because the client doesn't handle buffered ioreqs.
-
-Signed-off-by: Per Bilse <per.bilse@citrix.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-master commit: a44734df6c24fadbdb001f051cc5580c467caf7d
-master date: 2022-12-07 12:17:30 +0100
----
- xen/common/ioreq.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
-index 4617aef29b..ecb8f545e1 100644
---- a/xen/common/ioreq.c
-+++ b/xen/common/ioreq.c
-@@ -1317,7 +1317,8 @@ unsigned int ioreq_broadcast(ioreq_t *p, bool buffered)
-
- FOR_EACH_IOREQ_SERVER(d, id, s)
- {
-- if ( !s->enabled )
-+ if ( !s->enabled ||
-+ (buffered && s->bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_OFF) )
- continue;
-
- if ( ioreq_send(s, p, buffered) == IOREQ_STATUS_UNHANDLED )
---
-2.40.0
-
diff --git a/0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch b/0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch
new file mode 100644
index 0000000..8a4cb7d
--- /dev/null
+++ b/0008-Config.mk-evaluate-XEN_COMPILE_ARCH-and-XEN_OS-immed.patch
@@ -0,0 +1,50 @@
+From 476d2624ec3cf3e60709580ff1df208bb8f616e2 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Mon, 31 Jul 2023 15:02:34 +0200
+Subject: [PATCH 08/55] Config.mk: evaluate XEN_COMPILE_ARCH and XEN_OS
+ immediately
+
+With GNU make 4.4, the number of executions of the commands present in
+these $(shell ) increased greatly. This is probably because as of make
+4.4, exported variables are also added to the environment of $(shell )
+construct.
+
+So to avoid having these commands run more than necessary, we
+will replace ?= by an equivalent but with immediate expansion.
+
+Reported-by: Jason Andryuk <jandryuk@gmail.com>
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Tested-by: Jason Andryuk <jandryuk@gmail.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit a07414d989cf52e5e84192b78023bee1589bbda4)
+---
+ Config.mk | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/Config.mk b/Config.mk
+index 8bc2bcd5f6..4864033c73 100644
+--- a/Config.mk
++++ b/Config.mk
+@@ -19,13 +19,17 @@ or = $(if $(strip $(1)),$(1),$(if $(strip $(2)),$(2),$(if $(strip $(3)),$(
+
+ -include $(XEN_ROOT)/.config
+
+-XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
++ifeq ($(origin XEN_COMPILE_ARCH), undefined)
++XEN_COMPILE_ARCH := $(shell uname -m | sed -e s/i.86/x86_32/ \
+ -e s/i86pc/x86_32/ -e s/amd64/x86_64/ \
+ -e s/armv7.*/arm32/ -e s/armv8.*/arm64/ \
+ -e s/aarch64/arm64/)
++endif
+
+ XEN_TARGET_ARCH ?= $(XEN_COMPILE_ARCH)
+-XEN_OS ?= $(shell uname -s)
++ifeq ($(origin XEN_OS), undefined)
++XEN_OS := $(shell uname -s)
++endif
+
+ CONFIG_$(XEN_OS) := y
+
+--
+2.42.0
+
diff --git a/0008-EFI-relocate-the-ESRT-when-booting-via-multiboot2.patch b/0008-EFI-relocate-the-ESRT-when-booting-via-multiboot2.patch
deleted file mode 100644
index d1acae6..0000000
--- a/0008-EFI-relocate-the-ESRT-when-booting-via-multiboot2.patch
+++ /dev/null
@@ -1,195 +0,0 @@
-From 1dcc9b6dfe528c7815a314f9b5581804b5e23750 Mon Sep 17 00:00:00 2001
-From: Demi Marie Obenour <demi@invisiblethingslab.com>
-Date: Tue, 20 Dec 2022 13:46:09 +0100
-Subject: [PATCH 08/89] EFI: relocate the ESRT when booting via multiboot2
-
-This was missed in the initial patchset.
-
-Move efi_relocate_esrt() up to avoid adding a forward declaration.
-
-Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 8d7acf3f7d8d2555c78421dced45bc49f79ae806
-master date: 2022-12-14 12:00:35 +0100
----
- xen/arch/x86/efi/efi-boot.h | 2 +
- xen/common/efi/boot.c | 136 ++++++++++++++++++------------------
- 2 files changed, 70 insertions(+), 68 deletions(-)
-
-diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
-index 27f928ed3c..c94e53d139 100644
---- a/xen/arch/x86/efi/efi-boot.h
-+++ b/xen/arch/x86/efi/efi-boot.h
-@@ -823,6 +823,8 @@ void __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable
- if ( gop )
- efi_set_gop_mode(gop, gop_mode);
-
-+ efi_relocate_esrt(SystemTable);
-+
- efi_exit_boot(ImageHandle, SystemTable);
- }
-
-diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
-index b3de1011ee..d3c6b055ae 100644
---- a/xen/common/efi/boot.c
-+++ b/xen/common/efi/boot.c
-@@ -625,6 +625,74 @@ static size_t __init get_esrt_size(const EFI_MEMORY_DESCRIPTOR *desc)
- return esrt_ptr->FwResourceCount * sizeof(esrt_ptr->Entries[0]);
- }
-
-+static EFI_GUID __initdata esrt_guid = EFI_SYSTEM_RESOURCE_TABLE_GUID;
-+
-+static void __init efi_relocate_esrt(EFI_SYSTEM_TABLE *SystemTable)
-+{
-+ EFI_STATUS status;
-+ UINTN info_size = 0, map_key, mdesc_size;
-+ void *memory_map = NULL;
-+ UINT32 ver;
-+ unsigned int i;
-+
-+ for ( ; ; )
-+ {
-+ status = efi_bs->GetMemoryMap(&info_size, memory_map, &map_key,
-+ &mdesc_size, &ver);
-+ if ( status == EFI_SUCCESS && memory_map != NULL )
-+ break;
-+ if ( status == EFI_BUFFER_TOO_SMALL || memory_map == NULL )
-+ {
-+ info_size += 8 * mdesc_size;
-+ if ( memory_map != NULL )
-+ efi_bs->FreePool(memory_map);
-+ memory_map = NULL;
-+ status = efi_bs->AllocatePool(EfiLoaderData, info_size, &memory_map);
-+ if ( status == EFI_SUCCESS )
-+ continue;
-+ PrintErr(L"Cannot allocate memory to relocate ESRT\r\n");
-+ }
-+ else
-+ PrintErr(L"Cannot obtain memory map to relocate ESRT\r\n");
-+ return;
-+ }
-+
-+ /* Try to obtain the ESRT. Errors are not fatal. */
-+ for ( i = 0; i < info_size; i += mdesc_size )
-+ {
-+ /*
-+ * ESRT needs to be moved to memory of type EfiACPIReclaimMemory
-+ * so that the memory it is in will not be used for other purposes.
-+ */
-+ void *new_esrt = NULL;
-+ const EFI_MEMORY_DESCRIPTOR *desc = memory_map + i;
-+ size_t esrt_size = get_esrt_size(desc);
-+
-+ if ( !esrt_size )
-+ continue;
-+ if ( desc->Type == EfiRuntimeServicesData ||
-+ desc->Type == EfiACPIReclaimMemory )
-+ break; /* ESRT already safe from reuse */
-+ status = efi_bs->AllocatePool(EfiACPIReclaimMemory, esrt_size,
-+ &new_esrt);
-+ if ( status == EFI_SUCCESS && new_esrt )
-+ {
-+ memcpy(new_esrt, (void *)esrt, esrt_size);
-+ status = efi_bs->InstallConfigurationTable(&esrt_guid, new_esrt);
-+ if ( status != EFI_SUCCESS )
-+ {
-+ PrintErr(L"Cannot install new ESRT\r\n");
-+ efi_bs->FreePool(new_esrt);
-+ }
-+ }
-+ else
-+ PrintErr(L"Cannot allocate memory for ESRT\r\n");
-+ break;
-+ }
-+
-+ efi_bs->FreePool(memory_map);
-+}
-+
- /*
- * Include architecture specific implementation here, which references the
- * static globals defined above.
-@@ -903,8 +971,6 @@ static UINTN __init efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
- return gop_mode;
- }
-
--static EFI_GUID __initdata esrt_guid = EFI_SYSTEM_RESOURCE_TABLE_GUID;
--
- static void __init efi_tables(void)
- {
- unsigned int i;
-@@ -1113,72 +1179,6 @@ static void __init efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop
- #define INVALID_VIRTUAL_ADDRESS (0xBAAADUL << \
- (EFI_PAGE_SHIFT + BITS_PER_LONG - 32))
-
--static void __init efi_relocate_esrt(EFI_SYSTEM_TABLE *SystemTable)
--{
-- EFI_STATUS status;
-- UINTN info_size = 0, map_key, mdesc_size;
-- void *memory_map = NULL;
-- UINT32 ver;
-- unsigned int i;
--
-- for ( ; ; )
-- {
-- status = efi_bs->GetMemoryMap(&info_size, memory_map, &map_key,
-- &mdesc_size, &ver);
-- if ( status == EFI_SUCCESS && memory_map != NULL )
-- break;
-- if ( status == EFI_BUFFER_TOO_SMALL || memory_map == NULL )
-- {
-- info_size += 8 * mdesc_size;
-- if ( memory_map != NULL )
-- efi_bs->FreePool(memory_map);
-- memory_map = NULL;
-- status = efi_bs->AllocatePool(EfiLoaderData, info_size, &memory_map);
-- if ( status == EFI_SUCCESS )
-- continue;
-- PrintErr(L"Cannot allocate memory to relocate ESRT\r\n");
-- }
-- else
-- PrintErr(L"Cannot obtain memory map to relocate ESRT\r\n");
-- return;
-- }
--
-- /* Try to obtain the ESRT. Errors are not fatal. */
-- for ( i = 0; i < info_size; i += mdesc_size )
-- {
-- /*
-- * ESRT needs to be moved to memory of type EfiACPIReclaimMemory
-- * so that the memory it is in will not be used for other purposes.
-- */
-- void *new_esrt = NULL;
-- const EFI_MEMORY_DESCRIPTOR *desc = memory_map + i;
-- size_t esrt_size = get_esrt_size(desc);
--
-- if ( !esrt_size )
-- continue;
-- if ( desc->Type == EfiRuntimeServicesData ||
-- desc->Type == EfiACPIReclaimMemory )
-- break; /* ESRT already safe from reuse */
-- status = efi_bs->AllocatePool(EfiACPIReclaimMemory, esrt_size,
-- &new_esrt);
-- if ( status == EFI_SUCCESS && new_esrt )
-- {
-- memcpy(new_esrt, (void *)esrt, esrt_size);
-- status = efi_bs->InstallConfigurationTable(&esrt_guid, new_esrt);
-- if ( status != EFI_SUCCESS )
-- {
-- PrintErr(L"Cannot install new ESRT\r\n");
-- efi_bs->FreePool(new_esrt);
-- }
-- }
-- else
-- PrintErr(L"Cannot allocate memory for ESRT\r\n");
-- break;
-- }
--
-- efi_bs->FreePool(memory_map);
--}
--
- static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
- {
- EFI_STATUS status;
---
-2.40.0
-
diff --git a/0009-x86-time-prevent-overflow-with-high-frequency-TSCs.patch b/0009-x86-time-prevent-overflow-with-high-frequency-TSCs.patch
deleted file mode 100644
index a9401d7..0000000
--- a/0009-x86-time-prevent-overflow-with-high-frequency-TSCs.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From a7a26da0b59da7233e6c6f63b180bab131398351 Mon Sep 17 00:00:00 2001
-From: Neowutran <xen@neowutran.ovh>
-Date: Tue, 20 Dec 2022 13:46:38 +0100
-Subject: [PATCH 09/89] x86/time: prevent overflow with high frequency TSCs
-
-Make sure tsc_khz is promoted to a 64-bit type before multiplying by
-1000 to avoid an 'overflow before widen' bug. Otherwise just above
-4.294GHz the value will overflow. Processors with clocks this high are
-now in production and require this to work correctly.
-
-Signed-off-by: Neowutran <xen@neowutran.ovh>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: ad15a0a8ca2515d8ac58edfc0bc1d3719219cb77
-master date: 2022-12-19 11:34:16 +0100
----
- xen/arch/x86/time.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
-index b01acd390d..d882b43cf0 100644
---- a/xen/arch/x86/time.c
-+++ b/xen/arch/x86/time.c
-@@ -2585,7 +2585,7 @@ int tsc_set_info(struct domain *d,
- case TSC_MODE_ALWAYS_EMULATE:
- d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
- d->arch.tsc_khz = gtsc_khz ?: cpu_khz;
-- set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000);
-+ set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000UL);
-
- /*
- * In default mode use native TSC if the host has safe TSC and
---
-2.40.0
-
diff --git a/0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch b/0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch
new file mode 100644
index 0000000..4f9c0bb
--- /dev/null
+++ b/0009-x86emul-rework-wrapping-of-libc-functions-in-test-an.patch
@@ -0,0 +1,245 @@
+From 37f1d68fa34220600f1e4ec82af5da70127757e5 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Fri, 18 Aug 2023 15:04:28 +0200
+Subject: [PATCH 09/55] x86emul: rework wrapping of libc functions in test and
+ fuzzing harnesses
+
+Our present approach is working fully behind the compiler's back. This
+was found to not work with LTO. Employ ld's --wrap= option instead. Note
+that while this makes the build work at least with new enough gcc (it
+doesn't with gcc7, for example, due to tool chain side issues afaict),
+according to my testing things still won't work when building the
+fuzzing harness with afl-cc: While with the gcc7 tool chain I see afl-as
+getting invoked, this does not happen with gcc13. Yet without using that
+assembler wrapper the resulting binary will look uninstrumented to
+afl-fuzz.
+
+While checking the resulting binaries I noticed that we've gained uses
+of snprintf() and strstr(), which only just so happen to not cause any
+problems. Add wrappers for them as well.
+
+Since we don't have any actual uses of v{,sn}printf(), no definitions of
+their wrappers appear (just yet). But I think we want
+__wrap_{,sn}printf() to properly use __real_v{,sn}printf() right away,
+which means we need declarations of the latter.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+(cherry picked from commit 6fba45ca3be1c5d46cddb1eaf371d9e69550b244)
+---
+ tools/fuzz/x86_instruction_emulator/Makefile | 6 ++-
+ tools/tests/x86_emulator/Makefile | 4 +-
+ tools/tests/x86_emulator/wrappers.c | 55 ++++++++++++++------
+ tools/tests/x86_emulator/x86-emulate.h | 14 +++--
+ 4 files changed, 53 insertions(+), 26 deletions(-)
+
+diff --git a/tools/fuzz/x86_instruction_emulator/Makefile b/tools/fuzz/x86_instruction_emulator/Makefile
+index 13aa238503..c83959c847 100644
+--- a/tools/fuzz/x86_instruction_emulator/Makefile
++++ b/tools/fuzz/x86_instruction_emulator/Makefile
+@@ -29,6 +29,8 @@ GCOV_FLAGS := --coverage
+ %-cov.o: %.c
+ $(CC) -c $(CFLAGS) $(GCOV_FLAGS) $< -o $@
+
++WRAPPED = $(shell sed -n 's,^ *WRAP(\([[:alnum:]_]*\));,\1,p' x86-emulate.h)
++
+ x86-emulate.h: x86_emulate/x86_emulate.h
+ x86-emulate.o x86-emulate-cov.o: x86-emulate.h x86_emulate/x86_emulate.c
+ fuzz-emul.o fuzz-emul-cov.o wrappers.o: x86-emulate.h
+@@ -37,10 +39,10 @@ x86-insn-fuzzer.a: fuzz-emul.o x86-emulate.o cpuid.o
+ $(AR) rc $@ $^
+
+ afl-harness: afl-harness.o fuzz-emul.o x86-emulate.o cpuid.o wrappers.o
+- $(CC) $(CFLAGS) $^ -o $@
++ $(CC) $(CFLAGS) $(addprefix -Wl$(comma)--wrap=,$(WRAPPED)) $^ -o $@
+
+ afl-harness-cov: afl-harness-cov.o fuzz-emul-cov.o x86-emulate-cov.o cpuid.o wrappers.o
+- $(CC) $(CFLAGS) $(GCOV_FLAGS) $^ -o $@
++ $(CC) $(CFLAGS) $(GCOV_FLAGS) $(addprefix -Wl$(comma)--wrap=,$(WRAPPED)) $^ -o $@
+
+ # Common targets
+ .PHONY: all
+diff --git a/tools/tests/x86_emulator/Makefile b/tools/tests/x86_emulator/Makefile
+index bd82598f97..a2fd6607c6 100644
+--- a/tools/tests/x86_emulator/Makefile
++++ b/tools/tests/x86_emulator/Makefile
+@@ -250,8 +250,10 @@ xop.h avx512f.h: simd-fma.c
+
+ endif # 32-bit override
+
++WRAPPED := $(shell sed -n 's,^ *WRAP(\([[:alnum:]_]*\));,\1,p' x86-emulate.h)
++
+ $(TARGET): x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o
+- $(HOSTCC) $(HOSTCFLAGS) -o $@ $^
++ $(HOSTCC) $(HOSTCFLAGS) $(addprefix -Wl$(comma)--wrap=,$(WRAPPED)) -o $@ $^
+
+ .PHONY: clean
+ clean:
+diff --git a/tools/tests/x86_emulator/wrappers.c b/tools/tests/x86_emulator/wrappers.c
+index eba7cc93c5..3829a6f416 100644
+--- a/tools/tests/x86_emulator/wrappers.c
++++ b/tools/tests/x86_emulator/wrappers.c
+@@ -1,78 +1,103 @@
+ #include <stdarg.h>
+
+-#define WRAP(x) typeof(x) emul_##x
++#define WRAP(x) typeof(x) __wrap_ ## x, __real_ ## x
+ #include "x86-emulate.h"
+
+-size_t emul_fwrite(const void *src, size_t sz, size_t n, FILE *f)
++size_t __wrap_fwrite(const void *src, size_t sz, size_t n, FILE *f)
+ {
+ emul_save_fpu_state();
+- sz = fwrite(src, sz, n, f);
++ sz = __real_fwrite(src, sz, n, f);
+ emul_restore_fpu_state();
+
+ return sz;
+ }
+
+-int emul_memcmp(const void *p1, const void *p2, size_t sz)
++int __wrap_memcmp(const void *p1, const void *p2, size_t sz)
+ {
+ int rc;
+
+ emul_save_fpu_state();
+- rc = memcmp(p1, p2, sz);
++ rc = __real_memcmp(p1, p2, sz);
+ emul_restore_fpu_state();
+
+ return rc;
+ }
+
+-void *emul_memcpy(void *dst, const void *src, size_t sz)
++void *__wrap_memcpy(void *dst, const void *src, size_t sz)
+ {
+ emul_save_fpu_state();
+- memcpy(dst, src, sz);
++ __real_memcpy(dst, src, sz);
+ emul_restore_fpu_state();
+
+ return dst;
+ }
+
+-void *emul_memset(void *dst, int c, size_t sz)
++void *__wrap_memset(void *dst, int c, size_t sz)
+ {
+ emul_save_fpu_state();
+- memset(dst, c, sz);
++ __real_memset(dst, c, sz);
+ emul_restore_fpu_state();
+
+ return dst;
+ }
+
+-int emul_printf(const char *fmt, ...)
++int __wrap_printf(const char *fmt, ...)
+ {
+ va_list varg;
+ int rc;
+
+ emul_save_fpu_state();
+ va_start(varg, fmt);
+- rc = vprintf(fmt, varg);
++ rc = __real_vprintf(fmt, varg);
+ va_end(varg);
+ emul_restore_fpu_state();
+
+ return rc;
+ }
+
+-int emul_putchar(int c)
++int __wrap_putchar(int c)
+ {
+ int rc;
+
+ emul_save_fpu_state();
+- rc = putchar(c);
++ rc = __real_putchar(c);
+ emul_restore_fpu_state();
+
+ return rc;
+ }
+
+-int emul_puts(const char *str)
++int __wrap_puts(const char *str)
+ {
+ int rc;
+
+ emul_save_fpu_state();
+- rc = puts(str);
++ rc = __real_puts(str);
+ emul_restore_fpu_state();
+
+ return rc;
+ }
++
++int __wrap_snprintf(char *buf, size_t n, const char *fmt, ...)
++{
++ va_list varg;
++ int rc;
++
++ emul_save_fpu_state();
++ va_start(varg, fmt);
++ rc = __real_vsnprintf(buf, n, fmt, varg);
++ va_end(varg);
++ emul_restore_fpu_state();
++
++ return rc;
++}
++
++char *__wrap_strstr(const char *s1, const char *s2)
++{
++ char *s;
++
++ emul_save_fpu_state();
++ s = __real_strstr(s1, s2);
++ emul_restore_fpu_state();
++
++ return s;
++}
+diff --git a/tools/tests/x86_emulator/x86-emulate.h b/tools/tests/x86_emulator/x86-emulate.h
+index 19bea9c38d..58760f096d 100644
+--- a/tools/tests/x86_emulator/x86-emulate.h
++++ b/tools/tests/x86_emulator/x86-emulate.h
+@@ -29,9 +29,7 @@
+ #ifdef EOF
+ # error "Must not include <stdio.h> before x86-emulate.h"
+ #endif
+-#ifdef WRAP
+-# include <stdio.h>
+-#endif
++#include <stdio.h>
+
+ #include <xen/xen.h>
+
+@@ -85,11 +83,7 @@ void emul_restore_fpu_state(void);
+ * around the actual function.
+ */
+ #ifndef WRAP
+-# if 0 /* This only works for explicit calls, not for compiler generated ones. */
+-# define WRAP(x) typeof(x) x asm("emul_" #x)
+-# else
+-# define WRAP(x) asm(".equ " #x ", emul_" #x)
+-# endif
++# define WRAP(x) typeof(x) __wrap_ ## x
+ #endif
+
+ WRAP(fwrite);
+@@ -99,6 +93,10 @@ WRAP(memset);
+ WRAP(printf);
+ WRAP(putchar);
+ WRAP(puts);
++WRAP(snprintf);
++WRAP(strstr);
++WRAP(vprintf);
++WRAP(vsnprintf);
+
+ #undef WRAP
+
+--
+2.42.0
+
diff --git a/0010-rombios-Work-around-GCC-issue-99578.patch b/0010-rombios-Work-around-GCC-issue-99578.patch
new file mode 100644
index 0000000..3995f02
--- /dev/null
+++ b/0010-rombios-Work-around-GCC-issue-99578.patch
@@ -0,0 +1,43 @@
+From ae1045c42954772e48862162d0e95fbc9393c91e Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 17 Aug 2023 21:32:53 +0100
+Subject: [PATCH 10/55] rombios: Work around GCC issue 99578
+
+GCC 12 objects to pointers derived from a constant:
+
+ util.c: In function 'find_rsdp':
+ util.c:429:16: error: array subscript 0 is outside array bounds of 'uint16_t[0]' {aka 'short unsigned int[]'} [-Werror=array-bounds]
+ 429 | ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
+ cc1: all warnings being treated as errors
+
+This is a GCC bug, but work around it rather than turning array-bounds
+checking off generally.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit e35138a2ffbe1fe71edaaaaae71063dc545a8416)
+---
+ tools/firmware/rombios/32bit/util.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/tools/firmware/rombios/32bit/util.c b/tools/firmware/rombios/32bit/util.c
+index 6c1c480514..a47e000a26 100644
+--- a/tools/firmware/rombios/32bit/util.c
++++ b/tools/firmware/rombios/32bit/util.c
+@@ -424,10 +424,10 @@ static struct acpi_20_rsdp *__find_rsdp(const void *start, unsigned int len)
+ struct acpi_20_rsdp *find_rsdp(void)
+ {
+ struct acpi_20_rsdp *rsdp;
+- uint16_t ebda_seg;
++ uint16_t *volatile /* GCC issue 99578 */ ebda_seg =
++ ADDR_FROM_SEG_OFF(0x40, 0xe);
+
+- ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
+- rsdp = __find_rsdp((void *)(ebda_seg << 16), 1024);
++ rsdp = __find_rsdp((void *)(*ebda_seg << 16), 1024);
+ if (!rsdp)
+ rsdp = __find_rsdp((void *)0xE0000, 0x20000);
+
+--
+2.42.0
+
diff --git a/0010-tools-oxenstored-Fix-incorrect-scope-after-an-if-sta.patch b/0010-tools-oxenstored-Fix-incorrect-scope-after-an-if-sta.patch
deleted file mode 100644
index a8c427d..0000000
--- a/0010-tools-oxenstored-Fix-incorrect-scope-after-an-if-sta.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 2e8d7a08bcd111fe21569e9ace1a047df76da949 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 11 Nov 2022 18:50:34 +0000
-Subject: [PATCH 10/89] tools/oxenstored: Fix incorrect scope after an if
- statement
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-A debug statement got inserted into a single-expression if statement.
-
-Insert brackets to give the intended meaning, rather than the actual meaning
-where the "let con = Connections..." is outside and executed unconditionally.
-
-This results in some unnecessary ring checks for domains which otherwise have
-IO credit.
-
-Fixes: 42f0581a91d4 ("tools/oxenstored: Implement live update for socket connections")
-Reported-by: Edwin Török <edvin.torok@citrix.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit ee36179371fd4215a43fb179be2165f65c1cd1cd)
----
- tools/ocaml/xenstored/xenstored.ml | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index ffd43a4eee..c5dc7a28d0 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -475,7 +475,7 @@ let _ =
-
- let ring_scan_checker dom =
- (* no need to scan domains already marked as for processing *)
-- if not (Domain.get_io_credit dom > 0) then
-+ if not (Domain.get_io_credit dom > 0) then (
- debug "Looking up domid %d" (Domain.get_id dom);
- let con = Connections.find_domain cons (Domain.get_id dom) in
- if not (Connection.has_more_work con) then (
-@@ -490,7 +490,8 @@ let _ =
- let n = 32 + 2 * (Domains.number domains) in
- info "found lazy domain %d, credit %d" (Domain.get_id dom) n;
- Domain.set_io_credit ~n dom
-- ) in
-+ )
-+ ) in
-
- let last_stat_time = ref 0. in
- let last_scan_time = ref 0. in
---
-2.40.0
-
diff --git a/0011-rombios-Avoid-using-K-R-function-syntax.patch b/0011-rombios-Avoid-using-K-R-function-syntax.patch
new file mode 100644
index 0000000..0bd761f
--- /dev/null
+++ b/0011-rombios-Avoid-using-K-R-function-syntax.patch
@@ -0,0 +1,74 @@
+From 24487fec3bbebbc1fd3f00d16bca7fb0f56a5f30 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 18 Aug 2023 10:47:46 +0100
+Subject: [PATCH 11/55] rombios: Avoid using K&R function syntax
+
+Clang-15 complains:
+
+ tcgbios.c:598:25: error: a function declaration without a prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes]
+ void tcpa_calling_int19h()
+ ^
+ void
+
+C2x formally removes K&R syntax. The declarations for these functions in
+32bitprotos.h are already ANSI compatible. Update the definitions to match.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit a562afa5679d4a7ceb9cb9222fec1fea9a61f738)
+---
+ tools/firmware/rombios/32bit/tcgbios/tcgbios.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
+index fa22c4460a..ad0eac0d20 100644
+--- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
++++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
+@@ -595,7 +595,7 @@ static void tcpa_add_measurement(uint32_t pcrIndex,
+ /*
+ * Add measurement to log about call of int 19h
+ */
+-void tcpa_calling_int19h()
++void tcpa_calling_int19h(void)
+ {
+ tcpa_add_measurement(4, EV_ACTION, 0);
+ }
+@@ -603,7 +603,7 @@ void tcpa_calling_int19h()
+ /*
+ * Add measurement to log about retuning from int 19h
+ */
+-void tcpa_returned_int19h()
++void tcpa_returned_int19h(void)
+ {
+ tcpa_add_measurement(4, EV_ACTION, 1);
+ }
+@@ -611,7 +611,7 @@ void tcpa_returned_int19h()
+ /*
+ * Add event separators for PCRs 0 to 7; specs 8.2.3
+ */
+-void tcpa_add_event_separators()
++void tcpa_add_event_separators(void)
+ {
+ uint32_t pcrIndex = 0;
+ while (pcrIndex <= 7) {
+@@ -624,7 +624,7 @@ void tcpa_add_event_separators()
+ /*
+ * Add a wake event to the log
+ */
+-void tcpa_wake_event()
++void tcpa_wake_event(void)
+ {
+ tcpa_add_measurement_to_log(6,
+ EV_ACTION,
+@@ -659,7 +659,7 @@ void tcpa_add_bootdevice(uint32_t bootcd, uint32_t bootdrv)
+ * Add measurement to the log about option rom scan
+ * 10.4.3 : action 14
+ */
+-void tcpa_start_option_rom_scan()
++void tcpa_start_option_rom_scan(void)
+ {
+ tcpa_add_measurement(2, EV_ACTION, 14);
+ }
+--
+2.42.0
+
diff --git a/0011-tools-ocaml-evtchn-OCaml-5-support-fix-potential-res.patch b/0011-tools-ocaml-evtchn-OCaml-5-support-fix-potential-res.patch
deleted file mode 100644
index c9cf630..0000000
--- a/0011-tools-ocaml-evtchn-OCaml-5-support-fix-potential-res.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From d11528a993f80c6a86f4cb0c30578c026348e3e4 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 18 Jan 2022 15:04:48 +0000
-Subject: [PATCH 11/89] tools/ocaml/evtchn: OCaml 5 support, fix potential
- resource leak
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-There is no binding for xenevtchn_close(). In principle, this is a resource
-leak, but the typical usage is as a singleton that lives for the lifetime of
-the program.
-
-Ocaml 5 no longer permits storing a naked C pointer in an Ocaml value.
-
-Therefore, use a Custom block. This allows us to use the finaliser callback
-to call xenevtchn_close(), if the Ocaml object goes out of scope.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 22d5affdf0cecfa6faae46fbaec68b8018835220)
----
- tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 21 +++++++++++++++++--
- 1 file changed, 19 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-index f889a7a2e4..37f1cc4e14 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-@@ -33,7 +33,22 @@
- #include <caml/fail.h>
- #include <caml/signals.h>
-
--#define _H(__h) ((xenevtchn_handle *)(__h))
-+#define _H(__h) (*((xenevtchn_handle **)Data_custom_val(__h)))
-+
-+static void stub_evtchn_finalize(value v)
-+{
-+ xenevtchn_close(_H(v));
-+}
-+
-+static struct custom_operations xenevtchn_ops = {
-+ .identifier = "xenevtchn",
-+ .finalize = stub_evtchn_finalize,
-+ .compare = custom_compare_default, /* Can't compare */
-+ .hash = custom_hash_default, /* Can't hash */
-+ .serialize = custom_serialize_default, /* Can't serialize */
-+ .deserialize = custom_deserialize_default, /* Can't deserialize */
-+ .compare_ext = custom_compare_ext_default, /* Can't compare */
-+};
-
- CAMLprim value stub_eventchn_init(void)
- {
-@@ -48,7 +63,9 @@ CAMLprim value stub_eventchn_init(void)
- if (xce == NULL)
- caml_failwith("open failed");
-
-- result = (value)xce;
-+ result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-+ _H(result) = xce;
-+
- CAMLreturn(result);
- }
-
---
-2.40.0
-
diff --git a/0012-rombios-Remove-the-use-of-egrep.patch b/0012-rombios-Remove-the-use-of-egrep.patch
new file mode 100644
index 0000000..44702b4
--- /dev/null
+++ b/0012-rombios-Remove-the-use-of-egrep.patch
@@ -0,0 +1,34 @@
+From e418a77295e6b512d212b57123c11e4d4fb23e8c Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 18 Aug 2023 11:05:00 +0100
+Subject: [PATCH 12/55] rombios: Remove the use of egrep
+
+As the Alpine 3.18 container notes:
+
+ egrep: warning: egrep is obsolescent; using grep -E
+
+Adjust it.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 5ddac3c2852ecc120acab86fc403153a2097c5dc)
+---
+ tools/firmware/rombios/32bit/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/firmware/rombios/32bit/Makefile b/tools/firmware/rombios/32bit/Makefile
+index c058c71551..50d45647c2 100644
+--- a/tools/firmware/rombios/32bit/Makefile
++++ b/tools/firmware/rombios/32bit/Makefile
+@@ -26,7 +26,7 @@ $(TARGET): 32bitbios_all.o
+ 32bitbios_all.o: 32bitbios.o tcgbios/tcgbiosext.o util.o pmm.o
+ $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
+ @nm 32bitbios_all.o | \
+- egrep '^ +U ' >/dev/null && { \
++ grep -E '^ +U ' >/dev/null && { \
+ echo "There are undefined symbols in the BIOS:"; \
+ nm -u 32bitbios_all.o; \
+ exit 11; \
+--
+2.42.0
+
diff --git a/0012-tools-ocaml-evtchn-Add-binding-for-xenevtchn_fdopen.patch b/0012-tools-ocaml-evtchn-Add-binding-for-xenevtchn_fdopen.patch
deleted file mode 100644
index 7e921fd..0000000
--- a/0012-tools-ocaml-evtchn-Add-binding-for-xenevtchn_fdopen.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From 24d9dc2ae2f88249fcf81f7b7e612cdfb7c73e4b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Mon, 14 Nov 2022 13:36:19 +0000
-Subject: [PATCH 12/89] tools/ocaml/evtchn: Add binding for xenevtchn_fdopen()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-For live update, the new oxenstored needs to reconstruct an evtchn object
-around an existing file descriptor.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 7ba68a6c558e1fd811c95cb7215a5cd07a3cc2ea)
----
- tools/ocaml/libs/eventchn/xeneventchn.ml | 1 +
- tools/ocaml/libs/eventchn/xeneventchn.mli | 4 ++++
- tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 19 +++++++++++++++++++
- 3 files changed, 24 insertions(+)
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn.ml b/tools/ocaml/libs/eventchn/xeneventchn.ml
-index dd00a1f0ea..be4de82f46 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn.ml
-+++ b/tools/ocaml/libs/eventchn/xeneventchn.ml
-@@ -17,6 +17,7 @@
- type handle
-
- external init: unit -> handle = "stub_eventchn_init"
-+external fdopen: Unix.file_descr -> handle = "stub_eventchn_fdopen"
- external fd: handle -> Unix.file_descr = "stub_eventchn_fd"
-
- type t = int
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn.mli b/tools/ocaml/libs/eventchn/xeneventchn.mli
-index 08c7337643..98b3c86f37 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn.mli
-+++ b/tools/ocaml/libs/eventchn/xeneventchn.mli
-@@ -47,6 +47,10 @@ val init: unit -> handle
- (** Return an initialised event channel interface. On error it
- will throw a Failure exception. *)
-
-+val fdopen: Unix.file_descr -> handle
-+(** Return an initialised event channel interface, from an already open evtchn
-+ file descriptor. On error it will throw a Failure exception. *)
-+
- val fd: handle -> Unix.file_descr
- (** Return a file descriptor suitable for Unix.select. When
- the descriptor becomes readable, it is safe to call 'pending'.
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-index 37f1cc4e14..7bdf711bc1 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-@@ -69,6 +69,25 @@ CAMLprim value stub_eventchn_init(void)
- CAMLreturn(result);
- }
-
-+CAMLprim value stub_eventchn_fdopen(value fdval)
-+{
-+ CAMLparam1(fdval);
-+ CAMLlocal1(result);
-+ xenevtchn_handle *xce;
-+
-+ caml_enter_blocking_section();
-+ xce = xenevtchn_fdopen(NULL, Int_val(fdval), 0);
-+ caml_leave_blocking_section();
-+
-+ if (xce == NULL)
-+ caml_failwith("evtchn fdopen failed");
-+
-+ result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-+ _H(result) = xce;
-+
-+ CAMLreturn(result);
-+}
-+
- CAMLprim value stub_eventchn_fd(value xce)
- {
- CAMLparam1(xce);
---
-2.40.0
-
diff --git a/0013-CI-Resync-FreeBSD-config-with-staging.patch b/0013-CI-Resync-FreeBSD-config-with-staging.patch
new file mode 100644
index 0000000..dcd867b
--- /dev/null
+++ b/0013-CI-Resync-FreeBSD-config-with-staging.patch
@@ -0,0 +1,62 @@
+From f00d56309533427981f09ef2614f1bae4bcab62e Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 17 Feb 2023 11:16:32 +0000
+Subject: [PATCH 13/55] CI: Resync FreeBSD config with staging
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CI: Update FreeBSD to 13.1
+
+Also print the compiler version before starting. It's not easy to find
+otherwise, and does change from time to time.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+(cherry picked from commit 5e7667ea2dd33e0e5e0f3a96db37fdb4ecd98fba)
+
+CI: Update FreeBSD to 13.2
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Stefano Stabellini <sstabellini@kernel.org>
+(cherry picked from commit f872a624cbf92de9944483eea7674ef80ced1380)
+
+CI: Update FreeBSD to 12.4
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+(cherry picked from commit a73560896ce3c513460f26bd1c205060d6ec4f8a)
+---
+ .cirrus.yml | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/.cirrus.yml b/.cirrus.yml
+index c38333e736..7e0beb200d 100644
+--- a/.cirrus.yml
++++ b/.cirrus.yml
+@@ -10,19 +10,20 @@ freebsd_template: &FREEBSD_TEMPLATE
+ libxml2 glib git
+
+ build_script:
++ - cc --version
+ - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin
+ - gmake -j`sysctl -n hw.ncpu` clang=y
+
+ task:
+ name: 'FreeBSD 12'
+ freebsd_instance:
+- image_family: freebsd-12-3
++ image_family: freebsd-12-4
+ << : *FREEBSD_TEMPLATE
+
+ task:
+ name: 'FreeBSD 13'
+ freebsd_instance:
+- image_family: freebsd-13-0
++ image_family: freebsd-13-2
+ << : *FREEBSD_TEMPLATE
+
+ task:
+--
+2.42.0
+
diff --git a/0013-tools-ocaml-evtchn-Extend-the-init-binding-with-a-cl.patch b/0013-tools-ocaml-evtchn-Extend-the-init-binding-with-a-cl.patch
deleted file mode 100644
index af889eb..0000000
--- a/0013-tools-ocaml-evtchn-Extend-the-init-binding-with-a-cl.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From c7cf603836e40de1b4a6ca7d1d52736eb4a10327 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Thu, 3 Nov 2022 14:50:38 +0000
-Subject: [PATCH 13/89] tools/ocaml/evtchn: Extend the init() binding with a
- cloexec flag
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-For live update, oxenstored wants to clear CLOEXEC on the evtchn handle, so it
-survives the execve() into the new oxenstored.
-
-Have the new interface match how cloexec works in other Ocaml standard
-libraries.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 9bafe4a53306e7aa2ce6ffc96f7477c6f329f7a7)
----
- tools/ocaml/libs/eventchn/xeneventchn.ml | 5 ++++-
- tools/ocaml/libs/eventchn/xeneventchn.mli | 9 ++++++---
- tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 10 +++++++---
- 3 files changed, 17 insertions(+), 7 deletions(-)
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn.ml b/tools/ocaml/libs/eventchn/xeneventchn.ml
-index be4de82f46..c16fdd4674 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn.ml
-+++ b/tools/ocaml/libs/eventchn/xeneventchn.ml
-@@ -16,7 +16,10 @@
-
- type handle
-
--external init: unit -> handle = "stub_eventchn_init"
-+external _init: bool -> handle = "stub_eventchn_init"
-+
-+let init ?(cloexec=true) () = _init cloexec
-+
- external fdopen: Unix.file_descr -> handle = "stub_eventchn_fdopen"
- external fd: handle -> Unix.file_descr = "stub_eventchn_fd"
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn.mli b/tools/ocaml/libs/eventchn/xeneventchn.mli
-index 98b3c86f37..870429b6b5 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn.mli
-+++ b/tools/ocaml/libs/eventchn/xeneventchn.mli
-@@ -43,9 +43,12 @@ val to_int: t -> int
-
- val of_int: int -> t
-
--val init: unit -> handle
--(** Return an initialised event channel interface. On error it
-- will throw a Failure exception. *)
-+val init: ?cloexec:bool -> unit -> handle
-+(** [init ?cloexec ()]
-+ Return an initialised event channel interface.
-+ The default is to close the underlying file descriptor
-+ on [execve], which can be overriden with [~cloexec:false].
-+ On error it will throw a Failure exception. *)
-
- val fdopen: Unix.file_descr -> handle
- (** Return an initialised event channel interface, from an already open evtchn
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-index 7bdf711bc1..aa8a69cc1e 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-@@ -50,14 +50,18 @@ static struct custom_operations xenevtchn_ops = {
- .compare_ext = custom_compare_ext_default, /* Can't compare */
- };
-
--CAMLprim value stub_eventchn_init(void)
-+CAMLprim value stub_eventchn_init(value cloexec)
- {
-- CAMLparam0();
-+ CAMLparam1(cloexec);
- CAMLlocal1(result);
- xenevtchn_handle *xce;
-+ unsigned int flags = 0;
-+
-+ if ( !Bool_val(cloexec) )
-+ flags |= XENEVTCHN_NO_CLOEXEC;
-
- caml_enter_blocking_section();
-- xce = xenevtchn_open(NULL, 0);
-+ xce = xenevtchn_open(NULL, flags);
- caml_leave_blocking_section();
-
- if (xce == NULL)
---
-2.40.0
-
diff --git a/0014-tools-oxenstored-Style-fixes-to-Domain.patch b/0014-tools-oxenstored-Style-fixes-to-Domain.patch
deleted file mode 100644
index aad4399..0000000
--- a/0014-tools-oxenstored-Style-fixes-to-Domain.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 0929960173bc76b8d90df73c8ee665747c233e18 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 30 Nov 2022 14:56:43 +0000
-Subject: [PATCH 14/89] tools/oxenstored: Style fixes to Domain
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This file has some style problems so severe that they interfere with the
-readability of the subsequent bugfix patches.
-
-Fix these issues ahead of time, to make the subsequent changes more readable.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit b45bfaf359e4821b1bf98a4fcd194d7fd176f167)
----
- tools/ocaml/xenstored/domain.ml | 16 +++++++---------
- 1 file changed, 7 insertions(+), 9 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml
-index 81cb59b8f1..ab08dcf37f 100644
---- a/tools/ocaml/xenstored/domain.ml
-+++ b/tools/ocaml/xenstored/domain.ml
-@@ -57,17 +57,16 @@ let is_paused_for_conflict dom = dom.conflict_credit <= 0.0
- let is_free_to_conflict = is_dom0
-
- let string_of_port = function
--| None -> "None"
--| Some x -> string_of_int (Xeneventchn.to_int x)
-+ | None -> "None"
-+ | Some x -> string_of_int (Xeneventchn.to_int x)
-
- let dump d chan =
- fprintf chan "dom,%d,%nd,%d\n" d.id d.mfn d.remote_port
-
--let notify dom = match dom.port with
--| None ->
-- warn "domain %d: attempt to notify on unknown port" dom.id
--| Some port ->
-- Event.notify dom.eventchn port
-+let notify dom =
-+ match dom.port with
-+ | None -> warn "domain %d: attempt to notify on unknown port" dom.id
-+ | Some port -> Event.notify dom.eventchn port
-
- let bind_interdomain dom =
- begin match dom.port with
-@@ -84,8 +83,7 @@ let close dom =
- | None -> ()
- | Some port -> Event.unbind dom.eventchn port
- end;
-- Xenmmap.unmap dom.interface;
-- ()
-+ Xenmmap.unmap dom.interface
-
- let make id mfn remote_port interface eventchn = {
- id = id;
---
-2.40.0
-
diff --git a/0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch b/0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch
new file mode 100644
index 0000000..6e29490
--- /dev/null
+++ b/0014-tools-vchan-Fix-Wsingle-bit-bitfield-constant-conver.patch
@@ -0,0 +1,43 @@
+From 052a8d24bc670ab6503e21dfd2fb8bccfc22aa73 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 8 Aug 2023 14:53:42 +0100
+Subject: [PATCH 14/55] tools/vchan: Fix
+ -Wsingle-bit-bitfield-constant-conversion
+
+Gitlab reports:
+
+ node.c:158:17: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion]
+
+ ctrl->blocking = 1;
+ ^ ~
+ 1 error generated.
+ make[4]: *** [/builds/xen-project/people/andyhhp/xen/tools/vchan/../../tools/Rules.mk:188: node.o] Error 1
+
+In Xen 4.18, this was fixed with c/s 99ab02f63ea8 ("tools: convert bitfields
+to unsigned type") but this is an ABI change which can't be backported.
+
+Switch 1 for -1 to provide a minimally invasive way to fix the build.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+---
+ tools/vchan/node.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/vchan/node.c b/tools/vchan/node.c
+index f1638f013d..a28293b720 100644
+--- a/tools/vchan/node.c
++++ b/tools/vchan/node.c
+@@ -155,7 +155,7 @@ int main(int argc, char **argv)
+ perror("libxenvchan_*_init");
+ exit(1);
+ }
+- ctrl->blocking = 1;
++ ctrl->blocking = -1;
+
+ srand(seed);
+ fprintf(stderr, "seed=%d\n", seed);
+--
+2.42.0
+
diff --git a/0015-tools-oxenstored-Bind-the-DOM_EXC-VIRQ-in-in-Event.i.patch b/0015-tools-oxenstored-Bind-the-DOM_EXC-VIRQ-in-in-Event.i.patch
deleted file mode 100644
index 8b83edf..0000000
--- a/0015-tools-oxenstored-Bind-the-DOM_EXC-VIRQ-in-in-Event.i.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From bc5cc00868ea29d814bb3d783e28b49d1acf63e9 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 29 Nov 2022 21:05:43 +0000
-Subject: [PATCH 15/89] tools/oxenstored: Bind the DOM_EXC VIRQ in in
- Event.init()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Xenstored always needs to bind the DOM_EXC VIRQ.
-
-Instead of doing it shortly after the call to Event.init(), do it in the
-constructor directly. This removes the need for the field to be a mutable
-option.
-
-It will also simplify a future change to support live update. Rename the
-field from virq_port (which could be any VIRQ) to it's proper name.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 9804a5db435fe40c8ded8cf36c2d2b2281c56f1d)
----
- tools/ocaml/xenstored/event.ml | 9 ++++++---
- tools/ocaml/xenstored/xenstored.ml | 4 +---
- 2 files changed, 7 insertions(+), 6 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/event.ml b/tools/ocaml/xenstored/event.ml
-index ccca90b6fc..a3be296374 100644
---- a/tools/ocaml/xenstored/event.ml
-+++ b/tools/ocaml/xenstored/event.ml
-@@ -17,12 +17,15 @@
- (**************** high level binding ****************)
- type t = {
- handle: Xeneventchn.handle;
-- mutable virq_port: Xeneventchn.t option;
-+ domexc: Xeneventchn.t;
- }
-
--let init () = { handle = Xeneventchn.init (); virq_port = None; }
-+let init () =
-+ let handle = Xeneventchn.init () in
-+ let domexc = Xeneventchn.bind_dom_exc_virq handle in
-+ { handle; domexc }
-+
- let fd eventchn = Xeneventchn.fd eventchn.handle
--let bind_dom_exc_virq eventchn = eventchn.virq_port <- Some (Xeneventchn.bind_dom_exc_virq eventchn.handle)
- let bind_interdomain eventchn domid port = Xeneventchn.bind_interdomain eventchn.handle domid port
- let unbind eventchn port = Xeneventchn.unbind eventchn.handle port
- let notify eventchn port = Xeneventchn.notify eventchn.handle port
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index c5dc7a28d0..55071b49ec 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -397,7 +397,6 @@ let _ =
- if cf.restart && Sys.file_exists Disk.xs_daemon_database then (
- let rwro = DB.from_file store domains cons Disk.xs_daemon_database in
- info "Live reload: database loaded";
-- Event.bind_dom_exc_virq eventchn;
- Process.LiveUpdate.completed ();
- rwro
- ) else (
-@@ -413,7 +412,6 @@ let _ =
-
- if cf.domain_init then (
- Connections.add_domain cons (Domains.create0 domains);
-- Event.bind_dom_exc_virq eventchn
- );
- rw_sock
- ) in
-@@ -451,7 +449,7 @@ let _ =
- let port = Event.pending eventchn in
- debug "pending port %d" (Xeneventchn.to_int port);
- finally (fun () ->
-- if Some port = eventchn.Event.virq_port then (
-+ if port = eventchn.Event.domexc then (
- let (notify, deaddom) = Domains.cleanup domains in
- List.iter (Store.reset_permissions store) deaddom;
- List.iter (Connections.del_domain cons) deaddom;
---
-2.40.0
-
diff --git a/0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch b/0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch
new file mode 100644
index 0000000..81e010b
--- /dev/null
+++ b/0015-xen-vcpu-ignore-VCPU_SSHOTTMR_future.patch
@@ -0,0 +1,143 @@
+From 7b5155a79ea946dd513847d4e7ad2b7e6a4ebb73 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:45:29 +0200
+Subject: [PATCH 15/55] xen/vcpu: ignore VCPU_SSHOTTMR_future
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The usage of VCPU_SSHOTTMR_future in Linux prior to 4.7 is bogus.
+When the hypervisor returns -ETIME (timeout in the past) Linux keeps
+retrying to setup the timer with a higher timeout instead of
+self-injecting a timer interrupt.
+
+On boxes without any hardware assistance for logdirty we have seen HVM
+Linux guests < 4.7 with 32vCPUs give up trying to setup the timer when
+logdirty is enabled:
+
+CE: Reprogramming failure. Giving up
+CE: xen increased min_delta_ns to 1000000 nsec
+CE: Reprogramming failure. Giving up
+CE: Reprogramming failure. Giving up
+CE: xen increased min_delta_ns to 506250 nsec
+CE: xen increased min_delta_ns to 759375 nsec
+CE: xen increased min_delta_ns to 1000000 nsec
+CE: Reprogramming failure. Giving up
+CE: Reprogramming failure. Giving up
+CE: Reprogramming failure. Giving up
+Freezing user space processes ...
+INFO: rcu_sched detected stalls on CPUs/tasks: { 14} (detected by 10, t=60002 jiffies, g=4006, c=4005, q=14130)
+Task dump for CPU 14:
+swapper/14 R running task 0 0 1 0x00000000
+Call Trace:
+ [<ffffffff90160f5d>] ? rcu_eqs_enter_common.isra.30+0x3d/0xf0
+ [<ffffffff907b9bde>] ? default_idle+0x1e/0xd0
+ [<ffffffff90039570>] ? arch_cpu_idle+0x20/0xc0
+ [<ffffffff9010820a>] ? cpu_startup_entry+0x14a/0x1e0
+ [<ffffffff9005d3a7>] ? start_secondary+0x1f7/0x270
+ [<ffffffff900000d5>] ? start_cpu+0x5/0x14
+INFO: rcu_sched detected stalls on CPUs/tasks: { 26} (detected by 24, t=60002 jiffies, g=6922, c=6921, q=7013)
+Task dump for CPU 26:
+swapper/26 R running task 0 0 1 0x00000000
+Call Trace:
+ [<ffffffff90160f5d>] ? rcu_eqs_enter_common.isra.30+0x3d/0xf0
+ [<ffffffff907b9bde>] ? default_idle+0x1e/0xd0
+ [<ffffffff90039570>] ? arch_cpu_idle+0x20/0xc0
+ [<ffffffff9010820a>] ? cpu_startup_entry+0x14a/0x1e0
+ [<ffffffff9005d3a7>] ? start_secondary+0x1f7/0x270
+ [<ffffffff900000d5>] ? start_cpu+0x5/0x14
+INFO: rcu_sched detected stalls on CPUs/tasks: { 26} (detected by 24, t=60002 jiffies, g=8499, c=8498, q=7664)
+Task dump for CPU 26:
+swapper/26 R running task 0 0 1 0x00000000
+Call Trace:
+ [<ffffffff90160f5d>] ? rcu_eqs_enter_common.isra.30+0x3d/0xf0
+ [<ffffffff907b9bde>] ? default_idle+0x1e/0xd0
+ [<ffffffff90039570>] ? arch_cpu_idle+0x20/0xc0
+ [<ffffffff9010820a>] ? cpu_startup_entry+0x14a/0x1e0
+ [<ffffffff9005d3a7>] ? start_secondary+0x1f7/0x270
+ [<ffffffff900000d5>] ? start_cpu+0x5/0x14
+
+Thus leading to CPU stalls and a broken system as a result.
+
+Work around this bogus usage by ignoring the VCPU_SSHOTTMR_future in
+the hypervisor. Old Linux versions are the only ones known to have
+(wrongly) attempted to use the flag, and ignoring it is compatible
+with the behavior expected by any guests setting that flag.
+
+Note the usage of the flag has been removed from Linux by commit:
+
+c06b6d70feb3 xen/x86: don't lose event interrupts
+
+Which landed in Linux 4.7.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Henry Wang <Henry.Wang@arm.com> # CHANGELOG
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: 19c6cbd90965b1440bd551069373d6fa3f2f365d
+master date: 2023-05-03 13:36:05 +0200
+---
+ CHANGELOG.md | 6 ++++++
+ xen/common/domain.c | 13 ++++++++++---
+ xen/include/public/vcpu.h | 5 ++++-
+ 3 files changed, 20 insertions(+), 4 deletions(-)
+
+diff --git a/CHANGELOG.md b/CHANGELOG.md
+index 7f4d0f25e9..bb0eceb69a 100644
+--- a/CHANGELOG.md
++++ b/CHANGELOG.md
+@@ -4,6 +4,12 @@ Notable changes to Xen will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
+
++## [4.17.3](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.17.3)
++
++### Changed
++ - Ignore VCPUOP_set_singleshot_timer's VCPU_SSHOTTMR_future flag. The only
++ known user doesn't use it properly, leading to in-guest breakage.
++
+ ## [4.17.0](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.17.0) - 2022-12-12
+
+ ### Changed
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 53f7e734fe..30c2279673 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -1691,9 +1691,16 @@ long common_vcpu_op(int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
+ if ( copy_from_guest(&set, arg, 1) )
+ return -EFAULT;
+
+- if ( (set.flags & VCPU_SSHOTTMR_future) &&
+- (set.timeout_abs_ns < NOW()) )
+- return -ETIME;
++ if ( set.timeout_abs_ns < NOW() )
++ {
++ /*
++ * Simplify the logic if the timeout has already expired and just
++ * inject the event.
++ */
++ stop_timer(&v->singleshot_timer);
++ send_timer_event(v);
++ break;
++ }
+
+ migrate_timer(&v->singleshot_timer, smp_processor_id());
+ set_timer(&v->singleshot_timer, set.timeout_abs_ns);
+diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
+index 81a3b3a743..a836b264a9 100644
+--- a/xen/include/public/vcpu.h
++++ b/xen/include/public/vcpu.h
+@@ -150,7 +150,10 @@ typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+ DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+ /* Flags to VCPUOP_set_singleshot_timer. */
+- /* Require the timeout to be in the future (return -ETIME if it's passed). */
++ /*
++ * Request the timeout to be in the future (return -ETIME if it's passed)
++ * but can be ignored by the hypervisor.
++ */
+ #define _VCPU_SSHOTTMR_future (0)
+ #define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future)
+
+--
+2.42.0
+
diff --git a/0016-tools-oxenstored-Rename-some-port-variables-to-remot.patch b/0016-tools-oxenstored-Rename-some-port-variables-to-remot.patch
deleted file mode 100644
index 4f168d6..0000000
--- a/0016-tools-oxenstored-Rename-some-port-variables-to-remot.patch
+++ /dev/null
@@ -1,144 +0,0 @@
-From fd0d9b05970986545656c8f6f688f70f3e78a29b Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 30 Nov 2022 03:17:28 +0000
-Subject: [PATCH 16/89] tools/oxenstored: Rename some 'port' variables to
- 'remote_port'
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This will make the logic clearer when we plumb local_port through these
-functions.
-
-While doing this, rearrange the construct in Domains.create0 to separate the
-remote port handling from the interface handling. (The interface logic is
-dubious in several ways, but not altered by this cleanup.)
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 31fbee749a75621039ca601eaee7222050a7dd83)
----
- tools/ocaml/xenstored/domains.ml | 26 ++++++++++++--------------
- tools/ocaml/xenstored/process.ml | 12 ++++++------
- tools/ocaml/xenstored/xenstored.ml | 8 ++++----
- 3 files changed, 22 insertions(+), 24 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml
-index 17fe2fa257..26018ac0dd 100644
---- a/tools/ocaml/xenstored/domains.ml
-+++ b/tools/ocaml/xenstored/domains.ml
-@@ -122,9 +122,9 @@ let cleanup doms =
- let resume _doms _domid =
- ()
-
--let create doms domid mfn port =
-+let create doms domid mfn remote_port =
- let interface = Xenctrl.map_foreign_range xc domid (Xenmmap.getpagesize()) mfn in
-- let dom = Domain.make domid mfn port interface doms.eventchn in
-+ let dom = Domain.make domid mfn remote_port interface doms.eventchn in
- Hashtbl.add doms.table domid dom;
- Domain.bind_interdomain dom;
- dom
-@@ -133,18 +133,16 @@ let xenstored_kva = ref ""
- let xenstored_port = ref ""
-
- let create0 doms =
-- let port, interface =
-- (
-- let port = Utils.read_file_single_integer !xenstored_port
-- and fd = Unix.openfile !xenstored_kva
-- [ Unix.O_RDWR ] 0o600 in
-- let interface = Xenmmap.mmap fd Xenmmap.RDWR Xenmmap.SHARED
-- (Xenmmap.getpagesize()) 0 in
-- Unix.close fd;
-- port, interface
-- )
-- in
-- let dom = Domain.make 0 Nativeint.zero port interface doms.eventchn in
-+ let remote_port = Utils.read_file_single_integer !xenstored_port in
-+
-+ let interface =
-+ let fd = Unix.openfile !xenstored_kva [ Unix.O_RDWR ] 0o600 in
-+ let interface = Xenmmap.mmap fd Xenmmap.RDWR Xenmmap.SHARED (Xenmmap.getpagesize()) 0 in
-+ Unix.close fd;
-+ interface
-+ in
-+
-+ let dom = Domain.make 0 Nativeint.zero remote_port interface doms.eventchn in
- Hashtbl.add doms.table 0 dom;
- Domain.bind_interdomain dom;
- Domain.notify dom;
-diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
-index 72a79e9328..b2973aca2a 100644
---- a/tools/ocaml/xenstored/process.ml
-+++ b/tools/ocaml/xenstored/process.ml
-@@ -558,10 +558,10 @@ let do_transaction_end con t domains cons data =
- let do_introduce con t domains cons data =
- if not (Connection.is_dom0 con)
- then raise Define.Permission_denied;
-- let (domid, mfn, port) =
-+ let (domid, mfn, remote_port) =
- match (split None '\000' data) with
-- | domid :: mfn :: port :: _ ->
-- int_of_string domid, Nativeint.of_string mfn, int_of_string port
-+ | domid :: mfn :: remote_port :: _ ->
-+ int_of_string domid, Nativeint.of_string mfn, int_of_string remote_port
- | _ -> raise Invalid_Cmd_Args;
- in
- let dom =
-@@ -569,18 +569,18 @@ let do_introduce con t domains cons data =
- let edom = Domains.find domains domid in
- if (Domain.get_mfn edom) = mfn && (Connections.find_domain cons domid) != con then begin
- (* Use XS_INTRODUCE for recreating the xenbus event-channel. *)
-- edom.remote_port <- port;
-+ edom.remote_port <- remote_port;
- Domain.bind_interdomain edom;
- end;
- edom
- else try
-- let ndom = Domains.create domains domid mfn port in
-+ let ndom = Domains.create domains domid mfn remote_port in
- Connections.add_domain cons ndom;
- Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.introduce_domain;
- ndom
- with _ -> raise Invalid_Cmd_Args
- in
-- if (Domain.get_remote_port dom) <> port || (Domain.get_mfn dom) <> mfn then
-+ if (Domain.get_remote_port dom) <> remote_port || (Domain.get_mfn dom) <> mfn then
- raise Domain_not_match
-
- let do_release con t domains cons data =
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index 55071b49ec..1f11f576b5 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -167,10 +167,10 @@ let from_channel_f chan global_f socket_f domain_f watch_f store_f =
- global_f ~rw
- | "socket" :: fd :: [] ->
- socket_f ~fd:(int_of_string fd)
-- | "dom" :: domid :: mfn :: port :: []->
-+ | "dom" :: domid :: mfn :: remote_port :: []->
- domain_f (int_of_string domid)
- (Nativeint.of_string mfn)
-- (int_of_string port)
-+ (int_of_string remote_port)
- | "watch" :: domid :: path :: token :: [] ->
- watch_f (int_of_string domid)
- (unhexify path) (unhexify token)
-@@ -209,10 +209,10 @@ let from_channel store cons doms chan =
- else
- warn "Ignoring invalid socket FD %d" fd
- in
-- let domain_f domid mfn port =
-+ let domain_f domid mfn remote_port =
- let ndom =
- if domid > 0 then
-- Domains.create doms domid mfn port
-+ Domains.create doms domid mfn remote_port
- else
- Domains.create0 doms
- in
---
-2.40.0
-
diff --git a/0016-x86-head-check-base-address-alignment.patch b/0016-x86-head-check-base-address-alignment.patch
new file mode 100644
index 0000000..2b9cead
--- /dev/null
+++ b/0016-x86-head-check-base-address-alignment.patch
@@ -0,0 +1,85 @@
+From e5f9987d5f63ecc3cc9884c614aca699a41e7ca7 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:46:28 +0200
+Subject: [PATCH 16/55] x86/head: check base address alignment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Ensure that the base address is 2M aligned, or else the page table
+entries created would be corrupt as reserved bits on the PDE end up
+set.
+
+We have encountered a broken firmware where grub2 would end up loading
+Xen at a non 2M aligned region when using the multiboot2 protocol, and
+that caused a very difficult to debug triple fault.
+
+If the alignment is not as required by the page tables print an error
+message and stop the boot. Also add a build time check that the
+calculation of symbol offsets doesn't break alignment of passed
+addresses.
+
+The check could be performed earlier, but so far the alignment is
+required by the page tables, and hence feels more natural that the
+check lives near to the piece of code that requires it.
+
+Note that when booted as an EFI application from the PE entry point
+the alignment check is already performed by
+efi_arch_load_addr_check(), and hence there's no need to add another
+check at the point where page tables get built in
+efi_arch_memory_setup().
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 0946068e7faea22868c577d7afa54ba4970ff520
+master date: 2023-05-03 13:36:25 +0200
+---
+ xen/arch/x86/boot/head.S | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
+index 245c859dd7..6bc64c9e86 100644
+--- a/xen/arch/x86/boot/head.S
++++ b/xen/arch/x86/boot/head.S
+@@ -1,3 +1,4 @@
++#include <xen/lib.h>
+ #include <xen/multiboot.h>
+ #include <xen/multiboot2.h>
+ #include <public/xen.h>
+@@ -121,6 +122,7 @@ multiboot2_header:
+ .Lbad_ldr_nst: .asciz "ERR: EFI SystemTable is not provided by bootloader!"
+ .Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
+ .Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
++.Lbad_alg_msg: .asciz "ERR: Xen must be loaded at a 2Mb boundary!"
+
+ .section .init.data, "aw", @progbits
+ .align 4
+@@ -146,6 +148,9 @@ bad_cpu:
+ not_multiboot:
+ mov $sym_offs(.Lbad_ldr_msg), %ecx
+ jmp .Lget_vtb
++.Lnot_aligned:
++ mov $sym_offs(.Lbad_alg_msg), %ecx
++ jmp .Lget_vtb
+ .Lmb2_no_st:
+ /*
+ * Here we are on EFI platform. vga_text_buffer was zapped earlier
+@@ -673,6 +678,15 @@ trampoline_setup:
+ cmp %edi, %eax
+ jb 1b
+
++ .if !IS_ALIGNED(sym_offs(0), 1 << L2_PAGETABLE_SHIFT)
++ .error "Symbol offset calculation breaks alignment"
++ .endif
++
++ /* Check that the image base is aligned. */
++ lea sym_esi(_start), %eax
++ test $(1 << L2_PAGETABLE_SHIFT) - 1, %eax
++ jnz .Lnot_aligned
++
+ /* Map Xen into the higher mappings using 2M superpages. */
+ lea _PAGE_PSE + PAGE_HYPERVISOR_RWX + sym_esi(_start), %eax
+ mov $sym_offs(_start), %ecx /* %eax = PTE to write ^ */
+--
+2.42.0
+
diff --git a/0017-tools-oxenstored-Implement-Domain.rebind_evtchn.patch b/0017-tools-oxenstored-Implement-Domain.rebind_evtchn.patch
deleted file mode 100644
index 72bcae0..0000000
--- a/0017-tools-oxenstored-Implement-Domain.rebind_evtchn.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From a20daa7ffda7ccc0e65abe77532a5dc8059bf128 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 30 Nov 2022 11:55:58 +0000
-Subject: [PATCH 17/89] tools/oxenstored: Implement Domain.rebind_evtchn
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Generally speaking, the event channel local/remote port is fixed for the
-lifetime of the associated domain object. The exception to this is a
-secondary XS_INTRODUCE (defined to re-bind to a new event channel) which pokes
-around at the domain object's internal state.
-
-We need to refactor the evtchn handling to support live update, so start by
-moving the relevant manipulation into Domain.
-
-No practical change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit aecdc28d9538ca2a1028ef9bc6550cb171dbbed4)
----
- tools/ocaml/xenstored/domain.ml | 12 ++++++++++++
- tools/ocaml/xenstored/process.ml | 3 +--
- 2 files changed, 13 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml
-index ab08dcf37f..d59a9401e2 100644
---- a/tools/ocaml/xenstored/domain.ml
-+++ b/tools/ocaml/xenstored/domain.ml
-@@ -63,6 +63,18 @@ let string_of_port = function
- let dump d chan =
- fprintf chan "dom,%d,%nd,%d\n" d.id d.mfn d.remote_port
-
-+let rebind_evtchn d remote_port =
-+ begin match d.port with
-+ | None -> ()
-+ | Some p -> Event.unbind d.eventchn p
-+ end;
-+ let local = Event.bind_interdomain d.eventchn d.id remote_port in
-+ debug "domain %d rebind (l %s, r %d) => (l %d, r %d)"
-+ d.id (string_of_port d.port) d.remote_port
-+ (Xeneventchn.to_int local) remote_port;
-+ d.remote_port <- remote_port;
-+ d.port <- Some (local)
-+
- let notify dom =
- match dom.port with
- | None -> warn "domain %d: attempt to notify on unknown port" dom.id
-diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
-index b2973aca2a..1c80e7198d 100644
---- a/tools/ocaml/xenstored/process.ml
-+++ b/tools/ocaml/xenstored/process.ml
-@@ -569,8 +569,7 @@ let do_introduce con t domains cons data =
- let edom = Domains.find domains domid in
- if (Domain.get_mfn edom) = mfn && (Connections.find_domain cons domid) != con then begin
- (* Use XS_INTRODUCE for recreating the xenbus event-channel. *)
-- edom.remote_port <- remote_port;
-- Domain.bind_interdomain edom;
-+ Domain.rebind_evtchn edom remote_port;
- end;
- edom
- else try
---
-2.40.0
-
diff --git a/0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch b/0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch
new file mode 100644
index 0000000..a4501a3
--- /dev/null
+++ b/0017-xenalyze-Handle-start-of-day-RUNNING-transitions.patch
@@ -0,0 +1,275 @@
+From f04295dd802fb6cd43a02ec59a5964b2c5950fe1 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@cloud.com>
+Date: Tue, 5 Sep 2023 08:47:14 +0200
+Subject: [PATCH 17/55] xenalyze: Handle start-of-day ->RUNNING transitions
+
+A recent xentrace highlighted an unhandled corner case in the vcpu
+"start-of-day" logic, if the trace starts after the last running ->
+non-running transition, but before the first non-running -> running
+transition. Because start-of-day wasn't handled, vcpu_next_update()
+was expecting p->current to be NULL, and tripping out with the
+following error message when it wasn't:
+
+vcpu_next_update: FATAL: p->current not NULL! (d32768dv$p, runstate RUNSTATE_INIT)
+
+where 32768 is the DEFAULT_DOMAIN, and $p is the pcpu number.
+
+Instead of calling vcpu_start() piecemeal throughout
+sched_runstate_process(), call it at the top of the function if the
+vcpu in question is still in RUNSTATE_INIT, so that we can handle all
+the cases in one place.
+
+Sketch out at the top of the function all cases which we need to
+handle, and what to do in those cases. Some transitions tell us where
+v is running; some transitions tell us about what is (or is not)
+running on p; some transitions tell us neither.
+
+If a transition tells us where v is now running, update its state;
+otherwise leave it in INIT, in order to avoid having to deal with TSC
+skew on start-up.
+
+If a transition tells us what is or is not running on p, update
+p->current (either to v or NULL). Otherwise leave it alone.
+
+If neither, do nothing.
+
+Reifying those rules:
+
+- If we're continuing to run, set v to RUNNING, and use p->first_tsc
+ as the runstate time.
+
+- If we're starting to run, set v to RUNNING, and use ri->tsc as the
+ runstate time.
+
+- If v is being descheduled, leave v in the INIT state to avoid dealing
+ with TSC skew; but set p->current to NULL so that whatever is
+ scheduled next won't trigger the assert in vcpu_next_update().
+
+- If a vcpu is waking up (switching from one non-runnable state to
+ another non-runnable state), leave v in INIT, and p in whatever
+ state it's in (which may be the default domain, or some other vcpu
+ which has already run).
+
+While here, fix the comment above vcpu_start; it's called when the
+vcpu state is INIT, not when current is the default domain.
+
+Signed-off-by: George Dunlap <george.dunlap@cloud.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: aab4b38b5d77e3c65f44bacd56427a85b7392a11
+master date: 2023-06-30 11:25:33 +0100
+---
+ tools/xentrace/xenalyze.c | 159 ++++++++++++++++++++++++--------------
+ 1 file changed, 101 insertions(+), 58 deletions(-)
+
+diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
+index e7ec284eea..9b4b62c82f 100644
+--- a/tools/xentrace/xenalyze.c
++++ b/tools/xentrace/xenalyze.c
+@@ -6885,39 +6885,86 @@ void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc)
+ p->lost_record.seen_valid_schedule = 1;
+ }
+
+-/* If current is the default domain, we're fixing up from something
+- * like start-of-day. Update what we can. */
+-void vcpu_start(struct pcpu_info *p, struct vcpu_data *v) {
+- /* If vcpus are created, or first show up, in a "dead zone", this will
+- * fail. */
+- if( !p->current || p->current->d->did != DEFAULT_DOMAIN) {
+- fprintf(stderr, "Strange, p->current not default domain!\n");
+- error(ERR_FILE, NULL);
+- return;
+- }
++/*
++ * If the vcpu in question is in state INIT, we're fixing up from something
++ * like start-of-day. Update what we can.
++ */
++void vcpu_start(struct pcpu_info *p, struct vcpu_data *v,
++ int old_runstate, int new_runstate, tsc_t ri_tsc) {
++ tsc_t tsc;
++
++ /*
++ *
++ * Cases:
++ * running -> running:
++ * v -> running, using p->first_tsc
++ * {runnable, blocked} -> running:
++ * v -> running, using ri->tsc
++ * running -> {runnable, blocked}:
++ * Leave v INIT, but clear p->current in case another vcpu is scheduled
++ * blocked -> runnable:
++ * Leave INIT, and also leave p->current, since we still don't know who's scheduled here
++ */
++
++ /*
++ * NB that a vcpu won't come out of INIT until it starts running somewhere.
++ * If this event is pcpu that has already seen a scheduling event, p->current
++ * should be null; if this is the first scheduling event on this pcpu,
++ * p->current should be the default domain.
++ */
++ if( old_runstate == RUNSTATE_RUNNING ) {
++ if ( !p->current || p->current->d->did != DEFAULT_DOMAIN) {
++ fprintf(stderr, "Strange, p->current not default domain!\n");
++ error(ERR_FILE, NULL);
++ return;
+
+- if(!p->first_tsc) {
+- fprintf(stderr, "Strange, p%d first_tsc 0!\n", p->pid);
+- error(ERR_FILE, NULL);
++ }
++
++ if(!p->first_tsc) {
++ fprintf(stderr, "Strange, p%d first_tsc 0!\n", p->pid);
++ error(ERR_FILE, NULL);
++ }
++
++ if(p->first_tsc <= p->current->runstate.tsc) {
++ fprintf(stderr, "Strange, first_tsc %llx < default_domain runstate tsc %llx!\n",
++ p->first_tsc,
++ p->current->runstate.tsc);
++ error(ERR_FILE, NULL);
++ }
++
++ /* Change default domain to 'queued' */
++ runstate_update(p->current, RUNSTATE_QUEUED, p->first_tsc);
++
++ /*
++ * Set current to NULL, so that if another vcpu (not in INIT)
++ * is scheduled here, we don't trip over the check in
++ * vcpu_next_update()
++ */
++ p->current = NULL;
+ }
+
+- if(p->first_tsc <= p->current->runstate.tsc) {
+- fprintf(stderr, "Strange, first_tsc %llx < default_domain runstate tsc %llx!\n",
+- p->first_tsc,
+- p->current->runstate.tsc);
+- error(ERR_FILE, NULL);
++ /* TSC skew at start-of-day is hard to deal with. Don't
++ * bring a vcpu out of INIT until it's seen to be actually
++ * running somewhere. */
++ if ( new_runstate != RUNSTATE_RUNNING ) {
++ fprintf(warn, "First schedule for d%dv%d doesn't take us into a running state; leaving INIT\n",
++ v->d->did, v->vid);
++
++ return;
+ }
+
+- /* Change default domain to 'queued' */
+- runstate_update(p->current, RUNSTATE_QUEUED, p->first_tsc);
++ tsc = ri_tsc;
++ if ( old_runstate == RUNSTATE_RUNNING ) {
++ /* FIXME: Copy over data from the default domain this interval */
++ fprintf(warn, "Using first_tsc for d%dv%d (%lld cycles)\n",
++ v->d->did, v->vid, p->last_tsc - p->first_tsc);
+
+- /* FIXME: Copy over data from the default domain this interval */
+- fprintf(warn, "Using first_tsc for d%dv%d (%lld cycles)\n",
+- v->d->did, v->vid, p->last_tsc - p->first_tsc);
++ tsc = p->first_tsc;
++ }
+
+ /* Simulate the time since the first tsc */
+- runstate_update(v, RUNSTATE_RUNNING, p->first_tsc);
+- p->time.tsc = p->first_tsc;
++ runstate_update(v, RUNSTATE_RUNNING, tsc);
++ p->time.tsc = tsc;
+ p->current = v;
+ pcpu_string_draw(p);
+ v->p = p;
+@@ -7021,6 +7068,13 @@ void sched_runstate_process(struct pcpu_info *p)
+ last_oldstate = v->runstate.last_oldstate;
+ v->runstate.last_oldstate.wrong = RUNSTATE_INIT;
+
++ /* Handle all "start-of-day" issues in one place. This can be
++ * done before any of the other tracks or sanity checks. */
++ if ( v->runstate.state == RUNSTATE_INIT ) {
++ vcpu_start(p, v, sevt.old_runstate, sevt.new_runstate, ri->tsc);
++ return;
++ }
++
+ /* Close vmexits when the putative reason for blocking / &c stops.
+ * This way, we don't account cpu contention to some other overhead. */
+ if(sevt.new_runstate == RUNSTATE_RUNNABLE
+@@ -7190,32 +7244,27 @@ update:
+ * or stopping actually running on a physical cpu. */
+ if ( type == CONTINUE )
+ {
+- if( v->runstate.state == RUNSTATE_INIT ) {
+- /* Start-of-day; account first tsc -> now to v */
+- vcpu_start(p, v);
+- } else {
+- /* Continue running. First, do some sanity checks */
+- if ( v->runstate.state == RUNSTATE_LOST ) {
+- fprintf(warn, "WARNING: continue with d%dv%d in RUNSTATE_LOST. Resetting current.\n",
+- v->d->did, v->vid);
+- if ( p->current )
+- vcpu_prev_update(p, p->current, ri->tsc, RUNSTATE_LOST);
+- vcpu_next_update(p, v, ri->tsc);
+- }
+- else if( v->runstate.state != RUNSTATE_RUNNING ) {
+- /* This should never happen. */
+- fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
+- v->d->did, v->vid, runstate_name[v->runstate.state]);
+- error(ERR_FILE, NULL);
+- } else if ( v->p != p ) {
+- fprintf(warn, "FATAL: continue on p%d, but d%dv%d p%d!\n",
+- p->pid, v->d->did, v->vid,
+- v->p ? v->p->pid : -1);
+- error(ERR_FILE, NULL);
+- }
+-
+- runstate_update(v, RUNSTATE_RUNNING, ri->tsc);
++ /* Continue running. First, do some sanity checks */
++ if ( v->runstate.state == RUNSTATE_LOST ) {
++ fprintf(warn, "WARNING: continue with d%dv%d in RUNSTATE_LOST. Resetting current.\n",
++ v->d->did, v->vid);
++ if ( p->current )
++ vcpu_prev_update(p, p->current, ri->tsc, RUNSTATE_LOST);
++ vcpu_next_update(p, v, ri->tsc);
++ }
++ else if( v->runstate.state != RUNSTATE_RUNNING ) {
++ /* This should never happen. */
++ fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
++ v->d->did, v->vid, runstate_name[v->runstate.state]);
++ error(ERR_FILE, NULL);
++ } else if ( v->p != p ) {
++ fprintf(warn, "FATAL: continue on p%d, but d%dv%d p%d!\n",
++ p->pid, v->d->did, v->vid,
++ v->p ? v->p->pid : -1);
++ error(ERR_FILE, NULL);
+ }
++
++ runstate_update(v, RUNSTATE_RUNNING, ri->tsc);
+ }
+ else if ( sevt.old_runstate == RUNSTATE_RUNNING
+ || v->runstate.state == RUNSTATE_RUNNING )
+@@ -7232,10 +7281,7 @@ update:
+ * # (should never happen)
+ */
+ if( sevt.old_runstate == RUNSTATE_RUNNING ) {
+- if( v->runstate.state == RUNSTATE_INIT ) {
+- /* Start-of-day; account first tsc -> now to v */
+- vcpu_start(p, v);
+- } else if( v->runstate.state != RUNSTATE_RUNNING
++ if( v->runstate.state != RUNSTATE_RUNNING
+ && v->runstate.state != RUNSTATE_LOST ) {
+ /* This should never happen. */
+ fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
+@@ -7264,11 +7310,8 @@ update:
+
+ vcpu_next_update(p, v, ri->tsc);
+ }
+- else if ( v->runstate.state != RUNSTATE_INIT )
++ else
+ {
+- /* TSC skew at start-of-day is hard to deal with. Don't
+- * bring a vcpu out of INIT until it's seen to be actually
+- * running somewhere. */
+ runstate_update(v, sevt.new_runstate, ri->tsc);
+ }
+
+--
+2.42.0
+
diff --git a/0018-tools-oxenstored-Rework-Domain-evtchn-handling-to-us.patch b/0018-tools-oxenstored-Rework-Domain-evtchn-handling-to-us.patch
deleted file mode 100644
index 1392b34..0000000
--- a/0018-tools-oxenstored-Rework-Domain-evtchn-handling-to-us.patch
+++ /dev/null
@@ -1,209 +0,0 @@
-From 4b418768ef4d75d0f70e4ce7cb5710404527bf47 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 30 Nov 2022 11:59:34 +0000
-Subject: [PATCH 18/89] tools/oxenstored: Rework Domain evtchn handling to use
- port_pair
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Inter-domain event channels are always a pair of local and remote ports.
-Right now the handling is asymmetric, caused by the fact that the evtchn is
-bound after the associated Domain object is constructed.
-
-First, move binding of the event channel into the Domain.make() constructor.
-This means the local port no longer needs to be an option. It also removes
-the final callers of Domain.bind_interdomain.
-
-Next, introduce a new port_pair type to encapsulate the fact that these two
-should be updated together, and replace the previous port and remote_port
-fields. This refactoring also changes the Domain.get_port interface (removing
-an option) so take the opportunity to name it get_local_port instead.
-
-Also, this fixes a use-after-free risk with Domain.close. Once the evtchn has
-been unbound, the same local port number can be reused for a different
-purpose, so explicitly invalidate the ports to prevent their accidental misuse
-in the future.
-
-This also cleans up some of the debugging, to always print a port pair.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit df2db174b36eba67c218763ef621c67912202fc6)
----
- tools/ocaml/xenstored/connections.ml | 9 +---
- tools/ocaml/xenstored/domain.ml | 75 ++++++++++++++--------------
- tools/ocaml/xenstored/domains.ml | 2 -
- 3 files changed, 39 insertions(+), 47 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml
-index 7d68c583b4..a80ae0bed2 100644
---- a/tools/ocaml/xenstored/connections.ml
-+++ b/tools/ocaml/xenstored/connections.ml
-@@ -48,9 +48,7 @@ let add_domain cons dom =
- let xbcon = Xenbus.Xb.open_mmap ~capacity (Domain.get_interface dom) (fun () -> Domain.notify dom) in
- let con = Connection.create xbcon (Some dom) in
- Hashtbl.add cons.domains (Domain.get_id dom) con;
-- match Domain.get_port dom with
-- | Some p -> Hashtbl.add cons.ports p con;
-- | None -> ()
-+ Hashtbl.add cons.ports (Domain.get_local_port dom) con
-
- let select ?(only_if = (fun _ -> true)) cons =
- Hashtbl.fold (fun _ con (ins, outs) ->
-@@ -97,10 +95,7 @@ let del_domain cons id =
- let con = find_domain cons id in
- Hashtbl.remove cons.domains id;
- (match Connection.get_domain con with
-- | Some d ->
-- (match Domain.get_port d with
-- | Some p -> Hashtbl.remove cons.ports p
-- | None -> ())
-+ | Some d -> Hashtbl.remove cons.ports (Domain.get_local_port d)
- | None -> ());
- del_watches cons con;
- Connection.close con
-diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml
-index d59a9401e2..481e10794d 100644
---- a/tools/ocaml/xenstored/domain.ml
-+++ b/tools/ocaml/xenstored/domain.ml
-@@ -19,14 +19,31 @@ open Printf
- let debug fmt = Logging.debug "domain" fmt
- let warn fmt = Logging.warn "domain" fmt
-
-+(* A bound inter-domain event channel port pair. The remote port, and the
-+ local port it is bound to. *)
-+type port_pair =
-+{
-+ local: Xeneventchn.t;
-+ remote: int;
-+}
-+
-+(* Sentinal port_pair with both set to EVTCHN_INVALID *)
-+let invalid_ports =
-+{
-+ local = Xeneventchn.of_int 0;
-+ remote = 0
-+}
-+
-+let string_of_port_pair p =
-+ sprintf "(l %d, r %d)" (Xeneventchn.to_int p.local) p.remote
-+
- type t =
- {
- id: Xenctrl.domid;
- mfn: nativeint;
- interface: Xenmmap.mmap_interface;
- eventchn: Event.t;
-- mutable remote_port: int;
-- mutable port: Xeneventchn.t option;
-+ mutable ports: port_pair;
- mutable bad_client: bool;
- mutable io_credit: int; (* the rounds of ring process left to do, default is 0,
- usually set to 1 when there is work detected, could
-@@ -41,8 +58,8 @@ let is_dom0 d = d.id = 0
- let get_id domain = domain.id
- let get_interface d = d.interface
- let get_mfn d = d.mfn
--let get_remote_port d = d.remote_port
--let get_port d = d.port
-+let get_remote_port d = d.ports.remote
-+let get_local_port d = d.ports.local
-
- let is_bad_domain domain = domain.bad_client
- let mark_as_bad domain = domain.bad_client <- true
-@@ -56,54 +73,36 @@ let is_paused_for_conflict dom = dom.conflict_credit <= 0.0
-
- let is_free_to_conflict = is_dom0
-
--let string_of_port = function
-- | None -> "None"
-- | Some x -> string_of_int (Xeneventchn.to_int x)
--
- let dump d chan =
-- fprintf chan "dom,%d,%nd,%d\n" d.id d.mfn d.remote_port
-+ fprintf chan "dom,%d,%nd,%d\n" d.id d.mfn d.ports.remote
-
- let rebind_evtchn d remote_port =
-- begin match d.port with
-- | None -> ()
-- | Some p -> Event.unbind d.eventchn p
-- end;
-+ Event.unbind d.eventchn d.ports.local;
- let local = Event.bind_interdomain d.eventchn d.id remote_port in
-- debug "domain %d rebind (l %s, r %d) => (l %d, r %d)"
-- d.id (string_of_port d.port) d.remote_port
-- (Xeneventchn.to_int local) remote_port;
-- d.remote_port <- remote_port;
-- d.port <- Some (local)
-+ let new_ports = { local; remote = remote_port } in
-+ debug "domain %d rebind %s => %s"
-+ d.id (string_of_port_pair d.ports) (string_of_port_pair new_ports);
-+ d.ports <- new_ports
-
- let notify dom =
-- match dom.port with
-- | None -> warn "domain %d: attempt to notify on unknown port" dom.id
-- | Some port -> Event.notify dom.eventchn port
--
--let bind_interdomain dom =
-- begin match dom.port with
-- | None -> ()
-- | Some port -> Event.unbind dom.eventchn port
-- end;
-- dom.port <- Some (Event.bind_interdomain dom.eventchn dom.id dom.remote_port);
-- debug "bound domain %d remote port %d to local port %s" dom.id dom.remote_port (string_of_port dom.port)
--
-+ Event.notify dom.eventchn dom.ports.local
-
- let close dom =
-- debug "domain %d unbound port %s" dom.id (string_of_port dom.port);
-- begin match dom.port with
-- | None -> ()
-- | Some port -> Event.unbind dom.eventchn port
-- end;
-+ debug "domain %d unbind %s" dom.id (string_of_port_pair dom.ports);
-+ Event.unbind dom.eventchn dom.ports.local;
-+ dom.ports <- invalid_ports;
- Xenmmap.unmap dom.interface
-
--let make id mfn remote_port interface eventchn = {
-+let make id mfn remote_port interface eventchn =
-+ let local = Event.bind_interdomain eventchn id remote_port in
-+ let ports = { local; remote = remote_port } in
-+ debug "domain %d bind %s" id (string_of_port_pair ports);
-+{
- id = id;
- mfn = mfn;
-- remote_port = remote_port;
-+ ports;
- interface = interface;
- eventchn = eventchn;
-- port = None;
- bad_client = false;
- io_credit = 0;
- conflict_credit = !Define.conflict_burst_limit;
-diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml
-index 26018ac0dd..2ab0c5f4d8 100644
---- a/tools/ocaml/xenstored/domains.ml
-+++ b/tools/ocaml/xenstored/domains.ml
-@@ -126,7 +126,6 @@ let create doms domid mfn remote_port =
- let interface = Xenctrl.map_foreign_range xc domid (Xenmmap.getpagesize()) mfn in
- let dom = Domain.make domid mfn remote_port interface doms.eventchn in
- Hashtbl.add doms.table domid dom;
-- Domain.bind_interdomain dom;
- dom
-
- let xenstored_kva = ref ""
-@@ -144,7 +143,6 @@ let create0 doms =
-
- let dom = Domain.make 0 Nativeint.zero remote_port interface doms.eventchn in
- Hashtbl.add doms.table 0 dom;
-- Domain.bind_interdomain dom;
- Domain.notify dom;
- dom
-
---
-2.40.0
-
diff --git a/0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch b/0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch
new file mode 100644
index 0000000..a03f86e
--- /dev/null
+++ b/0018-x86-ioapic-sanitize-IO-APIC-pins-before-enabling-lap.patch
@@ -0,0 +1,113 @@
+From d0cdd34dd815bf99c3f8a7bddfdde5ae59b0f0db Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:47:34 +0200
+Subject: [PATCH 18/55] x86/ioapic: sanitize IO-APIC pins before enabling lapic
+ LVTERR/ESR
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The current logic to init the local APIC and the IO-APIC initializes the
+local APIC LVTERR/ESR before doing any sanitization of the IO-APIC pin
+configuration. It's already noted in enable_IO_APIC() that Xen
+shouldn't trust the IO-APIC being empty at bootup.
+
+At XenServer we have a system where the IO-APIC 0 is handed to Xen
+with pin 0 unmasked, set to Fixed delivery mode, edge triggered and
+with a vector of 0 (all fields of the RTE are zeroed). Once the local
+APIC LVTERR/ESR is enabled, periodic injections from that pin cause the
+local APIC to in turn inject periodic error vectors:
+
+APIC error on CPU0: 00(40), Received illegal vector
+APIC error on CPU0: 40(40), Received illegal vector
+APIC error on CPU0: 40(40), Received illegal vector
+APIC error on CPU0: 40(40), Received illegal vector
+APIC error on CPU0: 40(40), Received illegal vector
+APIC error on CPU0: 40(40), Received illegal vector
+
+That prevents Xen from booting.
+
+Move the masking of the IO-APIC pins ahead of the setup of the local
+APIC. This has the side effect of also moving the detection of the
+pin where the i8259 is connected, as such detection must be done
+before masking any pins.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 813da5f0e73b8cbd2ac3c7922506e58c28cd736d
+master date: 2023-07-17 10:31:10 +0200
+---
+ xen/arch/x86/apic.c | 4 ++++
+ xen/arch/x86/include/asm/irq.h | 1 +
+ xen/arch/x86/io_apic.c | 4 +---
+ xen/arch/x86/smpboot.c | 5 +++++
+ 4 files changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
+index 47e6e5fe41..33103d3e91 100644
+--- a/xen/arch/x86/apic.c
++++ b/xen/arch/x86/apic.c
+@@ -1491,6 +1491,10 @@ int __init APIC_init_uniprocessor (void)
+ physids_clear(phys_cpu_present_map);
+ physid_set(boot_cpu_physical_apicid, phys_cpu_present_map);
+
++ if ( !skip_ioapic_setup && nr_ioapics )
++ /* Sanitize the IO-APIC pins before enabling the lapic LVTERR/ESR. */
++ enable_IO_APIC();
++
+ setup_local_APIC(true);
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
+index 76e6ed6d60..f6a0207a80 100644
+--- a/xen/arch/x86/include/asm/irq.h
++++ b/xen/arch/x86/include/asm/irq.h
+@@ -122,6 +122,7 @@ bool bogus_8259A_irq(unsigned int irq);
+ int i8259A_suspend(void);
+ int i8259A_resume(void);
+
++void enable_IO_APIC(void);
+ void setup_IO_APIC(void);
+ void disable_IO_APIC(void);
+ void setup_ioapic_dest(void);
+diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
+index 9b8a972cf5..25a08b1ea6 100644
+--- a/xen/arch/x86/io_apic.c
++++ b/xen/arch/x86/io_apic.c
+@@ -1273,7 +1273,7 @@ static void cf_check _print_IO_APIC_keyhandler(unsigned char key)
+ __print_IO_APIC(0);
+ }
+
+-static void __init enable_IO_APIC(void)
++void __init enable_IO_APIC(void)
+ {
+ int i8259_apic, i8259_pin;
+ int i, apic;
+@@ -2067,8 +2067,6 @@ static void __init ioapic_pm_state_alloc(void)
+
+ void __init setup_IO_APIC(void)
+ {
+- enable_IO_APIC();
+-
+ if (acpi_ioapic)
+ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
+ else
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index b46fd9ab18..41ec3211ac 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -1232,6 +1232,11 @@ void __init smp_prepare_cpus(void)
+ verify_local_APIC();
+
+ connect_bsp_APIC();
++
++ if ( !skip_ioapic_setup && nr_ioapics )
++ /* Sanitize the IO-APIC pins before enabling the lapic LVTERR/ESR. */
++ enable_IO_APIC();
++
+ setup_local_APIC(true);
+
+ if ( !skip_ioapic_setup && nr_ioapics )
+--
+2.42.0
+
diff --git a/0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch b/0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch
deleted file mode 100644
index f6ae3fe..0000000
--- a/0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch
+++ /dev/null
@@ -1,367 +0,0 @@
-From f02171b663393e10d35123e5572c0f5b3e72c29d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Thu, 3 Nov 2022 15:31:39 +0000
-Subject: [PATCH 19/89] tools/oxenstored: Keep /dev/xen/evtchn open across live
- update
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Closing the evtchn handle will unbind and free all local ports. The new
-xenstored would need to rebind all evtchns, which is work that we don't want
-or need to be doing during the critical handover period.
-
-However, it turns out that the Windows PV drivers also rebind their local port
-too across suspend/resume, leaving (o)xenstored with a stale idea of the
-remote port to use. In this case, reusing the established connection is the
-only robust option.
-
-Therefore:
- * Have oxenstored open /dev/xen/evtchn without CLOEXEC at start of day.
- * Extend the handover information with the evtchn fd, domexc virq local port,
- and the local port number for each domain connection.
- * Have (the new) oxenstored recover the open handle using Xeneventchn.fdopen,
- and use the provided local ports rather than trying to rebind them.
-
-When this new information isn't present (i.e. live updating from an oxenstored
-prior to this change), the best-effort status quo will have to do.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 9b224c25293a53fcbe32da68052d861dda71a6f4)
----
- tools/ocaml/xenstored/domain.ml | 13 +++--
- tools/ocaml/xenstored/domains.ml | 9 ++--
- tools/ocaml/xenstored/event.ml | 20 +++++--
- tools/ocaml/xenstored/process.ml | 2 +-
- tools/ocaml/xenstored/xenstored.ml | 85 ++++++++++++++++++++----------
- 5 files changed, 90 insertions(+), 39 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml
-index 481e10794d..5c15752a37 100644
---- a/tools/ocaml/xenstored/domain.ml
-+++ b/tools/ocaml/xenstored/domain.ml
-@@ -74,7 +74,8 @@ let is_paused_for_conflict dom = dom.conflict_credit <= 0.0
- let is_free_to_conflict = is_dom0
-
- let dump d chan =
-- fprintf chan "dom,%d,%nd,%d\n" d.id d.mfn d.ports.remote
-+ fprintf chan "dom,%d,%nd,%d,%d\n"
-+ d.id d.mfn d.ports.remote (Xeneventchn.to_int d.ports.local)
-
- let rebind_evtchn d remote_port =
- Event.unbind d.eventchn d.ports.local;
-@@ -93,8 +94,14 @@ let close dom =
- dom.ports <- invalid_ports;
- Xenmmap.unmap dom.interface
-
--let make id mfn remote_port interface eventchn =
-- let local = Event.bind_interdomain eventchn id remote_port in
-+(* On clean start, local_port will be None, and we must bind the remote port
-+ given. On Live Update, the event channel is already bound, and both the
-+ local and remote port numbers come from the transfer record. *)
-+let make ?local_port ~remote_port id mfn interface eventchn =
-+ let local = match local_port with
-+ | None -> Event.bind_interdomain eventchn id remote_port
-+ | Some p -> Xeneventchn.of_int p
-+ in
- let ports = { local; remote = remote_port } in
- debug "domain %d bind %s" id (string_of_port_pair ports);
- {
-diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml
-index 2ab0c5f4d8..b6c075c838 100644
---- a/tools/ocaml/xenstored/domains.ml
-+++ b/tools/ocaml/xenstored/domains.ml
-@@ -56,6 +56,7 @@ let exist doms id = Hashtbl.mem doms.table id
- let find doms id = Hashtbl.find doms.table id
- let number doms = Hashtbl.length doms.table
- let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table
-+let eventchn doms = doms.eventchn
-
- let rec is_empty_queue q =
- Queue.is_empty q ||
-@@ -122,16 +123,16 @@ let cleanup doms =
- let resume _doms _domid =
- ()
-
--let create doms domid mfn remote_port =
-+let create doms ?local_port ~remote_port domid mfn =
- let interface = Xenctrl.map_foreign_range xc domid (Xenmmap.getpagesize()) mfn in
-- let dom = Domain.make domid mfn remote_port interface doms.eventchn in
-+ let dom = Domain.make ?local_port ~remote_port domid mfn interface doms.eventchn in
- Hashtbl.add doms.table domid dom;
- dom
-
- let xenstored_kva = ref ""
- let xenstored_port = ref ""
-
--let create0 doms =
-+let create0 ?local_port doms =
- let remote_port = Utils.read_file_single_integer !xenstored_port in
-
- let interface =
-@@ -141,7 +142,7 @@ let create0 doms =
- interface
- in
-
-- let dom = Domain.make 0 Nativeint.zero remote_port interface doms.eventchn in
-+ let dom = Domain.make ?local_port ~remote_port 0 Nativeint.zero interface doms.eventchn in
- Hashtbl.add doms.table 0 dom;
- Domain.notify dom;
- dom
-diff --git a/tools/ocaml/xenstored/event.ml b/tools/ocaml/xenstored/event.ml
-index a3be296374..629dc6041b 100644
---- a/tools/ocaml/xenstored/event.ml
-+++ b/tools/ocaml/xenstored/event.ml
-@@ -20,9 +20,18 @@ type t = {
- domexc: Xeneventchn.t;
- }
-
--let init () =
-- let handle = Xeneventchn.init () in
-- let domexc = Xeneventchn.bind_dom_exc_virq handle in
-+(* On clean start, both parameters will be None, and we must open the evtchn
-+ handle and bind the DOM_EXC VIRQ. On Live Update, the fd is preserved
-+ across exec(), and the DOM_EXC VIRQ still bound. *)
-+let init ?fd ?domexc_port () =
-+ let handle = match fd with
-+ | None -> Xeneventchn.init ~cloexec:false ()
-+ | Some fd -> fd |> Utils.FD.of_int |> Xeneventchn.fdopen
-+ in
-+ let domexc = match domexc_port with
-+ | None -> Xeneventchn.bind_dom_exc_virq handle
-+ | Some p -> Xeneventchn.of_int p
-+ in
- { handle; domexc }
-
- let fd eventchn = Xeneventchn.fd eventchn.handle
-@@ -31,3 +40,8 @@ let unbind eventchn port = Xeneventchn.unbind eventchn.handle port
- let notify eventchn port = Xeneventchn.notify eventchn.handle port
- let pending eventchn = Xeneventchn.pending eventchn.handle
- let unmask eventchn port = Xeneventchn.unmask eventchn.handle port
-+
-+let dump e chan =
-+ Printf.fprintf chan "evtchn-dev,%d,%d\n"
-+ (Utils.FD.to_int @@ Xeneventchn.fd e.handle)
-+ (Xeneventchn.to_int e.domexc)
-diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
-index 1c80e7198d..02bd0f7d80 100644
---- a/tools/ocaml/xenstored/process.ml
-+++ b/tools/ocaml/xenstored/process.ml
-@@ -573,7 +573,7 @@ let do_introduce con t domains cons data =
- end;
- edom
- else try
-- let ndom = Domains.create domains domid mfn remote_port in
-+ let ndom = Domains.create ~remote_port domains domid mfn in
- Connections.add_domain cons ndom;
- Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.introduce_domain;
- ndom
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index 1f11f576b5..f526f4fb23 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -144,7 +144,7 @@ exception Bad_format of string
-
- let dump_format_header = "$xenstored-dump-format"
-
--let from_channel_f chan global_f socket_f domain_f watch_f store_f =
-+let from_channel_f chan global_f evtchn_f socket_f domain_f watch_f store_f =
- let unhexify s = Utils.unhexify s in
- let getpath s =
- let u = Utils.unhexify s in
-@@ -165,12 +165,19 @@ let from_channel_f chan global_f socket_f domain_f watch_f store_f =
- (* there might be more parameters here,
- e.g. a RO socket from a previous version: ignore it *)
- global_f ~rw
-+ | "evtchn-dev" :: fd :: domexc_port :: [] ->
-+ evtchn_f ~fd:(int_of_string fd)
-+ ~domexc_port:(int_of_string domexc_port)
- | "socket" :: fd :: [] ->
- socket_f ~fd:(int_of_string fd)
-- | "dom" :: domid :: mfn :: remote_port :: []->
-- domain_f (int_of_string domid)
-- (Nativeint.of_string mfn)
-- (int_of_string remote_port)
-+ | "dom" :: domid :: mfn :: remote_port :: rest ->
-+ let local_port = match rest with
-+ | [] -> None (* backward compat: old version didn't have it *)
-+ | local_port :: _ -> Some (int_of_string local_port) in
-+ domain_f ?local_port
-+ ~remote_port:(int_of_string remote_port)
-+ (int_of_string domid)
-+ (Nativeint.of_string mfn)
- | "watch" :: domid :: path :: token :: [] ->
- watch_f (int_of_string domid)
- (unhexify path) (unhexify token)
-@@ -189,10 +196,21 @@ let from_channel_f chan global_f socket_f domain_f watch_f store_f =
- done;
- info "Completed loading xenstore dump"
-
--let from_channel store cons doms chan =
-+let from_channel store cons domains_init chan =
- (* don't let the permission get on our way, full perm ! *)
- let op = Store.get_ops store Perms.Connection.full_rights in
- let rwro = ref (None) in
-+ let doms = ref (None) in
-+
-+ let require_doms () =
-+ match !doms with
-+ | None ->
-+ warn "No event channel file descriptor available in dump!";
-+ let domains = domains_init @@ Event.init () in
-+ doms := Some domains;
-+ domains
-+ | Some d -> d
-+ in
- let global_f ~rw =
- let get_listen_sock sockfd =
- let fd = sockfd |> int_of_string |> Utils.FD.of_int in
-@@ -201,6 +219,10 @@ let from_channel store cons doms chan =
- in
- rwro := get_listen_sock rw
- in
-+ let evtchn_f ~fd ~domexc_port =
-+ let evtchn = Event.init ~fd ~domexc_port () in
-+ doms := Some(domains_init evtchn)
-+ in
- let socket_f ~fd =
- let ufd = Utils.FD.of_int fd in
- let is_valid = try (Unix.fstat ufd).Unix.st_kind = Unix.S_SOCK with _ -> false in
-@@ -209,12 +231,13 @@ let from_channel store cons doms chan =
- else
- warn "Ignoring invalid socket FD %d" fd
- in
-- let domain_f domid mfn remote_port =
-+ let domain_f ?local_port ~remote_port domid mfn =
-+ let doms = require_doms () in
- let ndom =
- if domid > 0 then
-- Domains.create doms domid mfn remote_port
-+ Domains.create ?local_port ~remote_port doms domid mfn
- else
-- Domains.create0 doms
-+ Domains.create0 ?local_port doms
- in
- Connections.add_domain cons ndom;
- in
-@@ -229,8 +252,8 @@ let from_channel store cons doms chan =
- op.Store.write path value;
- op.Store.setperms path perms
- in
-- from_channel_f chan global_f socket_f domain_f watch_f store_f;
-- !rwro
-+ from_channel_f chan global_f evtchn_f socket_f domain_f watch_f store_f;
-+ !rwro, require_doms ()
-
- let from_file store cons doms file =
- info "Loading xenstore dump from %s" file;
-@@ -238,7 +261,7 @@ let from_file store cons doms file =
- finally (fun () -> from_channel store doms cons channel)
- (fun () -> close_in channel)
-
--let to_channel store cons rw chan =
-+let to_channel store cons (rw, evtchn) chan =
- let hexify s = Utils.hexify s in
-
- fprintf chan "%s\n" dump_format_header;
-@@ -248,6 +271,9 @@ let to_channel store cons rw chan =
- Utils.FD.to_int fd in
- fprintf chan "global,%d\n" (fdopt rw);
-
-+ (* dump evtchn device info *)
-+ Event.dump evtchn chan;
-+
- (* dump connections related to domains: domid, mfn, eventchn port/ sockets, and watches *)
- Connections.iter cons (fun con -> Connection.dump con chan);
-
-@@ -367,7 +393,6 @@ let _ =
- | None -> () end;
-
- let store = Store.create () in
-- let eventchn = Event.init () in
- let next_frequent_ops = ref 0. in
- let advance_next_frequent_ops () =
- next_frequent_ops := (Unix.gettimeofday () +. !Define.conflict_max_history_seconds)
-@@ -375,16 +400,8 @@ let _ =
- let delay_next_frequent_ops_by duration =
- next_frequent_ops := !next_frequent_ops +. duration
- in
-- let domains = Domains.init eventchn advance_next_frequent_ops in
-+ let domains_init eventchn = Domains.init eventchn advance_next_frequent_ops in
-
-- (* For things that need to be done periodically but more often
-- * than the periodic_ops function *)
-- let frequent_ops () =
-- if Unix.gettimeofday () > !next_frequent_ops then (
-- History.trim ();
-- Domains.incr_conflict_credit domains;
-- advance_next_frequent_ops ()
-- ) in
- let cons = Connections.create () in
-
- let quit = ref false in
-@@ -393,14 +410,15 @@ let _ =
- List.iter (fun path ->
- Store.write store Perms.Connection.full_rights path "") Store.Path.specials;
-
-- let rw_sock =
-+ let rw_sock, domains =
- if cf.restart && Sys.file_exists Disk.xs_daemon_database then (
-- let rwro = DB.from_file store domains cons Disk.xs_daemon_database in
-+ let rw, domains = DB.from_file store domains_init cons Disk.xs_daemon_database in
- info "Live reload: database loaded";
- Process.LiveUpdate.completed ();
-- rwro
-+ rw, domains
- ) else (
- info "No live reload: regular startup";
-+ let domains = domains_init @@ Event.init () in
- if !Disk.enable then (
- info "reading store from disk";
- Disk.read store
-@@ -413,9 +431,18 @@ let _ =
- if cf.domain_init then (
- Connections.add_domain cons (Domains.create0 domains);
- );
-- rw_sock
-+ rw_sock, domains
- ) in
-
-+ (* For things that need to be done periodically but more often
-+ * than the periodic_ops function *)
-+ let frequent_ops () =
-+ if Unix.gettimeofday () > !next_frequent_ops then (
-+ History.trim ();
-+ Domains.incr_conflict_credit domains;
-+ advance_next_frequent_ops ()
-+ ) in
-+
- (* required for xenstore-control to detect availability of live-update *)
- let tool_path = Store.Path.of_string "/tool" in
- if not (Store.path_exists store tool_path) then
-@@ -430,8 +457,10 @@ let _ =
- Sys.set_signal Sys.sigusr1 (Sys.Signal_handle (fun _ -> sigusr1_handler store));
- Sys.set_signal Sys.sigpipe Sys.Signal_ignore;
-
-+ let eventchn = Domains.eventchn domains in
-+
- if cf.activate_access_log then begin
-- let post_rotate () = DB.to_file store cons (None) Disk.xs_daemon_database in
-+ let post_rotate () = DB.to_file store cons (None, eventchn) Disk.xs_daemon_database in
- Logging.init_access_log post_rotate
- end;
-
-@@ -593,7 +622,7 @@ let _ =
- live_update := Process.LiveUpdate.should_run cons;
- if !live_update || !quit then begin
- (* don't initiate live update if saving state fails *)
-- DB.to_file store cons (rw_sock) Disk.xs_daemon_database;
-+ DB.to_file store cons (rw_sock, eventchn) Disk.xs_daemon_database;
- quit := true;
- end
- with exc ->
---
-2.40.0
-
diff --git a/0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch b/0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch
new file mode 100644
index 0000000..10e5946
--- /dev/null
+++ b/0019-x86-ioapic-add-a-raw-field-to-RTE-struct.patch
@@ -0,0 +1,147 @@
+From a885649098e06432939907eee84f735a644883e6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:48:43 +0200
+Subject: [PATCH 19/55] x86/ioapic: add a raw field to RTE struct
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Further changes will require access to the full RTE as a single value
+in order to pass it to IOMMU interrupt remapping handlers.
+
+No functional change intended.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: cdc48cb5a74b10c2b07a09d2f554756d730bfee3
+master date: 2023-07-28 09:39:44 +0200
+---
+ xen/arch/x86/include/asm/io_apic.h | 57 +++++++++++++-----------
+ xen/arch/x86/io_apic.c | 2 +-
+ xen/drivers/passthrough/amd/iommu_intr.c | 4 +-
+ xen/drivers/passthrough/vtd/intremap.c | 4 +-
+ 4 files changed, 35 insertions(+), 32 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/io_apic.h b/xen/arch/x86/include/asm/io_apic.h
+index ef0878b09e..a558bb063c 100644
+--- a/xen/arch/x86/include/asm/io_apic.h
++++ b/xen/arch/x86/include/asm/io_apic.h
+@@ -89,35 +89,38 @@ enum ioapic_irq_destination_types {
+ };
+
+ struct IO_APIC_route_entry {
+- unsigned int vector:8;
+- unsigned int delivery_mode:3; /*
+- * 000: FIXED
+- * 001: lowest prio
+- * 111: ExtINT
+- */
+- unsigned int dest_mode:1; /* 0: physical, 1: logical */
+- unsigned int delivery_status:1;
+- unsigned int polarity:1; /* 0: low, 1: high */
+- unsigned int irr:1;
+- unsigned int trigger:1; /* 0: edge, 1: level */
+- unsigned int mask:1; /* 0: enabled, 1: disabled */
+- unsigned int __reserved_2:15;
+-
+ union {
+ struct {
+- unsigned int __reserved_1:24;
+- unsigned int physical_dest:4;
+- unsigned int __reserved_2:4;
+- } physical;
+-
+- struct {
+- unsigned int __reserved_1:24;
+- unsigned int logical_dest:8;
+- } logical;
+-
+- /* used when Interrupt Remapping with EIM is enabled */
+- unsigned int dest32;
+- } dest;
++ unsigned int vector:8;
++ unsigned int delivery_mode:3; /*
++ * 000: FIXED
++ * 001: lowest prio
++ * 111: ExtINT
++ */
++ unsigned int dest_mode:1; /* 0: physical, 1: logical */
++ unsigned int delivery_status:1;
++ unsigned int polarity:1; /* 0: low, 1: high */
++ unsigned int irr:1;
++ unsigned int trigger:1; /* 0: edge, 1: level */
++ unsigned int mask:1; /* 0: enabled, 1: disabled */
++ unsigned int __reserved_2:15;
++
++ union {
++ struct {
++ unsigned int __reserved_1:24;
++ unsigned int physical_dest:4;
++ unsigned int __reserved_2:4;
++ } physical;
++
++ struct {
++ unsigned int __reserved_1:24;
++ unsigned int logical_dest:8;
++ } logical;
++ unsigned int dest32;
++ } dest;
++ };
++ uint64_t raw;
++ };
+ };
+
+ /*
+diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
+index 25a08b1ea6..aada2ef96c 100644
+--- a/xen/arch/x86/io_apic.c
++++ b/xen/arch/x86/io_apic.c
+@@ -2360,7 +2360,7 @@ int ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
+ int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val)
+ {
+ int apic, pin, irq, ret, pirq;
+- struct IO_APIC_route_entry rte = { 0 };
++ struct IO_APIC_route_entry rte = { };
+ unsigned long flags;
+ struct irq_desc *desc;
+
+diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
+index f4de09f431..9e6be3be35 100644
+--- a/xen/drivers/passthrough/amd/iommu_intr.c
++++ b/xen/drivers/passthrough/amd/iommu_intr.c
+@@ -352,8 +352,8 @@ static int update_intremap_entry_from_ioapic(
+ void cf_check amd_iommu_ioapic_update_ire(
+ unsigned int apic, unsigned int reg, unsigned int value)
+ {
+- struct IO_APIC_route_entry old_rte = { 0 };
+- struct IO_APIC_route_entry new_rte = { 0 };
++ struct IO_APIC_route_entry old_rte = { };
++ struct IO_APIC_route_entry new_rte = { };
+ unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
+ unsigned int pin = (reg - 0x10) / 2;
+ int seg, bdf, rc;
+diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
+index 1512e4866b..019c21c556 100644
+--- a/xen/drivers/passthrough/vtd/intremap.c
++++ b/xen/drivers/passthrough/vtd/intremap.c
+@@ -419,7 +419,7 @@ unsigned int cf_check io_apic_read_remap_rte(
+ {
+ unsigned int ioapic_pin = (reg - 0x10) / 2;
+ int index;
+- struct IO_xAPIC_route_entry old_rte = { 0 };
++ struct IO_xAPIC_route_entry old_rte = { };
+ int rte_upper = (reg & 1) ? 1 : 0;
+ struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
+
+@@ -442,7 +442,7 @@ void cf_check io_apic_write_remap_rte(
+ unsigned int apic, unsigned int reg, unsigned int value)
+ {
+ unsigned int ioapic_pin = (reg - 0x10) / 2;
+- struct IO_xAPIC_route_entry old_rte = { 0 };
++ struct IO_xAPIC_route_entry old_rte = { };
+ struct IO_APIC_route_remap_entry *remap_rte;
+ unsigned int rte_upper = (reg & 1) ? 1 : 0;
+ struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
+--
+2.42.0
+
diff --git a/0020-tools-oxenstored-Log-live-update-issues-at-warning-l.patch b/0020-tools-oxenstored-Log-live-update-issues-at-warning-l.patch
deleted file mode 100644
index 533e3e7..0000000
--- a/0020-tools-oxenstored-Log-live-update-issues-at-warning-l.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 991b512f5f69dde3c923804f887be9df56b03a74 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 8 Nov 2022 08:57:47 +0000
-Subject: [PATCH 20/89] tools/oxenstored: Log live update issues at warning
- level
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-During live update, oxenstored tries a best effort approach to recover as many
-domains and information as possible even if it encounters errors restoring
-some domains.
-
-However, logging about misunderstood input is more severe than simply info.
-Log it at warning instead.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 3f02e0a70fe9f8143454b742563433958d4a87f8)
----
- tools/ocaml/xenstored/xenstored.ml | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index f526f4fb23..35b8cbd43f 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -186,9 +186,9 @@ let from_channel_f chan global_f evtchn_f socket_f domain_f watch_f store_f =
- (Perms.Node.of_string (unhexify perms ^ "\000"))
- (unhexify value)
- | _ ->
-- info "restoring: ignoring unknown line: %s" line
-+ warn "restoring: ignoring unknown line: %s" line
- with exn ->
-- info "restoring: ignoring unknown line: %s (exception: %s)"
-+ warn "restoring: ignoring unknown line: %s (exception: %s)"
- line (Printexc.to_string exn);
- ()
- with End_of_file ->
---
-2.40.0
-
diff --git a/0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch b/0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch
new file mode 100644
index 0000000..43faeeb
--- /dev/null
+++ b/0020-x86-ioapic-RTE-modifications-must-use-ioapic_write_e.patch
@@ -0,0 +1,180 @@
+From 1bd4523d696d26976f64a919df8c7a1b3ea32f6f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:49:37 +0200
+Subject: [PATCH 20/55] x86/ioapic: RTE modifications must use
+ ioapic_write_entry
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Do not allow writing to RTE registers using io_apic_write, and instead
+require changes to RTE to be performed using ioapic_write_entry.
+
+This is in preparation for passing the full contents of the RTE to the
+IOMMU interrupt remapping handlers, so remapping entries for IO-APIC
+RTEs can be updated atomically when possible.
+
+While immediately this commit might expand the number of MMIO accesses
+in order to update an IO-APIC RTE, further changes will benefit from
+getting the full RTE value passed to the IOMMU handlers, as the logic
+is greatly simplified when the IOMMU handlers can get the complete RTE
+value in one go.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: ef7995ed1bcd7eac37fb3c3fe56eaa54ea9baf6c
+master date: 2023-07-28 09:40:20 +0200
+---
+ xen/arch/x86/include/asm/io_apic.h | 8 ++---
+ xen/arch/x86/io_apic.c | 43 ++++++++++++------------
+ xen/drivers/passthrough/amd/iommu_intr.c | 6 ----
+ 3 files changed, 25 insertions(+), 32 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/io_apic.h b/xen/arch/x86/include/asm/io_apic.h
+index a558bb063c..6b514b4e3d 100644
+--- a/xen/arch/x86/include/asm/io_apic.h
++++ b/xen/arch/x86/include/asm/io_apic.h
+@@ -161,8 +161,8 @@ static inline void __io_apic_write(unsigned int apic, unsigned int reg, unsigned
+
+ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+ {
+- if ( ioapic_reg_remapped(reg) )
+- return iommu_update_ire_from_apic(apic, reg, value);
++ /* RTE writes must use ioapic_write_entry. */
++ BUG_ON(reg >= 0x10);
+ __io_apic_write(apic, reg, value);
+ }
+
+@@ -172,8 +172,8 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
+ */
+ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+ {
+- if ( ioapic_reg_remapped(reg) )
+- return iommu_update_ire_from_apic(apic, reg, value);
++ /* RTE writes must use ioapic_write_entry. */
++ BUG_ON(reg >= 0x10);
+ *(IO_APIC_BASE(apic) + 4) = value;
+ }
+
+diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
+index aada2ef96c..041233b9b7 100644
+--- a/xen/arch/x86/io_apic.c
++++ b/xen/arch/x86/io_apic.c
+@@ -237,15 +237,15 @@ struct IO_APIC_route_entry __ioapic_read_entry(
+ {
+ union entry_union eu;
+
+- if ( raw )
++ if ( raw || !iommu_intremap )
+ {
+ eu.w1 = __io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = __io_apic_read(apic, 0x11 + 2 * pin);
+ }
+ else
+ {
+- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
++ eu.w1 = iommu_read_apic_from_ire(apic, 0x10 + 2 * pin);
++ eu.w2 = iommu_read_apic_from_ire(apic, 0x11 + 2 * pin);
+ }
+
+ return eu.entry;
+@@ -269,15 +269,15 @@ void __ioapic_write_entry(
+ {
+ union entry_union eu = { .entry = e };
+
+- if ( raw )
++ if ( raw || !iommu_intremap )
+ {
+ __io_apic_write(apic, 0x11 + 2 * pin, eu.w2);
+ __io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
+ }
+ else
+ {
+- io_apic_write(apic, 0x11 + 2 * pin, eu.w2);
+- io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
++ iommu_update_ire_from_apic(apic, 0x11 + 2 * pin, eu.w2);
++ iommu_update_ire_from_apic(apic, 0x10 + 2 * pin, eu.w1);
+ }
+ }
+
+@@ -433,16 +433,17 @@ static void modify_IO_APIC_irq(unsigned int irq, unsigned int enable,
+ unsigned int disable)
+ {
+ struct irq_pin_list *entry = irq_2_pin + irq;
+- unsigned int pin, reg;
+
+ for (;;) {
+- pin = entry->pin;
++ unsigned int pin = entry->pin;
++ struct IO_APIC_route_entry rte;
++
+ if (pin == -1)
+ break;
+- reg = io_apic_read(entry->apic, 0x10 + pin*2);
+- reg &= ~disable;
+- reg |= enable;
+- io_apic_modify(entry->apic, 0x10 + pin*2, reg);
++ rte = __ioapic_read_entry(entry->apic, pin, false);
++ rte.raw &= ~(uint64_t)disable;
++ rte.raw |= enable;
++ __ioapic_write_entry(entry->apic, pin, false, rte);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+@@ -584,16 +585,16 @@ set_ioapic_affinity_irq(struct irq_desc *desc, const cpumask_t *mask)
+ dest = SET_APIC_LOGICAL_ID(dest);
+ entry = irq_2_pin + irq;
+ for (;;) {
+- unsigned int data;
++ struct IO_APIC_route_entry rte;
++
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+
+- io_apic_write(entry->apic, 0x10 + 1 + pin*2, dest);
+- data = io_apic_read(entry->apic, 0x10 + pin*2);
+- data &= ~IO_APIC_REDIR_VECTOR_MASK;
+- data |= MASK_INSR(desc->arch.vector, IO_APIC_REDIR_VECTOR_MASK);
+- io_apic_modify(entry->apic, 0x10 + pin*2, data);
++ rte = __ioapic_read_entry(entry->apic, pin, false);
++ rte.dest.dest32 = dest;
++ rte.vector = desc->arch.vector;
++ __ioapic_write_entry(entry->apic, pin, false, rte);
+
+ if (!entry->next)
+ break;
+@@ -2127,10 +2128,8 @@ void ioapic_resume(void)
+ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+ __io_apic_write(apic, 0, reg_00.raw);
+ }
+- for (i = 0; i < nr_ioapic_entries[apic]; i++, entry++) {
+- __io_apic_write(apic, 0x11+2*i, *(((int *)entry)+1));
+- __io_apic_write(apic, 0x10+2*i, *(((int *)entry)+0));
+- }
++ for (i = 0; i < nr_ioapic_entries[apic]; i++, entry++)
++ __ioapic_write_entry(apic, i, true, *entry);
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
+index 9e6be3be35..f32c418a7e 100644
+--- a/xen/drivers/passthrough/amd/iommu_intr.c
++++ b/xen/drivers/passthrough/amd/iommu_intr.c
+@@ -361,12 +361,6 @@ void cf_check amd_iommu_ioapic_update_ire(
+ struct amd_iommu *iommu;
+ unsigned int idx;
+
+- if ( !iommu_intremap )
+- {
+- __io_apic_write(apic, reg, value);
+- return;
+- }
+-
+ idx = ioapic_id_to_index(IO_APIC_ID(apic));
+ if ( idx == MAX_IO_APICS )
+ return;
+--
+2.42.0
+
diff --git a/0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch b/0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch
new file mode 100644
index 0000000..6560452
--- /dev/null
+++ b/0021-iommu-vtd-rename-io_apic_read_remap_rte-local-variab.patch
@@ -0,0 +1,64 @@
+From e08e7330c58b7ee1efb00e348521a6afc524dc38 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:50:05 +0200
+Subject: [PATCH 21/55] iommu/vtd: rename io_apic_read_remap_rte() local
+ variable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Preparatory change to unify the IO-APIC pin variable name between
+io_apic_read_remap_rte() and amd_iommu_ioapic_update_ire(), so that
+the local variable can be made a function parameter with the same name
+across vendors.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+master commit: a478b38c01b65fa030303f0324a3380d872eb165
+master date: 2023-07-28 09:40:42 +0200
+---
+ xen/drivers/passthrough/vtd/intremap.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
+index 019c21c556..53c9de9a75 100644
+--- a/xen/drivers/passthrough/vtd/intremap.c
++++ b/xen/drivers/passthrough/vtd/intremap.c
+@@ -441,14 +441,14 @@ unsigned int cf_check io_apic_read_remap_rte(
+ void cf_check io_apic_write_remap_rte(
+ unsigned int apic, unsigned int reg, unsigned int value)
+ {
+- unsigned int ioapic_pin = (reg - 0x10) / 2;
++ unsigned int pin = (reg - 0x10) / 2;
+ struct IO_xAPIC_route_entry old_rte = { };
+ struct IO_APIC_route_remap_entry *remap_rte;
+ unsigned int rte_upper = (reg & 1) ? 1 : 0;
+ struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
+ int saved_mask;
+
+- old_rte = __ioapic_read_entry(apic, ioapic_pin, true);
++ old_rte = __ioapic_read_entry(apic, pin, true);
+
+ remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
+
+@@ -458,7 +458,7 @@ void cf_check io_apic_write_remap_rte(
+ __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
+ remap_rte->mask = saved_mask;
+
+- if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin,
++ if ( ioapic_rte_to_remap_entry(iommu, apic, pin,
+ &old_rte, rte_upper, value) )
+ {
+ __io_apic_write(apic, reg, value);
+@@ -468,7 +468,7 @@ void cf_check io_apic_write_remap_rte(
+ __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
+ }
+ else
+- __ioapic_write_entry(apic, ioapic_pin, true, old_rte);
++ __ioapic_write_entry(apic, pin, true, old_rte);
+ }
+
+ static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
+--
+2.42.0
+
diff --git a/0021-tools-oxenstored-Set-uncaught-exception-handler.patch b/0021-tools-oxenstored-Set-uncaught-exception-handler.patch
deleted file mode 100644
index 8a42fcc..0000000
--- a/0021-tools-oxenstored-Set-uncaught-exception-handler.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From e13a9a2146952859c21c0a0c7b8b07757c2aba9d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Mon, 7 Nov 2022 17:41:36 +0000
-Subject: [PATCH 21/89] tools/oxenstored: Set uncaught exception handler
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Unhandled exceptions go to stderr by default, but this doesn't typically work
-for oxenstored because:
- * daemonize reopens stderr as /dev/null
- * systemd redirects stderr to /dev/null too
-
-Debugging an unhandled exception requires reproducing the issue locally when
-using --no-fork, and is not conducive to figuring out what went wrong on a
-remote system.
-
-Install a custom handler which also tries to render the backtrace to the
-configured syslog facility, and DAEMON|ERR otherwise.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit ee7815f49faf743e960dac9e72809eb66393bc6d)
----
- tools/ocaml/xenstored/logging.ml | 29 +++++++++++++++++++++++++++++
- tools/ocaml/xenstored/xenstored.ml | 3 ++-
- 2 files changed, 31 insertions(+), 1 deletion(-)
-
-diff --git a/tools/ocaml/xenstored/logging.ml b/tools/ocaml/xenstored/logging.ml
-index 39c3036155..255051437d 100644
---- a/tools/ocaml/xenstored/logging.ml
-+++ b/tools/ocaml/xenstored/logging.ml
-@@ -342,3 +342,32 @@ let xb_answer ~tid ~con ~ty data =
- let watch_not_fired ~con perms path =
- let data = Printf.sprintf "EPERM perms=[%s] path=%s" perms path in
- access_logging ~tid:0 ~con ~data Watch_not_fired ~level:Info
-+
-+let msg_of exn bt =
-+ Printf.sprintf "Fatal exception: %s\n%s\n" (Printexc.to_string exn)
-+ (Printexc.raw_backtrace_to_string bt)
-+
-+let fallback_exception_handler exn bt =
-+ (* stderr goes to /dev/null, so use the logger where possible,
-+ but always print to stderr too, in case everything else fails,
-+ e.g. this can be used to debug with --no-fork
-+
-+ this function should try not to raise exceptions, but if it does
-+ the ocaml runtime should still print the exception, both the original,
-+ and the one from this function, but to stderr this time
-+ *)
-+ let msg = msg_of exn bt in
-+ prerr_endline msg;
-+ (* See Printexc.set_uncaught_exception_handler, need to flush,
-+ so has to call stop and flush *)
-+ match !xenstored_logger with
-+ | Some l -> error "xenstored-fallback" "%s" msg; l.stop ()
-+ | None ->
-+ (* Too early, no logger set yet.
-+ We normally try to use the configured logger so we don't flood syslog
-+ during development for example, or if the user has a file set
-+ *)
-+ try Syslog.log Syslog.Daemon Syslog.Err msg
-+ with e ->
-+ let bt = Printexc.get_raw_backtrace () in
-+ prerr_endline @@ msg_of e bt
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index 35b8cbd43f..4d5851c5cb 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -355,7 +355,8 @@ let tweak_gc () =
- Gc.set { (Gc.get ()) with Gc.max_overhead = !Define.gc_max_overhead }
-
-
--let _ =
-+let () =
-+ Printexc.set_uncaught_exception_handler Logging.fallback_exception_handler;
- let cf = do_argv in
- let pidfile =
- if Sys.file_exists (config_filename cf) then
---
-2.40.0
-
diff --git a/0022-tools-oxenstored-syslog-Avoid-potential-NULL-derefer.patch b/0022-tools-oxenstored-syslog-Avoid-potential-NULL-derefer.patch
deleted file mode 100644
index eb6d42e..0000000
--- a/0022-tools-oxenstored-syslog-Avoid-potential-NULL-derefer.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From 91a9ac6e9be5aa94020f5c482e6c51b581e2ea39 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 8 Nov 2022 14:24:19 +0000
-Subject: [PATCH 22/89] tools/oxenstored/syslog: Avoid potential NULL
- dereference
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-strdup() may return NULL. Check for this before passing to syslog().
-
-Drop const from c_msg. It is bogus, as demonstrated by the need to cast to
-void * in order to free the memory.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit acd3fb6d65905f8a185dcb9fe6a330a591b96203)
----
- tools/ocaml/xenstored/syslog_stubs.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/syslog_stubs.c b/tools/ocaml/xenstored/syslog_stubs.c
-index 875d48ad57..e16c3a9491 100644
---- a/tools/ocaml/xenstored/syslog_stubs.c
-+++ b/tools/ocaml/xenstored/syslog_stubs.c
-@@ -14,6 +14,7 @@
-
- #include <syslog.h>
- #include <string.h>
-+#include <caml/fail.h>
- #include <caml/mlvalues.h>
- #include <caml/memory.h>
- #include <caml/alloc.h>
-@@ -35,14 +36,16 @@ static int __syslog_facility_table[] = {
- value stub_syslog(value facility, value level, value msg)
- {
- CAMLparam3(facility, level, msg);
-- const char *c_msg = strdup(String_val(msg));
-+ char *c_msg = strdup(String_val(msg));
- int c_facility = __syslog_facility_table[Int_val(facility)]
- | __syslog_level_table[Int_val(level)];
-
-+ if ( !c_msg )
-+ caml_raise_out_of_memory();
- caml_enter_blocking_section();
- syslog(c_facility, "%s", c_msg);
- caml_leave_blocking_section();
-
-- free((void*)c_msg);
-+ free(c_msg);
- CAMLreturn(Val_unit);
- }
---
-2.40.0
-
diff --git a/0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch b/0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch
new file mode 100644
index 0000000..e06714e
--- /dev/null
+++ b/0022-x86-iommu-pass-full-IO-APIC-RTE-for-remapping-table-.patch
@@ -0,0 +1,462 @@
+From 5116fe12d8238cc7d6582ceefd3f7e944bff9a1d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:50:39 +0200
+Subject: [PATCH 22/55] x86/iommu: pass full IO-APIC RTE for remapping table
+ update
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+So that the remapping entry can be updated atomically when possible.
+
+Doing such an update atomically will avoid Xen having to mask the IO-APIC
+pin prior to performing any interrupt movements (ie: changing the
+destination and vector fields), as the interrupt remapping entry is
+always consistent.
+
+This also simplifies some of the logic on both VT-d and AMD-Vi
+implementations, as having the full RTE available instead of half of
+it avoids possibly having to read and update the missing other half from
+hardware.
+
+While there, remove the explicit zeroing of new_ire fields in
+ioapic_rte_to_remap_entry() and initialize the variable at definition
+so all fields are zeroed. Note fields could be also initialized with
+final values at definition, but I found that likely too much to be
+done at this time.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 3e033172b0250446bfe119f31c7f0f51684b0472
+master date: 2023-08-01 11:48:39 +0200
+---
+ xen/arch/x86/include/asm/iommu.h | 3 +-
+ xen/arch/x86/io_apic.c | 5 +-
+ xen/drivers/passthrough/amd/iommu.h | 2 +-
+ xen/drivers/passthrough/amd/iommu_intr.c | 100 ++---------------
+ xen/drivers/passthrough/vtd/extern.h | 2 +-
+ xen/drivers/passthrough/vtd/intremap.c | 131 +++++++++++------------
+ xen/drivers/passthrough/x86/iommu.c | 4 +-
+ xen/include/xen/iommu.h | 3 +-
+ 8 files changed, 82 insertions(+), 168 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/iommu.h b/xen/arch/x86/include/asm/iommu.h
+index fc0afe35bf..c0d4ad3742 100644
+--- a/xen/arch/x86/include/asm/iommu.h
++++ b/xen/arch/x86/include/asm/iommu.h
+@@ -97,7 +97,8 @@ struct iommu_init_ops {
+
+ extern const struct iommu_init_ops *iommu_init_ops;
+
+-void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
++void iommu_update_ire_from_apic(unsigned int apic, unsigned int pin,
++ uint64_t rte);
+ unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
+ int iommu_setup_hpet_msi(struct msi_desc *);
+
+diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
+index 041233b9b7..b3afef8933 100644
+--- a/xen/arch/x86/io_apic.c
++++ b/xen/arch/x86/io_apic.c
+@@ -275,10 +275,7 @@ void __ioapic_write_entry(
+ __io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
+ }
+ else
+- {
+- iommu_update_ire_from_apic(apic, 0x11 + 2 * pin, eu.w2);
+- iommu_update_ire_from_apic(apic, 0x10 + 2 * pin, eu.w1);
+- }
++ iommu_update_ire_from_apic(apic, pin, e.raw);
+ }
+
+ static void ioapic_write_entry(
+diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
+index 8bc3c35b1b..5429ada58e 100644
+--- a/xen/drivers/passthrough/amd/iommu.h
++++ b/xen/drivers/passthrough/amd/iommu.h
+@@ -300,7 +300,7 @@ int cf_check amd_iommu_free_intremap_table(
+ unsigned int amd_iommu_intremap_table_order(
+ const void *irt, const struct amd_iommu *iommu);
+ void cf_check amd_iommu_ioapic_update_ire(
+- unsigned int apic, unsigned int reg, unsigned int value);
++ unsigned int apic, unsigned int pin, uint64_t rte);
+ unsigned int cf_check amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg);
+ int cf_check amd_iommu_msi_msg_update_ire(
+diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
+index f32c418a7e..e83a2a932a 100644
+--- a/xen/drivers/passthrough/amd/iommu_intr.c
++++ b/xen/drivers/passthrough/amd/iommu_intr.c
+@@ -247,11 +247,6 @@ static void update_intremap_entry(const struct amd_iommu *iommu,
+ }
+ }
+
+-static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
+-{
+- return rte->vector | (rte->delivery_mode << 8);
+-}
+-
+ static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+ {
+ rte->vector = (u8)offset;
+@@ -267,7 +262,6 @@ static int update_intremap_entry_from_ioapic(
+ int bdf,
+ struct amd_iommu *iommu,
+ struct IO_APIC_route_entry *rte,
+- bool_t lo_update,
+ u16 *index)
+ {
+ unsigned long flags;
+@@ -315,31 +309,6 @@ static int update_intremap_entry_from_ioapic(
+ spin_lock(lock);
+ }
+
+- if ( fresh )
+- /* nothing */;
+- else if ( !lo_update )
+- {
+- /*
+- * Low half of incoming RTE is already in remapped format,
+- * so need to recover vector and delivery mode from IRTE.
+- */
+- ASSERT(get_rte_index(rte) == offset);
+- if ( iommu->ctrl.ga_en )
+- vector = entry.ptr128->full.vector;
+- else
+- vector = entry.ptr32->flds.vector;
+- /* The IntType fields match for both formats. */
+- delivery_mode = entry.ptr32->flds.int_type;
+- }
+- else if ( x2apic_enabled )
+- {
+- /*
+- * High half of incoming RTE was read from the I/O APIC and hence may
+- * not hold the full destination, so need to recover full destination
+- * from IRTE.
+- */
+- dest = get_full_dest(entry.ptr128);
+- }
+ update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
+
+ spin_unlock_irqrestore(lock, flags);
+@@ -350,14 +319,11 @@ static int update_intremap_entry_from_ioapic(
+ }
+
+ void cf_check amd_iommu_ioapic_update_ire(
+- unsigned int apic, unsigned int reg, unsigned int value)
++ unsigned int apic, unsigned int pin, uint64_t rte)
+ {
+- struct IO_APIC_route_entry old_rte = { };
+- struct IO_APIC_route_entry new_rte = { };
+- unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
+- unsigned int pin = (reg - 0x10) / 2;
++ struct IO_APIC_route_entry old_rte;
++ struct IO_APIC_route_entry new_rte = { .raw = rte };
+ int seg, bdf, rc;
+- bool saved_mask, fresh = false;
+ struct amd_iommu *iommu;
+ unsigned int idx;
+
+@@ -373,58 +339,23 @@ void cf_check amd_iommu_ioapic_update_ire(
+ {
+ AMD_IOMMU_WARN("failed to find IOMMU for IO-APIC @ %04x:%04x\n",
+ seg, bdf);
+- __io_apic_write(apic, reg, value);
++ __ioapic_write_entry(apic, pin, true, new_rte);
+ return;
+ }
+
+- /* save io-apic rte lower 32 bits */
+- *((u32 *)&old_rte) = __io_apic_read(apic, rte_lo);
+- saved_mask = old_rte.mask;
+-
+- if ( reg == rte_lo )
+- {
+- *((u32 *)&new_rte) = value;
+- /* read upper 32 bits from io-apic rte */
+- *(((u32 *)&new_rte) + 1) = __io_apic_read(apic, reg + 1);
+- }
+- else
+- {
+- *((u32 *)&new_rte) = *((u32 *)&old_rte);
+- *(((u32 *)&new_rte) + 1) = value;
+- }
+-
+- if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_MAX_ENTRIES )
+- {
+- ASSERT(saved_mask);
+-
+- /*
+- * There's nowhere except the IRTE to store a full 32-bit destination,
+- * so we may not bypass entry allocation and updating of the low RTE
+- * half in the (usual) case of the high RTE half getting written first.
+- */
+- if ( new_rte.mask && !x2apic_enabled )
+- {
+- __io_apic_write(apic, reg, value);
+- return;
+- }
+-
+- fresh = true;
+- }
+-
++ old_rte = __ioapic_read_entry(apic, pin, true);
+ /* mask the interrupt while we change the intremap table */
+- if ( !saved_mask )
++ if ( !old_rte.mask )
+ {
+ old_rte.mask = 1;
+- __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
++ __ioapic_write_entry(apic, pin, true, old_rte);
+ }
+
+ /* Update interrupt remapping entry */
+ rc = update_intremap_entry_from_ioapic(
+- bdf, iommu, &new_rte, reg == rte_lo,
++ bdf, iommu, &new_rte,
+ &ioapic_sbdf[idx].pin_2_idx[pin]);
+
+- __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+-
+ if ( rc )
+ {
+ /* Keep the entry masked. */
+@@ -433,20 +364,7 @@ void cf_check amd_iommu_ioapic_update_ire(
+ return;
+ }
+
+- /* For lower bits access, return directly to avoid double writes */
+- if ( reg == rte_lo )
+- return;
+-
+- /*
+- * Unmask the interrupt after we have updated the intremap table. Also
+- * write the low half if a fresh entry was allocated for a high half
+- * update in x2APIC mode.
+- */
+- if ( !saved_mask || (x2apic_enabled && fresh) )
+- {
+- old_rte.mask = saved_mask;
+- __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
+- }
++ __ioapic_write_entry(apic, pin, true, new_rte);
+ }
+
+ unsigned int cf_check amd_iommu_read_ioapic_from_ire(
+diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
+index 39602d1f88..d49e40c5ce 100644
+--- a/xen/drivers/passthrough/vtd/extern.h
++++ b/xen/drivers/passthrough/vtd/extern.h
+@@ -92,7 +92,7 @@ int cf_check intel_iommu_get_reserved_device_memory(
+ unsigned int cf_check io_apic_read_remap_rte(
+ unsigned int apic, unsigned int reg);
+ void cf_check io_apic_write_remap_rte(
+- unsigned int apic, unsigned int reg, unsigned int value);
++ unsigned int apic, unsigned int pin, uint64_t rte);
+
+ struct msi_desc;
+ struct msi_msg;
+diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
+index 53c9de9a75..78d7bc139a 100644
+--- a/xen/drivers/passthrough/vtd/intremap.c
++++ b/xen/drivers/passthrough/vtd/intremap.c
+@@ -328,15 +328,14 @@ static int remap_entry_to_ioapic_rte(
+
+ static int ioapic_rte_to_remap_entry(struct vtd_iommu *iommu,
+ int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
+- unsigned int rte_upper, unsigned int value)
++ struct IO_xAPIC_route_entry new_rte)
+ {
+ struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+ struct iremap_entry new_ire;
+ struct IO_APIC_route_remap_entry *remap_rte;
+- struct IO_xAPIC_route_entry new_rte;
+ int index;
+ unsigned long flags;
+- bool init = false;
++ bool init = false, masked = old_rte->mask;
+
+ remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
+ spin_lock_irqsave(&iommu->intremap.lock, flags);
+@@ -364,48 +363,40 @@ static int ioapic_rte_to_remap_entry(struct vtd_iommu *iommu,
+
+ new_ire = *iremap_entry;
+
+- if ( rte_upper )
+- {
+- if ( x2apic_enabled )
+- new_ire.remap.dst = value;
+- else
+- new_ire.remap.dst = (value >> 24) << 8;
+- }
++ if ( x2apic_enabled )
++ new_ire.remap.dst = new_rte.dest.dest32;
+ else
+- {
+- *(((u32 *)&new_rte) + 0) = value;
+- new_ire.remap.fpd = 0;
+- new_ire.remap.dm = new_rte.dest_mode;
+- new_ire.remap.tm = new_rte.trigger;
+- new_ire.remap.dlm = new_rte.delivery_mode;
+- /* Hardware require RH = 1 for LPR delivery mode */
+- new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
+- new_ire.remap.avail = 0;
+- new_ire.remap.res_1 = 0;
+- new_ire.remap.vector = new_rte.vector;
+- new_ire.remap.res_2 = 0;
+-
+- set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
+- new_ire.remap.res_3 = 0;
+- new_ire.remap.res_4 = 0;
+- new_ire.remap.p = 1; /* finally, set present bit */
+-
+- /* now construct new ioapic rte entry */
+- remap_rte->vector = new_rte.vector;
+- remap_rte->delivery_mode = 0; /* has to be 0 for remap format */
+- remap_rte->index_15 = (index >> 15) & 0x1;
+- remap_rte->index_0_14 = index & 0x7fff;
+-
+- remap_rte->delivery_status = new_rte.delivery_status;
+- remap_rte->polarity = new_rte.polarity;
+- remap_rte->irr = new_rte.irr;
+- remap_rte->trigger = new_rte.trigger;
+- remap_rte->mask = new_rte.mask;
+- remap_rte->reserved = 0;
+- remap_rte->format = 1; /* indicate remap format */
+- }
+-
+- update_irte(iommu, iremap_entry, &new_ire, !init);
++ new_ire.remap.dst = GET_xAPIC_ID(new_rte.dest.dest32) << 8;
++
++ new_ire.remap.dm = new_rte.dest_mode;
++ new_ire.remap.tm = new_rte.trigger;
++ new_ire.remap.dlm = new_rte.delivery_mode;
++ /* Hardware require RH = 1 for LPR delivery mode. */
++ new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
++ new_ire.remap.vector = new_rte.vector;
++
++ set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
++ /* Finally, set present bit. */
++ new_ire.remap.p = 1;
++
++ /* Now construct new ioapic rte entry. */
++ remap_rte->vector = new_rte.vector;
++ /* Has to be 0 for remap format. */
++ remap_rte->delivery_mode = 0;
++ remap_rte->index_15 = (index >> 15) & 0x1;
++ remap_rte->index_0_14 = index & 0x7fff;
++
++ remap_rte->delivery_status = new_rte.delivery_status;
++ remap_rte->polarity = new_rte.polarity;
++ remap_rte->irr = new_rte.irr;
++ remap_rte->trigger = new_rte.trigger;
++ remap_rte->mask = new_rte.mask;
++ remap_rte->reserved = 0;
++ /* Indicate remap format. */
++ remap_rte->format = 1;
++
++ /* If cmpxchg16b is not available the caller must mask the IO-APIC pin. */
++ update_irte(iommu, iremap_entry, &new_ire, !init && !masked);
+ iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
+ iommu_flush_iec_index(iommu, 0, index);
+
+@@ -439,36 +430,42 @@ unsigned int cf_check io_apic_read_remap_rte(
+ }
+
+ void cf_check io_apic_write_remap_rte(
+- unsigned int apic, unsigned int reg, unsigned int value)
++ unsigned int apic, unsigned int pin, uint64_t rte)
+ {
+- unsigned int pin = (reg - 0x10) / 2;
++ struct IO_xAPIC_route_entry new_rte = { .raw = rte };
+ struct IO_xAPIC_route_entry old_rte = { };
+- struct IO_APIC_route_remap_entry *remap_rte;
+- unsigned int rte_upper = (reg & 1) ? 1 : 0;
+ struct vtd_iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
+- int saved_mask;
+-
+- old_rte = __ioapic_read_entry(apic, pin, true);
+-
+- remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
+-
+- /* mask the interrupt while we change the intremap table */
+- saved_mask = remap_rte->mask;
+- remap_rte->mask = 1;
+- __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
+- remap_rte->mask = saved_mask;
++ bool masked = true;
++ int rc;
+
+- if ( ioapic_rte_to_remap_entry(iommu, apic, pin,
+- &old_rte, rte_upper, value) )
++ if ( !cpu_has_cx16 )
+ {
+- __io_apic_write(apic, reg, value);
++ /*
++ * Cannot atomically update the IRTE entry: mask the IO-APIC pin to
++ * avoid interrupts seeing an inconsistent IRTE entry.
++ */
++ old_rte = __ioapic_read_entry(apic, pin, true);
++ if ( !old_rte.mask )
++ {
++ masked = false;
++ old_rte.mask = 1;
++ __ioapic_write_entry(apic, pin, true, old_rte);
++ }
++ }
+
+- /* Recover the original value of 'mask' bit */
+- if ( rte_upper )
+- __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
++ rc = ioapic_rte_to_remap_entry(iommu, apic, pin, &old_rte, new_rte);
++ if ( rc )
++ {
++ if ( !masked )
++ {
++ /* Recover the original value of 'mask' bit */
++ old_rte.mask = 0;
++ __ioapic_write_entry(apic, pin, true, old_rte);
++ }
++ return;
+ }
+- else
+- __ioapic_write_entry(apic, pin, true, old_rte);
++ /* old_rte will contain the updated IO-APIC RTE on success. */
++ __ioapic_write_entry(apic, pin, true, old_rte);
+ }
+
+ static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
+diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
+index f671b0f2bb..8bd0ccb2e9 100644
+--- a/xen/drivers/passthrough/x86/iommu.c
++++ b/xen/drivers/passthrough/x86/iommu.c
+@@ -142,9 +142,9 @@ int iommu_enable_x2apic(void)
+ }
+
+ void iommu_update_ire_from_apic(
+- unsigned int apic, unsigned int reg, unsigned int value)
++ unsigned int apic, unsigned int pin, uint64_t rte)
+ {
+- iommu_vcall(&iommu_ops, update_ire_from_apic, apic, reg, value);
++ iommu_vcall(&iommu_ops, update_ire_from_apic, apic, pin, rte);
+ }
+
+ unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
+diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
+index 4f22fc1bed..f8a52627f7 100644
+--- a/xen/include/xen/iommu.h
++++ b/xen/include/xen/iommu.h
+@@ -274,7 +274,8 @@ struct iommu_ops {
+ int (*enable_x2apic)(void);
+ void (*disable_x2apic)(void);
+
+- void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
++ void (*update_ire_from_apic)(unsigned int apic, unsigned int pin,
++ uint64_t rte);
+ unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
+
+ int (*setup_hpet_msi)(struct msi_desc *);
+--
+2.42.0
+
diff --git a/0023-build-correct-gas-noexecstack-check.patch b/0023-build-correct-gas-noexecstack-check.patch
new file mode 100644
index 0000000..245d631
--- /dev/null
+++ b/0023-build-correct-gas-noexecstack-check.patch
@@ -0,0 +1,34 @@
+From ba360fbb6413231f84a7d68f5cb34858f81d4d23 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 5 Sep 2023 08:51:50 +0200
+Subject: [PATCH 23/55] build: correct gas --noexecstack check
+
+The check was missing an escape for the inner $, thus breaking things
+in the unlikely event that the underlying assembler doesn't support this
+option.
+
+Fixes: 62d22296a95d ("build: silence GNU ld warning about executable stacks")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: d1f6a58dfdc508c43a51c1865c826d519bf16493
+master date: 2023-08-14 09:58:19 +0200
+---
+ xen/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index 7bb9de7bdc..455916c757 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -405,7 +405,7 @@ endif
+
+ AFLAGS += -D__ASSEMBLY__
+
+-$(call cc-option-add,AFLAGS,CC,-Wa$(comma)--noexecstack)
++$(call cc-option-add,AFLAGS,CC,-Wa$$(comma)--noexecstack)
+
+ LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments
+
+--
+2.42.0
+
diff --git a/0023-tools-oxenstored-Render-backtraces-more-nicely-in-Sy.patch b/0023-tools-oxenstored-Render-backtraces-more-nicely-in-Sy.patch
deleted file mode 100644
index c0343d0..0000000
--- a/0023-tools-oxenstored-Render-backtraces-more-nicely-in-Sy.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From c4972a4272690384b15d5706f2a833aed636895e Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 1 Dec 2022 21:06:25 +0000
-Subject: [PATCH 23/89] tools/oxenstored: Render backtraces more nicely in
- Syslog
-
-fallback_exception_handler feeds a string with embedded newlines directly into
-syslog(). While this is an improvement on getting nothing, syslogd escapes
-all control characters it gets, and emits one (long) log line.
-
-Fix the problem generally in the syslog stub. As we already have a local copy
-of the string, split it in place and emit one syslog() call per line.
-
-Also tweak Logging.msg_of to avoid putting an extra newline on a string which
-already ends with one.
-
-Fixes: ee7815f49faf ("tools/oxenstored: Set uncaught exception handler")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit d2162d884cba0ff7b2ac0d832f4e044444bda2e1)
----
- tools/ocaml/xenstored/logging.ml | 2 +-
- tools/ocaml/xenstored/syslog_stubs.c | 26 +++++++++++++++++++++++---
- 2 files changed, 24 insertions(+), 4 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/logging.ml b/tools/ocaml/xenstored/logging.ml
-index 255051437d..f233bc9a39 100644
---- a/tools/ocaml/xenstored/logging.ml
-+++ b/tools/ocaml/xenstored/logging.ml
-@@ -344,7 +344,7 @@ let watch_not_fired ~con perms path =
- access_logging ~tid:0 ~con ~data Watch_not_fired ~level:Info
-
- let msg_of exn bt =
-- Printf.sprintf "Fatal exception: %s\n%s\n" (Printexc.to_string exn)
-+ Printf.sprintf "Fatal exception: %s\n%s" (Printexc.to_string exn)
- (Printexc.raw_backtrace_to_string bt)
-
- let fallback_exception_handler exn bt =
-diff --git a/tools/ocaml/xenstored/syslog_stubs.c b/tools/ocaml/xenstored/syslog_stubs.c
-index e16c3a9491..760e78ff73 100644
---- a/tools/ocaml/xenstored/syslog_stubs.c
-+++ b/tools/ocaml/xenstored/syslog_stubs.c
-@@ -37,14 +37,34 @@ value stub_syslog(value facility, value level, value msg)
- {
- CAMLparam3(facility, level, msg);
- char *c_msg = strdup(String_val(msg));
-+ char *s = c_msg, *ss;
- int c_facility = __syslog_facility_table[Int_val(facility)]
- | __syslog_level_table[Int_val(level)];
-
- if ( !c_msg )
- caml_raise_out_of_memory();
-- caml_enter_blocking_section();
-- syslog(c_facility, "%s", c_msg);
-- caml_leave_blocking_section();
-+
-+ /*
-+ * syslog() doesn't like embedded newlines, and c_msg generally
-+ * contains them.
-+ *
-+ * Split the message in place by converting \n to \0, and issue one
-+ * syslog() call per line, skipping the final iteration if c_msg ends
-+ * with a newline anyway.
-+ */
-+ do {
-+ ss = strchr(s, '\n');
-+ if ( ss )
-+ *ss = '\0';
-+ else if ( *s == '\0' )
-+ break;
-+
-+ caml_enter_blocking_section();
-+ syslog(c_facility, "%s", s);
-+ caml_leave_blocking_section();
-+
-+ s = ss + 1;
-+ } while ( ss );
-
- free(c_msg);
- CAMLreturn(Val_unit);
---
-2.40.0
-
diff --git a/0024-Revert-tools-xenstore-simplify-loop-handling-connect.patch b/0024-Revert-tools-xenstore-simplify-loop-handling-connect.patch
deleted file mode 100644
index 81481fc..0000000
--- a/0024-Revert-tools-xenstore-simplify-loop-handling-connect.patch
+++ /dev/null
@@ -1,136 +0,0 @@
-From 2f8851c37f88e4eb4858e16626fcb2379db71a4f Mon Sep 17 00:00:00 2001
-From: Jason Andryuk <jandryuk@gmail.com>
-Date: Thu, 26 Jan 2023 11:00:24 +0100
-Subject: [PATCH 24/89] Revert "tools/xenstore: simplify loop handling
- connection I/O"
-
-I'm observing guest kexec trigger xenstored to abort on a double free.
-
-gdb output:
-Program received signal SIGABRT, Aborted.
-__pthread_kill_implementation (no_tid=0, signo=6, threadid=140645614258112) at ./nptl/pthread_kill.c:44
-44 ./nptl/pthread_kill.c: No such file or directory.
-(gdb) bt
- at ./nptl/pthread_kill.c:44
- at ./nptl/pthread_kill.c:78
- at ./nptl/pthread_kill.c:89
- at ../sysdeps/posix/raise.c:26
- at talloc.c:119
- ptr=ptr@entry=0x559fae724290) at talloc.c:232
- at xenstored_core.c:2945
-(gdb) frame 5
- at talloc.c:119
-119 TALLOC_ABORT("Bad talloc magic value - double free");
-(gdb) frame 7
- at xenstored_core.c:2945
-2945 talloc_increase_ref_count(conn);
-(gdb) p conn
-$1 = (struct connection *) 0x559fae724290
-
-Looking at a xenstore trace, we have:
-IN 0x559fae71f250 20230120 17:40:53 READ (/local/domain/3/image/device-model-dom
-id )
-wrl: dom 0 1 msec 10000 credit 1000000 reserve 100 disc
-ard
-wrl: dom 3 1 msec 10000 credit 1000000 reserve 100 disc
-ard
-wrl: dom 0 0 msec 10000 credit 1000000 reserve 0 disc
-ard
-wrl: dom 3 0 msec 10000 credit 1000000 reserve 0 disc
-ard
-OUT 0x559fae71f250 20230120 17:40:53 ERROR (ENOENT )
-wrl: dom 0 1 msec 10000 credit 1000000 reserve 100 disc
-ard
-wrl: dom 3 1 msec 10000 credit 1000000 reserve 100 disc
-ard
-IN 0x559fae71f250 20230120 17:40:53 RELEASE (3 )
-DESTROY watch 0x559fae73f630
-DESTROY watch 0x559fae75ddf0
-DESTROY watch 0x559fae75ec30
-DESTROY watch 0x559fae75ea60
-DESTROY watch 0x559fae732c00
-DESTROY watch 0x559fae72cea0
-DESTROY watch 0x559fae728fc0
-DESTROY watch 0x559fae729570
-DESTROY connection 0x559fae724290
-orphaned node /local/domain/3/device/suspend/event-channel deleted
-orphaned node /local/domain/3/device/vbd/51712 deleted
-orphaned node /local/domain/3/device/vkbd/0 deleted
-orphaned node /local/domain/3/device/vif/0 deleted
-orphaned node /local/domain/3/control/shutdown deleted
-orphaned node /local/domain/3/control/feature-poweroff deleted
-orphaned node /local/domain/3/control/feature-reboot deleted
-orphaned node /local/domain/3/control/feature-suspend deleted
-orphaned node /local/domain/3/control/feature-s3 deleted
-orphaned node /local/domain/3/control/feature-s4 deleted
-orphaned node /local/domain/3/control/sysrq deleted
-orphaned node /local/domain/3/data deleted
-orphaned node /local/domain/3/drivers deleted
-orphaned node /local/domain/3/feature deleted
-orphaned node /local/domain/3/attr deleted
-orphaned node /local/domain/3/error deleted
-orphaned node /local/domain/3/console/backend-id deleted
-
-and no further output.
-
-The trace shows that DESTROY was called for connection 0x559fae724290,
-but that is the same pointer (conn) main() was looping through from
-connections. So it wasn't actually removed from the connections list?
-
-Reverting commit e8e6e42279a5 "tools/xenstore: simplify loop handling
-connection I/O" fixes the abort/double free. I think the use of
-list_for_each_entry_safe is incorrect. list_for_each_entry_safe makes
-traversal safe for deleting the current iterator, but RELEASE/do_release
-will delete some other entry in the connections list. I think the
-observed abort is because list_for_each_entry has next pointing to the
-deleted connection, and it is used in the subsequent iteration.
-
-Add a comment explaining the unsuitability of list_for_each_entry_safe.
-Also notice that the old code takes a reference on next which would
-prevents a use-after-free.
-
-This reverts commit e8e6e42279a5723239c5c40ba4c7f579a979465d.
-
-This is XSA-425/CVE-2022-42330.
-
-Fixes: e8e6e42279a5 ("tools/xenstore: simplify loop handling connection I/O")
-Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
----
- tools/xenstore/xenstored_core.c | 19 +++++++++++++++++--
- 1 file changed, 17 insertions(+), 2 deletions(-)
-
-diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
-index 476d5c6d51..56dbdc2530 100644
---- a/tools/xenstore/xenstored_core.c
-+++ b/tools/xenstore/xenstored_core.c
-@@ -2935,8 +2935,23 @@ int main(int argc, char *argv[])
- }
- }
-
-- list_for_each_entry_safe(conn, next, &connections, list) {
-- talloc_increase_ref_count(conn);
-+ /*
-+ * list_for_each_entry_safe is not suitable here because
-+ * handle_input may delete entries besides the current one, but
-+ * those may be in the temporary next which would trigger a
-+ * use-after-free. list_for_each_entry_safe is only safe for
-+ * deleting the current entry.
-+ */
-+ next = list_entry(connections.next, typeof(*conn), list);
-+ if (&next->list != &connections)
-+ talloc_increase_ref_count(next);
-+ while (&next->list != &connections) {
-+ conn = next;
-+
-+ next = list_entry(conn->list.next,
-+ typeof(*conn), list);
-+ if (&next->list != &connections)
-+ talloc_increase_ref_count(next);
-
- if (conn_can_read(conn))
- handle_input(conn);
---
-2.40.0
-
diff --git a/0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch b/0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch
new file mode 100644
index 0000000..1ec7335
--- /dev/null
+++ b/0024-libxl-slightly-correct-JSON-generation-of-CPU-policy.patch
@@ -0,0 +1,38 @@
+From 042982297802e7b746dc2fac95a453cc88d0aa83 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 5 Sep 2023 08:52:15 +0200
+Subject: [PATCH 24/55] libxl: slightly correct JSON generation of CPU policy
+
+The "cpuid_empty" label is also (in principle; maybe only for rubbish
+input) reachable in the "cpuid_only" case. Hence the label needs to live
+ahead of the check of the variable.
+
+Fixes: 5b80cecb747b ("libxl: introduce MSR data in libxl_cpuid_policy")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: ebce4e3a146c39e57bb7a890e059e89c32b6d547
+master date: 2023-08-17 16:24:17 +0200
+---
+ tools/libs/light/libxl_cpuid.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
+index 849722541c..5c66d094b2 100644
+--- a/tools/libs/light/libxl_cpuid.c
++++ b/tools/libs/light/libxl_cpuid.c
+@@ -710,10 +710,11 @@ parse_cpuid:
+ libxl__strdup(NOGC, libxl__json_object_get_string(r));
+ }
+ }
++
++cpuid_empty:
+ if (cpuid_only)
+ return 0;
+
+-cpuid_empty:
+ co = libxl__json_map_get("msr", o, JSON_ARRAY);
+ if (!libxl__json_object_is_array(co))
+ return ERROR_FAIL;
+--
+2.42.0
+
diff --git a/0025-tboot-Disable-CET-at-shutdown.patch b/0025-tboot-Disable-CET-at-shutdown.patch
new file mode 100644
index 0000000..f06db61
--- /dev/null
+++ b/0025-tboot-Disable-CET-at-shutdown.patch
@@ -0,0 +1,53 @@
+From 7ca58fbef489fcb17631872a2bdc929823a2a494 Mon Sep 17 00:00:00 2001
+From: Jason Andryuk <jandryuk@gmail.com>
+Date: Tue, 5 Sep 2023 08:52:33 +0200
+Subject: [PATCH 25/55] tboot: Disable CET at shutdown
+
+tboot_shutdown() calls into tboot to perform the actual system shutdown.
+tboot isn't built with endbr annotations, and Xen has CET-IBT enabled on
+newer hardware. shutdown_entry isn't annotated with endbr and Xen
+faults:
+
+Panic on CPU 0:
+CONTROL-FLOW PROTECTION FAULT: #CP[0003] endbranch
+
+And Xen hangs at this point.
+
+Disabling CET-IBT let Xen and tboot power off, but reboot was
+performing a poweroff instead of a warm reboot.  Disabling all of CET,
+i.e. shadow stacks as well, lets tboot reboot properly.
+
+Fixes: cdbe2b0a1aec ("x86: Enable CET Indirect Branch Tracking")
+Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Daniel P. Smith <dpsmith@apertussolutions.com>
+master commit: 0801868f550539d417d46f82c49307480947ccaa
+master date: 2023-08-17 16:24:49 +0200
+---
+ xen/arch/x86/tboot.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c
+index fe1abfdf08..a2e9e97ed7 100644
+--- a/xen/arch/x86/tboot.c
++++ b/xen/arch/x86/tboot.c
+@@ -398,6 +398,16 @@ void tboot_shutdown(uint32_t shutdown_type)
+ tboot_gen_xenheap_integrity(g_tboot_shared->s3_key, &xenheap_mac);
+ }
+
++ /*
++ * Disable CET - tboot may not be built with endbr, and it doesn't support
++ * shadow stacks.
++ */
++ if ( read_cr4() & X86_CR4_CET )
++ {
++ wrmsrl(MSR_S_CET, 0);
++ write_cr4(read_cr4() & ~X86_CR4_CET);
++ }
++
+ /*
+ * During early boot, we can be called by panic before idle_vcpu[0] is
+ * setup, but in that case we don't need to change page tables.
+--
+2.42.0
+
diff --git a/0025-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch b/0025-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch
deleted file mode 100644
index 142280f..0000000
--- a/0025-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From a470a83c36c07b56d90957ae1e6e9ebc458d3686 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 7 Feb 2023 16:56:14 +0100
-Subject: [PATCH 25/89] x86/S3: Restore Xen's MSR_PAT value on S3 resume
-
-There are two paths in the trampoline, and Xen's PAT needs setting up in both,
-not just the boot path.
-
-Fixes: 4304ff420e51 ("x86/S3: Drop {save,restore}_rest_processor_state() completely")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 4d975798e11579fdf405b348543061129e01b0fb
-master date: 2023-01-10 21:21:30 +0000
----
- xen/arch/x86/boot/wakeup.S | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/xen/arch/x86/boot/wakeup.S b/xen/arch/x86/boot/wakeup.S
-index c17d613b61..08447e1934 100644
---- a/xen/arch/x86/boot/wakeup.S
-+++ b/xen/arch/x86/boot/wakeup.S
-@@ -130,6 +130,11 @@ wakeup_32:
- and %edi, %edx
- wrmsr
- 1:
-+ /* Set up PAT before enabling paging. */
-+ mov $XEN_MSR_PAT & 0xffffffff, %eax
-+ mov $XEN_MSR_PAT >> 32, %edx
-+ mov $MSR_IA32_CR_PAT, %ecx
-+ wrmsr
-
- /* Set up EFER (Extended Feature Enable Register). */
- movl $MSR_EFER,%ecx
---
-2.40.0
-
diff --git a/0026-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch b/0026-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch
deleted file mode 100644
index 5d937d5..0000000
--- a/0026-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 1d7a388e7b9711cbd7e14b2020b168b6789772af Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 7 Feb 2023 16:57:22 +0100
-Subject: [PATCH 26/89] tools: Fix build with recent QEMU, use
- "--enable-trace-backends"
-
-The configure option "--enable-trace-backend" isn't accepted anymore
-and we should use "--enable-trace-backends" instead which was
-introduce in 2014 and allow multiple backends.
-
-"--enable-trace-backends" was introduced by:
- 5b808275f3bb ("trace: Multi-backend tracing")
-The backward compatible option "--enable-trace-backend" is removed by
- 10229ec3b0ff ("configure: remove backwards-compatibility and obsolete options")
-
-As we already use ./configure options that wouldn't be accepted by
-older version of QEMU's configure, we will simply use the new spelling
-for the option and avoid trying to detect which spelling to use.
-
-We already make use if "--firmwarepath=" which was introduced by
- 3d5eecab4a5a ("Add --firmwarepath to configure")
-which already include the new spelling for "--enable-trace-backends".
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
-master commit: e66d450b6e0ffec635639df993ab43ce28b3383f
-master date: 2023-01-11 10:45:29 +0100
----
- tools/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/tools/Makefile b/tools/Makefile
-index 9e28027835..4906fdbc23 100644
---- a/tools/Makefile
-+++ b/tools/Makefile
-@@ -218,9 +218,9 @@ subdir-all-qemu-xen-dir: qemu-xen-dir-find
- mkdir -p qemu-xen-build; \
- cd qemu-xen-build; \
- if $$source/scripts/tracetool.py --check-backend --backend log ; then \
-- enable_trace_backend='--enable-trace-backend=log'; \
-+ enable_trace_backend="--enable-trace-backends=log"; \
- elif $$source/scripts/tracetool.py --check-backend --backend stderr ; then \
-- enable_trace_backend='--enable-trace-backend=stderr'; \
-+ enable_trace_backend='--enable-trace-backends=stderr'; \
- else \
- enable_trace_backend='' ; \
- fi ; \
---
-2.40.0
-
diff --git a/0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch b/0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch
new file mode 100644
index 0000000..10aa14f
--- /dev/null
+++ b/0026-x86-svm-Fix-valid-condition-in-svm_get_pending_event.patch
@@ -0,0 +1,29 @@
+From a939e953cdd522da3d8f0efeaea84448b5b570f9 Mon Sep 17 00:00:00 2001
+From: Jinoh Kang <jinoh.kang.kr@gmail.com>
+Date: Tue, 5 Sep 2023 08:53:01 +0200
+Subject: [PATCH 26/55] x86/svm: Fix valid condition in svm_get_pending_event()
+
+Fixes: 9864841914c2 ("x86/vm_event: add support for VM_EVENT_REASON_INTERRUPT")
+Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
+master commit: b2865c2b6f164d2c379177cdd1cb200e4eaba549
+master date: 2023-08-18 20:21:44 +0100
+---
+ xen/arch/x86/hvm/svm/svm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index 5fa945c526..e8f50e7c5e 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -2490,7 +2490,7 @@ static bool cf_check svm_get_pending_event(
+ {
+ const struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
+
+- if ( vmcb->event_inj.v )
++ if ( !vmcb->event_inj.v )
+ return false;
+
+ info->vector = vmcb->event_inj.vector;
+--
+2.42.0
+
diff --git a/0027-include-compat-produce-stubs-for-headers-not-otherwi.patch b/0027-include-compat-produce-stubs-for-headers-not-otherwi.patch
deleted file mode 100644
index 3528bd6..0000000
--- a/0027-include-compat-produce-stubs-for-headers-not-otherwi.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From c871e05e138aae2ac75e9b4ccebe6cf3fd1a775b Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 7 Feb 2023 16:57:52 +0100
-Subject: [PATCH 27/89] include/compat: produce stubs for headers not otherwise
- generated
-
-Public headers can include other public headers. Such interdependencies
-are retained in their compat counterparts. Since some compat headers are
-generated only in certain configurations, the referenced headers still
-need to exist. The lack thereof was observed with hvm/hvm_op.h needing
-trace.h, where generation of the latter depends on TRACEBUFFER=y. Make
-empty stubs in such cases (as generating the extra headers is relatively
-slow and hence better to avoid). Changes to .config and incrementally
-(re-)building is covered by the respective .*.cmd then no longer
-matching the command to be used, resulting in the necessary re-creation
-of the (possibly stub) header.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 6bec713f871f21c6254a5783c1e39867ea828256
-master date: 2023-01-12 16:17:54 +0100
----
- xen/include/Makefile | 14 +++++++++++++-
- 1 file changed, 13 insertions(+), 1 deletion(-)
-
-diff --git a/xen/include/Makefile b/xen/include/Makefile
-index 65be310eca..cfd7851614 100644
---- a/xen/include/Makefile
-+++ b/xen/include/Makefile
-@@ -34,6 +34,8 @@ headers-$(CONFIG_TRACEBUFFER) += compat/trace.h
- headers-$(CONFIG_XENOPROF) += compat/xenoprof.h
- headers-$(CONFIG_XSM_FLASK) += compat/xsm/flask_op.h
-
-+headers-n := $(filter-out $(headers-y),$(headers-n) $(headers-))
-+
- cppflags-y := -include public/xen-compat.h -DXEN_GENERATING_COMPAT_HEADERS
- cppflags-$(CONFIG_X86) += -m32
-
-@@ -43,13 +45,16 @@ public-$(CONFIG_X86) := $(wildcard $(srcdir)/public/arch-x86/*.h $(srcdir)/publi
- public-$(CONFIG_ARM) := $(wildcard $(srcdir)/public/arch-arm/*.h $(srcdir)/public/arch-arm/*/*.h)
-
- .PHONY: all
--all: $(addprefix $(obj)/,$(headers-y))
-+all: $(addprefix $(obj)/,$(headers-y) $(headers-n))
-
- quiet_cmd_compat_h = GEN $@
- cmd_compat_h = \
- $(PYTHON) $(srctree)/tools/compat-build-header.py <$< $(patsubst $(obj)/%,%,$@) >>$@.new; \
- mv -f $@.new $@
-
-+quiet_cmd_stub_h = GEN $@
-+cmd_stub_h = echo '/* empty */' >$@
-+
- quiet_cmd_compat_i = CPP $@
- cmd_compat_i = $(CPP) $(filter-out -Wa$(comma)% -include %/include/xen/config.h,$(XEN_CFLAGS)) $(cppflags-y) -o $@ $<
-
-@@ -69,6 +74,13 @@ targets += $(headers-y)
- $(obj)/compat/%.h: $(obj)/compat/%.i $(srctree)/tools/compat-build-header.py FORCE
- $(call if_changed,compat_h)
-
-+# Placeholders may be needed in case files in $(headers-y) include files we
-+# don't otherwise generate. Real dependencies would need spelling out explicitly,
-+# for them to appear in $(headers-y) instead.
-+targets += $(headers-n)
-+$(addprefix $(obj)/,$(headers-n)): FORCE
-+ $(call if_changed,stub_h)
-+
- .PRECIOUS: $(obj)/compat/%.i
- targets += $(patsubst %.h, %.i, $(headers-y))
- $(obj)/compat/%.i: $(obj)/compat/%.c FORCE
---
-2.40.0
-
diff --git a/0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch b/0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch
new file mode 100644
index 0000000..a022066
--- /dev/null
+++ b/0027-x86-vmx-Revert-x86-VMX-sanitize-rIP-before-re-enteri.patch
@@ -0,0 +1,100 @@
+From 8be85d8c0df2445c012fac42117396b483db5db0 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 5 Sep 2023 08:53:31 +0200
+Subject: [PATCH 27/55] x86/vmx: Revert "x86/VMX: sanitize rIP before
+ re-entering guest"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+At the time of XSA-170, the x86 instruction emulator was genuinely broken. It
+would load arbitrary values into %rip and putting a check here probably was
+the best stopgap security fix. It should have been reverted following c/s
+81d3a0b26c1 "x86emul: limit-check branch targets" which corrected the emulator
+behaviour.
+
+However, everyone involved in XSA-170, myself included, failed to read the SDM
+correctly. On the subject of %rip consistency checks, the SDM stated:
+
+ If the processor supports N < 64 linear-address bits, bits 63:N must be
+ identical
+
+A non-canonical %rip (and SSP more recently) is an explicitly legal state in
+x86, and the VMEntry consistency checks are intentionally off-by-one from a
+regular canonical check.
+
+The consequence of this bug is that Xen will currently take a legal x86 state
+which would successfully VMEnter, and corrupt it into having non-architectural
+behaviour.
+
+Furthermore, in the time this bugfix has been pending in public, I
+successfully persuaded Intel to clarify the SDM, adding the following
+clarification:
+
+ The guest RIP value is not required to be canonical; the value of bit N-1
+ may differ from that of bit N.
+
+Fixes: ffbbfda377 ("x86/VMX: sanitize rIP before re-entering guest")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 10c83bb0f5d158d101d983883741b76f927e54a3
+master date: 2023-08-23 18:44:59 +0100
+---
+ xen/arch/x86/hvm/vmx/vmx.c | 34 +---------------------------------
+ 1 file changed, 1 insertion(+), 33 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index f256dc2635..072288a5ef 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -3975,7 +3975,7 @@ static void undo_nmis_unblocked_by_iret(void)
+ void vmx_vmexit_handler(struct cpu_user_regs *regs)
+ {
+ unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0;
+- unsigned int vector = 0, mode;
++ unsigned int vector = 0;
+ struct vcpu *v = current;
+ struct domain *currd = v->domain;
+
+@@ -4650,38 +4650,6 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
+ out:
+ if ( nestedhvm_vcpu_in_guestmode(v) )
+ nvmx_idtv_handling();
+-
+- /*
+- * VM entry will fail (causing the guest to get crashed) if rIP (and
+- * rFLAGS, but we don't have an issue there) doesn't meet certain
+- * criteria. As we must not allow less than fully privileged mode to have
+- * such an effect on the domain, we correct rIP in that case (accepting
+- * this not being architecturally correct behavior, as the injected #GP
+- * fault will then not see the correct [invalid] return address).
+- * And since we know the guest will crash, we crash it right away if it
+- * already is in most privileged mode.
+- */
+- mode = vmx_guest_x86_mode(v);
+- if ( mode == 8 ? !is_canonical_address(regs->rip)
+- : regs->rip != regs->eip )
+- {
+- gprintk(XENLOG_WARNING, "Bad rIP %lx for mode %u\n", regs->rip, mode);
+-
+- if ( vmx_get_cpl() )
+- {
+- __vmread(VM_ENTRY_INTR_INFO, &intr_info);
+- if ( !(intr_info & INTR_INFO_VALID_MASK) )
+- hvm_inject_hw_exception(TRAP_gp_fault, 0);
+- /* Need to fix rIP nevertheless. */
+- if ( mode == 8 )
+- regs->rip = (long)(regs->rip << (64 - VADDR_BITS)) >>
+- (64 - VADDR_BITS);
+- else
+- regs->rip = regs->eip;
+- }
+- else
+- domain_crash(v->domain);
+- }
+ }
+
+ static void lbr_tsx_fixup(void)
+--
+2.42.0
+
diff --git a/0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch b/0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch
new file mode 100644
index 0000000..2fcfd68
--- /dev/null
+++ b/0028-x86-irq-fix-reporting-of-spurious-i8259-interrupts.patch
@@ -0,0 +1,41 @@
+From 699de512748d8e3bdcb3225b3b2a77c10cfd2408 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Sep 2023 08:53:57 +0200
+Subject: [PATCH 28/55] x86/irq: fix reporting of spurious i8259 interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The return value of bogus_8259A_irq() is wrong: the function will
+return `true` when the IRQ is real and `false` when it's a spurious
+IRQ. This causes the "No irq handler for vector ..." message in
+do_IRQ() to be printed for spurious i8259 interrupts which is not
+intended (and not helpful).
+
+Fix by inverting the return value of bogus_8259A_irq().
+
+Fixes: 132906348a14 ('x86/i8259: Handle bogus spurious interrupts more quietly')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 709f6c8ce6422475c372e67507606170a31ccb65
+master date: 2023-08-30 10:03:53 +0200
+---
+ xen/arch/x86/i8259.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/i8259.c b/xen/arch/x86/i8259.c
+index 6b35be10f0..ed9f55abe5 100644
+--- a/xen/arch/x86/i8259.c
++++ b/xen/arch/x86/i8259.c
+@@ -37,7 +37,7 @@ static bool _mask_and_ack_8259A_irq(unsigned int irq);
+
+ bool bogus_8259A_irq(unsigned int irq)
+ {
+- return _mask_and_ack_8259A_irq(irq);
++ return !_mask_and_ack_8259A_irq(irq);
+ }
+
+ static void cf_check mask_and_ack_8259A_irq(struct irq_desc *desc)
+--
+2.42.0
+
diff --git a/0028-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch b/0028-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch
deleted file mode 100644
index 8185bee..0000000
--- a/0028-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch
+++ /dev/null
@@ -1,342 +0,0 @@
-From 5e3250258afbace3e5dc3f31ac99c1eebf60f238 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 7 Feb 2023 16:58:25 +0100
-Subject: [PATCH 28/89] x86/vmx: Calculate model-specific LBRs once at start of
- day
-
-There is no point repeating this calculation at runtime, especially as it is
-in the fallback path of the WRSMR/RDMSR handlers.
-
-Move the infrastructure higher in vmx.c to avoid forward declarations,
-renaming last_branch_msr_get() to get_model_specific_lbr() to highlight that
-these are model-specific only.
-
-No practical change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: e94af0d58f86c3a914b9cbbf4d9ed3d43b974771
-master date: 2023-01-12 18:42:00 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 276 +++++++++++++++++++------------------
- 1 file changed, 139 insertions(+), 137 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 7c81b80710..ad91464103 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -396,6 +396,142 @@ void vmx_pi_hooks_deassign(struct domain *d)
- domain_unpause(d);
- }
-
-+static const struct lbr_info {
-+ u32 base, count;
-+} p4_lbr[] = {
-+ { MSR_P4_LER_FROM_LIP, 1 },
-+ { MSR_P4_LER_TO_LIP, 1 },
-+ { MSR_P4_LASTBRANCH_TOS, 1 },
-+ { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, c2_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_C2_LASTBRANCH_TOS, 1 },
-+ { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-+ { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, nh_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_NHL_LBR_SELECT, 1 },
-+ { MSR_NHL_LASTBRANCH_TOS, 1 },
-+ { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, sk_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_NHL_LBR_SELECT, 1 },
-+ { MSR_NHL_LASTBRANCH_TOS, 1 },
-+ { MSR_SKL_LASTBRANCH_0_FROM_IP, NUM_MSR_SKL_LASTBRANCH },
-+ { MSR_SKL_LASTBRANCH_0_TO_IP, NUM_MSR_SKL_LASTBRANCH },
-+ { MSR_SKL_LASTBRANCH_0_INFO, NUM_MSR_SKL_LASTBRANCH },
-+ { 0, 0 }
-+}, at_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_C2_LASTBRANCH_TOS, 1 },
-+ { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, sm_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_SM_LBR_SELECT, 1 },
-+ { MSR_SM_LASTBRANCH_TOS, 1 },
-+ { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, gm_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_SM_LBR_SELECT, 1 },
-+ { MSR_SM_LASTBRANCH_TOS, 1 },
-+ { MSR_GM_LASTBRANCH_0_FROM_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-+ { MSR_GM_LASTBRANCH_0_TO_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+};
-+static const struct lbr_info *__ro_after_init model_specific_lbr;
-+
-+static const struct lbr_info *__init get_model_specific_lbr(void)
-+{
-+ switch ( boot_cpu_data.x86 )
-+ {
-+ case 6:
-+ switch ( boot_cpu_data.x86_model )
-+ {
-+ /* Core2 Duo */
-+ case 0x0f:
-+ /* Enhanced Core */
-+ case 0x17:
-+ /* Xeon 7400 */
-+ case 0x1d:
-+ return c2_lbr;
-+ /* Nehalem */
-+ case 0x1a: case 0x1e: case 0x1f: case 0x2e:
-+ /* Westmere */
-+ case 0x25: case 0x2c: case 0x2f:
-+ /* Sandy Bridge */
-+ case 0x2a: case 0x2d:
-+ /* Ivy Bridge */
-+ case 0x3a: case 0x3e:
-+ /* Haswell */
-+ case 0x3c: case 0x3f: case 0x45: case 0x46:
-+ /* Broadwell */
-+ case 0x3d: case 0x47: case 0x4f: case 0x56:
-+ return nh_lbr;
-+ /* Skylake */
-+ case 0x4e: case 0x5e:
-+ /* Xeon Scalable */
-+ case 0x55:
-+ /* Cannon Lake */
-+ case 0x66:
-+ /* Goldmont Plus */
-+ case 0x7a:
-+ /* Ice Lake */
-+ case 0x6a: case 0x6c: case 0x7d: case 0x7e:
-+ /* Tiger Lake */
-+ case 0x8c: case 0x8d:
-+ /* Tremont */
-+ case 0x86:
-+ /* Kaby Lake */
-+ case 0x8e: case 0x9e:
-+ /* Comet Lake */
-+ case 0xa5: case 0xa6:
-+ return sk_lbr;
-+ /* Atom */
-+ case 0x1c: case 0x26: case 0x27: case 0x35: case 0x36:
-+ return at_lbr;
-+ /* Silvermont */
-+ case 0x37: case 0x4a: case 0x4d: case 0x5a: case 0x5d:
-+ /* Xeon Phi Knights Landing */
-+ case 0x57:
-+ /* Xeon Phi Knights Mill */
-+ case 0x85:
-+ /* Airmont */
-+ case 0x4c:
-+ return sm_lbr;
-+ /* Goldmont */
-+ case 0x5c: case 0x5f:
-+ return gm_lbr;
-+ }
-+ break;
-+
-+ case 15:
-+ switch ( boot_cpu_data.x86_model )
-+ {
-+ /* Pentium4/Xeon with em64t */
-+ case 3: case 4: case 6:
-+ return p4_lbr;
-+ }
-+ break;
-+ }
-+
-+ return NULL;
-+}
-+
- static int cf_check vmx_domain_initialise(struct domain *d)
- {
- static const struct arch_csw csw = {
-@@ -2837,6 +2973,7 @@ const struct hvm_function_table * __init start_vmx(void)
- vmx_function_table.tsc_scaling.setup = vmx_setup_tsc_scaling;
- }
-
-+ model_specific_lbr = get_model_specific_lbr();
- lbr_tsx_fixup_check();
- ler_to_fixup_check();
-
-@@ -2983,141 +3120,6 @@ static int vmx_cr_access(cr_access_qual_t qual)
- return X86EMUL_OKAY;
- }
-
--static const struct lbr_info {
-- u32 base, count;
--} p4_lbr[] = {
-- { MSR_P4_LER_FROM_LIP, 1 },
-- { MSR_P4_LER_TO_LIP, 1 },
-- { MSR_P4_LASTBRANCH_TOS, 1 },
-- { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, c2_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_C2_LASTBRANCH_TOS, 1 },
-- { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-- { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, nh_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_NHL_LBR_SELECT, 1 },
-- { MSR_NHL_LASTBRANCH_TOS, 1 },
-- { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, sk_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_NHL_LBR_SELECT, 1 },
-- { MSR_NHL_LASTBRANCH_TOS, 1 },
-- { MSR_SKL_LASTBRANCH_0_FROM_IP, NUM_MSR_SKL_LASTBRANCH },
-- { MSR_SKL_LASTBRANCH_0_TO_IP, NUM_MSR_SKL_LASTBRANCH },
-- { MSR_SKL_LASTBRANCH_0_INFO, NUM_MSR_SKL_LASTBRANCH },
-- { 0, 0 }
--}, at_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_C2_LASTBRANCH_TOS, 1 },
-- { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, sm_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_SM_LBR_SELECT, 1 },
-- { MSR_SM_LASTBRANCH_TOS, 1 },
-- { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, gm_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_SM_LBR_SELECT, 1 },
-- { MSR_SM_LASTBRANCH_TOS, 1 },
-- { MSR_GM_LASTBRANCH_0_FROM_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-- { MSR_GM_LASTBRANCH_0_TO_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--};
--
--static const struct lbr_info *last_branch_msr_get(void)
--{
-- switch ( boot_cpu_data.x86 )
-- {
-- case 6:
-- switch ( boot_cpu_data.x86_model )
-- {
-- /* Core2 Duo */
-- case 0x0f:
-- /* Enhanced Core */
-- case 0x17:
-- /* Xeon 7400 */
-- case 0x1d:
-- return c2_lbr;
-- /* Nehalem */
-- case 0x1a: case 0x1e: case 0x1f: case 0x2e:
-- /* Westmere */
-- case 0x25: case 0x2c: case 0x2f:
-- /* Sandy Bridge */
-- case 0x2a: case 0x2d:
-- /* Ivy Bridge */
-- case 0x3a: case 0x3e:
-- /* Haswell */
-- case 0x3c: case 0x3f: case 0x45: case 0x46:
-- /* Broadwell */
-- case 0x3d: case 0x47: case 0x4f: case 0x56:
-- return nh_lbr;
-- /* Skylake */
-- case 0x4e: case 0x5e:
-- /* Xeon Scalable */
-- case 0x55:
-- /* Cannon Lake */
-- case 0x66:
-- /* Goldmont Plus */
-- case 0x7a:
-- /* Ice Lake */
-- case 0x6a: case 0x6c: case 0x7d: case 0x7e:
-- /* Tiger Lake */
-- case 0x8c: case 0x8d:
-- /* Tremont */
-- case 0x86:
-- /* Kaby Lake */
-- case 0x8e: case 0x9e:
-- /* Comet Lake */
-- case 0xa5: case 0xa6:
-- return sk_lbr;
-- /* Atom */
-- case 0x1c: case 0x26: case 0x27: case 0x35: case 0x36:
-- return at_lbr;
-- /* Silvermont */
-- case 0x37: case 0x4a: case 0x4d: case 0x5a: case 0x5d:
-- /* Xeon Phi Knights Landing */
-- case 0x57:
-- /* Xeon Phi Knights Mill */
-- case 0x85:
-- /* Airmont */
-- case 0x4c:
-- return sm_lbr;
-- /* Goldmont */
-- case 0x5c: case 0x5f:
-- return gm_lbr;
-- }
-- break;
--
-- case 15:
-- switch ( boot_cpu_data.x86_model )
-- {
-- /* Pentium4/Xeon with em64t */
-- case 3: case 4: case 6:
-- return p4_lbr;
-- }
-- break;
-- }
--
-- return NULL;
--}
--
- enum
- {
- LBR_FORMAT_32 = 0x0, /* 32-bit record format */
-@@ -3224,7 +3226,7 @@ static void __init ler_to_fixup_check(void)
-
- static int is_last_branch_msr(u32 ecx)
- {
-- const struct lbr_info *lbr = last_branch_msr_get();
-+ const struct lbr_info *lbr = model_specific_lbr;
-
- if ( lbr == NULL )
- return 0;
-@@ -3563,7 +3565,7 @@ static int cf_check vmx_msr_write_intercept(
- if ( !(v->arch.hvm.vmx.lbr_flags & LBR_MSRS_INSERTED) &&
- (msr_content & IA32_DEBUGCTLMSR_LBR) )
- {
-- const struct lbr_info *lbr = last_branch_msr_get();
-+ const struct lbr_info *lbr = model_specific_lbr;
-
- if ( unlikely(!lbr) )
- {
---
-2.40.0
-
diff --git a/0029-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch b/0029-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch
deleted file mode 100644
index 2f87b83..0000000
--- a/0029-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From e904d8ae01a0be53368c8c388f13bf4ffcbcdf6c Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 7 Feb 2023 16:59:14 +0100
-Subject: [PATCH 29/89] x86/vmx: Support for CPUs without model-specific LBR
-
-Ice Lake (server at least) has both architectural LBR and model-specific LBR.
-Sapphire Rapids does not have model-specific LBR at all. I.e. On SPR and
-later, model_specific_lbr will always be NULL, so we must make changes to
-avoid reliably hitting the domain_crash().
-
-The Arch LBR spec states that CPUs without model-specific LBR implement
-MSR_DBG_CTL.LBR by discarding writes and always returning 0.
-
-Do this for any CPU for which we lack model-specific LBR information.
-
-Adjust the now-stale comment, now that the Arch LBR spec has created a way to
-signal "no model specific LBR" to guests.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: 3edca52ce736297d7fcf293860cd94ef62638052
-master date: 2023-01-12 18:42:00 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 31 ++++++++++++++++---------------
- 1 file changed, 16 insertions(+), 15 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index ad91464103..861f91f2af 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -3545,18 +3545,26 @@ static int cf_check vmx_msr_write_intercept(
- if ( msr_content & rsvd )
- goto gp_fault;
-
-+ /*
-+ * The Arch LBR spec (new in Ice Lake) states that CPUs with no
-+ * model-specific LBRs implement MSR_DBG_CTL.LBR by discarding writes
-+ * and always returning 0.
-+ *
-+ * Use this property in all cases where we don't know any
-+ * model-specific LBR information, as it matches real hardware
-+ * behaviour on post-Ice Lake systems.
-+ */
-+ if ( !model_specific_lbr )
-+ msr_content &= ~IA32_DEBUGCTLMSR_LBR;
-+
- /*
- * When a guest first enables LBR, arrange to save and restore the LBR
- * MSRs and allow the guest direct access.
- *
-- * MSR_DEBUGCTL and LBR has existed almost as long as MSRs have
-- * existed, and there is no architectural way to hide the feature, or
-- * fail the attempt to enable LBR.
-- *
-- * Unknown host LBR MSRs or hitting -ENOSPC with the guest load/save
-- * list are definitely hypervisor bugs, whereas -ENOMEM for allocating
-- * the load/save list is simply unlucky (and shouldn't occur with
-- * sensible management by the toolstack).
-+ * Hitting -ENOSPC with the guest load/save list is definitely a
-+ * hypervisor bug, whereas -ENOMEM for allocating the load/save list
-+ * is simply unlucky (and shouldn't occur with sensible management by
-+ * the toolstack).
- *
- * Either way, there is nothing we can do right now to recover, and
- * the guest won't execute correctly either. Simply crash the domain
-@@ -3567,13 +3575,6 @@ static int cf_check vmx_msr_write_intercept(
- {
- const struct lbr_info *lbr = model_specific_lbr;
-
-- if ( unlikely(!lbr) )
-- {
-- gprintk(XENLOG_ERR, "Unknown Host LBR MSRs\n");
-- domain_crash(v->domain);
-- return X86EMUL_OKAY;
-- }
--
- for ( ; lbr->count; lbr++ )
- {
- unsigned int i;
---
-2.40.0
-
diff --git a/0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch b/0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch
new file mode 100644
index 0000000..bc866d0
--- /dev/null
+++ b/0029-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch
@@ -0,0 +1,111 @@
+From d31e5b2a9c39816a954d1088d4cfc782f0006f39 Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini@amd.com>
+Date: Tue, 5 Sep 2023 14:33:29 +0200
+Subject: [PATCH 29/55] xen/arm: page: Handle cache flush of an element at the
+ top of the address space
+
+The region that needs to be cleaned/invalidated may be at the top
+of the address space. This means that 'end' (i.e. 'p + size') will
+be 0 and therefore nothing will be cleaned/invalidated as the check
+in the loop will always be false.
+
+On Arm64, we only support up to 48-bit Virtual
+address space. So this is not a concern there. However, for 32-bit,
+the mapcache is using the last 2GB of the address space. Therefore
+we may not clean/invalidate properly some pages. This could lead
+to memory corruption or data leakage (the scrubbed value may
+still sit in the cache when the guest could read directly the memory
+and therefore read the old content).
+
+Rework invalidate_dcache_va_range(), clean_dcache_va_range(),
+clean_and_invalidate_dcache_va_range() to handle a cache flush
+with an element at the top of the address space.
+
+This is CVE-2023-34321 / XSA-437.
+
+Reported-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Acked-by: Bertrand Marquis <bertrand.marquis@arm.com>
+master commit: 9a216e92de9f9011097e4f1fb55ff67ba0a21704
+master date: 2023-09-05 14:30:08 +0200
+---
+ xen/arch/arm/include/asm/page.h | 33 ++++++++++++++++++++-------------
+ 1 file changed, 20 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h
+index e7cd62190c..d7fe770a5e 100644
+--- a/xen/arch/arm/include/asm/page.h
++++ b/xen/arch/arm/include/asm/page.h
+@@ -160,26 +160,25 @@ static inline size_t read_dcache_line_bytes(void)
+
+ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
+ {
+- const void *end = p + size;
+ size_t cacheline_mask = dcache_line_bytes - 1;
+
+ dsb(sy); /* So the CPU issues all writes to the range */
+
+ if ( (uintptr_t)p & cacheline_mask )
+ {
++ size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask);
+ p = (void *)((uintptr_t)p & ~cacheline_mask);
+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
+ p += dcache_line_bytes;
+ }
+- if ( (uintptr_t)end & cacheline_mask )
+- {
+- end = (void *)((uintptr_t)end & ~cacheline_mask);
+- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (end));
+- }
+
+- for ( ; p < end; p += dcache_line_bytes )
++ for ( ; size >= dcache_line_bytes;
++ p += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__invalidate_dcache_one(0) : : "r" (p));
+
++ if ( size > 0 )
++ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
++
+ dsb(sy); /* So we know the flushes happen before continuing */
+
+ return 0;
+@@ -187,10 +186,14 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
+
+ static inline int clean_dcache_va_range(const void *p, unsigned long size)
+ {
+- const void *end = p + size;
++ size_t cacheline_mask = dcache_line_bytes - 1;
++
+ dsb(sy); /* So the CPU issues all writes to the range */
+- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1));
+- for ( ; p < end; p += dcache_line_bytes )
++ size += (uintptr_t)p & cacheline_mask;
++ size = (size + cacheline_mask) & ~cacheline_mask;
++ p = (void *)((uintptr_t)p & ~cacheline_mask);
++ for ( ; size >= dcache_line_bytes;
++ p += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__clean_dcache_one(0) : : "r" (p));
+ dsb(sy); /* So we know the flushes happen before continuing */
+ /* ARM callers assume that dcache_* functions cannot fail. */
+@@ -200,10 +203,14 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size)
+ static inline int clean_and_invalidate_dcache_va_range
+ (const void *p, unsigned long size)
+ {
+- const void *end = p + size;
++ size_t cacheline_mask = dcache_line_bytes - 1;
++
+ dsb(sy); /* So the CPU issues all writes to the range */
+- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1));
+- for ( ; p < end; p += dcache_line_bytes )
++ size += (uintptr_t)p & cacheline_mask;
++ size = (size + cacheline_mask) & ~cacheline_mask;
++ p = (void *)((uintptr_t)p & ~cacheline_mask);
++ for ( ; size >= dcache_line_bytes;
++ p += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
+ dsb(sy); /* So we know the flushes happen before continuing */
+ /* ARM callers assume that dcache_* functions cannot fail. */
+--
+2.42.0
+
diff --git a/0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch b/0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch
new file mode 100644
index 0000000..4581d03
--- /dev/null
+++ b/0030-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch
@@ -0,0 +1,48 @@
+From d2d2dcae879c6cc05227c9620f0a772f35fe6886 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 23 Aug 2023 09:26:36 +0200
+Subject: [PATCH 30/55] x86/AMD: extend Zenbleed check to models "good" ucode
+ isn't known for
+
+Reportedly the AMD Custom APU 0405 found on SteamDeck, models 0x90 and
+0x91, (quoting the respective Linux commit) is similarly affected. Put
+another instance of our Zen1 vs Zen2 distinction checks in
+amd_check_zenbleed(), forcing use of the chickenbit irrespective of
+ucode version (building upon real hardware never surfacing a version of
+0xffffffff).
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+(cherry picked from commit 145a69c0944ac70cfcf9d247c85dee9e99d9d302)
+---
+ xen/arch/x86/cpu/amd.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 3ea214fc2e..1bb3044be1 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -909,10 +909,17 @@ void amd_check_zenbleed(void)
+ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+ default:
+ /*
+- * With the Fam17h check above, parts getting here are Zen1.
+- * They're not affected.
++ * With the Fam17h check above, most parts getting here are
++ * Zen1. They're not affected. Assume Zen2 ones making it
++ * here are affected regardless of microcode version.
++ *
++ * Zen1 vs Zen2 isn't a simple model number comparison, so use
++ * STIBP as a heuristic to distinguish.
+ */
+- return;
++ if (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ return;
++ good_rev = ~0U;
++ break;
+ }
+
+ rdmsrl(MSR_AMD64_DE_CFG, val);
+--
+2.42.0
+
diff --git a/0030-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch b/0030-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch
deleted file mode 100644
index e2bb8df..0000000
--- a/0030-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 2d74e7035bd060d662f1c4f8522377be8021be92 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 7 Feb 2023 16:59:54 +0100
-Subject: [PATCH 30/89] x86/shadow: fix PAE check for top-level table
- unshadowing
-
-Clearly within the for_each_vcpu() the vCPU of this loop is meant, not
-the (loop invariant) one the fault occurred on.
-
-Fixes: 3d5e6a3ff383 ("x86 hvm: implement HVMOP_pagetable_dying")
-Fixes: ef3b0d8d2c39 ("x86/shadow: shadow_table[] needs only one entry for PV-only configs")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: f8fdceefbb1193ec81667eb40b83bc525cb71204
-master date: 2023-01-20 09:23:42 +0100
----
- xen/arch/x86/mm/shadow/multi.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
-index 2370b30602..671bf8c228 100644
---- a/xen/arch/x86/mm/shadow/multi.c
-+++ b/xen/arch/x86/mm/shadow/multi.c
-@@ -2672,10 +2672,10 @@ static int cf_check sh_page_fault(
- #if GUEST_PAGING_LEVELS == 3
- unsigned int i;
-
-- for_each_shadow_table(v, i)
-+ for_each_shadow_table(tmp, i)
- {
- mfn_t smfn = pagetable_get_mfn(
-- v->arch.paging.shadow.shadow_table[i]);
-+ tmp->arch.paging.shadow.shadow_table[i]);
-
- if ( mfn_valid(smfn) && (mfn_x(smfn) != 0) )
- {
---
-2.40.0
-
diff --git a/0031-build-fix-building-flask-headers-before-descending-i.patch b/0031-build-fix-building-flask-headers-before-descending-i.patch
deleted file mode 100644
index 273e795..0000000
--- a/0031-build-fix-building-flask-headers-before-descending-i.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 819a5d4ed8b79e21843d5960a7ab8fbd16f28233 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 7 Feb 2023 17:00:29 +0100
-Subject: [PATCH 31/89] build: fix building flask headers before descending in
- flask/ss/
-
-Unfortunatly, adding prerequisite to "$(obj)/ss/built_in.o" doesn't
-work because we have "$(obj)/%/built_in.o: $(obj)/% ;" in Rules.mk.
-So, make is allow to try to build objects in "xsm/flask/ss/" before
-generating the headers.
-
-Adding a prerequisite on "$(obj)/ss" instead will fix the issue as
-that's the target used to run make in this subdirectory.
-
-Unfortunatly, that target is also used when running `make clean`, so
-we want to ignore it in this case. $(MAKECMDGOALS) can't be used in
-this case as it is empty, but we can guess which operation is done by
-looking at the list of loaded makefiles.
-
-Fixes: 7a3bcd2babcc ("build: build everything from the root dir, use obj=$subdir")
-Reported-by: "Daniel P. Smith" <dpsmith@apertussolutions.com>
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Daniel P. Smith <dpsmith@apertussolutions.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: d60324d8af9404014cfcc37bba09e9facfd02fcf
-master date: 2023-01-23 15:03:58 +0100
----
- xen/xsm/flask/Makefile | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/xen/xsm/flask/Makefile b/xen/xsm/flask/Makefile
-index d25312f4fa..3fdcf7727e 100644
---- a/xen/xsm/flask/Makefile
-+++ b/xen/xsm/flask/Makefile
-@@ -16,7 +16,11 @@ FLASK_H_FILES := flask.h class_to_string.h initial_sid_to_string.h
- AV_H_FILES := av_perm_to_string.h av_permissions.h
- ALL_H_FILES := $(addprefix include/,$(FLASK_H_FILES) $(AV_H_FILES))
-
--$(addprefix $(obj)/,$(obj-y)) $(obj)/ss/built_in.o: $(addprefix $(obj)/,$(ALL_H_FILES))
-+# Adding prerequisite to descending into ss/ folder only when not running
-+# `make *clean`.
-+ifeq ($(filter %/Makefile.clean,$(MAKEFILE_LIST)),)
-+$(addprefix $(obj)/,$(obj-y)) $(obj)/ss: $(addprefix $(obj)/,$(ALL_H_FILES))
-+endif
- extra-y += $(ALL_H_FILES)
-
- mkflask := $(srcdir)/policy/mkflask.sh
---
-2.40.0
-
diff --git a/0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch b/0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch
new file mode 100644
index 0000000..10417ae
--- /dev/null
+++ b/0031-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch
@@ -0,0 +1,74 @@
+From dc28aba565f226f9bec24cfde993e78478acfb4e Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 12 Sep 2023 15:06:49 +0100
+Subject: [PATCH 31/55] x86/spec-ctrl: Fix confusion between
+ SPEC_CTRL_EXIT_TO_XEN{,_IST}
+
+c/s 3fffaf9c13e9 ("x86/entry: Avoid using alternatives in NMI/#MC paths")
+dropped the only user, leaving behind the (incorrect) implication that Xen had
+split exit paths.
+
+Delete the unused SPEC_CTRL_EXIT_TO_XEN and rename SPEC_CTRL_EXIT_TO_XEN_IST
+to SPEC_CTRL_EXIT_TO_XEN for consistency.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 1c18d73774533a55ba9d1cbee8bdace03efdb5e7)
+---
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 ++--------
+ xen/arch/x86/x86_64/entry.S | 2 +-
+ 2 files changed, 3 insertions(+), 9 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index f23bb105c5..e8fd01243c 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -79,7 +79,6 @@
+ * - SPEC_CTRL_ENTRY_FROM_PV
+ * - SPEC_CTRL_ENTRY_FROM_INTR
+ * - SPEC_CTRL_ENTRY_FROM_INTR_IST
+- * - SPEC_CTRL_EXIT_TO_XEN_IST
+ * - SPEC_CTRL_EXIT_TO_XEN
+ * - SPEC_CTRL_EXIT_TO_PV
+ *
+@@ -268,11 +267,6 @@
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
+ X86_FEATURE_SC_MSR_PV
+
+-/* Use when exiting to Xen context. */
+-#define SPEC_CTRL_EXIT_TO_XEN \
+- ALTERNATIVE "", \
+- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV
+-
+ /* Use when exiting to PV guest context. */
+ #define SPEC_CTRL_EXIT_TO_PV \
+ ALTERNATIVE "", \
+@@ -339,8 +333,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ UNLIKELY_END(\@_serialise)
+ .endm
+
+-/* Use when exiting to Xen in IST context. */
+-.macro SPEC_CTRL_EXIT_TO_XEN_IST
++/* Use when exiting to Xen context. */
++.macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+ * Requires %rbx=stack_end
+ * Clobbers %rax, %rcx, %rdx
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 7675a59ff0..b45a09823a 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -673,7 +673,7 @@ UNLIKELY_START(ne, exit_cr3)
+ UNLIKELY_END(exit_cr3)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+- SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */
++ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
+
+ RESTORE_ALL adj=8
+ iretq
+--
+2.42.0
+
diff --git a/0032-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch b/0032-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch
deleted file mode 100644
index 8b3a410..0000000
--- a/0032-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From d0127881376baeea1e4eb71d0f7b56d942147124 Mon Sep 17 00:00:00 2001
-From: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
-Date: Tue, 7 Feb 2023 17:00:47 +0100
-Subject: [PATCH 32/89] ns16550: fix an incorrect assignment to uart->io_size
-
-uart->io_size represents the size in bytes. Thus, when serial_port.bit_width
-is assigned to it, it should be converted to size in bytes.
-
-Fixes: 17b516196c ("ns16550: add ACPI support for ARM only")
-Reported-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-master commit: 352c89f72ddb67b8d9d4e492203f8c77f85c8df1
-master date: 2023-01-24 16:54:38 +0100
----
- xen/drivers/char/ns16550.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
-index 01a05c9aa8..ce013fb6a5 100644
---- a/xen/drivers/char/ns16550.c
-+++ b/xen/drivers/char/ns16550.c
-@@ -1875,7 +1875,7 @@ static int __init ns16550_acpi_uart_init(const void *data)
- uart->parity = spcr->parity;
- uart->stop_bits = spcr->stop_bits;
- uart->io_base = spcr->serial_port.address;
-- uart->io_size = spcr->serial_port.bit_width;
-+ uart->io_size = DIV_ROUND_UP(spcr->serial_port.bit_width, BITS_PER_BYTE);
- uart->reg_shift = spcr->serial_port.bit_offset;
- uart->reg_width = spcr->serial_port.access_width;
-
---
-2.40.0
-
diff --git a/0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch b/0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch
new file mode 100644
index 0000000..a0c83da
--- /dev/null
+++ b/0032-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch
@@ -0,0 +1,85 @@
+From 84690fb82c4f4aecb72a6789d8994efa74841e09 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 12 Sep 2023 17:03:16 +0100
+Subject: [PATCH 32/55] x86/spec-ctrl: Fold DO_SPEC_CTRL_EXIT_TO_XEN into its
+ single user
+
+With the SPEC_CTRL_EXIT_TO_XEN{,_IST} confusion fixed, it's now obvious that
+there's only a single EXIT_TO_XEN path. Fold DO_SPEC_CTRL_EXIT_TO_XEN into
+SPEC_CTRL_EXIT_TO_XEN to simplify further fixes.
+
+When merging labels, switch the name to .L\@_skip_sc_msr as "skip" on its own
+is going to be too generic shortly.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 694bb0f280fd08a4377e36e32b84b5062def4de2)
+---
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 40 ++++++++++--------------
+ 1 file changed, 16 insertions(+), 24 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index e8fd01243c..d5f65d80ea 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -211,27 +211,6 @@
+ wrmsr
+ .endm
+
+-.macro DO_SPEC_CTRL_EXIT_TO_XEN
+-/*
+- * Requires %rbx=stack_end
+- * Clobbers %rax, %rcx, %rdx
+- *
+- * When returning to Xen context, look to see whether SPEC_CTRL shadowing is
+- * in effect, and reload the shadow value. This covers race conditions which
+- * exist with an NMI/MCE/etc hitting late in the return-to-guest path.
+- */
+- xor %edx, %edx
+-
+- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
+- jz .L\@_skip
+-
+- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
+- mov $MSR_SPEC_CTRL, %ecx
+- wrmsr
+-
+-.L\@_skip:
+-.endm
+-
+ .macro DO_SPEC_CTRL_EXIT_TO_GUEST
+ /*
+ * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
+@@ -340,11 +319,24 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ * Clobbers %rax, %rcx, %rdx
+ */
+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
+- jz .L\@_skip
++ jz .L\@_skip_sc_msr
+
+- DO_SPEC_CTRL_EXIT_TO_XEN
++ /*
++ * When returning to Xen context, look to see whether SPEC_CTRL shadowing
++ * is in effect, and reload the shadow value. This covers race conditions
++ * which exist with an NMI/MCE/etc hitting late in the return-to-guest
++ * path.
++ */
++ xor %edx, %edx
+
+-.L\@_skip:
++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
++ jz .L\@_skip_sc_msr
++
++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
++ mov $MSR_SPEC_CTRL, %ecx
++ wrmsr
++
++.L\@_skip_sc_msr:
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+--
+2.42.0
+
diff --git a/0033-libxl-fix-guest-kexec-skip-cpuid-policy.patch b/0033-libxl-fix-guest-kexec-skip-cpuid-policy.patch
deleted file mode 100644
index 7eb3779..0000000
--- a/0033-libxl-fix-guest-kexec-skip-cpuid-policy.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 3dae50283d9819c691a97f15b133124c00d39a2f Mon Sep 17 00:00:00 2001
-From: Jason Andryuk <jandryuk@gmail.com>
-Date: Tue, 7 Feb 2023 17:01:49 +0100
-Subject: [PATCH 33/89] libxl: fix guest kexec - skip cpuid policy
-
-When a domain performs a kexec (soft reset), libxl__build_pre() is
-called with the existing domid. Calling libxl__cpuid_legacy() on the
-existing domain fails since the cpuid policy has already been set, and
-the guest isn't rebuilt and doesn't kexec.
-
-xc: error: Failed to set d1's policy (err leaf 0xffffffff, subleaf 0xffffffff, msr 0xffffffff) (17 = File exists): Internal error
-libxl: error: libxl_cpuid.c:494:libxl__cpuid_legacy: Domain 1:Failed to apply CPUID policy: File exists
-libxl: error: libxl_create.c:1641:domcreate_rebuild_done: Domain 1:cannot (re-)build domain: -3
-libxl: error: libxl_xshelp.c:201:libxl__xs_read_mandatory: xenstore read failed: `/libxl/1/type': No such file or directory
-libxl: warning: libxl_dom.c:49:libxl__domain_type: unable to get domain type for domid=1, assuming HVM
-
-During a soft_reset, skip calling libxl__cpuid_legacy() to avoid the
-issue. Before commit 34990446ca91, the libxl__cpuid_legacy() failure
-would have been ignored, so kexec would continue.
-
-Fixes: 34990446ca91 ("libxl: don't ignore the return value from xc_cpuid_apply_policy")
-Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 1e454c2b5b1172e0fc7457e411ebaba61db8fc87
-master date: 2023-01-26 10:58:23 +0100
----
- tools/libs/light/libxl_create.c | 2 ++
- tools/libs/light/libxl_dom.c | 2 +-
- tools/libs/light/libxl_internal.h | 1 +
- 3 files changed, 4 insertions(+), 1 deletion(-)
-
-diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
-index 612eacfc7f..dbee32b7b7 100644
---- a/tools/libs/light/libxl_create.c
-+++ b/tools/libs/light/libxl_create.c
-@@ -2203,6 +2203,8 @@ static int do_domain_soft_reset(libxl_ctx *ctx,
- aop_console_how);
- cdcs->domid_out = &domid_out;
-
-+ state->soft_reset = true;
-+
- dom_path = libxl__xs_get_dompath(gc, domid);
- if (!dom_path) {
- LOGD(ERROR, domid, "failed to read domain path");
-diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
-index b454f988fb..f6311eea6e 100644
---- a/tools/libs/light/libxl_dom.c
-+++ b/tools/libs/light/libxl_dom.c
-@@ -382,7 +382,7 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
- /* Construct a CPUID policy, but only for brand new domains. Domains
- * being migrated-in/restored have CPUID handled during the
- * static_data_done() callback. */
-- if (!state->restore)
-+ if (!state->restore && !state->soft_reset)
- rc = libxl__cpuid_legacy(ctx, domid, false, info);
-
- out:
-diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
-index a7c447c10e..cae160351f 100644
---- a/tools/libs/light/libxl_internal.h
-+++ b/tools/libs/light/libxl_internal.h
-@@ -1406,6 +1406,7 @@ typedef struct {
- /* Whether this domain is being migrated/restored, or booting fresh. Only
- * applicable to the primary domain, not support domains (e.g. stub QEMU). */
- bool restore;
-+ bool soft_reset;
- } libxl__domain_build_state;
-
- _hidden void libxl__domain_build_state_init(libxl__domain_build_state *s);
---
-2.40.0
-
diff --git a/0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch b/0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch
new file mode 100644
index 0000000..a278c5f
--- /dev/null
+++ b/0033-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch
@@ -0,0 +1,83 @@
+From 3952c73bdbd05f0e666986fce633a591237b3c88 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 1 Sep 2023 11:38:44 +0100
+Subject: [PATCH 33/55] x86/spec-ctrl: Turn the remaining
+ SPEC_CTRL_{ENTRY,EXIT}_* into asm macros
+
+These have grown more complex over time, with some already having been
+converted.
+
+Provide full Requires/Clobbers comments, otherwise missing at this level of
+indirection.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7125429aafb9e3c9c88fc93001fc2300e0ac2cc8)
+---
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 37 ++++++++++++++++++------
+ 1 file changed, 28 insertions(+), 9 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index d5f65d80ea..c6d5f2ad01 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -231,26 +231,45 @@
+ .endm
+
+ /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
+-#define SPEC_CTRL_ENTRY_FROM_PV \
++.macro SPEC_CTRL_ENTRY_FROM_PV
++/*
++ * Requires %rsp=regs/cpuinfo, %rdx=0
++ * Clobbers %rax, %rcx, %rdx
++ */
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \
+- X86_FEATURE_IBPB_ENTRY_PV; \
+- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \
++ X86_FEATURE_IBPB_ENTRY_PV
++
++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV
++
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \
+ X86_FEATURE_SC_MSR_PV
++.endm
+
+ /* Use in interrupt/exception context. May interrupt Xen or PV context. */
+-#define SPEC_CTRL_ENTRY_FROM_INTR \
++.macro SPEC_CTRL_ENTRY_FROM_INTR
++/*
++ * Requires %rsp=regs, %r14=stack_end, %rdx=0
++ * Clobbers %rax, %rcx, %rdx
++ */
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \
+- X86_FEATURE_IBPB_ENTRY_PV; \
+- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \
++ X86_FEATURE_IBPB_ENTRY_PV
++
++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV
++
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
+ X86_FEATURE_SC_MSR_PV
++.endm
+
+ /* Use when exiting to PV guest context. */
+-#define SPEC_CTRL_EXIT_TO_PV \
+- ALTERNATIVE "", \
+- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \
++.macro SPEC_CTRL_EXIT_TO_PV
++/*
++ * Requires %rax=spec_ctrl, %rsp=regs/info
++ * Clobbers %rcx, %rdx
++ */
++ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
++
+ DO_SPEC_CTRL_COND_VERW
++.endm
+
+ /*
+ * Use in IST interrupt/exception context. May interrupt Xen or PV context.
+--
+2.42.0
+
diff --git a/0034-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch b/0034-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch
deleted file mode 100644
index 8f57d4e..0000000
--- a/0034-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 03f545b6cf3220b4647677b588e5525a781a4813 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 1 Nov 2022 17:59:16 +0000
-Subject: [PATCH 34/89] tools/ocaml/xenctrl: Make domain_getinfolist tail
- recursive
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-domain_getinfolist() is quadratic with the number of domains, because of the
-behaviour of the underlying hypercall. xenopsd was further observed to be
-wasting excessive quantites of time manipulating the list of already-obtained
-domains.
-
-Implement a tail recursive `rev_concat` equivalent to `concat |> rev`, and use
-it instead of calling `@` multiple times.
-
-An incidental benefit is that the list of domains will now be in domid order,
-instead of having pairs of 2 domains changing direction every time.
-
-In a scalability testing scenario with ~1000 VMs, a combination of this and
-the subsequent change takes xenopsd's wallclock time in domain_getinfolist()
-down from 88% to 0.02%
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Tested-by: Pau Ruiz Safont <pau.safont@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit c3b6be714c64aa62b56d0bce96f4b6a10b5c2078)
----
- tools/ocaml/libs/xc/xenctrl.ml | 23 +++++++++++++++++------
- 1 file changed, 17 insertions(+), 6 deletions(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
-index 83e39a8616..85b73a7f6f 100644
---- a/tools/ocaml/libs/xc/xenctrl.ml
-+++ b/tools/ocaml/libs/xc/xenctrl.ml
-@@ -222,14 +222,25 @@ external domain_shutdown: handle -> domid -> shutdown_reason -> unit
- external _domain_getinfolist: handle -> domid -> int -> domaininfo list
- = "stub_xc_domain_getinfolist"
-
-+let rev_append_fold acc e = List.rev_append e acc
-+
-+(**
-+ * [rev_concat lst] is equivalent to [lst |> List.concat |> List.rev]
-+ * except it is tail recursive, whereas [List.concat] isn't.
-+ * Example:
-+ * rev_concat [[10;9;8];[7;6];[5]]] = [5; 6; 7; 8; 9; 10]
-+ *)
-+let rev_concat lst = List.fold_left rev_append_fold [] lst
-+
- let domain_getinfolist handle first_domain =
- let nb = 2 in
-- let last_domid l = (List.hd l).domid + 1 in
-- let rec __getlist from =
-- let l = _domain_getinfolist handle from nb in
-- (if List.length l = nb then __getlist (last_domid l) else []) @ l
-- in
-- List.rev (__getlist first_domain)
-+ let rec __getlist lst from =
-+ (* _domain_getinfolist returns domains in reverse order, largest first *)
-+ match _domain_getinfolist handle from nb with
-+ | [] -> rev_concat lst
-+ | (hd :: _) as l -> __getlist (l :: lst) (hd.domid + 1)
-+ in
-+ __getlist [] first_domain
-
- external domain_getinfo: handle -> domid -> domaininfo= "stub_xc_domain_getinfo"
-
---
-2.40.0
-
diff --git a/0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch b/0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch
new file mode 100644
index 0000000..f360cbd
--- /dev/null
+++ b/0034-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch
@@ -0,0 +1,106 @@
+From ba023e93d0b1e60b80251bf080bab694efb9f8e3 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 30 Aug 2023 20:11:50 +0100
+Subject: [PATCH 34/55] x86/spec-ctrl: Improve all SPEC_CTRL_{ENTER,EXIT}_*
+ comments
+
+... to better explain how they're used.
+
+Doing so highlights that SPEC_CTRL_EXIT_TO_XEN is missing a VERW flush for the
+corner case when e.g. an NMI hits late in an exit-to-guest path.
+
+Leave a TODO, which will be addressed in subsequent patches which arrange for
+VERW flushing to be safe within SPEC_CTRL_EXIT_TO_XEN.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 45f00557350dc7d0756551069803fc49c29184ca)
+---
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++++++----
+ 1 file changed, 31 insertions(+), 5 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index c6d5f2ad01..97c4db31cd 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -230,7 +230,10 @@
+ wrmsr
+ .endm
+
+-/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
++/*
++ * Used after an entry from PV context: SYSCALL, SYSENTER, INT,
++ * etc. There is always a guest speculation state in context.
++ */
+ .macro SPEC_CTRL_ENTRY_FROM_PV
+ /*
+ * Requires %rsp=regs/cpuinfo, %rdx=0
+@@ -245,7 +248,11 @@
+ X86_FEATURE_SC_MSR_PV
+ .endm
+
+-/* Use in interrupt/exception context. May interrupt Xen or PV context. */
++/*
++ * Used after an exception or maskable interrupt, hitting Xen or PV context.
++ * There will either be a guest speculation context, or (barring fatal
++ * exceptions) a well-formed Xen speculation context.
++ */
+ .macro SPEC_CTRL_ENTRY_FROM_INTR
+ /*
+ * Requires %rsp=regs, %r14=stack_end, %rdx=0
+@@ -260,7 +267,10 @@
+ X86_FEATURE_SC_MSR_PV
+ .endm
+
+-/* Use when exiting to PV guest context. */
++/*
++ * Used when exiting from any entry context, back to PV context. This
++ * includes from an IST entry which moved onto the primary stack.
++ */
+ .macro SPEC_CTRL_EXIT_TO_PV
+ /*
+ * Requires %rax=spec_ctrl, %rsp=regs/info
+@@ -272,7 +282,13 @@
+ .endm
+
+ /*
+- * Use in IST interrupt/exception context. May interrupt Xen or PV context.
++ * Used after an IST entry hitting Xen or PV context. Special care is needed,
++ * because when hitting Xen context, there may not be a well-formed
++ * speculation context. (i.e. it can hit in the middle of
++ * SPEC_CTRL_{ENTRY,EXIT}_* regions.)
++ *
++ * An IST entry which hits PV context moves onto the primary stack and leaves
++ * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN.
+ */
+ .macro SPEC_CTRL_ENTRY_FROM_INTR_IST
+ /*
+@@ -331,7 +347,14 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ UNLIKELY_END(\@_serialise)
+ .endm
+
+-/* Use when exiting to Xen context. */
++/*
++ * Use when exiting from any entry context, back to Xen context. This
++ * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an
++ * incomplete speculation context.
++ *
++ * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we
++ * need to treat this as if it were an EXIT_TO_$GUEST case too.
++ */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+ * Requires %rbx=stack_end
+@@ -356,6 +379,9 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ wrmsr
+
+ .L\@_skip_sc_msr:
++
++ /* TODO VERW */
++
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+--
+2.42.0
+
diff --git a/0035-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch b/0035-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch
deleted file mode 100644
index 6c64355..0000000
--- a/0035-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 5d8f9cfa166c55a308856e7b021d778350edbd6c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 1 Nov 2022 17:59:17 +0000
-Subject: [PATCH 35/89] tools/ocaml/xenctrl: Use larger chunksize in
- domain_getinfolist
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-domain_getinfolist() is quadratic with the number of domains, because of the
-behaviour of the underlying hypercall. Nevertheless, getting domain info in
-blocks of 1024 is far more efficient than blocks of 2.
-
-In a scalability testing scenario with ~1000 VMs, a combination of this and
-the previous change takes xenopsd's wallclock time in domain_getinfolist()
-down from 88% to 0.02%
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Tested-by: Pau Ruiz Safont <pau.safont@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 95db09b1b154fb72fad861815ceae1f3fa49fc4e)
----
- tools/ocaml/libs/xc/xenctrl.ml | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
-index 85b73a7f6f..aa650533f7 100644
---- a/tools/ocaml/libs/xc/xenctrl.ml
-+++ b/tools/ocaml/libs/xc/xenctrl.ml
-@@ -233,7 +233,7 @@ let rev_append_fold acc e = List.rev_append e acc
- let rev_concat lst = List.fold_left rev_append_fold [] lst
-
- let domain_getinfolist handle first_domain =
-- let nb = 2 in
-+ let nb = 1024 in
- let rec __getlist lst from =
- (* _domain_getinfolist returns domains in reverse order, largest first *)
- match _domain_getinfolist handle from nb with
---
-2.40.0
-
diff --git a/0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch b/0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch
new file mode 100644
index 0000000..fe2acaf
--- /dev/null
+++ b/0035-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch
@@ -0,0 +1,74 @@
+From 5f7efd47c8273fde972637d0360851802f76eca9 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 13 Sep 2023 13:48:16 +0100
+Subject: [PATCH 35/55] x86/entry: Adjust restore_all_xen to hold stack_end in
+ %r14
+
+All other SPEC_CTRL_{ENTRY,EXIT}_* helpers hold stack_end in %r14. Adjust it
+for consistency.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7aa28849a1155d856e214e9a80a7e65fffdc3e58)
+---
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 8 ++++----
+ xen/arch/x86/x86_64/entry.S | 8 ++++----
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index 97c4db31cd..66c706496f 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -357,10 +357,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+- * Requires %rbx=stack_end
++ * Requires %r14=stack_end
+ * Clobbers %rax, %rcx, %rdx
+ */
+- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
++ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
+ jz .L\@_skip_sc_msr
+
+ /*
+@@ -371,10 +371,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ xor %edx, %edx
+
+- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
+ jz .L\@_skip_sc_msr
+
+- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
+ mov $MSR_SPEC_CTRL, %ecx
+ wrmsr
+
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index b45a09823a..92279a225d 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -665,15 +665,15 @@ restore_all_xen:
+ * Check whether we need to switch to the per-CPU page tables, in
+ * case we return to late PV exit code (from an NMI or #MC).
+ */
+- GET_STACK_END(bx)
+- cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx)
++ GET_STACK_END(14)
++ cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
+ UNLIKELY_START(ne, exit_cr3)
+- mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
++ mov STACK_CPUINFO_FIELD(pv_cr3)(%r14), %rax
+ mov %rax, %cr3
+ UNLIKELY_END(exit_cr3)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */
+
+ RESTORE_ALL adj=8
+ iretq
+--
+2.42.0
+
diff --git a/0036-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch b/0036-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch
deleted file mode 100644
index d6a324a..0000000
--- a/0036-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch
+++ /dev/null
@@ -1,75 +0,0 @@
-From 7d516fc87637dc551494f8eca08f106f578f7112 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Fri, 16 Dec 2022 18:25:10 +0000
-Subject: [PATCH 36/89] tools/ocaml/xb,mmap: Use Data_abstract_val wrapper
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This is not strictly necessary since it is essentially a no-op currently: a
-cast to void * and value *, even in OCaml 5.0.
-
-However it does make it clearer that what we have here is not a regular OCaml
-value, but one allocated with Abstract_tag or Custom_tag, and follows the
-example from the manual more closely:
-https://v2.ocaml.org/manual/intfc.html#ss:c-outside-head
-
-It also makes it clearer that these modules have been reviewed for
-compat with OCaml 5.0.
-
-We cannot use OCaml finalizers here, because we want exact control over when
-to unmap these pages from remote domains.
-
-No functional change.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit d2ccc637111d6dbcf808aaffeec7a46f0b1e1c81)
----
- tools/ocaml/libs/mmap/mmap_stubs.h | 4 ++++
- tools/ocaml/libs/mmap/xenmmap_stubs.c | 2 +-
- tools/ocaml/libs/xb/xs_ring_stubs.c | 2 +-
- 3 files changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/libs/mmap/mmap_stubs.h b/tools/ocaml/libs/mmap/mmap_stubs.h
-index 65e4239890..f4784e4715 100644
---- a/tools/ocaml/libs/mmap/mmap_stubs.h
-+++ b/tools/ocaml/libs/mmap/mmap_stubs.h
-@@ -30,4 +30,8 @@ struct mmap_interface
- int len;
- };
-
-+#ifndef Data_abstract_val
-+#define Data_abstract_val(x) ((void *)Op_val(x))
-+#endif
-+
- #endif
-diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-index e2ce088e25..e03951d781 100644
---- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
-+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-@@ -28,7 +28,7 @@
- #include <caml/fail.h>
- #include <caml/callback.h>
-
--#define Intf_val(a) ((struct mmap_interface *) a)
-+#define Intf_val(a) ((struct mmap_interface *)Data_abstract_val(a))
-
- static int mmap_interface_init(struct mmap_interface *intf,
- int fd, int pflag, int mflag,
-diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c b/tools/ocaml/libs/xb/xs_ring_stubs.c
-index 7a91fdee75..1f58524535 100644
---- a/tools/ocaml/libs/xb/xs_ring_stubs.c
-+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
-@@ -35,7 +35,7 @@
- #include <sys/mman.h>
- #include "mmap_stubs.h"
-
--#define GET_C_STRUCT(a) ((struct mmap_interface *) a)
-+#define GET_C_STRUCT(a) ((struct mmap_interface *)Data_abstract_val(a))
-
- /*
- * Bytes_val has been introduced by Ocaml 4.06.1. So define our own version
---
-2.40.0
-
diff --git a/0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch b/0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch
new file mode 100644
index 0000000..ba7ea21
--- /dev/null
+++ b/0036-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch
@@ -0,0 +1,109 @@
+From e4a71bc0da0baf7464bb0d8e33053f330e5ea366 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 13 Sep 2023 12:20:12 +0100
+Subject: [PATCH 36/55] x86/entry: Track the IST-ness of an entry for the exit
+ paths
+
+Use %r12 to hold an ist_exit boolean. This register is zero elsewhere in the
+entry/exit asm, so it only needs setting in the IST path.
+
+As this is subtle and fragile, add check_ist_exit() to be used in debugging
+builds to cross-check that the ist_exit boolean matches the entry vector.
+
+Write check_ist_exit() in C, because it's debug only and the logic is more
+complicated than I care to maintain in asm.
+
+For now, we only need to use this signal in the exit-to-Xen path, but some
+exit-to-guest paths happen in IST context too. Check the correctness in all
+exit paths to avoid the logic bit-rotting.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 21bdc25b05a0f8ab6bc73520a9ca01327360732c)
+
+x86/entry: Partially revert IST-exit checks
+
+The patch adding check_ist_exit() didn't account for the fact that
+reset_stack_and_jump() is not an ABI-preserving boundary. The IST-ness in
+%r12 doesn't survive into the next context, and is a stale value in C.
+
+This shows up in Gitlab CI for the Clang build:
+
+ https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/5112783827
+
+and in OSSTest for GCC 8:
+
+ http://logs.test-lab.xenproject.org/osstest/logs/183045/test-amd64-amd64-xl-qemuu-debianhvm-amd64/serial-pinot0.log
+
+There's no straightforward way to reconstruct the IST-exit-ness on the
+exit-to-guest path after a context switch. For now, we only need IST-exit on
+the return-to-Xen path.
+
+Fixes: 21bdc25b05a0 ("x86/entry: Track the IST-ness of an entry for the exit paths")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 9b57c800b79b96769ea3dcd6468578fa664d19f9)
+---
+ xen/arch/x86/traps.c | 13 +++++++++++++
+ xen/arch/x86/x86_64/entry.S | 13 ++++++++++++-
+ 2 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index d12004b1c6..e65cc60041 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -2315,6 +2315,19 @@ void asm_domain_crash_synchronous(unsigned long addr)
+ do_softirq();
+ }
+
++#ifdef CONFIG_DEBUG
++void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit)
++{
++ const unsigned int ist_mask =
++ (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) |
++ (1U << X86_EXC_DF) | (1U << X86_EXC_MC);
++ uint8_t ev = regs->entry_vector;
++ bool is_ist = (ev < TRAP_nr) && ((1U << ev) & ist_mask);
++
++ ASSERT(is_ist == ist_exit);
++}
++#endif
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 92279a225d..4cebc4fbe3 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -659,8 +659,15 @@ ENTRY(early_page_fault)
+ .section .text.entry, "ax", @progbits
+
+ ALIGN
+-/* No special register assumptions. */
++/* %r12=ist_exit */
+ restore_all_xen:
++
++#ifdef CONFIG_DEBUG
++ mov %rsp, %rdi
++ mov %r12, %rsi
++ call check_ist_exit
++#endif
++
+ /*
+ * Check whether we need to switch to the per-CPU page tables, in
+ * case we return to late PV exit code (from an NMI or #MC).
+@@ -1091,6 +1098,10 @@ handle_ist_exception:
+ .L_ist_dispatch_done:
+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
++
++ /* This is an IST exit */
++ mov $1, %r12d
++
+ cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
+ jne ret_from_intr
+
+--
+2.42.0
+
diff --git a/0037-tools-ocaml-xb-Drop-Xs_ring.write.patch b/0037-tools-ocaml-xb-Drop-Xs_ring.write.patch
deleted file mode 100644
index 226ae52..0000000
--- a/0037-tools-ocaml-xb-Drop-Xs_ring.write.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From f0e653fb4aea77210b8096c170e82de3c2039d89 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Fri, 16 Dec 2022 18:25:20 +0000
-Subject: [PATCH 37/89] tools/ocaml/xb: Drop Xs_ring.write
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This function is unusued (only Xs_ring.write_substring is used), and the
-bytes/string conversion here is backwards: the C stub implements the bytes
-version and then we use a Bytes.unsafe_of_string to convert a string into
-bytes.
-
-However the operation here really is read-only: we read from the string and
-write it to the ring, so the C stub should implement the read-only string
-version, and if needed we could use Bytes.unsafe_to_string to be able to send
-'bytes'. However that is not necessary as the 'bytes' version is dropped above.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 01f139215e678c2dc7d4bb3f9f2777069bb1b091)
----
- tools/ocaml/libs/xb/xs_ring.ml | 5 +----
- tools/ocaml/libs/xb/xs_ring_stubs.c | 2 +-
- 2 files changed, 2 insertions(+), 5 deletions(-)
-
-diff --git a/tools/ocaml/libs/xb/xs_ring.ml b/tools/ocaml/libs/xb/xs_ring.ml
-index db7f86bd27..dd5e014a33 100644
---- a/tools/ocaml/libs/xb/xs_ring.ml
-+++ b/tools/ocaml/libs/xb/xs_ring.ml
-@@ -25,14 +25,11 @@ module Server_features = Set.Make(struct
- end)
-
- external read: Xenmmap.mmap_interface -> bytes -> int -> int = "ml_interface_read"
--external write: Xenmmap.mmap_interface -> bytes -> int -> int = "ml_interface_write"
-+external write_substring: Xenmmap.mmap_interface -> string -> int -> int = "ml_interface_write"
-
- external _internal_set_server_features: Xenmmap.mmap_interface -> int -> unit = "ml_interface_set_server_features" [@@noalloc]
- external _internal_get_server_features: Xenmmap.mmap_interface -> int = "ml_interface_get_server_features" [@@noalloc]
-
--let write_substring mmap buff len =
-- write mmap (Bytes.unsafe_of_string buff) len
--
- let get_server_features mmap =
- (* NB only one feature currently defined above *)
- let x = _internal_get_server_features mmap in
-diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c b/tools/ocaml/libs/xb/xs_ring_stubs.c
-index 1f58524535..1243c63f03 100644
---- a/tools/ocaml/libs/xb/xs_ring_stubs.c
-+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
-@@ -112,7 +112,7 @@ CAMLprim value ml_interface_write(value ml_interface,
- CAMLlocal1(ml_result);
-
- struct mmap_interface *interface = GET_C_STRUCT(ml_interface);
-- const unsigned char *buffer = Bytes_val(ml_buffer);
-+ const char *buffer = String_val(ml_buffer);
- int len = Int_val(ml_len);
- int result;
-
---
-2.40.0
-
diff --git a/0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch b/0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch
new file mode 100644
index 0000000..6580907
--- /dev/null
+++ b/0037-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch
@@ -0,0 +1,89 @@
+From 2e2c3efcfc9f183674a8de6ed954ffbe7188b70d Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 13 Sep 2023 13:53:33 +0100
+Subject: [PATCH 37/55] x86/spec-ctrl: Issue VERW during IST exit to Xen
+
+There is a corner case where e.g. an NMI hitting an exit-to-guest path after
+SPEC_CTRL_EXIT_TO_* would have run the entire NMI handler *after* the VERW
+flush to scrub potentially sensitive data from uarch buffers.
+
+In order to compensate, issue VERW when exiting to Xen from an IST entry.
+
+SPEC_CTRL_EXIT_TO_XEN already has two reads of spec_ctrl_flags off the stack,
+and we're about to add a third. Load the field into %ebx, and list the
+register as clobbered.
+
+%r12 has been arranged to be the ist_exit signal, so add this as an input
+dependency and use it to identify when to issue a VERW.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 3ee6066bcd737756b0990d417d94eddc0b0d2585)
+---
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 20 +++++++++++++++-----
+ xen/arch/x86/x86_64/entry.S | 2 +-
+ 2 files changed, 16 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index 66c706496f..28a75796e6 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -357,10 +357,12 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+- * Requires %r14=stack_end
+- * Clobbers %rax, %rcx, %rdx
++ * Requires %r12=ist_exit, %r14=stack_end
++ * Clobbers %rax, %rbx, %rcx, %rdx
+ */
+- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
++ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx
++
++ testb $SCF_ist_sc_msr, %bl
+ jz .L\@_skip_sc_msr
+
+ /*
+@@ -371,7 +373,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ xor %edx, %edx
+
+- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
++ testb $SCF_use_shadow, %bl
+ jz .L\@_skip_sc_msr
+
+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
+@@ -380,8 +382,16 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+
+ .L\@_skip_sc_msr:
+
+- /* TODO VERW */
++ test %r12, %r12
++ jz .L\@_skip_ist_exit
++
++ /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */
++ testb $SCF_verw, %bl
++ jz .L\@_skip_verw
++ verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
++.L\@_skip_verw:
+
++.L\@_skip_ist_exit:
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 4cebc4fbe3..c12e011b4d 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -680,7 +680,7 @@ UNLIKELY_START(ne, exit_cr3)
+ UNLIKELY_END(exit_cr3)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+- SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */
++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */
+
+ RESTORE_ALL adj=8
+ iretq
+--
+2.42.0
+
diff --git a/0038-tools-oxenstored-validate-config-file-before-live-up.patch b/0038-tools-oxenstored-validate-config-file-before-live-up.patch
deleted file mode 100644
index 5b7f58a..0000000
--- a/0038-tools-oxenstored-validate-config-file-before-live-up.patch
+++ /dev/null
@@ -1,131 +0,0 @@
-From e74d868b48d55dfb20f5a41ec20fbec93d8e5deb Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 11 May 2021 15:56:50 +0000
-Subject: [PATCH 38/89] tools/oxenstored: validate config file before live
- update
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The configuration file can contain typos or various errors that could prevent
-live update from succeeding (e.g. a flag only valid on a different version).
-Unknown entries in the config file would be ignored on startup normally,
-add a strict --config-test that live-update can use to check that the config file
-is valid *for the new binary*.
-
-For compatibility with running old code during live update recognize
---live --help as an equivalent to --config-test.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit e6f07052ce4a0f0b7d4dc522d87465efb2d9ee86)
----
- tools/ocaml/xenstored/parse_arg.ml | 26 ++++++++++++++++++++++++++
- tools/ocaml/xenstored/xenstored.ml | 11 +++++++++--
- 2 files changed, 35 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/parse_arg.ml b/tools/ocaml/xenstored/parse_arg.ml
-index 7c0478e76a..5e4ca6f1f7 100644
---- a/tools/ocaml/xenstored/parse_arg.ml
-+++ b/tools/ocaml/xenstored/parse_arg.ml
-@@ -26,8 +26,14 @@ type config =
- restart: bool;
- live_reload: bool;
- disable_socket: bool;
-+ config_test: bool;
- }
-
-+let get_config_filename config_file =
-+ match config_file with
-+ | Some name -> name
-+ | None -> Define.default_config_dir ^ "/oxenstored.conf"
-+
- let do_argv =
- let pidfile = ref "" and tracefile = ref "" (* old xenstored compatibility *)
- and domain_init = ref true
-@@ -38,6 +44,8 @@ let do_argv =
- and restart = ref false
- and live_reload = ref false
- and disable_socket = ref false
-+ and config_test = ref false
-+ and help = ref false
- in
-
- let speclist =
-@@ -55,10 +63,27 @@ let do_argv =
- ("-T", Arg.Set_string tracefile, ""); (* for compatibility *)
- ("--restart", Arg.Set restart, "Read database on starting");
- ("--live", Arg.Set live_reload, "Read live dump on startup");
-+ ("--config-test", Arg.Set config_test, "Test validity of config file");
- ("--disable-socket", Arg.Unit (fun () -> disable_socket := true), "Disable socket");
-+ ("--help", Arg.Set help, "Display this list of options")
- ] in
- let usage_msg = "usage : xenstored [--config-file <filename>] [--no-domain-init] [--help] [--no-fork] [--reraise-top-level] [--restart] [--disable-socket]" in
- Arg.parse speclist (fun _ -> ()) usage_msg;
-+ let () =
-+ if !help then begin
-+ if !live_reload then
-+ (*
-+ * Transform --live --help into --config-test for backward compat with
-+ * running code during live update.
-+ * Caller will validate config and exit
-+ *)
-+ config_test := true
-+ else begin
-+ Arg.usage_string speclist usage_msg |> print_endline;
-+ exit 0
-+ end
-+ end
-+ in
- {
- domain_init = !domain_init;
- activate_access_log = !activate_access_log;
-@@ -70,4 +95,5 @@ let do_argv =
- restart = !restart;
- live_reload = !live_reload;
- disable_socket = !disable_socket;
-+ config_test = !config_test;
- }
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index 4d5851c5cb..e2638a5af2 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -88,7 +88,7 @@ let default_pidfile = Paths.xen_run_dir ^ "/xenstored.pid"
-
- let ring_scan_interval = ref 20
-
--let parse_config filename =
-+let parse_config ?(strict=false) filename =
- let pidfile = ref default_pidfile in
- let options = [
- ("merge-activate", Config.Set_bool Transaction.do_coalesce);
-@@ -129,11 +129,12 @@ let parse_config filename =
- ("xenstored-port", Config.Set_string Domains.xenstored_port); ] in
- begin try Config.read filename options (fun _ _ -> raise Not_found)
- with
-- | Config.Error err -> List.iter (fun (k, e) ->
-+ | Config.Error err as e -> List.iter (fun (k, e) ->
- match e with
- | "unknown key" -> eprintf "config: unknown key %s\n" k
- | _ -> eprintf "config: %s: %s\n" k e
- ) err;
-+ if strict then raise e
- | Sys_error m -> eprintf "error: config: %s\n" m;
- end;
- !pidfile
-@@ -358,6 +359,12 @@ let tweak_gc () =
- let () =
- Printexc.set_uncaught_exception_handler Logging.fallback_exception_handler;
- let cf = do_argv in
-+ if cf.config_test then begin
-+ let path = config_filename cf in
-+ let _pidfile:string = parse_config ~strict:true path in
-+ Printf.printf "Configuration valid at %s\n%!" path;
-+ exit 0
-+ end;
- let pidfile =
- if Sys.file_exists (config_filename cf) then
- parse_config (config_filename cf)
---
-2.40.0
-
diff --git a/0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch b/0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch
new file mode 100644
index 0000000..6f2cdcb
--- /dev/null
+++ b/0038-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch
@@ -0,0 +1,91 @@
+From 19ee1e1faa32b79274b3484cb1170a5970f1e602 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 15 Sep 2023 12:13:51 +0100
+Subject: [PATCH 38/55] x86/amd: Introduce is_zen{1,2}_uarch() predicates
+
+We already have 3 cases using STIBP as a Zen1/2 heuristic, and are about to
+introduce a 4th. Wrap the heuristic into a pair of predicates rather than
+opencoding it, and the explanation of the heuristic, at each usage site.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit de1d265001397f308c5c3c5d3ffc30e7ef8c0705)
+---
+ xen/arch/x86/cpu/amd.c | 18 ++++--------------
+ xen/arch/x86/include/asm/amd.h | 11 +++++++++++
+ 2 files changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 1bb3044be1..e94ba5a0e0 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -855,15 +855,13 @@ void amd_set_legacy_ssbd(bool enable)
+ * non-branch instructions to be ignored. It is to be set unilaterally in
+ * newer microcode.
+ *
+- * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a
+- * simple model number comparison, so use STIBP as a heuristic to separate the
+- * two uarches in Fam17h(AMD)/18h(Hygon).
++ * This chickenbit is something unrelated on Zen1.
+ */
+ void amd_init_spectral_chicken(void)
+ {
+ uint64_t val, chickenbit = 1 << 1;
+
+- if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ if (cpu_has_hypervisor || !is_zen2_uarch())
+ return;
+
+ if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit))
+@@ -912,11 +910,8 @@ void amd_check_zenbleed(void)
+ * With the Fam17h check above, most parts getting here are
+ * Zen1. They're not affected. Assume Zen2 ones making it
+ * here are affected regardless of microcode version.
+- *
+- * Zen1 vs Zen2 isn't a simple model number comparison, so use
+- * STIBP as a heuristic to distinguish.
+ */
+- if (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ if (is_zen1_uarch())
+ return;
+ good_rev = ~0U;
+ break;
+@@ -1277,12 +1272,7 @@ static int __init cf_check zen2_c6_errata_check(void)
+ */
+ s_time_t delta;
+
+- /*
+- * Zen1 vs Zen2 isn't a simple model number comparison, so use STIBP as
+- * a heuristic to separate the two uarches in Fam17h.
+- */
+- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 ||
+- !boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch())
+ return 0;
+
+ /*
+diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h
+index a975d3de26..82324110ab 100644
+--- a/xen/arch/x86/include/asm/amd.h
++++ b/xen/arch/x86/include/asm/amd.h
+@@ -140,6 +140,17 @@
+ AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf))
+
++/*
++ * The Zen1 and Zen2 microarchitectures are implemented by AMD (Fam17h) and
++ * Hygon (Fam18h) but without simple model number rules. Instead, use STIBP
++ * as a heuristic that distinguishes the two.
++ *
++ * The caller is required to perform the appropriate vendor/family checks
++ * first.
++ */
++#define is_zen1_uarch() (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
++#define is_zen2_uarch() boot_cpu_has(X86_FEATURE_AMD_STIBP)
++
+ struct cpuinfo_x86;
+ int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
+
+--
+2.42.0
+
diff --git a/0039-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch b/0039-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch
deleted file mode 100644
index c967391..0000000
--- a/0039-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 2c21e1bee6d62cbd523069e839086addf35da9f2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Thu, 12 Jan 2023 11:28:29 +0000
-Subject: [PATCH 39/89] tools/ocaml/libs: Don't declare stubs as taking void
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-There is no such thing as an Ocaml function (C stub or otherwise) taking no
-parameters. In the absence of any other parameters, unit is still passed.
-
-This doesn't explode with any ABI we care about, but would malfunction for an
-ABI environment such as stdcall.
-
-Fixes: c3afd398ba7f ("ocaml: Add XS bindings.")
-Fixes: 8b7ce06a2d34 ("ocaml: Add XC bindings.")
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit ff8b560be80b9211c303d74df7e4b3921d2bb8ca)
----
- tools/ocaml/libs/xb/xenbus_stubs.c | 5 ++---
- tools/ocaml/libs/xc/xenctrl_stubs.c | 4 ++--
- 2 files changed, 4 insertions(+), 5 deletions(-)
-
-diff --git a/tools/ocaml/libs/xb/xenbus_stubs.c b/tools/ocaml/libs/xb/xenbus_stubs.c
-index 3065181a55..97116b0782 100644
---- a/tools/ocaml/libs/xb/xenbus_stubs.c
-+++ b/tools/ocaml/libs/xb/xenbus_stubs.c
-@@ -30,10 +30,9 @@
- #include <xenctrl.h>
- #include <xen/io/xs_wire.h>
-
--CAMLprim value stub_header_size(void)
-+CAMLprim value stub_header_size(value unit)
- {
-- CAMLparam0();
-- CAMLreturn(Val_int(sizeof(struct xsd_sockmsg)));
-+ return Val_int(sizeof(struct xsd_sockmsg));
- }
-
- CAMLprim value stub_header_of_string(value s)
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index f37848ae0b..6eb0ea69da 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -67,9 +67,9 @@ static void Noreturn failwith_xc(xc_interface *xch)
- caml_raise_with_string(*caml_named_value("xc.error"), error_str);
- }
-
--CAMLprim value stub_xc_interface_open(void)
-+CAMLprim value stub_xc_interface_open(value unit)
- {
-- CAMLparam0();
-+ CAMLparam1(unit);
- xc_interface *xch;
-
- /* Don't assert XC_OPENFLAG_NON_REENTRANT because these bindings
---
-2.40.0
-
diff --git a/0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch b/0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
new file mode 100644
index 0000000..4b23d12
--- /dev/null
+++ b/0039-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
@@ -0,0 +1,228 @@
+From 9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 30 Aug 2023 20:24:25 +0100
+Subject: [PATCH 39/55] x86/spec-ctrl: Mitigate the Zen1 DIV leakage
+
+In the Zen1 microarchitecture, there is one divider in the pipeline which
+services uops from both threads. In the case of #DE, the latched result from
+the previous DIV to execute will be forwarded speculatively.
+
+This is an interesting covert channel that allows two threads to communicate
+without any system calls. It also allows userspace to obtain the result of
+the most recent DIV instruction executed (even speculatively) in the core,
+which can be from a higher privilege context.
+
+Scrub the result from the divider by executing a non-faulting divide. This
+needs performing on the exit-to-guest paths, and ist_exit-to-Xen.
+
+Alternatives in IST context is believed safe now that it's done in NMI
+context.
+
+This is XSA-439 / CVE-2023-20588.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit b5926c6ecf05c28ee99c6248c42d691ccbf0c315)
+---
+ docs/misc/xen-command-line.pandoc | 6 ++-
+ xen/arch/x86/hvm/svm/entry.S | 1 +
+ xen/arch/x86/include/asm/cpufeatures.h | 2 +-
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 17 +++++++++
+ xen/arch/x86/spec_ctrl.c | 48 +++++++++++++++++++++++-
+ 5 files changed, 71 insertions(+), 3 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index d9dae740cc..b92c8f969c 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2315,7 +2315,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ > {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
+ > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+-> unpriv-mmio,gds-mit}=<bool> ]`
++> unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
+
+ Controls for speculative execution sidechannel mitigations. By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -2437,6 +2437,10 @@ has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate
+ GDS with. Otherwise, Xen will mitigate by disabling AVX, which blocks the use
+ of the AVX2 Gather instructions.
+
++On all hardware, the `div-scrub=` option can be used to force or prevent Xen
++from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate
++DIV-leakage on hardware believed to be vulnerable.
++
+ ### sync_console
+ > `= <boolean>`
+
+diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
+index 981cd82e7c..934f12cf5c 100644
+--- a/xen/arch/x86/hvm/svm/entry.S
++++ b/xen/arch/x86/hvm/svm/entry.S
+@@ -74,6 +74,7 @@ __UNLIKELY_END(nsvm_hap)
+ 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. */
+ .endm
+ ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM
++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+
+ pop %r15
+ pop %r14
+diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
+index da0593de85..c3aad21c3b 100644
+--- a/xen/arch/x86/include/asm/cpufeatures.h
++++ b/xen/arch/x86/include/asm/cpufeatures.h
+@@ -35,7 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM
+ XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */
+ XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */
+ XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */
+-/* Bits 23 unused. */
++XEN_CPUFEATURE(SC_DIV, X86_SYNTH(23)) /* DIV scrub needed */
+ XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */
+ XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */
+ XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index 28a75796e6..f4b8b9d956 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -177,6 +177,19 @@
+ .L\@_verw_skip:
+ .endm
+
++.macro DO_SPEC_CTRL_DIV
++/*
++ * Requires nothing
++ * Clobbers %rax
++ *
++ * Issue a DIV for its flushing side effect (Zen1 uarch specific). Any
++ * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber
++ * %rdx.
++ */
++ mov $1, %eax
++ div %al
++.endm
++
+ .macro DO_SPEC_CTRL_ENTRY maybexen:req
+ /*
+ * Requires %rsp=regs (also cpuinfo if !maybexen)
+@@ -279,6 +292,8 @@
+ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
+
+ DO_SPEC_CTRL_COND_VERW
++
++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+ .endm
+
+ /*
+@@ -391,6 +406,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
+ .L\@_skip_verw:
+
++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
++
+ .L\@_skip_ist_exit:
+ .endm
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 79b98f0fe7..0ff3c895ac 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -79,6 +79,7 @@ static int8_t __initdata opt_srb_lock = -1;
+ static bool __initdata opt_unpriv_mmio;
+ static bool __ro_after_init opt_fb_clear_mmio;
+ static int8_t __initdata opt_gds_mit = -1;
++static int8_t __initdata opt_div_scrub = -1;
+
+ static int __init cf_check parse_spec_ctrl(const char *s)
+ {
+@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+ opt_srb_lock = 0;
+ opt_unpriv_mmio = false;
+ opt_gds_mit = 0;
++ opt_div_scrub = 0;
+ }
+ else if ( val > 0 )
+ rc = -EINVAL;
+@@ -285,6 +287,8 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+ opt_unpriv_mmio = val;
+ else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 )
+ opt_gds_mit = val;
++ else if ( (val = parse_boolean("div-scrub", s, ss)) >= 0 )
++ opt_div_scrub = val;
+ else
+ rc = -EINVAL;
+
+@@ -485,7 +489,7 @@ static void __init print_details(enum ind_thunk thunk)
+ "\n");
+
+ /* Settings for Xen's protection, irrespective of guests. */
+- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n",
++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
+ thunk == THUNK_NONE ? "N/A" :
+ thunk == THUNK_RETPOLINE ? "RETPOLINE" :
+ thunk == THUNK_LFENCE ? "LFENCE" :
+@@ -510,6 +514,7 @@ static void __init print_details(enum ind_thunk thunk)
+ opt_l1d_flush ? " L1D_FLUSH" : "",
+ opt_md_clear_pv || opt_md_clear_hvm ||
+ opt_fb_clear_mmio ? " VERW" : "",
++ opt_div_scrub ? " DIV" : "",
+ opt_branch_harden ? " BRANCH_HARDEN" : "");
+
+ /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
+@@ -967,6 +972,45 @@ static void __init srso_calculations(bool hw_smt_enabled)
+ setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+ }
+
++/*
++ * The Div leakage issue is specific to the AMD Zen1 microarchitecture.
++ *
++ * However, there's no $FOO_NO bit defined, so if we're virtualised we have no
++ * hope of spotting the case where we might move to vulnerable hardware. We
++ * also can't make any useful conclusion about SMT-ness.
++ *
++ * Don't check the hypervisor bit, so at least we do the safe thing when
++ * booting on something that looks like a Zen1 CPU.
++ */
++static bool __init has_div_vuln(void)
++{
++ if ( !(boot_cpu_data.x86_vendor &
++ (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
++ return false;
++
++ if ( boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18 )
++ return false;
++
++ return is_zen1_uarch();
++}
++
++static void __init div_calculations(bool hw_smt_enabled)
++{
++ bool cpu_bug_div = has_div_vuln();
++
++ if ( opt_div_scrub == -1 )
++ opt_div_scrub = cpu_bug_div;
++
++ if ( opt_div_scrub )
++ setup_force_cpu_cap(X86_FEATURE_SC_DIV);
++
++ if ( opt_smt == -1 && !cpu_has_hypervisor && cpu_bug_div && hw_smt_enabled )
++ warning_add(
++ "Booted on leaky-DIV hardware with SMT/Hyperthreading\n"
++ "enabled. Please assess your configuration and choose an\n"
++ "explicit 'smt=<bool>' setting. See XSA-439.\n");
++}
++
+ static void __init ibpb_calculations(void)
+ {
+ bool def_ibpb_entry = false;
+@@ -1726,6 +1770,8 @@ void __init init_speculation_mitigations(void)
+
+ ibpb_calculations();
+
++ div_calculations(hw_smt_enabled);
++
+ /* Check whether Eager FPU should be enabled by default. */
+ if ( opt_eager_fpu == -1 )
+ opt_eager_fpu = should_use_eager_fpu();
+--
+2.42.0
+
diff --git a/0040-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch b/0040-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch
deleted file mode 100644
index 5a26683..0000000
--- a/0040-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch
+++ /dev/null
@@ -1,80 +0,0 @@
-From 5797b798a542a7e5be34698463152cb92f18776f Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 31 Jan 2023 10:59:42 +0000
-Subject: [PATCH 40/89] tools/ocaml/libs: Allocate the correct amount of memory
- for Abstract_tag
-
-caml_alloc() takes units of Wsize (word size), not bytes. As a consequence,
-we're allocating 4 or 8 times too much memory.
-
-Ocaml has a helper, Wsize_bsize(), but it truncates cases which aren't an
-exact multiple. Use a BUILD_BUG_ON() to cover the potential for truncation,
-as there's no rounding-up form of the helper.
-
-Fixes: 8b7ce06a2d34 ("ocaml: Add XC bindings.")
-Fixes: d3e649277a13 ("ocaml: add mmap bindings implementation.")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 36eb2de31b6ecb8787698fb1a701bd708c8971b2)
----
- tools/ocaml/libs/mmap/Makefile | 2 ++
- tools/ocaml/libs/mmap/xenmmap_stubs.c | 6 +++++-
- tools/ocaml/libs/xc/xenctrl_stubs.c | 5 ++++-
- 3 files changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/libs/mmap/Makefile b/tools/ocaml/libs/mmap/Makefile
-index a621537135..855b8b2c98 100644
---- a/tools/ocaml/libs/mmap/Makefile
-+++ b/tools/ocaml/libs/mmap/Makefile
-@@ -2,6 +2,8 @@ OCAML_TOPLEVEL=$(CURDIR)/../..
- XEN_ROOT=$(OCAML_TOPLEVEL)/../..
- include $(OCAML_TOPLEVEL)/common.make
-
-+CFLAGS += $(CFLAGS_xeninclude)
-+
- OBJS = xenmmap
- INTF = $(foreach obj, $(OBJS),$(obj).cmi)
- LIBS = xenmmap.cma xenmmap.cmxa
-diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-index e03951d781..d623ad390e 100644
---- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
-+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-@@ -21,6 +21,8 @@
- #include <errno.h>
- #include "mmap_stubs.h"
-
-+#include <xen-tools/libs.h>
-+
- #include <caml/mlvalues.h>
- #include <caml/memory.h>
- #include <caml/alloc.h>
-@@ -59,7 +61,9 @@ CAMLprim value stub_mmap_init(value fd, value pflag, value mflag,
- default: caml_invalid_argument("maptype");
- }
-
-- result = caml_alloc(sizeof(struct mmap_interface), Abstract_tag);
-+ BUILD_BUG_ON((sizeof(struct mmap_interface) % sizeof(value)) != 0);
-+ result = caml_alloc(Wsize_bsize(sizeof(struct mmap_interface)),
-+ Abstract_tag);
-
- if (mmap_interface_init(Intf_val(result), Int_val(fd),
- c_pflag, c_mflag,
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index 6eb0ea69da..e25367531b 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -956,7 +956,10 @@ CAMLprim value stub_map_foreign_range(value xch, value dom,
- uint32_t c_dom;
- unsigned long c_mfn;
-
-- result = caml_alloc(sizeof(struct mmap_interface), Abstract_tag);
-+ BUILD_BUG_ON((sizeof(struct mmap_interface) % sizeof(value)) != 0);
-+ result = caml_alloc(Wsize_bsize(sizeof(struct mmap_interface)),
-+ Abstract_tag);
-+
- intf = (struct mmap_interface *) result;
-
- intf->len = Int_val(size);
---
-2.40.0
-
diff --git a/0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch b/0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch
new file mode 100644
index 0000000..21fb16f
--- /dev/null
+++ b/0040-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch
@@ -0,0 +1,455 @@
+From 90c540c58985dc774cf0a1d2dc423473d3f37267 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <JBeulich@suse.com>
+Date: Wed, 20 Sep 2023 10:33:26 +0100
+Subject: [PATCH 40/55] x86/shadow: defer releasing of PV's top-level shadow
+ reference
+
+sh_set_toplevel_shadow() re-pinning the top-level shadow we may be
+running on is not enough (and at the same time unnecessary when the
+shadow isn't what we're running on): That shadow becomes eligible for
+blowing away (from e.g. shadow_prealloc()) immediately after the
+paging lock was dropped. Yet it needs to remain valid until the actual
+page table switch occurred.
+
+Propagate up the call chain the shadow entry that needs releasing
+eventually, and carry out the release immediately after switching page
+tables. Handle update_cr3() failures by switching to idle pagetables.
+Note that various further uses of update_cr3() are HVM-only or only act
+on paused vCPU-s, in which case sh_set_toplevel_shadow() will not defer
+releasing of the reference.
+
+While changing the update_cr3() hook, also convert the "do_locking"
+parameter to boolean.
+
+This is CVE-2023-34322 / XSA-438.
+
+Reported-by: Tim Deegan <tim@xen.org>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@cloud.com>
+(cherry picked from commit fb0ff49fe9f784bfee0370c2a3c5f20e39d7a1cb)
+---
+ xen/arch/x86/include/asm/mm.h | 2 +-
+ xen/arch/x86/include/asm/paging.h | 6 ++--
+ xen/arch/x86/include/asm/shadow.h | 8 +++++
+ xen/arch/x86/mm.c | 27 +++++++++++----
+ xen/arch/x86/mm/hap/hap.c | 6 ++--
+ xen/arch/x86/mm/shadow/common.c | 55 ++++++++++++++++++++-----------
+ xen/arch/x86/mm/shadow/multi.c | 33 ++++++++++++-------
+ xen/arch/x86/mm/shadow/none.c | 4 ++-
+ xen/arch/x86/mm/shadow/private.h | 14 ++++----
+ xen/arch/x86/pv/domain.c | 25 ++++++++++++--
+ 10 files changed, 127 insertions(+), 53 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
+index d723c7c38f..a5d7fdd32e 100644
+--- a/xen/arch/x86/include/asm/mm.h
++++ b/xen/arch/x86/include/asm/mm.h
+@@ -552,7 +552,7 @@ void audit_domains(void);
+ #endif
+
+ void make_cr3(struct vcpu *v, mfn_t mfn);
+-void update_cr3(struct vcpu *v);
++pagetable_t update_cr3(struct vcpu *v);
+ int vcpu_destroy_pagetables(struct vcpu *);
+ void *do_page_walk(struct vcpu *v, unsigned long addr);
+
+diff --git a/xen/arch/x86/include/asm/paging.h b/xen/arch/x86/include/asm/paging.h
+index 6f7000d5f4..94c590f31a 100644
+--- a/xen/arch/x86/include/asm/paging.h
++++ b/xen/arch/x86/include/asm/paging.h
+@@ -138,7 +138,7 @@ struct paging_mode {
+ paddr_t ga, uint32_t *pfec,
+ unsigned int *page_order);
+ #endif
+- void (*update_cr3 )(struct vcpu *v, int do_locking,
++ pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking,
+ bool noflush);
+ void (*update_paging_modes )(struct vcpu *v);
+ bool (*flush_tlb )(const unsigned long *vcpu_bitmap);
+@@ -310,9 +310,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
+ /* Update all the things that are derived from the guest's CR3.
+ * Called when the guest changes CR3; the caller can then use v->arch.cr3
+ * as the value to load into the host CR3 to schedule this vcpu */
+-static inline void paging_update_cr3(struct vcpu *v, bool noflush)
++static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
+ {
+- paging_get_hostmode(v)->update_cr3(v, 1, noflush);
++ return paging_get_hostmode(v)->update_cr3(v, 1, noflush);
+ }
+
+ /* Update all the things that are derived from the guest's CR0/CR3/CR4.
+diff --git a/xen/arch/x86/include/asm/shadow.h b/xen/arch/x86/include/asm/shadow.h
+index dad876d294..0b72c9eda8 100644
+--- a/xen/arch/x86/include/asm/shadow.h
++++ b/xen/arch/x86/include/asm/shadow.h
+@@ -99,6 +99,9 @@ int shadow_set_allocation(struct domain *d, unsigned int pages,
+
+ int shadow_get_allocation_bytes(struct domain *d, uint64_t *size);
+
++/* Helper to invoke for deferred releasing of a top-level shadow's reference. */
++void shadow_put_top_level(struct domain *d, pagetable_t old);
++
+ #else /* !CONFIG_SHADOW_PAGING */
+
+ #define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v))
+@@ -121,6 +124,11 @@ static inline void shadow_prepare_page_type_change(struct domain *d,
+
+ static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+
++static inline void shadow_put_top_level(struct domain *d, pagetable_t old)
++{
++ ASSERT_UNREACHABLE();
++}
++
+ static inline int shadow_domctl(struct domain *d,
+ struct xen_domctl_shadow_op *sc,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index b46eee1332..e884a6fdbd 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -567,15 +567,12 @@ void write_ptbase(struct vcpu *v)
+ *
+ * Update ref counts to shadow tables appropriately.
+ */
+-void update_cr3(struct vcpu *v)
++pagetable_t update_cr3(struct vcpu *v)
+ {
+ mfn_t cr3_mfn;
+
+ if ( paging_mode_enabled(v->domain) )
+- {
+- paging_update_cr3(v, false);
+- return;
+- }
++ return paging_update_cr3(v, false);
+
+ if ( !(v->arch.flags & TF_kernel_mode) )
+ cr3_mfn = pagetable_get_mfn(v->arch.guest_table_user);
+@@ -583,6 +580,8 @@ void update_cr3(struct vcpu *v)
+ cr3_mfn = pagetable_get_mfn(v->arch.guest_table);
+
+ make_cr3(v, cr3_mfn);
++
++ return pagetable_null();
+ }
+
+ static inline void set_tlbflush_timestamp(struct page_info *page)
+@@ -3285,6 +3284,7 @@ int new_guest_cr3(mfn_t mfn)
+ struct domain *d = curr->domain;
+ int rc;
+ mfn_t old_base_mfn;
++ pagetable_t old_shadow;
+
+ if ( is_pv_32bit_domain(d) )
+ {
+@@ -3352,9 +3352,22 @@ int new_guest_cr3(mfn_t mfn)
+ if ( !VM_ASSIST(d, m2p_strict) )
+ fill_ro_mpt(mfn);
+ curr->arch.guest_table = pagetable_from_mfn(mfn);
+- update_cr3(curr);
++ old_shadow = update_cr3(curr);
++
++ /*
++ * In shadow mode update_cr3() can fail, in which case here we're still
++ * running on the prior top-level shadow (which we're about to release).
++ * Switch to the idle page tables in such an event; the guest will have
++ * been crashed already.
++ */
++ if ( likely(!mfn_eq(pagetable_get_mfn(old_shadow),
++ maddr_to_mfn(curr->arch.cr3 & ~X86_CR3_NOFLUSH))) )
++ write_ptbase(curr);
++ else
++ write_ptbase(idle_vcpu[curr->processor]);
+
+- write_ptbase(curr);
++ if ( !pagetable_is_null(old_shadow) )
++ shadow_put_top_level(d, old_shadow);
+
+ if ( likely(mfn_x(old_base_mfn) != 0) )
+ {
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index 0fc1b1d9ac..57a19c3d59 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -739,11 +739,13 @@ static bool cf_check hap_invlpg(struct vcpu *v, unsigned long linear)
+ return 1;
+ }
+
+-static void cf_check hap_update_cr3(
+- struct vcpu *v, int do_locking, bool noflush)
++static pagetable_t cf_check hap_update_cr3(
++ struct vcpu *v, bool do_locking, bool noflush)
+ {
+ v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
+ hvm_update_guest_cr3(v, noflush);
++
++ return pagetable_null();
+ }
+
+ static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap)
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index cf5e181f74..c0940f939e 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2590,13 +2590,13 @@ void cf_check shadow_update_paging_modes(struct vcpu *v)
+ }
+
+ /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
+-void sh_set_toplevel_shadow(struct vcpu *v,
+- unsigned int slot,
+- mfn_t gmfn,
+- unsigned int root_type,
+- mfn_t (*make_shadow)(struct vcpu *v,
+- mfn_t gmfn,
+- uint32_t shadow_type))
++pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
++ unsigned int slot,
++ mfn_t gmfn,
++ unsigned int root_type,
++ mfn_t (*make_shadow)(struct vcpu *v,
++ mfn_t gmfn,
++ uint32_t shadow_type))
+ {
+ mfn_t smfn;
+ pagetable_t old_entry, new_entry;
+@@ -2653,20 +2653,37 @@ void sh_set_toplevel_shadow(struct vcpu *v,
+ mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry)));
+ v->arch.paging.shadow.shadow_table[slot] = new_entry;
+
+- /* Decrement the refcount of the old contents of this slot */
+- if ( !pagetable_is_null(old_entry) )
++ /*
++ * Decrement the refcount of the old contents of this slot, unless
++ * we're still running on that shadow - in that case it'll need holding
++ * on to until the actual page table switch did occur.
++ */
++ if ( !pagetable_is_null(old_entry) && (v != current || !is_pv_domain(d)) )
+ {
+- mfn_t old_smfn = pagetable_get_mfn(old_entry);
+- /* Need to repin the old toplevel shadow if it's been unpinned
+- * by shadow_prealloc(): in PV mode we're still running on this
+- * shadow and it's not safe to free it yet. */
+- if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) )
+- {
+- printk(XENLOG_G_ERR "can't re-pin %"PRI_mfn"\n", mfn_x(old_smfn));
+- domain_crash(d);
+- }
+- sh_put_ref(d, old_smfn, 0);
++ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
++ old_entry = pagetable_null();
+ }
++
++ /*
++ * 2- and 3-level shadow mode is used for HVM only. Therefore we never run
++ * on such a shadow, so only call sites requesting an L4 shadow need to pay
++ * attention to the returned value.
++ */
++ ASSERT(pagetable_is_null(old_entry) || root_type == SH_type_l4_64_shadow);
++
++ return old_entry;
++}
++
++/*
++ * Helper invoked when releasing of a top-level shadow's reference was
++ * deferred in sh_set_toplevel_shadow() above.
++ */
++void shadow_put_top_level(struct domain *d, pagetable_t old_entry)
++{
++ ASSERT(!pagetable_is_null(old_entry));
++ paging_lock(d);
++ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
++ paging_unlock(d);
+ }
+
+ /**************************************************************************/
+diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
+index 671bf8c228..c92b354a78 100644
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -3224,7 +3224,8 @@ static void cf_check sh_detach_old_tables(struct vcpu *v)
+ }
+ }
+
+-static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
++static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
++ bool noflush)
+ /* Updates vcpu->arch.cr3 after the guest has changed CR3.
+ * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
+ * if appropriate).
+@@ -3238,6 +3239,7 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ {
+ struct domain *d = v->domain;
+ mfn_t gmfn;
++ pagetable_t old_entry = pagetable_null();
+ #if GUEST_PAGING_LEVELS == 3
+ const guest_l3e_t *gl3e;
+ unsigned int i, guest_idx;
+@@ -3247,7 +3249,7 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ if ( !is_hvm_domain(d) && !v->is_initialised )
+ {
+ ASSERT(v->arch.cr3 == 0);
+- return;
++ return old_entry;
+ }
+
+ if ( do_locking ) paging_lock(v->domain);
+@@ -3320,11 +3322,12 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ #if GUEST_PAGING_LEVELS == 4
+ if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow,
++ sh_make_shadow);
+ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
+ {
+ ASSERT(d->is_dying || d->is_shutting_down);
+- return;
++ return old_entry;
+ }
+ if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
+ {
+@@ -3368,24 +3371,30 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ gl2gfn = guest_l3e_get_gfn(gl3e[i]);
+ gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
+ if ( p2m_is_ram(p2mt) )
+- sh_set_toplevel_shadow(v, i, gl2mfn, SH_type_l2_shadow,
+- sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, i, gl2mfn,
++ SH_type_l2_shadow,
++ sh_make_shadow);
+ else
+- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
+- sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
++ sh_make_shadow);
+ }
+ else
+- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
++ sh_make_shadow);
++
++ ASSERT(pagetable_is_null(old_entry));
+ }
+ }
+ #elif GUEST_PAGING_LEVELS == 2
+ if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow,
++ sh_make_shadow);
++ ASSERT(pagetable_is_null(old_entry));
+ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
+ {
+ ASSERT(d->is_dying || d->is_shutting_down);
+- return;
++ return old_entry;
+ }
+ #else
+ #error This should never happen
+@@ -3473,6 +3482,8 @@ static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+
+ /* Release the lock, if we took it (otherwise it's the caller's problem) */
+ if ( do_locking ) paging_unlock(v->domain);
++
++ return old_entry;
+ }
+
+
+diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
+index eaaa874b11..743c0ffb85 100644
+--- a/xen/arch/x86/mm/shadow/none.c
++++ b/xen/arch/x86/mm/shadow/none.c
+@@ -52,9 +52,11 @@ static unsigned long cf_check _gva_to_gfn(
+ }
+ #endif
+
+-static void cf_check _update_cr3(struct vcpu *v, int do_locking, bool noflush)
++static pagetable_t cf_check _update_cr3(struct vcpu *v, bool do_locking,
++ bool noflush)
+ {
+ ASSERT_UNREACHABLE();
++ return pagetable_null();
+ }
+
+ static void cf_check _update_paging_modes(struct vcpu *v)
+diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
+index c2bb1ed3c3..91f798c5aa 100644
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -391,13 +391,13 @@ mfn_t shadow_alloc(struct domain *d,
+ void shadow_free(struct domain *d, mfn_t smfn);
+
+ /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
+-void sh_set_toplevel_shadow(struct vcpu *v,
+- unsigned int slot,
+- mfn_t gmfn,
+- unsigned int root_type,
+- mfn_t (*make_shadow)(struct vcpu *v,
+- mfn_t gmfn,
+- uint32_t shadow_type));
++pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
++ unsigned int slot,
++ mfn_t gmfn,
++ unsigned int root_type,
++ mfn_t (*make_shadow)(struct vcpu *v,
++ mfn_t gmfn,
++ uint32_t shadow_type));
+
+ /* Update the shadows in response to a pagetable write from Xen */
+ int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size);
+diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
+index 5c92812dc6..2a445bb17b 100644
+--- a/xen/arch/x86/pv/domain.c
++++ b/xen/arch/x86/pv/domain.c
+@@ -424,10 +424,13 @@ bool __init xpti_pcid_enabled(void)
+
+ static void _toggle_guest_pt(struct vcpu *v)
+ {
++ bool guest_update;
++ pagetable_t old_shadow;
+ unsigned long cr3;
+
+ v->arch.flags ^= TF_kernel_mode;
+- update_cr3(v);
++ guest_update = v->arch.flags & TF_kernel_mode;
++ old_shadow = update_cr3(v);
+
+ /*
+ * Don't flush user global mappings from the TLB. Don't tick TLB clock.
+@@ -436,13 +439,31 @@ static void _toggle_guest_pt(struct vcpu *v)
+ * TLB flush (for just the incoming PCID), as the top level page table may
+ * have changed behind our backs. To be on the safe side, suppress the
+ * no-flush unconditionally in this case.
++ *
++ * Furthermore in shadow mode update_cr3() can fail, in which case here
++ * we're still running on the prior top-level shadow (which we're about
++ * to release). Switch to the idle page tables in such an event; the
++ * guest will have been crashed already.
+ */
+ cr3 = v->arch.cr3;
+ if ( shadow_mode_enabled(v->domain) )
++ {
+ cr3 &= ~X86_CR3_NOFLUSH;
++
++ if ( unlikely(mfn_eq(pagetable_get_mfn(old_shadow),
++ maddr_to_mfn(cr3))) )
++ {
++ cr3 = idle_vcpu[v->processor]->arch.cr3;
++ /* Also suppress runstate/time area updates below. */
++ guest_update = false;
++ }
++ }
+ write_cr3(cr3);
+
+- if ( !(v->arch.flags & TF_kernel_mode) )
++ if ( !pagetable_is_null(old_shadow) )
++ shadow_put_top_level(v->domain, old_shadow);
++
++ if ( !guest_update )
+ return;
+
+ if ( v->arch.pv.need_update_runstate_area && update_runstate_area(v) )
+--
+2.42.0
+
diff --git a/0041-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch b/0041-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch
deleted file mode 100644
index cabcdd0..0000000
--- a/0041-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch
+++ /dev/null
@@ -1,213 +0,0 @@
-From 021b82cc0c71ba592439f175c1ededa800b172a9 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Thu, 12 Jan 2023 17:48:29 +0000
-Subject: [PATCH 41/89] tools/ocaml/evtchn: Don't reference Custom objects with
- the GC lock released
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The modification to the _H() macro for Ocaml 5 support introduced a subtle
-bug. From the manual:
-
- https://ocaml.org/manual/intfc.html#ss:parallel-execution-long-running-c-code
-
-"After caml_release_runtime_system() was called and until
-caml_acquire_runtime_system() is called, the C code must not access any OCaml
-data, nor call any function of the run-time system, nor call back into OCaml
-code."
-
-Previously, the value was a naked C pointer, so dereferencing it wasn't
-"accessing any Ocaml data", but the fix to avoid naked C pointers added a
-layer of indirection through an Ocaml Custom object, meaning that the common
-pattern of using _H() in a blocking section is unsafe.
-
-In order to fix:
-
- * Drop the _H() macro and replace it with a static inline xce_of_val().
- * Opencode the assignment into Data_custom_val() in the two constructors.
- * Rename "value xce" parameters to "value xce_val" so we can consistently
- have "xenevtchn_handle *xce" on the stack, and obtain the pointer with the
- GC lock still held.
-
-Fixes: 22d5affdf0ce ("tools/ocaml/evtchn: OCaml 5 support, fix potential resource leak")
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 2636d8ff7a670c4d2485757dbe966e36c259a960)
----
- tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 60 +++++++++++--------
- 1 file changed, 35 insertions(+), 25 deletions(-)
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-index aa8a69cc1e..d7881ca95f 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-@@ -33,11 +33,14 @@
- #include <caml/fail.h>
- #include <caml/signals.h>
-
--#define _H(__h) (*((xenevtchn_handle **)Data_custom_val(__h)))
-+static inline xenevtchn_handle *xce_of_val(value v)
-+{
-+ return *(xenevtchn_handle **)Data_custom_val(v);
-+}
-
- static void stub_evtchn_finalize(value v)
- {
-- xenevtchn_close(_H(v));
-+ xenevtchn_close(xce_of_val(v));
- }
-
- static struct custom_operations xenevtchn_ops = {
-@@ -68,7 +71,7 @@ CAMLprim value stub_eventchn_init(value cloexec)
- caml_failwith("open failed");
-
- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-- _H(result) = xce;
-+ *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
- }
-@@ -87,18 +90,19 @@ CAMLprim value stub_eventchn_fdopen(value fdval)
- caml_failwith("evtchn fdopen failed");
-
- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-- _H(result) = xce;
-+ *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
- }
-
--CAMLprim value stub_eventchn_fd(value xce)
-+CAMLprim value stub_eventchn_fd(value xce_val)
- {
-- CAMLparam1(xce);
-+ CAMLparam1(xce_val);
- CAMLlocal1(result);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- int fd;
-
-- fd = xenevtchn_fd(_H(xce));
-+ fd = xenevtchn_fd(xce);
- if (fd == -1)
- caml_failwith("evtchn fd failed");
-
-@@ -107,13 +111,14 @@ CAMLprim value stub_eventchn_fd(value xce)
- CAMLreturn(result);
- }
-
--CAMLprim value stub_eventchn_notify(value xce, value port)
-+CAMLprim value stub_eventchn_notify(value xce_val, value port)
- {
-- CAMLparam2(xce, port);
-+ CAMLparam2(xce_val, port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- int rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_notify(_H(xce), Int_val(port));
-+ rc = xenevtchn_notify(xce, Int_val(port));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -122,15 +127,16 @@ CAMLprim value stub_eventchn_notify(value xce, value port)
- CAMLreturn(Val_unit);
- }
-
--CAMLprim value stub_eventchn_bind_interdomain(value xce, value domid,
-+CAMLprim value stub_eventchn_bind_interdomain(value xce_val, value domid,
- value remote_port)
- {
-- CAMLparam3(xce, domid, remote_port);
-+ CAMLparam3(xce_val, domid, remote_port);
- CAMLlocal1(port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- xenevtchn_port_or_error_t rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_bind_interdomain(_H(xce), Int_val(domid), Int_val(remote_port));
-+ rc = xenevtchn_bind_interdomain(xce, Int_val(domid), Int_val(remote_port));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -140,14 +146,15 @@ CAMLprim value stub_eventchn_bind_interdomain(value xce, value domid,
- CAMLreturn(port);
- }
-
--CAMLprim value stub_eventchn_bind_virq(value xce, value virq_type)
-+CAMLprim value stub_eventchn_bind_virq(value xce_val, value virq_type)
- {
-- CAMLparam2(xce, virq_type);
-+ CAMLparam2(xce_val, virq_type);
- CAMLlocal1(port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- xenevtchn_port_or_error_t rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_bind_virq(_H(xce), Int_val(virq_type));
-+ rc = xenevtchn_bind_virq(xce, Int_val(virq_type));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -157,13 +164,14 @@ CAMLprim value stub_eventchn_bind_virq(value xce, value virq_type)
- CAMLreturn(port);
- }
-
--CAMLprim value stub_eventchn_unbind(value xce, value port)
-+CAMLprim value stub_eventchn_unbind(value xce_val, value port)
- {
-- CAMLparam2(xce, port);
-+ CAMLparam2(xce_val, port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- int rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_unbind(_H(xce), Int_val(port));
-+ rc = xenevtchn_unbind(xce, Int_val(port));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -172,14 +180,15 @@ CAMLprim value stub_eventchn_unbind(value xce, value port)
- CAMLreturn(Val_unit);
- }
-
--CAMLprim value stub_eventchn_pending(value xce)
-+CAMLprim value stub_eventchn_pending(value xce_val)
- {
-- CAMLparam1(xce);
-+ CAMLparam1(xce_val);
- CAMLlocal1(result);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- xenevtchn_port_or_error_t port;
-
- caml_enter_blocking_section();
-- port = xenevtchn_pending(_H(xce));
-+ port = xenevtchn_pending(xce);
- caml_leave_blocking_section();
-
- if (port == -1)
-@@ -189,16 +198,17 @@ CAMLprim value stub_eventchn_pending(value xce)
- CAMLreturn(result);
- }
-
--CAMLprim value stub_eventchn_unmask(value xce, value _port)
-+CAMLprim value stub_eventchn_unmask(value xce_val, value _port)
- {
-- CAMLparam2(xce, _port);
-+ CAMLparam2(xce_val, _port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- evtchn_port_t port;
- int rc;
-
- port = Int_val(_port);
-
- caml_enter_blocking_section();
-- rc = xenevtchn_unmask(_H(xce), port);
-+ rc = xenevtchn_unmask(xce, port);
- caml_leave_blocking_section();
-
- if (rc)
---
-2.40.0
-
diff --git a/0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch b/0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch
new file mode 100644
index 0000000..1edecc8
--- /dev/null
+++ b/0041-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch
@@ -0,0 +1,64 @@
+From c4e05c97f57d236040d1da5c1fbf6e3699dc86ea Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Fri, 22 Sep 2023 11:32:16 +0100
+Subject: [PATCH 41/55] tools/xenstored: domain_entry_fix(): Handle conflicting
+ transaction
+
+The function domain_entry_fix() will be initially called to check if the
+quota is correct before attempting to commit any nodes. So it would be
+possible that accounting is temporarily negative. This is the case
+in the following sequence:
+
+ 1) Create 50 nodes
+ 2) Start two transactions
+ 3) Delete all the nodes in each transaction
+ 4) Commit the two transactions
+
+Because the first transaction will have succeeded and updated the
+accounting, there is no guarantee that 'd->nbentry + num' will still
+be above 0. So the assert() would be triggered.
+The assert() was introduced in dbef1f748289 ("tools/xenstore: simplify
+and fix per domain node accounting") with the assumption that the
+value can't be negative. As this is not true revert to the original
+check but restricted to the path where we don't update. Take the
+opportunity to explain the rationale behind the check.
+
+This is CVE-2023-34323 / XSA-440.
+
+Fixes: dbef1f748289 ("tools/xenstore: simplify and fix per domain node accounting")
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+---
+ tools/xenstore/xenstored_domain.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index aa86892fed..6074df210c 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -1094,10 +1094,20 @@ int domain_entry_fix(unsigned int domid, int num, bool update)
+ }
+
+ cnt = d->nbentry + num;
+- assert(cnt >= 0);
+
+- if (update)
++ if (update) {
++ assert(cnt >= 0);
+ d->nbentry = cnt;
++ } else if (cnt < 0) {
++ /*
++ * In a transaction when a node is being added/removed AND
++ * the same node has been added/removed outside the
++ * transaction in parallel, the result value may be negative.
++ * This is no problem, as the transaction will fail due to
++ * the resulting conflict. So override 'cnt'.
++ */
++ cnt = 0;
++ }
+
+ return domid_is_unprivileged(domid) ? cnt : 0;
+ }
+--
+2.42.0
+
diff --git a/0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch b/0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch
new file mode 100644
index 0000000..66597c2
--- /dev/null
+++ b/0042-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch
@@ -0,0 +1,186 @@
+From 0d8f9f7f2706e8ad8dfff203173693b631339b86 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Tue, 13 Jun 2023 15:01:05 +0200
+Subject: [PATCH 42/55] iommu/amd-vi: flush IOMMU TLB when flushing the DTE
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The caching invalidation guidelines from the AMD-Vi specification (48882—Rev
+3.07-PUB—Oct 2022) seem to be misleading on some hardware, as devices will
+malfunction (see stale DMA mappings) if some fields of the DTE are updated but
+the IOMMU TLB is not flushed. This has been observed in practice on AMD
+systems. Due to the lack of guidance from the currently published
+specification this patch aims to increase the flushing done in order to prevent
+device malfunction.
+
+In order to fix, issue an INVALIDATE_IOMMU_PAGES command from
+amd_iommu_flush_device(), flushing all the address space. Note this requires
+callers to be adjusted in order to pass the DomID on the DTE previous to the
+modification.
+
+Some call sites don't provide a valid DomID to amd_iommu_flush_device() in
+order to avoid the flush. That's because the device had address translations
+disabled and hence the previous DomID on the DTE is not valid. Note the
+current logic relies on the entity disabling address translations to also flush
+the TLB of the in use DomID.
+
+Device I/O TLB flushing when ATS are enabled is not covered by the current
+change, as ATS usage is not security supported.
+
+This is XSA-442 / CVE-2023-34326
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 5fc98b97084a46884acef9320e643faf40d42212)
+---
+ xen/drivers/passthrough/amd/iommu.h | 3 ++-
+ xen/drivers/passthrough/amd/iommu_cmd.c | 10 +++++++++-
+ xen/drivers/passthrough/amd/iommu_guest.c | 5 +++--
+ xen/drivers/passthrough/amd/iommu_init.c | 6 +++++-
+ xen/drivers/passthrough/amd/pci_amd_iommu.c | 14 ++++++++++----
+ 5 files changed, 29 insertions(+), 9 deletions(-)
+
+diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
+index 5429ada58e..a58be28bf9 100644
+--- a/xen/drivers/passthrough/amd/iommu.h
++++ b/xen/drivers/passthrough/amd/iommu.h
+@@ -283,7 +283,8 @@ void amd_iommu_flush_pages(struct domain *d, unsigned long dfn,
+ unsigned int order);
+ void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
+ uint64_t gaddr, unsigned int order);
+-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf);
++void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
++ domid_t domid);
+ void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf);
+ void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
+
+diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c
+index 40ddf366bb..cb28b36abc 100644
+--- a/xen/drivers/passthrough/amd/iommu_cmd.c
++++ b/xen/drivers/passthrough/amd/iommu_cmd.c
+@@ -363,10 +363,18 @@ void amd_iommu_flush_pages(struct domain *d,
+ _amd_iommu_flush_pages(d, __dfn_to_daddr(dfn), order);
+ }
+
+-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf)
++void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
++ domid_t domid)
+ {
+ invalidate_dev_table_entry(iommu, bdf);
+ flush_command_buffer(iommu, 0);
++
++ /* Also invalidate IOMMU TLB entries when flushing the DTE. */
++ if ( domid != DOMID_INVALID )
++ {
++ invalidate_iommu_pages(iommu, INV_IOMMU_ALL_PAGES_ADDRESS, domid, 0);
++ flush_command_buffer(iommu, 0);
++ }
+ }
+
+ void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf)
+diff --git a/xen/drivers/passthrough/amd/iommu_guest.c b/xen/drivers/passthrough/amd/iommu_guest.c
+index 80a331f546..be86bce6fb 100644
+--- a/xen/drivers/passthrough/amd/iommu_guest.c
++++ b/xen/drivers/passthrough/amd/iommu_guest.c
+@@ -385,7 +385,7 @@ static int do_completion_wait(struct domain *d, cmd_entry_t *cmd)
+
+ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
+ {
+- uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id;
++ uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id, prev_domid;
+ struct amd_iommu_dte *gdte, *mdte, *dte_base;
+ struct amd_iommu *iommu = NULL;
+ struct guest_iommu *g_iommu;
+@@ -445,13 +445,14 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
+ req_id = get_dma_requestor_id(iommu->seg, mbdf);
+ dte_base = iommu->dev_table.buffer;
+ mdte = &dte_base[req_id];
++ prev_domid = mdte->domain_id;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ amd_iommu_flush_device(iommu, req_id, prev_domid);
+
+ return 0;
+ }
+diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
+index 166570648d..101a60ce17 100644
+--- a/xen/drivers/passthrough/amd/iommu_init.c
++++ b/xen/drivers/passthrough/amd/iommu_init.c
+@@ -1547,7 +1547,11 @@ static int cf_check _invalidate_all_devices(
+ req_id = ivrs_mappings[bdf].dte_requestor_id;
+ if ( iommu )
+ {
+- amd_iommu_flush_device(iommu, req_id);
++ /*
++ * IOMMU TLB flush performed separately (see
++ * invalidate_all_domain_pages()).
++ */
++ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID);
+ amd_iommu_flush_intremap(iommu, req_id);
+ }
+ }
+diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
+index 94e3775506..8641b84712 100644
+--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
+@@ -192,10 +192,13 @@ static int __must_check amd_iommu_setup_domain_device(
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ /* DTE didn't have DMA translations enabled, do not flush the TLB. */
++ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID);
+ }
+ else if ( dte->pt_root != mfn_x(page_to_mfn(root_pg)) )
+ {
++ domid_t prev_domid = dte->domain_id;
++
+ /*
+ * Strictly speaking if the device is the only one with this requestor
+ * ID, it could be allowed to be re-assigned regardless of unity map
+@@ -252,7 +255,7 @@ static int __must_check amd_iommu_setup_domain_device(
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ amd_iommu_flush_device(iommu, req_id, prev_domid);
+ }
+ else
+ spin_unlock_irqrestore(&iommu->lock, flags);
+@@ -421,6 +424,8 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
+ spin_lock_irqsave(&iommu->lock, flags);
+ if ( dte->tv || dte->v )
+ {
++ domid_t prev_domid = dte->domain_id;
++
+ /* See the comment in amd_iommu_setup_device_table(). */
+ dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED;
+ smp_wmb();
+@@ -439,7 +444,7 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ amd_iommu_flush_device(iommu, req_id, prev_domid);
+
+ AMD_IOMMU_DEBUG("Disable: device id = %#x, "
+ "domain = %d, paging mode = %d\n",
+@@ -610,7 +615,8 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, bdf);
++ /* DTE didn't have DMA translations enabled, do not flush the TLB. */
++ amd_iommu_flush_device(iommu, bdf, DOMID_INVALID);
+ }
+
+ if ( amd_iommu_reserve_domain_unity_map(
+--
+2.42.0
+
diff --git a/0042-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch b/0042-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch
deleted file mode 100644
index ac3e86d..0000000
--- a/0042-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From afdcc108566e5a4ee352b6427c98ebad6885a81d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Thu, 12 Jan 2023 11:38:38 +0000
-Subject: [PATCH 42/89] tools/ocaml/xc: Fix binding for
- xc_domain_assign_device()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The patch adding this binding was plain broken, and unreviewed. It modified
-the C stub to add a 4th parameter without an equivalent adjustment in the
-Ocaml side of the bindings.
-
-In 64bit builds, this causes us to dereference whatever dead value is in %rcx
-when trying to interpret the rflags parameter.
-
-This has gone unnoticed because Xapi doesn't use this binding (it has its
-own), but unbreak the binding by passing RDM_RELAXED unconditionally for
-now (matching the libxl default behaviour).
-
-Fixes: 9b34056cb4 ("tools: extend xc_assign_device() to support rdm reservation policy")
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 4250683842104f02996428f93927a035c8e19266)
----
- tools/ocaml/libs/xc/xenctrl_stubs.c | 17 +++++------------
- 1 file changed, 5 insertions(+), 12 deletions(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index e25367531b..f376d94334 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -1139,17 +1139,12 @@ CAMLprim value stub_xc_domain_test_assign_device(value xch, value domid, value d
- CAMLreturn(Val_bool(ret == 0));
- }
-
--static int domain_assign_device_rdm_flag_table[] = {
-- XEN_DOMCTL_DEV_RDM_RELAXED,
--};
--
--CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc,
-- value rflag)
-+CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc)
- {
-- CAMLparam4(xch, domid, desc, rflag);
-+ CAMLparam3(xch, domid, desc);
- int ret;
- int domain, bus, dev, func;
-- uint32_t sbdf, flag;
-+ uint32_t sbdf;
-
- domain = Int_val(Field(desc, 0));
- bus = Int_val(Field(desc, 1));
-@@ -1157,10 +1152,8 @@ CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc,
- func = Int_val(Field(desc, 3));
- sbdf = encode_sbdf(domain, bus, dev, func);
-
-- ret = Int_val(Field(rflag, 0));
-- flag = domain_assign_device_rdm_flag_table[ret];
--
-- ret = xc_assign_device(_H(xch), _D(domid), sbdf, flag);
-+ ret = xc_assign_device(_H(xch), _D(domid), sbdf,
-+ XEN_DOMCTL_DEV_RDM_RELAXED);
-
- if (ret < 0)
- failwith_xc(_H(xch));
---
-2.40.0
-
diff --git a/0043-libfsimage-xfs-Remove-dead-code.patch b/0043-libfsimage-xfs-Remove-dead-code.patch
new file mode 100644
index 0000000..cbb9ad4
--- /dev/null
+++ b/0043-libfsimage-xfs-Remove-dead-code.patch
@@ -0,0 +1,71 @@
+From d665c6690eb3c2c86cb2c7dac09804211481f926 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:50 +0100
+Subject: [PATCH 43/55] libfsimage/xfs: Remove dead code
+
+xfs_info.agnolog (and related code) and XFS_INO_AGBNO_BITS are dead code
+that serve no purpose.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 37fc1e6c1c5c63aafd9cfd76a37728d5baea7d71)
+---
+ tools/libfsimage/xfs/fsys_xfs.c | 18 ------------------
+ 1 file changed, 18 deletions(-)
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index d735a88e55..2800699f59 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -37,7 +37,6 @@ struct xfs_info {
+ int blklog;
+ int inopblog;
+ int agblklog;
+- int agnolog;
+ unsigned int nextents;
+ xfs_daddr_t next;
+ xfs_daddr_t daddr;
+@@ -65,9 +64,7 @@ static struct xfs_info xfs;
+
+ #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1))
+ #define XFS_INO_OFFSET_BITS xfs.inopblog
+-#define XFS_INO_AGBNO_BITS xfs.agblklog
+ #define XFS_INO_AGINO_BITS (xfs.agblklog + xfs.inopblog)
+-#define XFS_INO_AGNO_BITS xfs.agnolog
+
+ static inline xfs_agblock_t
+ agino2agbno (xfs_agino_t agino)
+@@ -149,20 +146,6 @@ xt_len (xfs_bmbt_rec_32_t *r)
+ return le32(r->l3) & mask32lo(21);
+ }
+
+-static inline int
+-xfs_highbit32(xfs_uint32_t v)
+-{
+- int i;
+-
+- if (--v) {
+- for (i = 0; i < 31; i++, v >>= 1) {
+- if (v == 0)
+- return i;
+- }
+- }
+- return 0;
+-}
+-
+ static int
+ isinxt (xfs_fileoff_t key, xfs_fileoff_t offset, xfs_filblks_t len)
+ {
+@@ -472,7 +455,6 @@ xfs_mount (fsi_file_t *ffi, const char *options)
+
+ xfs.inopblog = super.sb_inopblog;
+ xfs.agblklog = super.sb_agblklog;
+- xfs.agnolog = xfs_highbit32 (le32(super.sb_agcount));
+
+ xfs.btnode_ptr0_off =
+ ((xfs.bsize - sizeof(xfs_btree_block_t)) /
+--
+2.42.0
+
diff --git a/0043-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch b/0043-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch
deleted file mode 100644
index b7fec46..0000000
--- a/0043-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-From bf935b1ff7cc76b2d25f877e56a359afaafcac1f Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 31 Jan 2023 17:19:30 +0000
-Subject: [PATCH 43/89] tools/ocaml/xc: Don't reference Abstract_Tag objects
- with the GC lock released
-
-The intf->{addr,len} references in the xc_map_foreign_range() call are unsafe.
-From the manual:
-
- https://ocaml.org/manual/intfc.html#ss:parallel-execution-long-running-c-code
-
-"After caml_release_runtime_system() was called and until
-caml_acquire_runtime_system() is called, the C code must not access any OCaml
-data, nor call any function of the run-time system, nor call back into OCaml
-code."
-
-More than what the manual says, the intf pointer is (potentially) invalidated
-by caml_enter_blocking_section() if another thread happens to perform garbage
-collection at just the right (wrong) moment.
-
-Rewrite the logic. There's no need to stash data in the Ocaml object until
-the success path at the very end.
-
-Fixes: 8b7ce06a2d34 ("ocaml: Add XC bindings.")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 9e7c74e6f9fd2e44df1212643b80af9032b45b07)
----
- tools/ocaml/libs/xc/xenctrl_stubs.c | 23 +++++++++++------------
- 1 file changed, 11 insertions(+), 12 deletions(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index f376d94334..facb561577 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -953,26 +953,25 @@ CAMLprim value stub_map_foreign_range(value xch, value dom,
- CAMLparam4(xch, dom, size, mfn);
- CAMLlocal1(result);
- struct mmap_interface *intf;
-- uint32_t c_dom;
-- unsigned long c_mfn;
-+ unsigned long c_mfn = Nativeint_val(mfn);
-+ int len = Int_val(size);
-+ void *ptr;
-
- BUILD_BUG_ON((sizeof(struct mmap_interface) % sizeof(value)) != 0);
- result = caml_alloc(Wsize_bsize(sizeof(struct mmap_interface)),
- Abstract_tag);
-
-- intf = (struct mmap_interface *) result;
--
-- intf->len = Int_val(size);
--
-- c_dom = _D(dom);
-- c_mfn = Nativeint_val(mfn);
- caml_enter_blocking_section();
-- intf->addr = xc_map_foreign_range(_H(xch), c_dom,
-- intf->len, PROT_READ|PROT_WRITE,
-- c_mfn);
-+ ptr = xc_map_foreign_range(_H(xch), _D(dom), len,
-+ PROT_READ|PROT_WRITE, c_mfn);
- caml_leave_blocking_section();
-- if (!intf->addr)
-+
-+ if (!ptr)
- caml_failwith("xc_map_foreign_range error");
-+
-+ intf = Data_abstract_val(result);
-+ *intf = (struct mmap_interface){ ptr, len };
-+
- CAMLreturn(result);
- }
-
---
-2.40.0
-
diff --git a/0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch b/0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch
new file mode 100644
index 0000000..880ff83
--- /dev/null
+++ b/0044-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch
@@ -0,0 +1,33 @@
+From f1cd620cc3572c858e276463e05f695d949362c5 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:51 +0100
+Subject: [PATCH 44/55] libfsimage/xfs: Amend mask32lo() to allow the value 32
+
+agblklog could plausibly be 32, but that would overflow this shift.
+Perform the shift as ULL and cast to u32 at the end instead.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit ddc45e4eea946bb373a4b4a60c84bf9339cf413b)
+---
+ tools/libfsimage/xfs/fsys_xfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index 2800699f59..4720bb4505 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -60,7 +60,7 @@ static struct xfs_info xfs;
+ #define inode ((xfs_dinode_t *)((char *)FSYS_BUF + 8192))
+ #define icore (inode->di_core)
+
+-#define mask32lo(n) (((xfs_uint32_t)1 << (n)) - 1)
++#define mask32lo(n) ((xfs_uint32_t)((1ull << (n)) - 1))
+
+ #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1))
+ #define XFS_INO_OFFSET_BITS xfs.inopblog
+--
+2.42.0
+
diff --git a/0044-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch b/0044-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch
deleted file mode 100644
index 8876ab7..0000000
--- a/0044-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 587823eca162d063027faf1826ec3544f0a06e78 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 1 Feb 2023 11:27:42 +0000
-Subject: [PATCH 44/89] tools/ocaml/libs: Fix memory/resource leaks with
- caml_alloc_custom()
-
-All caml_alloc_*() functions can throw exceptions, and longjump out of
-context. If this happens, we leak the xch/xce handle.
-
-Reorder the logic to allocate the the Ocaml object first.
-
-Fixes: 8b3c06a3e545 ("tools/ocaml/xenctrl: OCaml 5 support, fix use-after-free")
-Fixes: 22d5affdf0ce ("tools/ocaml/evtchn: OCaml 5 support, fix potential resource leak")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit d69ccf52ad467ccc22029172a8e61dc621187889)
----
- tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-index d7881ca95f..de2fc29292 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-@@ -63,6 +63,8 @@ CAMLprim value stub_eventchn_init(value cloexec)
- if ( !Bool_val(cloexec) )
- flags |= XENEVTCHN_NO_CLOEXEC;
-
-+ result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-+
- caml_enter_blocking_section();
- xce = xenevtchn_open(NULL, flags);
- caml_leave_blocking_section();
-@@ -70,7 +72,6 @@ CAMLprim value stub_eventchn_init(value cloexec)
- if (xce == NULL)
- caml_failwith("open failed");
-
-- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
- *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
-@@ -82,6 +83,8 @@ CAMLprim value stub_eventchn_fdopen(value fdval)
- CAMLlocal1(result);
- xenevtchn_handle *xce;
-
-+ result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-+
- caml_enter_blocking_section();
- xce = xenevtchn_fdopen(NULL, Int_val(fdval), 0);
- caml_leave_blocking_section();
-@@ -89,7 +92,6 @@ CAMLprim value stub_eventchn_fdopen(value fdval)
- if (xce == NULL)
- caml_failwith("evtchn fdopen failed");
-
-- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
- *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
---
-2.40.0
-
diff --git a/0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch b/0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch
new file mode 100644
index 0000000..01ae52a
--- /dev/null
+++ b/0045-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch
@@ -0,0 +1,137 @@
+From 78143c5336c8316bcc648e964d65a07f216cf77f Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:52 +0100
+Subject: [PATCH 45/55] libfsimage/xfs: Sanity-check the superblock during
+ mounts
+
+Sanity-check the XFS superblock for wellformedness at the mount handler.
+This forces pygrub to abort parsing a potentially malformed filesystem and
+ensures the invariants assumed throughout the rest of the code hold.
+
+Also, derive parameters from previously sanitized parameters where possible
+(rather than reading them off the superblock)
+
+The code doesn't try to avoid overflowing the end of the disk, because
+that's an unlikely and benign error. Parameters used in calculations of
+xfs_daddr_t (like the root inode index) aren't in critical need of being
+sanitized.
+
+The sanitization of agblklog is basically checking that no obvious
+overflows happen on agblklog, and then ensuring agblocks is contained in
+the range (2^(sb_agblklog-1), 2^sb_agblklog].
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 620500dd1baf33347dfde5e7fde7cf7fe347da5c)
+---
+ tools/libfsimage/xfs/fsys_xfs.c | 48 ++++++++++++++++++++++++++-------
+ tools/libfsimage/xfs/xfs.h | 12 +++++++++
+ 2 files changed, 50 insertions(+), 10 deletions(-)
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index 4720bb4505..e4eb7e1ee2 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -17,6 +17,7 @@
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
++#include <stdbool.h>
+ #include <xenfsimage_grub.h>
+ #include "xfs.h"
+
+@@ -433,29 +434,56 @@ first_dentry (fsi_file_t *ffi, xfs_ino_t *ino)
+ return next_dentry (ffi, ino);
+ }
+
++static bool
++xfs_sb_is_invalid (const xfs_sb_t *super)
++{
++ return (le32(super->sb_magicnum) != XFS_SB_MAGIC)
++ || ((le16(super->sb_versionnum) & XFS_SB_VERSION_NUMBITS) !=
++ XFS_SB_VERSION_4)
++ || (super->sb_inodelog < XFS_SB_INODELOG_MIN)
++ || (super->sb_inodelog > XFS_SB_INODELOG_MAX)
++ || (super->sb_blocklog < XFS_SB_BLOCKLOG_MIN)
++ || (super->sb_blocklog > XFS_SB_BLOCKLOG_MAX)
++ || (super->sb_blocklog < super->sb_inodelog)
++ || (super->sb_agblklog > XFS_SB_AGBLKLOG_MAX)
++ || ((1ull << super->sb_agblklog) < le32(super->sb_agblocks))
++ || (((1ull << super->sb_agblklog) >> 1) >=
++ le32(super->sb_agblocks))
++ || ((super->sb_blocklog + super->sb_dirblklog) >=
++ XFS_SB_DIRBLK_NUMBITS);
++}
++
+ static int
+ xfs_mount (fsi_file_t *ffi, const char *options)
+ {
+ xfs_sb_t super;
+
+ if (!devread (ffi, 0, 0, sizeof(super), (char *)&super)
+- || (le32(super.sb_magicnum) != XFS_SB_MAGIC)
+- || ((le16(super.sb_versionnum)
+- & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4) ) {
++ || xfs_sb_is_invalid(&super)) {
+ return 0;
+ }
+
+- xfs.bsize = le32 (super.sb_blocksize);
+- xfs.blklog = super.sb_blocklog;
+- xfs.bdlog = xfs.blklog - SECTOR_BITS;
++ /*
++ * Not sanitized. It's exclusively used to generate disk addresses,
++ * so it's not important from a security standpoint.
++ */
+ xfs.rootino = le64 (super.sb_rootino);
+- xfs.isize = le16 (super.sb_inodesize);
+- xfs.agblocks = le32 (super.sb_agblocks);
+- xfs.dirbsize = xfs.bsize << super.sb_dirblklog;
+
+- xfs.inopblog = super.sb_inopblog;
++ /*
++ * Sanitized to be consistent with each other, only used to
++ * generate disk addresses, so it's safe
++ */
++ xfs.agblocks = le32 (super.sb_agblocks);
+ xfs.agblklog = super.sb_agblklog;
+
++ /* Derived from sanitized parameters */
++ xfs.bsize = 1 << super.sb_blocklog;
++ xfs.blklog = super.sb_blocklog;
++ xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
++ xfs.isize = 1 << super.sb_inodelog;
++ xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog);
++ xfs.inopblog = super.sb_blocklog - super.sb_inodelog;
++
+ xfs.btnode_ptr0_off =
+ ((xfs.bsize - sizeof(xfs_btree_block_t)) /
+ (sizeof (xfs_bmbt_key_t) + sizeof (xfs_bmbt_ptr_t)))
+diff --git a/tools/libfsimage/xfs/xfs.h b/tools/libfsimage/xfs/xfs.h
+index 40699281e4..b87e37d3d7 100644
+--- a/tools/libfsimage/xfs/xfs.h
++++ b/tools/libfsimage/xfs/xfs.h
+@@ -134,6 +134,18 @@ typedef struct xfs_sb
+ xfs_uint8_t sb_dummy[7]; /* padding */
+ } xfs_sb_t;
+
++/* Bound taken from xfs.c in GRUB2. It doesn't exist in the spec */
++#define XFS_SB_DIRBLK_NUMBITS 27
++/* Implied by the XFS specification. The minimum block size is 512 octets */
++#define XFS_SB_BLOCKLOG_MIN 9
++/* Implied by the XFS specification. The maximum block size is 65536 octets */
++#define XFS_SB_BLOCKLOG_MAX 16
++/* Implied by the XFS specification. The minimum inode size is 256 octets */
++#define XFS_SB_INODELOG_MIN 8
++/* Implied by the XFS specification. The maximum inode size is 2048 octets */
++#define XFS_SB_INODELOG_MAX 11
++/* High bound for sb_agblklog */
++#define XFS_SB_AGBLKLOG_MAX 32
+
+ /* those are from xfs_btree.h */
+
+--
+2.42.0
+
diff --git a/0045-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch b/0045-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch
deleted file mode 100644
index 1720bdd..0000000
--- a/0045-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-From 3685e754e6017c616769b28133286d06bf07b613 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 8 Sep 2022 21:27:58 +0100
-Subject: [PATCH 45/89] x86/spec-ctrl: Mitigate Cross-Thread Return Address
- Predictions
-
-This is XSA-426 / CVE-2022-27672
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 63305e5392ec2d17b85e7996a97462744425db80)
----
- docs/misc/xen-command-line.pandoc | 2 +-
- xen/arch/x86/include/asm/cpufeatures.h | 3 ++-
- xen/arch/x86/include/asm/spec_ctrl.h | 15 +++++++++++++
- xen/arch/x86/spec_ctrl.c | 31 +++++++++++++++++++++++---
- 4 files changed, 46 insertions(+), 5 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 424b12cfb2..e7fe8b0cc9 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2343,7 +2343,7 @@ guests to use.
- on entry and exit. These blocks are necessary to virtualise support for
- guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
- * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
-- Return Address Stack on entry to Xen.
-+ Return Address Stack on entry to Xen and on idle.
- * `md-clear=` offers control over whether to use VERW to flush
- microarchitectural buffers on idle and exit from Xen. *Note: For
- compatibility with development versions of this fix, `mds=` is also accepted
-diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
-index 865f110986..da0593de85 100644
---- a/xen/arch/x86/include/asm/cpufeatures.h
-+++ b/xen/arch/x86/include/asm/cpufeatures.h
-@@ -35,7 +35,8 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM
- XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */
- XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */
- XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */
--/* Bits 23,24 unused. */
-+/* Bits 23 unused. */
-+XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */
- XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */
- XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */
- XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */
-diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
-index 6a77c39378..391973ef6a 100644
---- a/xen/arch/x86/include/asm/spec_ctrl.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl.h
-@@ -159,6 +159,21 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
- */
- alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE,
- [sel] "m" (info->verw_sel));
-+
-+ /*
-+ * Cross-Thread Return Address Predictions:
-+ *
-+ * On vulnerable systems, the return predictions (RSB/RAS) are statically
-+ * partitioned between active threads. When entering idle, our entries
-+ * are re-partitioned to allow the other threads to use them.
-+ *
-+ * In some cases, we might still have guest entries in the RAS, so flush
-+ * them before injecting them sideways to our sibling thread.
-+ *
-+ * (ab)use alternative_input() to specify clobbers.
-+ */
-+ alternative_input("", "DO_OVERWRITE_RSB", X86_FEATURE_SC_RSB_IDLE,
-+ : "rax", "rcx");
- }
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index a320b81947..e80e2a5ed1 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -1327,13 +1327,38 @@ void __init init_speculation_mitigations(void)
- * 3) Some CPUs have RSBs which are not full width, which allow the
- * attacker's entries to alias Xen addresses.
- *
-+ * 4) Some CPUs have RSBs which are re-partitioned based on thread
-+ * idleness, which allows an attacker to inject entries into the other
-+ * thread. We still active the optimisation in this case, and mitigate
-+ * in the idle path which has lower overhead.
-+ *
- * It is safe to turn off RSB stuffing when Xen is using SMEP itself, and
- * 32bit PV guests are disabled, and when the RSB is full width.
- */
- BUILD_BUG_ON(RO_MPT_VIRT_START != PML4_ADDR(256));
-- if ( opt_rsb_pv == -1 && boot_cpu_has(X86_FEATURE_XEN_SMEP) &&
-- !opt_pv32 && rsb_is_full_width() )
-- opt_rsb_pv = 0;
-+ if ( opt_rsb_pv == -1 )
-+ {
-+ opt_rsb_pv = (opt_pv32 || !boot_cpu_has(X86_FEATURE_XEN_SMEP) ||
-+ !rsb_is_full_width());
-+
-+ /*
-+ * Cross-Thread Return Address Predictions.
-+ *
-+ * Vulnerable systems are Zen1/Zen2 uarch, which is AMD Fam17 / Hygon
-+ * Fam18, when SMT is active.
-+ *
-+ * To mitigate, we must flush the RSB/RAS/RAP once between entering
-+ * Xen and going idle.
-+ *
-+ * Most cases flush on entry to Xen anyway. The one case where we
-+ * don't is when using the SMEP optimisation for PV guests. Flushing
-+ * before going idle is less overhead than flushing on PV entry.
-+ */
-+ if ( !opt_rsb_pv && hw_smt_enabled &&
-+ (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD|X86_VENDOR_HYGON)) &&
-+ (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) )
-+ setup_force_cpu_cap(X86_FEATURE_SC_RSB_IDLE);
-+ }
-
- if ( opt_rsb_pv )
- {
---
-2.40.0
-
diff --git a/0046-automation-Remove-clang-8-from-Debian-unstable-conta.patch b/0046-automation-Remove-clang-8-from-Debian-unstable-conta.patch
deleted file mode 100644
index 6fc3323..0000000
--- a/0046-automation-Remove-clang-8-from-Debian-unstable-conta.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From aaf74a532c02017998492c0bf60a9c6be3332f20 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 21 Feb 2023 16:55:38 +0000
-Subject: [PATCH 46/89] automation: Remove clang-8 from Debian unstable
- container
-
-First, apt complain that it isn't the right way to add keys anymore,
-but hopefully that's just a warning.
-
-Second, we can't install clang-8:
-The following packages have unmet dependencies:
- clang-8 : Depends: libstdc++-8-dev but it is not installable
- Depends: libgcc-8-dev but it is not installable
- Depends: libobjc-8-dev but it is not installable
- Recommends: llvm-8-dev but it is not going to be installed
- Recommends: libomp-8-dev but it is not going to be installed
- libllvm8 : Depends: libffi7 (>= 3.3~20180313) but it is not installable
-E: Unable to correct problems, you have held broken packages.
-
-clang on Debian unstable is now version 14.0.6.
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit a6b1e2b80fe2053b1c9c9843fb086a668513ea36)
----
- automation/build/debian/unstable-llvm-8.list | 3 ---
- automation/build/debian/unstable.dockerfile | 12 ------------
- automation/gitlab-ci/build.yaml | 10 ----------
- 3 files changed, 25 deletions(-)
- delete mode 100644 automation/build/debian/unstable-llvm-8.list
-
-diff --git a/automation/build/debian/unstable-llvm-8.list b/automation/build/debian/unstable-llvm-8.list
-deleted file mode 100644
-index dc119fa0b4..0000000000
---- a/automation/build/debian/unstable-llvm-8.list
-+++ /dev/null
-@@ -1,3 +0,0 @@
--# Unstable LLVM 8 repos
--deb http://apt.llvm.org/unstable/ llvm-toolchain-8 main
--deb-src http://apt.llvm.org/unstable/ llvm-toolchain-8 main
-diff --git a/automation/build/debian/unstable.dockerfile b/automation/build/debian/unstable.dockerfile
-index 9de766d596..b560337b7a 100644
---- a/automation/build/debian/unstable.dockerfile
-+++ b/automation/build/debian/unstable.dockerfile
-@@ -51,15 +51,3 @@ RUN apt-get update && \
- apt-get autoremove -y && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
--
--RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|apt-key add -
--COPY unstable-llvm-8.list /etc/apt/sources.list.d/
--
--RUN apt-get update && \
-- apt-get --quiet --yes install \
-- clang-8 \
-- lld-8 \
-- && \
-- apt-get autoremove -y && \
-- apt-get clean && \
-- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index 716ee0b1e4..bed161b471 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -312,16 +312,6 @@ debian-unstable-clang-debug:
- variables:
- CONTAINER: debian:unstable
-
--debian-unstable-clang-8:
-- extends: .clang-8-x86-64-build
-- variables:
-- CONTAINER: debian:unstable
--
--debian-unstable-clang-8-debug:
-- extends: .clang-8-x86-64-build-debug
-- variables:
-- CONTAINER: debian:unstable
--
- debian-unstable-gcc:
- extends: .gcc-x86-64-build
- variables:
---
-2.40.0
-
diff --git a/0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch b/0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch
new file mode 100644
index 0000000..0c32745
--- /dev/null
+++ b/0046-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch
@@ -0,0 +1,62 @@
+From eb4efdac4cc7121f832ee156f39761312878f3a5 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:53 +0100
+Subject: [PATCH 46/55] libfsimage/xfs: Add compile-time check to libfsimage
+
+Adds the common tools include folder to the -I compile flags
+of libfsimage. This allows us to use:
+ xen-tools/common-macros.h:BUILD_BUG_ON()
+
+With it, statically assert a sanitized "blocklog - SECTOR_BITS" cannot
+underflow.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7d85c70431593550e32022e3a19a37f306f49e00)
+---
+ tools/libfsimage/common.mk | 2 +-
+ tools/libfsimage/xfs/fsys_xfs.c | 4 +++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libfsimage/common.mk b/tools/libfsimage/common.mk
+index 4fc8c66795..e4336837d0 100644
+--- a/tools/libfsimage/common.mk
++++ b/tools/libfsimage/common.mk
+@@ -1,7 +1,7 @@
+ include $(XEN_ROOT)/tools/Rules.mk
+
+ FSDIR := $(libdir)/xenfsimage
+-CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\"
++CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ $(CFLAGS_xeninclude) -DFSIMAGE_FSDIR=\"$(FSDIR)\"
+ CFLAGS += -D_GNU_SOURCE
+ LDFLAGS += -L../common/
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index e4eb7e1ee2..4a8dd6f239 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -19,6 +19,7 @@
+
+ #include <stdbool.h>
+ #include <xenfsimage_grub.h>
++#include <xen-tools/libs.h>
+ #include "xfs.h"
+
+ #define MAX_LINK_COUNT 8
+@@ -477,9 +478,10 @@ xfs_mount (fsi_file_t *ffi, const char *options)
+ xfs.agblklog = super.sb_agblklog;
+
+ /* Derived from sanitized parameters */
++ BUILD_BUG_ON(XFS_SB_BLOCKLOG_MIN < SECTOR_BITS);
++ xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
+ xfs.bsize = 1 << super.sb_blocklog;
+ xfs.blklog = super.sb_blocklog;
+- xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
+ xfs.isize = 1 << super.sb_inodelog;
+ xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog);
+ xfs.inopblog = super.sb_blocklog - super.sb_inodelog;
+--
+2.42.0
+
diff --git a/0047-libs-util-Fix-parallel-build-between-flex-bison-and-.patch b/0047-libs-util-Fix-parallel-build-between-flex-bison-and-.patch
deleted file mode 100644
index f3e6d36..0000000
--- a/0047-libs-util-Fix-parallel-build-between-flex-bison-and-.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From c622b8ace93cc38c73f47f5044dc3663ef93f815 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Fri, 3 Mar 2023 07:55:24 +0100
-Subject: [PATCH 47/89] libs/util: Fix parallel build between flex/bison and CC
- rules
-
-flex/bison generate two targets, and when those targets are
-prerequisite of other rules they are considered independently by make.
-
-We can have a situation where the .c file is out-of-date but not the
-.h, git checkout for example. In this case, if a rule only have the .h
-file as prerequiste, make will procced and start to build the object.
-In parallel, another target can have the .c file as prerequisite and
-make will find out it need re-generating and do so, changing the .h at
-the same time. This parallel task breaks the first one.
-
-To avoid this scenario, we put both the header and the source as
-prerequisite for all object even if they only need the header.
-
-Reported-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: bf652a50fb3bb3b1b3d93db6fb79bc28f978fe75
-master date: 2023-02-09 18:26:17 +0000
----
- tools/libs/util/Makefile | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/tools/libs/util/Makefile b/tools/libs/util/Makefile
-index 493d2e00be..fee4ea0dc7 100644
---- a/tools/libs/util/Makefile
-+++ b/tools/libs/util/Makefile
-@@ -40,6 +40,14 @@ include $(XEN_ROOT)/tools/libs/libs.mk
-
- $(OBJS-y) $(PIC_OBJS): $(AUTOINCS)
-
-+# Adding the .c conterparts of the headers generated by flex/bison as
-+# prerequisite of all objects.
-+# This is to tell make that if only the .c file is out-of-date but not the
-+# header, it should still wait for the .c file to be rebuilt.
-+# Otherwise, make doesn't considered "%.c %.h" as grouped targets, and will run
-+# the flex/bison rules in parallel of CC rules which only need the header.
-+$(OBJS-y) $(PIC_OBJS): libxlu_cfg_l.c libxlu_cfg_y.c libxlu_disk_l.c
-+
- %.c %.h:: %.y
- @rm -f $*.[ch]
- $(BISON) --output=$*.c $<
---
-2.40.0
-
diff --git a/0047-tools-pygrub-Remove-unnecessary-hypercall.patch b/0047-tools-pygrub-Remove-unnecessary-hypercall.patch
new file mode 100644
index 0000000..6bdd9bb
--- /dev/null
+++ b/0047-tools-pygrub-Remove-unnecessary-hypercall.patch
@@ -0,0 +1,60 @@
+From 8a584126eae53a44cefb0acdbca201233a557fa5 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:21 +0100
+Subject: [PATCH 47/55] tools/pygrub: Remove unnecessary hypercall
+
+There's a hypercall being issued in order to determine whether PV64 is
+supported, but since Xen 4.3 that's strictly true so it's not required.
+
+Plus, this way we can avoid mapping the privcmd interface altogether in the
+depriv pygrub.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+(cherry picked from commit f4b504c6170c446e61055cbd388ae4e832a9deca)
+---
+ tools/pygrub/src/pygrub | 12 +-----------
+ 1 file changed, 1 insertion(+), 11 deletions(-)
+
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index ce7ab0eb8c..ce4e07d3e8 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -18,7 +18,6 @@ import os, sys, string, struct, tempfile, re, traceback, stat, errno
+ import copy
+ import logging
+ import platform
+-import xen.lowlevel.xc
+
+ import curses, _curses, curses.textpad, curses.ascii
+ import getopt
+@@ -668,14 +667,6 @@ def run_grub(file, entry, fs, cfg_args):
+
+ return grubcfg
+
+-def supports64bitPVguest():
+- xc = xen.lowlevel.xc.xc()
+- caps = xc.xeninfo()['xen_caps'].split(" ")
+- for cap in caps:
+- if cap == "xen-3.0-x86_64":
+- return True
+- return False
+-
+ # If nothing has been specified, look for a Solaris domU. If found, perform the
+ # necessary tweaks.
+ def sniff_solaris(fs, cfg):
+@@ -684,8 +675,7 @@ def sniff_solaris(fs, cfg):
+ return cfg
+
+ if not cfg["kernel"]:
+- if supports64bitPVguest() and \
+- fs.file_exists("/platform/i86xpv/kernel/amd64/unix"):
++ if fs.file_exists("/platform/i86xpv/kernel/amd64/unix"):
+ cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix"
+ cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive"
+ elif fs.file_exists("/platform/i86xpv/kernel/unix"):
+--
+2.42.0
+
diff --git a/0048-tools-pygrub-Small-refactors.patch b/0048-tools-pygrub-Small-refactors.patch
new file mode 100644
index 0000000..55b238c
--- /dev/null
+++ b/0048-tools-pygrub-Small-refactors.patch
@@ -0,0 +1,65 @@
+From e7059f16f7c2b99fea30b9671fec74c0375eee8f Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:22 +0100
+Subject: [PATCH 48/55] tools/pygrub: Small refactors
+
+Small tidy up to ensure output_directory always has a trailing '/' to ease
+concatenating paths and that `output` can only be a filename or None.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit 9f2ff9a7c9b3ac734ae99f17f0134ed0343dcccf)
+---
+ tools/pygrub/src/pygrub | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index ce4e07d3e8..1042c05b86 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -793,7 +793,7 @@ if __name__ == "__main__":
+ debug = False
+ not_really = False
+ output_format = "sxp"
+- output_directory = "/var/run/xen/pygrub"
++ output_directory = "/var/run/xen/pygrub/"
+
+ # what was passed in
+ incfg = { "kernel": None, "ramdisk": None, "args": "" }
+@@ -815,7 +815,8 @@ if __name__ == "__main__":
+ usage()
+ sys.exit()
+ elif o in ("--output",):
+- output = a
++ if a != "-":
++ output = a
+ elif o in ("--kernel",):
+ incfg["kernel"] = a
+ elif o in ("--ramdisk",):
+@@ -847,12 +848,11 @@ if __name__ == "__main__":
+ if not os.path.isdir(a):
+ print("%s is not an existing directory" % a)
+ sys.exit(1)
+- output_directory = a
++ output_directory = a + '/'
+
+ if debug:
+ logging.basicConfig(level=logging.DEBUG)
+
+-
+ try:
+ os.makedirs(output_directory, 0o700)
+ except OSError as e:
+@@ -861,7 +861,7 @@ if __name__ == "__main__":
+ else:
+ raise
+
+- if output is None or output == "-":
++ if output is None:
+ fd = sys.stdout.fileno()
+ else:
+ fd = os.open(output, os.O_WRONLY)
+--
+2.42.0
+
diff --git a/0048-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch b/0048-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch
deleted file mode 100644
index 46c48de..0000000
--- a/0048-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch
+++ /dev/null
@@ -1,126 +0,0 @@
-From cdc23d47ad85e756540eaa8655ebc2a0445612ed Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 07:55:54 +0100
-Subject: [PATCH 48/89] x86/cpuid: Infrastructure for leaves 7:1{ecx,edx}
-
-We don't actually need ecx yet, but adding it in now will reduce the amount to
-which leaf 7 is out of order in a featureset.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: b4a23bf6293aadecfd03bf9e83974443e2eac9cb
-master date: 2023-02-09 18:26:17 +0000
----
- tools/misc/xen-cpuid.c | 10 ++++++++++
- xen/arch/x86/cpu/common.c | 3 ++-
- xen/include/public/arch-x86/cpufeatureset.h | 4 ++++
- xen/include/xen/lib/x86/cpuid.h | 15 ++++++++++++++-
- 4 files changed, 30 insertions(+), 2 deletions(-)
-
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index d5833e9ce8..addb3a39a1 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -202,6 +202,14 @@ static const char *const str_7b1[32] =
- [ 0] = "ppin",
- };
-
-+static const char *const str_7c1[32] =
-+{
-+};
-+
-+static const char *const str_7d1[32] =
-+{
-+};
-+
- static const char *const str_7d2[32] =
- {
- [ 0] = "intel-psfd",
-@@ -229,6 +237,8 @@ static const struct {
- { "0x80000021.eax", "e21a", str_e21a },
- { "0x00000007:1.ebx", "7b1", str_7b1 },
- { "0x00000007:2.edx", "7d2", str_7d2 },
-+ { "0x00000007:1.ecx", "7c1", str_7c1 },
-+ { "0x00000007:1.edx", "7d1", str_7d1 },
- };
-
- #define COL_ALIGN "18"
-diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
-index 0412dbc915..b3fcf4680f 100644
---- a/xen/arch/x86/cpu/common.c
-+++ b/xen/arch/x86/cpu/common.c
-@@ -450,7 +450,8 @@ static void generic_identify(struct cpuinfo_x86 *c)
- cpuid_count(7, 1,
- &c->x86_capability[FEATURESET_7a1],
- &c->x86_capability[FEATURESET_7b1],
-- &tmp, &tmp);
-+ &c->x86_capability[FEATURESET_7c1],
-+ &c->x86_capability[FEATURESET_7d1]);
- if (max_subleaf >= 2)
- cpuid_count(7, 2,
- &tmp, &tmp, &tmp,
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 7915f5826f..f43cdcd0f9 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -295,6 +295,10 @@ XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /* MSR_SPEC_CTRL.RRSBA_DIS_* */
- XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /* MSR_SPEC_CTRL.BHI_DIS_S */
- XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */
-
-+/* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */
-+
-+/* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */
-+
- #endif /* XEN_CPUFEATURE */
-
- /* Clean up from a default include. Close the enum (for C). */
-diff --git a/xen/include/xen/lib/x86/cpuid.h b/xen/include/xen/lib/x86/cpuid.h
-index 73a5c33036..fa98b371ee 100644
---- a/xen/include/xen/lib/x86/cpuid.h
-+++ b/xen/include/xen/lib/x86/cpuid.h
-@@ -18,6 +18,8 @@
- #define FEATURESET_e21a 11 /* 0x80000021.eax */
- #define FEATURESET_7b1 12 /* 0x00000007:1.ebx */
- #define FEATURESET_7d2 13 /* 0x00000007:2.edx */
-+#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */
-+#define FEATURESET_7d1 15 /* 0x00000007:1.edx */
-
- struct cpuid_leaf
- {
-@@ -194,7 +196,14 @@ struct cpuid_policy
- uint32_t _7b1;
- struct { DECL_BITFIELD(7b1); };
- };
-- uint32_t /* c */:32, /* d */:32;
-+ union {
-+ uint32_t _7c1;
-+ struct { DECL_BITFIELD(7c1); };
-+ };
-+ union {
-+ uint32_t _7d1;
-+ struct { DECL_BITFIELD(7d1); };
-+ };
-
- /* Subleaf 2. */
- uint32_t /* a */:32, /* b */:32, /* c */:32;
-@@ -343,6 +352,8 @@ static inline void cpuid_policy_to_featureset(
- fs[FEATURESET_e21a] = p->extd.e21a;
- fs[FEATURESET_7b1] = p->feat._7b1;
- fs[FEATURESET_7d2] = p->feat._7d2;
-+ fs[FEATURESET_7c1] = p->feat._7c1;
-+ fs[FEATURESET_7d1] = p->feat._7d1;
- }
-
- /* Fill in a CPUID policy from a featureset bitmap. */
-@@ -363,6 +374,8 @@ static inline void cpuid_featureset_to_policy(
- p->extd.e21a = fs[FEATURESET_e21a];
- p->feat._7b1 = fs[FEATURESET_7b1];
- p->feat._7d2 = fs[FEATURESET_7d2];
-+ p->feat._7c1 = fs[FEATURESET_7c1];
-+ p->feat._7d1 = fs[FEATURESET_7d1];
- }
-
- static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p)
---
-2.40.0
-
diff --git a/0049-tools-pygrub-Open-the-output-files-earlier.patch b/0049-tools-pygrub-Open-the-output-files-earlier.patch
new file mode 100644
index 0000000..c3b00b1
--- /dev/null
+++ b/0049-tools-pygrub-Open-the-output-files-earlier.patch
@@ -0,0 +1,105 @@
+From 37977420670c65db220349510599d3fe47600ad8 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:23 +0100
+Subject: [PATCH 49/55] tools/pygrub: Open the output files earlier
+
+This patch allows pygrub to get ahold of every RW file descriptor it needs
+early on. A later patch will clamp the filesystem it can access so it can't
+obtain any others.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit 0710d7d44586251bfca9758890616dc3d6de8a74)
+---
+ tools/pygrub/src/pygrub | 37 ++++++++++++++++++++++---------------
+ 1 file changed, 22 insertions(+), 15 deletions(-)
+
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index 1042c05b86..91e2ec2ab1 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -738,8 +738,7 @@ if __name__ == "__main__":
+ def usage():
+ print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
+
+- def copy_from_image(fs, file_to_read, file_type, output_directory,
+- not_really):
++ def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really):
+ if not_really:
+ if fs.file_exists(file_to_read):
+ return "<%s:%s>" % (file_type, file_to_read)
+@@ -750,21 +749,18 @@ if __name__ == "__main__":
+ except Exception as e:
+ print(e, file=sys.stderr)
+ sys.exit("Error opening %s in guest" % file_to_read)
+- (tfd, ret) = tempfile.mkstemp(prefix="boot_"+file_type+".",
+- dir=output_directory)
+ dataoff = 0
+ while True:
+ data = datafile.read(FS_READ_MAX, dataoff)
+ if len(data) == 0:
+- os.close(tfd)
++ os.close(fd_dst)
+ del datafile
+- return ret
++ return
+ try:
+- os.write(tfd, data)
++ os.write(fd_dst, data)
+ except Exception as e:
+ print(e, file=sys.stderr)
+- os.close(tfd)
+- os.unlink(ret)
++ os.unlink(path_dst)
+ del datafile
+ sys.exit("Error writing temporary copy of "+file_type)
+ dataoff += len(data)
+@@ -861,6 +857,14 @@ if __name__ == "__main__":
+ else:
+ raise
+
++ if not_really:
++ fd_kernel = path_kernel = fd_ramdisk = path_ramdisk = None
++ else:
++ (fd_kernel, path_kernel) = tempfile.mkstemp(prefix="boot_kernel.",
++ dir=output_directory)
++ (fd_ramdisk, path_ramdisk) = tempfile.mkstemp(prefix="boot_ramdisk.",
++ dir=output_directory)
++
+ if output is None:
+ fd = sys.stdout.fileno()
+ else:
+@@ -920,20 +924,23 @@ if __name__ == "__main__":
+ if fs is None:
+ raise RuntimeError("Unable to find partition containing kernel")
+
+- bootcfg["kernel"] = copy_from_image(fs, chosencfg["kernel"], "kernel",
+- output_directory, not_really)
++ copy_from_image(fs, chosencfg["kernel"], "kernel",
++ fd_kernel, path_kernel, not_really)
++ bootcfg["kernel"] = path_kernel
+
+ if chosencfg["ramdisk"]:
+ try:
+- bootcfg["ramdisk"] = copy_from_image(fs, chosencfg["ramdisk"],
+- "ramdisk", output_directory,
+- not_really)
++ copy_from_image(fs, chosencfg["ramdisk"], "ramdisk",
++ fd_ramdisk, path_ramdisk, not_really)
+ except:
+ if not not_really:
+- os.unlink(bootcfg["kernel"])
++ os.unlink(path_kernel)
+ raise
++ bootcfg["ramdisk"] = path_ramdisk
+ else:
+ initrd = None
++ if not not_really:
++ os.unlink(path_ramdisk)
+
+ args = None
+ if chosencfg["args"]:
+--
+2.42.0
+
diff --git a/0049-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch b/0049-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch
deleted file mode 100644
index a34217e..0000000
--- a/0049-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch
+++ /dev/null
@@ -1,195 +0,0 @@
-From 8202b9cf84674c5b23a89c4b8722afbb9787f917 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 07:56:16 +0100
-Subject: [PATCH 49/89] x86/shskt: Disable CET-SS on parts susceptible to
- fractured updates
-
-Refer to Intel SDM Rev 70 (Dec 2022), Vol3 17.2.3 "Supervisor Shadow Stack
-Token".
-
-Architecturally, an event delivery which starts in CPL<3 and switches shadow
-stack will first validate the Supervisor Shadow Stack Token (setting the busy
-bit), then pushes CS/LIP/SSP. One example of this is an NMI interrupting Xen.
-
-Some CPUs suffer from an issue called fracturing, whereby a fault/vmexit/etc
-between setting the busy bit and completing the event injection renders the
-action non-restartable, because when it comes time to restart, the busy bit is
-found to be already set.
-
-This is far more easily encountered under virt, yet it is not the fault of the
-hypervisor, nor the fault of the guest kernel. The fault lies somewhere
-between the architectural specification, and the uarch behaviour.
-
-Intel have allocated CPUID.7[1].ecx[18] CET_SSS to enumerate that supervisor
-shadow stacks are safe to use. Because of how Xen lays out its shadow stacks,
-fracturing is not expected to be a problem on native.
-
-Detect this case on boot and default to not using shstk if virtualised.
-Specifying `cet=shstk` on the command line will override this heuristic and
-enable shadow stacks irrespective.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 01e7477d1b081cff4288ff9f51ec59ee94c03ee0
-master date: 2023-02-09 18:26:17 +0000
----
- docs/misc/xen-command-line.pandoc | 7 +++-
- tools/libs/light/libxl_cpuid.c | 2 +
- tools/misc/xen-cpuid.c | 1 +
- xen/arch/x86/cpu/common.c | 11 ++++-
- xen/arch/x86/setup.c | 46 +++++++++++++++++----
- xen/include/public/arch-x86/cpufeatureset.h | 1 +
- 6 files changed, 57 insertions(+), 11 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index e7fe8b0cc9..807ca51fb2 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -287,10 +287,15 @@ can be maintained with the pv-shim mechanism.
- protection.
-
- The option is available when `CONFIG_XEN_SHSTK` is compiled in, and
-- defaults to `true` on hardware supporting CET-SS. Specifying
-+ generally defaults to `true` on hardware supporting CET-SS. Specifying
- `cet=no-shstk` will cause Xen not to use Shadow Stacks even when support
- is available in hardware.
-
-+ Some hardware suffers from an issue known as Supervisor Shadow Stack
-+ Fracturing. On such hardware, Xen will default to not using Shadow Stacks
-+ when virtualised. Specifying `cet=shstk` will override this heuristic and
-+ enable Shadow Stacks unilaterally.
-+
- * The `ibt=` boolean controls whether Xen uses Indirect Branch Tracking for
- its own protection.
-
-diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
-index 2aa23225f4..d97a2f3338 100644
---- a/tools/libs/light/libxl_cpuid.c
-+++ b/tools/libs/light/libxl_cpuid.c
-@@ -235,6 +235,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
- {"fsrs", 0x00000007, 1, CPUID_REG_EAX, 11, 1},
- {"fsrcs", 0x00000007, 1, CPUID_REG_EAX, 12, 1},
-
-+ {"cet-sss", 0x00000007, 1, CPUID_REG_EDX, 18, 1},
-+
- {"intel-psfd", 0x00000007, 2, CPUID_REG_EDX, 0, 1},
- {"mcdt-no", 0x00000007, 2, CPUID_REG_EDX, 5, 1},
-
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index addb3a39a1..0248eaef44 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -208,6 +208,7 @@ static const char *const str_7c1[32] =
-
- static const char *const str_7d1[32] =
- {
-+ [18] = "cet-sss",
- };
-
- static const char *const str_7d2[32] =
-diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
-index b3fcf4680f..27f73d3bbe 100644
---- a/xen/arch/x86/cpu/common.c
-+++ b/xen/arch/x86/cpu/common.c
-@@ -346,11 +346,18 @@ void __init early_cpu_init(void)
- x86_cpuid_vendor_to_str(c->x86_vendor), c->x86, c->x86,
- c->x86_model, c->x86_model, c->x86_mask, eax);
-
-- if (c->cpuid_level >= 7)
-- cpuid_count(7, 0, &eax, &ebx,
-+ if (c->cpuid_level >= 7) {
-+ uint32_t max_subleaf;
-+
-+ cpuid_count(7, 0, &max_subleaf, &ebx,
- &c->x86_capability[FEATURESET_7c0],
- &c->x86_capability[FEATURESET_7d0]);
-
-+ if (max_subleaf >= 1)
-+ cpuid_count(7, 1, &eax, &ebx, &ecx,
-+ &c->x86_capability[FEATURESET_7d1]);
-+ }
-+
- eax = cpuid_eax(0x80000000);
- if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
- ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0;
-diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
-index e05189f649..09c17b1016 100644
---- a/xen/arch/x86/setup.c
-+++ b/xen/arch/x86/setup.c
-@@ -95,11 +95,7 @@ unsigned long __initdata highmem_start;
- size_param("highmem-start", highmem_start);
- #endif
-
--#ifdef CONFIG_XEN_SHSTK
--static bool __initdata opt_xen_shstk = true;
--#else
--#define opt_xen_shstk false
--#endif
-+static int8_t __initdata opt_xen_shstk = -IS_ENABLED(CONFIG_XEN_SHSTK);
-
- #ifdef CONFIG_XEN_IBT
- static bool __initdata opt_xen_ibt = true;
-@@ -1104,11 +1100,45 @@ void __init noreturn __start_xen(unsigned long mbi_p)
- early_cpu_init();
-
- /* Choose shadow stack early, to set infrastructure up appropriately. */
-- if ( opt_xen_shstk && boot_cpu_has(X86_FEATURE_CET_SS) )
-+ if ( !boot_cpu_has(X86_FEATURE_CET_SS) )
-+ opt_xen_shstk = 0;
-+
-+ if ( opt_xen_shstk )
- {
-- printk("Enabling Supervisor Shadow Stacks\n");
-+ /*
-+ * Some CPUs suffer from Shadow Stack Fracturing, an issue whereby a
-+ * fault/VMExit/etc between setting a Supervisor Busy bit and the
-+ * event delivery completing renders the operation non-restartable.
-+ * On restart, event delivery will find the Busy bit already set.
-+ *
-+ * This is a problem on bare metal, but outside of synthetic cases or
-+ * a very badly timed #MC, it's not believed to be a problem. It is a
-+ * much bigger problem under virt, because we can VMExit for a number
-+ * of legitimate reasons and tickle this bug.
-+ *
-+ * CPUs with this addressed enumerate CET-SSS to indicate that
-+ * supervisor shadow stacks are now safe to use.
-+ */
-+ bool cpu_has_bug_shstk_fracture =
-+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-+ !boot_cpu_has(X86_FEATURE_CET_SSS);
-
-- setup_force_cpu_cap(X86_FEATURE_XEN_SHSTK);
-+ /*
-+ * On bare metal, assume that Xen won't be impacted by shstk
-+ * fracturing problems. Under virt, be more conservative and disable
-+ * shstk by default.
-+ */
-+ if ( opt_xen_shstk == -1 )
-+ opt_xen_shstk =
-+ cpu_has_hypervisor ? !cpu_has_bug_shstk_fracture
-+ : true;
-+
-+ if ( opt_xen_shstk )
-+ {
-+ printk("Enabling Supervisor Shadow Stacks\n");
-+
-+ setup_force_cpu_cap(X86_FEATURE_XEN_SHSTK);
-+ }
- }
-
- if ( opt_xen_ibt && boot_cpu_has(X86_FEATURE_CET_IBT) )
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index f43cdcd0f9..08600cfdc7 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -298,6 +298,7 @@ XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */
- /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */
-
- /* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */
-+XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */
-
- #endif /* XEN_CPUFEATURE */
-
---
-2.40.0
-
diff --git a/0050-credit2-respect-credit2_runqueue-all-when-arranging-.patch b/0050-credit2-respect-credit2_runqueue-all-when-arranging-.patch
deleted file mode 100644
index 0444aa9..0000000
--- a/0050-credit2-respect-credit2_runqueue-all-when-arranging-.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 74b76704fd4059e9133e84c1384501858e9663b7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
- <marmarek@invisiblethingslab.com>
-Date: Fri, 3 Mar 2023 07:57:39 +0100
-Subject: [PATCH 50/89] credit2: respect credit2_runqueue=all when arranging
- runqueues
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Documentation for credit2_runqueue=all says it should create one queue
-for all pCPUs on the host. But since introduction
-sched_credit2_max_cpus_runqueue, it actually created separate runqueue
-per socket, even if the CPUs count is below
-sched_credit2_max_cpus_runqueue.
-
-Adjust the condition to skip syblink check in case of
-credit2_runqueue=all.
-
-Fixes: 8e2aa76dc167 ("xen: credit2: limit the max number of CPUs in a runqueue")
-Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-master commit: 1f5747ee929fbbcae58d7234c6c38a77495d0cfe
-master date: 2023-02-15 16:12:42 +0100
----
- docs/misc/xen-command-line.pandoc | 5 +++++
- xen/common/sched/credit2.c | 9 +++++++--
- 2 files changed, 12 insertions(+), 2 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 807ca51fb2..5be5ce10c6 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -726,6 +726,11 @@ Available alternatives, with their meaning, are:
- * `all`: just one runqueue shared by all the logical pCPUs of
- the host
-
-+Regardless of the above choice, Xen attempts to respect
-+`sched_credit2_max_cpus_runqueue` limit, which may mean more than one runqueue
-+for the `all` value. If that isn't intended, raise
-+the `sched_credit2_max_cpus_runqueue` value.
-+
- ### dbgp
- > `= ehci[ <integer> | @pci<bus>:<slot>.<func> ]`
- > `= xhci[ <integer> | @pci<bus>:<slot>.<func> ][,share=<bool>|hwdom]`
-diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
-index 0e3f89e537..ae55feea34 100644
---- a/xen/common/sched/credit2.c
-+++ b/xen/common/sched/credit2.c
-@@ -996,9 +996,14 @@ cpu_add_to_runqueue(const struct scheduler *ops, unsigned int cpu)
- *
- * Otherwise, let's try to make sure that siblings stay in the
- * same runqueue, pretty much under any cinrcumnstances.
-+ *
-+ * Furthermore, try to respect credit2_runqueue=all, as long as
-+ * max_cpus_runq isn't violated.
- */
-- if ( rqd->refcnt < max_cpus_runq && (ops->cpupool->gran != SCHED_GRAN_cpu ||
-- cpu_runqueue_siblings_match(rqd, cpu, max_cpus_runq)) )
-+ if ( rqd->refcnt < max_cpus_runq &&
-+ (ops->cpupool->gran != SCHED_GRAN_cpu ||
-+ cpu_runqueue_siblings_match(rqd, cpu, max_cpus_runq) ||
-+ opt_runqueue == OPT_RUNQUEUE_ALL) )
- {
- /*
- * This runqueue is ok, but as we said, we also want an even
---
-2.40.0
-
diff --git a/0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch b/0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch
new file mode 100644
index 0000000..949528d
--- /dev/null
+++ b/0050-tools-libfsimage-Export-a-new-function-to-preload-al.patch
@@ -0,0 +1,126 @@
+From 8ee19246ad2c1d0ce241a52683f56b144a4f0b0e Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:24 +0100
+Subject: [PATCH 50/55] tools/libfsimage: Export a new function to preload all
+ plugins
+
+This is work required in order to let pygrub operate in highly deprivileged
+chroot mode. This patch adds a function that preloads every plugin, hence
+ensuring that on function exit, every shared library is loaded in memory.
+
+The new "init" function is supposed to be used before depriv, but that's
+fine because it's not acting on untrusted data.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit 990e65c3ad9ac08642ce62a92852c80be6c83e96)
+---
+ tools/libfsimage/common/fsimage_plugin.c | 4 ++--
+ tools/libfsimage/common/mapfile-GNU | 1 +
+ tools/libfsimage/common/mapfile-SunOS | 1 +
+ tools/libfsimage/common/xenfsimage.h | 8 ++++++++
+ tools/pygrub/src/fsimage/fsimage.c | 15 +++++++++++++++
+ 5 files changed, 27 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libfsimage/common/fsimage_plugin.c b/tools/libfsimage/common/fsimage_plugin.c
+index de1412b423..d0cb9e96a6 100644
+--- a/tools/libfsimage/common/fsimage_plugin.c
++++ b/tools/libfsimage/common/fsimage_plugin.c
+@@ -119,7 +119,7 @@ fail:
+ return (-1);
+ }
+
+-static int load_plugins(void)
++int fsi_init(void)
+ {
+ const char *fsdir = getenv("XEN_FSIMAGE_FSDIR");
+ struct dirent *dp = NULL;
+@@ -180,7 +180,7 @@ int find_plugin(fsi_t *fsi, const char *path, const char *options)
+ fsi_plugin_t *fp;
+ int ret = 0;
+
+- if (plugins == NULL && (ret = load_plugins()) != 0)
++ if (plugins == NULL && (ret = fsi_init()) != 0)
+ goto out;
+
+ for (fp = plugins; fp != NULL; fp = fp->fp_next) {
+diff --git a/tools/libfsimage/common/mapfile-GNU b/tools/libfsimage/common/mapfile-GNU
+index 26d4d7a69e..2d54d527d7 100644
+--- a/tools/libfsimage/common/mapfile-GNU
++++ b/tools/libfsimage/common/mapfile-GNU
+@@ -1,6 +1,7 @@
+ VERSION {
+ libfsimage.so.1.0 {
+ global:
++ fsi_init;
+ fsi_open_fsimage;
+ fsi_close_fsimage;
+ fsi_file_exists;
+diff --git a/tools/libfsimage/common/mapfile-SunOS b/tools/libfsimage/common/mapfile-SunOS
+index e99b90b650..48deedb425 100644
+--- a/tools/libfsimage/common/mapfile-SunOS
++++ b/tools/libfsimage/common/mapfile-SunOS
+@@ -1,5 +1,6 @@
+ libfsimage.so.1.0 {
+ global:
++ fsi_init;
+ fsi_open_fsimage;
+ fsi_close_fsimage;
+ fsi_file_exists;
+diff --git a/tools/libfsimage/common/xenfsimage.h b/tools/libfsimage/common/xenfsimage.h
+index 201abd54f2..341883b2d7 100644
+--- a/tools/libfsimage/common/xenfsimage.h
++++ b/tools/libfsimage/common/xenfsimage.h
+@@ -35,6 +35,14 @@ extern C {
+ typedef struct fsi fsi_t;
+ typedef struct fsi_file fsi_file_t;
+
++/*
++ * Optional initialization function. If invoked it loads the associated
++ * dynamic libraries for the backends ahead of time. This is required if
++ * the library is to run as part of a highly deprivileged executable, as
++ * the libraries may not be reachable after depriv.
++ */
++int fsi_init(void);
++
+ fsi_t *fsi_open_fsimage(const char *, uint64_t, const char *);
+ void fsi_close_fsimage(fsi_t *);
+
+diff --git a/tools/pygrub/src/fsimage/fsimage.c b/tools/pygrub/src/fsimage/fsimage.c
+index 2ebbbe35df..92fbf2851f 100644
+--- a/tools/pygrub/src/fsimage/fsimage.c
++++ b/tools/pygrub/src/fsimage/fsimage.c
+@@ -286,6 +286,15 @@ fsimage_getbootstring(PyObject *o, PyObject *args)
+ return Py_BuildValue("s", bootstring);
+ }
+
++static PyObject *
++fsimage_init(PyObject *o, PyObject *args)
++{
++ if (!PyArg_ParseTuple(args, ""))
++ return (NULL);
++
++ return Py_BuildValue("i", fsi_init());
++}
++
+ PyDoc_STRVAR(fsimage_open__doc__,
+ "open(name, [offset=off]) - Open the given file as a filesystem image.\n"
+ "\n"
+@@ -297,7 +306,13 @@ PyDoc_STRVAR(fsimage_getbootstring__doc__,
+ "getbootstring(fs) - Return the boot string needed for this file system "
+ "or NULL if none is needed.\n");
+
++PyDoc_STRVAR(fsimage_init__doc__,
++ "init() - Loads every dynamic library contained in xenfsimage "
++ "into memory so that it can be used in chrooted environments.\n");
++
+ static struct PyMethodDef fsimage_module_methods[] = {
++ { "init", (PyCFunction)fsimage_init,
++ METH_VARARGS, fsimage_init__doc__ },
+ { "open", (PyCFunction)fsimage_open,
+ METH_VARARGS|METH_KEYWORDS, fsimage_open__doc__ },
+ { "getbootstring", (PyCFunction)fsimage_getbootstring,
+--
+2.42.0
+
diff --git a/0051-build-make-FILE-symbol-paths-consistent.patch b/0051-build-make-FILE-symbol-paths-consistent.patch
deleted file mode 100644
index 47528c2..0000000
--- a/0051-build-make-FILE-symbol-paths-consistent.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 46c104cce0bf340193cb1eacaee5dcd75e264c8f Mon Sep 17 00:00:00 2001
-From: Ross Lagerwall <ross.lagerwall@citrix.com>
-Date: Fri, 3 Mar 2023 07:58:12 +0100
-Subject: [PATCH 51/89] build: make FILE symbol paths consistent
-
-The FILE symbols in out-of-tree builds may be either a relative path to
-the object dir or an absolute path depending on how the build is
-invoked. Fix the paths for C files so that they are consistent with
-in-tree builds - the path is relative to the "xen" directory (e.g.
-common/irq.c).
-
-This fixes livepatch builds when the original Xen build was out-of-tree
-since livepatch-build always does in-tree builds. Note that this doesn't
-fix the behaviour for Clang < 6 which always embeds full paths.
-
-Fixes: 7115fa562fe7 ("build: adding out-of-tree support to the xen build")
-Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 5b9bb91abba7c983def3b4bef71ab08ad360a242
-master date: 2023-02-15 16:13:49 +0100
----
- xen/Rules.mk | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/Rules.mk b/xen/Rules.mk
-index 70b7489ea8..d6b7cec0a8 100644
---- a/xen/Rules.mk
-+++ b/xen/Rules.mk
-@@ -228,8 +228,9 @@ quiet_cmd_cc_o_c = CC $@
- ifeq ($(CONFIG_ENFORCE_UNIQUE_SYMBOLS),y)
- cmd_cc_o_c = $(CC) $(c_flags) -c $< -o $(dot-target).tmp -MQ $@
- ifneq ($(CONFIG_CC_IS_CLANG)$(call clang-ifversion,-lt,600,y),yy)
-+ rel-path = $(patsubst $(abs_srctree)/%,%,$(call realpath,$(1)))
- cmd_objcopy_fix_sym = \
-- $(OBJCOPY) --redefine-sym $(<F)=$< $(dot-target).tmp $@ && rm -f $(dot-target).tmp
-+ $(OBJCOPY) --redefine-sym $(<F)=$(call rel-path,$<) $(dot-target).tmp $@ && rm -f $(dot-target).tmp
- else
- cmd_objcopy_fix_sym = mv -f $(dot-target).tmp $@
- endif
---
-2.40.0
-
diff --git a/0051-tools-pygrub-Deprivilege-pygrub.patch b/0051-tools-pygrub-Deprivilege-pygrub.patch
new file mode 100644
index 0000000..1d89191
--- /dev/null
+++ b/0051-tools-pygrub-Deprivilege-pygrub.patch
@@ -0,0 +1,307 @@
+From f5e211654e5fbb7f1fc5cfea7f9c7ab525edb9e7 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:25 +0100
+Subject: [PATCH 51/55] tools/pygrub: Deprivilege pygrub
+
+Introduce a --runas=<uid> flag to deprivilege pygrub on Linux and *BSDs. It
+also implicitly creates a chroot env where it drops a deprivileged forked
+process. The chroot itself is cleaned up at the end.
+
+If the --runas arg is present, then pygrub forks, leaving the child to
+deprivilege itself, and waiting for it to complete. When the child exits,
+the parent performs cleanup and exits with the same error code.
+
+This is roughly what the child does:
+ 1. Initialize libfsimage (this loads every .so in memory so the chroot
+ can avoid bind-mounting /{,usr}/lib*)
+ 2. Create a temporary empty chroot directory
+ 3. Mount tmpfs in it
+ 4. Bind mount the disk inside, because libfsimage expects a path, not a
+ file descriptor.
+ 5. Remount the root tmpfs to be stricter (ro,nosuid,nodev)
+ 6. Set RLIMIT_FSIZE to a sensibly high amount (128 MiB)
+ 7. Depriv gid, groups and uid
+
+With this scheme in place, the "output" files are writable (up to
+RLIMIT_FSIZE octets) and the exposed filesystem is immutable and contains only
+the single file we can't easily get rid of (the disk).
+
+If running on Linux, the child process also unshares mount, IPC, and
+network namespaces before dropping its privileges.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit e0342ae5556f2b6e2db50701b8a0679a45822ca6)
+---
+ tools/pygrub/setup.py | 2 +-
+ tools/pygrub/src/pygrub | 162 +++++++++++++++++++++++++++++++++++++---
+ 2 files changed, 154 insertions(+), 10 deletions(-)
+
+diff --git a/tools/pygrub/setup.py b/tools/pygrub/setup.py
+index 0e4e3d02d3..06b96733d0 100644
+--- a/tools/pygrub/setup.py
++++ b/tools/pygrub/setup.py
+@@ -17,7 +17,7 @@ xenfsimage = Extension("xenfsimage",
+ pkgs = [ 'grub' ]
+
+ setup(name='pygrub',
+- version='0.6',
++ version='0.7',
+ description='Boot loader that looks a lot like grub for Xen',
+ author='Jeremy Katz',
+ author_email='katzj@redhat.com',
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index 91e2ec2ab1..7cea496ade 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -16,8 +16,11 @@ from __future__ import print_function
+
+ import os, sys, string, struct, tempfile, re, traceback, stat, errno
+ import copy
++import ctypes, ctypes.util
+ import logging
+ import platform
++import resource
++import subprocess
+
+ import curses, _curses, curses.textpad, curses.ascii
+ import getopt
+@@ -27,10 +30,135 @@ import grub.GrubConf
+ import grub.LiloConf
+ import grub.ExtLinuxConf
+
+-PYGRUB_VER = 0.6
++PYGRUB_VER = 0.7
+ FS_READ_MAX = 1024 * 1024
+ SECTOR_SIZE = 512
+
++# Unless provided through the env variable PYGRUB_MAX_FILE_SIZE_MB, then
++# this is the maximum filesize allowed for files written by the depriv
++# pygrub
++LIMIT_FSIZE = 128 << 20
++
++CLONE_NEWNS = 0x00020000 # mount namespace
++CLONE_NEWNET = 0x40000000 # network namespace
++CLONE_NEWIPC = 0x08000000 # IPC namespace
++
++def unshare(flags):
++ if not sys.platform.startswith("linux"):
++ print("skip_unshare reason=not_linux platform=%s", sys.platform, file=sys.stderr)
++ return
++
++ libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
++ unshare_prototype = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, use_errno=True)
++ unshare = unshare_prototype(('unshare', libc))
++
++ if unshare(flags) < 0:
++ raise OSError(ctypes.get_errno(), os.strerror(ctypes.get_errno()))
++
++def bind_mount(src, dst, options):
++ open(dst, "a").close() # touch
++
++ rc = subprocess.call(["mount", "--bind", "-o", options, src, dst])
++ if rc != 0:
++ raise RuntimeError("bad_mount: src=%s dst=%s opts=%s" %
++ (src, dst, options))
++
++def downgrade_rlimits():
++ # Wipe the authority to use unrequired resources
++ resource.setrlimit(resource.RLIMIT_NPROC, (0, 0))
++ resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
++ resource.setrlimit(resource.RLIMIT_MEMLOCK, (0, 0))
++
++ # py2's resource module doesn't know about resource.RLIMIT_MSGQUEUE
++ #
++ # TODO: Use resource.RLIMIT_MSGQUEUE after python2 is deprecated
++ if sys.platform.startswith('linux'):
++ RLIMIT_MSGQUEUE = 12
++ resource.setrlimit(RLIMIT_MSGQUEUE, (0, 0))
++
++ # The final look of the filesystem for this process is fully RO, but
++ # note we have some file descriptor already open (notably, kernel and
++ # ramdisk). In order to avoid a compromised pygrub from filling up the
++ # filesystem we set RLIMIT_FSIZE to a high bound, so that the file
++ # write permissions are bound.
++ fsize = LIMIT_FSIZE
++ if "PYGRUB_MAX_FILE_SIZE_MB" in os.environ.keys():
++ fsize = os.environ["PYGRUB_MAX_FILE_SIZE_MB"] << 20
++
++ resource.setrlimit(resource.RLIMIT_FSIZE, (fsize, fsize))
++
++def depriv(output_directory, output, device, uid, path_kernel, path_ramdisk):
++ # The only point of this call is to force the loading of libfsimage.
++ # That way, we don't need to bind-mount it into the chroot
++ rc = xenfsimage.init()
++ if rc != 0:
++ os.unlink(path_ramdisk)
++ os.unlink(path_kernel)
++ raise RuntimeError("bad_xenfsimage: rc=%d" % rc)
++
++ # Create a temporary directory for the chroot
++ chroot = tempfile.mkdtemp(prefix=str(uid)+'-', dir=output_directory) + '/'
++ device_path = '/device'
++
++ pid = os.fork()
++ if pid:
++ # parent
++ _, rc = os.waitpid(pid, 0)
++
++ for path in [path_kernel, path_ramdisk]:
++ # If the child didn't write anything, just get rid of it,
++ # otherwise we end up consuming a 0-size file when parsing
++ # systems without a ramdisk that the ultimate caller of pygrub
++ # may just be unaware of
++ if rc != 0 or os.path.getsize(path) == 0:
++ os.unlink(path)
++
++ # Normally, unshare(CLONE_NEWNS) will ensure this is not required.
++ # However, this syscall doesn't exist in *BSD systems and doesn't
++ # auto-unmount everything on older Linux kernels (At least as of
++ # Linux 4.19, but it seems fixed in 5.15). Either way,
++ # recursively unmount everything if needed. Quietly.
++ with open('/dev/null', 'w') as devnull:
++ subprocess.call(["umount", "-f", chroot + device_path],
++ stdout=devnull, stderr=devnull)
++ subprocess.call(["umount", "-f", chroot],
++ stdout=devnull, stderr=devnull)
++ os.rmdir(chroot)
++
++ sys.exit(rc)
++
++ # By unsharing the namespace we're making sure it's all bulk-released
++ # at the end, when the namespaces disappear. This means the kernel does
++ # (almost) all the cleanup for us and the parent just has to remove the
++ # temporary directory.
++ unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWNET)
++
++ # Set sensible limits using the setrlimit interface
++ downgrade_rlimits()
++
++ # We'll mount tmpfs on the chroot to ensure the deprivileged child
++ # cannot affect the persistent state. It's RW now in order to
++ # bind-mount the device, but note it's remounted RO after that.
++ rc = subprocess.call(["mount", "-t", "tmpfs", "none", chroot])
++ if rc != 0:
++ raise RuntimeError("mount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot))
++
++ # Bind the untrusted device RO
++ bind_mount(device, chroot + device_path, "ro,nosuid,noexec")
++
++ rc = subprocess.call(["mount", "-t", "tmpfs", "-o", "remount,ro,nosuid,noexec,nodev", "none", chroot])
++ if rc != 0:
++ raise RuntimeError("remount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot))
++
++ # Drop superpowers!
++ os.chroot(chroot)
++ os.chdir('/')
++ os.setgid(uid)
++ os.setgroups([uid])
++ os.setuid(uid)
++
++ return device_path
++
+ def read_size_roundup(fd, size):
+ if platform.system() != 'FreeBSD':
+ return size
+@@ -736,7 +864,7 @@ if __name__ == "__main__":
+ sel = None
+
+ def usage():
+- print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
++ print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--runas=] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
+
+ def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really):
+ if not_really:
+@@ -760,7 +888,8 @@ if __name__ == "__main__":
+ os.write(fd_dst, data)
+ except Exception as e:
+ print(e, file=sys.stderr)
+- os.unlink(path_dst)
++ if path_dst:
++ os.unlink(path_dst)
+ del datafile
+ sys.exit("Error writing temporary copy of "+file_type)
+ dataoff += len(data)
+@@ -769,7 +898,7 @@ if __name__ == "__main__":
+ opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::',
+ ["quiet", "interactive", "list-entries", "not-really", "help",
+ "output=", "output-format=", "output-directory=", "offset=",
+- "entry=", "kernel=",
++ "runas=", "entry=", "kernel=",
+ "ramdisk=", "args=", "isconfig", "debug"])
+ except getopt.GetoptError:
+ usage()
+@@ -790,6 +919,7 @@ if __name__ == "__main__":
+ not_really = False
+ output_format = "sxp"
+ output_directory = "/var/run/xen/pygrub/"
++ uid = None
+
+ # what was passed in
+ incfg = { "kernel": None, "ramdisk": None, "args": "" }
+@@ -813,6 +943,13 @@ if __name__ == "__main__":
+ elif o in ("--output",):
+ if a != "-":
+ output = a
++ elif o in ("--runas",):
++ try:
++ uid = int(a)
++ except ValueError:
++ print("runas value must be an integer user id")
++ usage()
++ sys.exit(1)
+ elif o in ("--kernel",):
+ incfg["kernel"] = a
+ elif o in ("--ramdisk",):
+@@ -849,6 +986,10 @@ if __name__ == "__main__":
+ if debug:
+ logging.basicConfig(level=logging.DEBUG)
+
++ if interactive and uid:
++ print("In order to use --runas, you must also set --entry or -q", file=sys.stderr)
++ sys.exit(1)
++
+ try:
+ os.makedirs(output_directory, 0o700)
+ except OSError as e:
+@@ -870,6 +1011,9 @@ if __name__ == "__main__":
+ else:
+ fd = os.open(output, os.O_WRONLY)
+
++ if uid:
++ file = depriv(output_directory, output, file, uid, path_kernel, path_ramdisk)
++
+ # debug
+ if isconfig:
+ chosencfg = run_grub(file, entry, fs, incfg["args"])
+@@ -925,21 +1069,21 @@ if __name__ == "__main__":
+ raise RuntimeError("Unable to find partition containing kernel")
+
+ copy_from_image(fs, chosencfg["kernel"], "kernel",
+- fd_kernel, path_kernel, not_really)
++ fd_kernel, None if uid else path_kernel, not_really)
+ bootcfg["kernel"] = path_kernel
+
+ if chosencfg["ramdisk"]:
+ try:
+ copy_from_image(fs, chosencfg["ramdisk"], "ramdisk",
+- fd_ramdisk, path_ramdisk, not_really)
++ fd_ramdisk, None if uid else path_ramdisk, not_really)
+ except:
+- if not not_really:
+- os.unlink(path_kernel)
++ if not uid and not not_really:
++ os.unlink(path_kernel)
+ raise
+ bootcfg["ramdisk"] = path_ramdisk
+ else:
+ initrd = None
+- if not not_really:
++ if not uid and not not_really:
+ os.unlink(path_ramdisk)
+
+ args = None
+--
+2.42.0
+
diff --git a/0052-libxl-add-support-for-running-bootloader-in-restrict.patch b/0052-libxl-add-support-for-running-bootloader-in-restrict.patch
new file mode 100644
index 0000000..08691b9
--- /dev/null
+++ b/0052-libxl-add-support-for-running-bootloader-in-restrict.patch
@@ -0,0 +1,251 @@
+From 42bf49d74b711ca7fef37bcde12928220c8e9700 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Mon, 25 Sep 2023 14:30:20 +0200
+Subject: [PATCH 52/55] libxl: add support for running bootloader in restricted
+ mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Much like the device model depriv mode, add the same kind of support for the
+bootloader. Such feature allows passing a UID as a parameter for the
+bootloader to run as, together with the bootloader itself taking the necessary
+actions to isolate.
+
+Note that the user to run the bootloader as must have the right permissions to
+access the guest disk image (in read mode only), and that the bootloader will
+be run in non-interactive mode when restricted.
+
+If enabled bootloader restrict mode will attempt to re-use the user(s) from the
+QEMU depriv implementation if no user is provided on the configuration file or
+the environment. See docs/features/qemu-deprivilege.pandoc for more
+information about how to setup those users.
+
+Bootloader restrict mode is not enabled by default as it requires certain
+setup to be done first (setup of the user(s) to use in restrict mode).
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+(cherry picked from commit 1f762642d2cad1a40634e3280361928109d902f1)
+---
+ docs/man/xl.1.pod.in | 33 +++++++++++
+ tools/libs/light/libxl_bootloader.c | 89 ++++++++++++++++++++++++++++-
+ tools/libs/light/libxl_dm.c | 8 +--
+ tools/libs/light/libxl_internal.h | 8 +++
+ 4 files changed, 131 insertions(+), 7 deletions(-)
+
+diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
+index 101e14241d..4831e12242 100644
+--- a/docs/man/xl.1.pod.in
++++ b/docs/man/xl.1.pod.in
+@@ -1957,6 +1957,39 @@ ignored:
+
+ =back
+
++=head1 ENVIRONMENT VARIABLES
++
++The following environment variables shall affect the execution of xl:
++
++=over 4
++
++=item LIBXL_BOOTLOADER_RESTRICT
++
++Attempt to restrict the bootloader after startup, to limit the
++consequences of security vulnerabilities due to parsing guest
++owned image files.
++
++See docs/features/qemu-deprivilege.pandoc for more information
++on how to setup the unprivileged users.
++
++Note that running the bootloader in restricted mode also implies using
++non-interactive mode, and the disk image must be readable by the
++restricted user.
++
++Having this variable set is equivalent to enabling the option, even if the
++value is 0.
++
++=item LIBXL_BOOTLOADER_USER
++
++When using bootloader_restrict, run the bootloader as this user. If
++not set the default QEMU restrict users will be used.
++
++NOTE: Each domain MUST have a SEPARATE username.
++
++See docs/features/qemu-deprivilege.pandoc for more information.
++
++=back
++
+ =head1 SEE ALSO
+
+ The following man pages:
+diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
+index 108329b4a5..23c0ef3e89 100644
+--- a/tools/libs/light/libxl_bootloader.c
++++ b/tools/libs/light/libxl_bootloader.c
+@@ -14,6 +14,7 @@
+
+ #include "libxl_osdeps.h" /* must come before any other headers */
+
++#include <pwd.h>
+ #include <termios.h>
+ #ifdef HAVE_UTMP_H
+ #include <utmp.h>
+@@ -42,8 +43,71 @@ static void bootloader_arg(libxl__bootloader_state *bl, const char *arg)
+ bl->args[bl->nargs++] = arg;
+ }
+
+-static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
+- const char *bootloader_path)
++static int bootloader_uid(libxl__gc *gc, domid_t guest_domid,
++ const char *user, uid_t *intended_uid)
++{
++ struct passwd *user_base, user_pwbuf;
++ int rc;
++
++ if (user) {
++ rc = userlookup_helper_getpwnam(gc, user, &user_pwbuf, &user_base);
++ if (rc) return rc;
++
++ if (!user_base) {
++ LOGD(ERROR, guest_domid, "Couldn't find user %s", user);
++ return ERROR_INVAL;
++ }
++
++ *intended_uid = user_base->pw_uid;
++ return 0;
++ }
++
++ /* Re-use QEMU user range for the bootloader. */
++ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_RANGE_BASE,
++ &user_pwbuf, &user_base);
++ if (rc) return rc;
++
++ if (user_base) {
++ struct passwd *user_clash, user_clash_pwbuf;
++ uid_t temp_uid = user_base->pw_uid + guest_domid;
++
++ rc = userlookup_helper_getpwuid(gc, temp_uid, &user_clash_pwbuf,
++ &user_clash);
++ if (rc) return rc;
++
++ if (user_clash) {
++ LOGD(ERROR, guest_domid,
++ "wanted to use uid %ld (%s + %d) but that is user %s !",
++ (long)temp_uid, LIBXL_QEMU_USER_RANGE_BASE,
++ guest_domid, user_clash->pw_name);
++ return ERROR_INVAL;
++ }
++
++ *intended_uid = temp_uid;
++ return 0;
++ }
++
++ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_SHARED, &user_pwbuf,
++ &user_base);
++ if (rc) return rc;
++
++ if (user_base) {
++ LOGD(WARN, guest_domid, "Could not find user %s, falling back to %s",
++ LIBXL_QEMU_USER_RANGE_BASE, LIBXL_QEMU_USER_SHARED);
++ *intended_uid = user_base->pw_uid;
++
++ return 0;
++ }
++
++ LOGD(ERROR, guest_domid,
++ "Could not find user %s or range base pseudo-user %s, cannot restrict",
++ LIBXL_QEMU_USER_SHARED, LIBXL_QEMU_USER_RANGE_BASE);
++
++ return ERROR_INVAL;
++}
++
++static int make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
++ const char *bootloader_path)
+ {
+ const libxl_domain_build_info *info = bl->info;
+
+@@ -61,6 +125,23 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
+ ARG(GCSPRINTF("--ramdisk=%s", info->ramdisk));
+ if (info->cmdline && *info->cmdline != '\0')
+ ARG(GCSPRINTF("--args=%s", info->cmdline));
++ if (getenv("LIBXL_BOOTLOADER_RESTRICT") ||
++ getenv("LIBXL_BOOTLOADER_USER")) {
++ uid_t uid = -1;
++ int rc = bootloader_uid(gc, bl->domid, getenv("LIBXL_BOOTLOADER_USER"),
++ &uid);
++
++ if (rc) return rc;
++
++ assert(uid != -1);
++ if (!uid) {
++ LOGD(ERROR, bl->domid, "bootloader restrict UID is 0 (root)!");
++ return ERROR_INVAL;
++ }
++ LOGD(DEBUG, bl->domid, "using uid %ld", (long)uid);
++ ARG(GCSPRINTF("--runas=%ld", (long)uid));
++ ARG("--quiet");
++ }
+
+ ARG(GCSPRINTF("--output=%s", bl->outputpath));
+ ARG("--output-format=simple0");
+@@ -79,6 +160,7 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
+ /* Sentinel for execv */
+ ARG(NULL);
+
++ return 0;
+ #undef ARG
+ }
+
+@@ -443,7 +525,8 @@ static void bootloader_disk_attached_cb(libxl__egc *egc,
+ bootloader = bltmp;
+ }
+
+- make_bootloader_args(gc, bl, bootloader);
++ rc = make_bootloader_args(gc, bl, bootloader);
++ if (rc) goto out;
+
+ bl->openpty.ao = ao;
+ bl->openpty.callback = bootloader_gotptys;
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index fc264a3a13..14b593110f 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -80,10 +80,10 @@ static int libxl__create_qemu_logfile(libxl__gc *gc, char *name)
+ * On error, return a libxl-style error code.
+ */
+ #define DEFINE_USERLOOKUP_HELPER(NAME,SPEC_TYPE,STRUCTNAME,SYSCONF) \
+- static int userlookup_helper_##NAME(libxl__gc *gc, \
+- SPEC_TYPE spec, \
+- struct STRUCTNAME *resultbuf, \
+- struct STRUCTNAME **out) \
++ int userlookup_helper_##NAME(libxl__gc *gc, \
++ SPEC_TYPE spec, \
++ struct STRUCTNAME *resultbuf, \
++ struct STRUCTNAME **out) \
+ { \
+ struct STRUCTNAME *resultp = NULL; \
+ char *buf = NULL; \
+diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
+index 7ad38de30e..f1e3a9a15b 100644
+--- a/tools/libs/light/libxl_internal.h
++++ b/tools/libs/light/libxl_internal.h
+@@ -4873,6 +4873,14 @@ struct libxl__cpu_policy {
+ struct xc_msr *msr;
+ };
+
++struct passwd;
++_hidden int userlookup_helper_getpwnam(libxl__gc*, const char *user,
++ struct passwd *res,
++ struct passwd **out);
++_hidden int userlookup_helper_getpwuid(libxl__gc*, uid_t uid,
++ struct passwd *res,
++ struct passwd **out);
++
+ #endif
+
+ /*
+--
+2.42.0
+
diff --git a/0052-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch b/0052-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch
deleted file mode 100644
index 22a214b..0000000
--- a/0052-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch
+++ /dev/null
@@ -1,154 +0,0 @@
-From e9a7942f6c1638c668605fbf6d6e02bc7bff2582 Mon Sep 17 00:00:00 2001
-From: Sergey Dyasli <sergey.dyasli@citrix.com>
-Date: Fri, 3 Mar 2023 07:58:35 +0100
-Subject: [PATCH 52/89] x86/ucode/AMD: apply the patch early on every logical
- thread
-
-The original issue has been reported on AMD Bulldozer-based CPUs where
-ucode loading loses the LWP feature bit in order to gain the IBPB bit.
-LWP disabling is per-SMT/CMT core modification and needs to happen on
-each sibling thread despite the shared microcode engine. Otherwise,
-logical CPUs will end up with different cpuid capabilities.
-Link: https://bugzilla.kernel.org/show_bug.cgi?id=216211
-
-Guests running under Xen happen to be not affected because of levelling
-logic for the feature masking/override MSRs which causes the LWP bit to
-fall out and hides the issue. The latest recommendation from AMD, after
-discussing this bug, is to load ucode on every logical CPU.
-
-In Linux kernel this issue has been addressed by e7ad18d1169c
-("x86/microcode/AMD: Apply the patch early on every logical thread").
-Follow the same approach in Xen.
-
-Introduce SAME_UCODE match result and use it for early AMD ucode
-loading. Take this opportunity and move opt_ucode_allow_same out of
-compare_revisions() to the relevant callers and also modify the warning
-message based on it. Intel's side of things is modified for consistency
-but provides no functional change.
-
-Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: f4ef8a41b80831db2136bdaff9f946a1a4b051e7
-master date: 2023-02-21 15:08:05 +0100
----
- xen/arch/x86/cpu/microcode/amd.c | 11 ++++++++---
- xen/arch/x86/cpu/microcode/core.c | 26 +++++++++++++++++---------
- xen/arch/x86/cpu/microcode/intel.c | 10 +++++++---
- xen/arch/x86/cpu/microcode/private.h | 3 ++-
- 4 files changed, 34 insertions(+), 16 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
-index 8195707ee1..ded8fe90e6 100644
---- a/xen/arch/x86/cpu/microcode/amd.c
-+++ b/xen/arch/x86/cpu/microcode/amd.c
-@@ -176,8 +176,8 @@ static enum microcode_match_result compare_revisions(
- if ( new_rev > old_rev )
- return NEW_UCODE;
-
-- if ( opt_ucode_allow_same && new_rev == old_rev )
-- return NEW_UCODE;
-+ if ( new_rev == old_rev )
-+ return SAME_UCODE;
-
- return OLD_UCODE;
- }
-@@ -220,8 +220,13 @@ static int cf_check apply_microcode(const struct microcode_patch *patch)
- unsigned int cpu = smp_processor_id();
- struct cpu_signature *sig = &per_cpu(cpu_sig, cpu);
- uint32_t rev, old_rev = sig->rev;
-+ enum microcode_match_result result = microcode_fits(patch);
-
-- if ( microcode_fits(patch) != NEW_UCODE )
-+ /*
-+ * Allow application of the same revision to pick up SMT-specific changes
-+ * even if the revision of the other SMT thread is already up-to-date.
-+ */
-+ if ( result != NEW_UCODE && result != SAME_UCODE )
- return -EINVAL;
-
- if ( check_final_patch_levels(sig) )
-diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
-index 452a7ca773..57ecc5358b 100644
---- a/xen/arch/x86/cpu/microcode/core.c
-+++ b/xen/arch/x86/cpu/microcode/core.c
-@@ -610,17 +610,25 @@ static long cf_check microcode_update_helper(void *data)
- * that ucode revision.
- */
- spin_lock(&microcode_mutex);
-- if ( microcode_cache &&
-- alternative_call(ucode_ops.compare_patch,
-- patch, microcode_cache) != NEW_UCODE )
-+ if ( microcode_cache )
- {
-- spin_unlock(&microcode_mutex);
-- printk(XENLOG_WARNING "microcode: couldn't find any newer revision "
-- "in the provided blob!\n");
-- microcode_free_patch(patch);
-- ret = -ENOENT;
-+ enum microcode_match_result result;
-
-- goto put;
-+ result = alternative_call(ucode_ops.compare_patch, patch,
-+ microcode_cache);
-+
-+ if ( result != NEW_UCODE &&
-+ !(opt_ucode_allow_same && result == SAME_UCODE) )
-+ {
-+ spin_unlock(&microcode_mutex);
-+ printk(XENLOG_WARNING
-+ "microcode: couldn't find any newer%s revision in the provided blob!\n",
-+ opt_ucode_allow_same ? " (or the same)" : "");
-+ microcode_free_patch(patch);
-+ ret = -ENOENT;
-+
-+ goto put;
-+ }
- }
- spin_unlock(&microcode_mutex);
-
-diff --git a/xen/arch/x86/cpu/microcode/intel.c b/xen/arch/x86/cpu/microcode/intel.c
-index f5ba6d76d7..cb08f63d2e 100644
---- a/xen/arch/x86/cpu/microcode/intel.c
-+++ b/xen/arch/x86/cpu/microcode/intel.c
-@@ -232,8 +232,8 @@ static enum microcode_match_result compare_revisions(
- if ( new_rev > old_rev )
- return NEW_UCODE;
-
-- if ( opt_ucode_allow_same && new_rev == old_rev )
-- return NEW_UCODE;
-+ if ( new_rev == old_rev )
-+ return SAME_UCODE;
-
- /*
- * Treat pre-production as always applicable - anyone using pre-production
-@@ -290,8 +290,12 @@ static int cf_check apply_microcode(const struct microcode_patch *patch)
- unsigned int cpu = smp_processor_id();
- struct cpu_signature *sig = &this_cpu(cpu_sig);
- uint32_t rev, old_rev = sig->rev;
-+ enum microcode_match_result result;
-+
-+ result = microcode_update_match(patch);
-
-- if ( microcode_update_match(patch) != NEW_UCODE )
-+ if ( result != NEW_UCODE &&
-+ !(opt_ucode_allow_same && result == SAME_UCODE) )
- return -EINVAL;
-
- wbinvd();
-diff --git a/xen/arch/x86/cpu/microcode/private.h b/xen/arch/x86/cpu/microcode/private.h
-index c085a10268..feafab0677 100644
---- a/xen/arch/x86/cpu/microcode/private.h
-+++ b/xen/arch/x86/cpu/microcode/private.h
-@@ -6,7 +6,8 @@
- extern bool opt_ucode_allow_same;
-
- enum microcode_match_result {
-- OLD_UCODE, /* signature matched, but revision id is older or equal */
-+ OLD_UCODE, /* signature matched, but revision id is older */
-+ SAME_UCODE, /* signature matched, but revision id is the same */
- NEW_UCODE, /* signature matched, but revision id is newer */
- MIS_UCODE, /* signature mismatched */
- };
---
-2.40.0
-
diff --git a/0053-libxl-limit-bootloader-execution-in-restricted-mode.patch b/0053-libxl-limit-bootloader-execution-in-restricted-mode.patch
new file mode 100644
index 0000000..8c790d3
--- /dev/null
+++ b/0053-libxl-limit-bootloader-execution-in-restricted-mode.patch
@@ -0,0 +1,158 @@
+From 46d00dbf4c22b28910f73f66a03e5cabe50b5395 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Thu, 28 Sep 2023 12:22:35 +0200
+Subject: [PATCH 53/55] libxl: limit bootloader execution in restricted mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce a timeout for bootloader execution when running in restricted mode.
+
+Allow overriding the default timeout with an environment-provided value.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+(cherry picked from commit 9c114178ffd700112e91f5ec66cf5151b9c9a8cc)
+---
+ docs/man/xl.1.pod.in | 8 ++++++
+ tools/libs/light/libxl_bootloader.c | 40 +++++++++++++++++++++++++++++
+ tools/libs/light/libxl_internal.h | 2 ++
+ 3 files changed, 50 insertions(+)
+
+diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
+index 4831e12242..c3eb6570ab 100644
+--- a/docs/man/xl.1.pod.in
++++ b/docs/man/xl.1.pod.in
+@@ -1988,6 +1988,14 @@ NOTE: Each domain MUST have a SEPARATE username.
+
+ See docs/features/qemu-deprivilege.pandoc for more information.
+
++=item LIBXL_BOOTLOADER_TIMEOUT
++
++Timeout in seconds for bootloader execution when running in restricted mode.
++Otherwise the build time default in LIBXL_BOOTLOADER_TIMEOUT will be used.
++
++If defined the value must be an unsigned integer between 0 and INT_MAX,
++otherwise behavior is undefined. Setting to 0 disables the timeout.
++
+ =back
+
+ =head1 SEE ALSO
+diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
+index 23c0ef3e89..ee26d08f37 100644
+--- a/tools/libs/light/libxl_bootloader.c
++++ b/tools/libs/light/libxl_bootloader.c
+@@ -30,6 +30,8 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc,
+ libxl__datacopier_state *dc, int rc, int onwrite, int errnoval);
+ static void bootloader_display_copyfail(libxl__egc *egc,
+ libxl__datacopier_state *dc, int rc, int onwrite, int errnoval);
++static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev,
++ const struct timeval *requested_abs, int rc);
+ static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc,
+ int rc);
+ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
+@@ -297,6 +299,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
+ bl->ptys[0].master = bl->ptys[0].slave = 0;
+ bl->ptys[1].master = bl->ptys[1].slave = 0;
+ libxl__ev_child_init(&bl->child);
++ libxl__ev_time_init(&bl->time);
+ libxl__domaindeathcheck_init(&bl->deathcheck);
+ bl->keystrokes.ao = bl->ao; libxl__datacopier_init(&bl->keystrokes);
+ bl->display.ao = bl->ao; libxl__datacopier_init(&bl->display);
+@@ -314,6 +317,7 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
+ libxl__domaindeathcheck_stop(gc,&bl->deathcheck);
+ libxl__datacopier_kill(&bl->keystrokes);
+ libxl__datacopier_kill(&bl->display);
++ libxl__ev_time_deregister(gc, &bl->time);
+ for (i=0; i<2; i++) {
+ libxl__carefd_close(bl->ptys[i].master);
+ libxl__carefd_close(bl->ptys[i].slave);
+@@ -375,6 +379,7 @@ static void bootloader_stop(libxl__egc *egc,
+
+ libxl__datacopier_kill(&bl->keystrokes);
+ libxl__datacopier_kill(&bl->display);
++ libxl__ev_time_deregister(gc, &bl->time);
+ if (libxl__ev_child_inuse(&bl->child)) {
+ r = kill(bl->child.pid, SIGTERM);
+ if (r) LOGED(WARN, bl->domid, "%sfailed to kill bootloader [%lu]",
+@@ -637,6 +642,25 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op)
+
+ struct termios termattr;
+
++ if (getenv("LIBXL_BOOTLOADER_RESTRICT") ||
++ getenv("LIBXL_BOOTLOADER_USER")) {
++ const char *timeout_env = getenv("LIBXL_BOOTLOADER_TIMEOUT");
++ int timeout = timeout_env ? atoi(timeout_env)
++ : LIBXL_BOOTLOADER_TIMEOUT;
++
++ if (timeout) {
++ /* Set execution timeout */
++ rc = libxl__ev_time_register_rel(ao, &bl->time,
++ bootloader_timeout,
++ timeout * 1000);
++ if (rc) {
++ LOGED(ERROR, bl->domid,
++ "unable to register timeout for bootloader execution");
++ goto out;
++ }
++ }
++ }
++
+ pid_t pid = libxl__ev_child_fork(gc, &bl->child, bootloader_finished);
+ if (pid == -1) {
+ rc = ERROR_FAIL;
+@@ -702,6 +726,21 @@ static void bootloader_display_copyfail(libxl__egc *egc,
+ libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display);
+ bootloader_copyfail(egc, "bootloader output", bl, 1, rc,onwrite,errnoval);
+ }
++static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev,
++ const struct timeval *requested_abs, int rc)
++{
++ libxl__bootloader_state *bl = CONTAINER_OF(ev, *bl, time);
++ STATE_AO_GC(bl->ao);
++
++ libxl__ev_time_deregister(gc, &bl->time);
++
++ assert(libxl__ev_child_inuse(&bl->child));
++ LOGD(ERROR, bl->domid, "killing bootloader because of timeout");
++
++ libxl__ev_child_kill_deregister(ao, &bl->child, SIGKILL);
++
++ bootloader_callback(egc, bl, rc);
++}
+
+ static void bootloader_domaindeath(libxl__egc *egc,
+ libxl__domaindeathcheck *dc,
+@@ -718,6 +757,7 @@ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
+ STATE_AO_GC(bl->ao);
+ int rc;
+
++ libxl__ev_time_deregister(gc, &bl->time);
+ libxl__datacopier_kill(&bl->keystrokes);
+ libxl__datacopier_kill(&bl->display);
+
+diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
+index f1e3a9a15b..d05783617f 100644
+--- a/tools/libs/light/libxl_internal.h
++++ b/tools/libs/light/libxl_internal.h
+@@ -102,6 +102,7 @@
+ #define LIBXL_QMP_CMD_TIMEOUT 10
+ #define LIBXL_STUBDOM_START_TIMEOUT 30
+ #define LIBXL_QEMU_BODGE_TIMEOUT 2
++#define LIBXL_BOOTLOADER_TIMEOUT 120
+ #define LIBXL_XENCONSOLE_LIMIT 1048576
+ #define LIBXL_XENCONSOLE_PROTOCOL "vt100"
+ #define LIBXL_MAXMEM_CONSTANT 1024
+@@ -3744,6 +3745,7 @@ struct libxl__bootloader_state {
+ libxl__openpty_state openpty;
+ libxl__openpty_result ptys[2]; /* [0] is for bootloader */
+ libxl__ev_child child;
++ libxl__ev_time time;
+ libxl__domaindeathcheck deathcheck;
+ int nargs, argsspace;
+ const char **args;
+--
+2.42.0
+
diff --git a/0053-x86-perform-mem_sharing-teardown-before-paging-teard.patch b/0053-x86-perform-mem_sharing-teardown-before-paging-teard.patch
deleted file mode 100644
index 934c0f5..0000000
--- a/0053-x86-perform-mem_sharing-teardown-before-paging-teard.patch
+++ /dev/null
@@ -1,111 +0,0 @@
-From e8f28e129d23c940749c66150a89c4ed683a0fb9 Mon Sep 17 00:00:00 2001
-From: Tamas K Lengyel <tamas@tklengyel.com>
-Date: Fri, 3 Mar 2023 07:59:08 +0100
-Subject: [PATCH 53/89] x86: perform mem_sharing teardown before paging
- teardown
-
-An assert failure has been observed in p2m_teardown when performing vm
-forking and then destroying the forked VM (p2m-basic.c:173). The assert
-checks whether the domain's shared pages counter is 0. According to the
-patch that originally added the assert (7bedbbb5c31) the p2m_teardown
-should only happen after mem_sharing already relinquished all shared pages.
-
-In this patch we flip the order in which relinquish ops are called to avoid
-tripping the assert. Conceptually sharing being torn down makes sense to
-happen before paging is torn down.
-
-Fixes: e7aa55c0aab3 ("x86/p2m: free the paging memory pool preemptively")
-Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 2869349f0cb3a89dcbf1f1b30371f58df6309312
-master date: 2023-02-23 12:35:48 +0100
----
- xen/arch/x86/domain.c | 56 ++++++++++++++++++++++---------------------
- 1 file changed, 29 insertions(+), 27 deletions(-)
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index 5a119eec3a..e546c98322 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -2347,9 +2347,9 @@ int domain_relinquish_resources(struct domain *d)
-
- enum {
- PROG_iommu_pagetables = 1,
-+ PROG_shared,
- PROG_paging,
- PROG_vcpu_pagetables,
-- PROG_shared,
- PROG_xen,
- PROG_l4,
- PROG_l3,
-@@ -2368,6 +2368,34 @@ int domain_relinquish_resources(struct domain *d)
- if ( ret )
- return ret;
-
-+#ifdef CONFIG_MEM_SHARING
-+ PROGRESS(shared):
-+
-+ if ( is_hvm_domain(d) )
-+ {
-+ /*
-+ * If the domain has shared pages, relinquish them allowing
-+ * for preemption.
-+ */
-+ ret = relinquish_shared_pages(d);
-+ if ( ret )
-+ return ret;
-+
-+ /*
-+ * If the domain is forked, decrement the parent's pause count
-+ * and release the domain.
-+ */
-+ if ( mem_sharing_is_fork(d) )
-+ {
-+ struct domain *parent = d->parent;
-+
-+ d->parent = NULL;
-+ domain_unpause(parent);
-+ put_domain(parent);
-+ }
-+ }
-+#endif
-+
- PROGRESS(paging):
-
- /* Tear down paging-assistance stuff. */
-@@ -2408,32 +2436,6 @@ int domain_relinquish_resources(struct domain *d)
- d->arch.auto_unmask = 0;
- }
-
--#ifdef CONFIG_MEM_SHARING
-- PROGRESS(shared):
--
-- if ( is_hvm_domain(d) )
-- {
-- /* If the domain has shared pages, relinquish them allowing
-- * for preemption. */
-- ret = relinquish_shared_pages(d);
-- if ( ret )
-- return ret;
--
-- /*
-- * If the domain is forked, decrement the parent's pause count
-- * and release the domain.
-- */
-- if ( mem_sharing_is_fork(d) )
-- {
-- struct domain *parent = d->parent;
--
-- d->parent = NULL;
-- domain_unpause(parent);
-- put_domain(parent);
-- }
-- }
--#endif
--
- spin_lock(&d->page_alloc_lock);
- page_list_splice(&d->arch.relmem_list, &d->page_list);
- INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
---
-2.40.0
-
diff --git a/0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch b/0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch
new file mode 100644
index 0000000..af72c9a
--- /dev/null
+++ b/0054-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch
@@ -0,0 +1,104 @@
+From 3f8b444072fd8615288d9d11e53fbf0b6a8a7750 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 26 Sep 2023 20:03:36 +0100
+Subject: [PATCH 54/55] x86/svm: Fix asymmetry with AMD DR MASK context
+ switching
+
+The handling of MSR_DR{0..3}_MASK is asymmetric between PV and HVM guests.
+
+HVM guests context switch in based on the guest view of DBEXT, whereas PV
+guests switch in based on the host capability. Both guest types leave the
+context dirty for the next vCPU.
+
+This leads to the following issue:
+
+ * PV or HVM vCPU has debugging active (%dr7 + mask)
+ * Switch out deactivates %dr7 but leaves other state stale in hardware
+ * HVM vCPU with debugging active but can't see DBEXT is switched in
+ * Switch in loads %dr7 but leaves the mask MSRs alone
+
+Now, the HVM vCPU is operating in the context of the prior vCPU's mask MSR,
+and furthermore in a case where it genuinely expects there to be no masking
+MSRs.
+
+As a stopgap, adjust the HVM path to switch in/out the masks based on host
+capabilities rather than guest visibility (i.e. like the PV path). Adjustment
+of the intercepts still needs to be dependent on the guest visibility
+of DBEXT.
+
+This is part of XSA-444 / CVE-2023-34327
+
+Fixes: c097f54912d3 ("x86/SVM: support data breakpoint extension registers")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+(cherry picked from commit 5d54282f984bb9a7a65b3d12208584f9fdf1c8e1)
+---
+ xen/arch/x86/hvm/svm/svm.c | 24 ++++++++++++++++++------
+ xen/arch/x86/traps.c | 5 +++++
+ 2 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index e8f50e7c5e..fd32600ae3 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -339,6 +339,10 @@ static void svm_save_dr(struct vcpu *v)
+ v->arch.hvm.flag_dr_dirty = 0;
+ vmcb_set_dr_intercepts(vmcb, ~0u);
+
++ /*
++ * The guest can only have changed the mask MSRs if we previous dropped
++ * intercepts. Re-read them from hardware.
++ */
+ if ( v->domain->arch.cpuid->extd.dbext )
+ {
+ svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_RW);
+@@ -370,17 +374,25 @@ static void __restore_debug_registers(struct vmcb_struct *vmcb, struct vcpu *v)
+
+ ASSERT(v == current);
+
+- if ( v->domain->arch.cpuid->extd.dbext )
++ /*
++ * Both the PV and HVM paths leave stale DR_MASK values in hardware on
++ * context-switch-out. If we're activating %dr7 for the guest, we must
++ * sync the DR_MASKs too, whether or not the guest can see them.
++ */
++ if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+ {
+- svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+- svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+- svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+- svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+-
+ wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.msrs->dr_mask[0]);
+ wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.msrs->dr_mask[1]);
+ wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.msrs->dr_mask[2]);
+ wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.msrs->dr_mask[3]);
++
++ if ( v->domain->arch.cpuid->extd.dbext )
++ {
++ svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ }
+ }
+
+ write_debugreg(0, v->arch.dr[0]);
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index e65cc60041..06c4f3868b 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -2281,6 +2281,11 @@ void activate_debugregs(const struct vcpu *curr)
+ if ( curr->arch.dr7 & DR7_ACTIVE_MASK )
+ write_debugreg(7, curr->arch.dr7);
+
++ /*
++ * Both the PV and HVM paths leave stale DR_MASK values in hardware on
++ * context-switch-out. If we're activating %dr7 for the guest, we must
++ * sync the DR_MASKs too, whether or not the guest can see them.
++ */
+ if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+ {
+ wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.msrs->dr_mask[0]);
+--
+2.42.0
+
diff --git a/0054-xen-Work-around-Clang-IAS-macro-expansion-bug.patch b/0054-xen-Work-around-Clang-IAS-macro-expansion-bug.patch
deleted file mode 100644
index 525dc49..0000000
--- a/0054-xen-Work-around-Clang-IAS-macro-expansion-bug.patch
+++ /dev/null
@@ -1,109 +0,0 @@
-From 837bdc6eb2df796e832302347f363afc820694fe Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:00:04 +0100
-Subject: [PATCH 54/89] xen: Work around Clang-IAS macro \@ expansion bug
-
-https://github.com/llvm/llvm-project/issues/60792
-
-It turns out that Clang-IAS does not expand \@ uniquely in a translation
-unit, and the XSA-426 change tickles this bug:
-
- <instantiation>:4:1: error: invalid symbol redefinition
- .L1_fill_rsb_loop:
- ^
- make[3]: *** [Rules.mk:247: arch/x86/acpi/cpu_idle.o] Error 1
-
-Extend DO_OVERWRITE_RSB with an optional parameter so C callers can mix %= in
-too, which Clang does seem to expand properly.
-
-Fixes: 63305e5392ec ("x86/spec-ctrl: Mitigate Cross-Thread Return Address Predictions")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: a2adacff0b91cc7b977abb209dc419a2ef15963f
-master date: 2023-02-24 17:44:29 +0000
----
- xen/arch/x86/include/asm/spec_ctrl.h | 4 ++--
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 19 ++++++++++++-------
- 2 files changed, 14 insertions(+), 9 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
-index 391973ef6a..a431fea587 100644
---- a/xen/arch/x86/include/asm/spec_ctrl.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl.h
-@@ -83,7 +83,7 @@ static always_inline void spec_ctrl_new_guest_context(void)
- wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
-
- /* (ab)use alternative_input() to specify clobbers. */
-- alternative_input("", "DO_OVERWRITE_RSB", X86_BUG_IBPB_NO_RET,
-+ alternative_input("", "DO_OVERWRITE_RSB xu=%=", X86_BUG_IBPB_NO_RET,
- : "rax", "rcx");
- }
-
-@@ -172,7 +172,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
- *
- * (ab)use alternative_input() to specify clobbers.
- */
-- alternative_input("", "DO_OVERWRITE_RSB", X86_FEATURE_SC_RSB_IDLE,
-+ alternative_input("", "DO_OVERWRITE_RSB xu=%=", X86_FEATURE_SC_RSB_IDLE,
- : "rax", "rcx");
- }
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index fab27ff553..f23bb105c5 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -117,11 +117,16 @@
- .L\@_done:
- .endm
-
--.macro DO_OVERWRITE_RSB tmp=rax
-+.macro DO_OVERWRITE_RSB tmp=rax xu
- /*
- * Requires nothing
- * Clobbers \tmp (%rax by default), %rcx
- *
-+ * xu is an optional parameter to add eXtra Uniqueness. It is intended for
-+ * passing %= in from an asm() block, in order to work around
-+ * https://github.com/llvm/llvm-project/issues/60792 where Clang-IAS doesn't
-+ * expand \@ uniquely.
-+ *
- * Requires 256 bytes of {,shadow}stack space, but %rsp/SSP has no net
- * change. Based on Google's performance numbers, the loop is unrolled to 16
- * iterations and two calls per iteration.
-@@ -136,27 +141,27 @@
- mov $16, %ecx /* 16 iterations, two calls per loop */
- mov %rsp, %\tmp /* Store the current %rsp */
-
--.L\@_fill_rsb_loop:
-+.L\@_fill_rsb_loop\xu:
-
- .irp n, 1, 2 /* Unrolled twice. */
-- call .L\@_insert_rsb_entry_\n /* Create an RSB entry. */
-+ call .L\@_insert_rsb_entry\xu\n /* Create an RSB entry. */
- int3 /* Halt rogue speculation. */
-
--.L\@_insert_rsb_entry_\n:
-+.L\@_insert_rsb_entry\xu\n:
- .endr
-
- sub $1, %ecx
-- jnz .L\@_fill_rsb_loop
-+ jnz .L\@_fill_rsb_loop\xu
- mov %\tmp, %rsp /* Restore old %rsp */
-
- #ifdef CONFIG_XEN_SHSTK
- mov $1, %ecx
- rdsspd %ecx
- cmp $1, %ecx
-- je .L\@_shstk_done
-+ je .L\@_shstk_done\xu
- mov $64, %ecx /* 64 * 4 bytes, given incsspd */
- incsspd %ecx /* Restore old SSP */
--.L\@_shstk_done:
-+.L\@_shstk_done\xu:
- #endif
- .endm
-
---
-2.40.0
-
diff --git a/0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch b/0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch
new file mode 100644
index 0000000..5838e7f
--- /dev/null
+++ b/0055-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch
@@ -0,0 +1,86 @@
+From 0b56bed864ca9b572473957f0254aefa797216f2 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 26 Sep 2023 20:03:36 +0100
+Subject: [PATCH 55/55] x86/pv: Correct the auditing of guest breakpoint
+ addresses
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The use of access_ok() is buggy, because it permits access to the compat
+translation area. 64bit PV guests don't use the XLAT area, but on AMD
+hardware, the DBEXT feature allows a breakpoint to match up to a 4G aligned
+region, allowing the breakpoint to reach outside of the XLAT area.
+
+Prior to c/s cda16c1bb223 ("x86: mirror compat argument translation area for
+32-bit PV"), the live GDT was within 4G of the XLAT area.
+
+All together, this allowed a malicious 64bit PV guest on AMD hardware to place
+a breakpoint over the live GDT, and trigger a #DB livelock (CVE-2015-8104).
+
+Introduce breakpoint_addr_ok() and explain why __addr_ok() happens to be an
+appropriate check in this case.
+
+For Xen 4.14 and later, this is a latent bug because the XLAT area has moved
+to be on its own with nothing interesting adjacent. For Xen 4.13 and older on
+AMD hardware, this fixes a PV-trigger-able DoS.
+
+This is part of XSA-444 / CVE-2023-34328.
+
+Fixes: 65e355490817 ("x86/PV: support data breakpoint extension registers")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit dc9d9aa62ddeb14abd5672690d30789829f58f7e)
+---
+ xen/arch/x86/include/asm/debugreg.h | 20 ++++++++++++++++++++
+ xen/arch/x86/pv/misc-hypercalls.c | 2 +-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/include/asm/debugreg.h b/xen/arch/x86/include/asm/debugreg.h
+index c57914efc6..cc29826524 100644
+--- a/xen/arch/x86/include/asm/debugreg.h
++++ b/xen/arch/x86/include/asm/debugreg.h
+@@ -77,6 +77,26 @@
+ asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) ); \
+ __val; \
+ })
++
++/*
++ * Architecturally, %dr{0..3} can have any arbitrary value. However, Xen
++ * can't allow the guest to breakpoint the Xen address range, so we limit the
++ * guest to the lower canonical half, or above the Xen range in the higher
++ * canonical half.
++ *
++ * Breakpoint lengths are specified to mask the low order address bits,
++ * meaning all breakpoints are naturally aligned. With %dr7, the widest
++ * breakpoint is 8 bytes. With DBEXT, the widest breakpoint is 4G. Both of
++ * the Xen boundaries have >4G alignment.
++ *
++ * In principle we should account for HYPERVISOR_COMPAT_VIRT_START(d), but
++ * 64bit Xen has never enforced this for compat guests, and there's no problem
++ * (to Xen) if the guest breakpoints its alias of the M2P. Skipping this
++ * aspect simplifies the logic, and causes us not to reject a migrating guest
++ * which operated fine on prior versions of Xen.
++ */
++#define breakpoint_addr_ok(a) __addr_ok(a)
++
+ long set_debugreg(struct vcpu *, unsigned int reg, unsigned long value);
+ void activate_debugregs(const struct vcpu *);
+
+diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
+index aaaf70eb63..f8636de907 100644
+--- a/xen/arch/x86/pv/misc-hypercalls.c
++++ b/xen/arch/x86/pv/misc-hypercalls.c
+@@ -72,7 +72,7 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
+ switch ( reg )
+ {
+ case 0 ... 3:
+- if ( !access_ok(value, sizeof(long)) )
++ if ( !breakpoint_addr_ok(value) )
+ return -EPERM;
+
+ v->arch.dr[reg] = value;
+--
+2.42.0
+
diff --git a/0055-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch b/0055-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch
deleted file mode 100644
index 02755a9..0000000
--- a/0055-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From b10cf1561a638c835481ae923b571cb8f7350a89 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:01:21 +0100
-Subject: [PATCH 55/89] xen: Fix Clang -Wunicode diagnostic when building
- asm-macros
-
-While trying to work around a different Clang-IAS bug (parent changeset), I
-stumbled onto:
-
- In file included from arch/x86/asm-macros.c:3:
- ./arch/x86/include/asm/spec_ctrl_asm.h:144:19: error: \u used with
- no following hex digits; treating as '\' followed by identifier [-Werror,-Wunicode]
- .L\@_fill_rsb_loop\uniq:
- ^
-
-It turns out that Clang -E is sensitive to the file extension of the source
-file it is processing. Furthermore, C explicitly permits the use of \u
-escapes in identifier names, so the diagnostic would be reasonable in
-principle if we were trying to compile the result.
-
-asm-macros should really have been .S from the outset, as it is ultimately
-generating assembly, not C. Rename it, which causes Clang not to complain.
-
-We need to introduce rules for generating a .i file from .S, and substituting
-c_flags for a_flags lets us drop the now-redundant -D__ASSEMBLY__.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 53f0d02040b1df08f0589f162790ca376e1c2040
-master date: 2023-02-24 17:44:29 +0000
----
- xen/Rules.mk | 6 ++++++
- xen/arch/x86/Makefile | 2 +-
- xen/arch/x86/{asm-macros.c => asm-macros.S} | 0
- 3 files changed, 7 insertions(+), 1 deletion(-)
- rename xen/arch/x86/{asm-macros.c => asm-macros.S} (100%)
-
-diff --git a/xen/Rules.mk b/xen/Rules.mk
-index d6b7cec0a8..59072ae8df 100644
---- a/xen/Rules.mk
-+++ b/xen/Rules.mk
-@@ -273,6 +273,9 @@ $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): $(obj)/%.init.o: $(obj)/%.o
- quiet_cmd_cpp_i_c = CPP $@
- cmd_cpp_i_c = $(CPP) $(call cpp_flags,$(c_flags)) -MQ $@ -o $@ $<
-
-+quiet_cmd_cpp_i_S = CPP $@
-+cmd_cpp_i_S = $(CPP) $(call cpp_flags,$(a_flags)) -MQ $@ -o $@ $<
-+
- quiet_cmd_cc_s_c = CC $@
- cmd_cc_s_c = $(CC) $(filter-out -Wa$(comma)%,$(c_flags)) -S $< -o $@
-
-@@ -282,6 +285,9 @@ cmd_cpp_s_S = $(CPP) $(call cpp_flags,$(a_flags)) -MQ $@ -o $@ $<
- $(obj)/%.i: $(src)/%.c FORCE
- $(call if_changed_dep,cpp_i_c)
-
-+$(obj)/%.i: $(src)/%.S FORCE
-+ $(call if_changed_dep,cpp_i_S)
-+
- $(obj)/%.s: $(src)/%.c FORCE
- $(call if_changed_dep,cc_s_c)
-
-diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
-index 177a2ff742..5accbe4c67 100644
---- a/xen/arch/x86/Makefile
-+++ b/xen/arch/x86/Makefile
-@@ -240,7 +240,7 @@ $(obj)/efi/buildid.o $(obj)/efi/relocs-dummy.o: ;
- .PHONY: include
- include: $(objtree)/arch/x86/include/asm/asm-macros.h
-
--$(obj)/asm-macros.i: CFLAGS-y += -D__ASSEMBLY__ -P
-+$(obj)/asm-macros.i: CFLAGS-y += -P
-
- $(objtree)/arch/x86/include/asm/asm-macros.h: $(obj)/asm-macros.i $(src)/Makefile
- $(call filechk,asm-macros.h)
-diff --git a/xen/arch/x86/asm-macros.c b/xen/arch/x86/asm-macros.S
-similarity index 100%
-rename from xen/arch/x86/asm-macros.c
-rename to xen/arch/x86/asm-macros.S
---
-2.40.0
-
diff --git a/0056-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch b/0056-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch
deleted file mode 100644
index 59cc172..0000000
--- a/0056-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 53bd16bcc0d0f5ed5d1ac6d6dc14bf6ecf2e2c43 Mon Sep 17 00:00:00 2001
-From: Bertrand Marquis <bertrand.marquis@arm.com>
-Date: Fri, 3 Mar 2023 08:02:30 +0100
-Subject: [PATCH 56/89] tools: Use PKG_CONFIG_FILE instead of PKG_CONFIG
- variable
-
-Replace PKG_CONFIG variable name with PKG_CONFIG_FILE for the name of
-the pkg-config file.
-This is preventing a conflict in some build systems where PKG_CONFIG
-actually contains the path to the pkg-config executable to use, as the
-default assignment in libs.mk is using a weak assignment (?=).
-
-This problem has been found when trying to build the latest version of
-Xen tools using buildroot.
-
-Fixes: d400dc5729e4 ("tools: tweak tools/libs/libs.mk for being able to support libxenctrl")
-Signed-off-by: Bertrand Marquis <bertrand.marquis@arm.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: b97e2fe7b9e1f4706693552697239ac2b71efee4
-master date: 2023-02-24 17:44:29 +0000
----
- tools/libs/ctrl/Makefile | 2 +-
- tools/libs/libs.mk | 16 ++++++++--------
- 2 files changed, 9 insertions(+), 9 deletions(-)
-
-diff --git a/tools/libs/ctrl/Makefile b/tools/libs/ctrl/Makefile
-index 93442ab389..15d0ae8e4e 100644
---- a/tools/libs/ctrl/Makefile
-+++ b/tools/libs/ctrl/Makefile
-@@ -4,7 +4,7 @@ include $(XEN_ROOT)/tools/Rules.mk
- include Makefile.common
-
- LIBHEADER := xenctrl.h xenctrl_compat.h
--PKG_CONFIG := xencontrol.pc
-+PKG_CONFIG_FILE := xencontrol.pc
- PKG_CONFIG_NAME := Xencontrol
-
- NO_HEADERS_CHK := y
-diff --git a/tools/libs/libs.mk b/tools/libs/libs.mk
-index 3eb91fc8f3..3fab5aecff 100644
---- a/tools/libs/libs.mk
-+++ b/tools/libs/libs.mk
-@@ -1,7 +1,7 @@
- # Common Makefile for building a lib.
- #
- # Variables taken as input:
--# PKG_CONFIG: name of pkg-config file (xen$(LIBNAME).pc if empty)
-+# PKG_CONFIG_FILE: name of pkg-config file (xen$(LIBNAME).pc if empty)
- # MAJOR: major version of lib (Xen version if empty)
- # MINOR: minor version of lib (0 if empty)
-
-@@ -26,7 +26,7 @@ ifneq ($(nosharedlibs),y)
- TARGETS += lib$(LIB_FILE_NAME).so
- endif
-
--PKG_CONFIG ?= $(LIB_FILE_NAME).pc
-+PKG_CONFIG_FILE ?= $(LIB_FILE_NAME).pc
- PKG_CONFIG_NAME ?= Xen$(LIBNAME)
- PKG_CONFIG_DESC ?= The $(PKG_CONFIG_NAME) library for Xen hypervisor
- PKG_CONFIG_VERSION := $(MAJOR).$(MINOR)
-@@ -35,13 +35,13 @@ PKG_CONFIG_LIB := $(LIB_FILE_NAME)
- PKG_CONFIG_REQPRIV := $(subst $(space),$(comma),$(strip $(foreach lib,$(patsubst ctrl,control,$(USELIBS_$(LIBNAME))),xen$(lib))))
-
- ifneq ($(CONFIG_LIBXC_MINIOS),y)
--TARGETS += $(PKG_CONFIG)
--$(PKG_CONFIG): PKG_CONFIG_PREFIX = $(prefix)
--$(PKG_CONFIG): PKG_CONFIG_INCDIR = $(includedir)
--$(PKG_CONFIG): PKG_CONFIG_LIBDIR = $(libdir)
-+TARGETS += $(PKG_CONFIG_FILE)
-+$(PKG_CONFIG_FILE): PKG_CONFIG_PREFIX = $(prefix)
-+$(PKG_CONFIG_FILE): PKG_CONFIG_INCDIR = $(includedir)
-+$(PKG_CONFIG_FILE): PKG_CONFIG_LIBDIR = $(libdir)
- endif
-
--PKG_CONFIG_LOCAL := $(PKG_CONFIG_DIR)/$(PKG_CONFIG)
-+PKG_CONFIG_LOCAL := $(PKG_CONFIG_DIR)/$(PKG_CONFIG_FILE)
-
- LIBHEADER ?= $(LIB_FILE_NAME).h
- LIBHEADERS = $(foreach h, $(LIBHEADER), $(XEN_INCLUDE)/$(h))
-@@ -103,7 +103,7 @@ install:: all
- $(SYMLINK_SHLIB) lib$(LIB_FILE_NAME).so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)/lib$(LIB_FILE_NAME).so.$(MAJOR)
- $(SYMLINK_SHLIB) lib$(LIB_FILE_NAME).so.$(MAJOR) $(DESTDIR)$(libdir)/lib$(LIB_FILE_NAME).so
- for i in $(LIBHEADERS); do $(INSTALL_DATA) $$i $(DESTDIR)$(includedir); done
-- $(INSTALL_DATA) $(PKG_CONFIG) $(DESTDIR)$(PKG_INSTALLDIR)
-+ $(INSTALL_DATA) $(PKG_CONFIG_FILE) $(DESTDIR)$(PKG_INSTALLDIR)
-
- .PHONY: uninstall
- uninstall::
---
-2.40.0
-
diff --git a/0057-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch b/0057-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch
deleted file mode 100644
index ea80bd0..0000000
--- a/0057-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From 01f85d835bb10d18bdab2cc780ea5ad47004516d Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:02:59 +0100
-Subject: [PATCH 57/89] libs/guest: Fix resource leaks in
- xc_core_arch_map_p2m_tree_rw()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Edwin, with the help of GCC's -fanalyzer, identified that p2m_frame_list_list
-gets leaked. What fanalyzer can't see is that the live_p2m_frame_list_list
-and live_p2m_frame_list foreign mappings are leaked too.
-
-Rework the logic so the out path is executed unconditionally, which cleans up
-all the intermediate allocations/mappings appropriately.
-
-Fixes: bd7a29c3d0b9 ("tools/libs/ctrl: fix xc_core_arch_map_p2m() to support linear p2m table")
-Reported-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-master commit: 1868d7f22660c8980bd0a7e53f044467e8b63bb5
-master date: 2023-02-27 15:51:23 +0000
----
- tools/libs/guest/xg_core_x86.c | 8 +++-----
- 1 file changed, 3 insertions(+), 5 deletions(-)
-
-diff --git a/tools/libs/guest/xg_core_x86.c b/tools/libs/guest/xg_core_x86.c
-index 61106b98b8..c5e4542ccc 100644
---- a/tools/libs/guest/xg_core_x86.c
-+++ b/tools/libs/guest/xg_core_x86.c
-@@ -229,11 +229,11 @@ xc_core_arch_map_p2m_tree_rw(xc_interface *xch, struct domain_info_context *dinf
- uint32_t dom, shared_info_any_t *live_shinfo)
- {
- /* Double and single indirect references to the live P2M table */
-- xen_pfn_t *live_p2m_frame_list_list;
-+ xen_pfn_t *live_p2m_frame_list_list = NULL;
- xen_pfn_t *live_p2m_frame_list = NULL;
- /* Copies of the above. */
- xen_pfn_t *p2m_frame_list_list = NULL;
-- xen_pfn_t *p2m_frame_list;
-+ xen_pfn_t *p2m_frame_list = NULL;
-
- int err;
- int i;
-@@ -297,8 +297,6 @@ xc_core_arch_map_p2m_tree_rw(xc_interface *xch, struct domain_info_context *dinf
-
- dinfo->p2m_frames = P2M_FL_ENTRIES;
-
-- return p2m_frame_list;
--
- out:
- err = errno;
-
-@@ -312,7 +310,7 @@ xc_core_arch_map_p2m_tree_rw(xc_interface *xch, struct domain_info_context *dinf
-
- errno = err;
-
-- return NULL;
-+ return p2m_frame_list;
- }
-
- static int
---
-2.40.0
-
diff --git a/0058-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch b/0058-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch
deleted file mode 100644
index d55c095..0000000
--- a/0058-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From fa8250f1920413f02b63551a6a4d8ef0b47891a8 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Fri, 3 Mar 2023 08:03:19 +0100
-Subject: [PATCH 58/89] libs/guest: Fix leak on realloc failure in
- backup_ptes()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From `man 2 realloc`:
-
- If realloc() fails, the original block is left untouched; it is not freed or moved.
-
-Found using GCC -fanalyzer:
-
- | 184 | backup->entries = realloc(backup->entries,
- | | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- | | | | |
- | | | | (91) when ‘realloc’ fails
- | | | (92) ‘old_ptes.entries’ leaks here; was allocated at (44)
- | | (90) ...to here
-
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 275d13184cfa52ebe4336ed66526ce93716adbe0
-master date: 2023-02-27 15:51:23 +0000
----
- tools/libs/guest/xg_offline_page.c | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
-diff --git a/tools/libs/guest/xg_offline_page.c b/tools/libs/guest/xg_offline_page.c
-index c594fdba41..ccd0299f0f 100644
---- a/tools/libs/guest/xg_offline_page.c
-+++ b/tools/libs/guest/xg_offline_page.c
-@@ -181,10 +181,16 @@ static int backup_ptes(xen_pfn_t table_mfn, int offset,
-
- if (backup->max == backup->cur)
- {
-- backup->entries = realloc(backup->entries,
-- backup->max * 2 * sizeof(struct pte_backup_entry));
-+ void *orig = backup->entries;
-+
-+ backup->entries = realloc(
-+ orig, backup->max * 2 * sizeof(struct pte_backup_entry));
-+
- if (backup->entries == NULL)
-+ {
-+ free(orig);
- return -1;
-+ }
- else
- backup->max *= 2;
- }
---
-2.40.0
-
diff --git a/0059-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch b/0059-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch
deleted file mode 100644
index 292a61a..0000000
--- a/0059-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From ec5b058d2a6436a2e180315522fcf1645a8153b4 Mon Sep 17 00:00:00 2001
-From: Sergey Dyasli <sergey.dyasli@citrix.com>
-Date: Fri, 3 Mar 2023 08:03:43 +0100
-Subject: [PATCH 59/89] x86/ucode/AMD: late load the patch on every logical
- thread
-
-Currently late ucode loading is performed only on the first core of CPU
-siblings. But according to the latest recommendation from AMD, late
-ucode loading should happen on every logical thread/core on AMD CPUs.
-
-To achieve that, introduce is_cpu_primary() helper which will consider
-every logical cpu as "primary" when running on AMD CPUs. Also include
-Hygon in the check for future-proofing.
-
-Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: f1315e48a03a42f78f9b03c0a384165baf02acae
-master date: 2023-02-28 14:51:28 +0100
----
- xen/arch/x86/cpu/microcode/core.c | 24 +++++++++++++++++++-----
- 1 file changed, 19 insertions(+), 5 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
-index 57ecc5358b..2497630bbe 100644
---- a/xen/arch/x86/cpu/microcode/core.c
-+++ b/xen/arch/x86/cpu/microcode/core.c
-@@ -274,6 +274,20 @@ static bool microcode_update_cache(struct microcode_patch *patch)
- return true;
- }
-
-+/* Returns true if ucode should be loaded on a given cpu */
-+static bool is_cpu_primary(unsigned int cpu)
-+{
-+ if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
-+ /* Load ucode on every logical thread/core */
-+ return true;
-+
-+ /* Intel CPUs should load ucode only on the first core of SMT siblings */
-+ if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
-+ return true;
-+
-+ return false;
-+}
-+
- /* Wait for a condition to be met with a timeout (us). */
- static int wait_for_condition(bool (*func)(unsigned int data),
- unsigned int data, unsigned int timeout)
-@@ -380,7 +394,7 @@ static int primary_thread_work(const struct microcode_patch *patch)
- static int cf_check microcode_nmi_callback(
- const struct cpu_user_regs *regs, int cpu)
- {
-- unsigned int primary = cpumask_first(this_cpu(cpu_sibling_mask));
-+ bool primary_cpu = is_cpu_primary(cpu);
- int ret;
-
- /* System-generated NMI, leave to main handler */
-@@ -393,10 +407,10 @@ static int cf_check microcode_nmi_callback(
- * ucode_in_nmi.
- */
- if ( cpu == cpumask_first(&cpu_online_map) ||
-- (!ucode_in_nmi && cpu == primary) )
-+ (!ucode_in_nmi && primary_cpu) )
- return 0;
-
-- if ( cpu == primary )
-+ if ( primary_cpu )
- ret = primary_thread_work(nmi_patch);
- else
- ret = secondary_nmi_work();
-@@ -547,7 +561,7 @@ static int cf_check do_microcode_update(void *patch)
- */
- if ( cpu == cpumask_first(&cpu_online_map) )
- ret = control_thread_fn(patch);
-- else if ( cpu == cpumask_first(this_cpu(cpu_sibling_mask)) )
-+ else if ( is_cpu_primary(cpu) )
- ret = primary_thread_fn(patch);
- else
- ret = secondary_thread_fn();
-@@ -640,7 +654,7 @@ static long cf_check microcode_update_helper(void *data)
- /* Calculate the number of online CPU core */
- nr_cores = 0;
- for_each_online_cpu(cpu)
-- if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
-+ if ( is_cpu_primary(cpu) )
- nr_cores++;
-
- printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores);
---
-2.40.0
-
diff --git a/0060-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch b/0060-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch
deleted file mode 100644
index fd397b0..0000000
--- a/0060-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From f8f8f07880d3817fc7b0472420eca9fecaa55358 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 11:58:50 +0000
-Subject: [PATCH 60/89] x86/shadow: account for log-dirty mode when
- pre-allocating
-
-Pre-allocation is intended to ensure that in the course of constructing
-or updating shadows there won't be any risk that just-made shadows or
-shadows being acted upon can disappear under our feet. The amount of
-pages pre-allocated then, however, needs to account for all possible
-subsequent allocations. While the use in sh_page_fault() accounts for
-all shadows which may need making, so far it didn't account for
-allocations coming from log-dirty tracking (which piggybacks onto the
-P2M allocation functions).
-
-Since shadow_prealloc() takes a count of shadows (or other data
-structures) rather than a count of pages, putting the adjustment at the
-call site of this function won't work very well: We simply can't express
-the correct count that way in all cases. Instead take care of this in
-the function itself, by "snooping" for L1 type requests. (While not
-applicable right now, future new request sites of L1 tables would then
-also be covered right away.)
-
-It is relevant to note here that pre-allocations like the one done from
-shadow_alloc_p2m_page() are benign when they fall in the "scope" of an
-earlier pre-alloc which already included that count: The inner call will
-simply find enough pages available then; it'll bail right away.
-
-This is CVE-2022-42332 / XSA-427.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Tim Deegan <tim@xen.org>
-(cherry picked from commit 91767a71061035ae42be93de495cd976f863a41a)
----
- xen/arch/x86/include/asm/paging.h | 4 ++++
- xen/arch/x86/mm/paging.c | 1 +
- xen/arch/x86/mm/shadow/common.c | 12 +++++++++++-
- 3 files changed, 16 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/include/asm/paging.h b/xen/arch/x86/include/asm/paging.h
-index b2b243a4ff..635ccc83b1 100644
---- a/xen/arch/x86/include/asm/paging.h
-+++ b/xen/arch/x86/include/asm/paging.h
-@@ -190,6 +190,10 @@ bool paging_mfn_is_dirty(const struct domain *d, mfn_t gmfn);
- #define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
- (LOGDIRTY_NODE_ENTRIES-1))
-
-+#define paging_logdirty_levels() \
-+ (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
-+ PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
-+
- #ifdef CONFIG_HVM
- /* VRAM dirty tracking support */
- struct sh_dirty_vram {
-diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
-index 8d579fa9a3..308d44bce7 100644
---- a/xen/arch/x86/mm/paging.c
-+++ b/xen/arch/x86/mm/paging.c
-@@ -282,6 +282,7 @@ void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
- if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
- return;
-
-+ BUILD_BUG_ON(paging_logdirty_levels() != 4);
- i1 = L1_LOGDIRTY_IDX(pfn);
- i2 = L2_LOGDIRTY_IDX(pfn);
- i3 = L3_LOGDIRTY_IDX(pfn);
-diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
-index a8404f97f6..cf5e181f74 100644
---- a/xen/arch/x86/mm/shadow/common.c
-+++ b/xen/arch/x86/mm/shadow/common.c
-@@ -1015,7 +1015,17 @@ bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count)
- if ( unlikely(d->is_dying) )
- return false;
-
-- ret = _shadow_prealloc(d, shadow_size(type) * count);
-+ count *= shadow_size(type);
-+ /*
-+ * Log-dirty handling may result in allocations when populating its
-+ * tracking structures. Tie this to the caller requesting space for L1
-+ * shadows.
-+ */
-+ if ( paging_mode_log_dirty(d) &&
-+ ((SHF_L1_ANY | SHF_FL1_ANY) & (1u << type)) )
-+ count += paging_logdirty_levels();
-+
-+ ret = _shadow_prealloc(d, count);
- if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
- /*
- * Failing to allocate memory required for shadow usage can only result in
---
-2.40.0
-
diff --git a/0061-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch b/0061-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch
deleted file mode 100644
index b638eca..0000000
--- a/0061-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From d0cb66d59a956ccba3dbe794f4ec01e4a4269ee9 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 12:01:01 +0000
-Subject: [PATCH 61/89] x86/HVM: bound number of pinned cache attribute regions
-
-This is exposed via DMOP, i.e. to potentially not fully privileged
-device models. With that we may not permit registration of an (almost)
-unbounded amount of such regions.
-
-This is CVE-2022-42333 / part of XSA-428.
-
-Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit a5e768640f786b681063f4e08af45d0c4e91debf)
----
- xen/arch/x86/hvm/mtrr.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index 4d2aa6def8..714911dd7f 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -595,6 +595,7 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- uint64_t gfn_end, uint32_t type)
- {
- struct hvm_mem_pinned_cacheattr_range *range;
-+ unsigned int nr = 0;
- int rc = 1;
-
- if ( !is_hvm_domain(d) )
-@@ -666,11 +667,15 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- rc = -EBUSY;
- break;
- }
-+ ++nr;
- }
- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
- if ( rc <= 0 )
- return rc;
-
-+ if ( nr >= 64 /* The limit is arbitrary. */ )
-+ return -ENOSPC;
-+
- range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
- if ( range == NULL )
- return -ENOMEM;
---
-2.40.0
-
diff --git a/0062-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch b/0062-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch
deleted file mode 100644
index a0f6efc..0000000
--- a/0062-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch
+++ /dev/null
@@ -1,126 +0,0 @@
-From a2a915b3960e6ab060d8be2c36e6e697700ea87c Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 12:01:01 +0000
-Subject: [PATCH 62/89] x86/HVM: serialize pinned cache attribute list
- manipulation
-
-While the RCU variants of list insertion and removal allow lockless list
-traversal (with RCU just read-locked), insertions and removals still
-need serializing amongst themselves. To keep things simple, use the
-domain lock for this purpose.
-
-This is CVE-2022-42334 / part of XSA-428.
-
-Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
-(cherry picked from commit 829ec245cf66560e3b50d140ccb3168e7fb7c945)
----
- xen/arch/x86/hvm/mtrr.c | 51 +++++++++++++++++++++++++----------------
- 1 file changed, 31 insertions(+), 20 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index 714911dd7f..bd5cc42ef4 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -594,7 +594,7 @@ static void cf_check free_pinned_cacheattr_entry(struct rcu_head *rcu)
- int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- uint64_t gfn_end, uint32_t type)
- {
-- struct hvm_mem_pinned_cacheattr_range *range;
-+ struct hvm_mem_pinned_cacheattr_range *range, *newr;
- unsigned int nr = 0;
- int rc = 1;
-
-@@ -608,14 +608,15 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- {
- case XEN_DOMCTL_DELETE_MEM_CACHEATTR:
- /* Remove the requested range. */
-- rcu_read_lock(&pinned_cacheattr_rcu_lock);
-- list_for_each_entry_rcu ( range,
-- &d->arch.hvm.pinned_cacheattr_ranges,
-- list )
-+ domain_lock(d);
-+ list_for_each_entry ( range,
-+ &d->arch.hvm.pinned_cacheattr_ranges,
-+ list )
- if ( range->start == gfn_start && range->end == gfn_end )
- {
-- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
- list_del_rcu(&range->list);
-+ domain_unlock(d);
-+
- type = range->type;
- call_rcu(&range->rcu, free_pinned_cacheattr_entry);
- p2m_memory_type_changed(d);
-@@ -636,7 +637,7 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- }
- return 0;
- }
-- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
-+ domain_unlock(d);
- return -ENOENT;
-
- case PAT_TYPE_UC_MINUS:
-@@ -651,7 +652,10 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- return -EINVAL;
- }
-
-- rcu_read_lock(&pinned_cacheattr_rcu_lock);
-+ newr = xzalloc(struct hvm_mem_pinned_cacheattr_range);
-+
-+ domain_lock(d);
-+
- list_for_each_entry_rcu ( range,
- &d->arch.hvm.pinned_cacheattr_ranges,
- list )
-@@ -669,27 +673,34 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- }
- ++nr;
- }
-- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
-+
- if ( rc <= 0 )
-- return rc;
-+ /* nothing */;
-+ else if ( nr >= 64 /* The limit is arbitrary. */ )
-+ rc = -ENOSPC;
-+ else if ( !newr )
-+ rc = -ENOMEM;
-+ else
-+ {
-+ newr->start = gfn_start;
-+ newr->end = gfn_end;
-+ newr->type = type;
-
-- if ( nr >= 64 /* The limit is arbitrary. */ )
-- return -ENOSPC;
-+ list_add_rcu(&newr->list, &d->arch.hvm.pinned_cacheattr_ranges);
-
-- range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
-- if ( range == NULL )
-- return -ENOMEM;
-+ newr = NULL;
-+ rc = 0;
-+ }
-+
-+ domain_unlock(d);
-
-- range->start = gfn_start;
-- range->end = gfn_end;
-- range->type = type;
-+ xfree(newr);
-
-- list_add_rcu(&range->list, &d->arch.hvm.pinned_cacheattr_ranges);
- p2m_memory_type_changed(d);
- if ( type != PAT_TYPE_WRBACK )
- flush_all(FLUSH_CACHE);
-
-- return 0;
-+ return rc;
- }
-
- static int cf_check hvm_save_mtrr_msr(struct vcpu *v, hvm_domain_context_t *h)
---
-2.40.0
-
diff --git a/0063-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch b/0063-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch
deleted file mode 100644
index fa97a41..0000000
--- a/0063-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From a730e4d1190594102784222f76a984d10bbc88a9 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 10 Feb 2023 21:11:14 +0000
-Subject: [PATCH 63/89] x86/spec-ctrl: Defer CR4_PV32_RESTORE on the
- cstar_enter path
-
-As stated (correctly) by the comment next to SPEC_CTRL_ENTRY_FROM_PV, between
-the two hunks visible in the patch, RET's are not safe prior to this point.
-
-CR4_PV32_RESTORE hides a CALL/RET pair in certain configurations (PV32
-compiled in, SMEP or SMAP active), and the RET can be attacked with one of
-several known speculative issues.
-
-Furthermore, CR4_PV32_RESTORE also hides a reference to the cr4_pv32_mask
-global variable, which is not safe when XPTI is active before restoring Xen's
-full pagetables.
-
-This crash has gone unnoticed because it is only AMD CPUs which permit the
-SYSCALL instruction in compatibility mode, and these are not vulnerable to
-Meltdown so don't activate XPTI by default.
-
-This is XSA-429 / CVE-2022-42331
-
-Fixes: 5e7962901131 ("x86/entry: Organise the use of MSR_SPEC_CTRL at each entry/exit point")
-Fixes: 5784de3e2067 ("x86: Meltdown band-aid against malicious 64-bit PV guests")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit df5b055b12116d9e63ced59ae5389e69a2a3de48)
----
- xen/arch/x86/x86_64/entry.S | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index ae01285181..7675a59ff0 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -288,7 +288,6 @@ ENTRY(cstar_enter)
- ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
- #endif
- push %rax /* Guest %rsp */
-- CR4_PV32_RESTORE
- movq 8(%rsp), %rax /* Restore guest %rax. */
- movq $FLAT_USER_SS32, 8(%rsp) /* Assume a 64bit domain. Compat handled lower. */
- pushq %r11
-@@ -312,6 +311,8 @@ ENTRY(cstar_enter)
- .Lcstar_cr3_okay:
- sti
-
-+ CR4_PV32_RESTORE
-+
- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx
-
- #ifdef CONFIG_PV32
---
-2.40.0
-
diff --git a/0064-x86-vmx-implement-VMExit-based-guest-Bus-Lock-detect.patch b/0064-x86-vmx-implement-VMExit-based-guest-Bus-Lock-detect.patch
deleted file mode 100644
index cebb501..0000000
--- a/0064-x86-vmx-implement-VMExit-based-guest-Bus-Lock-detect.patch
+++ /dev/null
@@ -1,175 +0,0 @@
-From 83f12e4eafdc4b034501adf4847a09a1293fdf8b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 21 Mar 2023 13:40:41 +0100
-Subject: [PATCH 64/89] x86/vmx: implement VMExit based guest Bus Lock
- detection
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add support for enabling guest Bus Lock Detection on Intel systems.
-Such detection works by triggering a vmexit, which ought to be enough
-of a pause to prevent a guest from abusing of the Bus Lock.
-
-Add an extra Xen perf counter to track the number of Bus Locks detected.
-This is done because Bus Locks can also be reported by setting the bit
-26 in the exit reason field, so also account for those.
-
-Note EXIT_REASON_BUS_LOCK VMExits will always have bit 26 set in
-exit_reason, and hence the performance counter doesn't need to be
-increased for EXIT_REASON_BUS_LOCK handling.
-
-Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: f7d07619d2ae0382e2922e287fbfbb27722f3f0b
-master date: 2022-12-19 11:22:43 +0100
----
- xen/arch/x86/hvm/vmx/vmcs.c | 4 +++-
- xen/arch/x86/hvm/vmx/vmx.c | 15 +++++++++++++++
- xen/arch/x86/hvm/vmx/vvmx.c | 3 ++-
- xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 3 +++
- xen/arch/x86/include/asm/hvm/vmx/vmx.h | 2 ++
- xen/arch/x86/include/asm/perfc_defn.h | 4 +++-
- 6 files changed, 28 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
-index 84dbb88d33..a0d5e8d6ab 100644
---- a/xen/arch/x86/hvm/vmx/vmcs.c
-+++ b/xen/arch/x86/hvm/vmx/vmcs.c
-@@ -209,6 +209,7 @@ static void __init vmx_display_features(void)
- P(cpu_has_vmx_virt_exceptions, "Virtualisation Exceptions");
- P(cpu_has_vmx_pml, "Page Modification Logging");
- P(cpu_has_vmx_tsc_scaling, "TSC Scaling");
-+ P(cpu_has_vmx_bus_lock_detection, "Bus Lock Detection");
- #undef P
-
- if ( !printed )
-@@ -318,7 +319,8 @@ static int vmx_init_vmcs_config(bool bsp)
- SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
- SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
- SECONDARY_EXEC_XSAVES |
-- SECONDARY_EXEC_TSC_SCALING);
-+ SECONDARY_EXEC_TSC_SCALING |
-+ SECONDARY_EXEC_BUS_LOCK_DETECTION);
- if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
- opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
- if ( opt_vpid_enabled )
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 861f91f2af..d0f0f2e429 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -4084,6 +4084,12 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- return;
- }
-
-+ if ( unlikely(exit_reason & VMX_EXIT_REASONS_BUS_LOCK) )
-+ {
-+ perfc_incr(buslock);
-+ exit_reason &= ~VMX_EXIT_REASONS_BUS_LOCK;
-+ }
-+
- /* XXX: This looks ugly, but we need a mechanism to ensure
- * any pending vmresume has really happened
- */
-@@ -4593,6 +4599,15 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- vmx_handle_descriptor_access(exit_reason);
- break;
-
-+ case EXIT_REASON_BUS_LOCK:
-+ /*
-+ * Nothing to do: just taking a vmexit should be enough of a pause to
-+ * prevent a VM from crippling the host with bus locks. Note
-+ * EXIT_REASON_BUS_LOCK will always have bit 26 set in exit_reason, and
-+ * hence the perf counter is already increased.
-+ */
-+ break;
-+
- case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED:
- case EXIT_REASON_INVPCID:
- /* fall through */
-diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
-index 5f54451475..2095c1e612 100644
---- a/xen/arch/x86/hvm/vmx/vvmx.c
-+++ b/xen/arch/x86/hvm/vmx/vvmx.c
-@@ -2405,7 +2405,7 @@ void nvmx_idtv_handling(void)
- * be reinjected, otherwise, pass to L1.
- */
- __vmread(VM_EXIT_REASON, &reason);
-- if ( reason != EXIT_REASON_EPT_VIOLATION ?
-+ if ( (uint16_t)reason != EXIT_REASON_EPT_VIOLATION ?
- !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) :
- !nvcpu->nv_vmexit_pending )
- {
-@@ -2486,6 +2486,7 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
- case EXIT_REASON_EPT_VIOLATION:
- case EXIT_REASON_EPT_MISCONFIG:
- case EXIT_REASON_EXTERNAL_INTERRUPT:
-+ case EXIT_REASON_BUS_LOCK:
- /* pass to L0 handler */
- break;
- case VMX_EXIT_REASONS_FAILED_VMENTRY:
-diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-index 75f9928abf..f3df5113d4 100644
---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-@@ -267,6 +267,7 @@ extern u32 vmx_vmentry_control;
- #define SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS 0x00040000
- #define SECONDARY_EXEC_XSAVES 0x00100000
- #define SECONDARY_EXEC_TSC_SCALING 0x02000000
-+#define SECONDARY_EXEC_BUS_LOCK_DETECTION 0x40000000
- extern u32 vmx_secondary_exec_control;
-
- #define VMX_EPT_EXEC_ONLY_SUPPORTED 0x00000001
-@@ -346,6 +347,8 @@ extern u64 vmx_ept_vpid_cap;
- (vmx_secondary_exec_control & SECONDARY_EXEC_XSAVES)
- #define cpu_has_vmx_tsc_scaling \
- (vmx_secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
-+#define cpu_has_vmx_bus_lock_detection \
-+ (vmx_secondary_exec_control & SECONDARY_EXEC_BUS_LOCK_DETECTION)
-
- #define VMCS_RID_TYPE_MASK 0x80000000
-
-diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmx.h b/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-index 8eedf59155..03995701a1 100644
---- a/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-+++ b/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-@@ -159,6 +159,7 @@ static inline void pi_clear_sn(struct pi_desc *pi_desc)
- * Exit Reasons
- */
- #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
-+#define VMX_EXIT_REASONS_BUS_LOCK (1u << 26)
-
- #define EXIT_REASON_EXCEPTION_NMI 0
- #define EXIT_REASON_EXTERNAL_INTERRUPT 1
-@@ -219,6 +220,7 @@ static inline void pi_clear_sn(struct pi_desc *pi_desc)
- #define EXIT_REASON_PML_FULL 62
- #define EXIT_REASON_XSAVES 63
- #define EXIT_REASON_XRSTORS 64
-+#define EXIT_REASON_BUS_LOCK 74
- /* Remember to also update VMX_PERF_EXIT_REASON_SIZE! */
-
- /*
-diff --git a/xen/arch/x86/include/asm/perfc_defn.h b/xen/arch/x86/include/asm/perfc_defn.h
-index 509afc516b..6fce21e85a 100644
---- a/xen/arch/x86/include/asm/perfc_defn.h
-+++ b/xen/arch/x86/include/asm/perfc_defn.h
-@@ -6,7 +6,7 @@ PERFCOUNTER_ARRAY(exceptions, "exceptions", 32)
-
- #ifdef CONFIG_HVM
-
--#define VMX_PERF_EXIT_REASON_SIZE 65
-+#define VMX_PERF_EXIT_REASON_SIZE 75
- #define VMEXIT_NPF_PERFC 143
- #define SVM_PERF_EXIT_REASON_SIZE (VMEXIT_NPF_PERFC + 1)
- PERFCOUNTER_ARRAY(vmexits, "vmexits",
-@@ -128,4 +128,6 @@ PERFCOUNTER(pauseloop_exits, "vmexits from Pause-Loop Detection")
- PERFCOUNTER(iommu_pt_shatters, "IOMMU page table shatters")
- PERFCOUNTER(iommu_pt_coalesces, "IOMMU page table coalesces")
-
-+PERFCOUNTER(buslock, "Bus Locks Detected")
-+
- /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
---
-2.40.0
-
diff --git a/0065-x86-vmx-introduce-helper-to-set-VMX_INTR_SHADOW_NMI.patch b/0065-x86-vmx-introduce-helper-to-set-VMX_INTR_SHADOW_NMI.patch
deleted file mode 100644
index 847ee99..0000000
--- a/0065-x86-vmx-introduce-helper-to-set-VMX_INTR_SHADOW_NMI.patch
+++ /dev/null
@@ -1,102 +0,0 @@
-From 27abea1ba6fa68f81b98de31cf9b9ebb594ff238 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 21 Mar 2023 13:41:49 +0100
-Subject: [PATCH 65/89] x86/vmx: introduce helper to set VMX_INTR_SHADOW_NMI
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Introduce a small helper to OR VMX_INTR_SHADOW_NMI in
-GUEST_INTERRUPTIBILITY_INFO in order to help deal with the NMI
-unblocked by IRET case. Replace the existing usage in handling
-EXIT_REASON_EXCEPTION_NMI and also add such handling to EPT violations
-and page-modification log-full events.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: d329b37d12132164c3894d0b6284be72576ef950
-master date: 2022-12-19 11:23:34 +0100
----
- xen/arch/x86/hvm/vmx/vmx.c | 28 +++++++++++++++++++-------
- xen/arch/x86/include/asm/hvm/vmx/vmx.h | 3 +++
- 2 files changed, 24 insertions(+), 7 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index d0f0f2e429..456726e897 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -3967,6 +3967,15 @@ static int vmx_handle_apic_write(void)
- return vlapic_apicv_write(current, exit_qualification & 0xfff);
- }
-
-+static void undo_nmis_unblocked_by_iret(void)
-+{
-+ unsigned long guest_info;
-+
-+ __vmread(GUEST_INTERRUPTIBILITY_INFO, &guest_info);
-+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
-+ guest_info | VMX_INTR_SHADOW_NMI);
-+}
-+
- void vmx_vmexit_handler(struct cpu_user_regs *regs)
- {
- unsigned long exit_qualification, exit_reason, idtv_info, intr_info = 0;
-@@ -4167,13 +4176,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- if ( unlikely(intr_info & INTR_INFO_NMI_UNBLOCKED_BY_IRET) &&
- !(idtv_info & INTR_INFO_VALID_MASK) &&
- (vector != TRAP_double_fault) )
-- {
-- unsigned long guest_info;
--
-- __vmread(GUEST_INTERRUPTIBILITY_INFO, &guest_info);
-- __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
-- guest_info | VMX_INTR_SHADOW_NMI);
-- }
-+ undo_nmis_unblocked_by_iret();
-
- perfc_incra(cause_vector, vector);
-
-@@ -4539,6 +4542,11 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
-
- __vmread(GUEST_PHYSICAL_ADDRESS, &gpa);
- __vmread(EXIT_QUALIFICATION, &exit_qualification);
-+
-+ if ( unlikely(exit_qualification & INTR_INFO_NMI_UNBLOCKED_BY_IRET) &&
-+ !(idtv_info & INTR_INFO_VALID_MASK) )
-+ undo_nmis_unblocked_by_iret();
-+
- ept_handle_violation(exit_qualification, gpa);
- break;
- }
-@@ -4583,6 +4591,12 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- break;
-
- case EXIT_REASON_PML_FULL:
-+ __vmread(EXIT_QUALIFICATION, &exit_qualification);
-+
-+ if ( unlikely(exit_qualification & INTR_INFO_NMI_UNBLOCKED_BY_IRET) &&
-+ !(idtv_info & INTR_INFO_VALID_MASK) )
-+ undo_nmis_unblocked_by_iret();
-+
- vmx_vcpu_flush_pml_buffer(v);
- break;
-
-diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmx.h b/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-index 03995701a1..eae39365aa 100644
---- a/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-+++ b/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-@@ -225,6 +225,9 @@ static inline void pi_clear_sn(struct pi_desc *pi_desc)
-
- /*
- * Interruption-information format
-+ *
-+ * Note INTR_INFO_NMI_UNBLOCKED_BY_IRET is also used with Exit Qualification
-+ * field for EPT violations, PML full and SPP-related event vmexits.
- */
- #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */
- #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
---
-2.40.0
-
diff --git a/0066-x86-vmx-implement-Notify-VM-Exit.patch b/0066-x86-vmx-implement-Notify-VM-Exit.patch
deleted file mode 100644
index bc54d18..0000000
--- a/0066-x86-vmx-implement-Notify-VM-Exit.patch
+++ /dev/null
@@ -1,243 +0,0 @@
-From b745ff30113d2bd91e2d34cf56437b2fe2e2ea35 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 21 Mar 2023 13:42:43 +0100
-Subject: [PATCH 66/89] x86/vmx: implement Notify VM Exit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Under certain conditions guests can get the CPU stuck in an unbounded
-loop without the possibility of an interrupt window occurring on an
-instruction boundary. This was the case with the scenarios described
-in XSA-156.
-
-Make use of the Notify VM Exit mechanism, that will trigger a VM Exit
-if no interrupt window occurs for a specified amount of time. Note
-that using the Notify VM Exit avoids having to trap #AC and #DB
-exceptions, as Xen is guaranteed to get a VM Exit even if the guest
-puts the CPU in a loop without an interrupt window, as such disable
-the intercepts if the feature is available and enabled.
-
-Setting the notify VM exit window to 0 is safe because there's a
-threshold added by the hardware in order to have a sane window value.
-
-Note the handling of EXIT_REASON_NOTIFY in the nested virtualization
-case is passed to L0, and hence a nested guest being able to trigger a
-notify VM exit with an invalid context would be able to crash the L1
-hypervisor (by L0 destroying the domain). Since we don't expose VM
-Notify support to L1 it should already enable the required
-protections in order to prevent VM Notify from triggering in the first
-place.
-
-Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-
-x86/vmx: Partially revert "x86/vmx: implement Notify VM Exit"
-
-The original patch tried to do two things - implement VMNotify, and
-re-optimise VT-x to not intercept #DB/#AC by default.
-
-The second part is buggy in multiple ways. Both GDBSX and Introspection need
-to conditionally intercept #DB, which was not accounted for. Also, #DB
-interception has nothing at all to do with cpu_has_monitor_trap_flag.
-
-Revert the second half, leaving #DB/#AC intercepted unilaterally, but with
-VMNotify active by default when available.
-
-Fixes: 573279cde1c4 ("x86/vmx: implement Notify VM Exit")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: 573279cde1c4e752d4df34bc65ffafa17573148e
-master date: 2022-12-19 11:24:14 +0100
-master commit: 5f08bc9404c7cfa8131e262c7dbcb4d96c752686
-master date: 2023-01-20 19:39:32 +0000
----
- docs/misc/xen-command-line.pandoc | 11 +++++++++++
- xen/arch/x86/hvm/vmx/vmcs.c | 10 ++++++++++
- xen/arch/x86/hvm/vmx/vmx.c | 16 ++++++++++++++++
- xen/arch/x86/hvm/vmx/vvmx.c | 1 +
- xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 4 ++++
- xen/arch/x86/include/asm/hvm/vmx/vmx.h | 6 ++++++
- xen/arch/x86/include/asm/perfc_defn.h | 3 ++-
- 7 files changed, 50 insertions(+), 1 deletion(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 5be5ce10c6..d601120faa 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2634,6 +2634,17 @@ guest will notify Xen that it has failed to acquire a spinlock.
- <major>, <minor> and <build> must be integers. The values will be
- encoded in guest CPUID 0x40000002 if viridian enlightenments are enabled.
-
-+### vm-notify-window (Intel)
-+> `= <integer>`
-+
-+> Default: `0`
-+
-+Specify the value of the VM Notify window used to detect locked VMs. Set to -1
-+to disable the feature. Value is in units of crystal clock cycles.
-+
-+Note the hardware might add a threshold to the provided value in order to make
-+it safe, and hence using 0 is fine.
-+
- ### vpid (Intel)
- > `= <boolean>`
-
-diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
-index a0d5e8d6ab..7912053bda 100644
---- a/xen/arch/x86/hvm/vmx/vmcs.c
-+++ b/xen/arch/x86/hvm/vmx/vmcs.c
-@@ -67,6 +67,9 @@ integer_param("ple_gap", ple_gap);
- static unsigned int __read_mostly ple_window = 4096;
- integer_param("ple_window", ple_window);
-
-+static unsigned int __ro_after_init vm_notify_window;
-+integer_param("vm-notify-window", vm_notify_window);
-+
- static bool __read_mostly opt_ept_pml = true;
- static s8 __read_mostly opt_ept_ad = -1;
- int8_t __read_mostly opt_ept_exec_sp = -1;
-@@ -210,6 +213,7 @@ static void __init vmx_display_features(void)
- P(cpu_has_vmx_pml, "Page Modification Logging");
- P(cpu_has_vmx_tsc_scaling, "TSC Scaling");
- P(cpu_has_vmx_bus_lock_detection, "Bus Lock Detection");
-+ P(cpu_has_vmx_notify_vm_exiting, "Notify VM Exit");
- #undef P
-
- if ( !printed )
-@@ -329,6 +333,8 @@ static int vmx_init_vmcs_config(bool bsp)
- opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;
- if ( opt_ept_pml )
- opt |= SECONDARY_EXEC_ENABLE_PML;
-+ if ( vm_notify_window != ~0u )
-+ opt |= SECONDARY_EXEC_NOTIFY_VM_EXITING;
-
- /*
- * "APIC Register Virtualization" and "Virtual Interrupt Delivery"
-@@ -1290,6 +1296,10 @@ static int construct_vmcs(struct vcpu *v)
- v->arch.hvm.vmx.exception_bitmap = HVM_TRAP_MASK
- | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
- | (v->arch.fully_eager_fpu ? 0 : (1U << TRAP_no_device));
-+
-+ if ( cpu_has_vmx_notify_vm_exiting )
-+ __vmwrite(NOTIFY_WINDOW, vm_notify_window);
-+
- vmx_update_exception_bitmap(v);
-
- v->arch.hvm.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 456726e897..f0e759eeaf 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -4622,6 +4622,22 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- */
- break;
-
-+ case EXIT_REASON_NOTIFY:
-+ __vmread(EXIT_QUALIFICATION, &exit_qualification);
-+
-+ if ( unlikely(exit_qualification & NOTIFY_VM_CONTEXT_INVALID) )
-+ {
-+ perfc_incr(vmnotify_crash);
-+ gprintk(XENLOG_ERR, "invalid VM context after notify vmexit\n");
-+ domain_crash(v->domain);
-+ break;
-+ }
-+
-+ if ( unlikely(exit_qualification & INTR_INFO_NMI_UNBLOCKED_BY_IRET) )
-+ undo_nmis_unblocked_by_iret();
-+
-+ break;
-+
- case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED:
- case EXIT_REASON_INVPCID:
- /* fall through */
-diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
-index 2095c1e612..f8fe8d0c14 100644
---- a/xen/arch/x86/hvm/vmx/vvmx.c
-+++ b/xen/arch/x86/hvm/vmx/vvmx.c
-@@ -2487,6 +2487,7 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
- case EXIT_REASON_EPT_MISCONFIG:
- case EXIT_REASON_EXTERNAL_INTERRUPT:
- case EXIT_REASON_BUS_LOCK:
-+ case EXIT_REASON_NOTIFY:
- /* pass to L0 handler */
- break;
- case VMX_EXIT_REASONS_FAILED_VMENTRY:
-diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-index f3df5113d4..78404e42b3 100644
---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-@@ -268,6 +268,7 @@ extern u32 vmx_vmentry_control;
- #define SECONDARY_EXEC_XSAVES 0x00100000
- #define SECONDARY_EXEC_TSC_SCALING 0x02000000
- #define SECONDARY_EXEC_BUS_LOCK_DETECTION 0x40000000
-+#define SECONDARY_EXEC_NOTIFY_VM_EXITING 0x80000000
- extern u32 vmx_secondary_exec_control;
-
- #define VMX_EPT_EXEC_ONLY_SUPPORTED 0x00000001
-@@ -349,6 +350,8 @@ extern u64 vmx_ept_vpid_cap;
- (vmx_secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
- #define cpu_has_vmx_bus_lock_detection \
- (vmx_secondary_exec_control & SECONDARY_EXEC_BUS_LOCK_DETECTION)
-+#define cpu_has_vmx_notify_vm_exiting \
-+ (vmx_secondary_exec_control & SECONDARY_EXEC_NOTIFY_VM_EXITING)
-
- #define VMCS_RID_TYPE_MASK 0x80000000
-
-@@ -456,6 +459,7 @@ enum vmcs_field {
- SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
- PLE_GAP = 0x00004020,
- PLE_WINDOW = 0x00004022,
-+ NOTIFY_WINDOW = 0x00004024,
- VM_INSTRUCTION_ERROR = 0x00004400,
- VM_EXIT_REASON = 0x00004402,
- VM_EXIT_INTR_INFO = 0x00004404,
-diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmx.h b/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-index eae39365aa..8e1e42ac47 100644
---- a/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-+++ b/xen/arch/x86/include/asm/hvm/vmx/vmx.h
-@@ -221,6 +221,7 @@ static inline void pi_clear_sn(struct pi_desc *pi_desc)
- #define EXIT_REASON_XSAVES 63
- #define EXIT_REASON_XRSTORS 64
- #define EXIT_REASON_BUS_LOCK 74
-+#define EXIT_REASON_NOTIFY 75
- /* Remember to also update VMX_PERF_EXIT_REASON_SIZE! */
-
- /*
-@@ -236,6 +237,11 @@ static inline void pi_clear_sn(struct pi_desc *pi_desc)
- #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
- #define INTR_INFO_RESVD_BITS_MASK 0x7ffff000
-
-+/*
-+ * Exit Qualifications for NOTIFY VM EXIT
-+ */
-+#define NOTIFY_VM_CONTEXT_INVALID 1u
-+
- /*
- * Exit Qualifications for MOV for Control Register Access
- */
-diff --git a/xen/arch/x86/include/asm/perfc_defn.h b/xen/arch/x86/include/asm/perfc_defn.h
-index 6fce21e85a..487e20dc97 100644
---- a/xen/arch/x86/include/asm/perfc_defn.h
-+++ b/xen/arch/x86/include/asm/perfc_defn.h
-@@ -6,7 +6,7 @@ PERFCOUNTER_ARRAY(exceptions, "exceptions", 32)
-
- #ifdef CONFIG_HVM
-
--#define VMX_PERF_EXIT_REASON_SIZE 75
-+#define VMX_PERF_EXIT_REASON_SIZE 76
- #define VMEXIT_NPF_PERFC 143
- #define SVM_PERF_EXIT_REASON_SIZE (VMEXIT_NPF_PERFC + 1)
- PERFCOUNTER_ARRAY(vmexits, "vmexits",
-@@ -129,5 +129,6 @@ PERFCOUNTER(iommu_pt_shatters, "IOMMU page table shatters")
- PERFCOUNTER(iommu_pt_coalesces, "IOMMU page table coalesces")
-
- PERFCOUNTER(buslock, "Bus Locks Detected")
-+PERFCOUNTER(vmnotify_crash, "domain crashes by Notify VM Exit")
-
- /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
---
-2.40.0
-
diff --git a/0067-tools-python-change-s-size-type-for-Python-3.10.patch b/0067-tools-python-change-s-size-type-for-Python-3.10.patch
deleted file mode 100644
index 0671c67..0000000
--- a/0067-tools-python-change-s-size-type-for-Python-3.10.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 651ffe2c7847cb9922d22980984a3bea6f47bea7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
- <marmarek@invisiblethingslab.com>
-Date: Tue, 21 Mar 2023 13:43:44 +0100
-Subject: [PATCH 67/89] tools/python: change 's#' size type for Python >= 3.10
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Python < 3.10 by default uses 'int' type for data+size string types
-(s#), unless PY_SSIZE_T_CLEAN is defined - in which case it uses
-Py_ssize_t. The former behavior was removed in Python 3.10 and now it's
-required to define PY_SSIZE_T_CLEAN before including Python.h, and using
-Py_ssize_t for the length argument. The PY_SSIZE_T_CLEAN behavior is
-supported since Python 2.5.
-
-Adjust bindings accordingly.
-
-Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 897257ba49d0a6ddcf084960fd792ccce9c40f94
-master date: 2023-02-06 08:50:13 +0100
----
- tools/python/xen/lowlevel/xc/xc.c | 3 ++-
- tools/python/xen/lowlevel/xs/xs.c | 3 ++-
- 2 files changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
-index fd00861032..cfb2734a99 100644
---- a/tools/python/xen/lowlevel/xc/xc.c
-+++ b/tools/python/xen/lowlevel/xc/xc.c
-@@ -4,6 +4,7 @@
- * Copyright (c) 2003-2004, K A Fraser (University of Cambridge)
- */
-
-+#define PY_SSIZE_T_CLEAN
- #include <Python.h>
- #define XC_WANT_COMPAT_MAP_FOREIGN_API
- #include <xenctrl.h>
-@@ -1774,7 +1775,7 @@ static PyObject *pyflask_load(PyObject *self, PyObject *args, PyObject *kwds)
- {
- xc_interface *xc_handle;
- char *policy;
-- uint32_t len;
-+ Py_ssize_t len;
- int ret;
-
- static char *kwd_list[] = { "policy", NULL };
-diff --git a/tools/python/xen/lowlevel/xs/xs.c b/tools/python/xen/lowlevel/xs/xs.c
-index 0dad7fa5f2..3ba5a8b893 100644
---- a/tools/python/xen/lowlevel/xs/xs.c
-+++ b/tools/python/xen/lowlevel/xs/xs.c
-@@ -18,6 +18,7 @@
- * Copyright (C) 2005 XenSource Ltd.
- */
-
-+#define PY_SSIZE_T_CLEAN
- #include <Python.h>
-
- #include <stdbool.h>
-@@ -141,7 +142,7 @@ static PyObject *xspy_write(XsHandle *self, PyObject *args)
- char *thstr;
- char *path;
- char *data;
-- int data_n;
-+ Py_ssize_t data_n;
- bool result;
-
- if (!xh)
---
-2.40.0
-
diff --git a/0068-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch b/0068-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch
deleted file mode 100644
index a47812b..0000000
--- a/0068-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 244d39fb13abae6c2da341b76363f169d8bbc93b Mon Sep 17 00:00:00 2001
-From: Bernhard Kaindl <bernhard.kaindl@citrix.com>
-Date: Tue, 21 Mar 2023 13:44:04 +0100
-Subject: [PATCH 68/89] tools/xenmon: Fix xenmon.py for with python3.x
-
-Fixes for Py3:
-* class Delayed(): file not defined; also an error for pylint -E. Inherit
- object instead for Py2 compatibility. Fix DomainInfo() too.
-* Inconsistent use of tabs and spaces for indentation (in one block)
-
-Signed-off-by: Bernhard Kaindl <bernhard.kaindl@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 3a59443c1d5ae0677a792c660ccd3796ce036732
-master date: 2023-02-06 10:22:12 +0000
----
- tools/xenmon/xenmon.py | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/tools/xenmon/xenmon.py b/tools/xenmon/xenmon.py
-index 175eacd2cb..977ada6887 100644
---- a/tools/xenmon/xenmon.py
-+++ b/tools/xenmon/xenmon.py
-@@ -117,7 +117,7 @@ def setup_cmdline_parser():
- return parser
-
- # encapsulate information about a domain
--class DomainInfo:
-+class DomainInfo(object):
- def __init__(self):
- self.allocated_sum = 0
- self.gotten_sum = 0
-@@ -533,7 +533,7 @@ def show_livestats(cpu):
- # simple functions to allow initialization of log files without actually
- # physically creating files that are never used; only on the first real
- # write does the file get created
--class Delayed(file):
-+class Delayed(object):
- def __init__(self, filename, mode):
- self.filename = filename
- self.saved_mode = mode
-@@ -677,8 +677,8 @@ def main():
-
- if os.uname()[0] == "SunOS":
- xenbaked_cmd = "/usr/lib/xenbaked"
-- stop_cmd = "/usr/bin/pkill -INT -z global xenbaked"
-- kill_cmd = "/usr/bin/pkill -KILL -z global xenbaked"
-+ stop_cmd = "/usr/bin/pkill -INT -z global xenbaked"
-+ kill_cmd = "/usr/bin/pkill -KILL -z global xenbaked"
- else:
- # assumes that xenbaked is in your path
- xenbaked_cmd = "xenbaked"
---
-2.40.0
-
diff --git a/0069-x86-spec-ctrl-Add-BHI-controls-to-userspace-componen.patch b/0069-x86-spec-ctrl-Add-BHI-controls-to-userspace-componen.patch
deleted file mode 100644
index 734a2e5..0000000
--- a/0069-x86-spec-ctrl-Add-BHI-controls-to-userspace-componen.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From b4dad09bb23c439f2e67ed2eb6d7bdd640b8bbae Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 21 Mar 2023 13:44:27 +0100
-Subject: [PATCH 69/89] x86/spec-ctrl: Add BHI controls to userspace components
-
-This was an oversight when adding the Xen parts.
-
-Fixes: cea9ae062295 ("x86/spec-ctrl: Enumeration for new Intel BHI controls")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 9276e832aef60437da13d91e66fc259fd94d6f91
-master date: 2023-03-13 11:26:26 +0000
----
- tools/libs/light/libxl_cpuid.c | 3 +++
- tools/misc/xen-cpuid.c | 6 +++---
- 2 files changed, 6 insertions(+), 3 deletions(-)
-
-diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
-index d97a2f3338..55cfbc8f23 100644
---- a/tools/libs/light/libxl_cpuid.c
-+++ b/tools/libs/light/libxl_cpuid.c
-@@ -238,6 +238,9 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
- {"cet-sss", 0x00000007, 1, CPUID_REG_EDX, 18, 1},
-
- {"intel-psfd", 0x00000007, 2, CPUID_REG_EDX, 0, 1},
-+ {"ipred-ctrl", 0x00000007, 2, CPUID_REG_EDX, 1, 1},
-+ {"rrsba-ctrl", 0x00000007, 2, CPUID_REG_EDX, 2, 1},
-+ {"bhi-ctrl", 0x00000007, 2, CPUID_REG_EDX, 4, 1},
- {"mcdt-no", 0x00000007, 2, CPUID_REG_EDX, 5, 1},
-
- {"lahfsahf", 0x80000001, NA, CPUID_REG_ECX, 0, 1},
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index 0248eaef44..45e443f5d9 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -213,9 +213,9 @@ static const char *const str_7d1[32] =
-
- static const char *const str_7d2[32] =
- {
-- [ 0] = "intel-psfd",
--
-- /* 4 */ [ 5] = "mcdt-no",
-+ [ 0] = "intel-psfd", [ 1] = "ipred-ctrl",
-+ [ 2] = "rrsba-ctrl",
-+ [ 4] = "bhi-ctrl", [ 5] = "mcdt-no",
- };
-
- static const struct {
---
-2.40.0
-
diff --git a/0070-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch b/0070-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch
deleted file mode 100644
index 0b2c2b4..0000000
--- a/0070-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From b5409f4e4d0722e8669123d59f15f784903d153f Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:44:53 +0100
-Subject: [PATCH 70/89] core-parking: fix build with gcc12 and NR_CPUS=1
-
-Gcc12 takes issue with core_parking_remove()'s
-
- for ( ; i < cur_idle_nums; ++i )
- core_parking_cpunum[i] = core_parking_cpunum[i + 1];
-
-complaining that the right hand side array access is past the bounds of
-1. Clearly the compiler can't know that cur_idle_nums can only ever be
-zero in this case (as the sole CPU cannot be parked).
-
-Arrange for core_parking.c's contents to not be needed altogether, and
-then disable its building when NR_CPUS == 1.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 4b0422f70feb4b1cd04598ffde805fc224f3812e
-master date: 2023-03-13 15:15:42 +0100
----
- xen/arch/x86/Kconfig | 2 +-
- xen/arch/x86/platform_hypercall.c | 11 ++++++++---
- xen/arch/x86/sysctl.c | 3 +++
- xen/common/Kconfig | 1 +
- 4 files changed, 13 insertions(+), 4 deletions(-)
-
-diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
-index 6a7825f4ba..2a5c3304e2 100644
---- a/xen/arch/x86/Kconfig
-+++ b/xen/arch/x86/Kconfig
-@@ -10,7 +10,7 @@ config X86
- select ALTERNATIVE_CALL
- select ARCH_MAP_DOMAIN_PAGE
- select ARCH_SUPPORTS_INT128
-- select CORE_PARKING
-+ imply CORE_PARKING
- select HAS_ALTERNATIVE
- select HAS_COMPAT
- select HAS_CPUFREQ
-diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
-index a7341dc3d7..e7deee2268 100644
---- a/xen/arch/x86/platform_hypercall.c
-+++ b/xen/arch/x86/platform_hypercall.c
-@@ -727,12 +727,17 @@ ret_t do_platform_op(
- case XEN_CORE_PARKING_SET:
- idle_nums = min_t(uint32_t,
- op->u.core_parking.idle_nums, num_present_cpus() - 1);
-- ret = continue_hypercall_on_cpu(
-- 0, core_parking_helper, (void *)(unsigned long)idle_nums);
-+ if ( CONFIG_NR_CPUS > 1 )
-+ ret = continue_hypercall_on_cpu(
-+ 0, core_parking_helper,
-+ (void *)(unsigned long)idle_nums);
-+ else if ( idle_nums )
-+ ret = -EINVAL;
- break;
-
- case XEN_CORE_PARKING_GET:
-- op->u.core_parking.idle_nums = get_cur_idle_nums();
-+ op->u.core_parking.idle_nums = CONFIG_NR_CPUS > 1
-+ ? get_cur_idle_nums() : 0;
- ret = __copy_field_to_guest(u_xenpf_op, op, u.core_parking) ?
- -EFAULT : 0;
- break;
-diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
-index f82abc2488..f8f8d79755 100644
---- a/xen/arch/x86/sysctl.c
-+++ b/xen/arch/x86/sysctl.c
-@@ -179,6 +179,9 @@ long arch_do_sysctl(
- ret = -EBUSY;
- break;
- }
-+ if ( CONFIG_NR_CPUS <= 1 )
-+ /* Mimic behavior of smt_up_down_helper(). */
-+ return 0;
- plug = op == XEN_SYSCTL_CPU_HOTPLUG_SMT_ENABLE;
- fn = smt_up_down_helper;
- hcpu = _p(plug);
-diff --git a/xen/common/Kconfig b/xen/common/Kconfig
-index f1ea3199c8..855c843113 100644
---- a/xen/common/Kconfig
-+++ b/xen/common/Kconfig
-@@ -10,6 +10,7 @@ config COMPAT
-
- config CORE_PARKING
- bool
-+ depends on NR_CPUS > 1
-
- config GRANT_TABLE
- bool "Grant table support" if EXPERT
---
-2.40.0
-
diff --git a/0071-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch b/0071-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch
deleted file mode 100644
index b33bd11..0000000
--- a/0071-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch
+++ /dev/null
@@ -1,129 +0,0 @@
-From d84612ecab00ab31c09a7c5a5892906edbacaf5b Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:45:47 +0100
-Subject: [PATCH 71/89] x86/altp2m: help gcc13 to avoid it emitting a warning
-
-Switches of altp2m-s always expect a valid altp2m to be in place (and
-indeed altp2m_vcpu_initialise() sets the active one to be at index 0).
-The compiler, however, cannot know that, and hence it cannot eliminate
-p2m_get_altp2m()'s case of returning (literal) NULL. If then the compiler
-decides to special case that code path in the caller, the dereference in
-instances of
-
- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-
-can, to the code generator, appear to be NULL dereferences, leading to
-
-In function 'atomic_dec',
- inlined from '...' at ...:
-./arch/x86/include/asm/atomic.h:182:5: error: array subscript 0 is outside array bounds of 'int[0]' [-Werror=array-bounds=]
-
-Aid the compiler by adding a BUG_ON() checking the return value of the
-problematic p2m_get_altp2m(). Since with the use of the local variable
-the 2nd p2m_get_altp2m() each will look questionable at the first glance
-(Why is the local variable not used here?), open-code the only relevant
-piece of p2m_get_altp2m() there.
-
-To avoid repeatedly doing these transformations, and also to limit how
-"bad" the open-coding really is, convert the entire operation to an
-inline helper, used by all three instances (and accepting the redundant
-BUG_ON(idx >= MAX_ALTP2M) in two of the three cases).
-
-Reported-by: Charles Arnold <carnold@suse.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: be62b1fc2aa7375d553603fca07299da765a89fe
-master date: 2023-03-13 15:16:21 +0100
----
- xen/arch/x86/hvm/vmx/vmx.c | 8 +-------
- xen/arch/x86/include/asm/p2m.h | 20 ++++++++++++++++++++
- xen/arch/x86/mm/p2m.c | 14 ++------------
- 3 files changed, 23 insertions(+), 19 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index f0e759eeaf..a8fb4365ad 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -4072,13 +4072,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- }
- }
-
-- if ( idx != vcpu_altp2m(v).p2midx )
-- {
-- BUG_ON(idx >= MAX_ALTP2M);
-- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-- vcpu_altp2m(v).p2midx = idx;
-- atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-- }
-+ p2m_set_altp2m(v, idx);
- }
-
- if ( unlikely(currd->arch.monitor.vmexit_enabled) )
-diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h
-index bd684d02f3..cd43d8621a 100644
---- a/xen/arch/x86/include/asm/p2m.h
-+++ b/xen/arch/x86/include/asm/p2m.h
-@@ -879,6 +879,26 @@ static inline struct p2m_domain *p2m_get_altp2m(struct vcpu *v)
- return v->domain->arch.altp2m_p2m[index];
- }
-
-+/* set current alternate p2m table */
-+static inline bool p2m_set_altp2m(struct vcpu *v, unsigned int idx)
-+{
-+ struct p2m_domain *orig;
-+
-+ BUG_ON(idx >= MAX_ALTP2M);
-+
-+ if ( idx == vcpu_altp2m(v).p2midx )
-+ return false;
-+
-+ orig = p2m_get_altp2m(v);
-+ BUG_ON(!orig);
-+ atomic_dec(&orig->active_vcpus);
-+
-+ vcpu_altp2m(v).p2midx = idx;
-+ atomic_inc(&v->domain->arch.altp2m_p2m[idx]->active_vcpus);
-+
-+ return true;
-+}
-+
- /* Switch alternate p2m for a single vcpu */
- bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx);
-
-diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
-index a405ee5fde..b28c899b5e 100644
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -1787,13 +1787,8 @@ bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
-
- if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
- {
-- if ( idx != vcpu_altp2m(v).p2midx )
-- {
-- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-- vcpu_altp2m(v).p2midx = idx;
-- atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-+ if ( p2m_set_altp2m(v, idx) )
- altp2m_vcpu_update_p2m(v);
-- }
- rc = 1;
- }
-
-@@ -2070,13 +2065,8 @@ int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
- if ( d->arch.altp2m_visible_eptp[idx] != mfn_x(INVALID_MFN) )
- {
- for_each_vcpu( d, v )
-- if ( idx != vcpu_altp2m(v).p2midx )
-- {
-- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-- vcpu_altp2m(v).p2midx = idx;
-- atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-+ if ( p2m_set_altp2m(v, idx) )
- altp2m_vcpu_update_p2m(v);
-- }
-
- rc = 0;
- }
---
-2.40.0
-
diff --git a/0072-VT-d-constrain-IGD-check.patch b/0072-VT-d-constrain-IGD-check.patch
deleted file mode 100644
index 497b04b..0000000
--- a/0072-VT-d-constrain-IGD-check.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From f971f5c531ce6a5fd6c1ff1f525f2c6837eeb78d Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:46:39 +0100
-Subject: [PATCH 72/89] VT-d: constrain IGD check
-
-Marking a DRHD as controlling an IGD isn't very sensible without
-checking that at the very least it's a graphics device that lives at
-0000:00:02.0. Re-use the reading of the class-code to control both the
-clearing of "gfx_only" and the setting of "igd_drhd_address".
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: f8c4317295fa1cde1a81779b7e362651c084efb8
-master date: 2023-03-14 10:44:08 +0100
----
- xen/drivers/passthrough/vtd/dmar.c | 9 +++------
- 1 file changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
-index 78c8bad151..78d4526446 100644
---- a/xen/drivers/passthrough/vtd/dmar.c
-+++ b/xen/drivers/passthrough/vtd/dmar.c
-@@ -391,15 +391,12 @@ static int __init acpi_parse_dev_scope(
-
- if ( drhd )
- {
-- if ( (seg == 0) && (bus == 0) && (path->dev == 2) &&
-- (path->fn == 0) )
-- igd_drhd_address = drhd->address;
--
-- if ( gfx_only &&
-- pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
-+ if ( pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
- PCI_CLASS_DEVICE + 1) != 0x03
- /* PCI_BASE_CLASS_DISPLAY */ )
- gfx_only = false;
-+ else if ( !seg && !bus && path->dev == 2 && !path->fn )
-+ igd_drhd_address = drhd->address;
- }
-
- break;
---
-2.40.0
-
diff --git a/0073-bunzip-work-around-gcc13-warning.patch b/0073-bunzip-work-around-gcc13-warning.patch
deleted file mode 100644
index c7ec163..0000000
--- a/0073-bunzip-work-around-gcc13-warning.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 7082d656ae9bcd26392caf72e50e0f7a61c8f285 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:47:11 +0100
-Subject: [PATCH 73/89] bunzip: work around gcc13 warning
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-While provable that length[0] is always initialized (because symCount
-cannot be zero), upcoming gcc13 fails to recognize this and warns about
-the unconditional use of the value immediately following the loop.
-
-See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106511.
-
-Reported-by: Martin Liška <martin.liska@suse.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 402195e56de0aacf97e05c80ed367d464ca6938b
-master date: 2023-03-14 10:45:28 +0100
----
- xen/common/bunzip2.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/xen/common/bunzip2.c b/xen/common/bunzip2.c
-index 61b80aff1b..4466426941 100644
---- a/xen/common/bunzip2.c
-+++ b/xen/common/bunzip2.c
-@@ -233,6 +233,11 @@ static int __init get_next_block(struct bunzip_data *bd)
- becomes negative, so an unsigned inequality catches
- it.) */
- t = get_bits(bd, 5)-1;
-+ /* GCC 13 has apparently improved use-before-set detection, but
-+ it can't figure out that length[0] is always intialized by
-+ virtue of symCount always being positive when making it here.
-+ See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106511. */
-+ length[0] = 0;
- for (i = 0; i < symCount; i++) {
- for (;;) {
- if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
---
-2.40.0
-
diff --git a/0074-libacpi-fix-PCI-hotplug-AML.patch b/0074-libacpi-fix-PCI-hotplug-AML.patch
deleted file mode 100644
index 3583849..0000000
--- a/0074-libacpi-fix-PCI-hotplug-AML.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 3eac216e6e60860bbc030602c401d3ef8efce8d9 Mon Sep 17 00:00:00 2001
-From: David Woodhouse <dwmw@amazon.co.uk>
-Date: Tue, 21 Mar 2023 13:47:52 +0100
-Subject: [PATCH 74/89] libacpi: fix PCI hotplug AML
-
-The emulated PIIX3 uses a nybble for the status of each PCI function,
-so the status for e.g. slot 0 functions 0 and 1 respectively can be
-read as (\_GPE.PH00 & 0x0F), and (\_GPE.PH00 >> 0x04).
-
-The AML that Xen gives to a guest gets the operand order for the odd-
-numbered functions the wrong way round, returning (0x04 >> \_GPE.PH00)
-instead.
-
-As far as I can tell, this was the wrong way round in Xen from the
-moment that PCI hotplug was first introduced in commit 83d82e6f35a8:
-
-+ ShiftRight (0x4, \_GPE.PH00, Local1)
-+ Return (Local1) /* IN status as the _STA */
-
-Or maybe there's bizarre AML operand ordering going on there, like
-Intel's wrong-way-round assembler, and it only broke later when it was
-changed to being generated?
-
-Either way, it's definitely wrong now, and instrumenting a Linux guest
-shows that it correctly sees _STA being 0x00 in function 0 of an empty
-slot, but then the loop in acpiphp_glue.c::get_slot_status() goes on to
-look at function 1 and sees that _STA evaluates to 0x04. Thus reporting
-an adapter is present in every slot in /sys/bus/pci/slots/*
-
-Quite why Linux wants to look for function 1 being physically present
-when function 0 isn't... I don't want to think about right now.
-
-Fixes: 83d82e6f35a8 ("hvmloader: pass-through: multi-function PCI hot-plug")
-Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: b190af7d3e90f58da5f58044b8dea7261b8b483d
-master date: 2023-03-20 17:12:34 +0100
----
- tools/libacpi/mk_dsdt.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/libacpi/mk_dsdt.c b/tools/libacpi/mk_dsdt.c
-index 1176da80ef..1d27809116 100644
---- a/tools/libacpi/mk_dsdt.c
-+++ b/tools/libacpi/mk_dsdt.c
-@@ -431,7 +431,7 @@ int main(int argc, char **argv)
- stmt("Store", "0x89, \\_GPE.DPT2");
- }
- if ( slot & 1 )
-- stmt("ShiftRight", "0x4, \\_GPE.PH%02X, Local1", slot & ~1);
-+ stmt("ShiftRight", "\\_GPE.PH%02X, 0x04, Local1", slot & ~1);
- else
- stmt("And", "\\_GPE.PH%02X, 0x0f, Local1", slot & ~1);
- stmt("Return", "Local1"); /* IN status as the _STA */
---
-2.40.0
-
diff --git a/0075-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch b/0075-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch
deleted file mode 100644
index 5decf2c..0000000
--- a/0075-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 3c85fb7b65d6a8b0fa993bc1cb67eea9b4a64aca Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 31 Mar 2023 08:28:56 +0200
-Subject: [PATCH 75/89] AMD/IOMMU: without XT, x2APIC needs to be forced into
- physical mode
-
-An earlier change with the same title (commit 1ba66a870eba) altered only
-the path where x2apic_phys was already set to false (perhaps from the
-command line). The same of course needs applying when the variable
-wasn't modified yet from its initial value.
-
-Reported-by: Elliott Mitchell <ehem+xen@m5p.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 0d2686f6b66b4b1b3c72c3525083b0ce02830054
-master date: 2023-03-21 09:23:25 +0100
----
- xen/arch/x86/genapic/x2apic.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c
-index 7dfc793514..d512c50fc5 100644
---- a/xen/arch/x86/genapic/x2apic.c
-+++ b/xen/arch/x86/genapic/x2apic.c
-@@ -236,11 +236,11 @@ const struct genapic *__init apic_x2apic_probe(void)
- if ( x2apic_phys < 0 )
- {
- /*
-- * Force physical mode if there's no interrupt remapping support: The
-- * ID in clustered mode requires a 32 bit destination field due to
-+ * Force physical mode if there's no (full) interrupt remapping support:
-+ * The ID in clustered mode requires a 32 bit destination field due to
- * the usage of the high 16 bits to hold the cluster ID.
- */
-- x2apic_phys = !iommu_intremap ||
-+ x2apic_phys = iommu_intremap != iommu_intremap_full ||
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) ||
- (IS_ENABLED(CONFIG_X2APIC_PHYSICAL) &&
- !(acpi_gbl_FADT.flags & ACPI_FADT_APIC_CLUSTER));
---
-2.40.0
-
diff --git a/0076-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch b/0076-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch
deleted file mode 100644
index d897da6..0000000
--- a/0076-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 33b1c8cd86bd6c311131b8dff32bd45581e2fbc1 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
- <marmarek@invisiblethingslab.com>
-Date: Fri, 31 Mar 2023 08:29:55 +0200
-Subject: [PATCH 76/89] VT-d: fix iommu=no-igfx if the IOMMU scope contains
- fake device(s)
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-If the scope for IGD's IOMMU contains an additional device that doesn't
-actually exist, iommu=no-igfx would not disable that IOMMU. In this
-particular case (Thinkpad x230) it included 00:02.1, but there is no
-such device on this platform. Consider only existing devices for the
-"gfx only" check as well as the establishing of IGD DRHD address
-(underlying is_igd_drhd(), which is used to determine applicability of
-two workarounds).
-
-Fixes: 2d7f191b392e ("VT-d: generalize and correct "iommu=no-igfx" handling")
-Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: 49de6749baa8d0addc3048defd4ef3e85cb135e9
-master date: 2023-03-23 09:16:41 +0100
----
- xen/drivers/passthrough/vtd/dmar.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
-index 78d4526446..4936c20952 100644
---- a/xen/drivers/passthrough/vtd/dmar.c
-+++ b/xen/drivers/passthrough/vtd/dmar.c
-@@ -389,7 +389,7 @@ static int __init acpi_parse_dev_scope(
- printk(VTDPREFIX " endpoint: %pp\n",
- &PCI_SBDF(seg, bus, path->dev, path->fn));
-
-- if ( drhd )
-+ if ( drhd && pci_device_detect(seg, bus, path->dev, path->fn) )
- {
- if ( pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
- PCI_CLASS_DEVICE + 1) != 0x03
---
-2.40.0
-
diff --git a/0077-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch b/0077-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch
deleted file mode 100644
index 3486ccd..0000000
--- a/0077-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 6f2d89d68175e74aca9c67761aa87ffc8f5ffed1 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 31 Mar 2023 08:30:41 +0200
-Subject: [PATCH 77/89] x86/shadow: fix and improve
- sh_page_has_multiple_shadows()
-
-While no caller currently invokes the function without first making sure
-there is at least one shadow [1], we'd better eliminate UB here:
-find_first_set_bit() requires input to be non-zero to return a well-
-defined result.
-
-Further, using find_first_set_bit() isn't very efficient in the first
-place for the intended purpose.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
-[1] The function has exactly two uses, and both are from OOS code, which
- is HVM-only. For HVM (but not for PV) sh_mfn_is_a_page_table(),
- guarding the call to sh_unsync(), guarantees at least one shadow.
- Hence even if sh_page_has_multiple_shadows() returned a bogus value
- when invoked for a PV domain, the subsequent is_hvm_vcpu() and
- oos_active checks (the former being redundant with the latter) will
- compensate. (Arguably that oos_active check should come first, for
- both clarity and efficiency reasons.)
-master commit: 2896224a4e294652c33f487b603d20bd30955f21
-master date: 2023-03-24 11:07:08 +0100
----
- xen/arch/x86/mm/shadow/private.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
-index 85bb26c7ea..c2bb1ed3c3 100644
---- a/xen/arch/x86/mm/shadow/private.h
-+++ b/xen/arch/x86/mm/shadow/private.h
-@@ -324,7 +324,7 @@ static inline int sh_page_has_multiple_shadows(struct page_info *pg)
- return 0;
- shadows = pg->shadow_flags & SHF_page_type_mask;
- /* More than one type bit set in shadow-flags? */
-- return ( (shadows & ~(1UL << find_first_set_bit(shadows))) != 0 );
-+ return shadows && (shadows & (shadows - 1));
- }
-
- #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
---
-2.40.0
-
diff --git a/0078-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch b/0078-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch
deleted file mode 100644
index 62de15a..0000000
--- a/0078-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch
+++ /dev/null
@@ -1,101 +0,0 @@
-From 00aa5c93d14c6561a69fe204cbe29f7519830782 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:31:20 +0200
-Subject: [PATCH 78/89] x86/nospec: Fix evaluate_nospec() code generation under
- Clang
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-It turns out that evaluate_nospec() code generation is not safe under Clang.
-Given:
-
- void eval_nospec_test(int x)
- {
- if ( evaluate_nospec(x) )
- asm volatile ("nop #true" ::: "memory");
- else
- asm volatile ("nop #false" ::: "memory");
- }
-
-Clang emits:
-
- <eval_nospec_test>:
- 0f ae e8 lfence
- 85 ff test %edi,%edi
- 74 02 je <eval_nospec_test+0x9>
- 90 nop
- c3 ret
- 90 nop
- c3 ret
-
-which is not safe because the lfence has been hoisted above the conditional
-jump. Clang concludes that both barrier_nospec_true()'s have identical side
-effects and can safely be merged.
-
-Clang can be persuaded that the side effects are different if there are
-different comments in the asm blocks. This is fragile, but no more fragile
-that other aspects of this construct.
-
-Introduce barrier_nospec_false() with a separate internal comment to prevent
-Clang merging it with barrier_nospec_true() despite the otherwise-identical
-content. The generated code now becomes:
-
- <eval_nospec_test>:
- 85 ff test %edi,%edi
- 74 05 je <eval_nospec_test+0x9>
- 0f ae e8 lfence
- 90 nop
- c3 ret
- 0f ae e8 lfence
- 90 nop
- c3 ret
-
-which has the correct number of lfence's, and in the correct place.
-
-Link: https://github.com/llvm/llvm-project/issues/55084
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: bc3c133841435829ba5c0a48427e2a77633502ab
-master date: 2023-03-24 12:16:31 +0000
----
- xen/arch/x86/include/asm/nospec.h | 15 +++++++++++++--
- 1 file changed, 13 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/nospec.h b/xen/arch/x86/include/asm/nospec.h
-index 5312ae4c6f..7150e76b87 100644
---- a/xen/arch/x86/include/asm/nospec.h
-+++ b/xen/arch/x86/include/asm/nospec.h
-@@ -10,15 +10,26 @@
- static always_inline bool barrier_nospec_true(void)
- {
- #ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH
-- alternative("lfence", "", X86_FEATURE_SC_NO_BRANCH_HARDEN);
-+ alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_BRANCH_HARDEN);
- #endif
- return true;
- }
-
-+static always_inline bool barrier_nospec_false(void)
-+{
-+#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH
-+ alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_BRANCH_HARDEN);
-+#endif
-+ return false;
-+}
-+
- /* Allow to protect evaluation of conditionals with respect to speculation */
- static always_inline bool evaluate_nospec(bool condition)
- {
-- return condition ? barrier_nospec_true() : !barrier_nospec_true();
-+ if ( condition )
-+ return barrier_nospec_true();
-+ else
-+ return barrier_nospec_false();
- }
-
- /* Allow to block speculative execution in generic code */
---
-2.40.0
-
diff --git a/0079-x86-shadow-Fix-build-with-no-PG_log_dirty.patch b/0079-x86-shadow-Fix-build-with-no-PG_log_dirty.patch
deleted file mode 100644
index f7652a4..0000000
--- a/0079-x86-shadow-Fix-build-with-no-PG_log_dirty.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 11c8ef59b9024849c0fc224354904615d5579628 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:32:11 +0200
-Subject: [PATCH 79/89] x86/shadow: Fix build with no PG_log_dirty
-
-Gitlab Randconfig found:
-
- arch/x86/mm/shadow/common.c: In function 'shadow_prealloc':
- arch/x86/mm/shadow/common.c:1023:18: error: implicit declaration of function
- 'paging_logdirty_levels'; did you mean 'paging_log_dirty_init'? [-Werror=implicit-function-declaration]
- 1023 | count += paging_logdirty_levels();
- | ^~~~~~~~~~~~~~~~~~~~~~
- | paging_log_dirty_init
- arch/x86/mm/shadow/common.c:1023:18: error: nested extern declaration of 'paging_logdirty_levels' [-Werror=nested-externs]
-
-The '#if PG_log_dirty' expression is currently SHADOW_PAGING && !HVM &&
-PV_SHIM_EXCLUSIVE. Move the declaration outside.
-
-Fixes: 33fb3a661223 ("x86/shadow: account for log-dirty mode when pre-allocating")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 6d14cb105b1c54ad7b4228d858ae85aa8a672bbd
-master date: 2023-03-24 12:16:31 +0000
----
- xen/arch/x86/include/asm/paging.h | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/paging.h b/xen/arch/x86/include/asm/paging.h
-index 635ccc83b1..6f7000d5f4 100644
---- a/xen/arch/x86/include/asm/paging.h
-+++ b/xen/arch/x86/include/asm/paging.h
-@@ -152,6 +152,10 @@ struct paging_mode {
- /*****************************************************************************
- * Log dirty code */
-
-+#define paging_logdirty_levels() \
-+ (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
-+ PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
-+
- #if PG_log_dirty
-
- /* get the dirty bitmap for a specific range of pfns */
-@@ -190,10 +194,6 @@ bool paging_mfn_is_dirty(const struct domain *d, mfn_t gmfn);
- #define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
- (LOGDIRTY_NODE_ENTRIES-1))
-
--#define paging_logdirty_levels() \
-- (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
-- PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
--
- #ifdef CONFIG_HVM
- /* VRAM dirty tracking support */
- struct sh_dirty_vram {
---
-2.40.0
-
diff --git a/0080-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch b/0080-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch
deleted file mode 100644
index 539401f..0000000
--- a/0080-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From f6a3e93b3788aa009e9b86d9cb14c243b958daa9 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:32:57 +0200
-Subject: [PATCH 80/89] x86/vmx: Don't spuriously crash the domain when INIT is
- received
-
-In VMX operation, the handling of INIT IPIs is changed. Instead of the CPU
-resetting, the next VMEntry fails with EXIT_REASON_INIT. From the TXT spec,
-the intent of this behaviour is so that an entity which cares can scrub
-secrets from RAM before participating in an orderly shutdown.
-
-Right now, Xen's behaviour is that when an INIT arrives, the HVM VM which
-schedules next is killed (citing an unknown VMExit), *and* we ignore the INIT
-and continue blindly onwards anyway.
-
-This patch addresses only the first of these two problems by ignoring the INIT
-and continuing without crashing the VM in question.
-
-The second wants addressing too, just as soon as we've figured out something
-better to do...
-
-Discovered as collateral damage from when an AP triple faults on S3 resume on
-Intel TigerLake platforms.
-
-Link: https://github.com/QubesOS/qubes-issues/issues/7283
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: b1f11273d5a774cc88a3685c96c2e7cf6385e3b6
-master date: 2023-03-24 22:49:58 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index a8fb4365ad..64dbd50197 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -4038,6 +4038,10 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- case EXIT_REASON_MCE_DURING_VMENTRY:
- do_machine_check(regs);
- break;
-+
-+ case EXIT_REASON_INIT:
-+ printk(XENLOG_ERR "Error: INIT received - ignoring\n");
-+ return; /* Renter the guest without further processing */
- }
-
- /* Now enable interrupts so it's safe to take locks. */
---
-2.40.0
-
diff --git a/0081-x86-ucode-Fix-error-paths-control_thread_fn.patch b/0081-x86-ucode-Fix-error-paths-control_thread_fn.patch
deleted file mode 100644
index 765fa84..0000000
--- a/0081-x86-ucode-Fix-error-paths-control_thread_fn.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 7f55774489d2f12a23f2ac0f516b62e2709cea99 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:33:28 +0200
-Subject: [PATCH 81/89] x86/ucode: Fix error paths control_thread_fn()
-
-These two early exits skipped re-enabling the watchdog, restoring the NMI
-callback, and clearing the nmi_patch global pointer. Always execute the tail
-of the function on the way out.
-
-Fixes: 8dd4dfa92d62 ("x86/microcode: Synchronize late microcode loading")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: fc2e1f3aad602a66c14b8285a1bd38a82f8fd02d
-master date: 2023-03-28 11:57:56 +0100
----
- xen/arch/x86/cpu/microcode/core.c | 9 +++------
- 1 file changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
-index 2497630bbe..c760723e4f 100644
---- a/xen/arch/x86/cpu/microcode/core.c
-+++ b/xen/arch/x86/cpu/microcode/core.c
-@@ -490,10 +490,7 @@ static int control_thread_fn(const struct microcode_patch *patch)
- ret = wait_for_condition(wait_cpu_callin, num_online_cpus(),
- MICROCODE_CALLIN_TIMEOUT_US);
- if ( ret )
-- {
-- set_state(LOADING_EXIT);
-- return ret;
-- }
-+ goto out;
-
- /* Control thread loads ucode first while others are in NMI handler. */
- ret = alternative_call(ucode_ops.apply_microcode, patch);
-@@ -505,8 +502,7 @@ static int control_thread_fn(const struct microcode_patch *patch)
- {
- printk(XENLOG_ERR
- "Late loading aborted: CPU%u failed to update ucode\n", cpu);
-- set_state(LOADING_EXIT);
-- return ret;
-+ goto out;
- }
-
- /* Let primary threads load the given ucode update */
-@@ -537,6 +533,7 @@ static int control_thread_fn(const struct microcode_patch *patch)
- }
- }
-
-+ out:
- /* Mark loading is done to unblock other threads */
- set_state(LOADING_EXIT);
-
---
-2.40.0
-
diff --git a/0082-include-don-t-mention-stub-headers-more-than-once-in.patch b/0082-include-don-t-mention-stub-headers-more-than-once-in.patch
deleted file mode 100644
index cc0a914..0000000
--- a/0082-include-don-t-mention-stub-headers-more-than-once-in.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-From 350693582427887387f21a6eeedaa0ac48aecc3f Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 31 Mar 2023 08:34:04 +0200
-Subject: [PATCH 82/89] include: don't mention stub headers more than once in a
- make rule
-
-When !GRANT_TABLE and !PV_SHIM headers-n contains grant_table.h twice,
-causing make to complain "target '...' given more than once in the same
-rule" for the rule generating the stub headers. We don't need duplicate
-entries in headers-n anywhere, so zap them (by using $(sort ...)) right
-where the final value of the variable is constructed.
-
-Fixes: 6bec713f871f ("include/compat: produce stubs for headers not otherwise generated")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 231ab79704cbb5b9be7700287c3b185225d34f1b
-master date: 2023-03-28 14:20:16 +0200
----
- xen/include/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/include/Makefile b/xen/include/Makefile
-index cfd7851614..e19f9464fd 100644
---- a/xen/include/Makefile
-+++ b/xen/include/Makefile
-@@ -34,7 +34,7 @@ headers-$(CONFIG_TRACEBUFFER) += compat/trace.h
- headers-$(CONFIG_XENOPROF) += compat/xenoprof.h
- headers-$(CONFIG_XSM_FLASK) += compat/xsm/flask_op.h
-
--headers-n := $(filter-out $(headers-y),$(headers-n) $(headers-))
-+headers-n := $(sort $(filter-out $(headers-y),$(headers-n) $(headers-)))
-
- cppflags-y := -include public/xen-compat.h -DXEN_GENERATING_COMPAT_HEADERS
- cppflags-$(CONFIG_X86) += -m32
---
-2.40.0
-
diff --git a/0083-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch b/0083-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch
deleted file mode 100644
index 8a1f412..0000000
--- a/0083-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch
+++ /dev/null
@@ -1,540 +0,0 @@
-From 85100ed78ca18f188b1ca495f132db7df705f1a4 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Fri, 31 Mar 2023 08:34:26 +0200
-Subject: [PATCH 83/89] vpci/msix: handle accesses adjacent to the MSI-X table
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The handling of the MSI-X table accesses by Xen requires that any
-pages part of the MSI-X related tables are not mapped into the domain
-physmap. As a result, any device registers in the same pages as the
-start or the end of the MSIX or PBA tables is not currently
-accessible, as the accesses are just dropped.
-
-Note the spec forbids such placing of registers, as the MSIX and PBA
-tables must be 4K isolated from any other registers:
-
-"If a Base Address register that maps address space for the MSI-X
-Table or MSI-X PBA also maps other usable address space that is not
-associated with MSI-X structures, locations (e.g., for CSRs) used in
-the other address space must not share any naturally aligned 4-KB
-address range with one where either MSI-X structure resides."
-
-Yet the 'Intel Wi-Fi 6 AX201' device on one of my boxes has registers
-in the same page as the MSIX tables, and thus won't work on a PVH dom0
-without this fix.
-
-In order to cope with the behavior passthrough any accesses that fall
-on the same page as the MSIX tables (but don't fall in between) to the
-underlying hardware. Such forwarding also takes care of the PBA
-accesses, so it allows to remove the code doing this handling in
-msix_{read,write}. Note that as a result accesses to the PBA array
-are no longer limited to 4 and 8 byte sizes, there's no access size
-restriction for PBA accesses documented in the specification.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-vpci/msix: restore PBA access length and alignment restrictions
-
-Accesses to the PBA array have the same length and alignment
-limitations as accesses to the MSI-X table:
-
-"For all accesses to MSI-X Table and MSI-X PBA fields, software must
-use aligned full DWORD or aligned full QWORD transactions; otherwise,
-the result is undefined."
-
-Introduce such length and alignment checks into the handling of PBA
-accesses for vPCI. This was a mistake of mine for not reading the
-specification correctly.
-
-Note that accesses must now be aligned, and hence there's no longer a
-need to check that the end of the access falls into the PBA region as
-both the access and the region addresses must be aligned.
-
-Fixes: b177892d2d ('vpci/msix: handle accesses adjacent to the MSI-X table')
-Reported-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: b177892d2d0e8a31122c218989f43130aeba5282
-master date: 2023-03-28 14:20:35 +0200
-master commit: 7a502b4fbc339e9d3d3d45fb37f09da06bc3081c
-master date: 2023-03-29 14:56:33 +0200
----
- xen/drivers/vpci/msix.c | 353 +++++++++++++++++++++++++++++-----------
- xen/drivers/vpci/vpci.c | 7 +-
- xen/include/xen/vpci.h | 8 +-
- 3 files changed, 273 insertions(+), 95 deletions(-)
-
-diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
-index bea0cc7aed..cafddcf305 100644
---- a/xen/drivers/vpci/msix.c
-+++ b/xen/drivers/vpci/msix.c
-@@ -27,6 +27,11 @@
- ((addr) >= vmsix_table_addr(vpci, nr) && \
- (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
-
-+#define VMSIX_ADDR_SAME_PAGE(addr, vpci, nr) \
-+ (PFN_DOWN(addr) >= PFN_DOWN(vmsix_table_addr(vpci, nr)) && \
-+ PFN_DOWN(addr) <= PFN_DOWN(vmsix_table_addr(vpci, nr) + \
-+ vmsix_table_size(vpci, nr) - 1))
-+
- static uint32_t cf_check control_read(
- const struct pci_dev *pdev, unsigned int reg, void *data)
- {
-@@ -149,7 +154,7 @@ static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
-
- for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
- if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
-- VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
-+ VMSIX_ADDR_SAME_PAGE(addr, msix->pdev->vpci, i) )
- return msix;
- }
-
-@@ -182,36 +187,172 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
- return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
- }
-
--static void __iomem *get_pba(struct vpci *vpci)
-+static void __iomem *get_table(struct vpci *vpci, unsigned int slot)
- {
- struct vpci_msix *msix = vpci->msix;
-+ paddr_t addr = 0;
-+
-+ ASSERT(spin_is_locked(&vpci->lock));
-+
-+ if ( likely(msix->table[slot]) )
-+ return msix->table[slot];
-+
-+ switch ( slot )
-+ {
-+ case VPCI_MSIX_TBL_TAIL:
-+ addr = vmsix_table_size(vpci, VPCI_MSIX_TABLE);
-+ fallthrough;
-+ case VPCI_MSIX_TBL_HEAD:
-+ addr += vmsix_table_addr(vpci, VPCI_MSIX_TABLE);
-+ break;
-+
-+ case VPCI_MSIX_PBA_TAIL:
-+ addr = vmsix_table_size(vpci, VPCI_MSIX_PBA);
-+ fallthrough;
-+ case VPCI_MSIX_PBA_HEAD:
-+ addr += vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-+ break;
-+
-+ default:
-+ ASSERT_UNREACHABLE();
-+ return NULL;
-+ }
-+
-+ msix->table[slot] = ioremap(round_pgdown(addr), PAGE_SIZE);
-+
-+ return msix->table[slot];
-+}
-+
-+unsigned int get_slot(const struct vpci *vpci, unsigned long addr)
-+{
-+ unsigned long pfn = PFN_DOWN(addr);
-+
- /*
-- * PBA will only be unmapped when the device is deassigned, so access it
-- * without holding the vpci lock.
-+ * The logic below relies on having the tables identity mapped to the guest
-+ * address space, or for the `addr` parameter to be translated into its
-+ * host physical memory address equivalent.
- */
-- void __iomem *pba = read_atomic(&msix->pba);
-
-- if ( likely(pba) )
-- return pba;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_TABLE)) )
-+ return VPCI_MSIX_TBL_HEAD;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_TABLE) +
-+ vmsix_table_size(vpci, VPCI_MSIX_TABLE) - 1) )
-+ return VPCI_MSIX_TBL_TAIL;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_PBA)) )
-+ return VPCI_MSIX_PBA_HEAD;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_PBA) +
-+ vmsix_table_size(vpci, VPCI_MSIX_PBA) - 1) )
-+ return VPCI_MSIX_PBA_TAIL;
-+
-+ ASSERT_UNREACHABLE();
-+ return -1;
-+}
-+
-+static bool adjacent_handle(const struct vpci_msix *msix, unsigned long addr)
-+{
-+ unsigned int i;
-+
-+ if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-+ return true;
-+
-+ if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_TABLE) )
-+ return false;
-+
-+ for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
-+ if ( VMSIX_ADDR_SAME_PAGE(addr, msix->pdev->vpci, i) )
-+ return true;
-+
-+ return false;
-+}
-
-- pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
-- vmsix_table_size(vpci, VPCI_MSIX_PBA));
-- if ( !pba )
-- return read_atomic(&msix->pba);
-+static int adjacent_read(const struct domain *d, const struct vpci_msix *msix,
-+ unsigned long addr, unsigned int len,
-+ unsigned long *data)
-+{
-+ const void __iomem *mem;
-+ struct vpci *vpci = msix->pdev->vpci;
-+ unsigned int slot;
-+
-+ *data = ~0ul;
-+
-+ if ( !adjacent_handle(msix, addr + len - 1) )
-+ return X86EMUL_OKAY;
-+
-+ if ( VMSIX_ADDR_IN_RANGE(addr, vpci, VPCI_MSIX_PBA) &&
-+ !access_allowed(msix->pdev, addr, len) )
-+ /* PBA accesses must be aligned and 4 or 8 bytes in size. */
-+ return X86EMUL_OKAY;
-+
-+ slot = get_slot(vpci, addr);
-+ if ( slot >= ARRAY_SIZE(msix->table) )
-+ return X86EMUL_OKAY;
-+
-+ if ( unlikely(!IS_ALIGNED(addr, len)) )
-+ {
-+ unsigned int i;
-+
-+ gprintk(XENLOG_DEBUG, "%pp: unaligned read to MSI-X related page\n",
-+ &msix->pdev->sbdf);
-+
-+ /*
-+ * Split unaligned accesses into byte sized ones. Shouldn't happen in
-+ * the first place, but devices shouldn't have registers in the same 4K
-+ * page as the MSIX tables either.
-+ *
-+ * It's unclear whether this could cause issues if a guest expects
-+ * registers to be accessed atomically, it better use an aligned access
-+ * if it has such expectations.
-+ */
-+ for ( i = 0; i < len; i++ )
-+ {
-+ unsigned long partial = ~0ul;
-+ int rc = adjacent_read(d, msix, addr + i, 1, &partial);
-+
-+ if ( rc != X86EMUL_OKAY )
-+ return rc;
-+
-+ *data &= ~(0xfful << (i * 8));
-+ *data |= (partial & 0xff) << (i * 8);
-+ }
-+
-+ return X86EMUL_OKAY;
-+ }
-
- spin_lock(&vpci->lock);
-- if ( !msix->pba )
-+ mem = get_table(vpci, slot);
-+ if ( !mem )
- {
-- write_atomic(&msix->pba, pba);
- spin_unlock(&vpci->lock);
-+ gprintk(XENLOG_WARNING,
-+ "%pp: unable to map MSI-X page, returning all bits set\n",
-+ &msix->pdev->sbdf);
-+ return X86EMUL_OKAY;
- }
-- else
-+
-+ switch ( len )
- {
-- spin_unlock(&vpci->lock);
-- iounmap(pba);
-+ case 1:
-+ *data = readb(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 2:
-+ *data = readw(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 4:
-+ *data = readl(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 8:
-+ *data = readq(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ default:
-+ ASSERT_UNREACHABLE();
- }
-+ spin_unlock(&vpci->lock);
-
-- return read_atomic(&msix->pba);
-+ return X86EMUL_OKAY;
- }
-
- static int cf_check msix_read(
-@@ -227,47 +368,11 @@ static int cf_check msix_read(
- if ( !msix )
- return X86EMUL_RETRY;
-
-- if ( !access_allowed(msix->pdev, addr, len) )
-- return X86EMUL_OKAY;
--
-- if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-- {
-- struct vpci *vpci = msix->pdev->vpci;
-- unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-- const void __iomem *pba = get_pba(vpci);
--
-- /*
-- * Access to PBA.
-- *
-- * TODO: note that this relies on having the PBA identity mapped to the
-- * guest address space. If this changes the address will need to be
-- * translated.
-- */
-- if ( !pba )
-- {
-- gprintk(XENLOG_WARNING,
-- "%pp: unable to map MSI-X PBA, report all pending\n",
-- &msix->pdev->sbdf);
-- return X86EMUL_OKAY;
-- }
--
-- switch ( len )
-- {
-- case 4:
-- *data = readl(pba + idx);
-- break;
--
-- case 8:
-- *data = readq(pba + idx);
-- break;
--
-- default:
-- ASSERT_UNREACHABLE();
-- break;
-- }
-+ if ( adjacent_handle(msix, addr) )
-+ return adjacent_read(d, msix, addr, len, data);
-
-+ if ( !access_allowed(msix->pdev, addr, len) )
- return X86EMUL_OKAY;
-- }
-
- spin_lock(&msix->pdev->vpci->lock);
- entry = get_entry(msix, addr);
-@@ -303,56 +408,102 @@ static int cf_check msix_read(
- return X86EMUL_OKAY;
- }
-
--static int cf_check msix_write(
-- struct vcpu *v, unsigned long addr, unsigned int len, unsigned long data)
-+static int adjacent_write(const struct domain *d, const struct vpci_msix *msix,
-+ unsigned long addr, unsigned int len,
-+ unsigned long data)
- {
-- const struct domain *d = v->domain;
-- struct vpci_msix *msix = msix_find(d, addr);
-- struct vpci_msix_entry *entry;
-- unsigned int offset;
-+ void __iomem *mem;
-+ struct vpci *vpci = msix->pdev->vpci;
-+ unsigned int slot;
-
-- if ( !msix )
-- return X86EMUL_RETRY;
-+ if ( !adjacent_handle(msix, addr + len - 1) )
-+ return X86EMUL_OKAY;
-
-- if ( !access_allowed(msix->pdev, addr, len) )
-+ /*
-+ * Only check start and end of the access because the size of the PBA is
-+ * assumed to be equal or bigger (8 bytes) than the length of any access
-+ * handled here.
-+ */
-+ if ( VMSIX_ADDR_IN_RANGE(addr, vpci, VPCI_MSIX_PBA) &&
-+ (!access_allowed(msix->pdev, addr, len) || !is_hardware_domain(d)) )
-+ /* Ignore writes to PBA for DomUs, it's undefined behavior. */
- return X86EMUL_OKAY;
-
-- if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-+ slot = get_slot(vpci, addr);
-+ if ( slot >= ARRAY_SIZE(msix->table) )
-+ return X86EMUL_OKAY;
-+
-+ if ( unlikely(!IS_ALIGNED(addr, len)) )
- {
-- struct vpci *vpci = msix->pdev->vpci;
-- unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-- const void __iomem *pba = get_pba(vpci);
-+ unsigned int i;
-
-- if ( !is_hardware_domain(d) )
-- /* Ignore writes to PBA for DomUs, it's behavior is undefined. */
-- return X86EMUL_OKAY;
-+ gprintk(XENLOG_DEBUG, "%pp: unaligned write to MSI-X related page\n",
-+ &msix->pdev->sbdf);
-
-- if ( !pba )
-+ for ( i = 0; i < len; i++ )
- {
-- /* Unable to map the PBA, ignore write. */
-- gprintk(XENLOG_WARNING,
-- "%pp: unable to map MSI-X PBA, write ignored\n",
-- &msix->pdev->sbdf);
-- return X86EMUL_OKAY;
-+ int rc = adjacent_write(d, msix, addr + i, 1, data >> (i * 8));
-+
-+ if ( rc != X86EMUL_OKAY )
-+ return rc;
- }
-
-- switch ( len )
-- {
-- case 4:
-- writel(data, pba + idx);
-- break;
-+ return X86EMUL_OKAY;
-+ }
-
-- case 8:
-- writeq(data, pba + idx);
-- break;
-+ spin_lock(&vpci->lock);
-+ mem = get_table(vpci, slot);
-+ if ( !mem )
-+ {
-+ spin_unlock(&vpci->lock);
-+ gprintk(XENLOG_WARNING,
-+ "%pp: unable to map MSI-X page, dropping write\n",
-+ &msix->pdev->sbdf);
-+ return X86EMUL_OKAY;
-+ }
-
-- default:
-- ASSERT_UNREACHABLE();
-- break;
-- }
-+ switch ( len )
-+ {
-+ case 1:
-+ writeb(data, mem + PAGE_OFFSET(addr));
-+ break;
-
-- return X86EMUL_OKAY;
-+ case 2:
-+ writew(data, mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 4:
-+ writel(data, mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 8:
-+ writeq(data, mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ default:
-+ ASSERT_UNREACHABLE();
- }
-+ spin_unlock(&vpci->lock);
-+
-+ return X86EMUL_OKAY;
-+}
-+
-+static int cf_check msix_write(
-+ struct vcpu *v, unsigned long addr, unsigned int len, unsigned long data)
-+{
-+ const struct domain *d = v->domain;
-+ struct vpci_msix *msix = msix_find(d, addr);
-+ struct vpci_msix_entry *entry;
-+ unsigned int offset;
-+
-+ if ( !msix )
-+ return X86EMUL_RETRY;
-+
-+ if ( adjacent_handle(msix, addr) )
-+ return adjacent_write(d, msix, addr, len, data);
-+
-+ if ( !access_allowed(msix->pdev, addr, len) )
-+ return X86EMUL_OKAY;
-
- spin_lock(&msix->pdev->vpci->lock);
- entry = get_entry(msix, addr);
-@@ -482,6 +633,26 @@ int vpci_make_msix_hole(const struct pci_dev *pdev)
- }
- }
-
-+ if ( is_hardware_domain(d) )
-+ {
-+ /*
-+ * For dom0 only: remove any hypervisor mappings of the MSIX or PBA
-+ * related areas, as dom0 is capable of moving the position of the BARs
-+ * in the host address space.
-+ *
-+ * We rely on being called with the vPCI lock held once the domain is
-+ * running, so the maps are not in use.
-+ */
-+ for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
-+ if ( pdev->vpci->msix->table[i] )
-+ {
-+ /* If there are any maps, the domain must be running. */
-+ ASSERT(spin_is_locked(&pdev->vpci->lock));
-+ iounmap(pdev->vpci->msix->table[i]);
-+ pdev->vpci->msix->table[i] = NULL;
-+ }
-+ }
-+
- return 0;
- }
-
-diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
-index 6d48d496bb..652807a4a4 100644
---- a/xen/drivers/vpci/vpci.c
-+++ b/xen/drivers/vpci/vpci.c
-@@ -54,9 +54,12 @@ void vpci_remove_device(struct pci_dev *pdev)
- spin_unlock(&pdev->vpci->lock);
- if ( pdev->vpci->msix )
- {
-+ unsigned int i;
-+
- list_del(&pdev->vpci->msix->next);
-- if ( pdev->vpci->msix->pba )
-- iounmap(pdev->vpci->msix->pba);
-+ for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
-+ if ( pdev->vpci->msix->table[i] )
-+ iounmap(pdev->vpci->msix->table[i]);
- }
- xfree(pdev->vpci->msix);
- xfree(pdev->vpci->msi);
-diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
-index d8acfeba8a..0b8a2a3c74 100644
---- a/xen/include/xen/vpci.h
-+++ b/xen/include/xen/vpci.h
-@@ -133,8 +133,12 @@ struct vpci {
- bool enabled : 1;
- /* Masked? */
- bool masked : 1;
-- /* PBA map */
-- void __iomem *pba;
-+ /* Partial table map. */
-+#define VPCI_MSIX_TBL_HEAD 0
-+#define VPCI_MSIX_TBL_TAIL 1
-+#define VPCI_MSIX_PBA_HEAD 2
-+#define VPCI_MSIX_PBA_TAIL 3
-+ void __iomem *table[4];
- /* Entries. */
- struct vpci_msix_entry {
- uint64_t addr;
---
-2.40.0
-
diff --git a/0084-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch b/0084-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch
deleted file mode 100644
index 6ab5c69..0000000
--- a/0084-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From 7758cd57e002c5096b2296ede67c59fca68724d7 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 31 Mar 2023 08:35:15 +0200
-Subject: [PATCH 84/89] ns16550: correct name/value pair parsing for PCI
- port/bridge
-
-First of all these were inverted: "bridge=" caused the port coordinates
-to be established, while "port=" controlled the bridge coordinates. And
-then the error messages being identical also wasn't helpful. While
-correcting this also move both case blocks close together.
-
-Fixes: 97fd49a7e074 ("ns16550: add support for UART parameters to be specifed with name-value pairs")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: e692b22230b411d762ac9e278a398e28df474eae
-master date: 2023-03-29 14:55:37 +0200
----
- xen/drivers/char/ns16550.c | 16 ++++++++--------
- 1 file changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
-index ce013fb6a5..97b3d8d269 100644
---- a/xen/drivers/char/ns16550.c
-+++ b/xen/drivers/char/ns16550.c
-@@ -1631,13 +1631,6 @@ static bool __init parse_namevalue_pairs(char *str, struct ns16550 *uart)
- break;
-
- #ifdef CONFIG_HAS_PCI
-- case bridge_bdf:
-- if ( !parse_pci(param_value, NULL, &uart->ps_bdf[0],
-- &uart->ps_bdf[1], &uart->ps_bdf[2]) )
-- PARSE_ERR_RET("Bad port PCI coordinates\n");
-- uart->ps_bdf_enable = true;
-- break;
--
- case device:
- if ( strncmp(param_value, "pci", 3) == 0 )
- {
-@@ -1652,9 +1645,16 @@ static bool __init parse_namevalue_pairs(char *str, struct ns16550 *uart)
- break;
-
- case port_bdf:
-+ if ( !parse_pci(param_value, NULL, &uart->ps_bdf[0],
-+ &uart->ps_bdf[1], &uart->ps_bdf[2]) )
-+ PARSE_ERR_RET("Bad port PCI coordinates\n");
-+ uart->ps_bdf_enable = true;
-+ break;
-+
-+ case bridge_bdf:
- if ( !parse_pci(param_value, NULL, &uart->pb_bdf[0],
- &uart->pb_bdf[1], &uart->pb_bdf[2]) )
-- PARSE_ERR_RET("Bad port PCI coordinates\n");
-+ PARSE_ERR_RET("Bad bridge PCI coordinates\n");
- uart->pb_bdf_enable = true;
- break;
- #endif
---
-2.40.0
-
diff --git a/0085-CI-Drop-automation-configs.patch b/0085-CI-Drop-automation-configs.patch
deleted file mode 100644
index bfed25a..0000000
--- a/0085-CI-Drop-automation-configs.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 4c0d792675f0843c6dd52acdae38e5c0e112b09e Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 29 Dec 2022 15:39:13 +0000
-Subject: [PATCH 85/89] CI: Drop automation/configs/
-
-Having 3 extra hypervisor builds on the end of a full build is deeply
-confusing to debug if one of them fails, because the .config file presented in
-the artefacts is not the one which caused a build failure. Also, the log
-tends to be truncated in the UI.
-
-PV-only is tested as part of PV-Shim in a full build anyway, so doesn't need
-repeating. HVM-only and neither appear frequently in randconfig, so drop all
-the logic here to simplify things.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Michal Orzel <michal.orzel@amd.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-(cherry picked from commit 7b20009a812f26e74bdbde2ab96165376b3dad34)
----
- automation/configs/x86/hvm_only_config | 3 ---
- automation/configs/x86/no_hvm_pv_config | 3 ---
- automation/configs/x86/pv_only_config | 3 ---
- automation/scripts/build | 21 ---------------------
- 4 files changed, 30 deletions(-)
- delete mode 100644 automation/configs/x86/hvm_only_config
- delete mode 100644 automation/configs/x86/no_hvm_pv_config
- delete mode 100644 automation/configs/x86/pv_only_config
-
-diff --git a/automation/configs/x86/hvm_only_config b/automation/configs/x86/hvm_only_config
-deleted file mode 100644
-index 9efbddd535..0000000000
---- a/automation/configs/x86/hvm_only_config
-+++ /dev/null
-@@ -1,3 +0,0 @@
--CONFIG_HVM=y
--# CONFIG_PV is not set
--# CONFIG_DEBUG is not set
-diff --git a/automation/configs/x86/no_hvm_pv_config b/automation/configs/x86/no_hvm_pv_config
-deleted file mode 100644
-index 0bf6a8e468..0000000000
---- a/automation/configs/x86/no_hvm_pv_config
-+++ /dev/null
-@@ -1,3 +0,0 @@
--# CONFIG_HVM is not set
--# CONFIG_PV is not set
--# CONFIG_DEBUG is not set
-diff --git a/automation/configs/x86/pv_only_config b/automation/configs/x86/pv_only_config
-deleted file mode 100644
-index e9d8b4a7c7..0000000000
---- a/automation/configs/x86/pv_only_config
-+++ /dev/null
-@@ -1,3 +0,0 @@
--CONFIG_PV=y
--# CONFIG_HVM is not set
--# CONFIG_DEBUG is not set
-diff --git a/automation/scripts/build b/automation/scripts/build
-index a593419063..5dafa72ba5 100755
---- a/automation/scripts/build
-+++ b/automation/scripts/build
-@@ -85,24 +85,3 @@ if [[ "${XEN_TARGET_ARCH}" != "x86_32" ]]; then
- cp -r dist binaries/
- fi
- fi
--
--if [[ "${hypervisor_only}" == "y" ]]; then
-- # If we are build testing a specific Kconfig exit now, there's no point in
-- # testing all the possible configs.
-- exit 0
--fi
--
--# Build all the configs we care about
--case ${XEN_TARGET_ARCH} in
-- x86_64) arch=x86 ;;
-- *) exit 0 ;;
--esac
--
--cfg_dir="automation/configs/${arch}"
--for cfg in `ls ${cfg_dir}`; do
-- echo "Building $cfg"
-- make -j$(nproc) -C xen clean
-- rm -f xen/.config
-- make -C xen KBUILD_DEFCONFIG=../../../../${cfg_dir}/${cfg} defconfig
-- make -j$(nproc) -C xen
--done
---
-2.40.0
-
diff --git a/0086-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch b/0086-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch
deleted file mode 100644
index a200cab..0000000
--- a/0086-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From e3b23da4a10fafdabce22e2eba225d9404fc646f Mon Sep 17 00:00:00 2001
-From: Michal Orzel <michal.orzel@amd.com>
-Date: Tue, 14 Feb 2023 16:38:38 +0100
-Subject: [PATCH 86/89] automation: Switch arm32 cross builds to run on arm64
-
-Due to the limited x86 CI resources slowing down the whole pipeline,
-switch the arm32 cross builds to be executed on arm64 which is much more
-capable. For that, rename the existing debian container dockerfile
-from unstable-arm32-gcc to unstable-arm64v8-arm32-gcc and use
-arm64v8/debian:unstable as an image. Note, that we cannot use the same
-container name as we have to keep the backwards compatibility.
-Take the opportunity to remove extra empty line at the end of a file.
-
-Modify the tag of .arm32-cross-build-tmpl to arm64 and update the build
-jobs accordingly.
-
-Signed-off-by: Michal Orzel <michal.orzel@amd.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-(cherry picked from commit a35fccc8df93de7154dba87db6e7bcf391e9d51c)
----
- ...ockerfile => unstable-arm64v8-arm32-gcc.dockerfile} | 3 +--
- automation/gitlab-ci/build.yaml | 10 +++++-----
- 2 files changed, 6 insertions(+), 7 deletions(-)
- rename automation/build/debian/{unstable-arm32-gcc.dockerfile => unstable-arm64v8-arm32-gcc.dockerfile} (94%)
-
-diff --git a/automation/build/debian/unstable-arm32-gcc.dockerfile b/automation/build/debian/unstable-arm64v8-arm32-gcc.dockerfile
-similarity index 94%
-rename from automation/build/debian/unstable-arm32-gcc.dockerfile
-rename to automation/build/debian/unstable-arm64v8-arm32-gcc.dockerfile
-index b41a57f197..11860425a6 100644
---- a/automation/build/debian/unstable-arm32-gcc.dockerfile
-+++ b/automation/build/debian/unstable-arm64v8-arm32-gcc.dockerfile
-@@ -1,4 +1,4 @@
--FROM debian:unstable
-+FROM arm64v8/debian:unstable
- LABEL maintainer.name="The Xen Project" \
- maintainer.email="xen-devel@lists.xenproject.org"
-
-@@ -21,4 +21,3 @@ RUN apt-get update && \
- apt-get autoremove -y && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
--
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index bed161b471..b4caf159f9 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -123,7 +123,7 @@
- variables:
- XEN_TARGET_ARCH: arm32
- tags:
-- - x86_64
-+ - arm64
-
- .arm32-cross-build:
- extends: .arm32-cross-build-tmpl
-@@ -505,23 +505,23 @@ alpine-3.12-clang-debug:
- debian-unstable-gcc-arm32:
- extends: .gcc-arm32-cross-build
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
-
- debian-unstable-gcc-arm32-debug:
- extends: .gcc-arm32-cross-build-debug
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
-
- debian-unstable-gcc-arm32-randconfig:
- extends: .gcc-arm32-cross-build
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
- RANDCONFIG: y
-
- debian-unstable-gcc-arm32-debug-randconfig:
- extends: .gcc-arm32-cross-build-debug
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
- RANDCONFIG: y
-
- # Arm builds
---
-2.40.0
-
diff --git a/0087-automation-Remove-CentOS-7.2-containers-and-builds.patch b/0087-automation-Remove-CentOS-7.2-containers-and-builds.patch
deleted file mode 100644
index b5d629d..0000000
--- a/0087-automation-Remove-CentOS-7.2-containers-and-builds.patch
+++ /dev/null
@@ -1,145 +0,0 @@
-From 8c414bab3092bb68ab4eaaba39b61e3804c45f0a Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 21 Feb 2023 16:55:36 +0000
-Subject: [PATCH 87/89] automation: Remove CentOS 7.2 containers and builds
-
-We already have a container which track the latest CentOS 7, no need
-for this one as well.
-
-Also, 7.2 have outdated root certificate which prevent connection to
-website which use Let's Encrypt.
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit ba512629f76dfddb39ea9133ee51cdd9e392a927)
----
- automation/build/centos/7.2.dockerfile | 52 -------------------------
- automation/build/centos/CentOS-7.2.repo | 35 -----------------
- automation/gitlab-ci/build.yaml | 10 -----
- 3 files changed, 97 deletions(-)
- delete mode 100644 automation/build/centos/7.2.dockerfile
- delete mode 100644 automation/build/centos/CentOS-7.2.repo
-
-diff --git a/automation/build/centos/7.2.dockerfile b/automation/build/centos/7.2.dockerfile
-deleted file mode 100644
-index 4baa097e31..0000000000
---- a/automation/build/centos/7.2.dockerfile
-+++ /dev/null
-@@ -1,52 +0,0 @@
--FROM centos:7.2.1511
--LABEL maintainer.name="The Xen Project" \
-- maintainer.email="xen-devel@lists.xenproject.org"
--
--# ensure we only get bits from the vault for
--# the version we want
--COPY CentOS-7.2.repo /etc/yum.repos.d/CentOS-Base.repo
--
--# install EPEL for dev86, xz-devel and possibly other packages
--RUN yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
-- yum clean all
--
--RUN mkdir /build
--WORKDIR /build
--
--# work around https://github.com/moby/moby/issues/10180
--# and install Xen depends
--RUN rpm --rebuilddb && \
-- yum -y install \
-- yum-plugin-ovl \
-- gcc \
-- gcc-c++ \
-- ncurses-devel \
-- zlib-devel \
-- openssl-devel \
-- python-devel \
-- libuuid-devel \
-- pkgconfig \
-- # gettext for Xen < 4.13
-- gettext \
-- flex \
-- bison \
-- libaio-devel \
-- glib2-devel \
-- yajl-devel \
-- pixman-devel \
-- glibc-devel \
-- # glibc-devel.i686 for Xen < 4.15
-- glibc-devel.i686 \
-- make \
-- binutils \
-- git \
-- wget \
-- acpica-tools \
-- python-markdown \
-- patch \
-- checkpolicy \
-- dev86 \
-- xz-devel \
-- bzip2 \
-- nasm \
-- && yum clean all
-diff --git a/automation/build/centos/CentOS-7.2.repo b/automation/build/centos/CentOS-7.2.repo
-deleted file mode 100644
-index 4da27faeb5..0000000000
---- a/automation/build/centos/CentOS-7.2.repo
-+++ /dev/null
-@@ -1,35 +0,0 @@
--# CentOS-Base.repo
--#
--# This is a replacement file that pins things to just use CentOS 7.2
--# from the CentOS Vault.
--#
--
--[base]
--name=CentOS-7.2.1511 - Base
--baseurl=http://vault.centos.org/7.2.1511/os/$basearch/
--gpgcheck=1
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
--#released updates
--[updates]
--name=CentOS-7.2.1511 - Updates
--baseurl=http://vault.centos.org/7.2.1511/updates/$basearch/
--gpgcheck=1
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
--#additional packages that may be useful
--[extras]
--name=CentOS-7.2.1511 - Extras
--baseurl=http://vault.centos.org/7.2.1511/extras/$basearch/
--gpgcheck=1
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
--#additional packages that extend functionality of existing packages
--[centosplus]
--name=CentOS-7.2.1511 - Plus
--baseurl=http://vault.centos.org/7.2.1511/centosplus/$basearch/
--gpgcheck=1
--gpgcheck=1
--enabled=0
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index b4caf159f9..ff6df1cfc2 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -184,16 +184,6 @@ archlinux-gcc-debug:
- variables:
- CONTAINER: archlinux:current
-
--centos-7-2-gcc:
-- extends: .gcc-x86-64-build
-- variables:
-- CONTAINER: centos:7.2
--
--centos-7-2-gcc-debug:
-- extends: .gcc-x86-64-build-debug
-- variables:
-- CONTAINER: centos:7.2
--
- centos-7-gcc:
- extends: .gcc-x86-64-build
- variables:
---
-2.40.0
-
diff --git a/0088-automation-Remove-non-debug-x86_32-build-jobs.patch b/0088-automation-Remove-non-debug-x86_32-build-jobs.patch
deleted file mode 100644
index d16014e..0000000
--- a/0088-automation-Remove-non-debug-x86_32-build-jobs.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 435a1e5e8fd6fbd52cc16570dcff5982bdbec351 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Fri, 24 Feb 2023 17:29:15 +0000
-Subject: [PATCH 88/89] automation: Remove non-debug x86_32 build jobs
-
-In the interest of having less jobs, we remove the x86_32 build jobs
-that do release build. Debug build is very likely to be enough to find
-32bit build issues.
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit 7b66792ea7f77fb9e587e1e9c530a7c869eecba1)
----
- automation/gitlab-ci/build.yaml | 20 --------------------
- 1 file changed, 20 deletions(-)
-
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index ff6df1cfc2..eea517aa0a 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -264,21 +264,11 @@ debian-stretch-gcc-debug:
- variables:
- CONTAINER: debian:stretch
-
--debian-stretch-32-clang:
-- extends: .clang-x86-32-build
-- variables:
-- CONTAINER: debian:stretch-i386
--
- debian-stretch-32-clang-debug:
- extends: .clang-x86-32-build-debug
- variables:
- CONTAINER: debian:stretch-i386
-
--debian-stretch-32-gcc:
-- extends: .gcc-x86-32-build
-- variables:
-- CONTAINER: debian:stretch-i386
--
- debian-stretch-32-gcc-debug:
- extends: .gcc-x86-32-build-debug
- variables:
-@@ -324,21 +314,11 @@ debian-unstable-gcc-debug-randconfig:
- CONTAINER: debian:unstable
- RANDCONFIG: y
-
--debian-unstable-32-clang:
-- extends: .clang-x86-32-build
-- variables:
-- CONTAINER: debian:unstable-i386
--
- debian-unstable-32-clang-debug:
- extends: .clang-x86-32-build-debug
- variables:
- CONTAINER: debian:unstable-i386
-
--debian-unstable-32-gcc:
-- extends: .gcc-x86-32-build
-- variables:
-- CONTAINER: debian:unstable-i386
--
- debian-unstable-32-gcc-debug:
- extends: .gcc-x86-32-build-debug
- variables:
---
-2.40.0
-
diff --git a/0089-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch b/0089-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch
deleted file mode 100644
index c0294ec..0000000
--- a/0089-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From e4a5fb9227889bec99ab212b839680f4d5b51e60 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 24 Mar 2023 17:59:56 +0000
-Subject: [PATCH 89/89] CI: Remove llvm-8 from the Debian Stretch container
-
-For similar reasons to c/s a6b1e2b80fe20. While this container is still
-build-able for now, all the other problems with explicitly-versioned compilers
-remain.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-(cherry picked from commit 7a298375721636290a57f31bb0f7c2a5a38956a4)
----
- automation/build/debian/stretch-llvm-8.list | 3 ---
- automation/build/debian/stretch.dockerfile | 12 ---------
- automation/gitlab-ci/build.yaml | 27 ---------------------
- 3 files changed, 42 deletions(-)
- delete mode 100644 automation/build/debian/stretch-llvm-8.list
-
-diff --git a/automation/build/debian/stretch-llvm-8.list b/automation/build/debian/stretch-llvm-8.list
-deleted file mode 100644
-index 09fe843fb2..0000000000
---- a/automation/build/debian/stretch-llvm-8.list
-+++ /dev/null
-@@ -1,3 +0,0 @@
--# Strech LLVM 8 repos
--deb http://apt.llvm.org/stretch/ llvm-toolchain-stretch-8 main
--deb-src http://apt.llvm.org/stretch/ llvm-toolchain-stretch-8 main
-diff --git a/automation/build/debian/stretch.dockerfile b/automation/build/debian/stretch.dockerfile
-index da6aa874dd..9861acbcc3 100644
---- a/automation/build/debian/stretch.dockerfile
-+++ b/automation/build/debian/stretch.dockerfile
-@@ -53,15 +53,3 @@ RUN apt-get update && \
- apt-get autoremove -y && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
--
--RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
--COPY stretch-llvm-8.list /etc/apt/sources.list.d/
--
--RUN apt-get update && \
-- apt-get --quiet --yes install \
-- clang-8 \
-- lld-8 \
-- && \
-- apt-get autoremove -y && \
-- apt-get clean && \
-- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index eea517aa0a..802449cb96 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -27,13 +27,6 @@
- CXX: clang++
- clang: y
-
--.clang-8-tmpl:
-- variables: &clang-8
-- CC: clang-8
-- CXX: clang++-8
-- LD: ld.lld-8
-- clang: y
--
- .x86-64-build-tmpl:
- <<: *build
- variables:
-@@ -98,16 +91,6 @@
- variables:
- <<: *clang
-
--.clang-8-x86-64-build:
-- extends: .x86-64-build
-- variables:
-- <<: *clang-8
--
--.clang-8-x86-64-build-debug:
-- extends: .x86-64-build-debug
-- variables:
-- <<: *clang-8
--
- .clang-x86-32-build:
- extends: .x86-32-build
- variables:
-@@ -244,16 +227,6 @@ debian-stretch-clang-debug:
- variables:
- CONTAINER: debian:stretch
-
--debian-stretch-clang-8:
-- extends: .clang-8-x86-64-build
-- variables:
-- CONTAINER: debian:stretch
--
--debian-stretch-clang-8-debug:
-- extends: .clang-8-x86-64-build-debug
-- variables:
-- CONTAINER: debian:stretch
--
- debian-stretch-gcc:
- extends: .gcc-x86-64-build
- variables:
---
-2.40.0
-
diff --git a/info.txt b/info.txt
index 45b2f7f..26a1905 100644
--- a/info.txt
+++ b/info.txt
@@ -1,6 +1,6 @@
-Xen upstream patchset #0 for 4.17.1-pre
+Xen upstream patchset #0 for 4.17.3-pre
Containing patches from
-RELEASE-4.17.0 (5556ac9bf224ed6b977f214653b234de45dcdfbf)
+RELEASE-4.17.2 (b86c313a4a9c3ec4c9f825d9b99131753296485f)
to
-staging-4.17 (e4a5fb9227889bec99ab212b839680f4d5b51e60)
+staging-4.17 (0b56bed864ca9b572473957f0254aefa797216f2)