-rw-r--r--  .SRCINFO                                                                    |   42
-rw-r--r--  0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch (renamed from 0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch) |    9
-rw-r--r--  0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch         |   21
-rw-r--r--  0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch       |   85
-rw-r--r--  0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch         |   51
-rw-r--r--  0006-lg-laptop_Recognize_more_models.patch                                  |   36
-rw-r--r--  0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch                    |   38
-rw-r--r--  0103-futex.patch                                                            | 9811
-rw-r--r--  0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch       |   89
-rw-r--r--  0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch |   17
-rw-r--r--  0201-lenovo-wmi2.patch                                                      |   15
-rw-r--r--  0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch              | 1038
-rw-r--r--  0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch (renamed from 0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch) |    0
-rw-r--r--  0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch (renamed from 0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch) |    0
-rw-r--r--  0304-revert-fbcon-remove-soft-scrollback-code.patch (renamed from 0303-revert-fbcon-remove-soft-scrollback-code.patch) |    0
-rw-r--r--  0999-acs.gitpatch                                                           |   52
-rw-r--r--  PKGBUILD                                                                    |   49
-rw-r--r--  config                                                                      |  140
18 files changed, 1199 insertions(+), 10294 deletions(-)
diff --git a/.SRCINFO b/.SRCINFO
index 7281d81aefb6..18eec9fb8a91 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,5 +1,5 @@
pkgbase = linux-acs-manjaro
- pkgver = 5.15.16
+ pkgver = 5.16.2
pkgrel = 1
url = https://www.kernel.org/
arch = x86_64
@@ -17,24 +17,17 @@ pkgbase = linux-acs-manjaro
makedepends = tar
makedepends = xz
options = !strip
- source = https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.15.tar.xz
- source = https://www.kernel.org/pub/linux/kernel/v5.x/patch-5.15.16.xz
+ source = https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.16.tar.xz
+ source = https://www.kernel.org/pub/linux/kernel/v5.x/patch-5.16.2.xz
source = config
source = 0001-ZEN-Add-sysctl-and-CONFIG-to-disallow-unprivileged-CLONE_NEWUSER.patch
- source = 0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch
- source = 0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch
- source = 0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch
- source = 0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch
- source = 0006-lg-laptop_Recognize_more_models.patch
+ source = 0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch
source = 0101-i2c-nuvoton-nc677x-hwmon-driver.patch
- source = 0103-futex.patch
- source = 0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch
source = 0105-quirk-kernel-org-bug-210681-firmware_rome_error.patch
- source = 0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch::https://patchwork.freedesktop.org/patch/463650/raw/
- source = 0201-lenovo-wmi2.patch
- source = 0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
- source = 0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch
- source = 0303-revert-fbcon-remove-soft-scrollback-code.patch
+ source = 0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch
+ source = 0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
+ source = 0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch
+ source = 0304-revert-fbcon-remove-soft-scrollback-code.patch
source = 0401-bootsplash.patch
source = 0402-bootsplash.patch
source = 0403-bootsplash.patch
@@ -49,21 +42,14 @@ pkgbase = linux-acs-manjaro
source = 0412-bootsplash.patch
source = 0413-bootsplash.gitpatch
source = 0999-acs.gitpatch
- sha256sums = 57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8
- sha256sums = 0817171996521675b3c1130568503f08d8b1672c955cc842200a21bf5914cd95
- sha256sums = 93320dbe5928e51fb777a4f13dd9a7364eb150d7983073f7dc159e89a6ffa747
+ sha256sums = 027d7e8988bb69ac12ee92406c3be1fe13f990b1ca2249e226225cd1573308bb
+ sha256sums = 3a09c2f1ad410c09cf03921abeed1a6ca7c38138fb508171ee673d429d179171
+ sha256sums = cb2d729cc20743014d9e3bd08facb9f5bdd19d9fa89014f415c61b4a6eb78e97
sha256sums = 986f8d802f37b72a54256f0ab84da83cb229388d58c0b6750f7c770818a18421
- sha256sums = e2823eff3355b7c88a3fa327ea2f84f23cbd36569e0a5f0f76599023f63a52ca
- sha256sums = ce53090a4572cd6162d22225113082f7e4df5028a1230529d170460e26dcf849
- sha256sums = ab0360eac59329eb84f028c2f402ee4a17e4b3dfacb7957355e6178d35af87b9
- sha256sums = 76701599bbafa49b90ccb073ef29ce2dc3731566e8fa852bd1e9e7796e184754
- sha256sums = a2a0a0542055a6a921542fbb05cedb6eb6f3d3fb0c038bfb2304bfd3931a0f71
+ sha256sums = b89188b1bc3516d54965dd36def6a2af3d81379e53ff7e527bbd91f77c6f191b
sha256sums = 7823d7488f42bc4ed7dfae6d1014dbde679d8b862c9a3697a39ba0dae5918978
- sha256sums = 844e66a95d7df754c55ac2f1ce7e215b1e56e20ca095462d926a993d557b20e0
- sha256sums = d9330ea593829a6ef3b824db9570253280cbff7da2b4beb47cbc037824d1a29b
sha256sums = 5e804e1f241ce542f3f0e83d274ede6aa4b0539e510fb9376f8106e8732ce69b
- sha256sums = e8e6120035977903a7117ba215809b9b162b64a789848107513f219180baaada
- sha256sums = 1d58ef2991c625f6f0eb33b4cb8303932f53f1c4694e42bae24c9cd36d2ad013
+ sha256sums = 365d4225a7db60bd064ebbc34ce0ae582a0c378ad6c4cec7960a5ae4641a6757
sha256sums = 2b11905b63b05b25807dd64757c779da74dd4c37e36d3f7a46485b1ee5a9d326
sha256sums = 94a8538251ad148f1025cc3de446ce64f73dc32b01815426fb159c722e8fa5bc
sha256sums = 1f18c5c10a3c63e41ecd05ad34cd9f6653ba96e9f1049ce2b7bb6da2578ae710
@@ -80,7 +66,7 @@ pkgbase = linux-acs-manjaro
sha256sums = 27471eee564ca3149dd271b0817719b5565a9594dc4d884fe3dc51a5f03832bc
sha256sums = 60e295601e4fb33d9bf65f198c54c7eb07c0d1e91e2ad1e0dd6cd6e142cb266d
sha256sums = 035ea4b2a7621054f4560471f45336b981538a40172d8f17285910d4e0e0b3ef
- sha256sums = 6d6b327ec7c7798f628f98ab964f4457d3cf043bad2632eb8f27548478a83cc1
+ sha256sums = 2542b5cea79ab5817ce3d30c54acd045966b9c14587bfb0b2f50d473da48a1d5
pkgname = linux-acs-manjaro
pkgdesc = The Linux Manjaro standart kernel and modules with ACS patch
diff --git a/0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch b/0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch
index 38cf2bde55bd..80cd663cd131 100644
--- a/0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch
+++ b/0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch
@@ -1,4 +1,4 @@
-From ae3386d67597db29ad2ba2685815e224a39897bc Mon Sep 17 00:00:00 2001
+From efbb86e8bf678eb5a376deaa3b693fb7a21b8e41 Mon Sep 17 00:00:00 2001
From: Kiran K <kiran.k@intel.com>
Date: Wed, 13 Oct 2021 13:35:11 +0530
Subject: [PATCH] Bluetooth: btintel: Fix bdaddress comparison with garbage
@@ -16,10 +16,10 @@ Reviewed-by: Tedd Ho-Jeong An <tedd.an@intel.com>
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
-index f1705b46fc8898..b9055a3e61ed76 100644
+index 9359bff4729659..8f9109b40961f4 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
-@@ -2006,14 +2006,16 @@ static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev,
+@@ -2081,14 +2081,16 @@ static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev,
if (ver->img_type == 0x03) {
btintel_clear_flag(hdev, INTEL_BOOTLOADER);
btintel_check_bdaddr(hdev);
@@ -44,7 +44,7 @@ index f1705b46fc8898..b9055a3e61ed76 100644
}
btintel_get_fw_name_tlv(ver, fwname, sizeof(fwname), "sfi");
-@@ -2303,6 +2305,10 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+@@ -2466,6 +2468,10 @@ static int btintel_setup_combined(struct hci_dev *hdev)
goto exit_error;
}
@@ -55,3 +55,4 @@ index f1705b46fc8898..b9055a3e61ed76 100644
/* For TLV type device, parse the tlv data */
err = btintel_parse_version_tlv(hdev, &ver_tlv, skb);
if (err) {
+
diff --git a/0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch b/0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch
deleted file mode 100644
index 01b324a03a17..000000000000
--- a/0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch
+++ /dev/null
@@ -1,21 +0,0 @@
-From 1ac8f753e4249e6864c1c42070ba957ceef1f82a Mon Sep 17 00:00:00 2001
-From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
-Date: Thu, 18 Nov 2021 22:53:31 +0100
-Subject: [PATCH] PCI: Add more NVIDIA controllers to the MSI masking quirk
-
-For: https://bugs.archlinux.org/task/72734
-For: https://bugs.archlinux.org/task/72777
----
- drivers/pci/quirks.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 208fa03acdda00..7fdb7e9c2e12c4 100644
---- a/drivers/pci/quirks.c
-+++ b/drivers/pci/quirks.c
-@@ -5802,3 +5802,5 @@ static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
- pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
- }
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup);
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab9, nvidia_ion_ahci_fixup);
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0d88, nvidia_ion_ahci_fixup);
diff --git a/0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch b/0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch
deleted file mode 100644
index bc9dc1857912..000000000000
--- a/0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 74db74ec6ce112c6137d51610429e7ac9ea5b6c1 Mon Sep 17 00:00:00 2001
-From: Ajay Garg <ajaygargnsit@gmail.com>
-Date: Tue, 12 Oct 2021 19:26:53 +0530
-Subject: [PATCH] iommu: intel: do deep dma-unmapping, to avoid
- kernel-flooding.
-
-Origins at :
-https://lists.linuxfoundation.org/pipermail/iommu/2021-October/thread.html
-
-=== Changes from v1 => v2 ===
-
-a)
-Improved patch-description.
-
-b)
-A more root-level fix, as suggested by
-
- 1.
- Alex Williamson <alex.williamson@redhat.com>
-
- 2.
- Lu Baolu <baolu.lu@linux.intel.com>
-
-=== Issue ===
-
-Kernel-flooding is seen, when an x86_64 L1 guest (Ubuntu-21) is booted in qemu/kvm
-on a x86_64 host (Ubuntu-21), with a host-pci-device attached.
-
-Following kind of logs, along with the stacktraces, cause the flood :
-
-......
- DMAR: ERROR: DMA PTE for vPFN 0x428ec already set (to 3f6ec003 not 3f6ec003)
- DMAR: ERROR: DMA PTE for vPFN 0x428ed already set (to 3f6ed003 not 3f6ed003)
- DMAR: ERROR: DMA PTE for vPFN 0x428ee already set (to 3f6ee003 not 3f6ee003)
- DMAR: ERROR: DMA PTE for vPFN 0x428ef already set (to 3f6ef003 not 3f6ef003)
- DMAR: ERROR: DMA PTE for vPFN 0x428f0 already set (to 3f6f0003 not 3f6f0003)
-......
-
-=== Current Behaviour, leading to the issue ===
-
-Currently, when we do a dma-unmapping, we unmap/unlink the mappings, but
-the pte-entries are not cleared.
-
-Thus, following sequencing would flood the kernel-logs :
-
-i)
-A dma-unmapping makes the real/leaf-level pte-slot invalid, but the
-pte-content itself is not cleared.
-
-ii)
-Now, during some later dma-mapping procedure, as the pte-slot is about
-to hold a new pte-value, the intel-iommu checks if a prior
-pte-entry exists in the pte-slot. If it exists, it logs a kernel-error,
-along with a corresponding stacktrace.
-
-iii)
-Step ii) runs in abundance, and the kernel-logs run insane.
-
-=== Fix ===
-
-We ensure that as part of a dma-unmapping, each (unmapped) pte-slot
-is also cleared of its value/content (at the leaf-level, where the
-real mapping from a iova => pfn mapping is stored).
-
-This completes a "deep" dma-unmapping.
-
-Signed-off-by: Ajay Garg <ajaygargnsit@gmail.com>
-Link: https://lore.kernel.org/linux-iommu/20211012135653.3852-1-ajaygargnsit@gmail.com/
----
- drivers/iommu/intel/iommu.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 78f8c8e6803e97..d8da48a91ba3b2 100644
---- a/drivers/iommu/intel/iommu.c
-+++ b/drivers/iommu/intel/iommu.c
-@@ -5092,6 +5092,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
- gather->freelist = domain_unmap(dmar_domain, start_pfn,
- last_pfn, gather->freelist);
-
-+ dma_pte_clear_range(dmar_domain, start_pfn, last_pfn);
-+
- if (dmar_domain->max_addr == iova + size)
- dmar_domain->max_addr = iova;
-
diff --git a/0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch b/0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch
deleted file mode 100644
index 1f7922e34722..000000000000
--- a/0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From 62f1f7606485d450b23f86bc18dab101e7a2443d Mon Sep 17 00:00:00 2001
-From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
-Date: Thu, 18 Nov 2021 21:18:01 -0800
-Subject: [PATCH] cpufreq: intel_pstate: ITMT support for overclocked system
-
-On systems with overclocking enabled, CPPC Highest Performance can be
-hard coded to 0xff. In this case even if we have cores with different
-highest performance, ITMT can't be enabled as the current implementation
-depends on CPPC Highest Performance.
-
-On such systems we can use MSR_HWP_CAPABILITIES maximum performance field
-when CPPC.Highest Performance is 0xff.
-
-Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES as
-in some older systems CPPC Highest Performance is the only way to identify
-different performing cores.
-
-Reported-by: Michael Larabel <Michael@MichaelLarabel.com>
-Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
----
- drivers/cpufreq/intel_pstate.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
-index e15c3bc17a55ce..8a2c6b58b6524f 100644
---- a/drivers/cpufreq/intel_pstate.c
-+++ b/drivers/cpufreq/intel_pstate.c
-@@ -335,6 +335,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
-
- static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
-
-+#define CPPC_MAX_PERF U8_MAX
-+
- static void intel_pstate_set_itmt_prio(int cpu)
- {
- struct cppc_perf_caps cppc_perf;
-@@ -345,6 +347,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
- if (ret)
- return;
-
-+ /*
-+ * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
-+ * In this case we can't use CPPC.highest_perf to enable ITMT.
-+ * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
-+ */
-+ if (cppc_perf.highest_perf == CPPC_MAX_PERF)
-+ cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
-+
- /*
- * The priorities can be set regardless of whether or not
- * sched_set_itmt_support(true) has been called and it is valid to
diff --git a/0006-lg-laptop_Recognize_more_models.patch b/0006-lg-laptop_Recognize_more_models.patch
deleted file mode 100644
index 8fbd217c36a2..000000000000
--- a/0006-lg-laptop_Recognize_more_models.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 675d4b66de78eec370cf5053eecdf00b26780af3 Mon Sep 17 00:00:00 2001
-From: Matan Ziv-Av <matan@svgalib.org>
-Date: Tue, 23 Nov 2021 22:14:55 +0200
-Subject: [PATCH] lg-laptop: Recognize more models
-
-LG uses 5 instead of 0 in the third digit (second digit after 2019) of the year string to indicate newer models in the same year. Handle this case as well.
-
-Signed-off-by: Matan Ziv-Av <matan@svgalib.org>
-For: https://bugs.archlinux.org/task/71772
----
- drivers/platform/x86/lg-laptop.c | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
-index 88b551caeaaf41..d6f74d3a7605e2 100644
---- a/drivers/platform/x86/lg-laptop.c
-+++ b/drivers/platform/x86/lg-laptop.c
-@@ -658,6 +658,18 @@ static int acpi_add(struct acpi_device *device)
- if (product && strlen(product) > 4)
- switch (product[4]) {
- case '5':
-+ if (strlen(product) > 5)
-+ switch (product[5]) {
-+ case 'N':
-+ year = 2021;
-+ break;
-+ case '0':
-+ year = 2016;
-+ break;
-+ default:
-+ year = 2022;
-+ }
-+ break;
- case '6':
- year = 2016;
- break;
diff --git a/0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch b/0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch
deleted file mode 100644
index 9ca50277e88c..000000000000
--- a/0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From: Yuxuan Shui <yshuiv7@gmail.com>
-To: viro@zeniv.linux.org.uk
-Cc: linux-fsdevel@vger.kernel.org, Yuxuan Shui <yshuiv7@gmail.com>
-Subject: [PATCH] iomap: iomap_bmap should accept unwritten maps
-Date: Tue, 5 May 2020 19:36:08 +0100
-Message-ID: <20200505183608.10280-1-yshuiv7@gmail.com> (raw)
-
-commit ac58e4fb03f9d111d733a4ad379d06eef3a24705 moved ext4_bmap from
-generic_block_bmap to iomap_bmap, this introduced a regression which
-prevents some user from using previously working swapfiles. The kernel
-will complain about holes while there is none.
-
-What is happening here is that the swapfile has unwritten mappings,
-which is rejected by iomap_bmap, but was accepted by ext4_get_block.
-
-This commit makes sure iomap_bmap would accept unwritten mappings as
-well.
-
-Signed-off-by: Yuxuan Shui <yshuiv7@gmail.com>
----
- fs/iomap/fiemap.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
-index d55e8f491a5e..fb488dcfa8c7 100644
---- a/fs/iomap/fiemap.c
-+++ b/fs/iomap/fiemap.c
-@@ -115,7 +115,7 @@ iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
- {
- sector_t *bno = data, addr;
-
-- if (iomap->type == IOMAP_MAPPED) {
-+ if (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_UNWRITTEN) {
- addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits;
- *bno = addr;
- }
---
-2.26.2
diff --git a/0103-futex.patch b/0103-futex.patch
deleted file mode 100644
index d33f488ae054..000000000000
--- a/0103-futex.patch
+++ /dev/null
@@ -1,9811 +0,0 @@
-From 4dc2913212c08c6970f6e8971fd23b6328982f94 Mon Sep 17 00:00:00 2001
-From: Piotr Gorski <lucjan.lucjanov@gmail.com>
-Date: Mon, 1 Nov 2021 12:11:04 +0100
-Subject: [PATCH] futex: resync from gitlab.collabora.com
-
-Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
----
- Documentation/userspace-api/futex2.rst | 86 +
- Documentation/userspace-api/index.rst | 1 +
- MAINTAINERS | 3 +-
- arch/arm/tools/syscall.tbl | 1 +
- arch/arm64/include/asm/unistd.h | 2 +-
- arch/arm64/include/asm/unistd32.h | 2 +
- arch/x86/entry/syscalls/syscall_32.tbl | 1 +
- arch/x86/entry/syscalls/syscall_64.tbl | 1 +
- include/linux/syscalls.h | 7 +-
- include/uapi/asm-generic/unistd.h | 5 +-
- include/uapi/linux/futex.h | 25 +
- kernel/Makefile | 2 +-
- kernel/futex.c | 4272 -----------------
- kernel/futex/Makefile | 3 +
- kernel/futex/core.c | 1176 +++++
- kernel/futex/futex.h | 295 ++
- kernel/futex/pi.c | 1233 +++++
- kernel/futex/requeue.c | 897 ++++
- kernel/futex/syscalls.c | 396 ++
- kernel/futex/waitwake.c | 708 +++
- kernel/sys_ni.c | 3 +-
- .../selftests/futex/functional/.gitignore | 1 +
- .../selftests/futex/functional/Makefile | 3 +-
- .../futex/functional/futex_wait_timeout.c | 21 +-
- .../futex/functional/futex_wait_wouldblock.c | 41 +-
- .../selftests/futex/functional/futex_waitv.c | 237 +
- .../testing/selftests/futex/functional/run.sh | 3 +
- .../selftests/futex/include/futex2test.h | 22 +
- 28 files changed, 5163 insertions(+), 4284 deletions(-)
- create mode 100644 Documentation/userspace-api/futex2.rst
- delete mode 100644 kernel/futex.c
- create mode 100644 kernel/futex/Makefile
- create mode 100644 kernel/futex/core.c
- create mode 100644 kernel/futex/futex.h
- create mode 100644 kernel/futex/pi.c
- create mode 100644 kernel/futex/requeue.c
- create mode 100644 kernel/futex/syscalls.c
- create mode 100644 kernel/futex/waitwake.c
- create mode 100644 tools/testing/selftests/futex/functional/futex_waitv.c
- create mode 100644 tools/testing/selftests/futex/include/futex2test.h
-
-diff --git a/Documentation/userspace-api/futex2.rst b/Documentation/userspace-api/futex2.rst
-new file mode 100644
-index 000000000..7d37409df
---- /dev/null
-+++ b/Documentation/userspace-api/futex2.rst
-@@ -0,0 +1,86 @@
-+.. SPDX-License-Identifier: GPL-2.0
-+
-+======
-+futex2
-+======
-+
-+:Author: André Almeida <andrealmeid@collabora.com>
-+
-+futex, or fast user mutex, is a set of syscalls to allow userspace to create
-+performant synchronization mechanisms, such as mutexes, semaphores and
-+conditional variables in userspace. C standard libraries, like glibc, uses it
-+as a means to implement more high level interfaces like pthreads.
-+
-+futex2 is a followup version of the initial futex syscall, designed to overcome
-+limitations of the original interface.
-+
-+User API
-+========
-+
-+``futex_waitv()``
-+-----------------
-+
-+Wait on an array of futexes, wake on any::
-+
-+ futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
-+ unsigned int flags, struct timespec *timeout, clockid_t clockid)
-+
-+ struct futex_waitv {
-+ __u64 val;
-+ __u64 uaddr;
-+ __u32 flags;
-+ __u32 __reserved;
-+ };
-+
-+Userspace sets an array of struct futex_waitv (up to a max of 128 entries),
-+using ``uaddr`` for the address to wait for, ``val`` for the expected value
-+and ``flags`` to specify the type (e.g. private) and size of futex.
-+``__reserved`` needs to be 0, but it can be used for future extension. The
-+pointer for the first item of the array is passed as ``waiters``. An invalid
-+address for ``waiters`` or for any ``uaddr`` returns ``-EFAULT``.
-+
-+If userspace has 32-bit pointers, it should do a explicit cast to make sure
-+the upper bits are zeroed. ``uintptr_t`` does the tricky and it works for
-+both 32/64-bit pointers.
-+
-+``nr_futexes`` specifies the size of the array. Numbers out of [1, 128]
-+interval will make the syscall return ``-EINVAL``.
-+
-+The ``flags`` argument of the syscall needs to be 0, but it can be used for
-+future extension.
-+
-+For each entry in ``waiters`` array, the current value at ``uaddr`` is compared
-+to ``val``. If it's different, the syscall undo all the work done so far and
-+return ``-EAGAIN``. If all tests and verifications succeeds, syscall waits until
-+one of the following happens:
-+
-+- The timeout expires, returning ``-ETIMEOUT``.
-+- A signal was sent to the sleeping task, returning ``-ERESTARTSYS``.
-+- Some futex at the list was awaken, returning the index of some waked futex.
-+
-+An example of how to use the interface can be found at ``tools/testing/selftests/futex/functional/futex_waitv.c``.
-+
-+Timeout
-+-------
-+
-+``struct timespec *timeout`` argument is an optional argument that points to an
-+absolute timeout. You need to specify the type of clock being used at
-+``clockid`` argument. ``CLOCK_MONOTONIC`` and ``CLOCK_REALTIME`` are supported.
-+This syscall accepts only 64bit timespec structs.
-+
-+Types of futex
-+--------------
-+
-+A futex can be either private or shared. Private is used for processes that
-+shares the same memory space and the virtual address of the futex will be the
-+same for all processes. This allows for optimizations in the kernel. To use
-+private futexes, it's necessary to specify ``FUTEX_PRIVATE_FLAG`` in the futex
-+flag. For processes that doesn't share the same memory space and therefore can
-+have different virtual addresses for the same futex (using, for instance, a
-+file-backed shared memory) requires different internal mechanisms to be get
-+properly enqueued. This is the default behavior, and it works with both private
-+and shared futexes.
-+
-+Futexes can be of different sizes: 8, 16, 32 or 64 bits. Currently, the only
-+supported one is 32 bit sized futex, and it need to be specified using
-+``FUTEX_32`` flag.
-diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
-index c432be070..a61eac0c7 100644
---- a/Documentation/userspace-api/index.rst
-+++ b/Documentation/userspace-api/index.rst
-@@ -28,6 +28,7 @@ place where this information is gathered.
- media/index
- sysfs-platform_profile
- vduse
-+ futex2
-
- .. only:: subproject and html
-
-diff --git a/MAINTAINERS b/MAINTAINERS
-index 3b79fd441..dd165835f 100644
---- a/MAINTAINERS
-+++ b/MAINTAINERS
-@@ -7737,6 +7737,7 @@ M: Ingo Molnar <mingo@redhat.com>
- R: Peter Zijlstra <peterz@infradead.org>
- R: Darren Hart <dvhart@infradead.org>
- R: Davidlohr Bueso <dave@stgolabs.net>
-+R: André Almeida <andrealmeid@collabora.com>
- L: linux-kernel@vger.kernel.org
- S: Maintained
- T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
-@@ -7744,7 +7745,7 @@ F: Documentation/locking/*futex*
- F: include/asm-generic/futex.h
- F: include/linux/futex.h
- F: include/uapi/linux/futex.h
--F: kernel/futex.c
-+F: kernel/futex/*
- F: tools/perf/bench/futex*
- F: tools/testing/selftests/futex/
-
-diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
-index e842209e1..543100151 100644
---- a/arch/arm/tools/syscall.tbl
-+++ b/arch/arm/tools/syscall.tbl
-@@ -462,3 +462,4 @@
- 446 common landlock_restrict_self sys_landlock_restrict_self
- # 447 reserved for memfd_secret
- 448 common process_mrelease sys_process_mrelease
-+449 common futex_waitv sys_futex_waitv
-diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
-index 3cb206aea..6bdb5f5db 100644
---- a/arch/arm64/include/asm/unistd.h
-+++ b/arch/arm64/include/asm/unistd.h
-@@ -38,7 +38,7 @@
- #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
- #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-
--#define __NR_compat_syscalls 449
-+#define __NR_compat_syscalls 450
- #endif
-
- #define __ARCH_WANT_SYS_CLONE
-diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
-index 844f6ae58..41ea1195e 100644
---- a/arch/arm64/include/asm/unistd32.h
-+++ b/arch/arm64/include/asm/unistd32.h
-@@ -903,6 +903,8 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
- __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
- #define __NR_process_mrelease 448
- __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
-+#define __NR_futex_waitv 449
-+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
-
- /*
- * Please add new compat syscalls above this comment and update
-diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
-index 960a021d5..7e2554369 100644
---- a/arch/x86/entry/syscalls/syscall_32.tbl
-+++ b/arch/x86/entry/syscalls/syscall_32.tbl
-@@ -453,3 +453,4 @@
- 446 i386 landlock_restrict_self sys_landlock_restrict_self
- 447 i386 memfd_secret sys_memfd_secret
- 448 i386 process_mrelease sys_process_mrelease
-+449 i386 futex_waitv sys_futex_waitv
-diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
-index 18b5500ea..fe8f8dd15 100644
---- a/arch/x86/entry/syscalls/syscall_64.tbl
-+++ b/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -370,6 +370,7 @@
- 446 common landlock_restrict_self sys_landlock_restrict_self
- 447 common memfd_secret sys_memfd_secret
- 448 common process_mrelease sys_process_mrelease
-+449 common futex_waitv sys_futex_waitv
-
- #
- # Due to a historical design error, certain syscalls are numbered differently
-diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
-index 252243c77..528a478db 100644
---- a/include/linux/syscalls.h
-+++ b/include/linux/syscalls.h
-@@ -58,6 +58,7 @@ struct mq_attr;
- struct compat_stat;
- struct old_timeval32;
- struct robust_list_head;
-+struct futex_waitv;
- struct getcpu_cache;
- struct old_linux_dirent;
- struct perf_event_attr;
-@@ -610,7 +611,7 @@ asmlinkage long sys_waitid(int which, pid_t pid,
- asmlinkage long sys_set_tid_address(int __user *tidptr);
- asmlinkage long sys_unshare(unsigned long unshare_flags);
-
--/* kernel/futex.c */
-+/* kernel/futex/syscalls.c */
- asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
- const struct __kernel_timespec __user *utime,
- u32 __user *uaddr2, u32 val3);
-@@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid,
- asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
- size_t len);
-
-+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
-+ unsigned int nr_futexes, unsigned int flags,
-+ struct __kernel_timespec __user *timeout, clockid_t clockid);
-+
- /* kernel/hrtimer.c */
- asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
- struct __kernel_timespec __user *rmtp);
-diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
-index 1c5fb86d4..4557a8b60 100644
---- a/include/uapi/asm-generic/unistd.h
-+++ b/include/uapi/asm-generic/unistd.h
-@@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
- #define __NR_process_mrelease 448
- __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
-
-+#define __NR_futex_waitv 449
-+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
-+
- #undef __NR_syscalls
--#define __NR_syscalls 449
-+#define __NR_syscalls 450
-
- /*
- * 32 bit systems traditionally used different
-diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
-index 235e5b2fa..71a5df8d2 100644
---- a/include/uapi/linux/futex.h
-+++ b/include/uapi/linux/futex.h
-@@ -43,6 +43,31 @@
- #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
- FUTEX_PRIVATE_FLAG)
-
-+/*
-+ * Flags to specify the bit length of the futex word for futex2 syscalls.
-+ * Currently, only 32 is supported.
-+ */
-+#define FUTEX_32 2
-+
-+/*
-+ * Max numbers of elements in a futex_waitv array
-+ */
-+#define FUTEX_WAITV_MAX 128
-+
-+/**
-+ * struct futex_waitv - A waiter for vectorized wait
-+ * @val: Expected value at uaddr
-+ * @uaddr: User address to wait on
-+ * @flags: Flags for this waiter
-+ * @__reserved: Reserved member to preserve data alignment. Should be 0.
-+ */
-+struct futex_waitv {
-+ __u64 val;
-+ __u64 uaddr;
-+ __u32 flags;
-+ __u32 __reserved;
-+};
-+
- /*
- * Support for robust futexes: the kernel cleans up held futexes at
- * thread exit time.
-diff --git a/kernel/Makefile b/kernel/Makefile
-index 4df609be4..3f6ab5d50 100644
---- a/kernel/Makefile
-+++ b/kernel/Makefile
-@@ -59,7 +59,7 @@ obj-$(CONFIG_FREEZER) += freezer.o
- obj-$(CONFIG_PROFILING) += profile.o
- obj-$(CONFIG_STACKTRACE) += stacktrace.o
- obj-y += time/
--obj-$(CONFIG_FUTEX) += futex.o
-+obj-$(CONFIG_FUTEX) += futex/
- obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
- obj-$(CONFIG_SMP) += smp.o
- ifneq ($(CONFIG_SMP),y)
-diff --git a/kernel/futex.c b/kernel/futex.c
-deleted file mode 100644
-index c15ad276f..000000000
---- a/kernel/futex.c
-+++ /dev/null
-@@ -1,4272 +0,0 @@
--// SPDX-License-Identifier: GPL-2.0-or-later
--/*
-- * Fast Userspace Mutexes (which I call "Futexes!").
-- * (C) Rusty Russell, IBM 2002
-- *
-- * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
-- * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
-- *
-- * Removed page pinning, fix privately mapped COW pages and other cleanups
-- * (C) Copyright 2003, 2004 Jamie Lokier
-- *
-- * Robust futex support started by Ingo Molnar
-- * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
-- * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
-- *
-- * PI-futex support started by Ingo Molnar and Thomas Gleixner
-- * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
-- * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
-- *
-- * PRIVATE futexes by Eric Dumazet
-- * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
-- *
-- * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
-- * Copyright (C) IBM Corporation, 2009
-- * Thanks to Thomas Gleixner for conceptual design and careful reviews.
-- *
-- * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
-- * enough at me, Linus for the original (flawed) idea, Matthew
-- * Kirkwood for proof-of-concept implementation.
-- *
-- * "The futexes are also cursed."
-- * "But they come in a choice of three flavours!"
-- */
--#include <linux/compat.h>
--#include <linux/jhash.h>
--#include <linux/pagemap.h>
--#include <linux/syscalls.h>
--#include <linux/freezer.h>
--#include <linux/memblock.h>
--#include <linux/fault-inject.h>
--#include <linux/time_namespace.h>
--
--#include <asm/futex.h>
--
--#include "locking/rtmutex_common.h"
--
--/*
-- * READ this before attempting to hack on futexes!
-- *
-- * Basic futex operation and ordering guarantees
-- * =============================================
-- *
-- * The waiter reads the futex value in user space and calls
-- * futex_wait(). This function computes the hash bucket and acquires
-- * the hash bucket lock. After that it reads the futex user space value
-- * again and verifies that the data has not changed. If it has not changed
-- * it enqueues itself into the hash bucket, releases the hash bucket lock
-- * and schedules.
-- *
-- * The waker side modifies the user space value of the futex and calls
-- * futex_wake(). This function computes the hash bucket and acquires the
-- * hash bucket lock. Then it looks for waiters on that futex in the hash
-- * bucket and wakes them.
-- *
-- * In futex wake up scenarios where no tasks are blocked on a futex, taking
-- * the hb spinlock can be avoided and simply return. In order for this
-- * optimization to work, ordering guarantees must exist so that the waiter
-- * being added to the list is acknowledged when the list is concurrently being
-- * checked by the waker, avoiding scenarios like the following:
-- *
-- * CPU 0 CPU 1
-- * val = *futex;
-- * sys_futex(WAIT, futex, val);
-- * futex_wait(futex, val);
-- * uval = *futex;
-- * *futex = newval;
-- * sys_futex(WAKE, futex);
-- * futex_wake(futex);
-- * if (queue_empty())
-- * return;
-- * if (uval == val)
-- * lock(hash_bucket(futex));
-- * queue();
-- * unlock(hash_bucket(futex));
-- * schedule();
-- *
-- * This would cause the waiter on CPU 0 to wait forever because it
-- * missed the transition of the user space value from val to newval
-- * and the waker did not find the waiter in the hash bucket queue.
-- *
-- * The correct serialization ensures that a waiter either observes
-- * the changed user space value before blocking or is woken by a
-- * concurrent waker:
-- *
-- * CPU 0 CPU 1
-- * val = *futex;
-- * sys_futex(WAIT, futex, val);
-- * futex_wait(futex, val);
-- *
-- * waiters++; (a)
-- * smp_mb(); (A) <-- paired with -.
-- * |
-- * lock(hash_bucket(futex)); |
-- * |
-- * uval = *futex; |
-- * | *futex = newval;
-- * | sys_futex(WAKE, futex);
-- * | futex_wake(futex);
-- * |
-- * `--------> smp_mb(); (B)
-- * if (uval == val)
-- * queue();
-- * unlock(hash_bucket(futex));
-- * schedule(); if (waiters)
-- * lock(hash_bucket(futex));
-- * else wake_waiters(futex);
-- * waiters--; (b) unlock(hash_bucket(futex));
-- *
-- * Where (A) orders the waiters increment and the futex value read through
-- * atomic operations (see hb_waiters_inc) and where (B) orders the write
-- * to futex and the waiters read (see hb_waiters_pending()).
-- *
-- * This yields the following case (where X:=waiters, Y:=futex):
-- *
-- * X = Y = 0
-- *
-- * w[X]=1 w[Y]=1
-- * MB MB
-- * r[Y]=y r[X]=x
-- *
-- * Which guarantees that x==0 && y==0 is impossible; which translates back into
-- * the guarantee that we cannot both miss the futex variable change and the
-- * enqueue.
-- *
-- * Note that a new waiter is accounted for in (a) even when it is possible that
-- * the wait call can return error, in which case we backtrack from it in (b).
-- * Refer to the comment in queue_lock().
-- *
-- * Similarly, in order to account for waiters being requeued on another
-- * address we always increment the waiters for the destination bucket before
-- * acquiring the lock. It then decrements them again after releasing it -
-- * the code that actually moves the futex(es) between hash buckets (requeue_futex)
-- * will do the additional required waiter count housekeeping. This is done for
-- * double_lock_hb() and double_unlock_hb(), respectively.
-- */
--
--#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
--#define futex_cmpxchg_enabled 1
--#else
--static int __read_mostly futex_cmpxchg_enabled;
--#endif
--
--/*
-- * Futex flags used to encode options to functions and preserve them across
-- * restarts.
-- */
--#ifdef CONFIG_MMU
--# define FLAGS_SHARED 0x01
--#else
--/*
-- * NOMMU does not have per process address space. Let the compiler optimize
-- * code away.
-- */
--# define FLAGS_SHARED 0x00
--#endif
--#define FLAGS_CLOCKRT 0x02
--#define FLAGS_HAS_TIMEOUT 0x04
--
--/*
-- * Priority Inheritance state:
-- */
--struct futex_pi_state {
-- /*
-- * list of 'owned' pi_state instances - these have to be
-- * cleaned up in do_exit() if the task exits prematurely:
-- */
-- struct list_head list;
--
-- /*
-- * The PI object:
-- */
-- struct rt_mutex_base pi_mutex;
--
-- struct task_struct *owner;
-- refcount_t refcount;
--
-- union futex_key key;
--} __randomize_layout;
--
--/**
-- * struct futex_q - The hashed futex queue entry, one per waiting task
-- * @list: priority-sorted list of tasks waiting on this futex
-- * @task: the task waiting on the futex
-- * @lock_ptr: the hash bucket lock
-- * @key: the key the futex is hashed on
-- * @pi_state: optional priority inheritance state
-- * @rt_waiter: rt_waiter storage for use with requeue_pi
-- * @requeue_pi_key: the requeue_pi target futex key
-- * @bitset: bitset for the optional bitmasked wakeup
-- * @requeue_state: State field for futex_requeue_pi()
-- * @requeue_wait: RCU wait for futex_requeue_pi() (RT only)
-- *
-- * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
-- * we can wake only the relevant ones (hashed queues may be shared).
-- *
-- * A futex_q has a woken state, just like tasks have TASK_RUNNING.
-- * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
-- * The order of wakeup is always to make the first condition true, then
-- * the second.
-- *
-- * PI futexes are typically woken before they are removed from the hash list via
-- * the rt_mutex code. See unqueue_me_pi().
-- */
--struct futex_q {
-- struct plist_node list;
--
-- struct task_struct *task;
-- spinlock_t *lock_ptr;
-- union futex_key key;
-- struct futex_pi_state *pi_state;
-- struct rt_mutex_waiter *rt_waiter;
-- union futex_key *requeue_pi_key;
-- u32 bitset;
-- atomic_t requeue_state;
--#ifdef CONFIG_PREEMPT_RT
-- struct rcuwait requeue_wait;
--#endif
--} __randomize_layout;
--
--/*
-- * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an
-- * underlying rtmutex. The task which is about to be requeued could have
-- * just woken up (timeout, signal). After the wake up the task has to
-- * acquire hash bucket lock, which is held by the requeue code. As a task
-- * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking
-- * and the hash bucket lock blocking would collide and corrupt state.
-- *
-- * On !PREEMPT_RT this is not a problem and everything could be serialized
-- * on hash bucket lock, but aside of having the benefit of common code,
-- * this allows to avoid doing the requeue when the task is already on the
-- * way out and taking the hash bucket lock of the original uaddr1 when the
-- * requeue has been completed.
-- *
-- * The following state transitions are valid:
-- *
-- * On the waiter side:
-- * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE
-- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT
-- *
-- * On the requeue side:
-- * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS
-- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED
-- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed)
-- * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED
-- * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed)
-- *
-- * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this
-- * signals that the waiter is already on the way out. It also means that
-- * the waiter is still on the 'wait' futex, i.e. uaddr1.
-- *
-- * The waiter side signals early wakeup to the requeue side either through
-- * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending
-- * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately
-- * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT,
-- * which means the wakeup is interleaving with a requeue in progress it has
-- * to wait for the requeue side to change the state. Either to DONE/LOCKED
-- * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex
-- * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by
-- * the requeue side when the requeue attempt failed via deadlock detection
-- * and therefore the waiter q is still on the uaddr1 futex.
-- */
--enum {
-- Q_REQUEUE_PI_NONE = 0,
-- Q_REQUEUE_PI_IGNORE,
-- Q_REQUEUE_PI_IN_PROGRESS,
-- Q_REQUEUE_PI_WAIT,
-- Q_REQUEUE_PI_DONE,
-- Q_REQUEUE_PI_LOCKED,
--};
--
--static const struct futex_q futex_q_init = {
-- /* list gets initialized in queue_me()*/
-- .key = FUTEX_KEY_INIT,
-- .bitset = FUTEX_BITSET_MATCH_ANY,
-- .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
--};
--
--/*
-- * Hash buckets are shared by all the futex_keys that hash to the same
-- * location. Each key may have multiple futex_q structures, one for each task
-- * waiting on a futex.
-- */
--struct futex_hash_bucket {
-- atomic_t waiters;
-- spinlock_t lock;
-- struct plist_head chain;
--} ____cacheline_aligned_in_smp;
--
--/*
-- * The base of the bucket array and its size are always used together
-- * (after initialization only in hash_futex()), so ensure that they
-- * reside in the same cacheline.
-- */
--static struct {
-- struct futex_hash_bucket *queues;
-- unsigned long hashsize;
--} __futex_data __read_mostly __aligned(2*sizeof(long));
--#define futex_queues (__futex_data.queues)
--#define futex_hashsize (__futex_data.hashsize)
--
--
--/*
-- * Fault injections for futexes.
-- */
--#ifdef CONFIG_FAIL_FUTEX
--
--static struct {
-- struct fault_attr attr;
--
-- bool ignore_private;
--} fail_futex = {
-- .attr = FAULT_ATTR_INITIALIZER,
-- .ignore_private = false,
--};
--
--static int __init setup_fail_futex(char *str)
--{
-- return setup_fault_attr(&fail_futex.attr, str);
--}
--__setup("fail_futex=", setup_fail_futex);
--
--static bool should_fail_futex(bool fshared)
--{
-- if (fail_futex.ignore_private && !fshared)
-- return false;
--
-- return should_fail(&fail_futex.attr, 1);
--}
--
--#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
--
--static int __init fail_futex_debugfs(void)
--{
-- umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-- struct dentry *dir;
--
-- dir = fault_create_debugfs_attr("fail_futex", NULL,
-- &fail_futex.attr);
-- if (IS_ERR(dir))
-- return PTR_ERR(dir);
--
-- debugfs_create_bool("ignore-private", mode, dir,
-- &fail_futex.ignore_private);
-- return 0;
--}
--
--late_initcall(fail_futex_debugfs);
--
--#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
--
--#else
--static inline bool should_fail_futex(bool fshared)
--{
-- return false;
--}
--#endif /* CONFIG_FAIL_FUTEX */
--
--#ifdef CONFIG_COMPAT
--static void compat_exit_robust_list(struct task_struct *curr);
--#endif
--
--/*
-- * Reflects a new waiter being added to the waitqueue.
-- */
--static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
--{
--#ifdef CONFIG_SMP
-- atomic_inc(&hb->waiters);
-- /*
-- * Full barrier (A), see the ordering comment above.
-- */
-- smp_mb__after_atomic();
--#endif
--}
--
--/*
-- * Reflects a waiter being removed from the waitqueue by wakeup
-- * paths.
-- */
--static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
--{
--#ifdef CONFIG_SMP
-- atomic_dec(&hb->waiters);
--#endif
--}
--
--static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
--{
--#ifdef CONFIG_SMP
-- /*
-- * Full barrier (B), see the ordering comment above.
-- */
-- smp_mb();
-- return atomic_read(&hb->waiters);
--#else
-- return 1;
--#endif
--}
--
--/**
-- * hash_futex - Return the hash bucket in the global hash
-- * @key: Pointer to the futex key for which the hash is calculated
-- *
-- * We hash on the keys returned from get_futex_key (see below) and return the
-- * corresponding hash bucket in the global hash.
-- */
--static struct futex_hash_bucket *hash_futex(union futex_key *key)
--{
-- u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
-- key->both.offset);
--
-- return &futex_queues[hash & (futex_hashsize - 1)];
--}
--
--
--/**
-- * match_futex - Check whether two futex keys are equal
-- * @key1: Pointer to key1
-- * @key2: Pointer to key2
-- *
-- * Return 1 if two futex_keys are equal, 0 otherwise.
-- */
--static inline int match_futex(union futex_key *key1, union futex_key *key2)
--{
-- return (key1 && key2
-- && key1->both.word == key2->both.word
-- && key1->both.ptr == key2->both.ptr
-- && key1->both.offset == key2->both.offset);
--}
--
--enum futex_access {
-- FUTEX_READ,
-- FUTEX_WRITE
--};
--
--/**
-- * futex_setup_timer - set up the sleeping hrtimer.
-- * @time: ptr to the given timeout value
-- * @timeout: the hrtimer_sleeper structure to be set up
-- * @flags: futex flags
-- * @range_ns: optional range in ns
-- *
-- * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
-- * value given
-- */
--static inline struct hrtimer_sleeper *
--futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
-- int flags, u64 range_ns)
--{
-- if (!time)
-- return NULL;
--
-- hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
-- CLOCK_REALTIME : CLOCK_MONOTONIC,
-- HRTIMER_MODE_ABS);
-- /*
-- * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
-- * effectively the same as calling hrtimer_set_expires().
-- */
-- hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
--
-- return timeout;
--}
--
--/*
-- * Generate a machine wide unique identifier for this inode.
-- *
-- * This relies on u64 not wrapping in the life-time of the machine; which with
-- * 1ns resolution means almost 585 years.
-- *
-- * This further relies on the fact that a well formed program will not unmap
-- * the file while it has a (shared) futex waiting on it. This mapping will have
-- * a file reference which pins the mount and inode.
-- *
-- * If for some reason an inode gets evicted and read back in again, it will get
-- * a new sequence number and will _NOT_ match, even though it is the exact same
-- * file.
-- *
-- * It is important that match_futex() will never have a false-positive, esp.
-- * for PI futexes that can mess up the state. The above argues that false-negatives
-- * are only possible for malformed programs.
-- */
--static u64 get_inode_sequence_number(struct inode *inode)
--{
-- static atomic64_t i_seq;
-- u64 old;
--
-- /* Does the inode already have a sequence number? */
-- old = atomic64_read(&inode->i_sequence);
-- if (likely(old))
-- return old;
--
-- for (;;) {
-- u64 new = atomic64_add_return(1, &i_seq);
-- if (WARN_ON_ONCE(!new))
-- continue;
--
-- old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
-- if (old)
-- return old;
-- return new;
-- }
--}
--
--/**
-- * get_futex_key() - Get parameters which are the keys for a futex
-- * @uaddr: virtual address of the futex
-- * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
-- * @key: address where result is stored.
-- * @rw: mapping needs to be read/write (values: FUTEX_READ,
-- * FUTEX_WRITE)
-- *
-- * Return: a negative error code or 0
-- *
-- * The key words are stored in @key on success.
-- *
-- * For shared mappings (when @fshared), the key is:
-- *
-- * ( inode->i_sequence, page->index, offset_within_page )
-- *
-- * [ also see get_inode_sequence_number() ]
-- *
-- * For private mappings (or when !@fshared), the key is:
-- *
-- * ( current->mm, address, 0 )
-- *
-- * This allows (cross process, where applicable) identification of the futex
-- * without keeping the page pinned for the duration of the FUTEX_WAIT.
-- *
-- * lock_page() might sleep, the caller should not hold a spinlock.
-- */
--static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
-- enum futex_access rw)
--{
-- unsigned long address = (unsigned long)uaddr;
-- struct mm_struct *mm = current->mm;
-- struct page *page, *tail;
-- struct address_space *mapping;
-- int err, ro = 0;
--
-- /*
-- * The futex address must be "naturally" aligned.
-- */
-- key->both.offset = address % PAGE_SIZE;
-- if (unlikely((address % sizeof(u32)) != 0))
-- return -EINVAL;
-- address -= key->both.offset;
--
-- if (unlikely(!access_ok(uaddr, sizeof(u32))))
-- return -EFAULT;
--
-- if (unlikely(should_fail_futex(fshared)))
-- return -EFAULT;
--
-- /*
-- * PROCESS_PRIVATE futexes are fast.
-- * As the mm cannot disappear under us and the 'key' only needs
-- * virtual address, we dont even have to find the underlying vma.
-- * Note : We do have to check 'uaddr' is a valid user address,
-- * but access_ok() should be faster than find_vma()
-- */
-- if (!fshared) {
-- key->private.mm = mm;
-- key->private.address = address;
-- return 0;
-- }
--
--again:
-- /* Ignore any VERIFY_READ mapping (futex common case) */
-- if (unlikely(should_fail_futex(true)))
-- return -EFAULT;
--
-- err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
-- /*
-- * If write access is not required (eg. FUTEX_WAIT), try
-- * and get read-only access.
-- */
-- if (err == -EFAULT && rw == FUTEX_READ) {
-- err = get_user_pages_fast(address, 1, 0, &page);
-- ro = 1;
-- }
-- if (err < 0)
-- return err;
-- else
-- err = 0;
--
-- /*
-- * The treatment of mapping from this point on is critical. The page
-- * lock protects many things but in this context the page lock
-- * stabilizes mapping, prevents inode freeing in the shared
-- * file-backed region case and guards against movement to swap cache.
-- *
-- * Strictly speaking the page lock is not needed in all cases being
-- * considered here and page lock forces unnecessarily serialization
-- * From this point on, mapping will be re-verified if necessary and
-- * page lock will be acquired only if it is unavoidable
-- *
-- * Mapping checks require the head page for any compound page so the
-- * head page and mapping is looked up now. For anonymous pages, it
-- * does not matter if the page splits in the future as the key is
-- * based on the address. For filesystem-backed pages, the tail is
-- * required as the index of the page determines the key. For
-- * base pages, there is no tail page and tail == page.
-- */
-- tail = page;
-- page = compound_head(page);
-- mapping = READ_ONCE(page->mapping);
--
-- /*
-- * If page->mapping is NULL, then it cannot be a PageAnon
-- * page; but it might be the ZERO_PAGE or in the gate area or
-- * in a special mapping (all cases which we are happy to fail);
-- * or it may have been a good file page when get_user_pages_fast
-- * found it, but truncated or holepunched or subjected to
-- * invalidate_complete_page2 before we got the page lock (also
-- * cases which we are happy to fail). And we hold a reference,
-- * so refcount care in invalidate_complete_page's remove_mapping
-- * prevents drop_caches from setting mapping to NULL beneath us.
-- *
-- * The case we do have to guard against is when memory pressure made
-- * shmem_writepage move it from filecache to swapcache beneath us:
-- * an unlikely race, but we do need to retry for page->mapping.
-- */
-- if (unlikely(!mapping)) {
-- int shmem_swizzled;
--
-- /*
-- * Page lock is required to identify which special case above
-- * applies. If this is really a shmem page then the page lock
-- * will prevent unexpected transitions.
-- */
-- lock_page(page);
-- shmem_swizzled = PageSwapCache(page) || page->mapping;
-- unlock_page(page);
-- put_page(page);
--
-- if (shmem_swizzled)
-- goto again;
--
-- return -EFAULT;
-- }
--
-- /*
-- * Private mappings are handled in a simple way.
-- *
-- * If the futex key is stored on an anonymous page, then the associated
-- * object is the mm which is implicitly pinned by the calling process.
-- *
-- * NOTE: When userspace waits on a MAP_SHARED mapping, even if
-- * it's a read-only handle, it's expected that futexes attach to
-- * the object not the particular process.
-- */
-- if (PageAnon(page)) {
-- /*
-- * A RO anonymous page will never change and thus doesn't make
-- * sense for futex operations.
-- */
-- if (unlikely(should_fail_futex(true)) || ro) {
-- err = -EFAULT;
-- goto out;
-- }
--
-- key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
-- key->private.mm = mm;
-- key->private.address = address;
--
-- } else {
-- struct inode *inode;
--
-- /*
-- * The associated futex object in this case is the inode and
-- * the page->mapping must be traversed. Ordinarily this should
-- * be stabilised under page lock but it's not strictly
-- * necessary in this case as we just want to pin the inode, not
-- * update the radix tree or anything like that.
-- *
-- * The RCU read lock is taken as the inode is finally freed
-- * under RCU. If the mapping still matches expectations then the
-- * mapping->host can be safely accessed as being a valid inode.
-- */
-- rcu_read_lock();
--
-- if (READ_ONCE(page->mapping) != mapping) {
-- rcu_read_unlock();
-- put_page(page);
--
-- goto again;
-- }
--
-- inode = READ_ONCE(mapping->host);
-- if (!inode) {
-- rcu_read_unlock();
-- put_page(page);
--
-- goto again;
-- }
--
-- key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-- key->shared.i_seq = get_inode_sequence_number(inode);
-- key->shared.pgoff = page_to_pgoff(tail);
-- rcu_read_unlock();
-- }
--
--out:
-- put_page(page);
-- return err;
--}
--
--/**
-- * fault_in_user_writeable() - Fault in user address and verify RW access
-- * @uaddr: pointer to faulting user space address
-- *
-- * Slow path to fixup the fault we just took in the atomic write
-- * access to @uaddr.
-- *
-- * We have no generic implementation of a non-destructive write to the
-- * user address. We know that we faulted in the atomic pagefault
-- * disabled section so we can as well avoid the #PF overhead by
-- * calling get_user_pages() right away.
-- */
--static int fault_in_user_writeable(u32 __user *uaddr)
--{
-- struct mm_struct *mm = current->mm;
-- int ret;
--
-- mmap_read_lock(mm);
-- ret = fixup_user_fault(mm, (unsigned long)uaddr,
-- FAULT_FLAG_WRITE, NULL);
-- mmap_read_unlock(mm);
--
-- return ret < 0 ? ret : 0;
--}
--
--/**
-- * futex_top_waiter() - Return the highest priority waiter on a futex
-- * @hb: the hash bucket the futex_q's reside in
-- * @key: the futex key (to distinguish it from other futex futex_q's)
-- *
-- * Must be called with the hb lock held.
-- */
--static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
-- union futex_key *key)
--{
-- struct futex_q *this;
--
-- plist_for_each_entry(this, &hb->chain, list) {
-- if (match_futex(&this->key, key))
-- return this;
-- }
-- return NULL;
--}
--
--static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
-- u32 uval, u32 newval)
--{
-- int ret;
--
-- pagefault_disable();
-- ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
-- pagefault_enable();
--
-- return ret;
--}
--
--static int get_futex_value_locked(u32 *dest, u32 __user *from)
--{
-- int ret;
--
-- pagefault_disable();
-- ret = __get_user(*dest, from);
-- pagefault_enable();
--
-- return ret ? -EFAULT : 0;
--}
--
--
--/*
-- * PI code:
-- */
--static int refill_pi_state_cache(void)
--{
-- struct futex_pi_state *pi_state;
--
-- if (likely(current->pi_state_cache))
-- return 0;
--
-- pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
--
-- if (!pi_state)
-- return -ENOMEM;
--
-- INIT_LIST_HEAD(&pi_state->list);
-- /* pi_mutex gets initialized later */
-- pi_state->owner = NULL;
-- refcount_set(&pi_state->refcount, 1);
-- pi_state->key = FUTEX_KEY_INIT;
--
-- current->pi_state_cache = pi_state;
--
-- return 0;
--}
--
--static struct futex_pi_state *alloc_pi_state(void)
--{
-- struct futex_pi_state *pi_state = current->pi_state_cache;
--
-- WARN_ON(!pi_state);
-- current->pi_state_cache = NULL;
--
-- return pi_state;
--}
--
--static void pi_state_update_owner(struct futex_pi_state *pi_state,
-- struct task_struct *new_owner)
--{
-- struct task_struct *old_owner = pi_state->owner;
--
-- lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
--
-- if (old_owner) {
-- raw_spin_lock(&old_owner->pi_lock);
-- WARN_ON(list_empty(&pi_state->list));
-- list_del_init(&pi_state->list);
-- raw_spin_unlock(&old_owner->pi_lock);
-- }
--
-- if (new_owner) {
-- raw_spin_lock(&new_owner->pi_lock);
-- WARN_ON(!list_empty(&pi_state->list));
-- list_add(&pi_state->list, &new_owner->pi_state_list);
-- pi_state->owner = new_owner;
-- raw_spin_unlock(&new_owner->pi_lock);
-- }
--}
--
--static void get_pi_state(struct futex_pi_state *pi_state)
--{
-- WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
--}
--
--/*
-- * Drops a reference to the pi_state object and frees or caches it
-- * when the last reference is gone.
-- */
--static void put_pi_state(struct futex_pi_state *pi_state)
--{
-- if (!pi_state)
-- return;
--
-- if (!refcount_dec_and_test(&pi_state->refcount))
-- return;
--
-- /*
-- * If pi_state->owner is NULL, the owner is most probably dying
-- * and has cleaned up the pi_state already
-- */
-- if (pi_state->owner) {
-- unsigned long flags;
--
-- raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
-- pi_state_update_owner(pi_state, NULL);
-- rt_mutex_proxy_unlock(&pi_state->pi_mutex);
-- raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
-- }
--
-- if (current->pi_state_cache) {
-- kfree(pi_state);
-- } else {
-- /*
-- * pi_state->list is already empty.
-- * clear pi_state->owner.
-- * refcount is at 0 - put it back to 1.
-- */
-- pi_state->owner = NULL;
-- refcount_set(&pi_state->refcount, 1);
-- current->pi_state_cache = pi_state;
-- }
--}
--
--#ifdef CONFIG_FUTEX_PI
--
--/*
-- * This task is holding PI mutexes at exit time => bad.
-- * Kernel cleans up PI-state, but userspace is likely hosed.
-- * (Robust-futex cleanup is separate and might save the day for userspace.)
-- */
--static void exit_pi_state_list(struct task_struct *curr)
--{
-- struct list_head *next, *head = &curr->pi_state_list;
-- struct futex_pi_state *pi_state;
-- struct futex_hash_bucket *hb;
-- union futex_key key = FUTEX_KEY_INIT;
--
-- if (!futex_cmpxchg_enabled)
-- return;
-- /*
-- * We are a ZOMBIE and nobody can enqueue itself on
-- * pi_state_list anymore, but we have to be careful
-- * versus waiters unqueueing themselves:
-- */
-- raw_spin_lock_irq(&curr->pi_lock);
-- while (!list_empty(head)) {
-- next = head->next;
-- pi_state = list_entry(next, struct futex_pi_state, list);
-- key = pi_state->key;
-- hb = hash_futex(&key);
--
-- /*
-- * We can race against put_pi_state() removing itself from the
-- * list (a waiter going away). put_pi_state() will first
-- * decrement the reference count and then modify the list, so
--		 * it's possible to see the list entry but fail this reference
--		 * acquire.
--		 *
--		 * In that case, drop the locks to let put_pi_state() make
-- * progress and retry the loop.
-- */
-- if (!refcount_inc_not_zero(&pi_state->refcount)) {
-- raw_spin_unlock_irq(&curr->pi_lock);
-- cpu_relax();
-- raw_spin_lock_irq(&curr->pi_lock);
-- continue;
-- }
-- raw_spin_unlock_irq(&curr->pi_lock);
--
-- spin_lock(&hb->lock);
-- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-- raw_spin_lock(&curr->pi_lock);
-- /*
-- * We dropped the pi-lock, so re-check whether this
-- * task still owns the PI-state:
-- */
-- if (head->next != next) {
-- /* retain curr->pi_lock for the loop invariant */
-- raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
-- spin_unlock(&hb->lock);
-- put_pi_state(pi_state);
-- continue;
-- }
--
-- WARN_ON(pi_state->owner != curr);
-- WARN_ON(list_empty(&pi_state->list));
-- list_del_init(&pi_state->list);
-- pi_state->owner = NULL;
--
-- raw_spin_unlock(&curr->pi_lock);
-- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-- spin_unlock(&hb->lock);
--
-- rt_mutex_futex_unlock(&pi_state->pi_mutex);
-- put_pi_state(pi_state);
--
-- raw_spin_lock_irq(&curr->pi_lock);
-- }
-- raw_spin_unlock_irq(&curr->pi_lock);
--}
--#else
--static inline void exit_pi_state_list(struct task_struct *curr) { }
--#endif
--
--/*
-- * We need to check the following states:
-- *
-- * Waiter | pi_state | pi->owner | uTID | uODIED | ?
-- *
-- * [1] NULL | --- | --- | 0 | 0/1 | Valid
-- * [2] NULL | --- | --- | >0 | 0/1 | Valid
-- *
-- * [3] Found | NULL | -- | Any | 0/1 | Invalid
-- *
-- * [4] Found | Found | NULL | 0 | 1 | Valid
-- * [5] Found | Found | NULL | >0 | 1 | Invalid
-- *
-- * [6] Found | Found | task | 0 | 1 | Valid
-- *
-- * [7] Found | Found | NULL | Any | 0 | Invalid
-- *
-- * [8] Found | Found | task | ==taskTID | 0/1 | Valid
-- * [9] Found | Found | task | 0 | 0 | Invalid
-- * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
-- *
-- * [1] Indicates that the kernel can acquire the futex atomically. We
-- * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
-- *
-- * [2] Valid, if TID does not belong to a kernel thread. If no matching
-- * thread is found then it indicates that the owner TID has died.
-- *
-- * [3] Invalid. The waiter is queued on a non PI futex
-- *
-- * [4] Valid state after exit_robust_list(), which sets the user space
-- * value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
-- *
-- * [5] The user space value got manipulated between exit_robust_list()
-- * and exit_pi_state_list()
-- *
-- * [6] Valid state after exit_pi_state_list() which sets the new owner in
-- * the pi_state but cannot access the user space value.
-- *
-- * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
-- *
-- * [8] Owner and user space value match
-- *
-- * [9] There is no transient state which sets the user space TID to 0
-- * except exit_robust_list(), but this is indicated by the
-- * FUTEX_OWNER_DIED bit. See [4]
-- *
-- * [10] There is no transient state which leaves owner and user space
-- * TID out of sync. Except one error case where the kernel is denied
-- * write access to the user address, see fixup_pi_state_owner().
-- *
-- *
-- * Serialization and lifetime rules:
-- *
-- * hb->lock:
-- *
-- * hb -> futex_q, relation
-- * futex_q -> pi_state, relation
-- *
-- *	(cannot be raw because hb can contain an arbitrary number
-- * of futex_q's)
-- *
-- * pi_mutex->wait_lock:
-- *
-- * {uval, pi_state}
-- *
-- * (and pi_mutex 'obviously')
-- *
-- * p->pi_lock:
-- *
-- * p->pi_state_list -> pi_state->list, relation
-- * pi_mutex->owner -> pi_state->owner, relation
-- *
-- * pi_state->refcount:
-- *
-- * pi_state lifetime
-- *
-- *
-- * Lock order:
-- *
-- * hb->lock
-- * pi_mutex->wait_lock
-- * p->pi_lock
-- *
-- */
--
--/*
-- * Validate that the existing waiter has a pi_state and sanity check
-- * the pi_state against the user space value. If correct, attach to
-- * it.
-- */
--static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
-- struct futex_pi_state *pi_state,
-- struct futex_pi_state **ps)
--{
-- pid_t pid = uval & FUTEX_TID_MASK;
-- u32 uval2;
-- int ret;
--
-- /*
-- * Userspace might have messed up non-PI and PI futexes [3]
-- */
-- if (unlikely(!pi_state))
-- return -EINVAL;
--
-- /*
-- * We get here with hb->lock held, and having found a
-- * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
-- * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
-- * which in turn means that futex_lock_pi() still has a reference on
-- * our pi_state.
-- *
-- * The waiter holding a reference on @pi_state also protects against
-- * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
-- * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
-- * free pi_state before we can take a reference ourselves.
-- */
-- WARN_ON(!refcount_read(&pi_state->refcount));
--
-- /*
-- * Now that we have a pi_state, we can acquire wait_lock
-- * and do the state validation.
-- */
-- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
--
-- /*
-- * Since {uval, pi_state} is serialized by wait_lock, and our current
-- * uval was read without holding it, it can have changed. Verify it
-- * still is what we expect it to be, otherwise retry the entire
-- * operation.
-- */
-- if (get_futex_value_locked(&uval2, uaddr))
-- goto out_efault;
--
-- if (uval != uval2)
-- goto out_eagain;
--
-- /*
-- * Handle the owner died case:
-- */
-- if (uval & FUTEX_OWNER_DIED) {
-- /*
-- * exit_pi_state_list sets owner to NULL and wakes the
-- * topmost waiter. The task which acquires the
-- * pi_state->rt_mutex will fixup owner.
-- */
-- if (!pi_state->owner) {
-- /*
-- * No pi state owner, but the user space TID
-- * is not 0. Inconsistent state. [5]
-- */
-- if (pid)
-- goto out_einval;
-- /*
-- * Take a ref on the state and return success. [4]
-- */
-- goto out_attach;
-- }
--
-- /*
-- * If TID is 0, then either the dying owner has not
-- * yet executed exit_pi_state_list() or some waiter
-- * acquired the rtmutex in the pi state, but did not
-- * yet fixup the TID in user space.
-- *
-- * Take a ref on the state and return success. [6]
-- */
-- if (!pid)
-- goto out_attach;
-- } else {
-- /*
-- * If the owner died bit is not set, then the pi_state
-- * must have an owner. [7]
-- */
-- if (!pi_state->owner)
-- goto out_einval;
-- }
--
-- /*
-- * Bail out if user space manipulated the futex value. If pi
-- * state exists then the owner TID must be the same as the
-- * user space TID. [9/10]
-- */
-- if (pid != task_pid_vnr(pi_state->owner))
-- goto out_einval;
--
--out_attach:
-- get_pi_state(pi_state);
-- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-- *ps = pi_state;
-- return 0;
--
--out_einval:
-- ret = -EINVAL;
-- goto out_error;
--
--out_eagain:
-- ret = -EAGAIN;
-- goto out_error;
--
--out_efault:
-- ret = -EFAULT;
-- goto out_error;
--
--out_error:
-- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-- return ret;
--}
--
--/**
-- * wait_for_owner_exiting - Block until the owner has exited
-- * @ret: owner's current futex lock status
-- * @exiting: Pointer to the exiting task
-- *
-- * Caller must hold a refcount on @exiting.
-- */
--static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
--{
-- if (ret != -EBUSY) {
-- WARN_ON_ONCE(exiting);
-- return;
-- }
--
-- if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
-- return;
--
-- mutex_lock(&exiting->futex_exit_mutex);
-- /*
-- * No point in doing state checking here. If the waiter got here
-- * while the task was in exec()->exec_futex_release() then it can
-- * have any FUTEX_STATE_* value when the waiter has acquired the
-- * mutex. OK, if running, EXITING or DEAD if it reached exit()
-- * already. Highly unlikely and not a problem. Just one more round
-- * through the futex maze.
-- */
-- mutex_unlock(&exiting->futex_exit_mutex);
--
-- put_task_struct(exiting);
--}
--
--static int handle_exit_race(u32 __user *uaddr, u32 uval,
-- struct task_struct *tsk)
--{
-- u32 uval2;
--
-- /*
-- * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
-- * caller that the alleged owner is busy.
-- */
-- if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-- return -EBUSY;
--
-- /*
-- * Reread the user space value to handle the following situation:
-- *
-- * CPU0 CPU1
-- *
-- * sys_exit() sys_futex()
-- * do_exit() futex_lock_pi()
-- * futex_lock_pi_atomic()
-- * exit_signals(tsk) No waiters:
-- * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID
-- * mm_release(tsk) Set waiter bit
-- * exit_robust_list(tsk) { *uaddr = 0x80000PID;
-- * Set owner died attach_to_pi_owner() {
-- * *uaddr = 0xC0000000; tsk = get_task(PID);
-- * } if (!tsk->flags & PF_EXITING) {
-- * ... attach();
-- * tsk->futex_state = } else {
-- * FUTEX_STATE_DEAD; if (tsk->futex_state !=
-- * FUTEX_STATE_DEAD)
-- * return -EAGAIN;
-- * return -ESRCH; <--- FAIL
-- * }
-- *
-- * Returning ESRCH unconditionally is wrong here because the
-- * user space value has been changed by the exiting task.
-- *
-- * The same logic applies to the case where the exiting task is
-- * already gone.
-- */
-- if (get_futex_value_locked(&uval2, uaddr))
-- return -EFAULT;
--
-- /* If the user space value has changed, try again. */
-- if (uval2 != uval)
-- return -EAGAIN;
--
-- /*
-- * The exiting task did not have a robust list, the robust list was
-- * corrupted or the user space value in *uaddr is simply bogus.
-- * Give up and tell user space.
-- */
-- return -ESRCH;
--}
--
--static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
-- struct futex_pi_state **ps)
--{
-- /*
-- * No existing pi state. First waiter. [2]
-- *
-- * This creates pi_state, we have hb->lock held, this means nothing can
-- * observe this state, wait_lock is irrelevant.
-- */
-- struct futex_pi_state *pi_state = alloc_pi_state();
--
-- /*
-- * Initialize the pi_mutex in locked state and make @p
-- * the owner of it:
-- */
-- rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
--
-- /* Store the key for possible exit cleanups: */
-- pi_state->key = *key;
--
-- WARN_ON(!list_empty(&pi_state->list));
-- list_add(&pi_state->list, &p->pi_state_list);
-- /*
-- * Assignment without holding pi_state->pi_mutex.wait_lock is safe
-- * because there is no concurrency as the object is not published yet.
-- */
-- pi_state->owner = p;
--
-- *ps = pi_state;
--}
--/*
-- * Lookup the task for the TID provided from user space and attach to
-- * it after doing proper sanity checks.
-- */
--static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-- struct futex_pi_state **ps,
-- struct task_struct **exiting)
--{
-- pid_t pid = uval & FUTEX_TID_MASK;
-- struct task_struct *p;
--
-- /*
-- * We are the first waiter - try to look up the real owner and attach
-- * the new pi_state to it, but bail out when TID = 0 [1]
-- *
-- * The !pid check is paranoid. None of the call sites should end up
-- * with pid == 0, but better safe than sorry. Let the caller retry
-- */
-- if (!pid)
-- return -EAGAIN;
-- p = find_get_task_by_vpid(pid);
-- if (!p)
-- return handle_exit_race(uaddr, uval, NULL);
--
-- if (unlikely(p->flags & PF_KTHREAD)) {
-- put_task_struct(p);
-- return -EPERM;
-- }
--
-- /*
--	 * We need to look at the task state to figure out whether the
-- * task is exiting. To protect against the change of the task state
-- * in futex_exit_release(), we do this protected by p->pi_lock:
-- */
-- raw_spin_lock_irq(&p->pi_lock);
-- if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
-- /*
-- * The task is on the way out. When the futex state is
-- * FUTEX_STATE_DEAD, we know that the task has finished
-- * the cleanup:
-- */
-- int ret = handle_exit_race(uaddr, uval, p);
--
-- raw_spin_unlock_irq(&p->pi_lock);
-- /*
-- * If the owner task is between FUTEX_STATE_EXITING and
-- * FUTEX_STATE_DEAD then store the task pointer and keep
-- * the reference on the task struct. The calling code will
-- * drop all locks, wait for the task to reach
-- * FUTEX_STATE_DEAD and then drop the refcount. This is
-- * required to prevent a live lock when the current task
-- * preempted the exiting task between the two states.
-- */
-- if (ret == -EBUSY)
-- *exiting = p;
-- else
-- put_task_struct(p);
-- return ret;
-- }
--
-- __attach_to_pi_owner(p, key, ps);
-- raw_spin_unlock_irq(&p->pi_lock);
--
-- put_task_struct(p);
--
-- return 0;
--}
--
--static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
--{
-- int err;
-- u32 curval;
--
-- if (unlikely(should_fail_futex(true)))
-- return -EFAULT;
--
-- err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
-- if (unlikely(err))
-- return err;
--
-- /* If user space value changed, let the caller retry */
-- return curval != uval ? -EAGAIN : 0;
--}
--
--/**
-- * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
-- * @uaddr: the pi futex user address
-- * @hb: the pi futex hash bucket
-- * @key: the futex key associated with uaddr and hb
-- * @ps: the pi_state pointer where we store the result of the
-- * lookup
-- * @task: the task to perform the atomic lock work for. This will
-- * be "current" except in the case of requeue pi.
-- * @exiting: Pointer to store the task pointer of the owner task
-- * which is in the middle of exiting
-- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
-- *
-- * Return:
-- * - 0 - ready to wait;
-- * - 1 - acquired the lock;
-- * - <0 - error
-- *
-- * The hb->lock must be held by the caller.
-- *
-- * @exiting is only set when the return value is -EBUSY. If so, this holds
-- * a refcount on the exiting task on return and the caller needs to drop it
-- * after waiting for the exit to complete.
-- */
--static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
-- union futex_key *key,
-- struct futex_pi_state **ps,
-- struct task_struct *task,
-- struct task_struct **exiting,
-- int set_waiters)
--{
-- u32 uval, newval, vpid = task_pid_vnr(task);
-- struct futex_q *top_waiter;
-- int ret;
--
-- /*
-- * Read the user space value first so we can validate a few
-- * things before proceeding further.
-- */
-- if (get_futex_value_locked(&uval, uaddr))
-- return -EFAULT;
--
-- if (unlikely(should_fail_futex(true)))
-- return -EFAULT;
--
-- /*
-- * Detect deadlocks.
-- */
-- if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
-- return -EDEADLK;
--
-- if ((unlikely(should_fail_futex(true))))
-- return -EDEADLK;
--
-- /*
-- * Lookup existing state first. If it exists, try to attach to
-- * its pi_state.
-- */
-- top_waiter = futex_top_waiter(hb, key);
-- if (top_waiter)
-- return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
--
-- /*
-- * No waiter and user TID is 0. We are here because the
--	 * waiters bit or the owner died bit is set, we were called
--	 * from requeue_cmp_pi, or something else forced us into the
--	 * syscall.
-- */
-- if (!(uval & FUTEX_TID_MASK)) {
-- /*
-- * We take over the futex. No other waiters and the user space
-- * TID is 0. We preserve the owner died bit.
-- */
-- newval = uval & FUTEX_OWNER_DIED;
-- newval |= vpid;
--
-- /* The futex requeue_pi code can enforce the waiters bit */
-- if (set_waiters)
-- newval |= FUTEX_WAITERS;
--
-- ret = lock_pi_update_atomic(uaddr, uval, newval);
-- if (ret)
-- return ret;
--
-- /*
-- * If the waiter bit was requested the caller also needs PI
-- * state attached to the new owner of the user space futex.
-- *
-- * @task is guaranteed to be alive and it cannot be exiting
-- * because it is either sleeping or waiting in
-- * futex_requeue_pi_wakeup_sync().
-- *
-- * No need to do the full attach_to_pi_owner() exercise
-- * because @task is known and valid.
-- */
-- if (set_waiters) {
-- raw_spin_lock_irq(&task->pi_lock);
-- __attach_to_pi_owner(task, key, ps);
-- raw_spin_unlock_irq(&task->pi_lock);
-- }
-- return 1;
-- }
--
-- /*
-- * First waiter. Set the waiters bit before attaching ourself to
-- * the owner. If owner tries to unlock, it will be forced into
-- * the kernel and blocked on hb->lock.
-- */
-- newval = uval | FUTEX_WAITERS;
-- ret = lock_pi_update_atomic(uaddr, uval, newval);
-- if (ret)
-- return ret;
-- /*
-- * If the update of the user space value succeeded, we try to
-- * attach to the owner. If that fails, no harm done, we only
-- * set the FUTEX_WAITERS bit in the user space variable.
-- */
-- return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
--}
--
--/**
-- * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
-- * @q: The futex_q to unqueue
-- *
-- * The q->lock_ptr must not be NULL and must be held by the caller.
-- */
--static void __unqueue_futex(struct futex_q *q)
--{
-- struct futex_hash_bucket *hb;
--
-- if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
-- return;
-- lockdep_assert_held(q->lock_ptr);
--
-- hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
-- plist_del(&q->list, &hb->chain);
-- hb_waiters_dec(hb);
--}
--
--/*
-- * The hash bucket lock must be held when this is called.
-- * Afterwards, the futex_q must not be accessed. Callers
-- * must ensure to later call wake_up_q() for the actual
-- * wakeups to occur.
-- */
--static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
--{
-- struct task_struct *p = q->task;
--
-- if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
-- return;
--
-- get_task_struct(p);
-- __unqueue_futex(q);
-- /*
-- * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
-- * is written, without taking any locks. This is possible in the event
-- * of a spurious wakeup, for example. A memory barrier is required here
-- * to prevent the following store to lock_ptr from getting ahead of the
-- * plist_del in __unqueue_futex().
-- */
-- smp_store_release(&q->lock_ptr, NULL);
--
-- /*
-- * Queue the task for later wakeup for after we've released
-- * the hb->lock.
-- */
-- wake_q_add_safe(wake_q, p);
--}
--
--/*
-- * Caller must hold a reference on @pi_state.
-- */
--static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
--{
-- struct rt_mutex_waiter *top_waiter;
-- struct task_struct *new_owner;
-- bool postunlock = false;
-- DEFINE_RT_WAKE_Q(wqh);
-- u32 curval, newval;
-- int ret = 0;
--
-- top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
-- if (WARN_ON_ONCE(!top_waiter)) {
-- /*
-- * As per the comment in futex_unlock_pi() this should not happen.
-- *
-- * When this happens, give up our locks and try again, giving
-- * the futex_lock_pi() instance time to complete, either by
-- * waiting on the rtmutex or removing itself from the futex
-- * queue.
-- */
-- ret = -EAGAIN;
-- goto out_unlock;
-- }
--
-- new_owner = top_waiter->task;
--
-- /*
-- * We pass it to the next owner. The WAITERS bit is always kept
-- * enabled while there is PI state around. We cleanup the owner
-- * died bit, because we are the owner.
-- */
-- newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
--
-- if (unlikely(should_fail_futex(true))) {
-- ret = -EFAULT;
-- goto out_unlock;
-- }
--
-- ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
-- if (!ret && (curval != uval)) {
-- /*
--		 * If an unconditional UNLOCK_PI operation (user space did not
-- * try the TID->0 transition) raced with a waiter setting the
-- * FUTEX_WAITERS flag between get_user() and locking the hash
-- * bucket lock, retry the operation.
-- */
-- if ((FUTEX_TID_MASK & curval) == uval)
-- ret = -EAGAIN;
-- else
-- ret = -EINVAL;
-- }
--
-- if (!ret) {
-- /*
-- * This is a point of no return; once we modified the uval
-- * there is no going back and subsequent operations must
-- * not fail.
-- */
-- pi_state_update_owner(pi_state, new_owner);
-- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
-- }
--
--out_unlock:
-- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
--
-- if (postunlock)
-- rt_mutex_postunlock(&wqh);
--
-- return ret;
--}
--
--/*
-- * Express the locking dependencies for lockdep:
-- */
--static inline void
--double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
--{
-- if (hb1 <= hb2) {
-- spin_lock(&hb1->lock);
-- if (hb1 < hb2)
-- spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
-- } else { /* hb1 > hb2 */
-- spin_lock(&hb2->lock);
-- spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
-- }
--}
--
--static inline void
--double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
--{
-- spin_unlock(&hb1->lock);
-- if (hb1 != hb2)
-- spin_unlock(&hb2->lock);
--}
--
--/*
-- * Wake up waiters matching bitset queued on this futex (uaddr).
-- */
--static int
--futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
--{
-- struct futex_hash_bucket *hb;
-- struct futex_q *this, *next;
-- union futex_key key = FUTEX_KEY_INIT;
-- int ret;
-- DEFINE_WAKE_Q(wake_q);
--
-- if (!bitset)
-- return -EINVAL;
--
-- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
-- if (unlikely(ret != 0))
-- return ret;
--
-- hb = hash_futex(&key);
--
-- /* Make sure we really have tasks to wakeup */
-- if (!hb_waiters_pending(hb))
-- return ret;
--
-- spin_lock(&hb->lock);
--
-- plist_for_each_entry_safe(this, next, &hb->chain, list) {
-- if (match_futex (&this->key, &key)) {
-- if (this->pi_state || this->rt_waiter) {
-- ret = -EINVAL;
-- break;
-- }
--
-- /* Check if one of the bits is set in both bitsets */
-- if (!(this->bitset & bitset))
-- continue;
--
-- mark_wake_futex(&wake_q, this);
-- if (++ret >= nr_wake)
-- break;
-- }
-- }
--
-- spin_unlock(&hb->lock);
-- wake_up_q(&wake_q);
-- return ret;
--}
--
--static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
--{
-- unsigned int op = (encoded_op & 0x70000000) >> 28;
-- unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
-- int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
-- int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
-- int oldval, ret;
--
-- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
-- if (oparg < 0 || oparg > 31) {
-- char comm[sizeof(current->comm)];
-- /*
-- * kill this print and return -EINVAL when userspace
-- * is sane again
-- */
-- pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
-- get_task_comm(comm, current), oparg);
-- oparg &= 31;
-- }
-- oparg = 1 << oparg;
-- }
--
-- pagefault_disable();
-- ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
-- pagefault_enable();
-- if (ret)
-- return ret;
--
-- switch (cmp) {
-- case FUTEX_OP_CMP_EQ:
-- return oldval == cmparg;
-- case FUTEX_OP_CMP_NE:
-- return oldval != cmparg;
-- case FUTEX_OP_CMP_LT:
-- return oldval < cmparg;
-- case FUTEX_OP_CMP_GE:
-- return oldval >= cmparg;
-- case FUTEX_OP_CMP_LE:
-- return oldval <= cmparg;
-- case FUTEX_OP_CMP_GT:
-- return oldval > cmparg;
-- default:
-- return -ENOSYS;
-- }
--}
--
--/*
-- * Wake up all waiters hashed on the physical page that is mapped
-- * to this virtual address:
-- */
--static int
--futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
-- int nr_wake, int nr_wake2, int op)
--{
-- union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-- struct futex_hash_bucket *hb1, *hb2;
-- struct futex_q *this, *next;
-- int ret, op_ret;
-- DEFINE_WAKE_Q(wake_q);
--
--retry:
-- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-- if (unlikely(ret != 0))
-- return ret;
-- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-- if (unlikely(ret != 0))
-- return ret;
--
-- hb1 = hash_futex(&key1);
-- hb2 = hash_futex(&key2);
--
--retry_private:
-- double_lock_hb(hb1, hb2);
-- op_ret = futex_atomic_op_inuser(op, uaddr2);
-- if (unlikely(op_ret < 0)) {
-- double_unlock_hb(hb1, hb2);
--
-- if (!IS_ENABLED(CONFIG_MMU) ||
-- unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
-- /*
-- * we don't get EFAULT from MMU faults if we don't have
-- * an MMU, but we might get them from range checking
-- */
-- ret = op_ret;
-- return ret;
-- }
--
-- if (op_ret == -EFAULT) {
-- ret = fault_in_user_writeable(uaddr2);
-- if (ret)
-- return ret;
-- }
--
-- cond_resched();
-- if (!(flags & FLAGS_SHARED))
-- goto retry_private;
-- goto retry;
-- }
--
-- plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-- if (match_futex (&this->key, &key1)) {
-- if (this->pi_state || this->rt_waiter) {
-- ret = -EINVAL;
-- goto out_unlock;
-- }
-- mark_wake_futex(&wake_q, this);
-- if (++ret >= nr_wake)
-- break;
-- }
-- }
--
-- if (op_ret > 0) {
-- op_ret = 0;
-- plist_for_each_entry_safe(this, next, &hb2->chain, list) {
-- if (match_futex (&this->key, &key2)) {
-- if (this->pi_state || this->rt_waiter) {
-- ret = -EINVAL;
-- goto out_unlock;
-- }
-- mark_wake_futex(&wake_q, this);
-- if (++op_ret >= nr_wake2)
-- break;
-- }
-- }
-- ret += op_ret;
-- }
--
--out_unlock:
-- double_unlock_hb(hb1, hb2);
-- wake_up_q(&wake_q);
-- return ret;
--}
--
--/**
-- * requeue_futex() - Requeue a futex_q from one hb to another
-- * @q: the futex_q to requeue
-- * @hb1: the source hash_bucket
-- * @hb2: the target hash_bucket
-- * @key2: the new key for the requeued futex_q
-- */
--static inline
--void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
-- struct futex_hash_bucket *hb2, union futex_key *key2)
--{
--
-- /*
-- * If key1 and key2 hash to the same bucket, no need to
-- * requeue.
-- */
-- if (likely(&hb1->chain != &hb2->chain)) {
-- plist_del(&q->list, &hb1->chain);
-- hb_waiters_dec(hb1);
-- hb_waiters_inc(hb2);
-- plist_add(&q->list, &hb2->chain);
-- q->lock_ptr = &hb2->lock;
-- }
-- q->key = *key2;
--}
--
--static inline bool futex_requeue_pi_prepare(struct futex_q *q,
-- struct futex_pi_state *pi_state)
--{
-- int old, new;
--
-- /*
-- * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
-- * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
-- * ignore the waiter.
-- */
-- old = atomic_read_acquire(&q->requeue_state);
-- do {
-- if (old == Q_REQUEUE_PI_IGNORE)
-- return false;
--
-- /*
-- * futex_proxy_trylock_atomic() might have set it to
--		 * IN_PROGRESS and an interleaved early wake to WAIT.
-- *
-- * It was considered to have an extra state for that
-- * trylock, but that would just add more conditionals
-- * all over the place for a dubious value.
-- */
-- if (old != Q_REQUEUE_PI_NONE)
-- break;
--
-- new = Q_REQUEUE_PI_IN_PROGRESS;
-- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
--
-- q->pi_state = pi_state;
-- return true;
--}
--
--static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
--{
-- int old, new;
--
-- old = atomic_read_acquire(&q->requeue_state);
-- do {
-- if (old == Q_REQUEUE_PI_IGNORE)
-- return;
--
-- if (locked >= 0) {
-- /* Requeue succeeded. Set DONE or LOCKED */
-- WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
-- old != Q_REQUEUE_PI_WAIT);
-- new = Q_REQUEUE_PI_DONE + locked;
-- } else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
-- /* Deadlock, no early wakeup interleave */
-- new = Q_REQUEUE_PI_NONE;
-- } else {
-- /* Deadlock, early wakeup interleave. */
-- WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
-- new = Q_REQUEUE_PI_IGNORE;
-- }
-- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
--
--#ifdef CONFIG_PREEMPT_RT
-- /* If the waiter interleaved with the requeue let it know */
-- if (unlikely(old == Q_REQUEUE_PI_WAIT))
-- rcuwait_wake_up(&q->requeue_wait);
--#endif
--}
--
--static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
--{
-- int old, new;
--
-- old = atomic_read_acquire(&q->requeue_state);
-- do {
-- /* Is requeue done already? */
-- if (old >= Q_REQUEUE_PI_DONE)
-- return old;
--
-- /*
-- * If not done, then tell the requeue code to either ignore
-- * the waiter or to wake it up once the requeue is done.
-- */
-- new = Q_REQUEUE_PI_WAIT;
-- if (old == Q_REQUEUE_PI_NONE)
-- new = Q_REQUEUE_PI_IGNORE;
-- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
--
-- /* If the requeue was in progress, wait for it to complete */
-- if (old == Q_REQUEUE_PI_IN_PROGRESS) {
--#ifdef CONFIG_PREEMPT_RT
-- rcuwait_wait_event(&q->requeue_wait,
-- atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
-- TASK_UNINTERRUPTIBLE);
--#else
-- (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
--#endif
-- }
--
-- /*
-- * Requeue is now either prohibited or complete. Reread state
-- * because during the wait above it might have changed. Nothing
-- * will modify q->requeue_state after this point.
-- */
-- return atomic_read(&q->requeue_state);
--}
--
--/**
-- * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
-- * @q: the futex_q
-- * @key: the key of the requeue target futex
-- * @hb: the hash_bucket of the requeue target futex
-- *
-- * During futex_requeue, with requeue_pi=1, it is possible to acquire the
-- * target futex if it is uncontended or via a lock steal.
-- *
-- * 1) Set @q::key to the requeue target futex key so the waiter can detect
-- * the wakeup on the right futex.
-- *
-- * 2) Dequeue @q from the hash bucket.
-- *
-- * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
-- * acquisition.
-- *
-- * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
-- * the waiter has to fixup the pi state.
-- *
-- * 5) Complete the requeue state so the waiter can make progress. After
-- * this point the waiter task can return from the syscall immediately in
-- * case that the pi state does not have to be fixed up.
-- *
-- * 6) Wake the waiter task.
-- *
-- * Must be called with both q->lock_ptr and hb->lock held.
-- */
--static inline
--void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
-- struct futex_hash_bucket *hb)
--{
-- q->key = *key;
--
-- __unqueue_futex(q);
--
-- WARN_ON(!q->rt_waiter);
-- q->rt_waiter = NULL;
--
-- q->lock_ptr = &hb->lock;
--
-- /* Signal locked state to the waiter */
-- futex_requeue_pi_complete(q, 1);
-- wake_up_state(q->task, TASK_NORMAL);
--}
--
--/**
-- * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
-- * @pifutex: the user address of the to futex
-- * @hb1: the from futex hash bucket, must be locked by the caller
-- * @hb2: the to futex hash bucket, must be locked by the caller
-- * @key1: the from futex key
-- * @key2: the to futex key
-- * @ps: address to store the pi_state pointer
-- * @exiting: Pointer to store the task pointer of the owner task
-- * which is in the middle of exiting
-- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
-- *
-- * Try and get the lock on behalf of the top waiter if we can do it atomically.
-- * Wake the top waiter if we succeed. If the caller specified set_waiters,
-- * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
-- * hb1 and hb2 must be held by the caller.
-- *
-- * @exiting is only set when the return value is -EBUSY. If so, this holds
-- * a refcount on the exiting task on return and the caller needs to drop it
-- * after waiting for the exit to complete.
-- *
-- * Return:
-- * - 0 - failed to acquire the lock atomically;
-- * - >0 - acquired the lock, return value is vpid of the top_waiter
-- * - <0 - error
-- */
--static int
--futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
-- struct futex_hash_bucket *hb2, union futex_key *key1,
-- union futex_key *key2, struct futex_pi_state **ps,
-- struct task_struct **exiting, int set_waiters)
--{
-- struct futex_q *top_waiter = NULL;
-- u32 curval;
-- int ret;
--
-- if (get_futex_value_locked(&curval, pifutex))
-- return -EFAULT;
--
-- if (unlikely(should_fail_futex(true)))
-- return -EFAULT;
--
-- /*
-- * Find the top_waiter and determine if there are additional waiters.
-- * If the caller intends to requeue more than 1 waiter to pifutex,
-- * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
-- * as we have means to handle the possible fault. If not, don't set
-- * the bit unnecessarily as it will force the subsequent unlock to enter
-- * the kernel.
-- */
-- top_waiter = futex_top_waiter(hb1, key1);
--
-- /* There are no waiters, nothing for us to do. */
-- if (!top_waiter)
-- return 0;
--
-- /*
-- * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
-- * and waiting on the 'waitqueue' futex which is always !PI.
-- */
-- if (!top_waiter->rt_waiter || top_waiter->pi_state)
-- return -EINVAL;
--
-- /* Ensure we requeue to the expected futex. */
-- if (!match_futex(top_waiter->requeue_pi_key, key2))
-- return -EINVAL;
--
-- /* Ensure that this does not race against an early wakeup */
-- if (!futex_requeue_pi_prepare(top_waiter, NULL))
-- return -EAGAIN;
--
-- /*
-- * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
-- * in the contended case or if @set_waiters is true.
-- *
-- * In the contended case PI state is attached to the lock owner. If
-- * the user space lock can be acquired then PI state is attached to
-- * the new owner (@top_waiter->task) when @set_waiters is true.
-- */
-- ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-- exiting, set_waiters);
-- if (ret == 1) {
-- /*
-- * Lock was acquired in user space and PI state was
-- * attached to @top_waiter->task. That means state is fully
-- * consistent and the waiter can return to user space
-- * immediately after the wakeup.
-- */
-- requeue_pi_wake_futex(top_waiter, key2, hb2);
-- } else if (ret < 0) {
-- /* Rewind top_waiter::requeue_state */
-- futex_requeue_pi_complete(top_waiter, ret);
-- } else {
-- /*
-- * futex_lock_pi_atomic() did not acquire the user space
-- * futex, but managed to establish the proxy lock and pi
-- * state. top_waiter::requeue_state cannot be fixed up here
-- * because the waiter is not enqueued on the rtmutex
-- * yet. This is handled at the callsite depending on the
-- * result of rt_mutex_start_proxy_lock() which is
-- * guaranteed to be reached with this function returning 0.
-- */
-- }
-- return ret;
--}
--
--/**
-- * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
-- * @uaddr1: source futex user address
-- * @flags: futex flags (FLAGS_SHARED, etc.)
-- * @uaddr2: target futex user address
-- * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
-- * @nr_requeue: number of waiters to requeue (0-INT_MAX)
-- * @cmpval: @uaddr1 expected value (or %NULL)
-- * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
-- * pi futex (pi to pi requeue is not supported)
-- *
-- * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
-- * uaddr2 atomically on behalf of the top waiter.
-- *
-- * Return:
-- * - >=0 - on success, the number of tasks requeued or woken;
-- * - <0 - on error
-- */
--static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
-- u32 __user *uaddr2, int nr_wake, int nr_requeue,
-- u32 *cmpval, int requeue_pi)
--{
-- union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-- int task_count = 0, ret;
-- struct futex_pi_state *pi_state = NULL;
-- struct futex_hash_bucket *hb1, *hb2;
-- struct futex_q *this, *next;
-- DEFINE_WAKE_Q(wake_q);
--
-- if (nr_wake < 0 || nr_requeue < 0)
-- return -EINVAL;
--
-- /*
-- * When PI not supported: return -ENOSYS if requeue_pi is true,
-- * consequently the compiler knows requeue_pi is always false past
-- * this point which will optimize away all the conditional code
-- * further down.
-- */
-- if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
-- return -ENOSYS;
--
-- if (requeue_pi) {
-- /*
-- * Requeue PI only works on two distinct uaddrs. This
-- * check is only valid for private futexes. See below.
-- */
-- if (uaddr1 == uaddr2)
-- return -EINVAL;
--
-- /*
-- * futex_requeue() allows the caller to define the number
-- * of waiters to wake up via the @nr_wake argument. With
--		 * REQUEUE_PI, waking up more than one waiter creates
--		 * more problems than it solves. Waking up a waiter only
--		 * makes sense if the PI futex @uaddr2 is uncontended as
-- * this allows the requeue code to acquire the futex
-- * @uaddr2 before waking the waiter. The waiter can then
-- * return to user space without further action. A secondary
-- * wakeup would just make the futex_wait_requeue_pi()
-- * handling more complex, because that code would have to
-- * look up pi_state and do more or less all the handling
-- * which the requeue code has to do for the to be requeued
-- * waiters. So restrict the number of waiters to wake to
-- * one, and only wake it up when the PI futex is
-- * uncontended. Otherwise requeue it and let the unlock of
-- * the PI futex handle the wakeup.
-- *
-- * All REQUEUE_PI users, e.g. pthread_cond_signal() and
-- * pthread_cond_broadcast() must use nr_wake=1.
-- */
-- if (nr_wake != 1)
-- return -EINVAL;
--
-- /*
-- * requeue_pi requires a pi_state, try to allocate it now
-- * without any locks in case it fails.
-- */
-- if (refill_pi_state_cache())
-- return -ENOMEM;
-- }
--
--retry:
-- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-- if (unlikely(ret != 0))
-- return ret;
-- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
-- requeue_pi ? FUTEX_WRITE : FUTEX_READ);
-- if (unlikely(ret != 0))
-- return ret;
--
-- /*
-- * The check above which compares uaddrs is not sufficient for
-- * shared futexes. We need to compare the keys:
-- */
-- if (requeue_pi && match_futex(&key1, &key2))
-- return -EINVAL;
--
-- hb1 = hash_futex(&key1);
-- hb2 = hash_futex(&key2);
--
--retry_private:
-- hb_waiters_inc(hb2);
-- double_lock_hb(hb1, hb2);
--
-- if (likely(cmpval != NULL)) {
-- u32 curval;
--
-- ret = get_futex_value_locked(&curval, uaddr1);
--
-- if (unlikely(ret)) {
-- double_unlock_hb(hb1, hb2);
-- hb_waiters_dec(hb2);
--
-- ret = get_user(curval, uaddr1);
-- if (ret)
-- return ret;
--
-- if (!(flags & FLAGS_SHARED))
-- goto retry_private;
--
-- goto retry;
-- }
-- if (curval != *cmpval) {
-- ret = -EAGAIN;
-- goto out_unlock;
-- }
-- }
--
-- if (requeue_pi) {
-- struct task_struct *exiting = NULL;
--
-- /*
-- * Attempt to acquire uaddr2 and wake the top waiter. If we
-- * intend to requeue waiters, force setting the FUTEX_WAITERS
-- * bit. We force this here where we are able to easily handle
--		 * faults rather than in the requeue loop below.
-- *
-- * Updates topwaiter::requeue_state if a top waiter exists.
-- */
-- ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-- &key2, &pi_state,
-- &exiting, nr_requeue);
--
-- /*
-- * At this point the top_waiter has either taken uaddr2 or
-- * is waiting on it. In both cases pi_state has been
--		 * established, with an initial refcount taken on it. In case of an
-- * error there's nothing.
-- *
-- * The top waiter's requeue_state is up to date:
-- *
-- * - If the lock was acquired atomically (ret == 1), then
-- * the state is Q_REQUEUE_PI_LOCKED.
-- *
-- * The top waiter has been dequeued and woken up and can
-- * return to user space immediately. The kernel/user
-- * space state is consistent. In case that there must be
-- * more waiters requeued the WAITERS bit in the user
-- * space futex is set so the top waiter task has to go
-- * into the syscall slowpath to unlock the futex. This
-- * will block until this requeue operation has been
-- * completed and the hash bucket locks have been
-- * dropped.
-- *
-- * - If the trylock failed with an error (ret < 0) then
-- * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
-- * happened", or Q_REQUEUE_PI_IGNORE when there was an
-- * interleaved early wakeup.
-- *
-- * - If the trylock did not succeed (ret == 0) then the
-- * state is either Q_REQUEUE_PI_IN_PROGRESS or
-- * Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
-- * This will be cleaned up in the loop below, which
-- * cannot fail because futex_proxy_trylock_atomic() did
-- * the same sanity checks for requeue_pi as the loop
-- * below does.
-- */
-- switch (ret) {
-- case 0:
-- /* We hold a reference on the pi state. */
-- break;
--
-- case 1:
-- /*
-- * futex_proxy_trylock_atomic() acquired the user space
-- * futex. Adjust task_count.
-- */
-- task_count++;
-- ret = 0;
-- break;
--
-- /*
-- * If the above failed, then pi_state is NULL and
-- * waiter::requeue_state is correct.
-- */
-- case -EFAULT:
-- double_unlock_hb(hb1, hb2);
-- hb_waiters_dec(hb2);
-- ret = fault_in_user_writeable(uaddr2);
-- if (!ret)
-- goto retry;
-- return ret;
-- case -EBUSY:
-- case -EAGAIN:
-- /*
-- * Two reasons for this:
-- * - EBUSY: Owner is exiting and we just wait for the
-- * exit to complete.
-- * - EAGAIN: The user space value changed.
-- */
-- double_unlock_hb(hb1, hb2);
-- hb_waiters_dec(hb2);
-- /*
-- * Handle the case where the owner is in the middle of
-- * exiting. Wait for the exit to complete otherwise
-- * this task might loop forever, aka. live lock.
-- */
-- wait_for_owner_exiting(ret, exiting);
-- cond_resched();
-- goto retry;
-- default:
-- goto out_unlock;
-- }
-- }
--
-- plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-- if (task_count - nr_wake >= nr_requeue)
-- break;
--
-- if (!match_futex(&this->key, &key1))
-- continue;
--
-- /*
-- * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
-- * be paired with each other and no other futex ops.
-- *
-- * We should never be requeueing a futex_q with a pi_state,
-- * which is awaiting a futex_unlock_pi().
-- */
-- if ((requeue_pi && !this->rt_waiter) ||
-- (!requeue_pi && this->rt_waiter) ||
-- this->pi_state) {
-- ret = -EINVAL;
-- break;
-- }
--
-- /* Plain futexes just wake or requeue and are done */
-- if (!requeue_pi) {
-- if (++task_count <= nr_wake)
-- mark_wake_futex(&wake_q, this);
-- else
-- requeue_futex(this, hb1, hb2, &key2);
-- continue;
-- }
--
-- /* Ensure we requeue to the expected futex for requeue_pi. */
-- if (!match_futex(this->requeue_pi_key, &key2)) {
-- ret = -EINVAL;
-- break;
-- }
--
-- /*
-- * Requeue nr_requeue waiters and possibly one more in the case
-- * of requeue_pi if we couldn't acquire the lock atomically.
-- *
-- * Prepare the waiter to take the rt_mutex. Take a refcount
-- * on the pi_state and store the pointer in the futex_q
-- * object of the waiter.
-- */
-- get_pi_state(pi_state);
--
-- /* Don't requeue when the waiter is already on the way out. */
-- if (!futex_requeue_pi_prepare(this, pi_state)) {
-- /*
-- * Early woken waiter signaled that it is on the
-- * way out. Drop the pi_state reference and try the
-- * next waiter. @this->pi_state is still NULL.
-- */
-- put_pi_state(pi_state);
-- continue;
-- }
--
-- ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
-- this->rt_waiter,
-- this->task);
--
-- if (ret == 1) {
-- /*
-- * We got the lock. We do neither drop the refcount
-- * on pi_state nor clear this->pi_state because the
-- * waiter needs the pi_state for cleaning up the
-- * user space value. It will drop the refcount
-- * after doing so. this::requeue_state is updated
-- * in the wakeup as well.
-- */
-- requeue_pi_wake_futex(this, &key2, hb2);
-- task_count++;
-- } else if (!ret) {
-- /* Waiter is queued, move it to hb2 */
-- requeue_futex(this, hb1, hb2, &key2);
-- futex_requeue_pi_complete(this, 0);
-- task_count++;
-- } else {
-- /*
-- * rt_mutex_start_proxy_lock() detected a potential
-- * deadlock when we tried to queue that waiter.
-- * Drop the pi_state reference which we took above
-- * and remove the pointer to the state from the
-- * waiters futex_q object.
-- */
-- this->pi_state = NULL;
-- put_pi_state(pi_state);
-- futex_requeue_pi_complete(this, ret);
-- /*
-- * We stop queueing more waiters and let user space
-- * deal with the mess.
-- */
-- break;
-- }
-- }
--
-- /*
-- * We took an extra initial reference to the pi_state in
-- * futex_proxy_trylock_atomic(). We need to drop it here again.
-- */
-- put_pi_state(pi_state);
--
--out_unlock:
-- double_unlock_hb(hb1, hb2);
-- wake_up_q(&wake_q);
-- hb_waiters_dec(hb2);
-- return ret ? ret : task_count;
--}
--
--/* The key must be already stored in q->key. */
--static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
-- __acquires(&hb->lock)
--{
-- struct futex_hash_bucket *hb;
--
-- hb = hash_futex(&q->key);
--
-- /*
-- * Increment the counter before taking the lock so that
-- * a potential waker won't miss a to-be-slept task that is
-- * waiting for the spinlock. This is safe as all queue_lock()
-- * users end up calling queue_me(). Similarly, for housekeeping,
-- * decrement the counter at queue_unlock() when some error has
-- * occurred and we don't end up adding the task to the list.
-- */
-- hb_waiters_inc(hb); /* implies smp_mb(); (A) */
--
-- q->lock_ptr = &hb->lock;
--
-- spin_lock(&hb->lock);
-- return hb;
--}
--
--static inline void
--queue_unlock(struct futex_hash_bucket *hb)
-- __releases(&hb->lock)
--{
-- spin_unlock(&hb->lock);
-- hb_waiters_dec(hb);
--}
--
--static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
--{
-- int prio;
--
-- /*
-- * The priority used to register this element is
-- * - either the real thread-priority for the real-time threads
-- * (i.e. threads with a priority lower than MAX_RT_PRIO)
-- * - or MAX_RT_PRIO for non-RT threads.
-- * Thus, all RT-threads are woken first in priority order, and
-- * the others are woken last, in FIFO order.
-- */
-- prio = min(current->normal_prio, MAX_RT_PRIO);
--
-- plist_node_init(&q->list, prio);
-- plist_add(&q->list, &hb->chain);
-- q->task = current;
--}
--
--/**
-- * queue_me() - Enqueue the futex_q on the futex_hash_bucket
-- * @q: The futex_q to enqueue
-- * @hb: The destination hash bucket
-- *
-- * The hb->lock must be held by the caller, and is released here. A call to
-- * queue_me() is typically paired with exactly one call to unqueue_me(). The
-- * exceptions involve the PI related operations, which may use unqueue_me_pi()
-- * or nothing if the unqueue is done as part of the wake process and the unqueue
-- * state is implicit in the state of the woken task (see futex_wait_requeue_pi() for
-- * an example).
-- */
--static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
-- __releases(&hb->lock)
--{
-- __queue_me(q, hb);
-- spin_unlock(&hb->lock);
--}
--
--/**
-- * unqueue_me() - Remove the futex_q from its futex_hash_bucket
-- * @q: The futex_q to unqueue
-- *
-- * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
-- * be paired with exactly one earlier call to queue_me().
-- *
-- * Return:
-- *  - 1 - if the futex_q was still queued (and we unqueued it);
-- * - 0 - if the futex_q was already removed by the waking thread
-- */
--static int unqueue_me(struct futex_q *q)
--{
-- spinlock_t *lock_ptr;
-- int ret = 0;
--
-- /* In the common case we don't take the spinlock, which is nice. */
--retry:
-- /*
-- * q->lock_ptr can change between this read and the following spin_lock.
-- * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
-- * optimizing lock_ptr out of the logic below.
-- */
-- lock_ptr = READ_ONCE(q->lock_ptr);
-- if (lock_ptr != NULL) {
-- spin_lock(lock_ptr);
-- /*
-- * q->lock_ptr can change between reading it and
-- * spin_lock(), causing us to take the wrong lock. This
-- * corrects the race condition.
-- *
-- * Reasoning goes like this: if we have the wrong lock,
-- * q->lock_ptr must have changed (maybe several times)
-- * between reading it and the spin_lock(). It can
-- * change again after the spin_lock() but only if it was
-- * already changed before the spin_lock(). It cannot,
-- * however, change back to the original value. Therefore
-- * we can detect whether we acquired the correct lock.
-- */
-- if (unlikely(lock_ptr != q->lock_ptr)) {
-- spin_unlock(lock_ptr);
-- goto retry;
-- }
-- __unqueue_futex(q);
--
-- BUG_ON(q->pi_state);
--
-- spin_unlock(lock_ptr);
-- ret = 1;
-- }
--
-- return ret;
--}
--
--/*
-- * PI futexes cannot be requeued and must remove themselves from the
-- * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
-- */
--static void unqueue_me_pi(struct futex_q *q)
--{
-- __unqueue_futex(q);
--
-- BUG_ON(!q->pi_state);
-- put_pi_state(q->pi_state);
-- q->pi_state = NULL;
--}
--
--static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-- struct task_struct *argowner)
--{
-- struct futex_pi_state *pi_state = q->pi_state;
-- struct task_struct *oldowner, *newowner;
-- u32 uval, curval, newval, newtid;
-- int err = 0;
--
-- oldowner = pi_state->owner;
--
-- /*
-- * We are here because either:
-- *
-- * - we stole the lock and pi_state->owner needs updating to reflect
-- * that (@argowner == current),
-- *
-- * or:
-- *
-- * - someone stole our lock and we need to fix things to point to the
-- * new owner (@argowner == NULL).
-- *
-- * Either way, we have to replace the TID in the user space variable.
-- * This must be atomic as we have to preserve the owner died bit here.
-- *
-- * Note: We write the user space value _before_ changing the pi_state
-- * because we can fault here. Imagine swapped out pages or a fork
-- * that marked all the anonymous memory readonly for cow.
-- *
-- * Modifying pi_state _before_ the user space value would leave the
-- * pi_state in an inconsistent state when we fault here, because we
-- * need to drop the locks to handle the fault. This might be observed
--	 * in the PID checks when attaching to PI state.
-- */
--retry:
-- if (!argowner) {
-- if (oldowner != current) {
-- /*
-- * We raced against a concurrent self; things are
-- * already fixed up. Nothing to do.
-- */
-- return 0;
-- }
--
-- if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
-- /* We got the lock. pi_state is correct. Tell caller. */
-- return 1;
-- }
--
-- /*
-- * The trylock just failed, so either there is an owner or
-- * there is a higher priority waiter than this one.
-- */
-- newowner = rt_mutex_owner(&pi_state->pi_mutex);
-- /*
-- * If the higher priority waiter has not yet taken over the
-- * rtmutex then newowner is NULL. We can't return here with
-- * that state because it's inconsistent vs. the user space
-- * state. So drop the locks and try again. It's a valid
-- * situation and not any different from the other retry
-- * conditions.
-- */
-- if (unlikely(!newowner)) {
-- err = -EAGAIN;
-- goto handle_err;
-- }
-- } else {
-- WARN_ON_ONCE(argowner != current);
-- if (oldowner == current) {
-- /*
-- * We raced against a concurrent self; things are
-- * already fixed up. Nothing to do.
-- */
-- return 1;
-- }
-- newowner = argowner;
-- }
--
-- newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
-- /* Owner died? */
-- if (!pi_state->owner)
-- newtid |= FUTEX_OWNER_DIED;
--
-- err = get_futex_value_locked(&uval, uaddr);
-- if (err)
-- goto handle_err;
--
-- for (;;) {
-- newval = (uval & FUTEX_OWNER_DIED) | newtid;
--
-- err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
-- if (err)
-- goto handle_err;
--
-- if (curval == uval)
-- break;
-- uval = curval;
-- }
--
-- /*
-- * We fixed up user space. Now we need to fix the pi_state
-- * itself.
-- */
-- pi_state_update_owner(pi_state, newowner);
--
-- return argowner == current;
--
-- /*
-- * In order to reschedule or handle a page fault, we need to drop the
-- * locks here. In the case of a fault, this gives the other task
-- * (either the highest priority waiter itself or the task which stole
-- * the rtmutex) the chance to try the fixup of the pi_state. So once we
-- * are back from handling the fault we need to check the pi_state after
-- * reacquiring the locks and before trying to do another fixup. When
-- * the fixup has been done already we simply return.
-- *
-- * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
-- * drop hb->lock since the caller owns the hb -> futex_q relation.
-- * Dropping the pi_mutex->wait_lock requires the state revalidate.
-- */
--handle_err:
-- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-- spin_unlock(q->lock_ptr);
--
-- switch (err) {
-- case -EFAULT:
-- err = fault_in_user_writeable(uaddr);
-- break;
--
-- case -EAGAIN:
-- cond_resched();
-- err = 0;
-- break;
--
-- default:
-- WARN_ON_ONCE(1);
-- break;
-- }
--
-- spin_lock(q->lock_ptr);
-- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
--
-- /*
-- * Check if someone else fixed it for us:
-- */
-- if (pi_state->owner != oldowner)
-- return argowner == current;
--
--	/* Retry if err was -EAGAIN or the fault-in succeeded */
-- if (!err)
-- goto retry;
--
-- /*
-- * fault_in_user_writeable() failed so user state is immutable. At
-- * best we can make the kernel state consistent but user state will
-- * be most likely hosed and any subsequent unlock operation will be
-- * rejected due to PI futex rule [10].
-- *
-- * Ensure that the rtmutex owner is also the pi_state owner despite
-- * the user space value claiming something different. There is no
-- * point in unlocking the rtmutex if current is the owner as it
-- * would need to wait until the next waiter has taken the rtmutex
-- * to guarantee consistent state. Keep it simple. Userspace asked
--	 * for this wrecked state.
-- *
-- * The rtmutex has an owner - either current or some other
-- * task. See the EAGAIN loop above.
-- */
-- pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
--
-- return err;
--}
--
--static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-- struct task_struct *argowner)
--{
-- struct futex_pi_state *pi_state = q->pi_state;
-- int ret;
--
-- lockdep_assert_held(q->lock_ptr);
--
-- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-- ret = __fixup_pi_state_owner(uaddr, q, argowner);
-- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-- return ret;
--}
--
--static long futex_wait_restart(struct restart_block *restart);
--
--/**
-- * fixup_owner() - Post lock pi_state and corner case management
-- * @uaddr: user address of the futex
-- * @q: futex_q (contains pi_state and access to the rt_mutex)
-- * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
-- *
-- * After attempting to lock an rt_mutex, this function is called to cleanup
-- * the pi_state owner as well as handle race conditions that may allow us to
-- * acquire the lock. Must be called with the hb lock held.
-- *
-- * Return:
-- * - 1 - success, lock taken;
-- * - 0 - success, lock not taken;
-- * - <0 - on error (-EFAULT)
-- */
--static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
--{
-- if (locked) {
-- /*
-- * Got the lock. We might not be the anticipated owner if we
-- * did a lock-steal - fix up the PI-state in that case:
-- *
-- * Speculative pi_state->owner read (we don't hold wait_lock);
-- * since we own the lock pi_state->owner == current is the
-- * stable state, anything else needs more attention.
-- */
-- if (q->pi_state->owner != current)
-- return fixup_pi_state_owner(uaddr, q, current);
-- return 1;
-- }
--
-- /*
-- * If we didn't get the lock; check if anybody stole it from us. In
-- * that case, we need to fix up the uval to point to them instead of
-- * us, otherwise bad things happen. [10]
-- *
-- * Another speculative read; pi_state->owner == current is unstable
-- * but needs our attention.
-- */
-- if (q->pi_state->owner == current)
-- return fixup_pi_state_owner(uaddr, q, NULL);
--
-- /*
-- * Paranoia check. If we did not take the lock, then we should not be
-- * the owner of the rt_mutex. Warn and establish consistent state.
-- */
-- if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
-- return fixup_pi_state_owner(uaddr, q, current);
--
-- return 0;
--}
--
--/**
-- * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
-- * @hb: the futex hash bucket, must be locked by the caller
-- * @q: the futex_q to queue up on
-- * @timeout: the prepared hrtimer_sleeper, or null for no timeout
-- */
--static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
-- struct hrtimer_sleeper *timeout)
--{
-- /*
-- * The task state is guaranteed to be set before another task can
-- * wake it. set_current_state() is implemented using smp_store_mb() and
-- * queue_me() calls spin_unlock() upon completion, both serializing
-- * access to the hash list and forcing another memory barrier.
-- */
-- set_current_state(TASK_INTERRUPTIBLE);
-- queue_me(q, hb);
--
-- /* Arm the timer */
-- if (timeout)
-- hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
--
-- /*
-- * If we have been removed from the hash list, then another task
-- * has tried to wake us, and we can skip the call to schedule().
-- */
-- if (likely(!plist_node_empty(&q->list))) {
-- /*
-- * If the timer has already expired, current will already be
-- * flagged for rescheduling. Only call schedule if there
-- * is no timeout, or if it has yet to expire.
-- */
-- if (!timeout || timeout->task)
-- freezable_schedule();
-- }
-- __set_current_state(TASK_RUNNING);
--}
--
--/**
-- * futex_wait_setup() - Prepare to wait on a futex
-- * @uaddr: the futex userspace address
-- * @val: the expected value
-- * @flags: futex flags (FLAGS_SHARED, etc.)
-- * @q: the associated futex_q
-- * @hb: storage for hash_bucket pointer to be returned to caller
-- *
-- * Setup the futex_q and locate the hash_bucket. Get the futex value and
-- * compare it with the expected value. Handle atomic faults internally.
-- * Return with the hb lock held on success, and unlocked on failure.
-- *
-- * Return:
-- * - 0 - uaddr contains val and hb has been locked;
-- * - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
-- */
--static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-- struct futex_q *q, struct futex_hash_bucket **hb)
--{
-- u32 uval;
-- int ret;
--
-- /*
-- * Access the page AFTER the hash-bucket is locked.
-- * Order is important:
-- *
-- * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
-- * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
-- *
-- * The basic logical guarantee of a futex is that it blocks ONLY
-- * if cond(var) is known to be true at the time of blocking, for
-- * any cond. If we locked the hash-bucket after testing *uaddr, that
-- * would open a race condition where we could block indefinitely with
-- * cond(var) false, which would violate the guarantee.
-- *
-- * On the other hand, we insert q and release the hash-bucket only
-- * after testing *uaddr. This guarantees that futex_wait() will NOT
-- * absorb a wakeup if *uaddr does not match the desired values
-- * while the syscall executes.
-- */
--retry:
-- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
-- if (unlikely(ret != 0))
-- return ret;
--
--retry_private:
-- *hb = queue_lock(q);
--
-- ret = get_futex_value_locked(&uval, uaddr);
--
-- if (ret) {
-- queue_unlock(*hb);
--
-- ret = get_user(uval, uaddr);
-- if (ret)
-- return ret;
--
-- if (!(flags & FLAGS_SHARED))
-- goto retry_private;
--
-- goto retry;
-- }
--
-- if (uval != val) {
-- queue_unlock(*hb);
-- ret = -EWOULDBLOCK;
-- }
--
-- return ret;
--}
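/*
 * [Editor's note, not part of the patch above] A minimal userspace sketch
 * of the waiter/waker ordering that the futex_wait_setup() comment relies
 * on: the waiter only blocks on the value it last observed, and the waker
 * updates the value before calling FUTEX_WAKE. Names such as 'flag',
 * waiter() and waker() are illustrative; error handling is omitted.
 */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>

static atomic_int flag;	/* 0 = condition not met, 1 = condition met */

static void waiter(void)
{
	int val = atomic_load(&flag);

	while (val == 0) {
		/* Fails with EAGAIN if *uaddr no longer equals val. */
		syscall(SYS_futex, &flag, FUTEX_WAIT, val, NULL, NULL, 0);
		val = atomic_load(&flag);
	}
}

static void waker(void)
{
	atomic_store(&flag, 1);	/* publish the condition first */
	syscall(SYS_futex, &flag, FUTEX_WAKE, 1, NULL, NULL, 0);	/* wake one waiter */
}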
--
--static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
-- ktime_t *abs_time, u32 bitset)
--{
-- struct hrtimer_sleeper timeout, *to;
-- struct restart_block *restart;
-- struct futex_hash_bucket *hb;
-- struct futex_q q = futex_q_init;
-- int ret;
--
-- if (!bitset)
-- return -EINVAL;
-- q.bitset = bitset;
--
-- to = futex_setup_timer(abs_time, &timeout, flags,
-- current->timer_slack_ns);
--retry:
-- /*
-- * Prepare to wait on uaddr. On success, it holds hb->lock and q
-- * is initialized.
-- */
-- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-- if (ret)
-- goto out;
--
-- /* queue_me and wait for wakeup, timeout, or a signal. */
-- futex_wait_queue_me(hb, &q, to);
--
-- /* If we were woken (and unqueued), we succeeded, whatever. */
-- ret = 0;
-- if (!unqueue_me(&q))
-- goto out;
-- ret = -ETIMEDOUT;
-- if (to && !to->task)
-- goto out;
--
-- /*
-- * We expect signal_pending(current), but we might be the
-- * victim of a spurious wakeup as well.
-- */
-- if (!signal_pending(current))
-- goto retry;
--
-- ret = -ERESTARTSYS;
-- if (!abs_time)
-- goto out;
--
-- restart = &current->restart_block;
-- restart->futex.uaddr = uaddr;
-- restart->futex.val = val;
-- restart->futex.time = *abs_time;
-- restart->futex.bitset = bitset;
-- restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
--
-- ret = set_restart_fn(restart, futex_wait_restart);
--
--out:
-- if (to) {
-- hrtimer_cancel(&to->timer);
-- destroy_hrtimer_on_stack(&to->timer);
-- }
-- return ret;
--}
--
--
--static long futex_wait_restart(struct restart_block *restart)
--{
-- u32 __user *uaddr = restart->futex.uaddr;
-- ktime_t t, *tp = NULL;
--
-- if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
-- t = restart->futex.time;
-- tp = &t;
-- }
-- restart->fn = do_no_restart_syscall;
--
-- return (long)futex_wait(uaddr, restart->futex.flags,
-- restart->futex.val, tp, restart->futex.bitset);
--}
--
--
--/*
-- * Userspace tried a 0 -> TID atomic transition of the futex value
-- * and failed. The kernel side here does the whole locking operation:
-- * if there are waiters then it will block as a consequence of relying
-- * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
-- * a 0 value of the futex too.).
-- *
-- * Also serves as futex trylock_pi(), with the corresponding semantics.
-- */
--static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
-- ktime_t *time, int trylock)
--{
-- struct hrtimer_sleeper timeout, *to;
-- struct task_struct *exiting = NULL;
-- struct rt_mutex_waiter rt_waiter;
-- struct futex_hash_bucket *hb;
-- struct futex_q q = futex_q_init;
-- int res, ret;
--
-- if (!IS_ENABLED(CONFIG_FUTEX_PI))
-- return -ENOSYS;
--
-- if (refill_pi_state_cache())
-- return -ENOMEM;
--
-- to = futex_setup_timer(time, &timeout, flags, 0);
--
--retry:
-- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
-- if (unlikely(ret != 0))
-- goto out;
--
--retry_private:
-- hb = queue_lock(&q);
--
-- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
-- &exiting, 0);
-- if (unlikely(ret)) {
-- /*
-- * Atomic work succeeded and we got the lock,
-- * or failed. Either way, we do _not_ block.
-- */
-- switch (ret) {
-- case 1:
-- /* We got the lock. */
-- ret = 0;
-- goto out_unlock_put_key;
-- case -EFAULT:
-- goto uaddr_faulted;
-- case -EBUSY:
-- case -EAGAIN:
-- /*
-- * Two reasons for this:
-- * - EBUSY: Task is exiting and we just wait for the
-- * exit to complete.
-- * - EAGAIN: The user space value changed.
-- */
-- queue_unlock(hb);
-- /*
-- * Handle the case where the owner is in the middle of
-- * exiting. Wait for the exit to complete otherwise
-- * this task might loop forever, aka. live lock.
-- */
-- wait_for_owner_exiting(ret, exiting);
-- cond_resched();
-- goto retry;
-- default:
-- goto out_unlock_put_key;
-- }
-- }
--
-- WARN_ON(!q.pi_state);
--
-- /*
-- * Only actually queue now that the atomic ops are done:
-- */
-- __queue_me(&q, hb);
--
-- if (trylock) {
-- ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
-- /* Fixup the trylock return value: */
-- ret = ret ? 0 : -EWOULDBLOCK;
-- goto no_block;
-- }
--
-- rt_mutex_init_waiter(&rt_waiter);
--
-- /*
-- * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
-- * hold it while doing rt_mutex_start_proxy(), because then it will
-- * include hb->lock in the blocking chain, even though we'll not in
-- * fact hold it while blocking. This will lead it to report -EDEADLK
-- * and BUG when futex_unlock_pi() interleaves with this.
-- *
-- * Therefore acquire wait_lock while holding hb->lock, but drop the
-- * latter before calling __rt_mutex_start_proxy_lock(). This
-- * interleaves with futex_unlock_pi() -- which does a similar lock
-- * handoff -- such that the latter can observe the futex_q::pi_state
-- * before __rt_mutex_start_proxy_lock() is done.
-- */
-- raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
-- spin_unlock(q.lock_ptr);
-- /*
-- * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
-- * such that futex_unlock_pi() is guaranteed to observe the waiter when
-- * it sees the futex_q::pi_state.
-- */
-- ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
-- raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
--
-- if (ret) {
-- if (ret == 1)
-- ret = 0;
-- goto cleanup;
-- }
--
-- if (unlikely(to))
-- hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
--
-- ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
--
--cleanup:
-- spin_lock(q.lock_ptr);
-- /*
-- * If we failed to acquire the lock (deadlock/signal/timeout), we must
-- * first acquire the hb->lock before removing the lock from the
-- * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
-- * lists consistent.
-- *
-- * In particular; it is important that futex_unlock_pi() can not
-- * observe this inconsistency.
-- */
-- if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
-- ret = 0;
--
--no_block:
-- /*
-- * Fixup the pi_state owner and possibly acquire the lock if we
-- * haven't already.
-- */
-- res = fixup_owner(uaddr, &q, !ret);
-- /*
-- * If fixup_owner() returned an error, propagate that. If it acquired
-- * the lock, clear our -ETIMEDOUT or -EINTR.
-- */
-- if (res)
-- ret = (res < 0) ? res : 0;
--
-- unqueue_me_pi(&q);
-- spin_unlock(q.lock_ptr);
-- goto out;
--
--out_unlock_put_key:
-- queue_unlock(hb);
--
--out:
-- if (to) {
-- hrtimer_cancel(&to->timer);
-- destroy_hrtimer_on_stack(&to->timer);
-- }
-- return ret != -EINTR ? ret : -ERESTARTNOINTR;
--
--uaddr_faulted:
-- queue_unlock(hb);
--
-- ret = fault_in_user_writeable(uaddr);
-- if (ret)
-- goto out;
--
-- if (!(flags & FLAGS_SHARED))
-- goto retry_private;
--
-- goto retry;
--}
--
--/*
-- * Userspace attempted a TID -> 0 atomic transition, and failed.
-- * This is the in-kernel slowpath: we look up the PI state (if any),
-- * and do the rt-mutex unlock.
-- */
--static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
--{
-- u32 curval, uval, vpid = task_pid_vnr(current);
-- union futex_key key = FUTEX_KEY_INIT;
-- struct futex_hash_bucket *hb;
-- struct futex_q *top_waiter;
-- int ret;
--
-- if (!IS_ENABLED(CONFIG_FUTEX_PI))
-- return -ENOSYS;
--
--retry:
-- if (get_user(uval, uaddr))
-- return -EFAULT;
-- /*
-- * We release only a lock we actually own:
-- */
-- if ((uval & FUTEX_TID_MASK) != vpid)
-- return -EPERM;
--
-- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
-- if (ret)
-- return ret;
--
-- hb = hash_futex(&key);
-- spin_lock(&hb->lock);
--
-- /*
-- * Check waiters first. We do not trust user space values at
-- * all and we at least want to know if user space fiddled
-- * with the futex value instead of blindly unlocking.
-- */
-- top_waiter = futex_top_waiter(hb, &key);
-- if (top_waiter) {
-- struct futex_pi_state *pi_state = top_waiter->pi_state;
--
-- ret = -EINVAL;
-- if (!pi_state)
-- goto out_unlock;
--
-- /*
-- * If current does not own the pi_state then the futex is
-- * inconsistent and user space fiddled with the futex value.
-- */
-- if (pi_state->owner != current)
-- goto out_unlock;
--
-- get_pi_state(pi_state);
-- /*
-- * By taking wait_lock while still holding hb->lock, we ensure
-- * there is no point where we hold neither; and therefore
-- * wake_futex_pi() must observe a state consistent with what we
-- * observed.
-- *
-- * In particular; this forces __rt_mutex_start_proxy() to
-- * complete such that we're guaranteed to observe the
-- * rt_waiter. Also see the WARN in wake_futex_pi().
-- */
-- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-- spin_unlock(&hb->lock);
--
-- /* drops pi_state->pi_mutex.wait_lock */
-- ret = wake_futex_pi(uaddr, uval, pi_state);
--
-- put_pi_state(pi_state);
--
-- /*
-- * Success, we're done! No tricky corner cases.
-- */
-- if (!ret)
-- return ret;
-- /*
-- * The atomic access to the futex value generated a
-- * pagefault, so retry the user-access and the wakeup:
-- */
-- if (ret == -EFAULT)
-- goto pi_faulted;
-- /*
-- * An unconditional UNLOCK_PI op raced against a waiter
-- * setting the FUTEX_WAITERS bit. Try again.
-- */
-- if (ret == -EAGAIN)
-- goto pi_retry;
-- /*
-- * wake_futex_pi has detected invalid state. Tell user
-- * space.
-- */
-- return ret;
-- }
--
-- /*
-- * We have no kernel internal state, i.e. no waiters in the
-- * kernel. Waiters which are about to queue themselves are stuck
-- * on hb->lock. So we can safely ignore them. We preserve neither
-- * the WAITERS bit nor the OWNER_DIED one. We are the
-- * owner.
-- */
-- if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
-- spin_unlock(&hb->lock);
-- switch (ret) {
-- case -EFAULT:
-- goto pi_faulted;
--
-- case -EAGAIN:
-- goto pi_retry;
--
-- default:
-- WARN_ON_ONCE(1);
-- return ret;
-- }
-- }
--
-- /*
-- * If uval has changed, let user space handle it.
-- */
-- ret = (curval == uval) ? 0 : -EAGAIN;
--
--out_unlock:
-- spin_unlock(&hb->lock);
-- return ret;
--
--pi_retry:
-- cond_resched();
-- goto retry;
--
--pi_faulted:
--
-- ret = fault_in_user_writeable(uaddr);
-- if (!ret)
-- goto retry;
--
-- return ret;
--}
--
--/**
-- * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
-- * @hb: the hash_bucket futex_q was originally enqueued on
-- * @q: the futex_q woken while waiting to be requeued
-- * @timeout: the timeout associated with the wait (NULL if none)
-- *
-- * Determine the cause for the early wakeup.
-- *
-- * Return:
-- * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
-- */
--static inline
--int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
-- struct futex_q *q,
-- struct hrtimer_sleeper *timeout)
--{
-- int ret;
--
-- /*
-- * With the hb lock held, we avoid races while we process the wakeup.
-- * We only need to hold hb (and not hb2) to ensure atomicity as the
-- * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
-- * It can't be requeued from uaddr2 to something else since we don't
-- * support a PI aware source futex for requeue.
-- */
-- WARN_ON_ONCE(&hb->lock != q->lock_ptr);
--
-- /*
-- * We were woken prior to requeue by a timeout or a signal.
-- * Unqueue the futex_q and determine which it was.
-- */
-- plist_del(&q->list, &hb->chain);
-- hb_waiters_dec(hb);
--
-- /* Handle spurious wakeups gracefully */
-- ret = -EWOULDBLOCK;
-- if (timeout && !timeout->task)
-- ret = -ETIMEDOUT;
-- else if (signal_pending(current))
-- ret = -ERESTARTNOINTR;
-- return ret;
--}
--
--/**
-- * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
-- * @uaddr: the futex we initially wait on (non-pi)
-- * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
-- * the same type, no requeueing from private to shared, etc.
-- * @val: the expected value of uaddr
-- * @abs_time: absolute timeout
-- * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
-- * @uaddr2: the pi futex we will take prior to returning to user-space
-- *
-- * The caller will wait on uaddr and will be requeued by futex_requeue() to
-- * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
-- * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
-- * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
-- * without one, the pi logic would not know which task to boost/deboost, if
-- * there was a need to.
-- *
-- * We call schedule in futex_wait_queue_me() when we enqueue and return there
-- * via the following--
-- * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
-- * 2) wakeup on uaddr2 after a requeue
-- * 3) signal
-- * 4) timeout
-- *
-- * If 3, cleanup and return -ERESTARTNOINTR.
-- *
-- * If 2, we may then block on trying to take the rt_mutex and return via:
-- * 5) successful lock
-- * 6) signal
-- * 7) timeout
-- * 8) other lock acquisition failure
-- *
-- * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
-- *
-- * If 4 or 7, we cleanup and return with -ETIMEDOUT.
-- *
-- * Return:
-- * - 0 - On success;
-- * - <0 - On error
-- */
--static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
-- u32 val, ktime_t *abs_time, u32 bitset,
-- u32 __user *uaddr2)
--{
-- struct hrtimer_sleeper timeout, *to;
-- struct rt_mutex_waiter rt_waiter;
-- struct futex_hash_bucket *hb;
-- union futex_key key2 = FUTEX_KEY_INIT;
-- struct futex_q q = futex_q_init;
-- struct rt_mutex_base *pi_mutex;
-- int res, ret;
--
-- if (!IS_ENABLED(CONFIG_FUTEX_PI))
-- return -ENOSYS;
--
-- if (uaddr == uaddr2)
-- return -EINVAL;
--
-- if (!bitset)
-- return -EINVAL;
--
-- to = futex_setup_timer(abs_time, &timeout, flags,
-- current->timer_slack_ns);
--
-- /*
-- * The waiter is allocated on our stack, manipulated by the requeue
-- * code while we sleep on uaddr.
-- */
-- rt_mutex_init_waiter(&rt_waiter);
--
-- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-- if (unlikely(ret != 0))
-- goto out;
--
-- q.bitset = bitset;
-- q.rt_waiter = &rt_waiter;
-- q.requeue_pi_key = &key2;
--
-- /*
-- * Prepare to wait on uaddr. On success, it holds hb->lock and q
-- * is initialized.
-- */
-- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-- if (ret)
-- goto out;
--
-- /*
-- * The check above which compares uaddrs is not sufficient for
-- * shared futexes. We need to compare the keys:
-- */
-- if (match_futex(&q.key, &key2)) {
-- queue_unlock(hb);
-- ret = -EINVAL;
-- goto out;
-- }
--
-- /* Queue the futex_q, drop the hb lock, wait for wakeup. */
-- futex_wait_queue_me(hb, &q, to);
--
-- switch (futex_requeue_pi_wakeup_sync(&q)) {
-- case Q_REQUEUE_PI_IGNORE:
-- /* The waiter is still on uaddr1 */
-- spin_lock(&hb->lock);
-- ret = handle_early_requeue_pi_wakeup(hb, &q, to);
-- spin_unlock(&hb->lock);
-- break;
--
-- case Q_REQUEUE_PI_LOCKED:
-- /* The requeue acquired the lock */
-- if (q.pi_state && (q.pi_state->owner != current)) {
-- spin_lock(q.lock_ptr);
-- ret = fixup_owner(uaddr2, &q, true);
-- /*
-- * Drop the reference to the pi state which the
-- * requeue_pi() code acquired for us.
-- */
-- put_pi_state(q.pi_state);
-- spin_unlock(q.lock_ptr);
-- /*
-- * Adjust the return value. It's either -EFAULT or
-- * success (1) but the caller expects 0 for success.
-- */
-- ret = ret < 0 ? ret : 0;
-- }
-- break;
--
-- case Q_REQUEUE_PI_DONE:
-- /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */
-- pi_mutex = &q.pi_state->pi_mutex;
-- ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
--
-- /* Current is no longer pi_blocked_on */
-- spin_lock(q.lock_ptr);
-- if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
-- ret = 0;
--
-- debug_rt_mutex_free_waiter(&rt_waiter);
-- /*
-- * Fixup the pi_state owner and possibly acquire the lock if we
-- * haven't already.
-- */
-- res = fixup_owner(uaddr2, &q, !ret);
-- /*
-- * If fixup_owner() returned an error, propagate that. If it
-- * acquired the lock, clear -ETIMEDOUT or -EINTR.
-- */
-- if (res)
-- ret = (res < 0) ? res : 0;
--
-- unqueue_me_pi(&q);
-- spin_unlock(q.lock_ptr);
--
-- if (ret == -EINTR) {
-- /*
-- * We've already been requeued, but cannot restart
-- * by calling futex_lock_pi() directly. We could
-- * restart this syscall, but it would detect that
-- * the user space "val" changed and return
-- * -EWOULDBLOCK. Save the overhead of the restart
-- * and return -EWOULDBLOCK directly.
-- */
-- ret = -EWOULDBLOCK;
-- }
-- break;
-- default:
-- BUG();
-- }
--
--out:
-- if (to) {
-- hrtimer_cancel(&to->timer);
-- destroy_hrtimer_on_stack(&to->timer);
-- }
-- return ret;
--}
--
--/*
-- * Support for robust futexes: the kernel cleans up held futexes at
-- * thread exit time.
-- *
-- * Implementation: user-space maintains a per-thread list of locks it
-- * is holding. Upon do_exit(), the kernel carefully walks this list,
-- * and marks all locks that are owned by this thread with the
-- * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
-- * always manipulated with the lock held, so the list is private and
-- * per-thread. Userspace also maintains a per-thread 'list_op_pending'
-- * field, to allow the kernel to clean up if the thread dies after
-- * acquiring the lock, but just before it could have added itself to
-- * the list. There can only be one such pending lock.
-- */
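/*
 * [Editor's note, not part of the patch above] A hedged sketch of the
 * userspace side of the robust-list protocol described in the comment
 * above. glibc performs this registration itself for robust pthread
 * mutexes; struct my_robust_mutex and register_robust_list() are
 * hypothetical names used only for illustration.
 */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stddef.h>

struct my_robust_mutex {
	struct robust_list list;	/* linkage walked by the kernel at exit */
	unsigned int futex;		/* holds TID | FUTEX_WAITERS | FUTEX_OWNER_DIED */
};

static __thread struct robust_list_head robust_head;

static void register_robust_list(void)
{
	robust_head.list.next = &robust_head.list;	/* empty circular list */
	/* Offset from the list node to the futex word, as the kernel expects. */
	robust_head.futex_offset = offsetof(struct my_robust_mutex, futex) -
				   offsetof(struct my_robust_mutex, list);
	robust_head.list_op_pending = NULL;
	syscall(SYS_set_robust_list, &robust_head, sizeof(robust_head));
}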
--
--/**
-- * sys_set_robust_list() - Set the robust-futex list head of a task
-- * @head: pointer to the list-head
-- * @len: length of the list-head, as userspace expects
-- */
--SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
-- size_t, len)
--{
-- if (!futex_cmpxchg_enabled)
-- return -ENOSYS;
-- /*
-- * The kernel knows only one size for now:
-- */
-- if (unlikely(len != sizeof(*head)))
-- return -EINVAL;
--
-- current->robust_list = head;
--
-- return 0;
--}
--
--/**
-- * sys_get_robust_list() - Get the robust-futex list head of a task
-- * @pid: pid of the process [zero for current task]
-- * @head_ptr: pointer to a list-head pointer, the kernel fills it in
-- * @len_ptr: pointer to a length field, the kernel fills in the header size
-- */
--SYSCALL_DEFINE3(get_robust_list, int, pid,
-- struct robust_list_head __user * __user *, head_ptr,
-- size_t __user *, len_ptr)
--{
-- struct robust_list_head __user *head;
-- unsigned long ret;
-- struct task_struct *p;
--
-- if (!futex_cmpxchg_enabled)
-- return -ENOSYS;
--
-- rcu_read_lock();
--
-- ret = -ESRCH;
-- if (!pid)
-- p = current;
-- else {
-- p = find_task_by_vpid(pid);
-- if (!p)
-- goto err_unlock;
-- }
--
-- ret = -EPERM;
-- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-- goto err_unlock;
--
-- head = p->robust_list;
-- rcu_read_unlock();
--
-- if (put_user(sizeof(*head), len_ptr))
-- return -EFAULT;
-- return put_user(head, head_ptr);
--
--err_unlock:
-- rcu_read_unlock();
--
-- return ret;
--}
--
--/* Constants for the pending_op argument of handle_futex_death */
--#define HANDLE_DEATH_PENDING true
--#define HANDLE_DEATH_LIST false
--
--/*
-- * Process a futex-list entry, check whether it's owned by the
-- * dying task, and do notification if so:
-- */
--static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
-- bool pi, bool pending_op)
--{
-- u32 uval, nval, mval;
-- int err;
--
-- /* Futex address must be 32bit aligned */
-- if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
-- return -1;
--
--retry:
-- if (get_user(uval, uaddr))
-- return -1;
--
-- /*
-- * Special case for regular (non PI) futexes. The unlock path in
-- * user space has two race scenarios:
-- *
-- * 1. The unlock path releases the user space futex value and
-- * before it can execute the futex() syscall to wake up
-- * waiters it is killed.
-- *
-- * 2. A woken up waiter is killed before it can acquire the
-- * futex in user space.
-- *
-- * In both cases the TID validation below prevents a wakeup of
-- * potential waiters which can cause these waiters to block
-- * forever.
-- *
-- * In both cases the following conditions are met:
-- *
-- * 1) task->robust_list->list_op_pending != NULL
-- * @pending_op == true
-- * 2) User space futex value == 0
-- * 3) Regular futex: @pi == false
-- *
-- * If these conditions are met, it is safe to attempt waking up a
-- * potential waiter without touching the user space futex value and
-- * trying to set the OWNER_DIED bit. The user space futex value is
-- * uncontended and the rest of the user space mutex state is
-- * consistent, so a woken waiter will just take over the
-- * uncontended futex. Setting the OWNER_DIED bit would create
-- * inconsistent state and break the user space owner-died
-- * handling.
-- */
-- if (pending_op && !pi && !uval) {
-- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
-- return 0;
-- }
--
-- if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
-- return 0;
--
-- /*
-- * Ok, this dying thread is truly holding a futex
-- * of interest. Set the OWNER_DIED bit atomically
-- * via cmpxchg, and if the value had FUTEX_WAITERS
-- * set, wake up a waiter (if any). (We have to do a
-- * futex_wake() even if OWNER_DIED is already set -
-- * to handle the rare but possible case of recursive
-- * thread-death.) The rest of the cleanup is done in
-- * userspace.
-- */
-- mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
--
-- /*
-- * We are not holding a lock here, but we want to have
-- * the pagefault_disable/enable() protection because
-- * we want to handle the fault gracefully. If the
-- * access fails we try to fault in the futex with R/W
-- * verification via get_user_pages. get_user() above
-- * does not guarantee R/W access. If that fails we
-- * give up and leave the futex locked.
-- */
-- if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
-- switch (err) {
-- case -EFAULT:
-- if (fault_in_user_writeable(uaddr))
-- return -1;
-- goto retry;
--
-- case -EAGAIN:
-- cond_resched();
-- goto retry;
--
-- default:
-- WARN_ON_ONCE(1);
-- return err;
-- }
-- }
--
-- if (nval != uval)
-- goto retry;
--
-- /*
-- * Wake robust non-PI futexes here. The wakeup of
-- * PI futexes happens in exit_pi_state():
-- */
-- if (!pi && (uval & FUTEX_WAITERS))
-- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
--
-- return 0;
--}
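/*
 * [Editor's note, not part of the patch above] What the surviving side
 * might see after handle_futex_death() above has set FUTEX_OWNER_DIED and
 * issued the wakeup: a hedged sketch of a waiter taking over a dead
 * owner's robust lock. try_take_over() and my_tid are hypothetical; glibc
 * implements the real equivalent behind EOWNERDEAD and
 * pthread_mutex_consistent().
 */
#include <linux/futex.h>
#include <stdatomic.h>
#include <stdbool.h>

static bool try_take_over(_Atomic unsigned int *futex_word, unsigned int my_tid)
{
	unsigned int val = atomic_load(futex_word);

	if (!(val & FUTEX_OWNER_DIED))
		return false;	/* owner still alive: use the normal slow path */

	/*
	 * Claim the lock but keep OWNER_DIED set, so the application knows
	 * the protected state may be inconsistent until it declares it sane.
	 */
	return atomic_compare_exchange_strong(futex_word, &val,
					      my_tid | FUTEX_OWNER_DIED);
}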
--
--/*
-- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-- */
--static inline int fetch_robust_entry(struct robust_list __user **entry,
-- struct robust_list __user * __user *head,
-- unsigned int *pi)
--{
-- unsigned long uentry;
--
-- if (get_user(uentry, (unsigned long __user *)head))
-- return -EFAULT;
--
-- *entry = (void __user *)(uentry & ~1UL);
-- *pi = uentry & 1;
--
-- return 0;
--}
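/*
 * [Editor's note, not part of the patch above] The bit-0 tagging decoded
 * by fetch_robust_entry() above, as userspace might encode it when linking
 * a PI mutex into its robust list. encode_robust_entry() is a hypothetical
 * helper; the only assumption is that entries are pointer-aligned.
 */
#include <linux/futex.h>
#include <stdbool.h>

static inline unsigned long encode_robust_entry(struct robust_list *entry,
						bool is_pi)
{
	/* Pointer alignment leaves bit 0 free to mark PI futexes. */
	return (unsigned long)entry | (is_pi ? 1UL : 0UL);
}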
--
--/*
-- * Walk curr->robust_list (very carefully, it's a userspace list!)
-- * and mark any locks found there dead, and notify any waiters.
-- *
-- * We silently return on any sign of a list-walking problem.
-- */
--static void exit_robust_list(struct task_struct *curr)
--{
-- struct robust_list_head __user *head = curr->robust_list;
-- struct robust_list __user *entry, *next_entry, *pending;
-- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-- unsigned int next_pi;
-- unsigned long futex_offset;
-- int rc;
--
-- if (!futex_cmpxchg_enabled)
-- return;
--
-- /*
-- * Fetch the list head (which was registered earlier, via
-- * sys_set_robust_list()):
-- */
-- if (fetch_robust_entry(&entry, &head->list.next, &pi))
-- return;
-- /*
-- * Fetch the relative futex offset:
-- */
-- if (get_user(futex_offset, &head->futex_offset))
-- return;
-- /*
-- * Fetch any possibly pending lock-add first, and handle it
-- * if it exists:
-- */
-- if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
-- return;
--
-- next_entry = NULL; /* avoid warning with gcc */
-- while (entry != &head->list) {
-- /*
-- * Fetch the next entry in the list before calling
-- * handle_futex_death:
-- */
-- rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
-- /*
-- * A pending lock might already be on the list, so
-- * don't process it twice:
-- */
-- if (entry != pending) {
-- if (handle_futex_death((void __user *)entry + futex_offset,
-- curr, pi, HANDLE_DEATH_LIST))
-- return;
-- }
-- if (rc)
-- return;
-- entry = next_entry;
-- pi = next_pi;
-- /*
-- * Avoid excessively long or circular lists:
-- */
-- if (!--limit)
-- break;
--
-- cond_resched();
-- }
--
-- if (pending) {
-- handle_futex_death((void __user *)pending + futex_offset,
-- curr, pip, HANDLE_DEATH_PENDING);
-- }
--}
--
--static void futex_cleanup(struct task_struct *tsk)
--{
-- if (unlikely(tsk->robust_list)) {
-- exit_robust_list(tsk);
-- tsk->robust_list = NULL;
-- }
--
--#ifdef CONFIG_COMPAT
-- if (unlikely(tsk->compat_robust_list)) {
-- compat_exit_robust_list(tsk);
-- tsk->compat_robust_list = NULL;
-- }
--#endif
--
-- if (unlikely(!list_empty(&tsk->pi_state_list)))
-- exit_pi_state_list(tsk);
--}
--
--/**
-- * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
-- * @tsk: task to set the state on
-- *
-- * Set the futex exit state of the task locklessly. The futex waiter code
-- * observes that state when a task is exiting and loops until the task has
-- * actually finished the futex cleanup. The worst case for this is that the
-- * waiter runs through the wait loop until the state becomes visible.
-- *
-- * This is called from the recursive fault handling path in do_exit().
-- *
-- * This is best effort. Either the futex exit code has run already or
-- * not. If the OWNER_DIED bit has been set on the futex then the waiter can
-- * take it over. If not, the problem is pushed back to user space. If the
-- * futex exit code did not run yet, then an already queued waiter might
-- * block forever, but there is nothing which can be done about that.
-- */
--void futex_exit_recursive(struct task_struct *tsk)
--{
-- /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
-- if (tsk->futex_state == FUTEX_STATE_EXITING)
-- mutex_unlock(&tsk->futex_exit_mutex);
-- tsk->futex_state = FUTEX_STATE_DEAD;
--}
--
--static void futex_cleanup_begin(struct task_struct *tsk)
--{
-- /*
-- * Prevent various race issues against a concurrent incoming waiter
-- * including live locks by forcing the waiter to block on
-- * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
-- * attach_to_pi_owner().
-- */
-- mutex_lock(&tsk->futex_exit_mutex);
--
-- /*
-- * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
-- *
-- * This ensures that all subsequent checks of tsk->futex_state in
-- * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
-- * tsk->pi_lock held.
-- *
-- * It guarantees also that a pi_state which was queued right before
-- * the state change under tsk->pi_lock by a concurrent waiter must
-- * be observed in exit_pi_state_list().
-- */
-- raw_spin_lock_irq(&tsk->pi_lock);
-- tsk->futex_state = FUTEX_STATE_EXITING;
-- raw_spin_unlock_irq(&tsk->pi_lock);
--}
--
--static void futex_cleanup_end(struct task_struct *tsk, int state)
--{
-- /*
-- * Lockless store. The only side effect is that an observer might
-- * take another loop until it becomes visible.
-- */
-- tsk->futex_state = state;
-- /*
-- * Drop the exit protection. This unblocks waiters which observed
-- * FUTEX_STATE_EXITING to reevaluate the state.
-- */
-- mutex_unlock(&tsk->futex_exit_mutex);
--}
--
--void futex_exec_release(struct task_struct *tsk)
--{
-- /*
-- * The state handling is done for consistency, but in the case of
-- * exec() there is no way to prevent further damage as the PID stays
-- * the same. But for the unlikely and arguably buggy case that a
-- * futex is held on exec(), this provides at least as much state
-- * consistency protection as is possible.
-- */
-- futex_cleanup_begin(tsk);
-- futex_cleanup(tsk);
-- /*
-- * Reset the state to FUTEX_STATE_OK. The task is alive and about to
-- * exec a new binary.
-- */
-- futex_cleanup_end(tsk, FUTEX_STATE_OK);
--}
--
--void futex_exit_release(struct task_struct *tsk)
--{
-- futex_cleanup_begin(tsk);
-- futex_cleanup(tsk);
-- futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
--}
--
--long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-- u32 __user *uaddr2, u32 val2, u32 val3)
--{
-- int cmd = op & FUTEX_CMD_MASK;
-- unsigned int flags = 0;
--
-- if (!(op & FUTEX_PRIVATE_FLAG))
-- flags |= FLAGS_SHARED;
--
-- if (op & FUTEX_CLOCK_REALTIME) {
-- flags |= FLAGS_CLOCKRT;
-- if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
-- cmd != FUTEX_LOCK_PI2)
-- return -ENOSYS;
-- }
--
-- switch (cmd) {
-- case FUTEX_LOCK_PI:
-- case FUTEX_LOCK_PI2:
-- case FUTEX_UNLOCK_PI:
-- case FUTEX_TRYLOCK_PI:
-- case FUTEX_WAIT_REQUEUE_PI:
-- case FUTEX_CMP_REQUEUE_PI:
-- if (!futex_cmpxchg_enabled)
-- return -ENOSYS;
-- }
--
-- switch (cmd) {
-- case FUTEX_WAIT:
-- val3 = FUTEX_BITSET_MATCH_ANY;
-- fallthrough;
-- case FUTEX_WAIT_BITSET:
-- return futex_wait(uaddr, flags, val, timeout, val3);
-- case FUTEX_WAKE:
-- val3 = FUTEX_BITSET_MATCH_ANY;
-- fallthrough;
-- case FUTEX_WAKE_BITSET:
-- return futex_wake(uaddr, flags, val, val3);
-- case FUTEX_REQUEUE:
-- return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
-- case FUTEX_CMP_REQUEUE:
-- return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
-- case FUTEX_WAKE_OP:
-- return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
-- case FUTEX_LOCK_PI:
-- flags |= FLAGS_CLOCKRT;
-- fallthrough;
-- case FUTEX_LOCK_PI2:
-- return futex_lock_pi(uaddr, flags, timeout, 0);
-- case FUTEX_UNLOCK_PI:
-- return futex_unlock_pi(uaddr, flags);
-- case FUTEX_TRYLOCK_PI:
-- return futex_lock_pi(uaddr, flags, NULL, 1);
-- case FUTEX_WAIT_REQUEUE_PI:
-- val3 = FUTEX_BITSET_MATCH_ANY;
-- return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
-- uaddr2);
-- case FUTEX_CMP_REQUEUE_PI:
-- return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
-- }
-- return -ENOSYS;
--}
--
--static __always_inline bool futex_cmd_has_timeout(u32 cmd)
--{
-- switch (cmd) {
-- case FUTEX_WAIT:
-- case FUTEX_LOCK_PI:
-- case FUTEX_LOCK_PI2:
-- case FUTEX_WAIT_BITSET:
-- case FUTEX_WAIT_REQUEUE_PI:
-- return true;
-- }
-- return false;
--}
--
--static __always_inline int
--futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
--{
-- if (!timespec64_valid(ts))
-- return -EINVAL;
--
-- *t = timespec64_to_ktime(*ts);
-- if (cmd == FUTEX_WAIT)
-- *t = ktime_add_safe(ktime_get(), *t);
-- else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
-- *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
-- return 0;
--}
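/*
 * [Editor's note, not part of the patch above] futex_init_timeout() turns
 * a relative FUTEX_WAIT timeout into an absolute expiry, while
 * FUTEX_WAIT_BITSET already receives an absolute one. A hedged userspace
 * sketch of that difference; wait_with_timeouts() is a hypothetical helper
 * and return values are ignored for brevity.
 */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <time.h>

static void wait_with_timeouts(unsigned int *uaddr, unsigned int val)
{
	struct timespec rel = { .tv_sec = 1, .tv_nsec = 0 };
	struct timespec abs;

	/* FUTEX_WAIT: "give up one second from now" (relative timeout). */
	syscall(SYS_futex, uaddr, FUTEX_WAIT, val, &rel, NULL, 0);

	/* FUTEX_WAIT_BITSET: absolute expiry, CLOCK_MONOTONIC by default. */
	clock_gettime(CLOCK_MONOTONIC, &abs);
	abs.tv_sec += 1;
	syscall(SYS_futex, uaddr, FUTEX_WAIT_BITSET, val, &abs, NULL,
		FUTEX_BITSET_MATCH_ANY);
}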
--
--SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
-- const struct __kernel_timespec __user *, utime,
-- u32 __user *, uaddr2, u32, val3)
--{
-- int ret, cmd = op & FUTEX_CMD_MASK;
-- ktime_t t, *tp = NULL;
-- struct timespec64 ts;
--
-- if (utime && futex_cmd_has_timeout(cmd)) {
-- if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
-- return -EFAULT;
-- if (get_timespec64(&ts, utime))
-- return -EFAULT;
-- ret = futex_init_timeout(cmd, op, &ts, &t);
-- if (ret)
-- return ret;
-- tp = &t;
-- }
--
-- return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
--}
--
--#ifdef CONFIG_COMPAT
--/*
-- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-- */
--static inline int
--compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
-- compat_uptr_t __user *head, unsigned int *pi)
--{
-- if (get_user(*uentry, head))
-- return -EFAULT;
--
-- *entry = compat_ptr((*uentry) & ~1);
-- *pi = (unsigned int)(*uentry) & 1;
--
-- return 0;
--}
--
--static void __user *futex_uaddr(struct robust_list __user *entry,
-- compat_long_t futex_offset)
--{
-- compat_uptr_t base = ptr_to_compat(entry);
-- void __user *uaddr = compat_ptr(base + futex_offset);
--
-- return uaddr;
--}
--
--/*
-- * Walk curr->robust_list (very carefully, it's a userspace list!)
-- * and mark any locks found there dead, and notify any waiters.
-- *
-- * We silently return on any sign of a list-walking problem.
-- */
--static void compat_exit_robust_list(struct task_struct *curr)
--{
-- struct compat_robust_list_head __user *head = curr->compat_robust_list;
-- struct robust_list __user *entry, *next_entry, *pending;
-- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-- unsigned int next_pi;
-- compat_uptr_t uentry, next_uentry, upending;
-- compat_long_t futex_offset;
-- int rc;
--
-- if (!futex_cmpxchg_enabled)
-- return;
--
-- /*
-- * Fetch the list head (which was registered earlier, via
-- * sys_set_robust_list()):
-- */
-- if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
-- return;
-- /*
-- * Fetch the relative futex offset:
-- */
-- if (get_user(futex_offset, &head->futex_offset))
-- return;
-- /*
-- * Fetch any possibly pending lock-add first, and handle it
-- * if it exists:
-- */
-- if (compat_fetch_robust_entry(&upending, &pending,
-- &head->list_op_pending, &pip))
-- return;
--
-- next_entry = NULL; /* avoid warning with gcc */
-- while (entry != (struct robust_list __user *) &head->list) {
-- /*
-- * Fetch the next entry in the list before calling
-- * handle_futex_death:
-- */
-- rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
-- (compat_uptr_t __user *)&entry->next, &next_pi);
-- /*
-- * A pending lock might already be on the list, so
-- * don't process it twice:
-- */
-- if (entry != pending) {
-- void __user *uaddr = futex_uaddr(entry, futex_offset);
--
-- if (handle_futex_death(uaddr, curr, pi,
-- HANDLE_DEATH_LIST))
-- return;
-- }
-- if (rc)
-- return;
-- uentry = next_uentry;
-- entry = next_entry;
-- pi = next_pi;
-- /*
-- * Avoid excessively long or circular lists:
-- */
-- if (!--limit)
-- break;
--
-- cond_resched();
-- }
-- if (pending) {
-- void __user *uaddr = futex_uaddr(pending, futex_offset);
--
-- handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
-- }
--}
--
--COMPAT_SYSCALL_DEFINE2(set_robust_list,
-- struct compat_robust_list_head __user *, head,
-- compat_size_t, len)
--{
-- if (!futex_cmpxchg_enabled)
-- return -ENOSYS;
--
-- if (unlikely(len != sizeof(*head)))
-- return -EINVAL;
--
-- current->compat_robust_list = head;
--
-- return 0;
--}
--
--COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
-- compat_uptr_t __user *, head_ptr,
-- compat_size_t __user *, len_ptr)
--{
-- struct compat_robust_list_head __user *head;
-- unsigned long ret;
-- struct task_struct *p;
--
-- if (!futex_cmpxchg_enabled)
-- return -ENOSYS;
--
-- rcu_read_lock();
--
-- ret = -ESRCH;
-- if (!pid)
-- p = current;
-- else {
-- p = find_task_by_vpid(pid);
-- if (!p)
-- goto err_unlock;
-- }
--
-- ret = -EPERM;
-- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-- goto err_unlock;
--
-- head = p->compat_robust_list;
-- rcu_read_unlock();
--
-- if (put_user(sizeof(*head), len_ptr))
-- return -EFAULT;
-- return put_user(ptr_to_compat(head), head_ptr);
--
--err_unlock:
-- rcu_read_unlock();
--
-- return ret;
--}
--#endif /* CONFIG_COMPAT */
--
--#ifdef CONFIG_COMPAT_32BIT_TIME
--SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
-- const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
-- u32, val3)
--{
-- int ret, cmd = op & FUTEX_CMD_MASK;
-- ktime_t t, *tp = NULL;
-- struct timespec64 ts;
--
-- if (utime && futex_cmd_has_timeout(cmd)) {
-- if (get_old_timespec32(&ts, utime))
-- return -EFAULT;
-- ret = futex_init_timeout(cmd, op, &ts, &t);
-- if (ret)
-- return ret;
-- tp = &t;
-- }
--
-- return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
--}
--#endif /* CONFIG_COMPAT_32BIT_TIME */
--
--static void __init futex_detect_cmpxchg(void)
--{
--#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-- u32 curval;
--
-- /*
-- * This will fail and we want it. Some arch implementations do
-- * runtime detection of the futex_atomic_cmpxchg_inatomic()
-- * functionality. We want to know that before we call in any
-- * of the complex code paths. Also we want to prevent
-- * registration of robust lists in that case. NULL is
-- * guaranteed to fault and we get -EFAULT on a functional
-- * implementation; the non-functional ones will return
-- * -ENOSYS.
-- */
-- if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
-- futex_cmpxchg_enabled = 1;
--#endif
--}
--
--static int __init futex_init(void)
--{
-- unsigned int futex_shift;
-- unsigned long i;
--
--#if CONFIG_BASE_SMALL
-- futex_hashsize = 16;
--#else
-- futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
--#endif
--
-- futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
-- futex_hashsize, 0,
-- futex_hashsize < 256 ? HASH_SMALL : 0,
-- &futex_shift, NULL,
-- futex_hashsize, futex_hashsize);
-- futex_hashsize = 1UL << futex_shift;
--
-- futex_detect_cmpxchg();
--
-- for (i = 0; i < futex_hashsize; i++) {
-- atomic_set(&futex_queues[i].waiters, 0);
-- plist_head_init(&futex_queues[i].chain);
-- spin_lock_init(&futex_queues[i].lock);
-- }
--
-- return 0;
--}
--core_initcall(futex_init);
-diff --git a/kernel/futex/Makefile b/kernel/futex/Makefile
-new file mode 100644
-index 000000000..b77188d1f
---- /dev/null
-+++ b/kernel/futex/Makefile
-@@ -0,0 +1,3 @@
-+# SPDX-License-Identifier: GPL-2.0
-+
-+obj-y += core.o syscalls.o pi.o requeue.o waitwake.o
-diff --git a/kernel/futex/core.c b/kernel/futex/core.c
-new file mode 100644
-index 000000000..25d8a88b3
---- /dev/null
-+++ b/kernel/futex/core.c
-@@ -0,0 +1,1176 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Fast Userspace Mutexes (which I call "Futexes!").
-+ * (C) Rusty Russell, IBM 2002
-+ *
-+ * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
-+ * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
-+ *
-+ * Removed page pinning, fix privately mapped COW pages and other cleanups
-+ * (C) Copyright 2003, 2004 Jamie Lokier
-+ *
-+ * Robust futex support started by Ingo Molnar
-+ * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
-+ * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
-+ *
-+ * PI-futex support started by Ingo Molnar and Thomas Gleixner
-+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
-+ * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
-+ *
-+ * PRIVATE futexes by Eric Dumazet
-+ * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
-+ *
-+ * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
-+ * Copyright (C) IBM Corporation, 2009
-+ * Thanks to Thomas Gleixner for conceptual design and careful reviews.
-+ *
-+ * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
-+ * enough at me, Linus for the original (flawed) idea, Matthew
-+ * Kirkwood for proof-of-concept implementation.
-+ *
-+ * "The futexes are also cursed."
-+ * "But they come in a choice of three flavours!"
-+ */
-+#include <linux/compat.h>
-+#include <linux/jhash.h>
-+#include <linux/pagemap.h>
-+#include <linux/memblock.h>
-+#include <linux/fault-inject.h>
-+#include <linux/slab.h>
-+
-+#include "futex.h"
-+#include "../locking/rtmutex_common.h"
-+
-+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-+int __read_mostly futex_cmpxchg_enabled;
-+#endif
-+
-+
-+/*
-+ * The base of the bucket array and its size are always used together
-+ * (after initialization only in futex_hash()), so ensure that they
-+ * reside in the same cacheline.
-+ */
-+static struct {
-+ struct futex_hash_bucket *queues;
-+ unsigned long hashsize;
-+} __futex_data __read_mostly __aligned(2*sizeof(long));
-+#define futex_queues (__futex_data.queues)
-+#define futex_hashsize (__futex_data.hashsize)
-+
-+
-+/*
-+ * Fault injections for futexes.
-+ */
-+#ifdef CONFIG_FAIL_FUTEX
-+
-+static struct {
-+ struct fault_attr attr;
-+
-+ bool ignore_private;
-+} fail_futex = {
-+ .attr = FAULT_ATTR_INITIALIZER,
-+ .ignore_private = false,
-+};
-+
-+static int __init setup_fail_futex(char *str)
-+{
-+ return setup_fault_attr(&fail_futex.attr, str);
-+}
-+__setup("fail_futex=", setup_fail_futex);
-+
-+bool should_fail_futex(bool fshared)
-+{
-+ if (fail_futex.ignore_private && !fshared)
-+ return false;
-+
-+ return should_fail(&fail_futex.attr, 1);
-+}
-+
-+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-+
-+static int __init fail_futex_debugfs(void)
-+{
-+ umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-+ struct dentry *dir;
-+
-+ dir = fault_create_debugfs_attr("fail_futex", NULL,
-+ &fail_futex.attr);
-+ if (IS_ERR(dir))
-+ return PTR_ERR(dir);
-+
-+ debugfs_create_bool("ignore-private", mode, dir,
-+ &fail_futex.ignore_private);
-+ return 0;
-+}
-+
-+late_initcall(fail_futex_debugfs);
-+
-+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-+
-+#endif /* CONFIG_FAIL_FUTEX */
-+
-+/**
-+ * futex_hash - Return the hash bucket in the global hash
-+ * @key: Pointer to the futex key for which the hash is calculated
-+ *
-+ * We hash on the keys returned from get_futex_key (see below) and return the
-+ * corresponding hash bucket in the global hash.
-+ */
-+struct futex_hash_bucket *futex_hash(union futex_key *key)
-+{
-+ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
-+ key->both.offset);
-+
-+ return &futex_queues[hash & (futex_hashsize - 1)];
-+}
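/*
 * [Editor's note, not part of the patch above] futex_hash() relies on the
 * bucket count being a power of two (futex_init() rounds it up), so the
 * mask with (futex_hashsize - 1) is a cheap equivalent of a modulo. A
 * small self-contained sketch of that indexing trick; pick_bucket() is a
 * hypothetical helper.
 */
#include <assert.h>

static unsigned long pick_bucket(unsigned long hash, unsigned long nbuckets)
{
	assert((nbuckets & (nbuckets - 1)) == 0);	/* power of two */
	return hash & (nbuckets - 1);			/* same as hash % nbuckets */
}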
-+
-+
-+/**
-+ * futex_setup_timer - set up the sleeping hrtimer.
-+ * @time: ptr to the given timeout value
-+ * @timeout: the hrtimer_sleeper structure to be set up
-+ * @flags: futex flags
-+ * @range_ns: optional range in ns
-+ *
-+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
-+ * value given
-+ */
-+struct hrtimer_sleeper *
-+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
-+ int flags, u64 range_ns)
-+{
-+ if (!time)
-+ return NULL;
-+
-+ hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
-+ CLOCK_REALTIME : CLOCK_MONOTONIC,
-+ HRTIMER_MODE_ABS);
-+ /*
-+ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
-+ * effectively the same as calling hrtimer_set_expires().
-+ */
-+ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
-+
-+ return timeout;
-+}
-+
-+/*
-+ * Generate a machine wide unique identifier for this inode.
-+ *
-+ * This relies on u64 not wrapping in the life-time of the machine; which with
-+ * 1ns resolution means almost 585 years.
-+ *
-+ * This further relies on the fact that a well formed program will not unmap
-+ * the file while it has a (shared) futex waiting on it. This mapping will have
-+ * a file reference which pins the mount and inode.
-+ *
-+ * If for some reason an inode gets evicted and read back in again, it will get
-+ * a new sequence number and will _NOT_ match, even though it is the exact same
-+ * file.
-+ *
-+ * It is important that futex_match() will never have a false-positive, esp.
-+ * for PI futexes that can mess up the state. The above argues that false-negatives
-+ * are only possible for malformed programs.
-+ */
-+static u64 get_inode_sequence_number(struct inode *inode)
-+{
-+ static atomic64_t i_seq;
-+ u64 old;
-+
-+ /* Does the inode already have a sequence number? */
-+ old = atomic64_read(&inode->i_sequence);
-+ if (likely(old))
-+ return old;
-+
-+ for (;;) {
-+ u64 new = atomic64_add_return(1, &i_seq);
-+ if (WARN_ON_ONCE(!new))
-+ continue;
-+
-+ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
-+ if (old)
-+ return old;
-+ return new;
-+ }
-+}
-+
-+/**
-+ * get_futex_key() - Get parameters which are the keys for a futex
-+ * @uaddr: virtual address of the futex
-+ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
-+ * @key: address where result is stored.
-+ * @rw: mapping needs to be read/write (values: FUTEX_READ,
-+ * FUTEX_WRITE)
-+ *
-+ * Return: a negative error code or 0
-+ *
-+ * The key words are stored in @key on success.
-+ *
-+ * For shared mappings (when @fshared), the key is:
-+ *
-+ * ( inode->i_sequence, page->index, offset_within_page )
-+ *
-+ * [ also see get_inode_sequence_number() ]
-+ *
-+ * For private mappings (or when !@fshared), the key is:
-+ *
-+ * ( current->mm, address, 0 )
-+ *
-+ * This allows (cross process, where applicable) identification of the futex
-+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
-+ *
-+ * lock_page() might sleep, the caller should not hold a spinlock.
-+ */
-+int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
-+ enum futex_access rw)
-+{
-+ unsigned long address = (unsigned long)uaddr;
-+ struct mm_struct *mm = current->mm;
-+ struct page *page, *tail;
-+ struct address_space *mapping;
-+ int err, ro = 0;
-+
-+ /*
-+ * The futex address must be "naturally" aligned.
-+ */
-+ key->both.offset = address % PAGE_SIZE;
-+ if (unlikely((address % sizeof(u32)) != 0))
-+ return -EINVAL;
-+ address -= key->both.offset;
-+
-+ if (unlikely(!access_ok(uaddr, sizeof(u32))))
-+ return -EFAULT;
-+
-+ if (unlikely(should_fail_futex(fshared)))
-+ return -EFAULT;
-+
-+ /*
-+ * PROCESS_PRIVATE futexes are fast.
-+ * As the mm cannot disappear under us and the 'key' only needs
-+ * virtual address, we don't even have to find the underlying vma.
-+ * Note: we do have to check that 'uaddr' is a valid user address,
-+ * but access_ok() should be faster than find_vma().
-+ */
-+ if (!fshared) {
-+ key->private.mm = mm;
-+ key->private.address = address;
-+ return 0;
-+ }
-+
-+again:
-+ /* Ignore any VERIFY_READ mapping (futex common case) */
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
-+ /*
-+ * If write access is not required (eg. FUTEX_WAIT), try
-+ * and get read-only access.
-+ */
-+ if (err == -EFAULT && rw == FUTEX_READ) {
-+ err = get_user_pages_fast(address, 1, 0, &page);
-+ ro = 1;
-+ }
-+ if (err < 0)
-+ return err;
-+ else
-+ err = 0;
-+
-+ /*
-+ * The treatment of mapping from this point on is critical. The page
-+ * lock protects many things but in this context the page lock
-+ * stabilizes mapping, prevents inode freeing in the shared
-+ * file-backed region case and guards against movement to swap cache.
-+ *
-+ * Strictly speaking the page lock is not needed in all cases being
-+ * considered here and the page lock forces unnecessary serialization.
-+ * From this point on, mapping will be re-verified if necessary and
-+ * the page lock will be acquired only if it is unavoidable.
-+ *
-+ * Mapping checks require the head page for any compound page so the
-+ * head page and mapping is looked up now. For anonymous pages, it
-+ * does not matter if the page splits in the future as the key is
-+ * based on the address. For filesystem-backed pages, the tail is
-+ * required as the index of the page determines the key. For
-+ * base pages, there is no tail page and tail == page.
-+ */
-+ tail = page;
-+ page = compound_head(page);
-+ mapping = READ_ONCE(page->mapping);
-+
-+ /*
-+ * If page->mapping is NULL, then it cannot be a PageAnon
-+ * page; but it might be the ZERO_PAGE or in the gate area or
-+ * in a special mapping (all cases which we are happy to fail);
-+ * or it may have been a good file page when get_user_pages_fast
-+ * found it, but truncated or holepunched or subjected to
-+ * invalidate_complete_page2 before we got the page lock (also
-+ * cases which we are happy to fail). And we hold a reference,
-+ * so refcount care in invalidate_complete_page's remove_mapping
-+ * prevents drop_caches from setting mapping to NULL beneath us.
-+ *
-+ * The case we do have to guard against is when memory pressure made
-+ * shmem_writepage move it from filecache to swapcache beneath us:
-+ * an unlikely race, but we do need to retry for page->mapping.
-+ */
-+ if (unlikely(!mapping)) {
-+ int shmem_swizzled;
-+
-+ /*
-+ * Page lock is required to identify which special case above
-+ * applies. If this is really a shmem page then the page lock
-+ * will prevent unexpected transitions.
-+ */
-+ lock_page(page);
-+ shmem_swizzled = PageSwapCache(page) || page->mapping;
-+ unlock_page(page);
-+ put_page(page);
-+
-+ if (shmem_swizzled)
-+ goto again;
-+
-+ return -EFAULT;
-+ }
-+
-+ /*
-+ * Private mappings are handled in a simple way.
-+ *
-+ * If the futex key is stored on an anonymous page, then the associated
-+ * object is the mm which is implicitly pinned by the calling process.
-+ *
-+ * NOTE: When userspace waits on a MAP_SHARED mapping, even if
-+ * it's a read-only handle, it's expected that futexes attach to
-+ * the object not the particular process.
-+ */
-+ if (PageAnon(page)) {
-+ /*
-+ * A RO anonymous page will never change and thus doesn't make
-+ * sense for futex operations.
-+ */
-+ if (unlikely(should_fail_futex(true)) || ro) {
-+ err = -EFAULT;
-+ goto out;
-+ }
-+
-+ key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
-+ key->private.mm = mm;
-+ key->private.address = address;
-+
-+ } else {
-+ struct inode *inode;
-+
-+ /*
-+ * The associated futex object in this case is the inode and
-+ * the page->mapping must be traversed. Ordinarily this should
-+ * be stabilised under page lock but it's not strictly
-+ * necessary in this case as we just want to pin the inode, not
-+ * update the radix tree or anything like that.
-+ *
-+ * The RCU read lock is taken as the inode is finally freed
-+ * under RCU. If the mapping still matches expectations then the
-+ * mapping->host can be safely accessed as being a valid inode.
-+ */
-+ rcu_read_lock();
-+
-+ if (READ_ONCE(page->mapping) != mapping) {
-+ rcu_read_unlock();
-+ put_page(page);
-+
-+ goto again;
-+ }
-+
-+ inode = READ_ONCE(mapping->host);
-+ if (!inode) {
-+ rcu_read_unlock();
-+ put_page(page);
-+
-+ goto again;
-+ }
-+
-+ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-+ key->shared.i_seq = get_inode_sequence_number(inode);
-+ key->shared.pgoff = page_to_pgoff(tail);
-+ rcu_read_unlock();
-+ }
-+
-+out:
-+ put_page(page);
-+ return err;
-+}
-+
-+/**
-+ * fault_in_user_writeable() - Fault in user address and verify RW access
-+ * @uaddr: pointer to faulting user space address
-+ *
-+ * Slow path to fix up the fault we just took in the atomic write
-+ * access to @uaddr.
-+ *
-+ * We have no generic implementation of a non-destructive write to the
-+ * user address. We know that we faulted in the atomic
-+ * pagefault-disabled section, so we might as well avoid the #PF overhead by
-+ * calling get_user_pages() right away.
-+ */
-+int fault_in_user_writeable(u32 __user *uaddr)
-+{
-+ struct mm_struct *mm = current->mm;
-+ int ret;
-+
-+ mmap_read_lock(mm);
-+ ret = fixup_user_fault(mm, (unsigned long)uaddr,
-+ FAULT_FLAG_WRITE, NULL);
-+ mmap_read_unlock(mm);
-+
-+ return ret < 0 ? ret : 0;
-+}
-+
-+/**
-+ * futex_top_waiter() - Return the highest priority waiter on a futex
-+ * @hb: the hash bucket the futex_q's reside in
-+ * @key: the futex key (to distinguish it from futex_q's of other futexes)
-+ *
-+ * Must be called with the hb lock held.
-+ */
-+struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key)
-+{
-+ struct futex_q *this;
-+
-+ plist_for_each_entry(this, &hb->chain, list) {
-+ if (futex_match(&this->key, key))
-+ return this;
-+ }
-+ return NULL;
-+}
-+
-+int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-+{
-+ int ret;
-+
-+ pagefault_disable();
-+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
-+ pagefault_enable();
-+
-+ return ret;
-+}
-+
-+int futex_get_value_locked(u32 *dest, u32 __user *from)
-+{
-+ int ret;
-+
-+ pagefault_disable();
-+ ret = __get_user(*dest, from);
-+ pagefault_enable();
-+
-+ return ret ? -EFAULT : 0;
-+}
-+
-+/**
-+ * wait_for_owner_exiting - Block until the owner has exited
-+ * @ret: owner's current futex lock status
-+ * @exiting: Pointer to the exiting task
-+ *
-+ * Caller must hold a refcount on @exiting.
-+ */
-+void wait_for_owner_exiting(int ret, struct task_struct *exiting)
-+{
-+ if (ret != -EBUSY) {
-+ WARN_ON_ONCE(exiting);
-+ return;
-+ }
-+
-+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
-+ return;
-+
-+ mutex_lock(&exiting->futex_exit_mutex);
-+ /*
-+ * No point in doing state checking here. If the waiter got here
-+ * while the task was in exec()->exec_futex_release() then it can
-+ * have any FUTEX_STATE_* value when the waiter has acquired the
-+ * mutex: OK if still running, or EXITING/DEAD if it reached exit()
-+ * already. Highly unlikely and not a problem. Just one more round
-+ * through the futex maze.
-+ */
-+ mutex_unlock(&exiting->futex_exit_mutex);
-+
-+ put_task_struct(exiting);
-+}
-+
-+/**
-+ * __futex_unqueue() - Remove the futex_q from its futex_hash_bucket
-+ * @q: The futex_q to unqueue
-+ *
-+ * The q->lock_ptr must not be NULL and must be held by the caller.
-+ */
-+void __futex_unqueue(struct futex_q *q)
-+{
-+ struct futex_hash_bucket *hb;
-+
-+ if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
-+ return;
-+ lockdep_assert_held(q->lock_ptr);
-+
-+ hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
-+ plist_del(&q->list, &hb->chain);
-+ futex_hb_waiters_dec(hb);
-+}
-+
-+/* The key must be already stored in q->key. */
-+struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
-+ __acquires(&hb->lock)
-+{
-+ struct futex_hash_bucket *hb;
-+
-+ hb = futex_hash(&q->key);
-+
-+ /*
-+ * Increment the counter before taking the lock so that
-+ * a potential waker won't miss a to-be-slept task that is
-+ * waiting for the spinlock. This is safe as all futex_q_lock()
-+ * users end up calling futex_queue(). Similarly, for housekeeping,
-+ * decrement the counter at futex_q_unlock() when some error has
-+ * occurred and we don't end up adding the task to the list.
-+ */
-+ futex_hb_waiters_inc(hb); /* implies smp_mb(); (A) */
-+
-+ q->lock_ptr = &hb->lock;
-+
-+ spin_lock(&hb->lock);
-+ return hb;
-+}
-+
-+void futex_q_unlock(struct futex_hash_bucket *hb)
-+ __releases(&hb->lock)
-+{
-+ spin_unlock(&hb->lock);
-+ futex_hb_waiters_dec(hb);
-+}
-+
-+void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
-+{
-+ int prio;
-+
-+ /*
-+ * The priority used to register this element is
-+ * - either the real thread-priority for the real-time threads
-+ * (i.e. threads with a priority lower than MAX_RT_PRIO)
-+ * - or MAX_RT_PRIO for non-RT threads.
-+ * Thus, all RT-threads are woken first in priority order, and
-+ * the others are woken last, in FIFO order.
-+ */
-+ prio = min(current->normal_prio, MAX_RT_PRIO);
-+
-+ plist_node_init(&q->list, prio);
-+ plist_add(&q->list, &hb->chain);
-+ q->task = current;
-+}
-+
-+/**
-+ * futex_unqueue() - Remove the futex_q from its futex_hash_bucket
-+ * @q: The futex_q to unqueue
-+ *
-+ * The q->lock_ptr must not be held by the caller. A call to futex_unqueue() must
-+ * be paired with exactly one earlier call to futex_queue().
-+ *
-+ * Return:
-+ * - 1 - if the futex_q was still queued (and we unqueued it);
-+ * - 0 - if the futex_q was already removed by the waking thread
-+ */
-+int futex_unqueue(struct futex_q *q)
-+{
-+ spinlock_t *lock_ptr;
-+ int ret = 0;
-+
-+ /* In the common case we don't take the spinlock, which is nice. */
-+retry:
-+ /*
-+ * q->lock_ptr can change between this read and the following spin_lock.
-+ * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
-+ * optimizing lock_ptr out of the logic below.
-+ */
-+ lock_ptr = READ_ONCE(q->lock_ptr);
-+ if (lock_ptr != NULL) {
-+ spin_lock(lock_ptr);
-+ /*
-+ * q->lock_ptr can change between reading it and
-+ * spin_lock(), causing us to take the wrong lock. This
-+ * corrects the race condition.
-+ *
-+ * Reasoning goes like this: if we have the wrong lock,
-+ * q->lock_ptr must have changed (maybe several times)
-+ * between reading it and the spin_lock(). It can
-+ * change again after the spin_lock() but only if it was
-+ * already changed before the spin_lock(). It cannot,
-+ * however, change back to the original value. Therefore
-+ * we can detect whether we acquired the correct lock.
-+ */
-+ if (unlikely(lock_ptr != q->lock_ptr)) {
-+ spin_unlock(lock_ptr);
-+ goto retry;
-+ }
-+ __futex_unqueue(q);
-+
-+ BUG_ON(q->pi_state);
-+
-+ spin_unlock(lock_ptr);
-+ ret = 1;
-+ }
-+
-+ return ret;
-+}
-+
-+/*
-+ * PI futexes can not be requeued and must remove themselves from the
-+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
-+ */
-+void futex_unqueue_pi(struct futex_q *q)
-+{
-+ __futex_unqueue(q);
-+
-+ BUG_ON(!q->pi_state);
-+ put_pi_state(q->pi_state);
-+ q->pi_state = NULL;
-+}
-+
-+/* Constants for the pending_op argument of handle_futex_death */
-+#define HANDLE_DEATH_PENDING true
-+#define HANDLE_DEATH_LIST false
-+
-+/*
-+ * Process a futex-list entry, check whether it's owned by the
-+ * dying task, and do notification if so:
-+ */
-+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
-+ bool pi, bool pending_op)
-+{
-+ u32 uval, nval, mval;
-+ int err;
-+
-+ /* Futex address must be 32bit aligned */
-+ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
-+ return -1;
-+
-+retry:
-+ if (get_user(uval, uaddr))
-+ return -1;
-+
-+ /*
-+ * Special case for regular (non PI) futexes. The unlock path in
-+ * user space has two race scenarios:
-+ *
-+ * 1. The unlock path releases the user space futex value and
-+ * before it can execute the futex() syscall to wake up
-+ * waiters it is killed.
-+ *
-+ * 2. A woken up waiter is killed before it can acquire the
-+ * futex in user space.
-+ *
-+ * In both cases the TID validation below prevents a wakeup of
-+ * potential waiters which can cause these waiters to block
-+ * forever.
-+ *
-+ * In both cases the following conditions are met:
-+ *
-+ * 1) task->robust_list->list_op_pending != NULL
-+ * @pending_op == true
-+ * 2) User space futex value == 0
-+ * 3) Regular futex: @pi == false
-+ *
-+ * If these conditions are met, it is safe to attempt waking up a
-+ * potential waiter without touching the user space futex value and
-+ * trying to set the OWNER_DIED bit. The user space futex value is
-+ * uncontended and the rest of the user space mutex state is
-+ * consistent, so a woken waiter will just take over the
-+ * uncontended futex. Setting the OWNER_DIED bit would create
-+ * inconsistent state and malfunction of the user space owner died
-+ * handling.
-+ */
-+ if (pending_op && !pi && !uval) {
-+ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
-+ return 0;
-+ }
-+
-+ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
-+ return 0;
-+
-+ /*
-+ * Ok, this dying thread is truly holding a futex
-+ * of interest. Set the OWNER_DIED bit atomically
-+ * via cmpxchg, and if the value had FUTEX_WAITERS
-+ * set, wake up a waiter (if any). (We have to do a
-+ * futex_wake() even if OWNER_DIED is already set -
-+ * to handle the rare but possible case of recursive
-+ * thread-death.) The rest of the cleanup is done in
-+ * userspace.
-+ */
-+ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
-+
-+ /*
-+ * We are not holding a lock here, but we want to have
-+ * the pagefault_disable/enable() protection because
-+ * we want to handle the fault gracefully. If the
-+ * access fails we try to fault in the futex with R/W
-+ * verification via get_user_pages. get_user() above
-+ * does not guarantee R/W access. If that fails we
-+ * give up and leave the futex locked.
-+ */
-+ if ((err = futex_cmpxchg_value_locked(&nval, uaddr, uval, mval))) {
-+ switch (err) {
-+ case -EFAULT:
-+ if (fault_in_user_writeable(uaddr))
-+ return -1;
-+ goto retry;
-+
-+ case -EAGAIN:
-+ cond_resched();
-+ goto retry;
-+
-+ default:
-+ WARN_ON_ONCE(1);
-+ return err;
-+ }
-+ }
-+
-+ if (nval != uval)
-+ goto retry;
-+
-+ /*
-+ * Wake robust non-PI futexes here. The wakeup of
-+ * PI futexes happens in exit_pi_state():
-+ */
-+ if (!pi && (uval & FUTEX_WAITERS))
-+ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-+ */
-+static inline int fetch_robust_entry(struct robust_list __user **entry,
-+ struct robust_list __user * __user *head,
-+ unsigned int *pi)
-+{
-+ unsigned long uentry;
-+
-+ if (get_user(uentry, (unsigned long __user *)head))
-+ return -EFAULT;
-+
-+ *entry = (void __user *)(uentry & ~1UL);
-+ *pi = uentry & 1;
-+
-+ return 0;
-+}
-+
-+/*
-+ * Walk curr->robust_list (very carefully, it's a userspace list!)
-+ * and mark any locks found there dead, and notify any waiters.
-+ *
-+ * We silently return on any sign of list-walking problem.
-+ */
-+static void exit_robust_list(struct task_struct *curr)
-+{
-+ struct robust_list_head __user *head = curr->robust_list;
-+ struct robust_list __user *entry, *next_entry, *pending;
-+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-+ unsigned int next_pi;
-+ unsigned long futex_offset;
-+ int rc;
-+
-+ if (!futex_cmpxchg_enabled)
-+ return;
-+
-+ /*
-+ * Fetch the list head (which was registered earlier, via
-+ * sys_set_robust_list()):
-+ */
-+ if (fetch_robust_entry(&entry, &head->list.next, &pi))
-+ return;
-+ /*
-+ * Fetch the relative futex offset:
-+ */
-+ if (get_user(futex_offset, &head->futex_offset))
-+ return;
-+ /*
-+ * Fetch any possibly pending lock-add first, and handle it
-+ * if it exists:
-+ */
-+ if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
-+ return;
-+
-+ next_entry = NULL; /* avoid warning with gcc */
-+ while (entry != &head->list) {
-+ /*
-+ * Fetch the next entry in the list before calling
-+ * handle_futex_death:
-+ */
-+ rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
-+ /*
-+ * A pending lock might already be on the list, so
-+ * don't process it twice:
-+ */
-+ if (entry != pending) {
-+ if (handle_futex_death((void __user *)entry + futex_offset,
-+ curr, pi, HANDLE_DEATH_LIST))
-+ return;
-+ }
-+ if (rc)
-+ return;
-+ entry = next_entry;
-+ pi = next_pi;
-+ /*
-+ * Avoid excessively long or circular lists:
-+ */
-+ if (!--limit)
-+ break;
-+
-+ cond_resched();
-+ }
-+
-+ if (pending) {
-+ handle_futex_death((void __user *)pending + futex_offset,
-+ curr, pip, HANDLE_DEATH_PENDING);
-+ }
-+}
-+
-+#ifdef CONFIG_COMPAT
-+static void __user *futex_uaddr(struct robust_list __user *entry,
-+ compat_long_t futex_offset)
-+{
-+ compat_uptr_t base = ptr_to_compat(entry);
-+ void __user *uaddr = compat_ptr(base + futex_offset);
-+
-+ return uaddr;
-+}
-+
-+/*
-+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-+ */
-+static inline int
-+compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
-+ compat_uptr_t __user *head, unsigned int *pi)
-+{
-+ if (get_user(*uentry, head))
-+ return -EFAULT;
-+
-+ *entry = compat_ptr((*uentry) & ~1);
-+ *pi = (unsigned int)(*uentry) & 1;
-+
-+ return 0;
-+}
-+
-+/*
-+ * Walk curr->robust_list (very carefully, it's a userspace list!)
-+ * and mark any locks found there dead, and notify any waiters.
-+ *
-+ * We silently return on any sign of list-walking problem.
-+ */
-+static void compat_exit_robust_list(struct task_struct *curr)
-+{
-+ struct compat_robust_list_head __user *head = curr->compat_robust_list;
-+ struct robust_list __user *entry, *next_entry, *pending;
-+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-+ unsigned int next_pi;
-+ compat_uptr_t uentry, next_uentry, upending;
-+ compat_long_t futex_offset;
-+ int rc;
-+
-+ if (!futex_cmpxchg_enabled)
-+ return;
-+
-+ /*
-+ * Fetch the list head (which was registered earlier, via
-+ * sys_set_robust_list()):
-+ */
-+ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
-+ return;
-+ /*
-+ * Fetch the relative futex offset:
-+ */
-+ if (get_user(futex_offset, &head->futex_offset))
-+ return;
-+ /*
-+ * Fetch any possibly pending lock-add first, and handle it
-+ * if it exists:
-+ */
-+ if (compat_fetch_robust_entry(&upending, &pending,
-+ &head->list_op_pending, &pip))
-+ return;
-+
-+ next_entry = NULL; /* avoid warning with gcc */
-+ while (entry != (struct robust_list __user *) &head->list) {
-+ /*
-+ * Fetch the next entry in the list before calling
-+ * handle_futex_death:
-+ */
-+ rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
-+ (compat_uptr_t __user *)&entry->next, &next_pi);
-+ /*
-+ * A pending lock might already be on the list, so
-+ * don't process it twice:
-+ */
-+ if (entry != pending) {
-+ void __user *uaddr = futex_uaddr(entry, futex_offset);
-+
-+ if (handle_futex_death(uaddr, curr, pi,
-+ HANDLE_DEATH_LIST))
-+ return;
-+ }
-+ if (rc)
-+ return;
-+ uentry = next_uentry;
-+ entry = next_entry;
-+ pi = next_pi;
-+ /*
-+ * Avoid excessively long or circular lists:
-+ */
-+ if (!--limit)
-+ break;
-+
-+ cond_resched();
-+ }
-+ if (pending) {
-+ void __user *uaddr = futex_uaddr(pending, futex_offset);
-+
-+ handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
-+ }
-+}
-+#endif
-+
-+#ifdef CONFIG_FUTEX_PI
-+
-+/*
-+ * This task is holding PI mutexes at exit time => bad.
-+ * Kernel cleans up PI-state, but userspace is likely hosed.
-+ * (Robust-futex cleanup is separate and might save the day for userspace.)
-+ */
-+static void exit_pi_state_list(struct task_struct *curr)
-+{
-+ struct list_head *next, *head = &curr->pi_state_list;
-+ struct futex_pi_state *pi_state;
-+ struct futex_hash_bucket *hb;
-+ union futex_key key = FUTEX_KEY_INIT;
-+
-+ if (!futex_cmpxchg_enabled)
-+ return;
-+ /*
-+ * We are a ZOMBIE and nobody can enqueue itself on
-+ * pi_state_list anymore, but we have to be careful
-+ * versus waiters unqueueing themselves:
-+ */
-+ raw_spin_lock_irq(&curr->pi_lock);
-+ while (!list_empty(head)) {
-+ next = head->next;
-+ pi_state = list_entry(next, struct futex_pi_state, list);
-+ key = pi_state->key;
-+ hb = futex_hash(&key);
-+
-+ /*
-+ * We can race against put_pi_state() removing itself from the
-+ * list (a waiter going away). put_pi_state() will first
-+ * decrement the reference count and then modify the list, so
-+ * it's possible to see the list entry but fail this reference
-+ * acquire.
-+ *
-+ * In that case; drop the locks to let put_pi_state() make
-+ * progress and retry the loop.
-+ */
-+ if (!refcount_inc_not_zero(&pi_state->refcount)) {
-+ raw_spin_unlock_irq(&curr->pi_lock);
-+ cpu_relax();
-+ raw_spin_lock_irq(&curr->pi_lock);
-+ continue;
-+ }
-+ raw_spin_unlock_irq(&curr->pi_lock);
-+
-+ spin_lock(&hb->lock);
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+ raw_spin_lock(&curr->pi_lock);
-+ /*
-+ * We dropped the pi-lock, so re-check whether this
-+ * task still owns the PI-state:
-+ */
-+ if (head->next != next) {
-+ /* retain curr->pi_lock for the loop invariant */
-+ raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
-+ spin_unlock(&hb->lock);
-+ put_pi_state(pi_state);
-+ continue;
-+ }
-+
-+ WARN_ON(pi_state->owner != curr);
-+ WARN_ON(list_empty(&pi_state->list));
-+ list_del_init(&pi_state->list);
-+ pi_state->owner = NULL;
-+
-+ raw_spin_unlock(&curr->pi_lock);
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ spin_unlock(&hb->lock);
-+
-+ rt_mutex_futex_unlock(&pi_state->pi_mutex);
-+ put_pi_state(pi_state);
-+
-+ raw_spin_lock_irq(&curr->pi_lock);
-+ }
-+ raw_spin_unlock_irq(&curr->pi_lock);
-+}
-+#else
-+static inline void exit_pi_state_list(struct task_struct *curr) { }
-+#endif
-+
-+static void futex_cleanup(struct task_struct *tsk)
-+{
-+ if (unlikely(tsk->robust_list)) {
-+ exit_robust_list(tsk);
-+ tsk->robust_list = NULL;
-+ }
-+
-+#ifdef CONFIG_COMPAT
-+ if (unlikely(tsk->compat_robust_list)) {
-+ compat_exit_robust_list(tsk);
-+ tsk->compat_robust_list = NULL;
-+ }
-+#endif
-+
-+ if (unlikely(!list_empty(&tsk->pi_state_list)))
-+ exit_pi_state_list(tsk);
-+}
-+
-+/**
-+ * futex_exit_recursive - Set the task's futex state to FUTEX_STATE_DEAD
-+ * @tsk: task to set the state on
-+ *
-+ * Set the futex exit state of the task lockless. The futex waiter code
-+ * observes that state when a task is exiting and loops until the task has
-+ * actually finished the futex cleanup. The worst case for this is that the
-+ * waiter runs through the wait loop until the state becomes visible.
-+ *
-+ * This is called from the recursive fault handling path in do_exit().
-+ *
-+ * This is best effort. Either the futex exit code has run already or
-+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
-+ * take it over. If not, the problem is pushed back to user space. If the
-+ * futex exit code did not run yet, then an already queued waiter might
-+ * block forever, but there is nothing which can be done about that.
-+ */
-+void futex_exit_recursive(struct task_struct *tsk)
-+{
-+ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
-+ if (tsk->futex_state == FUTEX_STATE_EXITING)
-+ mutex_unlock(&tsk->futex_exit_mutex);
-+ tsk->futex_state = FUTEX_STATE_DEAD;
-+}
-+
-+static void futex_cleanup_begin(struct task_struct *tsk)
-+{
-+ /*
-+ * Prevent various race issues against a concurrent incoming waiter
-+ * including live locks by forcing the waiter to block on
-+ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
-+ * attach_to_pi_owner().
-+ */
-+ mutex_lock(&tsk->futex_exit_mutex);
-+
-+ /*
-+ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
-+ *
-+ * This ensures that all subsequent checks of tsk->futex_state in
-+ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
-+ * tsk->pi_lock held.
-+ *
-+ * It also guarantees that a pi_state which was queued right before
-+ * the state change under tsk->pi_lock by a concurrent waiter must
-+ * be observed in exit_pi_state_list().
-+ */
-+ raw_spin_lock_irq(&tsk->pi_lock);
-+ tsk->futex_state = FUTEX_STATE_EXITING;
-+ raw_spin_unlock_irq(&tsk->pi_lock);
-+}
-+
-+static void futex_cleanup_end(struct task_struct *tsk, int state)
-+{
-+ /*
-+ * Lockless store. The only side effect is that an observer might
-+ * take another loop until it becomes visible.
-+ */
-+ tsk->futex_state = state;
-+ /*
-+ * Drop the exit protection. This unblocks waiters which observed
-+ * FUTEX_STATE_EXITING to reevaluate the state.
-+ */
-+ mutex_unlock(&tsk->futex_exit_mutex);
-+}
-+
-+void futex_exec_release(struct task_struct *tsk)
-+{
-+ /*
-+ * The state handling is done for consistency, but in the case of
-+ * exec() there is no way to prevent further damage as the PID stays
-+ * the same. But for the unlikely and arguably buggy case that a
-+ * futex is held on exec(), this provides as much state consistency
-+ * protection as is possible.
-+ */
-+ futex_cleanup_begin(tsk);
-+ futex_cleanup(tsk);
-+ /*
-+ * Reset the state to FUTEX_STATE_OK. The task is alive and about
-+ * to exec a new binary.
-+ */
-+ futex_cleanup_end(tsk, FUTEX_STATE_OK);
-+}
-+
-+void futex_exit_release(struct task_struct *tsk)
-+{
-+ futex_cleanup_begin(tsk);
-+ futex_cleanup(tsk);
-+ futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
-+}
-+
-+static void __init futex_detect_cmpxchg(void)
-+{
-+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-+ u32 curval;
-+
-+ /*
-+ * This will fail and we want it. Some arch implementations do
-+ * runtime detection of the futex_atomic_cmpxchg_inatomic()
-+ * functionality. We want to know that before we call in any
-+ * of the complex code paths. Also we want to prevent
-+ * registration of robust lists in that case. NULL is
-+ * guaranteed to fault and we get -EFAULT on a functional
-+ * implementation; the non-functional ones will return
-+ * -ENOSYS.
-+ */
-+ if (futex_cmpxchg_value_locked(&curval, NULL, 0, 0) == -EFAULT)
-+ futex_cmpxchg_enabled = 1;
-+#endif
-+}
-+
-+static int __init futex_init(void)
-+{
-+ unsigned int futex_shift;
-+ unsigned long i;
-+
-+#if CONFIG_BASE_SMALL
-+ futex_hashsize = 16;
-+#else
-+ futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
-+#endif
-+
-+ futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
-+ futex_hashsize, 0,
-+ futex_hashsize < 256 ? HASH_SMALL : 0,
-+ &futex_shift, NULL,
-+ futex_hashsize, futex_hashsize);
-+ futex_hashsize = 1UL << futex_shift;
-+
-+ futex_detect_cmpxchg();
-+
-+ for (i = 0; i < futex_hashsize; i++) {
-+ atomic_set(&futex_queues[i].waiters, 0);
-+ plist_head_init(&futex_queues[i].chain);
-+ spin_lock_init(&futex_queues[i].lock);
-+ }
-+
-+ return 0;
-+}
-+core_initcall(futex_init);
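The robust-list walking above (exit_robust_list() / handle_futex_death()) operates on a list that userspace registers with sys_set_robust_list(). As a rough illustration of the userspace side (not part of this patch; the helper name and variable are hypothetical, the field layout follows struct robust_list_head from linux/futex.h, and the usual lock-word convention of 0 = unlocked / TID = owner is assumed), registration could look like:

    #include <linux/futex.h>   /* struct robust_list_head, struct robust_list */
    #include <sys/syscall.h>
    #include <unistd.h>

    static struct robust_list_head rhead;

    static void robust_list_register(void)
    {
            /* Empty circular list; futex_offset is the offset from each
             * robust_list entry to its futex word (0 in this sketch). */
            rhead.list.next = &rhead.list;
            rhead.futex_offset = 0;
            rhead.list_op_pending = NULL;

            /* exit_robust_list() walks this list when the thread dies and
             * sets FUTEX_OWNER_DIED on any lock word still owned by it. */
            syscall(SYS_set_robust_list, &rhead, sizeof(rhead));
    }

glibc performs an equivalent registration for every thread; the sketch only shows the contract that the kernel-side walker above relies on.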
-diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
-new file mode 100644
-index 000000000..948fcf317
---- /dev/null
-+++ b/kernel/futex/futex.h
-@@ -0,0 +1,295 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _FUTEX_H
-+#define _FUTEX_H
-+
-+#include <linux/futex.h>
-+#include <linux/sched/wake_q.h>
-+
-+#include <asm/futex.h>
-+
-+/*
-+ * Futex flags used to encode options to functions and preserve them across
-+ * restarts.
-+ */
-+#ifdef CONFIG_MMU
-+# define FLAGS_SHARED 0x01
-+#else
-+/*
-+ * NOMMU does not have a per-process address space. Let the compiler optimize
-+ * code away.
-+ */
-+# define FLAGS_SHARED 0x00
-+#endif
-+#define FLAGS_CLOCKRT 0x02
-+#define FLAGS_HAS_TIMEOUT 0x04
-+
-+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
-+#define futex_cmpxchg_enabled 1
-+#else
-+extern int __read_mostly futex_cmpxchg_enabled;
-+#endif
-+
-+#ifdef CONFIG_FAIL_FUTEX
-+extern bool should_fail_futex(bool fshared);
-+#else
-+static inline bool should_fail_futex(bool fshared)
-+{
-+ return false;
-+}
-+#endif
-+
-+/*
-+ * Hash buckets are shared by all the futex_keys that hash to the same
-+ * location. Each key may have multiple futex_q structures, one for each task
-+ * waiting on a futex.
-+ */
-+struct futex_hash_bucket {
-+ atomic_t waiters;
-+ spinlock_t lock;
-+ struct plist_head chain;
-+} ____cacheline_aligned_in_smp;
-+
-+/*
-+ * Priority Inheritance state:
-+ */
-+struct futex_pi_state {
-+ /*
-+ * list of 'owned' pi_state instances - these have to be
-+ * cleaned up in do_exit() if the task exits prematurely:
-+ */
-+ struct list_head list;
-+
-+ /*
-+ * The PI object:
-+ */
-+ struct rt_mutex_base pi_mutex;
-+
-+ struct task_struct *owner;
-+ refcount_t refcount;
-+
-+ union futex_key key;
-+} __randomize_layout;
-+
-+/**
-+ * struct futex_q - The hashed futex queue entry, one per waiting task
-+ * @list: priority-sorted list of tasks waiting on this futex
-+ * @task: the task waiting on the futex
-+ * @lock_ptr: the hash bucket lock
-+ * @key: the key the futex is hashed on
-+ * @pi_state: optional priority inheritance state
-+ * @rt_waiter: rt_waiter storage for use with requeue_pi
-+ * @requeue_pi_key: the requeue_pi target futex key
-+ * @bitset: bitset for the optional bitmasked wakeup
-+ * @requeue_state: State field for futex_requeue_pi()
-+ * @requeue_wait: RCU wait for futex_requeue_pi() (RT only)
-+ *
-+ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
-+ * we can wake only the relevant ones (hashed queues may be shared).
-+ *
-+ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
-+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
-+ * The order of wakeup is always to make the first condition true, then
-+ * the second.
-+ *
-+ * PI futexes are typically woken before they are removed from the hash list via
-+ * the rt_mutex code. See futex_unqueue_pi().
-+ */
-+struct futex_q {
-+ struct plist_node list;
-+
-+ struct task_struct *task;
-+ spinlock_t *lock_ptr;
-+ union futex_key key;
-+ struct futex_pi_state *pi_state;
-+ struct rt_mutex_waiter *rt_waiter;
-+ union futex_key *requeue_pi_key;
-+ u32 bitset;
-+ atomic_t requeue_state;
-+#ifdef CONFIG_PREEMPT_RT
-+ struct rcuwait requeue_wait;
-+#endif
-+} __randomize_layout;
-+
-+extern const struct futex_q futex_q_init;
-+
-+enum futex_access {
-+ FUTEX_READ,
-+ FUTEX_WRITE
-+};
-+
-+extern int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
-+ enum futex_access rw);
-+
-+extern struct hrtimer_sleeper *
-+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
-+ int flags, u64 range_ns);
-+
-+extern struct futex_hash_bucket *futex_hash(union futex_key *key);
-+
-+/**
-+ * futex_match - Check whether two futex keys are equal
-+ * @key1: Pointer to key1
-+ * @key2: Pointer to key2
-+ *
-+ * Return 1 if two futex_keys are equal, 0 otherwise.
-+ */
-+static inline int futex_match(union futex_key *key1, union futex_key *key2)
-+{
-+ return (key1 && key2
-+ && key1->both.word == key2->both.word
-+ && key1->both.ptr == key2->both.ptr
-+ && key1->both.offset == key2->both.offset);
-+}
-+
-+extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-+ struct futex_q *q, struct futex_hash_bucket **hb);
-+extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
-+ struct hrtimer_sleeper *timeout);
-+extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
-+
-+extern int fault_in_user_writeable(u32 __user *uaddr);
-+extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-+extern int futex_get_value_locked(u32 *dest, u32 __user *from);
-+extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
-+
-+extern void __futex_unqueue(struct futex_q *q);
-+extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
-+extern int futex_unqueue(struct futex_q *q);
-+
-+/**
-+ * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
-+ * @q: The futex_q to enqueue
-+ * @hb: The destination hash bucket
-+ *
-+ * The hb->lock must be held by the caller, and is released here. A call to
-+ * futex_queue() is typically paired with exactly one call to futex_unqueue(). The
-+ * exceptions involve the PI related operations, which may use futex_unqueue_pi()
-+ * or nothing if the unqueue is done as part of the wake process and the unqueue
-+ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
-+ * an example).
-+ */
-+static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
-+ __releases(&hb->lock)
-+{
-+ __futex_queue(q, hb);
-+ spin_unlock(&hb->lock);
-+}
-+
-+extern void futex_unqueue_pi(struct futex_q *q);
-+
-+extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);
-+
-+/*
-+ * Reflects a new waiter being added to the waitqueue.
-+ */
-+static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+ atomic_inc(&hb->waiters);
-+ /*
-+ * Full barrier (A), see the ordering comment above.
-+ */
-+ smp_mb__after_atomic();
-+#endif
-+}
-+
-+/*
-+ * Reflects a waiter being removed from the waitqueue by wakeup
-+ * paths.
-+ */
-+static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+ atomic_dec(&hb->waiters);
-+#endif
-+}
-+
-+static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+ /*
-+ * Full barrier (B), see the ordering comment above.
-+ */
-+ smp_mb();
-+ return atomic_read(&hb->waiters);
-+#else
-+ return 1;
-+#endif
-+}
-+
-+extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
-+extern void futex_q_unlock(struct futex_hash_bucket *hb);
-+
-+
-+extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
-+ union futex_key *key,
-+ struct futex_pi_state **ps,
-+ struct task_struct *task,
-+ struct task_struct **exiting,
-+ int set_waiters);
-+
-+extern int refill_pi_state_cache(void);
-+extern void get_pi_state(struct futex_pi_state *pi_state);
-+extern void put_pi_state(struct futex_pi_state *pi_state);
-+extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);
-+
-+/*
-+ * Express the locking dependencies for lockdep:
-+ */
-+static inline void
-+double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
-+{
-+ if (hb1 > hb2)
-+ swap(hb1, hb2);
-+
-+ spin_lock(&hb1->lock);
-+ if (hb1 != hb2)
-+ spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
-+}
-+
-+static inline void
-+double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
-+{
-+ spin_unlock(&hb1->lock);
-+ if (hb1 != hb2)
-+ spin_unlock(&hb2->lock);
-+}
-+
-+/* syscalls */
-+
-+extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
-+ val, ktime_t *abs_time, u32 bitset, u32 __user
-+ *uaddr2);
-+
-+extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
-+ u32 __user *uaddr2, int nr_wake, int nr_requeue,
-+ u32 *cmpval, int requeue_pi);
-+
-+extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
-+ ktime_t *abs_time, u32 bitset);
-+
-+/**
-+ * struct futex_vector - Auxiliary struct for futex_waitv()
-+ * @w: Userspace provided data
-+ * @q: Kernel side data
-+ *
-+ * Struct used to build an array with all the data needed for futex_waitv()
-+ */
-+struct futex_vector {
-+ struct futex_waitv w;
-+ struct futex_q q;
-+};
-+
-+extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
-+ struct hrtimer_sleeper *to);
-+
-+extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);
-+
-+extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
-+ u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);
-+
-+extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);
-+
-+extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);
-+
-+#endif /* _FUTEX_H */
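The PI entry points declared above (futex_lock_pi(), futex_unlock_pi()) back the userspace PI-mutex protocol: the uncontended paths are plain atomic 0 <-> TID transitions on the futex word, and the kernel is entered only when those fail. A minimal userspace sketch (hypothetical lock word and helper names, not the glibc implementation):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint32_t pi_lock;   /* 0 = unlocked, otherwise owner TID */

    static void pi_mutex_lock(void)
    {
            uint32_t expected = 0;
            uint32_t tid = (uint32_t)syscall(SYS_gettid);

            /* Fast path: 0 -> TID transition in userspace, no syscall. */
            if (atomic_compare_exchange_strong(&pi_lock, &expected, tid))
                    return;

            /* Contended: block on the kernel rt_mutex; futex_lock_pi()
             * attaches to or creates the pi_state and boosts the owner. */
            syscall(SYS_futex, &pi_lock, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
    }

    static void pi_mutex_unlock(void)
    {
            uint32_t expected = (uint32_t)syscall(SYS_gettid);

            /* Fast path: TID -> 0, only possible while FUTEX_WAITERS is clear. */
            if (atomic_compare_exchange_strong(&pi_lock, &expected, 0))
                    return;

            /* Waiters present (or owner-died bit set): futex_unlock_pi()
             * hands the lock to the top rt_mutex waiter. */
            syscall(SYS_futex, &pi_lock, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
    }

pi.c below implements everything these fast paths cannot do in userspace: attaching to or creating the pi_state, priority inheritance through the rt_mutex, and the robust/owner-died corner cases.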
-diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
-new file mode 100644
-index 000000000..183b28c32
---- /dev/null
-+++ b/kernel/futex/pi.c
-@@ -0,0 +1,1233 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/slab.h>
-+#include <linux/sched/task.h>
-+
-+#include "futex.h"
-+#include "../locking/rtmutex_common.h"
-+
-+/*
-+ * PI code:
-+ */
-+int refill_pi_state_cache(void)
-+{
-+ struct futex_pi_state *pi_state;
-+
-+ if (likely(current->pi_state_cache))
-+ return 0;
-+
-+ pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
-+
-+ if (!pi_state)
-+ return -ENOMEM;
-+
-+ INIT_LIST_HEAD(&pi_state->list);
-+ /* pi_mutex gets initialized later */
-+ pi_state->owner = NULL;
-+ refcount_set(&pi_state->refcount, 1);
-+ pi_state->key = FUTEX_KEY_INIT;
-+
-+ current->pi_state_cache = pi_state;
-+
-+ return 0;
-+}
-+
-+static struct futex_pi_state *alloc_pi_state(void)
-+{
-+ struct futex_pi_state *pi_state = current->pi_state_cache;
-+
-+ WARN_ON(!pi_state);
-+ current->pi_state_cache = NULL;
-+
-+ return pi_state;
-+}
-+
-+static void pi_state_update_owner(struct futex_pi_state *pi_state,
-+ struct task_struct *new_owner)
-+{
-+ struct task_struct *old_owner = pi_state->owner;
-+
-+ lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
-+
-+ if (old_owner) {
-+ raw_spin_lock(&old_owner->pi_lock);
-+ WARN_ON(list_empty(&pi_state->list));
-+ list_del_init(&pi_state->list);
-+ raw_spin_unlock(&old_owner->pi_lock);
-+ }
-+
-+ if (new_owner) {
-+ raw_spin_lock(&new_owner->pi_lock);
-+ WARN_ON(!list_empty(&pi_state->list));
-+ list_add(&pi_state->list, &new_owner->pi_state_list);
-+ pi_state->owner = new_owner;
-+ raw_spin_unlock(&new_owner->pi_lock);
-+ }
-+}
-+
-+void get_pi_state(struct futex_pi_state *pi_state)
-+{
-+ WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
-+}
-+
-+/*
-+ * Drops a reference to the pi_state object and frees or caches it
-+ * when the last reference is gone.
-+ */
-+void put_pi_state(struct futex_pi_state *pi_state)
-+{
-+ if (!pi_state)
-+ return;
-+
-+ if (!refcount_dec_and_test(&pi_state->refcount))
-+ return;
-+
-+ /*
-+ * If pi_state->owner is NULL, the owner is most probably dying
-+ * and has cleaned up the pi_state already
-+ */
-+ if (pi_state->owner) {
-+ unsigned long flags;
-+
-+ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
-+ pi_state_update_owner(pi_state, NULL);
-+ rt_mutex_proxy_unlock(&pi_state->pi_mutex);
-+ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
-+ }
-+
-+ if (current->pi_state_cache) {
-+ kfree(pi_state);
-+ } else {
-+ /*
-+ * pi_state->list is already empty.
-+ * clear pi_state->owner.
-+ * refcount is at 0 - put it back to 1.
-+ */
-+ pi_state->owner = NULL;
-+ refcount_set(&pi_state->refcount, 1);
-+ current->pi_state_cache = pi_state;
-+ }
-+}
-+
-+/*
-+ * We need to check the following states:
-+ *
-+ * Waiter | pi_state | pi->owner | uTID | uODIED | ?
-+ *
-+ * [1] NULL | --- | --- | 0 | 0/1 | Valid
-+ * [2] NULL | --- | --- | >0 | 0/1 | Valid
-+ *
-+ * [3] Found | NULL | -- | Any | 0/1 | Invalid
-+ *
-+ * [4] Found | Found | NULL | 0 | 1 | Valid
-+ * [5] Found | Found | NULL | >0 | 1 | Invalid
-+ *
-+ * [6] Found | Found | task | 0 | 1 | Valid
-+ *
-+ * [7] Found | Found | NULL | Any | 0 | Invalid
-+ *
-+ * [8] Found | Found | task | ==taskTID | 0/1 | Valid
-+ * [9] Found | Found | task | 0 | 0 | Invalid
-+ * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
-+ *
-+ * [1] Indicates that the kernel can acquire the futex atomically. We
-+ * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
-+ *
-+ * [2] Valid, if TID does not belong to a kernel thread. If no matching
-+ * thread is found then it indicates that the owner TID has died.
-+ *
-+ * [3] Invalid. The waiter is queued on a non PI futex
-+ *
-+ * [4] Valid state after exit_robust_list(), which sets the user space
-+ * value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
-+ *
-+ * [5] The user space value got manipulated between exit_robust_list()
-+ * and exit_pi_state_list()
-+ *
-+ * [6] Valid state after exit_pi_state_list() which sets the new owner in
-+ * the pi_state but cannot access the user space value.
-+ *
-+ * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
-+ *
-+ * [8] Owner and user space value match
-+ *
-+ * [9] There is no transient state which sets the user space TID to 0
-+ * except exit_robust_list(), but this is indicated by the
-+ * FUTEX_OWNER_DIED bit. See [4]
-+ *
-+ * [10] There is no transient state which leaves owner and user space
-+ * TID out of sync. Except one error case where the kernel is denied
-+ * write access to the user address, see fixup_pi_state_owner().
-+ *
-+ *
-+ * Serialization and lifetime rules:
-+ *
-+ * hb->lock:
-+ *
-+ * hb -> futex_q, relation
-+ * futex_q -> pi_state, relation
-+ *
-+ * (cannot be raw because hb can contain an arbitrary number
-+ * of futex_q's)
-+ *
-+ * pi_mutex->wait_lock:
-+ *
-+ * {uval, pi_state}
-+ *
-+ * (and pi_mutex 'obviously')
-+ *
-+ * p->pi_lock:
-+ *
-+ * p->pi_state_list -> pi_state->list, relation
-+ * pi_mutex->owner -> pi_state->owner, relation
-+ *
-+ * pi_state->refcount:
-+ *
-+ * pi_state lifetime
-+ *
-+ *
-+ * Lock order:
-+ *
-+ * hb->lock
-+ * pi_mutex->wait_lock
-+ * p->pi_lock
-+ *
-+ */
-+
-+/*
-+ * Validate that the existing waiter has a pi_state and sanity check
-+ * the pi_state against the user space value. If correct, attach to
-+ * it.
-+ */
-+static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
-+ struct futex_pi_state *pi_state,
-+ struct futex_pi_state **ps)
-+{
-+ pid_t pid = uval & FUTEX_TID_MASK;
-+ u32 uval2;
-+ int ret;
-+
-+ /*
-+ * Userspace might have messed up non-PI and PI futexes [3]
-+ */
-+ if (unlikely(!pi_state))
-+ return -EINVAL;
-+
-+ /*
-+ * We get here with hb->lock held, and having found a
-+ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
-+ * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(),
-+ * which in turn means that futex_lock_pi() still has a reference on
-+ * our pi_state.
-+ *
-+ * The waiter holding a reference on @pi_state also protects against
-+ * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
-+ * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
-+ * free pi_state before we can take a reference ourselves.
-+ */
-+ WARN_ON(!refcount_read(&pi_state->refcount));
-+
-+ /*
-+ * Now that we have a pi_state, we can acquire wait_lock
-+ * and do the state validation.
-+ */
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ /*
-+ * Since {uval, pi_state} is serialized by wait_lock, and our current
-+ * uval was read without holding it, it can have changed. Verify it
-+ * still is what we expect it to be, otherwise retry the entire
-+ * operation.
-+ */
-+ if (futex_get_value_locked(&uval2, uaddr))
-+ goto out_efault;
-+
-+ if (uval != uval2)
-+ goto out_eagain;
-+
-+ /*
-+ * Handle the owner died case:
-+ */
-+ if (uval & FUTEX_OWNER_DIED) {
-+ /*
-+ * exit_pi_state_list sets owner to NULL and wakes the
-+ * topmost waiter. The task which acquires the
-+ * pi_state->rt_mutex will fixup owner.
-+ */
-+ if (!pi_state->owner) {
-+ /*
-+ * No pi state owner, but the user space TID
-+ * is not 0. Inconsistent state. [5]
-+ */
-+ if (pid)
-+ goto out_einval;
-+ /*
-+ * Take a ref on the state and return success. [4]
-+ */
-+ goto out_attach;
-+ }
-+
-+ /*
-+ * If TID is 0, then either the dying owner has not
-+ * yet executed exit_pi_state_list() or some waiter
-+ * acquired the rtmutex in the pi state, but did not
-+ * yet fixup the TID in user space.
-+ *
-+ * Take a ref on the state and return success. [6]
-+ */
-+ if (!pid)
-+ goto out_attach;
-+ } else {
-+ /*
-+ * If the owner died bit is not set, then the pi_state
-+ * must have an owner. [7]
-+ */
-+ if (!pi_state->owner)
-+ goto out_einval;
-+ }
-+
-+ /*
-+ * Bail out if user space manipulated the futex value. If pi
-+ * state exists then the owner TID must be the same as the
-+ * user space TID. [9/10]
-+ */
-+ if (pid != task_pid_vnr(pi_state->owner))
-+ goto out_einval;
-+
-+out_attach:
-+ get_pi_state(pi_state);
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ *ps = pi_state;
-+ return 0;
-+
-+out_einval:
-+ ret = -EINVAL;
-+ goto out_error;
-+
-+out_eagain:
-+ ret = -EAGAIN;
-+ goto out_error;
-+
-+out_efault:
-+ ret = -EFAULT;
-+ goto out_error;
-+
-+out_error:
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ return ret;
-+}
-+
-+static int handle_exit_race(u32 __user *uaddr, u32 uval,
-+ struct task_struct *tsk)
-+{
-+ u32 uval2;
-+
-+ /*
-+ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
-+ * caller that the alleged owner is busy.
-+ */
-+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-+ return -EBUSY;
-+
-+ /*
-+ * Reread the user space value to handle the following situation:
-+ *
-+ * CPU0 CPU1
-+ *
-+ * sys_exit() sys_futex()
-+ * do_exit() futex_lock_pi()
-+ * futex_lock_pi_atomic()
-+ * exit_signals(tsk) No waiters:
-+ * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID
-+ * mm_release(tsk) Set waiter bit
-+ * exit_robust_list(tsk) { *uaddr = 0x80000PID;
-+ * Set owner died attach_to_pi_owner() {
-+ * *uaddr = 0xC0000000; tsk = get_task(PID);
-+ * } if (!tsk->flags & PF_EXITING) {
-+ * ... attach();
-+ * tsk->futex_state = } else {
-+ * FUTEX_STATE_DEAD; if (tsk->futex_state !=
-+ * FUTEX_STATE_DEAD)
-+ * return -EAGAIN;
-+ * return -ESRCH; <--- FAIL
-+ * }
-+ *
-+ * Returning ESRCH unconditionally is wrong here because the
-+ * user space value has been changed by the exiting task.
-+ *
-+ * The same logic applies to the case where the exiting task is
-+ * already gone.
-+ */
-+ if (futex_get_value_locked(&uval2, uaddr))
-+ return -EFAULT;
-+
-+ /* If the user space value has changed, try again. */
-+ if (uval2 != uval)
-+ return -EAGAIN;
-+
-+ /*
-+ * The exiting task did not have a robust list, the robust list was
-+ * corrupted or the user space value in *uaddr is simply bogus.
-+ * Give up and tell user space.
-+ */
-+ return -ESRCH;
-+}
-+
-+static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
-+ struct futex_pi_state **ps)
-+{
-+ /*
-+ * No existing pi state. First waiter. [2]
-+ *
-+ * This creates pi_state, we have hb->lock held, this means nothing can
-+ * observe this state, wait_lock is irrelevant.
-+ */
-+ struct futex_pi_state *pi_state = alloc_pi_state();
-+
-+ /*
-+ * Initialize the pi_mutex in locked state and make @p
-+ * the owner of it:
-+ */
-+ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
-+
-+ /* Store the key for possible exit cleanups: */
-+ pi_state->key = *key;
-+
-+ WARN_ON(!list_empty(&pi_state->list));
-+ list_add(&pi_state->list, &p->pi_state_list);
-+ /*
-+ * Assignment without holding pi_state->pi_mutex.wait_lock is safe
-+ * because there is no concurrency as the object is not published yet.
-+ */
-+ pi_state->owner = p;
-+
-+ *ps = pi_state;
-+}
-+/*
-+ * Lookup the task for the TID provided from user space and attach to
-+ * it after doing proper sanity checks.
-+ */
-+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-+ struct futex_pi_state **ps,
-+ struct task_struct **exiting)
-+{
-+ pid_t pid = uval & FUTEX_TID_MASK;
-+ struct task_struct *p;
-+
-+ /*
-+ * We are the first waiter - try to look up the real owner and attach
-+ * the new pi_state to it, but bail out when TID = 0 [1]
-+ *
-+ * The !pid check is paranoid. None of the call sites should end up
-+ * with pid == 0, but better safe than sorry. Let the caller retry.
-+ */
-+ if (!pid)
-+ return -EAGAIN;
-+ p = find_get_task_by_vpid(pid);
-+ if (!p)
-+ return handle_exit_race(uaddr, uval, NULL);
-+
-+ if (unlikely(p->flags & PF_KTHREAD)) {
-+ put_task_struct(p);
-+ return -EPERM;
-+ }
-+
-+ /*
-+ * We need to look at the task state to figure out whether the
-+ * task is exiting. To protect against the change of the task state
-+ * in futex_exit_release(), we do this protected by p->pi_lock:
-+ */
-+ raw_spin_lock_irq(&p->pi_lock);
-+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
-+ /*
-+ * The task is on the way out. When the futex state is
-+ * FUTEX_STATE_DEAD, we know that the task has finished
-+ * the cleanup:
-+ */
-+ int ret = handle_exit_race(uaddr, uval, p);
-+
-+ raw_spin_unlock_irq(&p->pi_lock);
-+ /*
-+ * If the owner task is between FUTEX_STATE_EXITING and
-+ * FUTEX_STATE_DEAD then store the task pointer and keep
-+ * the reference on the task struct. The calling code will
-+ * drop all locks, wait for the task to reach
-+ * FUTEX_STATE_DEAD and then drop the refcount. This is
-+ * required to prevent a live lock when the current task
-+ * preempted the exiting task between the two states.
-+ */
-+ if (ret == -EBUSY)
-+ *exiting = p;
-+ else
-+ put_task_struct(p);
-+ return ret;
-+ }
-+
-+ __attach_to_pi_owner(p, key, ps);
-+ raw_spin_unlock_irq(&p->pi_lock);
-+
-+ put_task_struct(p);
-+
-+ return 0;
-+}
-+
-+static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
-+{
-+ int err;
-+ u32 curval;
-+
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
-+ if (unlikely(err))
-+ return err;
-+
-+ /* If user space value changed, let the caller retry */
-+ return curval != uval ? -EAGAIN : 0;
-+}
-+
-+/**
-+ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
-+ * @uaddr: the pi futex user address
-+ * @hb: the pi futex hash bucket
-+ * @key: the futex key associated with uaddr and hb
-+ * @ps: the pi_state pointer where we store the result of the
-+ * lookup
-+ * @task: the task to perform the atomic lock work for. This will
-+ * be "current" except in the case of requeue pi.
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
-+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
-+ *
-+ * Return:
-+ * - 0 - ready to wait;
-+ * - 1 - acquired the lock;
-+ * - <0 - error
-+ *
-+ * The hb->lock must be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ */
-+int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
-+ union futex_key *key,
-+ struct futex_pi_state **ps,
-+ struct task_struct *task,
-+ struct task_struct **exiting,
-+ int set_waiters)
-+{
-+ u32 uval, newval, vpid = task_pid_vnr(task);
-+ struct futex_q *top_waiter;
-+ int ret;
-+
-+ /*
-+ * Read the user space value first so we can validate a few
-+ * things before proceeding further.
-+ */
-+ if (futex_get_value_locked(&uval, uaddr))
-+ return -EFAULT;
-+
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ /*
-+ * Detect deadlocks.
-+ */
-+ if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
-+ return -EDEADLK;
-+
-+ if ((unlikely(should_fail_futex(true))))
-+ return -EDEADLK;
-+
-+ /*
-+ * Lookup existing state first. If it exists, try to attach to
-+ * its pi_state.
-+ */
-+ top_waiter = futex_top_waiter(hb, key);
-+ if (top_waiter)
-+ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
-+
-+ /*
-+ * No waiter and user TID is 0. We are here because the
-+ * waiters bit or the owner died bit is set, we were called from
-+ * requeue_cmp_pi, or for whatever reason something issued the
-+ * syscall.
-+ */
-+ if (!(uval & FUTEX_TID_MASK)) {
-+ /*
-+ * We take over the futex. No other waiters and the user space
-+ * TID is 0. We preserve the owner died bit.
-+ */
-+ newval = uval & FUTEX_OWNER_DIED;
-+ newval |= vpid;
-+
-+ /* The futex requeue_pi code can enforce the waiters bit */
-+ if (set_waiters)
-+ newval |= FUTEX_WAITERS;
-+
-+ ret = lock_pi_update_atomic(uaddr, uval, newval);
-+ if (ret)
-+ return ret;
-+
-+ /*
-+ * If the waiter bit was requested the caller also needs PI
-+ * state attached to the new owner of the user space futex.
-+ *
-+ * @task is guaranteed to be alive and it cannot be exiting
-+ * because it is either sleeping or waiting in
-+ * futex_requeue_pi_wakeup_sync().
-+ *
-+ * No need to do the full attach_to_pi_owner() exercise
-+ * because @task is known and valid.
-+ */
-+ if (set_waiters) {
-+ raw_spin_lock_irq(&task->pi_lock);
-+ __attach_to_pi_owner(task, key, ps);
-+ raw_spin_unlock_irq(&task->pi_lock);
-+ }
-+ return 1;
-+ }
-+
-+ /*
-+ * First waiter. Set the waiters bit before attaching ourselves to
-+ * the owner. If owner tries to unlock, it will be forced into
-+ * the kernel and blocked on hb->lock.
-+ */
-+ newval = uval | FUTEX_WAITERS;
-+ ret = lock_pi_update_atomic(uaddr, uval, newval);
-+ if (ret)
-+ return ret;
-+ /*
-+ * If the update of the user space value succeeded, we try to
-+ * attach to the owner. If that fails, no harm done, we only
-+ * set the FUTEX_WAITERS bit in the user space variable.
-+ */
-+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
-+}
-+
-+/*
-+ * Caller must hold a reference on @pi_state.
-+ */
-+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
-+{
-+ struct rt_mutex_waiter *top_waiter;
-+ struct task_struct *new_owner;
-+ bool postunlock = false;
-+ DEFINE_RT_WAKE_Q(wqh);
-+ u32 curval, newval;
-+ int ret = 0;
-+
-+ top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
-+ if (WARN_ON_ONCE(!top_waiter)) {
-+ /*
-+ * As per the comment in futex_unlock_pi() this should not happen.
-+ *
-+ * When this happens, give up our locks and try again, giving
-+ * the futex_lock_pi() instance time to complete, either by
-+ * waiting on the rtmutex or removing itself from the futex
-+ * queue.
-+ */
-+ ret = -EAGAIN;
-+ goto out_unlock;
-+ }
-+
-+ new_owner = top_waiter->task;
-+
-+ /*
-+ * We pass it to the next owner. The WAITERS bit is always kept
-+ * enabled while there is PI state around. We clean up the owner
-+ * died bit, because we are the owner.
-+ */
-+ newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
-+
-+ if (unlikely(should_fail_futex(true))) {
-+ ret = -EFAULT;
-+ goto out_unlock;
-+ }
-+
-+ ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
-+ if (!ret && (curval != uval)) {
-+ /*
-+ * If an unconditional UNLOCK_PI operation (user space did not
-+ * try the TID->0 transition) raced with a waiter setting the
-+ * FUTEX_WAITERS flag between get_user() and locking the hash
-+ * bucket lock, retry the operation.
-+ */
-+ if ((FUTEX_TID_MASK & curval) == uval)
-+ ret = -EAGAIN;
-+ else
-+ ret = -EINVAL;
-+ }
-+
-+ if (!ret) {
-+ /*
-+ * This is a point of no return; once we modified the uval
-+ * there is no going back and subsequent operations must
-+ * not fail.
-+ */
-+ pi_state_update_owner(pi_state, new_owner);
-+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
-+ }
-+
-+out_unlock:
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ if (postunlock)
-+ rt_mutex_postunlock(&wqh);
-+
-+ return ret;
-+}
-+
-+static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-+ struct task_struct *argowner)
-+{
-+ struct futex_pi_state *pi_state = q->pi_state;
-+ struct task_struct *oldowner, *newowner;
-+ u32 uval, curval, newval, newtid;
-+ int err = 0;
-+
-+ oldowner = pi_state->owner;
-+
-+ /*
-+ * We are here because either:
-+ *
-+ * - we stole the lock and pi_state->owner needs updating to reflect
-+ * that (@argowner == current),
-+ *
-+ * or:
-+ *
-+ * - someone stole our lock and we need to fix things to point to the
-+ * new owner (@argowner == NULL).
-+ *
-+ * Either way, we have to replace the TID in the user space variable.
-+ * This must be atomic as we have to preserve the owner died bit here.
-+ *
-+ * Note: We write the user space value _before_ changing the pi_state
-+ * because we can fault here. Imagine swapped out pages or a fork
-+ * that marked all the anonymous memory readonly for cow.
-+ *
-+ * Modifying pi_state _before_ the user space value would leave the
-+ * pi_state in an inconsistent state when we fault here, because we
-+ * need to drop the locks to handle the fault. This might be observed
-+ * in the PID checks when attaching to PI state.
-+ */
-+retry:
-+ if (!argowner) {
-+ if (oldowner != current) {
-+ /*
-+ * We raced against a concurrent self; things are
-+ * already fixed up. Nothing to do.
-+ */
-+ return 0;
-+ }
-+
-+ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
-+ /* We got the lock. pi_state is correct. Tell caller. */
-+ return 1;
-+ }
-+
-+ /*
-+ * The trylock just failed, so either there is an owner or
-+ * there is a higher priority waiter than this one.
-+ */
-+ newowner = rt_mutex_owner(&pi_state->pi_mutex);
-+ /*
-+ * If the higher priority waiter has not yet taken over the
-+ * rtmutex then newowner is NULL. We can't return here with
-+ * that state because it's inconsistent vs. the user space
-+ * state. So drop the locks and try again. It's a valid
-+ * situation and not any different from the other retry
-+ * conditions.
-+ */
-+ if (unlikely(!newowner)) {
-+ err = -EAGAIN;
-+ goto handle_err;
-+ }
-+ } else {
-+ WARN_ON_ONCE(argowner != current);
-+ if (oldowner == current) {
-+ /*
-+ * We raced against a concurrent self; things are
-+ * already fixed up. Nothing to do.
-+ */
-+ return 1;
-+ }
-+ newowner = argowner;
-+ }
-+
-+ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
-+ /* Owner died? */
-+ if (!pi_state->owner)
-+ newtid |= FUTEX_OWNER_DIED;
-+
-+ err = futex_get_value_locked(&uval, uaddr);
-+ if (err)
-+ goto handle_err;
-+
-+ for (;;) {
-+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
-+
-+ err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
-+ if (err)
-+ goto handle_err;
-+
-+ if (curval == uval)
-+ break;
-+ uval = curval;
-+ }
-+
-+ /*
-+ * We fixed up user space. Now we need to fix the pi_state
-+ * itself.
-+ */
-+ pi_state_update_owner(pi_state, newowner);
-+
-+ return argowner == current;
-+
-+ /*
-+ * In order to reschedule or handle a page fault, we need to drop the
-+ * locks here. In the case of a fault, this gives the other task
-+ * (either the highest priority waiter itself or the task which stole
-+ * the rtmutex) the chance to try the fixup of the pi_state. So once we
-+ * are back from handling the fault we need to check the pi_state after
-+ * reacquiring the locks and before trying to do another fixup. When
-+ * the fixup has been done already we simply return.
-+ *
-+ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
-+ * drop hb->lock since the caller owns the hb -> futex_q relation.
-+ * Dropping the pi_mutex->wait_lock requires the state revalidate.
-+ */
-+handle_err:
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ spin_unlock(q->lock_ptr);
-+
-+ switch (err) {
-+ case -EFAULT:
-+ err = fault_in_user_writeable(uaddr);
-+ break;
-+
-+ case -EAGAIN:
-+ cond_resched();
-+ err = 0;
-+ break;
-+
-+ default:
-+ WARN_ON_ONCE(1);
-+ break;
-+ }
-+
-+ spin_lock(q->lock_ptr);
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ /*
-+ * Check if someone else fixed it for us:
-+ */
-+ if (pi_state->owner != oldowner)
-+ return argowner == current;
-+
-+ /* Retry if err was -EAGAIN or the fault-in succeeded */
-+ if (!err)
-+ goto retry;
-+
-+ /*
-+ * fault_in_user_writeable() failed so user state is immutable. At
-+ * best we can make the kernel state consistent but user state will
-+ * be most likely hosed and any subsequent unlock operation will be
-+ * rejected due to PI futex rule [10].
-+ *
-+ * Ensure that the rtmutex owner is also the pi_state owner despite
-+ * the user space value claiming something different. There is no
-+ * point in unlocking the rtmutex if current is the owner as it
-+ * would need to wait until the next waiter has taken the rtmutex
-+ * to guarantee consistent state. Keep it simple. Userspace asked
-+ * for this wrecked state.
-+ *
-+ * The rtmutex has an owner - either current or some other
-+ * task. See the EAGAIN loop above.
-+ */
-+ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
-+
-+ return err;
-+}
-+
-+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-+ struct task_struct *argowner)
-+{
-+ struct futex_pi_state *pi_state = q->pi_state;
-+ int ret;
-+
-+ lockdep_assert_held(q->lock_ptr);
-+
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+ ret = __fixup_pi_state_owner(uaddr, q, argowner);
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ return ret;
-+}
-+
-+/**
-+ * fixup_pi_owner() - Post lock pi_state and corner case management
-+ * @uaddr: user address of the futex
-+ * @q: futex_q (contains pi_state and access to the rt_mutex)
-+ * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
-+ *
-+ * After attempting to lock an rt_mutex, this function is called to cleanup
-+ * the pi_state owner as well as handle race conditions that may allow us to
-+ * acquire the lock. Must be called with the hb lock held.
-+ *
-+ * Return:
-+ * - 1 - success, lock taken;
-+ * - 0 - success, lock not taken;
-+ * - <0 - on error (-EFAULT)
-+ */
-+int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked)
-+{
-+ if (locked) {
-+ /*
-+ * Got the lock. We might not be the anticipated owner if we
-+ * did a lock-steal - fix up the PI-state in that case:
-+ *
-+ * Speculative pi_state->owner read (we don't hold wait_lock);
-+ * since we own the lock pi_state->owner == current is the
-+ * stable state, anything else needs more attention.
-+ */
-+ if (q->pi_state->owner != current)
-+ return fixup_pi_state_owner(uaddr, q, current);
-+ return 1;
-+ }
-+
-+ /*
-+ * If we didn't get the lock; check if anybody stole it from us. In
-+ * that case, we need to fix up the uval to point to them instead of
-+ * us, otherwise bad things happen. [10]
-+ *
-+ * Another speculative read; pi_state->owner == current is unstable
-+ * but needs our attention.
-+ */
-+ if (q->pi_state->owner == current)
-+ return fixup_pi_state_owner(uaddr, q, NULL);
-+
-+ /*
-+ * Paranoia check. If we did not take the lock, then we should not be
-+ * the owner of the rt_mutex. Warn and establish consistent state.
-+ */
-+ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
-+ return fixup_pi_state_owner(uaddr, q, current);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Userspace tried a 0 -> TID atomic transition of the futex value
-+ * and failed. The kernel side here does the whole locking operation:
-+ * if there are waiters then it will block as a consequence of relying
-+ * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
-+ * a 0 value of the futex too.).
-+ *
-+ * Also serves as futex trylock_pi()'ing, and due semantics.
-+ */
-+int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock)
-+{
-+ struct hrtimer_sleeper timeout, *to;
-+ struct task_struct *exiting = NULL;
-+ struct rt_mutex_waiter rt_waiter;
-+ struct futex_hash_bucket *hb;
-+ struct futex_q q = futex_q_init;
-+ int res, ret;
-+
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+ return -ENOSYS;
-+
-+ if (refill_pi_state_cache())
-+ return -ENOMEM;
-+
-+ to = futex_setup_timer(time, &timeout, flags, 0);
-+
-+retry:
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
-+ if (unlikely(ret != 0))
-+ goto out;
-+
-+retry_private:
-+ hb = futex_q_lock(&q);
-+
-+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
-+ &exiting, 0);
-+ if (unlikely(ret)) {
-+ /*
-+ * Atomic work succeeded and we got the lock,
-+ * or failed. Either way, we do _not_ block.
-+ */
-+ switch (ret) {
-+ case 1:
-+ /* We got the lock. */
-+ ret = 0;
-+ goto out_unlock_put_key;
-+ case -EFAULT:
-+ goto uaddr_faulted;
-+ case -EBUSY:
-+ case -EAGAIN:
-+ /*
-+ * Two reasons for this:
-+ * - EBUSY: Task is exiting and we just wait for the
-+ * exit to complete.
-+ * - EAGAIN: The user space value changed.
-+ */
-+ futex_q_unlock(hb);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
-+ cond_resched();
-+ goto retry;
-+ default:
-+ goto out_unlock_put_key;
-+ }
-+ }
-+
-+ WARN_ON(!q.pi_state);
-+
-+ /*
-+ * Only actually queue now that the atomic ops are done:
-+ */
-+ __futex_queue(&q, hb);
-+
-+ if (trylock) {
-+ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
-+ /* Fixup the trylock return value: */
-+ ret = ret ? 0 : -EWOULDBLOCK;
-+ goto no_block;
-+ }
-+
-+ rt_mutex_init_waiter(&rt_waiter);
-+
-+ /*
-+ * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
-+ * hold it while doing rt_mutex_start_proxy(), because then it will
-+ * include hb->lock in the blocking chain, even though we'll not in
-+ * fact hold it while blocking. This will lead it to report -EDEADLK
-+ * and BUG when futex_unlock_pi() interleaves with this.
-+ *
-+ * Therefore acquire wait_lock while holding hb->lock, but drop the
-+ * latter before calling __rt_mutex_start_proxy_lock(). This
-+ * interleaves with futex_unlock_pi() -- which does a similar lock
-+ * handoff -- such that the latter can observe the futex_q::pi_state
-+ * before __rt_mutex_start_proxy_lock() is done.
-+ */
-+ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
-+ spin_unlock(q.lock_ptr);
-+ /*
-+ * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
-+ * such that futex_unlock_pi() is guaranteed to observe the waiter when
-+ * it sees the futex_q::pi_state.
-+ */
-+ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
-+ raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
-+
-+ if (ret) {
-+ if (ret == 1)
-+ ret = 0;
-+ goto cleanup;
-+ }
-+
-+ if (unlikely(to))
-+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
-+
-+ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
-+
-+cleanup:
-+ spin_lock(q.lock_ptr);
-+ /*
-+ * If we failed to acquire the lock (deadlock/signal/timeout), we must
-+ * first acquire the hb->lock before removing the lock from the
-+ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
-+ * lists consistent.
-+ *
-+ * In particular; it is important that futex_unlock_pi() can not
-+ * observe this inconsistency.
-+ */
-+ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
-+ ret = 0;
-+
-+no_block:
-+ /*
-+ * Fixup the pi_state owner and possibly acquire the lock if we
-+ * haven't already.
-+ */
-+ res = fixup_pi_owner(uaddr, &q, !ret);
-+ /*
-+ * If fixup_pi_owner() returned an error, propagate that. If it acquired
-+ * the lock, clear our -ETIMEDOUT or -EINTR.
-+ */
-+ if (res)
-+ ret = (res < 0) ? res : 0;
-+
-+ futex_unqueue_pi(&q);
-+ spin_unlock(q.lock_ptr);
-+ goto out;
-+
-+out_unlock_put_key:
-+ futex_q_unlock(hb);
-+
-+out:
-+ if (to) {
-+ hrtimer_cancel(&to->timer);
-+ destroy_hrtimer_on_stack(&to->timer);
-+ }
-+ return ret != -EINTR ? ret : -ERESTARTNOINTR;
-+
-+uaddr_faulted:
-+ futex_q_unlock(hb);
-+
-+ ret = fault_in_user_writeable(uaddr);
-+ if (ret)
-+ goto out;
-+
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
-+
-+ goto retry;
-+}
-+
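For context, the userspace half of the 0 -> TID transition that the comment above futex_lock_pi() refers to looks roughly like the sketch below. It is illustrative only, not glibc's implementation; it assumes C11 atomics, raw syscall(2) access, and the hypothetical helper name pi_lock().

#include <errno.h>
#include <linux/futex.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch of the userspace fast path that futex_lock_pi() backs up. */
static int pi_lock(_Atomic uint32_t *futex)
{
	uint32_t zero = 0;
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	/* Fast path: an uncontended 0 -> TID transition takes the lock
	 * without entering the kernel. */
	if (atomic_compare_exchange_strong(futex, &zero, tid))
		return 0;

	/* Slow path: the kernel blocks us on the rt_mutex, handles the PI
	 * boosting and fixes up the futex word on our behalf. */
	if (syscall(SYS_futex, futex, FUTEX_LOCK_PI, 0, NULL, NULL, 0) == -1)
		return -errno;
	return 0;
}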
-+/*
-+ * Userspace attempted a TID -> 0 atomic transition, and failed.
-+ * This is the in-kernel slowpath: we look up the PI state (if any),
-+ * and do the rt-mutex unlock.
-+ */
-+int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
-+{
-+ u32 curval, uval, vpid = task_pid_vnr(current);
-+ union futex_key key = FUTEX_KEY_INIT;
-+ struct futex_hash_bucket *hb;
-+ struct futex_q *top_waiter;
-+ int ret;
-+
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+ return -ENOSYS;
-+
-+retry:
-+ if (get_user(uval, uaddr))
-+ return -EFAULT;
-+ /*
-+ * We release only a lock we actually own:
-+ */
-+ if ((uval & FUTEX_TID_MASK) != vpid)
-+ return -EPERM;
-+
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
-+ if (ret)
-+ return ret;
-+
-+ hb = futex_hash(&key);
-+ spin_lock(&hb->lock);
-+
-+ /*
-+ * Check waiters first. We do not trust user space values at
-+ * all and we at least want to know if user space fiddled
-+ * with the futex value instead of blindly unlocking.
-+ */
-+ top_waiter = futex_top_waiter(hb, &key);
-+ if (top_waiter) {
-+ struct futex_pi_state *pi_state = top_waiter->pi_state;
-+
-+ ret = -EINVAL;
-+ if (!pi_state)
-+ goto out_unlock;
-+
-+ /*
-+ * If current does not own the pi_state then the futex is
-+ * inconsistent and user space fiddled with the futex value.
-+ */
-+ if (pi_state->owner != current)
-+ goto out_unlock;
-+
-+ get_pi_state(pi_state);
-+ /*
-+ * By taking wait_lock while still holding hb->lock, we ensure
-+ * there is no point where we hold neither; and therefore
-+ * wake_futex_pi() must observe a state consistent with what we
-+ * observed.
-+ *
-+ * In particular; this forces __rt_mutex_start_proxy() to
-+ * complete such that we're guaranteed to observe the
-+ * rt_waiter. Also see the WARN in wake_futex_pi().
-+ */
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+ spin_unlock(&hb->lock);
-+
-+ /* drops pi_state->pi_mutex.wait_lock */
-+ ret = wake_futex_pi(uaddr, uval, pi_state);
-+
-+ put_pi_state(pi_state);
-+
-+ /*
-+ * Success, we're done! No tricky corner cases.
-+ */
-+ if (!ret)
-+ return ret;
-+ /*
-+ * The atomic access to the futex value generated a
-+ * pagefault, so retry the user-access and the wakeup:
-+ */
-+ if (ret == -EFAULT)
-+ goto pi_faulted;
-+ /*
-+ * An unconditional UNLOCK_PI op raced against a waiter
-+ * setting the FUTEX_WAITERS bit. Try again.
-+ */
-+ if (ret == -EAGAIN)
-+ goto pi_retry;
-+ /*
-+ * wake_futex_pi has detected invalid state. Tell user
-+ * space.
-+ */
-+ return ret;
-+ }
-+
-+ /*
-+ * We have no kernel internal state, i.e. no waiters in the
-+ * kernel. Waiters which are about to queue themselves are stuck
-+ * on hb->lock. So we can safely ignore them. We preserve neither
-+ * the WAITERS bit nor the OWNER_DIED one. We are the
-+ * owner.
-+ */
-+ if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
-+ spin_unlock(&hb->lock);
-+ switch (ret) {
-+ case -EFAULT:
-+ goto pi_faulted;
-+
-+ case -EAGAIN:
-+ goto pi_retry;
-+
-+ default:
-+ WARN_ON_ONCE(1);
-+ return ret;
-+ }
-+ }
-+
-+ /*
-+ * If uval has changed, let user space handle it.
-+ */
-+ ret = (curval == uval) ? 0 : -EAGAIN;
-+
-+out_unlock:
-+ spin_unlock(&hb->lock);
-+ return ret;
-+
-+pi_retry:
-+ cond_resched();
-+ goto retry;
-+
-+pi_faulted:
-+
-+ ret = fault_in_user_writeable(uaddr);
-+ if (!ret)
-+ goto retry;
-+
-+ return ret;
-+}
-+
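The unlock side, continuing the sketch above (same headers, illustrative helper name): the TID -> 0 fast path stays in userspace, and only a set FUTEX_WAITERS (or FUTEX_OWNER_DIED) bit forces the futex_unlock_pi() slow path.

static int pi_unlock(_Atomic uint32_t *futex)
{
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	/* Fast path: no waiter bits set, TID -> 0 releases the lock. */
	if (atomic_compare_exchange_strong(futex, &tid, 0))
		return 0;

	/* Slow path: let the kernel hand the lock to the top waiter and
	 * rewrite the futex word accordingly. */
	if (syscall(SYS_futex, futex, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0) == -1)
		return -errno;
	return 0;
}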
-diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
-new file mode 100644
-index 000000000..cba8b1a6a
---- /dev/null
-+++ b/kernel/futex/requeue.c
-@@ -0,0 +1,897 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/sched/signal.h>
-+
-+#include "futex.h"
-+#include "../locking/rtmutex_common.h"
-+
-+/*
-+ * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an
-+ * underlying rtmutex. The task which is about to be requeued could have
-+ * just woken up (timeout, signal). After the wake up the task has to
-+ * acquire hash bucket lock, which is held by the requeue code. As a task
-+ * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking
-+ * and the hash bucket lock blocking would collide and corrupt state.
-+ *
-+ * On !PREEMPT_RT this is not a problem and everything could be serialized
-+ * on hash bucket lock, but aside from having the benefit of common code,
-+ * this allows us to avoid doing the requeue when the task is already on the
-+ * way out and taking the hash bucket lock of the original uaddr1 when the
-+ * requeue has been completed.
-+ *
-+ * The following state transitions are valid:
-+ *
-+ * On the waiter side:
-+ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE
-+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT
-+ *
-+ * On the requeue side:
-+ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS
-+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED
-+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed)
-+ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED
-+ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed)
-+ *
-+ * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this
-+ * signals that the waiter is already on the way out. It also means that
-+ * the waiter is still on the 'wait' futex, i.e. uaddr1.
-+ *
-+ * The waiter side signals early wakeup to the requeue side either through
-+ * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending
-+ * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately
-+ * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT,
-+ * which means the wakeup is interleaving with a requeue in progress it has
-+ * to wait for the requeue side to change the state. Either to DONE/LOCKED
-+ * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex
-+ * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by
-+ * the requeue side when the requeue attempt failed via deadlock detection
-+ * and therefore the waiter q is still on the uaddr1 futex.
-+ */
-+enum {
-+ Q_REQUEUE_PI_NONE = 0,
-+ Q_REQUEUE_PI_IGNORE,
-+ Q_REQUEUE_PI_IN_PROGRESS,
-+ Q_REQUEUE_PI_WAIT,
-+ Q_REQUEUE_PI_DONE,
-+ Q_REQUEUE_PI_LOCKED,
-+};
-+
-+const struct futex_q futex_q_init = {
-+ /* list gets initialized in futex_queue() */
-+ .key = FUTEX_KEY_INIT,
-+ .bitset = FUTEX_BITSET_MATCH_ANY,
-+ .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
-+};
-+
-+/**
-+ * requeue_futex() - Requeue a futex_q from one hb to another
-+ * @q: the futex_q to requeue
-+ * @hb1: the source hash_bucket
-+ * @hb2: the target hash_bucket
-+ * @key2: the new key for the requeued futex_q
-+ */
-+static inline
-+void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
-+ struct futex_hash_bucket *hb2, union futex_key *key2)
-+{
-+
-+ /*
-+ * If key1 and key2 hash to the same bucket, no need to
-+ * requeue.
-+ */
-+ if (likely(&hb1->chain != &hb2->chain)) {
-+ plist_del(&q->list, &hb1->chain);
-+ futex_hb_waiters_dec(hb1);
-+ futex_hb_waiters_inc(hb2);
-+ plist_add(&q->list, &hb2->chain);
-+ q->lock_ptr = &hb2->lock;
-+ }
-+ q->key = *key2;
-+}
-+
-+static inline bool futex_requeue_pi_prepare(struct futex_q *q,
-+ struct futex_pi_state *pi_state)
-+{
-+ int old, new;
-+
-+ /*
-+ * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
-+ * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
-+ * ignore the waiter.
-+ */
-+ old = atomic_read_acquire(&q->requeue_state);
-+ do {
-+ if (old == Q_REQUEUE_PI_IGNORE)
-+ return false;
-+
-+ /*
-+ * futex_proxy_trylock_atomic() might have set it to
-+ * IN_PROGRESS and an interleaved early wake to WAIT.
-+ *
-+ * It was considered to have an extra state for that
-+ * trylock, but that would just add more conditionals
-+ * all over the place for a dubious value.
-+ */
-+ if (old != Q_REQUEUE_PI_NONE)
-+ break;
-+
-+ new = Q_REQUEUE_PI_IN_PROGRESS;
-+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
-+
-+ q->pi_state = pi_state;
-+ return true;
-+}
-+
-+static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
-+{
-+ int old, new;
-+
-+ old = atomic_read_acquire(&q->requeue_state);
-+ do {
-+ if (old == Q_REQUEUE_PI_IGNORE)
-+ return;
-+
-+ if (locked >= 0) {
-+ /* Requeue succeeded. Set DONE or LOCKED */
-+ WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
-+ old != Q_REQUEUE_PI_WAIT);
-+ new = Q_REQUEUE_PI_DONE + locked;
-+ } else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
-+ /* Deadlock, no early wakeup interleave */
-+ new = Q_REQUEUE_PI_NONE;
-+ } else {
-+ /* Deadlock, early wakeup interleave. */
-+ WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
-+ new = Q_REQUEUE_PI_IGNORE;
-+ }
-+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
-+
-+#ifdef CONFIG_PREEMPT_RT
-+ /* If the waiter interleaved with the requeue let it know */
-+ if (unlikely(old == Q_REQUEUE_PI_WAIT))
-+ rcuwait_wake_up(&q->requeue_wait);
-+#endif
-+}
-+
-+static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
-+{
-+ int old, new;
-+
-+ old = atomic_read_acquire(&q->requeue_state);
-+ do {
-+ /* Is requeue done already? */
-+ if (old >= Q_REQUEUE_PI_DONE)
-+ return old;
-+
-+ /*
-+ * If not done, then tell the requeue code to either ignore
-+ * the waiter or to wake it up once the requeue is done.
-+ */
-+ new = Q_REQUEUE_PI_WAIT;
-+ if (old == Q_REQUEUE_PI_NONE)
-+ new = Q_REQUEUE_PI_IGNORE;
-+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
-+
-+ /* If the requeue was in progress, wait for it to complete */
-+ if (old == Q_REQUEUE_PI_IN_PROGRESS) {
-+#ifdef CONFIG_PREEMPT_RT
-+ rcuwait_wait_event(&q->requeue_wait,
-+ atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
-+ TASK_UNINTERRUPTIBLE);
-+#else
-+ (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
-+#endif
-+ }
-+
-+ /*
-+ * Requeue is now either prohibited or complete. Reread state
-+ * because during the wait above it might have changed. Nothing
-+ * will modify q->requeue_state after this point.
-+ */
-+ return atomic_read(&q->requeue_state);
-+}
-+
-+/**
-+ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
-+ * @q: the futex_q
-+ * @key: the key of the requeue target futex
-+ * @hb: the hash_bucket of the requeue target futex
-+ *
-+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
-+ * target futex if it is uncontended or via a lock steal.
-+ *
-+ * 1) Set @q::key to the requeue target futex key so the waiter can detect
-+ * the wakeup on the right futex.
-+ *
-+ * 2) Dequeue @q from the hash bucket.
-+ *
-+ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
-+ * acquisition.
-+ *
-+ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
-+ * the waiter has to fixup the pi state.
-+ *
-+ * 5) Complete the requeue state so the waiter can make progress. After
-+ * this point the waiter task can return from the syscall immediately in
-+ * case that the pi state does not have to be fixed up.
-+ *
-+ * 6) Wake the waiter task.
-+ *
-+ * Must be called with both q->lock_ptr and hb->lock held.
-+ */
-+static inline
-+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
-+ struct futex_hash_bucket *hb)
-+{
-+ q->key = *key;
-+
-+ __futex_unqueue(q);
-+
-+ WARN_ON(!q->rt_waiter);
-+ q->rt_waiter = NULL;
-+
-+ q->lock_ptr = &hb->lock;
-+
-+ /* Signal locked state to the waiter */
-+ futex_requeue_pi_complete(q, 1);
-+ wake_up_state(q->task, TASK_NORMAL);
-+}
-+
-+/**
-+ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
-+ * @pifutex: the user address of the to futex
-+ * @hb1: the from futex hash bucket, must be locked by the caller
-+ * @hb2: the to futex hash bucket, must be locked by the caller
-+ * @key1: the from futex key
-+ * @key2: the to futex key
-+ * @ps: address to store the pi_state pointer
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
-+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
-+ *
-+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
-+ * Wake the top waiter if we succeed. If the caller specified set_waiters,
-+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
-+ * hb1 and hb2 must be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ *
-+ * Return:
-+ * - 0 - failed to acquire the lock atomically;
-+ * - >0 - acquired the lock, return value is vpid of the top_waiter
-+ * - <0 - error
-+ */
-+static int
-+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
-+ struct futex_hash_bucket *hb2, union futex_key *key1,
-+ union futex_key *key2, struct futex_pi_state **ps,
-+ struct task_struct **exiting, int set_waiters)
-+{
-+ struct futex_q *top_waiter = NULL;
-+ u32 curval;
-+ int ret;
-+
-+ if (futex_get_value_locked(&curval, pifutex))
-+ return -EFAULT;
-+
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ /*
-+ * Find the top_waiter and determine if there are additional waiters.
-+ * If the caller intends to requeue more than 1 waiter to pifutex,
-+ * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
-+ * as we have means to handle the possible fault. If not, don't set
-+ * the bit unnecessarily as it will force the subsequent unlock to enter
-+ * the kernel.
-+ */
-+ top_waiter = futex_top_waiter(hb1, key1);
-+
-+ /* There are no waiters, nothing for us to do. */
-+ if (!top_waiter)
-+ return 0;
-+
-+ /*
-+ * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
-+ * and waiting on the 'waitqueue' futex which is always !PI.
-+ */
-+ if (!top_waiter->rt_waiter || top_waiter->pi_state)
-+ return -EINVAL;
-+
-+ /* Ensure we requeue to the expected futex. */
-+ if (!futex_match(top_waiter->requeue_pi_key, key2))
-+ return -EINVAL;
-+
-+ /* Ensure that this does not race against an early wakeup */
-+ if (!futex_requeue_pi_prepare(top_waiter, NULL))
-+ return -EAGAIN;
-+
-+ /*
-+ * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
-+ * in the contended case or if @set_waiters is true.
-+ *
-+ * In the contended case PI state is attached to the lock owner. If
-+ * the user space lock can be acquired then PI state is attached to
-+ * the new owner (@top_waiter->task) when @set_waiters is true.
-+ */
-+ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-+ exiting, set_waiters);
-+ if (ret == 1) {
-+ /*
-+ * Lock was acquired in user space and PI state was
-+ * attached to @top_waiter->task. That means state is fully
-+ * consistent and the waiter can return to user space
-+ * immediately after the wakeup.
-+ */
-+ requeue_pi_wake_futex(top_waiter, key2, hb2);
-+ } else if (ret < 0) {
-+ /* Rewind top_waiter::requeue_state */
-+ futex_requeue_pi_complete(top_waiter, ret);
-+ } else {
-+ /*
-+ * futex_lock_pi_atomic() did not acquire the user space
-+ * futex, but managed to establish the proxy lock and pi
-+ * state. top_waiter::requeue_state cannot be fixed up here
-+ * because the waiter is not enqueued on the rtmutex
-+ * yet. This is handled at the callsite depending on the
-+ * result of rt_mutex_start_proxy_lock() which is
-+ * guaranteed to be reached with this function returning 0.
-+ */
-+ }
-+ return ret;
-+}
-+
-+/**
-+ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
-+ * @uaddr1: source futex user address
-+ * @flags: futex flags (FLAGS_SHARED, etc.)
-+ * @uaddr2: target futex user address
-+ * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
-+ * @nr_requeue: number of waiters to requeue (0-INT_MAX)
-+ * @cmpval: @uaddr1 expected value (or %NULL)
-+ * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
-+ * pi futex (pi to pi requeue is not supported)
-+ *
-+ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
-+ * uaddr2 atomically on behalf of the top waiter.
-+ *
-+ * Return:
-+ * - >=0 - on success, the number of tasks requeued or woken;
-+ * - <0 - on error
-+ */
-+int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
-+ int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi)
-+{
-+ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-+ int task_count = 0, ret;
-+ struct futex_pi_state *pi_state = NULL;
-+ struct futex_hash_bucket *hb1, *hb2;
-+ struct futex_q *this, *next;
-+ DEFINE_WAKE_Q(wake_q);
-+
-+ if (nr_wake < 0 || nr_requeue < 0)
-+ return -EINVAL;
-+
-+ /*
-+ * When PI is not supported: return -ENOSYS if requeue_pi is true,
-+ * consequently the compiler knows requeue_pi is always false past
-+ * this point which will optimize away all the conditional code
-+ * further down.
-+ */
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
-+ return -ENOSYS;
-+
-+ if (requeue_pi) {
-+ /*
-+ * Requeue PI only works on two distinct uaddrs. This
-+ * check is only valid for private futexes. See below.
-+ */
-+ if (uaddr1 == uaddr2)
-+ return -EINVAL;
-+
-+ /*
-+ * futex_requeue() allows the caller to define the number
-+ * of waiters to wake up via the @nr_wake argument. With
-+ * REQUEUE_PI, waking up more than one waiter is creating
-+ * more problems than it solves. Waking up a waiter only makes
-+ * sense if the PI futex @uaddr2 is uncontended as
-+ * this allows the requeue code to acquire the futex
-+ * @uaddr2 before waking the waiter. The waiter can then
-+ * return to user space without further action. A secondary
-+ * wakeup would just make the futex_wait_requeue_pi()
-+ * handling more complex, because that code would have to
-+ * look up pi_state and do more or less all the handling
-+ * which the requeue code has to do for the to-be-requeued
-+ * waiters. So restrict the number of waiters to wake to
-+ * one, and only wake it up when the PI futex is
-+ * uncontended. Otherwise requeue it and let the unlock of
-+ * the PI futex handle the wakeup.
-+ *
-+ * All REQUEUE_PI users, e.g. pthread_cond_signal() and
-+ * pthread_cond_broadcast() must use nr_wake=1.
-+ */
-+ if (nr_wake != 1)
-+ return -EINVAL;
-+
-+ /*
-+ * requeue_pi requires a pi_state, try to allocate it now
-+ * without any locks in case it fails.
-+ */
-+ if (refill_pi_state_cache())
-+ return -ENOMEM;
-+ }
-+
-+retry:
-+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
-+ requeue_pi ? FUTEX_WRITE : FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ /*
-+ * The check above which compares uaddrs is not sufficient for
-+ * shared futexes. We need to compare the keys:
-+ */
-+ if (requeue_pi && futex_match(&key1, &key2))
-+ return -EINVAL;
-+
-+ hb1 = futex_hash(&key1);
-+ hb2 = futex_hash(&key2);
-+
-+retry_private:
-+ futex_hb_waiters_inc(hb2);
-+ double_lock_hb(hb1, hb2);
-+
-+ if (likely(cmpval != NULL)) {
-+ u32 curval;
-+
-+ ret = futex_get_value_locked(&curval, uaddr1);
-+
-+ if (unlikely(ret)) {
-+ double_unlock_hb(hb1, hb2);
-+ futex_hb_waiters_dec(hb2);
-+
-+ ret = get_user(curval, uaddr1);
-+ if (ret)
-+ return ret;
-+
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
-+
-+ goto retry;
-+ }
-+ if (curval != *cmpval) {
-+ ret = -EAGAIN;
-+ goto out_unlock;
-+ }
-+ }
-+
-+ if (requeue_pi) {
-+ struct task_struct *exiting = NULL;
-+
-+ /*
-+ * Attempt to acquire uaddr2 and wake the top waiter. If we
-+ * intend to requeue waiters, force setting the FUTEX_WAITERS
-+ * bit. We force this here where we are able to easily handle
-+ * faults rather than in the requeue loop below.
-+ *
-+ * Updates topwaiter::requeue_state if a top waiter exists.
-+ */
-+ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-+ &key2, &pi_state,
-+ &exiting, nr_requeue);
-+
-+ /*
-+ * At this point the top_waiter has either taken uaddr2 or
-+ * is waiting on it. In both cases pi_state has been
-+ * established with an initial refcount on it. In case of an
-+ * error there's nothing.
-+ *
-+ * The top waiter's requeue_state is up to date:
-+ *
-+ * - If the lock was acquired atomically (ret == 1), then
-+ * the state is Q_REQUEUE_PI_LOCKED.
-+ *
-+ * The top waiter has been dequeued and woken up and can
-+ * return to user space immediately. The kernel/user
-+ * space state is consistent. In case that there must be
-+ * more waiters requeued the WAITERS bit in the user
-+ * space futex is set so the top waiter task has to go
-+ * into the syscall slowpath to unlock the futex. This
-+ * will block until this requeue operation has been
-+ * completed and the hash bucket locks have been
-+ * dropped.
-+ *
-+ * - If the trylock failed with an error (ret < 0) then
-+ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
-+ * happened", or Q_REQUEUE_PI_IGNORE when there was an
-+ * interleaved early wakeup.
-+ *
-+ * - If the trylock did not succeed (ret == 0) then the
-+ * state is either Q_REQUEUE_PI_IN_PROGRESS or
-+ * Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
-+ * This will be cleaned up in the loop below, which
-+ * cannot fail because futex_proxy_trylock_atomic() did
-+ * the same sanity checks for requeue_pi as the loop
-+ * below does.
-+ */
-+ switch (ret) {
-+ case 0:
-+ /* We hold a reference on the pi state. */
-+ break;
-+
-+ case 1:
-+ /*
-+ * futex_proxy_trylock_atomic() acquired the user space
-+ * futex. Adjust task_count.
-+ */
-+ task_count++;
-+ ret = 0;
-+ break;
-+
-+ /*
-+ * If the above failed, then pi_state is NULL and
-+ * waiter::requeue_state is correct.
-+ */
-+ case -EFAULT:
-+ double_unlock_hb(hb1, hb2);
-+ futex_hb_waiters_dec(hb2);
-+ ret = fault_in_user_writeable(uaddr2);
-+ if (!ret)
-+ goto retry;
-+ return ret;
-+ case -EBUSY:
-+ case -EAGAIN:
-+ /*
-+ * Two reasons for this:
-+ * - EBUSY: Owner is exiting and we just wait for the
-+ * exit to complete.
-+ * - EAGAIN: The user space value changed.
-+ */
-+ double_unlock_hb(hb1, hb2);
-+ futex_hb_waiters_dec(hb2);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
-+ cond_resched();
-+ goto retry;
-+ default:
-+ goto out_unlock;
-+ }
-+ }
-+
-+ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-+ if (task_count - nr_wake >= nr_requeue)
-+ break;
-+
-+ if (!futex_match(&this->key, &key1))
-+ continue;
-+
-+ /*
-+ * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
-+ * be paired with each other and no other futex ops.
-+ *
-+ * We should never be requeueing a futex_q with a pi_state,
-+ * which is awaiting a futex_unlock_pi().
-+ */
-+ if ((requeue_pi && !this->rt_waiter) ||
-+ (!requeue_pi && this->rt_waiter) ||
-+ this->pi_state) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+
-+ /* Plain futexes just wake or requeue and are done */
-+ if (!requeue_pi) {
-+ if (++task_count <= nr_wake)
-+ futex_wake_mark(&wake_q, this);
-+ else
-+ requeue_futex(this, hb1, hb2, &key2);
-+ continue;
-+ }
-+
-+ /* Ensure we requeue to the expected futex for requeue_pi. */
-+ if (!futex_match(this->requeue_pi_key, &key2)) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+
-+ /*
-+ * Requeue nr_requeue waiters and possibly one more in the case
-+ * of requeue_pi if we couldn't acquire the lock atomically.
-+ *
-+ * Prepare the waiter to take the rt_mutex. Take a refcount
-+ * on the pi_state and store the pointer in the futex_q
-+ * object of the waiter.
-+ */
-+ get_pi_state(pi_state);
-+
-+ /* Don't requeue when the waiter is already on the way out. */
-+ if (!futex_requeue_pi_prepare(this, pi_state)) {
-+ /*
-+ * Early woken waiter signaled that it is on the
-+ * way out. Drop the pi_state reference and try the
-+ * next waiter. @this->pi_state is still NULL.
-+ */
-+ put_pi_state(pi_state);
-+ continue;
-+ }
-+
-+ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
-+ this->rt_waiter,
-+ this->task);
-+
-+ if (ret == 1) {
-+ /*
-+ * We got the lock. We do neither drop the refcount
-+ * on pi_state nor clear this->pi_state because the
-+ * waiter needs the pi_state for cleaning up the
-+ * user space value. It will drop the refcount
-+ * after doing so. this::requeue_state is updated
-+ * in the wakeup as well.
-+ */
-+ requeue_pi_wake_futex(this, &key2, hb2);
-+ task_count++;
-+ } else if (!ret) {
-+ /* Waiter is queued, move it to hb2 */
-+ requeue_futex(this, hb1, hb2, &key2);
-+ futex_requeue_pi_complete(this, 0);
-+ task_count++;
-+ } else {
-+ /*
-+ * rt_mutex_start_proxy_lock() detected a potential
-+ * deadlock when we tried to queue that waiter.
-+ * Drop the pi_state reference which we took above
-+ * and remove the pointer to the state from the
-+ * waiters futex_q object.
-+ */
-+ this->pi_state = NULL;
-+ put_pi_state(pi_state);
-+ futex_requeue_pi_complete(this, ret);
-+ /*
-+ * We stop queueing more waiters and let user space
-+ * deal with the mess.
-+ */
-+ break;
-+ }
-+ }
-+
-+ /*
-+ * We took an extra initial reference to the pi_state in
-+ * futex_proxy_trylock_atomic(). We need to drop it here again.
-+ */
-+ put_pi_state(pi_state);
-+
-+out_unlock:
-+ double_unlock_hb(hb1, hb2);
-+ wake_up_q(&wake_q);
-+ futex_hb_waiters_dec(hb2);
-+ return ret ? ret : task_count;
-+}
-+
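As a rough illustration of the plain (non-PI) path through futex_requeue(): a condition-variable style broadcast typically wakes one waiter and requeues the remaining ones onto the mutex word, so they serialize on the mutex instead of stampeding. Sketch only; cond_broadcast() is an illustrative name and real condvars (e.g. glibc's) are considerably more involved.

#include <limits.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static long cond_broadcast(uint32_t *cond, uint32_t *mutex, uint32_t cond_val)
{
	/* val = nr_wake; the timeout slot carries val2 = nr_requeue and
	 * val3 = the expected *cond value (cmpval). */
	return syscall(SYS_futex, cond, FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG,
		       1, (unsigned long)INT_MAX, mutex, cond_val);
}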
-+/**
-+ * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
-+ * @hb: the hash_bucket futex_q was originally enqueued on
-+ * @q: the futex_q woken while waiting to be requeued
-+ * @timeout: the timeout associated with the wait (NULL if none)
-+ *
-+ * Determine the cause for the early wakeup.
-+ *
-+ * Return:
-+ * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
-+ */
-+static inline
-+int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
-+ struct futex_q *q,
-+ struct hrtimer_sleeper *timeout)
-+{
-+ int ret;
-+
-+ /*
-+ * With the hb lock held, we avoid races while we process the wakeup.
-+ * We only need to hold hb (and not hb2) to ensure atomicity as the
-+ * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
-+ * It can't be requeued from uaddr2 to something else since we don't
-+ * support a PI aware source futex for requeue.
-+ */
-+ WARN_ON_ONCE(&hb->lock != q->lock_ptr);
-+
-+ /*
-+ * We were woken prior to requeue by a timeout or a signal.
-+ * Unqueue the futex_q and determine which it was.
-+ */
-+ plist_del(&q->list, &hb->chain);
-+ futex_hb_waiters_dec(hb);
-+
-+ /* Handle spurious wakeups gracefully */
-+ ret = -EWOULDBLOCK;
-+ if (timeout && !timeout->task)
-+ ret = -ETIMEDOUT;
-+ else if (signal_pending(current))
-+ ret = -ERESTARTNOINTR;
-+ return ret;
-+}
-+
-+/**
-+ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
-+ * @uaddr: the futex we initially wait on (non-pi)
-+ * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
-+ * the same type, no requeueing from private to shared, etc.
-+ * @val: the expected value of uaddr
-+ * @abs_time: absolute timeout
-+ * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
-+ * @uaddr2: the pi futex we will take prior to returning to user-space
-+ *
-+ * The caller will wait on uaddr and will be requeued by futex_requeue() to
-+ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
-+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
-+ * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
-+ * without one, the pi logic would not know which task to boost/deboost, if
-+ * there was a need to.
-+ *
-+ * We call schedule in futex_wait_queue() when we enqueue and return there
-+ * via the following--
-+ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
-+ * 2) wakeup on uaddr2 after a requeue
-+ * 3) signal
-+ * 4) timeout
-+ *
-+ * If 3, cleanup and return -ERESTARTNOINTR.
-+ *
-+ * If 2, we may then block on trying to take the rt_mutex and return via:
-+ * 5) successful lock
-+ * 6) signal
-+ * 7) timeout
-+ * 8) other lock acquisition failure
-+ *
-+ * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
-+ *
-+ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
-+ *
-+ * Return:
-+ * - 0 - On success;
-+ * - <0 - On error
-+ */
-+int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
-+ u32 val, ktime_t *abs_time, u32 bitset,
-+ u32 __user *uaddr2)
-+{
-+ struct hrtimer_sleeper timeout, *to;
-+ struct rt_mutex_waiter rt_waiter;
-+ struct futex_hash_bucket *hb;
-+ union futex_key key2 = FUTEX_KEY_INIT;
-+ struct futex_q q = futex_q_init;
-+ struct rt_mutex_base *pi_mutex;
-+ int res, ret;
-+
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+ return -ENOSYS;
-+
-+ if (uaddr == uaddr2)
-+ return -EINVAL;
-+
-+ if (!bitset)
-+ return -EINVAL;
-+
-+ to = futex_setup_timer(abs_time, &timeout, flags,
-+ current->timer_slack_ns);
-+
-+ /*
-+ * The waiter is allocated on our stack, manipulated by the requeue
-+ * code while we sleep on uaddr.
-+ */
-+ rt_mutex_init_waiter(&rt_waiter);
-+
-+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-+ if (unlikely(ret != 0))
-+ goto out;
-+
-+ q.bitset = bitset;
-+ q.rt_waiter = &rt_waiter;
-+ q.requeue_pi_key = &key2;
-+
-+ /*
-+ * Prepare to wait on uaddr. On success, it holds hb->lock and q
-+ * is initialized.
-+ */
-+ ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-+ if (ret)
-+ goto out;
-+
-+ /*
-+ * The check above which compares uaddrs is not sufficient for
-+ * shared futexes. We need to compare the keys:
-+ */
-+ if (futex_match(&q.key, &key2)) {
-+ futex_q_unlock(hb);
-+ ret = -EINVAL;
-+ goto out;
-+ }
-+
-+ /* Queue the futex_q, drop the hb lock, wait for wakeup. */
-+ futex_wait_queue(hb, &q, to);
-+
-+ switch (futex_requeue_pi_wakeup_sync(&q)) {
-+ case Q_REQUEUE_PI_IGNORE:
-+ /* The waiter is still on uaddr1 */
-+ spin_lock(&hb->lock);
-+ ret = handle_early_requeue_pi_wakeup(hb, &q, to);
-+ spin_unlock(&hb->lock);
-+ break;
-+
-+ case Q_REQUEUE_PI_LOCKED:
-+ /* The requeue acquired the lock */
-+ if (q.pi_state && (q.pi_state->owner != current)) {
-+ spin_lock(q.lock_ptr);
-+ ret = fixup_pi_owner(uaddr2, &q, true);
-+ /*
-+ * Drop the reference to the pi state which the
-+ * requeue_pi() code acquired for us.
-+ */
-+ put_pi_state(q.pi_state);
-+ spin_unlock(q.lock_ptr);
-+ /*
-+ * Adjust the return value. It's either -EFAULT or
-+ * success (1) but the caller expects 0 for success.
-+ */
-+ ret = ret < 0 ? ret : 0;
-+ }
-+ break;
-+
-+ case Q_REQUEUE_PI_DONE:
-+ /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */
-+ pi_mutex = &q.pi_state->pi_mutex;
-+ ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
-+
-+ /* Current is no longer pi_blocked_on */
-+ spin_lock(q.lock_ptr);
-+ if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
-+ ret = 0;
-+
-+ debug_rt_mutex_free_waiter(&rt_waiter);
-+ /*
-+ * Fixup the pi_state owner and possibly acquire the lock if we
-+ * haven't already.
-+ */
-+ res = fixup_pi_owner(uaddr2, &q, !ret);
-+ /*
-+ * If fixup_pi_owner() returned an error, propagate that. If it
-+ * acquired the lock, clear -ETIMEDOUT or -EINTR.
-+ */
-+ if (res)
-+ ret = (res < 0) ? res : 0;
-+
-+ futex_unqueue_pi(&q);
-+ spin_unlock(q.lock_ptr);
-+
-+ if (ret == -EINTR) {
-+ /*
-+ * We've already been requeued, but cannot restart
-+ * by calling futex_lock_pi() directly. We could
-+ * restart this syscall, but it would detect that
-+ * the user space "val" changed and return
-+ * -EWOULDBLOCK. Save the overhead of the restart
-+ * and return -EWOULDBLOCK directly.
-+ */
-+ ret = -EWOULDBLOCK;
-+ }
-+ break;
-+ default:
-+ BUG();
-+ }
-+
-+out:
-+ if (to) {
-+ hrtimer_cancel(&to->timer);
-+ destroy_hrtimer_on_stack(&to->timer);
-+ }
-+ return ret;
-+}
-+
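The requeue-PI pairing that futex_wait_requeue_pi() and futex_requeue() expect from userspace looks roughly as follows. cond_wait_pi() and cond_broadcast_pi() are illustrative names, and the PI 'mutex' word is assumed to follow the TID/FUTEX_WAITERS protocol sketched earlier; this is not glibc's condvar implementation.

#include <limits.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Waiter side: release the PI mutex first (not shown), then block on the
 * non-PI 'cond' word. On a normal wakeup the kernel has already requeued
 * us to, and acquired, the PI 'mutex' before we return. */
static long cond_wait_pi(uint32_t *cond, uint32_t *mutex, uint32_t cond_val)
{
	return syscall(SYS_futex, cond,
		       FUTEX_WAIT_REQUEUE_PI | FUTEX_PRIVATE_FLAG,
		       cond_val, NULL /* no timeout */, mutex, 0);
}

/* Signaler side: nr_wake must be 1 for requeue_pi; everyone else is
 * requeued onto the PI mutex. */
static long cond_broadcast_pi(uint32_t *cond, uint32_t *mutex, uint32_t cond_val)
{
	return syscall(SYS_futex, cond,
		       FUTEX_CMP_REQUEUE_PI | FUTEX_PRIVATE_FLAG,
		       1, (unsigned long)INT_MAX, mutex, cond_val);
}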
-diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
-new file mode 100644
-index 000000000..368e9c17f
---- /dev/null
-+++ b/kernel/futex/syscalls.c
-@@ -0,0 +1,396 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/compat.h>
-+#include <linux/syscalls.h>
-+#include <linux/time_namespace.h>
-+
-+#include "futex.h"
-+
-+/*
-+ * Support for robust futexes: the kernel cleans up held futexes at
-+ * thread exit time.
-+ *
-+ * Implementation: user-space maintains a per-thread list of locks it
-+ * is holding. Upon do_exit(), the kernel carefully walks this list,
-+ * and marks all locks that are owned by this thread with the
-+ * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
-+ * always manipulated with the lock held, so the list is private and
-+ * per-thread. Userspace also maintains a per-thread 'list_op_pending'
-+ * field, to allow the kernel to clean up if the thread dies after
-+ * acquiring the lock, but just before it could have added itself to
-+ * the list. There can only be one such pending lock.
-+ */
-+
-+/**
-+ * sys_set_robust_list() - Set the robust-futex list head of a task
-+ * @head: pointer to the list-head
-+ * @len: length of the list-head, as userspace expects
-+ */
-+SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
-+ size_t, len)
-+{
-+ if (!futex_cmpxchg_enabled)
-+ return -ENOSYS;
-+ /*
-+ * The kernel knows only one size for now:
-+ */
-+ if (unlikely(len != sizeof(*head)))
-+ return -EINVAL;
-+
-+ current->robust_list = head;
-+
-+ return 0;
-+}
-+
-+/**
-+ * sys_get_robust_list() - Get the robust-futex list head of a task
-+ * @pid: pid of the process [zero for current task]
-+ * @head_ptr: pointer to a list-head pointer, the kernel fills it in
-+ * @len_ptr: pointer to a length field, the kernel fills in the header size
-+ */
-+SYSCALL_DEFINE3(get_robust_list, int, pid,
-+ struct robust_list_head __user * __user *, head_ptr,
-+ size_t __user *, len_ptr)
-+{
-+ struct robust_list_head __user *head;
-+ unsigned long ret;
-+ struct task_struct *p;
-+
-+ if (!futex_cmpxchg_enabled)
-+ return -ENOSYS;
-+
-+ rcu_read_lock();
-+
-+ ret = -ESRCH;
-+ if (!pid)
-+ p = current;
-+ else {
-+ p = find_task_by_vpid(pid);
-+ if (!p)
-+ goto err_unlock;
-+ }
-+
-+ ret = -EPERM;
-+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-+ goto err_unlock;
-+
-+ head = p->robust_list;
-+ rcu_read_unlock();
-+
-+ if (put_user(sizeof(*head), len_ptr))
-+ return -EFAULT;
-+ return put_user(head, head_ptr);
-+
-+err_unlock:
-+ rcu_read_unlock();
-+
-+ return ret;
-+}
-+
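To make the robust-futex description above concrete: a thread registers its userspace-maintained list head once, typically at startup, and the kernel only walks it at exit time. Minimal sketch under the assumption that the uapi struct robust_list_head from <linux/futex.h> is available; register_robust_list() is an illustrative name.

#include <linux/futex.h>	/* struct robust_list_head */
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

static struct robust_list_head robust_head = {
	.list		 = { .next = &robust_head.list },  /* empty list */
	.futex_offset	 = 0,	/* offset from a list entry to its futex word */
	.list_op_pending = NULL,
};

static int register_robust_list(void)
{
	/* len must be sizeof(*head); the kernel rejects anything else. */
	return (int)syscall(SYS_set_robust_list, &robust_head,
			    sizeof(robust_head));
}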
-+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-+ u32 __user *uaddr2, u32 val2, u32 val3)
-+{
-+ int cmd = op & FUTEX_CMD_MASK;
-+ unsigned int flags = 0;
-+
-+ if (!(op & FUTEX_PRIVATE_FLAG))
-+ flags |= FLAGS_SHARED;
-+
-+ if (op & FUTEX_CLOCK_REALTIME) {
-+ flags |= FLAGS_CLOCKRT;
-+ if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
-+ cmd != FUTEX_LOCK_PI2)
-+ return -ENOSYS;
-+ }
-+
-+ switch (cmd) {
-+ case FUTEX_LOCK_PI:
-+ case FUTEX_LOCK_PI2:
-+ case FUTEX_UNLOCK_PI:
-+ case FUTEX_TRYLOCK_PI:
-+ case FUTEX_WAIT_REQUEUE_PI:
-+ case FUTEX_CMP_REQUEUE_PI:
-+ if (!futex_cmpxchg_enabled)
-+ return -ENOSYS;
-+ }
-+
-+ switch (cmd) {
-+ case FUTEX_WAIT:
-+ val3 = FUTEX_BITSET_MATCH_ANY;
-+ fallthrough;
-+ case FUTEX_WAIT_BITSET:
-+ return futex_wait(uaddr, flags, val, timeout, val3);
-+ case FUTEX_WAKE:
-+ val3 = FUTEX_BITSET_MATCH_ANY;
-+ fallthrough;
-+ case FUTEX_WAKE_BITSET:
-+ return futex_wake(uaddr, flags, val, val3);
-+ case FUTEX_REQUEUE:
-+ return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
-+ case FUTEX_CMP_REQUEUE:
-+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
-+ case FUTEX_WAKE_OP:
-+ return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
-+ case FUTEX_LOCK_PI:
-+ flags |= FLAGS_CLOCKRT;
-+ fallthrough;
-+ case FUTEX_LOCK_PI2:
-+ return futex_lock_pi(uaddr, flags, timeout, 0);
-+ case FUTEX_UNLOCK_PI:
-+ return futex_unlock_pi(uaddr, flags);
-+ case FUTEX_TRYLOCK_PI:
-+ return futex_lock_pi(uaddr, flags, NULL, 1);
-+ case FUTEX_WAIT_REQUEUE_PI:
-+ val3 = FUTEX_BITSET_MATCH_ANY;
-+ return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
-+ uaddr2);
-+ case FUTEX_CMP_REQUEUE_PI:
-+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
-+ }
-+ return -ENOSYS;
-+}
-+
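Glibc provides no futex() wrapper, so the multiplexing that do_futex() decodes is usually reached through a raw syscall helper such as the sketch below. Note how the fourth argument doubles as a timeout pointer for the waiting ops and as val2 for the requeue/wake-op commands, matching the (unsigned long)utime cast above. Helper names are illustrative.

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

static long futex(uint32_t *uaddr, int op, uint32_t val,
		  const struct timespec *timeout_or_val2,
		  uint32_t *uaddr2, uint32_t val3)
{
	return syscall(SYS_futex, uaddr, op, val, timeout_or_val2, uaddr2, val3);
}

/* Typical private wait/wake pair built on top of it: */
static long futex_wait_simple(uint32_t *uaddr, uint32_t expected)
{
	return futex(uaddr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected,
		     NULL, NULL, 0);
}

static long futex_wake_one(uint32_t *uaddr)
{
	return futex(uaddr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1, NULL, NULL, 0);
}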
-+static __always_inline bool futex_cmd_has_timeout(u32 cmd)
-+{
-+ switch (cmd) {
-+ case FUTEX_WAIT:
-+ case FUTEX_LOCK_PI:
-+ case FUTEX_LOCK_PI2:
-+ case FUTEX_WAIT_BITSET:
-+ case FUTEX_WAIT_REQUEUE_PI:
-+ return true;
-+ }
-+ return false;
-+}
-+
-+static __always_inline int
-+futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
-+{
-+ if (!timespec64_valid(ts))
-+ return -EINVAL;
-+
-+ *t = timespec64_to_ktime(*ts);
-+ if (cmd == FUTEX_WAIT)
-+ *t = ktime_add_safe(ktime_get(), *t);
-+ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
-+ *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
-+ return 0;
-+}
-+
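futex_init_timeout() above is why a FUTEX_WAIT timeout is specified relative to now, while FUTEX_WAIT_BITSET (and the PI/requeue-PI waits) take an absolute deadline, by default against CLOCK_MONOTONIC. A rough userspace sketch of the two conventions, private futexes and illustrative helper names assumed:

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

static long wait_relative_1s(uint32_t *uaddr, uint32_t expected)
{
	struct timespec rel = { .tv_sec = 1 };	/* 1 second from now */

	return syscall(SYS_futex, uaddr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
		       expected, &rel, NULL, 0);
}

static long wait_absolute_1s(uint32_t *uaddr, uint32_t expected)
{
	struct timespec abs;

	clock_gettime(CLOCK_MONOTONIC, &abs);
	abs.tv_sec += 1;			/* deadline: now + 1 second */

	return syscall(SYS_futex, uaddr, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG,
		       expected, &abs, NULL, FUTEX_BITSET_MATCH_ANY);
}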
-+SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
-+ const struct __kernel_timespec __user *, utime,
-+ u32 __user *, uaddr2, u32, val3)
-+{
-+ int ret, cmd = op & FUTEX_CMD_MASK;
-+ ktime_t t, *tp = NULL;
-+ struct timespec64 ts;
-+
-+ if (utime && futex_cmd_has_timeout(cmd)) {
-+ if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
-+ return -EFAULT;
-+ if (get_timespec64(&ts, utime))
-+ return -EFAULT;
-+ ret = futex_init_timeout(cmd, op, &ts, &t);
-+ if (ret)
-+ return ret;
-+ tp = &t;
-+ }
-+
-+ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
-+}
-+
-+/* Mask of available flags for each futex in futex_waitv list */
-+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
-+
-+/**
-+ * futex_parse_waitv - Parse a waitv array from userspace
-+ * @futexv: Kernel side list of waiters to be filled
-+ * @uwaitv: Userspace list to be parsed
-+ * @nr_futexes: Length of futexv
-+ *
-+ * Return: Error code on failure, 0 on success
-+ */
-+static int futex_parse_waitv(struct futex_vector *futexv,
-+ struct futex_waitv __user *uwaitv,
-+ unsigned int nr_futexes)
-+{
-+ struct futex_waitv aux;
-+ unsigned int i;
-+
-+ for (i = 0; i < nr_futexes; i++) {
-+ if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
-+ return -EFAULT;
-+
-+ if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
-+ return -EINVAL;
-+
-+ if (!(aux.flags & FUTEX_32))
-+ return -EINVAL;
-+
-+ futexv[i].w.flags = aux.flags;
-+ futexv[i].w.val = aux.val;
-+ futexv[i].w.uaddr = aux.uaddr;
-+ futexv[i].q = futex_q_init;
-+ }
-+
-+ return 0;
-+}
-+
-+/**
-+ * sys_futex_waitv - Wait on a list of futexes
-+ * @waiters: List of futexes to wait on
-+ * @nr_futexes: Length of futexv
-+ * @flags: Flag for timeout (monotonic/realtime)
-+ * @timeout: Optional absolute timeout.
-+ * @clockid: Clock to be used for the timeout, realtime or monotonic.
-+ *
-+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
-+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
-+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for the
-+ * operation. Each waiter has individual flags. The `flags` argument for the
-+ * syscall should be used solely for specifying the timeout as realtime, if
-+ * needed. Flags for private futexes, sizes, etc. should be used on the
-+ * individual flags of each waiter.
-+ *
-+ * Returns the array index of one of the awakened futexes. There is no
-+ * information about how many were awakened, or any particular attribute of
-+ * them (e.g. whether it was the first one awakened or has the smallest index).
-+ */
-+
-+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
-+ unsigned int, nr_futexes, unsigned int, flags,
-+ struct __kernel_timespec __user *, timeout, clockid_t, clockid)
-+{
-+ struct hrtimer_sleeper to;
-+ struct futex_vector *futexv;
-+ struct timespec64 ts;
-+ ktime_t time;
-+ int ret;
-+
-+ /* This syscall supports no flags for now */
-+ if (flags)
-+ return -EINVAL;
-+
-+ if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
-+ return -EINVAL;
-+
-+ if (timeout) {
-+ int flag_clkid = 0, flag_init = 0;
-+
-+ if (clockid == CLOCK_REALTIME) {
-+ flag_clkid = FLAGS_CLOCKRT;
-+ flag_init = FUTEX_CLOCK_REALTIME;
-+ }
-+
-+ if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
-+ return -EINVAL;
-+
-+ if (get_timespec64(&ts, timeout))
-+ return -EFAULT;
-+
-+ /*
-+ * Since there's no opcode for futex_waitv, use
-+ * FUTEX_WAIT_BITSET that uses absolute timeout as well
-+ */
-+ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
-+ if (ret)
-+ return ret;
-+
-+ futex_setup_timer(&time, &to, flag_clkid, 0);
-+ }
-+
-+ futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
-+ if (!futexv)
-+ return -ENOMEM;
-+
-+ ret = futex_parse_waitv(futexv, waiters, nr_futexes);
-+ if (!ret)
-+ ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
-+
-+ if (timeout) {
-+ hrtimer_cancel(&to.timer);
-+ destroy_hrtimer_on_stack(&to.timer);
-+ }
-+
-+ kfree(futexv);
-+ return ret;
-+}
-+
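A rough userspace sketch of the futex_waitv() syscall defined above, waiting on two private futexes at once. It assumes 5.16-era uapi headers providing struct futex_waitv and FUTEX_32; the guarded syscall number is an assumed x86_64 value for the case where glibc does not yet define SYS_futex_waitv.

#include <linux/futex.h>	/* struct futex_waitv, FUTEX_32 (5.16 uapi) */
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef SYS_futex_waitv
#define SYS_futex_waitv 449	/* assumed x86_64 syscall number */
#endif

static long wait_on_two(uint32_t *a, uint32_t a_val,
			uint32_t *b, uint32_t b_val)
{
	struct futex_waitv waiters[2];

	memset(waiters, 0, sizeof(waiters));	/* __reserved must be 0 */
	waiters[0].uaddr = (uintptr_t)a;
	waiters[0].val	 = a_val;
	waiters[0].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
	waiters[1].uaddr = (uintptr_t)b;
	waiters[1].val	 = b_val;
	waiters[1].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;

	/* flags must be 0; returns the index of a woken futex or -1/errno. */
	return syscall(SYS_futex_waitv, waiters, 2, 0, NULL, CLOCK_MONOTONIC);
}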
-+#ifdef CONFIG_COMPAT
-+COMPAT_SYSCALL_DEFINE2(set_robust_list,
-+ struct compat_robust_list_head __user *, head,
-+ compat_size_t, len)
-+{
-+ if (!futex_cmpxchg_enabled)
-+ return -ENOSYS;
-+
-+ if (unlikely(len != sizeof(*head)))
-+ return -EINVAL;
-+
-+ current->compat_robust_list = head;
-+
-+ return 0;
-+}
-+
-+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
-+ compat_uptr_t __user *, head_ptr,
-+ compat_size_t __user *, len_ptr)
-+{
-+ struct compat_robust_list_head __user *head;
-+ unsigned long ret;
-+ struct task_struct *p;
-+
-+ if (!futex_cmpxchg_enabled)
-+ return -ENOSYS;
-+
-+ rcu_read_lock();
-+
-+ ret = -ESRCH;
-+ if (!pid)
-+ p = current;
-+ else {
-+ p = find_task_by_vpid(pid);
-+ if (!p)
-+ goto err_unlock;
-+ }
-+
-+ ret = -EPERM;
-+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-+ goto err_unlock;
-+
-+ head = p->compat_robust_list;
-+ rcu_read_unlock();
-+
-+ if (put_user(sizeof(*head), len_ptr))
-+ return -EFAULT;
-+ return put_user(ptr_to_compat(head), head_ptr);
-+
-+err_unlock:
-+ rcu_read_unlock();
-+
-+ return ret;
-+}
-+#endif /* CONFIG_COMPAT */
-+
-+#ifdef CONFIG_COMPAT_32BIT_TIME
-+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
-+ const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
-+ u32, val3)
-+{
-+ int ret, cmd = op & FUTEX_CMD_MASK;
-+ ktime_t t, *tp = NULL;
-+ struct timespec64 ts;
-+
-+ if (utime && futex_cmd_has_timeout(cmd)) {
-+ if (get_old_timespec32(&ts, utime))
-+ return -EFAULT;
-+ ret = futex_init_timeout(cmd, op, &ts, &t);
-+ if (ret)
-+ return ret;
-+ tp = &t;
-+ }
-+
-+ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
-+}
-+#endif /* CONFIG_COMPAT_32BIT_TIME */
-+
-diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
-new file mode 100644
-index 000000000..b45597aab
---- /dev/null
-+++ b/kernel/futex/waitwake.c
-@@ -0,0 +1,708 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/sched/task.h>
-+#include <linux/sched/signal.h>
-+#include <linux/freezer.h>
-+
-+#include "futex.h"
-+
-+/*
-+ * READ this before attempting to hack on futexes!
-+ *
-+ * Basic futex operation and ordering guarantees
-+ * =============================================
-+ *
-+ * The waiter reads the futex value in user space and calls
-+ * futex_wait(). This function computes the hash bucket and acquires
-+ * the hash bucket lock. After that it reads the futex user space value
-+ * again and verifies that the data has not changed. If it has not changed
-+ * it enqueues itself into the hash bucket, releases the hash bucket lock
-+ * and schedules.
-+ *
-+ * The waker side modifies the user space value of the futex and calls
-+ * futex_wake(). This function computes the hash bucket and acquires the
-+ * hash bucket lock. Then it looks for waiters on that futex in the hash
-+ * bucket and wakes them.
-+ *
-+ * In futex wake up scenarios where no tasks are blocked on a futex, we can
-+ * avoid taking the hb spinlock and simply return. In order for this
-+ * optimization to work, ordering guarantees must exist so that the waiter
-+ * being added to the list is acknowledged when the list is concurrently being
-+ * checked by the waker, avoiding scenarios like the following:
-+ *
-+ * CPU 0 CPU 1
-+ * val = *futex;
-+ * sys_futex(WAIT, futex, val);
-+ * futex_wait(futex, val);
-+ * uval = *futex;
-+ * *futex = newval;
-+ * sys_futex(WAKE, futex);
-+ * futex_wake(futex);
-+ * if (queue_empty())
-+ * return;
-+ * if (uval == val)
-+ * lock(hash_bucket(futex));
-+ * queue();
-+ * unlock(hash_bucket(futex));
-+ * schedule();
-+ *
-+ * This would cause the waiter on CPU 0 to wait forever because it
-+ * missed the transition of the user space value from val to newval
-+ * and the waker did not find the waiter in the hash bucket queue.
-+ *
-+ * The correct serialization ensures that a waiter either observes
-+ * the changed user space value before blocking or is woken by a
-+ * concurrent waker:
-+ *
-+ * CPU 0 CPU 1
-+ * val = *futex;
-+ * sys_futex(WAIT, futex, val);
-+ * futex_wait(futex, val);
-+ *
-+ * waiters++; (a)
-+ * smp_mb(); (A) <-- paired with -.
-+ * |
-+ * lock(hash_bucket(futex)); |
-+ * |
-+ * uval = *futex; |
-+ * | *futex = newval;
-+ * | sys_futex(WAKE, futex);
-+ * | futex_wake(futex);
-+ * |
-+ * `--------> smp_mb(); (B)
-+ * if (uval == val)
-+ * queue();
-+ * unlock(hash_bucket(futex));
-+ * schedule(); if (waiters)
-+ * lock(hash_bucket(futex));
-+ * else wake_waiters(futex);
-+ * waiters--; (b) unlock(hash_bucket(futex));
-+ *
-+ * Where (A) orders the waiters increment and the futex value read through
-+ * atomic operations (see futex_hb_waiters_inc) and where (B) orders the write
-+ * to futex and the waiters read (see futex_hb_waiters_pending()).
-+ *
-+ * This yields the following case (where X:=waiters, Y:=futex):
-+ *
-+ * X = Y = 0
-+ *
-+ * w[X]=1 w[Y]=1
-+ * MB MB
-+ * r[Y]=y r[X]=x
-+ *
-+ * Which guarantees that x==0 && y==0 is impossible; which translates back into
-+ * the guarantee that we cannot both miss the futex variable change and the
-+ * enqueue.
-+ *
-+ * Note that a new waiter is accounted for in (a) even when it is possible that
-+ * the wait call can return an error, in which case we backtrack from it in (b).
-+ * Refer to the comment in futex_q_lock().
-+ *
-+ * Similarly, in order to account for waiters being requeued on another
-+ * address we always increment the waiters for the destination bucket before
-+ * acquiring the lock. It then decrements them again after releasing it -
-+ * the code that actually moves the futex(es) between hash buckets (requeue_futex)
-+ * will do the additional required waiter count housekeeping. This is done for
-+ * double_lock_hb() and double_unlock_hb(), respectively.
-+ */
-+
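The ordering argument above is what makes the canonical userspace pattern safe: the waiter re-reads the futex word under the bucket lock inside futex_wait(), so a wake racing with the store cannot be lost. Minimal sketch with a private futex; waiter()/waker() are illustrative names.

#include <linux/futex.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static _Atomic uint32_t flag;

static void waiter(void)
{
	/* If the waker already stored 1, FUTEX_WAIT fails with EAGAIN instead
	 * of sleeping; otherwise the kernel re-checks *uaddr == 0 under the
	 * bucket lock before blocking, so the wakeup cannot be missed. */
	while (atomic_load(&flag) == 0)
		syscall(SYS_futex, &flag, FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
			0, NULL, NULL, 0);
}

static void waker(void)
{
	atomic_store(&flag, 1);
	syscall(SYS_futex, &flag, FUTEX_WAKE | FUTEX_PRIVATE_FLAG,
		1, NULL, NULL, 0);
}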
-+/*
-+ * The hash bucket lock must be held when this is called.
-+ * Afterwards, the futex_q must not be accessed. Callers
-+ * must ensure to later call wake_up_q() for the actual
-+ * wakeups to occur.
-+ */
-+void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
-+{
-+ struct task_struct *p = q->task;
-+
-+ if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
-+ return;
-+
-+ get_task_struct(p);
-+ __futex_unqueue(q);
-+ /*
-+ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
-+ * is written, without taking any locks. This is possible in the event
-+ * of a spurious wakeup, for example. A memory barrier is required here
-+ * to prevent the following store to lock_ptr from getting ahead of the
-+ * plist_del in __futex_unqueue().
-+ */
-+ smp_store_release(&q->lock_ptr, NULL);
-+
-+ /*
-+ * Queue the task for later wakeup for after we've released
-+ * the hb->lock.
-+ */
-+ wake_q_add_safe(wake_q, p);
-+}
-+
-+/*
-+ * Wake up waiters matching bitset queued on this futex (uaddr).
-+ */
-+int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
-+{
-+ struct futex_hash_bucket *hb;
-+ struct futex_q *this, *next;
-+ union futex_key key = FUTEX_KEY_INIT;
-+ int ret;
-+ DEFINE_WAKE_Q(wake_q);
-+
-+ if (!bitset)
-+ return -EINVAL;
-+
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ hb = futex_hash(&key);
-+
-+ /* Make sure we really have tasks to wakeup */
-+ if (!futex_hb_waiters_pending(hb))
-+ return ret;
-+
-+ spin_lock(&hb->lock);
-+
-+ plist_for_each_entry_safe(this, next, &hb->chain, list) {
-+ if (futex_match(&this->key, &key)) {
-+ if (this->pi_state || this->rt_waiter) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+
-+ /* Check if one of the bits is set in both bitsets */
-+ if (!(this->bitset & bitset))
-+ continue;
-+
-+ futex_wake_mark(&wake_q, this);
-+ if (++ret >= nr_wake)
-+ break;
-+ }
-+ }
-+
-+ spin_unlock(&hb->lock);
-+ wake_up_q(&wake_q);
-+ return ret;
-+}
-+
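The bitset intersection test in futex_wake() lets waiters subscribe to a subset of wakeups. A small illustrative sketch with two hypothetical "channel" masks:

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

#define CHANNEL_A 0x1u
#define CHANNEL_B 0x2u

static long wait_on_channel_a(uint32_t *word, uint32_t expected)
{
	/* Block while *word == expected; only wakeups whose bitset intersects
	 * CHANNEL_A (or a signal/timeout) end the wait. */
	return syscall(SYS_futex, word, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG,
		       expected, NULL, NULL, CHANNEL_A);
}

static long wake_channel_b(uint32_t *word)
{
	/* Will not wake the waiter above: CHANNEL_B & CHANNEL_A == 0. */
	return syscall(SYS_futex, word, FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG,
		       1, NULL, NULL, CHANNEL_B);
}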
-+static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
-+{
-+ unsigned int op = (encoded_op & 0x70000000) >> 28;
-+ unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
-+ int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
-+ int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
-+ int oldval, ret;
-+
-+ if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
-+ if (oparg < 0 || oparg > 31) {
-+ char comm[sizeof(current->comm)];
-+ /*
-+ * kill this print and return -EINVAL when userspace
-+ * is sane again
-+ */
-+ pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
-+ get_task_comm(comm, current), oparg);
-+ oparg &= 31;
-+ }
-+ oparg = 1 << oparg;
-+ }
-+
-+ pagefault_disable();
-+ ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
-+ pagefault_enable();
-+ if (ret)
-+ return ret;
-+
-+ switch (cmp) {
-+ case FUTEX_OP_CMP_EQ:
-+ return oldval == cmparg;
-+ case FUTEX_OP_CMP_NE:
-+ return oldval != cmparg;
-+ case FUTEX_OP_CMP_LT:
-+ return oldval < cmparg;
-+ case FUTEX_OP_CMP_GE:
-+ return oldval >= cmparg;
-+ case FUTEX_OP_CMP_LE:
-+ return oldval <= cmparg;
-+ case FUTEX_OP_CMP_GT:
-+ return oldval > cmparg;
-+ default:
-+ return -ENOSYS;
-+ }
-+}
-+
-+/*
-+ * Wake up all waiters hashed on the physical page that is mapped
-+ * to this virtual address:
-+ */
-+int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
-+ int nr_wake, int nr_wake2, int op)
-+{
-+ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-+ struct futex_hash_bucket *hb1, *hb2;
-+ struct futex_q *this, *next;
-+ int ret, op_ret;
-+ DEFINE_WAKE_Q(wake_q);
-+
-+retry:
-+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ hb1 = futex_hash(&key1);
-+ hb2 = futex_hash(&key2);
-+
-+retry_private:
-+ double_lock_hb(hb1, hb2);
-+ op_ret = futex_atomic_op_inuser(op, uaddr2);
-+ if (unlikely(op_ret < 0)) {
-+ double_unlock_hb(hb1, hb2);
-+
-+ if (!IS_ENABLED(CONFIG_MMU) ||
-+ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
-+ /*
-+ * we don't get EFAULT from MMU faults if we don't have
-+ * an MMU, but we might get them from range checking
-+ */
-+ ret = op_ret;
-+ return ret;
-+ }
-+
-+ if (op_ret == -EFAULT) {
-+ ret = fault_in_user_writeable(uaddr2);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ cond_resched();
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
-+ goto retry;
-+ }
-+
-+ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-+ if (futex_match(&this->key, &key1)) {
-+ if (this->pi_state || this->rt_waiter) {
-+ ret = -EINVAL;
-+ goto out_unlock;
-+ }
-+ futex_wake_mark(&wake_q, this);
-+ if (++ret >= nr_wake)
-+ break;
-+ }
-+ }
-+
-+ if (op_ret > 0) {
-+ op_ret = 0;
-+ plist_for_each_entry_safe(this, next, &hb2->chain, list) {
-+ if (futex_match(&this->key, &key2)) {
-+ if (this->pi_state || this->rt_waiter) {
-+ ret = -EINVAL;
-+ goto out_unlock;
-+ }
-+ futex_wake_mark(&wake_q, this);
-+ if (++op_ret >= nr_wake2)
-+ break;
-+ }
-+ }
-+ ret += op_ret;
-+ }
-+
-+out_unlock:
-+ double_unlock_hb(hb1, hb2);
-+ wake_up_q(&wake_q);
-+ return ret;
-+}
-+
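For reference, the encoded op that futex_atomic_op_inuser() decodes is built in userspace with the uapi FUTEX_OP() macro. The sketch below wakes one waiter on uaddr1, atomically sets *uaddr2 to 1, and wakes one waiter on uaddr2 only if the old value there was 0; wake_op_example() is an illustrative name.

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static long wake_op_example(uint32_t *uaddr1, uint32_t *uaddr2)
{
	/* op = "set *uaddr2 to 1; compare the old value with == 0". */
	uint32_t op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_EQ, 0);

	/* val = nr_wake on uaddr1, the timeout slot carries nr_wake2 = 1
	 * for uaddr2, val3 = the encoded op. */
	return syscall(SYS_futex, uaddr1, FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG,
		       1, (unsigned long)1, uaddr2, op);
}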
-+static long futex_wait_restart(struct restart_block *restart);
-+
-+/**
-+ * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
-+ * @hb: the futex hash bucket, must be locked by the caller
-+ * @q: the futex_q to queue up on
-+ * @timeout: the prepared hrtimer_sleeper, or null for no timeout
-+ */
-+void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
-+ struct hrtimer_sleeper *timeout)
-+{
-+ /*
-+ * The task state is guaranteed to be set before another task can
-+ * wake it. set_current_state() is implemented using smp_store_mb() and
-+ * futex_queue() calls spin_unlock() upon completion, both serializing
-+ * access to the hash list and forcing another memory barrier.
-+ */
-+ set_current_state(TASK_INTERRUPTIBLE);
-+ futex_queue(q, hb);
-+
-+ /* Arm the timer */
-+ if (timeout)
-+ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
-+
-+ /*
-+ * If we have been removed from the hash list, then another task
-+ * has tried to wake us, and we can skip the call to schedule().
-+ */
-+ if (likely(!plist_node_empty(&q->list))) {
-+ /*
-+ * If the timer has already expired, current will already be
-+ * flagged for rescheduling. Only call schedule if there
-+ * is no timeout, or if it has yet to expire.
-+ */
-+ if (!timeout || timeout->task)
-+ freezable_schedule();
-+ }
-+ __set_current_state(TASK_RUNNING);
-+}
-+
-+/**
-+ * unqueue_multiple - Remove various futexes from their hash bucket
-+ * @v: The list of futexes to unqueue
-+ * @count: Number of futexes in the list
-+ *
-+ * Helper to unqueue a list of futexes. This can't fail.
-+ *
-+ * Return:
-+ * - >=0 - Index of the last futex that was awoken;
-+ * - -1 - No futex was awoken
-+ */
-+static int unqueue_multiple(struct futex_vector *v, int count)
-+{
-+ int ret = -1, i;
-+
-+ for (i = 0; i < count; i++) {
-+ if (!futex_unqueue(&v[i].q))
-+ ret = i;
-+ }
-+
-+ return ret;
-+}
-+
-+/**
-+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
-+ * @vs: The futex list to wait on
-+ * @count: The size of the list
-+ * @awaken: Index of the last awoken futex, if any. Used to notify the
-+ * caller that it can return this index to userspace (return parameter)
-+ *
-+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
-+ * the futex list is invalid or if any futex was already awoken. On success the
-+ * task is ready for interruptible sleep.
-+ *
-+ * Return:
-+ * - 1 - One of the futexes was awoken by another thread
-+ * - 0 - Success
-+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
-+ */
-+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *awaken)
-+{
-+ struct futex_hash_bucket *hb;
-+ bool retry = false;
-+ int ret, i;
-+ u32 uval;
-+
-+ /*
-+ * Enqueuing multiple futexes is tricky, because we need to enqueue
-+ * each futex in the list before dealing with the next one to avoid
-+ * deadlocking on the hash bucket. But, before enqueuing, we need to
-+ * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
-+ * absorb any awake events, which cannot be done before the
-+ * get_futex_key of the next key, because it calls get_user_pages,
-+ * which can sleep. Thus, we fetch the list of futexes keys in two
-+ * steps, by first pinning all the memory keys in the futex key, and
-+ * only then we read each key and queue the corresponding futex.
-+ *
-+ * Private futexes don't need to recalculate the hash on retry, so skip
-+ * get_futex_key() when retrying.
-+ */
-+retry:
-+ for (i = 0; i < count; i++) {
-+ if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
-+ continue;
-+
-+ ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
-+ !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
-+ &vs[i].q.key, FUTEX_READ);
-+
-+ if (unlikely(ret))
-+ return ret;
-+ }
-+
-+ set_current_state(TASK_INTERRUPTIBLE);
-+
-+ for (i = 0; i < count; i++) {
-+ u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
-+ struct futex_q *q = &vs[i].q;
-+ u32 val = (u32)vs[i].w.val;
-+
-+ hb = futex_q_lock(q);
-+ ret = futex_get_value_locked(&uval, uaddr);
-+
-+ if (!ret && uval == val) {
-+ /*
-+ * The bucket lock can't be held while dealing with the
-+ * next futex. Queue each futex at this moment so hb can
-+ * be unlocked.
-+ */
-+ futex_queue(q, hb);
-+ continue;
-+ }
-+
-+ futex_q_unlock(hb);
-+ __set_current_state(TASK_RUNNING);
-+
-+ /*
-+ * Even if something went wrong, if we find out that a futex
-+ * was awoken, we don't return an error but return this index to
-+ * userspace.
-+ */
-+ *awaken = unqueue_multiple(vs, i);
-+ if (*awaken >= 0)
-+ return 1;
-+
-+ if (ret) {
-+ /*
-+ * If we need to handle a page fault, we need to do so
-+ * without any lock and any enqueued futex (otherwise
-+ * we could lose some wakeup). So we do it here, after
-+ * undoing all the work done so far. On success, we
-+ * retry all the work.
-+ */
-+ if (get_user(uval, uaddr))
-+ return -EFAULT;
-+
-+ retry = true;
-+ goto retry;
-+ }
-+
-+ if (uval != val)
-+ return -EWOULDBLOCK;
-+ }
-+
-+ return 0;
-+}
-+
-+/**
-+ * futex_sleep_multiple - Check sleeping conditions and sleep
-+ * @vs: List of futexes to wait for
-+ * @count: Length of vs
-+ * @to: Timeout
-+ *
-+ * Sleep if and only if the timeout hasn't expired and no futex on the list has
-+ * been awoken.
-+ */
-+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
-+ struct hrtimer_sleeper *to)
-+{
-+ if (to && !to->task)
-+ return;
-+
-+ for (; count; count--, vs++) {
-+ if (!READ_ONCE(vs->q.lock_ptr))
-+ return;
-+ }
-+
-+ freezable_schedule();
-+}
-+
-+/**
-+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes
-+ * @vs: The list of futexes to wait on
-+ * @count: The number of objects
-+ * @to: Timeout before giving up and returning to userspace
-+ *
-+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
-+ * sleeps on a group of futexes and returns on the first futex that was
-+ * woken, or after the timeout has elapsed.
-+ *
-+ * Return:
-+ * - >=0 - Index of the futex that was awoken
-+ * - <0 - On error
-+ */
-+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
-+ struct hrtimer_sleeper *to)
-+{
-+ int ret, hint = 0;
-+
-+ if (to)
-+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
-+
-+ while (1) {
-+ ret = futex_wait_multiple_setup(vs, count, &hint);
-+ if (ret) {
-+ if (ret > 0) {
-+ /* A futex was awoken during setup */
-+ ret = hint;
-+ }
-+ return ret;
-+ }
-+
-+ futex_sleep_multiple(vs, count, to);
-+
-+ __set_current_state(TASK_RUNNING);
-+
-+ ret = unqueue_multiple(vs, count);
-+ if (ret >= 0)
-+ return ret;
-+
-+ if (to && !to->task)
-+ return -ETIMEDOUT;
-+ else if (signal_pending(current))
-+ return -ERESTARTSYS;
-+ /*
-+ * The final case is a spurious wakeup, for
-+ * which just retry.
-+ */
-+ }
-+}
-+
-+/**
-+ * futex_wait_setup() - Prepare to wait on a futex
-+ * @uaddr: the futex userspace address
-+ * @val: the expected value
-+ * @flags: futex flags (FLAGS_SHARED, etc.)
-+ * @q: the associated futex_q
-+ * @hb: storage for hash_bucket pointer to be returned to caller
-+ *
-+ * Setup the futex_q and locate the hash_bucket. Get the futex value and
-+ * compare it with the expected value. Handle atomic faults internally.
-+ * Return with the hb lock held on success, and unlocked on failure.
-+ *
-+ * Return:
-+ * - 0 - uaddr contains val and hb has been locked;
-+ * - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
-+ */
-+int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-+ struct futex_q *q, struct futex_hash_bucket **hb)
-+{
-+ u32 uval;
-+ int ret;
-+
-+ /*
-+ * Access the page AFTER the hash-bucket is locked.
-+ * Order is important:
-+ *
-+ * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
-+ * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
-+ *
-+ * The basic logical guarantee of a futex is that it blocks ONLY
-+ * if cond(var) is known to be true at the time of blocking, for
-+ * any cond. If we locked the hash-bucket after testing *uaddr, that
-+ * would open a race condition where we could block indefinitely with
-+ * cond(var) false, which would violate the guarantee.
-+ *
-+ * On the other hand, we insert q and release the hash-bucket only
-+ * after testing *uaddr. This guarantees that futex_wait() will NOT
-+ * absorb a wakeup if *uaddr does not match the desired values
-+ * while the syscall executes.
-+ */
-+retry:
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+retry_private:
-+ *hb = futex_q_lock(q);
-+
-+ ret = futex_get_value_locked(&uval, uaddr);
-+
-+ if (ret) {
-+ futex_q_unlock(*hb);
-+
-+ ret = get_user(uval, uaddr);
-+ if (ret)
-+ return ret;
-+
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
-+
-+ goto retry;
-+ }
-+
-+ if (uval != val) {
-+ futex_q_unlock(*hb);
-+ ret = -EWOULDBLOCK;
-+ }
-+
-+ return ret;
-+}
-+
-+int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
-+{
-+ struct hrtimer_sleeper timeout, *to;
-+ struct restart_block *restart;
-+ struct futex_hash_bucket *hb;
-+ struct futex_q q = futex_q_init;
-+ int ret;
-+
-+ if (!bitset)
-+ return -EINVAL;
-+ q.bitset = bitset;
-+
-+ to = futex_setup_timer(abs_time, &timeout, flags,
-+ current->timer_slack_ns);
-+retry:
-+ /*
-+ * Prepare to wait on uaddr. On success, it holds hb->lock and q
-+ * is initialized.
-+ */
-+ ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-+ if (ret)
-+ goto out;
-+
-+ /* futex_queue and wait for wakeup, timeout, or a signal. */
-+ futex_wait_queue(hb, &q, to);
-+
-+ /* If we were woken (and unqueued), we succeeded, whatever. */
-+ ret = 0;
-+ if (!futex_unqueue(&q))
-+ goto out;
-+ ret = -ETIMEDOUT;
-+ if (to && !to->task)
-+ goto out;
-+
-+ /*
-+ * We expect signal_pending(current), but we might be the
-+ * victim of a spurious wakeup as well.
-+ */
-+ if (!signal_pending(current))
-+ goto retry;
-+
-+ ret = -ERESTARTSYS;
-+ if (!abs_time)
-+ goto out;
-+
-+ restart = &current->restart_block;
-+ restart->futex.uaddr = uaddr;
-+ restart->futex.val = val;
-+ restart->futex.time = *abs_time;
-+ restart->futex.bitset = bitset;
-+ restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
-+
-+ ret = set_restart_fn(restart, futex_wait_restart);
-+
-+out:
-+ if (to) {
-+ hrtimer_cancel(&to->timer);
-+ destroy_hrtimer_on_stack(&to->timer);
-+ }
-+ return ret;
-+}
-+
-+static long futex_wait_restart(struct restart_block *restart)
-+{
-+ u32 __user *uaddr = restart->futex.uaddr;
-+ ktime_t t, *tp = NULL;
-+
-+ if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
-+ t = restart->futex.time;
-+ tp = &t;
-+ }
-+ restart->fn = do_no_restart_syscall;
-+
-+ return (long)futex_wait(uaddr, restart->futex.flags,
-+ restart->futex.val, tp, restart->futex.bitset);
-+}
-+
-diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index f43d89d92..d1944258c 100644
---- a/kernel/sys_ni.c
-+++ b/kernel/sys_ni.c
-@@ -143,13 +143,14 @@ COND_SYSCALL(capset);
- /* __ARCH_WANT_SYS_CLONE3 */
- COND_SYSCALL(clone3);
-
--/* kernel/futex.c */
-+/* kernel/futex/syscalls.c */
- COND_SYSCALL(futex);
- COND_SYSCALL(futex_time32);
- COND_SYSCALL(set_robust_list);
- COND_SYSCALL_COMPAT(set_robust_list);
- COND_SYSCALL(get_robust_list);
- COND_SYSCALL_COMPAT(get_robust_list);
-+COND_SYSCALL(futex_waitv);
-
- /* kernel/hrtimer.c */
-
-diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
-index 0e78b49d0..fbcbdb696 100644
---- a/tools/testing/selftests/futex/functional/.gitignore
-+++ b/tools/testing/selftests/futex/functional/.gitignore
-@@ -8,3 +8,4 @@ futex_wait_uninitialized_heap
- futex_wait_wouldblock
- futex_wait
- futex_requeue
-+futex_waitv
-diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
-index bd1fec59e..5cc38de9d 100644
---- a/tools/testing/selftests/futex/functional/Makefile
-+++ b/tools/testing/selftests/futex/functional/Makefile
-@@ -17,7 +17,8 @@ TEST_GEN_FILES := \
- futex_wait_uninitialized_heap \
- futex_wait_private_mapped_file \
- futex_wait \
-- futex_requeue
-+ futex_requeue \
-+ futex_waitv
-
- TEST_PROGS := run.sh
-
-diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
-index 1f8f6daaf..3651ce17b 100644
---- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
-+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
-@@ -17,6 +17,7 @@
-
- #include <pthread.h>
- #include "futextest.h"
-+#include "futex2test.h"
- #include "logging.h"
-
- #define TEST_NAME "futex-wait-timeout"
-@@ -96,6 +97,12 @@ int main(int argc, char *argv[])
- struct timespec to;
- pthread_t thread;
- int c;
-+ struct futex_waitv waitv = {
-+ .uaddr = (uintptr_t)&f1,
-+ .val = f1,
-+ .flags = FUTEX_32,
-+ .__reserved = 0
-+ };
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
-@@ -118,7 +125,7 @@ int main(int argc, char *argv[])
- }
-
- ksft_print_header();
-- ksft_set_plan(7);
-+ ksft_set_plan(9);
- ksft_print_msg("%s: Block on a futex and wait for timeout\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
-@@ -175,6 +182,18 @@ int main(int argc, char *argv[])
- res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
-
-+ /* futex_waitv with CLOCK_MONOTONIC */
-+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
-+ return RET_FAIL;
-+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
-+ test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT);
-+
-+ /* futex_waitv with CLOCK_REALTIME */
-+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
-+ return RET_FAIL;
-+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
-+ test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT);
-+
- ksft_print_cnts();
- return ret;
- }
-diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
-index 0ae390ff8..7d7a6a06c 100644
---- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
-+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
-@@ -22,6 +22,7 @@
- #include <string.h>
- #include <time.h>
- #include "futextest.h"
-+#include "futex2test.h"
- #include "logging.h"
-
- #define TEST_NAME "futex-wait-wouldblock"
-@@ -42,6 +43,12 @@ int main(int argc, char *argv[])
- futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
- int c;
-+ struct futex_waitv waitv = {
-+ .uaddr = (uintptr_t)&f1,
-+ .val = f1+1,
-+ .flags = FUTEX_32,
-+ .__reserved = 0
-+ };
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
-@@ -61,18 +68,44 @@ int main(int argc, char *argv[])
- }
-
- ksft_print_header();
-- ksft_set_plan(1);
-+ ksft_set_plan(2);
- ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
- info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
- res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
- if (!res || errno != EWOULDBLOCK) {
-- fail("futex_wait returned: %d %s\n",
-- res ? errno : res, res ? strerror(errno) : "");
-+ ksft_test_result_fail("futex_wait returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
- ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_wait\n");
- }
-
-- print_result(TEST_NAME, ret);
-+ if (clock_gettime(CLOCK_MONOTONIC, &to)) {
-+ error("clock_gettime failed\n", errno);
-+ return errno;
-+ }
-+
-+ to.tv_nsec += timeout_ns;
-+
-+ if (to.tv_nsec >= 1000000000) {
-+ to.tv_sec++;
-+ to.tv_nsec -= 1000000000;
-+ }
-+
-+ info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
-+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
-+ if (!res || errno != EWOULDBLOCK) {
-+ ksft_test_result_pass("futex_waitv returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv\n");
-+ }
-+
-+ ksft_print_cnts();
- return ret;
- }
-diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
-new file mode 100644
-index 000000000..a94337f67
---- /dev/null
-+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
-@@ -0,0 +1,237 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * futex_waitv() test by André Almeida <andrealmeid@collabora.com>
-+ *
-+ * Copyright 2021 Collabora Ltd.
-+ */
-+
-+#include <errno.h>
-+#include <error.h>
-+#include <getopt.h>
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <time.h>
-+#include <pthread.h>
-+#include <stdint.h>
-+#include <sys/shm.h>
-+#include "futextest.h"
-+#include "futex2test.h"
-+#include "logging.h"
-+
-+#define TEST_NAME "futex-wait"
-+#define WAKE_WAIT_US 10000
-+#define NR_FUTEXES 30
-+static struct futex_waitv waitv[NR_FUTEXES];
-+u_int32_t futexes[NR_FUTEXES] = {0};
-+
-+void usage(char *prog)
-+{
-+ printf("Usage: %s\n", prog);
-+ printf(" -c Use color\n");
-+ printf(" -h Display this help message\n");
-+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
-+ VQUIET, VCRITICAL, VINFO);
-+}
-+
-+void *waiterfn(void *arg)
-+{
-+ struct timespec to;
-+ int res;
-+
-+ /* setting absolute timeout for futex2 */
-+ if (clock_gettime(CLOCK_MONOTONIC, &to))
-+ error("gettime64 failed\n", errno);
-+
-+ to.tv_sec++;
-+
-+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+ if (res < 0) {
-+ ksft_test_result_fail("futex_waitv returned: %d %s\n",
-+ errno, strerror(errno));
-+ } else if (res != NR_FUTEXES - 1) {
-+ ksft_test_result_fail("futex_waitv returned: %d, expecting %d\n",
-+ res, NR_FUTEXES - 1);
-+ }
-+
-+ return NULL;
-+}
-+
-+int main(int argc, char *argv[])
-+{
-+ pthread_t waiter;
-+ int res, ret = RET_PASS;
-+ struct timespec to;
-+ int c, i;
-+
-+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
-+ switch (c) {
-+ case 'c':
-+ log_color(1);
-+ break;
-+ case 'h':
-+ usage(basename(argv[0]));
-+ exit(0);
-+ case 'v':
-+ log_verbosity(atoi(optarg));
-+ break;
-+ default:
-+ usage(basename(argv[0]));
-+ exit(1);
-+ }
-+ }
-+
-+ ksft_print_header();
-+ ksft_set_plan(7);
-+ ksft_print_msg("%s: Test FUTEX_WAITV\n",
-+ basename(argv[0]));
-+
-+ for (i = 0; i < NR_FUTEXES; i++) {
-+ waitv[i].uaddr = (uintptr_t)&futexes[i];
-+ waitv[i].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
-+ waitv[i].val = 0;
-+ waitv[i].__reserved = 0;
-+ }
-+
-+ /* Private waitv */
-+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
-+ error("pthread_create failed\n", errno);
-+
-+ usleep(WAKE_WAIT_US);
-+
-+ res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, FUTEX_PRIVATE_FLAG);
-+ if (res != 1) {
-+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv private\n");
-+ }
-+
-+ /* Shared waitv */
-+ for (i = 0; i < NR_FUTEXES; i++) {
-+ int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
-+
-+ if (shm_id < 0) {
-+ perror("shmget");
-+ exit(1);
-+ }
-+
-+ unsigned int *shared_data = shmat(shm_id, NULL, 0);
-+
-+ *shared_data = 0;
-+ waitv[i].uaddr = (uintptr_t)shared_data;
-+ waitv[i].flags = FUTEX_32;
-+ waitv[i].val = 0;
-+ waitv[i].__reserved = 0;
-+ }
-+
-+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
-+ error("pthread_create failed\n", errno);
-+
-+ usleep(WAKE_WAIT_US);
-+
-+ res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, 0);
-+ if (res != 1) {
-+ ksft_test_result_fail("futex_wake shared returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv shared\n");
-+ }
-+
-+ for (i = 0; i < NR_FUTEXES; i++)
-+ shmdt(u64_to_ptr(waitv[i].uaddr));
-+
-+ /* Testing a waiter without FUTEX_32 flag */
-+ waitv[0].flags = FUTEX_PRIVATE_FLAG;
-+
-+ if (clock_gettime(CLOCK_MONOTONIC, &to))
-+ error("gettime64 failed\n", errno);
-+
-+ to.tv_sec++;
-+
-+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+ if (res == EINVAL) {
-+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv without FUTEX_32\n");
-+ }
-+
-+ /* Testing a waiter with an unaligned address */
-+ waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
-+ waitv[0].uaddr = 1;
-+
-+ if (clock_gettime(CLOCK_MONOTONIC, &to))
-+ error("gettime64 failed\n", errno);
-+
-+ to.tv_sec++;
-+
-+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+ if (res == EINVAL) {
-+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv with an unaligned address\n");
-+ }
-+
-+ /* Testing a NULL address for waiters.uaddr */
-+ waitv[0].uaddr = 0x00000000;
-+
-+ if (clock_gettime(CLOCK_MONOTONIC, &to))
-+ error("gettime64 failed\n", errno);
-+
-+ to.tv_sec++;
-+
-+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+ if (res == EINVAL) {
-+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
-+ }
-+
-+ /* Testing a NULL address for *waiters */
-+ if (clock_gettime(CLOCK_MONOTONIC, &to))
-+ error("gettime64 failed\n", errno);
-+
-+ to.tv_sec++;
-+
-+ res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+ if (res == EINVAL) {
-+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
-+ }
-+
-+ /* Testing an invalid clockid */
-+ if (clock_gettime(CLOCK_MONOTONIC, &to))
-+ error("gettime64 failed\n", errno);
-+
-+ to.tv_sec++;
-+
-+ res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_TAI);
-+ if (res == EINVAL) {
-+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+ res ? errno : res,
-+ res ? strerror(errno) : "");
-+ ret = RET_FAIL;
-+ } else {
-+ ksft_test_result_pass("futex_waitv invalid clockid\n");
-+ }
-+
-+ ksft_print_cnts();
-+ return ret;
-+}
-diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
-index 11a9d6229..5ccd599da 100755
---- a/tools/testing/selftests/futex/functional/run.sh
-+++ b/tools/testing/selftests/futex/functional/run.sh
-@@ -79,3 +79,6 @@ echo
-
- echo
- ./futex_requeue $COLOR
-+
-+echo
-+./futex_waitv $COLOR
-diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
-new file mode 100644
-index 000000000..9d305520e
---- /dev/null
-+++ b/tools/testing/selftests/futex/include/futex2test.h
-@@ -0,0 +1,22 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Futex2 library addons for futex tests
-+ *
-+ * Copyright 2021 Collabora Ltd.
-+ */
-+#include <stdint.h>
-+
-+#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
-+
-+/**
-+ * futex_waitv - Wait at multiple futexes, wake on any
-+ * @waiters: Array of waiters
-+ * @nr_waiters: Length of waiters array
-+ * @flags: Operation flags
-+ * @timo: Optional timeout for operation
-+ */
-+static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
-+ unsigned long flags, struct timespec *timo, clockid_t clockid)
-+{
-+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
-+}
---
-2.33.1.711.g9d530dc002
-
-
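The 0103-futex.patch removed above carried the futex_waitv() backport and its selftests; kernel 5.16 ships the syscall natively, so this package no longer needs it. For orientation, a minimal userspace waiter in the spirit of the futex2test.h wrapper shown above might look like the sketch below. It assumes a 5.16+ kernel with __NR_futex_waitv, struct futex_waitv and FUTEX_32 exposed by the headers; the futex words and the one-second timeout are illustrative only.

/* Sketch only: assumes 5.16+ headers providing __NR_futex_waitv,
 * struct futex_waitv and FUTEX_32 via <linux/futex.h>. */
#define _GNU_SOURCE
#include <linux/futex.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static uint32_t word_a, word_b;		/* hypothetical futex words */

int main(void)
{
	struct futex_waitv vec[2] = {
		{ .uaddr = (uintptr_t)&word_a, .val = 0, .flags = FUTEX_32 },
		{ .uaddr = (uintptr_t)&word_b, .val = 0, .flags = FUTEX_32 },
	};
	struct timespec to;

	/* futex_waitv() takes an absolute CLOCK_MONOTONIC timeout. */
	clock_gettime(CLOCK_MONOTONIC, &to);
	to.tv_sec += 1;

	long ret = syscall(__NR_futex_waitv, vec, 2, 0, &to, CLOCK_MONOTONIC);
	if (ret >= 0)
		printf("futex %ld was woken\n", ret);	/* index into vec */
	else
		perror("futex_waitv");			/* e.g. ETIMEDOUT */
	return 0;
}

A waker would store a new value to one of the words and issue FUTEX_WAKE on it; futex_waitv() then returns the index of the woken entry, which is what the deleted selftests check.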
diff --git a/0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch b/0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch
deleted file mode 100644
index dafb17784fdb..000000000000
--- a/0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch
+++ /dev/null
@@ -1,89 +0,0 @@
---- b/drivers/usb/host/xhci-pci.c
-+++ a/drivers/usb/host/xhci-pci.c
-@@ -636,14 +636,7 @@
- { /* end: all zeroes */ }
- };
- MODULE_DEVICE_TABLE(pci, pci_ids);
--
--/*
-- * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't
-- * load firmware, so don't encumber the xhci-pci driver with it.
-- */
--#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS)
- MODULE_FIRMWARE("renesas_usb_fw.mem");
--#endif
-
- /* pci driver glue; this is a "new style" PCI driver module */
- static struct pci_driver xhci_pci_driver = {
---- b/drivers/usb/host/xhci-pci.c
-+++ a/drivers/usb/host/xhci-pci.c
-@@ -16,7 +16,6 @@
-
- #include "xhci.h"
- #include "xhci-trace.h"
--#include "xhci-pci.h"
-
- #define SSIC_PORT_NUM 2
- #define SSIC_PORT_CFG2 0x880c
-@@ -92,16 +91,7 @@ static int xhci_pci_reinit(struct xhci_h
-
- static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
- {
-- struct pci_dev *pdev = to_pci_dev(dev);
-- struct xhci_driver_data *driver_data;
-- const struct pci_device_id *id;
--
-- id = pci_match_id(pdev->driver->id_table, pdev);
--
-- if (id && id->driver_data) {
-- driver_data = (struct xhci_driver_data *)id->driver_data;
-- xhci->quirks |= driver_data->quirks;
-- }
-+ struct pci_dev *pdev = to_pci_dev(dev);
-
- /* Look for vendor-specific quirks */
- if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
-@@ -346,16 +336,8 @@ static int xhci_pci_probe(struct pci_dev
- int retval;
- struct xhci_hcd *xhci;
- struct usb_hcd *hcd;
-- struct xhci_driver_data *driver_data;
- struct reset_control *reset;
-
-- driver_data = (struct xhci_driver_data *)id->driver_data;
-- if (driver_data && driver_data->quirks & XHCI_RENESAS_FW_QUIRK) {
-- retval = renesas_xhci_check_request_fw(dev, id);
-- if (retval)
-- return retval;
-- }
--
- reset = devm_reset_control_get_optional_exclusive(&dev->dev, NULL);
- if (IS_ERR(reset))
- return PTR_ERR(reset);
-@@ -578,26 +557,14 @@ static void xhci_pci_shutdown(struct usb
-
- /*-------------------------------------------------------------------------*/
-
--static const struct xhci_driver_data reneses_data = {
-- .quirks = XHCI_RENESAS_FW_QUIRK,
-- .firmware = "renesas_usb_fw.mem",
--};
--
- /* PCI driver selection metadata; PCI hotplugging uses this */
- static const struct pci_device_id pci_ids[] = {
-- { PCI_DEVICE(0x1912, 0x0014),
-- .driver_data = (unsigned long)&reneses_data,
-- },
-- { PCI_DEVICE(0x1912, 0x0015),
-- .driver_data = (unsigned long)&reneses_data,
-- },
- /* handle any USB 3.0 xHCI controller */
- { PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_XHCI, ~0),
- },
- { /* end: all zeroes */ }
- };
- MODULE_DEVICE_TABLE(pci, pci_ids);
--MODULE_FIRMWARE("renesas_usb_fw.mem");
-
- /* pci driver glue; this is a "new style" PCI driver module */
- static struct pci_driver xhci_pci_driver = {
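The revert above drops the Renesas firmware hook from xhci-pci and goes back to matching every xHCI controller by class alone. The mechanism it removes is the common PCI pattern of stashing per-device data in pci_device_id.driver_data and applying it as a quirk at probe time. A generic sketch of that pattern, with hypothetical mydrv_* names rather than the actual xhci code:

/* Sketch of the driver_data quirk pattern; mydrv_* names are hypothetical. */
#include <linux/pci.h>

struct mydrv_data {
	unsigned long quirks;
};

static const struct mydrv_data vendor_x_data = {
	.quirks = 0x1,				/* e.g. "needs firmware" */
};

static const struct pci_device_id mydrv_ids[] = {
	{ PCI_DEVICE(0x1912, 0x0014),		/* specific device: extra data */
	  .driver_data = (unsigned long)&vendor_x_data },
	{ PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_XHCI, ~0) },	/* class fallback */
	{ }
};

static void mydrv_apply_quirks(struct pci_dev *pdev, unsigned long *quirks)
{
	const struct pci_device_id *id = pci_match_id(mydrv_ids, pdev);

	if (id && id->driver_data)
		*quirks |= ((const struct mydrv_data *)id->driver_data)->quirks;
}

Removing the two Renesas entries leaves only the class match, so no entry carries driver_data and the quirk lookup above becomes a no-op — which is exactly what the restored pre-5.16 code relies on.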
diff --git a/0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch b/0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch
deleted file mode 100644
index 7e59a4802e0a..000000000000
--- a/0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
-index cd2935b9e7c81..c3211325c2d3e 100644
---- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
-+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
-@@ -1869,7 +1869,11 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
- RING_FORCE_TO_NONPRIV_ACCESS_RD |
- RING_FORCE_TO_NONPRIV_RANGE_4);
-
-- /* Wa_1808121037:tgl */
-+ /*
-+ * Wa_1808121037:tgl
-+ * Wa_14012131227:dg1
-+ * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
-+ */
- whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
-
- /* Wa_1806527549:tgl */
diff --git a/0201-lenovo-wmi2.patch b/0201-lenovo-wmi2.patch
deleted file mode 100644
index c6b1b0603651..000000000000
--- a/0201-lenovo-wmi2.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
-index 791449a2370f..45d9010aafcf 100644
---- a/drivers/platform/x86/wmi.c
-+++ b/drivers/platform/x86/wmi.c
-@@ -1081,7 +1081,8 @@ static int wmi_create_device(struct device *wmi_bus_dev,
- wblock->dev.dev.bus = &wmi_bus_type;
- wblock->dev.dev.parent = wmi_bus_dev;
-
-- dev_set_name(&wblock->dev.dev, "%pUL", gblock->guid);
-+ dev_set_name(&wblock->dev.dev, "%s-%pUL",
-+ dev_name(&wblock->acpi_device->dev), gblock->guid);
-
- device_initialize(&wblock->dev.dev);
-
-
diff --git a/0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch b/0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch
new file mode 100644
index 000000000000..280ed9645c31
--- /dev/null
+++ b/0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch
@@ -0,0 +1,1038 @@
+--- b/Documentation/gpu/todo.rst
++++ a/Documentation/gpu/todo.rst
+@@ -314,19 +314,16 @@
+ Garbage collect fbdev scrolling acceleration
+ --------------------------------------------
+
++Scroll acceleration is disabled in fbcon by hard-wiring p->scrollmode =
++SCROLL_REDRAW. There's a ton of code this will allow us to remove:
+-Scroll acceleration has been disabled in fbcon. Now it works as the old
+-SCROLL_REDRAW mode. A ton of code was removed in fbcon.c and the hook bmove was
+-removed from fbcon_ops.
+-Remaining tasks:
+
++- lots of code in fbcon.c
++
++- a bunch of the hooks in fbcon_ops, maybe the remaining hooks could be called
+-- a bunch of the hooks in fbcon_ops could be removed or simplified by calling
+ directly instead of the function table (with a switch on p->rotate)
+
+ - fb_copyarea is unused after this, and can be deleted from all drivers
+
+-- after that, fb_copyarea can be deleted from fb_ops in include/linux/fb.h as
+- well as cfb_copyarea
+-
+ Note that not all acceleration code can be deleted, since clearing and cursor
+ support is still accelerated, which might be good candidates for further
+ deletion projects.
+--- b/drivers/video/fbdev/core/bitblit.c
++++ a/drivers/video/fbdev/core/bitblit.c
+@@ -43,6 +43,21 @@
+ }
+ }
+
++static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++ int sx, int dy, int dx, int height, int width)
++{
++ struct fb_copyarea area;
++
++ area.sx = sx * vc->vc_font.width;
++ area.sy = sy * vc->vc_font.height;
++ area.dx = dx * vc->vc_font.width;
++ area.dy = dy * vc->vc_font.height;
++ area.height = height * vc->vc_font.height;
++ area.width = width * vc->vc_font.width;
++
++ info->fbops->fb_copyarea(info, &area);
++}
++
+ static void bit_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int height, int width)
+ {
+@@ -378,6 +393,7 @@
+
+ void fbcon_set_bitops(struct fbcon_ops *ops)
+ {
++ ops->bmove = bit_bmove;
+ ops->clear = bit_clear;
+ ops->putcs = bit_putcs;
+ ops->clear_margins = bit_clear_margins;
+--- b/drivers/video/fbdev/core/fbcon.c
++++ a/drivers/video/fbdev/core/fbcon.c
+@@ -173,6 +173,8 @@
+ int count, int ypos, int xpos);
+ static void fbcon_clear_margins(struct vc_data *vc, int bottom_only);
+ static void fbcon_cursor(struct vc_data *vc, int mode);
++static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
++ int height, int width);
+ static int fbcon_switch(struct vc_data *vc);
+ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch);
+ static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table);
+@@ -180,8 +182,16 @@
+ /*
+ * Internal routines
+ */
++static __inline__ void ywrap_up(struct vc_data *vc, int count);
++static __inline__ void ywrap_down(struct vc_data *vc, int count);
++static __inline__ void ypan_up(struct vc_data *vc, int count);
++static __inline__ void ypan_down(struct vc_data *vc, int count);
++static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
++ int dy, int dx, int height, int width, u_int y_break);
+ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
+ int unit);
++static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
++ int line, int count, int dy);
+ static void fbcon_modechanged(struct fb_info *info);
+ static void fbcon_set_all_vcs(struct fb_info *info);
+ static void fbcon_start(void);
+@@ -1125,6 +1135,14 @@
+
+ ops->graphics = 0;
+
++ /*
++ * No more hw acceleration for fbcon.
++ *
++ * FIXME: Garbage collect all the now dead code after sufficient time
++ * has passed.
++ */
++ p->scrollmode = SCROLL_REDRAW;
++
+ /*
+ * ++guenther: console.c:vc_allocate() relies on initializing
+ * vc_{cols,rows}, but we must not set those if we are only
+@@ -1211,13 +1229,14 @@
+ * This system is now divided into two levels because of complications
+ * caused by hardware scrolling. Top level functions:
+ *
++ * fbcon_bmove(), fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
+- * fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
+ *
+ * handles y values in range [0, scr_height-1] that correspond to real
+ * screen positions. y_wrap shift means that first line of bitmap may be
+ * anywhere on this display. These functions convert lineoffsets to
+ * bitmap offsets and deal with the wrap-around case by splitting blits.
+ *
++ * fbcon_bmove_physical_8() -- These functions fast implementations
+ * fbcon_clear_physical_8() -- of original fbcon_XXX fns.
+ * fbcon_putc_physical_8() -- (font width != 8) may be added later
+ *
+@@ -1390,6 +1409,224 @@
+ }
+ }
+
++static __inline__ void ywrap_up(struct vc_data *vc, int count)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct fbcon_display *p = &fb_display[vc->vc_num];
++
++ p->yscroll += count;
++ if (p->yscroll >= p->vrows) /* Deal with wrap */
++ p->yscroll -= p->vrows;
++ ops->var.xoffset = 0;
++ ops->var.yoffset = p->yscroll * vc->vc_font.height;
++ ops->var.vmode |= FB_VMODE_YWRAP;
++ ops->update_start(info);
++ scrollback_max += count;
++ if (scrollback_max > scrollback_phys_max)
++ scrollback_max = scrollback_phys_max;
++ scrollback_current = 0;
++}
++
++static __inline__ void ywrap_down(struct vc_data *vc, int count)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct fbcon_display *p = &fb_display[vc->vc_num];
++
++ p->yscroll -= count;
++ if (p->yscroll < 0) /* Deal with wrap */
++ p->yscroll += p->vrows;
++ ops->var.xoffset = 0;
++ ops->var.yoffset = p->yscroll * vc->vc_font.height;
++ ops->var.vmode |= FB_VMODE_YWRAP;
++ ops->update_start(info);
++ scrollback_max -= count;
++ if (scrollback_max < 0)
++ scrollback_max = 0;
++ scrollback_current = 0;
++}
++
++static __inline__ void ypan_up(struct vc_data *vc, int count)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_display *p = &fb_display[vc->vc_num];
++ struct fbcon_ops *ops = info->fbcon_par;
++
++ p->yscroll += count;
++ if (p->yscroll > p->vrows - vc->vc_rows) {
++ ops->bmove(vc, info, p->vrows - vc->vc_rows,
++ 0, 0, 0, vc->vc_rows, vc->vc_cols);
++ p->yscroll -= p->vrows - vc->vc_rows;
++ }
++
++ ops->var.xoffset = 0;
++ ops->var.yoffset = p->yscroll * vc->vc_font.height;
++ ops->var.vmode &= ~FB_VMODE_YWRAP;
++ ops->update_start(info);
++ fbcon_clear_margins(vc, 1);
++ scrollback_max += count;
++ if (scrollback_max > scrollback_phys_max)
++ scrollback_max = scrollback_phys_max;
++ scrollback_current = 0;
++}
++
++static __inline__ void ypan_up_redraw(struct vc_data *vc, int t, int count)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct fbcon_display *p = &fb_display[vc->vc_num];
++
++ p->yscroll += count;
++
++ if (p->yscroll > p->vrows - vc->vc_rows) {
++ p->yscroll -= p->vrows - vc->vc_rows;
++ fbcon_redraw_move(vc, p, t + count, vc->vc_rows - count, t);
++ }
++
++ ops->var.xoffset = 0;
++ ops->var.yoffset = p->yscroll * vc->vc_font.height;
++ ops->var.vmode &= ~FB_VMODE_YWRAP;
++ ops->update_start(info);
++ fbcon_clear_margins(vc, 1);
++ scrollback_max += count;
++ if (scrollback_max > scrollback_phys_max)
++ scrollback_max = scrollback_phys_max;
++ scrollback_current = 0;
++}
++
++static __inline__ void ypan_down(struct vc_data *vc, int count)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_display *p = &fb_display[vc->vc_num];
++ struct fbcon_ops *ops = info->fbcon_par;
++
++ p->yscroll -= count;
++ if (p->yscroll < 0) {
++ ops->bmove(vc, info, 0, 0, p->vrows - vc->vc_rows,
++ 0, vc->vc_rows, vc->vc_cols);
++ p->yscroll += p->vrows - vc->vc_rows;
++ }
++
++ ops->var.xoffset = 0;
++ ops->var.yoffset = p->yscroll * vc->vc_font.height;
++ ops->var.vmode &= ~FB_VMODE_YWRAP;
++ ops->update_start(info);
++ fbcon_clear_margins(vc, 1);
++ scrollback_max -= count;
++ if (scrollback_max < 0)
++ scrollback_max = 0;
++ scrollback_current = 0;
++}
++
++static __inline__ void ypan_down_redraw(struct vc_data *vc, int t, int count)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct fbcon_display *p = &fb_display[vc->vc_num];
++
++ p->yscroll -= count;
++
++ if (p->yscroll < 0) {
++ p->yscroll += p->vrows - vc->vc_rows;
++ fbcon_redraw_move(vc, p, t, vc->vc_rows - count, t + count);
++ }
++
++ ops->var.xoffset = 0;
++ ops->var.yoffset = p->yscroll * vc->vc_font.height;
++ ops->var.vmode &= ~FB_VMODE_YWRAP;
++ ops->update_start(info);
++ fbcon_clear_margins(vc, 1);
++ scrollback_max -= count;
++ if (scrollback_max < 0)
++ scrollback_max = 0;
++ scrollback_current = 0;
++}
++
++static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
++ int line, int count, int dy)
++{
++ unsigned short *s = (unsigned short *)
++ (vc->vc_origin + vc->vc_size_row * line);
++
++ while (count--) {
++ unsigned short *start = s;
++ unsigned short *le = advance_row(s, 1);
++ unsigned short c;
++ int x = 0;
++ unsigned short attr = 1;
++
++ do {
++ c = scr_readw(s);
++ if (attr != (c & 0xff00)) {
++ attr = c & 0xff00;
++ if (s > start) {
++ fbcon_putcs(vc, start, s - start,
++ dy, x);
++ x += s - start;
++ start = s;
++ }
++ }
++ console_conditional_schedule();
++ s++;
++ } while (s < le);
++ if (s > start)
++ fbcon_putcs(vc, start, s - start, dy, x);
++ console_conditional_schedule();
++ dy++;
++ }
++}
++
++static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info,
++ struct fbcon_display *p, int line, int count, int ycount)
++{
++ int offset = ycount * vc->vc_cols;
++ unsigned short *d = (unsigned short *)
++ (vc->vc_origin + vc->vc_size_row * line);
++ unsigned short *s = d + offset;
++ struct fbcon_ops *ops = info->fbcon_par;
++
++ while (count--) {
++ unsigned short *start = s;
++ unsigned short *le = advance_row(s, 1);
++ unsigned short c;
++ int x = 0;
++
++ do {
++ c = scr_readw(s);
++
++ if (c == scr_readw(d)) {
++ if (s > start) {
++ ops->bmove(vc, info, line + ycount, x,
++ line, x, 1, s-start);
++ x += s - start + 1;
++ start = s + 1;
++ } else {
++ x++;
++ start++;
++ }
++ }
++
++ scr_writew(c, d);
++ console_conditional_schedule();
++ s++;
++ d++;
++ } while (s < le);
++ if (s > start)
++ ops->bmove(vc, info, line + ycount, x, line, x, 1,
++ s-start);
++ console_conditional_schedule();
++ if (ycount > 0)
++ line++;
++ else {
++ line--;
++ /* NOTE: We subtract two lines from these pointers */
++ s -= vc->vc_size_row;
++ d -= vc->vc_size_row;
++ }
++ }
++}
++
+ static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p,
+ int line, int count, int offset)
+ {
+@@ -1450,6 +1687,7 @@
+ {
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_display *p = &fb_display[vc->vc_num];
++ int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK;
+
+ if (fbcon_is_inactive(vc, info))
+ return true;
+@@ -1466,32 +1704,249 @@
+ case SM_UP:
+ if (count > vc->vc_rows) /* Maximum realistic size */
+ count = vc->vc_rows;
++ if (logo_shown >= 0)
++ goto redraw_up;
++ switch (p->scrollmode) {
++ case SCROLL_MOVE:
++ fbcon_redraw_blit(vc, info, p, t, b - t - count,
++ count);
++ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++ scr_memsetw((unsigned short *) (vc->vc_origin +
++ vc->vc_size_row *
++ (b - count)),
++ vc->vc_video_erase_char,
++ vc->vc_size_row * count);
++ return true;
++
++ case SCROLL_WRAP_MOVE:
++ if (b - t - count > 3 * vc->vc_rows >> 2) {
++ if (t > 0)
++ fbcon_bmove(vc, 0, 0, count, 0, t,
++ vc->vc_cols);
++ ywrap_up(vc, count);
++ if (vc->vc_rows - b > 0)
++ fbcon_bmove(vc, b - count, 0, b, 0,
++ vc->vc_rows - b,
++ vc->vc_cols);
++ } else if (info->flags & FBINFO_READS_FAST)
++ fbcon_bmove(vc, t + count, 0, t, 0,
++ b - t - count, vc->vc_cols);
++ else
++ goto redraw_up;
++ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++ break;
++
++ case SCROLL_PAN_REDRAW:
++ if ((p->yscroll + count <=
++ 2 * (p->vrows - vc->vc_rows))
++ && ((!scroll_partial && (b - t == vc->vc_rows))
++ || (scroll_partial
++ && (b - t - count >
++ 3 * vc->vc_rows >> 2)))) {
++ if (t > 0)
++ fbcon_redraw_move(vc, p, 0, t, count);
++ ypan_up_redraw(vc, t, count);
++ if (vc->vc_rows - b > 0)
++ fbcon_redraw_move(vc, p, b,
++ vc->vc_rows - b, b);
++ } else
++ fbcon_redraw_move(vc, p, t + count, b - t - count, t);
++ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++ break;
++
++ case SCROLL_PAN_MOVE:
++ if ((p->yscroll + count <=
++ 2 * (p->vrows - vc->vc_rows))
++ && ((!scroll_partial && (b - t == vc->vc_rows))
++ || (scroll_partial
++ && (b - t - count >
++ 3 * vc->vc_rows >> 2)))) {
++ if (t > 0)
++ fbcon_bmove(vc, 0, 0, count, 0, t,
++ vc->vc_cols);
++ ypan_up(vc, count);
++ if (vc->vc_rows - b > 0)
++ fbcon_bmove(vc, b - count, 0, b, 0,
++ vc->vc_rows - b,
++ vc->vc_cols);
++ } else if (info->flags & FBINFO_READS_FAST)
++ fbcon_bmove(vc, t + count, 0, t, 0,
++ b - t - count, vc->vc_cols);
++ else
++ goto redraw_up;
++ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++ break;
++
++ case SCROLL_REDRAW:
++ redraw_up:
++ fbcon_redraw(vc, p, t, b - t - count,
++ count * vc->vc_cols);
++ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++ scr_memsetw((unsigned short *) (vc->vc_origin +
++ vc->vc_size_row *
++ (b - count)),
++ vc->vc_video_erase_char,
++ vc->vc_size_row * count);
++ return true;
++ }
++ break;
+- fbcon_redraw(vc, p, t, b - t - count,
+- count * vc->vc_cols);
+- fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+- scr_memsetw((unsigned short *) (vc->vc_origin +
+- vc->vc_size_row *
+- (b - count)),
+- vc->vc_video_erase_char,
+- vc->vc_size_row * count);
+- return true;
+
+ case SM_DOWN:
+ if (count > vc->vc_rows) /* Maximum realistic size */
+ count = vc->vc_rows;
++ if (logo_shown >= 0)
++ goto redraw_down;
++ switch (p->scrollmode) {
++ case SCROLL_MOVE:
++ fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
++ -count);
++ fbcon_clear(vc, t, 0, count, vc->vc_cols);
++ scr_memsetw((unsigned short *) (vc->vc_origin +
++ vc->vc_size_row *
++ t),
++ vc->vc_video_erase_char,
++ vc->vc_size_row * count);
++ return true;
++
++ case SCROLL_WRAP_MOVE:
++ if (b - t - count > 3 * vc->vc_rows >> 2) {
++ if (vc->vc_rows - b > 0)
++ fbcon_bmove(vc, b, 0, b - count, 0,
++ vc->vc_rows - b,
++ vc->vc_cols);
++ ywrap_down(vc, count);
++ if (t > 0)
++ fbcon_bmove(vc, count, 0, 0, 0, t,
++ vc->vc_cols);
++ } else if (info->flags & FBINFO_READS_FAST)
++ fbcon_bmove(vc, t, 0, t + count, 0,
++ b - t - count, vc->vc_cols);
++ else
++ goto redraw_down;
++ fbcon_clear(vc, t, 0, count, vc->vc_cols);
++ break;
++
++ case SCROLL_PAN_MOVE:
++ if ((count - p->yscroll <= p->vrows - vc->vc_rows)
++ && ((!scroll_partial && (b - t == vc->vc_rows))
++ || (scroll_partial
++ && (b - t - count >
++ 3 * vc->vc_rows >> 2)))) {
++ if (vc->vc_rows - b > 0)
++ fbcon_bmove(vc, b, 0, b - count, 0,
++ vc->vc_rows - b,
++ vc->vc_cols);
++ ypan_down(vc, count);
++ if (t > 0)
++ fbcon_bmove(vc, count, 0, 0, 0, t,
++ vc->vc_cols);
++ } else if (info->flags & FBINFO_READS_FAST)
++ fbcon_bmove(vc, t, 0, t + count, 0,
++ b - t - count, vc->vc_cols);
++ else
++ goto redraw_down;
++ fbcon_clear(vc, t, 0, count, vc->vc_cols);
++ break;
++
++ case SCROLL_PAN_REDRAW:
++ if ((count - p->yscroll <= p->vrows - vc->vc_rows)
++ && ((!scroll_partial && (b - t == vc->vc_rows))
++ || (scroll_partial
++ && (b - t - count >
++ 3 * vc->vc_rows >> 2)))) {
++ if (vc->vc_rows - b > 0)
++ fbcon_redraw_move(vc, p, b, vc->vc_rows - b,
++ b - count);
++ ypan_down_redraw(vc, t, count);
++ if (t > 0)
++ fbcon_redraw_move(vc, p, count, t, 0);
++ } else
++ fbcon_redraw_move(vc, p, t, b - t - count, t + count);
++ fbcon_clear(vc, t, 0, count, vc->vc_cols);
++ break;
++
++ case SCROLL_REDRAW:
++ redraw_down:
++ fbcon_redraw(vc, p, b - 1, b - t - count,
++ -count * vc->vc_cols);
++ fbcon_clear(vc, t, 0, count, vc->vc_cols);
++ scr_memsetw((unsigned short *) (vc->vc_origin +
++ vc->vc_size_row *
++ t),
++ vc->vc_video_erase_char,
++ vc->vc_size_row * count);
++ return true;
++ }
+- fbcon_redraw(vc, p, b - 1, b - t - count,
+- -count * vc->vc_cols);
+- fbcon_clear(vc, t, 0, count, vc->vc_cols);
+- scr_memsetw((unsigned short *) (vc->vc_origin +
+- vc->vc_size_row *
+- t),
+- vc->vc_video_erase_char,
+- vc->vc_size_row * count);
+- return true;
+ }
+ return false;
+ }
+
++
++static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
++ int height, int width)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_display *p = &fb_display[vc->vc_num];
++
++ if (fbcon_is_inactive(vc, info))
++ return;
++
++ if (!width || !height)
++ return;
++
++ /* Split blits that cross physical y_wrap case.
++ * Pathological case involves 4 blits, better to use recursive
++ * code rather than unrolled case
++ *
++ * Recursive invocations don't need to erase the cursor over and
++ * over again, so we use fbcon_bmove_rec()
++ */
++ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, height, width,
++ p->vrows - p->yscroll);
++}
++
++static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
++ int dy, int dx, int height, int width, u_int y_break)
++{
++ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++ struct fbcon_ops *ops = info->fbcon_par;
++ u_int b;
++
++ if (sy < y_break && sy + height > y_break) {
++ b = y_break - sy;
++ if (dy < sy) { /* Avoid trashing self */
++ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++ y_break);
++ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++ height - b, width, y_break);
++ } else {
++ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++ height - b, width, y_break);
++ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++ y_break);
++ }
++ return;
++ }
++
++ if (dy < y_break && dy + height > y_break) {
++ b = y_break - dy;
++ if (dy < sy) { /* Avoid trashing self */
++ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++ y_break);
++ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++ height - b, width, y_break);
++ } else {
++ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++ height - b, width, y_break);
++ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++ y_break);
++ }
++ return;
++ }
++ ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
++ height, width);
++}
++
+ static void updatescrollmode(struct fbcon_display *p,
+ struct fb_info *info,
+ struct vc_data *vc)
+@@ -1664,7 +2119,21 @@
+
+ updatescrollmode(p, info, vc);
+
++ switch (p->scrollmode) {
++ case SCROLL_WRAP_MOVE:
++ scrollback_phys_max = p->vrows - vc->vc_rows;
++ break;
++ case SCROLL_PAN_MOVE:
++ case SCROLL_PAN_REDRAW:
++ scrollback_phys_max = p->vrows - 2 * vc->vc_rows;
++ if (scrollback_phys_max < 0)
++ scrollback_phys_max = 0;
++ break;
++ default:
++ scrollback_phys_max = 0;
++ break;
++ }
++
+- scrollback_phys_max = 0;
+ scrollback_max = 0;
+ scrollback_current = 0;
+
+--- b/drivers/video/fbdev/core/fbcon.h
++++ a/drivers/video/fbdev/core/fbcon.h
+@@ -29,6 +29,7 @@
+ /* Filled in by the low-level console driver */
+ const u_char *fontdata;
+ int userfont; /* != 0 if fontdata kmalloc()ed */
++ u_short scrollmode; /* Scroll Method */
+ u_short inverse; /* != 0 text black on white as default */
+ short yscroll; /* Hardware scrolling */
+ int vrows; /* number of virtual rows */
+@@ -51,6 +52,8 @@
+ };
+
+ struct fbcon_ops {
++ void (*bmove)(struct vc_data *vc, struct fb_info *info, int sy,
++ int sx, int dy, int dx, int height, int width);
+ void (*clear)(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int height, int width);
+ void (*putcs)(struct vc_data *vc, struct fb_info *info,
+@@ -149,6 +152,62 @@
+ #define attr_bgcol_ec(bgshift, vc, info) attr_col_ec(bgshift, vc, info, 0)
+ #define attr_fgcol_ec(fgshift, vc, info) attr_col_ec(fgshift, vc, info, 1)
+
++ /*
++ * Scroll Method
++ */
++
++/* There are several methods fbcon can use to move text around the screen:
++ *
++ * Operation Pan Wrap
++ *---------------------------------------------
++ * SCROLL_MOVE copyarea No No
++ * SCROLL_PAN_MOVE copyarea Yes No
++ * SCROLL_WRAP_MOVE copyarea No Yes
++ * SCROLL_REDRAW imageblit No No
++ * SCROLL_PAN_REDRAW imageblit Yes No
++ * SCROLL_WRAP_REDRAW imageblit No Yes
++ *
++ * (SCROLL_WRAP_REDRAW is not implemented yet)
++ *
++ * In general, fbcon will choose the best scrolling
++ * method based on the rule below:
++ *
++ * Pan/Wrap > accel imageblit > accel copyarea >
++ * soft imageblit > (soft copyarea)
++ *
++ * Exception to the rule: Pan + accel copyarea is
++ * preferred over Pan + accel imageblit.
++ *
++ * The above is typical for PCI/AGP cards. Unless
++ * overridden, fbcon will never use soft copyarea.
++ *
++ * If you need to override the above rule, set the
++ * appropriate flags in fb_info->flags. For example,
++ * to prefer copyarea over imageblit, set
++ * FBINFO_READS_FAST.
++ *
++ * Other notes:
++ * + use the hardware engine to move the text
++ * (hw-accelerated copyarea() and fillrect())
++ * + use hardware-supported panning on a large virtual screen
++ * + amifb can not only pan, but also wrap the display by N lines
++ * (i.e. visible line i = physical line (i+N) % yres).
++ * + read what's already rendered on the screen and
++ * write it in a different place (this is cfb_copyarea())
++ * + re-render the text to the screen
++ *
++ * Whether to use wrapping or panning can only be figured out at
++ * runtime (when we know whether our font height is a multiple
++ * of the pan/wrap step)
++ *
++ */
++
++#define SCROLL_MOVE 0x001
++#define SCROLL_PAN_MOVE 0x002
++#define SCROLL_WRAP_MOVE 0x003
++#define SCROLL_REDRAW 0x004
++#define SCROLL_PAN_REDRAW 0x005
++
+ #ifdef CONFIG_FB_TILEBLITTING
+ extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
+ #endif
+--- b/drivers/video/fbdev/core/fbcon_ccw.c
++++ a/drivers/video/fbdev/core/fbcon_ccw.c
+@@ -59,12 +59,31 @@
+ }
+ }
+
++
++static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++ int sx, int dy, int dx, int height, int width)
++{
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct fb_copyarea area;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
++
++ area.sx = sy * vc->vc_font.height;
++ area.sy = vyres - ((sx + width) * vc->vc_font.width);
++ area.dx = dy * vc->vc_font.height;
++ area.dy = vyres - ((dx + width) * vc->vc_font.width);
++ area.width = height * vc->vc_font.height;
++ area.height = width * vc->vc_font.width;
++
++ info->fbops->fb_copyarea(info, &area);
++}
++
+ static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int height, int width)
+ {
++ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_fillrect region;
+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+
+ region.color = attr_bgcol_ec(bgshift,vc,info);
+ region.dx = sy * vc->vc_font.height;
+@@ -121,7 +140,7 @@
+ u32 cnt, pitch, size;
+ u32 attribute = get_attribute(info, scr_readw(s));
+ u8 *dst, *buf = NULL;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+
+ if (!ops->fontbuffer)
+ return;
+@@ -210,7 +229,7 @@
+ int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ int err = 1, dx, dy;
+ char *src;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+
+ if (!ops->fontbuffer)
+ return;
+@@ -368,7 +387,7 @@
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+ u32 yoffset;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+ int err;
+
+ yoffset = (vyres - info->var.yres) - ops->var.xoffset;
+@@ -383,6 +402,7 @@
+
+ void fbcon_rotate_ccw(struct fbcon_ops *ops)
+ {
++ ops->bmove = ccw_bmove;
+ ops->clear = ccw_clear;
+ ops->putcs = ccw_putcs;
+ ops->clear_margins = ccw_clear_margins;
+--- b/drivers/video/fbdev/core/fbcon_cw.c
++++ a/drivers/video/fbdev/core/fbcon_cw.c
+@@ -44,12 +44,31 @@
+ }
+ }
+
++
++static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++ int sx, int dy, int dx, int height, int width)
++{
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct fb_copyarea area;
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
++
++ area.sx = vxres - ((sy + height) * vc->vc_font.height);
++ area.sy = sx * vc->vc_font.width;
++ area.dx = vxres - ((dy + height) * vc->vc_font.height);
++ area.dy = dx * vc->vc_font.width;
++ area.width = height * vc->vc_font.height;
++ area.height = width * vc->vc_font.width;
++
++ info->fbops->fb_copyarea(info, &area);
++}
++
+ static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int height, int width)
+ {
++ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_fillrect region;
+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vxres = info->var.xres;
+
+ region.color = attr_bgcol_ec(bgshift,vc,info);
+ region.dx = vxres - ((sy + height) * vc->vc_font.height);
+@@ -106,7 +125,7 @@
+ u32 cnt, pitch, size;
+ u32 attribute = get_attribute(info, scr_readw(s));
+ u8 *dst, *buf = NULL;
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vxres = info->var.xres;
+
+ if (!ops->fontbuffer)
+ return;
+@@ -193,7 +212,7 @@
+ int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ int err = 1, dx, dy;
+ char *src;
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vxres = info->var.xres;
+
+ if (!ops->fontbuffer)
+ return;
+@@ -350,7 +369,7 @@
+ static int cw_update_start(struct fb_info *info)
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vxres = info->var.xres;
+ u32 xoffset;
+ int err;
+
+@@ -366,6 +385,7 @@
+
+ void fbcon_rotate_cw(struct fbcon_ops *ops)
+ {
++ ops->bmove = cw_bmove;
+ ops->clear = cw_clear;
+ ops->putcs = cw_putcs;
+ ops->clear_margins = cw_clear_margins;
+--- b/drivers/video/fbdev/core/fbcon_rotate.h
++++ a/drivers/video/fbdev/core/fbcon_rotate.h
+@@ -11,6 +11,15 @@
+ #ifndef _FBCON_ROTATE_H
+ #define _FBCON_ROTATE_H
+
++#define GETVYRES(s,i) ({ \
++ (s == SCROLL_REDRAW || s == SCROLL_MOVE) ? \
++ (i)->var.yres : (i)->var.yres_virtual; })
++
++#define GETVXRES(s,i) ({ \
++ (s == SCROLL_REDRAW || s == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
++ (i)->var.xres : (i)->var.xres_virtual; })
++
++
+ static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat)
+ {
+ u32 tmp = (y * pitch) + x, index = tmp / 8, bit = tmp % 8;
+--- b/drivers/video/fbdev/core/fbcon_ud.c
++++ a/drivers/video/fbdev/core/fbcon_ud.c
+@@ -44,13 +44,33 @@
+ }
+ }
+
++
++static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++ int sx, int dy, int dx, int height, int width)
++{
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct fb_copyarea area;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
++
++ area.sy = vyres - ((sy + height) * vc->vc_font.height);
++ area.sx = vxres - ((sx + width) * vc->vc_font.width);
++ area.dy = vyres - ((dy + height) * vc->vc_font.height);
++ area.dx = vxres - ((dx + width) * vc->vc_font.width);
++ area.height = height * vc->vc_font.height;
++ area.width = width * vc->vc_font.width;
++
++ info->fbops->fb_copyarea(info, &area);
++}
++
+ static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int height, int width)
+ {
++ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_fillrect region;
+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+- u32 vxres = info->var.xres;
+
+ region.color = attr_bgcol_ec(bgshift,vc,info);
+ region.dy = vyres - ((sy + height) * vc->vc_font.height);
+@@ -142,8 +162,8 @@
+ u32 mod = vc->vc_font.width % 8, cnt, pitch, size;
+ u32 attribute = get_attribute(info, scr_readw(s));
+ u8 *dst, *buf = NULL;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+- u32 vxres = info->var.xres;
+
+ if (!ops->fontbuffer)
+ return;
+@@ -239,8 +259,8 @@
+ int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ int err = 1, dx, dy;
+ char *src;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+- u32 vxres = info->var.xres;
+
+ if (!ops->fontbuffer)
+ return;
+@@ -390,8 +410,8 @@
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+ int xoffset, yoffset;
++ u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p->scrollmode, info);
+- u32 vyres = info->var.yres;
+- u32 vxres = info->var.xres;
+ int err;
+
+ xoffset = vxres - info->var.xres - ops->var.xoffset;
+@@ -409,6 +429,7 @@
+
+ void fbcon_rotate_ud(struct fbcon_ops *ops)
+ {
++ ops->bmove = ud_bmove;
+ ops->clear = ud_clear;
+ ops->putcs = ud_putcs;
+ ops->clear_margins = ud_clear_margins;
+--- b/drivers/video/fbdev/core/tileblit.c
++++ a/drivers/video/fbdev/core/tileblit.c
+@@ -16,6 +16,21 @@
+ #include <asm/types.h>
+ #include "fbcon.h"
+
++static void tile_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++ int sx, int dy, int dx, int height, int width)
++{
++ struct fb_tilearea area;
++
++ area.sx = sx;
++ area.sy = sy;
++ area.dx = dx;
++ area.dy = dy;
++ area.height = height;
++ area.width = width;
++
++ info->tileops->fb_tilecopy(info, &area);
++}
++
+ static void tile_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int height, int width)
+ {
+@@ -118,6 +133,7 @@
+ struct fb_tilemap map;
+ struct fbcon_ops *ops = info->fbcon_par;
+
++ ops->bmove = tile_bmove;
+ ops->clear = tile_clear;
+ ops->putcs = tile_putcs;
+ ops->clear_margins = tile_clear_margins;
+--- b/drivers/video/fbdev/skeletonfb.c
++++ a/drivers/video/fbdev/skeletonfb.c
+@@ -505,15 +505,15 @@
+ }
+
+ /**
++ * xxxfb_copyarea - REQUIRED function. Can use generic routines if
++ * non acclerated hardware and packed pixel based.
+- * xxxfb_copyarea - OBSOLETE function.
+ * Copies one area of the screen to another area.
+- * Will be deleted in a future version
+ *
+ * @info: frame buffer structure that represents a single frame buffer
+ * @area: Structure providing the data to copy the framebuffer contents
+ * from one region to another.
+ *
++ * This drawing operation copies a rectangular area from one area of the
+- * This drawing operation copied a rectangular area from one area of the
+ * screen to another area.
+ */
+ void xxxfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
+@@ -645,9 +645,9 @@
+ .fb_setcolreg = xxxfb_setcolreg,
+ .fb_blank = xxxfb_blank,
+ .fb_pan_display = xxxfb_pan_display,
++ .fb_fillrect = xxxfb_fillrect, /* Needed !!! */
++ .fb_copyarea = xxxfb_copyarea, /* Needed !!! */
++ .fb_imageblit = xxxfb_imageblit, /* Needed !!! */
+- .fb_fillrect = xxxfb_fillrect, /* Needed !!! */
+- .fb_copyarea = xxxfb_copyarea, /* Obsolete */
+- .fb_imageblit = xxxfb_imageblit, /* Needed !!! */
+ .fb_cursor = xxxfb_cursor, /* Optional !!! */
+ .fb_sync = xxxfb_sync,
+ .fb_ioctl = xxxfb_ioctl,
+--- b/include/linux/fb.h
++++ a/include/linux/fb.h
+@@ -262,7 +262,7 @@
+
+ /* Draws a rectangle */
+ void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect);
++ /* Copy data from area to another */
+- /* Copy data from area to another. Obsolete. */
+ void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region);
+ /* Draws a image to the display */
+ void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image);
diff --git a/0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch b/0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
index c12688800eab..c12688800eab 100644
--- a/0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
+++ b/0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
diff --git a/0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch b/0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch
index 6491c541e883..6491c541e883 100644
--- a/0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch
+++ b/0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch
diff --git a/0303-revert-fbcon-remove-soft-scrollback-code.patch b/0304-revert-fbcon-remove-soft-scrollback-code.patch
index 4f9735447f37..4f9735447f37 100644
--- a/0303-revert-fbcon-remove-soft-scrollback-code.patch
+++ b/0304-revert-fbcon-remove-soft-scrollback-code.patch
diff --git a/0999-acs.gitpatch b/0999-acs.gitpatch
index 401b27c13f1c..e075ec1d3974 100644
--- a/0999-acs.gitpatch
+++ b/0999-acs.gitpatch
@@ -1,28 +1,27 @@
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 1396fd2..3c0ede4 100644
+index 2fba824..a797d74 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -3892,6 +3892,15 @@
- nomsi [MSI] If the PCI_MSI kernel config parameter is
- enabled, this kernel boot option can be used to
- disable the use of MSI interrupts system-wide.
-+ pcie_acs_override=
-+ [PCIE] Override missing PCIe ACS support for:
-+ downstream
-+ All downstream ports - full ACS capabilities
-+ multfunction
-+ All multifunction devices - multifunction ACS subset
-+ id:nnnn:nnnn
-+ Specfic device - full ACS capabilities
-+ Specified as vid:did (vendor/device ID) in hex
- noioapicquirk [APIC] Disable all boot interrupt quirks.
- Safety option to keep boot IRQs enabled. This
- should never be necessary.
+@@ -3922,6 +3922,14 @@
+ nomsi [MSI] If the PCI_MSI kernel config parameter is
+ enabled, this kernel boot option can be used to
+ disable the use of MSI interrupts system-wide.
++ pci_acs_override [PCIE] Override missing PCIe ACS support for:
++ downstream
++ All downstream ports - full ACS capabilities
++ multifunction
++ Add multifunction devices - multifunction ACS subset
++ id:nnnn:nnnn
++ Specific device - full ACS capabilities
++ Specified as vid:did (vendor/device ID) in hex
+ noioapicquirk [APIC] Disable all boot interrupt quirks.
+ Safety option to keep boot IRQs enabled. This
+ should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 4537d1e..c4f01fe 100644
+index 003950c..d3bb542 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
-@@ -193,6 +193,106 @@ static int __init pci_apply_final_quirks(void)
+@@ -193,6 +193,107 @@ static int __init pci_apply_final_quirks(void)
}
fs_initcall_sync(pci_apply_final_quirks);
@@ -34,6 +33,7 @@ index 4537d1e..c4f01fe 100644
+ unsigned short vendor;
+ unsigned short device;
+};
++
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
+static u8 max_acs_id;
+
@@ -129,12 +129,12 @@ index 4537d1e..c4f01fe 100644
/*
* Decoding should be disabled for a PCI device during BAR sizing to avoid
* conflict. But doing so may cause problems on host bridge and perhaps other
-@@ -4949,6 +5049,8 @@ static const struct pci_dev_acs_enabled {
- { PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs },
- /* Zhaoxin Root/Downstream Ports */
- { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
-+ /* allow acs for any */
-+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
- { 0 }
+@@ -4950,6 +5051,8 @@ static const struct pci_dev_acs_enabled {
+ { PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs },
+ /* Zhaoxin Root/Downstream Ports */
+ { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
++ /* allow acs for any */
++ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
+ { 0 }
};
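The ACS override added above is driven from the kernel command line; a minimal sketch of wiring it up through GRUB follows, assuming the parameter name matches the documentation text this patch adds (pci_acs_override; the text it replaces used pcie_acs_override=, so check which spelling the installed revision actually registers):

    # /etc/default/grub (hypothetical snippet; parameter name per the patch revision in use)
    GRUB_CMDLINE_LINUX_DEFAULT="quiet pci_acs_override=downstream,multifunction"
    # regenerate the bootloader config afterwards
    grub-mkconfig -o /boot/grub/grub.cfg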
diff --git a/PKGBUILD b/PKGBUILD
index 83921d5466ac..fa62c470252d 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -12,9 +12,9 @@
pkgbase=linux-acs-manjaro
pkgname=('linux-acs-manjaro' 'linux-acs-manjaro-headers')
_kernelname=-ACS-MANJARO
-_basekernel=5.15
-_basever=515
-pkgver=5.15.16
+_basekernel=5.16
+_basever=516
+pkgver=5.16.2
pkgrel=1
arch=('x86_64')
url="https://www.kernel.org/"
@@ -37,25 +37,15 @@ source=("https://www.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.x
'config'
# ARCH Patches
'0001-ZEN-Add-sysctl-and-CONFIG-to-disallow-unprivileged-CLONE_NEWUSER.patch'
- '0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch'
- '0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch'
- '0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch'
- '0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch'
- '0006-lg-laptop_Recognize_more_models.patch'
+ '0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch'
# MANJARO Patches
'0101-i2c-nuvoton-nc677x-hwmon-driver.patch'
-# '0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch'
- '0103-futex.patch' # https://github.com/sirlucjan/kernel-patches
- '0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch'
'0105-quirk-kernel-org-bug-210681-firmware_rome_error.patch'
- '0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch::https://patchwork.freedesktop.org/patch/463650/raw/'
- # Lenovo + AMD
- '0201-lenovo-wmi2.patch'
- # other patches
# Bootsplash
- '0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch'
- '0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch'
- '0303-revert-fbcon-remove-soft-scrollback-code.patch'
+ '0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch'
+ '0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch'
+ '0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch'
+ '0304-revert-fbcon-remove-soft-scrollback-code.patch'
'0401-bootsplash.patch'
'0402-bootsplash.patch'
'0403-bootsplash.patch'
@@ -69,22 +59,16 @@ source=("https://www.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.x
'0411-bootsplash.patch'
'0412-bootsplash.patch'
'0413-bootsplash.gitpatch'
+ # ACS override patch
'0999-acs.gitpatch')
-sha256sums=('57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8'
- '0817171996521675b3c1130568503f08d8b1672c955cc842200a21bf5914cd95'
- '93320dbe5928e51fb777a4f13dd9a7364eb150d7983073f7dc159e89a6ffa747'
+sha256sums=('027d7e8988bb69ac12ee92406c3be1fe13f990b1ca2249e226225cd1573308bb'
+ '3a09c2f1ad410c09cf03921abeed1a6ca7c38138fb508171ee673d429d179171'
+ 'cb2d729cc20743014d9e3bd08facb9f5bdd19d9fa89014f415c61b4a6eb78e97'
'986f8d802f37b72a54256f0ab84da83cb229388d58c0b6750f7c770818a18421'
- 'e2823eff3355b7c88a3fa327ea2f84f23cbd36569e0a5f0f76599023f63a52ca'
- 'ce53090a4572cd6162d22225113082f7e4df5028a1230529d170460e26dcf849'
- 'ab0360eac59329eb84f028c2f402ee4a17e4b3dfacb7957355e6178d35af87b9'
- '76701599bbafa49b90ccb073ef29ce2dc3731566e8fa852bd1e9e7796e184754'
- 'a2a0a0542055a6a921542fbb05cedb6eb6f3d3fb0c038bfb2304bfd3931a0f71'
+ 'b89188b1bc3516d54965dd36def6a2af3d81379e53ff7e527bbd91f77c6f191b'
'7823d7488f42bc4ed7dfae6d1014dbde679d8b862c9a3697a39ba0dae5918978'
- '844e66a95d7df754c55ac2f1ce7e215b1e56e20ca095462d926a993d557b20e0'
- 'd9330ea593829a6ef3b824db9570253280cbff7da2b4beb47cbc037824d1a29b'
'5e804e1f241ce542f3f0e83d274ede6aa4b0539e510fb9376f8106e8732ce69b'
- 'e8e6120035977903a7117ba215809b9b162b64a789848107513f219180baaada'
- '1d58ef2991c625f6f0eb33b4cb8303932f53f1c4694e42bae24c9cd36d2ad013'
+ '365d4225a7db60bd064ebbc34ce0ae582a0c378ad6c4cec7960a5ae4641a6757'
'2b11905b63b05b25807dd64757c779da74dd4c37e36d3f7a46485b1ee5a9d326'
'94a8538251ad148f1025cc3de446ce64f73dc32b01815426fb159c722e8fa5bc'
'1f18c5c10a3c63e41ecd05ad34cd9f6653ba96e9f1049ce2b7bb6da2578ae710'
@@ -101,7 +85,7 @@ sha256sums=('57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8'
'27471eee564ca3149dd271b0817719b5565a9594dc4d884fe3dc51a5f03832bc'
'60e295601e4fb33d9bf65f198c54c7eb07c0d1e91e2ad1e0dd6cd6e142cb266d'
'035ea4b2a7621054f4560471f45336b981538a40172d8f17285910d4e0e0b3ef'
- '6d6b327ec7c7798f628f98ab964f4457d3cf043bad2632eb8f27548478a83cc1')
+ '2542b5cea79ab5817ce3d30c54acd045966b9c14587bfb0b2f50d473da48a1d5')
prepare() {
cd "linux-${_basekernel}"
@@ -237,6 +221,9 @@ package_linux-acs-manjaro-headers() {
# add objtool for external module building and enabled VALIDATION_STACK option
install -Dt "${_builddir}/tools/objtool" tools/objtool/objtool
+ # required when DEBUG_INFO_BTF_MODULES is enabled
+ install -Dt "${_builddir}/tools/bpf/resolve_btfids" tools/bpf/resolve_btfids/resolve_btfids
+
# remove unneeded architectures
local _arch
for _arch in "${_builddir}"/arch/*/; do
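The sha256sums block above changes in lockstep with the reworked source list; a maintainer-side sketch for regenerating it, assuming pacman-contrib is installed so updpkgsums is available:

    # run in the directory containing this PKGBUILD;
    # updpkgsums downloads the listed sources and rewrites the sha256sums array in place
    updpkgsums
    # alternatively, print the checksum array and paste it in manually
    makepkg -g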
diff --git a/config b/config
index f2b65cab7cbf..73690e4abb01 100644
--- a/config
+++ b/config
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/x86 5.15.15-1 Kernel Configuration
+# Linux/x86 5.16.0-1 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="gcc (GCC) 11.1.0"
CONFIG_CC_IS_GCC=y
@@ -121,6 +121,7 @@ CONFIG_BPF_JIT_DEFAULT_ON=y
CONFIG_BPF_LSM=y
# end of BPF subsystem
+CONFIG_PREEMPT_BUILD=y
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
@@ -191,6 +192,7 @@ CONFIG_UCLAMP_BUCKETS_COUNT=5
CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
CONFIG_CC_HAS_INT128=y
+CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5"
CONFIG_ARCH_SUPPORTS_INT128=y
CONFIG_NUMA_BALANCING=y
CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
@@ -411,6 +413,7 @@ CONFIG_NR_CPUS_RANGE_BEGIN=2
CONFIG_NR_CPUS_RANGE_END=512
CONFIG_NR_CPUS_DEFAULT=64
CONFIG_NR_CPUS=320
+CONFIG_SCHED_CLUSTER=y
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
CONFIG_SCHED_MC_PRIO=y
@@ -511,6 +514,7 @@ CONFIG_LEGACY_VSYSCALL_XONLY=y
# CONFIG_LEGACY_VSYSCALL_NONE is not set
# CONFIG_CMDLINE_BOOL is not set
CONFIG_MODIFY_LDT_SYSCALL=y
+# CONFIG_STRICT_SIGALTSTACK_SIZE is not set
CONFIG_HAVE_LIVEPATCH=y
# CONFIG_LIVEPATCH is not set
# end of Processor type and features
@@ -712,6 +716,7 @@ CONFIG_KVM_AMD=m
CONFIG_KVM_AMD_SEV=y
CONFIG_KVM_XEN=y
CONFIG_KVM_MMU_AUDIT=y
+CONFIG_KVM_EXTERNAL_WRITE_TRACKING=y
CONFIG_AS_AVX512=y
CONFIG_AS_SHA1_NI=y
CONFIG_AS_SHA256_NI=y
@@ -740,6 +745,7 @@ CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_OPTPROBES=y
CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y
CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y
CONFIG_HAVE_NMI=y
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
@@ -834,6 +840,7 @@ CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_ARCH_HAS_ELFCORE_COMPAT=y
CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y
+CONFIG_DYNAMIC_SIGFRAME=y
#
# GCOV-based kernel profiling
@@ -979,10 +986,10 @@ CONFIG_SPARSEMEM_VMEMMAP=y
CONFIG_HAVE_FAST_GUP=y
CONFIG_NUMA_KEEP_MEMINFO=y
CONFIG_MEMORY_ISOLATION=y
+CONFIG_EXCLUSIVE_SYSTEM_RAM=y
CONFIG_HAVE_BOOTMEM_INFO_NODE=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTPLUG_SPARSE=y
CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -1206,6 +1213,8 @@ CONFIG_BRIDGE_NETFILTER=m
# Core Netfilter Configuration
#
CONFIG_NETFILTER_INGRESS=y
+CONFIG_NETFILTER_EGRESS=y
+CONFIG_NETFILTER_SKIP_EGRESS=y
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_FAMILY_BRIDGE=y
CONFIG_NETFILTER_FAMILY_ARP=y
@@ -1604,10 +1613,11 @@ CONFIG_NET_DSA_TAG_DSA=m
CONFIG_NET_DSA_TAG_EDSA=m
CONFIG_NET_DSA_TAG_MTK=m
CONFIG_NET_DSA_TAG_KSZ=m
-CONFIG_NET_DSA_TAG_RTL4_A=m
CONFIG_NET_DSA_TAG_OCELOT=m
CONFIG_NET_DSA_TAG_OCELOT_8021Q=m
CONFIG_NET_DSA_TAG_QCA=m
+CONFIG_NET_DSA_TAG_RTL4_A=m
+CONFIG_NET_DSA_TAG_RTL8_4=m
CONFIG_NET_DSA_TAG_LAN9303=m
CONFIG_NET_DSA_TAG_SJA1105=m
CONFIG_NET_DSA_TAG_TRAILER=m
@@ -1963,7 +1973,7 @@ CONFIG_AF_RXRPC_DEBUG=y
CONFIG_RXKAD=y
CONFIG_AF_KCM=m
CONFIG_STREAM_PARSER=y
-CONFIG_MCTP=m
+# CONFIG_MCTP is not set
CONFIG_FIB_RULES=y
CONFIG_WIRELESS=y
CONFIG_WIRELESS_EXT=y
@@ -2063,7 +2073,7 @@ CONFIG_LWTUNNEL_BPF=y
CONFIG_DST_CACHE=y
CONFIG_GRO_CELLS=y
CONFIG_SOCK_VALIDATE_XMIT=y
-CONFIG_NET_SELFTESTS=m
+CONFIG_NET_SELFTESTS=y
CONFIG_NET_SOCK_MSG=y
CONFIG_NET_DEVLINK=y
CONFIG_PAGE_POOL=y
@@ -2258,6 +2268,7 @@ CONFIG_FW_CFG_SYSFS=m
# CONFIG_FW_CFG_SYSFS_CMDLINE is not set
CONFIG_SYSFB=y
# CONFIG_SYSFB_SIMPLEFB is not set
+CONFIG_CS_DSP=m
CONFIG_GOOGLE_FIRMWARE=y
# CONFIG_GOOGLE_SMI is not set
CONFIG_GOOGLE_COREBOOT_TABLE=m
@@ -2334,7 +2345,7 @@ CONFIG_MTD_BLOCK=m
# CONFIG_SM_FTL is not set
# CONFIG_MTD_OOPS is not set
# CONFIG_MTD_SWAP is not set
-CONFIG_MTD_PARTITIONED_MASTER=y
+# CONFIG_MTD_PARTITIONED_MASTER is not set
#
# RAM/ROM/Flash chip drivers
@@ -2346,7 +2357,7 @@ CONFIG_MTD_MAP_BANK_WIDTH_2=y
CONFIG_MTD_MAP_BANK_WIDTH_4=y
CONFIG_MTD_CFI_I1=y
CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_RAM is not set
+CONFIG_MTD_RAM=m
CONFIG_MTD_ROM=m
# CONFIG_MTD_ABSENT is not set
# end of RAM/ROM/Flash chip drivers
@@ -2357,7 +2368,7 @@ CONFIG_MTD_ROM=m
# CONFIG_MTD_COMPLEX_MAPPINGS is not set
# CONFIG_MTD_PHYSMAP is not set
# CONFIG_MTD_INTEL_VR_NOR is not set
-# CONFIG_MTD_PLATRAM is not set
+CONFIG_MTD_PLATRAM=m
# end of Mapping drivers for chip access
#
@@ -2370,9 +2381,7 @@ CONFIG_MTD_ROM=m
# CONFIG_MTD_SST25L is not set
# CONFIG_MTD_SLRAM is not set
CONFIG_MTD_PHRAM=m
-CONFIG_MTD_MTDRAM=m
-CONFIG_MTDRAM_TOTAL_SIZE=4096
-CONFIG_MTDRAM_ERASE_SIZE=128
+# CONFIG_MTD_MTDRAM is not set
CONFIG_MTD_BLOCK2MTD=m
#
@@ -2473,7 +2482,6 @@ CONFIG_ZRAM_WRITEBACK=y
# CONFIG_ZRAM_MEMORY_TRACKING is not set
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
-CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
# CONFIG_DRBD_FAULT_INJECTION is not set
CONFIG_BLK_DEV_NBD=m
@@ -2576,6 +2584,7 @@ CONFIG_INTEL_MEI=m
CONFIG_INTEL_MEI_ME=m
CONFIG_INTEL_MEI_TXE=m
CONFIG_INTEL_MEI_HDCP=m
+# CONFIG_INTEL_MEI_PXP is not set
CONFIG_VMWARE_VMCI=m
CONFIG_GENWQE=m
CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0
@@ -2686,6 +2695,7 @@ CONFIG_SCSI_UFS_CDNS_PLATFORM=m
CONFIG_SCSI_UFS_BSG=y
CONFIG_SCSI_UFS_CRYPTO=y
CONFIG_SCSI_UFS_HPB=y
+# CONFIG_SCSI_UFS_HWMON is not set
CONFIG_SCSI_HPTIOP=m
CONFIG_SCSI_BUSLOGIC=m
CONFIG_SCSI_FLASHPOINT=y
@@ -2903,6 +2913,7 @@ CONFIG_DM_SWITCH=m
CONFIG_DM_LOG_WRITES=m
CONFIG_DM_INTEGRITY=m
CONFIG_DM_ZONED=m
+CONFIG_DM_AUDIT=y
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -2959,6 +2970,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+# CONFIG_AMT is not set
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3079,6 +3091,9 @@ CONFIG_AMD_XGBE_HAVE_ECC=y
CONFIG_NET_VENDOR_AQUANTIA=y
CONFIG_AQTION=m
CONFIG_NET_VENDOR_ARC=y
+CONFIG_NET_VENDOR_ASIX=y
+CONFIG_SPI_AX88796C=y
+# CONFIG_SPI_AX88796C_COMPRESSION is not set
CONFIG_NET_VENDOR_ATHEROS=y
CONFIG_ATL2=m
CONFIG_ATL1=m
@@ -3189,6 +3204,7 @@ CONFIG_I40E_DCB=y
CONFIG_IAVF=m
CONFIG_I40EVF=m
CONFIG_ICE=m
+CONFIG_ICE_SWITCHDEV=y
CONFIG_FM10K=m
CONFIG_IGC=m
CONFIG_NET_VENDOR_MICROSOFT=y
@@ -3383,10 +3399,10 @@ CONFIG_SKFP=m
# CONFIG_HIPPI is not set
CONFIG_NET_SB1000=m
CONFIG_PHYLINK=m
-CONFIG_PHYLIB=m
+CONFIG_PHYLIB=y
CONFIG_SWPHY=y
CONFIG_LED_TRIGGER_PHY=y
-CONFIG_FIXED_PHY=m
+CONFIG_FIXED_PHY=y
CONFIG_SFP=m
#
@@ -3438,15 +3454,11 @@ CONFIG_DP83869_PHY=m
CONFIG_VITESSE_PHY=m
CONFIG_XILINX_GMII2RGMII=m
CONFIG_MICREL_KS8995MA=m
-
-#
-# MCTP Device Drivers
-#
-CONFIG_MDIO_DEVICE=m
-CONFIG_MDIO_BUS=m
-CONFIG_FWNODE_MDIO=m
-CONFIG_ACPI_MDIO=m
-CONFIG_MDIO_DEVRES=m
+CONFIG_MDIO_DEVICE=y
+CONFIG_MDIO_BUS=y
+CONFIG_FWNODE_MDIO=y
+CONFIG_ACPI_MDIO=y
+CONFIG_MDIO_DEVRES=y
CONFIG_MDIO_BITBANG=m
CONFIG_MDIO_BCM_UNIMAC=m
CONFIG_MDIO_CAVIUM=m
@@ -3740,7 +3752,9 @@ CONFIG_MT7663_USB_SDIO_COMMON=m
CONFIG_MT7663U=m
CONFIG_MT7663S=m
CONFIG_MT7915E=m
+CONFIG_MT7921_COMMON=m
CONFIG_MT7921E=m
+CONFIG_MT7921S=m
CONFIG_WLAN_VENDOR_MICROCHIP=y
CONFIG_WILC1000=m
CONFIG_WILC1000_SDIO=m
@@ -3812,6 +3826,12 @@ CONFIG_RTW88_8723DE=m
CONFIG_RTW88_8821CE=m
CONFIG_RTW88_DEBUG=y
CONFIG_RTW88_DEBUGFS=y
+CONFIG_RTW89=m
+CONFIG_RTW89_CORE=m
+CONFIG_RTW89_PCI=m
+CONFIG_RTW89_8852AE=m
+# CONFIG_RTW89_DEBUGMSG is not set
+# CONFIG_RTW89_DEBUGFS is not set
CONFIG_WLAN_VENDOR_RSI=y
CONFIG_RSI_91X=m
CONFIG_RSI_DEBUGFS=y
@@ -3955,6 +3975,7 @@ CONFIG_KEYBOARD_TWL4030=m
CONFIG_KEYBOARD_XTKBD=m
CONFIG_KEYBOARD_CROS_EC=m
CONFIG_KEYBOARD_MTK_PMIC=m
+CONFIG_KEYBOARD_CYPRESS_SF=y
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=m
CONFIG_MOUSE_PS2_ALPS=y
@@ -4320,6 +4341,7 @@ CONFIG_HVC_DRIVER=y
CONFIG_HVC_IRQ=y
CONFIG_HVC_XEN=y
CONFIG_HVC_XEN_FRONTEND=y
+# CONFIG_RPMSG_TTY is not set
CONFIG_SERIAL_DEV_BUS=y
CONFIG_SERIAL_DEV_CTRL_TTYPORT=y
CONFIG_PRINTER=m
@@ -4333,6 +4355,7 @@ CONFIG_IPMI_PLAT_DATA=y
CONFIG_IPMI_DEVICE_INTERFACE=m
CONFIG_IPMI_SI=m
CONFIG_IPMI_SSIF=m
+# CONFIG_IPMI_IPMB is not set
CONFIG_IPMI_WATCHDOG=m
CONFIG_IPMI_POWEROFF=m
CONFIG_IPMB_DEVICE_INTERFACE=m
@@ -4592,6 +4615,10 @@ CONFIG_PINCTRL_MCP23S08_I2C=m
CONFIG_PINCTRL_MCP23S08_SPI=m
CONFIG_PINCTRL_MCP23S08=m
CONFIG_PINCTRL_SX150X=y
+
+#
+# Intel pinctrl drivers
+#
CONFIG_PINCTRL_BAYTRAIL=y
CONFIG_PINCTRL_CHERRYVIEW=y
CONFIG_PINCTRL_LYNXPOINT=y
@@ -4610,6 +4637,7 @@ CONFIG_PINCTRL_LAKEFIELD=y
CONFIG_PINCTRL_LEWISBURG=y
CONFIG_PINCTRL_SUNRISEPOINT=y
CONFIG_PINCTRL_TIGERLAKE=y
+# end of Intel pinctrl drivers
#
# Renesas pinctrl drivers
@@ -4942,6 +4970,7 @@ CONFIG_SENSORS_MAX1668=m
CONFIG_SENSORS_MAX197=m
CONFIG_SENSORS_MAX31722=m
CONFIG_SENSORS_MAX31730=m
+CONFIG_SENSORS_MAX6620=y
CONFIG_SENSORS_MAX6621=m
CONFIG_SENSORS_MAX6639=m
CONFIG_SENSORS_MAX6642=m
@@ -5337,7 +5366,6 @@ CONFIG_MFD_TPS65910=y
CONFIG_MFD_TPS65912=m
CONFIG_MFD_TPS65912_I2C=m
CONFIG_MFD_TPS65912_SPI=m
-CONFIG_MFD_TPS80031=y
CONFIG_TWL4030_CORE=y
CONFIG_MFD_TWL4030_AUDIO=y
CONFIG_TWL6040_CORE=y
@@ -5457,7 +5485,6 @@ CONFIG_REGULATOR_TPS6524X=m
CONFIG_REGULATOR_TPS6586X=m
CONFIG_REGULATOR_TPS65910=m
CONFIG_REGULATOR_TPS65912=m
-CONFIG_REGULATOR_TPS80031=m
CONFIG_REGULATOR_TWL4030=m
CONFIG_REGULATOR_WM831X=m
CONFIG_REGULATOR_WM8350=m
@@ -5497,12 +5524,15 @@ CONFIG_IR_TTUSBIR=m
CONFIG_RC_LOOPBACK=m
CONFIG_IR_SERIAL=m
CONFIG_IR_SERIAL_TRANSMITTER=y
-CONFIG_IR_SIR=m
CONFIG_RC_XBOX_DVD=m
CONFIG_IR_TOY=m
CONFIG_CEC_CORE=y
CONFIG_CEC_NOTIFIER=y
CONFIG_CEC_PIN=y
+
+#
+# CEC support
+#
CONFIG_MEDIA_CEC_RC=y
# CONFIG_CEC_PIN_ERROR_INJ is not set
CONFIG_MEDIA_CEC_SUPPORT=y
@@ -5513,6 +5543,8 @@ CONFIG_CEC_SECO=m
CONFIG_CEC_SECO_RC=y
CONFIG_USB_PULSE8_CEC=m
CONFIG_USB_RAINSHADOW_CEC=m
+# end of CEC support
+
CONFIG_MEDIA_SUPPORT=m
CONFIG_MEDIA_SUPPORT_FILTER=y
CONFIG_MEDIA_SUBDRV_AUTOSELECT=y
@@ -5556,10 +5588,6 @@ CONFIG_VIDEOBUF_VMALLOC=m
#
CONFIG_MEDIA_CONTROLLER_DVB=y
CONFIG_MEDIA_CONTROLLER_REQUEST_API=y
-
-#
-# Please notice that the enabled Media controller Request API is EXPERIMENTAL
-#
# end of Media controller options
#
@@ -5935,6 +5963,7 @@ CONFIG_VIDEO_M52790=m
CONFIG_VIDEO_APTINA_PLL=m
CONFIG_VIDEO_CCS_PLL=m
CONFIG_VIDEO_HI556=m
+# CONFIG_VIDEO_HI846 is not set
CONFIG_VIDEO_IMX208=m
CONFIG_VIDEO_IMX214=m
CONFIG_VIDEO_IMX219=m
@@ -5966,6 +5995,7 @@ CONFIG_VIDEO_OV9640=m
CONFIG_VIDEO_OV9650=m
CONFIG_VIDEO_OV9734=m
CONFIG_VIDEO_OV13858=m
+# CONFIG_VIDEO_OV13B10 is not set
CONFIG_VIDEO_VS6624=m
CONFIG_VIDEO_MT9M001=m
CONFIG_VIDEO_MT9M032=m
@@ -6730,6 +6760,9 @@ CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m
CONFIG_SND_SOC_AMD_RENOIR=m
CONFIG_SND_SOC_AMD_RENOIR_MACH=m
CONFIG_SND_SOC_AMD_ACP5x=m
+# CONFIG_SND_SOC_AMD_VANGOGH_MACH is not set
+# CONFIG_SND_SOC_AMD_ACP6x is not set
+# CONFIG_SND_SOC_AMD_ACP_COMMON is not set
CONFIG_SND_ATMEL_SOC=m
# CONFIG_SND_BCM63XX_I2S_WHISTLER is not set
CONFIG_SND_DESIGNWARE_I2S=m
@@ -6820,6 +6853,7 @@ CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m
CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m
+# CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH is not set
CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
@@ -6911,6 +6945,8 @@ CONFIG_SND_SOC_CS35L33=m
CONFIG_SND_SOC_CS35L34=m
CONFIG_SND_SOC_CS35L35=m
CONFIG_SND_SOC_CS35L36=m
+# CONFIG_SND_SOC_CS35L41_SPI is not set
+# CONFIG_SND_SOC_CS35L41_I2C is not set
CONFIG_SND_SOC_CS42L42=m
CONFIG_SND_SOC_CS42L51=m
CONFIG_SND_SOC_CS42L51_I2C=m
@@ -6951,6 +6987,7 @@ CONFIG_SND_SOC_MAX98357A=m
CONFIG_SND_SOC_MAX98504=m
CONFIG_SND_SOC_MAX9867=m
CONFIG_SND_SOC_MAX98927=m
+# CONFIG_SND_SOC_MAX98520 is not set
CONFIG_SND_SOC_MAX98373=m
CONFIG_SND_SOC_MAX98373_I2C=m
CONFIG_SND_SOC_MAX98373_SDW=m
@@ -7003,6 +7040,7 @@ CONFIG_SND_SOC_RT5677_SPI=m
CONFIG_SND_SOC_RT5682=m
CONFIG_SND_SOC_RT5682_I2C=m
CONFIG_SND_SOC_RT5682_SDW=m
+CONFIG_SND_SOC_RT5682S=m
CONFIG_SND_SOC_RT700=m
CONFIG_SND_SOC_RT700_SDW=m
CONFIG_SND_SOC_RT711=m
@@ -7011,6 +7049,7 @@ CONFIG_SND_SOC_RT711_SDCA_SDW=m
CONFIG_SND_SOC_RT715=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT715_SDCA_SDW=m
+# CONFIG_SND_SOC_RT9120 is not set
# CONFIG_SND_SOC_SDW_MOCKUP is not set
CONFIG_SND_SOC_SGTL5000=m
CONFIG_SND_SOC_SI476X=m
@@ -7094,6 +7133,7 @@ CONFIG_SND_SOC_MT6660=m
CONFIG_SND_SOC_NAU8315=m
CONFIG_SND_SOC_NAU8540=m
CONFIG_SND_SOC_NAU8810=m
+# CONFIG_SND_SOC_NAU8821 is not set
CONFIG_SND_SOC_NAU8822=m
CONFIG_SND_SOC_NAU8824=m
CONFIG_SND_SOC_NAU8825=m
@@ -7167,6 +7207,7 @@ CONFIG_HID_KYE=m
CONFIG_HID_UCLOGIC=m
CONFIG_HID_WALTOP=m
CONFIG_HID_VIEWSONIC=m
+# CONFIG_HID_XIAOMI is not set
CONFIG_HID_GYRATION=m
CONFIG_HID_ICADE=m
CONFIG_HID_ITE=m
@@ -7190,6 +7231,7 @@ CONFIG_HID_REDRAGON=m
CONFIG_HID_MICROSOFT=m
CONFIG_HID_MONTEREY=m
CONFIG_HID_MULTITOUCH=m
+# CONFIG_HID_NINTENDO is not set
CONFIG_HID_NTI=m
CONFIG_HID_NTRIG=m
CONFIG_HID_ORTEK=m
@@ -7968,7 +8010,6 @@ CONFIG_RTC_DRV_BQ32K=m
CONFIG_RTC_DRV_PALMAS=m
CONFIG_RTC_DRV_TPS6586X=m
CONFIG_RTC_DRV_TPS65910=m
-CONFIG_RTC_DRV_TPS80031=m
CONFIG_RTC_DRV_RC5T583=m
CONFIG_RTC_DRV_S35390A=m
CONFIG_RTC_DRV_FM3130=m
@@ -8141,6 +8182,7 @@ CONFIG_ACRN_HSM=m
CONFIG_VIRTIO=y
CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS=y
CONFIG_VIRTIO_PCI_LIB=m
+CONFIG_VIRTIO_PCI_LIB_LEGACY=m
CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_PCI_LEGACY=y
@@ -8161,6 +8203,7 @@ CONFIG_IFCVF=m
CONFIG_MLX5_VDPA=y
CONFIG_MLX5_VDPA_NET=m
CONFIG_VP_VDPA=m
+# CONFIG_ALIBABA_ENI_VDPA is not set
CONFIG_VHOST_IOTLB=m
CONFIG_VHOST_RING=m
CONFIG_VHOST=m
@@ -8198,6 +8241,7 @@ CONFIG_XEN_GNTDEV_DMABUF=y
CONFIG_XEN_GRANT_DEV_ALLOC=m
CONFIG_XEN_GRANT_DMA_ALLOC=y
CONFIG_SWIOTLB_XEN=y
+CONFIG_XEN_PCI_STUB=y
CONFIG_XEN_PCIDEV_BACKEND=m
CONFIG_XEN_PVCALLS_FRONTEND=m
CONFIG_XEN_PVCALLS_BACKEND=y
@@ -8228,7 +8272,6 @@ CONFIG_RTL8192E=m
CONFIG_RTL8723BS=m
CONFIG_R8712U=m
CONFIG_R8188EU=m
-CONFIG_88EU_AP_MODE=y
CONFIG_RTS5208=m
CONFIG_VT6655=m
CONFIG_VT6656=m
@@ -8345,6 +8388,7 @@ CONFIG_WMI_BMOF=m
CONFIG_HUAWEI_WMI=m
CONFIG_MXM_WMI=m
CONFIG_PEAQ_WMI=m
+CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
CONFIG_XIAOMI_WMI=m
CONFIG_GIGABYTE_WMI=m
CONFIG_ACERHDF=m
@@ -8394,6 +8438,7 @@ CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y
CONFIG_THINKPAD_ACPI_VIDEO=y
CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y
CONFIG_THINKPAD_LMI=m
+CONFIG_X86_PLATFORM_DRIVERS_INTEL=y
CONFIG_INTEL_ATOMISP2_PDX86=y
CONFIG_INTEL_ATOMISP2_LED=m
CONFIG_INTEL_SAR_INT1092=m
@@ -8420,6 +8465,7 @@ CONFIG_INTEL_INT0002_VGPIO=m
CONFIG_INTEL_OAKTRAIL=m
CONFIG_INTEL_BXTWC_PMIC_TMU=m
CONFIG_INTEL_CHTDC_TI_PWRBTN=m
+# CONFIG_INTEL_ISHTP_ECLITE is not set
CONFIG_INTEL_MRFLD_PWRBTN=m
CONFIG_INTEL_PUNIT_IPC=m
CONFIG_INTEL_RST=m
@@ -8429,6 +8475,7 @@ CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
CONFIG_MSI_LAPTOP=m
CONFIG_MSI_WMI=m
CONFIG_PCENGINES_APU2=m
+# CONFIG_BARCO_P50_GPIO is not set
CONFIG_SAMSUNG_LAPTOP=m
CONFIG_SAMSUNG_Q10=m
CONFIG_ACPI_TOSHIBA=m
@@ -8480,6 +8527,7 @@ CONFIG_WILCO_EC_TELEMETRY=m
CONFIG_MELLANOX_PLATFORM=y
CONFIG_MLXREG_HOTPLUG=m
CONFIG_MLXREG_IO=m
+# CONFIG_MLXREG_LC is not set
CONFIG_SURFACE_PLATFORMS=y
CONFIG_SURFACE3_WMI=m
CONFIG_SURFACE_3_BUTTON=m
@@ -8499,14 +8547,6 @@ CONFIG_HAVE_CLK=y
CONFIG_HAVE_CLK_PREPARE=y
CONFIG_COMMON_CLK=y
CONFIG_COMMON_CLK_WM831X=m
-
-#
-# Clock driver for ARM Reference designs
-#
-# CONFIG_ICST is not set
-# CONFIG_CLK_SP810 is not set
-# end of Clock driver for ARM Reference designs
-
CONFIG_LMK04832=m
CONFIG_COMMON_CLK_MAX9485=m
CONFIG_COMMON_CLK_SI5341=m
@@ -8692,6 +8732,10 @@ CONFIG_IIO_TRIGGERED_EVENT=m
#
CONFIG_ADIS16201=m
CONFIG_ADIS16209=m
+# CONFIG_ADXL313_I2C is not set
+# CONFIG_ADXL313_SPI is not set
+# CONFIG_ADXL355_I2C is not set
+# CONFIG_ADXL355_SPI is not set
CONFIG_ADXL372=m
CONFIG_ADXL372_SPI=m
CONFIG_ADXL372_I2C=m
@@ -8840,11 +8884,13 @@ CONFIG_PMS7003=m
CONFIG_SCD30_CORE=m
CONFIG_SCD30_I2C=m
CONFIG_SCD30_SERIAL=m
+# CONFIG_SCD4X is not set
CONFIG_SENSIRION_SGP30=m
CONFIG_SENSIRION_SGP40=m
CONFIG_SPS30=m
CONFIG_SPS30_I2C=m
CONFIG_SPS30_SERIAL=m
+# CONFIG_SENSEAIR_SUNRISE_CO2 is not set
CONFIG_VZ89X=m
# end of Chemical Sensors
@@ -8937,6 +8983,7 @@ CONFIG_AD9523=m
#
CONFIG_ADF4350=m
CONFIG_ADF4371=m
+# CONFIG_ADRF6780 is not set
# end of Phase-Locked Loop (PLL) frequency synthesizers
# end of Frequency Synthesizers DDS/PLL
@@ -9232,6 +9279,7 @@ CONFIG_TMP117=m
CONFIG_TSYS01=m
CONFIG_TSYS02D=m
CONFIG_MAX31856=m
+# CONFIG_MAX31865 is not set
# end of Temperature sensors
CONFIG_NTB=m
@@ -9280,7 +9328,13 @@ CONFIG_RESET_TI_SYSCON=m
CONFIG_GENERIC_PHY=y
CONFIG_USB_LGM_PHY=m
CONFIG_PHY_CAN_TRANSCEIVER=m
+
+#
+# PHY drivers for Broadcom platforms
+#
CONFIG_BCM_KONA_USB2_PHY=m
+# end of PHY drivers for Broadcom platforms
+
CONFIG_PHY_PXA_28NM_HSIC=m
CONFIG_PHY_PXA_28NM_USB2=m
CONFIG_PHY_CPCAP_USB=m
@@ -9680,6 +9734,7 @@ CONFIG_EROFS_FS_XATTR=y
CONFIG_EROFS_FS_POSIX_ACL=y
CONFIG_EROFS_FS_SECURITY=y
CONFIG_EROFS_FS_ZIP=y
+# CONFIG_EROFS_FS_ZIP_LZMA is not set
CONFIG_VBOXSF_FS=m
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=m
@@ -9838,7 +9893,6 @@ CONFIG_SECURITY_PATH=y
CONFIG_LSM_MMAP_MIN_ADDR=65536
CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
CONFIG_HARDENED_USERCOPY=y
-CONFIG_HARDENED_USERCOPY_FALLBACK=y
CONFIG_FORTIFY_SOURCE=y
# CONFIG_STATIC_USERMODEHELPER is not set
CONFIG_SECURITY_SELINUX=y
@@ -10228,6 +10282,7 @@ CONFIG_XZ_DEC_IA64=y
CONFIG_XZ_DEC_ARM=y
CONFIG_XZ_DEC_ARMTHUMB=y
CONFIG_XZ_DEC_SPARC=y
+# CONFIG_XZ_DEC_MICROLZMA is not set
CONFIG_XZ_DEC_BCJ=y
# CONFIG_XZ_DEC_TEST is not set
CONFIG_DECOMPRESS_GZIP=y
@@ -10586,6 +10641,8 @@ CONFIG_HIST_TRIGGERS=y
# CONFIG_HIST_TRIGGERS_DEBUG is not set
# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
# CONFIG_SAMPLES is not set
+CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y
+CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y
CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
CONFIG_STRICT_DEVMEM=y
CONFIG_IO_STRICT_DEVMEM=y
@@ -10631,7 +10688,6 @@ CONFIG_RUNTIME_TESTING_MENU=y
# CONFIG_LKDTM is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_DIV64 is not set
-# CONFIG_KPROBES_SANITY_TEST is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_RBTREE_TEST is not set
# CONFIG_REED_SOLOMON_TEST is not set
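Several of the config changes above flip tristate symbols between =m and =y (for example CONFIG_PHYLIB and CONFIG_FIXED_PHY); a quick way to spot-check one symbol in the packaged config against a running kernel, assuming CONFIG_IKCONFIG_PROC is enabled so /proc/config.gz exists:

    # compare one symbol between the packaged config and the booted kernel
    grep '^CONFIG_PHYLIB' config
    zgrep '^CONFIG_PHYLIB' /proc/config.gz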