18 files changed, 1199 insertions, 10294 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 7281d81aefb6..18eec9fb8a91 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,5 +1,5 @@
 pkgbase = linux-acs-manjaro
-	pkgver = 5.15.16
+	pkgver = 5.16.2
 	pkgrel = 1
 	url = https://www.kernel.org/
 	arch = x86_64
@@ -17,24 +17,17 @@ pkgbase = linux-acs-manjaro
 	makedepends = tar
 	makedepends = xz
 	options = !strip
-	source = https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.15.tar.xz
-	source = https://www.kernel.org/pub/linux/kernel/v5.x/patch-5.15.16.xz
+	source = https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.16.tar.xz
+	source = https://www.kernel.org/pub/linux/kernel/v5.x/patch-5.16.2.xz
 	source = config
 	source = 0001-ZEN-Add-sysctl-and-CONFIG-to-disallow-unprivileged-CLONE_NEWUSER.patch
-	source = 0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch
-	source = 0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch
-	source = 0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch
-	source = 0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch
-	source = 0006-lg-laptop_Recognize_more_models.patch
+	source = 0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch
 	source = 0101-i2c-nuvoton-nc677x-hwmon-driver.patch
-	source = 0103-futex.patch
-	source = 0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch
 	source = 0105-quirk-kernel-org-bug-210681-firmware_rome_error.patch
-	source = 0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch::https://patchwork.freedesktop.org/patch/463650/raw/
-	source = 0201-lenovo-wmi2.patch
-	source = 0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
-	source = 0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch
-	source = 0303-revert-fbcon-remove-soft-scrollback-code.patch
+	source = 0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch
+	source = 0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
+	source = 0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch
+	source = 0304-revert-fbcon-remove-soft-scrollback-code.patch
 	source = 0401-bootsplash.patch
 	source = 0402-bootsplash.patch
 	source = 0403-bootsplash.patch
@@ -49,21 +42,14 @@ pkgbase = linux-acs-manjaro
 	source = 0412-bootsplash.patch
 	source = 0413-bootsplash.gitpatch
 	source = 0999-acs.gitpatch
-	sha256sums = 57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8
-	sha256sums = 0817171996521675b3c1130568503f08d8b1672c955cc842200a21bf5914cd95
-	sha256sums = 93320dbe5928e51fb777a4f13dd9a7364eb150d7983073f7dc159e89a6ffa747
+	sha256sums = 027d7e8988bb69ac12ee92406c3be1fe13f990b1ca2249e226225cd1573308bb
+	sha256sums = 3a09c2f1ad410c09cf03921abeed1a6ca7c38138fb508171ee673d429d179171
+	sha256sums = cb2d729cc20743014d9e3bd08facb9f5bdd19d9fa89014f415c61b4a6eb78e97
 	sha256sums = 986f8d802f37b72a54256f0ab84da83cb229388d58c0b6750f7c770818a18421
-	sha256sums = e2823eff3355b7c88a3fa327ea2f84f23cbd36569e0a5f0f76599023f63a52ca
-	sha256sums = ce53090a4572cd6162d22225113082f7e4df5028a1230529d170460e26dcf849
-	sha256sums = ab0360eac59329eb84f028c2f402ee4a17e4b3dfacb7957355e6178d35af87b9
-	sha256sums = 76701599bbafa49b90ccb073ef29ce2dc3731566e8fa852bd1e9e7796e184754
-	sha256sums = a2a0a0542055a6a921542fbb05cedb6eb6f3d3fb0c038bfb2304bfd3931a0f71
+	sha256sums = b89188b1bc3516d54965dd36def6a2af3d81379e53ff7e527bbd91f77c6f191b
 	sha256sums = 7823d7488f42bc4ed7dfae6d1014dbde679d8b862c9a3697a39ba0dae5918978
-	sha256sums = 844e66a95d7df754c55ac2f1ce7e215b1e56e20ca095462d926a993d557b20e0
-	sha256sums = d9330ea593829a6ef3b824db9570253280cbff7da2b4beb47cbc037824d1a29b
 	sha256sums = 5e804e1f241ce542f3f0e83d274ede6aa4b0539e510fb9376f8106e8732ce69b
-	sha256sums = e8e6120035977903a7117ba215809b9b162b64a789848107513f219180baaada
-	sha256sums = 1d58ef2991c625f6f0eb33b4cb8303932f53f1c4694e42bae24c9cd36d2ad013
+	sha256sums = 365d4225a7db60bd064ebbc34ce0ae582a0c378ad6c4cec7960a5ae4641a6757
 	sha256sums = 2b11905b63b05b25807dd64757c779da74dd4c37e36d3f7a46485b1ee5a9d326
 	sha256sums = 94a8538251ad148f1025cc3de446ce64f73dc32b01815426fb159c722e8fa5bc
 	sha256sums = 1f18c5c10a3c63e41ecd05ad34cd9f6653ba96e9f1049ce2b7bb6da2578ae710
@@ -80,7 +66,7 @@ pkgbase = linux-acs-manjaro
 	sha256sums = 27471eee564ca3149dd271b0817719b5565a9594dc4d884fe3dc51a5f03832bc
 	sha256sums = 60e295601e4fb33d9bf65f198c54c7eb07c0d1e91e2ad1e0dd6cd6e142cb266d
 	sha256sums = 035ea4b2a7621054f4560471f45336b981538a40172d8f17285910d4e0e0b3ef
-	sha256sums = 6d6b327ec7c7798f628f98ab964f4457d3cf043bad2632eb8f27548478a83cc1
+	sha256sums = 2542b5cea79ab5817ce3d30c54acd045966b9c14587bfb0b2f50d473da48a1d5
 
 pkgname = linux-acs-manjaro
 	pkgdesc = The Linux Manjaro standart kernel and modules with ACS patch
diff --git a/0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch b/0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch
index 38cf2bde55bd..80cd663cd131 100644
--- a/0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch
+++ b/0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch
@@ -1,4 +1,4 @@
-From ae3386d67597db29ad2ba2685815e224a39897bc Mon Sep 17 00:00:00 2001
+From efbb86e8bf678eb5a376deaa3b693fb7a21b8e41 Mon Sep 17 00:00:00 2001
 From: Kiran K <kiran.k@intel.com>
 Date: Wed, 13 Oct 2021 13:35:11 +0530
 Subject: [PATCH] Bluetooth: btintel: Fix bdaddress comparison with garbage
@@ -16,10 +16,10 @@ Reviewed-by: Tedd Ho-Jeong An <tedd.an@intel.com>
  1 file changed, 14 insertions(+), 8 deletions(-)
 
 diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
-index f1705b46fc8898..b9055a3e61ed76 100644
+index 9359bff4729659..8f9109b40961f4 100644
 --- a/drivers/bluetooth/btintel.c
 +++ b/drivers/bluetooth/btintel.c
-@@ -2006,14 +2006,16 @@ static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev,
+@@ -2081,14 +2081,16 @@ static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev,
  	if (ver->img_type == 0x03) {
  		btintel_clear_flag(hdev, INTEL_BOOTLOADER);
  		btintel_check_bdaddr(hdev);
@@ -44,7 +44,7 @@ index f1705b46fc8898..b9055a3e61ed76 100644
  	}
 
  	btintel_get_fw_name_tlv(ver, fwname, sizeof(fwname), "sfi");
-@@ -2303,6 +2305,10 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+@@ -2466,6 +2468,10 @@ static int btintel_setup_combined(struct hci_dev *hdev)
  		goto exit_error;
  	}
 
@@ -55,3 +55,4 @@ index f1705b46fc8898..b9055a3e61ed76 100644
  	/* For TLV type device, parse the tlv data */
  	err = btintel_parse_version_tlv(hdev, &ver_tlv, skb);
  	if (err) {
+
diff --git a/0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch b/0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch
deleted file mode 100644
index 01b324a03a17..000000000000
--- a/0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch
+++ /dev/null
@@ -1,21 +0,0 @@
-From 1ac8f753e4249e6864c1c42070ba957ceef1f82a Mon Sep 17 00:00:00 2001
-From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
-Date: Thu, 18 Nov 2021 22:53:31 +0100
-Subject: [PATCH] PCI: Add more NVIDIA controllers to the MSI masking quirk
-
-For: https://bugs.archlinux.org/task/72734
-For: https://bugs.archlinux.org/task/72777
----
- drivers/pci/quirks.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 208fa03acdda00..7fdb7e9c2e12c4 100644
---- a/drivers/pci/quirks.c
-+++ b/drivers/pci/quirks.c
-@@ -5802,3 +5802,5 @@ static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
- 	pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
- }
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup);
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab9, nvidia_ion_ahci_fixup);
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0d88, nvidia_ion_ahci_fixup);
diff --git a/0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch b/0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch
deleted file mode 100644
index bc9dc1857912..000000000000
--- a/0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 74db74ec6ce112c6137d51610429e7ac9ea5b6c1 Mon Sep 17 00:00:00 2001
-From: Ajay Garg <ajaygargnsit@gmail.com>
-Date: Tue, 12 Oct 2021 19:26:53 +0530
-Subject: [PATCH] iommu: intel: do deep dma-unmapping, to avoid
- kernel-flooding.
-
-Origins at :
-https://lists.linuxfoundation.org/pipermail/iommu/2021-October/thread.html
-
-=== Changes from v1 => v2 ===
-
-a)
-Improved patch-description.
-
-b)
-A more root-level fix, as suggested by
-
-	1.
-	Alex Williamson <alex.williamson@redhat.com>
-
-	2.
-	Lu Baolu <baolu.lu@linux.intel.com>
-
-=== Issue ===
-
-Kernel-flooding is seen, when an x86_64 L1 guest (Ubuntu-21) is booted in qemu/kvm
-on a x86_64 host (Ubuntu-21), with a host-pci-device attached.
-
-Following kind of logs, along with the stacktraces, cause the flood :
-
-......
- DMAR: ERROR: DMA PTE for vPFN 0x428ec already set (to 3f6ec003 not 3f6ec003)
- DMAR: ERROR: DMA PTE for vPFN 0x428ed already set (to 3f6ed003 not 3f6ed003)
- DMAR: ERROR: DMA PTE for vPFN 0x428ee already set (to 3f6ee003 not 3f6ee003)
- DMAR: ERROR: DMA PTE for vPFN 0x428ef already set (to 3f6ef003 not 3f6ef003)
- DMAR: ERROR: DMA PTE for vPFN 0x428f0 already set (to 3f6f0003 not 3f6f0003)
-......
-
-=== Current Behaviour, leading to the issue ===
-
-Currently, when we do a dma-unmapping, we unmap/unlink the mappings, but
-the pte-entries are not cleared.
-
-Thus, following sequencing would flood the kernel-logs :
-
-i)
-A dma-unmapping makes the real/leaf-level pte-slot invalid, but the
-pte-content itself is not cleared.
-
-ii)
-Now, during some later dma-mapping procedure, as the pte-slot is about
-to hold a new pte-value, the intel-iommu checks if a prior
-pte-entry exists in the pte-slot. If it exists, it logs a kernel-error,
-along with a corresponding stacktrace.
-
-iii)
-Step ii) runs in abundance, and the kernel-logs run insane.
-
-=== Fix ===
-
-We ensure that as part of a dma-unmapping, each (unmapped) pte-slot
-is also cleared of its value/content (at the leaf-level, where the
-real mapping from a iova => pfn mapping is stored).
-
-This completes a "deep" dma-unmapping.
-
-Signed-off-by: Ajay Garg <ajaygargnsit@gmail.com>
-Link: https://lore.kernel.org/linux-iommu/20211012135653.3852-1-ajaygargnsit@gmail.com/
----
- drivers/iommu/intel/iommu.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 78f8c8e6803e97..d8da48a91ba3b2 100644
---- a/drivers/iommu/intel/iommu.c
-+++ b/drivers/iommu/intel/iommu.c
-@@ -5092,6 +5092,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
- 	gather->freelist = domain_unmap(dmar_domain, start_pfn,
- 					last_pfn, gather->freelist);
- 
-+	dma_pte_clear_range(dmar_domain, start_pfn, last_pfn);
-+
- 	if (dmar_domain->max_addr == iova + size)
- 		dmar_domain->max_addr = iova;
- 
diff --git a/0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch b/0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch
deleted file mode 100644
index 1f7922e34722..000000000000
--- a/0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From 62f1f7606485d450b23f86bc18dab101e7a2443d Mon Sep 17 00:00:00 2001
-From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
-Date: Thu, 18 Nov 2021 21:18:01 -0800
-Subject: [PATCH] cpufreq: intel_pstate: ITMT support for overclocked system
-
-On systems with overclocking enabled, CPPC Highest Performance can be
-hard coded to 0xff. In this case even if we have cores with different
-highest performance, ITMT can't be enabled as the current implementation
-depends on CPPC Highest Performance.
-
-On such systems we can use MSR_HWP_CAPABILITIES maximum performance field
-when CPPC.Highest Performance is 0xff.
-
-Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES as
-in some older systems CPPC Highest Performance is the only way to identify
-different performing cores.
-
-Reported-by: Michael Larabel <Michael@MichaelLarabel.com>
-Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
----
- drivers/cpufreq/intel_pstate.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
-index e15c3bc17a55ce..8a2c6b58b6524f 100644
---- a/drivers/cpufreq/intel_pstate.c
-+++ b/drivers/cpufreq/intel_pstate.c
-@@ -335,6 +335,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
- 
- static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
- 
-+#define CPPC_MAX_PERF	U8_MAX
-+
- static void intel_pstate_set_itmt_prio(int cpu)
- {
- 	struct cppc_perf_caps cppc_perf;
-@@ -345,6 +347,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
- 	if (ret)
- 		return;
- 
-+	/*
-+	 * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
-+	 * In this case we can't use CPPC.highest_perf to enable ITMT.
-+	 * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
-+	 */
-+	if (cppc_perf.highest_perf == CPPC_MAX_PERF)
-+		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
-+
- 	/*
- 	 * The priorities can be set regardless of whether or not
- 	 * sched_set_itmt_support(true) has been called and it is valid to
diff --git a/0006-lg-laptop_Recognize_more_models.patch b/0006-lg-laptop_Recognize_more_models.patch
deleted file mode 100644
index 8fbd217c36a2..000000000000
--- a/0006-lg-laptop_Recognize_more_models.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 675d4b66de78eec370cf5053eecdf00b26780af3 Mon Sep 17 00:00:00 2001
-From: Matan Ziv-Av <matan@svgalib.org>
-Date: Tue, 23 Nov 2021 22:14:55 +0200
-Subject: [PATCH] lg-laptop: Recognize more models
-
-LG uses 5 instead of 0 in the third digit (second digit after 2019) of the year string to indicate newer models in the same year. Handle this case as well.
-
-Signed-off-by: Matan Ziv-Av <matan@svgalib.org>
-For: https://bugs.archlinux.org/task/71772
----
- drivers/platform/x86/lg-laptop.c | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
-index 88b551caeaaf41..d6f74d3a7605e2 100644
---- a/drivers/platform/x86/lg-laptop.c
-+++ b/drivers/platform/x86/lg-laptop.c
-@@ -658,6 +658,18 @@ static int acpi_add(struct acpi_device *device)
- 	if (product && strlen(product) > 4)
- 		switch (product[4]) {
- 		case '5':
-+			if (strlen(product) > 5)
-+				switch (product[5]) {
-+				case 'N':
-+					year = 2021;
-+					break;
-+				case '0':
-+					year = 2016;
-+					break;
-+				default:
-+					year = 2022;
-+				}
-+			break;
- 		case '6':
- 			year = 2016;
- 			break;
diff --git a/0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch b/0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch
deleted file mode 100644
index 9ca50277e88c..000000000000
--- a/0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From: Yuxuan Shui <yshuiv7@gmail.com>
-To: viro@zeniv.linux.org.uk
-Cc: linux-fsdevel@vger.kernel.org, Yuxuan Shui <yshuiv7@gmail.com>
-Subject: [PATCH] iomap: iomap_bmap should accept unwritten maps
-Date: Tue,  5 May 2020 19:36:08 +0100
-Message-ID: <20200505183608.10280-1-yshuiv7@gmail.com> (raw)
-
-commit ac58e4fb03f9d111d733a4ad379d06eef3a24705 moved ext4_bmap from
-generic_block_bmap to iomap_bmap, this introduced a regression which
-prevents some user from using previously working swapfiles. The kernel
-will complain about holes while there is none.
-
-What is happening here is that the swapfile has unwritten mappings,
-which is rejected by iomap_bmap, but was accepted by ext4_get_block.
-
-This commit makes sure iomap_bmap would accept unwritten mappings as
-well.
-
-Signed-off-by: Yuxuan Shui <yshuiv7@gmail.com>
----
- fs/iomap/fiemap.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
-index d55e8f491a5e..fb488dcfa8c7 100644
---- a/fs/iomap/fiemap.c
-+++ b/fs/iomap/fiemap.c
-@@ -115,7 +115,7 @@ iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
- {
- 	sector_t *bno = data, addr;
- 
--	if (iomap->type == IOMAP_MAPPED) {
-+	if (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_UNWRITTEN) {
- 		addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits;
- 		*bno = addr;
- 	}
--- 
-2.26.2
diff --git a/0103-futex.patch b/0103-futex.patch
deleted file mode 100644
index d33f488ae054..000000000000
--- a/0103-futex.patch
+++ /dev/null
@@ -1,9811 +0,0 @@
-From 4dc2913212c08c6970f6e8971fd23b6328982f94 Mon Sep 17 00:00:00 2001
-From: Piotr Gorski <lucjan.lucjanov@gmail.com>
-Date: Mon, 1 Nov 2021 12:11:04 +0100
-Subject: [PATCH] futex: resync from gitlab.collabora.com
-
-Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
----
- Documentation/userspace-api/futex2.rst        |   86 +
- Documentation/userspace-api/index.rst         |    1 +
- MAINTAINERS                                   |    3 +-
- arch/arm/tools/syscall.tbl                    |    1 +
- arch/arm64/include/asm/unistd.h               |    2 +-
- arch/arm64/include/asm/unistd32.h             |    2 +
- arch/x86/entry/syscalls/syscall_32.tbl        |    1 +
- arch/x86/entry/syscalls/syscall_64.tbl        |    1 +
- include/linux/syscalls.h                      |    7 +-
- include/uapi/asm-generic/unistd.h             |    5 +-
- include/uapi/linux/futex.h                    |   25 +
- kernel/Makefile                               |    2 +-
- kernel/futex.c                                | 4272 -----------------
- kernel/futex/Makefile                         |    3 +
- kernel/futex/core.c                           | 1176 +++++
- kernel/futex/futex.h                          |  295 ++
- kernel/futex/pi.c                             | 1233 +++++
- kernel/futex/requeue.c                        |  897 ++++
- kernel/futex/syscalls.c                       |  396 ++
- kernel/futex/waitwake.c                       |  708 +++
- kernel/sys_ni.c                               |    3 +-
- .../selftests/futex/functional/.gitignore     |    1 +
- .../selftests/futex/functional/Makefile       |    3 +-
- .../futex/functional/futex_wait_timeout.c     |   21 +-
- .../futex/functional/futex_wait_wouldblock.c  |   41 +-
- .../selftests/futex/functional/futex_waitv.c  |  237 +
- .../testing/selftests/futex/functional/run.sh |    3 +
- .../selftests/futex/include/futex2test.h      |   22 +
- 28 files changed, 5163 insertions(+), 4284 deletions(-)
- create mode 100644 Documentation/userspace-api/futex2.rst
- delete mode 100644 kernel/futex.c
- create mode 100644 kernel/futex/Makefile
- create mode 100644 kernel/futex/core.c
- create mode 100644 kernel/futex/futex.h
- create mode 100644 kernel/futex/pi.c
- create mode 100644 kernel/futex/requeue.c
- create mode 100644 kernel/futex/syscalls.c
- create mode 100644 kernel/futex/waitwake.c
- create mode 100644 tools/testing/selftests/futex/functional/futex_waitv.c
- create mode 100644 tools/testing/selftests/futex/include/futex2test.h
-
-diff --git a/Documentation/userspace-api/futex2.rst b/Documentation/userspace-api/futex2.rst
-new file mode 100644
-index 000000000..7d37409df
---- /dev/null
-+++ b/Documentation/userspace-api/futex2.rst
-@@ -0,0 +1,86 @@
-+.. SPDX-License-Identifier: GPL-2.0
-+
-+======
-+futex2
-+======
-+
-+:Author: André Almeida <andrealmeid@collabora.com>
-+
-+futex, or fast user mutex, is a set of syscalls to allow userspace to create
-+performant synchronization mechanisms, such as mutexes, semaphores and
-+conditional variables in userspace. C standard libraries, like glibc, uses it
-+as a means to implement more high level interfaces like pthreads.
-+
-+futex2 is a followup version of the initial futex syscall, designed to overcome
-+limitations of the original interface.
-+
-+User API
-+========
-+
-+``futex_waitv()``
-+-----------------
-+
-+Wait on an array of futexes, wake on any::
-+
-+  futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
-+              unsigned int flags, struct timespec *timeout, clockid_t clockid)
-+
-+  struct futex_waitv {
-+        __u64 val;
-+        __u64 uaddr;
-+        __u32 flags;
-+        __u32 __reserved;
-+  };
-+
-+Userspace sets an array of struct futex_waitv (up to a max of 128 entries),
-+using ``uaddr`` for the address to wait for, ``val`` for the expected value
-+and ``flags`` to specify the type (e.g. private) and size of futex.
-+``__reserved`` needs to be 0, but it can be used for future extension. The
-+pointer for the first item of the array is passed as ``waiters``. An invalid
-+address for ``waiters`` or for any ``uaddr`` returns ``-EFAULT``.
-+
-+If userspace has 32-bit pointers, it should do a explicit cast to make sure
-+the upper bits are zeroed. ``uintptr_t`` does the tricky and it works for
-+both 32/64-bit pointers.
-+
-+``nr_futexes`` specifies the size of the array. Numbers out of [1, 128]
-+interval will make the syscall return ``-EINVAL``.
-+
-+The ``flags`` argument of the syscall needs to be 0, but it can be used for
-+future extension.
-+
-+For each entry in ``waiters`` array, the current value at ``uaddr`` is compared
-+to ``val``. If it's different, the syscall undo all the work done so far and
-+return ``-EAGAIN``. If all tests and verifications succeeds, syscall waits until
-+one of the following happens:
-+
-+- The timeout expires, returning ``-ETIMEOUT``.
-+- A signal was sent to the sleeping task, returning ``-ERESTARTSYS``.
-+- Some futex at the list was awaken, returning the index of some waked futex.
-+
-+An example of how to use the interface can be found at ``tools/testing/selftests/futex/functional/futex_waitv.c``.
-+
-+Timeout
-+-------
-+
-+``struct timespec *timeout`` argument is an optional argument that points to an
-+absolute timeout. You need to specify the type of clock being used at
-+``clockid`` argument. ``CLOCK_MONOTONIC`` and ``CLOCK_REALTIME`` are supported.
-+This syscall accepts only 64bit timespec structs.
-+
-+Types of futex
-+--------------
-+
-+A futex can be either private or shared. Private is used for processes that
-+shares the same memory space and the virtual address of the futex will be the
-+same for all processes. This allows for optimizations in the kernel. To use
-+private futexes, it's necessary to specify ``FUTEX_PRIVATE_FLAG`` in the futex
-+flag. For processes that doesn't share the same memory space and therefore can
-+have different virtual addresses for the same futex (using, for instance, a
-+file-backed shared memory) requires different internal mechanisms to be get
-+properly enqueued. This is the default behavior, and it works with both private
-+and shared futexes.
-+
-+Futexes can be of different sizes: 8, 16, 32 or 64 bits. Currently, the only
-+supported one is 32 bit sized futex, and it need to be specified using
-+``FUTEX_32`` flag.
-diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
-index c432be070..a61eac0c7 100644
---- a/Documentation/userspace-api/index.rst
-+++ b/Documentation/userspace-api/index.rst
-@@ -28,6 +28,7 @@ place where this information is gathered.
-    media/index
-    sysfs-platform_profile
-    vduse
-+   futex2
- 
- .. only::  subproject and html
- 
-diff --git a/MAINTAINERS b/MAINTAINERS
-index 3b79fd441..dd165835f 100644
---- a/MAINTAINERS
-+++ b/MAINTAINERS
-@@ -7737,6 +7737,7 @@ M:	Ingo Molnar <mingo@redhat.com>
- R:	Peter Zijlstra <peterz@infradead.org>
- R:	Darren Hart <dvhart@infradead.org>
- R:	Davidlohr Bueso <dave@stgolabs.net>
-+R:	André Almeida <andrealmeid@collabora.com>
- L:	linux-kernel@vger.kernel.org
- S:	Maintained
- T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
-@@ -7744,7 +7745,7 @@ F:	Documentation/locking/*futex*
- F:	include/asm-generic/futex.h
- F:	include/linux/futex.h
- F:	include/uapi/linux/futex.h
--F:	kernel/futex.c
-+F:	kernel/futex/*
- F:	tools/perf/bench/futex*
- F:	tools/testing/selftests/futex/
- 
-diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
-index e842209e1..543100151 100644
---- a/arch/arm/tools/syscall.tbl
-+++ b/arch/arm/tools/syscall.tbl
-@@ -462,3 +462,4 @@
- 446	common	landlock_restrict_self		sys_landlock_restrict_self
- # 447 reserved for memfd_secret
- 448	common	process_mrelease		sys_process_mrelease
-+449	common	futex_waitv			sys_futex_waitv
-diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
-index 3cb206aea..6bdb5f5db 100644
---- a/arch/arm64/include/asm/unistd.h
-+++ b/arch/arm64/include/asm/unistd.h
-@@ -38,7 +38,7 @@
- #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
- #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
- 
--#define __NR_compat_syscalls		449
-+#define __NR_compat_syscalls		450
- #endif
- 
- #define __ARCH_WANT_SYS_CLONE
-diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
-index 844f6ae58..41ea1195e 100644
---- a/arch/arm64/include/asm/unistd32.h
-+++ b/arch/arm64/include/asm/unistd32.h
-@@ -903,6 +903,8 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
- __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
- #define __NR_process_mrelease 448
- __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
-+#define __NR_futex_waitv 449
-+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
- 
- /*
-  * Please add new compat syscalls above this comment and update
-diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
-index 960a021d5..7e2554369 100644
---- a/arch/x86/entry/syscalls/syscall_32.tbl
-+++ b/arch/x86/entry/syscalls/syscall_32.tbl
-@@ -453,3 +453,4 @@
- 446	i386	landlock_restrict_self	sys_landlock_restrict_self
- 447	i386	memfd_secret		sys_memfd_secret
- 448	i386	process_mrelease	sys_process_mrelease
-+449	i386	futex_waitv		sys_futex_waitv
-diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
-index 18b5500ea..fe8f8dd15 100644
---- a/arch/x86/entry/syscalls/syscall_64.tbl
-+++ b/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -370,6 +370,7 @@
- 446	common	landlock_restrict_self	sys_landlock_restrict_self
- 447	common	memfd_secret		sys_memfd_secret
- 448	common	process_mrelease	sys_process_mrelease
-+449	common	futex_waitv		sys_futex_waitv
- 
- #
- # Due to a historical design error, certain syscalls are numbered differently
-diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
-index 252243c77..528a478db 100644
---- a/include/linux/syscalls.h
-+++ b/include/linux/syscalls.h
-@@ -58,6 +58,7 @@ struct mq_attr;
- struct compat_stat;
- struct old_timeval32;
- struct robust_list_head;
-+struct futex_waitv;
- struct getcpu_cache;
- struct old_linux_dirent;
- struct perf_event_attr;
-@@ -610,7 +611,7 @@ asmlinkage long sys_waitid(int which, pid_t pid,
- asmlinkage long sys_set_tid_address(int __user *tidptr);
- asmlinkage long sys_unshare(unsigned long unshare_flags);
- 
--/* kernel/futex.c */
-+/* kernel/futex/syscalls.c */
- asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
- 			  const struct __kernel_timespec __user *utime,
- 			  u32 __user *uaddr2, u32 val3);
-@@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid,
- asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
- 				    size_t len);
- 
-+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
-+				unsigned int nr_futexes, unsigned int flags,
-+				struct __kernel_timespec __user *timeout, clockid_t clockid);
-+
- /* kernel/hrtimer.c */
- asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
- 			      struct __kernel_timespec __user *rmtp);
-diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
-index 1c5fb86d4..4557a8b60 100644
---- a/include/uapi/asm-generic/unistd.h
-+++ b/include/uapi/asm-generic/unistd.h
-@@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
- #define __NR_process_mrelease 448
- __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
- 
-+#define __NR_futex_waitv 449
-+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
-+
- #undef __NR_syscalls
--#define __NR_syscalls 449
-+#define __NR_syscalls 450
- 
- /*
-  * 32 bit systems traditionally used different
-diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
-index 235e5b2fa..71a5df8d2 100644
---- a/include/uapi/linux/futex.h
-+++ b/include/uapi/linux/futex.h
-@@ -43,6 +43,31 @@
- #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
- 					 FUTEX_PRIVATE_FLAG)
- 
-+/*
-+ * Flags to specify the bit length of the futex word for futex2 syscalls.
-+ * Currently, only 32 is supported.
-+ */
-+#define FUTEX_32		2
-+
-+/*
-+ * Max numbers of elements in a futex_waitv array
-+ */
-+#define FUTEX_WAITV_MAX		128
-+
-+/**
-+ * struct futex_waitv - A waiter for vectorized wait
-+ * @val:	Expected value at uaddr
-+ * @uaddr:	User address to wait on
-+ * @flags:	Flags for this waiter
-+ * @__reserved:	Reserved member to preserve data alignment. Should be 0.
-+ */
-+struct futex_waitv {
-+	__u64 val;
-+	__u64 uaddr;
-+	__u32 flags;
-+	__u32 __reserved;
-+};
-+
- /*
-  * Support for robust futexes: the kernel cleans up held futexes at
-  * thread exit time.
-diff --git a/kernel/Makefile b/kernel/Makefile
-index 4df609be4..3f6ab5d50 100644
---- a/kernel/Makefile
-+++ b/kernel/Makefile
-@@ -59,7 +59,7 @@ obj-$(CONFIG_FREEZER) += freezer.o
- obj-$(CONFIG_PROFILING) += profile.o
- obj-$(CONFIG_STACKTRACE) += stacktrace.o
- obj-y += time/
--obj-$(CONFIG_FUTEX) += futex.o
-+obj-$(CONFIG_FUTEX) += futex/
- obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
- obj-$(CONFIG_SMP) += smp.o
- ifneq ($(CONFIG_SMP),y)
-diff --git a/kernel/futex.c b/kernel/futex.c
-deleted file mode 100644
-index c15ad276f..000000000
---- a/kernel/futex.c
-+++ /dev/null
-@@ -1,4272 +0,0 @@
--// SPDX-License-Identifier: GPL-2.0-or-later
--/*
-- *  Fast Userspace Mutexes (which I call "Futexes!").
-- *  (C) Rusty Russell, IBM 2002
-- *
-- *  Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
-- *  (C) Copyright 2003 Red Hat Inc, All Rights Reserved
-- *
-- *  Removed page pinning, fix privately mapped COW pages and other cleanups
-- *  (C) Copyright 2003, 2004 Jamie Lokier
-- *
-- *  Robust futex support started by Ingo Molnar
-- *  (C) Copyright 2006 Red Hat Inc, All Rights Reserved
-- *  Thanks to Thomas Gleixner for suggestions, analysis and fixes.
-- *
-- *  PI-futex support started by Ingo Molnar and Thomas Gleixner
-- *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
-- *  Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
-- *
-- *  PRIVATE futexes by Eric Dumazet
-- *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
-- *
-- *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
-- *  Copyright (C) IBM Corporation, 2009
-- *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
-- *
-- *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
-- *  enough at me, Linus for the original (flawed) idea, Matthew
-- *  Kirkwood for proof-of-concept implementation.
-- *
-- *  "The futexes are also cursed."
-- *  "But they come in a choice of three flavours!"
-- */
--#include <linux/compat.h>
--#include <linux/jhash.h>
--#include <linux/pagemap.h>
--#include <linux/syscalls.h>
--#include <linux/freezer.h>
--#include <linux/memblock.h>
--#include <linux/fault-inject.h>
--#include <linux/time_namespace.h>
--
--#include <asm/futex.h>
--
--#include "locking/rtmutex_common.h"
--
--/*
-- * READ this before attempting to hack on futexes!
-- *
-- * Basic futex operation and ordering guarantees
-- * =============================================
-- *
-- * The waiter reads the futex value in user space and calls
-- * futex_wait(). This function computes the hash bucket and acquires
-- * the hash bucket lock. After that it reads the futex user space value
-- * again and verifies that the data has not changed. If it has not changed
-- * it enqueues itself into the hash bucket, releases the hash bucket lock
-- * and schedules.
-- *
-- * The waker side modifies the user space value of the futex and calls
-- * futex_wake(). This function computes the hash bucket and acquires the
-- * hash bucket lock. Then it looks for waiters on that futex in the hash
-- * bucket and wakes them.
-- *
-- * In futex wake up scenarios where no tasks are blocked on a futex, taking
-- * the hb spinlock can be avoided and simply return. In order for this
-- * optimization to work, ordering guarantees must exist so that the waiter
-- * being added to the list is acknowledged when the list is concurrently being
-- * checked by the waker, avoiding scenarios like the following:
-- *
-- * CPU 0                               CPU 1
-- * val = *futex;
-- * sys_futex(WAIT, futex, val);
-- *   futex_wait(futex, val);
-- *   uval = *futex;
-- *                                     *futex = newval;
-- *                                     sys_futex(WAKE, futex);
-- *                                       futex_wake(futex);
-- *                                       if (queue_empty())
-- *                                         return;
-- *   if (uval == val)
-- *      lock(hash_bucket(futex));
-- *      queue();
-- *     unlock(hash_bucket(futex));
-- *     schedule();
-- *
-- * This would cause the waiter on CPU 0 to wait forever because it
-- * missed the transition of the user space value from val to newval
-- * and the waker did not find the waiter in the hash bucket queue.
-- *
-- * The correct serialization ensures that a waiter either observes
-- * the changed user space value before blocking or is woken by a
-- * concurrent waker:
-- *
-- * CPU 0                                 CPU 1
-- * val = *futex;
-- * sys_futex(WAIT, futex, val);
-- *   futex_wait(futex, val);
-- *
-- *   waiters++; (a)
-- *   smp_mb(); (A) <-- paired with -.
-- *                                  |
-- *   lock(hash_bucket(futex));      |
-- *                                  |
-- *   uval = *futex;                 |
-- *                                  |        *futex = newval;
-- *                                  |        sys_futex(WAKE, futex);
-- *                                  |          futex_wake(futex);
-- *                                  |
-- *                                  `--------> smp_mb(); (B)
-- *   if (uval == val)
-- *     queue();
-- *     unlock(hash_bucket(futex));
-- *     schedule();                         if (waiters)
-- *                                           lock(hash_bucket(futex));
-- *   else                                    wake_waiters(futex);
-- *     waiters--; (b)                        unlock(hash_bucket(futex));
-- *
-- * Where (A) orders the waiters increment and the futex value read through
-- * atomic operations (see hb_waiters_inc) and where (B) orders the write
-- * to futex and the waiters read (see hb_waiters_pending()).
-- *
-- * This yields the following case (where X:=waiters, Y:=futex):
-- *
-- *	X = Y = 0
-- *
-- *	w[X]=1		w[Y]=1
-- *	MB		MB
-- *	r[Y]=y		r[X]=x
-- *
-- * Which guarantees that x==0 && y==0 is impossible; which translates back into
-- * the guarantee that we cannot both miss the futex variable change and the
-- * enqueue.
-- *
-- * Note that a new waiter is accounted for in (a) even when it is possible that
-- * the wait call can return error, in which case we backtrack from it in (b).
-- * Refer to the comment in queue_lock().
-- *
-- * Similarly, in order to account for waiters being requeued on another
-- * address we always increment the waiters for the destination bucket before
-- * acquiring the lock. It then decrements them again  after releasing it -
-- * the code that actually moves the futex(es) between hash buckets (requeue_futex)
-- * will do the additional required waiter count housekeeping. This is done for
-- * double_lock_hb() and double_unlock_hb(), respectively.
-- */
--
--#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
--#define futex_cmpxchg_enabled 1
--#else
--static int  __read_mostly futex_cmpxchg_enabled;
--#endif
--
--/*
-- * Futex flags used to encode options to functions and preserve them across
-- * restarts.
-- */
--#ifdef CONFIG_MMU
--# define FLAGS_SHARED		0x01
--#else
--/*
-- * NOMMU does not have per process address space. Let the compiler optimize
-- * code away.
-- */
--# define FLAGS_SHARED		0x00
--#endif
--#define FLAGS_CLOCKRT		0x02
--#define FLAGS_HAS_TIMEOUT	0x04
--
--/*
-- * Priority Inheritance state:
-- */
--struct futex_pi_state {
--	/*
--	 * list of 'owned' pi_state instances - these have to be
--	 * cleaned up in do_exit() if the task exits prematurely:
--	 */
--	struct list_head list;
--
--	/*
--	 * The PI object:
--	 */
--	struct rt_mutex_base pi_mutex;
--
--	struct task_struct *owner;
--	refcount_t refcount;
--
--	union futex_key key;
--} __randomize_layout;
--
--/**
-- * struct futex_q - The hashed futex queue entry, one per waiting task
-- * @list:		priority-sorted list of tasks waiting on this futex
-- * @task:		the task waiting on the futex
-- * @lock_ptr:		the hash bucket lock
-- * @key:		the key the futex is hashed on
-- * @pi_state:		optional priority inheritance state
-- * @rt_waiter:		rt_waiter storage for use with requeue_pi
-- * @requeue_pi_key:	the requeue_pi target futex key
-- * @bitset:		bitset for the optional bitmasked wakeup
-- * @requeue_state:	State field for futex_requeue_pi()
-- * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
-- *
-- * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
-- * we can wake only the relevant ones (hashed queues may be shared).
-- *
-- * A futex_q has a woken state, just like tasks have TASK_RUNNING.
-- * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
-- * The order of wakeup is always to make the first condition true, then
-- * the second.
-- *
-- * PI futexes are typically woken before they are removed from the hash list via
-- * the rt_mutex code. See unqueue_me_pi().
-- */
--struct futex_q {
--	struct plist_node list;
--
--	struct task_struct *task;
--	spinlock_t *lock_ptr;
--	union futex_key key;
--	struct futex_pi_state *pi_state;
--	struct rt_mutex_waiter *rt_waiter;
--	union futex_key *requeue_pi_key;
--	u32 bitset;
--	atomic_t requeue_state;
--#ifdef CONFIG_PREEMPT_RT
--	struct rcuwait requeue_wait;
--#endif
--} __randomize_layout;
--
--/*
-- * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an
-- * underlying rtmutex. The task which is about to be requeued could have
-- * just woken up (timeout, signal). After the wake up the task has to
-- * acquire hash bucket lock, which is held by the requeue code.  As a task
-- * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking
-- * and the hash bucket lock blocking would collide and corrupt state.
-- *
-- * On !PREEMPT_RT this is not a problem and everything could be serialized
-- * on hash bucket lock, but aside of having the benefit of common code,
-- * this allows to avoid doing the requeue when the task is already on the
-- * way out and taking the hash bucket lock of the original uaddr1 when the
-- * requeue has been completed.
-- *
-- * The following state transitions are valid:
-- *
-- * On the waiter side:
-- *   Q_REQUEUE_PI_NONE		-> Q_REQUEUE_PI_IGNORE
-- *   Q_REQUEUE_PI_IN_PROGRESS	-> Q_REQUEUE_PI_WAIT
-- *
-- * On the requeue side:
-- *   Q_REQUEUE_PI_NONE		-> Q_REQUEUE_PI_INPROGRESS
-- *   Q_REQUEUE_PI_IN_PROGRESS	-> Q_REQUEUE_PI_DONE/LOCKED
-- *   Q_REQUEUE_PI_IN_PROGRESS	-> Q_REQUEUE_PI_NONE (requeue failed)
-- *   Q_REQUEUE_PI_WAIT		-> Q_REQUEUE_PI_DONE/LOCKED
-- *   Q_REQUEUE_PI_WAIT		-> Q_REQUEUE_PI_IGNORE (requeue failed)
-- *
-- * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this
-- * signals that the waiter is already on the way out. It also means that
-- * the waiter is still on the 'wait' futex, i.e. uaddr1.
-- *
-- * The waiter side signals early wakeup to the requeue side either through
-- * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending
-- * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately
-- * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT,
-- * which means the wakeup is interleaving with a requeue in progress it has
-- * to wait for the requeue side to change the state. Either to DONE/LOCKED
-- * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex
-- * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by
-- * the requeue side when the requeue attempt failed via deadlock detection
-- * and therefore the waiter q is still on the uaddr1 futex.
-- */
--enum {
--	Q_REQUEUE_PI_NONE		=  0,
--	Q_REQUEUE_PI_IGNORE,
--	Q_REQUEUE_PI_IN_PROGRESS,
--	Q_REQUEUE_PI_WAIT,
--	Q_REQUEUE_PI_DONE,
--	Q_REQUEUE_PI_LOCKED,
--};
--
--static const struct futex_q futex_q_init = {
--	/* list gets initialized in queue_me()*/
--	.key		= FUTEX_KEY_INIT,
--	.bitset		= FUTEX_BITSET_MATCH_ANY,
--	.requeue_state	= ATOMIC_INIT(Q_REQUEUE_PI_NONE),
--};
--
--/*
-- * Hash buckets are shared by all the futex_keys that hash to the same
-- * location.  Each key may have multiple futex_q structures, one for each task
-- * waiting on a futex.
-- */
--struct futex_hash_bucket {
--	atomic_t waiters;
--	spinlock_t lock;
--	struct plist_head chain;
--} ____cacheline_aligned_in_smp;
--
--/*
-- * The base of the bucket array and its size are always used together
-- * (after initialization only in hash_futex()), so ensure that they
-- * reside in the same cacheline.
-- */
--static struct {
--	struct futex_hash_bucket *queues;
--	unsigned long            hashsize;
--} __futex_data __read_mostly __aligned(2*sizeof(long));
--#define futex_queues   (__futex_data.queues)
--#define futex_hashsize (__futex_data.hashsize)
--
--
--/*
-- * Fault injections for futexes.
-- */
--#ifdef CONFIG_FAIL_FUTEX
--
--static struct {
--	struct fault_attr attr;
--
--	bool ignore_private;
--} fail_futex = {
--	.attr = FAULT_ATTR_INITIALIZER,
--	.ignore_private = false,
--};
--
--static int __init setup_fail_futex(char *str)
--{
--	return setup_fault_attr(&fail_futex.attr, str);
--}
--__setup("fail_futex=", setup_fail_futex);
--
--static bool should_fail_futex(bool fshared)
--{
--	if (fail_futex.ignore_private && !fshared)
--		return false;
--
--	return should_fail(&fail_futex.attr, 1);
--}
--
--#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
--
--static int __init fail_futex_debugfs(void)
--{
--	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
--	struct dentry *dir;
--
--	dir = fault_create_debugfs_attr("fail_futex", NULL,
--					&fail_futex.attr);
--	if (IS_ERR(dir))
--		return PTR_ERR(dir);
--
--	debugfs_create_bool("ignore-private", mode, dir,
--			    &fail_futex.ignore_private);
--	return 0;
--}
--
--late_initcall(fail_futex_debugfs);
--
--#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
--
--#else
--static inline bool should_fail_futex(bool fshared)
--{
--	return false;
--}
--#endif /* CONFIG_FAIL_FUTEX */
--
--#ifdef CONFIG_COMPAT
--static void compat_exit_robust_list(struct task_struct *curr);
--#endif
--
--/*
-- * Reflects a new waiter being added to the waitqueue.
-- */
--static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
--{
--#ifdef CONFIG_SMP
--	atomic_inc(&hb->waiters);
--	/*
--	 * Full barrier (A), see the ordering comment above.
--	 */
--	smp_mb__after_atomic();
--#endif
--}
--
--/*
-- * Reflects a waiter being removed from the waitqueue by wakeup
-- * paths.
-- */
--static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
--{
--#ifdef CONFIG_SMP
--	atomic_dec(&hb->waiters);
--#endif
--}
--
--static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
--{
--#ifdef CONFIG_SMP
--	/*
--	 * Full barrier (B), see the ordering comment above.
--	 */
--	smp_mb();
--	return atomic_read(&hb->waiters);
--#else
--	return 1;
--#endif
--}
--
--/**
-- * hash_futex - Return the hash bucket in the global hash
-- * @key:	Pointer to the futex key for which the hash is calculated
-- *
-- * We hash on the keys returned from get_futex_key (see below) and return the
-- * corresponding hash bucket in the global hash.
-- */
--static struct futex_hash_bucket *hash_futex(union futex_key *key)
--{
--	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
--			  key->both.offset);
--
--	return &futex_queues[hash & (futex_hashsize - 1)];
--}
--
--
--/**
-- * match_futex - Check whether two futex keys are equal
-- * @key1:	Pointer to key1
-- * @key2:	Pointer to key2
-- *
-- * Return 1 if two futex_keys are equal, 0 otherwise.
-- */
--static inline int match_futex(union futex_key *key1, union futex_key *key2)
--{
--	return (key1 && key2
--		&& key1->both.word == key2->both.word
--		&& key1->both.ptr == key2->both.ptr
--		&& key1->both.offset == key2->both.offset);
--}
--
--enum futex_access {
--	FUTEX_READ,
--	FUTEX_WRITE
--};
--
--/**
-- * futex_setup_timer - set up the sleeping hrtimer.
-- * @time:	ptr to the given timeout value
-- * @timeout:	the hrtimer_sleeper structure to be set up
-- * @flags:	futex flags
-- * @range_ns:	optional range in ns
-- *
-- * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
-- *	   value given
-- */
--static inline struct hrtimer_sleeper *
--futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
--		  int flags, u64 range_ns)
--{
--	if (!time)
--		return NULL;
--
--	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
--				      CLOCK_REALTIME : CLOCK_MONOTONIC,
--				      HRTIMER_MODE_ABS);
--	/*
--	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
--	 * effectively the same as calling hrtimer_set_expires().
--	 */
--	hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
--
--	return timeout;
--}
--
--/*
-- * Generate a machine wide unique identifier for this inode.
-- *
-- * This relies on u64 not wrapping in the life-time of the machine; which with
-- * 1ns resolution means almost 585 years.
-- *
-- * This further relies on the fact that a well formed program will not unmap
-- * the file while it has a (shared) futex waiting on it. This mapping will have
-- * a file reference which pins the mount and inode.
-- *
-- * If for some reason an inode gets evicted and read back in again, it will get
-- * a new sequence number and will _NOT_ match, even though it is the exact same
-- * file.
-- *
-- * It is important that match_futex() will never have a false-positive, esp.
-- * for PI futexes that can mess up the state. The above argues that false-negatives
-- * are only possible for malformed programs.
-- */
--static u64 get_inode_sequence_number(struct inode *inode)
--{
--	static atomic64_t i_seq;
--	u64 old;
--
--	/* Does the inode already have a sequence number? */
--	old = atomic64_read(&inode->i_sequence);
--	if (likely(old))
--		return old;
--
--	for (;;) {
--		u64 new = atomic64_add_return(1, &i_seq);
--		if (WARN_ON_ONCE(!new))
--			continue;
--
--		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
--		if (old)
--			return old;
--		return new;
--	}
--}
--
--/**
-- * get_futex_key() - Get parameters which are the keys for a futex
-- * @uaddr:	virtual address of the futex
-- * @fshared:	false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
-- * @key:	address where result is stored.
-- * @rw:		mapping needs to be read/write (values: FUTEX_READ,
-- *              FUTEX_WRITE)
-- *
-- * Return: a negative error code or 0
-- *
-- * The key words are stored in @key on success.
-- *
-- * For shared mappings (when @fshared), the key is:
-- *
-- *   ( inode->i_sequence, page->index, offset_within_page )
-- *
-- * [ also see get_inode_sequence_number() ]
-- *
-- * For private mappings (or when !@fshared), the key is:
-- *
-- *   ( current->mm, address, 0 )
-- *
-- * This allows (cross process, where applicable) identification of the futex
-- * without keeping the page pinned for the duration of the FUTEX_WAIT.
-- *
-- * lock_page() might sleep, the caller should not hold a spinlock.
-- */
--static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
--			 enum futex_access rw)
--{
--	unsigned long address = (unsigned long)uaddr;
--	struct mm_struct *mm = current->mm;
--	struct page *page, *tail;
--	struct address_space *mapping;
--	int err, ro = 0;
--
--	/*
--	 * The futex address must be "naturally" aligned.
--	 */
--	key->both.offset = address % PAGE_SIZE;
--	if (unlikely((address % sizeof(u32)) != 0))
--		return -EINVAL;
--	address -= key->both.offset;
--
--	if (unlikely(!access_ok(uaddr, sizeof(u32))))
--		return -EFAULT;
--
--	if (unlikely(should_fail_futex(fshared)))
--		return -EFAULT;
--
--	/*
--	 * PROCESS_PRIVATE futexes are fast.
--	 * As the mm cannot disappear under us and the 'key' only needs
--	 * virtual address, we dont even have to find the underlying vma.
--	 * Note : We do have to check 'uaddr' is a valid user address,
--	 *        but access_ok() should be faster than find_vma()
--	 */
--	if (!fshared) {
--		key->private.mm = mm;
--		key->private.address = address;
--		return 0;
--	}
--
--again:
--	/* Ignore any VERIFY_READ mapping (futex common case) */
--	if (unlikely(should_fail_futex(true)))
--		return -EFAULT;
--
--	err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
--	/*
--	 * If write access is not required (eg. FUTEX_WAIT), try
--	 * and get read-only access.
--	 */
--	if (err == -EFAULT && rw == FUTEX_READ) {
--		err = get_user_pages_fast(address, 1, 0, &page);
--		ro = 1;
--	}
--	if (err < 0)
--		return err;
--	else
--		err = 0;
--
--	/*
--	 * The treatment of mapping from this point on is critical. The page
--	 * lock protects many things but in this context the page lock
--	 * stabilizes mapping, prevents inode freeing in the shared
--	 * file-backed region case and guards against movement to swap cache.
--	 *
--	 * Strictly speaking the page lock is not needed in all cases being
--	 * considered here and page lock forces unnecessarily serialization
--	 * From this point on, mapping will be re-verified if necessary and
--	 * page lock will be acquired only if it is unavoidable
--	 *
--	 * Mapping checks require the head page for any compound page so the
--	 * head page and mapping is looked up now. For anonymous pages, it
--	 * does not matter if the page splits in the future as the key is
--	 * based on the address. For filesystem-backed pages, the tail is
--	 * required as the index of the page determines the key. For
--	 * base pages, there is no tail page and tail == page.
--	 */
--	tail = page;
--	page = compound_head(page);
--	mapping = READ_ONCE(page->mapping);
--
--	/*
--	 * If page->mapping is NULL, then it cannot be a PageAnon
--	 * page; but it might be the ZERO_PAGE or in the gate area or
--	 * in a special mapping (all cases which we are happy to fail);
--	 * or it may have been a good file page when get_user_pages_fast
--	 * found it, but truncated or holepunched or subjected to
--	 * invalidate_complete_page2 before we got the page lock (also
--	 * cases which we are happy to fail).  And we hold a reference,
--	 * so refcount care in invalidate_complete_page's remove_mapping
--	 * prevents drop_caches from setting mapping to NULL beneath us.
--	 *
--	 * The case we do have to guard against is when memory pressure made
--	 * shmem_writepage move it from filecache to swapcache beneath us:
--	 * an unlikely race, but we do need to retry for page->mapping.
--	 */
--	if (unlikely(!mapping)) {
--		int shmem_swizzled;
--
--		/*
--		 * Page lock is required to identify which special case above
--		 * applies. If this is really a shmem page then the page lock
--		 * will prevent unexpected transitions.
--		 */
--		lock_page(page);
--		shmem_swizzled = PageSwapCache(page) || page->mapping;
--		unlock_page(page);
--		put_page(page);
--
--		if (shmem_swizzled)
--			goto again;
--
--		return -EFAULT;
--	}
--
--	/*
--	 * Private mappings are handled in a simple way.
--	 *
--	 * If the futex key is stored on an anonymous page, then the associated
--	 * object is the mm which is implicitly pinned by the calling process.
--	 *
--	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
--	 * it's a read-only handle, it's expected that futexes attach to
--	 * the object not the particular process.
--	 */
--	if (PageAnon(page)) {
--		/*
--		 * A RO anonymous page will never change and thus doesn't make
--		 * sense for futex operations.
--		 */
--		if (unlikely(should_fail_futex(true)) || ro) {
--			err = -EFAULT;
--			goto out;
--		}
--
--		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
--		key->private.mm = mm;
--		key->private.address = address;
--
--	} else {
--		struct inode *inode;
--
--		/*
--		 * The associated futex object in this case is the inode and
--		 * the page->mapping must be traversed. Ordinarily this should
--		 * be stabilised under page lock but it's not strictly
--		 * necessary in this case as we just want to pin the inode, not
--		 * update the radix tree or anything like that.
--		 *
--		 * The RCU read lock is taken as the inode is finally freed
--		 * under RCU. If the mapping still matches expectations then the
--		 * mapping->host can be safely accessed as being a valid inode.
--		 */
--		rcu_read_lock();
--
--		if (READ_ONCE(page->mapping) != mapping) {
--			rcu_read_unlock();
--			put_page(page);
--
--			goto again;
--		}
--
--		inode = READ_ONCE(mapping->host);
--		if (!inode) {
--			rcu_read_unlock();
--			put_page(page);
--
--			goto again;
--		}
--
--		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
--		key->shared.i_seq = get_inode_sequence_number(inode);
--		key->shared.pgoff = page_to_pgoff(tail);
--		rcu_read_unlock();
--	}
--
--out:
--	put_page(page);
--	return err;
--}
--
--/**
-- * fault_in_user_writeable() - Fault in user address and verify RW access
-- * @uaddr:	pointer to faulting user space address
-- *
-- * Slow path to fixup the fault we just took in the atomic write
-- * access to @uaddr.
-- *
-- * We have no generic implementation of a non-destructive write to the
-- * user address. We know that we faulted in the atomic pagefault
-- * disabled section so we can as well avoid the #PF overhead by
-- * calling get_user_pages() right away.
-- */
--static int fault_in_user_writeable(u32 __user *uaddr)
--{
--	struct mm_struct *mm = current->mm;
--	int ret;
--
--	mmap_read_lock(mm);
--	ret = fixup_user_fault(mm, (unsigned long)uaddr,
--			       FAULT_FLAG_WRITE, NULL);
--	mmap_read_unlock(mm);
--
--	return ret < 0 ? ret : 0;
--}
--
--/**
-- * futex_top_waiter() - Return the highest priority waiter on a futex
-- * @hb:		the hash bucket the futex_q's reside in
-- * @key:	the futex key (to distinguish it from other futex futex_q's)
-- *
-- * Must be called with the hb lock held.
-- */
--static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
--					union futex_key *key)
--{
--	struct futex_q *this;
--
--	plist_for_each_entry(this, &hb->chain, list) {
--		if (match_futex(&this->key, key))
--			return this;
--	}
--	return NULL;
--}
--
--static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
--				      u32 uval, u32 newval)
--{
--	int ret;
--
--	pagefault_disable();
--	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
--	pagefault_enable();
--
--	return ret;
--}
--
--static int get_futex_value_locked(u32 *dest, u32 __user *from)
--{
--	int ret;
--
--	pagefault_disable();
--	ret = __get_user(*dest, from);
--	pagefault_enable();
--
--	return ret ? -EFAULT : 0;
--}
--
--
--/*
-- * PI code:
-- */
--static int refill_pi_state_cache(void)
--{
--	struct futex_pi_state *pi_state;
--
--	if (likely(current->pi_state_cache))
--		return 0;
--
--	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
--
--	if (!pi_state)
--		return -ENOMEM;
--
--	INIT_LIST_HEAD(&pi_state->list);
--	/* pi_mutex gets initialized later */
--	pi_state->owner = NULL;
--	refcount_set(&pi_state->refcount, 1);
--	pi_state->key = FUTEX_KEY_INIT;
--
--	current->pi_state_cache = pi_state;
--
--	return 0;
--}
--
--static struct futex_pi_state *alloc_pi_state(void)
--{
--	struct futex_pi_state *pi_state = current->pi_state_cache;
--
--	WARN_ON(!pi_state);
--	current->pi_state_cache = NULL;
--
--	return pi_state;
--}
--
--static void pi_state_update_owner(struct futex_pi_state *pi_state,
--				  struct task_struct *new_owner)
--{
--	struct task_struct *old_owner = pi_state->owner;
--
--	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
--
--	if (old_owner) {
--		raw_spin_lock(&old_owner->pi_lock);
--		WARN_ON(list_empty(&pi_state->list));
--		list_del_init(&pi_state->list);
--		raw_spin_unlock(&old_owner->pi_lock);
--	}
--
--	if (new_owner) {
--		raw_spin_lock(&new_owner->pi_lock);
--		WARN_ON(!list_empty(&pi_state->list));
--		list_add(&pi_state->list, &new_owner->pi_state_list);
--		pi_state->owner = new_owner;
--		raw_spin_unlock(&new_owner->pi_lock);
--	}
--}
--
--static void get_pi_state(struct futex_pi_state *pi_state)
--{
--	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
--}
--
--/*
-- * Drops a reference to the pi_state object and frees or caches it
-- * when the last reference is gone.
-- */
--static void put_pi_state(struct futex_pi_state *pi_state)
--{
--	if (!pi_state)
--		return;
--
--	if (!refcount_dec_and_test(&pi_state->refcount))
--		return;
--
--	/*
--	 * If pi_state->owner is NULL, the owner is most probably dying
--	 * and has cleaned up the pi_state already
--	 */
--	if (pi_state->owner) {
--		unsigned long flags;
--
--		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
--		pi_state_update_owner(pi_state, NULL);
--		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
--		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
--	}
--
--	if (current->pi_state_cache) {
--		kfree(pi_state);
--	} else {
--		/*
--		 * pi_state->list is already empty.
--		 * clear pi_state->owner.
--		 * refcount is at 0 - put it back to 1.
--		 */
--		pi_state->owner = NULL;
--		refcount_set(&pi_state->refcount, 1);
--		current->pi_state_cache = pi_state;
--	}
--}
--
--#ifdef CONFIG_FUTEX_PI
--
--/*
-- * This task is holding PI mutexes at exit time => bad.
-- * Kernel cleans up PI-state, but userspace is likely hosed.
-- * (Robust-futex cleanup is separate and might save the day for userspace.)
-- */
--static void exit_pi_state_list(struct task_struct *curr)
--{
--	struct list_head *next, *head = &curr->pi_state_list;
--	struct futex_pi_state *pi_state;
--	struct futex_hash_bucket *hb;
--	union futex_key key = FUTEX_KEY_INIT;
--
--	if (!futex_cmpxchg_enabled)
--		return;
--	/*
--	 * We are a ZOMBIE and nobody can enqueue itself on
--	 * pi_state_list anymore, but we have to be careful
--	 * versus waiters unqueueing themselves:
--	 */
--	raw_spin_lock_irq(&curr->pi_lock);
--	while (!list_empty(head)) {
--		next = head->next;
--		pi_state = list_entry(next, struct futex_pi_state, list);
--		key = pi_state->key;
--		hb = hash_futex(&key);
--
--		/*
--		 * We can race against put_pi_state() removing itself from the
--		 * list (a waiter going away). put_pi_state() will first
--		 * decrement the reference count and then modify the list, so
--		 * its possible to see the list entry but fail this reference
--		 * acquire.
--		 *
--		 * In that case; drop the locks to let put_pi_state() make
--		 * progress and retry the loop.
--		 */
--		if (!refcount_inc_not_zero(&pi_state->refcount)) {
--			raw_spin_unlock_irq(&curr->pi_lock);
--			cpu_relax();
--			raw_spin_lock_irq(&curr->pi_lock);
--			continue;
--		}
--		raw_spin_unlock_irq(&curr->pi_lock);
--
--		spin_lock(&hb->lock);
--		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
--		raw_spin_lock(&curr->pi_lock);
--		/*
--		 * We dropped the pi-lock, so re-check whether this
--		 * task still owns the PI-state:
--		 */
--		if (head->next != next) {
--			/* retain curr->pi_lock for the loop invariant */
--			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
--			spin_unlock(&hb->lock);
--			put_pi_state(pi_state);
--			continue;
--		}
--
--		WARN_ON(pi_state->owner != curr);
--		WARN_ON(list_empty(&pi_state->list));
--		list_del_init(&pi_state->list);
--		pi_state->owner = NULL;
--
--		raw_spin_unlock(&curr->pi_lock);
--		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
--		spin_unlock(&hb->lock);
--
--		rt_mutex_futex_unlock(&pi_state->pi_mutex);
--		put_pi_state(pi_state);
--
--		raw_spin_lock_irq(&curr->pi_lock);
--	}
--	raw_spin_unlock_irq(&curr->pi_lock);
--}
--#else
--static inline void exit_pi_state_list(struct task_struct *curr) { }
--#endif
--
--/*
-- * We need to check the following states:
-- *
-- *      Waiter | pi_state | pi->owner | uTID      | uODIED | ?
-- *
-- * [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
-- * [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid
-- *
-- * [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid
-- *
-- * [4]  Found  | Found    | NULL      | 0         | 1      | Valid
-- * [5]  Found  | Found    | NULL      | >0        | 1      | Invalid
-- *
-- * [6]  Found  | Found    | task      | 0         | 1      | Valid
-- *
-- * [7]  Found  | Found    | NULL      | Any       | 0      | Invalid
-- *
-- * [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
-- * [9]  Found  | Found    | task      | 0         | 0      | Invalid
-- * [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid
-- *
-- * [1]	Indicates that the kernel can acquire the futex atomically. We
-- *	came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
-- *
-- * [2]	Valid, if TID does not belong to a kernel thread. If no matching
-- *      thread is found then it indicates that the owner TID has died.
-- *
-- * [3]	Invalid. The waiter is queued on a non PI futex
-- *
-- * [4]	Valid state after exit_robust_list(), which sets the user space
-- *	value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
-- *
-- * [5]	The user space value got manipulated between exit_robust_list()
-- *	and exit_pi_state_list()
-- *
-- * [6]	Valid state after exit_pi_state_list() which sets the new owner in
-- *	the pi_state but cannot access the user space value.
-- *
-- * [7]	pi_state->owner can only be NULL when the OWNER_DIED bit is set.
-- *
-- * [8]	Owner and user space value match
-- *
-- * [9]	There is no transient state which sets the user space TID to 0
-- *	except exit_robust_list(), but this is indicated by the
-- *	FUTEX_OWNER_DIED bit. See [4]
-- *
-- * [10] There is no transient state which leaves owner and user space
-- *	TID out of sync. Except one error case where the kernel is denied
-- *	write access to the user address, see fixup_pi_state_owner().
-- *
-- *
-- * Serialization and lifetime rules:
-- *
-- * hb->lock:
-- *
-- *	hb -> futex_q, relation
-- *	futex_q -> pi_state, relation
-- *
-- *	(cannot be raw because hb can contain arbitrary amount
-- *	 of futex_q's)
-- *
-- * pi_mutex->wait_lock:
-- *
-- *	{uval, pi_state}
-- *
-- *	(and pi_mutex 'obviously')
-- *
-- * p->pi_lock:
-- *
-- *	p->pi_state_list -> pi_state->list, relation
-- *	pi_mutex->owner -> pi_state->owner, relation
-- *
-- * pi_state->refcount:
-- *
-- *	pi_state lifetime
-- *
-- *
-- * Lock order:
-- *
-- *   hb->lock
-- *     pi_mutex->wait_lock
-- *       p->pi_lock
-- *
-- */
--
--/*
-- * Validate that the existing waiter has a pi_state and sanity check
-- * the pi_state against the user space value. If correct, attach to
-- * it.
-- */
--static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
--			      struct futex_pi_state *pi_state,
--			      struct futex_pi_state **ps)
--{
--	pid_t pid = uval & FUTEX_TID_MASK;
--	u32 uval2;
--	int ret;
--
--	/*
--	 * Userspace might have messed up non-PI and PI futexes [3]
--	 */
--	if (unlikely(!pi_state))
--		return -EINVAL;
--
--	/*
--	 * We get here with hb->lock held, and having found a
--	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
--	 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
--	 * which in turn means that futex_lock_pi() still has a reference on
--	 * our pi_state.
--	 *
--	 * The waiter holding a reference on @pi_state also protects against
--	 * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
--	 * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
--	 * free pi_state before we can take a reference ourselves.
--	 */
--	WARN_ON(!refcount_read(&pi_state->refcount));
--
--	/*
--	 * Now that we have a pi_state, we can acquire wait_lock
--	 * and do the state validation.
--	 */
--	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
--
--	/*
--	 * Since {uval, pi_state} is serialized by wait_lock, and our current
--	 * uval was read without holding it, it can have changed. Verify it
--	 * still is what we expect it to be, otherwise retry the entire
--	 * operation.
--	 */
--	if (get_futex_value_locked(&uval2, uaddr))
--		goto out_efault;
--
--	if (uval != uval2)
--		goto out_eagain;
--
--	/*
--	 * Handle the owner died case:
--	 */
--	if (uval & FUTEX_OWNER_DIED) {
--		/*
--		 * exit_pi_state_list sets owner to NULL and wakes the
--		 * topmost waiter. The task which acquires the
--		 * pi_state->rt_mutex will fixup owner.
--		 */
--		if (!pi_state->owner) {
--			/*
--			 * No pi state owner, but the user space TID
--			 * is not 0. Inconsistent state. [5]
--			 */
--			if (pid)
--				goto out_einval;
--			/*
--			 * Take a ref on the state and return success. [4]
--			 */
--			goto out_attach;
--		}
--
--		/*
--		 * If TID is 0, then either the dying owner has not
--		 * yet executed exit_pi_state_list() or some waiter
--		 * acquired the rtmutex in the pi state, but did not
--		 * yet fixup the TID in user space.
--		 *
--		 * Take a ref on the state and return success. [6]
--		 */
--		if (!pid)
--			goto out_attach;
--	} else {
--		/*
--		 * If the owner died bit is not set, then the pi_state
--		 * must have an owner. [7]
--		 */
--		if (!pi_state->owner)
--			goto out_einval;
--	}
--
--	/*
--	 * Bail out if user space manipulated the futex value. If pi
--	 * state exists then the owner TID must be the same as the
--	 * user space TID. [9/10]
--	 */
--	if (pid != task_pid_vnr(pi_state->owner))
--		goto out_einval;
--
--out_attach:
--	get_pi_state(pi_state);
--	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
--	*ps = pi_state;
--	return 0;
--
--out_einval:
--	ret = -EINVAL;
--	goto out_error;
--
--out_eagain:
--	ret = -EAGAIN;
--	goto out_error;
--
--out_efault:
--	ret = -EFAULT;
--	goto out_error;
--
--out_error:
--	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
--	return ret;
--}
--
--/**
-- * wait_for_owner_exiting - Block until the owner has exited
-- * @ret: owner's current futex lock status
-- * @exiting:	Pointer to the exiting task
-- *
-- * Caller must hold a refcount on @exiting.
-- */
--static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
--{
--	if (ret != -EBUSY) {
--		WARN_ON_ONCE(exiting);
--		return;
--	}
--
--	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
--		return;
--
--	mutex_lock(&exiting->futex_exit_mutex);
--	/*
--	 * No point in doing state checking here. If the waiter got here
--	 * while the task was in exec()->exec_futex_release() then it can
--	 * have any FUTEX_STATE_* value when the waiter has acquired the
--	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
--	 * already. Highly unlikely and not a problem. Just one more round
--	 * through the futex maze.
--	 */
--	mutex_unlock(&exiting->futex_exit_mutex);
--
--	put_task_struct(exiting);
--}
--
--static int handle_exit_race(u32 __user *uaddr, u32 uval,
--			    struct task_struct *tsk)
--{
--	u32 uval2;
--
--	/*
--	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
--	 * caller that the alleged owner is busy.
--	 */
--	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
--		return -EBUSY;
--
--	/*
--	 * Reread the user space value to handle the following situation:
--	 *
--	 * CPU0				CPU1
--	 *
--	 * sys_exit()			sys_futex()
--	 *  do_exit()			 futex_lock_pi()
--	 *                                futex_lock_pi_atomic()
--	 *   exit_signals(tsk)		    No waiters:
--	 *    tsk->flags |= PF_EXITING;	    *uaddr == 0x00000PID
--	 *  mm_release(tsk)		    Set waiter bit
--	 *   exit_robust_list(tsk) {	    *uaddr = 0x80000PID;
--	 *      Set owner died		    attach_to_pi_owner() {
--	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
--	 *   }				     if (!tsk->flags & PF_EXITING) {
--	 *  ...				       attach();
--	 *  tsk->futex_state =               } else {
--	 *	FUTEX_STATE_DEAD;              if (tsk->futex_state !=
--	 *					  FUTEX_STATE_DEAD)
--	 *				         return -EAGAIN;
--	 *				       return -ESRCH; <--- FAIL
--	 *				     }
--	 *
--	 * Returning ESRCH unconditionally is wrong here because the
--	 * user space value has been changed by the exiting task.
--	 *
--	 * The same logic applies to the case where the exiting task is
--	 * already gone.
--	 */
--	if (get_futex_value_locked(&uval2, uaddr))
--		return -EFAULT;
--
--	/* If the user space value has changed, try again. */
--	if (uval2 != uval)
--		return -EAGAIN;
--
--	/*
--	 * The exiting task did not have a robust list, the robust list was
--	 * corrupted or the user space value in *uaddr is simply bogus.
--	 * Give up and tell user space.
--	 */
--	return -ESRCH;
--}
--
--static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
--				 struct futex_pi_state **ps)
--{
--	/*
--	 * No existing pi state. First waiter. [2]
--	 *
--	 * This creates pi_state, we have hb->lock held, this means nothing can
--	 * observe this state, wait_lock is irrelevant.
--	 */
--	struct futex_pi_state *pi_state = alloc_pi_state();
--
--	/*
--	 * Initialize the pi_mutex in locked state and make @p
--	 * the owner of it:
--	 */
--	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
--
--	/* Store the key for possible exit cleanups: */
--	pi_state->key = *key;
--
--	WARN_ON(!list_empty(&pi_state->list));
--	list_add(&pi_state->list, &p->pi_state_list);
--	/*
--	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
--	 * because there is no concurrency as the object is not published yet.
--	 */
--	pi_state->owner = p;
--
--	*ps = pi_state;
--}
--/*
-- * Lookup the task for the TID provided from user space and attach to
-- * it after doing proper sanity checks.
-- */
--static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
--			      struct futex_pi_state **ps,
--			      struct task_struct **exiting)
--{
--	pid_t pid = uval & FUTEX_TID_MASK;
--	struct task_struct *p;
--
--	/*
--	 * We are the first waiter - try to look up the real owner and attach
--	 * the new pi_state to it, but bail out when TID = 0 [1]
--	 *
--	 * The !pid check is paranoid. None of the call sites should end up
--	 * with pid == 0, but better safe than sorry. Let the caller retry
--	 */
--	if (!pid)
--		return -EAGAIN;
--	p = find_get_task_by_vpid(pid);
--	if (!p)
--		return handle_exit_race(uaddr, uval, NULL);
--
--	if (unlikely(p->flags & PF_KTHREAD)) {
--		put_task_struct(p);
--		return -EPERM;
--	}
--
--	/*
--	 * We need to look at the task state to figure out, whether the
--	 * task is exiting. To protect against the change of the task state
--	 * in futex_exit_release(), we do this protected by p->pi_lock:
--	 */
--	raw_spin_lock_irq(&p->pi_lock);
--	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
--		/*
--		 * The task is on the way out. When the futex state is
--		 * FUTEX_STATE_DEAD, we know that the task has finished
--		 * the cleanup:
--		 */
--		int ret = handle_exit_race(uaddr, uval, p);
--
--		raw_spin_unlock_irq(&p->pi_lock);
--		/*
--		 * If the owner task is between FUTEX_STATE_EXITING and
--		 * FUTEX_STATE_DEAD then store the task pointer and keep
--		 * the reference on the task struct. The calling code will
--		 * drop all locks, wait for the task to reach
--		 * FUTEX_STATE_DEAD and then drop the refcount. This is
--		 * required to prevent a live lock when the current task
--		 * preempted the exiting task between the two states.
--		 */
--		if (ret == -EBUSY)
--			*exiting = p;
--		else
--			put_task_struct(p);
--		return ret;
--	}
--
--	__attach_to_pi_owner(p, key, ps);
--	raw_spin_unlock_irq(&p->pi_lock);
--
--	put_task_struct(p);
--
--	return 0;
--}
--
--static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
--{
--	int err;
--	u32 curval;
--
--	if (unlikely(should_fail_futex(true)))
--		return -EFAULT;
--
--	err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
--	if (unlikely(err))
--		return err;
--
--	/* If user space value changed, let the caller retry */
--	return curval != uval ? -EAGAIN : 0;
--}
--
--/**
-- * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
-- * @uaddr:		the pi futex user address
-- * @hb:			the pi futex hash bucket
-- * @key:		the futex key associated with uaddr and hb
-- * @ps:			the pi_state pointer where we store the result of the
-- *			lookup
-- * @task:		the task to perform the atomic lock work for.  This will
-- *			be "current" except in the case of requeue pi.
-- * @exiting:		Pointer to store the task pointer of the owner task
-- *			which is in the middle of exiting
-- * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
-- *
-- * Return:
-- *  -  0 - ready to wait;
-- *  -  1 - acquired the lock;
-- *  - <0 - error
-- *
-- * The hb->lock must be held by the caller.
-- *
-- * @exiting is only set when the return value is -EBUSY. If so, this holds
-- * a refcount on the exiting task on return and the caller needs to drop it
-- * after waiting for the exit to complete.
-- */
--static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
--				union futex_key *key,
--				struct futex_pi_state **ps,
--				struct task_struct *task,
--				struct task_struct **exiting,
--				int set_waiters)
--{
--	u32 uval, newval, vpid = task_pid_vnr(task);
--	struct futex_q *top_waiter;
--	int ret;
--
--	/*
--	 * Read the user space value first so we can validate a few
--	 * things before proceeding further.
--	 */
--	if (get_futex_value_locked(&uval, uaddr))
--		return -EFAULT;
--
--	if (unlikely(should_fail_futex(true)))
--		return -EFAULT;
--
--	/*
--	 * Detect deadlocks.
--	 */
--	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
--		return -EDEADLK;
--
--	if ((unlikely(should_fail_futex(true))))
--		return -EDEADLK;
--
--	/*
--	 * Lookup existing state first. If it exists, try to attach to
--	 * its pi_state.
--	 */
--	top_waiter = futex_top_waiter(hb, key);
--	if (top_waiter)
--		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
--
--	/*
--	 * No waiter and user TID is 0. We are here because the
--	 * waiters or the owner died bit is set or called from
--	 * requeue_cmp_pi or for whatever reason something took the
--	 * syscall.
--	 */
--	if (!(uval & FUTEX_TID_MASK)) {
--		/*
--		 * We take over the futex. No other waiters and the user space
--		 * TID is 0. We preserve the owner died bit.
--		 */
--		newval = uval & FUTEX_OWNER_DIED;
--		newval |= vpid;
--
--		/* The futex requeue_pi code can enforce the waiters bit */
--		if (set_waiters)
--			newval |= FUTEX_WAITERS;
--
--		ret = lock_pi_update_atomic(uaddr, uval, newval);
--		if (ret)
--			return ret;
--
--		/*
--		 * If the waiter bit was requested the caller also needs PI
--		 * state attached to the new owner of the user space futex.
--		 *
--		 * @task is guaranteed to be alive and it cannot be exiting
--		 * because it is either sleeping or waiting in
--		 * futex_requeue_pi_wakeup_sync().
--		 *
--		 * No need to do the full attach_to_pi_owner() exercise
--		 * because @task is known and valid.
--		 */
--		if (set_waiters) {
--			raw_spin_lock_irq(&task->pi_lock);
--			__attach_to_pi_owner(task, key, ps);
--			raw_spin_unlock_irq(&task->pi_lock);
--		}
--		return 1;
--	}
--
--	/*
--	 * First waiter. Set the waiters bit before attaching ourself to
--	 * the owner. If owner tries to unlock, it will be forced into
--	 * the kernel and blocked on hb->lock.
--	 */
--	newval = uval | FUTEX_WAITERS;
--	ret = lock_pi_update_atomic(uaddr, uval, newval);
--	if (ret)
--		return ret;
--	/*
--	 * If the update of the user space value succeeded, we try to
--	 * attach to the owner. If that fails, no harm done, we only
--	 * set the FUTEX_WAITERS bit in the user space variable.
--	 */
--	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
--}
--
--/**
-- * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
-- * @q:	The futex_q to unqueue
-- *
-- * The q->lock_ptr must not be NULL and must be held by the caller.
-- */
--static void __unqueue_futex(struct futex_q *q)
--{
--	struct futex_hash_bucket *hb;
--
--	if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
--		return;
--	lockdep_assert_held(q->lock_ptr);
--
--	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
--	plist_del(&q->list, &hb->chain);
--	hb_waiters_dec(hb);
--}
--
--/*
-- * The hash bucket lock must be held when this is called.
-- * Afterwards, the futex_q must not be accessed. Callers
-- * must ensure to later call wake_up_q() for the actual
-- * wakeups to occur.
-- */
--static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
--{
--	struct task_struct *p = q->task;
--
--	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
--		return;
--
--	get_task_struct(p);
--	__unqueue_futex(q);
--	/*
--	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
--	 * is written, without taking any locks. This is possible in the event
--	 * of a spurious wakeup, for example. A memory barrier is required here
--	 * to prevent the following store to lock_ptr from getting ahead of the
--	 * plist_del in __unqueue_futex().
--	 */
--	smp_store_release(&q->lock_ptr, NULL);
--
--	/*
--	 * Queue the task for later wakeup for after we've released
--	 * the hb->lock.
--	 */
--	wake_q_add_safe(wake_q, p);
--}
--
--/*
-- * Caller must hold a reference on @pi_state.
-- */
--static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
--{
--	struct rt_mutex_waiter *top_waiter;
--	struct task_struct *new_owner;
--	bool postunlock = false;
--	DEFINE_RT_WAKE_Q(wqh);
--	u32 curval, newval;
--	int ret = 0;
--
--	top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
--	if (WARN_ON_ONCE(!top_waiter)) {
--		/*
--		 * As per the comment in futex_unlock_pi() this should not happen.
--		 *
--		 * When this happens, give up our locks and try again, giving
--		 * the futex_lock_pi() instance time to complete, either by
--		 * waiting on the rtmutex or removing itself from the futex
--		 * queue.
--		 */
--		ret = -EAGAIN;
--		goto out_unlock;
--	}
--
--	new_owner = top_waiter->task;
--
--	/*
--	 * We pass it to the next owner. The WAITERS bit is always kept
--	 * enabled while there is PI state around. We cleanup the owner
--	 * died bit, because we are the owner.
--	 */
--	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
--
--	if (unlikely(should_fail_futex(true))) {
--		ret = -EFAULT;
--		goto out_unlock;
--	}
--
--	ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
--	if (!ret && (curval != uval)) {
--		/*
--		 * If a unconditional UNLOCK_PI operation (user space did not
--		 * try the TID->0 transition) raced with a waiter setting the
--		 * FUTEX_WAITERS flag between get_user() and locking the hash
--		 * bucket lock, retry the operation.
--		 */
--		if ((FUTEX_TID_MASK & curval) == uval)
--			ret = -EAGAIN;
--		else
--			ret = -EINVAL;
--	}
--
--	if (!ret) {
--		/*
--		 * This is a point of no return; once we modified the uval
--		 * there is no going back and subsequent operations must
--		 * not fail.
--		 */
--		pi_state_update_owner(pi_state, new_owner);
--		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
--	}
--
--out_unlock:
--	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
--
--	if (postunlock)
--		rt_mutex_postunlock(&wqh);
--
--	return ret;
--}
--
--/*
-- * Express the locking dependencies for lockdep:
-- */
--static inline void
--double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
--{
--	if (hb1 <= hb2) {
--		spin_lock(&hb1->lock);
--		if (hb1 < hb2)
--			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
--	} else { /* hb1 > hb2 */
--		spin_lock(&hb2->lock);
--		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
--	}
--}
--
--static inline void
--double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
--{
--	spin_unlock(&hb1->lock);
--	if (hb1 != hb2)
--		spin_unlock(&hb2->lock);
--}
--
--/*
-- * Wake up waiters matching bitset queued on this futex (uaddr).
-- */
--static int
--futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
--{
--	struct futex_hash_bucket *hb;
--	struct futex_q *this, *next;
--	union futex_key key = FUTEX_KEY_INIT;
--	int ret;
--	DEFINE_WAKE_Q(wake_q);
--
--	if (!bitset)
--		return -EINVAL;
--
--	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
--	if (unlikely(ret != 0))
--		return ret;
--
--	hb = hash_futex(&key);
--
--	/* Make sure we really have tasks to wakeup */
--	if (!hb_waiters_pending(hb))
--		return ret;
--
--	spin_lock(&hb->lock);
--
--	plist_for_each_entry_safe(this, next, &hb->chain, list) {
--		if (match_futex (&this->key, &key)) {
--			if (this->pi_state || this->rt_waiter) {
--				ret = -EINVAL;
--				break;
--			}
--
--			/* Check if one of the bits is set in both bitsets */
--			if (!(this->bitset & bitset))
--				continue;
--
--			mark_wake_futex(&wake_q, this);
--			if (++ret >= nr_wake)
--				break;
--		}
--	}
--
--	spin_unlock(&hb->lock);
--	wake_up_q(&wake_q);
--	return ret;
--}
--
--static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
--{
--	unsigned int op =	  (encoded_op & 0x70000000) >> 28;
--	unsigned int cmp =	  (encoded_op & 0x0f000000) >> 24;
--	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
--	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
--	int oldval, ret;
--
--	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
--		if (oparg < 0 || oparg > 31) {
--			char comm[sizeof(current->comm)];
--			/*
--			 * kill this print and return -EINVAL when userspace
--			 * is sane again
--			 */
--			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
--					get_task_comm(comm, current), oparg);
--			oparg &= 31;
--		}
--		oparg = 1 << oparg;
--	}
--
--	pagefault_disable();
--	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
--	pagefault_enable();
--	if (ret)
--		return ret;
--
--	switch (cmp) {
--	case FUTEX_OP_CMP_EQ:
--		return oldval == cmparg;
--	case FUTEX_OP_CMP_NE:
--		return oldval != cmparg;
--	case FUTEX_OP_CMP_LT:
--		return oldval < cmparg;
--	case FUTEX_OP_CMP_GE:
--		return oldval >= cmparg;
--	case FUTEX_OP_CMP_LE:
--		return oldval <= cmparg;
--	case FUTEX_OP_CMP_GT:
--		return oldval > cmparg;
--	default:
--		return -ENOSYS;
--	}
--}
--
--/*
-- * Wake up all waiters hashed on the physical page that is mapped
-- * to this virtual address:
-- */
--static int
--futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
--	      int nr_wake, int nr_wake2, int op)
--{
--	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
--	struct futex_hash_bucket *hb1, *hb2;
--	struct futex_q *this, *next;
--	int ret, op_ret;
--	DEFINE_WAKE_Q(wake_q);
--
--retry:
--	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
--	if (unlikely(ret != 0))
--		return ret;
--	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
--	if (unlikely(ret != 0))
--		return ret;
--
--	hb1 = hash_futex(&key1);
--	hb2 = hash_futex(&key2);
--
--retry_private:
--	double_lock_hb(hb1, hb2);
--	op_ret = futex_atomic_op_inuser(op, uaddr2);
--	if (unlikely(op_ret < 0)) {
--		double_unlock_hb(hb1, hb2);
--
--		if (!IS_ENABLED(CONFIG_MMU) ||
--		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
--			/*
--			 * we don't get EFAULT from MMU faults if we don't have
--			 * an MMU, but we might get them from range checking
--			 */
--			ret = op_ret;
--			return ret;
--		}
--
--		if (op_ret == -EFAULT) {
--			ret = fault_in_user_writeable(uaddr2);
--			if (ret)
--				return ret;
--		}
--
--		cond_resched();
--		if (!(flags & FLAGS_SHARED))
--			goto retry_private;
--		goto retry;
--	}
--
--	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
--		if (match_futex (&this->key, &key1)) {
--			if (this->pi_state || this->rt_waiter) {
--				ret = -EINVAL;
--				goto out_unlock;
--			}
--			mark_wake_futex(&wake_q, this);
--			if (++ret >= nr_wake)
--				break;
--		}
--	}
--
--	if (op_ret > 0) {
--		op_ret = 0;
--		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
--			if (match_futex (&this->key, &key2)) {
--				if (this->pi_state || this->rt_waiter) {
--					ret = -EINVAL;
--					goto out_unlock;
--				}
--				mark_wake_futex(&wake_q, this);
--				if (++op_ret >= nr_wake2)
--					break;
--			}
--		}
--		ret += op_ret;
--	}
--
--out_unlock:
--	double_unlock_hb(hb1, hb2);
--	wake_up_q(&wake_q);
--	return ret;
--}
--
--/**
-- * requeue_futex() - Requeue a futex_q from one hb to another
-- * @q:		the futex_q to requeue
-- * @hb1:	the source hash_bucket
-- * @hb2:	the target hash_bucket
-- * @key2:	the new key for the requeued futex_q
-- */
--static inline
--void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
--		   struct futex_hash_bucket *hb2, union futex_key *key2)
--{
--
--	/*
--	 * If key1 and key2 hash to the same bucket, no need to
--	 * requeue.
--	 */
--	if (likely(&hb1->chain != &hb2->chain)) {
--		plist_del(&q->list, &hb1->chain);
--		hb_waiters_dec(hb1);
--		hb_waiters_inc(hb2);
--		plist_add(&q->list, &hb2->chain);
--		q->lock_ptr = &hb2->lock;
--	}
--	q->key = *key2;
--}
--
--static inline bool futex_requeue_pi_prepare(struct futex_q *q,
--					    struct futex_pi_state *pi_state)
--{
--	int old, new;
--
--	/*
--	 * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
--	 * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
--	 * ignore the waiter.
--	 */
--	old = atomic_read_acquire(&q->requeue_state);
--	do {
--		if (old == Q_REQUEUE_PI_IGNORE)
--			return false;
--
--		/*
--		 * futex_proxy_trylock_atomic() might have set it to
--		 * IN_PROGRESS and a interleaved early wake to WAIT.
--		 *
--		 * It was considered to have an extra state for that
--		 * trylock, but that would just add more conditionals
--		 * all over the place for a dubious value.
--		 */
--		if (old != Q_REQUEUE_PI_NONE)
--			break;
--
--		new = Q_REQUEUE_PI_IN_PROGRESS;
--	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
--
--	q->pi_state = pi_state;
--	return true;
--}
--
--static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
--{
--	int old, new;
--
--	old = atomic_read_acquire(&q->requeue_state);
--	do {
--		if (old == Q_REQUEUE_PI_IGNORE)
--			return;
--
--		if (locked >= 0) {
--			/* Requeue succeeded. Set DONE or LOCKED */
--			WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
--				     old != Q_REQUEUE_PI_WAIT);
--			new = Q_REQUEUE_PI_DONE + locked;
--		} else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
--			/* Deadlock, no early wakeup interleave */
--			new = Q_REQUEUE_PI_NONE;
--		} else {
--			/* Deadlock, early wakeup interleave. */
--			WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
--			new = Q_REQUEUE_PI_IGNORE;
--		}
--	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
--
--#ifdef CONFIG_PREEMPT_RT
--	/* If the waiter interleaved with the requeue let it know */
--	if (unlikely(old == Q_REQUEUE_PI_WAIT))
--		rcuwait_wake_up(&q->requeue_wait);
--#endif
--}
--
--static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
--{
--	int old, new;
--
--	old = atomic_read_acquire(&q->requeue_state);
--	do {
--		/* Is requeue done already? */
--		if (old >= Q_REQUEUE_PI_DONE)
--			return old;
--
--		/*
--		 * If not done, then tell the requeue code to either ignore
--		 * the waiter or to wake it up once the requeue is done.
--		 */
--		new = Q_REQUEUE_PI_WAIT;
--		if (old == Q_REQUEUE_PI_NONE)
--			new = Q_REQUEUE_PI_IGNORE;
--	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
--
--	/* If the requeue was in progress, wait for it to complete */
--	if (old == Q_REQUEUE_PI_IN_PROGRESS) {
--#ifdef CONFIG_PREEMPT_RT
--		rcuwait_wait_event(&q->requeue_wait,
--				   atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
--				   TASK_UNINTERRUPTIBLE);
--#else
--		(void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
--#endif
--	}
--
--	/*
--	 * Requeue is now either prohibited or complete. Reread state
--	 * because during the wait above it might have changed. Nothing
--	 * will modify q->requeue_state after this point.
--	 */
--	return atomic_read(&q->requeue_state);
--}
--
--/**
-- * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
-- * @q:		the futex_q
-- * @key:	the key of the requeue target futex
-- * @hb:		the hash_bucket of the requeue target futex
-- *
-- * During futex_requeue, with requeue_pi=1, it is possible to acquire the
-- * target futex if it is uncontended or via a lock steal.
-- *
-- * 1) Set @q::key to the requeue target futex key so the waiter can detect
-- *    the wakeup on the right futex.
-- *
-- * 2) Dequeue @q from the hash bucket.
-- *
-- * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
-- *    acquisition.
-- *
-- * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
-- *    the waiter has to fixup the pi state.
-- *
-- * 5) Complete the requeue state so the waiter can make progress. After
-- *    this point the waiter task can return from the syscall immediately in
-- *    case that the pi state does not have to be fixed up.
-- *
-- * 6) Wake the waiter task.
-- *
-- * Must be called with both q->lock_ptr and hb->lock held.
-- */
--static inline
--void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
--			   struct futex_hash_bucket *hb)
--{
--	q->key = *key;
--
--	__unqueue_futex(q);
--
--	WARN_ON(!q->rt_waiter);
--	q->rt_waiter = NULL;
--
--	q->lock_ptr = &hb->lock;
--
--	/* Signal locked state to the waiter */
--	futex_requeue_pi_complete(q, 1);
--	wake_up_state(q->task, TASK_NORMAL);
--}
--
--/**
-- * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
-- * @pifutex:		the user address of the to futex
-- * @hb1:		the from futex hash bucket, must be locked by the caller
-- * @hb2:		the to futex hash bucket, must be locked by the caller
-- * @key1:		the from futex key
-- * @key2:		the to futex key
-- * @ps:			address to store the pi_state pointer
-- * @exiting:		Pointer to store the task pointer of the owner task
-- *			which is in the middle of exiting
-- * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
-- *
-- * Try and get the lock on behalf of the top waiter if we can do it atomically.
-- * Wake the top waiter if we succeed.  If the caller specified set_waiters,
-- * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
-- * hb1 and hb2 must be held by the caller.
-- *
-- * @exiting is only set when the return value is -EBUSY. If so, this holds
-- * a refcount on the exiting task on return and the caller needs to drop it
-- * after waiting for the exit to complete.
-- *
-- * Return:
-- *  -  0 - failed to acquire the lock atomically;
-- *  - >0 - acquired the lock, return value is vpid of the top_waiter
-- *  - <0 - error
-- */
--static int
--futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
--			   struct futex_hash_bucket *hb2, union futex_key *key1,
--			   union futex_key *key2, struct futex_pi_state **ps,
--			   struct task_struct **exiting, int set_waiters)
--{
--	struct futex_q *top_waiter = NULL;
--	u32 curval;
--	int ret;
--
--	if (get_futex_value_locked(&curval, pifutex))
--		return -EFAULT;
--
--	if (unlikely(should_fail_futex(true)))
--		return -EFAULT;
--
--	/*
--	 * Find the top_waiter and determine if there are additional waiters.
--	 * If the caller intends to requeue more than 1 waiter to pifutex,
--	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
--	 * as we have means to handle the possible fault.  If not, don't set
--	 * the bit unnecessarily as it will force the subsequent unlock to enter
--	 * the kernel.
--	 */
--	top_waiter = futex_top_waiter(hb1, key1);
--
--	/* There are no waiters, nothing for us to do. */
--	if (!top_waiter)
--		return 0;
--
--	/*
--	 * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
--	 * and waiting on the 'waitqueue' futex which is always !PI.
--	 */
--	if (!top_waiter->rt_waiter || top_waiter->pi_state)
--		return -EINVAL;
--
--	/* Ensure we requeue to the expected futex. */
--	if (!match_futex(top_waiter->requeue_pi_key, key2))
--		return -EINVAL;
--
--	/* Ensure that this does not race against an early wakeup */
--	if (!futex_requeue_pi_prepare(top_waiter, NULL))
--		return -EAGAIN;
--
--	/*
--	 * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
--	 * in the contended case or if @set_waiters is true.
--	 *
--	 * In the contended case PI state is attached to the lock owner. If
--	 * the user space lock can be acquired then PI state is attached to
--	 * the new owner (@top_waiter->task) when @set_waiters is true.
--	 */
--	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
--				   exiting, set_waiters);
--	if (ret == 1) {
--		/*
--		 * Lock was acquired in user space and PI state was
--		 * attached to @top_waiter->task. That means state is fully
--		 * consistent and the waiter can return to user space
--		 * immediately after the wakeup.
--		 */
--		requeue_pi_wake_futex(top_waiter, key2, hb2);
--	} else if (ret < 0) {
--		/* Rewind top_waiter::requeue_state */
--		futex_requeue_pi_complete(top_waiter, ret);
--	} else {
--		/*
--		 * futex_lock_pi_atomic() did not acquire the user space
--		 * futex, but managed to establish the proxy lock and pi
--		 * state. top_waiter::requeue_state cannot be fixed up here
--		 * because the waiter is not enqueued on the rtmutex
--		 * yet. This is handled at the callsite depending on the
--		 * result of rt_mutex_start_proxy_lock() which is
--		 * guaranteed to be reached with this function returning 0.
--		 */
--	}
--	return ret;
--}
--
--/**
-- * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
-- * @uaddr1:	source futex user address
-- * @flags:	futex flags (FLAGS_SHARED, etc.)
-- * @uaddr2:	target futex user address
-- * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
-- * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
-- * @cmpval:	@uaddr1 expected value (or %NULL)
-- * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
-- *		pi futex (pi to pi requeue is not supported)
-- *
-- * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
-- * uaddr2 atomically on behalf of the top waiter.
-- *
-- * Return:
-- *  - >=0 - on success, the number of tasks requeued or woken;
-- *  -  <0 - on error
-- */
--static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
--			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
--			 u32 *cmpval, int requeue_pi)
--{
--	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
--	int task_count = 0, ret;
--	struct futex_pi_state *pi_state = NULL;
--	struct futex_hash_bucket *hb1, *hb2;
--	struct futex_q *this, *next;
--	DEFINE_WAKE_Q(wake_q);
--
--	if (nr_wake < 0 || nr_requeue < 0)
--		return -EINVAL;
--
--	/*
--	 * When PI not supported: return -ENOSYS if requeue_pi is true,
--	 * consequently the compiler knows requeue_pi is always false past
--	 * this point which will optimize away all the conditional code
--	 * further down.
--	 */
--	if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
--		return -ENOSYS;
--
--	if (requeue_pi) {
--		/*
--		 * Requeue PI only works on two distinct uaddrs. This
--		 * check is only valid for private futexes. See below.
--		 */
--		if (uaddr1 == uaddr2)
--			return -EINVAL;
--
--		/*
--		 * futex_requeue() allows the caller to define the number
--		 * of waiters to wake up via the @nr_wake argument. With
--		 * REQUEUE_PI, waking up more than one waiter is creating
--		 * more problems than it solves. Waking up a waiter makes
--		 * only sense if the PI futex @uaddr2 is uncontended as
--		 * this allows the requeue code to acquire the futex
--		 * @uaddr2 before waking the waiter. The waiter can then
--		 * return to user space without further action. A secondary
--		 * wakeup would just make the futex_wait_requeue_pi()
--		 * handling more complex, because that code would have to
--		 * look up pi_state and do more or less all the handling
--		 * which the requeue code has to do for the to be requeued
--		 * waiters. So restrict the number of waiters to wake to
--		 * one, and only wake it up when the PI futex is
--		 * uncontended. Otherwise requeue it and let the unlock of
--		 * the PI futex handle the wakeup.
--		 *
--		 * All REQUEUE_PI users, e.g. pthread_cond_signal() and
--		 * pthread_cond_broadcast() must use nr_wake=1.
--		 */
--		if (nr_wake != 1)
--			return -EINVAL;
--
--		/*
--		 * requeue_pi requires a pi_state, try to allocate it now
--		 * without any locks in case it fails.
--		 */
--		if (refill_pi_state_cache())
--			return -ENOMEM;
--	}
--
--retry:
--	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
--	if (unlikely(ret != 0))
--		return ret;
--	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
--			    requeue_pi ? FUTEX_WRITE : FUTEX_READ);
--	if (unlikely(ret != 0))
--		return ret;
--
--	/*
--	 * The check above which compares uaddrs is not sufficient for
--	 * shared futexes. We need to compare the keys:
--	 */
--	if (requeue_pi && match_futex(&key1, &key2))
--		return -EINVAL;
--
--	hb1 = hash_futex(&key1);
--	hb2 = hash_futex(&key2);
--
--retry_private:
--	hb_waiters_inc(hb2);
--	double_lock_hb(hb1, hb2);
--
--	if (likely(cmpval != NULL)) {
--		u32 curval;
--
--		ret = get_futex_value_locked(&curval, uaddr1);
--
--		if (unlikely(ret)) {
--			double_unlock_hb(hb1, hb2);
--			hb_waiters_dec(hb2);
--
--			ret = get_user(curval, uaddr1);
--			if (ret)
--				return ret;
--
--			if (!(flags & FLAGS_SHARED))
--				goto retry_private;
--
--			goto retry;
--		}
--		if (curval != *cmpval) {
--			ret = -EAGAIN;
--			goto out_unlock;
--		}
--	}
--
--	if (requeue_pi) {
--		struct task_struct *exiting = NULL;
--
--		/*
--		 * Attempt to acquire uaddr2 and wake the top waiter. If we
--		 * intend to requeue waiters, force setting the FUTEX_WAITERS
--		 * bit.  We force this here where we are able to easily handle
--		 * faults rather in the requeue loop below.
--		 *
--		 * Updates topwaiter::requeue_state if a top waiter exists.
--		 */
--		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
--						 &key2, &pi_state,
--						 &exiting, nr_requeue);
--
--		/*
--		 * At this point the top_waiter has either taken uaddr2 or
--		 * is waiting on it. In both cases pi_state has been
--		 * established and an initial refcount on it. In case of an
--		 * error there's nothing.
--		 *
--		 * The top waiter's requeue_state is up to date:
--		 *
--		 *  - If the lock was acquired atomically (ret == 1), then
--		 *    the state is Q_REQUEUE_PI_LOCKED.
--		 *
--		 *    The top waiter has been dequeued and woken up and can
--		 *    return to user space immediately. The kernel/user
--		 *    space state is consistent. In case that there must be
--		 *    more waiters requeued the WAITERS bit in the user
--		 *    space futex is set so the top waiter task has to go
--		 *    into the syscall slowpath to unlock the futex. This
--		 *    will block until this requeue operation has been
--		 *    completed and the hash bucket locks have been
--		 *    dropped.
--		 *
--		 *  - If the trylock failed with an error (ret < 0) then
--		 *    the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
--		 *    happened", or Q_REQUEUE_PI_IGNORE when there was an
--		 *    interleaved early wakeup.
--		 *
--		 *  - If the trylock did not succeed (ret == 0) then the
--		 *    state is either Q_REQUEUE_PI_IN_PROGRESS or
--		 *    Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
--		 *    This will be cleaned up in the loop below, which
--		 *    cannot fail because futex_proxy_trylock_atomic() did
--		 *    the same sanity checks for requeue_pi as the loop
--		 *    below does.
--		 */
--		switch (ret) {
--		case 0:
--			/* We hold a reference on the pi state. */
--			break;
--
--		case 1:
--			/*
--			 * futex_proxy_trylock_atomic() acquired the user space
--			 * futex. Adjust task_count.
--			 */
--			task_count++;
--			ret = 0;
--			break;
--
--		/*
--		 * If the above failed, then pi_state is NULL and
--		 * waiter::requeue_state is correct.
--		 */
--		case -EFAULT:
--			double_unlock_hb(hb1, hb2);
--			hb_waiters_dec(hb2);
--			ret = fault_in_user_writeable(uaddr2);
--			if (!ret)
--				goto retry;
--			return ret;
--		case -EBUSY:
--		case -EAGAIN:
--			/*
--			 * Two reasons for this:
--			 * - EBUSY: Owner is exiting and we just wait for the
--			 *   exit to complete.
--			 * - EAGAIN: The user space value changed.
--			 */
--			double_unlock_hb(hb1, hb2);
--			hb_waiters_dec(hb2);
--			/*
--			 * Handle the case where the owner is in the middle of
--			 * exiting. Wait for the exit to complete otherwise
--			 * this task might loop forever, aka. live lock.
--			 */
--			wait_for_owner_exiting(ret, exiting);
--			cond_resched();
--			goto retry;
--		default:
--			goto out_unlock;
--		}
--	}
--
--	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
--		if (task_count - nr_wake >= nr_requeue)
--			break;
--
--		if (!match_futex(&this->key, &key1))
--			continue;
--
--		/*
--		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
--		 * be paired with each other and no other futex ops.
--		 *
--		 * We should never be requeueing a futex_q with a pi_state,
--		 * which is awaiting a futex_unlock_pi().
--		 */
--		if ((requeue_pi && !this->rt_waiter) ||
--		    (!requeue_pi && this->rt_waiter) ||
--		    this->pi_state) {
--			ret = -EINVAL;
--			break;
--		}
--
--		/* Plain futexes just wake or requeue and are done */
--		if (!requeue_pi) {
--			if (++task_count <= nr_wake)
--				mark_wake_futex(&wake_q, this);
--			else
--				requeue_futex(this, hb1, hb2, &key2);
--			continue;
--		}
--
--		/* Ensure we requeue to the expected futex for requeue_pi. */
--		if (!match_futex(this->requeue_pi_key, &key2)) {
--			ret = -EINVAL;
--			break;
--		}
--
--		/*
--		 * Requeue nr_requeue waiters and possibly one more in the case
--		 * of requeue_pi if we couldn't acquire the lock atomically.
--		 *
--		 * Prepare the waiter to take the rt_mutex. Take a refcount
--		 * on the pi_state and store the pointer in the futex_q
--		 * object of the waiter.
--		 */
--		get_pi_state(pi_state);
--
--		/* Don't requeue when the waiter is already on the way out. */
--		if (!futex_requeue_pi_prepare(this, pi_state)) {
--			/*
--			 * Early woken waiter signaled that it is on the
--			 * way out. Drop the pi_state reference and try the
--			 * next waiter. @this->pi_state is still NULL.
--			 */
--			put_pi_state(pi_state);
--			continue;
--		}
--
--		ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
--						this->rt_waiter,
--						this->task);
--
--		if (ret == 1) {
--			/*
--			 * We got the lock. We do neither drop the refcount
--			 * on pi_state nor clear this->pi_state because the
--			 * waiter needs the pi_state for cleaning up the
--			 * user space value. It will drop the refcount
--			 * after doing so. this::requeue_state is updated
--			 * in the wakeup as well.
--			 */
--			requeue_pi_wake_futex(this, &key2, hb2);
--			task_count++;
--		} else if (!ret) {
--			/* Waiter is queued, move it to hb2 */
--			requeue_futex(this, hb1, hb2, &key2);
--			futex_requeue_pi_complete(this, 0);
--			task_count++;
--		} else {
--			/*
--			 * rt_mutex_start_proxy_lock() detected a potential
--			 * deadlock when we tried to queue that waiter.
--			 * Drop the pi_state reference which we took above
--			 * and remove the pointer to the state from the
--			 * waiters futex_q object.
--			 */
--			this->pi_state = NULL;
--			put_pi_state(pi_state);
--			futex_requeue_pi_complete(this, ret);
--			/*
--			 * We stop queueing more waiters and let user space
--			 * deal with the mess.
--			 */
--			break;
--		}
--	}
--
--	/*
--	 * We took an extra initial reference to the pi_state in
--	 * futex_proxy_trylock_atomic(). We need to drop it here again.
--	 */
--	put_pi_state(pi_state);
--
--out_unlock:
--	double_unlock_hb(hb1, hb2);
--	wake_up_q(&wake_q);
--	hb_waiters_dec(hb2);
--	return ret ? ret : task_count;
--}
--
--/* The key must be already stored in q->key. */
--static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
--	__acquires(&hb->lock)
--{
--	struct futex_hash_bucket *hb;
--
--	hb = hash_futex(&q->key);
--
--	/*
--	 * Increment the counter before taking the lock so that
--	 * a potential waker won't miss a to-be-slept task that is
--	 * waiting for the spinlock. This is safe as all queue_lock()
--	 * users end up calling queue_me(). Similarly, for housekeeping,
--	 * decrement the counter at queue_unlock() when some error has
--	 * occurred and we don't end up adding the task to the list.
--	 */
--	hb_waiters_inc(hb); /* implies smp_mb(); (A) */
--
--	q->lock_ptr = &hb->lock;
--
--	spin_lock(&hb->lock);
--	return hb;
--}
--
--static inline void
--queue_unlock(struct futex_hash_bucket *hb)
--	__releases(&hb->lock)
--{
--	spin_unlock(&hb->lock);
--	hb_waiters_dec(hb);
--}
--
--static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
--{
--	int prio;
--
--	/*
--	 * The priority used to register this element is
--	 * - either the real thread-priority for the real-time threads
--	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
--	 * - or MAX_RT_PRIO for non-RT threads.
--	 * Thus, all RT-threads are woken first in priority order, and
--	 * the others are woken last, in FIFO order.
--	 */
--	prio = min(current->normal_prio, MAX_RT_PRIO);
--
--	plist_node_init(&q->list, prio);
--	plist_add(&q->list, &hb->chain);
--	q->task = current;
--}
--
--/**
-- * queue_me() - Enqueue the futex_q on the futex_hash_bucket
-- * @q:	The futex_q to enqueue
-- * @hb:	The destination hash bucket
-- *
-- * The hb->lock must be held by the caller, and is released here. A call to
-- * queue_me() is typically paired with exactly one call to unqueue_me().  The
-- * exceptions involve the PI related operations, which may use unqueue_me_pi()
-- * or nothing if the unqueue is done as part of the wake process and the unqueue
-- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
-- * an example).
-- */
--static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
--	__releases(&hb->lock)
--{
--	__queue_me(q, hb);
--	spin_unlock(&hb->lock);
--}
--
--/**
-- * unqueue_me() - Remove the futex_q from its futex_hash_bucket
-- * @q:	The futex_q to unqueue
-- *
-- * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
-- * be paired with exactly one earlier call to queue_me().
-- *
-- * Return:
-- *  - 1 - if the futex_q was still queued (and we removed unqueued it);
-- *  - 0 - if the futex_q was already removed by the waking thread
-- */
--static int unqueue_me(struct futex_q *q)
--{
--	spinlock_t *lock_ptr;
--	int ret = 0;
--
--	/* In the common case we don't take the spinlock, which is nice. */
--retry:
--	/*
--	 * q->lock_ptr can change between this read and the following spin_lock.
--	 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
--	 * optimizing lock_ptr out of the logic below.
--	 */
--	lock_ptr = READ_ONCE(q->lock_ptr);
--	if (lock_ptr != NULL) {
--		spin_lock(lock_ptr);
--		/*
--		 * q->lock_ptr can change between reading it and
--		 * spin_lock(), causing us to take the wrong lock.  This
--		 * corrects the race condition.
--		 *
--		 * Reasoning goes like this: if we have the wrong lock,
--		 * q->lock_ptr must have changed (maybe several times)
--		 * between reading it and the spin_lock().  It can
--		 * change again after the spin_lock() but only if it was
--		 * already changed before the spin_lock().  It cannot,
--		 * however, change back to the original value.  Therefore
--		 * we can detect whether we acquired the correct lock.
--		 */
--		if (unlikely(lock_ptr != q->lock_ptr)) {
--			spin_unlock(lock_ptr);
--			goto retry;
--		}
--		__unqueue_futex(q);
--
--		BUG_ON(q->pi_state);
--
--		spin_unlock(lock_ptr);
--		ret = 1;
--	}
--
--	return ret;
--}
--
--/*
-- * PI futexes can not be requeued and must remove themselves from the
-- * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
-- */
--static void unqueue_me_pi(struct futex_q *q)
--{
--	__unqueue_futex(q);
--
--	BUG_ON(!q->pi_state);
--	put_pi_state(q->pi_state);
--	q->pi_state = NULL;
--}
--
--static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
--				  struct task_struct *argowner)
--{
--	struct futex_pi_state *pi_state = q->pi_state;
--	struct task_struct *oldowner, *newowner;
--	u32 uval, curval, newval, newtid;
--	int err = 0;
--
--	oldowner = pi_state->owner;
--
--	/*
--	 * We are here because either:
--	 *
--	 *  - we stole the lock and pi_state->owner needs updating to reflect
--	 *    that (@argowner == current),
--	 *
--	 * or:
--	 *
--	 *  - someone stole our lock and we need to fix things to point to the
--	 *    new owner (@argowner == NULL).
--	 *
--	 * Either way, we have to replace the TID in the user space variable.
--	 * This must be atomic as we have to preserve the owner died bit here.
--	 *
--	 * Note: We write the user space value _before_ changing the pi_state
--	 * because we can fault here. Imagine swapped out pages or a fork
--	 * that marked all the anonymous memory readonly for cow.
--	 *
--	 * Modifying pi_state _before_ the user space value would leave the
--	 * pi_state in an inconsistent state when we fault here, because we
--	 * need to drop the locks to handle the fault. This might be observed
--	 * in the PID checks when attaching to PI state .
--	 */
--retry:
--	if (!argowner) {
--		if (oldowner != current) {
--			/*
--			 * We raced against a concurrent self; things are
--			 * already fixed up. Nothing to do.
--			 */
--			return 0;
--		}
--
--		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
--			/* We got the lock. pi_state is correct. Tell caller. */
--			return 1;
--		}
--
--		/*
--		 * The trylock just failed, so either there is an owner or
--		 * there is a higher priority waiter than this one.
--		 */
--		newowner = rt_mutex_owner(&pi_state->pi_mutex);
--		/*
--		 * If the higher priority waiter has not yet taken over the
--		 * rtmutex then newowner is NULL. We can't return here with
--		 * that state because it's inconsistent vs. the user space
--		 * state. So drop the locks and try again. It's a valid
--		 * situation and not any different from the other retry
--		 * conditions.
--		 */
--		if (unlikely(!newowner)) {
--			err = -EAGAIN;
--			goto handle_err;
--		}
--	} else {
--		WARN_ON_ONCE(argowner != current);
--		if (oldowner == current) {
--			/*
--			 * We raced against a concurrent self; things are
--			 * already fixed up. Nothing to do.
--			 */
--			return 1;
--		}
--		newowner = argowner;
--	}
--
--	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
--	/* Owner died? */
--	if (!pi_state->owner)
--		newtid |= FUTEX_OWNER_DIED;
--
--	err = get_futex_value_locked(&uval, uaddr);
--	if (err)
--		goto handle_err;
--
--	for (;;) {
--		newval = (uval & FUTEX_OWNER_DIED) | newtid;
--
--		err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
--		if (err)
--			goto handle_err;
--
--		if (curval == uval)
--			break;
--		uval = curval;
--	}
--
--	/*
--	 * We fixed up user space. Now we need to fix the pi_state
--	 * itself.
--	 */
--	pi_state_update_owner(pi_state, newowner);
--
--	return argowner == current;
--
--	/*
--	 * In order to reschedule or handle a page fault, we need to drop the
--	 * locks here. In the case of a fault, this gives the other task
--	 * (either the highest priority waiter itself or the task which stole
--	 * the rtmutex) the chance to try the fixup of the pi_state. So once we
--	 * are back from handling the fault we need to check the pi_state after
--	 * reacquiring the locks and before trying to do another fixup. When
--	 * the fixup has been done already we simply return.
--	 *
--	 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
--	 * drop hb->lock since the caller owns the hb -> futex_q relation.
--	 * Dropping the pi_mutex->wait_lock requires the state revalidate.
--	 */
--handle_err:
--	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
--	spin_unlock(q->lock_ptr);
--
--	switch (err) {
--	case -EFAULT:
--		err = fault_in_user_writeable(uaddr);
--		break;
--
--	case -EAGAIN:
--		cond_resched();
--		err = 0;
--		break;
--
--	default:
--		WARN_ON_ONCE(1);
--		break;
--	}
--
--	spin_lock(q->lock_ptr);
--	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
--
--	/*
--	 * Check if someone else fixed it for us:
--	 */
--	if (pi_state->owner != oldowner)
--		return argowner == current;
--
--	/* Retry if err was -EAGAIN or the fault in succeeded */
--	if (!err)
--		goto retry;
--
--	/*
--	 * fault_in_user_writeable() failed so user state is immutable. At
--	 * best we can make the kernel state consistent but user state will
--	 * be most likely hosed and any subsequent unlock operation will be
--	 * rejected due to PI futex rule [10].
--	 *
--	 * Ensure that the rtmutex owner is also the pi_state owner despite
--	 * the user space value claiming something different. There is no
--	 * point in unlocking the rtmutex if current is the owner as it
--	 * would need to wait until the next waiter has taken the rtmutex
--	 * to guarantee consistent state. Keep it simple. Userspace asked
--	 * for this wreckaged state.
--	 *
--	 * The rtmutex has an owner - either current or some other
--	 * task. See the EAGAIN loop above.
--	 */
--	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
--
--	return err;
--}
--
--static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
--				struct task_struct *argowner)
--{
--	struct futex_pi_state *pi_state = q->pi_state;
--	int ret;
--
--	lockdep_assert_held(q->lock_ptr);
--
--	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
--	ret = __fixup_pi_state_owner(uaddr, q, argowner);
--	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
--	return ret;
--}
--
--static long futex_wait_restart(struct restart_block *restart);
--
--/**
-- * fixup_owner() - Post lock pi_state and corner case management
-- * @uaddr:	user address of the futex
-- * @q:		futex_q (contains pi_state and access to the rt_mutex)
-- * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
-- *
-- * After attempting to lock an rt_mutex, this function is called to cleanup
-- * the pi_state owner as well as handle race conditions that may allow us to
-- * acquire the lock. Must be called with the hb lock held.
-- *
-- * Return:
-- *  -  1 - success, lock taken;
-- *  -  0 - success, lock not taken;
-- *  - <0 - on error (-EFAULT)
-- */
--static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
--{
--	if (locked) {
--		/*
--		 * Got the lock. We might not be the anticipated owner if we
--		 * did a lock-steal - fix up the PI-state in that case:
--		 *
--		 * Speculative pi_state->owner read (we don't hold wait_lock);
--		 * since we own the lock pi_state->owner == current is the
--		 * stable state, anything else needs more attention.
--		 */
--		if (q->pi_state->owner != current)
--			return fixup_pi_state_owner(uaddr, q, current);
--		return 1;
--	}
--
--	/*
--	 * If we didn't get the lock; check if anybody stole it from us. In
--	 * that case, we need to fix up the uval to point to them instead of
--	 * us, otherwise bad things happen. [10]
--	 *
--	 * Another speculative read; pi_state->owner == current is unstable
--	 * but needs our attention.
--	 */
--	if (q->pi_state->owner == current)
--		return fixup_pi_state_owner(uaddr, q, NULL);
--
--	/*
--	 * Paranoia check. If we did not take the lock, then we should not be
--	 * the owner of the rt_mutex. Warn and establish consistent state.
--	 */
--	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
--		return fixup_pi_state_owner(uaddr, q, current);
--
--	return 0;
--}
--
--/**
-- * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
-- * @hb:		the futex hash bucket, must be locked by the caller
-- * @q:		the futex_q to queue up on
-- * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
-- */
--static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
--				struct hrtimer_sleeper *timeout)
--{
--	/*
--	 * The task state is guaranteed to be set before another task can
--	 * wake it. set_current_state() is implemented using smp_store_mb() and
--	 * queue_me() calls spin_unlock() upon completion, both serializing
--	 * access to the hash list and forcing another memory barrier.
--	 */
--	set_current_state(TASK_INTERRUPTIBLE);
--	queue_me(q, hb);
--
--	/* Arm the timer */
--	if (timeout)
--		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
--
--	/*
--	 * If we have been removed from the hash list, then another task
--	 * has tried to wake us, and we can skip the call to schedule().
--	 */
--	if (likely(!plist_node_empty(&q->list))) {
--		/*
--		 * If the timer has already expired, current will already be
--		 * flagged for rescheduling. Only call schedule if there
--		 * is no timeout, or if it has yet to expire.
--		 */
--		if (!timeout || timeout->task)
--			freezable_schedule();
--	}
--	__set_current_state(TASK_RUNNING);
--}
--
--/**
-- * futex_wait_setup() - Prepare to wait on a futex
-- * @uaddr:	the futex userspace address
-- * @val:	the expected value
-- * @flags:	futex flags (FLAGS_SHARED, etc.)
-- * @q:		the associated futex_q
-- * @hb:		storage for hash_bucket pointer to be returned to caller
-- *
-- * Setup the futex_q and locate the hash_bucket.  Get the futex value and
-- * compare it with the expected value.  Handle atomic faults internally.
-- * Return with the hb lock held on success, and unlocked on failure.
-- *
-- * Return:
-- *  -  0 - uaddr contains val and hb has been locked;
-- *  - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
-- */
--static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
--			   struct futex_q *q, struct futex_hash_bucket **hb)
--{
--	u32 uval;
--	int ret;
--
--	/*
--	 * Access the page AFTER the hash-bucket is locked.
--	 * Order is important:
--	 *
--	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
--	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
--	 *
--	 * The basic logical guarantee of a futex is that it blocks ONLY
--	 * if cond(var) is known to be true at the time of blocking, for
--	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
--	 * would open a race condition where we could block indefinitely with
--	 * cond(var) false, which would violate the guarantee.
--	 *
--	 * On the other hand, we insert q and release the hash-bucket only
--	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
--	 * absorb a wakeup if *uaddr does not match the desired values
--	 * while the syscall executes.
--	 */
--retry:
--	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
--	if (unlikely(ret != 0))
--		return ret;
--
--retry_private:
--	*hb = queue_lock(q);
--
--	ret = get_futex_value_locked(&uval, uaddr);
--
--	if (ret) {
--		queue_unlock(*hb);
--
--		ret = get_user(uval, uaddr);
--		if (ret)
--			return ret;
--
--		if (!(flags & FLAGS_SHARED))
--			goto retry_private;
--
--		goto retry;
--	}
--
--	if (uval != val) {
--		queue_unlock(*hb);
--		ret = -EWOULDBLOCK;
--	}
--
--	return ret;
--}
--
--static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
--		      ktime_t *abs_time, u32 bitset)
--{
--	struct hrtimer_sleeper timeout, *to;
--	struct restart_block *restart;
--	struct futex_hash_bucket *hb;
--	struct futex_q q = futex_q_init;
--	int ret;
--
--	if (!bitset)
--		return -EINVAL;
--	q.bitset = bitset;
--
--	to = futex_setup_timer(abs_time, &timeout, flags,
--			       current->timer_slack_ns);
--retry:
--	/*
--	 * Prepare to wait on uaddr. On success, it holds hb->lock and q
--	 * is initialized.
--	 */
--	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
--	if (ret)
--		goto out;
--
--	/* queue_me and wait for wakeup, timeout, or a signal. */
--	futex_wait_queue_me(hb, &q, to);
--
--	/* If we were woken (and unqueued), we succeeded, whatever. */
--	ret = 0;
--	if (!unqueue_me(&q))
--		goto out;
--	ret = -ETIMEDOUT;
--	if (to && !to->task)
--		goto out;
--
--	/*
--	 * We expect signal_pending(current), but we might be the
--	 * victim of a spurious wakeup as well.
--	 */
--	if (!signal_pending(current))
--		goto retry;
--
--	ret = -ERESTARTSYS;
--	if (!abs_time)
--		goto out;
--
--	restart = &current->restart_block;
--	restart->futex.uaddr = uaddr;
--	restart->futex.val = val;
--	restart->futex.time = *abs_time;
--	restart->futex.bitset = bitset;
--	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
--
--	ret = set_restart_fn(restart, futex_wait_restart);
--
--out:
--	if (to) {
--		hrtimer_cancel(&to->timer);
--		destroy_hrtimer_on_stack(&to->timer);
--	}
--	return ret;
--}
--
--
--static long futex_wait_restart(struct restart_block *restart)
--{
--	u32 __user *uaddr = restart->futex.uaddr;
--	ktime_t t, *tp = NULL;
--
--	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
--		t = restart->futex.time;
--		tp = &t;
--	}
--	restart->fn = do_no_restart_syscall;
--
--	return (long)futex_wait(uaddr, restart->futex.flags,
--				restart->futex.val, tp, restart->futex.bitset);
--}
--
--
--/*
-- * Userspace tried a 0 -> TID atomic transition of the futex value
-- * and failed. The kernel side here does the whole locking operation:
-- * if there are waiters then it will block as a consequence of relying
-- * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
-- * a 0 value of the futex too.).
-- *
-- * Also serves as futex trylock_pi()'ing, and due semantics.
-- */
--static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
--			 ktime_t *time, int trylock)
--{
--	struct hrtimer_sleeper timeout, *to;
--	struct task_struct *exiting = NULL;
--	struct rt_mutex_waiter rt_waiter;
--	struct futex_hash_bucket *hb;
--	struct futex_q q = futex_q_init;
--	int res, ret;
--
--	if (!IS_ENABLED(CONFIG_FUTEX_PI))
--		return -ENOSYS;
--
--	if (refill_pi_state_cache())
--		return -ENOMEM;
--
--	to = futex_setup_timer(time, &timeout, flags, 0);
--
--retry:
--	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
--	if (unlikely(ret != 0))
--		goto out;
--
--retry_private:
--	hb = queue_lock(&q);
--
--	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
--				   &exiting, 0);
--	if (unlikely(ret)) {
--		/*
--		 * Atomic work succeeded and we got the lock,
--		 * or failed. Either way, we do _not_ block.
--		 */
--		switch (ret) {
--		case 1:
--			/* We got the lock. */
--			ret = 0;
--			goto out_unlock_put_key;
--		case -EFAULT:
--			goto uaddr_faulted;
--		case -EBUSY:
--		case -EAGAIN:
--			/*
--			 * Two reasons for this:
--			 * - EBUSY: Task is exiting and we just wait for the
--			 *   exit to complete.
--			 * - EAGAIN: The user space value changed.
--			 */
--			queue_unlock(hb);
--			/*
--			 * Handle the case where the owner is in the middle of
--			 * exiting. Wait for the exit to complete otherwise
--			 * this task might loop forever, aka. live lock.
--			 */
--			wait_for_owner_exiting(ret, exiting);
--			cond_resched();
--			goto retry;
--		default:
--			goto out_unlock_put_key;
--		}
--	}
--
--	WARN_ON(!q.pi_state);
--
--	/*
--	 * Only actually queue now that the atomic ops are done:
--	 */
--	__queue_me(&q, hb);
--
--	if (trylock) {
--		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
--		/* Fixup the trylock return value: */
--		ret = ret ? 0 : -EWOULDBLOCK;
--		goto no_block;
--	}
--
--	rt_mutex_init_waiter(&rt_waiter);
--
--	/*
--	 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
--	 * hold it while doing rt_mutex_start_proxy(), because then it will
--	 * include hb->lock in the blocking chain, even through we'll not in
--	 * fact hold it while blocking. This will lead it to report -EDEADLK
--	 * and BUG when futex_unlock_pi() interleaves with this.
--	 *
--	 * Therefore acquire wait_lock while holding hb->lock, but drop the
--	 * latter before calling __rt_mutex_start_proxy_lock(). This
--	 * interleaves with futex_unlock_pi() -- which does a similar lock
--	 * handoff -- such that the latter can observe the futex_q::pi_state
--	 * before __rt_mutex_start_proxy_lock() is done.
--	 */
--	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
--	spin_unlock(q.lock_ptr);
--	/*
--	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
--	 * such that futex_unlock_pi() is guaranteed to observe the waiter when
--	 * it sees the futex_q::pi_state.
--	 */
--	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
--	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
--
--	if (ret) {
--		if (ret == 1)
--			ret = 0;
--		goto cleanup;
--	}
--
--	if (unlikely(to))
--		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
--
--	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
--
--cleanup:
--	spin_lock(q.lock_ptr);
--	/*
--	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
--	 * first acquire the hb->lock before removing the lock from the
--	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
--	 * lists consistent.
--	 *
--	 * In particular; it is important that futex_unlock_pi() can not
--	 * observe this inconsistency.
--	 */
--	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
--		ret = 0;
--
--no_block:
--	/*
--	 * Fixup the pi_state owner and possibly acquire the lock if we
--	 * haven't already.
--	 */
--	res = fixup_owner(uaddr, &q, !ret);
--	/*
--	 * If fixup_owner() returned an error, propagate that.  If it acquired
--	 * the lock, clear our -ETIMEDOUT or -EINTR.
--	 */
--	if (res)
--		ret = (res < 0) ? res : 0;
--
--	unqueue_me_pi(&q);
--	spin_unlock(q.lock_ptr);
--	goto out;
--
--out_unlock_put_key:
--	queue_unlock(hb);
--
--out:
--	if (to) {
--		hrtimer_cancel(&to->timer);
--		destroy_hrtimer_on_stack(&to->timer);
--	}
--	return ret != -EINTR ? ret : -ERESTARTNOINTR;
--
--uaddr_faulted:
--	queue_unlock(hb);
--
--	ret = fault_in_user_writeable(uaddr);
--	if (ret)
--		goto out;
--
--	if (!(flags & FLAGS_SHARED))
--		goto retry_private;
--
--	goto retry;
--}
--
--/*
-- * Userspace attempted a TID -> 0 atomic transition, and failed.
-- * This is the in-kernel slowpath: we look up the PI state (if any),
-- * and do the rt-mutex unlock.
-- */
--static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
--{
--	u32 curval, uval, vpid = task_pid_vnr(current);
--	union futex_key key = FUTEX_KEY_INIT;
--	struct futex_hash_bucket *hb;
--	struct futex_q *top_waiter;
--	int ret;
--
--	if (!IS_ENABLED(CONFIG_FUTEX_PI))
--		return -ENOSYS;
--
--retry:
--	if (get_user(uval, uaddr))
--		return -EFAULT;
--	/*
--	 * We release only a lock we actually own:
--	 */
--	if ((uval & FUTEX_TID_MASK) != vpid)
--		return -EPERM;
--
--	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
--	if (ret)
--		return ret;
--
--	hb = hash_futex(&key);
--	spin_lock(&hb->lock);
--
--	/*
--	 * Check waiters first. We do not trust user space values at
--	 * all and we at least want to know if user space fiddled
--	 * with the futex value instead of blindly unlocking.
--	 */
--	top_waiter = futex_top_waiter(hb, &key);
--	if (top_waiter) {
--		struct futex_pi_state *pi_state = top_waiter->pi_state;
--
--		ret = -EINVAL;
--		if (!pi_state)
--			goto out_unlock;
--
--		/*
--		 * If current does not own the pi_state then the futex is
--		 * inconsistent and user space fiddled with the futex value.
--		 */
--		if (pi_state->owner != current)
--			goto out_unlock;
--
--		get_pi_state(pi_state);
--		/*
--		 * By taking wait_lock while still holding hb->lock, we ensure
--		 * there is no point where we hold neither; and therefore
--		 * wake_futex_pi() must observe a state consistent with what we
--		 * observed.
--		 *
--		 * In particular; this forces __rt_mutex_start_proxy() to
--		 * complete such that we're guaranteed to observe the
--		 * rt_waiter. Also see the WARN in wake_futex_pi().
--		 */
--		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
--		spin_unlock(&hb->lock);
--
--		/* drops pi_state->pi_mutex.wait_lock */
--		ret = wake_futex_pi(uaddr, uval, pi_state);
--
--		put_pi_state(pi_state);
--
--		/*
--		 * Success, we're done! No tricky corner cases.
--		 */
--		if (!ret)
--			return ret;
--		/*
--		 * The atomic access to the futex value generated a
--		 * pagefault, so retry the user-access and the wakeup:
--		 */
--		if (ret == -EFAULT)
--			goto pi_faulted;
--		/*
--		 * A unconditional UNLOCK_PI op raced against a waiter
--		 * setting the FUTEX_WAITERS bit. Try again.
--		 */
--		if (ret == -EAGAIN)
--			goto pi_retry;
--		/*
--		 * wake_futex_pi has detected invalid state. Tell user
--		 * space.
--		 */
--		return ret;
--	}
--
--	/*
--	 * We have no kernel internal state, i.e. no waiters in the
--	 * kernel. Waiters which are about to queue themselves are stuck
--	 * on hb->lock. So we can safely ignore them. We do neither
--	 * preserve the WAITERS bit not the OWNER_DIED one. We are the
--	 * owner.
--	 */
--	if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
--		spin_unlock(&hb->lock);
--		switch (ret) {
--		case -EFAULT:
--			goto pi_faulted;
--
--		case -EAGAIN:
--			goto pi_retry;
--
--		default:
--			WARN_ON_ONCE(1);
--			return ret;
--		}
--	}
--
--	/*
--	 * If uval has changed, let user space handle it.
--	 */
--	ret = (curval == uval) ? 0 : -EAGAIN;
--
--out_unlock:
--	spin_unlock(&hb->lock);
--	return ret;
--
--pi_retry:
--	cond_resched();
--	goto retry;
--
--pi_faulted:
--
--	ret = fault_in_user_writeable(uaddr);
--	if (!ret)
--		goto retry;
--
--	return ret;
--}
--
--/**
-- * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
-- * @hb:		the hash_bucket futex_q was original enqueued on
-- * @q:		the futex_q woken while waiting to be requeued
-- * @timeout:	the timeout associated with the wait (NULL if none)
-- *
-- * Determine the cause for the early wakeup.
-- *
-- * Return:
-- *  -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
-- */
--static inline
--int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
--				   struct futex_q *q,
--				   struct hrtimer_sleeper *timeout)
--{
--	int ret;
--
--	/*
--	 * With the hb lock held, we avoid races while we process the wakeup.
--	 * We only need to hold hb (and not hb2) to ensure atomicity as the
--	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
--	 * It can't be requeued from uaddr2 to something else since we don't
--	 * support a PI aware source futex for requeue.
--	 */
--	WARN_ON_ONCE(&hb->lock != q->lock_ptr);
--
--	/*
--	 * We were woken prior to requeue by a timeout or a signal.
--	 * Unqueue the futex_q and determine which it was.
--	 */
--	plist_del(&q->list, &hb->chain);
--	hb_waiters_dec(hb);
--
--	/* Handle spurious wakeups gracefully */
--	ret = -EWOULDBLOCK;
--	if (timeout && !timeout->task)
--		ret = -ETIMEDOUT;
--	else if (signal_pending(current))
--		ret = -ERESTARTNOINTR;
--	return ret;
--}
--
--/**
-- * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
-- * @uaddr:	the futex we initially wait on (non-pi)
-- * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
-- *		the same type, no requeueing from private to shared, etc.
-- * @val:	the expected value of uaddr
-- * @abs_time:	absolute timeout
-- * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
-- * @uaddr2:	the pi futex we will take prior to returning to user-space
-- *
-- * The caller will wait on uaddr and will be requeued by futex_requeue() to
-- * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will wake
-- * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
-- * userspace.  This ensures the rt_mutex maintains an owner when it has waiters;
-- * without one, the pi logic would not know which task to boost/deboost, if
-- * there was a need to.
-- *
-- * We call schedule in futex_wait_queue_me() when we enqueue and return there
-- * via the following--
-- * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
-- * 2) wakeup on uaddr2 after a requeue
-- * 3) signal
-- * 4) timeout
-- *
-- * If 3, cleanup and return -ERESTARTNOINTR.
-- *
-- * If 2, we may then block on trying to take the rt_mutex and return via:
-- * 5) successful lock
-- * 6) signal
-- * 7) timeout
-- * 8) other lock acquisition failure
-- *
-- * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
-- *
-- * If 4 or 7, we cleanup and return with -ETIMEDOUT.
-- *
-- * Return:
-- *  -  0 - On success;
-- *  - <0 - On error
-- */
--static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
--				 u32 val, ktime_t *abs_time, u32 bitset,
--				 u32 __user *uaddr2)
--{
--	struct hrtimer_sleeper timeout, *to;
--	struct rt_mutex_waiter rt_waiter;
--	struct futex_hash_bucket *hb;
--	union futex_key key2 = FUTEX_KEY_INIT;
--	struct futex_q q = futex_q_init;
--	struct rt_mutex_base *pi_mutex;
--	int res, ret;
--
--	if (!IS_ENABLED(CONFIG_FUTEX_PI))
--		return -ENOSYS;
--
--	if (uaddr == uaddr2)
--		return -EINVAL;
--
--	if (!bitset)
--		return -EINVAL;
--
--	to = futex_setup_timer(abs_time, &timeout, flags,
--			       current->timer_slack_ns);
--
--	/*
--	 * The waiter is allocated on our stack, manipulated by the requeue
--	 * code while we sleep on uaddr.
--	 */
--	rt_mutex_init_waiter(&rt_waiter);
--
--	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
--	if (unlikely(ret != 0))
--		goto out;
--
--	q.bitset = bitset;
--	q.rt_waiter = &rt_waiter;
--	q.requeue_pi_key = &key2;
--
--	/*
--	 * Prepare to wait on uaddr. On success, it holds hb->lock and q
--	 * is initialized.
--	 */
--	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
--	if (ret)
--		goto out;
--
--	/*
--	 * The check above which compares uaddrs is not sufficient for
--	 * shared futexes. We need to compare the keys:
--	 */
--	if (match_futex(&q.key, &key2)) {
--		queue_unlock(hb);
--		ret = -EINVAL;
--		goto out;
--	}
--
--	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
--	futex_wait_queue_me(hb, &q, to);
--
--	switch (futex_requeue_pi_wakeup_sync(&q)) {
--	case Q_REQUEUE_PI_IGNORE:
--		/* The waiter is still on uaddr1 */
--		spin_lock(&hb->lock);
--		ret = handle_early_requeue_pi_wakeup(hb, &q, to);
--		spin_unlock(&hb->lock);
--		break;
--
--	case Q_REQUEUE_PI_LOCKED:
--		/* The requeue acquired the lock */
--		if (q.pi_state && (q.pi_state->owner != current)) {
--			spin_lock(q.lock_ptr);
--			ret = fixup_owner(uaddr2, &q, true);
--			/*
--			 * Drop the reference to the pi state which the
--			 * requeue_pi() code acquired for us.
--			 */
--			put_pi_state(q.pi_state);
--			spin_unlock(q.lock_ptr);
--			/*
--			 * Adjust the return value. It's either -EFAULT or
--			 * success (1) but the caller expects 0 for success.
--			 */
--			ret = ret < 0 ? ret : 0;
--		}
--		break;
--
--	case Q_REQUEUE_PI_DONE:
--		/* Requeue completed. Current is 'pi_blocked_on' the rtmutex */
--		pi_mutex = &q.pi_state->pi_mutex;
--		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
--
--		/* Current is not longer pi_blocked_on */
--		spin_lock(q.lock_ptr);
--		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
--			ret = 0;
--
--		debug_rt_mutex_free_waiter(&rt_waiter);
--		/*
--		 * Fixup the pi_state owner and possibly acquire the lock if we
--		 * haven't already.
--		 */
--		res = fixup_owner(uaddr2, &q, !ret);
--		/*
--		 * If fixup_owner() returned an error, propagate that.  If it
--		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
--		 */
--		if (res)
--			ret = (res < 0) ? res : 0;
--
--		unqueue_me_pi(&q);
--		spin_unlock(q.lock_ptr);
--
--		if (ret == -EINTR) {
--			/*
--			 * We've already been requeued, but cannot restart
--			 * by calling futex_lock_pi() directly. We could
--			 * restart this syscall, but it would detect that
--			 * the user space "val" changed and return
--			 * -EWOULDBLOCK.  Save the overhead of the restart
--			 * and return -EWOULDBLOCK directly.
--			 */
--			ret = -EWOULDBLOCK;
--		}
--		break;
--	default:
--		BUG();
--	}
--
--out:
--	if (to) {
--		hrtimer_cancel(&to->timer);
--		destroy_hrtimer_on_stack(&to->timer);
--	}
--	return ret;
--}
--
--/*
-- * Support for robust futexes: the kernel cleans up held futexes at
-- * thread exit time.
-- *
-- * Implementation: user-space maintains a per-thread list of locks it
-- * is holding. Upon do_exit(), the kernel carefully walks this list,
-- * and marks all locks that are owned by this thread with the
-- * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
-- * always manipulated with the lock held, so the list is private and
-- * per-thread. Userspace also maintains a per-thread 'list_op_pending'
-- * field, to allow the kernel to clean up if the thread dies after
-- * acquiring the lock, but just before it could have added itself to
-- * the list. There can only be one such pending lock.
-- */
--
--/**
-- * sys_set_robust_list() - Set the robust-futex list head of a task
-- * @head:	pointer to the list-head
-- * @len:	length of the list-head, as userspace expects
-- */
--SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
--		size_t, len)
--{
--	if (!futex_cmpxchg_enabled)
--		return -ENOSYS;
--	/*
--	 * The kernel knows only one size for now:
--	 */
--	if (unlikely(len != sizeof(*head)))
--		return -EINVAL;
--
--	current->robust_list = head;
--
--	return 0;
--}
--
--/**
-- * sys_get_robust_list() - Get the robust-futex list head of a task
-- * @pid:	pid of the process [zero for current task]
-- * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
-- * @len_ptr:	pointer to a length field, the kernel fills in the header size
-- */
--SYSCALL_DEFINE3(get_robust_list, int, pid,
--		struct robust_list_head __user * __user *, head_ptr,
--		size_t __user *, len_ptr)
--{
--	struct robust_list_head __user *head;
--	unsigned long ret;
--	struct task_struct *p;
--
--	if (!futex_cmpxchg_enabled)
--		return -ENOSYS;
--
--	rcu_read_lock();
--
--	ret = -ESRCH;
--	if (!pid)
--		p = current;
--	else {
--		p = find_task_by_vpid(pid);
--		if (!p)
--			goto err_unlock;
--	}
--
--	ret = -EPERM;
--	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
--		goto err_unlock;
--
--	head = p->robust_list;
--	rcu_read_unlock();
--
--	if (put_user(sizeof(*head), len_ptr))
--		return -EFAULT;
--	return put_user(head, head_ptr);
--
--err_unlock:
--	rcu_read_unlock();
--
--	return ret;
--}
--
--/* Constants for the pending_op argument of handle_futex_death */
--#define HANDLE_DEATH_PENDING	true
--#define HANDLE_DEATH_LIST	false
--
--/*
-- * Process a futex-list entry, check whether it's owned by the
-- * dying task, and do notification if so:
-- */
--static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
--			      bool pi, bool pending_op)
--{
--	u32 uval, nval, mval;
--	int err;
--
--	/* Futex address must be 32bit aligned */
--	if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
--		return -1;
--
--retry:
--	if (get_user(uval, uaddr))
--		return -1;
--
--	/*
--	 * Special case for regular (non PI) futexes. The unlock path in
--	 * user space has two race scenarios:
--	 *
--	 * 1. The unlock path releases the user space futex value and
--	 *    before it can execute the futex() syscall to wake up
--	 *    waiters it is killed.
--	 *
--	 * 2. A woken up waiter is killed before it can acquire the
--	 *    futex in user space.
--	 *
--	 * In both cases the TID validation below prevents a wakeup of
--	 * potential waiters which can cause these waiters to block
--	 * forever.
--	 *
--	 * In both cases the following conditions are met:
--	 *
--	 *	1) task->robust_list->list_op_pending != NULL
--	 *	   @pending_op == true
--	 *	2) User space futex value == 0
--	 *	3) Regular futex: @pi == false
--	 *
--	 * If these conditions are met, it is safe to attempt waking up a
--	 * potential waiter without touching the user space futex value and
--	 * trying to set the OWNER_DIED bit. The user space futex value is
--	 * uncontended and the rest of the user space mutex state is
--	 * consistent, so a woken waiter will just take over the
--	 * uncontended futex. Setting the OWNER_DIED bit would create
--	 * inconsistent state and malfunction of the user space owner died
--	 * handling.
--	 */
--	if (pending_op && !pi && !uval) {
--		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
--		return 0;
--	}
--
--	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
--		return 0;
--
--	/*
--	 * Ok, this dying thread is truly holding a futex
--	 * of interest. Set the OWNER_DIED bit atomically
--	 * via cmpxchg, and if the value had FUTEX_WAITERS
--	 * set, wake up a waiter (if any). (We have to do a
--	 * futex_wake() even if OWNER_DIED is already set -
--	 * to handle the rare but possible case of recursive
--	 * thread-death.) The rest of the cleanup is done in
--	 * userspace.
--	 */
--	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
--
--	/*
--	 * We are not holding a lock here, but we want to have
--	 * the pagefault_disable/enable() protection because
--	 * we want to handle the fault gracefully. If the
--	 * access fails we try to fault in the futex with R/W
--	 * verification via get_user_pages. get_user() above
--	 * does not guarantee R/W access. If that fails we
--	 * give up and leave the futex locked.
--	 */
--	if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
--		switch (err) {
--		case -EFAULT:
--			if (fault_in_user_writeable(uaddr))
--				return -1;
--			goto retry;
--
--		case -EAGAIN:
--			cond_resched();
--			goto retry;
--
--		default:
--			WARN_ON_ONCE(1);
--			return err;
--		}
--	}
--
--	if (nval != uval)
--		goto retry;
--
--	/*
--	 * Wake robust non-PI futexes here. The wakeup of
--	 * PI futexes happens in exit_pi_state():
--	 */
--	if (!pi && (uval & FUTEX_WAITERS))
--		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
--
--	return 0;
--}
--
--/*
-- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-- */
--static inline int fetch_robust_entry(struct robust_list __user **entry,
--				     struct robust_list __user * __user *head,
--				     unsigned int *pi)
--{
--	unsigned long uentry;
--
--	if (get_user(uentry, (unsigned long __user *)head))
--		return -EFAULT;
--
--	*entry = (void __user *)(uentry & ~1UL);
--	*pi = uentry & 1;
--
--	return 0;
--}
--
--/*
-- * Walk curr->robust_list (very carefully, it's a userspace list!)
-- * and mark any locks found there dead, and notify any waiters.
-- *
-- * We silently return on any sign of list-walking problem.
-- */
--static void exit_robust_list(struct task_struct *curr)
--{
--	struct robust_list_head __user *head = curr->robust_list;
--	struct robust_list __user *entry, *next_entry, *pending;
--	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
--	unsigned int next_pi;
--	unsigned long futex_offset;
--	int rc;
--
--	if (!futex_cmpxchg_enabled)
--		return;
--
--	/*
--	 * Fetch the list head (which was registered earlier, via
--	 * sys_set_robust_list()):
--	 */
--	if (fetch_robust_entry(&entry, &head->list.next, &pi))
--		return;
--	/*
--	 * Fetch the relative futex offset:
--	 */
--	if (get_user(futex_offset, &head->futex_offset))
--		return;
--	/*
--	 * Fetch any possibly pending lock-add first, and handle it
--	 * if it exists:
--	 */
--	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
--		return;
--
--	next_entry = NULL;	/* avoid warning with gcc */
--	while (entry != &head->list) {
--		/*
--		 * Fetch the next entry in the list before calling
--		 * handle_futex_death:
--		 */
--		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
--		/*
--		 * A pending lock might already be on the list, so
--		 * don't process it twice:
--		 */
--		if (entry != pending) {
--			if (handle_futex_death((void __user *)entry + futex_offset,
--						curr, pi, HANDLE_DEATH_LIST))
--				return;
--		}
--		if (rc)
--			return;
--		entry = next_entry;
--		pi = next_pi;
--		/*
--		 * Avoid excessively long or circular lists:
--		 */
--		if (!--limit)
--			break;
--
--		cond_resched();
--	}
--
--	if (pending) {
--		handle_futex_death((void __user *)pending + futex_offset,
--				   curr, pip, HANDLE_DEATH_PENDING);
--	}
--}
--
--static void futex_cleanup(struct task_struct *tsk)
--{
--	if (unlikely(tsk->robust_list)) {
--		exit_robust_list(tsk);
--		tsk->robust_list = NULL;
--	}
--
--#ifdef CONFIG_COMPAT
--	if (unlikely(tsk->compat_robust_list)) {
--		compat_exit_robust_list(tsk);
--		tsk->compat_robust_list = NULL;
--	}
--#endif
--
--	if (unlikely(!list_empty(&tsk->pi_state_list)))
--		exit_pi_state_list(tsk);
--}
--
--/**
-- * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
-- * @tsk:	task to set the state on
-- *
-- * Set the futex exit state of the task lockless. The futex waiter code
-- * observes that state when a task is exiting and loops until the task has
-- * actually finished the futex cleanup. The worst case for this is that the
-- * waiter runs through the wait loop until the state becomes visible.
-- *
-- * This is called from the recursive fault handling path in do_exit().
-- *
-- * This is best effort. Either the futex exit code has run already or
-- * not. If the OWNER_DIED bit has been set on the futex then the waiter can
-- * take it over. If not, the problem is pushed back to user space. If the
-- * futex exit code did not run yet, then an already queued waiter might
-- * block forever, but there is nothing which can be done about that.
-- */
--void futex_exit_recursive(struct task_struct *tsk)
--{
--	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
--	if (tsk->futex_state == FUTEX_STATE_EXITING)
--		mutex_unlock(&tsk->futex_exit_mutex);
--	tsk->futex_state = FUTEX_STATE_DEAD;
--}
--
--static void futex_cleanup_begin(struct task_struct *tsk)
--{
--	/*
--	 * Prevent various race issues against a concurrent incoming waiter
--	 * including live locks by forcing the waiter to block on
--	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
--	 * attach_to_pi_owner().
--	 */
--	mutex_lock(&tsk->futex_exit_mutex);
--
--	/*
--	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
--	 *
--	 * This ensures that all subsequent checks of tsk->futex_state in
--	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
--	 * tsk->pi_lock held.
--	 *
--	 * It guarantees also that a pi_state which was queued right before
--	 * the state change under tsk->pi_lock by a concurrent waiter must
--	 * be observed in exit_pi_state_list().
--	 */
--	raw_spin_lock_irq(&tsk->pi_lock);
--	tsk->futex_state = FUTEX_STATE_EXITING;
--	raw_spin_unlock_irq(&tsk->pi_lock);
--}
--
--static void futex_cleanup_end(struct task_struct *tsk, int state)
--{
--	/*
--	 * Lockless store. The only side effect is that an observer might
--	 * take another loop until it becomes visible.
--	 */
--	tsk->futex_state = state;
--	/*
--	 * Drop the exit protection. This unblocks waiters which observed
--	 * FUTEX_STATE_EXITING to reevaluate the state.
--	 */
--	mutex_unlock(&tsk->futex_exit_mutex);
--}
--
--void futex_exec_release(struct task_struct *tsk)
--{
--	/*
--	 * The state handling is done for consistency, but in the case of
--	 * exec() there is no way to prevent further damage as the PID stays
--	 * the same. But for the unlikely and arguably buggy case that a
--	 * futex is held on exec(), this provides at least as much state
--	 * consistency protection which is possible.
--	 */
--	futex_cleanup_begin(tsk);
--	futex_cleanup(tsk);
--	/*
--	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
--	 * exec a new binary.
--	 */
--	futex_cleanup_end(tsk, FUTEX_STATE_OK);
--}
--
--void futex_exit_release(struct task_struct *tsk)
--{
--	futex_cleanup_begin(tsk);
--	futex_cleanup(tsk);
--	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
--}
--
--long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
--		u32 __user *uaddr2, u32 val2, u32 val3)
--{
--	int cmd = op & FUTEX_CMD_MASK;
--	unsigned int flags = 0;
--
--	if (!(op & FUTEX_PRIVATE_FLAG))
--		flags |= FLAGS_SHARED;
--
--	if (op & FUTEX_CLOCK_REALTIME) {
--		flags |= FLAGS_CLOCKRT;
--		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
--		    cmd != FUTEX_LOCK_PI2)
--			return -ENOSYS;
--	}
--
--	switch (cmd) {
--	case FUTEX_LOCK_PI:
--	case FUTEX_LOCK_PI2:
--	case FUTEX_UNLOCK_PI:
--	case FUTEX_TRYLOCK_PI:
--	case FUTEX_WAIT_REQUEUE_PI:
--	case FUTEX_CMP_REQUEUE_PI:
--		if (!futex_cmpxchg_enabled)
--			return -ENOSYS;
--	}
--
--	switch (cmd) {
--	case FUTEX_WAIT:
--		val3 = FUTEX_BITSET_MATCH_ANY;
--		fallthrough;
--	case FUTEX_WAIT_BITSET:
--		return futex_wait(uaddr, flags, val, timeout, val3);
--	case FUTEX_WAKE:
--		val3 = FUTEX_BITSET_MATCH_ANY;
--		fallthrough;
--	case FUTEX_WAKE_BITSET:
--		return futex_wake(uaddr, flags, val, val3);
--	case FUTEX_REQUEUE:
--		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
--	case FUTEX_CMP_REQUEUE:
--		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
--	case FUTEX_WAKE_OP:
--		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
--	case FUTEX_LOCK_PI:
--		flags |= FLAGS_CLOCKRT;
--		fallthrough;
--	case FUTEX_LOCK_PI2:
--		return futex_lock_pi(uaddr, flags, timeout, 0);
--	case FUTEX_UNLOCK_PI:
--		return futex_unlock_pi(uaddr, flags);
--	case FUTEX_TRYLOCK_PI:
--		return futex_lock_pi(uaddr, flags, NULL, 1);
--	case FUTEX_WAIT_REQUEUE_PI:
--		val3 = FUTEX_BITSET_MATCH_ANY;
--		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
--					     uaddr2);
--	case FUTEX_CMP_REQUEUE_PI:
--		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
--	}
--	return -ENOSYS;
--}
--
--static __always_inline bool futex_cmd_has_timeout(u32 cmd)
--{
--	switch (cmd) {
--	case FUTEX_WAIT:
--	case FUTEX_LOCK_PI:
--	case FUTEX_LOCK_PI2:
--	case FUTEX_WAIT_BITSET:
--	case FUTEX_WAIT_REQUEUE_PI:
--		return true;
--	}
--	return false;
--}
--
--static __always_inline int
--futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
--{
--	if (!timespec64_valid(ts))
--		return -EINVAL;
--
--	*t = timespec64_to_ktime(*ts);
--	if (cmd == FUTEX_WAIT)
--		*t = ktime_add_safe(ktime_get(), *t);
--	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
--		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
--	return 0;
--}
--
--SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
--		const struct __kernel_timespec __user *, utime,
--		u32 __user *, uaddr2, u32, val3)
--{
--	int ret, cmd = op & FUTEX_CMD_MASK;
--	ktime_t t, *tp = NULL;
--	struct timespec64 ts;
--
--	if (utime && futex_cmd_has_timeout(cmd)) {
--		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
--			return -EFAULT;
--		if (get_timespec64(&ts, utime))
--			return -EFAULT;
--		ret = futex_init_timeout(cmd, op, &ts, &t);
--		if (ret)
--			return ret;
--		tp = &t;
--	}
--
--	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
--}
--
--#ifdef CONFIG_COMPAT
--/*
-- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-- */
--static inline int
--compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
--		   compat_uptr_t __user *head, unsigned int *pi)
--{
--	if (get_user(*uentry, head))
--		return -EFAULT;
--
--	*entry = compat_ptr((*uentry) & ~1);
--	*pi = (unsigned int)(*uentry) & 1;
--
--	return 0;
--}
--
--static void __user *futex_uaddr(struct robust_list __user *entry,
--				compat_long_t futex_offset)
--{
--	compat_uptr_t base = ptr_to_compat(entry);
--	void __user *uaddr = compat_ptr(base + futex_offset);
--
--	return uaddr;
--}
--
--/*
-- * Walk curr->robust_list (very carefully, it's a userspace list!)
-- * and mark any locks found there dead, and notify any waiters.
-- *
-- * We silently return on any sign of list-walking problem.
-- */
--static void compat_exit_robust_list(struct task_struct *curr)
--{
--	struct compat_robust_list_head __user *head = curr->compat_robust_list;
--	struct robust_list __user *entry, *next_entry, *pending;
--	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
--	unsigned int next_pi;
--	compat_uptr_t uentry, next_uentry, upending;
--	compat_long_t futex_offset;
--	int rc;
--
--	if (!futex_cmpxchg_enabled)
--		return;
--
--	/*
--	 * Fetch the list head (which was registered earlier, via
--	 * sys_set_robust_list()):
--	 */
--	if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
--		return;
--	/*
--	 * Fetch the relative futex offset:
--	 */
--	if (get_user(futex_offset, &head->futex_offset))
--		return;
--	/*
--	 * Fetch any possibly pending lock-add first, and handle it
--	 * if it exists:
--	 */
--	if (compat_fetch_robust_entry(&upending, &pending,
--			       &head->list_op_pending, &pip))
--		return;
--
--	next_entry = NULL;	/* avoid warning with gcc */
--	while (entry != (struct robust_list __user *) &head->list) {
--		/*
--		 * Fetch the next entry in the list before calling
--		 * handle_futex_death:
--		 */
--		rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
--			(compat_uptr_t __user *)&entry->next, &next_pi);
--		/*
--		 * A pending lock might already be on the list, so
--		 * dont process it twice:
--		 */
--		if (entry != pending) {
--			void __user *uaddr = futex_uaddr(entry, futex_offset);
--
--			if (handle_futex_death(uaddr, curr, pi,
--					       HANDLE_DEATH_LIST))
--				return;
--		}
--		if (rc)
--			return;
--		uentry = next_uentry;
--		entry = next_entry;
--		pi = next_pi;
--		/*
--		 * Avoid excessively long or circular lists:
--		 */
--		if (!--limit)
--			break;
--
--		cond_resched();
--	}
--	if (pending) {
--		void __user *uaddr = futex_uaddr(pending, futex_offset);
--
--		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
--	}
--}
--
--COMPAT_SYSCALL_DEFINE2(set_robust_list,
--		struct compat_robust_list_head __user *, head,
--		compat_size_t, len)
--{
--	if (!futex_cmpxchg_enabled)
--		return -ENOSYS;
--
--	if (unlikely(len != sizeof(*head)))
--		return -EINVAL;
--
--	current->compat_robust_list = head;
--
--	return 0;
--}
--
--COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
--			compat_uptr_t __user *, head_ptr,
--			compat_size_t __user *, len_ptr)
--{
--	struct compat_robust_list_head __user *head;
--	unsigned long ret;
--	struct task_struct *p;
--
--	if (!futex_cmpxchg_enabled)
--		return -ENOSYS;
--
--	rcu_read_lock();
--
--	ret = -ESRCH;
--	if (!pid)
--		p = current;
--	else {
--		p = find_task_by_vpid(pid);
--		if (!p)
--			goto err_unlock;
--	}
--
--	ret = -EPERM;
--	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
--		goto err_unlock;
--
--	head = p->compat_robust_list;
--	rcu_read_unlock();
--
--	if (put_user(sizeof(*head), len_ptr))
--		return -EFAULT;
--	return put_user(ptr_to_compat(head), head_ptr);
--
--err_unlock:
--	rcu_read_unlock();
--
--	return ret;
--}
--#endif /* CONFIG_COMPAT */
--
--#ifdef CONFIG_COMPAT_32BIT_TIME
--SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
--		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
--		u32, val3)
--{
--	int ret, cmd = op & FUTEX_CMD_MASK;
--	ktime_t t, *tp = NULL;
--	struct timespec64 ts;
--
--	if (utime && futex_cmd_has_timeout(cmd)) {
--		if (get_old_timespec32(&ts, utime))
--			return -EFAULT;
--		ret = futex_init_timeout(cmd, op, &ts, &t);
--		if (ret)
--			return ret;
--		tp = &t;
--	}
--
--	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
--}
--#endif /* CONFIG_COMPAT_32BIT_TIME */
--
--static void __init futex_detect_cmpxchg(void)
--{
--#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
--	u32 curval;
--
--	/*
--	 * This will fail and we want it. Some arch implementations do
--	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
--	 * functionality. We want to know that before we call in any
--	 * of the complex code paths. Also we want to prevent
--	 * registration of robust lists in that case. NULL is
--	 * guaranteed to fault and we get -EFAULT on functional
--	 * implementation, the non-functional ones will return
--	 * -ENOSYS.
--	 */
--	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
--		futex_cmpxchg_enabled = 1;
--#endif
--}
--
--static int __init futex_init(void)
--{
--	unsigned int futex_shift;
--	unsigned long i;
--
--#if CONFIG_BASE_SMALL
--	futex_hashsize = 16;
--#else
--	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
--#endif
--
--	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
--					       futex_hashsize, 0,
--					       futex_hashsize < 256 ? HASH_SMALL : 0,
--					       &futex_shift, NULL,
--					       futex_hashsize, futex_hashsize);
--	futex_hashsize = 1UL << futex_shift;
--
--	futex_detect_cmpxchg();
--
--	for (i = 0; i < futex_hashsize; i++) {
--		atomic_set(&futex_queues[i].waiters, 0);
--		plist_head_init(&futex_queues[i].chain);
--		spin_lock_init(&futex_queues[i].lock);
--	}
--
--	return 0;
--}
--core_initcall(futex_init);
-diff --git a/kernel/futex/Makefile b/kernel/futex/Makefile
-new file mode 100644
-index 000000000..b77188d1f
---- /dev/null
-+++ b/kernel/futex/Makefile
-@@ -0,0 +1,3 @@
-+# SPDX-License-Identifier: GPL-2.0
-+
-+obj-y += core.o syscalls.o pi.o requeue.o waitwake.o
-diff --git a/kernel/futex/core.c b/kernel/futex/core.c
-new file mode 100644
-index 000000000..25d8a88b3
---- /dev/null
-+++ b/kernel/futex/core.c
-@@ -0,0 +1,1176 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ *  Fast Userspace Mutexes (which I call "Futexes!").
-+ *  (C) Rusty Russell, IBM 2002
-+ *
-+ *  Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
-+ *  (C) Copyright 2003 Red Hat Inc, All Rights Reserved
-+ *
-+ *  Removed page pinning, fix privately mapped COW pages and other cleanups
-+ *  (C) Copyright 2003, 2004 Jamie Lokier
-+ *
-+ *  Robust futex support started by Ingo Molnar
-+ *  (C) Copyright 2006 Red Hat Inc, All Rights Reserved
-+ *  Thanks to Thomas Gleixner for suggestions, analysis and fixes.
-+ *
-+ *  PI-futex support started by Ingo Molnar and Thomas Gleixner
-+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
-+ *  Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
-+ *
-+ *  PRIVATE futexes by Eric Dumazet
-+ *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
-+ *
-+ *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
-+ *  Copyright (C) IBM Corporation, 2009
-+ *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
-+ *
-+ *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
-+ *  enough at me, Linus for the original (flawed) idea, Matthew
-+ *  Kirkwood for proof-of-concept implementation.
-+ *
-+ *  "The futexes are also cursed."
-+ *  "But they come in a choice of three flavours!"
-+ */
-+#include <linux/compat.h>
-+#include <linux/jhash.h>
-+#include <linux/pagemap.h>
-+#include <linux/memblock.h>
-+#include <linux/fault-inject.h>
-+#include <linux/slab.h>
-+
-+#include "futex.h"
-+#include "../locking/rtmutex_common.h"
-+
-+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-+int  __read_mostly futex_cmpxchg_enabled;
-+#endif
-+
-+
-+/*
-+ * The base of the bucket array and its size are always used together
-+ * (after initialization only in futex_hash()), so ensure that they
-+ * reside in the same cacheline.
-+ */
-+static struct {
-+	struct futex_hash_bucket *queues;
-+	unsigned long            hashsize;
-+} __futex_data __read_mostly __aligned(2*sizeof(long));
-+#define futex_queues   (__futex_data.queues)
-+#define futex_hashsize (__futex_data.hashsize)
-+
-+
-+/*
-+ * Fault injections for futexes.
-+ */
-+#ifdef CONFIG_FAIL_FUTEX
-+
-+static struct {
-+	struct fault_attr attr;
-+
-+	bool ignore_private;
-+} fail_futex = {
-+	.attr = FAULT_ATTR_INITIALIZER,
-+	.ignore_private = false,
-+};
-+
-+static int __init setup_fail_futex(char *str)
-+{
-+	return setup_fault_attr(&fail_futex.attr, str);
-+}
-+__setup("fail_futex=", setup_fail_futex);
-+
-+bool should_fail_futex(bool fshared)
-+{
-+	if (fail_futex.ignore_private && !fshared)
-+		return false;
-+
-+	return should_fail(&fail_futex.attr, 1);
-+}
-+
-+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-+
-+static int __init fail_futex_debugfs(void)
-+{
-+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-+	struct dentry *dir;
-+
-+	dir = fault_create_debugfs_attr("fail_futex", NULL,
-+					&fail_futex.attr);
-+	if (IS_ERR(dir))
-+		return PTR_ERR(dir);
-+
-+	debugfs_create_bool("ignore-private", mode, dir,
-+			    &fail_futex.ignore_private);
-+	return 0;
-+}
-+
-+late_initcall(fail_futex_debugfs);
-+
-+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-+
-+#endif /* CONFIG_FAIL_FUTEX */
-+
-+/**
-+ * futex_hash - Return the hash bucket in the global hash
-+ * @key:	Pointer to the futex key for which the hash is calculated
-+ *
-+ * We hash on the keys returned from get_futex_key (see below) and return the
-+ * corresponding hash bucket in the global hash.
-+ */
-+struct futex_hash_bucket *futex_hash(union futex_key *key)
-+{
-+	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
-+			  key->both.offset);
-+
-+	return &futex_queues[hash & (futex_hashsize - 1)];
-+}
-+
-+
-+/**
-+ * futex_setup_timer - set up the sleeping hrtimer.
-+ * @time:	ptr to the given timeout value
-+ * @timeout:	the hrtimer_sleeper structure to be set up
-+ * @flags:	futex flags
-+ * @range_ns:	optional range in ns
-+ *
-+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
-+ *	   value given
-+ */
-+struct hrtimer_sleeper *
-+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
-+		  int flags, u64 range_ns)
-+{
-+	if (!time)
-+		return NULL;
-+
-+	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
-+				      CLOCK_REALTIME : CLOCK_MONOTONIC,
-+				      HRTIMER_MODE_ABS);
-+	/*
-+	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
-+	 * effectively the same as calling hrtimer_set_expires().
-+	 */
-+	hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
-+
-+	return timeout;
-+}
-+
-+/*
-+ * Generate a machine wide unique identifier for this inode.
-+ *
-+ * This relies on u64 not wrapping in the life-time of the machine; which with
-+ * 1ns resolution means almost 585 years.
-+ *
-+ * This further relies on the fact that a well formed program will not unmap
-+ * the file while it has a (shared) futex waiting on it. This mapping will have
-+ * a file reference which pins the mount and inode.
-+ *
-+ * If for some reason an inode gets evicted and read back in again, it will get
-+ * a new sequence number and will _NOT_ match, even though it is the exact same
-+ * file.
-+ *
-+ * It is important that futex_match() will never have a false-positive, esp.
-+ * for PI futexes that can mess up the state. The above argues that false-negatives
-+ * are only possible for malformed programs.
-+ */
-+static u64 get_inode_sequence_number(struct inode *inode)
-+{
-+	static atomic64_t i_seq;
-+	u64 old;
-+
-+	/* Does the inode already have a sequence number? */
-+	old = atomic64_read(&inode->i_sequence);
-+	if (likely(old))
-+		return old;
-+
-+	for (;;) {
-+		u64 new = atomic64_add_return(1, &i_seq);
-+		if (WARN_ON_ONCE(!new))
-+			continue;
-+
-+		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
-+		if (old)
-+			return old;
-+		return new;
-+	}
-+}
-+
-+/**
-+ * get_futex_key() - Get parameters which are the keys for a futex
-+ * @uaddr:	virtual address of the futex
-+ * @fshared:	false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
-+ * @key:	address where result is stored.
-+ * @rw:		mapping needs to be read/write (values: FUTEX_READ,
-+ *              FUTEX_WRITE)
-+ *
-+ * Return: a negative error code or 0
-+ *
-+ * The key words are stored in @key on success.
-+ *
-+ * For shared mappings (when @fshared), the key is:
-+ *
-+ *   ( inode->i_sequence, page->index, offset_within_page )
-+ *
-+ * [ also see get_inode_sequence_number() ]
-+ *
-+ * For private mappings (or when !@fshared), the key is:
-+ *
-+ *   ( current->mm, address, 0 )
-+ *
-+ * This allows (cross process, where applicable) identification of the futex
-+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
-+ *
-+ * lock_page() might sleep, the caller should not hold a spinlock.
-+ */
-+int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
-+		  enum futex_access rw)
-+{
-+	unsigned long address = (unsigned long)uaddr;
-+	struct mm_struct *mm = current->mm;
-+	struct page *page, *tail;
-+	struct address_space *mapping;
-+	int err, ro = 0;
-+
-+	/*
-+	 * The futex address must be "naturally" aligned.
-+	 */
-+	key->both.offset = address % PAGE_SIZE;
-+	if (unlikely((address % sizeof(u32)) != 0))
-+		return -EINVAL;
-+	address -= key->both.offset;
-+
-+	if (unlikely(!access_ok(uaddr, sizeof(u32))))
-+		return -EFAULT;
-+
-+	if (unlikely(should_fail_futex(fshared)))
-+		return -EFAULT;
-+
-+	/*
-+	 * PROCESS_PRIVATE futexes are fast.
-+	 * As the mm cannot disappear under us and the 'key' only needs
-+	 * virtual address, we dont even have to find the underlying vma.
-+	 * Note : We do have to check 'uaddr' is a valid user address,
-+	 *        but access_ok() should be faster than find_vma()
-+	 */
-+	if (!fshared) {
-+		key->private.mm = mm;
-+		key->private.address = address;
-+		return 0;
-+	}
-+
-+again:
-+	/* Ignore any VERIFY_READ mapping (futex common case) */
-+	if (unlikely(should_fail_futex(true)))
-+		return -EFAULT;
-+
-+	err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
-+	/*
-+	 * If write access is not required (eg. FUTEX_WAIT), try
-+	 * and get read-only access.
-+	 */
-+	if (err == -EFAULT && rw == FUTEX_READ) {
-+		err = get_user_pages_fast(address, 1, 0, &page);
-+		ro = 1;
-+	}
-+	if (err < 0)
-+		return err;
-+	else
-+		err = 0;
-+
-+	/*
-+	 * The treatment of mapping from this point on is critical. The page
-+	 * lock protects many things but in this context the page lock
-+	 * stabilizes mapping, prevents inode freeing in the shared
-+	 * file-backed region case and guards against movement to swap cache.
-+	 *
-+	 * Strictly speaking the page lock is not needed in all cases being
-+	 * considered here and page lock forces unnecessarily serialization
-+	 * From this point on, mapping will be re-verified if necessary and
-+	 * page lock will be acquired only if it is unavoidable
-+	 *
-+	 * Mapping checks require the head page for any compound page so the
-+	 * head page and mapping is looked up now. For anonymous pages, it
-+	 * does not matter if the page splits in the future as the key is
-+	 * based on the address. For filesystem-backed pages, the tail is
-+	 * required as the index of the page determines the key. For
-+	 * base pages, there is no tail page and tail == page.
-+	 */
-+	tail = page;
-+	page = compound_head(page);
-+	mapping = READ_ONCE(page->mapping);
-+
-+	/*
-+	 * If page->mapping is NULL, then it cannot be a PageAnon
-+	 * page; but it might be the ZERO_PAGE or in the gate area or
-+	 * in a special mapping (all cases which we are happy to fail);
-+	 * or it may have been a good file page when get_user_pages_fast
-+	 * found it, but truncated or holepunched or subjected to
-+	 * invalidate_complete_page2 before we got the page lock (also
-+	 * cases which we are happy to fail).  And we hold a reference,
-+	 * so refcount care in invalidate_complete_page's remove_mapping
-+	 * prevents drop_caches from setting mapping to NULL beneath us.
-+	 *
-+	 * The case we do have to guard against is when memory pressure made
-+	 * shmem_writepage move it from filecache to swapcache beneath us:
-+	 * an unlikely race, but we do need to retry for page->mapping.
-+	 */
-+	if (unlikely(!mapping)) {
-+		int shmem_swizzled;
-+
-+		/*
-+		 * Page lock is required to identify which special case above
-+		 * applies. If this is really a shmem page then the page lock
-+		 * will prevent unexpected transitions.
-+		 */
-+		lock_page(page);
-+		shmem_swizzled = PageSwapCache(page) || page->mapping;
-+		unlock_page(page);
-+		put_page(page);
-+
-+		if (shmem_swizzled)
-+			goto again;
-+
-+		return -EFAULT;
-+	}
-+
-+	/*
-+	 * Private mappings are handled in a simple way.
-+	 *
-+	 * If the futex key is stored on an anonymous page, then the associated
-+	 * object is the mm which is implicitly pinned by the calling process.
-+	 *
-+	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
-+	 * it's a read-only handle, it's expected that futexes attach to
-+	 * the object not the particular process.
-+	 */
-+	if (PageAnon(page)) {
-+		/*
-+		 * A RO anonymous page will never change and thus doesn't make
-+		 * sense for futex operations.
-+		 */
-+		if (unlikely(should_fail_futex(true)) || ro) {
-+			err = -EFAULT;
-+			goto out;
-+		}
-+
-+		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
-+		key->private.mm = mm;
-+		key->private.address = address;
-+
-+	} else {
-+		struct inode *inode;
-+
-+		/*
-+		 * The associated futex object in this case is the inode and
-+		 * the page->mapping must be traversed. Ordinarily this should
-+		 * be stabilised under page lock but it's not strictly
-+		 * necessary in this case as we just want to pin the inode, not
-+		 * update the radix tree or anything like that.
-+		 *
-+		 * The RCU read lock is taken as the inode is finally freed
-+		 * under RCU. If the mapping still matches expectations then the
-+		 * mapping->host can be safely accessed as being a valid inode.
-+		 */
-+		rcu_read_lock();
-+
-+		if (READ_ONCE(page->mapping) != mapping) {
-+			rcu_read_unlock();
-+			put_page(page);
-+
-+			goto again;
-+		}
-+
-+		inode = READ_ONCE(mapping->host);
-+		if (!inode) {
-+			rcu_read_unlock();
-+			put_page(page);
-+
-+			goto again;
-+		}
-+
-+		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-+		key->shared.i_seq = get_inode_sequence_number(inode);
-+		key->shared.pgoff = page_to_pgoff(tail);
-+		rcu_read_unlock();
-+	}
-+
-+out:
-+	put_page(page);
-+	return err;
-+}
-+
-+/**
-+ * fault_in_user_writeable() - Fault in user address and verify RW access
-+ * @uaddr:	pointer to faulting user space address
-+ *
-+ * Slow path to fixup the fault we just took in the atomic write
-+ * access to @uaddr.
-+ *
-+ * We have no generic implementation of a non-destructive write to the
-+ * user address. We know that we faulted in the atomic pagefault
-+ * disabled section so we can as well avoid the #PF overhead by
-+ * calling get_user_pages() right away.
-+ */
-+int fault_in_user_writeable(u32 __user *uaddr)
-+{
-+	struct mm_struct *mm = current->mm;
-+	int ret;
-+
-+	mmap_read_lock(mm);
-+	ret = fixup_user_fault(mm, (unsigned long)uaddr,
-+			       FAULT_FLAG_WRITE, NULL);
-+	mmap_read_unlock(mm);
-+
-+	return ret < 0 ? ret : 0;
-+}
-+
-+/**
-+ * futex_top_waiter() - Return the highest priority waiter on a futex
-+ * @hb:		the hash bucket the futex_q's reside in
-+ * @key:	the futex key (to distinguish it from other futex futex_q's)
-+ *
-+ * Must be called with the hb lock held.
-+ */
-+struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key)
-+{
-+	struct futex_q *this;
-+
-+	plist_for_each_entry(this, &hb->chain, list) {
-+		if (futex_match(&this->key, key))
-+			return this;
-+	}
-+	return NULL;
-+}
-+
-+int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-+{
-+	int ret;
-+
-+	pagefault_disable();
-+	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
-+	pagefault_enable();
-+
-+	return ret;
-+}
-+
-+int futex_get_value_locked(u32 *dest, u32 __user *from)
-+{
-+	int ret;
-+
-+	pagefault_disable();
-+	ret = __get_user(*dest, from);
-+	pagefault_enable();
-+
-+	return ret ? -EFAULT : 0;
-+}
-+
-+/**
-+ * wait_for_owner_exiting - Block until the owner has exited
-+ * @ret: owner's current futex lock status
-+ * @exiting:	Pointer to the exiting task
-+ *
-+ * Caller must hold a refcount on @exiting.
-+ */
-+void wait_for_owner_exiting(int ret, struct task_struct *exiting)
-+{
-+	if (ret != -EBUSY) {
-+		WARN_ON_ONCE(exiting);
-+		return;
-+	}
-+
-+	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
-+		return;
-+
-+	mutex_lock(&exiting->futex_exit_mutex);
-+	/*
-+	 * No point in doing state checking here. If the waiter got here
-+	 * while the task was in exec()->exec_futex_release() then it can
-+	 * have any FUTEX_STATE_* value when the waiter has acquired the
-+	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
-+	 * already. Highly unlikely and not a problem. Just one more round
-+	 * through the futex maze.
-+	 */
-+	mutex_unlock(&exiting->futex_exit_mutex);
-+
-+	put_task_struct(exiting);
-+}
-+
-+/**
-+ * __futex_unqueue() - Remove the futex_q from its futex_hash_bucket
-+ * @q:	The futex_q to unqueue
-+ *
-+ * The q->lock_ptr must not be NULL and must be held by the caller.
-+ */
-+void __futex_unqueue(struct futex_q *q)
-+{
-+	struct futex_hash_bucket *hb;
-+
-+	if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
-+		return;
-+	lockdep_assert_held(q->lock_ptr);
-+
-+	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
-+	plist_del(&q->list, &hb->chain);
-+	futex_hb_waiters_dec(hb);
-+}
-+
-+/* The key must be already stored in q->key. */
-+struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
-+	__acquires(&hb->lock)
-+{
-+	struct futex_hash_bucket *hb;
-+
-+	hb = futex_hash(&q->key);
-+
-+	/*
-+	 * Increment the counter before taking the lock so that
-+	 * a potential waker won't miss a to-be-slept task that is
-+	 * waiting for the spinlock. This is safe as all futex_q_lock()
-+	 * users end up calling futex_queue(). Similarly, for housekeeping,
-+	 * decrement the counter at futex_q_unlock() when some error has
-+	 * occurred and we don't end up adding the task to the list.
-+	 */
-+	futex_hb_waiters_inc(hb); /* implies smp_mb(); (A) */
-+
-+	q->lock_ptr = &hb->lock;
-+
-+	spin_lock(&hb->lock);
-+	return hb;
-+}
-+
-+void futex_q_unlock(struct futex_hash_bucket *hb)
-+	__releases(&hb->lock)
-+{
-+	spin_unlock(&hb->lock);
-+	futex_hb_waiters_dec(hb);
-+}
-+
-+void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
-+{
-+	int prio;
-+
-+	/*
-+	 * The priority used to register this element is
-+	 * - either the real thread-priority for the real-time threads
-+	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
-+	 * - or MAX_RT_PRIO for non-RT threads.
-+	 * Thus, all RT-threads are woken first in priority order, and
-+	 * the others are woken last, in FIFO order.
-+	 */
-+	prio = min(current->normal_prio, MAX_RT_PRIO);
-+
-+	plist_node_init(&q->list, prio);
-+	plist_add(&q->list, &hb->chain);
-+	q->task = current;
-+}
-+
-+/**
-+ * futex_unqueue() - Remove the futex_q from its futex_hash_bucket
-+ * @q:	The futex_q to unqueue
-+ *
-+ * The q->lock_ptr must not be held by the caller. A call to futex_unqueue() must
-+ * be paired with exactly one earlier call to futex_queue().
-+ *
-+ * Return:
-+ *  - 1 - if the futex_q was still queued (and we removed unqueued it);
-+ *  - 0 - if the futex_q was already removed by the waking thread
-+ */
-+int futex_unqueue(struct futex_q *q)
-+{
-+	spinlock_t *lock_ptr;
-+	int ret = 0;
-+
-+	/* In the common case we don't take the spinlock, which is nice. */
-+retry:
-+	/*
-+	 * q->lock_ptr can change between this read and the following spin_lock.
-+	 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
-+	 * optimizing lock_ptr out of the logic below.
-+	 */
-+	lock_ptr = READ_ONCE(q->lock_ptr);
-+	if (lock_ptr != NULL) {
-+		spin_lock(lock_ptr);
-+		/*
-+		 * q->lock_ptr can change between reading it and
-+		 * spin_lock(), causing us to take the wrong lock.  This
-+		 * corrects the race condition.
-+		 *
-+		 * Reasoning goes like this: if we have the wrong lock,
-+		 * q->lock_ptr must have changed (maybe several times)
-+		 * between reading it and the spin_lock().  It can
-+		 * change again after the spin_lock() but only if it was
-+		 * already changed before the spin_lock().  It cannot,
-+		 * however, change back to the original value.  Therefore
-+		 * we can detect whether we acquired the correct lock.
-+		 */
-+		if (unlikely(lock_ptr != q->lock_ptr)) {
-+			spin_unlock(lock_ptr);
-+			goto retry;
-+		}
-+		__futex_unqueue(q);
-+
-+		BUG_ON(q->pi_state);
-+
-+		spin_unlock(lock_ptr);
-+		ret = 1;
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * PI futexes can not be requeued and must remove themselves from the
-+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
-+ */
-+void futex_unqueue_pi(struct futex_q *q)
-+{
-+	__futex_unqueue(q);
-+
-+	BUG_ON(!q->pi_state);
-+	put_pi_state(q->pi_state);
-+	q->pi_state = NULL;
-+}
-+
-+/* Constants for the pending_op argument of handle_futex_death */
-+#define HANDLE_DEATH_PENDING	true
-+#define HANDLE_DEATH_LIST	false
-+
-+/*
-+ * Process a futex-list entry, check whether it's owned by the
-+ * dying task, and do notification if so:
-+ */
-+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
-+			      bool pi, bool pending_op)
-+{
-+	u32 uval, nval, mval;
-+	int err;
-+
-+	/* Futex address must be 32bit aligned */
-+	if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
-+		return -1;
-+
-+retry:
-+	if (get_user(uval, uaddr))
-+		return -1;
-+
-+	/*
-+	 * Special case for regular (non PI) futexes. The unlock path in
-+	 * user space has two race scenarios:
-+	 *
-+	 * 1. The unlock path releases the user space futex value and
-+	 *    before it can execute the futex() syscall to wake up
-+	 *    waiters it is killed.
-+	 *
-+	 * 2. A woken up waiter is killed before it can acquire the
-+	 *    futex in user space.
-+	 *
-+	 * In both cases the TID validation below prevents a wakeup of
-+	 * potential waiters which can cause these waiters to block
-+	 * forever.
-+	 *
-+	 * In both cases the following conditions are met:
-+	 *
-+	 *	1) task->robust_list->list_op_pending != NULL
-+	 *	   @pending_op == true
-+	 *	2) User space futex value == 0
-+	 *	3) Regular futex: @pi == false
-+	 *
-+	 * If these conditions are met, it is safe to attempt waking up a
-+	 * potential waiter without touching the user space futex value and
-+	 * trying to set the OWNER_DIED bit. The user space futex value is
-+	 * uncontended and the rest of the user space mutex state is
-+	 * consistent, so a woken waiter will just take over the
-+	 * uncontended futex. Setting the OWNER_DIED bit would create
-+	 * inconsistent state and malfunction of the user space owner died
-+	 * handling.
-+	 */
-+	if (pending_op && !pi && !uval) {
-+		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
-+		return 0;
-+	}
-+
-+	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
-+		return 0;
-+
-+	/*
-+	 * Ok, this dying thread is truly holding a futex
-+	 * of interest. Set the OWNER_DIED bit atomically
-+	 * via cmpxchg, and if the value had FUTEX_WAITERS
-+	 * set, wake up a waiter (if any). (We have to do a
-+	 * futex_wake() even if OWNER_DIED is already set -
-+	 * to handle the rare but possible case of recursive
-+	 * thread-death.) The rest of the cleanup is done in
-+	 * userspace.
-+	 */
-+	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
-+
-+	/*
-+	 * We are not holding a lock here, but we want to have
-+	 * the pagefault_disable/enable() protection because
-+	 * we want to handle the fault gracefully. If the
-+	 * access fails we try to fault in the futex with R/W
-+	 * verification via get_user_pages. get_user() above
-+	 * does not guarantee R/W access. If that fails we
-+	 * give up and leave the futex locked.
-+	 */
-+	if ((err = futex_cmpxchg_value_locked(&nval, uaddr, uval, mval))) {
-+		switch (err) {
-+		case -EFAULT:
-+			if (fault_in_user_writeable(uaddr))
-+				return -1;
-+			goto retry;
-+
-+		case -EAGAIN:
-+			cond_resched();
-+			goto retry;
-+
-+		default:
-+			WARN_ON_ONCE(1);
-+			return err;
-+		}
-+	}
-+
-+	if (nval != uval)
-+		goto retry;
-+
-+	/*
-+	 * Wake robust non-PI futexes here. The wakeup of
-+	 * PI futexes happens in exit_pi_state():
-+	 */
-+	if (!pi && (uval & FUTEX_WAITERS))
-+		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-+ */
-+static inline int fetch_robust_entry(struct robust_list __user **entry,
-+				     struct robust_list __user * __user *head,
-+				     unsigned int *pi)
-+{
-+	unsigned long uentry;
-+
-+	if (get_user(uentry, (unsigned long __user *)head))
-+		return -EFAULT;
-+
-+	*entry = (void __user *)(uentry & ~1UL);
-+	*pi = uentry & 1;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Walk curr->robust_list (very carefully, it's a userspace list!)
-+ * and mark any locks found there dead, and notify any waiters.
-+ *
-+ * We silently return on any sign of list-walking problem.
-+ */
-+static void exit_robust_list(struct task_struct *curr)
-+{
-+	struct robust_list_head __user *head = curr->robust_list;
-+	struct robust_list __user *entry, *next_entry, *pending;
-+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-+	unsigned int next_pi;
-+	unsigned long futex_offset;
-+	int rc;
-+
-+	if (!futex_cmpxchg_enabled)
-+		return;
-+
-+	/*
-+	 * Fetch the list head (which was registered earlier, via
-+	 * sys_set_robust_list()):
-+	 */
-+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
-+		return;
-+	/*
-+	 * Fetch the relative futex offset:
-+	 */
-+	if (get_user(futex_offset, &head->futex_offset))
-+		return;
-+	/*
-+	 * Fetch any possibly pending lock-add first, and handle it
-+	 * if it exists:
-+	 */
-+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
-+		return;
-+
-+	next_entry = NULL;	/* avoid warning with gcc */
-+	while (entry != &head->list) {
-+		/*
-+		 * Fetch the next entry in the list before calling
-+		 * handle_futex_death:
-+		 */
-+		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
-+		/*
-+		 * A pending lock might already be on the list, so
-+		 * don't process it twice:
-+		 */
-+		if (entry != pending) {
-+			if (handle_futex_death((void __user *)entry + futex_offset,
-+						curr, pi, HANDLE_DEATH_LIST))
-+				return;
-+		}
-+		if (rc)
-+			return;
-+		entry = next_entry;
-+		pi = next_pi;
-+		/*
-+		 * Avoid excessively long or circular lists:
-+		 */
-+		if (!--limit)
-+			break;
-+
-+		cond_resched();
-+	}
-+
-+	if (pending) {
-+		handle_futex_death((void __user *)pending + futex_offset,
-+				   curr, pip, HANDLE_DEATH_PENDING);
-+	}
-+}
-+
-+#ifdef CONFIG_COMPAT
-+static void __user *futex_uaddr(struct robust_list __user *entry,
-+				compat_long_t futex_offset)
-+{
-+	compat_uptr_t base = ptr_to_compat(entry);
-+	void __user *uaddr = compat_ptr(base + futex_offset);
-+
-+	return uaddr;
-+}
-+
-+/*
-+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
-+ */
-+static inline int
-+compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
-+		   compat_uptr_t __user *head, unsigned int *pi)
-+{
-+	if (get_user(*uentry, head))
-+		return -EFAULT;
-+
-+	*entry = compat_ptr((*uentry) & ~1);
-+	*pi = (unsigned int)(*uentry) & 1;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Walk curr->robust_list (very carefully, it's a userspace list!)
-+ * and mark any locks found there dead, and notify any waiters.
-+ *
-+ * We silently return on any sign of list-walking problem.
-+ */
-+static void compat_exit_robust_list(struct task_struct *curr)
-+{
-+	struct compat_robust_list_head __user *head = curr->compat_robust_list;
-+	struct robust_list __user *entry, *next_entry, *pending;
-+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-+	unsigned int next_pi;
-+	compat_uptr_t uentry, next_uentry, upending;
-+	compat_long_t futex_offset;
-+	int rc;
-+
-+	if (!futex_cmpxchg_enabled)
-+		return;
-+
-+	/*
-+	 * Fetch the list head (which was registered earlier, via
-+	 * sys_set_robust_list()):
-+	 */
-+	if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
-+		return;
-+	/*
-+	 * Fetch the relative futex offset:
-+	 */
-+	if (get_user(futex_offset, &head->futex_offset))
-+		return;
-+	/*
-+	 * Fetch any possibly pending lock-add first, and handle it
-+	 * if it exists:
-+	 */
-+	if (compat_fetch_robust_entry(&upending, &pending,
-+			       &head->list_op_pending, &pip))
-+		return;
-+
-+	next_entry = NULL;	/* avoid warning with gcc */
-+	while (entry != (struct robust_list __user *) &head->list) {
-+		/*
-+		 * Fetch the next entry in the list before calling
-+		 * handle_futex_death:
-+		 */
-+		rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
-+			(compat_uptr_t __user *)&entry->next, &next_pi);
-+		/*
-+		 * A pending lock might already be on the list, so
-+		 * dont process it twice:
-+		 */
-+		if (entry != pending) {
-+			void __user *uaddr = futex_uaddr(entry, futex_offset);
-+
-+			if (handle_futex_death(uaddr, curr, pi,
-+					       HANDLE_DEATH_LIST))
-+				return;
-+		}
-+		if (rc)
-+			return;
-+		uentry = next_uentry;
-+		entry = next_entry;
-+		pi = next_pi;
-+		/*
-+		 * Avoid excessively long or circular lists:
-+		 */
-+		if (!--limit)
-+			break;
-+
-+		cond_resched();
-+	}
-+	if (pending) {
-+		void __user *uaddr = futex_uaddr(pending, futex_offset);
-+
-+		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
-+	}
-+}
-+#endif
-+
-+#ifdef CONFIG_FUTEX_PI
-+
-+/*
-+ * This task is holding PI mutexes at exit time => bad.
-+ * Kernel cleans up PI-state, but userspace is likely hosed.
-+ * (Robust-futex cleanup is separate and might save the day for userspace.)
-+ */
-+static void exit_pi_state_list(struct task_struct *curr)
-+{
-+	struct list_head *next, *head = &curr->pi_state_list;
-+	struct futex_pi_state *pi_state;
-+	struct futex_hash_bucket *hb;
-+	union futex_key key = FUTEX_KEY_INIT;
-+
-+	if (!futex_cmpxchg_enabled)
-+		return;
-+	/*
-+	 * We are a ZOMBIE and nobody can enqueue itself on
-+	 * pi_state_list anymore, but we have to be careful
-+	 * versus waiters unqueueing themselves:
-+	 */
-+	raw_spin_lock_irq(&curr->pi_lock);
-+	while (!list_empty(head)) {
-+		next = head->next;
-+		pi_state = list_entry(next, struct futex_pi_state, list);
-+		key = pi_state->key;
-+		hb = futex_hash(&key);
-+
-+		/*
-+		 * We can race against put_pi_state() removing itself from the
-+		 * list (a waiter going away). put_pi_state() will first
-+		 * decrement the reference count and then modify the list, so
-+		 * its possible to see the list entry but fail this reference
-+		 * acquire.
-+		 *
-+		 * In that case; drop the locks to let put_pi_state() make
-+		 * progress and retry the loop.
-+		 */
-+		if (!refcount_inc_not_zero(&pi_state->refcount)) {
-+			raw_spin_unlock_irq(&curr->pi_lock);
-+			cpu_relax();
-+			raw_spin_lock_irq(&curr->pi_lock);
-+			continue;
-+		}
-+		raw_spin_unlock_irq(&curr->pi_lock);
-+
-+		spin_lock(&hb->lock);
-+		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+		raw_spin_lock(&curr->pi_lock);
-+		/*
-+		 * We dropped the pi-lock, so re-check whether this
-+		 * task still owns the PI-state:
-+		 */
-+		if (head->next != next) {
-+			/* retain curr->pi_lock for the loop invariant */
-+			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
-+			spin_unlock(&hb->lock);
-+			put_pi_state(pi_state);
-+			continue;
-+		}
-+
-+		WARN_ON(pi_state->owner != curr);
-+		WARN_ON(list_empty(&pi_state->list));
-+		list_del_init(&pi_state->list);
-+		pi_state->owner = NULL;
-+
-+		raw_spin_unlock(&curr->pi_lock);
-+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+		spin_unlock(&hb->lock);
-+
-+		rt_mutex_futex_unlock(&pi_state->pi_mutex);
-+		put_pi_state(pi_state);
-+
-+		raw_spin_lock_irq(&curr->pi_lock);
-+	}
-+	raw_spin_unlock_irq(&curr->pi_lock);
-+}
-+#else
-+static inline void exit_pi_state_list(struct task_struct *curr) { }
-+#endif
-+
-+static void futex_cleanup(struct task_struct *tsk)
-+{
-+	if (unlikely(tsk->robust_list)) {
-+		exit_robust_list(tsk);
-+		tsk->robust_list = NULL;
-+	}
-+
-+#ifdef CONFIG_COMPAT
-+	if (unlikely(tsk->compat_robust_list)) {
-+		compat_exit_robust_list(tsk);
-+		tsk->compat_robust_list = NULL;
-+	}
-+#endif
-+
-+	if (unlikely(!list_empty(&tsk->pi_state_list)))
-+		exit_pi_state_list(tsk);
-+}
-+
-+/**
-+ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
-+ * @tsk:	task to set the state on
-+ *
-+ * Set the futex exit state of the task lockless. The futex waiter code
-+ * observes that state when a task is exiting and loops until the task has
-+ * actually finished the futex cleanup. The worst case for this is that the
-+ * waiter runs through the wait loop until the state becomes visible.
-+ *
-+ * This is called from the recursive fault handling path in do_exit().
-+ *
-+ * This is best effort. Either the futex exit code has run already or
-+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
-+ * take it over. If not, the problem is pushed back to user space. If the
-+ * futex exit code did not run yet, then an already queued waiter might
-+ * block forever, but there is nothing which can be done about that.
-+ */
-+void futex_exit_recursive(struct task_struct *tsk)
-+{
-+	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
-+	if (tsk->futex_state == FUTEX_STATE_EXITING)
-+		mutex_unlock(&tsk->futex_exit_mutex);
-+	tsk->futex_state = FUTEX_STATE_DEAD;
-+}
-+
-+static void futex_cleanup_begin(struct task_struct *tsk)
-+{
-+	/*
-+	 * Prevent various race issues against a concurrent incoming waiter
-+	 * including live locks by forcing the waiter to block on
-+	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
-+	 * attach_to_pi_owner().
-+	 */
-+	mutex_lock(&tsk->futex_exit_mutex);
-+
-+	/*
-+	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
-+	 *
-+	 * This ensures that all subsequent checks of tsk->futex_state in
-+	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
-+	 * tsk->pi_lock held.
-+	 *
-+	 * It guarantees also that a pi_state which was queued right before
-+	 * the state change under tsk->pi_lock by a concurrent waiter must
-+	 * be observed in exit_pi_state_list().
-+	 */
-+	raw_spin_lock_irq(&tsk->pi_lock);
-+	tsk->futex_state = FUTEX_STATE_EXITING;
-+	raw_spin_unlock_irq(&tsk->pi_lock);
-+}
-+
-+static void futex_cleanup_end(struct task_struct *tsk, int state)
-+{
-+	/*
-+	 * Lockless store. The only side effect is that an observer might
-+	 * take another loop until it becomes visible.
-+	 */
-+	tsk->futex_state = state;
-+	/*
-+	 * Drop the exit protection. This unblocks waiters which observed
-+	 * FUTEX_STATE_EXITING to reevaluate the state.
-+	 */
-+	mutex_unlock(&tsk->futex_exit_mutex);
-+}
-+
-+void futex_exec_release(struct task_struct *tsk)
-+{
-+	/*
-+	 * The state handling is done for consistency, but in the case of
-+	 * exec() there is no way to prevent further damage as the PID stays
-+	 * the same. But for the unlikely and arguably buggy case that a
-+	 * futex is held on exec(), this provides at least as much state
-+	 * consistency protection which is possible.
-+	 */
-+	futex_cleanup_begin(tsk);
-+	futex_cleanup(tsk);
-+	/*
-+	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
-+	 * exec a new binary.
-+	 */
-+	futex_cleanup_end(tsk, FUTEX_STATE_OK);
-+}
-+
-+void futex_exit_release(struct task_struct *tsk)
-+{
-+	futex_cleanup_begin(tsk);
-+	futex_cleanup(tsk);
-+	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
-+}
-+
-+static void __init futex_detect_cmpxchg(void)
-+{
-+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-+	u32 curval;
-+
-+	/*
-+	 * This will fail and we want it. Some arch implementations do
-+	 * runtime detection of the futex_atomic_cmpxchg_inatomic()
-+	 * functionality. We want to know that before we call in any
-+	 * of the complex code paths. Also we want to prevent
-+	 * registration of robust lists in that case. NULL is
-+	 * guaranteed to fault and we get -EFAULT on functional
-+	 * implementation, the non-functional ones will return
-+	 * -ENOSYS.
-+	 */
-+	if (futex_cmpxchg_value_locked(&curval, NULL, 0, 0) == -EFAULT)
-+		futex_cmpxchg_enabled = 1;
-+#endif
-+}
-+
-+static int __init futex_init(void)
-+{
-+	unsigned int futex_shift;
-+	unsigned long i;
-+
-+#if CONFIG_BASE_SMALL
-+	futex_hashsize = 16;
-+#else
-+	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
-+#endif
-+
-+	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
-+					       futex_hashsize, 0,
-+					       futex_hashsize < 256 ? HASH_SMALL : 0,
-+					       &futex_shift, NULL,
-+					       futex_hashsize, futex_hashsize);
-+	futex_hashsize = 1UL << futex_shift;
-+
-+	futex_detect_cmpxchg();
-+
-+	for (i = 0; i < futex_hashsize; i++) {
-+		atomic_set(&futex_queues[i].waiters, 0);
-+		plist_head_init(&futex_queues[i].chain);
-+		spin_lock_init(&futex_queues[i].lock);
-+	}
-+
-+	return 0;
-+}
-+core_initcall(futex_init);
-diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
-new file mode 100644
-index 000000000..948fcf317
---- /dev/null
-+++ b/kernel/futex/futex.h
-@@ -0,0 +1,295 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _FUTEX_H
-+#define _FUTEX_H
-+
-+#include <linux/futex.h>
-+#include <linux/sched/wake_q.h>
-+
-+#include <asm/futex.h>
-+
-+/*
-+ * Futex flags used to encode options to functions and preserve them across
-+ * restarts.
-+ */
-+#ifdef CONFIG_MMU
-+# define FLAGS_SHARED		0x01
-+#else
-+/*
-+ * NOMMU does not have per process address space. Let the compiler optimize
-+ * code away.
-+ */
-+# define FLAGS_SHARED		0x00
-+#endif
-+#define FLAGS_CLOCKRT		0x02
-+#define FLAGS_HAS_TIMEOUT	0x04
-+
-+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
-+#define futex_cmpxchg_enabled 1
-+#else
-+extern int  __read_mostly futex_cmpxchg_enabled;
-+#endif
-+
-+#ifdef CONFIG_FAIL_FUTEX
-+extern bool should_fail_futex(bool fshared);
-+#else
-+static inline bool should_fail_futex(bool fshared)
-+{
-+	return false;
-+}
-+#endif
-+
-+/*
-+ * Hash buckets are shared by all the futex_keys that hash to the same
-+ * location.  Each key may have multiple futex_q structures, one for each task
-+ * waiting on a futex.
-+ */
-+struct futex_hash_bucket {
-+	atomic_t waiters;
-+	spinlock_t lock;
-+	struct plist_head chain;
-+} ____cacheline_aligned_in_smp;
-+
-+/*
-+ * Priority Inheritance state:
-+ */
-+struct futex_pi_state {
-+	/*
-+	 * list of 'owned' pi_state instances - these have to be
-+	 * cleaned up in do_exit() if the task exits prematurely:
-+	 */
-+	struct list_head list;
-+
-+	/*
-+	 * The PI object:
-+	 */
-+	struct rt_mutex_base pi_mutex;
-+
-+	struct task_struct *owner;
-+	refcount_t refcount;
-+
-+	union futex_key key;
-+} __randomize_layout;
-+
-+/**
-+ * struct futex_q - The hashed futex queue entry, one per waiting task
-+ * @list:		priority-sorted list of tasks waiting on this futex
-+ * @task:		the task waiting on the futex
-+ * @lock_ptr:		the hash bucket lock
-+ * @key:		the key the futex is hashed on
-+ * @pi_state:		optional priority inheritance state
-+ * @rt_waiter:		rt_waiter storage for use with requeue_pi
-+ * @requeue_pi_key:	the requeue_pi target futex key
-+ * @bitset:		bitset for the optional bitmasked wakeup
-+ * @requeue_state:	State field for futex_requeue_pi()
-+ * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
-+ *
-+ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
-+ * we can wake only the relevant ones (hashed queues may be shared).
-+ *
-+ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
-+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
-+ * The order of wakeup is always to make the first condition true, then
-+ * the second.
-+ *
-+ * PI futexes are typically woken before they are removed from the hash list via
-+ * the rt_mutex code. See futex_unqueue_pi().
-+ */
-+struct futex_q {
-+	struct plist_node list;
-+
-+	struct task_struct *task;
-+	spinlock_t *lock_ptr;
-+	union futex_key key;
-+	struct futex_pi_state *pi_state;
-+	struct rt_mutex_waiter *rt_waiter;
-+	union futex_key *requeue_pi_key;
-+	u32 bitset;
-+	atomic_t requeue_state;
-+#ifdef CONFIG_PREEMPT_RT
-+	struct rcuwait requeue_wait;
-+#endif
-+} __randomize_layout;
-+
-+extern const struct futex_q futex_q_init;
-+
-+enum futex_access {
-+	FUTEX_READ,
-+	FUTEX_WRITE
-+};
-+
-+extern int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
-+			 enum futex_access rw);
-+
-+extern struct hrtimer_sleeper *
-+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
-+		  int flags, u64 range_ns);
-+
-+extern struct futex_hash_bucket *futex_hash(union futex_key *key);
-+
-+/**
-+ * futex_match - Check whether two futex keys are equal
-+ * @key1:	Pointer to key1
-+ * @key2:	Pointer to key2
-+ *
-+ * Return 1 if two futex_keys are equal, 0 otherwise.
-+ */
-+static inline int futex_match(union futex_key *key1, union futex_key *key2)
-+{
-+	return (key1 && key2
-+		&& key1->both.word == key2->both.word
-+		&& key1->both.ptr == key2->both.ptr
-+		&& key1->both.offset == key2->both.offset);
-+}
-+
-+extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-+			    struct futex_q *q, struct futex_hash_bucket **hb);
-+extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
-+				   struct hrtimer_sleeper *timeout);
-+extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
-+
-+extern int fault_in_user_writeable(u32 __user *uaddr);
-+extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-+extern int futex_get_value_locked(u32 *dest, u32 __user *from);
-+extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
-+
-+extern void __futex_unqueue(struct futex_q *q);
-+extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
-+extern int futex_unqueue(struct futex_q *q);
-+
-+/**
-+ * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
-+ * @q:	The futex_q to enqueue
-+ * @hb:	The destination hash bucket
-+ *
-+ * The hb->lock must be held by the caller, and is released here. A call to
-+ * futex_queue() is typically paired with exactly one call to futex_unqueue().  The
-+ * exceptions involve the PI related operations, which may use futex_unqueue_pi()
-+ * or nothing if the unqueue is done as part of the wake process and the unqueue
-+ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
-+ * an example).
-+ */
-+static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
-+	__releases(&hb->lock)
-+{
-+	__futex_queue(q, hb);
-+	spin_unlock(&hb->lock);
-+}
-+
-+extern void futex_unqueue_pi(struct futex_q *q);
-+
-+extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);
-+
-+/*
-+ * Reflects a new waiter being added to the waitqueue.
-+ */
-+static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+	atomic_inc(&hb->waiters);
-+	/*
-+	 * Full barrier (A), see the ordering comment above.
-+	 */
-+	smp_mb__after_atomic();
-+#endif
-+}
-+
-+/*
-+ * Reflects a waiter being removed from the waitqueue by wakeup
-+ * paths.
-+ */
-+static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+	atomic_dec(&hb->waiters);
-+#endif
-+}
-+
-+static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+	/*
-+	 * Full barrier (B), see the ordering comment above.
-+	 */
-+	smp_mb();
-+	return atomic_read(&hb->waiters);
-+#else
-+	return 1;
-+#endif
-+}
-+
-+extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
-+extern void futex_q_unlock(struct futex_hash_bucket *hb);
-+
-+
-+extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
-+				union futex_key *key,
-+				struct futex_pi_state **ps,
-+				struct task_struct *task,
-+				struct task_struct **exiting,
-+				int set_waiters);
-+
-+extern int refill_pi_state_cache(void);
-+extern void get_pi_state(struct futex_pi_state *pi_state);
-+extern void put_pi_state(struct futex_pi_state *pi_state);
-+extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);
-+
-+/*
-+ * Express the locking dependencies for lockdep:
-+ */
-+static inline void
-+double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
-+{
-+	if (hb1 > hb2)
-+		swap(hb1, hb2);
-+
-+	spin_lock(&hb1->lock);
-+	if (hb1 != hb2)
-+		spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
-+}
-+
-+static inline void
-+double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
-+{
-+	spin_unlock(&hb1->lock);
-+	if (hb1 != hb2)
-+		spin_unlock(&hb2->lock);
-+}
-+
-+/* syscalls */
-+
-+extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
-+				 val, ktime_t *abs_time, u32 bitset, u32 __user
-+				 *uaddr2);
-+
-+extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
-+			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
-+			 u32 *cmpval, int requeue_pi);
-+
-+extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
-+		      ktime_t *abs_time, u32 bitset);
-+
-+/**
-+ * struct futex_vector - Auxiliary struct for futex_waitv()
-+ * @w: Userspace provided data
-+ * @q: Kernel side data
-+ *
-+ * Struct used to build an array with all data need for futex_waitv()
-+ */
-+struct futex_vector {
-+	struct futex_waitv w;
-+	struct futex_q q;
-+};
-+
-+extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
-+			       struct hrtimer_sleeper *to);
-+
-+extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);
-+
-+extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
-+			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);
-+
-+extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);
-+
-+extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);
-+
-+#endif /* _FUTEX_H */
-diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
-new file mode 100644
-index 000000000..183b28c32
---- /dev/null
-+++ b/kernel/futex/pi.c
-@@ -0,0 +1,1233 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/slab.h>
-+#include <linux/sched/task.h>
-+
-+#include "futex.h"
-+#include "../locking/rtmutex_common.h"
-+
-+/*
-+ * PI code:
-+ */
-+int refill_pi_state_cache(void)
-+{
-+	struct futex_pi_state *pi_state;
-+
-+	if (likely(current->pi_state_cache))
-+		return 0;
-+
-+	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
-+
-+	if (!pi_state)
-+		return -ENOMEM;
-+
-+	INIT_LIST_HEAD(&pi_state->list);
-+	/* pi_mutex gets initialized later */
-+	pi_state->owner = NULL;
-+	refcount_set(&pi_state->refcount, 1);
-+	pi_state->key = FUTEX_KEY_INIT;
-+
-+	current->pi_state_cache = pi_state;
-+
-+	return 0;
-+}
-+
-+static struct futex_pi_state *alloc_pi_state(void)
-+{
-+	struct futex_pi_state *pi_state = current->pi_state_cache;
-+
-+	WARN_ON(!pi_state);
-+	current->pi_state_cache = NULL;
-+
-+	return pi_state;
-+}
-+
-+static void pi_state_update_owner(struct futex_pi_state *pi_state,
-+				  struct task_struct *new_owner)
-+{
-+	struct task_struct *old_owner = pi_state->owner;
-+
-+	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
-+
-+	if (old_owner) {
-+		raw_spin_lock(&old_owner->pi_lock);
-+		WARN_ON(list_empty(&pi_state->list));
-+		list_del_init(&pi_state->list);
-+		raw_spin_unlock(&old_owner->pi_lock);
-+	}
-+
-+	if (new_owner) {
-+		raw_spin_lock(&new_owner->pi_lock);
-+		WARN_ON(!list_empty(&pi_state->list));
-+		list_add(&pi_state->list, &new_owner->pi_state_list);
-+		pi_state->owner = new_owner;
-+		raw_spin_unlock(&new_owner->pi_lock);
-+	}
-+}
-+
-+void get_pi_state(struct futex_pi_state *pi_state)
-+{
-+	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
-+}
-+
-+/*
-+ * Drops a reference to the pi_state object and frees or caches it
-+ * when the last reference is gone.
-+ */
-+void put_pi_state(struct futex_pi_state *pi_state)
-+{
-+	if (!pi_state)
-+		return;
-+
-+	if (!refcount_dec_and_test(&pi_state->refcount))
-+		return;
-+
-+	/*
-+	 * If pi_state->owner is NULL, the owner is most probably dying
-+	 * and has cleaned up the pi_state already
-+	 */
-+	if (pi_state->owner) {
-+		unsigned long flags;
-+
-+		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
-+		pi_state_update_owner(pi_state, NULL);
-+		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
-+		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
-+	}
-+
-+	if (current->pi_state_cache) {
-+		kfree(pi_state);
-+	} else {
-+		/*
-+		 * pi_state->list is already empty.
-+		 * clear pi_state->owner.
-+		 * refcount is at 0 - put it back to 1.
-+		 */
-+		pi_state->owner = NULL;
-+		refcount_set(&pi_state->refcount, 1);
-+		current->pi_state_cache = pi_state;
-+	}
-+}
-+
-+/*
-+ * We need to check the following states:
-+ *
-+ *      Waiter | pi_state | pi->owner | uTID      | uODIED | ?
-+ *
-+ * [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
-+ * [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid
-+ *
-+ * [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid
-+ *
-+ * [4]  Found  | Found    | NULL      | 0         | 1      | Valid
-+ * [5]  Found  | Found    | NULL      | >0        | 1      | Invalid
-+ *
-+ * [6]  Found  | Found    | task      | 0         | 1      | Valid
-+ *
-+ * [7]  Found  | Found    | NULL      | Any       | 0      | Invalid
-+ *
-+ * [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
-+ * [9]  Found  | Found    | task      | 0         | 0      | Invalid
-+ * [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid
-+ *
-+ * [1]	Indicates that the kernel can acquire the futex atomically. We
-+ *	came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
-+ *
-+ * [2]	Valid, if TID does not belong to a kernel thread. If no matching
-+ *      thread is found then it indicates that the owner TID has died.
-+ *
-+ * [3]	Invalid. The waiter is queued on a non PI futex
-+ *
-+ * [4]	Valid state after exit_robust_list(), which sets the user space
-+ *	value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
-+ *
-+ * [5]	The user space value got manipulated between exit_robust_list()
-+ *	and exit_pi_state_list()
-+ *
-+ * [6]	Valid state after exit_pi_state_list() which sets the new owner in
-+ *	the pi_state but cannot access the user space value.
-+ *
-+ * [7]	pi_state->owner can only be NULL when the OWNER_DIED bit is set.
-+ *
-+ * [8]	Owner and user space value match
-+ *
-+ * [9]	There is no transient state which sets the user space TID to 0
-+ *	except exit_robust_list(), but this is indicated by the
-+ *	FUTEX_OWNER_DIED bit. See [4]
-+ *
-+ * [10] There is no transient state which leaves owner and user space
-+ *	TID out of sync. Except one error case where the kernel is denied
-+ *	write access to the user address, see fixup_pi_state_owner().
-+ *
-+ *
-+ * Serialization and lifetime rules:
-+ *
-+ * hb->lock:
-+ *
-+ *	hb -> futex_q, relation
-+ *	futex_q -> pi_state, relation
-+ *
-+ *	(cannot be raw because hb can contain arbitrary amount
-+ *	 of futex_q's)
-+ *
-+ * pi_mutex->wait_lock:
-+ *
-+ *	{uval, pi_state}
-+ *
-+ *	(and pi_mutex 'obviously')
-+ *
-+ * p->pi_lock:
-+ *
-+ *	p->pi_state_list -> pi_state->list, relation
-+ *	pi_mutex->owner -> pi_state->owner, relation
-+ *
-+ * pi_state->refcount:
-+ *
-+ *	pi_state lifetime
-+ *
-+ *
-+ * Lock order:
-+ *
-+ *   hb->lock
-+ *     pi_mutex->wait_lock
-+ *       p->pi_lock
-+ *
-+ */
-+
-+/*
-+ * Validate that the existing waiter has a pi_state and sanity check
-+ * the pi_state against the user space value. If correct, attach to
-+ * it.
-+ */
-+static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
-+			      struct futex_pi_state *pi_state,
-+			      struct futex_pi_state **ps)
-+{
-+	pid_t pid = uval & FUTEX_TID_MASK;
-+	u32 uval2;
-+	int ret;
-+
-+	/*
-+	 * Userspace might have messed up non-PI and PI futexes [3]
-+	 */
-+	if (unlikely(!pi_state))
-+		return -EINVAL;
-+
-+	/*
-+	 * We get here with hb->lock held, and having found a
-+	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
-+	 * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(),
-+	 * which in turn means that futex_lock_pi() still has a reference on
-+	 * our pi_state.
-+	 *
-+	 * The waiter holding a reference on @pi_state also protects against
-+	 * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
-+	 * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
-+	 * free pi_state before we can take a reference ourselves.
-+	 */
-+	WARN_ON(!refcount_read(&pi_state->refcount));
-+
-+	/*
-+	 * Now that we have a pi_state, we can acquire wait_lock
-+	 * and do the state validation.
-+	 */
-+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+	/*
-+	 * Since {uval, pi_state} is serialized by wait_lock, and our current
-+	 * uval was read without holding it, it can have changed. Verify it
-+	 * still is what we expect it to be, otherwise retry the entire
-+	 * operation.
-+	 */
-+	if (futex_get_value_locked(&uval2, uaddr))
-+		goto out_efault;
-+
-+	if (uval != uval2)
-+		goto out_eagain;
-+
-+	/*
-+	 * Handle the owner died case:
-+	 */
-+	if (uval & FUTEX_OWNER_DIED) {
-+		/*
-+		 * exit_pi_state_list sets owner to NULL and wakes the
-+		 * topmost waiter. The task which acquires the
-+		 * pi_state->rt_mutex will fixup owner.
-+		 */
-+		if (!pi_state->owner) {
-+			/*
-+			 * No pi state owner, but the user space TID
-+			 * is not 0. Inconsistent state. [5]
-+			 */
-+			if (pid)
-+				goto out_einval;
-+			/*
-+			 * Take a ref on the state and return success. [4]
-+			 */
-+			goto out_attach;
-+		}
-+
-+		/*
-+		 * If TID is 0, then either the dying owner has not
-+		 * yet executed exit_pi_state_list() or some waiter
-+		 * acquired the rtmutex in the pi state, but did not
-+		 * yet fixup the TID in user space.
-+		 *
-+		 * Take a ref on the state and return success. [6]
-+		 */
-+		if (!pid)
-+			goto out_attach;
-+	} else {
-+		/*
-+		 * If the owner died bit is not set, then the pi_state
-+		 * must have an owner. [7]
-+		 */
-+		if (!pi_state->owner)
-+			goto out_einval;
-+	}
-+
-+	/*
-+	 * Bail out if user space manipulated the futex value. If pi
-+	 * state exists then the owner TID must be the same as the
-+	 * user space TID. [9/10]
-+	 */
-+	if (pid != task_pid_vnr(pi_state->owner))
-+		goto out_einval;
-+
-+out_attach:
-+	get_pi_state(pi_state);
-+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+	*ps = pi_state;
-+	return 0;
-+
-+out_einval:
-+	ret = -EINVAL;
-+	goto out_error;
-+
-+out_eagain:
-+	ret = -EAGAIN;
-+	goto out_error;
-+
-+out_efault:
-+	ret = -EFAULT;
-+	goto out_error;
-+
-+out_error:
-+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+	return ret;
-+}
-+
-+static int handle_exit_race(u32 __user *uaddr, u32 uval,
-+			    struct task_struct *tsk)
-+{
-+	u32 uval2;
-+
-+	/*
-+	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
-+	 * caller that the alleged owner is busy.
-+	 */
-+	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-+		return -EBUSY;
-+
-+	/*
-+	 * Reread the user space value to handle the following situation:
-+	 *
-+	 * CPU0				CPU1
-+	 *
-+	 * sys_exit()			sys_futex()
-+	 *  do_exit()			 futex_lock_pi()
-+	 *                                futex_lock_pi_atomic()
-+	 *   exit_signals(tsk)		    No waiters:
-+	 *    tsk->flags |= PF_EXITING;	    *uaddr == 0x00000PID
-+	 *  mm_release(tsk)		    Set waiter bit
-+	 *   exit_robust_list(tsk) {	    *uaddr = 0x80000PID;
-+	 *      Set owner died		    attach_to_pi_owner() {
-+	 *    *uaddr = 0xC0000000;	     tsk = get_task(PID);
-+	 *   }				     if (!tsk->flags & PF_EXITING) {
-+	 *  ...				       attach();
-+	 *  tsk->futex_state =               } else {
-+	 *	FUTEX_STATE_DEAD;              if (tsk->futex_state !=
-+	 *					  FUTEX_STATE_DEAD)
-+	 *				         return -EAGAIN;
-+	 *				       return -ESRCH; <--- FAIL
-+	 *				     }
-+	 *
-+	 * Returning ESRCH unconditionally is wrong here because the
-+	 * user space value has been changed by the exiting task.
-+	 *
-+	 * The same logic applies to the case where the exiting task is
-+	 * already gone.
-+	 */
-+	if (futex_get_value_locked(&uval2, uaddr))
-+		return -EFAULT;
-+
-+	/* If the user space value has changed, try again. */
-+	if (uval2 != uval)
-+		return -EAGAIN;
-+
-+	/*
-+	 * The exiting task did not have a robust list, the robust list was
-+	 * corrupted or the user space value in *uaddr is simply bogus.
-+	 * Give up and tell user space.
-+	 */
-+	return -ESRCH;
-+}
-+
-+static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
-+				 struct futex_pi_state **ps)
-+{
-+	/*
-+	 * No existing pi state. First waiter. [2]
-+	 *
-+	 * This creates pi_state, we have hb->lock held, this means nothing can
-+	 * observe this state, wait_lock is irrelevant.
-+	 */
-+	struct futex_pi_state *pi_state = alloc_pi_state();
-+
-+	/*
-+	 * Initialize the pi_mutex in locked state and make @p
-+	 * the owner of it:
-+	 */
-+	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
-+
-+	/* Store the key for possible exit cleanups: */
-+	pi_state->key = *key;
-+
-+	WARN_ON(!list_empty(&pi_state->list));
-+	list_add(&pi_state->list, &p->pi_state_list);
-+	/*
-+	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
-+	 * because there is no concurrency as the object is not published yet.
-+	 */
-+	pi_state->owner = p;
-+
-+	*ps = pi_state;
-+}
-+/*
-+ * Lookup the task for the TID provided from user space and attach to
-+ * it after doing proper sanity checks.
-+ */
-+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-+			      struct futex_pi_state **ps,
-+			      struct task_struct **exiting)
-+{
-+	pid_t pid = uval & FUTEX_TID_MASK;
-+	struct task_struct *p;
-+
-+	/*
-+	 * We are the first waiter - try to look up the real owner and attach
-+	 * the new pi_state to it, but bail out when TID = 0 [1]
-+	 *
-+	 * The !pid check is paranoid. None of the call sites should end up
-+	 * with pid == 0, but better safe than sorry. Let the caller retry
-+	 */
-+	if (!pid)
-+		return -EAGAIN;
-+	p = find_get_task_by_vpid(pid);
-+	if (!p)
-+		return handle_exit_race(uaddr, uval, NULL);
-+
-+	if (unlikely(p->flags & PF_KTHREAD)) {
-+		put_task_struct(p);
-+		return -EPERM;
-+	}
-+
-+	/*
-+	 * We need to look at the task state to figure out, whether the
-+	 * task is exiting. To protect against the change of the task state
-+	 * in futex_exit_release(), we do this protected by p->pi_lock:
-+	 */
-+	raw_spin_lock_irq(&p->pi_lock);
-+	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
-+		/*
-+		 * The task is on the way out. When the futex state is
-+		 * FUTEX_STATE_DEAD, we know that the task has finished
-+		 * the cleanup:
-+		 */
-+		int ret = handle_exit_race(uaddr, uval, p);
-+
-+		raw_spin_unlock_irq(&p->pi_lock);
-+		/*
-+		 * If the owner task is between FUTEX_STATE_EXITING and
-+		 * FUTEX_STATE_DEAD then store the task pointer and keep
-+		 * the reference on the task struct. The calling code will
-+		 * drop all locks, wait for the task to reach
-+		 * FUTEX_STATE_DEAD and then drop the refcount. This is
-+		 * required to prevent a live lock when the current task
-+		 * preempted the exiting task between the two states.
-+		 */
-+		if (ret == -EBUSY)
-+			*exiting = p;
-+		else
-+			put_task_struct(p);
-+		return ret;
-+	}
-+
-+	__attach_to_pi_owner(p, key, ps);
-+	raw_spin_unlock_irq(&p->pi_lock);
-+
-+	put_task_struct(p);
-+
-+	return 0;
-+}
-+
-+static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
-+{
-+	int err;
-+	u32 curval;
-+
-+	if (unlikely(should_fail_futex(true)))
-+		return -EFAULT;
-+
-+	err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
-+	if (unlikely(err))
-+		return err;
-+
-+	/* If user space value changed, let the caller retry */
-+	return curval != uval ? -EAGAIN : 0;
-+}
-+
-+/**
-+ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
-+ * @uaddr:		the pi futex user address
-+ * @hb:			the pi futex hash bucket
-+ * @key:		the futex key associated with uaddr and hb
-+ * @ps:			the pi_state pointer where we store the result of the
-+ *			lookup
-+ * @task:		the task to perform the atomic lock work for.  This will
-+ *			be "current" except in the case of requeue pi.
-+ * @exiting:		Pointer to store the task pointer of the owner task
-+ *			which is in the middle of exiting
-+ * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
-+ *
-+ * Return:
-+ *  -  0 - ready to wait;
-+ *  -  1 - acquired the lock;
-+ *  - <0 - error
-+ *
-+ * The hb->lock must be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ */
-+int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
-+			 union futex_key *key,
-+			 struct futex_pi_state **ps,
-+			 struct task_struct *task,
-+			 struct task_struct **exiting,
-+			 int set_waiters)
-+{
-+	u32 uval, newval, vpid = task_pid_vnr(task);
-+	struct futex_q *top_waiter;
-+	int ret;
-+
-+	/*
-+	 * Read the user space value first so we can validate a few
-+	 * things before proceeding further.
-+	 */
-+	if (futex_get_value_locked(&uval, uaddr))
-+		return -EFAULT;
-+
-+	if (unlikely(should_fail_futex(true)))
-+		return -EFAULT;
-+
-+	/*
-+	 * Detect deadlocks.
-+	 */
-+	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
-+		return -EDEADLK;
-+
-+	if ((unlikely(should_fail_futex(true))))
-+		return -EDEADLK;
-+
-+	/*
-+	 * Lookup existing state first. If it exists, try to attach to
-+	 * its pi_state.
-+	 */
-+	top_waiter = futex_top_waiter(hb, key);
-+	if (top_waiter)
-+		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
-+
-+	/*
-+	 * No waiter and user TID is 0. We are here because the
-+	 * waiters or the owner died bit is set or called from
-+	 * requeue_cmp_pi or for whatever reason something took the
-+	 * syscall.
-+	 */
-+	if (!(uval & FUTEX_TID_MASK)) {
-+		/*
-+		 * We take over the futex. No other waiters and the user space
-+		 * TID is 0. We preserve the owner died bit.
-+		 */
-+		newval = uval & FUTEX_OWNER_DIED;
-+		newval |= vpid;
-+
-+		/* The futex requeue_pi code can enforce the waiters bit */
-+		if (set_waiters)
-+			newval |= FUTEX_WAITERS;
-+
-+		ret = lock_pi_update_atomic(uaddr, uval, newval);
-+		if (ret)
-+			return ret;
-+
-+		/*
-+		 * If the waiter bit was requested the caller also needs PI
-+		 * state attached to the new owner of the user space futex.
-+		 *
-+		 * @task is guaranteed to be alive and it cannot be exiting
-+		 * because it is either sleeping or waiting in
-+		 * futex_requeue_pi_wakeup_sync().
-+		 *
-+		 * No need to do the full attach_to_pi_owner() exercise
-+		 * because @task is known and valid.
-+		 */
-+		if (set_waiters) {
-+			raw_spin_lock_irq(&task->pi_lock);
-+			__attach_to_pi_owner(task, key, ps);
-+			raw_spin_unlock_irq(&task->pi_lock);
-+		}
-+		return 1;
-+	}
-+
-+	/*
-+	 * First waiter. Set the waiters bit before attaching ourself to
-+	 * the owner. If owner tries to unlock, it will be forced into
-+	 * the kernel and blocked on hb->lock.
-+	 */
-+	newval = uval | FUTEX_WAITERS;
-+	ret = lock_pi_update_atomic(uaddr, uval, newval);
-+	if (ret)
-+		return ret;
-+	/*
-+	 * If the update of the user space value succeeded, we try to
-+	 * attach to the owner. If that fails, no harm done, we only
-+	 * set the FUTEX_WAITERS bit in the user space variable.
-+	 */
-+	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
-+}
-+
-+/*
-+ * Caller must hold a reference on @pi_state.
-+ */
-+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
-+{
-+	struct rt_mutex_waiter *top_waiter;
-+	struct task_struct *new_owner;
-+	bool postunlock = false;
-+	DEFINE_RT_WAKE_Q(wqh);
-+	u32 curval, newval;
-+	int ret = 0;
-+
-+	top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
-+	if (WARN_ON_ONCE(!top_waiter)) {
-+		/*
-+		 * As per the comment in futex_unlock_pi() this should not happen.
-+		 *
-+		 * When this happens, give up our locks and try again, giving
-+		 * the futex_lock_pi() instance time to complete, either by
-+		 * waiting on the rtmutex or removing itself from the futex
-+		 * queue.
-+		 */
-+		ret = -EAGAIN;
-+		goto out_unlock;
-+	}
-+
-+	new_owner = top_waiter->task;
-+
-+	/*
-+	 * We pass it to the next owner. The WAITERS bit is always kept
-+	 * enabled while there is PI state around. We cleanup the owner
-+	 * died bit, because we are the owner.
-+	 */
-+	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
-+
-+	if (unlikely(should_fail_futex(true))) {
-+		ret = -EFAULT;
-+		goto out_unlock;
-+	}
-+
-+	ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
-+	if (!ret && (curval != uval)) {
-+		/*
-+		 * If a unconditional UNLOCK_PI operation (user space did not
-+		 * try the TID->0 transition) raced with a waiter setting the
-+		 * FUTEX_WAITERS flag between get_user() and locking the hash
-+		 * bucket lock, retry the operation.
-+		 */
-+		if ((FUTEX_TID_MASK & curval) == uval)
-+			ret = -EAGAIN;
-+		else
-+			ret = -EINVAL;
-+	}
-+
-+	if (!ret) {
-+		/*
-+		 * This is a point of no return; once we modified the uval
-+		 * there is no going back and subsequent operations must
-+		 * not fail.
-+		 */
-+		pi_state_update_owner(pi_state, new_owner);
-+		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
-+	}
-+
-+out_unlock:
-+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+	if (postunlock)
-+		rt_mutex_postunlock(&wqh);
-+
-+	return ret;
-+}
-+
-+static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-+				  struct task_struct *argowner)
-+{
-+	struct futex_pi_state *pi_state = q->pi_state;
-+	struct task_struct *oldowner, *newowner;
-+	u32 uval, curval, newval, newtid;
-+	int err = 0;
-+
-+	oldowner = pi_state->owner;
-+
-+	/*
-+	 * We are here because either:
-+	 *
-+	 *  - we stole the lock and pi_state->owner needs updating to reflect
-+	 *    that (@argowner == current),
-+	 *
-+	 * or:
-+	 *
-+	 *  - someone stole our lock and we need to fix things to point to the
-+	 *    new owner (@argowner == NULL).
-+	 *
-+	 * Either way, we have to replace the TID in the user space variable.
-+	 * This must be atomic as we have to preserve the owner died bit here.
-+	 *
-+	 * Note: We write the user space value _before_ changing the pi_state
-+	 * because we can fault here. Imagine swapped out pages or a fork
-+	 * that marked all the anonymous memory readonly for cow.
-+	 *
-+	 * Modifying pi_state _before_ the user space value would leave the
-+	 * pi_state in an inconsistent state when we fault here, because we
-+	 * need to drop the locks to handle the fault. This might be observed
-+	 * in the PID checks when attaching to PI state .
-+	 */
-+retry:
-+	if (!argowner) {
-+		if (oldowner != current) {
-+			/*
-+			 * We raced against a concurrent self; things are
-+			 * already fixed up. Nothing to do.
-+			 */
-+			return 0;
-+		}
-+
-+		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
-+			/* We got the lock. pi_state is correct. Tell caller. */
-+			return 1;
-+		}
-+
-+		/*
-+		 * The trylock just failed, so either there is an owner or
-+		 * there is a higher priority waiter than this one.
-+		 */
-+		newowner = rt_mutex_owner(&pi_state->pi_mutex);
-+		/*
-+		 * If the higher priority waiter has not yet taken over the
-+		 * rtmutex then newowner is NULL. We can't return here with
-+		 * that state because it's inconsistent vs. the user space
-+		 * state. So drop the locks and try again. It's a valid
-+		 * situation and not any different from the other retry
-+		 * conditions.
-+		 */
-+		if (unlikely(!newowner)) {
-+			err = -EAGAIN;
-+			goto handle_err;
-+		}
-+	} else {
-+		WARN_ON_ONCE(argowner != current);
-+		if (oldowner == current) {
-+			/*
-+			 * We raced against a concurrent self; things are
-+			 * already fixed up. Nothing to do.
-+			 */
-+			return 1;
-+		}
-+		newowner = argowner;
-+	}
-+
-+	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
-+	/* Owner died? */
-+	if (!pi_state->owner)
-+		newtid |= FUTEX_OWNER_DIED;
-+
-+	err = futex_get_value_locked(&uval, uaddr);
-+	if (err)
-+		goto handle_err;
-+
-+	for (;;) {
-+		newval = (uval & FUTEX_OWNER_DIED) | newtid;
-+
-+		err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
-+		if (err)
-+			goto handle_err;
-+
-+		if (curval == uval)
-+			break;
-+		uval = curval;
-+	}
-+
-+	/*
-+	 * We fixed up user space. Now we need to fix the pi_state
-+	 * itself.
-+	 */
-+	pi_state_update_owner(pi_state, newowner);
-+
-+	return argowner == current;
-+
-+	/*
-+	 * In order to reschedule or handle a page fault, we need to drop the
-+	 * locks here. In the case of a fault, this gives the other task
-+	 * (either the highest priority waiter itself or the task which stole
-+	 * the rtmutex) the chance to try the fixup of the pi_state. So once we
-+	 * are back from handling the fault we need to check the pi_state after
-+	 * reacquiring the locks and before trying to do another fixup. When
-+	 * the fixup has been done already we simply return.
-+	 *
-+	 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
-+	 * drop hb->lock since the caller owns the hb -> futex_q relation.
-+	 * Dropping the pi_mutex->wait_lock requires the state revalidate.
-+	 */
-+handle_err:
-+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+	spin_unlock(q->lock_ptr);
-+
-+	switch (err) {
-+	case -EFAULT:
-+		err = fault_in_user_writeable(uaddr);
-+		break;
-+
-+	case -EAGAIN:
-+		cond_resched();
-+		err = 0;
-+		break;
-+
-+	default:
-+		WARN_ON_ONCE(1);
-+		break;
-+	}
-+
-+	spin_lock(q->lock_ptr);
-+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+	/*
-+	 * Check if someone else fixed it for us:
-+	 */
-+	if (pi_state->owner != oldowner)
-+		return argowner == current;
-+
-+	/* Retry if err was -EAGAIN or the fault in succeeded */
-+	if (!err)
-+		goto retry;
-+
-+	/*
-+	 * fault_in_user_writeable() failed so user state is immutable. At
-+	 * best we can make the kernel state consistent but user state will
-+	 * be most likely hosed and any subsequent unlock operation will be
-+	 * rejected due to PI futex rule [10].
-+	 *
-+	 * Ensure that the rtmutex owner is also the pi_state owner despite
-+	 * the user space value claiming something different. There is no
-+	 * point in unlocking the rtmutex if current is the owner as it
-+	 * would need to wait until the next waiter has taken the rtmutex
-+	 * to guarantee consistent state. Keep it simple. Userspace asked
-+	 * for this wreckaged state.
-+	 *
-+	 * The rtmutex has an owner - either current or some other
-+	 * task. See the EAGAIN loop above.
-+	 */
-+	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
-+
-+	return err;
-+}
-+
-+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-+				struct task_struct *argowner)
-+{
-+	struct futex_pi_state *pi_state = q->pi_state;
-+	int ret;
-+
-+	lockdep_assert_held(q->lock_ptr);
-+
-+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+	ret = __fixup_pi_state_owner(uaddr, q, argowner);
-+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+	return ret;
-+}
-+
-+/**
-+ * fixup_pi_owner() - Post lock pi_state and corner case management
-+ * @uaddr:	user address of the futex
-+ * @q:		futex_q (contains pi_state and access to the rt_mutex)
-+ * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
-+ *
-+ * After attempting to lock an rt_mutex, this function is called to cleanup
-+ * the pi_state owner as well as handle race conditions that may allow us to
-+ * acquire the lock. Must be called with the hb lock held.
-+ *
-+ * Return:
-+ *  -  1 - success, lock taken;
-+ *  -  0 - success, lock not taken;
-+ *  - <0 - on error (-EFAULT)
-+ */
-+int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked)
-+{
-+	if (locked) {
-+		/*
-+		 * Got the lock. We might not be the anticipated owner if we
-+		 * did a lock-steal - fix up the PI-state in that case:
-+		 *
-+		 * Speculative pi_state->owner read (we don't hold wait_lock);
-+		 * since we own the lock pi_state->owner == current is the
-+		 * stable state, anything else needs more attention.
-+		 */
-+		if (q->pi_state->owner != current)
-+			return fixup_pi_state_owner(uaddr, q, current);
-+		return 1;
-+	}
-+
-+	/*
-+	 * If we didn't get the lock; check if anybody stole it from us. In
-+	 * that case, we need to fix up the uval to point to them instead of
-+	 * us, otherwise bad things happen. [10]
-+	 *
-+	 * Another speculative read; pi_state->owner == current is unstable
-+	 * but needs our attention.
-+	 */
-+	if (q->pi_state->owner == current)
-+		return fixup_pi_state_owner(uaddr, q, NULL);
-+
-+	/*
-+	 * Paranoia check. If we did not take the lock, then we should not be
-+	 * the owner of the rt_mutex. Warn and establish consistent state.
-+	 */
-+	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
-+		return fixup_pi_state_owner(uaddr, q, current);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Userspace tried a 0 -> TID atomic transition of the futex value
-+ * and failed. The kernel side here does the whole locking operation:
-+ * if there are waiters then it will block as a consequence of relying
-+ * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
-+ * a 0 value of the futex too.).
-+ *
-+ * Also serves as futex trylock_pi()'ing, and due semantics.
-+ */
-+int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock)
-+{
-+	struct hrtimer_sleeper timeout, *to;
-+	struct task_struct *exiting = NULL;
-+	struct rt_mutex_waiter rt_waiter;
-+	struct futex_hash_bucket *hb;
-+	struct futex_q q = futex_q_init;
-+	int res, ret;
-+
-+	if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+		return -ENOSYS;
-+
-+	if (refill_pi_state_cache())
-+		return -ENOMEM;
-+
-+	to = futex_setup_timer(time, &timeout, flags, 0);
-+
-+retry:
-+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
-+	if (unlikely(ret != 0))
-+		goto out;
-+
-+retry_private:
-+	hb = futex_q_lock(&q);
-+
-+	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
-+				   &exiting, 0);
-+	if (unlikely(ret)) {
-+		/*
-+		 * Atomic work succeeded and we got the lock,
-+		 * or failed. Either way, we do _not_ block.
-+		 */
-+		switch (ret) {
-+		case 1:
-+			/* We got the lock. */
-+			ret = 0;
-+			goto out_unlock_put_key;
-+		case -EFAULT:
-+			goto uaddr_faulted;
-+		case -EBUSY:
-+		case -EAGAIN:
-+			/*
-+			 * Two reasons for this:
-+			 * - EBUSY: Task is exiting and we just wait for the
-+			 *   exit to complete.
-+			 * - EAGAIN: The user space value changed.
-+			 */
-+			futex_q_unlock(hb);
-+			/*
-+			 * Handle the case where the owner is in the middle of
-+			 * exiting. Wait for the exit to complete otherwise
-+			 * this task might loop forever, aka. live lock.
-+			 */
-+			wait_for_owner_exiting(ret, exiting);
-+			cond_resched();
-+			goto retry;
-+		default:
-+			goto out_unlock_put_key;
-+		}
-+	}
-+
-+	WARN_ON(!q.pi_state);
-+
-+	/*
-+	 * Only actually queue now that the atomic ops are done:
-+	 */
-+	__futex_queue(&q, hb);
-+
-+	if (trylock) {
-+		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
-+		/* Fixup the trylock return value: */
-+		ret = ret ? 0 : -EWOULDBLOCK;
-+		goto no_block;
-+	}
-+
-+	rt_mutex_init_waiter(&rt_waiter);
-+
-+	/*
-+	 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
-+	 * hold it while doing rt_mutex_start_proxy(), because then it will
-+	 * include hb->lock in the blocking chain, even through we'll not in
-+	 * fact hold it while blocking. This will lead it to report -EDEADLK
-+	 * and BUG when futex_unlock_pi() interleaves with this.
-+	 *
-+	 * Therefore acquire wait_lock while holding hb->lock, but drop the
-+	 * latter before calling __rt_mutex_start_proxy_lock(). This
-+	 * interleaves with futex_unlock_pi() -- which does a similar lock
-+	 * handoff -- such that the latter can observe the futex_q::pi_state
-+	 * before __rt_mutex_start_proxy_lock() is done.
-+	 */
-+	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
-+	spin_unlock(q.lock_ptr);
-+	/*
-+	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
-+	 * such that futex_unlock_pi() is guaranteed to observe the waiter when
-+	 * it sees the futex_q::pi_state.
-+	 */
-+	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
-+	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
-+
-+	if (ret) {
-+		if (ret == 1)
-+			ret = 0;
-+		goto cleanup;
-+	}
-+
-+	if (unlikely(to))
-+		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
-+
-+	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
-+
-+cleanup:
-+	spin_lock(q.lock_ptr);
-+	/*
-+	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
-+	 * first acquire the hb->lock before removing the lock from the
-+	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
-+	 * lists consistent.
-+	 *
-+	 * In particular; it is important that futex_unlock_pi() can not
-+	 * observe this inconsistency.
-+	 */
-+	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
-+		ret = 0;
-+
-+no_block:
-+	/*
-+	 * Fixup the pi_state owner and possibly acquire the lock if we
-+	 * haven't already.
-+	 */
-+	res = fixup_pi_owner(uaddr, &q, !ret);
-+	/*
-+	 * If fixup_pi_owner() returned an error, propagate that.  If it acquired
-+	 * the lock, clear our -ETIMEDOUT or -EINTR.
-+	 */
-+	if (res)
-+		ret = (res < 0) ? res : 0;
-+
-+	futex_unqueue_pi(&q);
-+	spin_unlock(q.lock_ptr);
-+	goto out;
-+
-+out_unlock_put_key:
-+	futex_q_unlock(hb);
-+
-+out:
-+	if (to) {
-+		hrtimer_cancel(&to->timer);
-+		destroy_hrtimer_on_stack(&to->timer);
-+	}
-+	return ret != -EINTR ? ret : -ERESTARTNOINTR;
-+
-+uaddr_faulted:
-+	futex_q_unlock(hb);
-+
-+	ret = fault_in_user_writeable(uaddr);
-+	if (ret)
-+		goto out;
-+
-+	if (!(flags & FLAGS_SHARED))
-+		goto retry_private;
-+
-+	goto retry;
-+}
-+
-+/*
-+ * Userspace attempted a TID -> 0 atomic transition, and failed.
-+ * This is the in-kernel slowpath: we look up the PI state (if any),
-+ * and do the rt-mutex unlock.
-+ */
-+int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
-+{
-+	u32 curval, uval, vpid = task_pid_vnr(current);
-+	union futex_key key = FUTEX_KEY_INIT;
-+	struct futex_hash_bucket *hb;
-+	struct futex_q *top_waiter;
-+	int ret;
-+
-+	if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+		return -ENOSYS;
-+
-+retry:
-+	if (get_user(uval, uaddr))
-+		return -EFAULT;
-+	/*
-+	 * We release only a lock we actually own:
-+	 */
-+	if ((uval & FUTEX_TID_MASK) != vpid)
-+		return -EPERM;
-+
-+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
-+	if (ret)
-+		return ret;
-+
-+	hb = futex_hash(&key);
-+	spin_lock(&hb->lock);
-+
-+	/*
-+	 * Check waiters first. We do not trust user space values at
-+	 * all and we at least want to know if user space fiddled
-+	 * with the futex value instead of blindly unlocking.
-+	 */
-+	top_waiter = futex_top_waiter(hb, &key);
-+	if (top_waiter) {
-+		struct futex_pi_state *pi_state = top_waiter->pi_state;
-+
-+		ret = -EINVAL;
-+		if (!pi_state)
-+			goto out_unlock;
-+
-+		/*
-+		 * If current does not own the pi_state then the futex is
-+		 * inconsistent and user space fiddled with the futex value.
-+		 */
-+		if (pi_state->owner != current)
-+			goto out_unlock;
-+
-+		get_pi_state(pi_state);
-+		/*
-+		 * By taking wait_lock while still holding hb->lock, we ensure
-+		 * there is no point where we hold neither; and therefore
-+		 * wake_futex_p() must observe a state consistent with what we
-+		 * observed.
-+		 *
-+		 * In particular; this forces __rt_mutex_start_proxy() to
-+		 * complete such that we're guaranteed to observe the
-+		 * rt_waiter. Also see the WARN in wake_futex_pi().
-+		 */
-+		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+		spin_unlock(&hb->lock);
-+
-+		/* drops pi_state->pi_mutex.wait_lock */
-+		ret = wake_futex_pi(uaddr, uval, pi_state);
-+
-+		put_pi_state(pi_state);
-+
-+		/*
-+		 * Success, we're done! No tricky corner cases.
-+		 */
-+		if (!ret)
-+			return ret;
-+		/*
-+		 * The atomic access to the futex value generated a
-+		 * pagefault, so retry the user-access and the wakeup:
-+		 */
-+		if (ret == -EFAULT)
-+			goto pi_faulted;
-+		/*
-+		 * A unconditional UNLOCK_PI op raced against a waiter
-+		 * setting the FUTEX_WAITERS bit. Try again.
-+		 */
-+		if (ret == -EAGAIN)
-+			goto pi_retry;
-+		/*
-+		 * wake_futex_pi has detected invalid state. Tell user
-+		 * space.
-+		 */
-+		return ret;
-+	}
-+
-+	/*
-+	 * We have no kernel internal state, i.e. no waiters in the
-+	 * kernel. Waiters which are about to queue themselves are stuck
-+	 * on hb->lock. So we can safely ignore them. We do neither
-+	 * preserve the WAITERS bit not the OWNER_DIED one. We are the
-+	 * owner.
-+	 */
-+	if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
-+		spin_unlock(&hb->lock);
-+		switch (ret) {
-+		case -EFAULT:
-+			goto pi_faulted;
-+
-+		case -EAGAIN:
-+			goto pi_retry;
-+
-+		default:
-+			WARN_ON_ONCE(1);
-+			return ret;
-+		}
-+	}
-+
-+	/*
-+	 * If uval has changed, let user space handle it.
-+	 */
-+	ret = (curval == uval) ? 0 : -EAGAIN;
-+
-+out_unlock:
-+	spin_unlock(&hb->lock);
-+	return ret;
-+
-+pi_retry:
-+	cond_resched();
-+	goto retry;
-+
-+pi_faulted:
-+
-+	ret = fault_in_user_writeable(uaddr);
-+	if (!ret)
-+		goto retry;
-+
-+	return ret;
-+}
-+
-diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
-new file mode 100644
-index 000000000..cba8b1a6a
---- /dev/null
-+++ b/kernel/futex/requeue.c
-@@ -0,0 +1,897 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/sched/signal.h>
-+
-+#include "futex.h"
-+#include "../locking/rtmutex_common.h"
-+
-+/*
-+ * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an
-+ * underlying rtmutex. The task which is about to be requeued could have
-+ * just woken up (timeout, signal). After the wake up the task has to
-+ * acquire hash bucket lock, which is held by the requeue code.  As a task
-+ * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking
-+ * and the hash bucket lock blocking would collide and corrupt state.
-+ *
-+ * On !PREEMPT_RT this is not a problem and everything could be serialized
-+ * on hash bucket lock, but aside of having the benefit of common code,
-+ * this allows to avoid doing the requeue when the task is already on the
-+ * way out and taking the hash bucket lock of the original uaddr1 when the
-+ * requeue has been completed.
-+ *
-+ * The following state transitions are valid:
-+ *
-+ * On the waiter side:
-+ *   Q_REQUEUE_PI_NONE		-> Q_REQUEUE_PI_IGNORE
-+ *   Q_REQUEUE_PI_IN_PROGRESS	-> Q_REQUEUE_PI_WAIT
-+ *
-+ * On the requeue side:
-+ *   Q_REQUEUE_PI_NONE		-> Q_REQUEUE_PI_INPROGRESS
-+ *   Q_REQUEUE_PI_IN_PROGRESS	-> Q_REQUEUE_PI_DONE/LOCKED
-+ *   Q_REQUEUE_PI_IN_PROGRESS	-> Q_REQUEUE_PI_NONE (requeue failed)
-+ *   Q_REQUEUE_PI_WAIT		-> Q_REQUEUE_PI_DONE/LOCKED
-+ *   Q_REQUEUE_PI_WAIT		-> Q_REQUEUE_PI_IGNORE (requeue failed)
-+ *
-+ * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this
-+ * signals that the waiter is already on the way out. It also means that
-+ * the waiter is still on the 'wait' futex, i.e. uaddr1.
-+ *
-+ * The waiter side signals early wakeup to the requeue side either through
-+ * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending
-+ * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately
-+ * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT,
-+ * which means the wakeup is interleaving with a requeue in progress it has
-+ * to wait for the requeue side to change the state. Either to DONE/LOCKED
-+ * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex
-+ * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by
-+ * the requeue side when the requeue attempt failed via deadlock detection
-+ * and therefore the waiter q is still on the uaddr1 futex.
-+ */
-+enum {
-+	Q_REQUEUE_PI_NONE		=  0,
-+	Q_REQUEUE_PI_IGNORE,
-+	Q_REQUEUE_PI_IN_PROGRESS,
-+	Q_REQUEUE_PI_WAIT,
-+	Q_REQUEUE_PI_DONE,
-+	Q_REQUEUE_PI_LOCKED,
-+};
-+
-+const struct futex_q futex_q_init = {
-+	/* list gets initialized in futex_queue()*/
-+	.key		= FUTEX_KEY_INIT,
-+	.bitset		= FUTEX_BITSET_MATCH_ANY,
-+	.requeue_state	= ATOMIC_INIT(Q_REQUEUE_PI_NONE),
-+};
-+
-+/**
-+ * requeue_futex() - Requeue a futex_q from one hb to another
-+ * @q:		the futex_q to requeue
-+ * @hb1:	the source hash_bucket
-+ * @hb2:	the target hash_bucket
-+ * @key2:	the new key for the requeued futex_q
-+ */
-+static inline
-+void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
-+		   struct futex_hash_bucket *hb2, union futex_key *key2)
-+{
-+
-+	/*
-+	 * If key1 and key2 hash to the same bucket, no need to
-+	 * requeue.
-+	 */
-+	if (likely(&hb1->chain != &hb2->chain)) {
-+		plist_del(&q->list, &hb1->chain);
-+		futex_hb_waiters_dec(hb1);
-+		futex_hb_waiters_inc(hb2);
-+		plist_add(&q->list, &hb2->chain);
-+		q->lock_ptr = &hb2->lock;
-+	}
-+	q->key = *key2;
-+}
-+
-+static inline bool futex_requeue_pi_prepare(struct futex_q *q,
-+					    struct futex_pi_state *pi_state)
-+{
-+	int old, new;
-+
-+	/*
-+	 * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
-+	 * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
-+	 * ignore the waiter.
-+	 */
-+	old = atomic_read_acquire(&q->requeue_state);
-+	do {
-+		if (old == Q_REQUEUE_PI_IGNORE)
-+			return false;
-+
-+		/*
-+		 * futex_proxy_trylock_atomic() might have set it to
-+		 * IN_PROGRESS and a interleaved early wake to WAIT.
-+		 *
-+		 * It was considered to have an extra state for that
-+		 * trylock, but that would just add more conditionals
-+		 * all over the place for a dubious value.
-+		 */
-+		if (old != Q_REQUEUE_PI_NONE)
-+			break;
-+
-+		new = Q_REQUEUE_PI_IN_PROGRESS;
-+	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
-+
-+	q->pi_state = pi_state;
-+	return true;
-+}
-+
-+static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
-+{
-+	int old, new;
-+
-+	old = atomic_read_acquire(&q->requeue_state);
-+	do {
-+		if (old == Q_REQUEUE_PI_IGNORE)
-+			return;
-+
-+		if (locked >= 0) {
-+			/* Requeue succeeded. Set DONE or LOCKED */
-+			WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
-+				     old != Q_REQUEUE_PI_WAIT);
-+			new = Q_REQUEUE_PI_DONE + locked;
-+		} else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
-+			/* Deadlock, no early wakeup interleave */
-+			new = Q_REQUEUE_PI_NONE;
-+		} else {
-+			/* Deadlock, early wakeup interleave. */
-+			WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
-+			new = Q_REQUEUE_PI_IGNORE;
-+		}
-+	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
-+
-+#ifdef CONFIG_PREEMPT_RT
-+	/* If the waiter interleaved with the requeue let it know */
-+	if (unlikely(old == Q_REQUEUE_PI_WAIT))
-+		rcuwait_wake_up(&q->requeue_wait);
-+#endif
-+}
-+
-+static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
-+{
-+	int old, new;
-+
-+	old = atomic_read_acquire(&q->requeue_state);
-+	do {
-+		/* Is requeue done already? */
-+		if (old >= Q_REQUEUE_PI_DONE)
-+			return old;
-+
-+		/*
-+		 * If not done, then tell the requeue code to either ignore
-+		 * the waiter or to wake it up once the requeue is done.
-+		 */
-+		new = Q_REQUEUE_PI_WAIT;
-+		if (old == Q_REQUEUE_PI_NONE)
-+			new = Q_REQUEUE_PI_IGNORE;
-+	} while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
-+
-+	/* If the requeue was in progress, wait for it to complete */
-+	if (old == Q_REQUEUE_PI_IN_PROGRESS) {
-+#ifdef CONFIG_PREEMPT_RT
-+		rcuwait_wait_event(&q->requeue_wait,
-+				   atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
-+				   TASK_UNINTERRUPTIBLE);
-+#else
-+		(void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
-+#endif
-+	}
-+
-+	/*
-+	 * Requeue is now either prohibited or complete. Reread state
-+	 * because during the wait above it might have changed. Nothing
-+	 * will modify q->requeue_state after this point.
-+	 */
-+	return atomic_read(&q->requeue_state);
-+}
-+
-+/**
-+ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
-+ * @q:		the futex_q
-+ * @key:	the key of the requeue target futex
-+ * @hb:		the hash_bucket of the requeue target futex
-+ *
-+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
-+ * target futex if it is uncontended or via a lock steal.
-+ *
-+ * 1) Set @q::key to the requeue target futex key so the waiter can detect
-+ *    the wakeup on the right futex.
-+ *
-+ * 2) Dequeue @q from the hash bucket.
-+ *
-+ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
-+ *    acquisition.
-+ *
-+ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
-+ *    the waiter has to fixup the pi state.
-+ *
-+ * 5) Complete the requeue state so the waiter can make progress. After
-+ *    this point the waiter task can return from the syscall immediately in
-+ *    case that the pi state does not have to be fixed up.
-+ *
-+ * 6) Wake the waiter task.
-+ *
-+ * Must be called with both q->lock_ptr and hb->lock held.
-+ */
-+static inline
-+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
-+			   struct futex_hash_bucket *hb)
-+{
-+	q->key = *key;
-+
-+	__futex_unqueue(q);
-+
-+	WARN_ON(!q->rt_waiter);
-+	q->rt_waiter = NULL;
-+
-+	q->lock_ptr = &hb->lock;
-+
-+	/* Signal locked state to the waiter */
-+	futex_requeue_pi_complete(q, 1);
-+	wake_up_state(q->task, TASK_NORMAL);
-+}
-+
-+/**
-+ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
-+ * @pifutex:		the user address of the to futex
-+ * @hb1:		the from futex hash bucket, must be locked by the caller
-+ * @hb2:		the to futex hash bucket, must be locked by the caller
-+ * @key1:		the from futex key
-+ * @key2:		the to futex key
-+ * @ps:			address to store the pi_state pointer
-+ * @exiting:		Pointer to store the task pointer of the owner task
-+ *			which is in the middle of exiting
-+ * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
-+ *
-+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
-+ * Wake the top waiter if we succeed.  If the caller specified set_waiters,
-+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
-+ * hb1 and hb2 must be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ *
-+ * Return:
-+ *  -  0 - failed to acquire the lock atomically;
-+ *  - >0 - acquired the lock, return value is vpid of the top_waiter
-+ *  - <0 - error
-+ */
-+static int
-+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
-+			   struct futex_hash_bucket *hb2, union futex_key *key1,
-+			   union futex_key *key2, struct futex_pi_state **ps,
-+			   struct task_struct **exiting, int set_waiters)
-+{
-+	struct futex_q *top_waiter = NULL;
-+	u32 curval;
-+	int ret;
-+
-+	if (futex_get_value_locked(&curval, pifutex))
-+		return -EFAULT;
-+
-+	if (unlikely(should_fail_futex(true)))
-+		return -EFAULT;
-+
-+	/*
-+	 * Find the top_waiter and determine if there are additional waiters.
-+	 * If the caller intends to requeue more than 1 waiter to pifutex,
-+	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
-+	 * as we have means to handle the possible fault.  If not, don't set
-+	 * the bit unnecessarily as it will force the subsequent unlock to enter
-+	 * the kernel.
-+	 */
-+	top_waiter = futex_top_waiter(hb1, key1);
-+
-+	/* There are no waiters, nothing for us to do. */
-+	if (!top_waiter)
-+		return 0;
-+
-+	/*
-+	 * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
-+	 * and waiting on the 'waitqueue' futex which is always !PI.
-+	 */
-+	if (!top_waiter->rt_waiter || top_waiter->pi_state)
-+		return -EINVAL;
-+
-+	/* Ensure we requeue to the expected futex. */
-+	if (!futex_match(top_waiter->requeue_pi_key, key2))
-+		return -EINVAL;
-+
-+	/* Ensure that this does not race against an early wakeup */
-+	if (!futex_requeue_pi_prepare(top_waiter, NULL))
-+		return -EAGAIN;
-+
-+	/*
-+	 * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
-+	 * in the contended case or if @set_waiters is true.
-+	 *
-+	 * In the contended case PI state is attached to the lock owner. If
-+	 * the user space lock can be acquired then PI state is attached to
-+	 * the new owner (@top_waiter->task) when @set_waiters is true.
-+	 */
-+	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-+				   exiting, set_waiters);
-+	if (ret == 1) {
-+		/*
-+		 * Lock was acquired in user space and PI state was
-+		 * attached to @top_waiter->task. That means state is fully
-+		 * consistent and the waiter can return to user space
-+		 * immediately after the wakeup.
-+		 */
-+		requeue_pi_wake_futex(top_waiter, key2, hb2);
-+	} else if (ret < 0) {
-+		/* Rewind top_waiter::requeue_state */
-+		futex_requeue_pi_complete(top_waiter, ret);
-+	} else {
-+		/*
-+		 * futex_lock_pi_atomic() did not acquire the user space
-+		 * futex, but managed to establish the proxy lock and pi
-+		 * state. top_waiter::requeue_state cannot be fixed up here
-+		 * because the waiter is not enqueued on the rtmutex
-+		 * yet. This is handled at the callsite depending on the
-+		 * result of rt_mutex_start_proxy_lock() which is
-+		 * guaranteed to be reached with this function returning 0.
-+		 */
-+	}
-+	return ret;
-+}
-+
-+/**
-+ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
-+ * @uaddr1:	source futex user address
-+ * @flags:	futex flags (FLAGS_SHARED, etc.)
-+ * @uaddr2:	target futex user address
-+ * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
-+ * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
-+ * @cmpval:	@uaddr1 expected value (or %NULL)
-+ * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
-+ *		pi futex (pi to pi requeue is not supported)
-+ *
-+ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
-+ * uaddr2 atomically on behalf of the top waiter.
-+ *
-+ * Return:
-+ *  - >=0 - on success, the number of tasks requeued or woken;
-+ *  -  <0 - on error
-+ */
-+int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
-+		  int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi)
-+{
-+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-+	int task_count = 0, ret;
-+	struct futex_pi_state *pi_state = NULL;
-+	struct futex_hash_bucket *hb1, *hb2;
-+	struct futex_q *this, *next;
-+	DEFINE_WAKE_Q(wake_q);
-+
-+	if (nr_wake < 0 || nr_requeue < 0)
-+		return -EINVAL;
-+
-+	/*
-+	 * When PI not supported: return -ENOSYS if requeue_pi is true,
-+	 * consequently the compiler knows requeue_pi is always false past
-+	 * this point which will optimize away all the conditional code
-+	 * further down.
-+	 */
-+	if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
-+		return -ENOSYS;
-+
-+	if (requeue_pi) {
-+		/*
-+		 * Requeue PI only works on two distinct uaddrs. This
-+		 * check is only valid for private futexes. See below.
-+		 */
-+		if (uaddr1 == uaddr2)
-+			return -EINVAL;
-+
-+		/*
-+		 * futex_requeue() allows the caller to define the number
-+		 * of waiters to wake up via the @nr_wake argument. With
-+		 * REQUEUE_PI, waking up more than one waiter is creating
-+		 * more problems than it solves. Waking up a waiter makes
-+		 * only sense if the PI futex @uaddr2 is uncontended as
-+		 * this allows the requeue code to acquire the futex
-+		 * @uaddr2 before waking the waiter. The waiter can then
-+		 * return to user space without further action. A secondary
-+		 * wakeup would just make the futex_wait_requeue_pi()
-+		 * handling more complex, because that code would have to
-+		 * look up pi_state and do more or less all the handling
-+		 * which the requeue code has to do for the to be requeued
-+		 * waiters. So restrict the number of waiters to wake to
-+		 * one, and only wake it up when the PI futex is
-+		 * uncontended. Otherwise requeue it and let the unlock of
-+		 * the PI futex handle the wakeup.
-+		 *
-+		 * All REQUEUE_PI users, e.g. pthread_cond_signal() and
-+		 * pthread_cond_broadcast() must use nr_wake=1.
-+		 */
-+		if (nr_wake != 1)
-+			return -EINVAL;
-+
-+		/*
-+		 * requeue_pi requires a pi_state, try to allocate it now
-+		 * without any locks in case it fails.
-+		 */
-+		if (refill_pi_state_cache())
-+			return -ENOMEM;
-+	}
-+
-+retry:
-+	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-+	if (unlikely(ret != 0))
-+		return ret;
-+	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
-+			    requeue_pi ? FUTEX_WRITE : FUTEX_READ);
-+	if (unlikely(ret != 0))
-+		return ret;
-+
-+	/*
-+	 * The check above which compares uaddrs is not sufficient for
-+	 * shared futexes. We need to compare the keys:
-+	 */
-+	if (requeue_pi && futex_match(&key1, &key2))
-+		return -EINVAL;
-+
-+	hb1 = futex_hash(&key1);
-+	hb2 = futex_hash(&key2);
-+
-+retry_private:
-+	futex_hb_waiters_inc(hb2);
-+	double_lock_hb(hb1, hb2);
-+
-+	if (likely(cmpval != NULL)) {
-+		u32 curval;
-+
-+		ret = futex_get_value_locked(&curval, uaddr1);
-+
-+		if (unlikely(ret)) {
-+			double_unlock_hb(hb1, hb2);
-+			futex_hb_waiters_dec(hb2);
-+
-+			ret = get_user(curval, uaddr1);
-+			if (ret)
-+				return ret;
-+
-+			if (!(flags & FLAGS_SHARED))
-+				goto retry_private;
-+
-+			goto retry;
-+		}
-+		if (curval != *cmpval) {
-+			ret = -EAGAIN;
-+			goto out_unlock;
-+		}
-+	}
-+
-+	if (requeue_pi) {
-+		struct task_struct *exiting = NULL;
-+
-+		/*
-+		 * Attempt to acquire uaddr2 and wake the top waiter. If we
-+		 * intend to requeue waiters, force setting the FUTEX_WAITERS
-+		 * bit.  We force this here where we are able to easily handle
-+		 * faults rather in the requeue loop below.
-+		 *
-+		 * Updates topwaiter::requeue_state if a top waiter exists.
-+		 */
-+		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-+						 &key2, &pi_state,
-+						 &exiting, nr_requeue);
-+
-+		/*
-+		 * At this point the top_waiter has either taken uaddr2 or
-+		 * is waiting on it. In both cases pi_state has been
-+		 * established and an initial refcount on it. In case of an
-+		 * error there's nothing.
-+		 *
-+		 * The top waiter's requeue_state is up to date:
-+		 *
-+		 *  - If the lock was acquired atomically (ret == 1), then
-+		 *    the state is Q_REQUEUE_PI_LOCKED.
-+		 *
-+		 *    The top waiter has been dequeued and woken up and can
-+		 *    return to user space immediately. The kernel/user
-+		 *    space state is consistent. In case that there must be
-+		 *    more waiters requeued the WAITERS bit in the user
-+		 *    space futex is set so the top waiter task has to go
-+		 *    into the syscall slowpath to unlock the futex. This
-+		 *    will block until this requeue operation has been
-+		 *    completed and the hash bucket locks have been
-+		 *    dropped.
-+		 *
-+		 *  - If the trylock failed with an error (ret < 0) then
-+		 *    the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
-+		 *    happened", or Q_REQUEUE_PI_IGNORE when there was an
-+		 *    interleaved early wakeup.
-+		 *
-+		 *  - If the trylock did not succeed (ret == 0) then the
-+		 *    state is either Q_REQUEUE_PI_IN_PROGRESS or
-+		 *    Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
-+		 *    This will be cleaned up in the loop below, which
-+		 *    cannot fail because futex_proxy_trylock_atomic() did
-+		 *    the same sanity checks for requeue_pi as the loop
-+		 *    below does.
-+		 */
-+		switch (ret) {
-+		case 0:
-+			/* We hold a reference on the pi state. */
-+			break;
-+
-+		case 1:
-+			/*
-+			 * futex_proxy_trylock_atomic() acquired the user space
-+			 * futex. Adjust task_count.
-+			 */
-+			task_count++;
-+			ret = 0;
-+			break;
-+
-+		/*
-+		 * If the above failed, then pi_state is NULL and
-+		 * waiter::requeue_state is correct.
-+		 */
-+		case -EFAULT:
-+			double_unlock_hb(hb1, hb2);
-+			futex_hb_waiters_dec(hb2);
-+			ret = fault_in_user_writeable(uaddr2);
-+			if (!ret)
-+				goto retry;
-+			return ret;
-+		case -EBUSY:
-+		case -EAGAIN:
-+			/*
-+			 * Two reasons for this:
-+			 * - EBUSY: Owner is exiting and we just wait for the
-+			 *   exit to complete.
-+			 * - EAGAIN: The user space value changed.
-+			 */
-+			double_unlock_hb(hb1, hb2);
-+			futex_hb_waiters_dec(hb2);
-+			/*
-+			 * Handle the case where the owner is in the middle of
-+			 * exiting. Wait for the exit to complete otherwise
-+			 * this task might loop forever, aka. live lock.
-+			 */
-+			wait_for_owner_exiting(ret, exiting);
-+			cond_resched();
-+			goto retry;
-+		default:
-+			goto out_unlock;
-+		}
-+	}
-+
-+	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-+		if (task_count - nr_wake >= nr_requeue)
-+			break;
-+
-+		if (!futex_match(&this->key, &key1))
-+			continue;
-+
-+		/*
-+		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
-+		 * be paired with each other and no other futex ops.
-+		 *
-+		 * We should never be requeueing a futex_q with a pi_state,
-+		 * which is awaiting a futex_unlock_pi().
-+		 */
-+		if ((requeue_pi && !this->rt_waiter) ||
-+		    (!requeue_pi && this->rt_waiter) ||
-+		    this->pi_state) {
-+			ret = -EINVAL;
-+			break;
-+		}
-+
-+		/* Plain futexes just wake or requeue and are done */
-+		if (!requeue_pi) {
-+			if (++task_count <= nr_wake)
-+				futex_wake_mark(&wake_q, this);
-+			else
-+				requeue_futex(this, hb1, hb2, &key2);
-+			continue;
-+		}
-+
-+		/* Ensure we requeue to the expected futex for requeue_pi. */
-+		if (!futex_match(this->requeue_pi_key, &key2)) {
-+			ret = -EINVAL;
-+			break;
-+		}
-+
-+		/*
-+		 * Requeue nr_requeue waiters and possibly one more in the case
-+		 * of requeue_pi if we couldn't acquire the lock atomically.
-+		 *
-+		 * Prepare the waiter to take the rt_mutex. Take a refcount
-+		 * on the pi_state and store the pointer in the futex_q
-+		 * object of the waiter.
-+		 */
-+		get_pi_state(pi_state);
-+
-+		/* Don't requeue when the waiter is already on the way out. */
-+		if (!futex_requeue_pi_prepare(this, pi_state)) {
-+			/*
-+			 * Early woken waiter signaled that it is on the
-+			 * way out. Drop the pi_state reference and try the
-+			 * next waiter. @this->pi_state is still NULL.
-+			 */
-+			put_pi_state(pi_state);
-+			continue;
-+		}
-+
-+		ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
-+						this->rt_waiter,
-+						this->task);
-+
-+		if (ret == 1) {
-+			/*
-+			 * We got the lock. We do neither drop the refcount
-+			 * on pi_state nor clear this->pi_state because the
-+			 * waiter needs the pi_state for cleaning up the
-+			 * user space value. It will drop the refcount
-+			 * after doing so. this::requeue_state is updated
-+			 * in the wakeup as well.
-+			 */
-+			requeue_pi_wake_futex(this, &key2, hb2);
-+			task_count++;
-+		} else if (!ret) {
-+			/* Waiter is queued, move it to hb2 */
-+			requeue_futex(this, hb1, hb2, &key2);
-+			futex_requeue_pi_complete(this, 0);
-+			task_count++;
-+		} else {
-+			/*
-+			 * rt_mutex_start_proxy_lock() detected a potential
-+			 * deadlock when we tried to queue that waiter.
-+			 * Drop the pi_state reference which we took above
-+			 * and remove the pointer to the state from the
-+			 * waiters futex_q object.
-+			 */
-+			this->pi_state = NULL;
-+			put_pi_state(pi_state);
-+			futex_requeue_pi_complete(this, ret);
-+			/*
-+			 * We stop queueing more waiters and let user space
-+			 * deal with the mess.
-+			 */
-+			break;
-+		}
-+	}
-+
-+	/*
-+	 * We took an extra initial reference to the pi_state in
-+	 * futex_proxy_trylock_atomic(). We need to drop it here again.
-+	 */
-+	put_pi_state(pi_state);
-+
-+out_unlock:
-+	double_unlock_hb(hb1, hb2);
-+	wake_up_q(&wake_q);
-+	futex_hb_waiters_dec(hb2);
-+	return ret ? ret : task_count;
-+}
-+
-+/**
-+ * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
-+ * @hb:		the hash_bucket futex_q was original enqueued on
-+ * @q:		the futex_q woken while waiting to be requeued
-+ * @timeout:	the timeout associated with the wait (NULL if none)
-+ *
-+ * Determine the cause for the early wakeup.
-+ *
-+ * Return:
-+ *  -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
-+ */
-+static inline
-+int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
-+				   struct futex_q *q,
-+				   struct hrtimer_sleeper *timeout)
-+{
-+	int ret;
-+
-+	/*
-+	 * With the hb lock held, we avoid races while we process the wakeup.
-+	 * We only need to hold hb (and not hb2) to ensure atomicity as the
-+	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
-+	 * It can't be requeued from uaddr2 to something else since we don't
-+	 * support a PI aware source futex for requeue.
-+	 */
-+	WARN_ON_ONCE(&hb->lock != q->lock_ptr);
-+
-+	/*
-+	 * We were woken prior to requeue by a timeout or a signal.
-+	 * Unqueue the futex_q and determine which it was.
-+	 */
-+	plist_del(&q->list, &hb->chain);
-+	futex_hb_waiters_dec(hb);
-+
-+	/* Handle spurious wakeups gracefully */
-+	ret = -EWOULDBLOCK;
-+	if (timeout && !timeout->task)
-+		ret = -ETIMEDOUT;
-+	else if (signal_pending(current))
-+		ret = -ERESTARTNOINTR;
-+	return ret;
-+}
-+
-+/**
-+ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
-+ * @uaddr:	the futex we initially wait on (non-pi)
-+ * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
-+ *		the same type, no requeueing from private to shared, etc.
-+ * @val:	the expected value of uaddr
-+ * @abs_time:	absolute timeout
-+ * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
-+ * @uaddr2:	the pi futex we will take prior to returning to user-space
-+ *
-+ * The caller will wait on uaddr and will be requeued by futex_requeue() to
-+ * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will wake
-+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
-+ * userspace.  This ensures the rt_mutex maintains an owner when it has waiters;
-+ * without one, the pi logic would not know which task to boost/deboost, if
-+ * there was a need to.
-+ *
-+ * We call schedule in futex_wait_queue() when we enqueue and return there
-+ * via the following--
-+ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
-+ * 2) wakeup on uaddr2 after a requeue
-+ * 3) signal
-+ * 4) timeout
-+ *
-+ * If 3, cleanup and return -ERESTARTNOINTR.
-+ *
-+ * If 2, we may then block on trying to take the rt_mutex and return via:
-+ * 5) successful lock
-+ * 6) signal
-+ * 7) timeout
-+ * 8) other lock acquisition failure
-+ *
-+ * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
-+ *
-+ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
-+ *
-+ * Return:
-+ *  -  0 - On success;
-+ *  - <0 - On error
-+ */
-+int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
-+			  u32 val, ktime_t *abs_time, u32 bitset,
-+			  u32 __user *uaddr2)
-+{
-+	struct hrtimer_sleeper timeout, *to;
-+	struct rt_mutex_waiter rt_waiter;
-+	struct futex_hash_bucket *hb;
-+	union futex_key key2 = FUTEX_KEY_INIT;
-+	struct futex_q q = futex_q_init;
-+	struct rt_mutex_base *pi_mutex;
-+	int res, ret;
-+
-+	if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+		return -ENOSYS;
-+
-+	if (uaddr == uaddr2)
-+		return -EINVAL;
-+
-+	if (!bitset)
-+		return -EINVAL;
-+
-+	to = futex_setup_timer(abs_time, &timeout, flags,
-+			       current->timer_slack_ns);
-+
-+	/*
-+	 * The waiter is allocated on our stack, manipulated by the requeue
-+	 * code while we sleep on uaddr.
-+	 */
-+	rt_mutex_init_waiter(&rt_waiter);
-+
-+	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-+	if (unlikely(ret != 0))
-+		goto out;
-+
-+	q.bitset = bitset;
-+	q.rt_waiter = &rt_waiter;
-+	q.requeue_pi_key = &key2;
-+
-+	/*
-+	 * Prepare to wait on uaddr. On success, it holds hb->lock and q
-+	 * is initialized.
-+	 */
-+	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-+	if (ret)
-+		goto out;
-+
-+	/*
-+	 * The check above which compares uaddrs is not sufficient for
-+	 * shared futexes. We need to compare the keys:
-+	 */
-+	if (futex_match(&q.key, &key2)) {
-+		futex_q_unlock(hb);
-+		ret = -EINVAL;
-+		goto out;
-+	}
-+
-+	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
-+	futex_wait_queue(hb, &q, to);
-+
-+	switch (futex_requeue_pi_wakeup_sync(&q)) {
-+	case Q_REQUEUE_PI_IGNORE:
-+		/* The waiter is still on uaddr1 */
-+		spin_lock(&hb->lock);
-+		ret = handle_early_requeue_pi_wakeup(hb, &q, to);
-+		spin_unlock(&hb->lock);
-+		break;
-+
-+	case Q_REQUEUE_PI_LOCKED:
-+		/* The requeue acquired the lock */
-+		if (q.pi_state && (q.pi_state->owner != current)) {
-+			spin_lock(q.lock_ptr);
-+			ret = fixup_pi_owner(uaddr2, &q, true);
-+			/*
-+			 * Drop the reference to the pi state which the
-+			 * requeue_pi() code acquired for us.
-+			 */
-+			put_pi_state(q.pi_state);
-+			spin_unlock(q.lock_ptr);
-+			/*
-+			 * Adjust the return value. It's either -EFAULT or
-+			 * success (1) but the caller expects 0 for success.
-+			 */
-+			ret = ret < 0 ? ret : 0;
-+		}
-+		break;
-+
-+	case Q_REQUEUE_PI_DONE:
-+		/* Requeue completed. Current is 'pi_blocked_on' the rtmutex */
-+		pi_mutex = &q.pi_state->pi_mutex;
-+		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
-+
-+		/* Current is not longer pi_blocked_on */
-+		spin_lock(q.lock_ptr);
-+		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
-+			ret = 0;
-+
-+		debug_rt_mutex_free_waiter(&rt_waiter);
-+		/*
-+		 * Fixup the pi_state owner and possibly acquire the lock if we
-+		 * haven't already.
-+		 */
-+		res = fixup_pi_owner(uaddr2, &q, !ret);
-+		/*
-+		 * If fixup_pi_owner() returned an error, propagate that.  If it
-+		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
-+		 */
-+		if (res)
-+			ret = (res < 0) ? res : 0;
-+
-+		futex_unqueue_pi(&q);
-+		spin_unlock(q.lock_ptr);
-+
-+		if (ret == -EINTR) {
-+			/*
-+			 * We've already been requeued, but cannot restart
-+			 * by calling futex_lock_pi() directly. We could
-+			 * restart this syscall, but it would detect that
-+			 * the user space "val" changed and return
-+			 * -EWOULDBLOCK.  Save the overhead of the restart
-+			 * and return -EWOULDBLOCK directly.
-+			 */
-+			ret = -EWOULDBLOCK;
-+		}
-+		break;
-+	default:
-+		BUG();
-+	}
-+
-+out:
-+	if (to) {
-+		hrtimer_cancel(&to->timer);
-+		destroy_hrtimer_on_stack(&to->timer);
-+	}
-+	return ret;
-+}
-+
-diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
-new file mode 100644
-index 000000000..368e9c17f
---- /dev/null
-+++ b/kernel/futex/syscalls.c
-@@ -0,0 +1,396 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/compat.h>
-+#include <linux/syscalls.h>
-+#include <linux/time_namespace.h>
-+
-+#include "futex.h"
-+
-+/*
-+ * Support for robust futexes: the kernel cleans up held futexes at
-+ * thread exit time.
-+ *
-+ * Implementation: user-space maintains a per-thread list of locks it
-+ * is holding. Upon do_exit(), the kernel carefully walks this list,
-+ * and marks all locks that are owned by this thread with the
-+ * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
-+ * always manipulated with the lock held, so the list is private and
-+ * per-thread. Userspace also maintains a per-thread 'list_op_pending'
-+ * field, to allow the kernel to clean up if the thread dies after
-+ * acquiring the lock, but just before it could have added itself to
-+ * the list. There can only be one such pending lock.
-+ */
-+
-+/**
-+ * sys_set_robust_list() - Set the robust-futex list head of a task
-+ * @head:	pointer to the list-head
-+ * @len:	length of the list-head, as userspace expects
-+ */
-+SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
-+		size_t, len)
-+{
-+	if (!futex_cmpxchg_enabled)
-+		return -ENOSYS;
-+	/*
-+	 * The kernel knows only one size for now:
-+	 */
-+	if (unlikely(len != sizeof(*head)))
-+		return -EINVAL;
-+
-+	current->robust_list = head;
-+
-+	return 0;
-+}
-+
-+/**
-+ * sys_get_robust_list() - Get the robust-futex list head of a task
-+ * @pid:	pid of the process [zero for current task]
-+ * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
-+ * @len_ptr:	pointer to a length field, the kernel fills in the header size
-+ */
-+SYSCALL_DEFINE3(get_robust_list, int, pid,
-+		struct robust_list_head __user * __user *, head_ptr,
-+		size_t __user *, len_ptr)
-+{
-+	struct robust_list_head __user *head;
-+	unsigned long ret;
-+	struct task_struct *p;
-+
-+	if (!futex_cmpxchg_enabled)
-+		return -ENOSYS;
-+
-+	rcu_read_lock();
-+
-+	ret = -ESRCH;
-+	if (!pid)
-+		p = current;
-+	else {
-+		p = find_task_by_vpid(pid);
-+		if (!p)
-+			goto err_unlock;
-+	}
-+
-+	ret = -EPERM;
-+	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-+		goto err_unlock;
-+
-+	head = p->robust_list;
-+	rcu_read_unlock();
-+
-+	if (put_user(sizeof(*head), len_ptr))
-+		return -EFAULT;
-+	return put_user(head, head_ptr);
-+
-+err_unlock:
-+	rcu_read_unlock();
-+
-+	return ret;
-+}
-+
-+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-+		u32 __user *uaddr2, u32 val2, u32 val3)
-+{
-+	int cmd = op & FUTEX_CMD_MASK;
-+	unsigned int flags = 0;
-+
-+	if (!(op & FUTEX_PRIVATE_FLAG))
-+		flags |= FLAGS_SHARED;
-+
-+	if (op & FUTEX_CLOCK_REALTIME) {
-+		flags |= FLAGS_CLOCKRT;
-+		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
-+		    cmd != FUTEX_LOCK_PI2)
-+			return -ENOSYS;
-+	}
-+
-+	switch (cmd) {
-+	case FUTEX_LOCK_PI:
-+	case FUTEX_LOCK_PI2:
-+	case FUTEX_UNLOCK_PI:
-+	case FUTEX_TRYLOCK_PI:
-+	case FUTEX_WAIT_REQUEUE_PI:
-+	case FUTEX_CMP_REQUEUE_PI:
-+		if (!futex_cmpxchg_enabled)
-+			return -ENOSYS;
-+	}
-+
-+	switch (cmd) {
-+	case FUTEX_WAIT:
-+		val3 = FUTEX_BITSET_MATCH_ANY;
-+		fallthrough;
-+	case FUTEX_WAIT_BITSET:
-+		return futex_wait(uaddr, flags, val, timeout, val3);
-+	case FUTEX_WAKE:
-+		val3 = FUTEX_BITSET_MATCH_ANY;
-+		fallthrough;
-+	case FUTEX_WAKE_BITSET:
-+		return futex_wake(uaddr, flags, val, val3);
-+	case FUTEX_REQUEUE:
-+		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
-+	case FUTEX_CMP_REQUEUE:
-+		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
-+	case FUTEX_WAKE_OP:
-+		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
-+	case FUTEX_LOCK_PI:
-+		flags |= FLAGS_CLOCKRT;
-+		fallthrough;
-+	case FUTEX_LOCK_PI2:
-+		return futex_lock_pi(uaddr, flags, timeout, 0);
-+	case FUTEX_UNLOCK_PI:
-+		return futex_unlock_pi(uaddr, flags);
-+	case FUTEX_TRYLOCK_PI:
-+		return futex_lock_pi(uaddr, flags, NULL, 1);
-+	case FUTEX_WAIT_REQUEUE_PI:
-+		val3 = FUTEX_BITSET_MATCH_ANY;
-+		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
-+					     uaddr2);
-+	case FUTEX_CMP_REQUEUE_PI:
-+		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
-+	}
-+	return -ENOSYS;
-+}
-+
-+static __always_inline bool futex_cmd_has_timeout(u32 cmd)
-+{
-+	switch (cmd) {
-+	case FUTEX_WAIT:
-+	case FUTEX_LOCK_PI:
-+	case FUTEX_LOCK_PI2:
-+	case FUTEX_WAIT_BITSET:
-+	case FUTEX_WAIT_REQUEUE_PI:
-+		return true;
-+	}
-+	return false;
-+}
-+
-+static __always_inline int
-+futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
-+{
-+	if (!timespec64_valid(ts))
-+		return -EINVAL;
-+
-+	*t = timespec64_to_ktime(*ts);
-+	if (cmd == FUTEX_WAIT)
-+		*t = ktime_add_safe(ktime_get(), *t);
-+	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
-+		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
-+	return 0;
-+}
-+
-+SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
-+		const struct __kernel_timespec __user *, utime,
-+		u32 __user *, uaddr2, u32, val3)
-+{
-+	int ret, cmd = op & FUTEX_CMD_MASK;
-+	ktime_t t, *tp = NULL;
-+	struct timespec64 ts;
-+
-+	if (utime && futex_cmd_has_timeout(cmd)) {
-+		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
-+			return -EFAULT;
-+		if (get_timespec64(&ts, utime))
-+			return -EFAULT;
-+		ret = futex_init_timeout(cmd, op, &ts, &t);
-+		if (ret)
-+			return ret;
-+		tp = &t;
-+	}
-+
-+	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
-+}
-+
-+/* Mask of available flags for each futex in futex_waitv list */
-+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
-+
-+/**
-+ * futex_parse_waitv - Parse a waitv array from userspace
-+ * @futexv:	Kernel side list of waiters to be filled
-+ * @uwaitv:     Userspace list to be parsed
-+ * @nr_futexes: Length of futexv
-+ *
-+ * Return: Error code on failure, 0 on success
-+ */
-+static int futex_parse_waitv(struct futex_vector *futexv,
-+			     struct futex_waitv __user *uwaitv,
-+			     unsigned int nr_futexes)
-+{
-+	struct futex_waitv aux;
-+	unsigned int i;
-+
-+	for (i = 0; i < nr_futexes; i++) {
-+		if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
-+			return -EFAULT;
-+
-+		if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
-+			return -EINVAL;
-+
-+		if (!(aux.flags & FUTEX_32))
-+			return -EINVAL;
-+
-+		futexv[i].w.flags = aux.flags;
-+		futexv[i].w.val = aux.val;
-+		futexv[i].w.uaddr = aux.uaddr;
-+		futexv[i].q = futex_q_init;
-+	}
-+
-+	return 0;
-+}
-+
-+/**
-+ * sys_futex_waitv - Wait on a list of futexes
-+ * @waiters:    List of futexes to wait on
-+ * @nr_futexes: Length of futexv
-+ * @flags:      Flag for timeout (monotonic/realtime)
-+ * @timeout:	Optional absolute timeout.
-+ * @clockid:	Clock to be used for the timeout, realtime or monotonic.
-+ *
-+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
-+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
-+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for the
-+ * operation. Each waiter has individual flags. The `flags` argument for the
-+ * syscall should be used solely for specifying the timeout as realtime, if
-+ * needed. Flags for private futexes, sizes, etc. should be used on the
-+ * individual flags of each waiter.
-+ *
-+ * Returns the array index of one of the awaken futexes. There's no given
-+ * information of how many were awakened, or any particular attribute of it (if
-+ * it's the first awakened, if it is of the smaller index...).
-+ */
-+
-+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
-+		unsigned int, nr_futexes, unsigned int, flags,
-+		struct __kernel_timespec __user *, timeout, clockid_t, clockid)
-+{
-+	struct hrtimer_sleeper to;
-+	struct futex_vector *futexv;
-+	struct timespec64 ts;
-+	ktime_t time;
-+	int ret;
-+
-+	/* This syscall supports no flags for now */
-+	if (flags)
-+		return -EINVAL;
-+
-+	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
-+		return -EINVAL;
-+
-+	if (timeout) {
-+		int flag_clkid = 0, flag_init = 0;
-+
-+		if (clockid == CLOCK_REALTIME) {
-+			flag_clkid = FLAGS_CLOCKRT;
-+			flag_init = FUTEX_CLOCK_REALTIME;
-+		}
-+
-+		if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
-+			return -EINVAL;
-+
-+		if (get_timespec64(&ts, timeout))
-+			return -EFAULT;
-+
-+		/*
-+		 * Since there's no opcode for futex_waitv, use
-+		 * FUTEX_WAIT_BITSET that uses absolute timeout as well
-+		 */
-+		ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
-+		if (ret)
-+			return ret;
-+
-+		futex_setup_timer(&time, &to, flag_clkid, 0);
-+	}
-+
-+	futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
-+	if (!futexv)
-+		return -ENOMEM;
-+
-+	ret = futex_parse_waitv(futexv, waiters, nr_futexes);
-+	if (!ret)
-+		ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
-+
-+	if (timeout) {
-+		hrtimer_cancel(&to.timer);
-+		destroy_hrtimer_on_stack(&to.timer);
-+	}
-+
-+	kfree(futexv);
-+	return ret;
-+}
-+
-+#ifdef CONFIG_COMPAT
-+COMPAT_SYSCALL_DEFINE2(set_robust_list,
-+		struct compat_robust_list_head __user *, head,
-+		compat_size_t, len)
-+{
-+	if (!futex_cmpxchg_enabled)
-+		return -ENOSYS;
-+
-+	if (unlikely(len != sizeof(*head)))
-+		return -EINVAL;
-+
-+	current->compat_robust_list = head;
-+
-+	return 0;
-+}
-+
-+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
-+			compat_uptr_t __user *, head_ptr,
-+			compat_size_t __user *, len_ptr)
-+{
-+	struct compat_robust_list_head __user *head;
-+	unsigned long ret;
-+	struct task_struct *p;
-+
-+	if (!futex_cmpxchg_enabled)
-+		return -ENOSYS;
-+
-+	rcu_read_lock();
-+
-+	ret = -ESRCH;
-+	if (!pid)
-+		p = current;
-+	else {
-+		p = find_task_by_vpid(pid);
-+		if (!p)
-+			goto err_unlock;
-+	}
-+
-+	ret = -EPERM;
-+	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-+		goto err_unlock;
-+
-+	head = p->compat_robust_list;
-+	rcu_read_unlock();
-+
-+	if (put_user(sizeof(*head), len_ptr))
-+		return -EFAULT;
-+	return put_user(ptr_to_compat(head), head_ptr);
-+
-+err_unlock:
-+	rcu_read_unlock();
-+
-+	return ret;
-+}
-+#endif /* CONFIG_COMPAT */
-+
-+#ifdef CONFIG_COMPAT_32BIT_TIME
-+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
-+		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
-+		u32, val3)
-+{
-+	int ret, cmd = op & FUTEX_CMD_MASK;
-+	ktime_t t, *tp = NULL;
-+	struct timespec64 ts;
-+
-+	if (utime && futex_cmd_has_timeout(cmd)) {
-+		if (get_old_timespec32(&ts, utime))
-+			return -EFAULT;
-+		ret = futex_init_timeout(cmd, op, &ts, &t);
-+		if (ret)
-+			return ret;
-+		tp = &t;
-+	}
-+
-+	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
-+}
-+#endif /* CONFIG_COMPAT_32BIT_TIME */
-+
-diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
-new file mode 100644
-index 000000000..b45597aab
---- /dev/null
-+++ b/kernel/futex/waitwake.c
-@@ -0,0 +1,708 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/sched/task.h>
-+#include <linux/sched/signal.h>
-+#include <linux/freezer.h>
-+
-+#include "futex.h"
-+
-+/*
-+ * READ this before attempting to hack on futexes!
-+ *
-+ * Basic futex operation and ordering guarantees
-+ * =============================================
-+ *
-+ * The waiter reads the futex value in user space and calls
-+ * futex_wait(). This function computes the hash bucket and acquires
-+ * the hash bucket lock. After that it reads the futex user space value
-+ * again and verifies that the data has not changed. If it has not changed
-+ * it enqueues itself into the hash bucket, releases the hash bucket lock
-+ * and schedules.
-+ *
-+ * The waker side modifies the user space value of the futex and calls
-+ * futex_wake(). This function computes the hash bucket and acquires the
-+ * hash bucket lock. Then it looks for waiters on that futex in the hash
-+ * bucket and wakes them.
-+ *
-+ * In futex wake up scenarios where no tasks are blocked on a futex, taking
-+ * the hb spinlock can be avoided and simply return. In order for this
-+ * optimization to work, ordering guarantees must exist so that the waiter
-+ * being added to the list is acknowledged when the list is concurrently being
-+ * checked by the waker, avoiding scenarios like the following:
-+ *
-+ * CPU 0                               CPU 1
-+ * val = *futex;
-+ * sys_futex(WAIT, futex, val);
-+ *   futex_wait(futex, val);
-+ *   uval = *futex;
-+ *                                     *futex = newval;
-+ *                                     sys_futex(WAKE, futex);
-+ *                                       futex_wake(futex);
-+ *                                       if (queue_empty())
-+ *                                         return;
-+ *   if (uval == val)
-+ *      lock(hash_bucket(futex));
-+ *      queue();
-+ *     unlock(hash_bucket(futex));
-+ *     schedule();
-+ *
-+ * This would cause the waiter on CPU 0 to wait forever because it
-+ * missed the transition of the user space value from val to newval
-+ * and the waker did not find the waiter in the hash bucket queue.
-+ *
-+ * The correct serialization ensures that a waiter either observes
-+ * the changed user space value before blocking or is woken by a
-+ * concurrent waker:
-+ *
-+ * CPU 0                                 CPU 1
-+ * val = *futex;
-+ * sys_futex(WAIT, futex, val);
-+ *   futex_wait(futex, val);
-+ *
-+ *   waiters++; (a)
-+ *   smp_mb(); (A) <-- paired with -.
-+ *                                  |
-+ *   lock(hash_bucket(futex));      |
-+ *                                  |
-+ *   uval = *futex;                 |
-+ *                                  |        *futex = newval;
-+ *                                  |        sys_futex(WAKE, futex);
-+ *                                  |          futex_wake(futex);
-+ *                                  |
-+ *                                  `--------> smp_mb(); (B)
-+ *   if (uval == val)
-+ *     queue();
-+ *     unlock(hash_bucket(futex));
-+ *     schedule();                         if (waiters)
-+ *                                           lock(hash_bucket(futex));
-+ *   else                                    wake_waiters(futex);
-+ *     waiters--; (b)                        unlock(hash_bucket(futex));
-+ *
-+ * Where (A) orders the waiters increment and the futex value read through
-+ * atomic operations (see futex_hb_waiters_inc) and where (B) orders the write
-+ * to futex and the waiters read (see futex_hb_waiters_pending()).
-+ *
-+ * This yields the following case (where X:=waiters, Y:=futex):
-+ *
-+ *	X = Y = 0
-+ *
-+ *	w[X]=1		w[Y]=1
-+ *	MB		MB
-+ *	r[Y]=y		r[X]=x
-+ *
-+ * Which guarantees that x==0 && y==0 is impossible; which translates back into
-+ * the guarantee that we cannot both miss the futex variable change and the
-+ * enqueue.
-+ *
-+ * Note that a new waiter is accounted for in (a) even when it is possible that
-+ * the wait call can return error, in which case we backtrack from it in (b).
-+ * Refer to the comment in futex_q_lock().
-+ *
-+ * Similarly, in order to account for waiters being requeued on another
-+ * address we always increment the waiters for the destination bucket before
-+ * acquiring the lock. It then decrements them again  after releasing it -
-+ * the code that actually moves the futex(es) between hash buckets (requeue_futex)
-+ * will do the additional required waiter count housekeeping. This is done for
-+ * double_lock_hb() and double_unlock_hb(), respectively.
-+ */
-+
-+/*
-+ * The hash bucket lock must be held when this is called.
-+ * Afterwards, the futex_q must not be accessed. Callers
-+ * must ensure to later call wake_up_q() for the actual
-+ * wakeups to occur.
-+ */
-+void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
-+{
-+	struct task_struct *p = q->task;
-+
-+	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
-+		return;
-+
-+	get_task_struct(p);
-+	__futex_unqueue(q);
-+	/*
-+	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
-+	 * is written, without taking any locks. This is possible in the event
-+	 * of a spurious wakeup, for example. A memory barrier is required here
-+	 * to prevent the following store to lock_ptr from getting ahead of the
-+	 * plist_del in __futex_unqueue().
-+	 */
-+	smp_store_release(&q->lock_ptr, NULL);
-+
-+	/*
-+	 * Queue the task for later wakeup for after we've released
-+	 * the hb->lock.
-+	 */
-+	wake_q_add_safe(wake_q, p);
-+}
-+
-+/*
-+ * Wake up waiters matching bitset queued on this futex (uaddr).
-+ */
-+int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
-+{
-+	struct futex_hash_bucket *hb;
-+	struct futex_q *this, *next;
-+	union futex_key key = FUTEX_KEY_INIT;
-+	int ret;
-+	DEFINE_WAKE_Q(wake_q);
-+
-+	if (!bitset)
-+		return -EINVAL;
-+
-+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
-+	if (unlikely(ret != 0))
-+		return ret;
-+
-+	hb = futex_hash(&key);
-+
-+	/* Make sure we really have tasks to wakeup */
-+	if (!futex_hb_waiters_pending(hb))
-+		return ret;
-+
-+	spin_lock(&hb->lock);
-+
-+	plist_for_each_entry_safe(this, next, &hb->chain, list) {
-+		if (futex_match (&this->key, &key)) {
-+			if (this->pi_state || this->rt_waiter) {
-+				ret = -EINVAL;
-+				break;
-+			}
-+
-+			/* Check if one of the bits is set in both bitsets */
-+			if (!(this->bitset & bitset))
-+				continue;
-+
-+			futex_wake_mark(&wake_q, this);
-+			if (++ret >= nr_wake)
-+				break;
-+		}
-+	}
-+
-+	spin_unlock(&hb->lock);
-+	wake_up_q(&wake_q);
-+	return ret;
-+}
-+
-+static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
-+{
-+	unsigned int op =	  (encoded_op & 0x70000000) >> 28;
-+	unsigned int cmp =	  (encoded_op & 0x0f000000) >> 24;
-+	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
-+	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
-+	int oldval, ret;
-+
-+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
-+		if (oparg < 0 || oparg > 31) {
-+			char comm[sizeof(current->comm)];
-+			/*
-+			 * kill this print and return -EINVAL when userspace
-+			 * is sane again
-+			 */
-+			pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
-+					get_task_comm(comm, current), oparg);
-+			oparg &= 31;
-+		}
-+		oparg = 1 << oparg;
-+	}
-+
-+	pagefault_disable();
-+	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
-+	pagefault_enable();
-+	if (ret)
-+		return ret;
-+
-+	switch (cmp) {
-+	case FUTEX_OP_CMP_EQ:
-+		return oldval == cmparg;
-+	case FUTEX_OP_CMP_NE:
-+		return oldval != cmparg;
-+	case FUTEX_OP_CMP_LT:
-+		return oldval < cmparg;
-+	case FUTEX_OP_CMP_GE:
-+		return oldval >= cmparg;
-+	case FUTEX_OP_CMP_LE:
-+		return oldval <= cmparg;
-+	case FUTEX_OP_CMP_GT:
-+		return oldval > cmparg;
-+	default:
-+		return -ENOSYS;
-+	}
-+}
-+
-+/*
-+ * Wake up all waiters hashed on the physical page that is mapped
-+ * to this virtual address:
-+ */
-+int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
-+		  int nr_wake, int nr_wake2, int op)
-+{
-+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-+	struct futex_hash_bucket *hb1, *hb2;
-+	struct futex_q *this, *next;
-+	int ret, op_ret;
-+	DEFINE_WAKE_Q(wake_q);
-+
-+retry:
-+	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-+	if (unlikely(ret != 0))
-+		return ret;
-+	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-+	if (unlikely(ret != 0))
-+		return ret;
-+
-+	hb1 = futex_hash(&key1);
-+	hb2 = futex_hash(&key2);
-+
-+retry_private:
-+	double_lock_hb(hb1, hb2);
-+	op_ret = futex_atomic_op_inuser(op, uaddr2);
-+	if (unlikely(op_ret < 0)) {
-+		double_unlock_hb(hb1, hb2);
-+
-+		if (!IS_ENABLED(CONFIG_MMU) ||
-+		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
-+			/*
-+			 * we don't get EFAULT from MMU faults if we don't have
-+			 * an MMU, but we might get them from range checking
-+			 */
-+			ret = op_ret;
-+			return ret;
-+		}
-+
-+		if (op_ret == -EFAULT) {
-+			ret = fault_in_user_writeable(uaddr2);
-+			if (ret)
-+				return ret;
-+		}
-+
-+		cond_resched();
-+		if (!(flags & FLAGS_SHARED))
-+			goto retry_private;
-+		goto retry;
-+	}
-+
-+	plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-+		if (futex_match (&this->key, &key1)) {
-+			if (this->pi_state || this->rt_waiter) {
-+				ret = -EINVAL;
-+				goto out_unlock;
-+			}
-+			futex_wake_mark(&wake_q, this);
-+			if (++ret >= nr_wake)
-+				break;
-+		}
-+	}
-+
-+	if (op_ret > 0) {
-+		op_ret = 0;
-+		plist_for_each_entry_safe(this, next, &hb2->chain, list) {
-+			if (futex_match (&this->key, &key2)) {
-+				if (this->pi_state || this->rt_waiter) {
-+					ret = -EINVAL;
-+					goto out_unlock;
-+				}
-+				futex_wake_mark(&wake_q, this);
-+				if (++op_ret >= nr_wake2)
-+					break;
-+			}
-+		}
-+		ret += op_ret;
-+	}
-+
-+out_unlock:
-+	double_unlock_hb(hb1, hb2);
-+	wake_up_q(&wake_q);
-+	return ret;
-+}
-+
-+static long futex_wait_restart(struct restart_block *restart);
-+
-+/**
-+ * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
-+ * @hb:		the futex hash bucket, must be locked by the caller
-+ * @q:		the futex_q to queue up on
-+ * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
-+ */
-+void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
-+			    struct hrtimer_sleeper *timeout)
-+{
-+	/*
-+	 * The task state is guaranteed to be set before another task can
-+	 * wake it. set_current_state() is implemented using smp_store_mb() and
-+	 * futex_queue() calls spin_unlock() upon completion, both serializing
-+	 * access to the hash list and forcing another memory barrier.
-+	 */
-+	set_current_state(TASK_INTERRUPTIBLE);
-+	futex_queue(q, hb);
-+
-+	/* Arm the timer */
-+	if (timeout)
-+		hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
-+
-+	/*
-+	 * If we have been removed from the hash list, then another task
-+	 * has tried to wake us, and we can skip the call to schedule().
-+	 */
-+	if (likely(!plist_node_empty(&q->list))) {
-+		/*
-+		 * If the timer has already expired, current will already be
-+		 * flagged for rescheduling. Only call schedule if there
-+		 * is no timeout, or if it has yet to expire.
-+		 */
-+		if (!timeout || timeout->task)
-+			freezable_schedule();
-+	}
-+	__set_current_state(TASK_RUNNING);
-+}
-+
-+/**
-+ * unqueue_multiple - Remove various futexes from their hash bucket
-+ * @v:	   The list of futexes to unqueue
-+ * @count: Number of futexes in the list
-+ *
-+ * Helper to unqueue a list of futexes. This can't fail.
-+ *
-+ * Return:
-+ *  - >=0 - Index of the last futex that was awoken;
-+ *  - -1  - No futex was awoken
-+ */
-+static int unqueue_multiple(struct futex_vector *v, int count)
-+{
-+	int ret = -1, i;
-+
-+	for (i = 0; i < count; i++) {
-+		if (!futex_unqueue(&v[i].q))
-+			ret = i;
-+	}
-+
-+	return ret;
-+}
-+
-+/**
-+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
-+ * @vs:		The futex list to wait on
-+ * @count:	The size of the list
-+ * @awaken:	Index of the last awoken futex, if any. Used to notify the
-+ *		caller that it can return this index to userspace (return parameter)
-+ *
-+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
-+ * the futex list is invalid or if any futex was already awoken. On success the
-+ * task is ready to interruptible sleep.
-+ *
-+ * Return:
-+ *  -  1 - One of the futexes was awaken by another thread
-+ *  -  0 - Success
-+ *  - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
-+ */
-+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *awaken)
-+{
-+	struct futex_hash_bucket *hb;
-+	bool retry = false;
-+	int ret, i;
-+	u32 uval;
-+
-+	/*
-+	 * Enqueuing multiple futexes is tricky, because we need to enqueue
-+	 * each futex in the list before dealing with the next one to avoid
-+	 * deadlocking on the hash bucket. But, before enqueuing, we need to
-+	 * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
-+	 * absorb any awake events, which cannot be done before the
-+	 * get_futex_key of the next key, because it calls get_user_pages,
-+	 * which can sleep. Thus, we fetch the list of futexes keys in two
-+	 * steps, by first pinning all the memory keys in the futex key, and
-+	 * only then we read each key and queue the corresponding futex.
-+	 *
-+	 * Private futexes doesn't need to recalculate hash in retry, so skip
-+	 * get_futex_key() when retrying.
-+	 */
-+retry:
-+	for (i = 0; i < count; i++) {
-+		if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
-+			continue;
-+
-+		ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
-+				    !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
-+				    &vs[i].q.key, FUTEX_READ);
-+
-+		if (unlikely(ret))
-+			return ret;
-+	}
-+
-+	set_current_state(TASK_INTERRUPTIBLE);
-+
-+	for (i = 0; i < count; i++) {
-+		u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
-+		struct futex_q *q = &vs[i].q;
-+		u32 val = (u32)vs[i].w.val;
-+
-+		hb = futex_q_lock(q);
-+		ret = futex_get_value_locked(&uval, uaddr);
-+
-+		if (!ret && uval == val) {
-+			/*
-+			 * The bucket lock can't be held while dealing with the
-+			 * next futex. Queue each futex at this moment so hb can
-+			 * be unlocked.
-+			 */
-+			futex_queue(q, hb);
-+			continue;
-+		}
-+
-+		futex_q_unlock(hb);
-+		__set_current_state(TASK_RUNNING);
-+
-+		/*
-+		 * Even if something went wrong, if we find out that a futex
-+		 * was awaken, we don't return error and return this index to
-+		 * userspace
-+		 */
-+		*awaken = unqueue_multiple(vs, i);
-+		if (*awaken >= 0)
-+			return 1;
-+
-+		if (ret) {
-+			/*
-+			 * If we need to handle a page fault, we need to do so
-+			 * without any lock and any enqueued futex (otherwise
-+			 * we could lose some wakeup). So we do it here, after
-+			 * undoing all the work done so far. In success, we
-+			 * retry all the work.
-+			 */
-+			if (get_user(uval, uaddr))
-+				return -EFAULT;
-+
-+			retry = true;
-+			goto retry;
-+		}
-+
-+		if (uval != val)
-+			return -EWOULDBLOCK;
-+	}
-+
-+	return 0;
-+}
-+
-+/**
-+ * futex_sleep_multiple - Check sleeping conditions and sleep
-+ * @vs:    List of futexes to wait for
-+ * @count: Length of vs
-+ * @to:    Timeout
-+ *
-+ * Sleep if and only if the timeout hasn't expired and no futex on the list has
-+ * been awaken.
-+ */
-+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
-+				 struct hrtimer_sleeper *to)
-+{
-+	if (to && !to->task)
-+		return;
-+
-+	for (; count; count--, vs++) {
-+		if (!READ_ONCE(vs->q.lock_ptr))
-+			return;
-+	}
-+
-+	freezable_schedule();
-+}
-+
-+/**
-+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes
-+ * @vs:		The list of futexes to wait on
-+ * @count:	The number of objects
-+ * @to:		Timeout before giving up and returning to userspace
-+ *
-+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
-+ * sleeps on a group of futexes and returns on the first futex that is
-+ * wake, or after the timeout has elapsed.
-+ *
-+ * Return:
-+ *  - >=0 - Hint to the futex that was awoken
-+ *  - <0  - On error
-+ */
-+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
-+			struct hrtimer_sleeper *to)
-+{
-+	int ret, hint = 0;
-+
-+	if (to)
-+		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
-+
-+	while (1) {
-+		ret = futex_wait_multiple_setup(vs, count, &hint);
-+		if (ret) {
-+			if (ret > 0) {
-+				/* A futex was awaken during setup */
-+				ret = hint;
-+			}
-+			return ret;
-+		}
-+
-+		futex_sleep_multiple(vs, count, to);
-+
-+		__set_current_state(TASK_RUNNING);
-+
-+		ret = unqueue_multiple(vs, count);
-+		if (ret >= 0)
-+			return ret;
-+
-+		if (to && !to->task)
-+			return -ETIMEDOUT;
-+		else if (signal_pending(current))
-+			return -ERESTARTSYS;
-+		/*
-+		 * The final case is a spurious wakeup, for
-+		 * which just retry.
-+		 */
-+	}
-+}
-+
-+/**
-+ * futex_wait_setup() - Prepare to wait on a futex
-+ * @uaddr:	the futex userspace address
-+ * @val:	the expected value
-+ * @flags:	futex flags (FLAGS_SHARED, etc.)
-+ * @q:		the associated futex_q
-+ * @hb:		storage for hash_bucket pointer to be returned to caller
-+ *
-+ * Setup the futex_q and locate the hash_bucket.  Get the futex value and
-+ * compare it with the expected value.  Handle atomic faults internally.
-+ * Return with the hb lock held on success, and unlocked on failure.
-+ *
-+ * Return:
-+ *  -  0 - uaddr contains val and hb has been locked;
-+ *  - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
-+ */
-+int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-+		     struct futex_q *q, struct futex_hash_bucket **hb)
-+{
-+	u32 uval;
-+	int ret;
-+
-+	/*
-+	 * Access the page AFTER the hash-bucket is locked.
-+	 * Order is important:
-+	 *
-+	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
-+	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
-+	 *
-+	 * The basic logical guarantee of a futex is that it blocks ONLY
-+	 * if cond(var) is known to be true at the time of blocking, for
-+	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
-+	 * would open a race condition where we could block indefinitely with
-+	 * cond(var) false, which would violate the guarantee.
-+	 *
-+	 * On the other hand, we insert q and release the hash-bucket only
-+	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
-+	 * absorb a wakeup if *uaddr does not match the desired values
-+	 * while the syscall executes.
-+	 */
-+retry:
-+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
-+	if (unlikely(ret != 0))
-+		return ret;
-+
-+retry_private:
-+	*hb = futex_q_lock(q);
-+
-+	ret = futex_get_value_locked(&uval, uaddr);
-+
-+	if (ret) {
-+		futex_q_unlock(*hb);
-+
-+		ret = get_user(uval, uaddr);
-+		if (ret)
-+			return ret;
-+
-+		if (!(flags & FLAGS_SHARED))
-+			goto retry_private;
-+
-+		goto retry;
-+	}
-+
-+	if (uval != val) {
-+		futex_q_unlock(*hb);
-+		ret = -EWOULDBLOCK;
-+	}
-+
-+	return ret;
-+}
-+
-+int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
-+{
-+	struct hrtimer_sleeper timeout, *to;
-+	struct restart_block *restart;
-+	struct futex_hash_bucket *hb;
-+	struct futex_q q = futex_q_init;
-+	int ret;
-+
-+	if (!bitset)
-+		return -EINVAL;
-+	q.bitset = bitset;
-+
-+	to = futex_setup_timer(abs_time, &timeout, flags,
-+			       current->timer_slack_ns);
-+retry:
-+	/*
-+	 * Prepare to wait on uaddr. On success, it holds hb->lock and q
-+	 * is initialized.
-+	 */
-+	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-+	if (ret)
-+		goto out;
-+
-+	/* futex_queue and wait for wakeup, timeout, or a signal. */
-+	futex_wait_queue(hb, &q, to);
-+
-+	/* If we were woken (and unqueued), we succeeded, whatever. */
-+	ret = 0;
-+	if (!futex_unqueue(&q))
-+		goto out;
-+	ret = -ETIMEDOUT;
-+	if (to && !to->task)
-+		goto out;
-+
-+	/*
-+	 * We expect signal_pending(current), but we might be the
-+	 * victim of a spurious wakeup as well.
-+	 */
-+	if (!signal_pending(current))
-+		goto retry;
-+
-+	ret = -ERESTARTSYS;
-+	if (!abs_time)
-+		goto out;
-+
-+	restart = &current->restart_block;
-+	restart->futex.uaddr = uaddr;
-+	restart->futex.val = val;
-+	restart->futex.time = *abs_time;
-+	restart->futex.bitset = bitset;
-+	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
-+
-+	ret = set_restart_fn(restart, futex_wait_restart);
-+
-+out:
-+	if (to) {
-+		hrtimer_cancel(&to->timer);
-+		destroy_hrtimer_on_stack(&to->timer);
-+	}
-+	return ret;
-+}
-+
-+static long futex_wait_restart(struct restart_block *restart)
-+{
-+	u32 __user *uaddr = restart->futex.uaddr;
-+	ktime_t t, *tp = NULL;
-+
-+	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
-+		t = restart->futex.time;
-+		tp = &t;
-+	}
-+	restart->fn = do_no_restart_syscall;
-+
-+	return (long)futex_wait(uaddr, restart->futex.flags,
-+				restart->futex.val, tp, restart->futex.bitset);
-+}
-+
-diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index f43d89d92..d1944258c 100644
---- a/kernel/sys_ni.c
-+++ b/kernel/sys_ni.c
-@@ -143,13 +143,14 @@ COND_SYSCALL(capset);
- /* __ARCH_WANT_SYS_CLONE3 */
- COND_SYSCALL(clone3);
- 
--/* kernel/futex.c */
-+/* kernel/futex/syscalls.c */
- COND_SYSCALL(futex);
- COND_SYSCALL(futex_time32);
- COND_SYSCALL(set_robust_list);
- COND_SYSCALL_COMPAT(set_robust_list);
- COND_SYSCALL(get_robust_list);
- COND_SYSCALL_COMPAT(get_robust_list);
-+COND_SYSCALL(futex_waitv);
- 
- /* kernel/hrtimer.c */
- 
-diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
-index 0e78b49d0..fbcbdb696 100644
---- a/tools/testing/selftests/futex/functional/.gitignore
-+++ b/tools/testing/selftests/futex/functional/.gitignore
-@@ -8,3 +8,4 @@ futex_wait_uninitialized_heap
- futex_wait_wouldblock
- futex_wait
- futex_requeue
-+futex_waitv
-diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
-index bd1fec59e..5cc38de9d 100644
---- a/tools/testing/selftests/futex/functional/Makefile
-+++ b/tools/testing/selftests/futex/functional/Makefile
-@@ -17,7 +17,8 @@ TEST_GEN_FILES := \
- 	futex_wait_uninitialized_heap \
- 	futex_wait_private_mapped_file \
- 	futex_wait \
--	futex_requeue
-+	futex_requeue \
-+	futex_waitv
- 
- TEST_PROGS := run.sh
- 
-diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
-index 1f8f6daaf..3651ce17b 100644
---- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
-+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
-@@ -17,6 +17,7 @@
- 
- #include <pthread.h>
- #include "futextest.h"
-+#include "futex2test.h"
- #include "logging.h"
- 
- #define TEST_NAME "futex-wait-timeout"
-@@ -96,6 +97,12 @@ int main(int argc, char *argv[])
- 	struct timespec to;
- 	pthread_t thread;
- 	int c;
-+	struct futex_waitv waitv = {
-+			.uaddr = (uintptr_t)&f1,
-+			.val = f1,
-+			.flags = FUTEX_32,
-+			.__reserved = 0
-+		};
- 
- 	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- 		switch (c) {
-@@ -118,7 +125,7 @@ int main(int argc, char *argv[])
- 	}
- 
- 	ksft_print_header();
--	ksft_set_plan(7);
-+	ksft_set_plan(9);
- 	ksft_print_msg("%s: Block on a futex and wait for timeout\n",
- 	       basename(argv[0]));
- 	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
-@@ -175,6 +182,18 @@ int main(int argc, char *argv[])
- 	res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
- 	test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
- 
-+	/* futex_waitv with CLOCK_MONOTONIC */
-+	if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
-+		return RET_FAIL;
-+	res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
-+	test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT);
-+
-+	/* futex_waitv with CLOCK_REALTIME */
-+	if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
-+		return RET_FAIL;
-+	res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
-+	test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT);
-+
- 	ksft_print_cnts();
- 	return ret;
- }
-diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
-index 0ae390ff8..7d7a6a06c 100644
---- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
-+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
-@@ -22,6 +22,7 @@
- #include <string.h>
- #include <time.h>
- #include "futextest.h"
-+#include "futex2test.h"
- #include "logging.h"
- 
- #define TEST_NAME "futex-wait-wouldblock"
-@@ -42,6 +43,12 @@ int main(int argc, char *argv[])
- 	futex_t f1 = FUTEX_INITIALIZER;
- 	int res, ret = RET_PASS;
- 	int c;
-+	struct futex_waitv waitv = {
-+			.uaddr = (uintptr_t)&f1,
-+			.val = f1+1,
-+			.flags = FUTEX_32,
-+			.__reserved = 0
-+		};
- 
- 	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- 		switch (c) {
-@@ -61,18 +68,44 @@ int main(int argc, char *argv[])
- 	}
- 
- 	ksft_print_header();
--	ksft_set_plan(1);
-+	ksft_set_plan(2);
- 	ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
- 	       basename(argv[0]));
- 
- 	info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
- 	res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
- 	if (!res || errno != EWOULDBLOCK) {
--		fail("futex_wait returned: %d %s\n",
--		     res ? errno : res, res ? strerror(errno) : "");
-+		ksft_test_result_fail("futex_wait returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
- 		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_wait\n");
- 	}
- 
--	print_result(TEST_NAME, ret);
-+	if (clock_gettime(CLOCK_MONOTONIC, &to)) {
-+		error("clock_gettime failed\n", errno);
-+		return errno;
-+	}
-+
-+	to.tv_nsec += timeout_ns;
-+
-+	if (to.tv_nsec >= 1000000000) {
-+		to.tv_sec++;
-+		to.tv_nsec -= 1000000000;
-+	}
-+
-+	info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
-+	res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
-+	if (!res || errno != EWOULDBLOCK) {
-+		ksft_test_result_pass("futex_waitv returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv\n");
-+	}
-+
-+	ksft_print_cnts();
- 	return ret;
- }
-diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
-new file mode 100644
-index 000000000..a94337f67
---- /dev/null
-+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
-@@ -0,0 +1,237 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * futex_waitv() test by André Almeida <andrealmeid@collabora.com>
-+ *
-+ * Copyright 2021 Collabora Ltd.
-+ */
-+
-+#include <errno.h>
-+#include <error.h>
-+#include <getopt.h>
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <time.h>
-+#include <pthread.h>
-+#include <stdint.h>
-+#include <sys/shm.h>
-+#include "futextest.h"
-+#include "futex2test.h"
-+#include "logging.h"
-+
-+#define TEST_NAME "futex-wait"
-+#define WAKE_WAIT_US 10000
-+#define NR_FUTEXES 30
-+static struct futex_waitv waitv[NR_FUTEXES];
-+u_int32_t futexes[NR_FUTEXES] = {0};
-+
-+void usage(char *prog)
-+{
-+	printf("Usage: %s\n", prog);
-+	printf("  -c	Use color\n");
-+	printf("  -h	Display this help message\n");
-+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
-+	       VQUIET, VCRITICAL, VINFO);
-+}
-+
-+void *waiterfn(void *arg)
-+{
-+	struct timespec to;
-+	int res;
-+
-+	/* setting absolute timeout for futex2 */
-+	if (clock_gettime(CLOCK_MONOTONIC, &to))
-+		error("gettime64 failed\n", errno);
-+
-+	to.tv_sec++;
-+
-+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+	if (res < 0) {
-+		ksft_test_result_fail("futex_waitv returned: %d %s\n",
-+				      errno, strerror(errno));
-+	} else if (res != NR_FUTEXES - 1) {
-+		ksft_test_result_fail("futex_waitv returned: %d, expecting %d\n",
-+				      res, NR_FUTEXES - 1);
-+	}
-+
-+	return NULL;
-+}
-+
-+int main(int argc, char *argv[])
-+{
-+	pthread_t waiter;
-+	int res, ret = RET_PASS;
-+	struct timespec to;
-+	int c, i;
-+
-+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
-+		switch (c) {
-+		case 'c':
-+			log_color(1);
-+			break;
-+		case 'h':
-+			usage(basename(argv[0]));
-+			exit(0);
-+		case 'v':
-+			log_verbosity(atoi(optarg));
-+			break;
-+		default:
-+			usage(basename(argv[0]));
-+			exit(1);
-+		}
-+	}
-+
-+	ksft_print_header();
-+	ksft_set_plan(7);
-+	ksft_print_msg("%s: Test FUTEX_WAITV\n",
-+		       basename(argv[0]));
-+
-+	for (i = 0; i < NR_FUTEXES; i++) {
-+		waitv[i].uaddr = (uintptr_t)&futexes[i];
-+		waitv[i].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
-+		waitv[i].val = 0;
-+		waitv[i].__reserved = 0;
-+	}
-+
-+	/* Private waitv */
-+	if (pthread_create(&waiter, NULL, waiterfn, NULL))
-+		error("pthread_create failed\n", errno);
-+
-+	usleep(WAKE_WAIT_US);
-+
-+	res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, FUTEX_PRIVATE_FLAG);
-+	if (res != 1) {
-+		ksft_test_result_fail("futex_wake private returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv private\n");
-+	}
-+
-+	/* Shared waitv */
-+	for (i = 0; i < NR_FUTEXES; i++) {
-+		int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
-+
-+		if (shm_id < 0) {
-+			perror("shmget");
-+			exit(1);
-+		}
-+
-+		unsigned int *shared_data = shmat(shm_id, NULL, 0);
-+
-+		*shared_data = 0;
-+		waitv[i].uaddr = (uintptr_t)shared_data;
-+		waitv[i].flags = FUTEX_32;
-+		waitv[i].val = 0;
-+		waitv[i].__reserved = 0;
-+	}
-+
-+	if (pthread_create(&waiter, NULL, waiterfn, NULL))
-+		error("pthread_create failed\n", errno);
-+
-+	usleep(WAKE_WAIT_US);
-+
-+	res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, 0);
-+	if (res != 1) {
-+		ksft_test_result_fail("futex_wake shared returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv shared\n");
-+	}
-+
-+	for (i = 0; i < NR_FUTEXES; i++)
-+		shmdt(u64_to_ptr(waitv[i].uaddr));
-+
-+	/* Testing a waiter without FUTEX_32 flag */
-+	waitv[0].flags = FUTEX_PRIVATE_FLAG;
-+
-+	if (clock_gettime(CLOCK_MONOTONIC, &to))
-+		error("gettime64 failed\n", errno);
-+
-+	to.tv_sec++;
-+
-+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+	if (res == EINVAL) {
-+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv without FUTEX_32\n");
-+	}
-+
-+	/* Testing a waiter with an unaligned address */
-+	waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
-+	waitv[0].uaddr = 1;
-+
-+	if (clock_gettime(CLOCK_MONOTONIC, &to))
-+		error("gettime64 failed\n", errno);
-+
-+	to.tv_sec++;
-+
-+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+	if (res == EINVAL) {
-+		ksft_test_result_fail("futex_wake private returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv with an unaligned address\n");
-+	}
-+
-+	/* Testing a NULL address for waiters.uaddr */
-+	waitv[0].uaddr = 0x00000000;
-+
-+	if (clock_gettime(CLOCK_MONOTONIC, &to))
-+		error("gettime64 failed\n", errno);
-+
-+	to.tv_sec++;
-+
-+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+	if (res == EINVAL) {
-+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
-+	}
-+
-+	/* Testing a NULL address for *waiters */
-+	if (clock_gettime(CLOCK_MONOTONIC, &to))
-+		error("gettime64 failed\n", errno);
-+
-+	to.tv_sec++;
-+
-+	res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
-+	if (res == EINVAL) {
-+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
-+	}
-+
-+	/* Testing an invalid clockid */
-+	if (clock_gettime(CLOCK_MONOTONIC, &to))
-+		error("gettime64 failed\n", errno);
-+
-+	to.tv_sec++;
-+
-+	res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_TAI);
-+	if (res == EINVAL) {
-+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
-+				      res ? errno : res,
-+				      res ? strerror(errno) : "");
-+		ret = RET_FAIL;
-+	} else {
-+		ksft_test_result_pass("futex_waitv invalid clockid\n");
-+	}
-+
-+	ksft_print_cnts();
-+	return ret;
-+}
-diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
-index 11a9d6229..5ccd599da 100755
---- a/tools/testing/selftests/futex/functional/run.sh
-+++ b/tools/testing/selftests/futex/functional/run.sh
-@@ -79,3 +79,6 @@ echo
- 
- echo
- ./futex_requeue $COLOR
-+
-+echo
-+./futex_waitv $COLOR
-diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
-new file mode 100644
-index 000000000..9d305520e
---- /dev/null
-+++ b/tools/testing/selftests/futex/include/futex2test.h
-@@ -0,0 +1,22 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Futex2 library addons for futex tests
-+ *
-+ * Copyright 2021 Collabora Ltd.
-+ */
-+#include <stdint.h>
-+
-+#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
-+
-+/**
-+ * futex_waitv - Wait at multiple futexes, wake on any
-+ * @waiters:    Array of waiters
-+ * @nr_waiters: Length of waiters array
-+ * @flags: Operation flags
-+ * @timo:  Optional timeout for operation
-+ */
-+static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
-+			      unsigned long flags, struct timespec *timo, clockid_t clockid)
-+{
-+	return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
-+}
--- 
-2.33.1.711.g9d530dc002
-
-
diff --git a/0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch b/0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch
deleted file mode 100644
index dafb17784fdb..000000000000
--- a/0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch
+++ /dev/null
@@ -1,89 +0,0 @@
---- b/drivers/usb/host/xhci-pci.c
-+++ a/drivers/usb/host/xhci-pci.c
-@@ -636,14 +636,7 @@
- 	{ /* end: all zeroes */ }
- };
- MODULE_DEVICE_TABLE(pci, pci_ids);
--
--/*
-- * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't
-- * load firmware, so don't encumber the xhci-pci driver with it.
-- */
--#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS)
- MODULE_FIRMWARE("renesas_usb_fw.mem");
--#endif
- 
- /* pci driver glue; this is a "new style" PCI driver module */
- static struct pci_driver xhci_pci_driver = {
---- b/drivers/usb/host/xhci-pci.c
-+++ a/drivers/usb/host/xhci-pci.c
-@@ -16,7 +16,6 @@
- 
- #include "xhci.h"
- #include "xhci-trace.h"
--#include "xhci-pci.h"
- 
- #define SSIC_PORT_NUM		2
- #define SSIC_PORT_CFG2		0x880c
-@@ -92,16 +91,7 @@ static int xhci_pci_reinit(struct xhci_h
- 
- static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
- {
--	struct pci_dev                  *pdev = to_pci_dev(dev);
--	struct xhci_driver_data         *driver_data;
--	const struct pci_device_id      *id;
--
--	id = pci_match_id(pdev->driver->id_table, pdev);
--
--	if (id && id->driver_data) {
--		driver_data = (struct xhci_driver_data *)id->driver_data;
--		xhci->quirks |= driver_data->quirks;
--	}
-+	struct pci_dev		*pdev = to_pci_dev(dev);
- 
- 	/* Look for vendor-specific quirks */
- 	if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
-@@ -346,16 +336,8 @@ static int xhci_pci_probe(struct pci_dev
- 	int retval;
- 	struct xhci_hcd *xhci;
- 	struct usb_hcd *hcd;
--	struct xhci_driver_data *driver_data;
- 	struct reset_control *reset;
- 
--	driver_data = (struct xhci_driver_data *)id->driver_data;
--	if (driver_data && driver_data->quirks & XHCI_RENESAS_FW_QUIRK) {
--		retval = renesas_xhci_check_request_fw(dev, id);
--		if (retval)
--			return retval;
--	}
--
- 	reset = devm_reset_control_get_optional_exclusive(&dev->dev, NULL);
- 	if (IS_ERR(reset))
- 		return PTR_ERR(reset);
-@@ -578,26 +557,14 @@ static void xhci_pci_shutdown(struct usb
- 
- /*-------------------------------------------------------------------------*/
- 
--static const struct xhci_driver_data reneses_data = {
--	.quirks  = XHCI_RENESAS_FW_QUIRK,
--	.firmware = "renesas_usb_fw.mem",
--};
--
- /* PCI driver selection metadata; PCI hotplugging uses this */
- static const struct pci_device_id pci_ids[] = {
--	{ PCI_DEVICE(0x1912, 0x0014),
--		.driver_data =  (unsigned long)&reneses_data,
--	},
--	{ PCI_DEVICE(0x1912, 0x0015),
--		.driver_data =  (unsigned long)&reneses_data,
--	},
- 	/* handle any USB 3.0 xHCI controller */
- 	{ PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_XHCI, ~0),
- 	},
- 	{ /* end: all zeroes */ }
- };
- MODULE_DEVICE_TABLE(pci, pci_ids);
--MODULE_FIRMWARE("renesas_usb_fw.mem");
- 
- /* pci driver glue; this is a "new style" PCI driver module */
- static struct pci_driver xhci_pci_driver = {
diff --git a/0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch b/0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch
deleted file mode 100644
index 7e59a4802e0a..000000000000
--- a/0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
-index cd2935b9e7c81..c3211325c2d3e 100644
---- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
-+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
-@@ -1869,7 +1869,11 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
- 				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
- 				  RING_FORCE_TO_NONPRIV_RANGE_4);
- 
--		/* Wa_1808121037:tgl */
-+		/*
-+		 * Wa_1808121037:tgl
-+		 * Wa_14012131227:dg1
-+		 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
-+		 */
- 		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
- 
- 		/* Wa_1806527549:tgl */
diff --git a/0201-lenovo-wmi2.patch b/0201-lenovo-wmi2.patch
deleted file mode 100644
index c6b1b0603651..000000000000
--- a/0201-lenovo-wmi2.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
-index 791449a2370f..45d9010aafcf 100644
---- a/drivers/platform/x86/wmi.c
-+++ b/drivers/platform/x86/wmi.c
-@@ -1081,7 +1081,8 @@  static int wmi_create_device(struct device *wmi_bus_dev,
- 	wblock->dev.dev.bus = &wmi_bus_type;
- 	wblock->dev.dev.parent = wmi_bus_dev;
- 
--	dev_set_name(&wblock->dev.dev, "%pUL", gblock->guid);
-+	dev_set_name(&wblock->dev.dev, "%s-%pUL",
-+		     dev_name(&wblock->acpi_device->dev), gblock->guid);
- 
- 	device_initialize(&wblock->dev.dev);
- 
-
diff --git a/0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch b/0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch
new file mode 100644
index 000000000000..280ed9645c31
--- /dev/null
+++ b/0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch
@@ -0,0 +1,1038 @@
+--- b/Documentation/gpu/todo.rst
++++ a/Documentation/gpu/todo.rst
+@@ -314,19 +314,16 @@
+ Garbage collect fbdev scrolling acceleration
+ --------------------------------------------
+ 
++Scroll acceleration is disabled in fbcon by hard-wiring p->scrollmode =
++SCROLL_REDRAW. There's a ton of code this will allow us to remove:
+-Scroll acceleration has been disabled in fbcon. Now it works as the old
+-SCROLL_REDRAW mode. A ton of code was removed in fbcon.c and the hook bmove was
+-removed from fbcon_ops.
+-Remaining tasks:
+ 
++- lots of code in fbcon.c
++
++- a bunch of the hooks in fbcon_ops, maybe the remaining hooks could be called
+-- a bunch of the hooks in fbcon_ops could be removed or simplified by calling
+   directly instead of the function table (with a switch on p->rotate)
+ 
+ - fb_copyarea is unused after this, and can be deleted from all drivers
+ 
+-- after that, fb_copyarea can be deleted from fb_ops in include/linux/fb.h as
+-  well as cfb_copyarea
+-
+ Note that not all acceleration code can be deleted, since clearing and cursor
+ support is still accelerated, which might be good candidates for further
+ deletion projects.
+--- b/drivers/video/fbdev/core/bitblit.c
++++ a/drivers/video/fbdev/core/bitblit.c
+@@ -43,6 +43,21 @@
+ 	}
+ }
+ 
++static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++		      int sx, int dy, int dx, int height, int width)
++{
++	struct fb_copyarea area;
++
++	area.sx = sx * vc->vc_font.width;
++	area.sy = sy * vc->vc_font.height;
++	area.dx = dx * vc->vc_font.width;
++	area.dy = dy * vc->vc_font.height;
++	area.height = height * vc->vc_font.height;
++	area.width = width * vc->vc_font.width;
++
++	info->fbops->fb_copyarea(info, &area);
++}
++
+ static void bit_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ 		      int sx, int height, int width)
+ {
+@@ -378,6 +393,7 @@
+ 
+ void fbcon_set_bitops(struct fbcon_ops *ops)
+ {
++	ops->bmove = bit_bmove;
+ 	ops->clear = bit_clear;
+ 	ops->putcs = bit_putcs;
+ 	ops->clear_margins = bit_clear_margins;
+--- b/drivers/video/fbdev/core/fbcon.c
++++ a/drivers/video/fbdev/core/fbcon.c
+@@ -173,6 +173,8 @@
+ 			int count, int ypos, int xpos);
+ static void fbcon_clear_margins(struct vc_data *vc, int bottom_only);
+ static void fbcon_cursor(struct vc_data *vc, int mode);
++static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
++			int height, int width);
+ static int fbcon_switch(struct vc_data *vc);
+ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch);
+ static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table);
+@@ -180,8 +182,16 @@
+ /*
+  *  Internal routines
+  */
++static __inline__ void ywrap_up(struct vc_data *vc, int count);
++static __inline__ void ywrap_down(struct vc_data *vc, int count);
++static __inline__ void ypan_up(struct vc_data *vc, int count);
++static __inline__ void ypan_down(struct vc_data *vc, int count);
++static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
++			    int dy, int dx, int height, int width, u_int y_break);
+ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
+ 			   int unit);
++static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
++			      int line, int count, int dy);
+ static void fbcon_modechanged(struct fb_info *info);
+ static void fbcon_set_all_vcs(struct fb_info *info);
+ static void fbcon_start(void);
+@@ -1125,6 +1135,14 @@
+ 
+ 	ops->graphics = 0;
+ 
++	/*
++	 * No more hw acceleration for fbcon.
++	 *
++	 * FIXME: Garbage collect all the now dead code after sufficient time
++	 * has passed.
++	 */
++	p->scrollmode = SCROLL_REDRAW;
++
+ 	/*
+ 	 *  ++guenther: console.c:vc_allocate() relies on initializing
+ 	 *  vc_{cols,rows}, but we must not set those if we are only
+@@ -1211,13 +1229,14 @@
+  *  This system is now divided into two levels because of complications
+  *  caused by hardware scrolling. Top level functions:
+  *
++ *	fbcon_bmove(), fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
+- *	fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
+  *
+  *  handles y values in range [0, scr_height-1] that correspond to real
+  *  screen positions. y_wrap shift means that first line of bitmap may be
+  *  anywhere on this display. These functions convert lineoffsets to
+  *  bitmap offsets and deal with the wrap-around case by splitting blits.
+  *
++ *	fbcon_bmove_physical_8()    -- These functions fast implementations
+  *	fbcon_clear_physical_8()    -- of original fbcon_XXX fns.
+  *	fbcon_putc_physical_8()	    -- (font width != 8) may be added later
+  *
+@@ -1390,6 +1409,224 @@
+ 	}
+ }
+ 
++static __inline__ void ywrap_up(struct vc_data *vc, int count)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_ops *ops = info->fbcon_par;
++	struct fbcon_display *p = &fb_display[vc->vc_num];
++	
++	p->yscroll += count;
++	if (p->yscroll >= p->vrows)	/* Deal with wrap */
++		p->yscroll -= p->vrows;
++	ops->var.xoffset = 0;
++	ops->var.yoffset = p->yscroll * vc->vc_font.height;
++	ops->var.vmode |= FB_VMODE_YWRAP;
++	ops->update_start(info);
++	scrollback_max += count;
++	if (scrollback_max > scrollback_phys_max)
++		scrollback_max = scrollback_phys_max;
++	scrollback_current = 0;
++}
++
++static __inline__ void ywrap_down(struct vc_data *vc, int count)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_ops *ops = info->fbcon_par;
++	struct fbcon_display *p = &fb_display[vc->vc_num];
++	
++	p->yscroll -= count;
++	if (p->yscroll < 0)	/* Deal with wrap */
++		p->yscroll += p->vrows;
++	ops->var.xoffset = 0;
++	ops->var.yoffset = p->yscroll * vc->vc_font.height;
++	ops->var.vmode |= FB_VMODE_YWRAP;
++	ops->update_start(info);
++	scrollback_max -= count;
++	if (scrollback_max < 0)
++		scrollback_max = 0;
++	scrollback_current = 0;
++}
++
++static __inline__ void ypan_up(struct vc_data *vc, int count)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_display *p = &fb_display[vc->vc_num];
++	struct fbcon_ops *ops = info->fbcon_par;
++
++	p->yscroll += count;
++	if (p->yscroll > p->vrows - vc->vc_rows) {
++		ops->bmove(vc, info, p->vrows - vc->vc_rows,
++			    0, 0, 0, vc->vc_rows, vc->vc_cols);
++		p->yscroll -= p->vrows - vc->vc_rows;
++	}
++
++	ops->var.xoffset = 0;
++	ops->var.yoffset = p->yscroll * vc->vc_font.height;
++	ops->var.vmode &= ~FB_VMODE_YWRAP;
++	ops->update_start(info);
++	fbcon_clear_margins(vc, 1);
++	scrollback_max += count;
++	if (scrollback_max > scrollback_phys_max)
++		scrollback_max = scrollback_phys_max;
++	scrollback_current = 0;
++}
++
++static __inline__ void ypan_up_redraw(struct vc_data *vc, int t, int count)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_ops *ops = info->fbcon_par;
++	struct fbcon_display *p = &fb_display[vc->vc_num];
++
++	p->yscroll += count;
++
++	if (p->yscroll > p->vrows - vc->vc_rows) {
++		p->yscroll -= p->vrows - vc->vc_rows;
++		fbcon_redraw_move(vc, p, t + count, vc->vc_rows - count, t);
++	}
++
++	ops->var.xoffset = 0;
++	ops->var.yoffset = p->yscroll * vc->vc_font.height;
++	ops->var.vmode &= ~FB_VMODE_YWRAP;
++	ops->update_start(info);
++	fbcon_clear_margins(vc, 1);
++	scrollback_max += count;
++	if (scrollback_max > scrollback_phys_max)
++		scrollback_max = scrollback_phys_max;
++	scrollback_current = 0;
++}
++
++static __inline__ void ypan_down(struct vc_data *vc, int count)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_display *p = &fb_display[vc->vc_num];
++	struct fbcon_ops *ops = info->fbcon_par;
++	
++	p->yscroll -= count;
++	if (p->yscroll < 0) {
++		ops->bmove(vc, info, 0, 0, p->vrows - vc->vc_rows,
++			    0, vc->vc_rows, vc->vc_cols);
++		p->yscroll += p->vrows - vc->vc_rows;
++	}
++
++	ops->var.xoffset = 0;
++	ops->var.yoffset = p->yscroll * vc->vc_font.height;
++	ops->var.vmode &= ~FB_VMODE_YWRAP;
++	ops->update_start(info);
++	fbcon_clear_margins(vc, 1);
++	scrollback_max -= count;
++	if (scrollback_max < 0)
++		scrollback_max = 0;
++	scrollback_current = 0;
++}
++
++static __inline__ void ypan_down_redraw(struct vc_data *vc, int t, int count)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_ops *ops = info->fbcon_par;
++	struct fbcon_display *p = &fb_display[vc->vc_num];
++
++	p->yscroll -= count;
++
++	if (p->yscroll < 0) {
++		p->yscroll += p->vrows - vc->vc_rows;
++		fbcon_redraw_move(vc, p, t, vc->vc_rows - count, t + count);
++	}
++
++	ops->var.xoffset = 0;
++	ops->var.yoffset = p->yscroll * vc->vc_font.height;
++	ops->var.vmode &= ~FB_VMODE_YWRAP;
++	ops->update_start(info);
++	fbcon_clear_margins(vc, 1);
++	scrollback_max -= count;
++	if (scrollback_max < 0)
++		scrollback_max = 0;
++	scrollback_current = 0;
++}
++
++static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
++			      int line, int count, int dy)
++{
++	unsigned short *s = (unsigned short *)
++		(vc->vc_origin + vc->vc_size_row * line);
++
++	while (count--) {
++		unsigned short *start = s;
++		unsigned short *le = advance_row(s, 1);
++		unsigned short c;
++		int x = 0;
++		unsigned short attr = 1;
++
++		do {
++			c = scr_readw(s);
++			if (attr != (c & 0xff00)) {
++				attr = c & 0xff00;
++				if (s > start) {
++					fbcon_putcs(vc, start, s - start,
++						    dy, x);
++					x += s - start;
++					start = s;
++				}
++			}
++			console_conditional_schedule();
++			s++;
++		} while (s < le);
++		if (s > start)
++			fbcon_putcs(vc, start, s - start, dy, x);
++		console_conditional_schedule();
++		dy++;
++	}
++}
++
++static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info,
++			struct fbcon_display *p, int line, int count, int ycount)
++{
++	int offset = ycount * vc->vc_cols;
++	unsigned short *d = (unsigned short *)
++	    (vc->vc_origin + vc->vc_size_row * line);
++	unsigned short *s = d + offset;
++	struct fbcon_ops *ops = info->fbcon_par;
++
++	while (count--) {
++		unsigned short *start = s;
++		unsigned short *le = advance_row(s, 1);
++		unsigned short c;
++		int x = 0;
++
++		do {
++			c = scr_readw(s);
++
++			if (c == scr_readw(d)) {
++				if (s > start) {
++					ops->bmove(vc, info, line + ycount, x,
++						   line, x, 1, s-start);
++					x += s - start + 1;
++					start = s + 1;
++				} else {
++					x++;
++					start++;
++				}
++			}
++
++			scr_writew(c, d);
++			console_conditional_schedule();
++			s++;
++			d++;
++		} while (s < le);
++		if (s > start)
++			ops->bmove(vc, info, line + ycount, x, line, x, 1,
++				   s-start);
++		console_conditional_schedule();
++		if (ycount > 0)
++			line++;
++		else {
++			line--;
++			/* NOTE: We subtract two lines from these pointers */
++			s -= vc->vc_size_row;
++			d -= vc->vc_size_row;
++		}
++	}
++}
++
+ static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p,
+ 			 int line, int count, int offset)
+ {
+@@ -1450,6 +1687,7 @@
+ {
+ 	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ 	struct fbcon_display *p = &fb_display[vc->vc_num];
++	int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK;
+ 
+ 	if (fbcon_is_inactive(vc, info))
+ 		return true;
+@@ -1466,32 +1704,249 @@
+ 	case SM_UP:
+ 		if (count > vc->vc_rows)	/* Maximum realistic size */
+ 			count = vc->vc_rows;
++		if (logo_shown >= 0)
++			goto redraw_up;
++		switch (p->scrollmode) {
++		case SCROLL_MOVE:
++			fbcon_redraw_blit(vc, info, p, t, b - t - count,
++				     count);
++			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++			scr_memsetw((unsigned short *) (vc->vc_origin +
++							vc->vc_size_row *
++							(b - count)),
++				    vc->vc_video_erase_char,
++				    vc->vc_size_row * count);
++			return true;
++
++		case SCROLL_WRAP_MOVE:
++			if (b - t - count > 3 * vc->vc_rows >> 2) {
++				if (t > 0)
++					fbcon_bmove(vc, 0, 0, count, 0, t,
++						    vc->vc_cols);
++				ywrap_up(vc, count);
++				if (vc->vc_rows - b > 0)
++					fbcon_bmove(vc, b - count, 0, b, 0,
++						    vc->vc_rows - b,
++						    vc->vc_cols);
++			} else if (info->flags & FBINFO_READS_FAST)
++				fbcon_bmove(vc, t + count, 0, t, 0,
++					    b - t - count, vc->vc_cols);
++			else
++				goto redraw_up;
++			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++			break;
++
++		case SCROLL_PAN_REDRAW:
++			if ((p->yscroll + count <=
++			     2 * (p->vrows - vc->vc_rows))
++			    && ((!scroll_partial && (b - t == vc->vc_rows))
++				|| (scroll_partial
++				    && (b - t - count >
++					3 * vc->vc_rows >> 2)))) {
++				if (t > 0)
++					fbcon_redraw_move(vc, p, 0, t, count);
++				ypan_up_redraw(vc, t, count);
++				if (vc->vc_rows - b > 0)
++					fbcon_redraw_move(vc, p, b,
++							  vc->vc_rows - b, b);
++			} else
++				fbcon_redraw_move(vc, p, t + count, b - t - count, t);
++			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++			break;
++
++		case SCROLL_PAN_MOVE:
++			if ((p->yscroll + count <=
++			     2 * (p->vrows - vc->vc_rows))
++			    && ((!scroll_partial && (b - t == vc->vc_rows))
++				|| (scroll_partial
++				    && (b - t - count >
++					3 * vc->vc_rows >> 2)))) {
++				if (t > 0)
++					fbcon_bmove(vc, 0, 0, count, 0, t,
++						    vc->vc_cols);
++				ypan_up(vc, count);
++				if (vc->vc_rows - b > 0)
++					fbcon_bmove(vc, b - count, 0, b, 0,
++						    vc->vc_rows - b,
++						    vc->vc_cols);
++			} else if (info->flags & FBINFO_READS_FAST)
++				fbcon_bmove(vc, t + count, 0, t, 0,
++					    b - t - count, vc->vc_cols);
++			else
++				goto redraw_up;
++			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++			break;
++
++		case SCROLL_REDRAW:
++		      redraw_up:
++			fbcon_redraw(vc, p, t, b - t - count,
++				     count * vc->vc_cols);
++			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
++			scr_memsetw((unsigned short *) (vc->vc_origin +
++							vc->vc_size_row *
++							(b - count)),
++				    vc->vc_video_erase_char,
++				    vc->vc_size_row * count);
++			return true;
++		}
++		break;
+-		fbcon_redraw(vc, p, t, b - t - count,
+-			     count * vc->vc_cols);
+-		fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+-		scr_memsetw((unsigned short *) (vc->vc_origin +
+-						vc->vc_size_row *
+-						(b - count)),
+-			    vc->vc_video_erase_char,
+-			    vc->vc_size_row * count);
+-		return true;
+ 
+ 	case SM_DOWN:
+ 		if (count > vc->vc_rows)	/* Maximum realistic size */
+ 			count = vc->vc_rows;
++		if (logo_shown >= 0)
++			goto redraw_down;
++		switch (p->scrollmode) {
++		case SCROLL_MOVE:
++			fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
++				     -count);
++			fbcon_clear(vc, t, 0, count, vc->vc_cols);
++			scr_memsetw((unsigned short *) (vc->vc_origin +
++							vc->vc_size_row *
++							t),
++				    vc->vc_video_erase_char,
++				    vc->vc_size_row * count);
++			return true;
++
++		case SCROLL_WRAP_MOVE:
++			if (b - t - count > 3 * vc->vc_rows >> 2) {
++				if (vc->vc_rows - b > 0)
++					fbcon_bmove(vc, b, 0, b - count, 0,
++						    vc->vc_rows - b,
++						    vc->vc_cols);
++				ywrap_down(vc, count);
++				if (t > 0)
++					fbcon_bmove(vc, count, 0, 0, 0, t,
++						    vc->vc_cols);
++			} else if (info->flags & FBINFO_READS_FAST)
++				fbcon_bmove(vc, t, 0, t + count, 0,
++					    b - t - count, vc->vc_cols);
++			else
++				goto redraw_down;
++			fbcon_clear(vc, t, 0, count, vc->vc_cols);
++			break;
++
++		case SCROLL_PAN_MOVE:
++			if ((count - p->yscroll <= p->vrows - vc->vc_rows)
++			    && ((!scroll_partial && (b - t == vc->vc_rows))
++				|| (scroll_partial
++				    && (b - t - count >
++					3 * vc->vc_rows >> 2)))) {
++				if (vc->vc_rows - b > 0)
++					fbcon_bmove(vc, b, 0, b - count, 0,
++						    vc->vc_rows - b,
++						    vc->vc_cols);
++				ypan_down(vc, count);
++				if (t > 0)
++					fbcon_bmove(vc, count, 0, 0, 0, t,
++						    vc->vc_cols);
++			} else if (info->flags & FBINFO_READS_FAST)
++				fbcon_bmove(vc, t, 0, t + count, 0,
++					    b - t - count, vc->vc_cols);
++			else
++				goto redraw_down;
++			fbcon_clear(vc, t, 0, count, vc->vc_cols);
++			break;
++
++		case SCROLL_PAN_REDRAW:
++			if ((count - p->yscroll <= p->vrows - vc->vc_rows)
++			    && ((!scroll_partial && (b - t == vc->vc_rows))
++				|| (scroll_partial
++				    && (b - t - count >
++					3 * vc->vc_rows >> 2)))) {
++				if (vc->vc_rows - b > 0)
++					fbcon_redraw_move(vc, p, b, vc->vc_rows - b,
++							  b - count);
++				ypan_down_redraw(vc, t, count);
++				if (t > 0)
++					fbcon_redraw_move(vc, p, count, t, 0);
++			} else
++				fbcon_redraw_move(vc, p, t, b - t - count, t + count);
++			fbcon_clear(vc, t, 0, count, vc->vc_cols);
++			break;
++
++		case SCROLL_REDRAW:
++		      redraw_down:
++			fbcon_redraw(vc, p, b - 1, b - t - count,
++				     -count * vc->vc_cols);
++			fbcon_clear(vc, t, 0, count, vc->vc_cols);
++			scr_memsetw((unsigned short *) (vc->vc_origin +
++							vc->vc_size_row *
++							t),
++				    vc->vc_video_erase_char,
++				    vc->vc_size_row * count);
++			return true;
++		}
+-		fbcon_redraw(vc, p, b - 1, b - t - count,
+-			     -count * vc->vc_cols);
+-		fbcon_clear(vc, t, 0, count, vc->vc_cols);
+-		scr_memsetw((unsigned short *) (vc->vc_origin +
+-						vc->vc_size_row *
+-						t),
+-			    vc->vc_video_erase_char,
+-			    vc->vc_size_row * count);
+-		return true;
+ 	}
+ 	return false;
+ }
+ 
++
++static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
++			int height, int width)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_display *p = &fb_display[vc->vc_num];
++	
++	if (fbcon_is_inactive(vc, info))
++		return;
++
++	if (!width || !height)
++		return;
++
++	/*  Split blits that cross physical y_wrap case.
++	 *  Pathological case involves 4 blits, better to use recursive
++	 *  code rather than unrolled case
++	 *
++	 *  Recursive invocations don't need to erase the cursor over and
++	 *  over again, so we use fbcon_bmove_rec()
++	 */
++	fbcon_bmove_rec(vc, p, sy, sx, dy, dx, height, width,
++			p->vrows - p->yscroll);
++}
++
++static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
++			    int dy, int dx, int height, int width, u_int y_break)
++{
++	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
++	struct fbcon_ops *ops = info->fbcon_par;
++	u_int b;
++
++	if (sy < y_break && sy + height > y_break) {
++		b = y_break - sy;
++		if (dy < sy) {	/* Avoid trashing self */
++			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++					y_break);
++			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++					height - b, width, y_break);
++		} else {
++			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++					height - b, width, y_break);
++			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++					y_break);
++		}
++		return;
++	}
++
++	if (dy < y_break && dy + height > y_break) {
++		b = y_break - dy;
++		if (dy < sy) {	/* Avoid trashing self */
++			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++					y_break);
++			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++					height - b, width, y_break);
++		} else {
++			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
++					height - b, width, y_break);
++			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
++					y_break);
++		}
++		return;
++	}
++	ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
++		   height, width);
++}
++
+ static void updatescrollmode(struct fbcon_display *p,
+ 					struct fb_info *info,
+ 					struct vc_data *vc)
+@@ -1664,7 +2119,21 @@
+ 
+ 	updatescrollmode(p, info, vc);
+ 
++	switch (p->scrollmode) {
++	case SCROLL_WRAP_MOVE:
++		scrollback_phys_max = p->vrows - vc->vc_rows;
++		break;
++	case SCROLL_PAN_MOVE:
++	case SCROLL_PAN_REDRAW:
++		scrollback_phys_max = p->vrows - 2 * vc->vc_rows;
++		if (scrollback_phys_max < 0)
++			scrollback_phys_max = 0;
++		break;
++	default:
++		scrollback_phys_max = 0;
++		break;
++	}
++
+-	scrollback_phys_max = 0;
+ 	scrollback_max = 0;
+ 	scrollback_current = 0;
+ 
+--- b/drivers/video/fbdev/core/fbcon.h
++++ a/drivers/video/fbdev/core/fbcon.h
+@@ -29,6 +29,7 @@
+     /* Filled in by the low-level console driver */
+     const u_char *fontdata;
+     int userfont;                   /* != 0 if fontdata kmalloc()ed */
++    u_short scrollmode;             /* Scroll Method */
+     u_short inverse;                /* != 0 text black on white as default */
+     short yscroll;                  /* Hardware scrolling */
+     int vrows;                      /* number of virtual rows */
+@@ -51,6 +52,8 @@
+ };
+ 
+ struct fbcon_ops {
++	void (*bmove)(struct vc_data *vc, struct fb_info *info, int sy,
++		      int sx, int dy, int dx, int height, int width);
+ 	void (*clear)(struct vc_data *vc, struct fb_info *info, int sy,
+ 		      int sx, int height, int width);
+ 	void (*putcs)(struct vc_data *vc, struct fb_info *info,
+@@ -149,6 +152,62 @@
+ #define attr_bgcol_ec(bgshift, vc, info) attr_col_ec(bgshift, vc, info, 0)
+ #define attr_fgcol_ec(fgshift, vc, info) attr_col_ec(fgshift, vc, info, 1)
+ 
++    /*
++     *  Scroll Method
++     */
++     
++/* There are several methods fbcon can use to move text around the screen:
++ *
++ *                     Operation   Pan    Wrap
++ *---------------------------------------------
++ * SCROLL_MOVE         copyarea    No     No
++ * SCROLL_PAN_MOVE     copyarea    Yes    No
++ * SCROLL_WRAP_MOVE    copyarea    No     Yes
++ * SCROLL_REDRAW       imageblit   No     No
++ * SCROLL_PAN_REDRAW   imageblit   Yes    No
++ * SCROLL_WRAP_REDRAW  imageblit   No     Yes
++ *
++ * (SCROLL_WRAP_REDRAW is not implemented yet)
++ *
++ * In general, fbcon will choose the best scrolling
++ * method based on the rule below:
++ *
++ * Pan/Wrap > accel imageblit > accel copyarea >
++ * soft imageblit > (soft copyarea)
++ *
++ * Exception to the rule: Pan + accel copyarea is
++ * preferred over Pan + accel imageblit.
++ *
++ * The above is typical for PCI/AGP cards. Unless
++ * overridden, fbcon will never use soft copyarea.
++ *
++ * If you need to override the above rule, set the
++ * appropriate flags in fb_info->flags.  For example,
++ * to prefer copyarea over imageblit, set
++ * FBINFO_READS_FAST.
++ *
++ * Other notes:
++ * + use the hardware engine to move the text
++ *    (hw-accelerated copyarea() and fillrect())
++ * + use hardware-supported panning on a large virtual screen
++ * + amifb can not only pan, but also wrap the display by N lines
++ *    (i.e. visible line i = physical line (i+N) % yres).
++ * + read what's already rendered on the screen and
++ *     write it in a different place (this is cfb_copyarea())
++ * + re-render the text to the screen
++ *
++ * Whether to use wrapping or panning can only be figured out at
++ * runtime (when we know whether our font height is a multiple
++ * of the pan/wrap step)
++ *
++ */
++
++#define SCROLL_MOVE	   0x001
++#define SCROLL_PAN_MOVE	   0x002
++#define SCROLL_WRAP_MOVE   0x003
++#define SCROLL_REDRAW	   0x004
++#define SCROLL_PAN_REDRAW  0x005
++
+ #ifdef CONFIG_FB_TILEBLITTING
+ extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
+ #endif
+--- b/drivers/video/fbdev/core/fbcon_ccw.c
++++ a/drivers/video/fbdev/core/fbcon_ccw.c
+@@ -59,12 +59,31 @@
+ 	}
+ }
+ 
++
++static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++		     int sx, int dy, int dx, int height, int width)
++{
++	struct fbcon_ops *ops = info->fbcon_par;
++	struct fb_copyarea area;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
++
++	area.sx = sy * vc->vc_font.height;
++	area.sy = vyres - ((sx + width) * vc->vc_font.width);
++	area.dx = dy * vc->vc_font.height;
++	area.dy = vyres - ((dx + width) * vc->vc_font.width);
++	area.width = height * vc->vc_font.height;
++	area.height  = width * vc->vc_font.width;
++
++	info->fbops->fb_copyarea(info, &area);
++}
++
+ static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ 		     int sx, int height, int width)
+ {
++	struct fbcon_ops *ops = info->fbcon_par;
+ 	struct fb_fillrect region;
+ 	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+ 
+ 	region.color = attr_bgcol_ec(bgshift,vc,info);
+ 	region.dx = sy * vc->vc_font.height;
+@@ -121,7 +140,7 @@
+ 	u32 cnt, pitch, size;
+ 	u32 attribute = get_attribute(info, scr_readw(s));
+ 	u8 *dst, *buf = NULL;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+ 
+ 	if (!ops->fontbuffer)
+ 		return;
+@@ -210,7 +229,7 @@
+ 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ 	int err = 1, dx, dy;
+ 	char *src;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+ 
+ 	if (!ops->fontbuffer)
+ 		return;
+@@ -368,7 +387,7 @@
+ {
+ 	struct fbcon_ops *ops = info->fbcon_par;
+ 	u32 yoffset;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+ 	int err;
+ 
+ 	yoffset = (vyres - info->var.yres) - ops->var.xoffset;
+@@ -383,6 +402,7 @@
+ 
+ void fbcon_rotate_ccw(struct fbcon_ops *ops)
+ {
++	ops->bmove = ccw_bmove;
+ 	ops->clear = ccw_clear;
+ 	ops->putcs = ccw_putcs;
+ 	ops->clear_margins = ccw_clear_margins;
+--- b/drivers/video/fbdev/core/fbcon_cw.c
++++ a/drivers/video/fbdev/core/fbcon_cw.c
+@@ -44,12 +44,31 @@
+ 	}
+ }
+ 
++
++static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++		     int sx, int dy, int dx, int height, int width)
++{
++	struct fbcon_ops *ops = info->fbcon_par;
++	struct fb_copyarea area;
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
++
++	area.sx = vxres - ((sy + height) * vc->vc_font.height);
++	area.sy = sx * vc->vc_font.width;
++	area.dx = vxres - ((dy + height) * vc->vc_font.height);
++	area.dy = dx * vc->vc_font.width;
++	area.width = height * vc->vc_font.height;
++	area.height  = width * vc->vc_font.width;
++
++	info->fbops->fb_copyarea(info, &area);
++}
++
+ static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ 		     int sx, int height, int width)
+ {
++	struct fbcon_ops *ops = info->fbcon_par;
+ 	struct fb_fillrect region;
+ 	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vxres = info->var.xres;
+ 
+ 	region.color = attr_bgcol_ec(bgshift,vc,info);
+ 	region.dx = vxres - ((sy + height) * vc->vc_font.height);
+@@ -106,7 +125,7 @@
+ 	u32 cnt, pitch, size;
+ 	u32 attribute = get_attribute(info, scr_readw(s));
+ 	u8 *dst, *buf = NULL;
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vxres = info->var.xres;
+ 
+ 	if (!ops->fontbuffer)
+ 		return;
+@@ -193,7 +212,7 @@
+ 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ 	int err = 1, dx, dy;
+ 	char *src;
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vxres = info->var.xres;
+ 
+ 	if (!ops->fontbuffer)
+ 		return;
+@@ -350,7 +369,7 @@
+ static int cw_update_start(struct fb_info *info)
+ {
+ 	struct fbcon_ops *ops = info->fbcon_par;
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vxres = info->var.xres;
+ 	u32 xoffset;
+ 	int err;
+ 
+@@ -366,6 +385,7 @@
+ 
+ void fbcon_rotate_cw(struct fbcon_ops *ops)
+ {
++	ops->bmove = cw_bmove;
+ 	ops->clear = cw_clear;
+ 	ops->putcs = cw_putcs;
+ 	ops->clear_margins = cw_clear_margins;
+--- b/drivers/video/fbdev/core/fbcon_rotate.h
++++ a/drivers/video/fbdev/core/fbcon_rotate.h
+@@ -11,6 +11,15 @@
+ #ifndef _FBCON_ROTATE_H
+ #define _FBCON_ROTATE_H
+ 
++#define GETVYRES(s,i) ({                           \
++        (s == SCROLL_REDRAW || s == SCROLL_MOVE) ? \
++        (i)->var.yres : (i)->var.yres_virtual; })
++
++#define GETVXRES(s,i) ({                           \
++        (s == SCROLL_REDRAW || s == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
++        (i)->var.xres : (i)->var.xres_virtual; })
++
++
+ static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat)
+ {
+ 	u32 tmp = (y * pitch) + x, index = tmp / 8,  bit = tmp % 8;
+--- b/drivers/video/fbdev/core/fbcon_ud.c
++++ a/drivers/video/fbdev/core/fbcon_ud.c
+@@ -44,13 +44,33 @@
+ 	}
+ }
+ 
++
++static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++		     int sx, int dy, int dx, int height, int width)
++{
++	struct fbcon_ops *ops = info->fbcon_par;
++	struct fb_copyarea area;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
++
++	area.sy = vyres - ((sy + height) * vc->vc_font.height);
++	area.sx = vxres - ((sx + width) * vc->vc_font.width);
++	area.dy = vyres - ((dy + height) * vc->vc_font.height);
++	area.dx = vxres - ((dx + width) * vc->vc_font.width);
++	area.height = height * vc->vc_font.height;
++	area.width  = width * vc->vc_font.width;
++
++	info->fbops->fb_copyarea(info, &area);
++}
++
+ static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ 		     int sx, int height, int width)
+ {
++	struct fbcon_ops *ops = info->fbcon_par;
+ 	struct fb_fillrect region;
+ 	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+-	u32 vxres = info->var.xres;
+ 
+ 	region.color = attr_bgcol_ec(bgshift,vc,info);
+ 	region.dy = vyres - ((sy + height) * vc->vc_font.height);
+@@ -142,8 +162,8 @@
+ 	u32 mod = vc->vc_font.width % 8, cnt, pitch, size;
+ 	u32 attribute = get_attribute(info, scr_readw(s));
+ 	u8 *dst, *buf = NULL;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+-	u32 vxres = info->var.xres;
+ 
+ 	if (!ops->fontbuffer)
+ 		return;
+@@ -239,8 +259,8 @@
+ 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ 	int err = 1, dx, dy;
+ 	char *src;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+-	u32 vxres = info->var.xres;
+ 
+ 	if (!ops->fontbuffer)
+ 		return;
+@@ -390,8 +410,8 @@
+ {
+ 	struct fbcon_ops *ops = info->fbcon_par;
+ 	int xoffset, yoffset;
++	u32 vyres = GETVYRES(ops->p->scrollmode, info);
++	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+-	u32 vyres = info->var.yres;
+-	u32 vxres = info->var.xres;
+ 	int err;
+ 
+ 	xoffset = vxres - info->var.xres - ops->var.xoffset;
+@@ -409,6 +429,7 @@
+ 
+ void fbcon_rotate_ud(struct fbcon_ops *ops)
+ {
++	ops->bmove = ud_bmove;
+ 	ops->clear = ud_clear;
+ 	ops->putcs = ud_putcs;
+ 	ops->clear_margins = ud_clear_margins;
+--- b/drivers/video/fbdev/core/tileblit.c
++++ a/drivers/video/fbdev/core/tileblit.c
+@@ -16,6 +16,21 @@
+ #include <asm/types.h>
+ #include "fbcon.h"
+ 
++static void tile_bmove(struct vc_data *vc, struct fb_info *info, int sy,
++		       int sx, int dy, int dx, int height, int width)
++{
++	struct fb_tilearea area;
++
++	area.sx = sx;
++	area.sy = sy;
++	area.dx = dx;
++	area.dy = dy;
++	area.height = height;
++	area.width = width;
++
++	info->tileops->fb_tilecopy(info, &area);
++}
++
+ static void tile_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ 		       int sx, int height, int width)
+ {
+@@ -118,6 +133,7 @@
+ 	struct fb_tilemap map;
+ 	struct fbcon_ops *ops = info->fbcon_par;
+ 
++	ops->bmove = tile_bmove;
+ 	ops->clear = tile_clear;
+ 	ops->putcs = tile_putcs;
+ 	ops->clear_margins = tile_clear_margins;
+--- b/drivers/video/fbdev/skeletonfb.c
++++ a/drivers/video/fbdev/skeletonfb.c
+@@ -505,15 +505,15 @@
+ }
+ 
+ /**
++ *      xxxfb_copyarea - REQUIRED function. Can use generic routines if
++ *                       non acclerated hardware and packed pixel based.
+- *      xxxfb_copyarea - OBSOLETE function.
+  *                       Copies one area of the screen to another area.
+- *                       Will be deleted in a future version
+  *
+  *      @info: frame buffer structure that represents a single frame buffer
+  *      @area: Structure providing the data to copy the framebuffer contents
+  *	       from one region to another.
+  *
++ *      This drawing operation copies a rectangular area from one area of the
+- *      This drawing operation copied a rectangular area from one area of the
+  *	screen to another area.
+  */
+ void xxxfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) 
+@@ -645,9 +645,9 @@
+ 	.fb_setcolreg	= xxxfb_setcolreg,
+ 	.fb_blank	= xxxfb_blank,
+ 	.fb_pan_display	= xxxfb_pan_display,
++	.fb_fillrect	= xxxfb_fillrect, 	/* Needed !!! */
++	.fb_copyarea	= xxxfb_copyarea,	/* Needed !!! */
++	.fb_imageblit	= xxxfb_imageblit,	/* Needed !!! */
+-	.fb_fillrect	= xxxfb_fillrect,	/* Needed !!!   */
+-	.fb_copyarea	= xxxfb_copyarea,	/* Obsolete     */
+-	.fb_imageblit	= xxxfb_imageblit,	/* Needed !!!   */
+ 	.fb_cursor	= xxxfb_cursor,		/* Optional !!! */
+ 	.fb_sync	= xxxfb_sync,
+ 	.fb_ioctl	= xxxfb_ioctl,
+--- b/include/linux/fb.h
++++ a/include/linux/fb.h
+@@ -262,7 +262,7 @@
+ 
+ 	/* Draws a rectangle */
+ 	void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect);
++	/* Copy data from area to another */
+-	/* Copy data from area to another. Obsolete. */
+ 	void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region);
+ 	/* Draws a image to the display */
+ 	void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image);
diff --git a/0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch b/0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
index c12688800eab..c12688800eab 100644
--- a/0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
+++ b/0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
diff --git a/0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch b/0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch
index 6491c541e883..6491c541e883 100644
--- a/0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch
+++ b/0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch
diff --git a/0303-revert-fbcon-remove-soft-scrollback-code.patch b/0304-revert-fbcon-remove-soft-scrollback-code.patch
index 4f9735447f37..4f9735447f37 100644
--- a/0303-revert-fbcon-remove-soft-scrollback-code.patch
+++ b/0304-revert-fbcon-remove-soft-scrollback-code.patch
diff --git a/0999-acs.gitpatch b/0999-acs.gitpatch
index 401b27c13f1c..e075ec1d3974 100644
--- a/0999-acs.gitpatch
+++ b/0999-acs.gitpatch
@@ -1,28 +1,27 @@
 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 1396fd2..3c0ede4 100644
+index 2fba824..a797d74 100644
 --- a/Documentation/admin-guide/kernel-parameters.txt
 +++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -3892,6 +3892,15 @@
-                nomsi           [MSI] If the PCI_MSI kernel config parameter is
-                                enabled, this kernel boot option can be used to
-                                disable the use of MSI interrupts system-wide.
-+               pcie_acs_override=
-+                                       [PCIE] Override missing PCIe ACS support for:
-+                               downstream
-+                                       All downstream ports - full ACS capabilities
-+                               multfunction
-+                                       All multifunction devices - multifunction ACS subset
-+                               id:nnnn:nnnn
-+                                       Specfic device - full ACS capabilities
-+                                       Specified as vid:did (vendor/device ID) in hex
-                noioapicquirk   [APIC] Disable all boot interrupt quirks.
-                                Safety option to keep boot IRQs enabled. This
-                                should never be necessary.
+@@ -3922,6 +3922,14 @@
+ 		nomsi		[MSI] If the PCI_MSI kernel config parameter is
+ 				enabled, this kernel boot option can be used to
+ 				disable the use of MSI interrupts system-wide.
++                pci_acs_override [PCIE] Override missing PCIe ACS support for:
++                                downstream
++                                        All downstream ports - full ACS capabilities
++                                multifunction
++                                        Add multifunction devices - multifunction ACS subset
++                                id:nnnn:nnnn
++                                        Specific device - full ACS capabilities
++                                        Specified as vid:did (vendor/device ID) in hex
+ 		noioapicquirk	[APIC] Disable all boot interrupt quirks.
+ 				Safety option to keep boot IRQs enabled. This
+ 				should never be necessary.
 diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 4537d1e..c4f01fe 100644
+index 003950c..d3bb542 100644
 --- a/drivers/pci/quirks.c
 +++ b/drivers/pci/quirks.c
-@@ -193,6 +193,106 @@ static int __init pci_apply_final_quirks(void)
+@@ -193,6 +193,107 @@ static int __init pci_apply_final_quirks(void)
  }
  fs_initcall_sync(pci_apply_final_quirks);
  
@@ -34,6 +33,7 @@ index 4537d1e..c4f01fe 100644
 +       unsigned short vendor;
 +       unsigned short device;
 +};
++
 +static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
 +static u8 max_acs_id;
 +
@@ -129,12 +129,12 @@ index 4537d1e..c4f01fe 100644
  /*
   * Decoding should be disabled for a PCI device during BAR sizing to avoid
   * conflict. But doing so may cause problems on host bridge and perhaps other
-@@ -4949,6 +5049,8 @@ static const struct pci_dev_acs_enabled {
-        { PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs },
-        /* Zhaoxin Root/Downstream Ports */
-        { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
-+       /* allow acs for any */
-+       { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
-        { 0 }
+@@ -4950,6 +5051,8 @@ static const struct pci_dev_acs_enabled {
+ 	{ PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs },
+ 	/* Zhaoxin Root/Downstream Ports */
+ 	{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
++        /* allow acs for any */
++        { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
+ 	{ 0 }
  };
  
diff --git a/PKGBUILD b/PKGBUILD
index 83921d5466ac..fa62c470252d 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -12,9 +12,9 @@
 pkgbase=linux-acs-manjaro
 pkgname=('linux-acs-manjaro' 'linux-acs-manjaro-headers')
 _kernelname=-ACS-MANJARO
-_basekernel=5.15
-_basever=515
-pkgver=5.15.16
+_basekernel=5.16
+_basever=516
+pkgver=5.16.2
 pkgrel=1
 arch=('x86_64')
 url="https://www.kernel.org/"
@@ -37,25 +37,15 @@ source=("https://www.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.x
         'config'
         # ARCH Patches
         '0001-ZEN-Add-sysctl-and-CONFIG-to-disallow-unprivileged-CLONE_NEWUSER.patch'
-        '0002-PCI_Add_more_NVIDIA_controllers_to_the_MSI_masking_quirk.patch'
-        '0003-iommu_intel_do_deep_dma-unmapping_to_avoid_kernel-flooding.patch'
-        '0004-cpufreq_intel_pstate_ITMT_support_for_overclocked_system.patch'
-        '0005-Bluetooth_btintel_Fix_bdaddress_comparison_with_garbage_value.patch'
-        '0006-lg-laptop_Recognize_more_models.patch'
+        '0002-Btintel_Fix_bdaddress_comparison_with_garbage_value.patch'
         # MANJARO Patches
         '0101-i2c-nuvoton-nc677x-hwmon-driver.patch'
-#        '0102-iomap-iomap_bmap-should-accept-unwritten-maps.patch'
-        '0103-futex.patch' # https://github.com/sirlucjan/kernel-patches
-        '0104-revert-xhci-Add-support-for-Renesas-controller-with-memory.patch'
         '0105-quirk-kernel-org-bug-210681-firmware_rome_error.patch'
-        '0108-drm_i915_Add_workaround_numbers_to_GEN7_COMMON_SLICE_CHICKEN1_whitelisting.patch::https://patchwork.freedesktop.org/patch/463650/raw/'
-        # Lenovo + AMD
-        '0201-lenovo-wmi2.patch'
-        # other patches
         # Bootsplash
-        '0301-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch'        
-        '0302-revert-fbcon-remove-no-op-fbcon_set_origin.patch'
-        '0303-revert-fbcon-remove-soft-scrollback-code.patch'
+        '0301-revert-garbage-collect-fbdev-scrolling-acceleration.patch'
+        '0302-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch'
+        '0303-revert-fbcon-remove-no-op-fbcon_set_origin.patch'
+        '0304-revert-fbcon-remove-soft-scrollback-code.patch'
         '0401-bootsplash.patch'
         '0402-bootsplash.patch'
         '0403-bootsplash.patch'
@@ -69,22 +59,16 @@ source=("https://www.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.x
         '0411-bootsplash.patch'
         '0412-bootsplash.patch'
         '0413-bootsplash.gitpatch'
+        # ACS override patch
         '0999-acs.gitpatch')
-sha256sums=('57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8'
-            '0817171996521675b3c1130568503f08d8b1672c955cc842200a21bf5914cd95'
-            '93320dbe5928e51fb777a4f13dd9a7364eb150d7983073f7dc159e89a6ffa747'
+sha256sums=('027d7e8988bb69ac12ee92406c3be1fe13f990b1ca2249e226225cd1573308bb'
+            '3a09c2f1ad410c09cf03921abeed1a6ca7c38138fb508171ee673d429d179171'
+            'cb2d729cc20743014d9e3bd08facb9f5bdd19d9fa89014f415c61b4a6eb78e97'
             '986f8d802f37b72a54256f0ab84da83cb229388d58c0b6750f7c770818a18421'
-            'e2823eff3355b7c88a3fa327ea2f84f23cbd36569e0a5f0f76599023f63a52ca'
-            'ce53090a4572cd6162d22225113082f7e4df5028a1230529d170460e26dcf849'
-            'ab0360eac59329eb84f028c2f402ee4a17e4b3dfacb7957355e6178d35af87b9'
-            '76701599bbafa49b90ccb073ef29ce2dc3731566e8fa852bd1e9e7796e184754'
-            'a2a0a0542055a6a921542fbb05cedb6eb6f3d3fb0c038bfb2304bfd3931a0f71'
+            'b89188b1bc3516d54965dd36def6a2af3d81379e53ff7e527bbd91f77c6f191b'
             '7823d7488f42bc4ed7dfae6d1014dbde679d8b862c9a3697a39ba0dae5918978'
-            '844e66a95d7df754c55ac2f1ce7e215b1e56e20ca095462d926a993d557b20e0'
-            'd9330ea593829a6ef3b824db9570253280cbff7da2b4beb47cbc037824d1a29b'
             '5e804e1f241ce542f3f0e83d274ede6aa4b0539e510fb9376f8106e8732ce69b'
-            'e8e6120035977903a7117ba215809b9b162b64a789848107513f219180baaada'
-            '1d58ef2991c625f6f0eb33b4cb8303932f53f1c4694e42bae24c9cd36d2ad013'
+            '365d4225a7db60bd064ebbc34ce0ae582a0c378ad6c4cec7960a5ae4641a6757'
             '2b11905b63b05b25807dd64757c779da74dd4c37e36d3f7a46485b1ee5a9d326'
             '94a8538251ad148f1025cc3de446ce64f73dc32b01815426fb159c722e8fa5bc'
             '1f18c5c10a3c63e41ecd05ad34cd9f6653ba96e9f1049ce2b7bb6da2578ae710'
@@ -101,7 +85,7 @@ sha256sums=('57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8'
             '27471eee564ca3149dd271b0817719b5565a9594dc4d884fe3dc51a5f03832bc'
             '60e295601e4fb33d9bf65f198c54c7eb07c0d1e91e2ad1e0dd6cd6e142cb266d'
             '035ea4b2a7621054f4560471f45336b981538a40172d8f17285910d4e0e0b3ef'
-            '6d6b327ec7c7798f628f98ab964f4457d3cf043bad2632eb8f27548478a83cc1')
+            '2542b5cea79ab5817ce3d30c54acd045966b9c14587bfb0b2f50d473da48a1d5')
 
 prepare() {
   cd "linux-${_basekernel}"
@@ -237,6 +221,9 @@ package_linux-acs-manjaro-headers() {
   # add objtool for external module building and enabled VALIDATION_STACK option
   install -Dt "${_builddir}/tools/objtool" tools/objtool/objtool
 
+  # required when DEBUG_INFO_BTF_MODULES is enabled
+  install -Dt "${_builddir}/tools/bpf/resolve_btfids" tools/bpf/resolve_btfids/resolve_btfids
+
   # remove unneeded architectures
   local _arch
   for _arch in "${_builddir}"/arch/*/; do
diff --git a/config b/config
index f2b65cab7cbf..73690e4abb01 100644
--- a/config
+++ b/config
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86 5.15.15-1 Kernel Configuration
+# Linux/x86 5.16.0-1 Kernel Configuration
 #
 CONFIG_CC_VERSION_TEXT="gcc (GCC) 11.1.0"
 CONFIG_CC_IS_GCC=y
@@ -121,6 +121,7 @@ CONFIG_BPF_JIT_DEFAULT_ON=y
 CONFIG_BPF_LSM=y
 # end of BPF subsystem
 
+CONFIG_PREEMPT_BUILD=y
 # CONFIG_PREEMPT_NONE is not set
 # CONFIG_PREEMPT_VOLUNTARY is not set
 CONFIG_PREEMPT=y
@@ -191,6 +192,7 @@ CONFIG_UCLAMP_BUCKETS_COUNT=5
 CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
 CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
 CONFIG_CC_HAS_INT128=y
+CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5"
 CONFIG_ARCH_SUPPORTS_INT128=y
 CONFIG_NUMA_BALANCING=y
 CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
@@ -411,6 +413,7 @@ CONFIG_NR_CPUS_RANGE_BEGIN=2
 CONFIG_NR_CPUS_RANGE_END=512
 CONFIG_NR_CPUS_DEFAULT=64
 CONFIG_NR_CPUS=320
+CONFIG_SCHED_CLUSTER=y
 CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 CONFIG_SCHED_MC_PRIO=y
@@ -511,6 +514,7 @@ CONFIG_LEGACY_VSYSCALL_XONLY=y
 # CONFIG_LEGACY_VSYSCALL_NONE is not set
 # CONFIG_CMDLINE_BOOL is not set
 CONFIG_MODIFY_LDT_SYSCALL=y
+# CONFIG_STRICT_SIGALTSTACK_SIZE is not set
 CONFIG_HAVE_LIVEPATCH=y
 # CONFIG_LIVEPATCH is not set
 # end of Processor type and features
@@ -712,6 +716,7 @@ CONFIG_KVM_AMD=m
 CONFIG_KVM_AMD_SEV=y
 CONFIG_KVM_XEN=y
 CONFIG_KVM_MMU_AUDIT=y
+CONFIG_KVM_EXTERNAL_WRITE_TRACKING=y
 CONFIG_AS_AVX512=y
 CONFIG_AS_SHA1_NI=y
 CONFIG_AS_SHA256_NI=y
@@ -740,6 +745,7 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_OPTPROBES=y
 CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y
 CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y
 CONFIG_HAVE_NMI=y
 CONFIG_TRACE_IRQFLAGS_SUPPORT=y
@@ -834,6 +840,7 @@ CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y
 CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_ARCH_HAS_ELFCORE_COMPAT=y
 CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y
+CONFIG_DYNAMIC_SIGFRAME=y
 
 #
 # GCOV-based kernel profiling
@@ -979,10 +986,10 @@ CONFIG_SPARSEMEM_VMEMMAP=y
 CONFIG_HAVE_FAST_GUP=y
 CONFIG_NUMA_KEEP_MEMINFO=y
 CONFIG_MEMORY_ISOLATION=y
+CONFIG_EXCLUSIVE_SYSTEM_RAM=y
 CONFIG_HAVE_BOOTMEM_INFO_NODE=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTPLUG_SPARSE=y
 CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
 CONFIG_MEMORY_HOTREMOVE=y
@@ -1206,6 +1213,8 @@ CONFIG_BRIDGE_NETFILTER=m
 # Core Netfilter Configuration
 #
 CONFIG_NETFILTER_INGRESS=y
+CONFIG_NETFILTER_EGRESS=y
+CONFIG_NETFILTER_SKIP_EGRESS=y
 CONFIG_NETFILTER_NETLINK=m
 CONFIG_NETFILTER_FAMILY_BRIDGE=y
 CONFIG_NETFILTER_FAMILY_ARP=y
@@ -1604,10 +1613,11 @@ CONFIG_NET_DSA_TAG_DSA=m
 CONFIG_NET_DSA_TAG_EDSA=m
 CONFIG_NET_DSA_TAG_MTK=m
 CONFIG_NET_DSA_TAG_KSZ=m
-CONFIG_NET_DSA_TAG_RTL4_A=m
 CONFIG_NET_DSA_TAG_OCELOT=m
 CONFIG_NET_DSA_TAG_OCELOT_8021Q=m
 CONFIG_NET_DSA_TAG_QCA=m
+CONFIG_NET_DSA_TAG_RTL4_A=m
+CONFIG_NET_DSA_TAG_RTL8_4=m
 CONFIG_NET_DSA_TAG_LAN9303=m
 CONFIG_NET_DSA_TAG_SJA1105=m
 CONFIG_NET_DSA_TAG_TRAILER=m
@@ -1963,7 +1973,7 @@ CONFIG_AF_RXRPC_DEBUG=y
 CONFIG_RXKAD=y
 CONFIG_AF_KCM=m
 CONFIG_STREAM_PARSER=y
-CONFIG_MCTP=m
+# CONFIG_MCTP is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_WIRELESS_EXT=y
@@ -2063,7 +2073,7 @@ CONFIG_LWTUNNEL_BPF=y
 CONFIG_DST_CACHE=y
 CONFIG_GRO_CELLS=y
 CONFIG_SOCK_VALIDATE_XMIT=y
-CONFIG_NET_SELFTESTS=m
+CONFIG_NET_SELFTESTS=y
 CONFIG_NET_SOCK_MSG=y
 CONFIG_NET_DEVLINK=y
 CONFIG_PAGE_POOL=y
@@ -2258,6 +2268,7 @@ CONFIG_FW_CFG_SYSFS=m
 # CONFIG_FW_CFG_SYSFS_CMDLINE is not set
 CONFIG_SYSFB=y
 # CONFIG_SYSFB_SIMPLEFB is not set
+CONFIG_CS_DSP=m
 CONFIG_GOOGLE_FIRMWARE=y
 # CONFIG_GOOGLE_SMI is not set
 CONFIG_GOOGLE_COREBOOT_TABLE=m
@@ -2334,7 +2345,7 @@ CONFIG_MTD_BLOCK=m
 # CONFIG_SM_FTL is not set
 # CONFIG_MTD_OOPS is not set
 # CONFIG_MTD_SWAP is not set
-CONFIG_MTD_PARTITIONED_MASTER=y
+# CONFIG_MTD_PARTITIONED_MASTER is not set
 
 #
 # RAM/ROM/Flash chip drivers
@@ -2346,7 +2357,7 @@ CONFIG_MTD_MAP_BANK_WIDTH_2=y
 CONFIG_MTD_MAP_BANK_WIDTH_4=y
 CONFIG_MTD_CFI_I1=y
 CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_RAM is not set
+CONFIG_MTD_RAM=m
 CONFIG_MTD_ROM=m
 # CONFIG_MTD_ABSENT is not set
 # end of RAM/ROM/Flash chip drivers
@@ -2357,7 +2368,7 @@ CONFIG_MTD_ROM=m
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 # CONFIG_MTD_PHYSMAP is not set
 # CONFIG_MTD_INTEL_VR_NOR is not set
-# CONFIG_MTD_PLATRAM is not set
+CONFIG_MTD_PLATRAM=m
 # end of Mapping drivers for chip access
 
 #
@@ -2370,9 +2381,7 @@ CONFIG_MTD_ROM=m
 # CONFIG_MTD_SST25L is not set
 # CONFIG_MTD_SLRAM is not set
 CONFIG_MTD_PHRAM=m
-CONFIG_MTD_MTDRAM=m
-CONFIG_MTDRAM_TOTAL_SIZE=4096
-CONFIG_MTDRAM_ERASE_SIZE=128
+# CONFIG_MTD_MTDRAM is not set
 CONFIG_MTD_BLOCK2MTD=m
 
 #
@@ -2473,7 +2482,6 @@ CONFIG_ZRAM_WRITEBACK=y
 # CONFIG_ZRAM_MEMORY_TRACKING is not set
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 # CONFIG_DRBD_FAULT_INJECTION is not set
 CONFIG_BLK_DEV_NBD=m
@@ -2576,6 +2584,7 @@ CONFIG_INTEL_MEI=m
 CONFIG_INTEL_MEI_ME=m
 CONFIG_INTEL_MEI_TXE=m
 CONFIG_INTEL_MEI_HDCP=m
+# CONFIG_INTEL_MEI_PXP is not set
 CONFIG_VMWARE_VMCI=m
 CONFIG_GENWQE=m
 CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0
@@ -2686,6 +2695,7 @@ CONFIG_SCSI_UFS_CDNS_PLATFORM=m
 CONFIG_SCSI_UFS_BSG=y
 CONFIG_SCSI_UFS_CRYPTO=y
 CONFIG_SCSI_UFS_HPB=y
+# CONFIG_SCSI_UFS_HWMON is not set
 CONFIG_SCSI_HPTIOP=m
 CONFIG_SCSI_BUSLOGIC=m
 CONFIG_SCSI_FLASHPOINT=y
@@ -2903,6 +2913,7 @@ CONFIG_DM_SWITCH=m
 CONFIG_DM_LOG_WRITES=m
 CONFIG_DM_INTEGRITY=m
 CONFIG_DM_ZONED=m
+CONFIG_DM_AUDIT=y
 CONFIG_TARGET_CORE=m
 CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_FILEIO=m
@@ -2959,6 +2970,7 @@ CONFIG_VXLAN=m
 CONFIG_GENEVE=m
 CONFIG_BAREUDP=m
 CONFIG_GTP=m
+# CONFIG_AMT is not set
 CONFIG_MACSEC=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3079,6 +3091,9 @@ CONFIG_AMD_XGBE_HAVE_ECC=y
 CONFIG_NET_VENDOR_AQUANTIA=y
 CONFIG_AQTION=m
 CONFIG_NET_VENDOR_ARC=y
+CONFIG_NET_VENDOR_ASIX=y
+CONFIG_SPI_AX88796C=y
+# CONFIG_SPI_AX88796C_COMPRESSION is not set
 CONFIG_NET_VENDOR_ATHEROS=y
 CONFIG_ATL2=m
 CONFIG_ATL1=m
@@ -3189,6 +3204,7 @@ CONFIG_I40E_DCB=y
 CONFIG_IAVF=m
 CONFIG_I40EVF=m
 CONFIG_ICE=m
+CONFIG_ICE_SWITCHDEV=y
 CONFIG_FM10K=m
 CONFIG_IGC=m
 CONFIG_NET_VENDOR_MICROSOFT=y
@@ -3383,10 +3399,10 @@ CONFIG_SKFP=m
 # CONFIG_HIPPI is not set
 CONFIG_NET_SB1000=m
 CONFIG_PHYLINK=m
-CONFIG_PHYLIB=m
+CONFIG_PHYLIB=y
 CONFIG_SWPHY=y
 CONFIG_LED_TRIGGER_PHY=y
-CONFIG_FIXED_PHY=m
+CONFIG_FIXED_PHY=y
 CONFIG_SFP=m
 
 #
@@ -3438,15 +3454,11 @@ CONFIG_DP83869_PHY=m
 CONFIG_VITESSE_PHY=m
 CONFIG_XILINX_GMII2RGMII=m
 CONFIG_MICREL_KS8995MA=m
-
-#
-# MCTP Device Drivers
-#
-CONFIG_MDIO_DEVICE=m
-CONFIG_MDIO_BUS=m
-CONFIG_FWNODE_MDIO=m
-CONFIG_ACPI_MDIO=m
-CONFIG_MDIO_DEVRES=m
+CONFIG_MDIO_DEVICE=y
+CONFIG_MDIO_BUS=y
+CONFIG_FWNODE_MDIO=y
+CONFIG_ACPI_MDIO=y
+CONFIG_MDIO_DEVRES=y
 CONFIG_MDIO_BITBANG=m
 CONFIG_MDIO_BCM_UNIMAC=m
 CONFIG_MDIO_CAVIUM=m
@@ -3740,7 +3752,9 @@ CONFIG_MT7663_USB_SDIO_COMMON=m
 CONFIG_MT7663U=m
 CONFIG_MT7663S=m
 CONFIG_MT7915E=m
+CONFIG_MT7921_COMMON=m
 CONFIG_MT7921E=m
+CONFIG_MT7921S=m
 CONFIG_WLAN_VENDOR_MICROCHIP=y
 CONFIG_WILC1000=m
 CONFIG_WILC1000_SDIO=m
@@ -3812,6 +3826,12 @@ CONFIG_RTW88_8723DE=m
 CONFIG_RTW88_8821CE=m
 CONFIG_RTW88_DEBUG=y
 CONFIG_RTW88_DEBUGFS=y
+CONFIG_RTW89=m
+CONFIG_RTW89_CORE=m
+CONFIG_RTW89_PCI=m
+CONFIG_RTW89_8852AE=m
+# CONFIG_RTW89_DEBUGMSG is not set
+# CONFIG_RTW89_DEBUGFS is not set
 CONFIG_WLAN_VENDOR_RSI=y
 CONFIG_RSI_91X=m
 CONFIG_RSI_DEBUGFS=y
@@ -3955,6 +3975,7 @@ CONFIG_KEYBOARD_TWL4030=m
 CONFIG_KEYBOARD_XTKBD=m
 CONFIG_KEYBOARD_CROS_EC=m
 CONFIG_KEYBOARD_MTK_PMIC=m
+CONFIG_KEYBOARD_CYPRESS_SF=y
 CONFIG_INPUT_MOUSE=y
 CONFIG_MOUSE_PS2=m
 CONFIG_MOUSE_PS2_ALPS=y
@@ -4320,6 +4341,7 @@ CONFIG_HVC_DRIVER=y
 CONFIG_HVC_IRQ=y
 CONFIG_HVC_XEN=y
 CONFIG_HVC_XEN_FRONTEND=y
+# CONFIG_RPMSG_TTY is not set
 CONFIG_SERIAL_DEV_BUS=y
 CONFIG_SERIAL_DEV_CTRL_TTYPORT=y
 CONFIG_PRINTER=m
@@ -4333,6 +4355,7 @@ CONFIG_IPMI_PLAT_DATA=y
 CONFIG_IPMI_DEVICE_INTERFACE=m
 CONFIG_IPMI_SI=m
 CONFIG_IPMI_SSIF=m
+# CONFIG_IPMI_IPMB is not set
 CONFIG_IPMI_WATCHDOG=m
 CONFIG_IPMI_POWEROFF=m
 CONFIG_IPMB_DEVICE_INTERFACE=m
@@ -4592,6 +4615,10 @@ CONFIG_PINCTRL_MCP23S08_I2C=m
 CONFIG_PINCTRL_MCP23S08_SPI=m
 CONFIG_PINCTRL_MCP23S08=m
 CONFIG_PINCTRL_SX150X=y
+
+#
+# Intel pinctrl drivers
+#
 CONFIG_PINCTRL_BAYTRAIL=y
 CONFIG_PINCTRL_CHERRYVIEW=y
 CONFIG_PINCTRL_LYNXPOINT=y
@@ -4610,6 +4637,7 @@ CONFIG_PINCTRL_LAKEFIELD=y
 CONFIG_PINCTRL_LEWISBURG=y
 CONFIG_PINCTRL_SUNRISEPOINT=y
 CONFIG_PINCTRL_TIGERLAKE=y
+# end of Intel pinctrl drivers
 
 #
 # Renesas pinctrl drivers
@@ -4942,6 +4970,7 @@ CONFIG_SENSORS_MAX1668=m
 CONFIG_SENSORS_MAX197=m
 CONFIG_SENSORS_MAX31722=m
 CONFIG_SENSORS_MAX31730=m
+CONFIG_SENSORS_MAX6620=y
 CONFIG_SENSORS_MAX6621=m
 CONFIG_SENSORS_MAX6639=m
 CONFIG_SENSORS_MAX6642=m
@@ -5337,7 +5366,6 @@ CONFIG_MFD_TPS65910=y
 CONFIG_MFD_TPS65912=m
 CONFIG_MFD_TPS65912_I2C=m
 CONFIG_MFD_TPS65912_SPI=m
-CONFIG_MFD_TPS80031=y
 CONFIG_TWL4030_CORE=y
 CONFIG_MFD_TWL4030_AUDIO=y
 CONFIG_TWL6040_CORE=y
@@ -5457,7 +5485,6 @@ CONFIG_REGULATOR_TPS6524X=m
 CONFIG_REGULATOR_TPS6586X=m
 CONFIG_REGULATOR_TPS65910=m
 CONFIG_REGULATOR_TPS65912=m
-CONFIG_REGULATOR_TPS80031=m
 CONFIG_REGULATOR_TWL4030=m
 CONFIG_REGULATOR_WM831X=m
 CONFIG_REGULATOR_WM8350=m
@@ -5497,12 +5524,15 @@ CONFIG_IR_TTUSBIR=m
 CONFIG_RC_LOOPBACK=m
 CONFIG_IR_SERIAL=m
 CONFIG_IR_SERIAL_TRANSMITTER=y
-CONFIG_IR_SIR=m
 CONFIG_RC_XBOX_DVD=m
 CONFIG_IR_TOY=m
 CONFIG_CEC_CORE=y
 CONFIG_CEC_NOTIFIER=y
 CONFIG_CEC_PIN=y
+
+#
+# CEC support
+#
 CONFIG_MEDIA_CEC_RC=y
 # CONFIG_CEC_PIN_ERROR_INJ is not set
 CONFIG_MEDIA_CEC_SUPPORT=y
@@ -5513,6 +5543,8 @@ CONFIG_CEC_SECO=m
 CONFIG_CEC_SECO_RC=y
 CONFIG_USB_PULSE8_CEC=m
 CONFIG_USB_RAINSHADOW_CEC=m
+# end of CEC support
+
 CONFIG_MEDIA_SUPPORT=m
 CONFIG_MEDIA_SUPPORT_FILTER=y
 CONFIG_MEDIA_SUBDRV_AUTOSELECT=y
@@ -5556,10 +5588,6 @@ CONFIG_VIDEOBUF_VMALLOC=m
 #
 CONFIG_MEDIA_CONTROLLER_DVB=y
 CONFIG_MEDIA_CONTROLLER_REQUEST_API=y
-
-#
-# Please notice that the enabled Media controller Request API is EXPERIMENTAL
-#
 # end of Media controller options
 
 #
@@ -5935,6 +5963,7 @@ CONFIG_VIDEO_M52790=m
 CONFIG_VIDEO_APTINA_PLL=m
 CONFIG_VIDEO_CCS_PLL=m
 CONFIG_VIDEO_HI556=m
+# CONFIG_VIDEO_HI846 is not set
 CONFIG_VIDEO_IMX208=m
 CONFIG_VIDEO_IMX214=m
 CONFIG_VIDEO_IMX219=m
@@ -5966,6 +5995,7 @@ CONFIG_VIDEO_OV9640=m
 CONFIG_VIDEO_OV9650=m
 CONFIG_VIDEO_OV9734=m
 CONFIG_VIDEO_OV13858=m
+# CONFIG_VIDEO_OV13B10 is not set
 CONFIG_VIDEO_VS6624=m
 CONFIG_VIDEO_MT9M001=m
 CONFIG_VIDEO_MT9M032=m
@@ -6730,6 +6760,9 @@ CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m
 CONFIG_SND_SOC_AMD_RENOIR=m
 CONFIG_SND_SOC_AMD_RENOIR_MACH=m
 CONFIG_SND_SOC_AMD_ACP5x=m
+# CONFIG_SND_SOC_AMD_VANGOGH_MACH is not set
+# CONFIG_SND_SOC_AMD_ACP6x is not set
+# CONFIG_SND_SOC_AMD_ACP_COMMON is not set
 CONFIG_SND_ATMEL_SOC=m
 # CONFIG_SND_BCM63XX_I2S_WHISTLER is not set
 CONFIG_SND_DESIGNWARE_I2S=m
@@ -6820,6 +6853,7 @@ CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m
 CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m
 CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
 CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m
+# CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH is not set
 CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m
 CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
 CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
@@ -6911,6 +6945,8 @@ CONFIG_SND_SOC_CS35L33=m
 CONFIG_SND_SOC_CS35L34=m
 CONFIG_SND_SOC_CS35L35=m
 CONFIG_SND_SOC_CS35L36=m
+# CONFIG_SND_SOC_CS35L41_SPI is not set
+# CONFIG_SND_SOC_CS35L41_I2C is not set
 CONFIG_SND_SOC_CS42L42=m
 CONFIG_SND_SOC_CS42L51=m
 CONFIG_SND_SOC_CS42L51_I2C=m
@@ -6951,6 +6987,7 @@ CONFIG_SND_SOC_MAX98357A=m
 CONFIG_SND_SOC_MAX98504=m
 CONFIG_SND_SOC_MAX9867=m
 CONFIG_SND_SOC_MAX98927=m
+# CONFIG_SND_SOC_MAX98520 is not set
 CONFIG_SND_SOC_MAX98373=m
 CONFIG_SND_SOC_MAX98373_I2C=m
 CONFIG_SND_SOC_MAX98373_SDW=m
@@ -7003,6 +7040,7 @@ CONFIG_SND_SOC_RT5677_SPI=m
 CONFIG_SND_SOC_RT5682=m
 CONFIG_SND_SOC_RT5682_I2C=m
 CONFIG_SND_SOC_RT5682_SDW=m
+CONFIG_SND_SOC_RT5682S=m
 CONFIG_SND_SOC_RT700=m
 CONFIG_SND_SOC_RT700_SDW=m
 CONFIG_SND_SOC_RT711=m
@@ -7011,6 +7049,7 @@ CONFIG_SND_SOC_RT711_SDCA_SDW=m
 CONFIG_SND_SOC_RT715=m
 CONFIG_SND_SOC_RT715_SDW=m
 CONFIG_SND_SOC_RT715_SDCA_SDW=m
+# CONFIG_SND_SOC_RT9120 is not set
 # CONFIG_SND_SOC_SDW_MOCKUP is not set
 CONFIG_SND_SOC_SGTL5000=m
 CONFIG_SND_SOC_SI476X=m
@@ -7094,6 +7133,7 @@ CONFIG_SND_SOC_MT6660=m
 CONFIG_SND_SOC_NAU8315=m
 CONFIG_SND_SOC_NAU8540=m
 CONFIG_SND_SOC_NAU8810=m
+# CONFIG_SND_SOC_NAU8821 is not set
 CONFIG_SND_SOC_NAU8822=m
 CONFIG_SND_SOC_NAU8824=m
 CONFIG_SND_SOC_NAU8825=m
@@ -7167,6 +7207,7 @@ CONFIG_HID_KYE=m
 CONFIG_HID_UCLOGIC=m
 CONFIG_HID_WALTOP=m
 CONFIG_HID_VIEWSONIC=m
+# CONFIG_HID_XIAOMI is not set
 CONFIG_HID_GYRATION=m
 CONFIG_HID_ICADE=m
 CONFIG_HID_ITE=m
@@ -7190,6 +7231,7 @@ CONFIG_HID_REDRAGON=m
 CONFIG_HID_MICROSOFT=m
 CONFIG_HID_MONTEREY=m
 CONFIG_HID_MULTITOUCH=m
+# CONFIG_HID_NINTENDO is not set
 CONFIG_HID_NTI=m
 CONFIG_HID_NTRIG=m
 CONFIG_HID_ORTEK=m
@@ -7968,7 +8010,6 @@ CONFIG_RTC_DRV_BQ32K=m
 CONFIG_RTC_DRV_PALMAS=m
 CONFIG_RTC_DRV_TPS6586X=m
 CONFIG_RTC_DRV_TPS65910=m
-CONFIG_RTC_DRV_TPS80031=m
 CONFIG_RTC_DRV_RC5T583=m
 CONFIG_RTC_DRV_S35390A=m
 CONFIG_RTC_DRV_FM3130=m
@@ -8141,6 +8182,7 @@ CONFIG_ACRN_HSM=m
 CONFIG_VIRTIO=y
 CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS=y
 CONFIG_VIRTIO_PCI_LIB=m
+CONFIG_VIRTIO_PCI_LIB_LEGACY=m
 CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_PCI_LEGACY=y
@@ -8161,6 +8203,7 @@ CONFIG_IFCVF=m
 CONFIG_MLX5_VDPA=y
 CONFIG_MLX5_VDPA_NET=m
 CONFIG_VP_VDPA=m
+# CONFIG_ALIBABA_ENI_VDPA is not set
 CONFIG_VHOST_IOTLB=m
 CONFIG_VHOST_RING=m
 CONFIG_VHOST=m
@@ -8198,6 +8241,7 @@ CONFIG_XEN_GNTDEV_DMABUF=y
 CONFIG_XEN_GRANT_DEV_ALLOC=m
 CONFIG_XEN_GRANT_DMA_ALLOC=y
 CONFIG_SWIOTLB_XEN=y
+CONFIG_XEN_PCI_STUB=y
 CONFIG_XEN_PCIDEV_BACKEND=m
 CONFIG_XEN_PVCALLS_FRONTEND=m
 CONFIG_XEN_PVCALLS_BACKEND=y
@@ -8228,7 +8272,6 @@ CONFIG_RTL8192E=m
 CONFIG_RTL8723BS=m
 CONFIG_R8712U=m
 CONFIG_R8188EU=m
-CONFIG_88EU_AP_MODE=y
 CONFIG_RTS5208=m
 CONFIG_VT6655=m
 CONFIG_VT6656=m
@@ -8345,6 +8388,7 @@ CONFIG_WMI_BMOF=m
 CONFIG_HUAWEI_WMI=m
 CONFIG_MXM_WMI=m
 CONFIG_PEAQ_WMI=m
+CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
 CONFIG_XIAOMI_WMI=m
 CONFIG_GIGABYTE_WMI=m
 CONFIG_ACERHDF=m
@@ -8394,6 +8438,7 @@ CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y
 CONFIG_THINKPAD_ACPI_VIDEO=y
 CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y
 CONFIG_THINKPAD_LMI=m
+CONFIG_X86_PLATFORM_DRIVERS_INTEL=y
 CONFIG_INTEL_ATOMISP2_PDX86=y
 CONFIG_INTEL_ATOMISP2_LED=m
 CONFIG_INTEL_SAR_INT1092=m
@@ -8420,6 +8465,7 @@ CONFIG_INTEL_INT0002_VGPIO=m
 CONFIG_INTEL_OAKTRAIL=m
 CONFIG_INTEL_BXTWC_PMIC_TMU=m
 CONFIG_INTEL_CHTDC_TI_PWRBTN=m
+# CONFIG_INTEL_ISHTP_ECLITE is not set
 CONFIG_INTEL_MRFLD_PWRBTN=m
 CONFIG_INTEL_PUNIT_IPC=m
 CONFIG_INTEL_RST=m
@@ -8429,6 +8475,7 @@ CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
 CONFIG_MSI_LAPTOP=m
 CONFIG_MSI_WMI=m
 CONFIG_PCENGINES_APU2=m
+# CONFIG_BARCO_P50_GPIO is not set
 CONFIG_SAMSUNG_LAPTOP=m
 CONFIG_SAMSUNG_Q10=m
 CONFIG_ACPI_TOSHIBA=m
@@ -8480,6 +8527,7 @@ CONFIG_WILCO_EC_TELEMETRY=m
 CONFIG_MELLANOX_PLATFORM=y
 CONFIG_MLXREG_HOTPLUG=m
 CONFIG_MLXREG_IO=m
+# CONFIG_MLXREG_LC is not set
 CONFIG_SURFACE_PLATFORMS=y
 CONFIG_SURFACE3_WMI=m
 CONFIG_SURFACE_3_BUTTON=m
@@ -8499,14 +8547,6 @@ CONFIG_HAVE_CLK=y
 CONFIG_HAVE_CLK_PREPARE=y
 CONFIG_COMMON_CLK=y
 CONFIG_COMMON_CLK_WM831X=m
-
-#
-# Clock driver for ARM Reference designs
-#
-# CONFIG_ICST is not set
-# CONFIG_CLK_SP810 is not set
-# end of Clock driver for ARM Reference designs
-
 CONFIG_LMK04832=m
 CONFIG_COMMON_CLK_MAX9485=m
 CONFIG_COMMON_CLK_SI5341=m
@@ -8692,6 +8732,10 @@ CONFIG_IIO_TRIGGERED_EVENT=m
 #
 CONFIG_ADIS16201=m
 CONFIG_ADIS16209=m
+# CONFIG_ADXL313_I2C is not set
+# CONFIG_ADXL313_SPI is not set
+# CONFIG_ADXL355_I2C is not set
+# CONFIG_ADXL355_SPI is not set
 CONFIG_ADXL372=m
 CONFIG_ADXL372_SPI=m
 CONFIG_ADXL372_I2C=m
@@ -8840,11 +8884,13 @@ CONFIG_PMS7003=m
 CONFIG_SCD30_CORE=m
 CONFIG_SCD30_I2C=m
 CONFIG_SCD30_SERIAL=m
+# CONFIG_SCD4X is not set
 CONFIG_SENSIRION_SGP30=m
 CONFIG_SENSIRION_SGP40=m
 CONFIG_SPS30=m
 CONFIG_SPS30_I2C=m
 CONFIG_SPS30_SERIAL=m
+# CONFIG_SENSEAIR_SUNRISE_CO2 is not set
 CONFIG_VZ89X=m
 # end of Chemical Sensors
 
@@ -8937,6 +8983,7 @@ CONFIG_AD9523=m
 #
 CONFIG_ADF4350=m
 CONFIG_ADF4371=m
+# CONFIG_ADRF6780 is not set
 # end of Phase-Locked Loop (PLL) frequency synthesizers
 # end of Frequency Synthesizers DDS/PLL
 
@@ -9232,6 +9279,7 @@ CONFIG_TMP117=m
 CONFIG_TSYS01=m
 CONFIG_TSYS02D=m
 CONFIG_MAX31856=m
+# CONFIG_MAX31865 is not set
 # end of Temperature sensors
 
 CONFIG_NTB=m
@@ -9280,7 +9328,13 @@ CONFIG_RESET_TI_SYSCON=m
 CONFIG_GENERIC_PHY=y
 CONFIG_USB_LGM_PHY=m
 CONFIG_PHY_CAN_TRANSCEIVER=m
+
+#
+# PHY drivers for Broadcom platforms
+#
 CONFIG_BCM_KONA_USB2_PHY=m
+# end of PHY drivers for Broadcom platforms
+
 CONFIG_PHY_PXA_28NM_HSIC=m
 CONFIG_PHY_PXA_28NM_USB2=m
 CONFIG_PHY_CPCAP_USB=m
@@ -9680,6 +9734,7 @@ CONFIG_EROFS_FS_XATTR=y
 CONFIG_EROFS_FS_POSIX_ACL=y
 CONFIG_EROFS_FS_SECURITY=y
 CONFIG_EROFS_FS_ZIP=y
+# CONFIG_EROFS_FS_ZIP_LZMA is not set
 CONFIG_VBOXSF_FS=m
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=m
@@ -9838,7 +9893,6 @@ CONFIG_SECURITY_PATH=y
 CONFIG_LSM_MMAP_MIN_ADDR=65536
 CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
 CONFIG_HARDENED_USERCOPY=y
-CONFIG_HARDENED_USERCOPY_FALLBACK=y
 CONFIG_FORTIFY_SOURCE=y
 # CONFIG_STATIC_USERMODEHELPER is not set
 CONFIG_SECURITY_SELINUX=y
@@ -10228,6 +10282,7 @@ CONFIG_XZ_DEC_IA64=y
 CONFIG_XZ_DEC_ARM=y
 CONFIG_XZ_DEC_ARMTHUMB=y
 CONFIG_XZ_DEC_SPARC=y
+# CONFIG_XZ_DEC_MICROLZMA is not set
 CONFIG_XZ_DEC_BCJ=y
 # CONFIG_XZ_DEC_TEST is not set
 CONFIG_DECOMPRESS_GZIP=y
@@ -10586,6 +10641,8 @@ CONFIG_HIST_TRIGGERS=y
 # CONFIG_HIST_TRIGGERS_DEBUG is not set
 # CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
 # CONFIG_SAMPLES is not set
+CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y
+CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y
 CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
 CONFIG_STRICT_DEVMEM=y
 CONFIG_IO_STRICT_DEVMEM=y
@@ -10631,7 +10688,6 @@ CONFIG_RUNTIME_TESTING_MENU=y
 # CONFIG_LKDTM is not set
 # CONFIG_TEST_MIN_HEAP is not set
 # CONFIG_TEST_DIV64 is not set
-# CONFIG_KPROBES_SANITY_TEST is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_RBTREE_TEST is not set
 # CONFIG_REED_SOLOMON_TEST is not set