summary | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: blacksky3, 2023-07-16 13:00:46 -0400
committer: blacksky3, 2023-07-16 13:00:46 -0400
commit: c236c2f62cf9cd2120493943a052917ffb87cec9 (patch)
tree: 0d7db89fff18c8f5b8b0f3fa15e5420337352d7e
parent: 64f4d6a3842c57240c0be0eac2934180497c0640 (diff)
download: aur-c236c2f62cf9cd2120493943a052917ffb87cec9.tar.gz
latest llvm11 release
-rw-r--r-- .SRCINFO 14
-rw-r--r-- PKGBUILD 30
-rw-r--r-- cuda-version-detection.patch 378
-rw-r--r-- no-strict-aliasing-DwarfCompileUnit.patch 13
-rw-r--r-- stack-clash-fixes.patch 870
-rw-r--r-- utils-benchmark-fix-missing-include.patch 21
6 files changed, 11 insertions, 1315 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 504b5e56f2e7..cf2aed5bcdbb 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,5 +1,5 @@
pkgbase = llvm11-minimal
- pkgver = 11.0.0
+ pkgver = 11.1.0
pkgrel = 1
url = https://llvm.org/
arch = x86_64
@@ -21,23 +21,15 @@ pkgbase = llvm11-minimal
makedepends = gcc12-libs
options = staticlibs
options = !lto
- source = https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-11.0.0.tar.gz
+ source = https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-11.1.0.tar.gz
source = https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/refs/tags/v11.0.0.tar.gz
source = llvm-config.h
- source = stack-clash-fixes.patch
source = amdgpu-avoid-an-illegal-operand-in-si-shrink-instr.patch
- source = utils-benchmark-fix-missing-include.patch
- source = no-strict-aliasing-DwarfCompileUnit.patch
- source = cuda-version-detection.patch
source = enable-SSP-and-PIE-by-default.patch
- sha256sums = 8ad4ddbafac4f2c8f2ea523c2c4196f940e8e16f9e635210537582a48622a5d5
+ sha256sums = 53a0719f3f4b0388013cfffd7b10c7d5682eece1929a9553c722348d1f866e79
sha256sums = 6464a722278d37fca783cb505caf44cc8473c22fd22ff6a5d07198bc92059c4f
sha256sums = 597dc5968c695bbdbb0eac9e8eb5117fcd2773bc91edf5ec103ecffffab8bc48
- sha256sums = bdcaa7559223bd42a381086f7cc23fc73f88ebb1966a7c235f897db0f73b7d20
sha256sums = 85b6977005899bc76fcc548e0b6501cae5f50a8ad03060b9f58d03d775323327
- sha256sums = 5f666675fd45848e4c4b0f94068f7648dd9ff88df4a7b19d2a9f2b83ee358a7e
- sha256sums = d1eff24508e35aae6c26a943dbaa3ef5acb60a145b008fd1ef9ac6f6c4faa662
- sha256sums = 757dc5a288f6847d38e320c364d48fb6454aef25514b2346030b623842ac904e
sha256sums = 248a0e8609b00689e82ce5e05e1de58b7c8ae09a35bbb9625e9069e1f13d2fec
pkgname = llvm11-minimal
diff --git a/PKGBUILD b/PKGBUILD
index 4f24002e1b65..aa8cb364ab35 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -12,9 +12,9 @@
pkgbase=llvm11-minimal
pkgname=(llvm11-minimal clang11-minimal llvm11-libs-minimal clang11-libs-minimal spirv-llvm-translator11-minimal)
url='https://llvm.org/'
-pkgver=11.0.0
+pkgver=11.1.0
pkgrel=1
-_pkgver=11.0.0
+spirvllvmver=11.0.0
arch=(x86_64)
license=('custom:Apache 2.0 with LLVM Exception')
makedepends=(cmake ninja zlib zstd libffi libedit ncurses
@@ -22,13 +22,9 @@ makedepends=(cmake ninja zlib zstd libffi libedit ncurses
python-recommonmark gcc12 gcc12-fortran gcc12-libs)
options=(staticlibs !lto) # Getting thousands of test failures with LTO
source=(https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-${pkgver}.tar.gz
- https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/refs/tags/v${pkgver}.tar.gz
+ https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/refs/tags/v${spirvllvmver}.tar.gz
llvm-config.h
- stack-clash-fixes.patch
amdgpu-avoid-an-illegal-operand-in-si-shrink-instr.patch
- utils-benchmark-fix-missing-include.patch
- no-strict-aliasing-DwarfCompileUnit.patch
- cuda-version-detection.patch
enable-SSP-and-PIE-by-default.patch)
# Both ninja & LIT by default use all available cores. this can lead to heavy stress on systems making them unresponsive.
@@ -36,7 +32,7 @@ source=(https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-${pkgver}
# A reasonable value for them to avoid these issues appears to be 75% of available cores.
# NINJAFLAGS and LITFLAGS are env vars that can be used to achieve this. They should be set on command line or in files read by your shell on login (like .bashrc ) .
# example for systems with 24 cores
-# NINJAFLAGS="-j 18 -l 18"
+NINJAFLAGS="-j 18 -l 18"
# LITFLAGS="-j 18"
# NOTE: It's your responbility to validate the value of NINJAFLAGS and LITFLAGS. If unsure, don't set it.
@@ -64,17 +60,11 @@ _get_distribution_components(){
prepare(){
cd ${srcdir}/llvm-project-llvmorg-${pkgver}/llvm
- # https://bugs.llvm.org/show_bug.cgi?id=48007
- patch -Np2 -i ${srcdir}/stack-clash-fixes.patch
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4107
# https://bugs.llvm.org/show_bug.cgi?id=48921#c2
patch -Np2 -i ${srcdir}/amdgpu-avoid-an-illegal-operand-in-si-shrink-instr.patch
- patch -Np2 -i ${srcdir}/utils-benchmark-fix-missing-include.patch
- # https://bugs.llvm.org/show_bug.cgi?id=50611#c3
- patch -Np2 -i ${srcdir}/no-strict-aliasing-DwarfCompileUnit.patch
cd ${srcdir}/llvm-project-llvmorg-${pkgver}/clang
- patch -Np2 -i ${srcdir}/cuda-version-detection.patch
patch -Np2 -i ${srcdir}/enable-SSP-and-PIE-by-default.patch
# Attempt to convert script to Python 3
@@ -123,10 +113,10 @@ export CXXFLAGS+=" ${CPPFLAGS}"
-DLLVM_ENABLE_DOXYGEN=OFF
-DLLVM_ENABLE_BINDINGS=OFF
-DLLVM_ENABLE_PROJECTS="compiler-rt;clang-tools-extra;clang"
- -DCOMPILER_RT_INSTALL_PATH=/opt/llvm11/lib/clang/$_pkgver
+ -DCOMPILER_RT_INSTALL_PATH=/opt/llvm11/lib/clang/$pkgver
-DLLVM_ENABLE_DUMP=ON
-DLLVM_EXTERNAL_PROJECTS="SPIRV-LLVM-Translator"
- -DLLVM_EXTERNAL_SPIRV_LLVM_TRANSLATOR_SOURCE_DIR="$srcdir"/SPIRV-LLVM-Translator-${pkgver}
+ -DLLVM_EXTERNAL_SPIRV_LLVM_TRANSLATOR_SOURCE_DIR="$srcdir"/SPIRV-LLVM-Translator-${spirvllvmver}
#-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=/usr/include/spirv/
-DLLVM_SPIRV_INCLUDE_TESTS=OFF
-DLLVM_LIT_ARGS="$LITFLAGS"" -sv --ignore-fail"
@@ -308,17 +298,13 @@ package_spirv-llvm-translator11-minimal(){
cp --preserve --recursive "$srcdir"/spirv/* "$pkgdir"/
- install -Dm644 "${srcdir}/SPIRV-LLVM-Translator-${pkgver}/LICENSE.TXT" "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
+ install -Dm644 "${srcdir}/SPIRV-LLVM-Translator-${spirvllvmver}/LICENSE.TXT" "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
}
-sha256sums=('8ad4ddbafac4f2c8f2ea523c2c4196f940e8e16f9e635210537582a48622a5d5'
+sha256sums=('53a0719f3f4b0388013cfffd7b10c7d5682eece1929a9553c722348d1f866e79'
'6464a722278d37fca783cb505caf44cc8473c22fd22ff6a5d07198bc92059c4f'
'597dc5968c695bbdbb0eac9e8eb5117fcd2773bc91edf5ec103ecffffab8bc48'
- 'bdcaa7559223bd42a381086f7cc23fc73f88ebb1966a7c235f897db0f73b7d20'
'85b6977005899bc76fcc548e0b6501cae5f50a8ad03060b9f58d03d775323327'
- '5f666675fd45848e4c4b0f94068f7648dd9ff88df4a7b19d2a9f2b83ee358a7e'
- 'd1eff24508e35aae6c26a943dbaa3ef5acb60a145b008fd1ef9ac6f6c4faa662'
- '757dc5a288f6847d38e320c364d48fb6454aef25514b2346030b623842ac904e'
'248a0e8609b00689e82ce5e05e1de58b7c8ae09a35bbb9625e9069e1f13d2fec')
# vim:set ts=8 sts=2 sw=2 et:
diff --git a/cuda-version-detection.patch b/cuda-version-detection.patch
deleted file mode 100644
index 7a1067887af9..000000000000
--- a/cuda-version-detection.patch
+++ /dev/null
@@ -1,378 +0,0 @@
-From d50044e809d2c15c56df0ea808f047a2c81d7344 Mon Sep 17 00:00:00 2001
-From: Artem Belevich <tra@google.com>
-Date: Mon, 19 Oct 2020 16:41:51 -0700
-Subject: [PATCH 1/2] [CUDA] Improve clang's ability to detect recent CUDA
- versions.
-
-CUDA-11.1 does not carry version.txt which causes clang to assume that it's
-CUDA-7.0, which used to be the only CUDA version w/o version.txt.
-
-In order to tell CUDA-7.0 apart from the new versions, clang now probes for the
-presence of libdevice.10.bc which is not present in the old CUDA versions.
-
-This should keep Clang working for CUDA-11.1.
-
-PR47332: https://bugs.llvm.org/show_bug.cgi?id=47332
-
-Differential Revision: https://reviews.llvm.org/D89752
-
-(cherry picked from commit 65d206484c54177641d4b11d42cab1f1acc8c0c7)
----
- clang/lib/Driver/ToolChains/Cuda.cpp | 11 ++++++++---
- .../Driver/Inputs/CUDA_111/usr/local/cuda/bin/.keep | 0
- .../Inputs/CUDA_111/usr/local/cuda/include/.keep | 0
- .../Driver/Inputs/CUDA_111/usr/local/cuda/lib/.keep | 0
- .../Driver/Inputs/CUDA_111/usr/local/cuda/lib64/.keep | 0
- .../usr/local/cuda/nvvm/libdevice/libdevice.10.bc | 0
- clang/test/Driver/cuda-version-check.cu | 7 ++++++-
- 7 files changed, 14 insertions(+), 4 deletions(-)
- create mode 100644 clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/bin/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/include/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/lib/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/lib64/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
-
-diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
-index 110a0bca9bc1..cfd9dae0fa91 100644
---- a/clang/lib/Driver/ToolChains/Cuda.cpp
-+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
-@@ -155,9 +155,14 @@ CudaInstallationDetector::CudaInstallationDetector(
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
- FS.getBufferForFile(InstallPath + "/version.txt");
- if (!VersionFile) {
-- // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
-- // version.txt isn't present.
-- Version = CudaVersion::CUDA_70;
-+ // CUDA 7.0 and CUDA 11.1+ do not have version.txt file.
-+ // Use libdevice file to distinguish 7.0 from the new versions.
-+ if (FS.exists(LibDevicePath + "/libdevice.10.bc")) {
-+ Version = CudaVersion::LATEST;
-+ DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
-+ } else {
-+ Version = CudaVersion::CUDA_70;
-+ }
- } else {
- ParseCudaVersionFile((*VersionFile)->getBuffer());
- }
-diff --git a/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/bin/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/include/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/lib/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/lib64/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/cuda-version-check.cu b/clang/test/Driver/cuda-version-check.cu
-index a09b248304f2..1e6af029202f 100644
---- a/clang/test/Driver/cuda-version-check.cu
-+++ b/clang/test/Driver/cuda-version-check.cu
-@@ -10,6 +10,11 @@
- // RUN: FileCheck %s --check-prefix=OK
- // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
- // RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION
-+// CUDA versions after 11.0 (update 1) do not carry version.txt file. Make sure
-+// we still detect them as a new version and handle them the same as we handle
-+// other new CUDA versions.
-+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda 2>&1 %s | \
-+// RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION
- // Make sure that we don't warn about CUDA version during C++ compilation.
- // RUN: %clang --target=x86_64-linux -v -### -x c++ --cuda-gpu-arch=sm_60 \
- // RUN: --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
-@@ -65,5 +70,5 @@
- // ERR_SM61: error: GPU arch sm_61 {{.*}}
- // ERR_SM61-NOT: error: GPU arch sm_61
-
--// UNKNOWN_VERSION: Unknown CUDA version 999.999. Assuming the latest supported version
-+// UNKNOWN_VERSION: Unknown CUDA version {{.*}}. Assuming the latest supported version
- // UNKNOWN_VERSION_CXX-NOT: Unknown CUDA version
-
-From 06f479cba3a09ef47326ea69e719d2aa1c0fba4c Mon Sep 17 00:00:00 2001
-From: Artem Belevich <tra@google.com>
-Date: Tue, 20 Oct 2020 15:11:38 -0700
-Subject: [PATCH 2/2] [CUDA] Extract CUDA version from cuda.h if version.txt is
- not found
-
-If CUDA version can not be determined based on version.txt file, attempt to find
-CUDA_VERSION macro in cuda.h.
-
-This is a follow-up to D89752,
-
-Differntial Revision: https://reviews.llvm.org/D89832
-
-(cherry picked from commit e7fe125b776bf08d95e60ff3354a5c836218a0e6)
----
- .../clang/Basic/DiagnosticDriverKinds.td | 2 +-
- clang/lib/Driver/ToolChains/Cuda.cpp | 118 +++++++++++++-----
- clang/lib/Driver/ToolChains/Cuda.h | 3 -
- .../Inputs/CUDA_102/usr/local/cuda/bin/.keep | 0
- .../CUDA_102/usr/local/cuda/include/.keep | 0
- .../Inputs/CUDA_102/usr/local/cuda/lib/.keep | 0
- .../CUDA_102/usr/local/cuda/lib64/.keep | 0
- .../local/cuda/nvvm/libdevice/libdevice.10.bc | 0
- .../CUDA_102/usr/local/cuda/version.txt | 1 +
- .../CUDA_111/usr/local/cuda/include/cuda.h | 7 ++
- clang/test/Driver/cuda-version-check.cu | 14 ++-
- 11 files changed, 108 insertions(+), 37 deletions(-)
- create mode 100644 clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/bin/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/include/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/lib/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/lib64/.keep
- create mode 100644 clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
- create mode 100644 clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/version.txt
- create mode 100644 clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/include/cuda.h
-
-diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
-index 558639ecad6a..acdad15cdf6c 100644
---- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
-+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
-@@ -69,7 +69,7 @@ def err_drv_cuda_version_unsupported : Error<
- "install, pass a different GPU arch with --cuda-gpu-arch, or pass "
- "--no-cuda-version-check.">;
- def warn_drv_unknown_cuda_version: Warning<
-- "Unknown CUDA version %0. Assuming the latest supported version %1">,
-+ "Unknown CUDA version. %0 Assuming the latest supported version %1">,
- InGroup<CudaUnknownVersion>;
- def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
- def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
-diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
-index cfd9dae0fa91..ffc606dd554b 100644
---- a/clang/lib/Driver/ToolChains/Cuda.cpp
-+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
-@@ -16,6 +16,7 @@
- #include "clang/Driver/Driver.h"
- #include "clang/Driver/DriverDiagnostic.h"
- #include "clang/Driver/Options.h"
-+#include "llvm/ADT/Optional.h"
- #include "llvm/Option/ArgList.h"
- #include "llvm/Support/FileSystem.h"
- #include "llvm/Support/Host.h"
-@@ -32,29 +33,80 @@ using namespace clang::driver::tools;
- using namespace clang;
- using namespace llvm::opt;
-
-+namespace {
-+struct CudaVersionInfo {
-+ std::string DetectedVersion;
-+ CudaVersion Version;
-+};
- // Parses the contents of version.txt in an CUDA installation. It should
- // contain one line of the from e.g. "CUDA Version 7.5.2".
--void CudaInstallationDetector::ParseCudaVersionFile(llvm::StringRef V) {
-- Version = CudaVersion::UNKNOWN;
-+CudaVersionInfo parseCudaVersionFile(llvm::StringRef V) {
-+ V = V.trim();
- if (!V.startswith("CUDA Version "))
-- return;
-+ return {V.str(), CudaVersion::UNKNOWN};
- V = V.substr(strlen("CUDA Version "));
- SmallVector<StringRef,4> VersionParts;
- V.split(VersionParts, '.');
-- if (VersionParts.size() < 2)
-- return;
-- DetectedVersion = join_items(".", VersionParts[0], VersionParts[1]);
-- Version = CudaStringToVersion(DetectedVersion);
-- if (Version != CudaVersion::UNKNOWN) {
-- // TODO(tra): remove the warning once we have all features of 10.2 and 11.0
-- // implemented.
-- DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
-- return;
-- }
-+ return {"version.txt: " + V.str() + ".",
-+ VersionParts.size() < 2
-+ ? CudaVersion::UNKNOWN
-+ : CudaStringToVersion(
-+ join_items(".", VersionParts[0], VersionParts[1]))};
-+}
-+
-+CudaVersion getCudaVersion(uint32_t raw_version) {
-+ if (raw_version < 7050)
-+ return CudaVersion::CUDA_70;
-+ if (raw_version < 8000)
-+ return CudaVersion::CUDA_75;
-+ if (raw_version < 9000)
-+ return CudaVersion::CUDA_80;
-+ if (raw_version < 9010)
-+ return CudaVersion::CUDA_90;
-+ if (raw_version < 9020)
-+ return CudaVersion::CUDA_91;
-+ if (raw_version < 10000)
-+ return CudaVersion::CUDA_92;
-+ if (raw_version < 10010)
-+ return CudaVersion::CUDA_100;
-+ if (raw_version < 10020)
-+ return CudaVersion::CUDA_101;
-+ if (raw_version < 11000)
-+ return CudaVersion::CUDA_102;
-+ if (raw_version < 11010)
-+ return CudaVersion::CUDA_110;
-+ return CudaVersion::LATEST;
-+}
-
-- Version = CudaVersion::LATEST_SUPPORTED;
-- DetectedVersionIsNotSupported = true;
-+CudaVersionInfo parseCudaHFile(llvm::StringRef Input) {
-+ // Helper lambda which skips the words if the line starts with them or returns
-+ // None otherwise.
-+ auto StartsWithWords =
-+ [](llvm::StringRef Line,
-+ const SmallVector<StringRef, 3> words) -> llvm::Optional<StringRef> {
-+ for (StringRef word : words) {
-+ if (!Line.consume_front(word))
-+ return {};
-+ Line = Line.ltrim();
-+ }
-+ return Line;
-+ };
-+
-+ Input = Input.ltrim();
-+ while (!Input.empty()) {
-+ if (auto Line =
-+ StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
-+ uint32_t RawVersion;
-+ Line->consumeInteger(10, RawVersion);
-+ return {"cuda.h: CUDA_VERSION=" + Twine(RawVersion).str() + ".",
-+ getCudaVersion(RawVersion)};
-+ }
-+ // Find next non-empty line.
-+ Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
-+ }
-+ return {"cuda.h: CUDA_VERSION not found.", CudaVersion::UNKNOWN};
- }
-+} // namespace
-
- void CudaInstallationDetector::WarnIfUnsupportedVersion() {
- if (DetectedVersionIsNotSupported)
-@@ -152,21 +204,31 @@ CudaInstallationDetector::CudaInstallationDetector(
- else
- continue;
-
-- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
-- FS.getBufferForFile(InstallPath + "/version.txt");
-- if (!VersionFile) {
-- // CUDA 7.0 and CUDA 11.1+ do not have version.txt file.
-- // Use libdevice file to distinguish 7.0 from the new versions.
-- if (FS.exists(LibDevicePath + "/libdevice.10.bc")) {
-- Version = CudaVersion::LATEST;
-- DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
-- } else {
-- Version = CudaVersion::CUDA_70;
-- }
-- } else {
-- ParseCudaVersionFile((*VersionFile)->getBuffer());
-+ CudaVersionInfo VersionInfo = {"", CudaVersion::UNKNOWN};
-+ if (auto VersionFile = FS.getBufferForFile(InstallPath + "/version.txt"))
-+ VersionInfo = parseCudaVersionFile((*VersionFile)->getBuffer());
-+ // If version file didn't give us the version, try to find it in cuda.h
-+ if (VersionInfo.Version == CudaVersion::UNKNOWN)
-+ if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
-+ VersionInfo = parseCudaHFile((*CudaHFile)->getBuffer());
-+ // As the last resort, make an educated guess between CUDA-7.0, (which had
-+ // no version.txt file and had old-style libdevice bitcode ) and an unknown
-+ // recent CUDA version (no version.txt, new style bitcode).
-+ if (VersionInfo.Version == CudaVersion::UNKNOWN) {
-+ VersionInfo.Version = (FS.exists(LibDevicePath + "/libdevice.10.bc"))
-+ ? Version = CudaVersion::LATEST
-+ : Version = CudaVersion::CUDA_70;
-+ VersionInfo.DetectedVersion =
-+ "No version found in version.txt or cuda.h.";
- }
-
-+ Version = VersionInfo.Version;
-+ DetectedVersion = VersionInfo.DetectedVersion;
-+
-+ // TODO(tra): remove the warning once we have all features of 10.2
-+ // and 11.0 implemented.
-+ DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
-+
- if (Version >= CudaVersion::CUDA_90) {
- // CUDA-9+ uses single libdevice file for all GPU variants.
- std::string FilePath = LibDevicePath + "/libdevice.10.bc";
-diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
-index 873eb7338a30..bbf272c468a5 100644
---- a/clang/lib/Driver/ToolChains/Cuda.h
-+++ b/clang/lib/Driver/ToolChains/Cuda.h
-@@ -78,9 +78,6 @@ public:
- return LibDeviceMap.lookup(Gpu);
- }
- void WarnIfUnsupportedVersion();
--
--private:
-- void ParseCudaVersionFile(llvm::StringRef V);
- };
-
- namespace tools {
-diff --git a/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/bin/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/include/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/lib/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/lib64/.keep
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/version.txt b/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/version.txt
-new file mode 100644
-index 000000000000..cd34d385ddf5
---- /dev/null
-+++ b/clang/test/Driver/Inputs/CUDA_102/usr/local/cuda/version.txt
-@@ -0,0 +1 @@
-+CUDA Version 10.2.333
-diff --git a/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/include/cuda.h
-new file mode 100644
-index 000000000000..6ce5b747561d
---- /dev/null
-+++ b/clang/test/Driver/Inputs/CUDA_111/usr/local/cuda/include/cuda.h
-@@ -0,0 +1,7 @@
-+//
-+// Placeholder file for testing CUDA version detection
-+//
-+
-+#define CUDA_VERSION 11010
-+
-+//
-diff --git a/clang/test/Driver/cuda-version-check.cu b/clang/test/Driver/cuda-version-check.cu
-index 1e6af029202f..bc04794375a9 100644
---- a/clang/test/Driver/cuda-version-check.cu
-+++ b/clang/test/Driver/cuda-version-check.cu
-@@ -8,13 +8,15 @@
- // RUN: FileCheck %s --check-prefix=OK
- // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \
- // RUN: FileCheck %s --check-prefix=OK
-+// Test version guess when no version.txt or cuda.h are found
- // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
- // RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION
--// CUDA versions after 11.0 (update 1) do not carry version.txt file. Make sure
--// we still detect them as a new version and handle them the same as we handle
--// other new CUDA versions.
-+// Unknown version with version.txt present
-+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 %s | \
-+// RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION_V
-+// Unknown version with no version.txt but with version info present in cuda.h
- // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda 2>&1 %s | \
--// RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION
-+// RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION_H
- // Make sure that we don't warn about CUDA version during C++ compilation.
- // RUN: %clang --target=x86_64-linux -v -### -x c++ --cuda-gpu-arch=sm_60 \
- // RUN: --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
-@@ -70,5 +72,7 @@
- // ERR_SM61: error: GPU arch sm_61 {{.*}}
- // ERR_SM61-NOT: error: GPU arch sm_61
-
--// UNKNOWN_VERSION: Unknown CUDA version {{.*}}. Assuming the latest supported version
-+// UNKNOWN_VERSION_V: Unknown CUDA version. version.txt:{{.*}}. Assuming the latest supported version
-+// UNKNOWN_VERSION_H: Unknown CUDA version. cuda.h: CUDA_VERSION={{.*}}. Assuming the latest supported version
-+// UNKNOWN_VERSION: Unknown CUDA version. No version found in version.txt or cuda.h. Assuming the latest supported version
- // UNKNOWN_VERSION_CXX-NOT: Unknown CUDA version
diff --git a/no-strict-aliasing-DwarfCompileUnit.patch b/no-strict-aliasing-DwarfCompileUnit.patch
deleted file mode 100644
index c71688699e98..000000000000
--- a/no-strict-aliasing-DwarfCompileUnit.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt b/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt
-index eb924282a75e..85929b54d6ce 100644
---- a/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt
-+++ b/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt
-@@ -44,3 +44,8 @@ add_llvm_component_library(LLVMAsmPrinter
- Support
- Target
- )
-+
-+# https://bugs.llvm.org/show_bug.cgi?id=50611#c3
-+if (CMAKE_COMPILER_IS_GNUCXX)
-+ set_source_files_properties(DwarfCompileUnit.cpp PROPERTIES COMPILE_FLAGS -fno-strict-aliasing)
-+endif()
diff --git a/stack-clash-fixes.patch b/stack-clash-fixes.patch
deleted file mode 100644
index ca5a62390aa6..000000000000
--- a/stack-clash-fixes.patch
+++ /dev/null
@@ -1,870 +0,0 @@
-From a1e0363c7402f7aa58e24e0e6dfa447ebabc1910 Mon Sep 17 00:00:00 2001
-From: serge-sans-paille <sguelton@redhat.com>
-Date: Wed, 30 Sep 2020 11:35:00 +0200
-Subject: [PATCH 1/3] Fix limit behavior of dynamic alloca
-
-When the allocation size is 0, we shouldn't probe. Within [1, PAGE_SIZE], we
-should probe once etc.
-
-This fixes https://bugs.llvm.org/show_bug.cgi?id=47657
-
-Differential Revision: https://reviews.llvm.org/D88548
-
-(cherry picked from commit 9573c9f2a363da71b2c07a3add4e52721e6028a0)
----
- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
- llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 8 ++++----
- 2 files changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
-index fd1e6517dfac..f68ae4461fe3 100644
---- a/llvm/lib/Target/X86/X86ISelLowering.cpp
-+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
-@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
-
- BuildMI(testMBB, DL, TII->get(X86::JCC_1))
- .addMBB(tailMBB)
-- .addImm(X86::COND_L);
-+ .addImm(X86::COND_LE);
- testMBB->addSuccessor(blockMBB);
- testMBB->addSuccessor(tailMBB);
-
-diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-index bc4678564083..82fd67842c8a 100644
---- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
- ; CHECK-X86-64-NEXT: andq $-16, %rcx
- ; CHECK-X86-64-NEXT: subq %rcx, %rax
- ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
--; CHECK-X86-64-NEXT: jl .LBB0_3
-+; CHECK-X86-64-NEXT: jle .LBB0_3
- ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
- ; CHECK-X86-64-NEXT: movq $0, (%rsp)
- ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
- ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
--; CHECK-X86-64-NEXT: jge .LBB0_2
-+; CHECK-X86-64-NEXT: jg .LBB0_2
- ; CHECK-X86-64-NEXT: .LBB0_3:
- ; CHECK-X86-64-NEXT: movq %rax, %rsp
- ; CHECK-X86-64-NEXT: movl $1, 4792(%rax)
-@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
- ; CHECK-X86-32-NEXT: andl $-16, %ecx
- ; CHECK-X86-32-NEXT: subl %ecx, %eax
- ; CHECK-X86-32-NEXT: cmpl %esp, %eax
--; CHECK-X86-32-NEXT: jl .LBB0_3
-+; CHECK-X86-32-NEXT: jle .LBB0_3
- ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
- ; CHECK-X86-32-NEXT: movl $0, (%esp)
- ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
- ; CHECK-X86-32-NEXT: cmpl %esp, %eax
--; CHECK-X86-32-NEXT: jge .LBB0_2
-+; CHECK-X86-32-NEXT: jg .LBB0_2
- ; CHECK-X86-32-NEXT: .LBB0_3:
- ; CHECK-X86-32-NEXT: movl %eax, %esp
- ; CHECK-X86-32-NEXT: movl $1, 4792(%eax)
-
-From aac36687f7978f33751daf2870b5c812124ebfaf Mon Sep 17 00:00:00 2001
-From: serge-sans-paille <sguelton@redhat.com>
-Date: Thu, 23 Jul 2020 16:22:48 +0200
-Subject: [PATCH 2/3] Fix interaction between stack alignment and inline-asm
- stack clash protection
-
-As reported in https://github.com/rust-lang/rust/issues/70143 alignment is not
-taken into account when doing the probing. Fix that by adjusting the first probe
-if the stack align is small, or by extending the dynamic probing if the
-alignment is large.
-
-Differential Revision: https://reviews.llvm.org/D84419
-
-(cherry picked from commit f2c6bfa350de142e4d63808d03335f69bd136d6a)
----
- llvm/lib/Target/X86/X86FrameLowering.cpp | 222 ++++++++++++++++--
- llvm/lib/Target/X86/X86FrameLowering.h | 8 +-
- .../X86/stack-clash-large-large-align.ll | 88 +++++++
- .../CodeGen/X86/stack-clash-no-free-probe.ll | 27 ---
- .../stack-clash-small-alloc-medium-align.ll | 135 +++++++++++
- .../X86/stack-clash-small-large-align.ll | 83 +++++++
- 6 files changed, 512 insertions(+), 51 deletions(-)
- create mode 100644 llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
- delete mode 100644 llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
- create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
- create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
-
-diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
-index c7ca6fb2a4fc..db6b68659493 100644
---- a/llvm/lib/Target/X86/X86FrameLowering.cpp
-+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
-@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
- const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
- uint64_t ProbeChunk = StackProbeSize * 8;
-
-+ uint64_t MaxAlign =
-+ TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
-+
- // Synthesize a loop or unroll it, depending on the number of iterations.
-+ // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
-+ // between the unaligned rsp and current rsp.
- if (Offset > ProbeChunk) {
-- emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
-+ emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
-+ MaxAlign % StackProbeSize);
- } else {
-- emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
-+ emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
-+ MaxAlign % StackProbeSize);
- }
- }
-
- void X86FrameLowering::emitStackProbeInlineGenericBlock(
- MachineFunction &MF, MachineBasicBlock &MBB,
-- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
-- uint64_t Offset) const {
-+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
-+ uint64_t AlignOffset) const {
-
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86TargetLowering &TLI = *STI.getTargetLowering();
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
- const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
-+
- uint64_t CurrentOffset = 0;
-- // 0 Thanks to return address being saved on the stack
-- uint64_t CurrentProbeOffset = 0;
-
-- // For the first N - 1 pages, just probe. I tried to take advantage of
-+ assert(AlignOffset < StackProbeSize);
-+
-+ // If the offset is so small it fits within a page, there's nothing to do.
-+ if (StackProbeSize < Offset + AlignOffset) {
-+
-+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-+ .addReg(StackPtr)
-+ .addImm(StackProbeSize - AlignOffset)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-+
-+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
-+ .setMIFlag(MachineInstr::FrameSetup),
-+ StackPtr, false, 0)
-+ .addImm(0)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ NumFrameExtraProbe++;
-+ CurrentOffset = StackProbeSize - AlignOffset;
-+ }
-+
-+ // For the next N - 1 pages, just probe. I tried to take advantage of
- // natural probes but it implies much more logic and there was very few
- // interesting natural probes to interleave.
- while (CurrentOffset + StackProbeSize < Offset) {
-@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
- .setMIFlag(MachineInstr::FrameSetup);
- NumFrameExtraProbe++;
- CurrentOffset += StackProbeSize;
-- CurrentProbeOffset += StackProbeSize;
- }
-
-+ // No need to probe the tail, it is smaller than a Page.
- uint64_t ChunkSize = Offset - CurrentOffset;
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
-@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
-
- void X86FrameLowering::emitStackProbeInlineGenericLoop(
- MachineFunction &MF, MachineBasicBlock &MBB,
-- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
-- uint64_t Offset) const {
-+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
-+ uint64_t AlignOffset) const {
- assert(Offset && "null offset");
-
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
- const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
- const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
-
-+ if (AlignOffset) {
-+ if (AlignOffset < StackProbeSize) {
-+ // Perform a first smaller allocation followed by a probe.
-+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
-+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
-+ .addReg(StackPtr)
-+ .addImm(AlignOffset)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-+
-+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
-+ .setMIFlag(MachineInstr::FrameSetup),
-+ StackPtr, false, 0)
-+ .addImm(0)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ NumFrameExtraProbe++;
-+ Offset -= AlignOffset;
-+ }
-+ }
-+
- // Synthesize a loop
- NumFrameLoopProbe++;
- const BasicBlock *LLVM_BB = MBB.getBasicBlock();
-@@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
-
- // save loop bound
- {
-- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
-- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
-+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
-+ BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
- .addReg(FinalStackProbed)
- .addImm(Offset / StackProbeSize * StackProbeSize)
- .setMIFlag(MachineInstr::FrameSetup);
-@@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
-
- // allocate a page
- {
-- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-- BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
-+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-+ BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
- .addReg(StackPtr)
- .addImm(StackProbeSize)
- .setMIFlag(MachineInstr::FrameSetup);
-@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
- uint64_t MaxAlign) const {
- uint64_t Val = -MaxAlign;
- unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
-- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
-- .addReg(Reg)
-- .addImm(Val)
-- .setMIFlag(MachineInstr::FrameSetup);
-
-- // The EFLAGS implicit def is dead.
-- MI->getOperand(3).setIsDead();
-+ MachineFunction &MF = *MBB.getParent();
-+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-+ const X86TargetLowering &TLI = *STI.getTargetLowering();
-+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
-+ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
-+
-+ // We want to make sure that (in worst case) less than StackProbeSize bytes
-+ // are not probed after the AND. This assumption is used in
-+ // emitStackProbeInlineGeneric.
-+ if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
-+ {
-+ NumFrameLoopProbe++;
-+ MachineBasicBlock *entryMBB =
-+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+ MachineBasicBlock *headMBB =
-+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+ MachineBasicBlock *bodyMBB =
-+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+ MachineBasicBlock *footMBB =
-+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+
-+ MachineFunction::iterator MBBIter = MBB.getIterator();
-+ MF.insert(MBBIter, entryMBB);
-+ MF.insert(MBBIter, headMBB);
-+ MF.insert(MBBIter, bodyMBB);
-+ MF.insert(MBBIter, footMBB);
-+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
-+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
-+
-+ // Setup entry block
-+ {
-+
-+ entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
-+ BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
-+ .addReg(StackPtr)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ MachineInstr *MI =
-+ BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
-+ .addReg(FinalStackProbed)
-+ .addImm(Val)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ // The EFLAGS implicit def is dead.
-+ MI->getOperand(3).setIsDead();
-+
-+ BuildMI(entryMBB, DL,
-+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
-+ .addReg(FinalStackProbed)
-+ .addReg(StackPtr)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
-+ .addMBB(&MBB)
-+ .addImm(X86::COND_E)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ entryMBB->addSuccessor(headMBB);
-+ entryMBB->addSuccessor(&MBB);
-+ }
-+
-+ // Loop entry block
-+
-+ {
-+ const unsigned SUBOpc =
-+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-+ BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
-+ .addReg(StackPtr)
-+ .addImm(StackProbeSize)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ BuildMI(headMBB, DL,
-+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
-+ .addReg(FinalStackProbed)
-+ .addReg(StackPtr)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ // jump
-+ BuildMI(headMBB, DL, TII.get(X86::JCC_1))
-+ .addMBB(footMBB)
-+ .addImm(X86::COND_B)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ headMBB->addSuccessor(bodyMBB);
-+ headMBB->addSuccessor(footMBB);
-+ }
-+
-+ // setup loop body
-+ {
-+ addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
-+ .setMIFlag(MachineInstr::FrameSetup),
-+ StackPtr, false, 0)
-+ .addImm(0)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ const unsigned SUBOpc =
-+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-+ BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
-+ .addReg(StackPtr)
-+ .addImm(StackProbeSize)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ // cmp with stack pointer bound
-+ BuildMI(bodyMBB, DL,
-+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
-+ .addReg(FinalStackProbed)
-+ .addReg(StackPtr)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ // jump
-+ BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
-+ .addMBB(bodyMBB)
-+ .addImm(X86::COND_B)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ bodyMBB->addSuccessor(bodyMBB);
-+ bodyMBB->addSuccessor(footMBB);
-+ }
-+
-+ // setup loop footer
-+ {
-+ BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
-+ .addReg(FinalStackProbed)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
-+ .setMIFlag(MachineInstr::FrameSetup),
-+ StackPtr, false, 0)
-+ .addImm(0)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+ footMBB->addSuccessor(&MBB);
-+ }
-+
-+ recomputeLiveIns(*headMBB);
-+ recomputeLiveIns(*bodyMBB);
-+ recomputeLiveIns(*footMBB);
-+ recomputeLiveIns(MBB);
-+ }
-+ } else {
-+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
-+ .addReg(Reg)
-+ .addImm(Val)
-+ .setMIFlag(MachineInstr::FrameSetup);
-+
-+ // The EFLAGS implicit def is dead.
-+ MI->getOperand(3).setIsDead();
-+ }
- }
-
- bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
-diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
-index c0b4be95f88d..bb2e83205e71 100644
---- a/llvm/lib/Target/X86/X86FrameLowering.h
-+++ b/llvm/lib/Target/X86/X86FrameLowering.h
-@@ -213,14 +213,14 @@ private:
- void emitStackProbeInlineGenericBlock(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
-- const DebugLoc &DL,
-- uint64_t Offset) const;
-+ const DebugLoc &DL, uint64_t Offset,
-+ uint64_t Align) const;
-
- void emitStackProbeInlineGenericLoop(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
-- const DebugLoc &DL,
-- uint64_t Offset) const;
-+ const DebugLoc &DL, uint64_t Offset,
-+ uint64_t Align) const;
-
- /// Emit a stub to later inline the target stack probe.
- MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
-diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
-new file mode 100644
-index 000000000000..6c981cb4ac91
---- /dev/null
-+++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
-@@ -0,0 +1,88 @@
-+; RUN: llc < %s | FileCheck %s
-+
-+
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+define i32 @foo_noprotect() local_unnamed_addr {
-+; CHECK-LABEL: foo_noprotect:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: andq $-4096, %rsp # imm = 0xF000
-+; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000
-+; CHECK-NEXT: movl $1, 392(%rsp)
-+; CHECK-NEXT: movl $1, 28792(%rsp)
-+; CHECK-NEXT: movl (%rsp), %eax
-+; CHECK-NEXT: movq %rbp, %rsp
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+
-+ %a = alloca i32, i64 18000, align 4096
-+ %b0 = getelementptr inbounds i32, i32* %a, i64 98
-+ %b1 = getelementptr inbounds i32, i32* %a, i64 7198
-+ store volatile i32 1, i32* %b0
-+ store volatile i32 1, i32* %b1
-+ %c = load volatile i32, i32* %a
-+ ret i32 %c
-+}
-+
-+define i32 @foo_protect() local_unnamed_addr #0 {
-+; CHECK-LABEL: foo_protect:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: movq %rsp, %r11
-+; CHECK-NEXT: andq $-4096, %r11 # imm = 0xF000
-+; CHECK-NEXT: cmpq %rsp, %r11
-+; CHECK-NEXT: je .LBB1_4
-+; CHECK-NEXT:# %bb.1:
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: cmpq %rsp, %r11
-+; CHECK-NEXT: jb .LBB1_3
-+; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: cmpq %rsp, %r11
-+; CHECK-NEXT: jb .LBB1_2
-+; CHECK-NEXT:.LBB1_3:
-+; CHECK-NEXT: movq %r11, %rsp
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT:.LBB1_4:
-+; CHECK-NEXT: movq %rsp, %r11
-+; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000
-+; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: cmpq %r11, %rsp
-+; CHECK-NEXT: jne .LBB1_5
-+; CHECK-NEXT:# %bb.6:
-+; CHECK-NEXT: movl $1, 392(%rsp)
-+; CHECK-NEXT: movl $1, 28792(%rsp)
-+; CHECK-NEXT: movl (%rsp), %eax
-+; CHECK-NEXT: movq %rbp, %rsp
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+
-+
-+
-+ %a = alloca i32, i64 18000, align 4096
-+ %b0 = getelementptr inbounds i32, i32* %a, i64 98
-+ %b1 = getelementptr inbounds i32, i32* %a, i64 7198
-+ store volatile i32 1, i32* %b0
-+ store volatile i32 1, i32* %b1
-+ %c = load volatile i32, i32* %a
-+ ret i32 %c
-+}
-+
-+attributes #0 = {"probe-stack"="inline-asm"}
-diff --git a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll b/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
-deleted file mode 100644
-index 652acbdf00ba..000000000000
---- a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
-+++ /dev/null
-@@ -1,27 +0,0 @@
--; RUN: llc < %s | FileCheck %s
--
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
--target triple = "x86_64-unknown-linux-gnu"
--
--define i32 @foo(i64 %i) local_unnamed_addr #0 {
--; CHECK-LABEL: foo:
--; CHECK: # %bb.0:
--; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
--; CHECK-NEXT: movq $0, (%rsp)
--; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
--; CHECK-NEXT: .cfi_def_cfa_offset 7888
--; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
--; CHECK-NEXT: movl -128(%rsp), %eax
--; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
--; CHECK-NEXT: .cfi_def_cfa_offset 8
--; CHECK-NEXT: retq
--
-- %a = alloca i32, i32 2000, align 16
-- %b = getelementptr inbounds i32, i32* %a, i64 %i
-- store volatile i32 1, i32* %b
-- %c = load volatile i32, i32* %a
-- ret i32 %c
--}
--
--attributes #0 = {"probe-stack"="inline-asm"}
--
-diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-new file mode 100644
-index 000000000000..eafa86f1eba9
---- /dev/null
-+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-@@ -0,0 +1,135 @@
-+; RUN: llc < %s | FileCheck %s
-+
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+; | case1 | alloca + align < probe_size
-+define i32 @foo1(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo1:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: andq $-64, %rsp
-+; CHECK-NEXT: subq $832, %rsp # imm = 0x340
-+; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
-+; CHECK-NEXT: movl (%rsp), %eax
-+; CHECK-NEXT: movq %rbp, %rsp
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+ %a = alloca i32, i32 200, align 64
-+ %b = getelementptr inbounds i32, i32* %a, i64 %i
-+ store volatile i32 1, i32* %b
-+ %c = load volatile i32, i32* %a
-+ ret i32 %c
-+}
-+
-+; | case2 | alloca > probe_size, align > probe_size
-+define i32 @foo2(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo2:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800
-+; CHECK-NEXT: subq $2048, %rsp # imm = 0x800
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: subq $2048, %rsp # imm = 0x800
-+; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
-+; CHECK-NEXT: movl (%rsp), %eax
-+; CHECK-NEXT: movq %rbp, %rsp
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+ %a = alloca i32, i32 2000, align 2048
-+ %b = getelementptr inbounds i32, i32* %a, i64 %i
-+ store volatile i32 1, i32* %b
-+ %c = load volatile i32, i32* %a
-+ ret i32 %c
-+}
-+
-+; | case3 | alloca < probe_size, align < probe_size, alloca + align > probe_size
-+define i32 @foo3(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo3:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00
-+; CHECK-NEXT: subq $3072, %rsp # imm = 0xC00
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: subq $1024, %rsp # imm = 0x400
-+; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
-+; CHECK-NEXT: movl (%rsp), %eax
-+; CHECK-NEXT: movq %rbp, %rsp
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+
-+ %a = alloca i32, i32 1000, align 1024
-+ %b = getelementptr inbounds i32, i32* %a, i64 %i
-+ store volatile i32 1, i32* %b
-+ %c = load volatile i32, i32* %a
-+ ret i32 %c
-+}
-+
-+; | case4 | alloca + probe_size < probe_size, followed by dynamic alloca
-+define i32 @foo4(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo4:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: pushq %rbx
-+; CHECK-NEXT: andq $-64, %rsp
-+; CHECK-NEXT: subq $896, %rsp # imm = 0x380
-+; CHECK-NEXT: movq %rsp, %rbx
-+; CHECK-NEXT: .cfi_offset %rbx, -24
-+; CHECK-NEXT: movl $1, (%rbx,%rdi,4)
-+; CHECK-NEXT: movl (%rbx), %ecx
-+; CHECK-NEXT: movq %rsp, %rax
-+; CHECK-NEXT: leaq 15(,%rcx,4), %rcx
-+; CHECK-NEXT: andq $-16, %rcx
-+; CHECK-NEXT: subq %rcx, %rax
-+; CHECK-NEXT: cmpq %rsp, %rax
-+; CHECK-NEXT: jle .LBB3_3
-+; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: cmpq %rsp, %rax
-+; CHECK-NEXT: jg .LBB3_2
-+; CHECK-NEXT:.LBB3_3:
-+; CHECK-NEXT: andq $-64, %rax
-+; CHECK-NEXT: movq %rax, %rsp
-+; CHECK-NEXT: movl (%rax), %eax
-+; CHECK-NEXT: leaq -8(%rbp), %rsp
-+; CHECK-NEXT: popq %rbx
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+ %a = alloca i32, i32 200, align 64
-+ %b = getelementptr inbounds i32, i32* %a, i64 %i
-+ store volatile i32 1, i32* %b
-+ %c = load volatile i32, i32* %a
-+ %d = alloca i32, i32 %c, align 64
-+ %e = load volatile i32, i32* %d
-+ ret i32 %e
-+}
-+
-+attributes #0 = {"probe-stack"="inline-asm"}
-+
-diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
-new file mode 100644
-index 000000000000..e608bab90415
---- /dev/null
-+++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
-@@ -0,0 +1,83 @@
-+; RUN: llc < %s | FileCheck %s
-+
-+
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+define i32 @foo_noprotect() local_unnamed_addr {
-+; CHECK-LABEL: foo_noprotect:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: andq $-65536, %rsp
-+; CHECK-NEXT: subq $65536, %rsp
-+; CHECK-NEXT: movl $1, 392(%rsp)
-+; CHECK-NEXT: movl (%rsp), %eax
-+; CHECK-NEXT: movq %rbp, %rsp
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+
-+
-+ %a = alloca i32, i64 100, align 65536
-+ %b = getelementptr inbounds i32, i32* %a, i64 98
-+ store volatile i32 1, i32* %b
-+ %c = load volatile i32, i32* %a
-+ ret i32 %c
-+}
-+
-+define i32 @foo_protect() local_unnamed_addr #0 {
-+; CHECK-LABEL: foo_protect:
-+; CHECK: # %bb.0:
-+; CHECK-NEXT: pushq %rbp
-+; CHECK-NEXT: .cfi_def_cfa_offset 16
-+; CHECK-NEXT: .cfi_offset %rbp, -16
-+; CHECK-NEXT: movq %rsp, %rbp
-+; CHECK-NEXT: .cfi_def_cfa_register %rbp
-+; CHECK-NEXT: movq %rsp, %r11
-+; CHECK-NEXT: andq $-65536, %r11 # imm = 0xFFFF0000
-+; CHECK-NEXT: cmpq %rsp, %r11
-+; CHECK-NEXT: je .LBB1_4
-+; CHECK-NEXT:# %bb.1:
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: cmpq %rsp, %r11
-+; CHECK-NEXT: jb .LBB1_3
-+; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: cmpq %rsp, %r11
-+; CHECK-NEXT: jb .LBB1_2
-+; CHECK-NEXT:.LBB1_3:
-+; CHECK-NEXT: movq %r11, %rsp
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT:.LBB1_4:
-+; CHECK-NEXT: movq %rsp, %r11
-+; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000
-+; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-+; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: cmpq %r11, %rsp
-+; CHECK-NEXT: jne .LBB1_5
-+; CHECK-NEXT:# %bb.6:
-+; CHECK-NEXT: movl $1, 392(%rsp)
-+; CHECK-NEXT: movl (%rsp), %eax
-+; CHECK-NEXT: movq %rbp, %rsp
-+; CHECK-NEXT: popq %rbp
-+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT: retq
-+
-+
-+
-+
-+ %a = alloca i32, i64 100, align 65536
-+ %b = getelementptr inbounds i32, i32* %a, i64 98
-+ store volatile i32 1, i32* %b
-+ %c = load volatile i32, i32* %a
-+ ret i32 %c
-+}
-+
-+attributes #0 = {"probe-stack"="inline-asm"}
-
-From bbe6cbbed8c7460a7e8477373b9250543362e771 Mon Sep 17 00:00:00 2001
-From: serge-sans-paille <sguelton@redhat.com>
-Date: Tue, 27 Oct 2020 10:59:42 +0100
-Subject: [PATCH 3/3] [stack-clash] Fix probing of dynamic alloca
-
-- Perform the probing in the correct direction.
- Related to https://github.com/rust-lang/rust/pull/77885#issuecomment-711062924
-
-- The first touch on a dynamic alloca cannot use a mov because it clobbers
- existing space. Use a xor 0 instead
-
-Differential Revision: https://reviews.llvm.org/D90216
-
-(cherry picked from commit 0f60bcc36c34522618bd1425a45f8c6006568fb6)
----
- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++----
- llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 12 ++++++------
- .../X86/stack-clash-small-alloc-medium-align.ll | 6 +++---
- 3 files changed, 13 insertions(+), 13 deletions(-)
-
-diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
-index f68ae4461fe3..afe470cc6e0b 100644
---- a/llvm/lib/Target/X86/X86ISelLowering.cpp
-+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
-@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
-
- BuildMI(testMBB, DL, TII->get(X86::JCC_1))
- .addMBB(tailMBB)
-- .addImm(X86::COND_LE);
-+ .addImm(X86::COND_GE);
- testMBB->addSuccessor(blockMBB);
- testMBB->addSuccessor(tailMBB);
-
-@@ -31892,9 +31892,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
- //
- // The property we want to enforce is to never have more than [page alloc] between two probes.
-
-- const unsigned MovMIOpc =
-- TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
-- addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
-+ const unsigned XORMIOpc =
-+ TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
-+ addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
- .addImm(0);
-
- BuildMI(blockMBB, DL,
-diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-index 82fd67842c8a..6dd8b6ab5897 100644
---- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
- ; CHECK-X86-64-NEXT: andq $-16, %rcx
- ; CHECK-X86-64-NEXT: subq %rcx, %rax
- ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
--; CHECK-X86-64-NEXT: jle .LBB0_3
-+; CHECK-X86-64-NEXT: jge .LBB0_3
- ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
--; CHECK-X86-64-NEXT: movq $0, (%rsp)
-+; CHECK-X86-64-NEXT: xorq $0, (%rsp)
- ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
- ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
--; CHECK-X86-64-NEXT: jg .LBB0_2
-+; CHECK-X86-64-NEXT: jl .LBB0_2
- ; CHECK-X86-64-NEXT: .LBB0_3:
- ; CHECK-X86-64-NEXT: movq %rax, %rsp
- ; CHECK-X86-64-NEXT: movl $1, 4792(%rax)
-@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
- ; CHECK-X86-32-NEXT: andl $-16, %ecx
- ; CHECK-X86-32-NEXT: subl %ecx, %eax
- ; CHECK-X86-32-NEXT: cmpl %esp, %eax
--; CHECK-X86-32-NEXT: jle .LBB0_3
-+; CHECK-X86-32-NEXT: jge .LBB0_3
- ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
--; CHECK-X86-32-NEXT: movl $0, (%esp)
-+; CHECK-X86-32-NEXT: xorl $0, (%esp)
- ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
- ; CHECK-X86-32-NEXT: cmpl %esp, %eax
--; CHECK-X86-32-NEXT: jg .LBB0_2
-+; CHECK-X86-32-NEXT: jl .LBB0_2
- ; CHECK-X86-32-NEXT: .LBB0_3:
- ; CHECK-X86-32-NEXT: movl %eax, %esp
- ; CHECK-X86-32-NEXT: movl $1, 4792(%eax)
-diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-index eafa86f1eba9..39b6c3640a60 100644
---- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-@@ -106,12 +106,12 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
- ; CHECK-NEXT: andq $-16, %rcx
- ; CHECK-NEXT: subq %rcx, %rax
- ; CHECK-NEXT: cmpq %rsp, %rax
--; CHECK-NEXT: jle .LBB3_3
-+; CHECK-NEXT: jge .LBB3_3
- ; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1
--; CHECK-NEXT: movq $0, (%rsp)
-+; CHECK-NEXT: xorq $0, (%rsp)
- ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
- ; CHECK-NEXT: cmpq %rsp, %rax
--; CHECK-NEXT: jg .LBB3_2
-+; CHECK-NEXT: jl .LBB3_2
- ; CHECK-NEXT:.LBB3_3:
- ; CHECK-NEXT: andq $-64, %rax
- ; CHECK-NEXT: movq %rax, %rsp
diff --git a/utils-benchmark-fix-missing-include.patch b/utils-benchmark-fix-missing-include.patch
deleted file mode 100644
index 9d1f6dfc48d8..000000000000
--- a/utils-benchmark-fix-missing-include.patch
+++ /dev/null
@@ -1,21 +0,0 @@
-From b498303066a63a203d24f739b2d2e0e56dca70d1 Mon Sep 17 00:00:00 2001
-From: serge-sans-paille <sguelton@redhat.com>
-Date: Tue, 10 Nov 2020 14:55:25 +0100
-Subject: [PATCH] [nfc] Fix missing include
-
----
- llvm/utils/benchmark/src/benchmark_register.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/llvm/utils/benchmark/src/benchmark_register.h b/llvm/utils/benchmark/src/benchmark_register.h
-index 0705e219f2fa..4caa5ad4da07 100644
---- a/llvm/utils/benchmark/src/benchmark_register.h
-+++ b/llvm/utils/benchmark/src/benchmark_register.h
-@@ -1,6 +1,7 @@
- #ifndef BENCHMARK_REGISTER_H
- #define BENCHMARK_REGISTER_H
-
-+#include <limits>
- #include <vector>
-
- #include "check.h"