summary | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author acxz 2021-09-07 13:05:04 -0400
committer acxz 2021-09-07 13:05:04 -0400
commit dae9d3d2bfd6c5c7673952fac22a9b1760553e8c (patch)
tree ff77d8ecc6b369f0466318b49b66de1f1867a02e
parent 4ed74ae4f556b6c54c9a1f1dc0a87b2015867f77 (diff)
download aur-dae9d3d2bfd6c5c7673952fac22a9b1760553e8c.tar.gz
updpkg 1.9.0
-rw-r--r-- .SRCINFO 14
-rw-r--r-- PKGBUILD 41
-rw-r--r-- benchmark-gcc11.patch 30
-rw-r--r-- fix-building-for-torchvision.patch 12
-rw-r--r-- fix_c10.patch 12
-rw-r--r-- nccl_version.patch 25
-rw-r--r-- xnnpack-gcc11.patch 33
7 files changed, 125 insertions, 42 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 667cd1ca18e9..5dfbbf426a33 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
pkgbase = python-pytorch-rocm
pkgdesc = Tensors and Dynamic neural networks in Python with strong GPU acceleration
- pkgver = 1.8.1
+ pkgver = 1.9.0
pkgrel = 1
url = https://pytorch.org
arch = x86_64
@@ -34,7 +34,7 @@ pkgbase = python-pytorch-rocm
depends = onednn
depends = intel-mkl
depends = python-typing_extensions
- source = pytorch-1.8.1::git+https://github.com/pytorch/pytorch.git#tag=v1.8.1
+ source = pytorch-1.9.0::git+https://github.com/pytorch/pytorch.git#tag=v1.9.0
source = python-pytorch-rocm-ios-cmake::git+https://github.com/Yangqing/ios-cmake.git
source = python-pytorch-rocm-pthreadpool::git+https://github.com/Maratyszcza/pthreadpool.git
source = python-pytorch-rocm-FP16::git+https://github.com/Maratyszcza/FP16.git
@@ -72,6 +72,10 @@ pkgbase = python-pytorch-rocm
source = fix_include_system.patch
source = use-system-libuv.patch
source = fix-building-for-torchvision.patch
+ source = benchmark-gcc11.patch
+ source = xnnpack-gcc11.patch
+ source = https://github.com/pytorch/pytorch/commit/c74c0c571880df886474be297c556562e95c00e0.patch
+ source = fix_c10.patch
source = disable_non_x86_64.patch
sha256sums = SKIP
sha256sums = SKIP
@@ -110,7 +114,11 @@ pkgbase = python-pytorch-rocm
sha256sums = SKIP
sha256sums = 557761502bbd994d9795bef46779e4b8c60ba0b45e7d60841f477d3b7f28a00a
sha256sums = cd9ac4aaa9f946ac5eafc57cf66c5c16b3ea7ac8af32c2558fad0705411bb669
- sha256sums = f4959cde995382c55ba28c8496321b0bb0a5c0f3f46abcce2e88521004993846
+ sha256sums = 689c76e89bcf403df1b4cf7ca784381967b6a6527ed6eb6d0ad6681cf789b738
+ sha256sums = 278fecdb45df065343f51688cc7a1665153b5189f3341a741d546b0b518eac40
+ sha256sums = 64833e96e47a22f88336381f25fcd73127208dc79e2074398295d88c4596c06a
+ sha256sums = ffb13bcd0186f3443a5b576b9fa32791a2ce915222df1d9609bcb0ef789ddd3b
+ sha256sums = ba801238afcfc58a35410e54d4ca6a638c447865c0c6b38ed16917fd6d507954
sha256sums = d3ef8491718ed7e814fe63e81df2f49862fffbea891d2babbcb464796a1bd680
pkgname = python-pytorch-rocm
diff --git a/PKGBUILD b/PKGBUILD
index d60d5f0fa5b5..b0e1bdd449e7 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -13,8 +13,8 @@ pkgname=()
[ "$_build_opt" -eq 1 ] && pkgname+=("python-pytorch-opt-rocm")
_pkgname="pytorch"
-pkgver=1.8.1
-_pkgver=1.8.1
+pkgver=1.9.0
+_pkgver=1.9.0
pkgrel=1
pkgdesc="Tensors and Dynamic neural networks in Python with strong GPU acceleration"
arch=('x86_64')
@@ -64,6 +64,10 @@ source=("${_pkgname}-${pkgver}::git+https://github.com/pytorch/pytorch.git#tag=v
fix_include_system.patch
use-system-libuv.patch
fix-building-for-torchvision.patch
+ benchmark-gcc11.patch
+ xnnpack-gcc11.patch
+ https://github.com/pytorch/pytorch/commit/c74c0c571880df886474be297c556562e95c00e0.patch
+ fix_c10.patch
disable_non_x86_64.patch)
sha256sums=('SKIP'
'SKIP'
@@ -102,7 +106,11 @@ sha256sums=('SKIP'
'SKIP'
'557761502bbd994d9795bef46779e4b8c60ba0b45e7d60841f477d3b7f28a00a'
'cd9ac4aaa9f946ac5eafc57cf66c5c16b3ea7ac8af32c2558fad0705411bb669'
- 'f4959cde995382c55ba28c8496321b0bb0a5c0f3f46abcce2e88521004993846'
+ '689c76e89bcf403df1b4cf7ca784381967b6a6527ed6eb6d0ad6681cf789b738'
+ '278fecdb45df065343f51688cc7a1665153b5189f3341a741d546b0b518eac40'
+ '64833e96e47a22f88336381f25fcd73127208dc79e2074398295d88c4596c06a'
+ 'ffb13bcd0186f3443a5b576b9fa32791a2ce915222df1d9609bcb0ef789ddd3b'
+ 'ba801238afcfc58a35410e54d4ca6a638c447865c0c6b38ed16917fd6d507954'
'd3ef8491718ed7e814fe63e81df2f49862fffbea891d2babbcb464796a1bd680')
prepare() {
@@ -157,8 +165,19 @@ prepare() {
# fix https://github.com/pytorch/vision/issues/3695
patch -Np1 -i "${srcdir}/fix-building-for-torchvision.patch"
+ # GCC 11 fixes
+ patch -Np1 -d third_party/benchmark <../benchmark-gcc11.patch
+ patch -Np1 -d third_party/XNNPACK <../xnnpack-gcc11.patch
+
+ # cuda 11.4 fix
+ patch -Np1 <../c74c0c571880df886474be297c556562e95c00e0.patch
+ # cuda 11.4.1 fix
+ patch -Np1 -i "${srcdir}/fix_c10.patch"
+
# remove local nccl
rm -rf third_party/nccl/nccl
+ # also remove path from nccl module, so it's not checked
+ sed -e '/path = third_party\/nccl\/nccl/d' -i ./.gitmodules
# fix build with google-glog 0.5 https://github.com/pytorch/pytorch/issues/58054
sed -e '/namespace glog_internal_namespace_/d' -e 's|::glog_internal_namespace_||' -i c10/util/Logging.cpp
@@ -173,24 +192,30 @@ prepare() {
export PYTORCH_BUILD_NUMBER=1
# Check tools/setup_helpers/cmake.py, setup.py and CMakeLists.txt for a list of flags that can be set via env vars.
+ export ATEN_NO_TEST=ON
export USE_MKLDNN=ON
- export BUILD_CUSTOM_PROTOBUF=ON
+ export BUILD_CUSTOM_PROTOBUF=OFF
# export BUILD_SHARED_LIBS=OFF
export USE_FFMPEG=ON
export USE_GFLAGS=ON
export USE_GLOG=ON
export BUILD_BINARY=ON
export USE_OPENCV=ON
+ # export USE_SYSTEM_LIBS=ON # experimental, not all libs present in repos
export USE_SYSTEM_NCCL=ON
- # export USE_SYSTEM_LIBS=ON
export NCCL_VERSION=$(pkg-config nccl --modversion)
export NCCL_VER_CODE=$(sed -n 's/^#define NCCL_VERSION_CODE\s*\(.*\).*/\1/p' /usr/include/nccl.h)
- export CUDAHOSTCXX=g++
+ # export BUILD_SPLIT_CUDA=ON # modern preferred build, but splits libs and symbols, ABI break
+ # export USE_FAST_NVCC=ON # parallel build with nvcc, spawns too many processes
+ export USE_CUPTI_SO=ON # make sure cupti.so is used as shared lib
+ export CUDAHOSTCXX=/usr/bin/g++
+ export CUDA_HOST_COMPILER="${CUDAHOSTCXX}"
export CUDA_HOME=/opt/cuda
export CUDNN_LIB_DIR=/usr/lib
export CUDNN_INCLUDE_DIR=/usr/include
- # export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
- export TORCH_CUDA_ARCH_LIST="5.2;5.3;6.0;6.1;6.2;7.0;7.0+PTX;7.2;7.2+PTX;7.5;7.5+PTX;8.0;8.0+PTX;8.6;8.6+PTX"
+ export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+ export TORCH_CUDA_ARCH_LIST="5.2;6.0;6.2;7.0;7.2;7.5;8.0;8.6;8.6+PTX" #include latest PTX for future compat
+ export OVERRIDE_TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
}
build() {
diff --git a/benchmark-gcc11.patch b/benchmark-gcc11.patch
new file mode 100644
index 000000000000..4aa3e66de449
--- /dev/null
+++ b/benchmark-gcc11.patch
@@ -0,0 +1,30 @@
+From 3d1c2677686718d906f28c1d4da001c42666e6d2 Mon Sep 17 00:00:00 2001
+From: Sergei Trofimovich <slyfox@inbox.ru>
+Date: Thu, 15 Oct 2020 09:12:40 +0100
+Subject: [PATCH] src/benchmark_register.h: add missing <limits> inclusion
+ (#1060)
+
+Noticed missing header when was building llvm with gcc-11:
+
+```
+llvm-project/llvm/utils/benchmark/src/benchmark_register.h:17:30:
+ error: 'numeric_limits' is not a member of 'std'
+ 17 | static const T kmax = std::numeric_limits<T>::max();
+ | ^~~~~~~~~~~~~~
+```
+---
+ src/benchmark_register.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/benchmark_register.h b/src/benchmark_register.h
+index 61377d742..204bf1d9f 100644
+--- a/src/benchmark_register.h
++++ b/src/benchmark_register.h
+@@ -1,6 +1,7 @@
+ #ifndef BENCHMARK_REGISTER_H
+ #define BENCHMARK_REGISTER_H
+
++#include <limits>
+ #include <vector>
+
+ #include "check.h"
diff --git a/fix-building-for-torchvision.patch b/fix-building-for-torchvision.patch
index b62ecb52e8d8..9168d03a8fd1 100644
--- a/fix-building-for-torchvision.patch
+++ b/fix-building-for-torchvision.patch
@@ -4,17 +4,17 @@ Date: Fri, 30 Apr 2021 11:36:30 +0800
Subject: [PATCH] fix building torchvision
---
- aten/src/ATen/core/op_registration/op_whitelist.h | 2 +-
+ aten/src/ATen/core/op_registration/op_allowlist.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/aten/src/ATen/core/op_registration/op_whitelist.h b/aten/src/ATen/core/op_registration/op_whitelist.h
+diff --git a/aten/src/ATen/core/op_registration/op_allowlist.h b/aten/src/ATen/core/op_registration/op_allowlist.h
index f93462bb2cf..12903d1cc09 100644
---- a/aten/src/ATen/core/op_registration/op_whitelist.h
-+++ b/aten/src/ATen/core/op_registration/op_whitelist.h
-@@ -59,7 +59,7 @@ constexpr bool op_whitelist_contains(string_view allowlist, string_view item) {
+--- a/aten/src/ATen/core/op_registration/op_allowlist.h
++++ b/aten/src/ATen/core/op_registration/op_allowlist.h
+@@ -59,7 +59,7 @@ constexpr bool op_allowlist_contains(string_view allowlist, string_view item) {
// Returns true iff the given op name is on the allowlist
// and should be registered
- constexpr bool op_whitelist_check(string_view op_name) {
+ constexpr bool op_allowlist_check(string_view op_name) {
- assert(op_name.find("::") != string_view::npos);
+// assert(op_name.find("::") != string_view::npos);
#if !defined(TORCH_OPERATOR_WHITELIST)
diff --git a/fix_c10.patch b/fix_c10.patch
new file mode 100644
index 000000000000..fd03d6cb59a2
--- /dev/null
+++ b/fix_c10.patch
@@ -0,0 +1,12 @@
+diff --color -aur pytorch-1.9.0-old/c10/core/TensorImpl.h pytorch-1.9.0-new/c10/core/TensorImpl.h
+--- pytorch-1.9.0-old/c10/core/TensorImpl.h 2021-08-17 19:33:40.324974399 +0300
++++ pytorch-1.9.0-new/c10/core/TensorImpl.h 2021-08-18 01:25:00.005901707 +0300
+@@ -2177,7 +2177,7 @@
+ // DispatchKeySet
+ //
+ static_assert(
+- sizeof(void*) != sizeof(int64_t) || // if 64-bit...
++ sizeof(void*) <= sizeof(int64_t) || // if 64-bit...
+ sizeof(TensorImpl) == sizeof(int64_t) * 23,
+ "You changed the size of TensorImpl on 64-bit arch."
+ "See Note [TensorImpl size constraints] on how to proceed.");
diff --git a/nccl_version.patch b/nccl_version.patch
deleted file mode 100644
index c0cd0005c215..000000000000
--- a/nccl_version.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-diff --git a/cmake/Modules/FindNCCL.cmake b/cmake/Modules/FindNCCL.cmake
-index a16c9aca67..51eb54f14c 100644
---- a/cmake/Modules/FindNCCL.cmake
-+++ b/cmake/Modules/FindNCCL.cmake
-@@ -55,9 +55,10 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
- set (OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
- list (APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS})
- include(CheckCXXSymbolExists)
-- check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)
-+ set(NCCL_VERSION_CODE $ENV{NCCL_VER_CODE})
-+ set(NCCL_VERSION_DEFINED $ENV{NCCL_VER_CODE})
-
-- if (NCCL_VERSION_DEFINED)
-+ if (DEFINED NCCL_VERSION_DEFINED)
- set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc")
- file(WRITE ${file} "
- #include <iostream>
-@@ -72,6 +73,7 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
- }
- ")
- try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file}
-+ CMAKE_FLAGS -DINCLUDE_DIRECTORIES=/opt/cuda/include
- RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER
- CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}"
- LINK_LIBRARIES ${NCCL_LIBRARIES})
diff --git a/xnnpack-gcc11.patch b/xnnpack-gcc11.patch
new file mode 100644
index 000000000000..f1e1ea5344ba
--- /dev/null
+++ b/xnnpack-gcc11.patch
@@ -0,0 +1,33 @@
+From 042cdaf1c24c675fca5e79eb4d2665839d7df2c2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Nenad=20Miks=CC=8Ca?= <nenad.miksa@microblink.com>
+Date: Mon, 3 May 2021 13:28:59 +0200
+Subject: [PATCH] GCC 11 no longer needs this polyfill
+
+---
+ src/xnnpack/intrinsics-polyfill.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/xnnpack/intrinsics-polyfill.h b/src/xnnpack/intrinsics-polyfill.h
+index 3f198d88..32d7d4f3 100644
+--- a/src/xnnpack/intrinsics-polyfill.h
++++ b/src/xnnpack/intrinsics-polyfill.h
+@@ -11,8 +11,8 @@
+ #if defined(__SSE2__)
+ #include <emmintrin.h>
+
+-// GCC any, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
+-#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)) || \
++// GCC pre-11, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
++#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && __GNUC__ < 11) || \
+ (defined(__clang__) && !defined(__apple_build_version__) && (__clang_major__ < 8)) || \
+ (defined(__clang__) && defined(__ANDROID__) && (__clang_major__ == 8) && (__clang_minor__ == 0) && (__clang_patchlevel__ < 7)) || \
+ (defined(__clang__) && defined(__apple_build_version__) && (__apple_build_version__ < 11000000)) || \
+@@ -27,7 +27,7 @@ static XNN_INTRINSIC
+ void _mm_storeu_si32(const void* address, __m128i v) {
+ *((int*) address) = _mm_cvtsi128_si32(v);
+ }
+-#endif // GCC any, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
++#endif // GCC pre-11, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
+ #endif // SSE2
+
+ #ifdef __AVX512F__