updpkg 1.9.0

author: acxz 2021-09-07 13:05:04 -0400
committer: acxz 2021-09-07 13:05:04 -0400
commit: dae9d3d2bfd6c5c7673952fac22a9b1760553e8c (patch)
tree: ff77d8ecc6b369f0466318b49b66de1f1867a02e
parent: 4ed74ae4f556b6c54c9a1f1dc0a87b2015867f77 (diff)
download: aur-dae9d3d2bfd6c5c7673952fac22a9b1760553e8c.tar.gz
7 files changed, 125 insertions, 42 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 667cd1ca18e9..5dfbbf426a33 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
 pkgbase = python-pytorch-rocm
 	pkgdesc = Tensors and Dynamic neural networks in Python with strong GPU acceleration
-	pkgver = 1.8.1
+	pkgver = 1.9.0
 	pkgrel = 1
 	url = https://pytorch.org
 	arch = x86_64
@@ -34,7 +34,7 @@ pkgbase = python-pytorch-rocm
 	depends = onednn
 	depends = intel-mkl
 	depends = python-typing_extensions
-	source = pytorch-1.8.1::git+https://github.com/pytorch/pytorch.git#tag=v1.8.1
+	source = pytorch-1.9.0::git+https://github.com/pytorch/pytorch.git#tag=v1.9.0
 	source = python-pytorch-rocm-ios-cmake::git+https://github.com/Yangqing/ios-cmake.git
 	source = python-pytorch-rocm-pthreadpool::git+https://github.com/Maratyszcza/pthreadpool.git
 	source = python-pytorch-rocm-FP16::git+https://github.com/Maratyszcza/FP16.git
@@ -72,6 +72,10 @@ pkgbase = python-pytorch-rocm
 	source = fix_include_system.patch
 	source = use-system-libuv.patch
 	source = fix-building-for-torchvision.patch
+	source = benchmark-gcc11.patch
+	source = xnnpack-gcc11.patch
+	source = https://github.com/pytorch/pytorch/commit/c74c0c571880df886474be297c556562e95c00e0.patch
+	source = fix_c10.patch
 	source = disable_non_x86_64.patch
 	sha256sums = SKIP
 	sha256sums = SKIP
@@ -110,7 +114,11 @@ pkgbase = python-pytorch-rocm
 	sha256sums = SKIP
 	sha256sums = 557761502bbd994d9795bef46779e4b8c60ba0b45e7d60841f477d3b7f28a00a
 	sha256sums = cd9ac4aaa9f946ac5eafc57cf66c5c16b3ea7ac8af32c2558fad0705411bb669
-	sha256sums = f4959cde995382c55ba28c8496321b0bb0a5c0f3f46abcce2e88521004993846
+	sha256sums = 689c76e89bcf403df1b4cf7ca784381967b6a6527ed6eb6d0ad6681cf789b738
+	sha256sums = 278fecdb45df065343f51688cc7a1665153b5189f3341a741d546b0b518eac40
+	sha256sums = 64833e96e47a22f88336381f25fcd73127208dc79e2074398295d88c4596c06a
+	sha256sums = ffb13bcd0186f3443a5b576b9fa32791a2ce915222df1d9609bcb0ef789ddd3b
+	sha256sums = ba801238afcfc58a35410e54d4ca6a638c447865c0c6b38ed16917fd6d507954
 	sha256sums = d3ef8491718ed7e814fe63e81df2f49862fffbea891d2babbcb464796a1bd680
 
 pkgname = python-pytorch-rocm
diff --git a/PKGBUILD b/PKGBUILD
index d60d5f0fa5b5..b0e1bdd449e7 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -13,8 +13,8 @@ pkgname=()
 [ "$_build_opt" -eq 1 ]    && pkgname+=("python-pytorch-opt-rocm")
 
 _pkgname="pytorch"
-pkgver=1.8.1
-_pkgver=1.8.1
+pkgver=1.9.0
+_pkgver=1.9.0
 pkgrel=1
 pkgdesc="Tensors and Dynamic neural networks in Python with strong GPU acceleration"
 arch=('x86_64')
@@ -64,6 +64,10 @@ source=("${_pkgname}-${pkgver}::git+https://github.com/pytorch/pytorch.git#tag=v
         fix_include_system.patch
         use-system-libuv.patch
         fix-building-for-torchvision.patch
+        benchmark-gcc11.patch
+        xnnpack-gcc11.patch
+        https://github.com/pytorch/pytorch/commit/c74c0c571880df886474be297c556562e95c00e0.patch
+        fix_c10.patch
         disable_non_x86_64.patch)
 sha256sums=('SKIP'
             'SKIP'
@@ -102,7 +106,11 @@ sha256sums=('SKIP'
             'SKIP'
             '557761502bbd994d9795bef46779e4b8c60ba0b45e7d60841f477d3b7f28a00a'
             'cd9ac4aaa9f946ac5eafc57cf66c5c16b3ea7ac8af32c2558fad0705411bb669'
-            'f4959cde995382c55ba28c8496321b0bb0a5c0f3f46abcce2e88521004993846'
+            '689c76e89bcf403df1b4cf7ca784381967b6a6527ed6eb6d0ad6681cf789b738'
+            '278fecdb45df065343f51688cc7a1665153b5189f3341a741d546b0b518eac40'
+            '64833e96e47a22f88336381f25fcd73127208dc79e2074398295d88c4596c06a'
+            'ffb13bcd0186f3443a5b576b9fa32791a2ce915222df1d9609bcb0ef789ddd3b'
+            'ba801238afcfc58a35410e54d4ca6a638c447865c0c6b38ed16917fd6d507954'
             'd3ef8491718ed7e814fe63e81df2f49862fffbea891d2babbcb464796a1bd680')
 
 prepare() {
@@ -157,8 +165,19 @@ prepare() {
   # fix https://github.com/pytorch/vision/issues/3695
   patch -Np1 -i "${srcdir}/fix-building-for-torchvision.patch"
 
+  # GCC 11 fixes
+  patch -Np1 -d third_party/benchmark <../benchmark-gcc11.patch
+  patch -Np1 -d third_party/XNNPACK <../xnnpack-gcc11.patch
+
+  # cuda 11.4 fix
+  patch -Np1 <../c74c0c571880df886474be297c556562e95c00e0.patch
+  # cuda 11.4.1 fix
+  patch -Np1 -i "${srcdir}/fix_c10.patch"
+
   # remove local nccl
   rm -rf third_party/nccl/nccl
+  # also remove path from nccl module, so it's not checked
+  sed -e '/path = third_party\/nccl\/nccl/d' -i ./.gitmodules
 
   # fix build with google-glog 0.5 https://github.com/pytorch/pytorch/issues/58054
   sed -e '/namespace glog_internal_namespace_/d' -e 's|::glog_internal_namespace_||' -i c10/util/Logging.cpp
@@ -173,24 +192,30 @@ prepare() {
   export PYTORCH_BUILD_NUMBER=1
 
   # Check tools/setup_helpers/cmake.py, setup.py and CMakeLists.txt for a list of flags that can be set via env vars.
+  export ATEN_NO_TEST=ON
   export USE_MKLDNN=ON
-  export BUILD_CUSTOM_PROTOBUF=ON
+  export BUILD_CUSTOM_PROTOBUF=OFF
   # export BUILD_SHARED_LIBS=OFF
   export USE_FFMPEG=ON
   export USE_GFLAGS=ON
   export USE_GLOG=ON
   export BUILD_BINARY=ON
   export USE_OPENCV=ON
+  # export USE_SYSTEM_LIBS=ON  # experimental, not all libs present in repos
   export USE_SYSTEM_NCCL=ON
-  # export USE_SYSTEM_LIBS=ON
   export NCCL_VERSION=$(pkg-config nccl --modversion)
   export NCCL_VER_CODE=$(sed -n 's/^#define NCCL_VERSION_CODE\s*\(.*\).*/\1/p' /usr/include/nccl.h)
-  export CUDAHOSTCXX=g++
+  # export BUILD_SPLIT_CUDA=ON  # modern preferred build, but splits libs and symbols, ABI break
+  # export USE_FAST_NVCC=ON  # parallel build with nvcc, spawns too many processes
+  export USE_CUPTI_SO=ON  # make sure cupti.so is used as shared lib
+  export CUDAHOSTCXX=/usr/bin/g++
+  export CUDA_HOST_COMPILER="${CUDAHOSTCXX}"
   export CUDA_HOME=/opt/cuda
   export CUDNN_LIB_DIR=/usr/lib
   export CUDNN_INCLUDE_DIR=/usr/include
-  # export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
-  export TORCH_CUDA_ARCH_LIST="5.2;5.3;6.0;6.1;6.2;7.0;7.0+PTX;7.2;7.2+PTX;7.5;7.5+PTX;8.0;8.0+PTX;8.6;8.6+PTX"
+  export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+  export TORCH_CUDA_ARCH_LIST="5.2;6.0;6.2;7.0;7.2;7.5;8.0;8.6;8.6+PTX"  #include latest PTX for future compat
+  export OVERRIDE_TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
 }
 
 build() {
diff --git a/benchmark-gcc11.patch b/benchmark-gcc11.patch
new file mode 100644
index 000000000000..4aa3e66de449
--- /dev/null
+++ b/benchmark-gcc11.patch
@@ -0,0 +1,30 @@
+From 3d1c2677686718d906f28c1d4da001c42666e6d2 Mon Sep 17 00:00:00 2001
+From: Sergei Trofimovich <slyfox@inbox.ru>
+Date: Thu, 15 Oct 2020 09:12:40 +0100
+Subject: [PATCH] src/benchmark_register.h: add missing <limits> inclusion
+ (#1060)
+
+Noticed missing header when was building llvm with gcc-11:
+
+```
+llvm-project/llvm/utils/benchmark/src/benchmark_register.h:17:30:
+  error: 'numeric_limits' is not a member of 'std'
+   17 |   static const T kmax = std::numeric_limits<T>::max();
+      |                              ^~~~~~~~~~~~~~
+```
+---
+ src/benchmark_register.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/benchmark_register.h b/src/benchmark_register.h
+index 61377d742..204bf1d9f 100644
+--- a/src/benchmark_register.h
++++ b/src/benchmark_register.h
+@@ -1,6 +1,7 @@
+ #ifndef BENCHMARK_REGISTER_H
+ #define BENCHMARK_REGISTER_H
+ 
++#include <limits>
+ #include <vector>
+ 
+ #include "check.h"
diff --git a/fix-building-for-torchvision.patch b/fix-building-for-torchvision.patch
index b62ecb52e8d8..9168d03a8fd1 100644
--- a/fix-building-for-torchvision.patch
+++ b/fix-building-for-torchvision.patch
@@ -4,17 +4,17 @@ Date: Fri, 30 Apr 2021 11:36:30 +0800
 Subject: [PATCH] fix building torchvision
 
 ---
- aten/src/ATen/core/op_registration/op_whitelist.h | 2 +-
+ aten/src/ATen/core/op_registration/op_allowlist.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
-diff --git a/aten/src/ATen/core/op_registration/op_whitelist.h b/aten/src/ATen/core/op_registration/op_whitelist.h
+diff --git a/aten/src/ATen/core/op_registration/op_allowlist.h b/aten/src/ATen/core/op_registration/op_allowlist.h
 index f93462bb2cf..12903d1cc09 100644
---- a/aten/src/ATen/core/op_registration/op_whitelist.h
-+++ b/aten/src/ATen/core/op_registration/op_whitelist.h
-@@ -59,7 +59,7 @@ constexpr bool op_whitelist_contains(string_view allowlist, string_view item) {
+--- a/aten/src/ATen/core/op_registration/op_allowlist.h
++++ b/aten/src/ATen/core/op_registration/op_allowlist.h
+@@ -59,7 +59,7 @@ constexpr bool op_allowlist_contains(string_view allowlist, string_view item) {
  // Returns true iff the given op name is on the allowlist
  // and should be registered
- constexpr bool op_whitelist_check(string_view op_name) {
+ constexpr bool op_allowlist_check(string_view op_name) {
 -  assert(op_name.find("::") != string_view::npos);
 +//  assert(op_name.find("::") != string_view::npos);
  #if !defined(TORCH_OPERATOR_WHITELIST)
diff --git a/fix_c10.patch b/fix_c10.patch
new file mode 100644
index 000000000000..fd03d6cb59a2
--- /dev/null
+++ b/fix_c10.patch
@@ -0,0 +1,12 @@
+diff --color -aur pytorch-1.9.0-old/c10/core/TensorImpl.h pytorch-1.9.0-new/c10/core/TensorImpl.h
+--- pytorch-1.9.0-old/c10/core/TensorImpl.h	2021-08-17 19:33:40.324974399 +0300
++++ pytorch-1.9.0-new/c10/core/TensorImpl.h	2021-08-18 01:25:00.005901707 +0300
+@@ -2177,7 +2177,7 @@
+ //    DispatchKeySet
+ //
+ static_assert(
+-    sizeof(void*) != sizeof(int64_t) || // if 64-bit...
++    sizeof(void*) <= sizeof(int64_t) || // if 64-bit...
+         sizeof(TensorImpl) == sizeof(int64_t) * 23,
+     "You changed the size of TensorImpl on 64-bit arch."
+     "See Note [TensorImpl size constraints] on how to proceed.");
diff --git a/nccl_version.patch b/nccl_version.patch
deleted file mode 100644
index c0cd0005c215..000000000000
--- a/nccl_version.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-diff --git a/cmake/Modules/FindNCCL.cmake b/cmake/Modules/FindNCCL.cmake
-index a16c9aca67..51eb54f14c 100644
---- a/cmake/Modules/FindNCCL.cmake
-+++ b/cmake/Modules/FindNCCL.cmake
-@@ -55,9 +55,10 @@ if(NCCL_FOUND)  # obtaining NCCL version and some sanity checks
-   set (OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
-   list (APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS})
-   include(CheckCXXSymbolExists)
--  check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)
-+  set(NCCL_VERSION_CODE $ENV{NCCL_VER_CODE})
-+  set(NCCL_VERSION_DEFINED $ENV{NCCL_VER_CODE})
- 
--  if (NCCL_VERSION_DEFINED)
-+  if (DEFINED NCCL_VERSION_DEFINED)
-     set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc")
-     file(WRITE ${file} "
-       #include <iostream>
-@@ -72,6 +73,7 @@ if(NCCL_FOUND)  # obtaining NCCL version and some sanity checks
-       }
- ")
-     try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file}
-+          CMAKE_FLAGS -DINCLUDE_DIRECTORIES=/opt/cuda/include
-           RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER
-           CMAKE_FLAGS  "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}"
-           LINK_LIBRARIES ${NCCL_LIBRARIES})
diff --git a/xnnpack-gcc11.patch b/xnnpack-gcc11.patch
new file mode 100644
index 000000000000..f1e1ea5344ba
--- /dev/null
+++ b/xnnpack-gcc11.patch
@@ -0,0 +1,33 @@
+From 042cdaf1c24c675fca5e79eb4d2665839d7df2c2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Nenad=20Miks=CC=8Ca?= <nenad.miksa@microblink.com>
+Date: Mon, 3 May 2021 13:28:59 +0200
+Subject: [PATCH] GCC 11 no longer needs this polyfill
+
+---
+ src/xnnpack/intrinsics-polyfill.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/xnnpack/intrinsics-polyfill.h b/src/xnnpack/intrinsics-polyfill.h
+index 3f198d88..32d7d4f3 100644
+--- a/src/xnnpack/intrinsics-polyfill.h
++++ b/src/xnnpack/intrinsics-polyfill.h
+@@ -11,8 +11,8 @@
+ #if defined(__SSE2__)
+ #include <emmintrin.h>
+ 
+-// GCC any, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
+-#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)) || \
++// GCC pre-11, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
++#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && __GNUC__ < 11) || \
+     (defined(__clang__) && !defined(__apple_build_version__) && (__clang_major__ < 8)) || \
+     (defined(__clang__) && defined(__ANDROID__) && (__clang_major__ == 8) && (__clang_minor__ == 0) && (__clang_patchlevel__ < 7)) || \
+     (defined(__clang__) && defined(__apple_build_version__) && (__apple_build_version__ < 11000000)) || \
+@@ -27,7 +27,7 @@ static XNN_INTRINSIC
+ void _mm_storeu_si32(const void* address, __m128i v) {
+   *((int*) address) = _mm_cvtsi128_si32(v);
+ }
+-#endif  // GCC any, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
++#endif  // GCC pre-11, Clang pre-8, Android NDK Clang pre-8.0.7, Apple Clang pre-11, and ICC pre-16
+ #endif  // SSE2
+ 
+ #ifdef __AVX512F__
author	acxz	2021-09-07 13:05:04 -0400
committer	acxz	2021-09-07 13:05:04 -0400
commit	dae9d3d2bfd6c5c7673952fac22a9b1760553e8c (patch)
tree	ff77d8ecc6b369f0466318b49b66de1f1867a02e
parent	4ed74ae4f556b6c54c9a1f1dc0a87b2015867f77 (diff)
download	aur-dae9d3d2bfd6c5c7673952fac22a9b1760553e8c.tar.gz