author     acxz    2020-12-16 00:38:44 -0500
committer  acxz    2020-12-16 00:38:44 -0500
commit     43d4b3d57a605f89713c2af7c07055f3216a2f2b (patch)
tree       2f6ecc6c5a8163e722a7c27fd986bc33dcafa7a2
parent     fa97314d266ed527c4ca43ab2f4b9a2c7b7d317a (diff)
download   aur-43d4b3d57a605f89713c2af7c07055f3216a2f2b.tar.gz
updpkg 2.4.0
-rw-r--r--  .SRCINFO                    86
-rw-r--r--  PKGBUILD                   113
-rw-r--r--  fix-h5py3.0.patch           18
-rw-r--r--  fix_occupancy_block.patch   87
-rw-r--r--  new-rocm.patch             692
5 files changed, 130 insertions, 866 deletions
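
The checksum and .SRCINFO churn below is the routine fallout of the 2.3.1 -> 2.4.0 bump. As a rough sketch (not taken from this commit; assumes pacman-contrib is installed for updpkgsums), such a bump is usually regenerated with standard Arch tooling after editing pkgver/pkgrel and the source=() array:

    updpkgsums                          # re-downloads sources and rewrites the sha512sums=() array
    makepkg --printsrcinfo > .SRCINFO   # regenerates .SRCINFO from the updated PKGBUILD
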
diff --git a/.SRCINFO b/.SRCINFO
index 92973a8994b3..64374efa83ac 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,7 +1,7 @@
pkgbase = tensorflow-rocm
pkgdesc = Library for computation using data flow graphs for scalable machine learning
- pkgver = 2.3.1
- pkgrel = 3
+ pkgver = 2.4.0
+ pkgrel = 1
url = https://www.tensorflow.org/
arch = x86_64
license = APACHE
@@ -12,42 +12,48 @@ pkgbase = tensorflow-rocm
makedepends = miopen
makedepends = rccl
makedepends = git
- makedepends = gcc9
makedepends = python-pip
makedepends = python-wheel
makedepends = python-setuptools
makedepends = python-h5py
makedepends = python-keras-applications
makedepends = python-keras-preprocessing
+ makedepends = cython
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
optdepends = tensorboard: Tensorflow visualization toolkit
- source = tensorflow-rocm-2.3.1.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.3.1.tar.gz
- source = numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
+ source = tensorflow-rocm-2.4.0.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.4.0.tar.gz
+ source = fix-h5py3.0.patch
source = build-against-actual-mkl.patch
- source = fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch
- source = fix_hipcc_path.patch::https://github.com/tensorflow/tensorflow/commit/9d2b338025dc61828ccf8196bb042ab9c586c7b3.patch
- source = fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
- source = fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
- source = fix_occupancy_block.patch
- source = new-rocm.patch
- sha512sums = e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0
- sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc
+ sha512sums = 4860c148fd931c4dc7c558128e545e2b6384e590a3fbc266a5bfe842a8307f23f1f7e0103bda3a383e7c77edad2bb76dec02da8be400a40956072df19c5d4dbd
+ sha512sums = 9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a
sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08
- sha512sums = 7acc2f2579158be1d8c824da0f6d44d084a56182f1aab3cd7a78d513931b3a16ce72f2e05b44b1de76f5519af39e80431660de294ff337842e4ee8949cb85b28
- sha512sums = 136d91db88658dd0eab1543f8dec1cd20dca86afc6970606a722e7d01a645d64c42564d590fc1ecb04c204ae0b0fa8f78cf9998e9bcf367f4cc795fa59677591
- sha512sums = 75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20
- sha512sums = 42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5
- sha512sums = 88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126
- sha512sums = 080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317
pkgname = tensorflow-rocm
pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM)
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = rccl
provides = tensorflow
conflicts = tensorflow
@@ -57,12 +63,22 @@ pkgname = python-tensorflow-rocm
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = tensorflow-rocm
depends = python-termcolor
depends = python-astor
- depends = python-gast
+ depends = python-gast03
depends = python-numpy
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = python-protobuf
depends = absl-py
depends = rccl
@@ -72,31 +88,53 @@ pkgname = python-tensorflow-rocm
depends = python-tensorflow-estimator
depends = python-opt_einsum
depends = python-astunparse
+ depends = python-past
+ depends = python-flatbuffers
provides = python-tensorflow
conflicts = python-tensorflow
pkgname = tensorflow-opt-rocm
- pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = rccl
provides = tensorflow
provides = tensorflow-rocm
conflicts = tensorflow
pkgname = python-tensorflow-opt-rocm
- pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = tensorflow-opt-rocm
depends = python-termcolor
depends = python-astor
- depends = python-gast
+ depends = python-gast03
depends = python-numpy
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = python-protobuf
depends = absl-py
depends = rccl
@@ -106,6 +144,8 @@ pkgname = python-tensorflow-opt-rocm
depends = python-tensorflow-estimator
depends = python-opt_einsum
depends = python-astunparse
+ depends = python-past
+ depends = python-flatbuffers
provides = python-tensorflow
provides = python-tensorflow-rocm
conflicts = python-tensorflow
diff --git a/PKGBUILD b/PKGBUILD
index 9c64e056acaa..bf83faacdcf1 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -14,37 +14,26 @@ pkgname=()
[ "$_build_no_opt" -eq 1 ] && pkgname+=(tensorflow-rocm python-tensorflow-rocm)
[ "$_build_opt" -eq 1 ] && pkgname+=(tensorflow-opt-rocm python-tensorflow-opt-rocm)
-pkgver=2.3.1
-_pkgver=2.3.1
-pkgrel=3
+pkgver=2.4.0
+_pkgver=2.4.0
+pkgrel=1
pkgdesc="Library for computation using data flow graphs for scalable machine learning"
url="https://www.tensorflow.org/"
license=('APACHE')
arch=('x86_64')
-depends=('c-ares' 'intel-mkl' 'onednn')
-makedepends=('bazel' 'python-numpy' 'rocm' 'rocm-libs' 'miopen' 'rccl' 'git' 'gcc9'
+depends=('c-ares' 'intel-mkl' 'onednn' 'pybind11' 'openssl-1.0' 'lmdb' 'libpng' 'curl' 'giflib' 'icu' 'libjpeg-turbo')
+makedepends=('bazel' 'python-numpy' 'rocm' 'rocm-libs' 'miopen' 'rccl' 'git'
'python-pip' 'python-wheel' 'python-setuptools' 'python-h5py'
- 'python-keras-applications' 'python-keras-preprocessing')
+ 'python-keras-applications' 'python-keras-preprocessing'
+ 'cython')
optdepends=('tensorboard: Tensorflow visualization toolkit')
source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz"
- numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
- build-against-actual-mkl.patch
- fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch
- fix_hipcc_path.patch::https://github.com/tensorflow/tensorflow/commit/9d2b338025dc61828ccf8196bb042ab9c586c7b3.patch
- fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
- fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
- fix_occupancy_block.patch
- new-rocm.patch)
-
-sha512sums=('e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0'
- 'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc'
- 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08'
- '7acc2f2579158be1d8c824da0f6d44d084a56182f1aab3cd7a78d513931b3a16ce72f2e05b44b1de76f5519af39e80431660de294ff337842e4ee8949cb85b28'
- '136d91db88658dd0eab1543f8dec1cd20dca86afc6970606a722e7d01a645d64c42564d590fc1ecb04c204ae0b0fa8f78cf9998e9bcf367f4cc795fa59677591'
- '75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20'
- '42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5'
- '88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126'
- '080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317')
+ fix-h5py3.0.patch
+ build-against-actual-mkl.patch)
+
+sha512sums=('4860c148fd931c4dc7c558128e545e2b6384e590a3fbc266a5bfe842a8307f23f1f7e0103bda3a383e7c77edad2bb76dec02da8be400a40956072df19c5d4dbd'
+ '9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a'
+ 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08')
get_pyver () {
python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))'
@@ -66,32 +55,17 @@ prepare() {
# Tensorflow actually wants to build against a slimmed down version of Intel MKL called MKLML
# See https://github.com/intel/mkl-dnn/issues/102
# MKLML version that Tensorflow wants to use is https://github.com/intel/mkl-dnn/releases/tag/v0.21
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch
+ # patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch
# Compile with C++17 by default (FS#65953)
#sed -i "s/c++14/c++17/g" tensorflow-${_pkgver}/.bazelrc
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/numpy1.20.patch
-
- # Fix hip_hcc path
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_hip_hcc_path.patch
-
- # Fix hip_hcc path
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_hipcc_path.patch
-
- # Fix GpuAtomic redefinition
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_gpu_atomic_redef.patch
+ # FS#68488
+ patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix-h5py3.0.patch
- # Fix ldexp float method
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_ldexp_float.patch
-
- # Fix missing hipOccupancyMaxPotentialBlockSize method
- # https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_occupancy_block.patch
-
- # Patch for ROCm 3.7 and later
- # https://github.com/tensorflow/tensorflow/pull/42689
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/new-rocm.patch
+ # Get rid of hardcoded versions. Not like we ever cared about what upstream
+ # thinks about which versions should be used anyway. ;) (FS#68772)
+ sed -i -E "s/'([0-9a-z_-]+) .= [0-9].+[0-9]'/'\1'/" tensorflow-${_pkgver}/tensorflow/tools/pip_package/setup.py
cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-rocm
cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-rocm
@@ -104,12 +78,12 @@ build() {
export PYTHON_BIN_PATH=/usr/bin/python
export USE_DEFAULT_PYTHON_LIB_PATH=1
export TF_NEED_JEMALLOC=1
- export TF_NEED_KAFKA=0
+ export TF_NEED_KAFKA=1
export TF_NEED_OPENCL_SYCL=0
- export TF_NEED_AWS=0
- export TF_NEED_GCP=0
- export TF_NEED_HDFS=0
- export TF_NEED_S3=0
+ export TF_NEED_AWS=1
+ export TF_NEED_GCP=1
+ export TF_NEED_HDFS=1
+ export TF_NEED_S3=1
export TF_ENABLE_XLA=1
export TF_NEED_GDR=0
export TF_NEED_VERBS=0
@@ -119,25 +93,33 @@ build() {
export TF_NEED_NGRAPH=0
export TF_NEED_IGNITE=0
export TF_NEED_ROCM=1
+ # See https://github.com/tensorflow/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl
+ export TF_SYSTEM_LIBS="boringssl,curl,cython,gif,icu,libjpeg_turbo,lmdb,nasm,pcre,png,pybind11,zlib"
export TF_SET_ANDROID_WORKSPACE=0
export TF_DOWNLOAD_CLANG=0
export TF_NCCL_VERSION=2.7
export TF_IGNORE_MAX_BAZEL_VERSION=1
export TF_MKL_ROOT=/opt/intel/mkl
export NCCL_INSTALL_PATH=/usr
- export GCC_HOST_COMPILER_PATH=/usr/bin/gcc-9
- export HOST_C_COMPILER=/usr/bin/gcc-9
- export HOST_CXX_COMPILER=/usr/bin/g++-9
+ export GCC_HOST_COMPILER_PATH=/usr/bin/gcc
+ export HOST_C_COMPILER=/usr/bin/gcc
+ export HOST_CXX_COMPILER=/usr/bin/g++
export TF_CUDA_CLANG=0 # Clang currently disabled because it's not compatible at the moment.
export CLANG_CUDA_COMPILER_PATH=/usr/bin/clang
export TF_CUDA_PATHS=/opt/cuda,/usr/lib,/usr
export TF_CUDA_VERSION=$(/opt/cuda/bin/nvcc --version | sed -n 's/^.*release \(.*\),.*/\1/p')
export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' /usr/include/cudnn_version.h)
- export TF_CUDA_COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0
+ export TF_CUDA_COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0,8.6
# Required until https://github.com/tensorflow/tensorflow/issues/39467 is fixed.
- export CC=gcc-9
- export CXX=g++-9
+ export CC=gcc
+ export CXX=g++
+
+ export BAZEL_ARGS="--config=mkl -c opt --copt=-I/usr/include/openssl-1.0 --host_copt=-I/usr/include/openssl-1.0 --linkopt=-l:libssl.so.1.0.0 --linkopt=-l:libcrypto.so.1.0.0 --host_linkopt=-l:libssl.so.1.0.0 --host_linkopt=-l:libcrypto.so.1.0.0"
+
+ # Workaround for gcc 10+ warnings related to upb.
+ # See https://github.com/tensorflow/tensorflow/issues/39467
+ export BAZEL_ARGS="$BAZEL_ARGS --host_copt=-Wno-stringop-truncation"
if [ "$_build_no_opt" -eq 1 ]; then
echo "Building with rocm and without non-x86-64 optimizations"
@@ -147,7 +129,8 @@ build() {
export TF_NEED_ROCM=1
./configure
bazel \
- build --config=mkl -c opt \
+ build \
+ ${BAZEL_ARGS[@]} \
//tensorflow:libtensorflow.so \
//tensorflow:libtensorflow_cc.so \
//tensorflow:install_headers \
@@ -162,9 +145,11 @@ build() {
export CC_OPT_FLAGS="-march=haswell -O3"
export TF_NEED_CUDA=0
export TF_NEED_ROCM=1
+ export TF_CUDA_CLANG=0
./configure
bazel \
- build --config=mkl --config=avx2_linux -c opt \
+ build --config=avx2_linux \
+ ${BAZEL_ARGS[@]} \
//tensorflow:libtensorflow.so \
//tensorflow:libtensorflow_cc.so \
//tensorflow:install_headers \
@@ -232,7 +217,7 @@ _python_package() {
package_tensorflow-rocm() {
pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM)"
- depends+=(rocm rccl)
+ depends+=(rocm rocm-libs miopen rccl)
conflicts=(tensorflow)
provides=(tensorflow)
@@ -241,8 +226,8 @@ package_tensorflow-rocm() {
}
package_tensorflow-opt-rocm() {
- pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)"
- depends+=(rocm rccl)
+ pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)"
+ depends+=(rocm rocm-libs miopen rccl)
conflicts=(tensorflow)
provides=(tensorflow tensorflow-rocm)
@@ -252,7 +237,7 @@ package_tensorflow-opt-rocm() {
package_python-tensorflow-rocm() {
pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM)"
- depends+=(tensorflow-rocm python-termcolor python-astor python-gast python-numpy rocm python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse)
+ depends+=(tensorflow-rocm python-termcolor python-astor python-gast03 python-numpy rocm rocm-libs miopen python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-past python-flatbuffers)
conflicts=(python-tensorflow)
provides=(python-tensorflow)
@@ -261,8 +246,8 @@ package_python-tensorflow-rocm() {
}
package_python-tensorflow-opt-rocm() {
- pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)"
- depends+=(tensorflow-opt-rocm python-termcolor python-astor python-gast python-numpy rocm python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse)
+ pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)"
+ depends+=(tensorflow-opt-rocm python-termcolor python-astor python-gast03 python-numpy rocm rocm-libs miopen python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-past python-flatbuffers)
conflicts=(python-tensorflow)
provides=(python-tensorflow python-tensorflow-rocm)
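
The sed call added to prepare() above (FS#68772) strips the hard version pins from tensorflow/tools/pip_package/setup.py so the wheel accepts whatever dependency versions Arch ships. A hedged illustration of its effect on a few made-up requirement strings (the pins shown are examples, not quoted from setup.py):

    printf "%s\n" "'gast == 0.3.3'," "'numpy ~= 1.19.2'," "'six >= 1.12.0'," \
        | sed -E "s/'([0-9a-z_-]+) .= [0-9].+[0-9]'/'\1'/"
    # prints the requirements with the version constraints removed:
    #   'gast',
    #   'numpy',
    #   'six',
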
diff --git a/fix-h5py3.0.patch b/fix-h5py3.0.patch
new file mode 100644
index 000000000000..18e55a5297a4
--- /dev/null
+++ b/fix-h5py3.0.patch
@@ -0,0 +1,18 @@
+diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py
+index d3bb10c98d..e89f5356bb 100644
+--- a/tensorflow/python/keras/saving/hdf5_format.py
++++ b/tensorflow/python/keras/saving/hdf5_format.py
+@@ -659,11 +659,11 @@ def load_weights_from_hdf5_group(f, layers):
+ and weights file.
+ """
+ if 'keras_version' in f.attrs:
+- original_keras_version = f.attrs['keras_version'].decode('utf8')
++ original_keras_version = f.attrs['keras_version']
+ else:
+ original_keras_version = '1'
+ if 'backend' in f.attrs:
+- original_backend = f.attrs['backend'].decode('utf8')
++ original_backend = f.attrs['backend']
+ else:
+ original_backend = None
+
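
The patch above works around the h5py 3.0 behaviour change (FS#68488): string attributes are now read back as str rather than bytes, so the removed .decode('utf8') calls would raise AttributeError. A rough way to check which behaviour an installed h5py exhibits, assuming model.h5 is a hypothetical Keras-saved HDF5 file:

    python -c "import h5py; print(h5py.__version__, type(h5py.File('model.h5', 'r').attrs['keras_version']))"
    # h5py < 3.0 reports <class 'bytes'>; h5py >= 3.0 reports <class 'str'>
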
diff --git a/fix_occupancy_block.patch b/fix_occupancy_block.patch
deleted file mode 100644
index 137b4e56ea55..000000000000
--- a/fix_occupancy_block.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 22def20bae7be6d5b790b360abed5919385b16c2 Mon Sep 17 00:00:00 2001
-From: Christian Sigg <csigg@google.com>
-Date: Mon, 29 Jun 2020 04:23:28 -0700
-Subject: [PATCH] New ROCm 3.5 RBE docker based on Ubuntu 18.04, re-enable RBE.
-
-Fix list of cxx_builtin_include_directories. Only a few are needed, but those are more complicated (mix of symlinked and real paths).
-
-Properly return error from crosstool wrapper.
-
-PiperOrigin-RevId: 318788040
-Change-Id: Ia66898e98a9a4d8fb479c7e75317f4114f6081e5
----
- .bazelrc | 17 ++++
- tensorflow/core/util/gpu_launch_config.h | 40 ++-------
- ....local-toolchain-ubuntu18.04-manylinux2010 | 34 ++++++++
- .../ci_build/Dockerfile.rbe.rocm-ubuntu16.04 | 37 ---------
- ...rocm-ubuntu18.04-manylinux2010-multipython | 79 ++++++++++++++++++
- .../bin/crosstool_wrapper_driver_rocm.tpl | 19 ++++-
- third_party/gpus/rocm_configure.bzl | 83 +++----------------
- .../preconfig/generate/containers.bzl | 2 +-
- .../toolchains/remote_config/configs.bzl | 12 +--
- .../toolchains/remote_config/containers.bzl | 10 ++-
- 10 files changed, 184 insertions(+), 149 deletions(-)
- create mode 100644 tensorflow/tools/ci_build/Dockerfile.local-toolchain-ubuntu18.04-manylinux2010
- delete mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
- create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython
-
-diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h
-index 4dfaf333d4bf0..0b943e917da01 100644
---- a/tensorflow/core/util/gpu_launch_config.h
-+++ b/tensorflow/core/util/gpu_launch_config.h
-@@ -168,18 +168,10 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
- block_size_limit);
- CHECK_EQ(err, cudaSuccess);
- #elif TENSORFLOW_USE_ROCM
-- // Earlier versions of this HIP routine incorrectly returned void.
-- // TODO re-enable hipError_t error checking when HIP is fixed.
-- // ROCm interface uses unsigned int, convert after checking
-- uint32_t block_count_uint = 0;
-- uint32_t thread_per_block_uint = 0;
-- CHECK_GE(block_size_limit, 0);
-- uint32_t block_size_limit_uint = static_cast<uint32_t>(block_size_limit);
-- hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint,
-- func, dynamic_shared_memory_size,
-- block_size_limit_uint);
-- block_count = static_cast<int>(block_count_uint);
-- thread_per_block = static_cast<int>(thread_per_block_uint);
-+ hipError_t err = hipOccupancyMaxPotentialBlockSize(
-+ &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-+ block_size_limit);
-+ CHECK_EQ(err, hipSuccess);
- #endif
-
- block_count =
-@@ -208,27 +200,13 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
- cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
- &block_count, func, fixed_block_size, dynamic_shared_memory_size);
- CHECK_EQ(err, cudaSuccess);
-- block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
-- DivUp(work_element_count, fixed_block_size));
- #elif TENSORFLOW_USE_ROCM
-- // ROCM TODO re-enable this after hipOccupancyMaxActiveBlocksPerMultiprocessor
-- // is implemented
-- // hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
-- // &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-- // block_size_limit);
-- // CHECK_EQ(err, hipSuccess);
--
-- // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&)
-- // that the kernel is quite simple and will largely be memory-limited.
-- const int physical_thread_count = std::min(
-- d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(),
-- work_element_count);
-- // Assume the kernel be simple enough that it is okay to use 1024 threads
-- // per workgroup.
-- int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock());
-- block_count = std::min(DivUp(physical_thread_count, thread_per_block),
-- d.getNumGpuMultiProcessors());
-+ hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
-+ &block_count, func, fixed_block_size, dynamic_shared_memory_size);
-+ CHECK_EQ(err, hipSuccess);
- #endif
-+ block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
-+ DivUp(work_element_count, fixed_block_size));
-
- config.virtual_thread_count = work_element_count;
- config.thread_per_block = fixed_block_size;
diff --git a/new-rocm.patch b/new-rocm.patch
deleted file mode 100644
index 01eb2b4fab8c..000000000000
--- a/new-rocm.patch
+++ /dev/null
@@ -1,692 +0,0 @@
-From fcc2de09eb38f45b678a5457f594ca594f2572c9 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Thu, 16 Jul 2020 19:38:03 +0000
-Subject: [PATCH 1/8] Change references to libhip_hcc.so to refer to
- libamdhip64.so instead
-
-With the switch to the new hipclang-vdi runtime (in ROCm 3.5), the new name for the HIP runtime library is libamdhip64.so.
-
-For backwards compatibility, ROCm 3.5 and ROCm 3.6 include a "libhip_hcc.so" softlink, which points to libamdhip64.so. That softlink will be going away starting with ROCm 3.7(?).
-
-This commit updates references to libhip_hcc.so (in the TF build) to use libamdhip64.so instead.
-
-See following JIRA tickets for further details:
-
-* http://ontrack-internal.amd.com/browse/SWDEV-244762
-* http://ontrack-internal.amd.com/browse/SWDEV-238533
----
- tensorflow/stream_executor/platform/default/dso_loader.cc | 2 +-
- .../crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl | 7 -------
- third_party/gpus/rocm_configure.bzl | 8 +++-----
- 3 files changed, 4 insertions(+), 13 deletions(-)
-
-diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc
-index 70b1ebe070a76..84293b7767a20 100644
---- a/tensorflow/stream_executor/platform/default/dso_loader.cc
-+++ b/tensorflow/stream_executor/platform/default/dso_loader.cc
-@@ -140,7 +140,7 @@ port::StatusOr<void*> GetHipsparseDsoHandle() {
- return GetDsoHandle("hipsparse", "");
- }
-
--port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("hip_hcc", ""); }
-+port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("amdhip64", ""); }
-
- } // namespace DsoLoader
-
-diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-index 8848bd32c2e1d..d5bfe78c6449d 100755
---- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-@@ -34,8 +34,6 @@ HIPCC_ENV = '%{hipcc_env}'
- HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True"
- HIP_RUNTIME_PATH = '%{hip_runtime_path}'
- HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
--HCC_RUNTIME_PATH = '%{hcc_runtime_path}'
--HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}'
- ROCR_RUNTIME_PATH = '%{rocr_runtime_path}'
- ROCR_RUNTIME_LIBRARY = '%{rocr_runtime_library}'
- VERBOSE = '%{crosstool_verbose}'=='1'
-@@ -267,11 +265,6 @@ def main():
- gpu_linker_flags.append('-L' + ROCR_RUNTIME_PATH)
- gpu_linker_flags.append('-Wl,-rpath=' + ROCR_RUNTIME_PATH)
- gpu_linker_flags.append('-l' + ROCR_RUNTIME_LIBRARY)
-- # do not link with HCC runtime library in case hip-clang toolchain is used
-- if not HIPCC_IS_HIPCLANG:
-- gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH)
-- gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH)
-- gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY)
- gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH)
- gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH)
- gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY)
-diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
-index 1312574f0aa46..0508279518894 100644
---- a/third_party/gpus/rocm_configure.bzl
-+++ b/third_party/gpus/rocm_configure.bzl
-@@ -390,7 +390,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin):
- libs_paths = [
- (name, _rocm_lib_paths(repository_ctx, name, path))
- for name, path in [
-- ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"),
-+ ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"),
- ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"),
- ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"),
- ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"),
-@@ -646,7 +646,7 @@ def _create_local_rocm_repository(repository_ctx):
- "rocm/BUILD",
- tpl_paths["rocm:BUILD"],
- {
-- "%{hip_lib}": rocm_libs["hip_hcc"].file_name,
-+ "%{hip_lib}": rocm_libs["amdhip64"].file_name,
- "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
- "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
- "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
-@@ -733,9 +733,7 @@ def _create_local_rocm_repository(repository_ctx):
- "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib",
- "%{rocr_runtime_library}": "hsa-runtime64",
- "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib",
-- "%{hip_runtime_library}": "hip_hcc",
-- "%{hcc_runtime_path}": rocm_config.rocm_toolkit_path + "/hcc/lib",
-- "%{hcc_runtime_library}": "mcwamp",
-+ "%{hip_runtime_library}": "amdhip64",
- "%{crosstool_verbose}": _crosstool_verbose(repository_ctx),
- "%{gcc_host_compiler_path}": str(cc),
- },
-
-From 77fb7fd1c68f81c416fd909b6677277b3637be05 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Fri, 17 Jul 2020 01:04:58 +0000
-Subject: [PATCH 2/8] Removing references to `*StaticCompiledGEMM` from TF code
-
-This commit is in conjunction with this MIOpen PR which removes scgemm from MIOpen
-https://github.com/ROCmSoftwarePlatform/MIOpen/pull/325
-
-The MIOpen release that includes that change will be included in the next ROCm release.
-This commit removes references to `*StaticCompiledGEMM` from TF code to prepare for switching to the next ROCm release (3.7)
----
- tensorflow/stream_executor/rocm/rocm_dnn.cc | 6 ------
- 1 file changed, 6 deletions(-)
-
-diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc
-index 80306105d4adf..4c5a740dfb090 100644
---- a/tensorflow/stream_executor/rocm/rocm_dnn.cc
-+++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc
-@@ -113,9 +113,6 @@ string ToString(miopenConvFwdAlgorithm_t algorithm) {
- case miopenConvolutionFwdAlgoImplicitGEMM:
- s = "Implicit GEMM";
- break;
-- case miopenConvolutionFwdAlgoStaticCompiledGEMM:
-- s = "Static Compiled GEMM";
-- break;
- }
- return s;
- }
-@@ -182,9 +179,6 @@ string ToString(miopenConvAlgorithm_t algorithm) {
- case miopenConvolutionAlgoImplicitGEMM:
- s = "Implicit GEMM";
- break;
-- case miopenConvolutionAlgoStaticCompiledGEMM:
-- s = "Static Compiled GEMM";
-- break;
- }
- return s;
- }
-
-From 566d2a95c6140322241bce20fcfea952e837fda1 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Tue, 11 Aug 2020 02:09:46 +0000
-Subject: [PATCH 3/8] Reverting "Provide ldexp float overload for HIP, it's
- missing in their headers. "
-
----
- tensorflow/core/kernels/cwise_ops_gpu_common.cu.h | 6 ------
- tensorflow/core/kernels/rnn/blas_gemm.h | 5 -----
- 2 files changed, 11 deletions(-)
-
-diff --git a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-index 8849c3f4eddbb..ecc58da315f6b 100644
---- a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-+++ b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-@@ -30,12 +30,6 @@ limitations under the License.
- #include "tensorflow/core/platform/types.h"
-
- #include "tensorflow/core/platform/logging.h"
--
--#ifdef __HIP_DEVICE_COMPILE__
--// Provide ldexp float overload for HIP, it's missing in their headers.
--__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
--#endif
--
- namespace tensorflow {
- namespace functor {
-
-diff --git a/tensorflow/core/kernels/rnn/blas_gemm.h b/tensorflow/core/kernels/rnn/blas_gemm.h
-index 74f4cd2bb39a4..126e1edef17a9 100644
---- a/tensorflow/core/kernels/rnn/blas_gemm.h
-+++ b/tensorflow/core/kernels/rnn/blas_gemm.h
-@@ -25,11 +25,6 @@ limitations under the License.
- #include "tensorflow/core/kernels/eigen_contraction_kernel.h"
- #endif
-
--#ifdef __HIP_DEVICE_COMPILE__
--// Provide ldexp float overload for HIP, it's missing in their headers.
--__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
--#endif
--
- namespace tensorflow {
- class OpKernelContext;
- namespace functor {
-
-From 9dcaad456e194bf8d1e3962cd6ad272f4879d7f3 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 00:39:02 +0000
-Subject: [PATCH 4/8] updating ROCM CI scripts to use ROCm 3.7
-
----
- .../tools/ci_build/linux/rocm/run_cc_core.sh | 34 +++++++++++++------
- .../ci_build/linux/rocm/run_csb_tests.sh | 27 ++++++++++-----
- .../tools/ci_build/linux/rocm/run_py3_core.sh | 23 +++++++++----
- .../tools/ci_build/xla/linux/rocm/run_py3.sh | 33 ++++++++++++------
- 4 files changed, 79 insertions(+), 38 deletions(-)
-
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-index 1f4a36f8de0f5..92d21cb133be9 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py
- bazel test \
- --config=rocm \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \
- --test_lang_filters=cc \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT}\
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-- --test_size_filters=small,medium \
-+ --test_size_filters=small,medium,large \
- --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
- -- \
- //tensorflow/... \
-@@ -59,11 +68,14 @@ bazel test \
- --config=rocm \
- -k \
- --test_tag_filters=gpu \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=1 \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-+ --test_size_filters=small,medium,large \
- -- \
- //tensorflow/core/nccl:nccl_manager_test
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-index 4962b2789b1c0..80c0686e64724 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -40,8 +47,10 @@ bazel test \
- --config=rocm \
- -k \
- --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-@@ -60,8 +69,8 @@ bazel test \
- --test_tag_filters=gpu \
- --test_timeout 600,900,2400,7200 \
- --test_output=errors \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=1 \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
- --test_sharding_strategy=disabled \
- -- \
- //tensorflow/core/nccl:nccl_manager_test
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-index 7ea866f8e2032..3a09081dd6ac6 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -41,8 +48,10 @@ bazel test \
- -k \
- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
- --test_lang_filters=py \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-index 6ce1fad9cc754..d623b77d5333d 100755
---- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
- echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
-@@ -41,9 +48,11 @@ bazel test \
- --config=rocm \
- --config=xla \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-@@ -65,9 +74,11 @@ bazel test \
- --config=rocm \
- --config=xla \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-
-From 4b76a49a1a5741dece6d368b30f7125e20c12878 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 26 Aug 2020 15:21:31 +0000
-Subject: [PATCH 5/8] Updating Dockerfile.rocm to use ROCm 3.7
-
----
- tensorflow/tools/ci_build/Dockerfile.rocm | 14 ++++++++++----
- 1 file changed, 10 insertions(+), 4 deletions(-)
-
-diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
-index 4f5d3ae7291b1..d209173258ada 100644
---- a/tensorflow/tools/ci_build/Dockerfile.rocm
-+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
-@@ -3,8 +3,10 @@
- FROM ubuntu:bionic
- MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
-
--ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/
--ARG ROCM_PATH=/opt/rocm-3.3.0
-+ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/
-+ARG ROCM_BUILD_NAME=xenial
-+ARG ROCM_BUILD_NUM=main
-+ARG ROCM_PATH=/opt/rocm-3.7.0
-
- ENV DEBIAN_FRONTEND noninteractive
- ENV TF_NEED_ROCM 1
-@@ -13,8 +15,12 @@ RUN apt update && apt install -y wget software-properties-common
-
- # Add rocm repository
- RUN apt-get clean all
--RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
--RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
-+RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/* ]] ; then \
-+ wget -qO - $ROCM_DEB_REPO/rocm.gpg.key | apt-key add -; \
-+ echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \
-+ else \
-+ echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \
-+ fi'
-
- # Install misc pkgs
- RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-
-From f5a822d2012bc3e1cea1de97ff8189404688f84e Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 15:51:34 +0000
-Subject: [PATCH 6/8] Updating TF to acccount for the (ROCm 3.7) change in
- hipDeviceGetStreamPriorityRange
-
-Starting with ROCm 3.7, the `hipDeviceGetStreamPriorityRange` API returns a range of `[-1,1]`.
-This is a departure from the `[0,2]` range that was returned by this API in ROCm 3.3 and prior.
-
-Updating the TF unit test, that has checks based on the range returned by this API, to account for change in the returned range
----
- .../common_runtime/gpu/gpu_device_test.cc | 34 +++++--------------
- 1 file changed, 8 insertions(+), 26 deletions(-)
-
-diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-index 6448fc56af7a1..21c75244b5feb 100644
---- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-@@ -230,9 +230,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) {
- TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- {
- #if TENSORFLOW_USE_ROCM
-- // Priority outside the range (0, 2) for AMD GPUs
-+ // Priority outside the range (-1, 1) for AMD GPUs
- SessionOptions opts =
-- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
-+ MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 1}});
- #else
- // Priority outside the range (-2, 0) for NVidia GPUs
- SessionOptions opts =
-@@ -245,7 +245,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- #if TENSORFLOW_USE_ROCM
- ExpectErrorMessageSubstr(
- status,
-- "Priority -1 is outside the range of supported priorities [0,2] for"
-+ "Priority -2 is outside the range of supported priorities [-1,1] for"
- " virtual device 0 on GPU# 0");
- #else
- ExpectErrorMessageSubstr(
-@@ -254,8 +254,8 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- }
- {
- #if TENSORFLOW_USE_ROCM
-- // Priority outside the range (0, 2) for AMD GPUs
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}});
-+ // Priority outside the range (-1, 1) for AMD GPUs
-+ SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
- #else
- // Priority outside the range (-2, 0) for NVidia GPUs
- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
-@@ -267,7 +267,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- #if TENSORFLOW_USE_ROCM
- ExpectErrorMessageSubstr(
- status,
-- "Priority 3 is outside the range of supported priorities [0,2] for"
-+ "Priority 2 is outside the range of supported priorities [-1,1] for"
- " virtual device 0 on GPU# 0");
- #else
- ExpectErrorMessageSubstr(
-@@ -288,26 +288,17 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
- }
-
- TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
--#if TENSORFLOW_USE_ROCM
-- // Valid range for priority values on AMD GPUs in (0,2)
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
--#else
-+ // Valid range for priority values on AMD GPUs in (-1,1)
- // Valid range for priority values on NVidia GPUs in (-2, 0)
- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
--#endif
- std::vector<std::unique_ptr<Device>> devices;
- TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
- opts, kDeviceNamePrefix, &devices));
- EXPECT_EQ(2, devices.size());
- EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
- EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
--#if TENSORFLOW_USE_ROCM
-- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#else
- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
- EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#endif
- ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
- ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
- EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
-@@ -339,27 +330,18 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
- }
- {
- // Multile virtual devices with matching priority.
--#if TENSORFLOW_USE_ROCM
-- // Valid range for priority values on AMD GPUs in (0,2)
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}});
--#else
-+ // Valid range for priority values on AMD GPUs in (-1,1)
- // Valid range for priority values on NVidia GPUs in (-2, 0)
- SessionOptions opts =
- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
--#endif
- std::vector<std::unique_ptr<Device>> devices;
- TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
- opts, kDeviceNamePrefix, &devices));
- EXPECT_EQ(2, devices.size());
- EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
- EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
--#if TENSORFLOW_USE_ROCM
-- EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#else
- EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#endif
- }
- }
-
-
-From ae9e3bd2fb8c3e042742b8c534c9020732c2c66d Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 23:05:32 +0000
-Subject: [PATCH 7/8] Commeting out subtests that are failing due to JIRA
- ticket 236756, and also removing the no_rocm tag from the tests that contain
- those subtests
-
----
- tensorflow/python/ops/parallel_for/math_test.py | 5 +++++
- tensorflow/python/ops/ragged/ragged_dispatch_test.py | 5 +++++
- 2 files changed, 10 insertions(+)
-
-diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py
-index 933ce765cdbfa..367f40d341115 100644
---- a/tensorflow/python/ops/parallel_for/math_test.py
-+++ b/tensorflow/python/ops/parallel_for/math_test.py
-@@ -82,6 +82,11 @@ def test_unary_cwise_complex_ops(self):
- self._test_unary_cwise_ops(complex_ops, True)
-
- def test_unary_cwise_real_ops_1(self):
-+ if test.is_built_with_rocm():
-+ # TODO(rocm):
-+ # This fails on ROCm...see JIRA ticket 236756
-+ self.skipTest('Fails on ROCM')
-+
- real_ops = [
- lambda x: math_ops.acosh(1 + math_ops.square(x)),
- math_ops.abs,
-diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-index 0237624aa451d..7a1d7c1882af1 100644
---- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-@@ -139,6 +139,11 @@ def assertSameShape(self, x, y):
- ]
- ) # pyformat: disable
- def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
-+ if test_util.IsBuiltWithROCm():
-+ # TODO(rocm):
-+ # This fails on ROCm...see JIRA ticket 236756
-+ self.skipTest('Fails on ROCM')
-+
- result = op(x, **extra_args)
-
- # Run the wrapped op on the dense values, for comparison.
-
-From d4b8e68a3675bfb2d7465205420bd5ad15701d0b Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 26 Aug 2020 22:01:18 +0000
-Subject: [PATCH 8/8] Adding no_rocm tag to unit-tests that will not pass with
- ROCm 3.7 until PR #42288 gets merged
-
----
- tensorflow/python/BUILD | 1 +
- tensorflow/python/keras/optimizer_v2/BUILD | 2 ++
- 2 files changed, 3 insertions(+)
-
-diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
-index a111237e0565d..5252ebbed6e4b 100644
---- a/tensorflow/python/BUILD
-+++ b/tensorflow/python/BUILD
-@@ -5423,6 +5423,7 @@ cuda_py_test(
- python_version = "PY3",
- shard_count = 10,
- tags = [
-+ "no_rocm",
- "no_windows_gpu",
- "noasan", # b/159332048
- "nomsan", # b/148630708
-diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD
-index b208e2e1e1e6b..11966ce8211d2 100644
---- a/tensorflow/python/keras/optimizer_v2/BUILD
-+++ b/tensorflow/python/keras/optimizer_v2/BUILD
-@@ -157,6 +157,7 @@ cuda_py_test(
- size = "medium",
- srcs = ["adadelta_test.py"],
- shard_count = 4,
-+ tags = ["no_rocm"],
- deps = [
- ":optimizer_v2",
- "//tensorflow/python:client_testlib",
-@@ -298,6 +299,7 @@ cuda_py_test(
- size = "medium",
- srcs = ["rmsprop_test.py"],
- shard_count = 2,
-+ tags = ["no_rocm"],
- deps = [
- ":optimizer_v2",
- "//tensorflow/python:array_ops",