author     acxz    2020-12-16 00:38:44 -0500
committer  acxz    2020-12-16 00:38:44 -0500
commit     43d4b3d57a605f89713c2af7c07055f3216a2f2b (patch)
tree       2f6ecc6c5a8163e722a7c27fd986bc33dcafa7a2
parent     fa97314d266ed527c4ca43ab2f4b9a2c7b7d317a (diff)
download   aur-43d4b3d57a605f89713c2af7c07055f3216a2f2b.tar.gz
updpkg 2.4.0
-rw-r--r--  .SRCINFO                    86
-rw-r--r--  PKGBUILD                   113
-rw-r--r--  fix-h5py3.0.patch           18
-rw-r--r--  fix_occupancy_block.patch   87
-rw-r--r--  new-rocm.patch             692
5 files changed, 130 insertions, 866 deletions
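
The checksum and .SRCINFO churn below is the routine fallout of the 2.3.1 -> 2.4.0 bump. As a rough sketch (not taken from this commit; assumes pacman-contrib is installed for updpkgsums), such a bump is usually regenerated with standard Arch tooling after editing pkgver/pkgrel and the source=() array:

    updpkgsums                          # re-downloads sources and rewrites the sha512sums=() array
    makepkg --printsrcinfo > .SRCINFO   # regenerates .SRCINFO from the updated PKGBUILD
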
diff --git a/.SRCINFO b/.SRCINFO
index 92973a8994b3..64374efa83ac 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,7 +1,7 @@
pkgbase = tensorflow-rocm
pkgdesc = Library for computation using data flow graphs for scalable machine learning
- pkgver = 2.3.1
- pkgrel = 3
+ pkgver = 2.4.0
+ pkgrel = 1
url = https://www.tensorflow.org/
arch = x86_64
license = APACHE
@@ -12,42 +12,48 @@ pkgbase = tensorflow-rocm
makedepends = miopen
makedepends = rccl
makedepends = git
- makedepends = gcc9
makedepends = python-pip
makedepends = python-wheel
makedepends = python-setuptools
makedepends = python-h5py
makedepends = python-keras-applications
makedepends = python-keras-preprocessing
+ makedepends = cython
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
optdepends = tensorboard: Tensorflow visualization toolkit
- source = tensorflow-rocm-2.3.1.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.3.1.tar.gz
- source = numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
+ source = tensorflow-rocm-2.4.0.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.4.0.tar.gz
+ source = fix-h5py3.0.patch
source = build-against-actual-mkl.patch
- source = fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch
- source = fix_hipcc_path.patch::https://github.com/tensorflow/tensorflow/commit/9d2b338025dc61828ccf8196bb042ab9c586c7b3.patch
- source = fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
- source = fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
- source = fix_occupancy_block.patch
- source = new-rocm.patch
- sha512sums = e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0
- sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc
+ sha512sums = 4860c148fd931c4dc7c558128e545e2b6384e590a3fbc266a5bfe842a8307f23f1f7e0103bda3a383e7c77edad2bb76dec02da8be400a40956072df19c5d4dbd
+ sha512sums = 9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a
sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08
- sha512sums = 7acc2f2579158be1d8c824da0f6d44d084a56182f1aab3cd7a78d513931b3a16ce72f2e05b44b1de76f5519af39e80431660de294ff337842e4ee8949cb85b28
- sha512sums = 136d91db88658dd0eab1543f8dec1cd20dca86afc6970606a722e7d01a645d64c42564d590fc1ecb04c204ae0b0fa8f78cf9998e9bcf367f4cc795fa59677591
- sha512sums = 75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20
- sha512sums = 42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5
- sha512sums = 88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126
- sha512sums = 080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317
pkgname = tensorflow-rocm
pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM)
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = rccl
provides = tensorflow
conflicts = tensorflow
@@ -57,12 +63,22 @@ pkgname = python-tensorflow-rocm
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = tensorflow-rocm
depends = python-termcolor
depends = python-astor
- depends = python-gast
+ depends = python-gast03
depends = python-numpy
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = python-protobuf
depends = absl-py
depends = rccl
@@ -72,31 +88,53 @@ pkgname = python-tensorflow-rocm
depends = python-tensorflow-estimator
depends = python-opt_einsum
depends = python-astunparse
+ depends = python-past
+ depends = python-flatbuffers
provides = python-tensorflow
conflicts = python-tensorflow
pkgname = tensorflow-opt-rocm
- pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = rccl
provides = tensorflow
provides = tensorflow-rocm
conflicts = tensorflow
pkgname = python-tensorflow-opt-rocm
- pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)
depends = c-ares
depends = intel-mkl
depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
depends = tensorflow-opt-rocm
depends = python-termcolor
depends = python-astor
- depends = python-gast
+ depends = python-gast03
depends = python-numpy
depends = rocm
+ depends = rocm-libs
+ depends = miopen
depends = python-protobuf
depends = absl-py
depends = rccl
@@ -106,6 +144,8 @@ pkgname = python-tensorflow-opt-rocm
depends = python-tensorflow-estimator
depends = python-opt_einsum
depends = python-astunparse
+ depends = python-past
+ depends = python-flatbuffers
provides = python-tensorflow
provides = python-tensorflow-rocm
conflicts = python-tensorflow
diff --git a/PKGBUILD b/PKGBUILD
index 9c64e056acaa..bf83faacdcf1 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -14,37 +14,26 @@ pkgname=()
[ "$_build_no_opt" -eq 1 ] && pkgname+=(tensorflow-rocm python-tensorflow-rocm)
[ "$_build_opt" -eq 1 ] && pkgname+=(tensorflow-opt-rocm python-tensorflow-opt-rocm)
-pkgver=2.3.1
-_pkgver=2.3.1
-pkgrel=3
+pkgver=2.4.0
+_pkgver=2.4.0
+pkgrel=1
pkgdesc="Library for computation using data flow graphs for scalable machine learning"
url="https://www.tensorflow.org/"
license=('APACHE')
arch=('x86_64')
-depends=('c-ares' 'intel-mkl' 'onednn')
-makedepends=('bazel' 'python-numpy' 'rocm' 'rocm-libs' 'miopen' 'rccl' 'git' 'gcc9'
+depends=('c-ares' 'intel-mkl' 'onednn' 'pybind11' 'openssl-1.0' 'lmdb' 'libpng' 'curl' 'giflib' 'icu' 'libjpeg-turbo')
+makedepends=('bazel' 'python-numpy' 'rocm' 'rocm-libs' 'miopen' 'rccl' 'git'
'python-pip' 'python-wheel' 'python-setuptools' 'python-h5py'
- 'python-keras-applications' 'python-keras-preprocessing')
+ 'python-keras-applications' 'python-keras-preprocessing'
+ 'cython')
optdepends=('tensorboard: Tensorflow visualization toolkit')
source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz"
- numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
- build-against-actual-mkl.patch
- fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch
- fix_hipcc_path.patch::https://github.com/tensorflow/tensorflow/commit/9d2b338025dc61828ccf8196bb042ab9c586c7b3.patch
- fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
- fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
- fix_occupancy_block.patch
- new-rocm.patch)
-
-sha512sums=('e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0'
- 'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc'
- 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08'
- '7acc2f2579158be1d8c824da0f6d44d084a56182f1aab3cd7a78d513931b3a16ce72f2e05b44b1de76f5519af39e80431660de294ff337842e4ee8949cb85b28'
- '136d91db88658dd0eab1543f8dec1cd20dca86afc6970606a722e7d01a645d64c42564d590fc1ecb04c204ae0b0fa8f78cf9998e9bcf367f4cc795fa59677591'
- '75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20'
- '42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5'
- '88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126'
- '080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317')
+ fix-h5py3.0.patch
+ build-against-actual-mkl.patch)
+
+sha512sums=('4860c148fd931c4dc7c558128e545e2b6384e590a3fbc266a5bfe842a8307f23f1f7e0103bda3a383e7c77edad2bb76dec02da8be400a40956072df19c5d4dbd'
+ '9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a'
+ 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08')
get_pyver () {
python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))'
@@ -66,32 +55,17 @@ prepare() {
# Tensorflow actually wants to build against a slimmed down version of Intel MKL called MKLML
# See https://github.com/intel/mkl-dnn/issues/102
# MKLML version that Tensorflow wants to use is https://github.com/intel/mkl-dnn/releases/tag/v0.21
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch
+ # patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch
# Compile with C++17 by default (FS#65953)
#sed -i "s/c++14/c++17/g" tensorflow-${_pkgver}/.bazelrc
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/numpy1.20.patch
-
- # Fix hip_hcc path
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_hip_hcc_path.patch
-
- # Fix hip_hcc path
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_hipcc_path.patch
-
- # Fix GpuAtomic redefinition
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_gpu_atomic_redef.patch
+ # FS#68488
+ patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix-h5py3.0.patch
- # Fix ldexp float method
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_ldexp_float.patch
-
- # Fix missing hipOccupancyMaxPotentialBlockSize method
- # https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_occupancy_block.patch
-
- # Patch for ROCm 3.7 and later
- # https://github.com/tensorflow/tensorflow/pull/42689
- patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/new-rocm.patch
+ # Get rid of hardcoded versions. Not like we ever cared about what upstream
+ # thinks about which versions should be used anyway. ;) (FS#68772)
+ sed -i -E "s/'([0-9a-z_-]+) .= [0-9].+[0-9]'/'\1'/" tensorflow-${_pkgver}/tensorflow/tools/pip_package/setup.py
cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-rocm
cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-rocm
@@ -104,12 +78,12 @@ build() {
export PYTHON_BIN_PATH=/usr/bin/python
export USE_DEFAULT_PYTHON_LIB_PATH=1
export TF_NEED_JEMALLOC=1
- export TF_NEED_KAFKA=0
+ export TF_NEED_KAFKA=1
export TF_NEED_OPENCL_SYCL=0
- export TF_NEED_AWS=0
- export TF_NEED_GCP=0
- export TF_NEED_HDFS=0
- export TF_NEED_S3=0
+ export TF_NEED_AWS=1
+ export TF_NEED_GCP=1
+ export TF_NEED_HDFS=1
+ export TF_NEED_S3=1
export TF_ENABLE_XLA=1
export TF_NEED_GDR=0
export TF_NEED_VERBS=0
@@ -119,25 +93,33 @@ build() {
export TF_NEED_NGRAPH=0
export TF_NEED_IGNITE=0
export TF_NEED_ROCM=1
+ # See https://github.com/tensorflow/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl
+ export TF_SYSTEM_LIBS="boringssl,curl,cython,gif,icu,libjpeg_turbo,lmdb,nasm,pcre,png,pybind11,zlib"
export TF_SET_ANDROID_WORKSPACE=0
export TF_DOWNLOAD_CLANG=0
export TF_NCCL_VERSION=2.7
export TF_IGNORE_MAX_BAZEL_VERSION=1
export TF_MKL_ROOT=/opt/intel/mkl
export NCCL_INSTALL_PATH=/usr
- export GCC_HOST_COMPILER_PATH=/usr/bin/gcc-9
- export HOST_C_COMPILER=/usr/bin/gcc-9
- export HOST_CXX_COMPILER=/usr/bin/g++-9
+ export GCC_HOST_COMPILER_PATH=/usr/bin/gcc
+ export HOST_C_COMPILER=/usr/bin/gcc
+ export HOST_CXX_COMPILER=/usr/bin/g++
export TF_CUDA_CLANG=0 # Clang currently disabled because it's not compatible at the moment.
export CLANG_CUDA_COMPILER_PATH=/usr/bin/clang
export TF_CUDA_PATHS=/opt/cuda,/usr/lib,/usr
export TF_CUDA_VERSION=$(/opt/cuda/bin/nvcc --version | sed -n 's/^.*release \(.*\),.*/\1/p')
export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' /usr/include/cudnn_version.h)
- export TF_CUDA_COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0
+ export TF_CUDA_COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0,8.6
# Required until https://github.com/tensorflow/tensorflow/issues/39467 is fixed.
- export CC=gcc-9
- export CXX=g++-9
+ export CC=gcc
+ export CXX=g++
+
+ export BAZEL_ARGS="--config=mkl -c opt --copt=-I/usr/include/openssl-1.0 --host_copt=-I/usr/include/openssl-1.0 --linkopt=-l:libssl.so.1.0.0 --linkopt=-l:libcrypto.so.1.0.0 --host_linkopt=-l:libssl.so.1.0.0 --host_linkopt=-l:libcrypto.so.1.0.0"
+
+ # Workaround for gcc 10+ warnings related to upb.
+ # See https://github.com/tensorflow/tensorflow/issues/39467
+ export BAZEL_ARGS="$BAZEL_ARGS --host_copt=-Wno-stringop-truncation"
if [ "$_build_no_opt" -eq 1 ]; then
echo "Building with rocm and without non-x86-64 optimizations"
@@ -147,7 +129,8 @@ build() {
export TF_NEED_ROCM=1
./configure
bazel \
- build --config=mkl -c opt \
+ build \
+ ${BAZEL_ARGS[@]} \
//tensorflow:libtensorflow.so \
//tensorflow:libtensorflow_cc.so \
//tensorflow:install_headers \
@@ -162,9 +145,11 @@ build() {
export CC_OPT_FLAGS="-march=haswell -O3"
export TF_NEED_CUDA=0
export TF_NEED_ROCM=1
+ export TF_CUDA_CLANG=0
./configure
bazel \
- build --config=mkl --config=avx2_linux -c opt \
+ build --config=avx2_linux \
+ ${BAZEL_ARGS[@]} \
//tensorflow:libtensorflow.so \
//tensorflow:libtensorflow_cc.so \
//tensorflow:install_headers \
@@ -232,7 +217,7 @@ _python_package() {
package_tensorflow-rocm() {
pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM)"
- depends+=(rocm rccl)
+ depends+=(rocm rocm-libs miopen rccl)
conflicts=(tensorflow)
provides=(tensorflow)
@@ -241,8 +226,8 @@ package_tensorflow-rocm() {
}
package_tensorflow-opt-rocm() {
- pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)"
- depends+=(rocm rccl)
+ pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)"
+ depends+=(rocm rocm-libs miopen rccl)
conflicts=(tensorflow)
provides=(tensorflow tensorflow-rocm)
@@ -252,7 +237,7 @@ package_tensorflow-opt-rocm() {
package_python-tensorflow-rocm() {
pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM)"
- depends+=(tensorflow-rocm python-termcolor python-astor python-gast python-numpy rocm python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse)
+ depends+=(tensorflow-rocm python-termcolor python-astor python-gast03 python-numpy rocm rocm-libs miopen python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-past python-flatbuffers)
conflicts=(python-tensorflow)
provides=(python-tensorflow)
@@ -261,8 +246,8 @@ package_python-tensorflow-rocm() {
}
package_python-tensorflow-opt-rocm() {
- pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)"
- depends+=(tensorflow-opt-rocm python-termcolor python-astor python-gast python-numpy rocm python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse)
+ pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)"
+ depends+=(tensorflow-opt-rocm python-termcolor python-astor python-gast03 python-numpy rocm rocm-libs miopen python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-past python-flatbuffers)
conflicts=(python-tensorflow)
provides=(python-tensorflow python-tensorflow-rocm)
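
The sed call added to prepare() above (FS#68772) strips the hard version pins from tensorflow/tools/pip_package/setup.py so the wheel accepts whatever dependency versions Arch ships. A hedged illustration of its effect on a few made-up requirement strings (the pins shown are examples, not quoted from setup.py):

    printf "%s\n" "'gast == 0.3.3'," "'numpy ~= 1.19.2'," "'six >= 1.12.0'," \
        | sed -E "s/'([0-9a-z_-]+) .= [0-9].+[0-9]'/'\1'/"
    # prints the requirements with the version constraints removed:
    #   'gast',
    #   'numpy',
    #   'six',
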
diff --git a/fix-h5py3.0.patch b/fix-h5py3.0.patch
new file mode 100644
index 000000000000..18e55a5297a4
--- /dev/null
+++ b/fix-h5py3.0.patch
@@ -0,0 +1,18 @@
+diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py
+index d3bb10c98d..e89f5356bb 100644
+--- a/tensorflow/python/keras/saving/hdf5_format.py
++++ b/tensorflow/python/keras/saving/hdf5_format.py
+@@ -659,11 +659,11 @@ def load_weights_from_hdf5_group(f, layers):
+ and weights file.
+ """
+ if 'keras_version' in f.attrs:
+- original_keras_version = f.attrs['keras_version'].decode('utf8')
++ original_keras_version = f.attrs['keras_version']
+ else:
+ original_keras_version = '1'
+ if 'backend' in f.attrs:
+- original_backend = f.attrs['backend'].decode('utf8')
++ original_backend = f.attrs['backend']
+ else:
+ original_backend = None
+
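
The patch above works around the h5py 3.0 behaviour change (FS#68488): string attributes are now read back as str rather than bytes, so the removed .decode('utf8') calls would raise AttributeError. A rough way to check which behaviour an installed h5py exhibits, assuming model.h5 is a hypothetical Keras-saved HDF5 file:

    python -c "import h5py; print(h5py.__version__, type(h5py.File('model.h5', 'r').attrs['keras_version']))"
    # h5py < 3.0 reports <class 'bytes'>; h5py >= 3.0 reports <class 'str'>
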
diff --git a/fix_occupancy_block.patch b/fix_occupancy_block.patch
deleted file mode 100644
index 137b4e56ea55..000000000000
--- a/fix_occupancy_block.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 22def20bae7be6d5b790b360abed5919385b16c2 Mon Sep 17 00:00:00 2001
-From: Christian Sigg <csigg@google.com>
-Date: Mon, 29 Jun 2020 04:23:28 -0700
-Subject: [PATCH] New ROCm 3.5 RBE docker based on Ubuntu 18.04, re-enable RBE.
-
-Fix list of cxx_builtin_include_directories. Only a few are needed, but those are more complicated (mix of symlinked and real paths).
-
-Properly return error from crosstool wrapper.
-
-PiperOrigin-RevId: 318788040
-Change-Id: Ia66898e98a9a4d8fb479c7e75317f4114f6081e5
----
- .bazelrc | 17 ++++
- tensorflow/core/util/gpu_launch_config.h | 40 ++-------
- ....local-toolchain-ubuntu18.04-manylinux2010 | 34 ++++++++
- .../ci_build/Dockerfile.rbe.rocm-ubuntu16.04 | 37 ---------
- ...rocm-ubuntu18.04-manylinux2010-multipython | 79 ++++++++++++++++++
- .../bin/crosstool_wrapper_driver_rocm.tpl | 19 ++++-
- third_party/gpus/rocm_configure.bzl | 83 +++----------------
- .../preconfig/generate/containers.bzl | 2 +-
- .../toolchains/remote_config/configs.bzl | 12 +--
- .../toolchains/remote_config/containers.bzl | 10 ++-
- 10 files changed, 184 insertions(+), 149 deletions(-)
- create mode 100644 tensorflow/tools/ci_build/Dockerfile.local-toolchain-ubuntu18.04-manylinux2010
- delete mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
- create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython
-
-diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h
-index 4dfaf333d4bf0..0b943e917da01 100644
---- a/tensorflow/core/util/gpu_launch_config.h
-+++ b/tensorflow/core/util/gpu_launch_config.h
-@@ -168,18 +168,10 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
- block_size_limit);
- CHECK_EQ(err, cudaSuccess);
- #elif TENSORFLOW_USE_ROCM
-- // Earlier versions of this HIP routine incorrectly returned void.
-- // TODO re-enable hipError_t error checking when HIP is fixed.
-- // ROCm interface uses unsigned int, convert after checking
-- uint32_t block_count_uint = 0;
-- uint32_t thread_per_block_uint = 0;
-- CHECK_GE(block_size_limit, 0);
-- uint32_t block_size_limit_uint = static_cast<uint32_t>(block_size_limit);
-- hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint,
-- func, dynamic_shared_memory_size,
-- block_size_limit_uint);
-- block_count = static_cast<int>(block_count_uint);
-- thread_per_block = static_cast<int>(thread_per_block_uint);
-+ hipError_t err = hipOccupancyMaxPotentialBlockSize(
-+ &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-+ block_size_limit);
-+ CHECK_EQ(err, hipSuccess);
- #endif
-
- block_count =
-@@ -208,27 +200,13 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
- cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
- &block_count, func, fixed_block_size, dynamic_shared_memory_size);
- CHECK_EQ(err, cudaSuccess);
-- block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
-- DivUp(work_element_count, fixed_block_size));
- #elif TENSORFLOW_USE_ROCM
-- // ROCM TODO re-enable this after hipOccupancyMaxActiveBlocksPerMultiprocessor
-- // is implemented
-- // hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
-- // &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-- // block_size_limit);
-- // CHECK_EQ(err, hipSuccess);
--
-- // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&)
-- // that the kernel is quite simple and will largely be memory-limited.
-- const int physical_thread_count = std::min(
-- d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(),
-- work_element_count);
-- // Assume the kernel be simple enough that it is okay to use 1024 threads
-- // per workgroup.
-- int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock());
-- block_count = std::min(DivUp(physical_thread_count, thread_per_block),
-- d.getNumGpuMultiProcessors());
-+ hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
-+ &block_count, func, fixed_block_size, dynamic_shared_memory_size);
-+ CHECK_EQ(err, hipSuccess);
- #endif
-+ block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
-+ DivUp(work_element_count, fixed_block_size));
-
- config.virtual_thread_count = work_element_count;
- config.thread_per_block = fixed_block_size;
diff --git a/new-rocm.patch b/new-rocm.patch
deleted file mode 100644
index 01eb2b4fab8c..000000000000
--- a/new-rocm.patch
+++ /dev/null
@@ -1,692 +0,0 @@
-From fcc2de09eb38f45b678a5457f594ca594f2572c9 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Thu, 16 Jul 2020 19:38:03 +0000
-Subject: [PATCH 1/8] Change references to libhip_hcc.so to refer to
- libamdhip64.so instead
-
-With the switch to the new hipclang-vdi runtime (in ROCm 3.5), the new name for the HIP runtime library is libamdhip64.so.
-
-For backwards compatibility, ROCm 3.5 and ROCm 3.6 include a "libhip_hcc.so" softlink, which points to libamdhip64.so. That softlink will be going away starting with ROCm 3.7(?).
-
-This commit updates references to libhip_hcc.so (in the TF build) to use libamdhip64.so instead.
-
-See following JIRA tickets for further details:
-
-* http://ontrack-internal.amd.com/browse/SWDEV-244762
-* http://ontrack-internal.amd.com/browse/SWDEV-238533
----
- tensorflow/stream_executor/platform/default/dso_loader.cc | 2 +-
- .../crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl | 7 -------
- third_party/gpus/rocm_configure.bzl | 8 +++-----
- 3 files changed, 4 insertions(+), 13 deletions(-)
-
-diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc
-index 70b1ebe070a76..84293b7767a20 100644
---- a/tensorflow/stream_executor/platform/default/dso_loader.cc
-+++ b/tensorflow/stream_executor/platform/default/dso_loader.cc
-@@ -140,7 +140,7 @@ port::StatusOr<void*> GetHipsparseDsoHandle() {
- return GetDsoHandle("hipsparse", "");
- }
-
--port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("hip_hcc", ""); }
-+port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("amdhip64", ""); }
-
- } // namespace DsoLoader
-
-diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-index 8848bd32c2e1d..d5bfe78c6449d 100755
---- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-@@ -34,8 +34,6 @@ HIPCC_ENV = '%{hipcc_env}'
- HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True"
- HIP_RUNTIME_PATH = '%{hip_runtime_path}'
- HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
--HCC_RUNTIME_PATH = '%{hcc_runtime_path}'
--HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}'
- ROCR_RUNTIME_PATH = '%{rocr_runtime_path}'
- ROCR_RUNTIME_LIBRARY = '%{rocr_runtime_library}'
- VERBOSE = '%{crosstool_verbose}'=='1'
-@@ -267,11 +265,6 @@ def main():
- gpu_linker_flags.append('-L' + ROCR_RUNTIME_PATH)
- gpu_linker_flags.append('-Wl,-rpath=' + ROCR_RUNTIME_PATH)
- gpu_linker_flags.append('-l' + ROCR_RUNTIME_LIBRARY)
-- # do not link with HCC runtime library in case hip-clang toolchain is used
-- if not HIPCC_IS_HIPCLANG:
-- gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH)
-- gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH)
-- gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY)
- gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH)
- gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH)
- gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY)
-diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
-index 1312574f0aa46..0508279518894 100644
---- a/third_party/gpus/rocm_configure.bzl
-+++ b/third_party/gpus/rocm_configure.bzl
-@@ -390,7 +390,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin):
- libs_paths = [
- (name, _rocm_lib_paths(repository_ctx, name, path))
- for name, path in [
-- ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"),
-+ ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"),
- ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"),
- ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"),
- ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"),
-@@ -646,7 +646,7 @@ def _create_local_rocm_repository(repository_ctx):
- "rocm/BUILD",
- tpl_paths["rocm:BUILD"],
- {
-- "%{hip_lib}": rocm_libs["hip_hcc"].file_name,
-+ "%{hip_lib}": rocm_libs["amdhip64"].file_name,
- "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
- "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
- "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
-@@ -733,9 +733,7 @@ def _create_local_rocm_repository(repository_ctx):
- "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib",
- "%{rocr_runtime_library}": "hsa-runtime64",
- "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib",
-- "%{hip_runtime_library}": "hip_hcc",
-- "%{hcc_runtime_path}": rocm_config.rocm_toolkit_path + "/hcc/lib",
-- "%{hcc_runtime_library}": "mcwamp",
-+ "%{hip_runtime_library}": "amdhip64",
- "%{crosstool_verbose}": _crosstool_verbose(repository_ctx),
- "%{gcc_host_compiler_path}": str(cc),
- },
-
-From 77fb7fd1c68f81c416fd909b6677277b3637be05 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Fri, 17 Jul 2020 01:04:58 +0000
-Subject: [PATCH 2/8] Removing references to `*StaticCompiledGEMM` from TF code
-
-This commit is in conjunction with this MIOpen PR which removes scgemm from MIOpen
-https://github.com/ROCmSoftwarePlatform/MIOpen/pull/325
-
-The MIOpen release that includes that change will be included in the next ROCm release.
-This commit removes references to `*StaticCompiledGEMM` from TF code to prepare for switching to the next ROCm release (3.7)
----
- tensorflow/stream_executor/rocm/rocm_dnn.cc | 6 ------
- 1 file changed, 6 deletions(-)
-
-diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc
-index 80306105d4adf..4c5a740dfb090 100644
---- a/tensorflow/stream_executor/rocm/rocm_dnn.cc
-+++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc
-@@ -113,9 +113,6 @@ string ToString(miopenConvFwdAlgorithm_t algorithm) {
- case miopenConvolutionFwdAlgoImplicitGEMM:
- s = "Implicit GEMM";
- break;
-- case miopenConvolutionFwdAlgoStaticCompiledGEMM:
-- s = "Static Compiled GEMM";
-- break;
- }
- return s;
- }
-@@ -182,9 +179,6 @@ string ToString(miopenConvAlgorithm_t algorithm) {
- case miopenConvolutionAlgoImplicitGEMM:
- s = "Implicit GEMM";
- break;
-- case miopenConvolutionAlgoStaticCompiledGEMM:
-- s = "Static Compiled GEMM";
-- break;
- }
- return s;
- }
-
-From 566d2a95c6140322241bce20fcfea952e837fda1 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Tue, 11 Aug 2020 02:09:46 +0000
-Subject: [PATCH 3/8] Reverting "Provide ldexp float overload for HIP, it's
- missing in their headers. "
-
----
- tensorflow/core/kernels/cwise_ops_gpu_common.cu.h | 6 ------
- tensorflow/core/kernels/rnn/blas_gemm.h | 5 -----
- 2 files changed, 11 deletions(-)
-
-diff --git a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-index 8849c3f4eddbb..ecc58da315f6b 100644
---- a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-+++ b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-@@ -30,12 +30,6 @@ limitations under the License.
- #include "tensorflow/core/platform/types.h"
-
- #include "tensorflow/core/platform/logging.h"
--
--#ifdef __HIP_DEVICE_COMPILE__
--// Provide ldexp float overload for HIP, it's missing in their headers.
--__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
--#endif
--
- namespace tensorflow {
- namespace functor {
-
-diff --git a/tensorflow/core/kernels/rnn/blas_gemm.h b/tensorflow/core/kernels/rnn/blas_gemm.h
-index 74f4cd2bb39a4..126e1edef17a9 100644
---- a/tensorflow/core/kernels/rnn/blas_gemm.h
-+++ b/tensorflow/core/kernels/rnn/blas_gemm.h
-@@ -25,11 +25,6 @@ limitations under the License.
- #include "tensorflow/core/kernels/eigen_contraction_kernel.h"
- #endif
-
--#ifdef __HIP_DEVICE_COMPILE__
--// Provide ldexp float overload for HIP, it's missing in their headers.
--__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
--#endif
--
- namespace tensorflow {
- class OpKernelContext;
- namespace functor {
-
-From 9dcaad456e194bf8d1e3962cd6ad272f4879d7f3 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 00:39:02 +0000
-Subject: [PATCH 4/8] updating ROCM CI scripts to use ROCm 3.7
-
----
- .../tools/ci_build/linux/rocm/run_cc_core.sh | 34 +++++++++++++------
- .../ci_build/linux/rocm/run_csb_tests.sh | 27 ++++++++++-----
- .../tools/ci_build/linux/rocm/run_py3_core.sh | 23 +++++++++----
- .../tools/ci_build/xla/linux/rocm/run_py3.sh | 33 ++++++++++++------
- 4 files changed, 79 insertions(+), 38 deletions(-)
-
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-index 1f4a36f8de0f5..92d21cb133be9 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py
- bazel test \
- --config=rocm \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \
- --test_lang_filters=cc \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT}\
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-- --test_size_filters=small,medium \
-+ --test_size_filters=small,medium,large \
- --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
- -- \
- //tensorflow/... \
-@@ -59,11 +68,14 @@ bazel test \
- --config=rocm \
- -k \
- --test_tag_filters=gpu \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=1 \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-+ --test_size_filters=small,medium,large \
- -- \
- //tensorflow/core/nccl:nccl_manager_test
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-index 4962b2789b1c0..80c0686e64724 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -40,8 +47,10 @@ bazel test \
- --config=rocm \
- -k \
- --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-@@ -60,8 +69,8 @@ bazel test \
- --test_tag_filters=gpu \
- --test_timeout 600,900,2400,7200 \
- --test_output=errors \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=1 \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
- --test_sharding_strategy=disabled \
- -- \
- //tensorflow/core/nccl:nccl_manager_test
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-index 7ea866f8e2032..3a09081dd6ac6 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -41,8 +48,10 @@ bazel test \
- -k \
- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
- --test_lang_filters=py \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-index 6ce1fad9cc754..d623b77d5333d 100755
---- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
- echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
-@@ -41,9 +48,11 @@ bazel test \
- --config=rocm \
- --config=xla \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-@@ -65,9 +74,11 @@ bazel test \
- --config=rocm \
- --config=xla \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-
-From 4b76a49a1a5741dece6d368b30f7125e20c12878 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 26 Aug 2020 15:21:31 +0000
-Subject: [PATCH 5/8] Updating Dockerfile.rocm to use ROCm 3.7
-
----
- tensorflow/tools/ci_build/Dockerfile.rocm | 14 ++++++++++----
- 1 file changed, 10 insertions(+), 4 deletions(-)
-
-diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
-index 4f5d3ae7291b1..d209173258ada 100644
---- a/tensorflow/tools/ci_build/Dockerfile.rocm
-+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
-@@ -3,8 +3,10 @@
- FROM ubuntu:bionic
- MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
-
--ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/
--ARG ROCM_PATH=/opt/rocm-3.3.0
-+ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/
-+ARG ROCM_BUILD_NAME=xenial
-+ARG ROCM_BUILD_NUM=main
-+ARG ROCM_PATH=/opt/rocm-3.7.0
-
- ENV DEBIAN_FRONTEND noninteractive
- ENV TF_NEED_ROCM 1
-@@ -13,8 +15,12 @@ RUN apt update && apt install -y wget software-properties-common
-
- # Add rocm repository
- RUN apt-get clean all
--RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
--RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
-+RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/* ]] ; then \
-+ wget -qO - $ROCM_DEB_REPO/rocm.gpg.key | apt-key add -; \
-+ echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \
-+ else \
-+ echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \
-+ fi'
-
- # Install misc pkgs
- RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-
-From f5a822d2012bc3e1cea1de97ff8189404688f84e Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 15:51:34 +0000
-Subject: [PATCH 6/8] Updating TF to acccount for the (ROCm 3.7) change in
- hipDeviceGetStreamPriorityRange
-
-Starting with ROCm 3.7, the `hipDeviceGetStreamPriorityRange` API returns a range of `[-1,1]`.
-This is a departure from the `[0,2]` range that was returned by this API in ROCm 3.3 and prior.
-
-Updating the TF unit test, that has checks based on the range returned by this API, to account for change in the returned range
----
- .../common_runtime/gpu/gpu_device_test.cc | 34 +++++--------------
- 1 file changed, 8 insertions(+), 26 deletions(-)
-
-diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-index 6448fc56af7a1..21c75244b5feb 100644
---- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-@@ -230,9 +230,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) {
- TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- {
- #if TENSORFLOW_USE_ROCM
-- // Priority outside the range (0, 2) for AMD GPUs
-+ // Priority outside the range (-1, 1) for AMD GPUs
- SessionOptions opts =
-- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
-+ MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 1}});
- #else
- // Priority outside the range (-2, 0) for NVidia GPUs
- SessionOptions opts =
-@@ -245,7 +245,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- #if TENSORFLOW_USE_ROCM
- ExpectErrorMessageSubstr(
- status,
-- "Priority -1 is outside the range of supported priorities [0,2] for"
-+ "Priority -2 is outside the range of supported priorities [-1,1] for"
- " virtual device 0 on GPU# 0");
- #else
- ExpectErrorMessageSubstr(
-@@ -254,8 +254,8 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- }
- {
- #if TENSORFLOW_USE_ROCM
-- // Priority outside the range (0, 2) for AMD GPUs
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}});
-+ // Priority outside the range (-1, 1) for AMD GPUs
-+ SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
- #else
- // Priority outside the range (-2, 0) for NVidia GPUs
- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
-@@ -267,7 +267,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- #if TENSORFLOW_USE_ROCM
- ExpectErrorMessageSubstr(
- status,
-- "Priority 3 is outside the range of supported priorities [0,2] for"
-+ "Priority 2 is outside the range of supported priorities [-1,1] for"
- " virtual device 0 on GPU# 0");
- #else
- ExpectErrorMessageSubstr(
-@@ -288,26 +288,17 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
- }
-
- TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
--#if TENSORFLOW_USE_ROCM
-- // Valid range for priority values on AMD GPUs in (0,2)
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
--#else
-+ // Valid range for priority values on AMD GPUs in (-1,1)
- // Valid range for priority values on NVidia GPUs in (-2, 0)
- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
--#endif
- std::vector<std::unique_ptr<Device>> devices;
- TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
- opts, kDeviceNamePrefix, &devices));
- EXPECT_EQ(2, devices.size());
- EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
- EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
--#if TENSORFLOW_USE_ROCM
-- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#else
- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
- EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#endif
- ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
- ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
- EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
-@@ -339,27 +330,18 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
- }
- {
- // Multile virtual devices with matching priority.
--#if TENSORFLOW_USE_ROCM
-- // Valid range for priority values on AMD GPUs in (0,2)
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}});
--#else
-+ // Valid range for priority values on AMD GPUs in (-1,1)
- // Valid range for priority values on NVidia GPUs in (-2, 0)
- SessionOptions opts =
- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
--#endif
- std::vector<std::unique_ptr<Device>> devices;
- TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
- opts, kDeviceNamePrefix, &devices));
- EXPECT_EQ(2, devices.size());
- EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
- EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
--#if TENSORFLOW_USE_ROCM
-- EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#else
- EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#endif
- }
- }
-
-
-From ae9e3bd2fb8c3e042742b8c534c9020732c2c66d Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 23:05:32 +0000
-Subject: [PATCH 7/8] Commeting out subtests that are failing due to JIRA
- ticket 236756, and also removing the no_rocm tag from the tests that contain
- those subtests
-
----
- tensorflow/python/ops/parallel_for/math_test.py | 5 +++++
- tensorflow/python/ops/ragged/ragged_dispatch_test.py | 5 +++++
- 2 files changed, 10 insertions(+)
-
-diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py
-index 933ce765cdbfa..367f40d341115 100644
---- a/tensorflow/python/ops/parallel_for/math_test.py
-+++ b/tensorflow/python/ops/parallel_for/math_test.py
-@@ -82,6 +82,11 @@ def test_unary_cwise_complex_ops(self):
- self._test_unary_cwise_ops(complex_ops, True)
-
- def test_unary_cwise_real_ops_1(self):
-+ if test.is_built_with_rocm():
-+ # TODO(rocm):
-+ # This fails on ROCm...see JIRA ticket 236756
-+ self.skipTest('Fails on ROCM')
-+
- real_ops = [
- lambda x: math_ops.acosh(1 + math_ops.square(x)),
- math_ops.abs,
-diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-index 0237624aa451d..7a1d7c1882af1 100644
---- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-@@ -139,6 +139,11 @@ def assertSameShape(self, x, y):
- ]
- ) # pyformat: disable
- def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
-+ if test_util.IsBuiltWithROCm():
-+ # TODO(rocm):
-+ # This fails on ROCm...see JIRA ticket 236756
-+ self.skipTest('Fails on ROCM')
-+
- result = op(x, **extra_args)
-
- # Run the wrapped op on the dense values, for comparison.
-
-From d4b8e68a3675bfb2d7465205420bd5ad15701d0b Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 26 Aug 2020 22:01:18 +0000
-Subject: [PATCH 8/8] Adding no_rocm tag to unit-tests that will not pass with
- ROCm 3.7 until PR #42288 gets merged
-
----
- tensorflow/python/BUILD | 1 +
- tensorflow/python/keras/optimizer_v2/BUILD | 2 ++
- 2 files changed, 3 insertions(+)
-
-diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
-index a111237e0565d..5252ebbed6e4b 100644
---- a/tensorflow/python/BUILD
-+++ b/tensorflow/python/BUILD
-@@ -5423,6 +5423,7 @@ cuda_py_test(
- python_version = "PY3",
- shard_count = 10,
- tags = [
-+ "no_rocm",
- "no_windows_gpu",
- "noasan", # b/159332048
- "nomsan", # b/148630708
-diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD
-index b208e2e1e1e6b..11966ce8211d2 100644
---- a/tensorflow/python/keras/optimizer_v2/BUILD
-+++ b/tensorflow/python/keras/optimizer_v2/BUILD
-@@ -157,6 +157,7 @@ cuda_py_test(
- size = "medium",
- srcs = ["adadelta_test.py"],
- shard_count = 4,
-+ tags = ["no_rocm"],
- deps = [
- ":optimizer_v2",
- "//tensorflow/python:client_testlib",
-@@ -298,6 +299,7 @@ cuda_py_test(
- size = "medium",
- srcs = ["rmsprop_test.py"],
- shard_count = 2,
-+ tags = ["no_rocm"],
- deps = [
- ":optimizer_v2",
- "//tensorflow/python:array_ops",