diff options
author | acxz | 2020-12-16 00:38:44 -0500 |
---|---|---|
committer | acxz | 2020-12-16 00:38:44 -0500 |
commit | 43d4b3d57a605f89713c2af7c07055f3216a2f2b (patch) | |
tree | 2f6ecc6c5a8163e722a7c27fd986bc33dcafa7a2 | |
parent | fa97314d266ed527c4ca43ab2f4b9a2c7b7d317a (diff) | |
download | aur-43d4b3d57a605f89713c2af7c07055f3216a2f2b.tar.gz |
updpkg 2.4.0
-rw-r--r-- | .SRCINFO | 86 | ||||
-rw-r--r-- | PKGBUILD | 113 | ||||
-rw-r--r-- | fix-h5py3.0.patch | 18 | ||||
-rw-r--r-- | fix_occupancy_block.patch | 87 | ||||
-rw-r--r-- | new-rocm.patch | 692 |
5 files changed, 130 insertions, 866 deletions
@@ -1,7 +1,7 @@ pkgbase = tensorflow-rocm pkgdesc = Library for computation using data flow graphs for scalable machine learning - pkgver = 2.3.1 - pkgrel = 3 + pkgver = 2.4.0 + pkgrel = 1 url = https://www.tensorflow.org/ arch = x86_64 license = APACHE @@ -12,42 +12,48 @@ pkgbase = tensorflow-rocm makedepends = miopen makedepends = rccl makedepends = git - makedepends = gcc9 makedepends = python-pip makedepends = python-wheel makedepends = python-setuptools makedepends = python-h5py makedepends = python-keras-applications makedepends = python-keras-preprocessing + makedepends = cython depends = c-ares depends = intel-mkl depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo optdepends = tensorboard: Tensorflow visualization toolkit - source = tensorflow-rocm-2.3.1.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.3.1.tar.gz - source = numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch + source = tensorflow-rocm-2.4.0.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.4.0.tar.gz + source = fix-h5py3.0.patch source = build-against-actual-mkl.patch - source = fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch - source = fix_hipcc_path.patch::https://github.com/tensorflow/tensorflow/commit/9d2b338025dc61828ccf8196bb042ab9c586c7b3.patch - source = fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch - source = fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch - source = fix_occupancy_block.patch - source = new-rocm.patch - sha512sums = e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0 - sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc + sha512sums = 4860c148fd931c4dc7c558128e545e2b6384e590a3fbc266a5bfe842a8307f23f1f7e0103bda3a383e7c77edad2bb76dec02da8be400a40956072df19c5d4dbd + sha512sums = 9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08 - sha512sums = 7acc2f2579158be1d8c824da0f6d44d084a56182f1aab3cd7a78d513931b3a16ce72f2e05b44b1de76f5519af39e80431660de294ff337842e4ee8949cb85b28 - sha512sums = 136d91db88658dd0eab1543f8dec1cd20dca86afc6970606a722e7d01a645d64c42564d590fc1ecb04c204ae0b0fa8f78cf9998e9bcf367f4cc795fa59677591 - sha512sums = 75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20 - sha512sums = 42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5 - sha512sums = 88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126 - sha512sums = 080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317 pkgname = tensorflow-rocm pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM) depends = c-ares depends = intel-mkl depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo depends = rocm + depends = rocm-libs + depends = miopen depends = rccl provides = tensorflow conflicts = tensorflow @@ -57,12 +63,22 @@ pkgname = python-tensorflow-rocm depends = c-ares depends = intel-mkl depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo depends = tensorflow-rocm depends = python-termcolor depends = python-astor - depends = python-gast + depends = python-gast03 depends = python-numpy depends = rocm + depends = rocm-libs + depends = miopen depends = python-protobuf depends = absl-py depends = rccl @@ -72,31 +88,53 @@ pkgname = python-tensorflow-rocm depends = python-tensorflow-estimator depends = python-opt_einsum depends = python-astunparse + depends = python-past + depends = python-flatbuffers provides = python-tensorflow conflicts = python-tensorflow pkgname = tensorflow-opt-rocm - pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations) + pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations) depends = c-ares depends = intel-mkl depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo depends = rocm + depends = rocm-libs + depends = miopen depends = rccl provides = tensorflow provides = tensorflow-rocm conflicts = tensorflow pkgname = python-tensorflow-opt-rocm - pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations) + pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations) depends = c-ares depends = intel-mkl depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo depends = tensorflow-opt-rocm depends = python-termcolor depends = python-astor - depends = python-gast + depends = python-gast03 depends = python-numpy depends = rocm + depends = rocm-libs + depends = miopen depends = python-protobuf depends = absl-py depends = rccl @@ -106,6 +144,8 @@ pkgname = python-tensorflow-opt-rocm depends = python-tensorflow-estimator depends = python-opt_einsum depends = python-astunparse + depends = python-past + depends = python-flatbuffers provides = python-tensorflow provides = python-tensorflow-rocm conflicts = python-tensorflow @@ -14,37 +14,26 @@ pkgname=() [ "$_build_no_opt" -eq 1 ] && pkgname+=(tensorflow-rocm python-tensorflow-rocm) [ "$_build_opt" -eq 1 ] && pkgname+=(tensorflow-opt-rocm python-tensorflow-opt-rocm) -pkgver=2.3.1 -_pkgver=2.3.1 -pkgrel=3 +pkgver=2.4.0 +_pkgver=2.4.0 +pkgrel=1 pkgdesc="Library for computation using data flow graphs for scalable machine learning" url="https://www.tensorflow.org/" license=('APACHE') arch=('x86_64') -depends=('c-ares' 'intel-mkl' 'onednn') -makedepends=('bazel' 'python-numpy' 'rocm' 'rocm-libs' 'miopen' 'rccl' 'git' 'gcc9' +depends=('c-ares' 'intel-mkl' 'onednn' 'pybind11' 'openssl-1.0' 'lmdb' 'libpng' 'curl' 'giflib' 'icu' 'libjpeg-turbo') +makedepends=('bazel' 'python-numpy' 'rocm' 'rocm-libs' 'miopen' 'rccl' 'git' 'python-pip' 'python-wheel' 'python-setuptools' 'python-h5py' - 'python-keras-applications' 'python-keras-preprocessing') + 'python-keras-applications' 'python-keras-preprocessing' + 'cython') optdepends=('tensorboard: Tensorflow visualization toolkit') source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz" - numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch - build-against-actual-mkl.patch - fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch - fix_hipcc_path.patch::https://github.com/tensorflow/tensorflow/commit/9d2b338025dc61828ccf8196bb042ab9c586c7b3.patch - fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch - fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch - fix_occupancy_block.patch - new-rocm.patch) - -sha512sums=('e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0' - 'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc' - 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08' - '7acc2f2579158be1d8c824da0f6d44d084a56182f1aab3cd7a78d513931b3a16ce72f2e05b44b1de76f5519af39e80431660de294ff337842e4ee8949cb85b28' - '136d91db88658dd0eab1543f8dec1cd20dca86afc6970606a722e7d01a645d64c42564d590fc1ecb04c204ae0b0fa8f78cf9998e9bcf367f4cc795fa59677591' - '75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20' - '42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5' - '88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126' - '080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317') + fix-h5py3.0.patch + build-against-actual-mkl.patch) + +sha512sums=('4860c148fd931c4dc7c558128e545e2b6384e590a3fbc266a5bfe842a8307f23f1f7e0103bda3a383e7c77edad2bb76dec02da8be400a40956072df19c5d4dbd' + '9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a' + 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08') get_pyver () { python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))' @@ -66,32 +55,17 @@ prepare() { # Tensorflow actually wants to build against a slimmed down version of Intel MKL called MKLML # See https://github.com/intel/mkl-dnn/issues/102 # MKLML version that Tensorflow wants to use is https://github.com/intel/mkl-dnn/releases/tag/v0.21 - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch + # patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch # Compile with C++17 by default (FS#65953) #sed -i "s/c++14/c++17/g" tensorflow-${_pkgver}/.bazelrc - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/numpy1.20.patch - - # Fix hip_hcc path - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_hip_hcc_path.patch - - # Fix hip_hcc path - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_hipcc_path.patch - - # Fix GpuAtomic redefinition - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_gpu_atomic_redef.patch + # FS#68488 + patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix-h5py3.0.patch - # Fix ldexp float method - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_ldexp_float.patch - - # Fix missing hipOccupancyMaxPotentialBlockSize method - # https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2 - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_occupancy_block.patch - - # Patch for ROCm 3.7 and later - # https://github.com/tensorflow/tensorflow/pull/42689 - patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/new-rocm.patch + # Get rid of hardcoded versions. Not like we ever cared about what upstream + # thinks about which versions should be used anyway. ;) (FS#68772) + sed -i -E "s/'([0-9a-z_-]+) .= [0-9].+[0-9]'/'\1'/" tensorflow-${_pkgver}/tensorflow/tools/pip_package/setup.py cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-rocm cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-rocm @@ -104,12 +78,12 @@ build() { export PYTHON_BIN_PATH=/usr/bin/python export USE_DEFAULT_PYTHON_LIB_PATH=1 export TF_NEED_JEMALLOC=1 - export TF_NEED_KAFKA=0 + export TF_NEED_KAFKA=1 export TF_NEED_OPENCL_SYCL=0 - export TF_NEED_AWS=0 - export TF_NEED_GCP=0 - export TF_NEED_HDFS=0 - export TF_NEED_S3=0 + export TF_NEED_AWS=1 + export TF_NEED_GCP=1 + export TF_NEED_HDFS=1 + export TF_NEED_S3=1 export TF_ENABLE_XLA=1 export TF_NEED_GDR=0 export TF_NEED_VERBS=0 @@ -119,25 +93,33 @@ build() { export TF_NEED_NGRAPH=0 export TF_NEED_IGNITE=0 export TF_NEED_ROCM=1 + # See https://github.com/tensorflow/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl + export TF_SYSTEM_LIBS="boringssl,curl,cython,gif,icu,libjpeg_turbo,lmdb,nasm,pcre,png,pybind11,zlib" export TF_SET_ANDROID_WORKSPACE=0 export TF_DOWNLOAD_CLANG=0 export TF_NCCL_VERSION=2.7 export TF_IGNORE_MAX_BAZEL_VERSION=1 export TF_MKL_ROOT=/opt/intel/mkl export NCCL_INSTALL_PATH=/usr - export GCC_HOST_COMPILER_PATH=/usr/bin/gcc-9 - export HOST_C_COMPILER=/usr/bin/gcc-9 - export HOST_CXX_COMPILER=/usr/bin/g++-9 + export GCC_HOST_COMPILER_PATH=/usr/bin/gcc + export HOST_C_COMPILER=/usr/bin/gcc + export HOST_CXX_COMPILER=/usr/bin/g++ export TF_CUDA_CLANG=0 # Clang currently disabled because it's not compatible at the moment. export CLANG_CUDA_COMPILER_PATH=/usr/bin/clang export TF_CUDA_PATHS=/opt/cuda,/usr/lib,/usr export TF_CUDA_VERSION=$(/opt/cuda/bin/nvcc --version | sed -n 's/^.*release \(.*\),.*/\1/p') export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' /usr/include/cudnn_version.h) - export TF_CUDA_COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0 + export TF_CUDA_COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0,8.6 # Required until https://github.com/tensorflow/tensorflow/issues/39467 is fixed. - export CC=gcc-9 - export CXX=g++-9 + export CC=gcc + export CXX=g++ + + export BAZEL_ARGS="--config=mkl -c opt --copt=-I/usr/include/openssl-1.0 --host_copt=-I/usr/include/openssl-1.0 --linkopt=-l:libssl.so.1.0.0 --linkopt=-l:libcrypto.so.1.0.0 --host_linkopt=-l:libssl.so.1.0.0 --host_linkopt=-l:libcrypto.so.1.0.0" + + # Workaround for gcc 10+ warnings related to upb. + # See https://github.com/tensorflow/tensorflow/issues/39467 + export BAZEL_ARGS="$BAZEL_ARGS --host_copt=-Wno-stringop-truncation" if [ "$_build_no_opt" -eq 1 ]; then echo "Building with rocm and without non-x86-64 optimizations" @@ -147,7 +129,8 @@ build() { export TF_NEED_ROCM=1 ./configure bazel \ - build --config=mkl -c opt \ + build \ + ${BAZEL_ARGS[@]} \ //tensorflow:libtensorflow.so \ //tensorflow:libtensorflow_cc.so \ //tensorflow:install_headers \ @@ -162,9 +145,11 @@ build() { export CC_OPT_FLAGS="-march=haswell -O3" export TF_NEED_CUDA=0 export TF_NEED_ROCM=1 + export TF_CUDA_CLANG=0 ./configure bazel \ - build --config=mkl --config=avx2_linux -c opt \ + build --config=avx2_linux \ + ${BAZEL_ARGS[@]} \ //tensorflow:libtensorflow.so \ //tensorflow:libtensorflow_cc.so \ //tensorflow:install_headers \ @@ -232,7 +217,7 @@ _python_package() { package_tensorflow-rocm() { pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM)" - depends+=(rocm rccl) + depends+=(rocm rocm-libs miopen rccl) conflicts=(tensorflow) provides=(tensorflow) @@ -241,8 +226,8 @@ package_tensorflow-rocm() { } package_tensorflow-opt-rocm() { - pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)" - depends+=(rocm rccl) + pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)" + depends+=(rocm rocm-libs miopen rccl) conflicts=(tensorflow) provides=(tensorflow tensorflow-rocm) @@ -252,7 +237,7 @@ package_tensorflow-opt-rocm() { package_python-tensorflow-rocm() { pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM)" - depends+=(tensorflow-rocm python-termcolor python-astor python-gast python-numpy rocm python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse) + depends+=(tensorflow-rocm python-termcolor python-astor python-gast03 python-numpy rocm rocm-libs miopen python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-past python-flatbuffers) conflicts=(python-tensorflow) provides=(python-tensorflow) @@ -261,8 +246,8 @@ package_python-tensorflow-rocm() { } package_python-tensorflow-opt-rocm() { - pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and CPU optimizations)" - depends+=(tensorflow-opt-rocm python-termcolor python-astor python-gast python-numpy rocm python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse) + pkgdesc="Library for computation using data flow graphs for scalable machine learning (with ROCM and AVX2 CPU optimizations)" + depends+=(tensorflow-opt-rocm python-termcolor python-astor python-gast03 python-numpy rocm rocm-libs miopen python-protobuf absl-py rccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-past python-flatbuffers) conflicts=(python-tensorflow) provides=(python-tensorflow python-tensorflow-rocm) diff --git a/fix-h5py3.0.patch b/fix-h5py3.0.patch new file mode 100644 index 000000000000..18e55a5297a4 --- /dev/null +++ b/fix-h5py3.0.patch @@ -0,0 +1,18 @@ +diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py +index d3bb10c98d..e89f5356bb 100644 +--- a/tensorflow/python/keras/saving/hdf5_format.py ++++ b/tensorflow/python/keras/saving/hdf5_format.py +@@ -659,11 +659,11 @@ def load_weights_from_hdf5_group(f, layers): + and weights file. + """ + if 'keras_version' in f.attrs: +- original_keras_version = f.attrs['keras_version'].decode('utf8') ++ original_keras_version = f.attrs['keras_version'] + else: + original_keras_version = '1' + if 'backend' in f.attrs: +- original_backend = f.attrs['backend'].decode('utf8') ++ original_backend = f.attrs['backend'] + else: + original_backend = None + diff --git a/fix_occupancy_block.patch b/fix_occupancy_block.patch deleted file mode 100644 index 137b4e56ea55..000000000000 --- a/fix_occupancy_block.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 22def20bae7be6d5b790b360abed5919385b16c2 Mon Sep 17 00:00:00 2001 -From: Christian Sigg <csigg@google.com> -Date: Mon, 29 Jun 2020 04:23:28 -0700 -Subject: [PATCH] New ROCm 3.5 RBE docker based on Ubuntu 18.04, re-enable RBE. - -Fix list of cxx_builtin_include_directories. Only a few are needed, but those are more complicated (mix of symlinked and real paths). - -Properly return error from crosstool wrapper. - -PiperOrigin-RevId: 318788040 -Change-Id: Ia66898e98a9a4d8fb479c7e75317f4114f6081e5 ---- - .bazelrc | 17 ++++ - tensorflow/core/util/gpu_launch_config.h | 40 ++------- - ....local-toolchain-ubuntu18.04-manylinux2010 | 34 ++++++++ - .../ci_build/Dockerfile.rbe.rocm-ubuntu16.04 | 37 --------- - ...rocm-ubuntu18.04-manylinux2010-multipython | 79 ++++++++++++++++++ - .../bin/crosstool_wrapper_driver_rocm.tpl | 19 ++++- - third_party/gpus/rocm_configure.bzl | 83 +++---------------- - .../preconfig/generate/containers.bzl | 2 +- - .../toolchains/remote_config/configs.bzl | 12 +-- - .../toolchains/remote_config/containers.bzl | 10 ++- - 10 files changed, 184 insertions(+), 149 deletions(-) - create mode 100644 tensorflow/tools/ci_build/Dockerfile.local-toolchain-ubuntu18.04-manylinux2010 - delete mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04 - create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython - -diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h -index 4dfaf333d4bf0..0b943e917da01 100644 ---- a/tensorflow/core/util/gpu_launch_config.h -+++ b/tensorflow/core/util/gpu_launch_config.h -@@ -168,18 +168,10 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count, - block_size_limit); - CHECK_EQ(err, cudaSuccess); - #elif TENSORFLOW_USE_ROCM -- // Earlier versions of this HIP routine incorrectly returned void. -- // TODO re-enable hipError_t error checking when HIP is fixed. -- // ROCm interface uses unsigned int, convert after checking -- uint32_t block_count_uint = 0; -- uint32_t thread_per_block_uint = 0; -- CHECK_GE(block_size_limit, 0); -- uint32_t block_size_limit_uint = static_cast<uint32_t>(block_size_limit); -- hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint, -- func, dynamic_shared_memory_size, -- block_size_limit_uint); -- block_count = static_cast<int>(block_count_uint); -- thread_per_block = static_cast<int>(thread_per_block_uint); -+ hipError_t err = hipOccupancyMaxPotentialBlockSize( -+ &block_count, &thread_per_block, func, dynamic_shared_memory_size, -+ block_size_limit); -+ CHECK_EQ(err, hipSuccess); - #endif - - block_count = -@@ -208,27 +200,13 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize( - cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &block_count, func, fixed_block_size, dynamic_shared_memory_size); - CHECK_EQ(err, cudaSuccess); -- block_count = std::min(block_count * d.getNumGpuMultiProcessors(), -- DivUp(work_element_count, fixed_block_size)); - #elif TENSORFLOW_USE_ROCM -- // ROCM TODO re-enable this after hipOccupancyMaxActiveBlocksPerMultiprocessor -- // is implemented -- // hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor( -- // &block_count, &thread_per_block, func, dynamic_shared_memory_size, -- // block_size_limit); -- // CHECK_EQ(err, hipSuccess); -- -- // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&) -- // that the kernel is quite simple and will largely be memory-limited. -- const int physical_thread_count = std::min( -- d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(), -- work_element_count); -- // Assume the kernel be simple enough that it is okay to use 1024 threads -- // per workgroup. -- int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock()); -- block_count = std::min(DivUp(physical_thread_count, thread_per_block), -- d.getNumGpuMultiProcessors()); -+ hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor( -+ &block_count, func, fixed_block_size, dynamic_shared_memory_size); -+ CHECK_EQ(err, hipSuccess); - #endif -+ block_count = std::min(block_count * d.getNumGpuMultiProcessors(), -+ DivUp(work_element_count, fixed_block_size)); - - config.virtual_thread_count = work_element_count; - config.thread_per_block = fixed_block_size; diff --git a/new-rocm.patch b/new-rocm.patch deleted file mode 100644 index 01eb2b4fab8c..000000000000 --- a/new-rocm.patch +++ /dev/null @@ -1,692 +0,0 @@ -From fcc2de09eb38f45b678a5457f594ca594f2572c9 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Thu, 16 Jul 2020 19:38:03 +0000 -Subject: [PATCH 1/8] Change references to libhip_hcc.so to refer to - libamdhip64.so instead - -With the switch to the new hipclang-vdi runtime (in ROCm 3.5), the new name for the HIP runtime library is libamdhip64.so. - -For backwards compatibility, ROCm 3.5 and ROCm 3.6 include a "libhip_hcc.so" softlink, which points to libamdhip64.so. That softlink will be going away starting with ROCm 3.7(?). - -This commit updates references to libhip_hcc.so (in the TF build) to use libamdhip64.so instead. - -See following JIRA tickets for further details: - -* http://ontrack-internal.amd.com/browse/SWDEV-244762 -* http://ontrack-internal.amd.com/browse/SWDEV-238533 ---- - tensorflow/stream_executor/platform/default/dso_loader.cc | 2 +- - .../crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl | 7 ------- - third_party/gpus/rocm_configure.bzl | 8 +++----- - 3 files changed, 4 insertions(+), 13 deletions(-) - -diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc -index 70b1ebe070a76..84293b7767a20 100644 ---- a/tensorflow/stream_executor/platform/default/dso_loader.cc -+++ b/tensorflow/stream_executor/platform/default/dso_loader.cc -@@ -140,7 +140,7 @@ port::StatusOr<void*> GetHipsparseDsoHandle() { - return GetDsoHandle("hipsparse", ""); - } - --port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("hip_hcc", ""); } -+port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("amdhip64", ""); } - - } // namespace DsoLoader - -diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl -index 8848bd32c2e1d..d5bfe78c6449d 100755 ---- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl -+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl -@@ -34,8 +34,6 @@ HIPCC_ENV = '%{hipcc_env}' - HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True" - HIP_RUNTIME_PATH = '%{hip_runtime_path}' - HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}' --HCC_RUNTIME_PATH = '%{hcc_runtime_path}' --HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}' - ROCR_RUNTIME_PATH = '%{rocr_runtime_path}' - ROCR_RUNTIME_LIBRARY = '%{rocr_runtime_library}' - VERBOSE = '%{crosstool_verbose}'=='1' -@@ -267,11 +265,6 @@ def main(): - gpu_linker_flags.append('-L' + ROCR_RUNTIME_PATH) - gpu_linker_flags.append('-Wl,-rpath=' + ROCR_RUNTIME_PATH) - gpu_linker_flags.append('-l' + ROCR_RUNTIME_LIBRARY) -- # do not link with HCC runtime library in case hip-clang toolchain is used -- if not HIPCC_IS_HIPCLANG: -- gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH) -- gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH) -- gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY) - gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH) - gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH) - gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY) -diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl -index 1312574f0aa46..0508279518894 100644 ---- a/third_party/gpus/rocm_configure.bzl -+++ b/third_party/gpus/rocm_configure.bzl -@@ -390,7 +390,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin): - libs_paths = [ - (name, _rocm_lib_paths(repository_ctx, name, path)) - for name, path in [ -- ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"), -+ ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"), - ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"), - ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"), - ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"), -@@ -646,7 +646,7 @@ def _create_local_rocm_repository(repository_ctx): - "rocm/BUILD", - tpl_paths["rocm:BUILD"], - { -- "%{hip_lib}": rocm_libs["hip_hcc"].file_name, -+ "%{hip_lib}": rocm_libs["amdhip64"].file_name, - "%{rocblas_lib}": rocm_libs["rocblas"].file_name, - "%{rocfft_lib}": rocm_libs["rocfft"].file_name, - "%{hiprand_lib}": rocm_libs["hiprand"].file_name, -@@ -733,9 +733,7 @@ def _create_local_rocm_repository(repository_ctx): - "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib", - "%{rocr_runtime_library}": "hsa-runtime64", - "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib", -- "%{hip_runtime_library}": "hip_hcc", -- "%{hcc_runtime_path}": rocm_config.rocm_toolkit_path + "/hcc/lib", -- "%{hcc_runtime_library}": "mcwamp", -+ "%{hip_runtime_library}": "amdhip64", - "%{crosstool_verbose}": _crosstool_verbose(repository_ctx), - "%{gcc_host_compiler_path}": str(cc), - }, - -From 77fb7fd1c68f81c416fd909b6677277b3637be05 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Fri, 17 Jul 2020 01:04:58 +0000 -Subject: [PATCH 2/8] Removing references to `*StaticCompiledGEMM` from TF code - -This commit is in conjunction with this MIOpen PR which removes scgemm from MIOpen -https://github.com/ROCmSoftwarePlatform/MIOpen/pull/325 - -The MIOpen release that includes that change will be included in the next ROCm release. -This commit removes references to `*StaticCompiledGEMM` from TF code to prepare for switching to the next ROCm release (3.7) ---- - tensorflow/stream_executor/rocm/rocm_dnn.cc | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc -index 80306105d4adf..4c5a740dfb090 100644 ---- a/tensorflow/stream_executor/rocm/rocm_dnn.cc -+++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc -@@ -113,9 +113,6 @@ string ToString(miopenConvFwdAlgorithm_t algorithm) { - case miopenConvolutionFwdAlgoImplicitGEMM: - s = "Implicit GEMM"; - break; -- case miopenConvolutionFwdAlgoStaticCompiledGEMM: -- s = "Static Compiled GEMM"; -- break; - } - return s; - } -@@ -182,9 +179,6 @@ string ToString(miopenConvAlgorithm_t algorithm) { - case miopenConvolutionAlgoImplicitGEMM: - s = "Implicit GEMM"; - break; -- case miopenConvolutionAlgoStaticCompiledGEMM: -- s = "Static Compiled GEMM"; -- break; - } - return s; - } - -From 566d2a95c6140322241bce20fcfea952e837fda1 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Tue, 11 Aug 2020 02:09:46 +0000 -Subject: [PATCH 3/8] Reverting "Provide ldexp float overload for HIP, it's - missing in their headers. " - ---- - tensorflow/core/kernels/cwise_ops_gpu_common.cu.h | 6 ------ - tensorflow/core/kernels/rnn/blas_gemm.h | 5 ----- - 2 files changed, 11 deletions(-) - -diff --git a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h -index 8849c3f4eddbb..ecc58da315f6b 100644 ---- a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h -+++ b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h -@@ -30,12 +30,6 @@ limitations under the License. - #include "tensorflow/core/platform/types.h" - - #include "tensorflow/core/platform/logging.h" -- --#ifdef __HIP_DEVICE_COMPILE__ --// Provide ldexp float overload for HIP, it's missing in their headers. --__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); } --#endif -- - namespace tensorflow { - namespace functor { - -diff --git a/tensorflow/core/kernels/rnn/blas_gemm.h b/tensorflow/core/kernels/rnn/blas_gemm.h -index 74f4cd2bb39a4..126e1edef17a9 100644 ---- a/tensorflow/core/kernels/rnn/blas_gemm.h -+++ b/tensorflow/core/kernels/rnn/blas_gemm.h -@@ -25,11 +25,6 @@ limitations under the License. - #include "tensorflow/core/kernels/eigen_contraction_kernel.h" - #endif - --#ifdef __HIP_DEVICE_COMPILE__ --// Provide ldexp float overload for HIP, it's missing in their headers. --__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); } --#endif -- - namespace tensorflow { - class OpKernelContext; - namespace functor { - -From 9dcaad456e194bf8d1e3962cd6ad272f4879d7f3 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 12 Aug 2020 00:39:02 +0000 -Subject: [PATCH 4/8] updating ROCM CI scripts to use ROCm 3.7 - ---- - .../tools/ci_build/linux/rocm/run_cc_core.sh | 34 +++++++++++++------ - .../ci_build/linux/rocm/run_csb_tests.sh | 27 ++++++++++----- - .../tools/ci_build/linux/rocm/run_py3_core.sh | 23 +++++++++---- - .../tools/ci_build/xla/linux/rocm/run_py3.sh | 33 ++++++++++++------ - 4 files changed, 79 insertions(+), 38 deletions(-) - -diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh -index 1f4a36f8de0f5..92d21cb133be9 100755 ---- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh -+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - -@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py - bazel test \ - --config=rocm \ - -k \ -- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -+ --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \ - --test_lang_filters=cc \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT}\ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ - --test_sharding_strategy=disabled \ -- --test_size_filters=small,medium \ -+ --test_size_filters=small,medium,large \ - --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ - -- \ - //tensorflow/... \ -@@ -59,11 +68,14 @@ bazel test \ - --config=rocm \ - -k \ - --test_tag_filters=gpu \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=1 \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ - --test_sharding_strategy=disabled \ -+ --test_size_filters=small,medium,large \ - -- \ - //tensorflow/core/nccl:nccl_manager_test -diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh -index 4962b2789b1c0..80c0686e64724 100755 ---- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh -+++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - -@@ -40,8 +47,10 @@ bazel test \ - --config=rocm \ - -k \ - --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --test_output=errors \ - --test_sharding_strategy=disabled \ -@@ -60,8 +69,8 @@ bazel test \ - --test_tag_filters=gpu \ - --test_timeout 600,900,2400,7200 \ - --test_output=errors \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=1 \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ - --test_sharding_strategy=disabled \ - -- \ - //tensorflow/core/nccl:nccl_manager_test -diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh -index 7ea866f8e2032..3a09081dd6ac6 100755 ---- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh -+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - -@@ -41,8 +48,10 @@ bazel test \ - -k \ - --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ - --test_lang_filters=py \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ -diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh -index 6ce1fad9cc754..d623b77d5333d 100755 ---- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh -+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc -@@ -41,9 +48,11 @@ bazel test \ - --config=rocm \ - --config=xla \ - -k \ -- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ -@@ -65,9 +74,11 @@ bazel test \ - --config=rocm \ - --config=xla \ - -k \ -- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ - -From 4b76a49a1a5741dece6d368b30f7125e20c12878 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 26 Aug 2020 15:21:31 +0000 -Subject: [PATCH 5/8] Updating Dockerfile.rocm to use ROCm 3.7 - ---- - tensorflow/tools/ci_build/Dockerfile.rocm | 14 ++++++++++---- - 1 file changed, 10 insertions(+), 4 deletions(-) - -diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm -index 4f5d3ae7291b1..d209173258ada 100644 ---- a/tensorflow/tools/ci_build/Dockerfile.rocm -+++ b/tensorflow/tools/ci_build/Dockerfile.rocm -@@ -3,8 +3,10 @@ - FROM ubuntu:bionic - MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com> - --ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/ --ARG ROCM_PATH=/opt/rocm-3.3.0 -+ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/ -+ARG ROCM_BUILD_NAME=xenial -+ARG ROCM_BUILD_NUM=main -+ARG ROCM_PATH=/opt/rocm-3.7.0 - - ENV DEBIAN_FRONTEND noninteractive - ENV TF_NEED_ROCM 1 -@@ -13,8 +15,12 @@ RUN apt update && apt install -y wget software-properties-common - - # Add rocm repository - RUN apt-get clean all --RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add - --RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list" -+RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/* ]] ; then \ -+ wget -qO - $ROCM_DEB_REPO/rocm.gpg.key | apt-key add -; \ -+ echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \ -+ else \ -+ echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \ -+ fi' - - # Install misc pkgs - RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - -From f5a822d2012bc3e1cea1de97ff8189404688f84e Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 12 Aug 2020 15:51:34 +0000 -Subject: [PATCH 6/8] Updating TF to acccount for the (ROCm 3.7) change in - hipDeviceGetStreamPriorityRange - -Starting with ROCm 3.7, the `hipDeviceGetStreamPriorityRange` API returns a range of `[-1,1]`. -This is a departure from the `[0,2]` range that was returned by this API in ROCm 3.3 and prior. - -Updating the TF unit test, that has checks based on the range returned by this API, to account for change in the returned range ---- - .../common_runtime/gpu/gpu_device_test.cc | 34 +++++-------------- - 1 file changed, 8 insertions(+), 26 deletions(-) - -diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc -index 6448fc56af7a1..21c75244b5feb 100644 ---- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc -+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc -@@ -230,9 +230,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) { - TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - { - #if TENSORFLOW_USE_ROCM -- // Priority outside the range (0, 2) for AMD GPUs -+ // Priority outside the range (-1, 1) for AMD GPUs - SessionOptions opts = -- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}}); -+ MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 1}}); - #else - // Priority outside the range (-2, 0) for NVidia GPUs - SessionOptions opts = -@@ -245,7 +245,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - #if TENSORFLOW_USE_ROCM - ExpectErrorMessageSubstr( - status, -- "Priority -1 is outside the range of supported priorities [0,2] for" -+ "Priority -2 is outside the range of supported priorities [-1,1] for" - " virtual device 0 on GPU# 0"); - #else - ExpectErrorMessageSubstr( -@@ -254,8 +254,8 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - } - { - #if TENSORFLOW_USE_ROCM -- // Priority outside the range (0, 2) for AMD GPUs -- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}}); -+ // Priority outside the range (-1, 1) for AMD GPUs -+ SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}}); - #else - // Priority outside the range (-2, 0) for NVidia GPUs - SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}}); -@@ -267,7 +267,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - #if TENSORFLOW_USE_ROCM - ExpectErrorMessageSubstr( - status, -- "Priority 3 is outside the range of supported priorities [0,2] for" -+ "Priority 2 is outside the range of supported priorities [-1,1] for" - " virtual device 0 on GPU# 0"); - #else - ExpectErrorMessageSubstr( -@@ -288,26 +288,17 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) { - } - - TEST_F(GPUDeviceTest, MultipleVirtualDevices) { --#if TENSORFLOW_USE_ROCM -- // Valid range for priority values on AMD GPUs in (0,2) -- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}}); --#else -+ // Valid range for priority values on AMD GPUs in (-1,1) - // Valid range for priority values on NVidia GPUs in (-2, 0) - SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}}); --#endif - std::vector<std::unique_ptr<Device>> devices; - TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices( - opts, kDeviceNamePrefix, &devices)); - EXPECT_EQ(2, devices.size()); - EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit()); - EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit()); --#if TENSORFLOW_USE_ROCM -- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); -- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#else - EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); - EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#endif - ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size()); - ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size()); - EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id()); -@@ -339,27 +330,18 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) { - } - { - // Multile virtual devices with matching priority. --#if TENSORFLOW_USE_ROCM -- // Valid range for priority values on AMD GPUs in (0,2) -- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}}); --#else -+ // Valid range for priority values on AMD GPUs in (-1,1) - // Valid range for priority values on NVidia GPUs in (-2, 0) - SessionOptions opts = - MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}}); --#endif - std::vector<std::unique_ptr<Device>> devices; - TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices( - opts, kDeviceNamePrefix, &devices)); - EXPECT_EQ(2, devices.size()); - EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit()); - EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit()); --#if TENSORFLOW_USE_ROCM -- EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); -- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#else - EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); - EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#endif - } - } - - -From ae9e3bd2fb8c3e042742b8c534c9020732c2c66d Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 12 Aug 2020 23:05:32 +0000 -Subject: [PATCH 7/8] Commeting out subtests that are failing due to JIRA - ticket 236756, and also removing the no_rocm tag from the tests that contain - those subtests - ---- - tensorflow/python/ops/parallel_for/math_test.py | 5 +++++ - tensorflow/python/ops/ragged/ragged_dispatch_test.py | 5 +++++ - 2 files changed, 10 insertions(+) - -diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py -index 933ce765cdbfa..367f40d341115 100644 ---- a/tensorflow/python/ops/parallel_for/math_test.py -+++ b/tensorflow/python/ops/parallel_for/math_test.py -@@ -82,6 +82,11 @@ def test_unary_cwise_complex_ops(self): - self._test_unary_cwise_ops(complex_ops, True) - - def test_unary_cwise_real_ops_1(self): -+ if test.is_built_with_rocm(): -+ # TODO(rocm): -+ # This fails on ROCm...see JIRA ticket 236756 -+ self.skipTest('Fails on ROCM') -+ - real_ops = [ - lambda x: math_ops.acosh(1 + math_ops.square(x)), - math_ops.abs, -diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py -index 0237624aa451d..7a1d7c1882af1 100644 ---- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py -+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py -@@ -139,6 +139,11 @@ def assertSameShape(self, x, y): - ] - ) # pyformat: disable - def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args): -+ if test_util.IsBuiltWithROCm(): -+ # TODO(rocm): -+ # This fails on ROCm...see JIRA ticket 236756 -+ self.skipTest('Fails on ROCM') -+ - result = op(x, **extra_args) - - # Run the wrapped op on the dense values, for comparison. - -From d4b8e68a3675bfb2d7465205420bd5ad15701d0b Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 26 Aug 2020 22:01:18 +0000 -Subject: [PATCH 8/8] Adding no_rocm tag to unit-tests that will not pass with - ROCm 3.7 until PR #42288 gets merged - ---- - tensorflow/python/BUILD | 1 + - tensorflow/python/keras/optimizer_v2/BUILD | 2 ++ - 2 files changed, 3 insertions(+) - -diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD -index a111237e0565d..5252ebbed6e4b 100644 ---- a/tensorflow/python/BUILD -+++ b/tensorflow/python/BUILD -@@ -5423,6 +5423,7 @@ cuda_py_test( - python_version = "PY3", - shard_count = 10, - tags = [ -+ "no_rocm", - "no_windows_gpu", - "noasan", # b/159332048 - "nomsan", # b/148630708 -diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD -index b208e2e1e1e6b..11966ce8211d2 100644 ---- a/tensorflow/python/keras/optimizer_v2/BUILD -+++ b/tensorflow/python/keras/optimizer_v2/BUILD -@@ -157,6 +157,7 @@ cuda_py_test( - size = "medium", - srcs = ["adadelta_test.py"], - shard_count = 4, -+ tags = ["no_rocm"], - deps = [ - ":optimizer_v2", - "//tensorflow/python:client_testlib", -@@ -298,6 +299,7 @@ cuda_py_test( - size = "medium", - srcs = ["rmsprop_test.py"], - shard_count = 2, -+ tags = ["no_rocm"], - deps = [ - ":optimizer_v2", - "//tensorflow/python:array_ops", |